├── .gitignore ├── LICENSE ├── Makefile.am ├── NOTICE ├── README.md ├── affinity ├── README.md ├── affinity.h ├── cpu.c ├── gpu.cu ├── makefile.mk ├── mpi+omp.c ├── mpi.c └── omp.c ├── aux ├── deepclean.sh └── load_env.sh ├── bootstrap ├── config └── tap-driver.py ├── configure.ac ├── doc ├── README.md └── mpibind.bib ├── etc ├── Makefile.am └── mpibind.pc.in ├── flux ├── Makefile.am ├── README.md ├── mpibind-flux-ex1.lua ├── mpibind-flux-ex2.lua ├── mpibind-flux.lua.in ├── options.md └── plugin.c ├── gpu-tests ├── makefile.mk ├── orig.mk ├── retrieve.cu ├── simple.cpp ├── visdevs-hwloc.cu └── visdevs.cu ├── python ├── Makefile.am ├── README.md ├── mpibind.py.in ├── mpibind_map.py ├── setup.py ├── test-mpi.py └── test-simple.py ├── slurm ├── Makefile.am ├── README.md └── plugin.c ├── src ├── Makefile.am ├── dev_tests.c ├── hwloc_tests.c ├── hwloc_utils.c ├── hwloc_utils.h ├── internals.c ├── main.c ├── manual.mk ├── mpibind-priv.h ├── mpibind.c ├── mpibind.h └── simple.mk ├── test-suite ├── Makefile.am ├── README.md ├── coral-ea.c ├── coral-lassen.c ├── cts1-quartz.c ├── environment.c ├── epyc-corona.c ├── error.c ├── expected │ ├── expected.coral-ea │ ├── expected.coral-lassen │ ├── expected.coral-lassen.v1 │ ├── expected.cts1-quartz │ ├── expected.epyc-corona │ └── expected.epyc-corona.v1 ├── python │ ├── py-coral-ea.py │ ├── py-coral-lassen.py │ ├── py-cts1-quartz.py │ ├── py-epyc-corona.py │ └── test_utils.py ├── test_utils.c └── test_utils.h ├── topo-xml ├── arm64-ulna-hwloc1.xml ├── cascade-lake-ap-snl-hwloc1.xml ├── coral-butte-hwloc1.xml ├── coral-ea-hwloc1.xml ├── coral-lassen-hwloc1.xml ├── coral-lassen.xml ├── coral-rzansel-hwloc1.xml ├── cts1-pascal.xml ├── cts1-quartz-smt1.xml ├── eas-tioga.xml ├── epyc-corona-hwloc1.xml ├── epyc-corona-p2.xml ├── epyc-corona.xml ├── epyc-dual-sock-hwloc1.xml ├── g4dnmetal.xml ├── knl-quad-cache-hwloc1.xml ├── knl-quad-flat-hwloc1.xml ├── knl-snc4-cache-hwloc1.xml └── knl-snc4-flat-hwloc1.xml └── tutorials ├── common └── archs.md ├── cug23 ├── README.md ├── module1.md ├── module2.md └── module3.md ├── cug24 ├── README.md ├── archs.md ├── module1.md ├── module2.md ├── module3.md └── module4.md ├── eurosys25 ├── README.md ├── module1.md ├── module2.md └── module3.md ├── figures ├── aws-architecture.png ├── aws-g4dn-metal.png ├── cache.png ├── computing-architecture.png ├── corona-merge.png ├── corona-no-cache-io-physical.png ├── corona-no-cache-io.png ├── corona-physical.png ├── corona-web.png ├── corona.pdf ├── corona.png ├── hwloc-objects.png ├── lassen-web.png ├── lassen.pdf ├── lassen.png ├── mammoth.pdf ├── mammoth.png ├── module4_sockets.png ├── module4_specifyplacement.png ├── module4_spread.png ├── module4_threadstocores.png ├── module4_threadstocpus.png ├── module4_threadstosockets.png ├── module5_tioga.png ├── numa.png ├── pascal-web.png ├── pascal.pdf ├── pascal.png ├── poodle-web.png ├── ruby.pdf ├── ruby.png ├── rzadams-web.png ├── rzadams │ ├── FigureA.png │ ├── FigureB.png │ ├── FigureC.png │ ├── FigureD.png │ ├── FigureE.png │ ├── FigureF.png │ ├── FigureG.png │ ├── FigureH.png │ ├── FigureI.png │ └── FigureJ.png ├── sierra.pdf ├── sierra.png ├── tioga-web.png ├── tioga.pdf ├── tioga.png └── tioga │ ├── Tioga-Mod1-Ex5.png │ ├── Tioga-Mod1-Ex6.png │ ├── Tioga-Mod1-Ex7.png │ ├── Tioga-Mod1-noprocs.png │ ├── Tioga-Mod2-Ex1.png │ ├── Tioga-Mod2-Ex10.png │ ├── Tioga-Mod2-Ex11.png │ ├── Tioga-Mod2-Ex2.png │ ├── Tioga-Mod2-Ex4.png │ ├── Tioga-Mod2-Ex5.png │ ├── Tioga-Mod2-Ex6.png │ ├── Tioga-Mod2-Ex7.png │ ├── Tioga-Mod2-Ex8.png │ ├── 
Tioga-Mod2-Ex9.png │ ├── Tioga-Mod3-Ex1a.png │ ├── Tioga-Mod3-Ex1b.png │ ├── Tioga-Mod3-Ex2a.png │ ├── Tioga-Mod3-Ex2b.png │ ├── Tioga-Mod3-Ex3a.png │ ├── Tioga-Mod3-Ex3b.png │ ├── Tioga-Mod3-Ex5a.png │ ├── Tioga-Mod3-Ex5b.png │ ├── Tioga-Mod3-Ex6.png │ ├── figureA.png │ ├── figureB.png │ ├── figureC.png │ ├── figureD.png │ ├── figureE.png │ ├── figureF.png │ ├── figureG.png │ ├── figureH.png │ ├── figureI.png │ ├── figureJ.png │ ├── figureK.png │ ├── figureL.png │ ├── figureM.png │ ├── figureN.png │ ├── tioga-merge.png │ ├── tioga-no-cache-io-physical.png │ └── tioga-no-cache-io.png ├── flux ├── README.md ├── module1.md └── module2.md ├── lanl22 └── README.md ├── main ├── README.md ├── module1.md ├── module2.md ├── module3.md ├── module4.md └── module5.md └── tapia22 ├── README.md ├── Sep-09-1045-Supercomputing-Systems-101.pdf └── tapia-setup-instructions.md /.gitignore: -------------------------------------------------------------------------------- 1 | # http://www.gnu.org/software/automake 2 | Makefile.in 3 | # http://www.gnu.org/software/autoconf 4 | autom4te.cache 5 | compile 6 | configure 7 | aclocal.m4 8 | stamp-h1 9 | aclocal.m4 10 | config.guess 11 | config.sub 12 | depcomp 13 | install-sh 14 | ltmain.sh 15 | missing 16 | config.log 17 | config.status 18 | config.h 19 | config.h.in 20 | config.h.in~ 21 | libtool 22 | .deps/ 23 | .libs/ 24 | libltdl/ 25 | 26 | # libtool pull-ins 27 | /config/libtool.m4 28 | /config/ltoptions.m4 29 | /config/ltsugar.m4 30 | /config/ltversion.m4 31 | /config/lt~obsolete.m4 32 | /config/ar-lib 33 | /config/tap-driver.sh 34 | 35 | # docs intermediate files 36 | /doc/man*/*.xml 37 | /doc/_build 38 | 39 | # Object files 40 | *.o 41 | *.ko 42 | *.obj 43 | *.elf 44 | # Libraries 45 | *.lib 46 | *.a 47 | *.la 48 | *.lo 49 | # Shared objects (inc. Windows DLLs) 50 | *.dll 51 | *.so 52 | *.so.* 53 | *.dylib 54 | # Executables 55 | *.exe 56 | *.out 57 | *.app 58 | *.i*86 59 | *.x86_64 60 | *.hex 61 | *.pyc 62 | *.pyo 63 | # gcov output 64 | *.gcno 65 | *.gcda 66 | # Test files 67 | *.t 68 | 69 | # autoconf-preprocessed 70 | Makefile 71 | *.1 72 | *.3 73 | *.5 74 | *.7 75 | *.8 76 | *.spec 77 | *.pc 78 | 79 | # misc 80 | *.swp 81 | *.diff 82 | *.tar.gz 83 | *.orig 84 | *.core 85 | *.tap 86 | .coverage* 87 | *.trs 88 | *.log 89 | .dirstamp 90 | 91 | # ignore mypy generated cache directory 92 | .mypy_cache 93 | 94 | # ignore local, maybe generated tooling files 95 | compile_commands.json 96 | compile_flags.txt 97 | 98 | # local editor config dirs 99 | .vscode 100 | .idea 101 | .clangd 102 | 103 | # Python virtual environments 104 | .venv*/ 105 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020, Lawrence Livermore National Security, LLC 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Makefile.am: -------------------------------------------------------------------------------- 1 | .NOTPARALLEL: 2 | 3 | SUBDIRS = src test-suite python flux slurm etc 4 | 5 | ACLOCAL_AMFLAGS = -I config 6 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | This work was produced under the auspices of the U.S. Department of 2 | Energy by Lawrence Livermore National Laboratory under Contract 3 | DE-AC52-07NA27344. 4 | 5 | This work was prepared as an account of work sponsored by an agency of 6 | the United States Government. Neither the United States Government nor 7 | Lawrence Livermore National Security, LLC, nor any of their employees 8 | makes any warranty, expressed or implied, or assumes any legal liability 9 | or responsibility for the accuracy, completeness, or usefulness of any 10 | information, apparatus, product, or process disclosed, or represents that 11 | its use would not infringe privately owned rights. 12 | 13 | Reference herein to any specific commercial product, process, or service 14 | by trade name, trademark, manufacturer, or otherwise does not necessarily 15 | constitute or imply its endorsement, recommendation, or favoring by the 16 | United States Government or Lawrence Livermore National Security, LLC. 17 | 18 | The views and opinions of authors expressed herein do not necessarily 19 | state or reflect those of the United States Government or Lawrence 20 | Livermore National Security, LLC, and shall not be used for advertising 21 | or product endorsement purposes. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## A Memory-Driven Mapping Algorithm for Heterogeneous Systems 2 | 3 | `mpibind` is a memory-driven algorithm to map parallel hybrid 4 | applications to the underlying hardware resources transparently, 5 | efficiently, and portably. Unlike other mappings, its primary design point 6 | is the memory system, including the cache hierarchy. Compute elements 7 | are selected based on a memory mapping and not vice versa. In 8 | addition, mpibind embodies a global awareness of hybrid programming 9 | abstractions as well as heterogeneous systems with accelerators. 10 | 11 | ### Getting started 12 | 13 | The easiest way to get `mpibind` is using 14 | [spack](https://github.com/spack/spack). 15 | 16 | ``` 17 | spack install mpibind 18 | 19 | # On systems with NVIDIA GPUs 20 | spack install mpibind+cuda 21 | 22 | # On systems with AMD GPUs 23 | spack install mpibind+rocm 24 | 25 | # More details 26 | spack info mpibind 27 | ``` 28 | 29 | Alternatively, one can build the package manually as described below. 30 | 31 | ### Building and installing 32 | 33 | This project uses GNU Autotools. 
34 | 35 | ``` 36 | $ ./bootstrap 37 | 38 | $ ./configure --prefix= 39 | 40 | $ make 41 | 42 | $ make install 43 | ``` 44 | 45 | If building from a release tarball, please specify MPIBIND_VERSION appropriately. For example: 46 | 47 | ``` 48 | $ MPIBIND_VERSION=0.15.1 ./bootstrap 49 | 50 | $ ./configure --prefix= 51 | 52 | $ make 53 | 54 | $ make install 55 | ``` 56 | 57 | 58 | The resulting library is `/lib/libmpibind` and a simple program using it is `src/main.c` 59 | 60 | 61 | ### Test suite 62 | 63 | ``` 64 | $ make check 65 | ``` 66 | 67 | ### Dependencies 68 | 69 | * `GNU Autotools` is the build system. 70 | 71 | * `hwloc` version 2 is required to detect the machine topology. 72 | 73 | Before building mpibind, make sure `hwloc` can be detected with `pkg-config`: 74 | ``` 75 | pkg-config --variable=libdir --modversion hwloc 76 | ``` 77 | If this fails, add hwloc's pkg-config directory to `PKG_CONFIG_PATH`, e.g., 78 | ``` 79 | export PKG_CONFIG_PATH=$PKG_CONFIG_PATH:/lib/pkgconfig 80 | ``` 81 | 82 | * `libtap` is required to build the test suite. 83 | 84 | To verify `tap` can be detected with `pkg-config`, follow a 85 | similar procedure as for `hwloc` above. 86 | 87 | 88 | ### Contributing 89 | 90 | Contributions for bug fixes and new features are welcome and follow 91 | the GitHub 92 | [fork and pull model](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/about-collaborative-development-models). 93 | Contributors develop on a branch of their personal fork and create 94 | pull requests to merge their changes into the main repository. 95 | 96 | The steps are similar to those of the Flux framework: 97 | 98 | 1. [Fork](https://help.github.com/en/github/getting-started-with-github/fork-a-repo) `mpibind`. 99 | 2. [Clone](https://help.github.com/en/github/getting-started-with-github/fork-a-repo#keep-your-fork-synced) 100 | your fork: `git clone git@github.com:[username]/mpibind.git` 101 | 3. Create a topic branch for your changes: `git checkout -b new_feature` 102 | 4. Create feature or add fix (and add tests if possible) 103 | 5. Make sure everything still passes: `make check` 104 | 6. Push the branch to your GitHub repo: `git push origin new_feature` 105 | 7. Create a pull request against `mpibind` and describe what your 106 | changes do and why you think it should be merged. List any 107 | outstanding *todo* items. 108 | 109 | 110 | ### Authors 111 | 112 | `mpibind` was created by Edgar A. León. 113 | 114 | ### Citing mpibind 115 | 116 | To reference mpibind, please cite one of the 117 | following papers: 118 | 119 | * Edgar A. León and Matthieu Hautreux. *Achieving Transparency Mapping 120 | Parallel Applications: A Memory Hierarchy Affair*. In International 121 | Symposium on Memory Systems, MEMSYS'18, Washington, DC, 122 | October 2018. ACM. 123 | 124 | * Edgar A. León. *mpibind: A Memory-Centric Affinity Algorithm for 125 | Hybrid Applications*. In International Symposium on Memory Systems, 126 | MEMSYS'17, Washington, DC, October 2017. ACM. 127 | 128 | * Edgar A. León, Ian Karlin, and Adam T. Moody. *System Noise 129 | Revisited: Enabling Application Scalability and Reproducibility with 130 | SMT*. In International Parallel & Distributed Processing Symposium, 131 | IPDPS'16, Chicago, IL, May 2016. IEEE. 132 | 133 | Other references: 134 | 135 | * J. P. Dahm, D. F. Richards, A. Black, A. D. Bertsch, L. Grinberg, I. Karlin, S. Kokkila-Schumacher, E. A. León, R. Neely, R. Pankajakshan, and O. Pearce. *Sierra Center of Excellence: Lessons learned*. 
In IBM Journal of Research and Development, vol. 64, no. 3/4, May-July 2020. 136 | 137 | * Edgar A. León. *Cross-Architecture Affinity of Supercomputers*. In International Supercomputing Conference (Research Poster), ISC’19, Frankfurt, Germany, June 2019. 138 | 139 | * Edgar A. León. *Mapping MPI+X Applications to Multi-GPU 140 | Architectures: A Performance-Portable Approach*. In GPU Technology 141 | Conference, GTC'18, San Jose, CA, March 2018. 142 | 143 | 144 | [Bibtex file](doc/mpibind.bib). 145 | 146 | 147 | ### License 148 | 149 | `mpibind` is distributed under the terms of the MIT license. All new 150 | contributions must be made under this license. 151 | 152 | See [LICENSE](LICENSE) and [NOTICE](NOTICE) for details. 153 | 154 | SPDX-License-Identifier: MIT. 155 | 156 | LLNL-CODE-812647. 157 | -------------------------------------------------------------------------------- /affinity/README.md: -------------------------------------------------------------------------------- 1 | 2 | ### Report the mapping of workers to the hardware 3 | 4 | These programs report the mapping of CPUs and GPUs for each process 5 | and thread. There are three variants: 6 | 7 | * MPI: `mpi` 8 | * OpenMP: `omp` 9 | * MPI+OpenMP: `mpi+omp` 10 | 11 | #### Running 12 | 13 | Usage is straightforward. Use the `-v` option for verbose GPU output and 14 | the `-h` option for help. 15 | 16 | ``` 17 | $ srun -n4 ./mpi 18 | node173 Task 0/ 4 running on 4 CPUs: 0,3,6,9 19 | Task 0/ 4 has 2 GPUs: 0x63 0x43 20 | node173 Task 1/ 4 running on 4 CPUs: 12,15,18,21 21 | Task 1/ 4 has 2 GPUs: 0x3 0x27 22 | node173 Task 2/ 4 running on 4 CPUs: 24,27,30,33 23 | Task 2/ 4 has 2 GPUs: 0xe3 0xc3 24 | node173 Task 3/ 4 running on 4 CPUs: 36,39,42,45 25 | Task 3/ 4 has 2 GPUs: 0x83 0xa3 26 | ``` 27 | 28 | ``` 29 | $ srun -n4 ./mpi -v 30 | node173 Task 0/ 4 running on 4 CPUs: 0,3,6,9 31 | Task 0/ 4 has 2 GPUs: 0x63 0x43 32 | Default device: 0x63 33 | 0x63: Vega 20, 31 GB Mem, 60 Multiprocessors, 1.725 GHZ, 9.6 CC 34 | 0x43: Vega 20, 31 GB Mem, 60 Multiprocessors, 1.725 GHZ, 9.6 CC 35 | node173 Task 1/ 4 running on 4 CPUs: 12,15,18,21 36 | Task 1/ 4 has 2 GPUs: 0x3 0x27 37 | Default device: 0x3 38 | 0x03: Vega 20, 31 GB Mem, 60 Multiprocessors, 1.725 GHZ, 9.6 CC 39 | 0x27: Vega 20, 31 GB Mem, 60 Multiprocessors, 1.725 GHZ, 9.6 CC 40 | node173 Task 2/ 4 running on 4 CPUs: 24,27,30,33 41 | Task 2/ 4 has 2 GPUs: 0xe3 0xc3 42 | Default device: 0xe3 43 | 0xe3: Vega 20, 31 GB Mem, 60 Multiprocessors, 1.725 GHZ, 9.6 CC 44 | 0xc3: Vega 20, 31 GB Mem, 60 Multiprocessors, 1.725 GHZ, 9.6 CC 45 | node173 Task 3/ 4 running on 4 CPUs: 36,39,42,45 46 | Task 3/ 4 has 2 GPUs: 0x83 0xa3 47 | Default device: 0x83 48 | 0x83: Vega 20, 31 GB Mem, 60 Multiprocessors, 1.725 GHZ, 9.6 CC 49 | 0xa3: Vega 20, 31 GB Mem, 60 Multiprocessors, 1.725 GHZ, 9.6 CC 50 | ``` 51 | 52 | ``` 53 | $ OMP_NUM_THREADS=4 srun -n2 ./omp 54 | Process running on 1 CPUs: 0 55 | Process has 4 GPUs: 0x63 0x43 0x3 0x27 56 | Default device: 0x63 57 | 0x63: Vega 20, 31 GB Mem, 60 Multiprocessors, 1.725 GHZ, 9.6 CC 58 | 0x43: Vega 20, 31 GB Mem, 60 Multiprocessors, 1.725 GHZ, 9.6 CC 59 | 0x03: Vega 20, 31 GB Mem, 60 Multiprocessors, 1.725 GHZ, 9.6 CC 60 | 0x27: Vega 20, 31 GB Mem, 60 Multiprocessors, 1.725 GHZ, 9.6 CC 61 | Thread 0/ 4 running on 1 CPUs: 0 62 | Thread 0/ 4 assigned to GPU: 0x63 63 | Thread 1/ 4 running on 1 CPUs: 6 64 | Thread 1/ 4 assigned to GPU: 0x43 65 | Thread 2/ 4 running on 1 CPUs: 12 66 | Thread 2/ 4 assigned to GPU: 0x3 67 | Thread 3/ 4 running on 1 CPUs: 18 68 | 
Thread 3/ 4 assigned to GPU: 0x27 69 | 70 | Process running on 1 CPUs: 24 71 | Process has 4 GPUs: 0xe3 0xc3 0x83 0xa3 72 | Default device: 0xe3 73 | 0xe3: Vega 20, 31 GB Mem, 60 Multiprocessors, 1.725 GHZ, 9.6 CC 74 | 0xc3: Vega 20, 31 GB Mem, 60 Multiprocessors, 1.725 GHZ, 9.6 CC 75 | 0x83: Vega 20, 31 GB Mem, 60 Multiprocessors, 1.725 GHZ, 9.6 CC 76 | 0xa3: Vega 20, 31 GB Mem, 60 Multiprocessors, 1.725 GHZ, 9.6 CC 77 | Thread 0/ 4 running on 1 CPUs: 24 78 | Thread 0/ 4 assigned to GPU: 0xe3 79 | Thread 1/ 4 running on 1 CPUs: 30 80 | Thread 1/ 4 assigned to GPU: 0xc3 81 | Thread 2/ 4 running on 1 CPUs: 36 82 | Thread 2/ 4 assigned to GPU: 0x83 83 | Thread 3/ 4 running on 1 CPUs: 42 84 | Thread 3/ 4 assigned to GPU: 0xa3 85 | ``` 86 | 87 | #### Building 88 | 89 | These program are built with a single Makefile. By default typing 90 | `make` will only build the CPU-related programs. To enable GPU 91 | information, the user needs to set an environment variable. 92 | 93 | ``` 94 | # Build with CPU support 95 | $ make -f makefile.mk 96 | 97 | # Build with support for AMD GPUs 98 | $ HAVE_AMD_GPUS=1 make -f makefile.mk 99 | 100 | # Build with support for NVIDIA GPUs 101 | $ HAVE_NVIDIA_GPUS=1 make -f makefile.mk 102 | ``` 103 | 104 | To build with AMD GPU support, the ROCm environment must be 105 | present. Similarly, for NVIDIA support, the CUDA environment must be 106 | present. 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | -------------------------------------------------------------------------------- /affinity/affinity.h: -------------------------------------------------------------------------------- 1 | /*********************************************************** 2 | * Edgar A. Leon 3 | * Lawrence Livermore National Laboratory 4 | ***********************************************************/ 5 | 6 | #ifndef AFFINITY_H_INCLUDED 7 | #define AFFINITY_H_INCLUDED 8 | 9 | #define SHORT_STR_SIZE 32 10 | #define LONG_STR_SIZE 4096 11 | 12 | #ifdef __cplusplus 13 | extern "C" { 14 | #endif 15 | 16 | int get_gpu_count(); 17 | 18 | int get_gpu_pci_id(int dev); 19 | 20 | int get_gpu_affinity(char *buf); 21 | 22 | int get_gpu_info(int dev, char *buf); 23 | 24 | int get_gpu_info_all(char *buf); 25 | 26 | int get_num_cpus(); 27 | 28 | int get_cpu_affinity(char *buf); 29 | 30 | #ifdef __cplusplus 31 | } /* extern "C" */ 32 | #endif 33 | 34 | 35 | #endif 36 | -------------------------------------------------------------------------------- /affinity/cpu.c: -------------------------------------------------------------------------------- 1 | /*********************************************************** 2 | * Edgar A. 
Leon 3 | * Lawrence Livermore National Laboratory 4 | ***********************************************************/ 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | /* __USE_GNU is needed for CPU_ISSET definition */ 11 | #ifndef __USE_GNU 12 | #define __USE_GNU 1 13 | #endif 14 | #include // sched_getaffinity 15 | 16 | 17 | /* 18 | * Convert a non-negative array of ints to a range 19 | */ 20 | int int2range(int *intarr, int size, char *range) 21 | { 22 | int i, curr; 23 | int nc = 0; 24 | int start = -1; 25 | int prev = -2; 26 | 27 | for (i=0; i= 0) 32 | nc += sprintf(range+nc, "-%d", prev); 33 | 34 | /* Record start of range */ 35 | if (prev >= 0) 36 | nc += sprintf(range+nc, ","); 37 | nc += sprintf(range+nc, "%d", curr); 38 | start = curr; 39 | } else 40 | /* The last int is end of range */ 41 | if (i == size-1) 42 | nc += sprintf(range+nc, "-%d", curr); 43 | 44 | prev = curr; 45 | } 46 | 47 | return nc; 48 | } 49 | 50 | 51 | /* 52 | * Get number of processing units (cores or hwthreads) 53 | */ 54 | static 55 | int get_total_num_pus() 56 | { 57 | int pus = sysconf(_SC_NPROCESSORS_ONLN); 58 | 59 | if ( pus < 0 ) 60 | perror("sysconf"); 61 | 62 | return pus; 63 | } 64 | 65 | 66 | 67 | 68 | /* 69 | * Get the affinity. 70 | */ 71 | static 72 | int get_affinity(int *cpus, int *count) 73 | { 74 | int i; 75 | cpu_set_t resmask; 76 | 77 | CPU_ZERO(&resmask); 78 | 79 | int rc = sched_getaffinity(0, sizeof(resmask), &resmask); 80 | if ( rc < 0 ) { 81 | perror("sched_getaffinity"); 82 | return rc; 83 | } 84 | 85 | *count = 0; 86 | int pus = get_total_num_pus(); 87 | for (i=0; i 7 | #include /* Documentation in hip_runtime_api.h */ 8 | #include "affinity.h" /* Do not perform name mangling */ 9 | 10 | 11 | int get_gpu_count() 12 | { 13 | /* 14 | Surprinsingly, I must set 'count' to zero before 15 | passing it to cudaGetDeviceCount(&count) 16 | If CUDA_VISIBLE_DEVICES is set to '', calling 17 | this function will not set a value for count. 18 | Then, count will be used uninitialized and 19 | most likely the program will segfault. 
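     (As a defensive note, checking the error code returned by
     cudaGetDeviceCount would also make this failure explicit,
     rather than relying on the pre-initialized value.)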
20 | */ 21 | int count=0; 22 | 23 | cudaGetDeviceCount(&count); 24 | 25 | return count; 26 | } 27 | 28 | 29 | int get_gpu_pci_id(int dev) 30 | { 31 | int value = -1; 32 | cudaError_t err = cudaDeviceGetAttribute(&value, cudaDevAttrPciBusId, dev); 33 | 34 | if ( err ) 35 | fprintf(stderr, "Could not get PCI ID for GPU %d\n", dev); 36 | 37 | return value; 38 | } 39 | 40 | 41 | int get_gpu_affinity(char *buf) 42 | { 43 | int count=0; 44 | cudaGetDeviceCount(&count); 45 | 46 | int nc=0; 47 | int i; 48 | for (i=0; i> 30); 89 | nc += sprintf(buf+nc, "\tMultiprocessor count: %d\n", prop.multiProcessorCount); 90 | nc += sprintf(buf+nc, "\tClock rate: %.3f Ghz\n", ghz); 91 | nc += sprintf(buf+nc, "\tCompute capability: %d.%d\n", 92 | prop.major, prop.minor); 93 | nc += sprintf(buf+nc, "\tECC enabled: %d\n", prop.ECCEnabled); 94 | #else 95 | nc += sprintf(buf+nc, "\t0x%.2x: %s, %lu GB Mem, " 96 | "%d Multiprocessors, %.3f GHZ, %d.%d CC\n", 97 | prop.pciBusID, prop.name, prop.totalGlobalMem >> 30, 98 | prop.multiProcessorCount, ghz, prop.major, prop.minor); 99 | #endif 100 | 101 | return nc; 102 | } 103 | 104 | 105 | int get_gpu_info_all(char *buf) 106 | { 107 | cudaError_t err; 108 | int i, myid, count=0; 109 | int nc=0; 110 | 111 | cudaGetDeviceCount(&count); 112 | err = cudaGetDevice(&myid); 113 | if ( err ) { 114 | fprintf(stderr, "Could not get default device\n"); 115 | return -1; 116 | } 117 | 118 | char pcibusid[SHORT_STR_SIZE]; 119 | cudaDeviceGetPCIBusId(pcibusid, sizeof(pcibusid), myid); 120 | nc += sprintf(buf+nc, "\tDefault device: %s\n", pcibusid); 121 | 122 | for (i=0; i $@ 76 | 77 | 78 | clean: 79 | rm -f *.o *~ $(PROGS) gpu.cpp 80 | 81 | 82 | 83 | -------------------------------------------------------------------------------- /affinity/mpi+omp.c: -------------------------------------------------------------------------------- 1 | /*********************************************************** 2 | * Edgar A. 
Leon 3 | * Lawrence Livermore National Laboratory 4 | ***********************************************************/ 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include "affinity.h" 11 | 12 | 13 | static 14 | void usage(char *name) 15 | { 16 | printf("Usage: %s [options]\n", name); 17 | printf("\t -mpi: Show MPI info only (no OpenMP)\n"); 18 | printf("\t-verbose: Show detailed GPU info when -mpi enabled\n"); 19 | printf("\t -help: Show this page\n"); 20 | } 21 | 22 | 23 | int main(int argc, char *argv[]) 24 | { 25 | char buf[LONG_STR_SIZE]; 26 | char hostname[MPI_MAX_PROCESSOR_NAME]; 27 | int rank, np, size, i, ngpus, ncpus; 28 | int verbose = 0; 29 | int help = 0; 30 | int mpi = 0; 31 | int nc = 0; 32 | 33 | /* Command-line options */ 34 | if (argc > 1) 35 | for (i=1; i= 0 ) 38 | verbose = 1; 39 | else if ( strcmp(argv[i], "-m") >= 0 ) 40 | mpi = 1; 41 | else if ( strcmp(argv[i], "-h") >= 0 ) 42 | help = 1; 43 | } 44 | 45 | MPI_Init(&argc, &argv); 46 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 47 | MPI_Comm_size(MPI_COMM_WORLD, &np); 48 | MPI_Get_processor_name(hostname, &size); 49 | 50 | if (help) { 51 | if (rank == 0) 52 | usage(argv[0]); 53 | 54 | MPI_Finalize(); 55 | return 0; 56 | } 57 | 58 | if ( mpi ) { 59 | 60 | /* MPI */ 61 | ncpus = get_num_cpus(); 62 | nc += sprintf(buf+nc, "%s Task %2d/%2d with %d cpus: ", 63 | hostname, rank, np, ncpus); 64 | nc += get_cpu_affinity(buf+nc); 65 | #ifdef HAVE_GPUS 66 | ngpus = get_gpu_count(); 67 | nc += sprintf(buf+nc, "%s Task %2d/%2d with %d gpus: ", 68 | hostname, rank, np, ngpus); 69 | nc += get_gpu_affinity(buf+nc); 70 | if (verbose) 71 | nc += get_gpu_info_all(buf+nc); 72 | #endif 73 | 74 | /* Print per-task information */ 75 | printf("%s", buf); 76 | 77 | } else { 78 | 79 | /* MPI+OpenMP */ 80 | #ifdef HAVE_GPUS 81 | ngpus = get_gpu_count(); 82 | #endif 83 | 84 | #pragma omp parallel firstprivate(buf, nc) private(ncpus) shared(rank, np, ngpus, verbose) 85 | { 86 | int tid = omp_get_thread_num(); 87 | int nthreads = omp_get_num_threads(); 88 | ncpus = get_num_cpus(); 89 | 90 | nc += sprintf(buf+nc, "%s Task %3d/%3d Thread %3d/%3d with %2d cpus: ", 91 | hostname, rank, np, tid, nthreads, ncpus); 92 | nc += get_cpu_affinity(buf+nc); 93 | #ifdef HAVE_GPUS 94 | nc += sprintf(buf+nc, "%s Task %3d/%3d Thread %3d/%3d with %2d gpus: ", 95 | hostname, rank, np, tid, nthreads, ngpus); 96 | nc += get_gpu_affinity(buf+nc); 97 | #endif 98 | 99 | /* Print per-worker information */ 100 | printf("%s", buf); 101 | } 102 | 103 | } 104 | 105 | MPI_Finalize(); 106 | return 0; 107 | } 108 | -------------------------------------------------------------------------------- /affinity/mpi.c: -------------------------------------------------------------------------------- 1 | /*********************************************************** 2 | * Edgar A. Leon 3 | * Lawrence Livermore National Laboratory 4 | ***********************************************************/ 5 | 6 | #include 7 | #include 8 | #include 9 | #include "affinity.h" 10 | 11 | 12 | int main(int argc, char *argv[]) 13 | { 14 | char buf[LONG_STR_SIZE]; 15 | char hostname[MPI_MAX_PROCESSOR_NAME]; 16 | int rank, np, size, i; 17 | int verbose = 0; 18 | int ncpus = get_num_cpus(); 19 | int nc = 0; 20 | 21 | /* Get rid of compiler warning. Ay. 
*/ 22 | (void) verbose; 23 | 24 | /* Command-line options */ 25 | if (argc > 1) 26 | for (i=1; i 7 | #include 8 | #include 9 | #include "affinity.h" 10 | 11 | 12 | int main(int argc, char *argv[]) 13 | { 14 | char buf[LONG_STR_SIZE]; 15 | int i; 16 | int ncpus = get_num_cpus(); 17 | int verbose = 0; 18 | int nc = 0; 19 | 20 | /* Get rid of compiler warning. Ay. */ 21 | (void) verbose; 22 | 23 | /* Command-line options */ 24 | if (argc > 1) 25 | for (i=1; i= 2.1], [ 45 | hwloc_dir=`$PKG_CONFIG --variable=libdir hwloc` 46 | AC_MSG_NOTICE([$hwloc_dir]) 47 | ] 48 | ) 49 | 50 | # In newer versions of pkgconf, I could use 51 | # PKG_HAVE_WITH_MODULES and PKG_CHECK_VAR. 52 | # I could also request min-version above with 53 | # PKG_PROG_PKG_CONFIG([MIN-VERSION]) 54 | PKG_CHECK_MODULES([TAP], [tap], [ 55 | libtap=true 56 | libtap_dir=`$PKG_CONFIG --variable=libdir tap` 57 | AC_SUBST(TAP_LIBDIR, $libtap_dir) 58 | AC_REQUIRE_AUX_FILE([tap-driver.sh]) 59 | AC_MSG_NOTICE([$libtap_dir]) 60 | ], 61 | [AC_MSG_NOTICE([C test suite will not be built])] 62 | ) 63 | AM_CONDITIONAL([HAVE_LIBTAP], [test x$libtap = xtrue]) 64 | # PKG_CHECK_VAR([TAP_LIBDIR], [tap], [libdir], 65 | # [libtap_libdir=true], 66 | # [AC_MSG_NOTICE([TAP's libdir not found])] 67 | # ) 68 | 69 | PKG_CHECK_MODULES([FLUX_CORE], [flux-core], [ 70 | flux_core=true 71 | flux_dir=`$PKG_CONFIG --variable=libdir flux-core` 72 | AC_MSG_NOTICE([$flux_dir]) 73 | ], 74 | [AC_MSG_NOTICE([Flux plugin will not be built])] 75 | ) 76 | AM_CONDITIONAL([HAVE_FLUX_CORE], [test x$flux_core = xtrue]) 77 | #PKG_CHECK_MODULES([FLUX_CORE], [flux-core], [ 78 | # fluxcore=true 79 | # flux_shell_plugin_dir=`$PKG_CONFIG --variable=libdir flux-core` 80 | # flux_shell_plugin_dir+=/flux/shell/plugins 81 | # AC_SUBST(FLUX_SHELL_PLUGIN_DIR, $flux_shell_plugin_dir) 82 | # ], 83 | # [AC_MSG_NOTICE([Flux plugin will not be built])] 84 | #) 85 | #AM_CONDITIONAL([HAVE_FLUX_CORE], [test x$fluxcore = xtrue]) 86 | 87 | PKG_CHECK_MODULES([SLURM], [slurm], [ 88 | slurm=true 89 | slurm_dir=`$PKG_CONFIG --variable=includedir slurm` 90 | AC_MSG_NOTICE([$slurm_dir]) 91 | ], 92 | [AC_MSG_NOTICE([Slurm plugin will not be built])] 93 | ) 94 | AM_CONDITIONAL([HAVE_SLURM], [test x$slurm = xtrue]) 95 | 96 | # Notes 97 | # AC_DEFINE([HAVE_LIBTAP], 1, [Define libtap to build the test suite]) 98 | # AC_CHECK_FUNC([hwloc_topology_set_all_types_filter]) 99 | # AC_CHECK_HEADERS([pkgconf/libpkgconf/libpkgconf.h]) 100 | # defines HAVE_LIBTAP and prepends -ltap to LIBS 101 | # AC_CHECK_LIB(tap, plan) 102 | # AC_SEARCH_LIBS([pkgconf_pkg_free], [pkgconf], 103 | # [AC_MSG_NOTICE([Greetings from pkgconf])], 104 | # [AC_MSG_ERROR([unable to find pkgconf_pkg_free()]) 105 | # ]) 106 | # AS_IF(test-1, [run-if-true-1], ..., [run-if-false]) 107 | # AC_SUBST(SLURM_INCDIR, $slurm_dir) 108 | 109 | # Define pkgconfigdir to install mpibind.pc 110 | PKG_INSTALLDIR 111 | 112 | # Path for mpibind modules, e.g., flux plugin 113 | AS_VAR_SET(mpibindmoddir, $libdir/mpibind) 114 | AC_SUBST(mpibindmoddir) 115 | 116 | 117 | ## Dependencies for Python bindings 118 | # AC_CHECK_PYMOD(module, [action-if-found], [action-if-not-found]) 119 | # ---------------------------------------------------------------- 120 | # Didn't use AC_CACHE_CHECK because AC_CHECK_PYMOD 121 | # may be called multiple times, but with different arguments! 
122 | AC_DEFUN([AC_CHECK_PYMOD], 123 | [AC_REQUIRE([AM_PATH_PYTHON]) 124 | AC_MSG_CHECKING([for $1 in python]) 125 | have_pymod=no 126 | prog=" 127 | import sys 128 | try: 129 | import $1 130 | except ImportError: 131 | sys.exit(1) 132 | except: 133 | sys.exit(0) 134 | sys.exit(0)" 135 | ($PYTHON -c "$prog") && have_pymod=yes 136 | AC_MSG_RESULT($have_pymod) 137 | if test "$have_pymod" = yes; then 138 | ifelse([$2], [], true, [$2]) 139 | else 140 | ifelse([$3], [], true, [$3]) 141 | fi]) 142 | 143 | AM_PATH_PYTHON([3],, [:]) 144 | AM_CONDITIONAL([HAVE_PYTHON], [test "$PYTHON" != :]) 145 | 146 | AC_CHECK_PYMOD(cffi, 147 | have_cffi=true, 148 | AC_MSG_NOTICE([Python bindings and test suite will not be built])) 149 | AM_CONDITIONAL([HAVE_CFFI], [test x$have_cffi = xtrue]) 150 | 151 | AC_CHECK_PYMOD(pycotap, 152 | have_pycotap=true, 153 | AC_MSG_NOTICE([Python test suite will not be built])) 154 | AM_CONDITIONAL([HAVE_PYCOTAP], [test x$have_pycotap = xtrue]) 155 | 156 | #AM_COND_IF(HAVE_CFFI, [echo "cffi yes!"], [echo "cffi no!"]) 157 | #AM_COND_IF(HAVE_PYCOTAP, [echo "pycotap yes!"], [echo "pycotap no!"]) 158 | 159 | 160 | ## Epilogue 161 | AC_CONFIG_FILES([ 162 | Makefile 163 | src/Makefile 164 | test-suite/Makefile 165 | python/Makefile 166 | flux/Makefile 167 | slurm/Makefile 168 | etc/Makefile 169 | etc/mpibind.pc 170 | ]) 171 | 172 | AC_OUTPUT 173 | -------------------------------------------------------------------------------- /doc/README.md: -------------------------------------------------------------------------------- 1 | # Mapping Applications to Heterogeneous Systems 2 | 3 | ## mpibind 4 | 5 | * [Slurm Plugin and User Options](https://github.com/LLNL/mpibind/blob/master/slurm/README.md) 6 | * [Slurm Tutorial](https://github.com/LLNL/mpibind/blob/master/tutorials/main/module2.md) 7 | * [Flux Plugin](https://github.com/LLNL/mpibind/blob/master/flux/README.md) 8 | * [Flux User Options](https://github.com/LLNL/mpibind/blob/master/flux/options.md) 9 | * [Flux Tutorial](https://github.com/LLNL/mpibind/blob/master/tutorials/flux/module2.md) 10 | 11 | 12 | ## Conference Tutorials 13 | 14 | [Bridging Applications and Hardware](https://github.com/LLNL/mpibind/blob/master/tutorials/eurosys25/README.md) 15 | * 4th Tutorial on Mapping and Affinity (MAP) 16 | * ASPLOS and EuroSys 17 | * March 2025, Rotterdam, The Netherlands 18 | 19 | [Supercomputer Affinity on HPE Systems](https://github.com/LLNL/mpibind/blob/master/tutorials/cug24/README.md) 20 | * 3rd Tutorial on Mapping and Affinity (MAP) 21 | * Cray User Group 2024 22 | * May 2024, Perth, Australia 23 | 24 | [Supercomputer Affinity on HPE Systems](https://github.com/LLNL/mpibind/blob/master/tutorials/cug23/README.md) 25 | * 2nd Tutorial on Mapping and Affinity (MAP) 26 | * Cray User Group 2023 27 | * May 2023, Helsinki, Finland 28 | 29 | [Supercomputing Systems 101](https://github.com/LLNL/mpibind/tree/master/tutorials/tapia22/README.md) 30 | * 2022 CMD-IT/ACM Richard Tapia Celebration of Diversity in Computing Conference 31 | * September 2022, Washington, D.C. 
32 | 33 | Supercomputer Affinity 34 | * 1st Tutorial on Mapping and Affinity (MAP) 35 | * CEA/EDF/Inria 2022 Summer School on Informatics: Hybrid and Asynchronous High-Performance Programming 36 | * July 2022, Reims, France 37 | 38 | [Machine Topology and Binding](https://github.com/LLNL/mpibind/blob/master/tutorials/lanl22/README.md) 39 | * Los Alamos Parallel Computing Summer Lecture Series 40 | * June 2022, Virtual 41 | 42 | 43 | -------------------------------------------------------------------------------- /doc/mpibind.bib: -------------------------------------------------------------------------------- 1 | 2 | ## mpibind for two-level memory systems 3 | @InProceedings{ leon.memsys18, 4 | author = {Edgar A. Le{\'o}n and Matthieu Hautreux}, 5 | title = {Achieving Transparency Mapping Parallel Applications: A Memory Hierarchy Affair}, 6 | booktitle = {International Symposium on Memory Systems}, 7 | series = {MEMSYS'18}, 8 | publisher = {ACM}, 9 | address = {Washington, DC}, 10 | year = {2018}, 11 | month = oct 12 | } 13 | 14 | ## mpibind for multi-GPU systems 15 | @InProceedings{ leon.gtc18, 16 | author = {Edgar A. Le{\'o}n}, 17 | title = {Mapping {MPI+X} Applications to Multi-{GPU} Architectures: A Performance-Portable Approach}, 18 | booktitle = {GPU Technology Conference}, 19 | series = {GTC'18}, 20 | address = {San Jose, CA}, 21 | year = {2018}, 22 | month = mar 23 | } 24 | 25 | ## The initial mpibind algorithm 26 | @InProceedings{ leon.memsys17, 27 | author = {Edgar A. Le{\'o}n}, 28 | title = {{mpibind}: A Memory-Centric Affinity Algorithm for Hybrid Applications}, 29 | booktitle = {International Symposium on Memory Systems}, 30 | series = {MEMSYS'17}, 31 | publisher = {ACM}, 32 | address = {Washington, DC}, 33 | year = {2017}, 34 | month = oct 35 | } 36 | 37 | ## Using mpibind to reduce system noise through thread specialization 38 | @Inproceedings{ leon.ipdps16, 39 | author = {Edgar A. Le{\'o}n and Ian Karlin and Adam T. Moody}, 40 | title = {System Noise Revisited: Enabling Application Scalability and Reproducibility with {SMT}}, 41 | booktitle = {International Parallel \& Distributed Processing Symposium}, 42 | series = {IPDPS'16}, 43 | publisher = {IEEE}, 44 | address = {Chicago, IL}, 45 | year = {2016}, 46 | month = may 47 | } 48 | 49 | ## mpibind on IBM Spectrum LSF 50 | @Article{ dahm.ea:ibm20 51 | author = {J. P. Dahm and D. F. Richards and A. Black and A. D. 52 | Bertsch and L. Grinberg and I. Karlin and S. 53 | Kokkila-Schumacher and \Edgar and R. Neely and R. 54 | Pankajakshan and O. Pearce}, 55 | journal = {IBM Journal of Research and Development}, 56 | title = {{Sierra Center of Excellence}: Lessons Learned}, 57 | volume = {64}, 58 | number = {3/4}, 59 | month = may, 60 | year = {2020}, 61 | pages = {2:1--2:14}, 62 | doi = {10.1147/JRD.2019.2961069}, 63 | issn = {0018-8646} 64 | } 65 | 66 | ## Three case studies for mpibind 67 | @InProceedings{ :19:cross-architecture, 68 | author = {Edgar A. 
Le{\'o}n}, 69 | title = {Cross-Architecture Affinity of Supercomputers}, 70 | booktitle = {International Supercomputing Conference; Research Poster}, 71 | series = {ISC'19}, 72 | address = {Frankfurt, Germany}, 73 | year = 2019, 74 | month = jun 75 | } 76 | 77 | -------------------------------------------------------------------------------- /etc/Makefile.am: -------------------------------------------------------------------------------- 1 | 2 | #if WITH_PKG_CONFIG 3 | pkgconfig_DATA = mpibind.pc 4 | #endif 5 | -------------------------------------------------------------------------------- /etc/mpibind.pc.in: -------------------------------------------------------------------------------- 1 | prefix=@prefix@ 2 | exec_prefix=@exec_prefix@ 3 | libdir=@libdir@ 4 | includedir=@includedir@ 5 | plugindir=@libdir@/mpibind 6 | 7 | Name: mpibind 8 | Description: A memory-driven mapping algorithm for heterogeneous systems 9 | URL: https://github.com/LLNL/mpibind 10 | Version: @PACKAGE_VERSION@ 11 | Requires: hwloc >= 2.1 12 | Cflags: -I${includedir} 13 | Libs: -L${libdir} -lmpibind -------------------------------------------------------------------------------- /flux/Makefile.am: -------------------------------------------------------------------------------- 1 | 2 | ####################################################### 3 | # libmpibind_flux 4 | ####################################################### 5 | 6 | #if HAVE_FLUX_CORE 7 | #mpibindmod_LTLIBRARIES = mpibind_flux.la 8 | # Not installing the lua file anymore 9 | #mpibindmod_SCRIPTS = mpibind_flux.lua 10 | 11 | # The mpibind plugin for Flux depends on a Flux header. 12 | # Therefore, when using Spack, Flux is a dependency 13 | # of mpibind. To install mpibind and Flux, one would use: 14 | # spack install mpibind+flux 15 | # There's an issue though: mpibind may not be able 16 | # to install the plugin into the flux plugins directory, 17 | # e.g., when Flux is a system-wide distribution. 18 | # To work around this Flux has created an environment 19 | # variable that allows loading a plugin without writing 20 | # it to Flux's installation directory. 21 | # 22 | # Install mpibind_flux.so into the flux shell 23 | # plugin path so it is loaded by default with 24 | # 'plugin.load("*.so")'. 25 | # plugin_name = ${mpibindmod_LTLIBRARIES:la=so} 26 | # install-exec-hook: 27 | # $(AM_V_at)echo Installing the mpibind flux plugin... 28 | # $(MKDIR_P) $(FLUX_SHELL_PLUGIN_DIR) && \ 29 | # $(INSTALL) $(builddir)/.libs/$(plugin_name) $(FLUX_SHELL_PLUGIN_DIR)/ 30 | 31 | # Install using a symbolic link 32 | # install-exec-hook: 33 | # $(AM_V_at)echo Installing the mpibind flux plugin... 34 | # cd /g/g99/leon/firefall && \ 35 | # $(MKDIR_P) $(FLUX_SHELL_PLUGIN_DIR) && \ 36 | # cd $(FLUX_SHELL_PLUGIN_DIR) && \ 37 | # (test -e $(plugin_name) && rm $(plugin_name)) && \ 38 | # $(LN_S) $(mpibindmoddir)/$(plugin_name) . 
39 | #endif 40 | 41 | # The build directory of the plugin 42 | plugin_int_dir = $(abs_top_srcdir)/flux/.libs 43 | 44 | if HAVE_FLUX_CORE 45 | ## The Flux plugin 46 | mpibindmod_LTLIBRARIES = mpibind_flux.la 47 | 48 | ## Script to load the plugin--to be used by Flux 49 | pkgdata_SCRIPTS = mpibind-flux.lua 50 | CLEANFILES = $(pkgdata_SCRIPTS) 51 | 52 | # Script to load the Flux plugin 53 | # from the installation directory 54 | install-data-hook: 55 | sed -i.tmp 's|$(plugin_int_dir)|$(libdir)/mpibind|g' \ 56 | $(pkgdatadir)/mpibind-flux.lua && \ 57 | rm $(pkgdatadir)/mpibind-flux.lua.tmp 58 | endif 59 | 60 | # The Flux plugin 61 | mpibind_flux_la_SOURCES = plugin.c 62 | mpibind_flux_la_CFLAGS = -Wall -Werror -I$(top_srcdir)/src 63 | mpibind_flux_la_CFLAGS += $(HWLOC_CFLAGS) $(FLUX_CORE_CFLAGS) 64 | mpibind_flux_la_LIBADD = $(top_builddir)/src/libmpibind.la 65 | mpibind_flux_la_LDFLAGS = -module 66 | 67 | # Script to load the Flux plugin 68 | # from the '.libs' directory 69 | mpibind-flux.lua: mpibind-flux.lua.in Makefile 70 | sed 's|[@]fluxplugindir[@]|$(plugin_int_dir)|g' \ 71 | mpibind-flux.lua.in > $@ 72 | 73 | # If mpibind was already installed: 74 | #mpibind_flux_la_CFLAGS = -Wall -Werror $(MPIBIND_CFLAGS) 75 | #mpibind_flux_la_LIBADD = $(MPIBIND_LIBS) 76 | -------------------------------------------------------------------------------- /flux/README.md: -------------------------------------------------------------------------------- 1 | 2 | ## The mpibind Flux Plugin 3 | 4 | The `mpibind_flux.so` plugin enables the use of the mpibind algorithm 5 | in Flux to map parallel codes to the hardware. It replaces Flux's 6 | cpu-affinity and gpu-affinity modules. 7 | 8 | 20 | 21 | ### Installing the plugin in Flux 22 | 23 | The `mpibind_flux.so` plugin is installed here: 24 | ``` 25 | /lib/mpibind/ 26 | 27 | # It can be obtained with the command 28 | pkg-config --variable=plugindir mpibind 29 | ``` 30 | 31 | There are many ways to load a plugin into Flux. Here, I outline three. 32 | 1. Extend the Flux plugin search path. 33 | ``` 34 | export FLUX_SHELL_RC_PATH=/share/mpibind 35 | ``` 36 | 2. Add to the Flux shell plugins directory. 37 | ``` 38 | # Copy or link mpibind_flux.so to the Flux shell plugins directory 39 | cp mpibind_flux.so /lib/flux/shell/plugins/ 40 | 41 | # The plugins directory can be obtained as follows 42 | pkg-config --variable=fluxshellpluginpath flux-core 43 | ``` 44 | This method assumes write access to the Flux shell plugins directory, i.e., one owns the Flux installation. 45 | 46 | 3. Load the plugin explicitly at runtime. 47 | 48 | One can create a job shell `initrc` file (e.g., mpibind-flux.lua) that will load the mpibind plugin: 49 | ``` 50 | -- mpibind-flux.lua 51 | 52 | plugin.load { file="/flux/.libs/mpibind_flux.so" } 53 | ``` 54 | Load the plugin explicitly every time a program is run, e.g., 55 | ``` 56 | flux run -n2 -o initrc=mpibind-flux.lua hostname 57 | ``` 58 | Make sure to specify the path of `mpibind-flux.lua` and, within the lua 59 | script, make sure the location of `mpibind_flux.so` is accurate. 60 | 61 | To verify the mpibind flux plugin was loaded successfully, one can use the Flux verbose option: 62 | ``` 63 | flux run -n2 -o initrc=mpibind-flux.lua -o verbose=1 hostname 64 | ``` 65 | 66 | ### Usage 67 | 68 | Using the mpibind plugin should be transparent to the user, i.e., no additional parameters to `flux run` should be needed to execute the plugin. 
To verify that indeed the plugin has been loaded one can run the following: 69 | 70 | ``` 71 | flux run -n2 -o mpibind=verbose:1 hostname 72 | ``` 73 | 74 | To disable the plugin and enable Flux's cpu-affinity module: 75 | 76 | ``` 77 | flux run -n2 -o mpibind=off -o cpu-affinity=on hostname 78 | ``` 79 | 80 | The options of mpibind are documented [here](options.md). A [tutorial](../tutorials/flux/README.md) is also available. 81 | 82 | 83 | ### Other details about Flux 84 | 85 | You need at least `v0.17.0` of `flux-core` built with `hwloc v2.1` or 86 | above. 87 | 88 | Verify hwloc's installation and version: 89 | ``` 90 | pkg-config --variable=libdir --modversion hwloc 91 | ``` 92 | If this fails, add hwloc's pkgconf directory to `PKG_CONFIG_PATH`, e.g., 93 | ``` 94 | export PKG_CONFIG_PATH=$PKG_CONFIG_PATH:/lib/pkgconfig 95 | ``` 96 | 97 | Configure and build `flux-core` against `hwloc v2.1+` and install into 98 | `flux-install-dir`. 99 | ``` 100 | flux-core$ ./configure --prefix= 101 | 102 | flux-core$ make -j 24 103 | ``` 104 | 105 | Ensure Flux was built with `hwloc v2.1+`: 106 | ``` 107 | flux-core$ src/cmd/flux version 108 | commands: 0.18.0-120-g96b3edc 109 | libflux-core: 0.18.0-120-g96b3edc 110 | build-options: +hwloc==2.1.0 111 | ``` 112 | 113 | Then install into the prefix path: 114 | ``` 115 | flux-core$ make install 116 | ``` 117 | 118 | (Optional) Build and install `flux-sched` to the same installation 119 | path. 120 | 121 | Add Flux to `pkg-config`: 122 | ``` 123 | export PKG_CONFIG_PATH=$PKG_CONFIG_PATH:/lib/pkgconfig 124 | ``` 125 | 126 | 127 | 128 | 159 | 160 | -------------------------------------------------------------------------------- /flux/mpibind-flux-ex1.lua: -------------------------------------------------------------------------------- 1 | 2 | -- While it requires less user intervention, as it tries to figure 3 | -- locations and paths for the user, this script may be overly 4 | -- complex. This file (.in) was intended to be read by autoconf 5 | -- to generate the final lua file. 6 | 7 | 8 | -- initrc file to load the mpibind plugin into flux. 9 | -- To use, add the following to to the flux mini run command 10 | -- '-o initrc=mpibind_flux.lua' 11 | 12 | 13 | -- Disable Flux's 'cpu- and gpu-affinity' 14 | --shell.options['cpu-affinity'] = "off" 15 | --shell.options['gpu-affinity'] = "off" 16 | 17 | 18 | -- This construct allows a site to set default mpibind parameters. 19 | -- Note that by default when the plugin is loaded, mpibind is on 20 | -- even when '-o mpibind' is not used. 21 | --if not shell.options.mpibind then 22 | -- shell.options.mpibind = on 23 | --end 24 | 25 | 26 | -- Load the system initrc.lua to get the system plugins 27 | -- Todo: In the future, this won't be necessary: 28 | -- Use '-o userrc=mpibind.lua' instead of '-o initrc=mpibind.lua. 29 | -- https://github.com/flux-framework/flux-core/pull/3132 30 | source_if_exists(os.getenv("FLUX_DIR").."/etc/flux/shell/initrc.lua") 31 | 32 | 33 | -- Load the mpibind plugin into flux-shell 34 | -- Can use shell.log() or shell.debug() for output 35 | shell.debug("Flux plugin search path: "..plugin.searchpath) 36 | 37 | -- Look for the mpibind flux plugin in the mpibind installation 38 | --f = assert(io.popen("pkg-config --variable=libdir mpibind")) 39 | --s = f:read("*l") 40 | --if s then 41 | -- sofile = s .. 
"/mpibind/mpibind_flux.so" 42 | -- if not io.open(sofile) then 43 | -- sofile = nil 44 | -- end 45 | --end 46 | sofile = /g/g99/leon/firefall/nick/install/lib/mpibind/mpibind_flux.so 47 | if not io.open(sofile) then 48 | sofile = nil 49 | end 50 | 51 | -- If not found, look for the MPIBIND_FLUX_PLUGIN env var 52 | varname = "MPIBIND_FLUX_PLUGIN" 53 | if sofile == nil then 54 | sofile = os.getenv(varname) 55 | if sofile and not io.open(sofile) then 56 | sofile = nil 57 | end 58 | end 59 | 60 | if sofile == nil then 61 | shell.log("Could not find mpibind flux plugin.\n" .. 62 | "\tMake sure /g/g99/leon/firefall/nick/install/lib/mpibind/mpibind_flux.so exists or\n" .. 63 | "\texport " .. varname .. "=/mpibind_flux.so") 64 | else 65 | shell.log("Loading plugin: "..sofile) 66 | plugin.load { file = sofile } 67 | --plugin.load { file = "./.libs/mpibind_flux.so", conf = {} } 68 | end 69 | 70 | 71 | 72 | 73 | 74 | -------------------------------------------------------------------------------- /flux/mpibind-flux-ex2.lua: -------------------------------------------------------------------------------- 1 | -- initrc file to load the mpibind plugin into flux. 2 | -- To use, add the following to to the flux mini run command 3 | -- '-o initrc=mpibind_flux.lua' 4 | 5 | 6 | -- This construct allows a site to set default mpibind parameters. 7 | -- Note that by default when the plugin is loaded, mpibind is on 8 | -- even when '-o mpibind' is not used. 9 | --if not shell.options.mpibind then 10 | -- shell.options.mpibind = on 11 | --end 12 | 13 | 14 | -- Load the system initrc.lua to get the system plugins 15 | -- Todo: In the future, this won't be necessary: 16 | -- Use '-o userrc=mpibind.lua' instead of '-o initrc=mpibind.lua. 17 | -- https://github.com/flux-framework/flux-core/pull/3132 18 | source_if_exists(os.getenv("FLUX_DIR").."/etc/flux/shell/initrc.lua") 19 | 20 | 21 | -- Load the mpibind plugin into flux-shell 22 | shell.debug("Flux plugin search path: "..plugin.searchpath) 23 | -- sofile = /lib/mpibind/mpibind_flux.so 24 | sofile = "./.libs/mpibind_flux.so" 25 | shell.debug("Loading plugin: " .. sofile) 26 | plugin.load { file = sofile, conf = {} } 27 | 28 | -------------------------------------------------------------------------------- /flux/mpibind-flux.lua.in: -------------------------------------------------------------------------------- 1 | 2 | -- Load the mpibind plugin for Flux 3 | plugin.load ("@fluxplugindir@/mpibind_flux.so") 4 | -------------------------------------------------------------------------------- /flux/options.md: -------------------------------------------------------------------------------- 1 | ## mpibind options 2 | 3 | These are the options and environment variables to control mpibind: 4 | 5 | ### Options 6 | 7 | ``` 8 | -o mpibind=off|on 9 | -o mpibind=verbose 10 | -o mpibind=smt: 11 | -o mpibind=greedy:0|1 12 | -o mpibind=gpu_optim:0|1 13 | -o mpibind=omp_places|omp_proc_bind|visible_devices 14 | ``` 15 | 16 | When setting more than one option, use commas to separate them, e.g., `-o mpibind=smt:2,verbose`. 
17 | 18 | ### Environment variables 19 | 20 | ``` 21 | MPIBIND_RESTRICT_TYPE=cpu|mem 22 | MPIBIND_RESTRICT= 23 | MPIBIND_TOPOFILE= 24 | FLUX_MPIBIND_USE_TOPOFILE= 25 | ``` 26 | 27 | --- 28 | 29 | ### Turn mpibind on or off 30 | 31 | If mpibind is disabled by default, turn it on with `-o mpibind=on` 32 | 33 | If mpibind is enabled by default, turn it off with `-o mpibind=off` 34 | 35 | ### Enable verbosity 36 | 37 | To display the mapping of tasks to CPUs and GPUs, use `-o mpibind=verbose` 38 | 39 | ### Specify an SMT level 40 | 41 | To specify how many hardware threads per core to use for the application, use `-o mpibind=smt:`, where `n` ranges between 1 and the number of hardware threads per core. 42 | 43 | To use two hardware threads per core on an SMT-4 architecture, for instance, use `-o mpibind=smt:2` 44 | 45 | By default mpibind uses one hardware thread per core. 46 | 47 | ### Turn greedy on or off 48 | 49 | To minimize remote memory accesses, mpibind nominally assigns one NUMA domain per task. When launching less tasks than NUMA domains, this can significantly limit the resources available to the application. 50 | 51 | To use all of the resources of a node when using less tasks than NUMA domains, use `-o mpibind=greedy:1` 52 | 53 | To assign a single NUMA domain to every task even when using less tasks than NUMA domains, use `-o mpibind=greedy:0` 54 | 55 | By default greedy mode is on. 56 | 57 | ### Enable GPU optimized mappings 58 | 59 | On some heterogeneous architectures the best mapping depends on the type of resource the application will use the most. 60 | 61 | To fine-tune the mapping provided by mpibind for CPU usage, use `-o mpibind=gpu_optim:0` 62 | 63 | To fine-tune the mapping provided by mpibind for GPU usage, use `-o mpibind=gpu_optim:1` 64 | 65 | On systems with GPUs, GPU-optimized mapping is on by default. 66 | 67 | ### Enable core or thread specialization to mitigate system noise 68 | 69 | On systems with significant noise generated by system processes, hardware resources can be dedicated for running these processes, e.g., system cores. On such systems user jobs should not be scheduled on these resources. 70 | 71 | One can tell mpibind to schedule application work on a specific subset of the compute node to, for example, avoid using system resources. 72 | 73 | One can specify the application resources in a memory-driven or compute-driven fashion: When MPIBIND_RESTRICT_TYPE is set to `cpu` one specifies a set of Linux CPUs and when this variable is set to `mem` one specifies a list of (NUMA) memory domains. When specifying a NUMA domain all of the compute resources local to that domain are included in the set. By default MPIBIND_RESTRICT_TYPE is set to `cpu`. 74 | 75 | The MPIBIND_RESTRICT variable is then used to specify the IDs of the resources to use for application work. 76 | 77 | For example, to restrict the application resources to the first and third NUMA domains (and their local resources) one would set `MPIBIND_RESTRICT_TYPE=mem` and `MPIBIND_RESTRICT=0,2`; and to restrict the application to CPUs 12-24 one would set `MPIBIND_RESTRICT_TYPE=cpu` and `MPIBIND_RESTRICT=12-24`. 78 | 79 | On machines where these variables are set by default (presumably to mitigate system noise), one can unset these variables to regain access to the full node, but one has to be cognizant of the potential implications of running on the same resources as other system processes. 
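As a concrete sketch of the first case (with `./my_app` standing in for the real application), the restriction is exported before launching the job:

```
# Restrict application work to NUMA domains 0 and 2 (and their local resources)
export MPIBIND_RESTRICT_TYPE=mem
export MPIBIND_RESTRICT=0,2
flux run -n4 ./my_app
```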
80 | 81 | 82 | ### Disable OpenMP affinity or GPU affinity 83 | 84 | To enable OpenMP affinity and GPU affinity, mpibind sets certain environment variables. One can instruct mpibind not to set them as follows. 85 | 86 | To not set OMP_PROC_BIND and OMP_PLACES, use `-o mpibind=omp_proc_bind` and `-o mpibind=omp_places`, respectively. To not set both, use `-o mpibind=omp_proc_bind,omp_places` 87 | 88 | To not set GPU affinity, use `-o mpibind=visible_devices`. This setting applies to AMD and NVIDIA GPUs. 89 | 90 | By default OpenMP affinity and GPU affinity are enabled. 91 | 92 | ### Read in the machine topology 93 | 94 | Discovering the node topology can be an expensive operation. When running under Flux, mpibind gets the topology specification from Flux rather than querying the topology once again. 95 | 96 | Alternatively, one can tell mpibind to read the topology (1) from a static hwloc file or (2) dynamically. The former can be accomplished by setting `FLUX_MPIBIND_USE_TOPOFILE` to any non-empty value and `MPIBIND_TOPOFILE` to the hwloc-xml-file. The latter can be accomplished by setting `FLUX_MPIBIND_USE_TOPOFILE` only. 97 | -------------------------------------------------------------------------------- /gpu-tests/makefile.mk: -------------------------------------------------------------------------------- 1 | 2 | HIP_PLATFORM = $(shell hipconfig --platform) 3 | HWLOC_CFLAGS = $(shell pkg-config --cflags hwloc) 4 | HWLOC_LDLIBS = $(shell pkg-config --libs hwloc) 5 | 6 | PROGS = retrieve visdevs visdevs-hwloc 7 | 8 | all: $(PROGS) 9 | 10 | 11 | ifneq ($(strip $(HAVE_AMD_GPUS)),) 12 | retrieve: retrieve.cpp 13 | hipcc -Wall -Werror -DHAVE_AMD_GPUS $< -o $@ 14 | else 15 | retrieve: retrieve.cu 16 | nvcc --Werror all-warnings -x cu $< -o $@ 17 | endif 18 | 19 | ifneq ($(strip $(HAVE_AMD_GPUS)),) 20 | visdevs: visdevs.cpp 21 | hipcc -Wall -Werror -DHAVE_AMD_GPUS $< -o $@ 22 | else 23 | visdevs: visdevs.cu 24 | nvcc --Werror all-warnings -x cu $< -o $@ 25 | endif 26 | 27 | ifneq ($(strip $(HAVE_AMD_GPUS)),) 28 | visdevs-hwloc: visdevs-hwloc.cpp 29 | hipcc -Wall -Werror -DHAVE_AMD_GPUS $(HWLOC_CFLAGS) $< -o $@ $(HWLOC_LDLIBS) 30 | else 31 | visdevs-hwloc: visdevs-hwloc.cu 32 | nvcc --Werror all-warnings $(HWLOC_CFLAGS) -x cu $< -o $@ $(HWLOC_LDLIBS) 33 | endif 34 | 35 | 36 | retrieve.cpp: retrieve.cu 37 | hipify-perl $< > $@ 38 | 39 | visdevs.cpp: visdevs.cu 40 | hipify-perl $< > $@ 41 | 42 | visdevs-hwloc.cpp: visdevs-hwloc.cu 43 | hipify-perl $< > $@ 44 | 45 | clean: 46 | rm -f *.o $(PROGS) $(PROGS:=.cpp) 47 | -------------------------------------------------------------------------------- /gpu-tests/orig.mk: -------------------------------------------------------------------------------- 1 | ############################################################## 2 | # Edgar A. 
Leon 3 | # Lawrence Livermore National Laboratory 4 | ############################################################## 5 | 6 | 7 | # Check if we have AMD GPUs 8 | #HAVE_AMD_GPUS = $(shell rocm-smi --showbus 2>/dev/null | grep GPU) 9 | #HAVE_NVIDIA_GPUS = 1 10 | #HAVE_AMD_GPUS = 1 11 | 12 | CFLAGS = -Wall -Werror 13 | HIP_LDFLAGS = -L$(shell hipconfig --path)/lib -lamdhip64 14 | 15 | OBJS = cpu.o 16 | ifneq ($(strip $(or $(HAVE_AMD_GPUS),$(HAVE_NVIDIA_GPUS))),) 17 | GPU_FLAGS = -DHAVE_GPUS 18 | OBJS += gpu.o 19 | endif 20 | 21 | 22 | # Get system configuration with 'hipconfig' 23 | # hipconfig --platform 24 | # hipconfig --version 25 | # hipconfig --compiler 26 | # hipconfig --runtime 27 | 28 | ############################################################## 29 | # Build a HIP program with nvcc (for NVIDIA hardware) 30 | ############################################################## 31 | # nvcc -I$(HIP_ROOT)/include $(MPI_CFLAGS) -Xcompiler -DCUDA_ENABLE_DEPRECATED -x cu $< -Xlinker -lcuda -Xlinker "$(MPI_LIBS)" 32 | # nvcc -I$(HIP_ROOT)/include -Xcompiler -DCUDA_ENABLE_DEPRECATED -x cu -ccbin mpicc $< -Xlinker -lcuda 33 | 34 | ############################################################## 35 | # Build a HIP program with hipcc (for NVIDIA hardware) 36 | # To start with a CUDA program, hipify first, e.g., 37 | # hipify-perl square.cu > square.cpp 38 | # Note: hipcc takes .cpp programs (not .c for example) 39 | ############################################################## 40 | # Export the following environment variables 41 | # HIP_PLATFORM=nvcc 42 | # HIP_COMPILER=nvcc 43 | # HIPCC_VERBOSE=1 44 | # hipcc -Xcompiler -DCUDA_ENABLE_DEPRECATED $(MPI_CFLAGS) $< $(MPI_LIBS) -o $@ 45 | # Could use HIP_PLATFORM to determine the flags to use 46 | #ifeq (${HIP_PLATFORM}, nvcc) 47 | # HIPCC_FLAGS = -Xcompiler -DCUDA_ENABLE_DEPRECATED 48 | # HIPCC_FLAGS = -gencode=arch=compute_20,code=sm_20 49 | #endif 50 | 51 | ############################################################## 52 | # Build an MPI program with hipcc 53 | ############################################################## 54 | # MPI_ROOT = /usr/tce/packages/mvapich2/mvapich2-2.3-intel-19.0.4 55 | # MPI_CFLAGS = -I$(MPI_ROOT)/include 56 | # MPI_LIBS = -L$(MPI_ROOT)/lib -lmpi 57 | # ifneq ($(strip $(HAVE_AMD_GPUS)),) 58 | # simple: simple.cpp 59 | # hipcc $(MPI_CFLAGS) $^ $(MPI_LIBS) -o $@ 60 | # endif 61 | 62 | 63 | ############################################################## 64 | # Link an OpenMP program with hipcc 65 | ############################################################## 66 | # Find the OpenMP lib 67 | # HIP_CLANG_LIB = $(shell hipconfig --hipclangpath)/../lib 68 | # omp: omp.o gpu.o 69 | # hipcc -fopenmp -Xlinker -rpath=$(HIP_CLANG_LIB) $^ -o $@ 70 | 71 | 72 | ## I could have chosen to build GPU programs with hipcc 73 | ## for both AMD and NVIDIA devices, but the hipcc options 74 | ## for NVIDIA are almost like calling nvcc directly... 75 | ## I might as well call nvcc directly and no need 76 | ## for HIP on NVIDIA architectures! 
77 | 78 | 79 | PROGS = mpi omp mpi+omp 80 | 81 | 82 | all: $(PROGS) 83 | 84 | 85 | mpi: mpi.o $(OBJS) 86 | ifneq ($(strip $(HAVE_AMD_GPUS)),) 87 | mpicc $^ -o $@ $(HIP_LDFLAGS) 88 | else ifneq ($(strip $(HAVE_NVIDIA_GPUS)),) 89 | nvcc -ccbin mpicc -Xlinker -lcuda $^ -o $@ 90 | else 91 | mpicc $^ -o $@ 92 | endif 93 | 94 | omp: omp.o $(OBJS) 95 | ifneq ($(strip $(HAVE_AMD_GPUS)),) 96 | $(CC) -fopenmp $^ -o $@ $(HIP_LDFLAGS) 97 | else ifneq ($(strip $(HAVE_NVIDIA_GPUS)),) 98 | nvcc $^ -Xcompiler -fopenmp -o $@ 99 | else 100 | $(CC) -fopenmp $^ -o $@ 101 | endif 102 | 103 | mpi+omp: mpi+omp.o $(OBJS) 104 | ifneq ($(strip $(HAVE_AMD_GPUS)),) 105 | mpicc -fopenmp $^ -o $@ $(HIP_LDFLAGS) 106 | else ifneq ($(strip $(HAVE_NVIDIA_GPUS)),) 107 | nvcc -ccbin mpicc -Xcompiler -fopenmp -Xlinker -lcuda $^ -o $@ 108 | else 109 | mpicc -fopenmp $^ -o $@ 110 | endif 111 | 112 | 113 | ifneq ($(strip $(HAVE_AMD_GPUS)),) 114 | gpu.o: gpu.cpp affinity.h 115 | hipcc -c $< 116 | else 117 | gpu.o: gpu.cu affinity.h 118 | nvcc --Werror all-warnings -x cu -c $< 119 | endif 120 | 121 | omp.o: omp.c affinity.h 122 | $(CC) $(CFLAGS) $(GPU_FLAGS) -fopenmp -c $< 123 | 124 | mpi.o: mpi.c affinity.h 125 | mpicc $(CFLAGS) $(GPU_FLAGS) -c $< 126 | 127 | mpi+omp.o: mpi+omp.c affinity.h 128 | mpicc $(CFLAGS) $(GPU_FLAGS) -fopenmp -c $< 129 | 130 | cpu.o: cpu.c 131 | $(CC) $(CFLAGS) -c $< 132 | 133 | gpu.cpp: gpu.cu 134 | hipify-perl $< > $@ 135 | 136 | 137 | clean: 138 | rm -f *.o *~ $(PROGS) 139 | 140 | 141 | 142 | # gpu-hip.o: gpu-hip.cpp affinity.h 143 | # ifneq ($(strip $(HAVE_AMD_GPUS)),) 144 | # hipcc -g -c -o $@ $< 145 | # else 146 | # nvcc -I$(HIP_ROOT)/include -Xcompiler -DCUDA_ENABLE_DEPRECATED -c -o $@ $< 147 | # endif 148 | 149 | #/usr/tce/packages/cuda/cuda-10.1.243/nvidia/bin/nvcc -I/usr/tce/packages/hip/hip-3.0.0/include -Xcompiler -DCUDA_ENABLE_DEPRECATED -Xcompiler -DHIP_VERSION_MAJOR=3 -Xcompiler -DHIP_VERSION_MINOR=0 -Xcompiler -DHIP_VERSION_PATCH=0 -x cu square.hipref.cpp -Xlinker '"-rpath=/usr/tce/packages/cuda/cuda-10.1.243/nvidia/lib64:/usr/tce/packages/cuda/cuda-10.1.243"' 150 | 151 | -------------------------------------------------------------------------------- /gpu-tests/retrieve.cu: -------------------------------------------------------------------------------- 1 | /****************************************************** 2 | * Edgar A. 
Leon 3 | * Lawrence Livermore National Laboratory 4 | ******************************************************/ 5 | 6 | #include 7 | #ifdef HAVE_AMD_GPUS 8 | #include "hip/hip_runtime.h" 9 | #endif 10 | 11 | #define MAX_PCI_LEN 20 12 | 13 | void chooseDevPartial(int dev) 14 | { 15 | int odev=-1; 16 | int busId=-1, deviceId=-1, domainId=-1; 17 | char pci[MAX_PCI_LEN]; 18 | cudaDeviceProp prop; 19 | 20 | // Get selected device properties 21 | cudaDeviceGetPCIBusId(pci, MAX_PCI_LEN, dev); 22 | sscanf(pci, "%04x:%02x:%02x", &domainId, &busId, &deviceId); 23 | 24 | // Partially fill device properties and match 25 | memset(&prop, 0, sizeof(cudaDeviceProp)); 26 | prop.pciDomainID = domainId; 27 | prop.pciBusID = busId; 28 | prop.pciDeviceID = deviceId; 29 | 30 | cudaChooseDevice(&odev, &prop); 31 | printf("Partial match of device %d: device %d\n", dev, odev); 32 | printf("\tInput: DomainID=0x%x BusId=0x%x DeviceId=0x%x\n", 33 | domainId, busId, deviceId); 34 | if (dev != odev) 35 | printf("\tError: ChooseDevice did not match the correct device\n"); 36 | } 37 | 38 | void chooseDevFull(int dev) 39 | { 40 | int odev=-1; 41 | cudaDeviceProp prop; 42 | 43 | // Get all device properties 44 | cudaGetDeviceProperties(&prop, dev); 45 | 46 | cudaChooseDevice(&odev, &prop); 47 | printf("Full match of device %d: device %d\n", dev, odev); 48 | printf("\tInput: DomainID=0x%x BusId=0x%x DeviceId=0x%x\n", 49 | prop.pciDomainID, prop.pciBusID, prop.pciDeviceID); 50 | #ifndef HAVE_AMD_GPUS 51 | // HIP does not have a uuid field! 52 | printf("\t UUID=0x%x\n", prop.uuid); 53 | #endif 54 | 55 | if (dev != odev) 56 | printf("\tError: ChooseDevice did not match the correct device\n"); 57 | } 58 | 59 | void getDevByPCI(int dev, char *pci) 60 | { 61 | int pciBusID=-1, pciDeviceID=-1, pciDomainID=-1; 62 | int odev=-1; 63 | 64 | sscanf(pci, "%04x:%02x:%02x", &pciDomainID, &pciBusID, &pciDeviceID); 65 | 66 | // PCI ID: String in one of the following forms: 67 | // [domain]:[bus]:[device].[function] 68 | // [domain]:[bus]:[device] 69 | // [bus]:[device].[function] 70 | // where domain, bus, device, and function are all hex values 71 | cudaDeviceGetByPCIBusId(&odev, pci); 72 | 73 | printf("GetbyPCI match of device %d: device %d\n", dev, odev); 74 | printf("\tInput: DomainID=0x%x BusId=0x%x DeviceId=0x%x\n", 75 | pciDomainID, pciBusID, pciDeviceID); 76 | if (odev != dev) 77 | printf("Error: GetByPCI did not match the correct device\n"); 78 | } 79 | 80 | 81 | int main(int argc, char *argv[]) 82 | { 83 | int dev, ndevs; 84 | char pci[MAX_PCI_LEN]; 85 | 86 | 87 | cudaGetDeviceCount(&ndevs); 88 | if (ndevs <= 0) { 89 | printf("No devices found\n"); 90 | return 0; 91 | } 92 | 93 | // Select input device 94 | // Avoid choosing device 0, if possible, to enhance testing 95 | // dev = 1; 96 | dev = ndevs-1; 97 | 98 | cudaDeviceGetPCIBusId(pci, MAX_PCI_LEN, dev); 99 | printf("PCI ID of device %d = %s\n", dev, pci); 100 | 101 | getDevByPCI(dev, pci); 102 | 103 | chooseDevPartial(dev); 104 | 105 | chooseDevFull(dev); 106 | 107 | cudaSetDevice(dev); 108 | 109 | return 0; 110 | } 111 | 112 | -------------------------------------------------------------------------------- /gpu-tests/simple.cpp: -------------------------------------------------------------------------------- 1 | /*********************************************************** 2 | * Edgar A. 
Leon 3 | * Lawrence Livermore National Laboratory 4 | ***********************************************************/ 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | #define STR_SIZE 100 11 | 12 | void check_devices(char *buf) 13 | { 14 | hipDevice_t mydev; 15 | hipDeviceProp_t devProp; 16 | int i, ndevs, myid; 17 | char pciBusId[STR_SIZE] = ""; 18 | int nc = 0; 19 | 20 | hipGetDeviceCount(&ndevs); 21 | nc += sprintf(buf+nc, "Num devices: %d\n", ndevs); 22 | 23 | hipGetDevice(&myid); 24 | hipDeviceGet(&mydev, myid); 25 | hipDeviceGetPCIBusId(pciBusId, STR_SIZE, mydev); 26 | nc += sprintf(buf+nc, "Default device: %s\n", pciBusId); 27 | 28 | for (i=0; i 6 | #include 7 | #include 8 | #include 9 | 10 | #include 11 | #include 12 | 13 | #ifdef HAVE_AMD_GPUS 14 | #include "hip/hip_runtime.h" 15 | #endif 16 | 17 | #define MAX_PCI_LEN 20 18 | #define MAX_STR_LEN 512 19 | 20 | 21 | int obj_attr_snprintf(char *str, size_t size, hwloc_obj_t obj, 22 | int verbose) 23 | { 24 | int nc=0; 25 | 26 | if (obj->type == HWLOC_OBJ_OS_DEVICE) 27 | switch (obj->attr->osdev.type) { 28 | case HWLOC_OBJ_OSDEV_COPROC : 29 | nc += hwloc_obj_type_snprintf(str+nc, size-nc, obj, 1); 30 | nc += snprintf(str+nc, size-nc, ": name=%s ", obj->name); 31 | nc += snprintf(str+nc, size-nc, "subtype=%s ", obj->subtype); 32 | nc += snprintf(str+nc, size-nc, "GPUModel=%s ", 33 | hwloc_obj_get_info_by_name(obj, "GPUModel")); 34 | nc += snprintf(str+nc, size-nc, " "); 35 | /* Get obj->infos in one shot */ 36 | nc += hwloc_obj_attr_snprintf(str+nc, size-nc, obj, " ", verbose); 37 | break; 38 | 39 | default: 40 | break; 41 | } 42 | 43 | 44 | return nc; 45 | } 46 | 47 | 48 | 49 | void set_vis_devs(char *str) 50 | { 51 | // Don't invoke any GPU calls before resetting the environment! 52 | // Otherwise, there's no effect of setting VISIBLE_DEVICES. 53 | //cudaGetDeviceCount(&ndevs); 54 | //printf("Initial num. 
devices %d\n", ndevs); 55 | 56 | printf("Resetting environment to devices %s\n", str); 57 | unsetenv("ROCR_VISIBLE_DEVICES"); 58 | unsetenv("HIP_VISIBLE_DEVICES"); 59 | unsetenv("CUDA_VISIBLE_DEVICES"); 60 | #ifdef HAVE_AMD_GPUS 61 | setenv("ROCR_VISIBLE_DEVICES", str, 1); 62 | #else 63 | setenv("CUDA_VISIBLE_DEVICES", str, 1); 64 | #endif 65 | } 66 | 67 | 68 | void print_devices(hwloc_topology_t topo) 69 | { 70 | char str[MAX_STR_LEN]; 71 | hwloc_obj_t obj = NULL; 72 | while ( (obj=hwloc_get_next_obj_by_type(topo, HWLOC_OBJ_OS_DEVICE, obj)) != NULL ) 73 | if (obj->attr->osdev.type == HWLOC_OBJ_OSDEV_COPROC) { 74 | str[0] = '\0'; 75 | obj_attr_snprintf(str, MAX_STR_LEN, obj, 0); 76 | printf("%s\n", str); 77 | } 78 | } 79 | 80 | int get_list_len(char *lst) 81 | { 82 | // Copy VISDEVS string since strtok modifies the input string 83 | char tmp[strlen(lst)]; 84 | strcpy(tmp, lst); 85 | 86 | /* Get list size */ 87 | int idevs = 0; 88 | char *token = strtok(tmp, ","); 89 | while( token != NULL ) { 90 | idevs++; 91 | token = strtok(NULL, ","); 92 | } 93 | 94 | return idevs; 95 | } 96 | 97 | 98 | void test_wdup(char *visdevs, hwloc_topology_t topo) 99 | { 100 | set_vis_devs(visdevs); 101 | 102 | hwloc_topology_t topo2; 103 | printf("Duplicating the topology\n"); 104 | hwloc_topology_dup(&topo2, topo); 105 | 106 | set_vis_devs(visdevs); 107 | 108 | print_devices(topo2); 109 | hwloc_topology_destroy(topo2); 110 | } 111 | 112 | void test_wfork(char *vds) 113 | { 114 | set_vis_devs(vds); 115 | pid_t cpid = fork(); 116 | 117 | if (cpid == 0) { 118 | unsetenv("ROCR_VISIBLE_DEVICES"); 119 | unsetenv("HIP_VISIBLE_DEVICES"); 120 | printf("Child:\n"); 121 | set_vis_devs(vds); 122 | 123 | hwloc_topology_t topo; 124 | hwloc_topology_init(&topo); 125 | hwloc_topology_set_io_types_filter(topo, 126 | HWLOC_TYPE_FILTER_KEEP_IMPORTANT); 127 | hwloc_topology_load(topo); 128 | print_devices(topo); 129 | hwloc_topology_destroy(topo); 130 | 131 | exit(0); 132 | } else if (cpid > 0) { 133 | printf("Parent: Nothing to do but wait...\n"); 134 | wait(NULL); 135 | } else { 136 | printf("fork() failed\n"); 137 | } 138 | } 139 | 140 | void test_wnew_topo(char *vds) 141 | { 142 | set_vis_devs(vds); 143 | 144 | hwloc_topology_t topo; 145 | hwloc_topology_init(&topo); 146 | hwloc_topology_set_io_types_filter(topo, 147 | HWLOC_TYPE_FILTER_KEEP_IMPORTANT); 148 | hwloc_topology_load(topo); 149 | print_devices(topo); 150 | hwloc_topology_destroy(topo); 151 | } 152 | 153 | 154 | void test_wdev_api(char *vds) 155 | { 156 | int i, odevs=-1; 157 | /* Cannot call the device driver before settting 158 | VISIBLE DEVICES. Otherwise, the devices are set 159 | and cannot be changed */ 160 | //cudaGetDeviceCount(&odevs); 161 | //printf("Modified num. devices %d\n", odevs); 162 | 163 | set_vis_devs(vds); 164 | cudaGetDeviceCount(&odevs); 165 | printf("Modified num. devices %d\n", odevs); 166 | 167 | /* Get device PCI ID */ 168 | char pci[MAX_PCI_LEN]; 169 | for (i=0; i 0) { 208 | printf("Parent: Nothing to do but wait...\n"); 209 | wait(NULL); 210 | } else { 211 | printf("fork() failed\n"); 212 | } 213 | } 214 | 215 | 216 | 217 | /* Lessons learned: 218 | 1. Setting VISIBLE DEVICES in the context of hwloc: 219 | The environmnet variables must be set before the 220 | first time the topology is loaded. 221 | 2. Setting VISIBLE DEVICES in the context of device API calls: 222 | The environment variables must be called before the 223 | first invocation of a device function. 224 | 3. 
Using fork does not really allows to overwrite the points 225 | above. 226 | 4. hwloc loading a topology has the same effect as calling 227 | a device function, i.e., after this setting VISIBLE 228 | DEVICES is too late. 229 | */ 230 | 231 | int main(int argc, char *argv[]) 232 | { 233 | char vds[] = "1"; 234 | //int idevs = get_list_len(vds); 235 | 236 | hwloc_topology_t topo; 237 | hwloc_topology_init(&topo); 238 | /* OS devices are filtered by default, enable to see GPUs */ 239 | hwloc_topology_set_type_filter(topo, HWLOC_OBJ_OS_DEVICE, 240 | HWLOC_TYPE_FILTER_KEEP_IMPORTANT); 241 | /* Include PCI devices to determine whether two GPUs 242 | are the same device, i.e., opencl1d1 and cuda1 */ 243 | hwloc_topology_set_type_filter(topo, HWLOC_OBJ_PCI_DEVICE, 244 | HWLOC_TYPE_FILTER_KEEP_IMPORTANT); 245 | 246 | /* Setting visible devices must be done before 247 | loading the topology the first time! */ 248 | set_vis_devs(vds); 249 | 250 | /* If testing whether VISIBLE DEVICES work with 251 | the device API functions, don't load the topology 252 | because this set the devices and can't be changed later */ 253 | hwloc_topology_load(topo); 254 | //print_devices(topo); 255 | 256 | 257 | #if 1 258 | test_wnew_topo(vds); 259 | #endif 260 | #if 0 261 | test_wdup(vds, topo); 262 | #endif 263 | #if 0 264 | test_wfork(vds); 265 | #endif 266 | #if 0 267 | test_wdev_api(vds); 268 | #endif 269 | #if 0 270 | test_wfork_api(vds); 271 | #endif 272 | 273 | hwloc_topology_destroy(topo); 274 | 275 | return 0; 276 | } -------------------------------------------------------------------------------- /gpu-tests/visdevs.cu: -------------------------------------------------------------------------------- 1 | /****************************************************** 2 | * Edgar A. Leon 3 | * Lawrence Livermore National Laboratory 4 | ******************************************************/ 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | #ifdef HAVE_AMD_GPUS 11 | #include "hip/hip_runtime.h" 12 | #endif 13 | 14 | #define MAX_PCI_LEN 20 15 | 16 | int main(int argc, char *argv[]) 17 | { 18 | char str[] = "1,7"; 19 | 20 | // Copy VISDEVS string since strtok modifies the input string 21 | char tmp[strlen(str)]; 22 | strcpy(tmp, str); 23 | 24 | /* Get list size */ 25 | int i, idevs = 0; 26 | char *token = strtok(tmp, ","); 27 | while( token != NULL ) { 28 | idevs++; 29 | token = strtok(NULL, ","); 30 | } 31 | 32 | /* Convert VISDEVS list into ints */ 33 | //int i=0, visdevs[idevs]; 34 | //strcpy(tmp, str); 35 | //token = strtok(tmp, ","); 36 | //while( token != NULL ) { 37 | // visdevs[i++] = atoi(token); 38 | // token = strtok(NULL, ","); 39 | //} 40 | 41 | // Don't invoke any GPU calls before resetting the environment! 42 | // Otherwise, there's no effect of setting VISIBLE_DEVICES. 43 | //cudaGetDeviceCount(&ndevs); 44 | //printf("Initial num. devices %d\n", ndevs); 45 | 46 | printf("Resetting environment to devices %s\n", str); 47 | unsetenv("ROCR_VISIBLE_DEVICES"); 48 | unsetenv("HIP_VISIBLE_DEVICES"); 49 | unsetenv("CUDA_VISIBLE_DEVICES"); 50 | #ifdef HAVE_AMD_GPUS 51 | setenv("ROCR_VISIBLE_DEVICES", str, 1); 52 | #else 53 | setenv("CUDA_VISIBLE_DEVICES", str, 1); 54 | #endif 55 | 56 | int odevs=-1; 57 | cudaGetDeviceCount(&odevs); 58 | printf("Modified num. 
devices %d\n", odevs); 59 | 60 | /* Get device PCI ID */ 61 | char pci[MAX_PCI_LEN]; 62 | for (i=0; i $@ 25 | 26 | # After installation modify mpibind.py to point to libmpibind in the install tree 27 | install-data-hook: 28 | sed -i.tmp 's|$(abs_top_srcdir)/src/.libs/$(mpibind_lib_name)|$(libdir)/$(mpibind_lib_name)|g' \ 29 | $(datadir)/mpibind.py && \ 30 | rm $(datadir)/mpibind.py.tmp 31 | 32 | CLEANFILES = mpibind.py 33 | 34 | endif 35 | -------------------------------------------------------------------------------- /python/README.md: -------------------------------------------------------------------------------- 1 | # The Python interface 2 | 3 | `mpibind` for Python enables the use of the mpibind algorithm in 4 | arbitrary Python programs. 5 | 6 | ## Building and installing 7 | 8 | ### Spack 9 | 10 | The easiest way to build and install the Python interface is through 11 | `spack`: 12 | ``` 13 | spack install mpibind+python 14 | spack load mpibind 15 | ``` 16 | 17 | ### Autotools 18 | 19 | Otherwise, use the Autotools process described at the top 20 | directory. Basically, the Python bindings are built provided 21 | that Python 3 and CFFI are present at `configure` time. Let's assume 22 | that mpibind's installation directory is `install_dir`. 23 | 24 | Options available to install the Python interface: 25 | 26 | * Add the bindings to the Python path 27 | ``` 28 | export PYTHONPATH=$PYTHONPATH:install_dir/share 29 | ``` 30 | * Use `setup.py` 31 | ``` 32 | cd install_dir/share 33 | python setup.py install 34 | ``` 35 | 36 | ### Dependencies 37 | 38 | * Python 3 39 | * The C Foreign Function Interface for Python 40 | ([CFFI](https://cffi.readthedocs.io/en/latest/)) 41 | * [Pycotap](https://pypi.org/project/pycotap/) (for unit testing) 42 | 43 | 44 | ## Usage 45 | 46 | Here is a simple [program](test-simple.py) that demonstrates the Python 47 | interface. 48 | 49 | ```python 50 | import os 51 | import mpibind 52 | 53 | # This simple example does not use MPI, thus 54 | # specify my rank and total number of tasks 55 | rank = 2 56 | ntasks_per_node = 4 57 | 58 | # Is sched_getaffinity supported? 59 | affinity = True if hasattr(os, 'sched_getaffinity') else False 60 | 61 | if affinity: 62 | cpus = sorted(os.sched_getaffinity(0)) 63 | affstr = "\n>Before\n" 64 | affstr += "Running on {:2d} cpus: {}\n".format(len(cpus), cpus) 65 | 66 | # Create a handle 67 | # Num tasks is a required parameter 68 | handle = mpibind.MpibindHandle(ntasks=ntasks_per_node) 69 | 70 | # Create the mapping 71 | handle.mpibind() 72 | 73 | # Print the mapping 74 | handle.mapping_print() 75 | 76 | # Apply the mapping as if I am worker 'rank' 77 | # This function is not supported on some platforms 78 | if affinity: 79 | handle.apply(rank) 80 | cpus = sorted(os.sched_getaffinity(0)) 81 | print(affstr + ">After\n" + 82 | "Running on {:2d} cpus: {}".format(len(cpus), cpus)) 83 | ``` 84 | 85 | Running it on a dual-socket system with 18x2 SMT-2 cores results in the 86 | output below. Note that the resulting mapping uses only the first socket 87 | because `mpibind` optimizes placement for GPUs by default (configurable 88 | parameter) and both GPUs are located on the first socket.
89 | 90 | ```bash 91 | $ python3 test-simple.py 92 | mpibind: task 0 nths 4 gpus 0 cpus 0-4 93 | mpibind: task 1 nths 4 gpus 0 cpus 5-9 94 | mpibind: task 2 nths 4 gpus 1 cpus 10-13 95 | mpibind: task 3 nths 4 gpus 1 cpus 14-17 96 | 97 | >Before 98 | Task 2: Running on 72 cpus: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71] 99 | >After 100 | Task 2: Running on 4 cpus: [10, 11, 12, 13] 101 | ``` 102 | 103 | A more realistic example that uses MPI is provided in 104 | [test-mpi.py](test-mpi.py). This program uses `mpi4py` so make sure it is 105 | installed on your system, e.g., `pip install mpi4py`. It can be run as follows: 106 | 107 | ```bash 108 | $ srun -N2 -n8 python3 test-mpi.py 109 | pascal7 task 0/8: lrank 0/4 nths 4 gpus ['0'] cpus [0, 1, 2, 3, 4] 110 | pascal7 task 1/8: lrank 1/4 nths 4 gpus ['0'] cpus [5, 6, 7, 8, 9] 111 | pascal7 task 2/8: lrank 2/4 nths 4 gpus ['1'] cpus [10, 11, 12, 13] 112 | pascal7 task 3/8: lrank 3/4 nths 4 gpus ['1'] cpus [14, 15, 16, 17] 113 | pascal8 task 4/8: lrank 0/4 nths 4 gpus ['0'] cpus [0, 1, 2, 3, 4] 114 | pascal8 task 5/8: lrank 1/4 nths 4 gpus ['0'] cpus [5, 6, 7, 8, 9] 115 | pascal8 task 6/8: lrank 2/4 nths 4 gpus ['1'] cpus [10, 11, 12, 13] 116 | pascal8 task 7/8: lrank 3/4 nths 4 gpus ['1'] cpus [14, 15, 16, 17] 117 | ``` 118 | 119 | ## Unit tests 120 | 121 | Unit tests are located in [test-suite/python](../test-suite/python) and can be 122 | launched from the top directory with `make check`. We use `pycotap` to emit the 123 | Test Anything Protocol (TAP) from the Python tests. Make sure `pycotap` is 124 | installed, e.g., `pip install pycotap` before running `configure` from the top 125 | directory. 126 | 127 | Two modifications are required to add a Python test. 128 | 129 | 1. Create a new test file under [test-suite/python](../test-suite/python) 130 | 2. Add the new test file to the `PYTHON_TESTS` variable in 131 | [test-suite/Makefile.am](../test-suite/Makefile.am) 132 | 133 | 134 | ## Development 135 | 136 | We use CFFI to build mpibind for Python. 137 | CFFI is a Python library that allows building Python wrappers for C 138 | code. CFFI allows for several modes of interaction between C and 139 | Python: API vs ABI and out-of-line vs in-line. For mpibind, we use 140 | CFFI in ABI, in-line mode. 141 | 142 | Exposing an mpibind C function to Python requires two modifications to 143 | [mpibind.py.in](mpibind.py.in) 144 | 145 | 1. Add the C function definition to the `cdef` argument 146 | 2. Add a wrapper for the function to the class `MpibindHandle` 147 | 148 | -------------------------------------------------------------------------------- /python/mpibind_map.py: -------------------------------------------------------------------------------- 1 | ################################################### 2 | # Edgar A. Leon 3 | # Lawrence Livermore National Laboratory 4 | # 5 | # This is a wrapper of mpibind functions to easily 6 | # get an application's mapping to the hardware 7 | # in the context of MPI. 8 | # 9 | # This wrapper calls mpibind once per compute node 10 | # so that the hardware topology is discovered once 11 | # rather than n times, where n is the number of 12 | # processes per node. 
13 | # 14 | ################################################### 15 | 16 | 17 | ## Todo: Add a variable number of parameters to this 18 | ## function and pass them to MpibindHandle(). 19 | def mpibind_get_mapping(verbose=False): 20 | '''Get the mpibind mapping of an MPI program. 21 | The return value is a dictionary with the keys 22 | nthreads, cpus, and gpus.''' 23 | from mpi4py import MPI 24 | import mpibind 25 | import re 26 | 27 | comm = MPI.COMM_WORLD 28 | size = comm.Get_size() 29 | rank = comm.Get_rank() 30 | name = MPI.Get_processor_name() 31 | 32 | ## Get a leader for each compute node 33 | match = re.search('\d+', name) 34 | if not match: 35 | print("mpibind: Could not determine node id") 36 | return None 37 | 38 | nodeid = int(match.group()) 39 | node_comm = comm.Split(color=nodeid, key=rank) 40 | node_rank = node_comm.Get_rank() 41 | node_size = node_comm.Get_size() 42 | 43 | ## One task per node calculates the mapping. 44 | ## This is not a hard requirement, but it is 45 | ## more efficient than every process discovering 46 | ## the topology of the compute node. 47 | if node_rank == 0: 48 | # Create an mpibind handle, 'ntasks' is a required parameter 49 | # See 'help(mpibind.MpibindHandle)' for detailed usage 50 | handle = mpibind.MpibindHandle(ntasks=node_size) 51 | 52 | # Create the mapping 53 | handle.mpibind() 54 | #handle.mapping_print() 55 | 56 | # Distribute the mapping 57 | nthreads = handle.nthreads 58 | cpus = handle.get_cpus_ptask(0) 59 | gpus = handle.get_gpus_ptask(0) 60 | for i in range(1, node_size): 61 | node_comm.send(handle.get_cpus_ptask(i), dest=i) 62 | node_comm.send(handle.get_gpus_ptask(i), dest=i) 63 | else: 64 | nthreads = None 65 | cpus = node_comm.recv(source=0) 66 | gpus = node_comm.recv(source=0) 67 | 68 | nthreads = node_comm.scatter(nthreads, root=0) 69 | 70 | if verbose: 71 | print('{} task {}/{}: lrank {}/{} nths {} gpus {} cpus {}'\ 72 | .format(name, rank, size, node_rank, node_size, 73 | nthreads, gpus, cpus)) 74 | 75 | return {"nthreads": nthreads, "cpus": cpus, "gpus": gpus} 76 | 77 | 78 | 79 | -------------------------------------------------------------------------------- /python/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | 3 | # https://packaging.python.org/guides/distributing-packages-using-setuptools/#setup-py 4 | 5 | longtext = """Python bindings for mpibind, a memory-driven algorithm to map parallel hybrid 6 | applications to the underlying hardware resources transparently, 7 | efficiently, and portably.""" 8 | 9 | setup( 10 | name='mpibind', 11 | version='0.5.0', 12 | author='LLNL', 13 | url='https://github.com/LLNL/mpibind', 14 | description='Memory-First Affinity Scheduler', 15 | long_description=longtext, 16 | keywords='affinity, NUMA, hybrid applications, heterogeneous systems', 17 | py_modules=['mpibind'], 18 | # install_requires=['cffi'], 19 | platforms=['posix'], 20 | license='MIT', 21 | classifiers=[ 22 | 'Development Status :: 4 - Beta', 23 | 'License :: OSI Approved :: MIT License', 24 | 'Operating System :: POSIX :: Linux', 25 | 'Programming Language :: Python :: 3', 26 | ], 27 | ) 28 | -------------------------------------------------------------------------------- /python/test-mpi.py: -------------------------------------------------------------------------------- 1 | ################################################### 2 | # Edgar A. 
Leon 3 | # Lawrence Livermore National Laboratory 4 | ################################################### 5 | 6 | # There is an issue when mpi4py implicitly calls 7 | # MPI_Finalize and loading the mpibind module, 8 | # which leads to a segmentation fault. To avoid this, 9 | # make sure Finalize is not called automatically. 10 | import mpi4py 11 | #mpi4py.rc.threads = True # thread support 12 | #mpi4py.rc.thread_level = "funneled" # thread support level 13 | #mpi4py.rc.initialize = False # do not initialize MPI automatically 14 | mpi4py.rc.finalize = False # do not finalize MPI automatically 15 | from mpi4py import MPI 16 | 17 | from mpibind_map import mpibind_get_mapping 18 | 19 | 20 | # The search path for Python modules 21 | #import sys 22 | #print("sys.path:") 23 | #print(sys.path) 24 | 25 | # Path to the MPI module 26 | #print("MPI module:") 27 | #print(MPI.__file__) 28 | 29 | 30 | # Print either the mapping from mpibind (true) 31 | # or the actual mapping from the runtime system (false) 32 | mpibind_verbose = True 33 | 34 | 35 | ## Is sched_getaffinity supported? 36 | import os 37 | affinity = True if hasattr(os, 'sched_getaffinity') else False 38 | 39 | 40 | comm = MPI.COMM_WORLD 41 | size = comm.Get_size() 42 | rank = comm.Get_rank() 43 | # if rank == 0: 44 | # (version, subversion) = MPI.Get_version() 45 | # print("Using MPI {}.{}".format(version, subversion)) 46 | 47 | 48 | # Get the mapping 49 | # mapping["nthreads"]: The number of threads this process can launch 50 | # mapping["cpus"]: The CPUs assigned to this process 51 | # mapping["gpus"]: The GPUs assigned to this process 52 | mapping = mpibind_get_mapping(mpibind_verbose) 53 | 54 | 55 | ## Apply the CPU mapping 56 | if affinity: 57 | pid = 0 58 | cpus = sorted(os.sched_getaffinity(pid)) 59 | affstr = "{:2d}/{:2d} was running on {:2d} cpus: {}\n"\ 60 | .format(rank, size, len(cpus), cpus) 61 | 62 | os.sched_setaffinity(pid, mapping["cpus"]) 63 | 64 | cpus = sorted(os.sched_getaffinity(pid)) 65 | if not mpibind_verbose: 66 | print(affstr + " now running on {:2d} cpus: {}"\ 67 | .format(len(cpus), cpus)) 68 | 69 | 70 | ## Use mapping["gpus"] to launch work on GPUs 71 | ## ... 72 | -------------------------------------------------------------------------------- /python/test-simple.py: -------------------------------------------------------------------------------- 1 | ################################################### 2 | # Edgar A. Leon 3 | # Lawrence Livermore National Laboratory 4 | ################################################### 5 | 6 | import os 7 | import mpibind 8 | 9 | # This simple example does not use MPI, thus 10 | # specify my rank and total number of tasks 11 | rank = 2 12 | ntasks_per_node = 4 13 | 14 | # Is sched_getaffinity supported? 
15 | affinity = True if hasattr(os, 'sched_getaffinity') else False 16 | 17 | if affinity: 18 | cpus = sorted(os.sched_getaffinity(0)) 19 | affstr = "\n>Before\n" 20 | affstr += "{}: Running on {:2d} cpus: {}\n"\ 21 | .format(rank, len(cpus), cpus) 22 | 23 | # Create a handle 24 | # Num tasks is a required parameter 25 | handle = mpibind.MpibindHandle(ntasks=ntasks_per_node) 26 | 27 | # Create the mapping 28 | handle.mpibind() 29 | 30 | # Print the mapping 31 | handle.mapping_print() 32 | 33 | # Apply the mapping as if I am worker 'rank' 34 | # This function is not supported on some platforms 35 | if affinity: 36 | handle.apply(rank) 37 | cpus = sorted(os.sched_getaffinity(0)) 38 | print(affstr + ">After\n" + 39 | "{}: Running on {:2d} cpus: {}"\ 40 | .format(rank, len(cpus), cpus)) 41 | 42 | -------------------------------------------------------------------------------- /slurm/Makefile.am: -------------------------------------------------------------------------------- 1 | 2 | ####################################################### 3 | # Slurm plugin 4 | ####################################################### 5 | 6 | if HAVE_SLURM 7 | mpibindmod_LTLIBRARIES = mpibind_slurm.la 8 | endif 9 | 10 | mpibind_slurm_la_SOURCES = plugin.c 11 | mpibind_slurm_la_CFLAGS = -Wall -Werror -I$(top_srcdir)/src 12 | mpibind_slurm_la_CFLAGS += $(HWLOC_CFLAGS) $(SLURM_CFLAGS) 13 | # Need slurm/spank.h 14 | # SLURM_CFLAGS is empty because 'pkg-config --cflags slurm' 15 | # is empty: default paths, e.g., /usr/include, are not returned! 16 | # Because the Slurm headers/libs are on standard locations, 17 | # e.g., /usr/include, everything works. 18 | # mpibind_slurm_la_CFLAGS += -I$(SLURM_INCDIR) $(HWLOC_CFLAGS) 19 | mpibind_slurm_la_LIBADD = $(top_builddir)/src/libmpibind.la 20 | mpibind_slurm_la_LDFLAGS = -module 21 | -------------------------------------------------------------------------------- /slurm/README.md: -------------------------------------------------------------------------------- 1 | 2 | ## The mpibind Slurm Plugin 3 | 4 | The `mpibind_slurm.so` library is a SPANK plugin that enables using 5 | mpibind in Slurm to map parallel codes to the hardware. 6 | 7 | ### Requirements 8 | 9 | The file `slurm/spank.h` is necessary to build the plugin. This file is distributed with Slurm. 10 | 11 | ### Building and installing 12 | 13 | The building system looks for a Slurm installation using `pkg-config` and, if 14 | found, the plugin is built and installed here: 15 | ``` 16 | /lib/mpibind/ 17 | # which can be obtained with the command 18 | pkg-config --variable=plugindir mpibind 19 | ``` 20 | 21 | To install the plugin into your Slurm installation, add the following 22 | line to the `plugstack.conf` file: 23 | ``` 24 | required /lib/mpibind/mpibind_slurm.so 25 | ``` 26 | The plugin configuration options are below. Separate multiple options with commas. 27 | ``` 28 | # Disable the plugin by default 29 | # To use mpibind add --mpibind=on to srun 30 | default_off 31 | 32 | # By default, mpibind is enabled only on full-node allocations 33 | # This option enables mpibind on partial-node allocations as well 34 | exclusive_only_off 35 | ``` 36 | For example: 37 | ``` 38 | required /lib/mpibind/mpibind_slurm.so default_off 39 | ``` 40 | ### Usage 41 | 42 | mpibind can be used with the `srun` command as follows. 
43 | 44 | ``` 45 | Automatically map tasks/threads/GPU kernels to heterogeneous hardware 46 | 47 | Usage: --mpibind=[args] 48 | 49 | where args is a comma-separated list of one or more of the following: 50 | gpu[:0|1] Enable(1)/disable(0) GPU-optimized mappings 51 | greedy[:0|1] Allow(1)/disallow(0) multiple NUMAs per task 52 | help Display this message 53 | off Disable mpibind 54 | on Enable mpibind 55 | smt:<n> Enable worker use of SMT-<n> 56 | v[erbose] Print affinity for each task 57 | ``` 58 | 59 | For example: 60 | 61 | ``` 62 | # Turn off mpibind to check the mapping without it 63 | $ srun -N1 -n8 --mpibind=off ./mpi 64 | mutt29 Task 0/ 8 running on 224 CPUs: 0-223 65 | mutt29 Task 1/ 8 running on 224 CPUs: 0-223 66 | mutt29 Task 2/ 8 running on 224 CPUs: 0-223 67 | mutt29 Task 3/ 8 running on 224 CPUs: 0-223 68 | mutt29 Task 4/ 8 running on 224 CPUs: 0-223 69 | mutt29 Task 5/ 8 running on 224 CPUs: 0-223 70 | mutt29 Task 6/ 8 running on 224 CPUs: 0-223 71 | mutt29 Task 7/ 8 running on 224 CPUs: 0-223 72 | 73 | # mpibind should be on by default, but can be enabled explicitly 74 | $ srun -N1 -n8 --mpibind=on ./mpi 75 | mutt29 Task 0/ 8 running on 14 CPUs: 0-13 76 | mutt29 Task 1/ 8 running on 14 CPUs: 14-27 77 | mutt29 Task 2/ 8 running on 14 CPUs: 28-41 78 | mutt29 Task 3/ 8 running on 14 CPUs: 42-55 79 | mutt29 Task 4/ 8 running on 14 CPUs: 56-69 80 | mutt29 Task 5/ 8 running on 14 CPUs: 70-83 81 | mutt29 Task 6/ 8 running on 14 CPUs: 84-97 82 | mutt29 Task 7/ 8 running on 14 CPUs: 98-111 83 | 84 | # Get the mapping from mpibind itself 85 | $ srun -N1 -n8 --mpibind=v /bin/true 86 | mpibind: 0 GPUs on this node 87 | mpibind: task 0 nths 14 gpus cpus 0-13 88 | mpibind: task 1 nths 14 gpus cpus 14-27 89 | mpibind: task 2 nths 14 gpus cpus 28-41 90 | mpibind: task 3 nths 14 gpus cpus 42-55 91 | mpibind: task 4 nths 14 gpus cpus 56-69 92 | mpibind: task 5 nths 14 gpus cpus 70-83 93 | mpibind: task 6 nths 14 gpus cpus 84-97 94 | mpibind: task 7 nths 14 gpus cpus 98-111 95 | ``` 96 | 97 | ### Environment variables 98 | 99 | ``` 100 | # The type of resource to restrict mpibind to 101 | MPIBIND_RESTRICT_TYPE=<cpu|mem> 102 | 103 | # Restrict mpibind to a list of CPUs or NUMA domains 104 | MPIBIND_RESTRICT=<ids> 105 | 106 | # The hwloc topology file, in XML format, matching the cluster's topology 107 | MPIBIND_TOPOFILE=<file> 108 | ``` 109 | 110 | To restrict mpibind to a subset of the node resources, MPIBIND_RESTRICT must be defined with the resource IDs. Optionally, MPIBIND_RESTRICT_TYPE can be specified with the type of resource: CPUs or NUMA memory (the default is CPUs). 111 | 112 | 113 | For example, restrict mpibind to the third and fourth NUMA domains: 114 | 115 | ``` 116 | $ export MPIBIND_RESTRICT_TYPE=mem 117 | 118 | $ export MPIBIND_RESTRICT=2,3 119 | 120 | $ srun --mpibind=on -N1 -n4 ./mpi 121 | mutt124 Task 0/ 4 running on 7 CPUs: 28-34 122 | mutt124 Task 1/ 4 running on 7 CPUs: 35-41 123 | mutt124 Task 2/ 4 running on 7 CPUs: 42-48 124 | mutt124 Task 3/ 4 running on 7 CPUs: 49-55 125 | ``` 126 | 127 | To have mpibind use a topology file defining the node architecture, one can use MPIBIND_TOPOFILE. This can speed up launch time since the topology does not need to be discovered for every srun command. 128 | 129 | Notes: 130 | * This variable may already be defined in the user's environment. To check, use `printenv MPIBIND_TOPOFILE` 131 | * The topology file *must* match the node architecture where mpibind is run. Otherwise, the job may fail due to invalid mapping assignments.
132 | * To generate a topology file, run `hwloc` on a compute node as follows `lstopo .xml` 133 | 134 | For example: 135 | 136 | ``` 137 | # Generate the topology file using hwloc 138 | $ lstopo mutt.xml 139 | 140 | # Run mpibind on the same node architecture as where the file was created 141 | $ export MPIBIND_TOPOFILE=mutt.xml 142 | 143 | $ srun -N1 -n8 --mpibind=v /bin/true 144 | mpibind: 0 GPUs on this node 145 | mpibind: task 0 nths 14 gpus cpus 0-13 146 | mpibind: task 1 nths 14 gpus cpus 14-27 147 | mpibind: task 2 nths 14 gpus cpus 28-41 148 | mpibind: task 3 nths 14 gpus cpus 42-55 149 | mpibind: task 4 nths 14 gpus cpus 56-69 150 | mpibind: task 5 nths 14 gpus cpus 70-83 151 | mpibind: task 6 nths 14 gpus cpus 84-97 152 | mpibind: task 7 nths 14 gpus cpus 98-111 153 | ``` 154 | 155 | 156 | 157 | 158 | -------------------------------------------------------------------------------- /src/Makefile.am: -------------------------------------------------------------------------------- 1 | 2 | AM_CPPFLAGS = -Wall -Werror $(HWLOC_CFLAGS) 3 | 4 | ####################################################### 5 | # libmpibind 6 | ####################################################### 7 | lib_LTLIBRARIES = libmpibind.la 8 | 9 | libmpibind_la_SOURCES = \ 10 | mpibind.c mpibind-priv.h \ 11 | internals.c \ 12 | hwloc_utils.c hwloc_utils.h 13 | 14 | include_HEADERS = mpibind.h 15 | libmpibind_la_LIBADD = $(HWLOC_LIBS) 16 | #libmpibind_la_CPPFLAGS = -Wall -Werror $(HWLOC_CFLAGS) 17 | 18 | ####################################################### 19 | # Program using libmpibind and other auxiliary progs 20 | ####################################################### 21 | 22 | noinst_PROGRAMS = main hwloc_tests 23 | 24 | hwloc_tests_SOURCES = hwloc_tests.c hwloc_utils.c hwloc_utils.h 25 | hwloc_tests_LDADD = $(HWLOC_LIBS) 26 | # Rename hwloc_utils object file since it is used by both 27 | # a libtool library and a non-libtool target. 28 | hwloc_tests_CFLAGS = $(AM_CFLAGS) 29 | #hwloc_tests_CPPFLAGS = -Wall -Werror $(HWLOC_CFLAGS) 30 | 31 | main_SOURCES = main.c mpibind.h 32 | main_LDADD = libmpibind.la $(HWLOC_LIBS) 33 | 34 | -------------------------------------------------------------------------------- /src/hwloc_tests.c: -------------------------------------------------------------------------------- 1 | /****************************************************** 2 | * Edgar A. 
Leon 3 | * Lawrence Livermore National Laboratory 4 | ******************************************************/ 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include "hwloc_utils.h" 12 | 13 | 14 | 15 | 16 | void get_cpuset_of_nobjs(hwloc_topology_t topo, 17 | int nobjs, hwloc_obj_type_t type, 18 | hwloc_bitmap_t cpuset) 19 | { 20 | int i; 21 | hwloc_obj_t obj; 22 | 23 | hwloc_bitmap_zero(cpuset); 24 | 25 | for (i=0; icpuset); 28 | } 29 | } 30 | 31 | void test_distrib(hwloc_topology_t topo, int wks) 32 | { 33 | hwloc_obj_t root = hwloc_get_root_obj(topo); 34 | int i, n_roots = 1, flags = 0; 35 | int until = INT_MAX; 36 | hwloc_bitmap_t set[wks]; 37 | char str[128]; 38 | 39 | for (i=0; idepth, obj->gp_index); 93 | 94 | if (hwloc_obj_type_is_normal(obj->type)) { 95 | nc += snprintf(str+nc, size-nc, "\n "); 96 | nc += snprintf(str+nc, size-nc, "os_idx=%d ", obj->os_index); 97 | nc += snprintf(str+nc, size-nc, "l_idx=%d ", obj->logical_index); 98 | nc += snprintf(str+nc, size-nc, "cpuset="); 99 | nc += hwloc_bitmap_list_snprintf(str+nc, size-nc, obj->cpuset); 100 | nc += snprintf(str+nc, size-nc, " nodeset="); 101 | nc += hwloc_bitmap_list_snprintf(str+nc, size-nc, obj->nodeset); 102 | nc += snprintf(str+nc, size-nc, " arity=%d ", obj->arity); 103 | nc += snprintf(str+nc, size-nc, "amem=%d ", obj->memory_arity); 104 | nc += snprintf(str+nc, size-nc, "aio=%d ", obj->io_arity); 105 | } 106 | 107 | if (obj->type == HWLOC_OBJ_OS_DEVICE) 108 | switch (obj->attr->osdev.type) { 109 | case HWLOC_OBJ_OSDEV_COPROC : 110 | nc += snprintf(str+nc, size-nc, "subtype=%s ", 111 | obj->subtype); 112 | case HWLOC_OBJ_OSDEV_GPU : 113 | nc += snprintf(str+nc, size-nc, "\n uuid="); 114 | nc += gpu_uuid_snprintf(str+nc, size-nc, obj); 115 | case HWLOC_OBJ_OSDEV_OPENFABRICS : 116 | nc += snprintf(str+nc, size-nc, " busid="); 117 | nc += pci_busid_snprintf(str+nc, size-nc, obj); 118 | nc += snprintf(str+nc, size-nc, " name=%s ", 119 | obj->name); 120 | if (verbose > 0) { 121 | nc += snprintf(str+nc, size-nc, "\n "); 122 | /* Get obj->infos in one shot */ 123 | nc += hwloc_obj_attr_snprintf(str+nc, size-nc, obj, " ", 1); 124 | } 125 | default: 126 | break; 127 | } 128 | 129 | if (obj->type == HWLOC_OBJ_PCI_DEVICE) { 130 | nc += snprintf(str+nc, size-nc, "\n "); 131 | /* Get the obj->infos attributes */ 132 | nc += hwloc_obj_attr_snprintf(str+nc, size-nc, obj, " ", 1); 133 | } 134 | 135 | //hwloc_pci_class_string(obj->attr->pcidev.class_id) 136 | 137 | return nc; 138 | } 139 | 140 | 141 | void check_topo_filters(hwloc_topology_t topo) 142 | { 143 | enum hwloc_type_filter_e f1, f2; 144 | hwloc_topology_get_type_filter(topo, 145 | HWLOC_OBJ_PCI_DEVICE, &f1); 146 | hwloc_topology_get_type_filter(topo, 147 | HWLOC_OBJ_MISC, &f2); 148 | 149 | if (f1 == HWLOC_TYPE_FILTER_KEEP_IMPORTANT || 150 | f1 == HWLOC_TYPE_FILTER_KEEP_ALL) 151 | printf("PCI devices enabled\n"); 152 | if (f2 == HWLOC_TYPE_FILTER_KEEP_IMPORTANT || 153 | f2 == HWLOC_TYPE_FILTER_KEEP_ALL) 154 | printf("Misc objects enabled\n"); 155 | } 156 | 157 | 158 | 159 | int main(int argc, char *argv[]) 160 | { 161 | hwloc_topology_t topology; 162 | 163 | printf("hwloc: API version=0x%x, HWLOC_API_VERSION=0x%x\n", 164 | hwloc_get_api_version(), HWLOC_API_VERSION); 165 | 166 | hwloc_topology_init(&topology); 167 | /* OS devices are filtered by default, enable to see GPUs */ 168 | hwloc_topology_set_type_filter(topology, HWLOC_OBJ_OS_DEVICE, 169 | HWLOC_TYPE_FILTER_KEEP_IMPORTANT); 170 | /* Include PCI devices to determine whether two GPUs 171 
| are the same device, i.e., opencl1d1 and cuda1 */ 172 | hwloc_topology_set_type_filter(topology, HWLOC_OBJ_PCI_DEVICE, 173 | HWLOC_TYPE_FILTER_KEEP_IMPORTANT); 174 | hwloc_topology_load(topology); 175 | 176 | 177 | printf("=====Begin brief topology\n"); 178 | print_topo_brief(topology); 179 | printf("=====End brief topology\n"); 180 | 181 | printf("=====Begin I/O topology\n"); 182 | print_topo_io(topology); 183 | printf("=====End I/O topology\n"); 184 | 185 | printf("=====Begin flat list of devices\n"); 186 | print_devices(topology, HWLOC_OBJ_GROUP); 187 | print_devices(topology, HWLOC_OBJ_OS_DEVICE); 188 | printf("=====End flat list of devices\n"); 189 | 190 | printf("=====Begin filter type\n"); 191 | check_topo_filters(topology); 192 | printf("=====End filter type\n"); 193 | 194 | #if 0 195 | /* I haven't been able to use VISIBLE_DEVICES 196 | within a process to restrict the GPU set */ 197 | printf("=====Begin ENV\n"); 198 | print_devices(topology, HWLOC_OBJ_OS_DEVICE); 199 | 200 | int rc = putenv("CUDA_VISIBLE_DEVICES=1"); 201 | printf("===CUDA_VISIBLE_DEVICES=1 rc=%d===\n", rc); 202 | 203 | hwloc_topology_t topo2; 204 | hwloc_topology_init(&topo2); 205 | hwloc_topology_set_io_types_filter(topo2, 206 | HWLOC_TYPE_FILTER_KEEP_IMPORTANT); 207 | hwloc_topology_load(topo2); 208 | print_devices(topo2, HWLOC_OBJ_OS_DEVICE); 209 | hwloc_topology_destroy(topo2); 210 | printf("=====End ENV\n"); 211 | #endif 212 | 213 | printf("=====Begin root\n"); 214 | print_obj(hwloc_get_root_obj(topology), 0); 215 | printf("=====End root\n"); 216 | 217 | printf("=====Begin hwloc_restrict\n"); 218 | restr_topo_to_n_cores(topology, 4); 219 | print_topo_brief(topology); 220 | printf("=====End hwloc_restrict\n"); 221 | 222 | printf("=====Begin hwloc_distrib\n"); 223 | test_distrib(topology, 3); 224 | printf("=====End hwloc_distrib\n"); 225 | 226 | hwloc_topology_destroy(topology); 227 | 228 | return 0; 229 | } 230 | -------------------------------------------------------------------------------- /src/hwloc_utils.h: -------------------------------------------------------------------------------- 1 | /****************************************************** 2 | * Edgar A Leon 3 | * Lawrence Livermore National Laboratory 4 | ******************************************************/ 5 | #ifndef HWLOC_UTILS_H_INCLUDED 6 | #define HWLOC_UTILS_H_INCLUDED 7 | 8 | #include 9 | 10 | #ifdef __cplusplus 11 | extern "C" { 12 | #endif 13 | 14 | int pci_busid_snprintf(char *buf, size_t size, hwloc_obj_t io); 15 | 16 | int gpu_uuid_snprintf(char *buf, size_t size, hwloc_obj_t dev); 17 | 18 | void tree_walk_io(void (*apply)(hwloc_obj_t, void*, int), 19 | hwloc_obj_t root, void *args, int depth); 20 | 21 | void print_obj_info(hwloc_obj_t obj); 22 | 23 | void print_obj(hwloc_obj_t obj, int indent); 24 | 25 | void print_devices(hwloc_topology_t topo, hwloc_obj_type_t type); 26 | 27 | void print_topo_brief(hwloc_topology_t topo); 28 | 29 | void print_topo_io(hwloc_topology_t topo); 30 | 31 | #ifdef __cplusplus 32 | } /* extern "C" */ 33 | #endif 34 | 35 | #endif // HWLOC_UTILS_H_INCLUDED -------------------------------------------------------------------------------- /src/main.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Edgar A. Leon 3 | * Lawrence Livermore National Laboratory 4 | */ 5 | #include "mpibind.h" 6 | 7 | 8 | /* 9 | * Show the GPU mapping using various ID types. 
10 | */ 11 | void howto_gpu_ids(mpibind_t *handle) 12 | { 13 | char **gpu_str; 14 | char str[128]; 15 | int i, j, k, ngpus; 16 | int ids[] = {MPIBIND_ID_PCIBUS, MPIBIND_ID_UNIV, 17 | MPIBIND_ID_SMI, MPIBIND_ID_NAME}; 18 | char *desc[] = { "PCI", "UUID", "VISDEVS", "NAME"}; 19 | int ntasks = mpibind_get_ntasks(handle); 20 | 21 | for (k=0; k 0) { 29 | printf("\tTask %d: ", i); 30 | for (j=0; j 0) 43 | printf("\tTask %d: %s\n", i, str); 44 | } 45 | 46 | 47 | /* 48 | * Show how to extract and use certain environment 49 | * variables related to affinity. 50 | */ 51 | void howto_env_vars(mpibind_t *handle) 52 | { 53 | /* Set the environment variables first */ 54 | mpibind_set_env_vars(handle); 55 | 56 | #if 0 57 | /* Take a comprehensive look */ 58 | mpibind_env_vars_print(handle); 59 | #else 60 | int i, nvars; 61 | char **names; 62 | char **values; 63 | int ntasks = mpibind_get_ntasks(handle); 64 | 65 | /* Get the names of the environment variables */ 66 | names = mpibind_get_env_var_names(handle, &nvars); 67 | printf("Environment variables:\n"); 68 | for (i=0; i 1) 87 | ntasks = atoi(argv[1]); 88 | 89 | mpibind_t *handle; 90 | mpibind_init(&handle); 91 | 92 | /* User input */ 93 | mpibind_set_ntasks(handle, ntasks); 94 | //mpibind_set_nthreads(handle, 3); 95 | //mpibind_set_greedy(handle, 0); 96 | mpibind_set_gpu_optim(handle, 0); 97 | //mpibind_set_smt(handle, 2); 98 | //params.restr_type = MEM; 99 | //mpibind_set_restrict_type(handle, MPIBIND_RESTRICT_CPU); 100 | //params.restr_set = "24-29,72-77,36-41,84-89"; 101 | //params.restr_set = "24-35,72-83"; 102 | //params.restr_set = "4-6"; 103 | //params.restr_set = "8"; 104 | //mpibind_set_restrict_ids(handle, "24-35,72-83"); 105 | //mpibind_set_restrict_ids(handle, "0-11,24-47"); 106 | 107 | 108 | /* Use an hwloc topology file */ 109 | #if 0 110 | char xml[] = "../../hwloc/flash-v100.xml"; 111 | hwloc_topology_t etopo; 112 | if (hwloc_topology_init(&etopo) < 0) { 113 | fprintf(stderr, "hwloc_topology_init failed\n"); 114 | return 0; 115 | } 116 | if (hwloc_topology_set_xml(etopo, xml) < 0) { 117 | fprintf(stderr, "hwloc_topology_set_xml(%s) failed\n", xml); 118 | return 0; 119 | } 120 | if (mpibind_filter_topology(etopo) < 0) { 121 | fprintf(stderr, "mpibind_filter_topology failed\n"); 122 | return 0; 123 | } 124 | if (hwloc_topology_load(etopo) < 0) { 125 | fprintf(stderr, "hwloc_topology_load failed"); 126 | return 0; 127 | } 128 | mpibind_set_topology(handle, etopo); 129 | #endif 130 | 131 | 132 | /* Get the mapping */ 133 | if ( mpibind(handle) ) 134 | return 1; 135 | 136 | /* Get the hwloc topology to parse the hwloc_bitmaps */ 137 | hwloc_topology_t topo; 138 | topo = mpibind_get_topology(handle); 139 | 140 | /* Verbose mapping */ 141 | //Specify the type of GPU IDs to use 142 | mpibind_set_gpu_ids(handle, MPIBIND_ID_SMI); 143 | mpibind_mapping_print(handle); 144 | 145 | /* Test popping CPUs/cores */ 146 | //mpibind_pop_cpus_ptask(handle, 2, 4); 147 | //mpibind_pop_cores_ptask(handle, 1, 3); 148 | //mpibind_mapping_print(handle); 149 | 150 | int ngpus = mpibind_get_num_gpus(handle); 151 | printf("There are %d GPUs\n", ngpus); 152 | if (ngpus > 0) 153 | /* Example using various GPU IDs */ 154 | howto_gpu_ids(handle); 155 | 156 | /* Example using affinity environment variables */ 157 | howto_env_vars(handle); 158 | 159 | /* Clean up */ 160 | mpibind_finalize(handle); 161 | 162 | /* Last clean up activity: destroy the topology */ 163 | hwloc_topology_destroy(topo); 164 | 165 | return 0; 166 | } 167 | 
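The `main.c` driver above exercises most of the mpibind C API. For quick reference, the sketch below distills the minimal call sequence used there; it is not a file in the repository, the compile line is approximate, and the authoritative signatures are the ones declared in `mpibind.h`.

```c
/* Minimal mpibind C API sketch (distilled from src/main.c; not part of the repo).
 * Build roughly as: cc sketch.c -lmpibind $(pkg-config --cflags --libs hwloc) */
#include "mpibind.h"

int main(void)
{
  mpibind_t *handle;

  /* Create a handle; the number of tasks is the only required input */
  mpibind_init(&handle);
  mpibind_set_ntasks(handle, 4);

  /* Compute the mapping; a non-zero return indicates failure */
  if (mpibind(handle))
    return 1;

  /* Print the per-task CPU/GPU/thread assignments and export the
     affinity environment variables (OMP_*, *_VISIBLE_DEVICES) */
  mpibind_mapping_print(handle);
  mpibind_set_env_vars(handle);

  /* Clean up: grab the topology first, since it is destroyed last */
  hwloc_topology_t topo = mpibind_get_topology(handle);
  mpibind_finalize(handle);
  hwloc_topology_destroy(topo);

  return 0;
}
```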
-------------------------------------------------------------------------------- /src/manual.mk: -------------------------------------------------------------------------------- 1 | 2 | UNAME = $(shell uname) 3 | BASIC_CFLAGS = -Wall -Werror 4 | 5 | HWLOC_CFLAGS = $(shell pkg-config --cflags hwloc) 6 | HWLOC_LDLIBS = $(shell pkg-config --libs hwloc) 7 | 8 | # Building libmpibind 9 | VER = 1 10 | MIN = 0 11 | REL = 1 12 | MPIBIND_LIB = libmpibind.so 13 | MPIBIND_SONAME = $(MPIBIND_LIB).$(VER) 14 | MPIBIND_FNAME = $(MPIBIND_SONAME).$(MIN).$(REL) 15 | 16 | # Using libmpibind 17 | MPIBIND_DIR = $(shell pwd) 18 | MPIBIND_CFLAGS = -I$(MPIBIND_DIR) $(HWLOC_CFLAGS) 19 | MPIBIND_LDLIBS = -L$(MPIBIND_DIR) -lmpibind $(HWLOC_LDLIBS) 20 | ifeq ($(UNAME),Linux) 21 | MPIBIND_LDLIBS += -Wl,-rpath=$(MPIBIND_DIR) 22 | endif 23 | 24 | 25 | PROGS = main 26 | 27 | ## HWLOC_XML_VERBOSE=1 28 | ## HWLOC_XMLFILE=../topo-xml/coral-lassen.xml ./main 29 | 30 | all: $(PROGS) $(MPIBIND_SONAME) $(MPIBIND_LIB) 31 | 32 | hwloc_tests: hwloc_tests.c 33 | $(CC) $(BASIC_CFLAGS) $(HWLOC_CFLAGS) $@.c $(HWLOC_LDLIBS) -o $@ 34 | 35 | dev_tests: dev_tests.c mpibind.h 36 | $(CC) $(BASIC_CFLAGS) $(HWLOC_CFLAGS) $@.c $(HWLOC_LDLIBS) -o $@ 37 | 38 | main: main.c $(MPIBIND_LIB) 39 | $(CC) $@.c $(BASIC_CFLAGS) $(MPIBIND_CFLAGS) -o $@ $(MPIBIND_LDLIBS) 40 | 41 | # Todo 42 | #check: 43 | #install: 44 | 45 | $(MPIBIND_SONAME): $(MPIBIND_FNAME) 46 | ln -s -f $< $@ 47 | 48 | $(MPIBIND_LIB): $(MPIBIND_FNAME) 49 | ln -s -f $< $@ 50 | 51 | $(MPIBIND_FNAME): mpibind.o 52 | ifeq ($(UNAME),Linux) 53 | $(CC) -shared -Wl,-soname,$(MPIBIND_SONAME) -o $@ $< $(HWLOC_LDLIBS) 54 | else 55 | $(CC) -shared -o $@ $< $(HWLOC_LDLIBS) 56 | endif 57 | 58 | mpibind.o: mpibind.c mpibind.h mpibind-priv.h 59 | $(CC) -fPIC $(BASIC_CFLAGS) $(HWLOC_CFLAGS) -c mpibind.c 60 | 61 | 62 | clean: 63 | rm -f $(PROGS) $(MPIBIND_LIB) $(MPIBIND_SONAME) $(MPIBIND_FNAME) *.dSYM *.o *~ 64 | -------------------------------------------------------------------------------- /src/mpibind-priv.h: -------------------------------------------------------------------------------- 1 | /****************************************************** 2 | * Edgar A Leon 3 | * Lawrence Livermore National Laboratory 4 | ******************************************************/ 5 | #ifndef MPIBIND_PRIV_H_INCLUDED 6 | #define MPIBIND_PRIV_H_INCLUDED 7 | 8 | #define SHORT_STR_SIZE 32 9 | #define LONG_STR_SIZE 1024 10 | 11 | #define PCI_BUSID_LEN 16 12 | #define UUID_LEN 64 13 | #define MAX_IO_DEVICES 128 14 | #define MAX_CPUS_PER_TASK 1024 15 | 16 | #define VERBOSE 0 17 | #define DEBUG 0 18 | #define OUT_STREAM stderr 19 | 20 | #define ERR_MSG(whatstr) \ 21 | do { \ 22 | fprintf(stderr, "%s failed: %s.\n", __func__, (whatstr)); \ 23 | } while (0) 24 | 25 | #define PRINT(...) fprintf(stderr, __VA_ARGS__) 26 | 27 | 28 | /* 29 | * An environment variable with one value per task 30 | */ 31 | typedef struct { 32 | int size; 33 | char *name; 34 | char **values; 35 | } mpibind_env_var; 36 | 37 | /* 38 | * The type of I/O devices 39 | */ 40 | enum { 41 | DEV_GPU, 42 | DEV_NIC, 43 | }; 44 | 45 | /* 46 | * The various I/O device IDs. 47 | * GPU devices are different from other I/O devices 48 | * by having visdevs (and optionally smi) set 49 | * to a non-negative integer. 
50 | */ 51 | struct device { 52 | char name[SHORT_STR_SIZE]; // Device name 53 | char pci[PCI_BUSID_LEN]; // PCI bus ID 54 | char univ[UUID_LEN]; // Universally unique ID 55 | hwloc_obj_t ancestor; // First (smallest) non-I/O ancestor object 56 | int type; // Type of I/O device, e.g., DEV_GPU 57 | int vendor_id; // Device vendor 58 | /* GPU specific */ 59 | int smi; // System management ID (RSMI and NVML) 60 | char vendor[SHORT_STR_SIZE]; // Vendor of GPU/COPROC devices 61 | char model[SHORT_STR_SIZE]; // Model of GPU/COPROC devices 62 | }; 63 | 64 | /* 65 | * The mpibind handle 66 | */ 67 | struct mpibind_t { 68 | /* Input parameters */ 69 | int ntasks; 70 | int in_nthreads; 71 | int greedy; 72 | int gpu_optim; 73 | int smt; 74 | char *restr_set; 75 | int restr_type; 76 | 77 | /* Input/Output parameters */ 78 | hwloc_topology_t topo; 79 | 80 | /* Output parameters */ 81 | int *nthreads; 82 | hwloc_bitmap_t *cpus; 83 | hwloc_bitmap_t *gpus; 84 | char ***gpus_usr; 85 | int **cpus_usr; 86 | 87 | /* Environment variables */ 88 | int nvars; 89 | char **names; 90 | mpibind_env_var *env_vars; 91 | 92 | /* IDs of I/O devices */ 93 | int ndevs; 94 | struct device **devs; 95 | }; 96 | 97 | 98 | #endif // MPIBIND_PRIV_H_INCLUDED 99 | -------------------------------------------------------------------------------- /src/simple.mk: -------------------------------------------------------------------------------- 1 | 2 | CFLAGS += -Wall -Werror 3 | CFLAGS += $(shell pkg-config --cflags hwloc) 4 | LDLIBS += $(shell pkg-config --libs hwloc) 5 | 6 | PROGS = dev_tests main 7 | 8 | OBJS = mpibind_v2.1.o 9 | 10 | ## HWLOC_XML_VERBOSE=1 11 | ## HWLOC_XMLFILE=topo-xml/lassen-hw2.xml 12 | ## ./mpibind_v0.14.1 13 | 14 | all: $(PROGS) 15 | 16 | main: $(OBJS) 17 | 18 | $(OBJS): mpibind.h 19 | 20 | clean: 21 | rm -f $(PROGS) *.o *~ 22 | -------------------------------------------------------------------------------- /test-suite/Makefile.am: -------------------------------------------------------------------------------- 1 | AM_CPPFLAGS = -Wall -Werror -I$(top_srcdir)/src $(HWLOC_CFLAGS) $(TAP_CFLAGS) 2 | AM_LDFLAGS = -rpath $(TAP_LIBDIR) 3 | LDADD = $(top_srcdir)/src/libmpibind.la $(TAP_LIBS) $(HWLOC_LIBS) 4 | 5 | AM_TESTS_ENVIRONMENT = \ 6 | export PYTHONPATH=:"$(top_srcdir)/python:$(PYTHON_SITE_PKG):$$PYTHONPATH"; 7 | 8 | TEST_EXTENSIONS = .t .py 9 | 10 | T_LOG_DRIVER = env AM_TAP_AWK='$(AWK)' $(SHELL) \ 11 | $(top_srcdir)/config/tap-driver.sh 12 | 13 | PY_LOG_DRIVER = $(PYTHON) $(top_srcdir)/config/tap-driver.py 14 | 15 | coral_lassen_t_SOURCES = coral-lassen.c test_utils.c test_utils.h 16 | epyc_corona_t_SOURCES = epyc-corona.c test_utils.c test_utils.h 17 | coral_ea_t_SOURCES = coral-ea.c test_utils.c test_utils.h 18 | cts1_quartz_t_SOURCES = cts1-quartz.c test_utils.c test_utils.h 19 | error_t_SOURCES = error.c test_utils.c test_utils.h 20 | environment_t_SOURCES = environment.c test_utils.c test_utils.h 21 | 22 | # Fix to make tests work on macOS: 23 | # The tap library path is not set correctly in the executable. 24 | # Statement must terminate with a semicolon. 25 | if HAVE_DARWIN_OS 26 | AM_TESTS_ENVIRONMENT += \ 27 | if [[ ! 
$@ =~ python ]]; then \ 28 | echo "Executing dylib fix for `echo $@ | sed 's/\.log/\.t/'`"; \ 29 | install_name_tool -change libtap.dylib \ 30 | $(TAP_LIBDIR)/libtap.dylib .libs/`echo $@ | sed 's/\.log/\.t/'`; fi; 31 | endif 32 | 33 | C_TESTS = \ 34 | error.t \ 35 | environment.t \ 36 | coral_lassen.t \ 37 | epyc_corona.t \ 38 | coral_ea.t \ 39 | cts1_quartz.t 40 | 41 | PYTHON_TESTS = \ 42 | python/py-coral-ea.py \ 43 | python/py-coral-lassen.py \ 44 | python/py-cts1-quartz.py \ 45 | python/py-epyc-corona.py 46 | 47 | if HAVE_LIBTAP 48 | TESTS = $(C_TESTS) 49 | check_PROGRAMS = $(C_TESTS) 50 | endif 51 | 52 | if HAVE_CFFI 53 | if HAVE_PYCOTAP 54 | check_SCRIPTS = $(PYTHON_TESTS) 55 | if HAVE_LIBTAP 56 | TESTS += $(PYTHON_TESTS) 57 | else 58 | TESTS = $(PYTHON_TESTS) 59 | endif 60 | endif 61 | endif 62 | -------------------------------------------------------------------------------- /test-suite/README.md: -------------------------------------------------------------------------------- 1 | # mpibind tests 2 | 3 | The current iteration of the test suite is designed to generate a set of tests 4 | based on a given topology, then compare the resultant mappings to a file that 5 | defines expected output (see `expected` directory). Generating the tests 6 | involves gathering basic information about a topology, and using that 7 | information to tweak each test to be suitable for the topology. 8 | 9 | An example of the answers file is below: 10 | 11 | ``` 12 | # Lines that start with a pound are comments! 13 | # Each answer description consists of 6 lines: 14 | # 1. A comment with the test number 15 | # 2. A comment describing the parameters used for the test in JSON format 16 | # 3. The test description 17 | # 4. The thread mapping 18 | # 5. The cpu mapping 19 | # 6. The gpu mapping 20 | # The mapping for each task is separated by a defined character. 21 | # This separator can be changed in test_utils.c::parse_answer() 22 | 23 | # 1: 24 | # {"params": {"ntasks": 40, "in_nthreads": 0, "greedy": 1, "gpu_optim": 1, "smt": 0, "restr_set": null, "restrict_type": 0}} 25 | Map one task to every core 26 | "1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1" 27 | "8;12;16;20;24;28;32;36;40;44;48;52;56;60;64;68;72;76;80;84;96;100;104;108;112;116;120;124;128;132;136;140;144;148;152;156;160;164;168;172" 28 | "0;0;0;0;0;0;0;0;0;0;1;1;1;1;1;1;1;1;1;1;2;2;2;2;2;2;2;2;2;2;3;3;3;3;3;3;3;3;3;3" 29 | 30 | ``` 31 | 32 | The Python tests parse the params comment to initialize mpibind handles correctly. 33 | 34 | ## Test details 35 | 36 | 1. Valid mpibind configurations 37 | * Map one task to every core 38 | * Map one task greedily 39 | * Map two tasks greedily 40 | * Mapping such that ntasks < #NUMA nodes but nworkers > #NUMA nodes (this makes sure mpibind accounts for the number of threads as well) 41 | * Restrict x tasks to a single core (x == the machine's SMT level) 42 | * Map two tasks at smt 1 43 | * Map two tasks at smt (max_smt - 1) 44 | * Map two tasks, but restrict them to a single NUMA domain 45 | * Map number_numas tasks without GPU optimization 46 | * Map number_numas tasks with GPU optimization 47 | * Map 8 tasks to a single PU 48 | 2. Error checking 49 | * Passing NULL in place of the handle to all of the setter and getter functions. 50 | * Trying to run mpibind with an invalid number of threads (e.g. -1) 51 | * Trying to run mpibind with an invalid number of tasks (e.g. -1) 52 | * Trying to run mpibind with an invalid SMT level (e.g. -1 or 8 on a machine with SMT-4) 53 | 3.
Environment Varibles 54 | * Check that AMD and NVIDIA gpus can be properly detected 55 | * Check that the OMP_PLACES variable is formatted correctly 56 | 57 | ## Debugging 58 | 59 | The tests are fired off by running `make check` from the top directory. One can 60 | use `V=1` to show the verbose compilation lines and `VERBOSE=1` to show any 61 | libtap error(s). 62 | ``` 63 | make V=1 VERBOSE=1 check 64 | ``` 65 | 66 | The .t files are libtool scripts that call the `.libs/.h` binaries. 67 | 68 | The expected mappings are in the `expected` directory. -------------------------------------------------------------------------------- /test-suite/coral-ea.c: -------------------------------------------------------------------------------- 1 | #include "test_utils.h" 2 | 3 | int main(int argc, char **argv) { 4 | plan(NO_PLAN); 5 | 6 | char* topology_file = "../topo-xml/coral-ea-hwloc1.xml"; 7 | char* answer_file = "./expected/expected.coral-ea"; 8 | unit_test_topology(topology_file, answer_file); 9 | 10 | done_testing(); 11 | return (0); 12 | } 13 | -------------------------------------------------------------------------------- /test-suite/coral-lassen.c: -------------------------------------------------------------------------------- 1 | #include "test_utils.h" 2 | 3 | int main(int argc, char **argv) { 4 | plan(NO_PLAN); 5 | 6 | char* topology_file = "../topo-xml/coral-lassen.xml"; 7 | char* answer_file = "./expected/expected.coral-lassen"; 8 | unit_test_topology(topology_file, answer_file); 9 | 10 | done_testing(); 11 | return (0); 12 | } 13 | -------------------------------------------------------------------------------- /test-suite/cts1-quartz.c: -------------------------------------------------------------------------------- 1 | #include "test_utils.h" 2 | 3 | int main(int argc, char **argv) { 4 | plan(NO_PLAN); 5 | 6 | char* topology_file = "../topo-xml/cts1-quartz-smt1.xml"; 7 | char* answer_file = "./expected/expected.cts1-quartz"; 8 | unit_test_topology(topology_file, answer_file); 9 | 10 | done_testing(); 11 | return (0); 12 | } 13 | -------------------------------------------------------------------------------- /test-suite/environment.c: -------------------------------------------------------------------------------- 1 | #include "test_utils.h" 2 | 3 | static void check_amd_env() { 4 | mpibind_t* handle; 5 | hwloc_topology_t topo; 6 | 7 | load_topology(&topo, "../topo-xml/epyc-corona.xml"); 8 | 9 | mpibind_init(&handle); 10 | mpibind_set_topology(handle, topo); 11 | mpibind_set_ntasks(handle, 1); 12 | mpibind_set_gpu_optim(handle, 1); 13 | mpibind_set_greedy(handle, 1); 14 | 15 | mpibind(handle); 16 | 17 | mpibind_set_env_vars(handle); 18 | //ok(mpibind_get_gpu_type(handle) == MPIBIND_GPU_AMD, 19 | // "mpibind correctly identifies AMD gpus"); 20 | 21 | int num, i, idx = -1; 22 | char** env_var_names = mpibind_get_env_var_names(handle, &num); 23 | 24 | for (i = 0; i < num; i++) { 25 | if (!strcmp(env_var_names[i], "ROCR_VISIBLE_DEVICES")) { 26 | idx = i; 27 | } 28 | } 29 | 30 | ok(idx >= 0, "GPU variable is correct"); 31 | 32 | mpibind_finalize(handle); 33 | hwloc_topology_destroy(topo); 34 | } 35 | 36 | static void check_omp_places() { 37 | mpibind_t* handle; 38 | hwloc_topology_t topo; 39 | 40 | load_topology(&topo, "../topo-xml/epyc-corona.xml"); 41 | 42 | mpibind_init(&handle); 43 | mpibind_set_topology(handle, topo); 44 | mpibind_set_ntasks(handle, 1); 45 | mpibind_set_gpu_optim(handle, 0); 46 | mpibind_set_smt(handle, 1); 47 | mpibind_set_greedy(handle, 1); 48 | 
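  /* Restricting the handle to CPU ids 6-11 (MPIBIND_RESTRICT_CPU) at SMT-1
     means the single task should be given exactly those six PUs, so the
     OMP_PLACES value checked below must be {6},{7},{8},{9},{10},{11}. */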
mpibind_set_restrict_type(handle, MPIBIND_RESTRICT_CPU); 49 | mpibind_set_restrict_ids(handle, "6-11"); 50 | 51 | mpibind(handle); 52 | 53 | mpibind_set_env_vars(handle); 54 | 55 | int num, i, rc = -1; 56 | char** env_var_names = mpibind_get_env_var_names(handle, &num); 57 | char** env_var_values; 58 | 59 | for (i = 0; i < num; i++) { 60 | if (!strcmp(env_var_names[i], "OMP_PLACES")) { 61 | env_var_values = mpibind_get_env_var_values(handle, "OMP_PLACES"); 62 | rc = is(env_var_values[0], "{6},{7},{8},{9},{10},{11}", 63 | "Checking OMP_PLACES mapping"); 64 | break; 65 | } 66 | } 67 | 68 | if (rc == -1) { 69 | fail("failed to find OMP_PLACES variable"); 70 | } 71 | 72 | mpibind_finalize(handle); 73 | hwloc_topology_destroy(topo); 74 | } 75 | 76 | static void check_nvidia_env() { 77 | mpibind_t* handle; 78 | hwloc_topology_t topo; 79 | 80 | load_topology(&topo, "../topo-xml/coral-lassen.xml"); 81 | 82 | mpibind_init(&handle); 83 | mpibind_set_topology(handle, topo); 84 | mpibind_set_ntasks(handle, 1); 85 | mpibind_set_gpu_optim(handle, 1); 86 | mpibind_set_greedy(handle, 1); 87 | 88 | mpibind(handle); 89 | 90 | mpibind_set_env_vars(handle); 91 | //ok(mpibind_get_gpu_type(handle) == MPIBIND_GPU_NVIDIA, 92 | // "mpibind correctly identifies NVIDIA gpus"); 93 | 94 | int num, i, idx = -1; 95 | char** env_var_names = mpibind_get_env_var_names(handle, &num); 96 | for (i = 0; i < num; i++) { 97 | if (!strcmp(env_var_names[i], "CUDA_VISIBLE_DEVICES")) { 98 | idx = i; 99 | } 100 | } 101 | ok(idx >= 0, "GPU variable is correct"); 102 | 103 | mpibind_finalize(handle); 104 | hwloc_topology_destroy(topo); 105 | } 106 | 107 | int main(int argc, char** argv) { 108 | plan(NO_PLAN); 109 | 110 | check_amd_env(); 111 | check_nvidia_env(); 112 | check_omp_places(); 113 | 114 | done_testing(); 115 | return (0); 116 | } 117 | -------------------------------------------------------------------------------- /test-suite/epyc-corona.c: -------------------------------------------------------------------------------- 1 | #include "test_utils.h" 2 | 3 | int main(int argc, char **argv) { 4 | plan(NO_PLAN); 5 | 6 | char* topology_file = "../topo-xml/epyc-corona.xml"; 7 | char* answer_file = "./expected/expected.epyc-corona"; 8 | unit_test_topology(topology_file, answer_file); 9 | 10 | done_testing(); 11 | return (0); 12 | } 13 | -------------------------------------------------------------------------------- /test-suite/error.c: -------------------------------------------------------------------------------- 1 | #include "test_utils.h" 2 | #define XML_PATH "../topo-xml/coral-lassen.xml" 3 | 4 | /**Test passing null to all setters and getter functions**/ 5 | int test_null_handle() { 6 | diag("Testing passing a null handle to setters and getters"); 7 | mpibind_t *handle = NULL; 8 | int count; //for mpibind_get_env_var_names 9 | 10 | ok(mpibind_set_ntasks(handle, 4) == 1, 11 | "mpibind_set_ntasks fails when handle == NULL"); 12 | ok(mpibind_set_nthreads(handle, 4) == 1, 13 | "mpibind_set_nthreads fails when handle == NULL"); 14 | ok(mpibind_set_greedy(handle, 1) == 1, 15 | "mpibind_set_greedy fails when handle == NULL"); 16 | ok(mpibind_set_gpu_optim(handle, 1) == 1, 17 | "mpibind_set_gpu_optim fails when handle == NULL"); 18 | ok(mpibind_set_smt(handle, 1) == 1, 19 | "mpibind_set_smt fails when handle == NULL"); 20 | ok(mpibind_set_restrict_ids(handle, NULL) == 1, 21 | "mpibind_set_restrict_ids fails when handle == NULL"); 22 | ok(mpibind_set_restrict_type(handle, 1) == 1, 23 | "mpibind_set_restrict_type fails when handle == 
NULL"); 24 | ok(mpibind_set_topology(handle, NULL) == 1, 25 | "mpibind_set_topology fails when handle == NULL"); 26 | ok(mpibind_set_env_vars(handle) == 1, 27 | "mpibind_set_end_vars fails when handle == NULL"); 28 | 29 | ok(mpibind_get_ntasks(handle) == -1, 30 | "mpibind_get_ntasks return -1 when handle == NULL"); 31 | ok(mpibind_get_greedy(handle) == -1, 32 | "mpibind_get_greedy return -1 when handle == NULL"); 33 | ok(mpibind_get_gpu_optim(handle) == -1, 34 | "mpibind_get_gpu_optim return -1 when handle == NULL"); 35 | ok(mpibind_get_smt(handle) == -1, 36 | "mpibind_get_smt return -1 when handle == NULL"); 37 | ok(mpibind_get_restrict_ids(handle) == NULL, 38 | "mpibind_get_ntasks return NULL when handle == NULL"); 39 | ok(mpibind_get_restrict_type(handle) == -1, 40 | "mpibind_get_restrict_type return -1 when handle == NULL"); 41 | 42 | ok(mpibind_get_nthreads(handle) == NULL, 43 | "mpibind_get_nthreads returns NULL when handle == NULL"); 44 | ok(mpibind_get_cpus(handle) == NULL, 45 | "mpibind_get_cpus returns NULL when handle == NULL"); 46 | ok(mpibind_get_gpus(handle) == NULL, 47 | "mpibind_get_gpus returns NULL when handle == NULL"); 48 | //ok(mpibind_get_gpu_type(handle) == -1, 49 | // "mpibind_get_gpu_type returns NULL when handle == NULL"); 50 | ok(mpibind_get_topology(handle) == NULL, 51 | "mpibind_get_topology returns NULL when handle == NULL"); 52 | ok(mpibind_get_env_var_values(handle, NULL) == NULL, 53 | "mpibind_get_env_var_values returns NULL when handle == NULL"); 54 | ok(mpibind_get_env_var_names(handle, &count) == NULL, 55 | "mpibind_get_env_var_names returns NULL when handle == NULL"); 56 | ok(mpibind_finalize(handle) == 1, 57 | "mpibind_finalize fails when handle == NULL"); 58 | 59 | return 0; 60 | } 61 | 62 | int test_mpibind_errors() { 63 | mpibind_t *handle; 64 | hwloc_topology_t topo; 65 | 66 | // setup topology 67 | hwloc_topology_init(&topo); 68 | hwloc_topology_set_xml(topo, XML_PATH); 69 | hwloc_topology_set_all_types_filter(topo, HWLOC_TYPE_FILTER_KEEP_STRUCTURE); 70 | hwloc_topology_set_type_filter(topo, HWLOC_OBJ_OS_DEVICE, 71 | HWLOC_TYPE_FILTER_KEEP_IMPORTANT); 72 | hwloc_topology_load(topo); 73 | 74 | mpibind_init(&handle); 75 | 76 | int ntasks = 5; 77 | mpibind_set_ntasks(handle, ntasks); 78 | mpibind_set_nthreads(handle, 4); 79 | mpibind_set_greedy(handle, 0); 80 | mpibind_set_gpu_optim(handle, 0); 81 | mpibind_set_smt(handle, 2); 82 | 83 | diag("Testing error handling in mpibind()"); 84 | 85 | mpibind_set_nthreads(handle, -4); 86 | ok(mpibind(handle) == 1, "Mapping fails if nthreads is invalid"); 87 | 88 | mpibind_set_nthreads(handle, 4); 89 | mpibind_set_ntasks(handle, -1); 90 | ok(mpibind(handle) == 1, "Mapping fails if ntasks is invalid"); 91 | 92 | mpibind_set_ntasks(handle, 4); 93 | mpibind_set_smt(handle, -1); 94 | ok(mpibind(handle) == 1, "Mapping fails if smt is invalid"); 95 | 96 | mpibind_set_smt(handle, 16); 97 | ok(mpibind(handle) == 1, "Mapping fails if smt is valid but too high"); 98 | 99 | // TODO: ERROR CODES RELATED TO RESTRICT SETS 100 | todo("Error codes related to restrict sets"); 101 | return 0; 102 | } 103 | 104 | int main(int argc, char **argv) { 105 | plan(NO_PLAN); 106 | test_null_handle(); 107 | test_mpibind_errors(); 108 | done_testing(); 109 | return (0); 110 | } 111 | -------------------------------------------------------------------------------- /test-suite/expected/expected.coral-ea: -------------------------------------------------------------------------------- 1 | # Line that start with a pound are comments! 
2 | # Each answer description consist of 6 lines: 3 | # 1. A comment with the test number 4 | # 2. A comment describing the parameters used for the test in JSON format 5 | # 3. The test description 6 | # 4. the thread mapping 7 | # 5. the cpu mapping 8 | # 6. the gpu_mapping 9 | # The mapping for each task is separated by a defined character. 10 | # This separator can be changed in test_utils.c::parse_answer() 11 | 12 | 13 | # 1: 14 | # {"params": {"ntasks": 20, "in_nthreads": 0, "greedy": 1, "gpu_optim": 1, "smt": 0, "restr_set": null, "restrict_type": 0}} 15 | Map one task to every core 16 | "1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1" 17 | "0;8;16;24;32;40;48;56;64;72;80;88;96;104;112;120;128;136;144;152" 18 | ";;;;;;;;;;;;;;;;;;;" 19 | 20 | # 2: 21 | # {"params": {"ntasks": 1, "in_nthreads": 0, "greedy": 1, "gpu_optim": 1, "smt": 0, "restr_set": null, "restrict_type": 0}} 22 | Map one task greedily 23 | "160" 24 | "0-159" 25 | "" 26 | 27 | 28 | # 3: 29 | # {"params": {"ntasks": 2, "in_nthreads": 0, "greedy": 1, "gpu_optim": 1, "smt": 0, "restr_set": null, "restrict_type": 0}} 30 | Map two tasks greedily 31 | "10;10" 32 | "0,8,16,24,32,40,48,56,64,72;80,88,96,104,112,120,128,136,144,152" 33 | ";" 34 | 35 | # 4: 36 | # {"params": {"ntasks": 1, "in_nthreads": 4, "greedy": 1, "gpu_optim": 1, "smt": 0, "restr_set": null, "restrict_type": 0}} 37 | Map such that ntasks < #NUMA nodes but nworkers > #NUMA nodes 38 | "160" 39 | "0-159" 40 | "" 41 | 42 | 43 | # 5: 44 | # {"params": {"ntasks": 8, "in_nthreads": 0, "greedy": 1, "gpu_optim": 1, "smt": 0, "restr_set": "0-7", "restrict_type": 0}} 45 | Restrict x tasks a single core (x == machine's smt level) 46 | "1;1;1;1;1;1;1;1" 47 | "0;1;2;3;4;5;6;7" 48 | ";;;;;;;" 49 | 50 | 51 | # 6: 52 | # {"params": {"ntasks": 2, "in_nthreads": 0, "greedy": 1, "gpu_optim": 1, "smt": 1, "restr_set": null, "restrict_type": 0}} 53 | Map two tasks at SMT 1 54 | "10;10" 55 | "0,8,16,24,32,40,48,56,64,72;80,88,96,104,112,120,128,136,144,152" 56 | ";" 57 | 58 | 59 | # 7: 60 | # {"params": {"ntasks": 2, "in_nthreads": 0, "greedy": 1, "gpu_optim": 1, "smt": 8, "restr_set": null, "restrict_type": 0}} 61 | Map two tasks at max smt (across all cores) 62 | "80;80" 63 | "0-79;80-159" 64 | ";" 65 | 66 | 67 | # 8: 68 | # {"params": {"ntasks": 2, "in_nthreads": 0, "greedy": 1, "gpu_optim": 1, "smt": 7, "restr_set": null, "restrict_type": 0}} 69 | Map two tasks at max smt-1 70 | "70;70" 71 | "0-6,8-14,16-22,24-30,32-38,40-46,48-54,56-62,64-70,72-78;80-86,88-94,96-102,104-110,112-118,120-126,128-134,136-142,144-150,152-158" 72 | ";" 73 | 74 | 75 | # 9: 76 | # {"params": {"ntasks": 2, "in_nthreads": 0, "greedy": 1, "gpu_optim": 1, "smt": 0, "restr_set": "0", "restrict_type": 1}} 77 | Map two tasks, but restrict them to a single NUMA domain 78 | "5;5" 79 | "0,8,16,24,32;40,48,56,64,72" 80 | ";" 81 | 82 | 83 | # 10: 84 | # {"params": {"ntasks": 2, "in_nthreads": 0, "greedy": 1, "gpu_optim": 0, "smt": 0, "restr_set": null, "restrict_type": 0}} 85 | Map num_numa tasks without GPU optimization 86 | "10;10" 87 | "0,8,16,24,32,40,48,56,64,72;80,88,96,104,112,120,128,136,144,152" 88 | ";" 89 | 90 | 91 | # 11: 92 | # {"params": {"ntasks": 2, "in_nthreads": 0, "greedy": 1, "gpu_optim": 1, "smt": 0, "restr_set": null, "restrict_type": 0}} 93 | Map num_numa tasks with GPU optimization 94 | "10;10" 95 | "0,8,16,24,32,40,48,56,64,72;80,88,96,104,112,120,128,136,144,152" 96 | ";" 97 | 98 | 99 | # 12: 100 | # {"params": {"ntasks": 8, "in_nthreads": 0, "greedy": 1, "gpu_optim": 0, "smt": 0, 
"restr_set": "0", "restrict_type": 0}} 101 | Map eight tasks to a single pu 102 | "1;1;1;1;1;1;1;1" 103 | "0;0;0;0;0;0;0;0" 104 | ";;;;;;;" 105 | -------------------------------------------------------------------------------- /test-suite/expected/expected.coral-lassen: -------------------------------------------------------------------------------- 1 | # Line that start with a pound are comments! 2 | # Each answer description consist of 6 lines: 3 | # 1. A comment with the test number 4 | # 2. A comment describing the parameters used for the test in JSON format 5 | # 3. The test description 6 | # 4. the thread mapping 7 | # 5. the cpu mapping 8 | # 6. the gpu_mapping 9 | # The mapping for each task is separated by a defined character. 10 | # This separator can be changed in test_utils.c::parse_answer() 11 | 12 | 13 | 14 | # 1: 15 | # {"params": {"ntasks": 40, "in_nthreads": 0, "greedy": 1, "gpu_optim": 1, "smt": 0, "restr_set": null, "restrict_type": 0}} 16 | Map one task to every core 17 | "1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1" 18 | "8;12;16;20;24;28;32;36;40;44;48;52;56;60;64;68;72;76;80;84;96;100;104;108;112;116;120;124;128;132;136;140;144;148;152;156;160;164;168;172" 19 | "0;0;0;0;0;0;0;0;0;0;1;1;1;1;1;1;1;1;1;1;2;2;2;2;2;2;2;2;2;2;3;3;3;3;3;3;3;3;3;3" 20 | 21 | 22 | # 2: 23 | # {"params": {"ntasks": 1, "in_nthreads": 0, "greedy": 1, "gpu_optim": 1, "smt": 0, "restr_set": null, "restrict_type": 0}} 24 | Map one task greedily 25 | "160" 26 | "8-87,96-175" 27 | "0,1,2,3" 28 | 29 | 30 | # 3: 31 | # {"params": {"ntasks": 2, "in_nthreads": 0, "greedy": 1, "gpu_optim": 1, "smt": 0, "restr_set": null, "restrict_type": 0}} 32 | Map two tasks greedily 33 | "20;20" 34 | "8,12,16,20,24,28,32,36,40,44,48,52,56,60,64,68,72,76,80,84;96,100,104,108,112,116,120,124,128,132,136,140,144,148,152,156,160,164,168,172" 35 | "0,1;2,3" 36 | 37 | 38 | # 4: 39 | # {"params": {"ntasks": 1, "in_nthreads": 4, "greedy": 1, "gpu_optim": 1, "smt": 0, "restr_set": null, "restrict_type": 0}} 40 | Map such that ntasks < #NUMA nodes but nworkers > #NUMA nodes 41 | "160" 42 | "8-87,96-175" 43 | "0,1,2,3" 44 | 45 | 46 | # 5: 47 | # {"params": {"ntasks": 4, "in_nthreads": 0, "greedy": 1, "gpu_optim": 1, "smt": 0, "restr_set": "8-11", "restrict_type": 0}} 48 | Restrict x tasks a single core (x == machine's smt level) 49 | "1;1;1;1" 50 | "8;9;10;11" 51 | "0;0;1;1" 52 | 53 | 54 | # 6: 55 | # {"params": {"ntasks": 2, "in_nthreads": 0, "greedy": 1, "gpu_optim": 1, "smt": 1, "restr_set": null, "restrict_type": 0}} 56 | Map two tasks at SMT 1 57 | "20;20" 58 | "8,12,16,20,24,28,32,36,40,44,48,52,56,60,64,68,72,76,80,84;96,100,104,108,112,116,120,124,128,132,136,140,144,148,152,156,160,164,168,172" 59 | "0,1;2,3" 60 | 61 | 62 | # 7: 63 | # {"params": {"ntasks": 2, "in_nthreads": 0, "greedy": 1, "gpu_optim": 1, "smt": 4, "restr_set": null, "restrict_type": 0}} 64 | Map two tasks at max smt (across all cores) 65 | "80;80" 66 | "8-87;96-175" 67 | "0,1;2,3" 68 | 69 | 70 | # 8: 71 | # {"params": {"ntasks": 2, "in_nthreads": 0, "greedy": 1, "gpu_optim": 1, "smt": 3, "restr_set": null, "restrict_type": 0}} 72 | Map two tasks at max smt-1 73 | "60;60" 74 | "8-10,12-14,16-18,20-22,24-26,28-30,32-34,36-38,40-42,44-46,48-50,52-54,56-58,60-62,64-66,68-70,72-74,76-78,80-82,84-86;96-98,100-102,104-106,108-110,112-114,116-118,120-122,124-126,128-130,132-134,136-138,140-142,144-146,148-150,152-154,156-158,160-162,164-166,168-170,172-174" 75 | "0,1;2,3" 76 | 77 | 78 | # 9: 79 | # {"params": {"ntasks": 2, 
"in_nthreads": 0, "greedy": 1, "gpu_optim": 1, "smt": 0, "restr_set": "0", "restrict_type": 1}} 80 | Map two tasks, but restrict them to a single NUMA domain 81 | "10;10" 82 | "8,12,16,20,24,28,32,36,40,44;48,52,56,60,64,68,72,76,80,84" 83 | "0;1" 84 | 85 | 86 | # 10: 87 | # {"params": {"ntasks": 2, "in_nthreads": 0, "greedy": 1, "gpu_optim": 0, "smt": 0, "restr_set": null, "restrict_type": 0}} 88 | Map num_numa tasks without GPU optimization 89 | "20;20" 90 | "8,12,16,20,24,28,32,36,40,44,48,52,56,60,64,68,72,76,80,84;96,100,104,108,112,116,120,124,128,132,136,140,144,148,152,156,160,164,168,172" 91 | "0,1;2,3" 92 | 93 | 94 | # 11: 95 | # {"params": {"ntasks": 2, "in_nthreads": 0, "greedy": 1, "gpu_optim": 1, "smt": 0, "restr_set": null, "restrict_type": 0}} 96 | Map num_numa tasks with GPU optimization 97 | "20;20" 98 | "8,12,16,20,24,28,32,36,40,44,48,52,56,60,64,68,72,76,80,84;96,100,104,108,112,116,120,124,128,132,136,140,144,148,152,156,160,164,168,172" 99 | "0,1;2,3" 100 | 101 | 102 | # 12: 103 | # {"params": {"ntasks": 8, "in_nthreads": 0, "greedy": 1, "gpu_optim": 0, "smt": 0, "restr_set": "8", "restrict_type": 0}} 104 | Map eight tasks to a single pu 105 | "1;1;1;1;1;1;1;1" 106 | "8;8;8;8;8;8;8;8" 107 | "0;0;0;0;1;1;1;1" 108 | -------------------------------------------------------------------------------- /test-suite/expected/expected.coral-lassen.v1: -------------------------------------------------------------------------------- 1 | # Line that start with a pound are comments! 2 | # The first non-commented line should be the number of tests. 3 | # After the number of tests, each answer description consist of 4 lines: 4 | # The test description, the thread mapping, the cpu mapping, and the 5 | # gpu_mapping.The mapping for each task is separated by a defined character. 
6 | # This separator can be changed in test_utils.c::parse_answer() 7 | 8 | 9 | # 1: Map one task to every core 10 | Map one task to every core 11 | "1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1" 12 | "8;12;16;20;24;28;32;36;40;44;48;52;56;60;64;68;72;76;80;84;96;100;104;108;112;116;120;124;128;132;136;140;144;148;152;156;160;164;168;172" 13 | "0;0;0;0;0;0;0;0;0;0;1;1;1;1;1;1;1;1;1;1;2;2;2;2;2;2;2;2;2;2;3;3;3;3;3;3;3;3;3;3" 14 | 15 | 16 | # 2: Map 1 task greedily 17 | Map 1 task greedily 18 | "160" 19 | "8-87,96-175" 20 | "0-3" 21 | 22 | 23 | # 3: Map two tasks greedily 24 | Map two tasks greedily 25 | "20;20" 26 | "8,12,16,20,24,28,32,36,40,44,48,52,56,60,64,68,72,76,80,84;96,100,104,108,112,116,120,124,128,132,136,140,144,148,152,156,160,164,168,172" 27 | "0-1;2-3" 28 | 29 | # 4: 30 | Mapping such that ntasks < #NUMA nodes but nworkers > #NUMA nodes 31 | "160" 32 | "8-87,96-175" 33 | "0-3" 34 | 35 | 36 | # 5: 37 | Restrict x tasks a single core (x == machine's smt level) 38 | "1;1;1;1" 39 | "8;9;10;11" 40 | "0;0;1;1" 41 | 42 | 43 | # 6: 44 | Map two tasks at SMT 1 45 | "20;20" 46 | "8,12,16,20,24,28,32,36,40,44,48,52,56,60,64,68,72,76,80,84;96,100,104,108,112,116,120,124,128,132,136,140,144,148,152,156,160,164,168,172" 47 | "0-1;2-3" 48 | 49 | 50 | # 7: 51 | Map 2 tasks at max smt (across all cores) 52 | "80;80" 53 | "8-87;96-175" 54 | "0-1;2-3" 55 | 56 | 57 | # 8: 58 | Map tasks at max smt-1 59 | "60;60" 60 | "8-10,12-14,16-18,20-22,24-26,28-30,32-34,36-38,40-42,44-46,48-50,52-54,56-58,60-62,64-66,68-70,72-74,76-78,80-82,84-86;96-98,100-102,104-106,108-110,112-114,116-118,120-122,124-126,128-130,132-134,136-138,140-142,144-146,148-150,152-154,156-158,160-162,164-166,168-170,172-174" 61 | "0-1;2-3" 62 | 63 | 64 | # 9: 65 | Map two tasks, but restrict them to a single NUMA domain 66 | "10;10" 67 | "8,12,16,20,24,28,32,36,40,44;48,52,56,60,64,68,72,76,80,84" 68 | "0;1" 69 | 70 | 71 | # 10: 72 | Map num_numa tasks without GPU optimization 73 | "20;20" 74 | "8,12,16,20,24,28,32,36,40,44,48,52,56,60,64,68,72,76,80,84;96,100,104,108,112,116,120,124,128,132,136,140,144,148,152,156,160,164,168,172" 75 | "0-1;2-3" 76 | 77 | 78 | # 11: 79 | Map num_numa tasks with GPU optimization 80 | "20;20" 81 | "8,12,16,20,24,28,32,36,40,44,48,52,56,60,64,68,72,76,80,84;96,100,104,108,112,116,120,124,128,132,136,140,144,148,152,156,160,164,168,172" 82 | "0-1;2-3" 83 | 84 | 85 | # 12: 86 | Map 8 tasks to a single pu 87 | "1;1;1;1;1;1;1;1" 88 | "8;8;8;8;8;8;8;8" 89 | "0;0;0;0;1;1;1;1" 90 | -------------------------------------------------------------------------------- /test-suite/expected/expected.cts1-quartz: -------------------------------------------------------------------------------- 1 | # Line that start with a pound are comments! 2 | # Each answer description consist of 6 lines: 3 | # 1. A comment with the test number 4 | # 2. A comment describing the parameters used for the test in JSON format 5 | # 3. The test description 6 | # 4. the thread mapping 7 | # 5. the cpu mapping 8 | # 6. the gpu_mapping 9 | # The mapping for each task is separated by a defined character. 
10 | # This separator can be changed in test_utils.c::parse_answer() 11 | 12 | 13 | # 1: 14 | # {"params": {"ntasks": 36, "in_nthreads": 0, "greedy": 1, "gpu_optim": 1, "smt": 0, "restr_set": null, "restrict_type": 0}} 15 | Map one task to every core 16 | "1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1" 17 | "0;1;2;3;4;5;6;7;8;9;10;11;12;13;14;15;16;17;18;19;20;21;22;23;24;25;26;27;28;29;30;31;32;33;34;35" 18 | ";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;" 19 | 20 | 21 | # 2: 22 | # {"params": {"ntasks": 1, "in_nthreads": 0, "greedy": 1, "gpu_optim": 1, "smt": 0, "restr_set": null, "restrict_type": 0}} 23 | Map one task greedily 24 | "36" 25 | "0-35" 26 | "" 27 | 28 | 29 | # 3: 30 | # {"params": {"ntasks": 2, "in_nthreads": 0, "greedy": 1, "gpu_optim": 1, "smt": 0, "restr_set": null, "restrict_type": 0}} 31 | Map two tasks greedily 32 | "18;18" 33 | "0-17;18-35" 34 | ";" 35 | 36 | # 4: 37 | # {"params": {"ntasks": 1, "in_nthreads": 4, "greedy": 1, "gpu_optim": 1, "smt": 0, "restr_set": null, "restrict_type": 0}} 38 | Map such that ntasks < #NUMA nodes but nworkers > #NUMA nodes 39 | "36" 40 | "0-35" 41 | "" 42 | 43 | 44 | # 5: 45 | # {"params": {"ntasks": 1, "in_nthreads": 0, "greedy": 1, "gpu_optim": 1, "smt": 0, "restr_set": "0", "restrict_type": 0}} 46 | Restrict x tasks a single core (x == machine's smt level) 47 | "1" 48 | "0" 49 | "" 50 | 51 | 52 | # 6: 53 | # {"params": {"ntasks": 2, "in_nthreads": 0, "greedy": 1, "gpu_optim": 1, "smt": 1, "restr_set": null, "restrict_type": 0}} 54 | Map two tasks at SMT 1 55 | "18;18" 56 | "0-17;18-35" 57 | ";" 58 | 59 | 60 | # 7: 61 | # {"params": {"ntasks": 2, "in_nthreads": 0, "greedy": 1, "gpu_optim": 1, "smt": 1, "restr_set": null, "restrict_type": 0}} 62 | Map two tasks at max smt (across all cores) 63 | "18;18" 64 | "0-17;18-35" 65 | ";" 66 | 67 | 68 | # 8: 69 | # {"params": {"ntasks": 2, "in_nthreads": 0, "greedy": 1, "gpu_optim": 1, "smt": 1, "restr_set": null, "restrict_type": 0}} 70 | Map two tasks at max smt-1 71 | "18;18" 72 | "0-17;18-35" 73 | ";" 74 | 75 | 76 | # 9: 77 | # {"params": {"ntasks": 2, "in_nthreads": 0, "greedy": 1, "gpu_optim": 1, "smt": 0, "restr_set": "0", "restrict_type": 1}} 78 | Map two tasks, but restrict them to a single NUMA domain 79 | "9;9" 80 | "0-8;9-17" 81 | ";" 82 | 83 | 84 | # 10: 85 | # {"params": {"ntasks": 2, "in_nthreads": 0, "greedy": 1, "gpu_optim": 0, "smt": 0, "restr_set": null, "restrict_type": 0}} 86 | Map num_numa tasks without GPU optimization 87 | "18;18" 88 | "0-17;18-35" 89 | ";" 90 | 91 | # 11: 92 | # {"params": {"ntasks": 2, "in_nthreads": 0, "greedy": 1, "gpu_optim": 1, "smt": 0, "restr_set": null, "restrict_type": 0}} 93 | Map num_numa tasks with GPU optimization 94 | "18;18" 95 | "0-17;18-35" 96 | ";" 97 | 98 | # 12: 99 | # {"params": {"ntasks": 8, "in_nthreads": 0, "greedy": 1, "gpu_optim": 0, "smt": 0, "restr_set": "0", "restrict_type": 0}} 100 | Map eight tasks to a single pu 101 | "1;1;1;1;1;1;1;1" 102 | "0;0;0;0;0;0;0;0" 103 | ";;;;;;;" 104 | -------------------------------------------------------------------------------- /test-suite/expected/expected.epyc-corona: -------------------------------------------------------------------------------- 1 | # Line that start with a pound are comments! 2 | # Each answer description consist of 6 lines: 3 | # 1. A comment with the test number 4 | # 2. A comment describing the parameters used for the test in JSON format 5 | # 3. The test description 6 | # 4. the thread mapping 7 | # 5. the cpu mapping 8 | # 6. 
the gpu_mapping 9 | # The mapping for each task is separated by a defined character. 10 | # This separator can be changed in test_utils.c::parse_answer() 11 | 12 | # 1: 13 | # {"params": {"ntasks": 48, "in_nthreads": 0, "greedy": 1, "gpu_optim": 1, "smt": 0, "restr_set": null, "restrict_type": 0}} 14 | Map one task to every core 15 | "1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1" 16 | "6;54;7;55;8;56;9;57;10;58;11;59;12;60;13;61;14;62;15;63;16;64;17;65;30;78;31;79;32;80;33;81;34;82;35;83;42;90;43;91;44;92;45;93;46;94;47;95" 17 | "0;0;0;0;0;0;0;0;0;0;0;0;1;1;1;1;1;1;1;1;1;1;1;1;2;2;2;2;2;2;2;2;2;2;2;2;3;3;3;3;3;3;3;3;3;3;3;3" 18 | 19 | # 2: 20 | # {"params": {"ntasks": 1, "in_nthreads": 0, "greedy": 1, "gpu_optim": 1, "smt": 0, "restr_set": null, "restrict_type": 0}} 21 | Map one task greedily 22 | "96" 23 | "0-95" 24 | "0,1,2,3" 25 | 26 | 27 | # 3: 28 | # {"params": {"ntasks": 2, "in_nthreads": 0, "greedy": 1, "gpu_optim": 1, "smt": 0, "restr_set": null, "restrict_type": 0}} 29 | Map two tasks greedily 30 | "48;48" 31 | "0-23,48-71;24-47,72-95" 32 | "0,1;2,3" 33 | 34 | # 4: 35 | # {"params": {"ntasks": 7, "in_nthreads": 16, "greedy": 1, "gpu_optim": 1, "smt": 0, "restr_set": null, "restrict_type": 0}} 36 | Map such that ntasks < #NUMA nodes but nworkers > #NUMA nodes 37 | "24;12;12;12;12;12;12" 38 | "0-11,48-59;12-17,60-65;18-23,66-71;24-29,72-77;30-35,78-83;36-41,84-89;42-47,90-95" 39 | "0;1;;;2;;3" 40 | 41 | # 5: 42 | # {"params": {"ntasks": 2, "in_nthreads": 0, "greedy": 1, "gpu_optim": 1, "smt": 0, "restr_set": "0,48", "restrict_type": 0}} 43 | Restrict x tasks a single core (x == machine's smt level) 44 | "1;1" 45 | "0;48" 46 | ";" 47 | 48 | # 6: 49 | # {"params": {"ntasks": 2, "in_nthreads": 0, "greedy": 1, "gpu_optim": 1, "smt": 1, "restr_set": null, "restrict_type": 0}} 50 | Map two tasks at SMT 1 51 | "48;48" 52 | "0-23,48-71;24-47,72-95" 53 | "0,1;2,3" 54 | 55 | 56 | # 7: 57 | # {"params": {"ntasks": 2, "in_nthreads": 0, "greedy": 1, "gpu_optim": 1, "smt": 2, "restr_set": null, "restrict_type": 0}} 58 | Map two tasks at max smt (across all cores) 59 | "48;48" 60 | "0-23,48-71;24-47,72-95" 61 | "0,1;2,3" 62 | 63 | 64 | # 8: 65 | # {"params": {"ntasks": 2, "in_nthreads": 0, "greedy": 1, "gpu_optim": 1, "smt": 1, "restr_set": null, "restrict_type": 0}} 66 | Map two tasks at max smt-1 67 | "48;48" 68 | "0-23,48-71;24-47,72-95" 69 | "0,1;2,3" 70 | 71 | 72 | # 9: 73 | # {"params": {"ntasks": 2, "in_nthreads": 0, "greedy": 1, "gpu_optim": 1, "smt": 0, "restr_set": "0", "restrict_type": 1}} 74 | Map two tasks, but restrict them to a single NUMA domain 75 | "3;3" 76 | "0-2;3-5" 77 | ";" 78 | 79 | 80 | # 10: 81 | # {"params": {"ntasks": 8, "in_nthreads": 0, "greedy": 1, "gpu_optim": 0, "smt": 0, "restr_set": null, "restrict_type": 0}} 82 | Map num_numa tasks without GPU optimization 83 | "6;6;6;6;6;6;6;6" 84 | "0-5;6-11;12-17;18-23;24-29;30-35;36-41;42-47" 85 | ";0;1;;;2;;3" 86 | 87 | 88 | # 11: 89 | # {"params": {"ntasks": 8, "in_nthreads": 0, "greedy": 1, "gpu_optim": 1, "smt": 0, "restr_set": null, "restrict_type": 0}} 90 | Map num_numa tasks with GPU optimization 91 | "3;3;3;3;3;3;3;3" 92 | "6-8;9-11;12-14;15-17;30-32;33-35;42-44;45-47" 93 | "0;0;1;1;2;2;3;3" 94 | 95 | 96 | # 12: 97 | # {"params": {"ntasks": 8, "in_nthreads": 0, "greedy": 1, "gpu_optim": 0, "smt": 0, "restr_set": "0", "restrict_type": 0}} 98 | Map eight tasks to a single pu 99 | "1;1;1;1;1;1;1;1" 100 | "0;0;0;0;0;0;0;0" 101 | ";;;;;;;" 102 | 
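The expected mappings in these files are produced by driving the mpibind C API over the canned topologies in `topo-xml/`. As a rough sketch of that flow — modeled on the calls used in `environment.c` and `error.c` of this test suite, with the relative XML path assumed and error checking omitted — a standalone driver might look like this:

```
#include <stdio.h>
#include <hwloc.h>
#include "mpibind.h"

int main(void) {
  hwloc_topology_t topo;
  mpibind_t *handle;

  /* Load a canned topology instead of the live machine, as the tests do */
  hwloc_topology_init(&topo);
  hwloc_topology_set_xml(topo, "../topo-xml/epyc-corona.xml"); /* assumed path */
  hwloc_topology_set_type_filter(topo, HWLOC_OBJ_OS_DEVICE,
                                 HWLOC_TYPE_FILTER_KEEP_IMPORTANT);
  hwloc_topology_load(topo);

  /* Input parameters of the handle (see mpibind-priv.h) */
  mpibind_init(&handle);
  mpibind_set_topology(handle, topo);
  mpibind_set_ntasks(handle, 2);
  mpibind_set_greedy(handle, 1);
  mpibind_set_gpu_optim(handle, 1);

  /* Compute the mapping and derive the environment variables */
  mpibind(handle);
  mpibind_set_env_vars(handle);

  int i, nvars;
  char **names = mpibind_get_env_var_names(handle, &nvars);
  for (i = 0; i < nvars; i++) {
    /* One value per task; print task 0's value only */
    char **values = mpibind_get_env_var_values(handle, names[i]);
    printf("%s=%s\n", names[i], values[0]);
  }

  mpibind_finalize(handle);
  hwloc_topology_destroy(topo);
  return 0;
}
```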
-------------------------------------------------------------------------------- /test-suite/expected/expected.epyc-corona.v1: -------------------------------------------------------------------------------- 1 | # Line that start with a pound are comments! 2 | # The first non-commented line should be the number of tests. 3 | # After the number of tests, each answer description consist of 4 lines: 4 | # The test description, the thread mapping, the cpu mapping, and the 5 | # gpu_mapping.The mapping for each task is separated by a defined character. 6 | # This separator can be changed in test_utils.c::parse_answer() 7 | 8 | 9 | # 1: Map one task to every core 10 | Map one task to every core 11 | "1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1" 12 | "6,54;6,54;7,55;7,55;8,56;8,56;9,57;9,57;10,58;10,58;11,59;11,59;12,60;12,60;13,61;13,61;14,62;14,62;15,63;15,63;16,64;16,64;17,65;17,65;30,78;30,78;31,79;31,79;32,80;32,80;33,81;33,81;34,82;34,82;35,83;35,83;42,90;42,90;43,91;43,91;44,92;44,92;45,93;45,93;46,94;46,94;47,95;47,95" 13 | "0;0;0;0;0;0;0;0;0;0;0;0;1;1;1;1;1;1;1;1;1;1;1;1;2;2;2;2;2;2;2;2;2;2;2;2;3;3;3;3;3;3;3;3;3;3;3;3" 14 | 15 | # 2: Map 1 task greedily 16 | Map 1 task greedily 17 | "96" 18 | "0-95" 19 | "0-3" 20 | 21 | 22 | # 3: Map two tasks greedily 23 | Map two tasks greedily 24 | "48;48" 25 | "0-23,48-71;24-47,72-95" 26 | "0-1;2-3" 27 | 28 | # 4: 29 | Mapping such that ntasks < #NUMA nodes but nworkers > #NUMA nodes 30 | "24;12;12;12;12;12;12" 31 | "0-11,48-59;12-17,60-65;18-23,66-71;24-29,72-77;30-35,78-83;36-41,84-89;42-47,90-95" 32 | "0;1;;;2;;3" 33 | 34 | # 5: 35 | Restrict x tasks a single core (x == machine's smt level) 36 | "1;1" 37 | "0;48" 38 | ";" 39 | 40 | # 6: 41 | Map two tasks at SMT 1 42 | "48;48" 43 | "0-23,48-71;24-47,72-95" 44 | "0-1;2-3" 45 | 46 | 47 | # 7: 48 | Map 2 tasks at max smt (across all cores) 49 | "48;48" 50 | "0-23,48-71;24-47,72-95" 51 | "0-1;2-3" 52 | 53 | 54 | # 8: 55 | Map tasks at max smt-1 56 | "48;48" 57 | "0-23,48-71;24-47,72-95" 58 | "0-1;2-3" 59 | 60 | 61 | # 9: 62 | Map two tasks, but restrict them to a single NUMA domain 63 | "3;3" 64 | "0-2;3-5" 65 | ";" 66 | 67 | 68 | # 10: 69 | Map num_numa tasks without GPU optimization 70 | "6;6;6;6;6;6;6;6" 71 | "0-5;6-11;12-17;18-23;24-29;30-35;36-41;42-47" 72 | ";0;1;;;2;;3" 73 | 74 | 75 | # 11: 76 | Map num_numa tasks with GPU optimization 77 | "3;3;3;3;3;3;3;3" 78 | "6-8;9-11;12-14;15-17;30-32;33-35;42-44;45-47" 79 | "0;0;1;1;2;2;3;3" 80 | 81 | 82 | # 12: 83 | Map 8 tasks to a single pu 84 | "1;1;1;1;1;1;1;1" 85 | "0;0;0;0;0;0;0;0" 86 | ";;;;;;;" 87 | -------------------------------------------------------------------------------- /test-suite/python/py-coral-ea.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import unittest 4 | from test_utils import * 5 | 6 | topology_file = "../topo-xml/coral-ea-hwloc1.xml" 7 | answer_file = "./expected/expected.coral-ea" 8 | 9 | # test class that inherits from unittest 10 | # test cases are added in main body 11 | class TestCoralEA(unittest.TestCase): 12 | pass 13 | 14 | if __name__ == "__main__": 15 | # read expected file 16 | test_info = parse_expected(answer_file) 17 | 18 | # add setup and teardown functions 19 | setattr(TestCoralEA, 'setUp', setup_generator(topology_file)) 20 | setattr(TestCoralEA, 'tearDown', teardown_generator()) 21 | 22 | # build and add test cases from the expected file 23 | for single_test_info in test_info: 24 | 
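        # make_test_name() turns the plain-English description into a test_* method
        # name, and test_generator() returns a unittest method that runs mpibind with
        # the parsed params and compares thread/cpu/gpu mappings to the expected ones.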
setattr(TestCoralEA, make_test_name(single_test_info['description']), 25 | test_generator(single_test_info)) 26 | 27 | #use pycotap to emit TAP from python unit tests 28 | from pycotap import TAPTestRunner 29 | suite = unittest.TestLoader().loadTestsFromTestCase(TestCoralEA) 30 | TAPTestRunner().run(suite) -------------------------------------------------------------------------------- /test-suite/python/py-coral-lassen.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import unittest 4 | from test_utils import * 5 | 6 | topology_file = "../topo-xml/coral-lassen.xml" 7 | answer_file = "./expected/expected.coral-lassen" 8 | 9 | # test class that inherits from unittest 10 | # test cases are added in main body 11 | class TestCoralLassen(unittest.TestCase): 12 | pass 13 | 14 | if __name__ == "__main__": 15 | # read expected file 16 | test_info = parse_expected(answer_file) 17 | 18 | # add setup and teardown functions 19 | setattr(TestCoralLassen, 'setUp', setup_generator(topology_file)) 20 | setattr(TestCoralLassen, 'tearDown', teardown_generator()) 21 | 22 | # build and add test cases from the expected file 23 | for single_test_info in test_info: 24 | setattr(TestCoralLassen, make_test_name(single_test_info['description']), 25 | test_generator(single_test_info)) 26 | 27 | #use pycotap to emit TAP from python unit tests 28 | from pycotap import TAPTestRunner 29 | suite = unittest.TestLoader().loadTestsFromTestCase(TestCoralLassen) 30 | TAPTestRunner().run(suite) -------------------------------------------------------------------------------- /test-suite/python/py-cts1-quartz.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import unittest 4 | from test_utils import * 5 | 6 | topology_file = "../topo-xml/cts1-quartz-smt1.xml" 7 | answer_file = "./expected/expected.cts1-quartz" 8 | 9 | # test class that inherits from unittest 10 | # test cases are added in main body 11 | class TestCTS1Quartz(unittest.TestCase): 12 | pass 13 | 14 | if __name__ == "__main__": 15 | # read expected file 16 | test_info = parse_expected(answer_file) 17 | 18 | # add setup and teardown functions 19 | setattr(TestCTS1Quartz, 'setUp', setup_generator(topology_file)) 20 | setattr(TestCTS1Quartz, 'tearDown', teardown_generator()) 21 | 22 | # build and add test cases from the expected file 23 | for single_test_info in test_info: 24 | setattr(TestCTS1Quartz, make_test_name(single_test_info['description']), 25 | test_generator(single_test_info)) 26 | 27 | #use pycotap to emit TAP from python unit tests 28 | from pycotap import TAPTestRunner 29 | suite = unittest.TestLoader().loadTestsFromTestCase(TestCTS1Quartz) 30 | TAPTestRunner().run(suite) -------------------------------------------------------------------------------- /test-suite/python/py-epyc-corona.py: -------------------------------------------------------------------------------- 1 | 2 | #!/usr/bin/env python3 3 | 4 | import unittest 5 | from test_utils import * 6 | 7 | topology_file = "../topo-xml/epyc-corona.xml" 8 | answer_file = "./expected/expected.epyc-corona" 9 | 10 | # test class that inherits from unittest 11 | # test cases are added in main body 12 | class TestEpycCorona(unittest.TestCase): 13 | pass 14 | 15 | if __name__ == "__main__": 16 | # read expected file 17 | test_info = parse_expected(answer_file) 18 | 19 | # add setup and teardown functions 20 | setattr(TestEpycCorona, 'setUp', setup_generator(topology_file)) 
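    # tearDown (added next) finalizes the mpibind handle that setUp created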
21 | setattr(TestEpycCorona, 'tearDown', teardown_generator()) 22 | 23 | # build and add test cases from the expected file 24 | for single_test_info in test_info: 25 | setattr(TestEpycCorona, make_test_name(single_test_info['description']), 26 | test_generator(single_test_info)) 27 | 28 | #use pycotap to emit TAP from python unit tests 29 | from pycotap import TAPTestRunner 30 | suite = unittest.TestLoader().loadTestsFromTestCase(TestEpycCorona) 31 | TAPTestRunner().run(suite) -------------------------------------------------------------------------------- /test-suite/python/test_utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | import mpibind 3 | import unittest 4 | import re 5 | import itertools 6 | 7 | 8 | # Based on https://stackoverflow.com/questions/4628333/\ 9 | # converting-a-list-of-integers-into-range-in-python 10 | # lst: [0, 1, 2, 3, 4, 7, 8, 9, 11] 11 | # key: 0 group: [(0, 0), (1, 1), (2, 2), (3, 3), (4, 4)] 12 | # key: 2 group: [(5, 7), (6, 8), (7, 9)] 13 | # key: 3 group: [(8, 11)] 14 | def ints2ranges(lst): 15 | '''Convert an integer list into a range generator''' 16 | key_func = lambda pair: pair[1] - pair[0] 17 | for key, grp in itertools.groupby(enumerate(lst), key_func): 18 | grp = list(grp) 19 | beg = grp[0][1] 20 | end = grp[-1][1] 21 | 22 | res = "{}".format(beg) 23 | if beg != end: 24 | res += "-{}".format(end) 25 | 26 | yield res 27 | 28 | 29 | def parse_expected(answer_file): 30 | """ 31 | Parse parameters and answers from the expected file 32 | 33 | :param answer_file: file path of the expected file 34 | :type answer_file: string or path-like 35 | :return: test information for each of the tests in the answer file 36 | :rtype: list of dictionaries 37 | """ 38 | line_types = [ 39 | 'description', 40 | 'thread_mapping', 41 | 'cpu_mapping', 42 | 'gpu_mapping' 43 | ] 44 | test_info = [] 45 | cur_answer = dict() 46 | type_idx = 0 47 | with open(answer_file, 'r') as f: 48 | for line in f.readlines(): 49 | line = line.strip() 50 | if not line: 51 | continue 52 | if line[0] == '#' and 'params' not in line: 53 | continue 54 | 55 | if 'params' in line: 56 | json_string = line.replace('# ', '') 57 | cur_answer['params'] = json.loads(json_string)['params'] 58 | else: 59 | cur_answer[line_types[type_idx]] = line.replace('"', '') 60 | type_idx += 1 61 | 62 | if type_idx == 4: 63 | test_info.append(cur_answer) 64 | type_idx = 0 65 | cur_answer = dict() 66 | 67 | return test_info 68 | 69 | def get_actual(handle, single_test_info): 70 | """ 71 | Use the test info to paramaterize a mpibind handle 72 | and then compute a mapping. 
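    The params keys mirror mpibind's inputs: ntasks, in_nthreads, greedy,
    gpu_optim, smt, restr_set, and restrict_type.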
73 | 74 | :param handle: the mpibind handle 75 | :type handle: MpibindHandle 76 | :param single_test_info: the test information for a single test 77 | :type single_test_info: dictionary 78 | :return: 3-tuple of answers 79 | :rtype: tuple of strings 80 | """ 81 | handle.ntasks = single_test_info['params']['ntasks'] 82 | handle.nthreads = single_test_info['params']['in_nthreads'] 83 | handle.greedy = single_test_info['params']['greedy'] 84 | handle.gpu_optim = single_test_info['params']['gpu_optim'] 85 | handle.smt = single_test_info['params']['smt'] 86 | handle.restrict_ids = single_test_info['params']['restr_set'] 87 | handle.restrict_type = single_test_info['params']['restrict_type'] 88 | handle.mpibind() 89 | 90 | thread_mapping = ';'.join([str(ele) for ele in handle.nthreads]) 91 | gpu_mapping = ';'.join([','.join(handle.get_gpus_ptask(i)) for i in range(handle.ntasks)]) 92 | #cpu_mapping = ';'.join([handle.get_cpus_ptask(i) 93 | # for i in range(handle.ntasks)]) 94 | # Since 'get_cpus_ptask' now returns a list of ints, 95 | # convert the list into ranges as a string 96 | cpu_mapping = [] 97 | for i in range(handle.ntasks): 98 | # ints2ranges is a generator, thus make it a list 99 | # and join the ranges with commas 100 | cpu_lst = list(ints2ranges(handle.get_cpus_ptask(i))) 101 | cpu_mapping.append(','.join(cpu_lst)) 102 | # The mapping of tasks is separated by semicolons 103 | cpu_mapping = ';'.join(cpu_mapping) 104 | 105 | return thread_mapping, cpu_mapping, gpu_mapping 106 | 107 | def make_test_name(description): 108 | """ 109 | generate a test name from a description 110 | 111 | :param description: plain english description of a test 112 | :type description: string 113 | :return: the generated test name 114 | :rtype: string 115 | """ 116 | return 'test_' + re.sub(r'\s+', '_', description.strip().lower()) 117 | 118 | def test_generator(single_test_info): 119 | """ 120 | generate a python unit test from the given test info 121 | 122 | :param single_test_info: the test information for a single test 123 | :type single_test_info: dictionary 124 | :return: the generated test 125 | :rtype: function 126 | """ 127 | def test(self): 128 | thread_mapping, cpu_mapping, gpu_mapping = get_actual(self.handle, single_test_info) 129 | self.assertEqual(thread_mapping, single_test_info['thread_mapping']) 130 | self.assertEqual(cpu_mapping, single_test_info['cpu_mapping']) 131 | self.assertEqual(gpu_mapping, single_test_info['gpu_mapping']) 132 | return test 133 | 134 | def setup_generator(topology_file): 135 | """ 136 | generate the setup functino 137 | 138 | :param topology_file: the path to the topology file to use during testing 139 | :type topology_file: string 140 | :return: the generated setup function 141 | :rtype: function 142 | """ 143 | def setUp(self): 144 | mpibind.topology_set_xml(topology_file) 145 | self.handle = mpibind.MpibindHandle() 146 | return setUp 147 | 148 | def teardown_generator(): 149 | """ 150 | generate the teardown function 151 | 152 | :return: the generated teardown function 153 | :rtype: function 154 | """ 155 | def tearDown(self): 156 | self.handle.finalize() 157 | return tearDown 158 | -------------------------------------------------------------------------------- /test-suite/test_utils.h: -------------------------------------------------------------------------------- 1 | #ifndef MPIBIND_TEST_UTILS_H 2 | #define MPIBIND_TEST_UTILS_H 3 | #include 4 | #include 5 | #include 6 | #include "mpibind.h" 7 | #include "tap.h" 8 | 9 | /** 10 | * The number of tests present in 
the test_suite. 11 | * This will be referenced when parsing answers and 12 | * generating tests. This is also used to ensure the number of 13 | * tests and number of answers are consistent. 14 | * **/ 15 | #define NUM_TESTS 12 16 | 17 | /** 18 | * Representation of a test answer 19 | * **/ 20 | typedef struct { 21 | char* description; 22 | char* thread_mapping; 23 | char* cpu_mapping; 24 | char* gpu_mapping; 25 | } mpibind_test_out_t; 26 | /** 27 | * Input parameters for a test. This mimics 28 | * the structure of mpibind_t, but is defined 29 | * separately to make the tests independent of 30 | * mpibind_t's definition. 31 | * **/ 32 | typedef struct { 33 | /* Input parameters */ 34 | hwloc_topology_t topo; 35 | int ntasks; 36 | int in_nthreads; 37 | int greedy; 38 | int gpu_optim; 39 | int smt; 40 | char* restr_set; 41 | int restr_type; 42 | } mpibind_test_in_t; 43 | /** 44 | * Initialize a test struct to default values. 45 | * This mimics the behavior of mpibind_init 46 | * **/ 47 | int mpibind_test_in_t_init(mpibind_test_in_t* hdl); 48 | /** 49 | * Frees an answer 50 | * **/ 51 | void mpibind_test_out_t_free(mpibind_test_out_t* t); 52 | /** 53 | * Frees a test object 54 | * **/ 55 | void mpibind_test_in_t_free(mpibind_test_in_t* t); 56 | /** 57 | * Prints the current state of an mpibind_test_in_t object 58 | * **/ 59 | void mpibind_test_in_t_print(mpibind_test_in_t* params); 60 | /** Helper function to check the cpu, gpu, and thread mappings**/ 61 | void check_mapping(mpibind_t* handle, mpibind_test_out_t* expected); 62 | /** 63 | * Runs a set of tests and compares them to their answers. 64 | * **/ 65 | void run_test(hwloc_topology_t topo, mpibind_test_in_t *params, mpibind_test_out_t *expected); 66 | /** 67 | * Generate unit test information from a topology. 68 | * This will take high level information and create an array of 69 | * of objects containing parameters for each of the tests. The number 70 | * of tests created is passed back via num_test_ptr 71 | * **/ 72 | mpibind_test_in_t** generate_test_information(hwloc_topology_t); 73 | /** load an xml file into a topology **/ 74 | void load_topology(hwloc_topology_t* topo, char* xml_file); 75 | /** 76 | * Loads a set of test answers from a file. 77 | * num_test_ptr will be used to store the number of answers parsed 78 | * **/ 79 | mpibind_test_out_t** load_answers(char* filename); 80 | /** Performs a unit test using a given topology xml and an answer file. 81 | * Test drivers should call this method 82 | * **/ 83 | void unit_test_topology(char* topology_filename, char* answer_filename); 84 | #endif 85 | -------------------------------------------------------------------------------- /tutorials/common/archs.md: -------------------------------------------------------------------------------- 1 | # Example architectures 2 | 3 | Here is the node architecture of several Livermore Computing supercomputers. A summary of key features for these machines, including differing numbers of processors, NUMA domains, GPUs, and cores, are summarized in the table below, with images depicting the topology of each below that. 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 |
| | Per node | Per processor | Per core |
|:-|:-|:-|:-|
| Corona | 2 AMD Rome processors<br>8 AMD MI50 GPUs | 1 NUMA domain<br>8 L3s, 24 L2s, 24 L1s<br>24 cores | 2 HW threads |
| Lassen | 2 IBM Power9 processors<br>4 NVIDIA Volta GPUs | 1 NUMA domain<br>10 L3s, 10 L2s, 20 L1s<br>20 cores | 4 HW threads |
| Pascal | 2 Intel Broadwell processors<br>2 NVIDIA Pascal GPUs | 1 NUMA domain<br>1 L3, 18 L2s, 18 L1s<br>18 cores | 2 HW threads |
| Poodle | 2 Intel Sapphire Rapids processors | 4 NUMA domains<br>1 L3, 56 L2s, 56 L1s<br>56 cores | 2 HW threads |
| RZAdams | 4 AMD Instinct MI300A APUs:<br>4 processors + 4 GPUs | 1 NUMA domain<br>3 L3s, 24 L2s, 24 L1s<br>24 cores | 2 HW threads |
| Tioga | 1 AMD 3rd Gen EPYC processor<br>8 AMD Instinct MI250X GPUs | 4 NUMA domains<br>8 L3s, 64 L2s, 64 L1s<br>64 cores | 2 HW threads |
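The component counts in the table above can be verified programmatically with hwloc, which the tutorial modules rely on throughout. Below is a minimal sketch, assuming hwloc 2.x; it reads the live node by default, or one of this repository's canned topologies if `HWLOC_XMLFILE` points at a file under `topo-xml/` (the path choice is up to the reader):

```
#include <stdio.h>
#include <hwloc.h>

/* Count the components summarized in the table above.
 * Run on a live node, or set HWLOC_XMLFILE=topo-xml/<machine>.xml
 * to inspect one of the canned topologies offline. */
int main(void) {
  hwloc_topology_t topo;
  hwloc_topology_init(&topo);
  hwloc_topology_load(topo);

  printf("Packages: %d\n", hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_PACKAGE));
  printf("NUMA domains: %d\n", hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_NUMANODE));
  printf("L3 caches: %d\n", hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_L3CACHE));
  printf("Cores: %d\n", hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_CORE));
  printf("HW threads (PUs): %d\n", hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_PU));

  hwloc_topology_destroy(topo);
  return 0;
}
```

The same information is available graphically through hwloc's `lstopo`/`hwloc-ls` tools.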
77 | 78 | 79 | 80 | | Tioga | 81 | |:--:| 82 | |![Tioga](../figures/tioga-web.png "Tioga (MI250X)")| 83 | 84 | | RZAdams | 85 | |:--:| 86 | |![Tioga](../figures/rzadams-web.png "RZAdams (MI300A)")| 87 | 88 | | Corona | 89 | |:--:| 90 | |![Corona](../figures/corona-web.png "Corona")| 91 | 92 | | Pascal | 93 | |:--:| 94 | |![Pascal](../figures/pascal-web.png "Pascal")| 95 | 96 | | Poodle | 97 | |:--:| 98 | |![Poodle](../figures/poodle-web.png "Poodle")| 99 | 100 | | Lassen | 101 | |:--:| 102 | |![Lassen](../figures/lassen-web.png "Lassen")| 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 120 | -------------------------------------------------------------------------------- /tutorials/cug23/README.md: -------------------------------------------------------------------------------- 1 | # CUG 2023: Supercomputer Affinity 2 | 3 | *Edgar A. León* and *Jane E. Herriman*
4 | Lawrence Livermore National Laboratory 5 | 6 | ## Schedule 7 | 8 |
9 | 10 | | Begin | End | Topic | 11 | |-:|-:|:-| 12 | | 8:30 | 8:50 | Introduction + Setup | 13 | | 8:50 | 9:30 | Architecture Topology | 14 | | 9:30 | 10:00 | Process Affinity | 15 | | *10:00* | *10:30* | *Coffee* | 16 | | 10:30 | 10:40 | Process Affinity Cont. | 17 | | 10:40 | 11:00 | Hands-on Exercises | 18 | | 11:00 | 11:40 | GPU Affinity | 19 | |11:40 | 12:00 | Hands-on Exercises | 20 | 21 |
22 | 23 | ## AWS Cluster 24 | 25 | Accounts: `user5`, `user6`, ..., `user35` 26 | 27 | Password: 28 | 29 | ``` 30 | ssh user5@ 31 | 32 | source /home/tutorial/scripts/user-env.sh 33 | 34 | srun -N1 -n1 mpi 35 | ``` 36 | 37 | 38 | ## Tutorial Notebook 39 | 40 |
41 |

42 | 43 |

44 | 45 | 46 | 1. Making sense of affinity: [Discovering the node architecture topology](module1.md) 47 | 48 | Learn how to identify the compute and memory components of a 49 | compute node using `hwloc`. A precise understanding of the hardware 50 | resources is needed to map an application to the machine 51 | efficiently. This includes identifying the node's GPUs, cores, 52 | hardware threads, cache hierarchy, NUMA domains, and network 53 | interfaces. Furthermore, attendees will be introduced to locality, 54 | will identify local hardware resources, and will select resources 55 | using affinity masks. 56 | 57 | 2. Exerting resource manager affinity: [Process affinity with Slurm](module2.md) 58 | 59 | Learn how to use Slurm’s affinity to map a parallel program to the 60 | hardware at runtime when submitting a job. Attendees will learn to 61 | construct CPU-based bindings using low-level and high-level 62 | abstractions. High-level bindings are driven by hardware components 63 | such as Cores and Sockets. 64 | 65 | 3. Putting it all together: [Adding in GPUs](module3.md) 66 | 67 | Learn how to assign GPUs to MPI processes to leverage 68 | locality. Learn how to apply combined process and GPU 69 | affinity policies. Attendees will learn to 70 | manage CPU and GPU affinity concurrently to take advantage of local 71 | resources and reduce data movement. 72 | 73 | 74 | 75 | -------------------------------------------------------------------------------- /tutorials/cug24/README.md: -------------------------------------------------------------------------------- 1 | # Supercomputer Affinity on HPE Systems 2 | ## CUG 2024 3 | 4 | *Edgar A. León* and *Jane E. Herriman*
5 | Lawrence Livermore National Laboratory 6 | 7 | ## Schedule 8 | 9 |
10 | 11 | | Begin | End | Topic | 12 | |-:|-:|:-| 13 | | 8:30 | 8:50 | Introduction + Setup | 14 | | 8:50 | 9:20 | Module 1: Architecture Topology | 15 | | 9:20 | 9:50 | Module 2: Process Affinity | 16 | | 9:50 | 10:00 | Module 2: Hands-on Exercises | 17 | | *10:00* | *10:30* | *Coffee* | 18 | | 10:30 | 10:40 | Module 2: Hands-on Exercises | 19 | | 10:40 | 11:10 | Module 3: GPU Affinity | 20 | | 11:10 | 11:30 | Module 3: Hands-on exercises | 21 | | 11:30 | 12:00 | Module 4: Flux affinity on the AMD MI300A APU | 22 | 23 |
24 | 25 | 40 | 41 | ## Tutorial Notebook 42 | 43 |
44 |

45 | 46 |

47 | 48 | 49 | 1. [Discovering the node architecture topology](module1.md) 50 | 51 | Learn how to identify the compute and memory components of a 52 | compute node using `hwloc`. A precise understanding of the hardware 53 | resources is needed to map an application to the machine 54 | efficiently. This includes identifying the node's GPUs, cores, 55 | hardware threads, cache hierarchy, NUMA domains, and network 56 | interfaces. Furthermore, attendees will be introduced to locality, 57 | will identify local hardware resources, and will select resources 58 | using affinity masks. 59 | 60 | 2. [Process affinity with Slurm](module2.md) 61 | 62 | Learn how to use Slurm’s affinity to map a parallel program to the 63 | hardware at runtime when submitting a job. Attendees will learn to 64 | construct CPU-based bindings using low-level and high-level 65 | abstractions. High-level bindings are driven by hardware components 66 | such as Cores and Sockets. 67 | 68 | 3. [Adding in GPUs](module3.md) 69 | 70 | Learn how to assign GPUs to MPI processes to leverage 71 | locality. Learn how to apply combined process and GPU 72 | affinity policies. Attendees will learn to 73 | manage CPU and GPU affinity concurrently to take advantage of local 74 | resources and reduce data movement. 75 | 76 | 4. [Process and GPU affinity with Flux](module4.md) 77 | 78 | Learn the basics of the Flux resource manager to launch parallel programs on a supercomputer. Attendees will learn how to apply combined process and GPU affinity policies using Flux. 79 | 80 | 81 | -------------------------------------------------------------------------------- /tutorials/cug24/archs.md: -------------------------------------------------------------------------------- 1 | # Example architectures 2 | 3 | Here is the node architecture of a few Livermore Computing supercomputer: `Tioga`, `RZAdams`, `Corona`, and `Pascal`. A summary of key features for these machines, including differing numbers of processors, NUMA domains, GPUs, and cores, are summarized in the table below, with images depicting the topology of each below that. 4 | 5 | In particular, note that `Tioga` includes MI250X GPUs and `RZAdams` is composed of MI300A APUs. 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 |
| | Tioga | RZAdams | Corona | Pascal |
|:-|:-|:-|:-|:-|
| Per node | 1 AMD 3rd Gen EPYC processor<br>8 AMD Instinct MI250X GPUs | 4 AMD Instinct MI300A APUs:<br>4 processors + 4 GPUs | 2 AMD Rome processors<br>8 AMD MI50 GPUs | 2 Intel Broadwell processors<br>2 NVIDIA Pascal GPUs |
| Per processor | 4 NUMA domains<br>8 L3s, 64 L2s, 64 L1s<br>64 cores | 1 NUMA domain<br>3 L3s, 24 L2s, 24 L1s<br>24 cores | 1 NUMA domain<br>8 L3s, 24 L2s, 24 L1s<br>24 cores | 1 NUMA domain<br>1 L3, 18 L2s, 18 L1s<br>18 cores |
| Per core | 2 hardware threads | 2 hardware threads | 2 hardware threads | 2 hardware threads |
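Since GPU locality is a central theme for both Tioga (MI250X) and RZAdams (MI300A), it can help to see which GPU devices hwloc actually exposes — these are the OS devices mpibind draws from when assigning GPUs to tasks. A hedged sketch, assuming hwloc 2.x with I/O discovery enabled:

```
#include <stdio.h>
#include <hwloc.h>

/* List the GPU-like OS devices (e.g., rsmi/nvml devices) that hwloc
 * sees on the node. */
int main(void) {
  hwloc_topology_t topo;
  hwloc_obj_t dev = NULL;

  hwloc_topology_init(&topo);
  /* Keep I/O devices in the topology, as the mpibind tests do */
  hwloc_topology_set_type_filter(topo, HWLOC_OBJ_OS_DEVICE,
                                 HWLOC_TYPE_FILTER_KEEP_IMPORTANT);
  hwloc_topology_load(topo);

  while ((dev = hwloc_get_next_osdev(topo, dev)) != NULL)
    if (dev->attr->osdev.type == HWLOC_OBJ_OSDEV_GPU ||
        dev->attr->osdev.type == HWLOC_OBJ_OSDEV_COPROC)
      printf("%s\n", dev->name);

  hwloc_topology_destroy(topo);
  return 0;
}
```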
55 | 56 | | Tioga | 57 | |:--:| 58 | |![Tioga](../figures/tioga-web.png "Tioga (MI250X)")| 59 | 60 | | RZAdams | 61 | |:--:| 62 | |![Tioga](../figures/rzadams-web.png "RZAdams (MI300A)")| 63 | 64 | | Corona | 65 | |:--:| 66 | |![Corona](../figures/corona-web.png "Corona")| 67 | 68 | | Pascal | 69 | |:--:| 70 | |![Pascal](../figures/pascal-web.png "Pascal")| 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 90 | -------------------------------------------------------------------------------- /tutorials/eurosys25/README.md: -------------------------------------------------------------------------------- 1 | # 4th Tutorial on Mapping and Affinity (MAP) 2 | 3 | *Edgar A. León*
4 | Lawrence Livermore National Laboratory 5 | 6 | ## Bridging Applications and Hardware 7 | 8 | When we consider the grand challenges addressed by distributed 9 | systems, we likely imagine large-scale machines running parallel 10 | code. Yet, these two pillars of computing – hardware and software – 11 | are not enough to ensure high efficiency and reproducible 12 | performance. When unaware of the topology of the underlying hardware, 13 | even well-designed applications and research software can fail to 14 | achieve their scientific goals. Affinity – how software maps to and 15 | leverages local hardware resources – forms a third pillar critical to 16 | computing systems. 17 | 18 | Multiple factors motivate an understanding of affinity for parallel- 19 | and distributed-computing users. On the software side, applications 20 | are increasingly memory-bandwidth limited making locality more 21 | important. On the hardware side, today’s computer architectures offer 22 | increasingly complex memory and compute topologies, making proper 23 | affinity policies crucial to effective software-hardware assignments. 24 | 25 | In this half-day tutorial, attendees will learn principles behind 26 | effective affinity policies – like understanding the hardware topology 27 | and the importance of memory and GPU locality. They will learn how to 28 | control and apply these policies to create effective, locality-aware 29 | mappings for MPI processes and GPU kernels and to ensure reproducible 30 | performance. These techniques are relevant to both on-premise users 31 | and those using the cloud such as AWS. 32 | 33 | 34 | ## Requirements and Prerequisites 35 | 36 | * Attendees will need a laptop equipped with Wi-Fi, a shell terminal, 37 | and the ssh program. Users will be provided accounts 38 | to access a supercomputer-like environment required for 39 | demonstrations and hands-on exercises. 40 | 41 | * Attendees should have a working knowledge of Unix-like systems. For 42 | example, they should know how to navigate a filesystem and launch 43 | applications from the command line. 44 | 45 | * Attendees will also need some familiarity with high-level parallel 46 | programming concepts. For example, attendees should be comfortable 47 | with terms like thread, process, and GPU, but do not need experience 48 | writing parallel programs. 49 | 50 | 51 | ## Schedule 52 | 53 |
54 | 55 | | Begin | End | Topic | 56 | |-:|-:|:-| 57 | | 14:00 | 14:20 | Introduction + Setup | 58 | | 14:20 | 15:00 | Module 1: Discovering the node architecture topology| 59 | | 15:00 | 15:20 | Module 1: Hands-on exercises | 60 | | 15:20 | 15:30 | Module 2: Mapping processes to the hardware | 61 | | *15:30* | *16:00* | *Coffee* | 62 | | 16:00 | 16:30 | Module 2: Mapping processes to the hardware (cont.)| 63 | | 16:30 | 16:50 | Module 2: Hands-on exercises | 64 | | 16:50 | 17:30 | Module 3: Adding in GPU kernels: Putting it all together | 65 | | 17:30 | 17:45 | Module 3: Hands-on exercises (optional)| 66 | 67 |
68 | 69 | 84 | 85 | ## Notebook 86 | 87 |
88 |

89 | 90 |

91 | 92 | 93 | 1. [Discovering the node architecture topology](module1.md) 94 | 95 | Learn how to identify the compute and memory components of a 96 | compute node using `hwloc`. A precise understanding of the hardware 97 | resources is needed to map an application to the machine 98 | efficiently. This includes identifying the node's GPUs, cores, 99 | hardware threads, cache hierarchy, NUMA domains, and network 100 | interfaces. Furthermore, attendees will be introduced to locality, 101 | will identify local hardware resources, and will select resources 102 | using affinity masks. 103 | 104 | 2. [Mapping processes to the hardware](module2.md) 105 | 106 | Learn how to use the resource manager to map a parallel 107 | program to the 108 | hardware at runtime when submitting a job. Attendees will learn to 109 | construct CPU-based bindings using low-level and high-level 110 | abstractions. High-level bindings are driven by hardware components 111 | such as Cores and Sockets. Furthermore, attendees will learn how to 112 | report affinity on a given system. 113 | 114 | 3. [Adding in GPU kernels: Putting it all together](module3.md) 115 | 116 | Learn how to assign GPUs to MPI processes to leverage 117 | locality. Learn how to apply combined process and GPU 118 | affinity policies. Attendees will learn to 119 | manage CPU and GPU affinity concurrently to take advantage of local 120 | resources and reduce data movement. 121 | 122 | 123 | 124 | -------------------------------------------------------------------------------- /tutorials/figures/aws-architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/aws-architecture.png -------------------------------------------------------------------------------- /tutorials/figures/aws-g4dn-metal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/aws-g4dn-metal.png -------------------------------------------------------------------------------- /tutorials/figures/cache.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/cache.png -------------------------------------------------------------------------------- /tutorials/figures/computing-architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/computing-architecture.png -------------------------------------------------------------------------------- /tutorials/figures/corona-merge.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/corona-merge.png -------------------------------------------------------------------------------- /tutorials/figures/corona-no-cache-io-physical.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/corona-no-cache-io-physical.png -------------------------------------------------------------------------------- 
/tutorials/figures/corona-no-cache-io.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/corona-no-cache-io.png -------------------------------------------------------------------------------- /tutorials/figures/corona-physical.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/corona-physical.png -------------------------------------------------------------------------------- /tutorials/figures/corona-web.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/corona-web.png -------------------------------------------------------------------------------- /tutorials/figures/corona.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/corona.pdf -------------------------------------------------------------------------------- /tutorials/figures/corona.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/corona.png -------------------------------------------------------------------------------- /tutorials/figures/hwloc-objects.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/hwloc-objects.png -------------------------------------------------------------------------------- /tutorials/figures/lassen-web.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/lassen-web.png -------------------------------------------------------------------------------- /tutorials/figures/lassen.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/lassen.pdf -------------------------------------------------------------------------------- /tutorials/figures/lassen.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/lassen.png -------------------------------------------------------------------------------- /tutorials/figures/mammoth.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/mammoth.pdf -------------------------------------------------------------------------------- /tutorials/figures/mammoth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/mammoth.png -------------------------------------------------------------------------------- /tutorials/figures/module4_sockets.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/module4_sockets.png -------------------------------------------------------------------------------- /tutorials/figures/module4_specifyplacement.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/module4_specifyplacement.png -------------------------------------------------------------------------------- /tutorials/figures/module4_spread.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/module4_spread.png -------------------------------------------------------------------------------- /tutorials/figures/module4_threadstocores.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/module4_threadstocores.png -------------------------------------------------------------------------------- /tutorials/figures/module4_threadstocpus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/module4_threadstocpus.png -------------------------------------------------------------------------------- /tutorials/figures/module4_threadstosockets.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/module4_threadstosockets.png -------------------------------------------------------------------------------- /tutorials/figures/module5_tioga.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/module5_tioga.png -------------------------------------------------------------------------------- /tutorials/figures/numa.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/numa.png -------------------------------------------------------------------------------- /tutorials/figures/pascal-web.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/pascal-web.png -------------------------------------------------------------------------------- /tutorials/figures/pascal.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/pascal.pdf -------------------------------------------------------------------------------- /tutorials/figures/pascal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/pascal.png 
-------------------------------------------------------------------------------- /tutorials/figures/poodle-web.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/poodle-web.png -------------------------------------------------------------------------------- /tutorials/figures/ruby.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/ruby.pdf -------------------------------------------------------------------------------- /tutorials/figures/ruby.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/ruby.png -------------------------------------------------------------------------------- /tutorials/figures/rzadams-web.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/rzadams-web.png -------------------------------------------------------------------------------- /tutorials/figures/rzadams/FigureA.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/rzadams/FigureA.png -------------------------------------------------------------------------------- /tutorials/figures/rzadams/FigureB.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/rzadams/FigureB.png -------------------------------------------------------------------------------- /tutorials/figures/rzadams/FigureC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/rzadams/FigureC.png -------------------------------------------------------------------------------- /tutorials/figures/rzadams/FigureD.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/rzadams/FigureD.png -------------------------------------------------------------------------------- /tutorials/figures/rzadams/FigureE.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/rzadams/FigureE.png -------------------------------------------------------------------------------- /tutorials/figures/rzadams/FigureF.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/rzadams/FigureF.png -------------------------------------------------------------------------------- /tutorials/figures/rzadams/FigureG.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/rzadams/FigureG.png 
-------------------------------------------------------------------------------- /tutorials/figures/rzadams/FigureH.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/rzadams/FigureH.png -------------------------------------------------------------------------------- /tutorials/figures/rzadams/FigureI.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/rzadams/FigureI.png -------------------------------------------------------------------------------- /tutorials/figures/rzadams/FigureJ.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/rzadams/FigureJ.png -------------------------------------------------------------------------------- /tutorials/figures/sierra.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/sierra.pdf -------------------------------------------------------------------------------- /tutorials/figures/sierra.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/sierra.png -------------------------------------------------------------------------------- /tutorials/figures/tioga-web.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/tioga-web.png -------------------------------------------------------------------------------- /tutorials/figures/tioga.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/tioga.pdf -------------------------------------------------------------------------------- /tutorials/figures/tioga.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/tioga.png -------------------------------------------------------------------------------- /tutorials/figures/tioga/Tioga-Mod1-Ex5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/tioga/Tioga-Mod1-Ex5.png -------------------------------------------------------------------------------- /tutorials/figures/tioga/Tioga-Mod1-Ex6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/tioga/Tioga-Mod1-Ex6.png -------------------------------------------------------------------------------- /tutorials/figures/tioga/Tioga-Mod1-Ex7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/tioga/Tioga-Mod1-Ex7.png 
-------------------------------------------------------------------------------- /tutorials/figures/tioga/Tioga-Mod1-noprocs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/tioga/Tioga-Mod1-noprocs.png -------------------------------------------------------------------------------- /tutorials/figures/tioga/Tioga-Mod2-Ex1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/tioga/Tioga-Mod2-Ex1.png -------------------------------------------------------------------------------- /tutorials/figures/tioga/Tioga-Mod2-Ex10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/tioga/Tioga-Mod2-Ex10.png -------------------------------------------------------------------------------- /tutorials/figures/tioga/Tioga-Mod2-Ex11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/tioga/Tioga-Mod2-Ex11.png -------------------------------------------------------------------------------- /tutorials/figures/tioga/Tioga-Mod2-Ex2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/tioga/Tioga-Mod2-Ex2.png -------------------------------------------------------------------------------- /tutorials/figures/tioga/Tioga-Mod2-Ex4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/tioga/Tioga-Mod2-Ex4.png -------------------------------------------------------------------------------- /tutorials/figures/tioga/Tioga-Mod2-Ex5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/tioga/Tioga-Mod2-Ex5.png -------------------------------------------------------------------------------- /tutorials/figures/tioga/Tioga-Mod2-Ex6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/tioga/Tioga-Mod2-Ex6.png -------------------------------------------------------------------------------- /tutorials/figures/tioga/Tioga-Mod2-Ex7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/tioga/Tioga-Mod2-Ex7.png -------------------------------------------------------------------------------- /tutorials/figures/tioga/Tioga-Mod2-Ex8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/tioga/Tioga-Mod2-Ex8.png -------------------------------------------------------------------------------- /tutorials/figures/tioga/Tioga-Mod2-Ex9.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/tioga/Tioga-Mod2-Ex9.png -------------------------------------------------------------------------------- /tutorials/figures/tioga/Tioga-Mod3-Ex1a.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/tioga/Tioga-Mod3-Ex1a.png -------------------------------------------------------------------------------- /tutorials/figures/tioga/Tioga-Mod3-Ex1b.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/tioga/Tioga-Mod3-Ex1b.png -------------------------------------------------------------------------------- /tutorials/figures/tioga/Tioga-Mod3-Ex2a.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/tioga/Tioga-Mod3-Ex2a.png -------------------------------------------------------------------------------- /tutorials/figures/tioga/Tioga-Mod3-Ex2b.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/tioga/Tioga-Mod3-Ex2b.png -------------------------------------------------------------------------------- /tutorials/figures/tioga/Tioga-Mod3-Ex3a.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/tioga/Tioga-Mod3-Ex3a.png -------------------------------------------------------------------------------- /tutorials/figures/tioga/Tioga-Mod3-Ex3b.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/tioga/Tioga-Mod3-Ex3b.png -------------------------------------------------------------------------------- /tutorials/figures/tioga/Tioga-Mod3-Ex5a.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/tioga/Tioga-Mod3-Ex5a.png -------------------------------------------------------------------------------- /tutorials/figures/tioga/Tioga-Mod3-Ex5b.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/tioga/Tioga-Mod3-Ex5b.png -------------------------------------------------------------------------------- /tutorials/figures/tioga/Tioga-Mod3-Ex6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/tioga/Tioga-Mod3-Ex6.png -------------------------------------------------------------------------------- /tutorials/figures/tioga/figureA.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/tioga/figureA.png -------------------------------------------------------------------------------- /tutorials/figures/tioga/figureB.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/tioga/figureB.png -------------------------------------------------------------------------------- /tutorials/figures/tioga/figureC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/tioga/figureC.png -------------------------------------------------------------------------------- /tutorials/figures/tioga/figureD.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/tioga/figureD.png -------------------------------------------------------------------------------- /tutorials/figures/tioga/figureE.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/tioga/figureE.png -------------------------------------------------------------------------------- /tutorials/figures/tioga/figureF.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/tioga/figureF.png -------------------------------------------------------------------------------- /tutorials/figures/tioga/figureG.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/tioga/figureG.png -------------------------------------------------------------------------------- /tutorials/figures/tioga/figureH.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/tioga/figureH.png -------------------------------------------------------------------------------- /tutorials/figures/tioga/figureI.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/tioga/figureI.png -------------------------------------------------------------------------------- /tutorials/figures/tioga/figureJ.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/tioga/figureJ.png -------------------------------------------------------------------------------- /tutorials/figures/tioga/figureK.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/tioga/figureK.png -------------------------------------------------------------------------------- /tutorials/figures/tioga/figureL.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/tioga/figureL.png -------------------------------------------------------------------------------- /tutorials/figures/tioga/figureM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/tioga/figureM.png -------------------------------------------------------------------------------- /tutorials/figures/tioga/figureN.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/tioga/figureN.png -------------------------------------------------------------------------------- /tutorials/figures/tioga/tioga-merge.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/tioga/tioga-merge.png -------------------------------------------------------------------------------- /tutorials/figures/tioga/tioga-no-cache-io-physical.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/tioga/tioga-no-cache-io-physical.png -------------------------------------------------------------------------------- /tutorials/figures/tioga/tioga-no-cache-io.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/figures/tioga/tioga-no-cache-io.png -------------------------------------------------------------------------------- /tutorials/flux/README.md: -------------------------------------------------------------------------------- 1 | 5 | 6 | # Exercising Affinity in Flux 7 | 8 | *Edgar A. Leon* and *Jane E. Herriman*
9 | Lawrence Livermore National Laboratory 10 | 11 | ## Table of contents 12 | 13 | 1. [Flux basics and affinity](module1.md) 14 | 1. [Affinity with mpibind](module2.md) 15 | 16 | 17 | 18 | 19 | 32 | -------------------------------------------------------------------------------- /tutorials/main/README.md: -------------------------------------------------------------------------------- 1 | # Supercomputer Affinity 2 | 3 | *Edgar A. León* and *Jane E. Herriman*
4 | Lawrence Livermore National Laboratory 5 | 6 | ## Tutorial Notebook 7 | 8 |
9 |

10 | 11 |

12 | 13 | 14 | 1. Making sense of affinity: [Discovering the node architecture topology](module1.md) 15 | 16 | Learn how to identify the compute and memory components of a compute node using `hwloc` before learning how to leverage these resources to improve program performance. This includes identifying the node's GPUs, cores, hardware threads, cache hierarchy, NUMA domains, and network interfaces. Furthermore, attendees will be introduced to locality and will identify local hardware resources. 17 | 18 | 2. Applying automatic affinity: [mpibind](module2.md) 19 | 20 | Learn how to map parallel codes to the hardware automatically using `mpibind`. Attendees will learn to turn mpibind on and off and to identify the resources available to processes and threads in each case. They will explore locality effects and learn to tune mpibind to best leverage locality for hybrid applications that are either CPU or GPU constrained. 21 | 22 | 3. Exerting resource manager affinity: [Process affinity with Slurm](module3.md) 23 | 24 | Learn how to use Slurm’s affinity to map a program to the hardware at runtime when submitting a job. Attendees will learn low-level and policy-based binding, e.g., compute-bound, before covering task distribution enumerations. Finally, they will learn how to create affinity masks to specify sets of CPUs (see the affinity-reporting sketch below). 25 | 26 | 4. Exerting thread affinity: [OpenMP](module4.md) 27 | 28 | Learn how to map OpenMP threads to specific hardware resources. Attendees will learn how to map threads explicitly and implicitly using OpenMP’s predefined policies. 29 | 30 | 5. Putting it all together: [Adding in GPUs](module5.md) 31 | 32 | Learn how to assign GPUs to MPI processes and then to OpenMP threads. Learn how to apply combined process, thread, and GPU affinity policies to hybrid applications. Attendees will learn to avoid conflicting directives from the different types of affinity. Furthermore, they will assess whether automatic affinity policies may be sufficient for their use cases. 33 | 34 | -------------------------------------------------------------------------------- /tutorials/tapia22/Sep-09-1045-Supercomputing-Systems-101.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/mpibind/48411c26c9016560083c66b1b87cd8781fdb7fa3/tutorials/tapia22/Sep-09-1045-Supercomputing-Systems-101.pdf -------------------------------------------------------------------------------- /tutorials/tapia22/tapia-setup-instructions.md: -------------------------------------------------------------------------------- 1 | # Setup instructions 2 | 3 | 1. **ssh to AWS** 4 | 5 | If you'd like to ssh to our AWS environment from a native terminal application, you can run `ssh <username>@18.219.49.4` with one of our provided usernames. 6 | 7 | All usernames are of the form `user<N>`, with corresponding passwords `user<N>PASS`. 8 | 9 | For example, you might connect with 10 | 11 | ``` 12 | ssh user15@18.219.49.4 13 | ``` 14 | 15 | When prompted for a password, `user15` would provide `user15PASS`. 16 | 17 | 2. **Run setup script** 18 | 19 | Once you've ssh'ed to AWS, you'll want to run the script `/home/tutorial/aws/user-env.sh` to set up your environment via 20 | 21 | ``` 22 | source /home/tutorial/aws/user-env.sh 23 | ``` 24 | --------------------------------------------------------------------------------
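The tutorial modules above repeatedly ask attendees to report which CPUs a process is bound to; the repository's own `affinity` example programs do this in full. As a quick, self-contained reference, here is a minimal, Linux-only sketch (not part of the original tutorial materials) that prints the affinity mask of the calling process. The source file name in the build comment is illustrative.

```c
/* where-am-i.c: print the CPUs the calling process is allowed to run on.
 * Minimal Linux-only sketch using sched_getaffinity.
 * One possible build line:  cc where-am-i.c -o where-am-i
 */
#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
  cpu_set_t mask;
  CPU_ZERO(&mask);

  /* pid 0 means "the calling process" */
  if (sched_getaffinity(0, sizeof(mask), &mask) != 0) {
    perror("sched_getaffinity");
    return 1;
  }

  printf("pid %d can run on %d CPUs:", (int)getpid(), CPU_COUNT(&mask));
  for (int cpu = 0; cpu < CPU_SETSIZE; cpu++)
    if (CPU_ISSET(cpu, &mask))
      printf(" %d", cpu);
  printf("\n");

  return 0;
}
```

Launching one copy of such a program per task under the resource manager should show directly how the binding options discussed in the Slurm, OpenMP, and GPU modules change the set of CPUs each process is allowed to use.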