├── .gitignore
├── AUTHORS
├── LICENSE
├── README.md
├── examples
│   └── mpipool-demo.py
├── mpipool
│   ├── __init__.py
│   └── core.py
└── setup.py

/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | build
3 | dist
4 | *.egg-info
--------------------------------------------------------------------------------
/AUTHORS:
--------------------------------------------------------------------------------
1 | Adrian Price-Whelan
2 | Dan Foreman-Mackey
3 | Joe Zuntz
4 | Júlio Hoffimann Mendes
5 | Manodeep Sinha
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2014, 2015 Adrian Price-Whelan & Dan Foreman-Mackey
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 | NO LONGER SUPPORTED
3 | ===================
4 |
5 | Check out [The Schwimmbad](https://github.com/adrn/schwimmbad) instead.
6 |
7 | ---------
8 |
9 | mpipool
10 | =======
11 |
12 | A Python MPI Pool
13 |
14 | Minimal working example
15 | -----------------------
16 |
17 | See the code in `examples/mpipool-demo.py`:
18 |
19 | ```python
20 | # mpipool-demo.py
21 |
22 | # Standard library
23 | import sys
24 |
25 | # Third-party
26 | import numpy as np
27 | from mpipool import MPIPool
28 |
29 | def worker(task):
30 |     x, y = task
31 |     return 5*x + y**2
32 |
33 | def main():
34 |     # Initialize the MPI pool
35 |     pool = MPIPool()
36 |
37 |     # Make sure only the master process runs map()
38 |     if not pool.is_master():
39 |         pool.wait()
40 |         sys.exit(0)
41 |
42 |     # Create some random input data
43 |     x = np.random.uniform(size=10000)
44 |     y = np.random.uniform(size=10000)
45 |     tasks = np.vstack((x, y)).T
46 |
47 |     vals = pool.map(worker, tasks)
48 |
49 |     pool.close()
50 |
51 | if __name__ == "__main__":
52 |     main()
53 | ```
54 |
55 | Execute the script with `mpiexec` or your cluster's MPI launcher; for example,
56 | to run on 8 cores: `mpiexec -n 8 python mpipool-demo.py`
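57 |
58 | Using the pool as a context manager
59 | -----------------------------------
60 |
61 | `MPIPool` also implements `__enter__`/`__exit__` (see `mpipool/core.py`), so
62 | the pool can be closed automatically. Here is a minimal sketch of the same
63 | demo written with a `with` block; note that the worker guard is still needed,
64 | because every MPI process executes the whole script:
65 |
66 | ```python
67 | import sys
68 |
69 | from mpipool import MPIPool
70 |
71 | def worker(task):
72 |     x, y = task
73 |     return 5*x + y**2
74 |
75 | with MPIPool() as pool:
76 |     # Worker processes block in wait() until the pool is closed.
77 |     if not pool.is_master():
78 |         pool.wait()
79 |         sys.exit(0)
80 |
81 |     # Only the master reaches this point.
82 |     tasks = [(x, 2.0) for x in range(100)]
83 |     results = pool.map(worker, tasks)
84 | ```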
--------------------------------------------------------------------------------
/examples/mpipool-demo.py:
--------------------------------------------------------------------------------
1 | # mpipool-demo.py
2 |
3 | # Standard library
4 | import sys
5 |
6 | # Third-party
7 | import numpy as np
8 | from mpipool import MPIPool
9 |
10 | def worker(task):
11 |     x, y = task
12 |     return 5*x + y**2
13 |
14 | def main():
15 |     # Initialize the MPI pool
16 |     pool = MPIPool()
17 |
18 |     # Make sure only the master process runs map()
19 |     if not pool.is_master():
20 |         pool.wait()
21 |         sys.exit(0)
22 |
23 |     # Create some random input data
24 |     x = np.random.uniform(size=10000)
25 |     y = np.random.uniform(size=10000)
26 |     tasks = np.vstack((x, y)).T
27 |
28 |     vals = pool.map(worker, tasks)
29 |
30 |     pool.close()
31 |
32 | if __name__ == "__main__":
33 |     main()
--------------------------------------------------------------------------------
/mpipool/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | __version__ = "0.0.2.dev0"
4 |
5 | try:
6 |     __MPIPOOL_SETUP__
7 | except NameError:
8 |     __MPIPOOL_SETUP__ = False
9 |
10 | if not __MPIPOOL_SETUP__:
11 |     __all__ = ["MPIPool", "MPIPoolException"]
12 |     from .core import MPIPool, MPIPoolException
--------------------------------------------------------------------------------
/mpipool/core.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from __future__ import (division, print_function, absolute_import,
4 |                         unicode_literals)
5 |
6 | __all__ = ["MPIPool", "MPIPoolException"]
7 |
8 | import traceback
9 | from mpi4py import MPI
10 |
11 |
12 | class MPIPool(object):
13 |     """
14 |     A pool that distributes tasks over a set of MPI processes using
15 |     mpi4py. MPI is an API for distributed-memory parallelism, used
16 |     by large cluster computers. This class provides a similar interface
17 |     to Python's multiprocessing Pool, but currently only supports the
18 |     :func:`map` method.
19 |
20 |     Contributed initially by Joe Zuntz.
21 |
22 |     Parameters
23 |     ----------
24 |     comm : (optional)
25 |         The ``mpi4py`` communicator.
26 |
27 |     debug : bool (optional)
28 |         If ``True``, print out a lot of status updates at each step.
29 |
30 |     loadbalance : bool (optional)
31 |         If ``True`` and the number of tasks is greater than the
32 |         number of worker processes, try to balance the load by first
33 |         sending one task to each worker and then sending out the
34 |         remaining tasks as workers finish.
35 |     """
36 |     def __init__(self, comm=None, debug=False, loadbalance=False):
37 |         self.comm = MPI.COMM_WORLD if comm is None else comm
38 |         self.rank = self.comm.Get_rank()
39 |         self.size = self.comm.Get_size() - 1
40 |         self.debug = debug
41 |         self.function = _error_function
42 |         self.loadbalance = loadbalance
43 |         if self.size == 0:
44 |             raise ValueError("Tried to create an MPI pool, but there "
45 |                              "was only one MPI process available. "
46 |                              "Need at least two.")
47 |
48 |     def is_master(self):
49 |         """
50 |         Is the current process the master?
51 |         """
52 |         return self.rank == 0
53 |
54 |     def wait(self):
55 |         """
56 |         If this isn't the master process, wait for instructions.
57 |         """
58 |         if self.is_master():
59 |             raise RuntimeError("Master node told to await jobs.")
60 |
61 |         status = MPI.Status()
62 |
63 |         while True:
64 |             # Event loop: sit here and await instructions.
65 |             if self.debug:
66 |                 print("Worker {0} waiting for task.".format(self.rank))
67 |
68 |             # Blocking receive to wait for instructions.
69 |             task = self.comm.recv(source=0, tag=MPI.ANY_TAG, status=status)
70 |             if self.debug:
71 |                 print("Worker {0} got task {1} with tag {2}."
72 |                       .format(self.rank, task, status.tag))
73 |
74 |             # Check whether the message is the special sentinel that
75 |             # signals the end; if so, stop waiting.
76 |             if isinstance(task, _close_pool_message):
77 |                 if self.debug:
78 |                     print("Worker {0} told to quit.".format(self.rank))
79 |                 break
80 |
81 |             # Check whether the message is the special type that carries
82 |             # a new function to be applied.
83 |             if isinstance(task, _function_wrapper):
84 |                 self.function = task.function
85 |                 if self.debug:
86 |                     print("Worker {0} replaced its task function: {1}."
87 |                           .format(self.rank, self.function))
88 |                 continue
89 |
90 |             # If not a special message, just run the known function on
91 |             # the input and return the result asynchronously.
92 |             try:
93 |                 result = self.function(task)
94 |             except Exception:
95 |                 tb = traceback.format_exc()
96 |                 self.comm.isend(MPIPoolException(tb), dest=0,
97 |                                 tag=status.tag)
98 |                 return
99 |             if self.debug:
100 |                 print("Worker {0} sending answer {1} with tag {2}."
101 |                       .format(self.rank, result, status.tag))
102 |             self.comm.isend(result, dest=0, tag=status.tag)
103 |
104 |     def map(self, function, tasks, callback=None):
105 |         """
106 |         Like the built-in :func:`map` function, apply a function to all
107 |         of the values in a list and return the list of results.
108 |
109 |         Parameters
110 |         ----------
111 |         function : callable
112 |             The function to apply to each element in the list.
113 |
114 |         tasks :
115 |             A list of tasks -- each element is passed to the input
116 |             function.
117 |
118 |         callback : callable (optional)
119 |             A callback function to call on each result.
120 |         """
121 |         ntask = len(tasks)
122 |
123 |         # If this isn't the master, just wait for instructions.
124 |         if not self.is_master():
125 |             self.wait()
126 |             return
127 |
128 |         if function is not self.function:
129 |             if self.debug:
130 |                 print("Master replacing pool function with {0}."
131 |                       .format(function))
132 |
133 |             self.function = function
134 |             F = _function_wrapper(function)
135 |
136 |             # Tell all the workers what function to use.
137 |             requests = []
138 |             for i in range(self.size):
139 |                 r = self.comm.isend(F, dest=i + 1)
140 |                 requests.append(r)
141 |
142 |             # Wait until all of the workers have responded. See:
143 |             # https://gist.github.com/4176241
144 |             MPI.Request.waitall(requests)
145 |
146 |         if (not self.loadbalance) or (ntask <= self.size):
147 |             # Don't load-balance; this is the default scheme (the one
148 |             # emcee uses).
149 |
150 |             # Send all the tasks off and wait for them to be received.
151 |             # Again, see the bug in the above gist.
152 |             requests = []
153 |             for i, task in enumerate(tasks):
154 |                 worker = i % self.size + 1
155 |                 if self.debug:
156 |                     print("Sent task {0} to worker {1} with tag {2}."
157 |                           .format(task, worker, i))
158 |                 r = self.comm.isend(task, dest=worker, tag=i)
159 |                 requests.append(r)
160 |
161 |             MPI.Request.waitall(requests)
162 |
163 |             # Now wait for the answers.
164 |             results = []
165 |             for i in range(ntask):
166 |                 worker = i % self.size + 1
167 |                 if self.debug:
168 |                     print("Master waiting for worker {0} with tag {1}"
169 |                           .format(worker, i))
170 |                 result = self.comm.recv(source=worker, tag=i)
171 |                 if isinstance(result, MPIPoolException):
172 |                     print("One of the MPIPool workers failed with the "
173 |                           "exception:")
174 |                     print(result.traceback)
175 |                     raise result
176 |
177 |                 if callback is not None:
178 |                     callback(result)
179 |
180 |                 results.append(result)
181 |
182 |             return results
183 |
184 |         else:
185 |             # Perform load-balancing. The order of the results is likely
186 |             # to differ from the previous case.
187 |             for i, task in enumerate(tasks[0:self.size]):
188 |                 worker = i + 1
189 |                 if self.debug:
190 |                     print("Sent task {0} to worker {1} with tag {2}."
191 |                           .format(task, worker, i))
192 |                 # Send out the tasks asynchronously.
193 |                 self.comm.isend(task, dest=worker, tag=i)
194 |
195 |             ntasks_dispatched = self.size
196 |             results = [None] * ntask
197 |             for itask in range(ntask):
198 |                 status = MPI.Status()
199 |                 # Receive a result from any worker.
200 |                 result = self.comm.recv(source=MPI.ANY_SOURCE,
201 |                                         tag=MPI.ANY_TAG, status=status)
202 |                 worker = status.source
203 |                 i = status.tag
204 |
205 |                 # Surface worker failures here too, mirroring the
206 |                 # non-load-balanced branch above.
207 |                 if isinstance(result, MPIPoolException):
208 |                     print("One of the MPIPool workers failed with the "
209 |                           "exception:")
210 |                     print(result.traceback)
211 |                     raise result
212 |
213 |                 if callback is not None:
214 |                     callback(result)
215 |
216 |                 results[i] = result
217 |                 if self.debug:
218 |                     print("Master received from worker {0} with tag {1}"
219 |                           .format(worker, i))
220 |
221 |                 # Now send the next task to this idle worker (if there
222 |                 # are any left).
223 |                 if ntasks_dispatched < ntask:
224 |                     task = tasks[ntasks_dispatched]
225 |                     i = ntasks_dispatched
226 |                     if self.debug:
227 |                         print("Sent task {0} to worker {1} with tag {2}."
228 |                               .format(task, worker, i))
229 |                     # Send out the task asynchronously.
230 |                     self.comm.isend(task, dest=worker, tag=i)
231 |                     ntasks_dispatched += 1
232 |
233 |             return results
234 |
235 |     def bcast(self, *args, **kwargs):
236 |         """
237 |         Equivalent to mpi4py's :func:`bcast` collective operation.
238 |         """
239 |         return self.comm.bcast(*args, **kwargs)
240 |
241 |     def close(self):
242 |         """
243 |         Just send a message off to all the pool members that contains
244 |         the special :class:`_close_pool_message` sentinel.
245 |         """
246 |         if self.is_master():
247 |             for i in range(self.size):
248 |                 self.comm.isend(_close_pool_message(), dest=i + 1)
249 |
250 |     def __enter__(self):
251 |         return self
252 |
253 |     def __exit__(self, *args):
254 |         self.close()
255 |
256 |
257 | class _close_pool_message(object):
258 |     def __repr__(self):
259 |         return "<Close pool message>"
260 |
261 |
262 | class _function_wrapper(object):
263 |     def __init__(self, function):
264 |         self.function = function
265 |
266 |
267 | def _error_function(task):
268 |     raise RuntimeError("Pool was sent tasks before being told what "
269 |                        "function to apply.")
270 |
271 |
272 | class MPIPoolException(Exception):
273 |     def __init__(self, tb):
274 |         self.traceback = tb
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 |
3 | from __future__ import division, print_function
4 |
5 | import sys
6 |
7 | try:
8 |     from setuptools import setup
9 | except ImportError:
10 |     from distutils.core import setup
11 |
12 | if sys.version_info[0] < 3:
13 |     import __builtin__ as builtins
14 | else:
15 |     import builtins
16 | builtins.__MPIPOOL_SETUP__ = True
17 |
18 | import mpipool
19 |
20 | setup(
21 |     name="mpipool",
22 |     version=mpipool.__version__,
23 |     author="Adrian Price-Whelan",
24 |     author_email="adrn@astro.columbia.edu",
25 |     packages=["mpipool"],
26 |     url="https://github.com/adrn/mpipool/",
27 |     license="MIT",
28 |     description="MPI pool",
29 |     package_data={"": ["LICENSE", "AUTHORS"]},
30 |     include_package_data=True,
31 |     classifiers=[
32 |         "Intended Audience :: Developers",
33 |         "Intended Audience :: Science/Research",
34 |         "License :: OSI Approved :: MIT License",
35 |         "Programming Language :: Python",
36 |     ],
37 | )
--------------------------------------------------------------------------------
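
A note on worker errors: if a task raises inside the worker function,
`wait()` wraps the formatted traceback in an `MPIPoolException` and sends it
back, and `MPIPool.map` re-raises it on the master. A minimal sketch
(illustrative only, not a file in this repository) of catching it:

```python
import sys

from mpipool import MPIPool, MPIPoolException

def worker(task):
    if task == 3:
        raise ValueError("bad task")
    return task ** 2

pool = MPIPool()
if not pool.is_master():
    pool.wait()
    sys.exit(0)

try:
    results = pool.map(worker, list(range(10)))
except MPIPoolException as e:
    # e.traceback holds the failing worker's formatted traceback.
    print("A worker failed:\n" + e.traceback)
finally:
    pool.close()
```

Note that the failing worker returns from `wait()` after reporting the error,
so the pool should be shut down rather than reused after catching the
exception.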