├── README.md ├── pyparallelize ├── __init__.py └── threaded.py ├── screenshot.png ├── setup.py └── tests ├── GIL_timing.py ├── tests.py └── tests_for.py /README.md: -------------------------------------------------------------------------------- 1 | # py-parallelize 2 | 3 | This package lets you parallelize computations. 4 | 5 | ![Screenshot](screenshot.png) 6 | 7 | It is a: 8 | - drop-in replacement for `map`, `apply`, and `for`. 9 | - wrapper around `multiprocessing`. 10 | - quick and relatively tidy way to parallelize computations 11 | - nice choice if your data does not fit into dask's data model, but you do not want to write enormous amounts of code 12 | using raw joblib/multitask/ipyparallel : everything is wrapped neatly here 13 | 14 | It is **NOT** a great idea to use this package if: 15 | - you will rely heavily on parallel computations, or need something more than plain `map` (for example, computation graphs). 16 | Please refer to [dask](https://dask.org/), as it provides more functionality. 17 | - you are starting a project from scratch, using Jupyter and have a spare hour or two. In this case please spend this time 18 | productively by getting used to the verbose but fantastic [ipyparallel](https://ipyparallel.readthedocs.io/en/latest/) API. 19 | - you are operating primarily with numpy arrays / vectorized operations. [Numba](http://numba.pydata.org/) is a great 20 | fit for such tasks. 21 | 22 | # Examples 23 | 24 | ## Parallelizing map and list comprehension 25 | 26 | ```python 27 | def some_fun(x): 28 | return x ** 2 29 | x = [1, 2, 3] 30 | 31 | # Single-thread variants 32 | y = [some_fun(i) for i in x] 33 | y = list(map(some_fun, x)) 34 | 35 | # Parallelized variant 36 | y = parallelize(x, some_fun) 37 | ``` 38 | 39 | A more practical example: this snippet loads images from URLs, resizes them, and transforms them into a feature vector 40 | using VGG19 pre-trained on Imagenet. 
41 | 42 | ```python 43 | import tensorflow as tf 44 | from pyparallelize import parallelize 45 | import keras.applications.vgg19 as vgg 46 | from skimage.io import imread 47 | import cv2 48 | import numpy as np 49 | 50 | graph = tf.get_default_graph() 51 | inception_model = vgg.VGG19(weights='imagenet', include_top=False) 52 | 53 | def process_image(img_full_path): 54 | # We have to reattach the graph (because it was created in a different thread). 55 | # Otherwise an exception will be raised 56 | with graph.as_default(): 57 | img = imread(img_full_path) 58 | target_size = (128, 128) 59 | img = cv2.resize(img, dsize=target_size, interpolation=cv2.INTER_CUBIC) 60 | img = np.array([img]).astype(np.float) 61 | img = vgg.preprocess_input(img) 62 | vector = np.array(inception_model.predict(img)).reshape(8192) 63 | return vector 64 | 65 | urls = [ 66 | "https://upload.wikimedia.org/wikipedia/commons/c/c4/Savannah_Cat_portrait.jpg", 67 | "https://upload.wikimedia.org/wikipedia/commons/4/40/BEN_Bengalian_kitten_%284492540155%29.jpg", 68 | "http://an-url-that-does-not-exist.com/", 69 | "https://upload.wikimedia.org/wikipedia/commons/7/7b/Cat_Janna.jpg" 70 | ] 71 | 72 | # We can set number of threads to a number greater than the number of CPUs because it's most likely that image downloading 73 | # will be the bottleneck. 74 | x = parallelize(urls, process_image, thread_count=25) 75 | x 76 | ``` 77 | 78 | ## Parallel for 79 | It is a bit clumsy to use because it requires multiprocessing.Manager to create 80 | process-shared lists, but so far it's the best way to implement `pfor`. 81 | 82 | ```python 83 | # Single-thread variant 84 | result = [] 85 | for x in range(10): 86 | result.append(x ** 2) 87 | print(result) 88 | 89 | # Parallelized variant 90 | from multiprocessing import Manager 91 | 92 | with Manager() as m: 93 | l = m.list() 94 | for x in pfor(range(10)): 95 | l.append(x ** 2) 96 | print(l) 97 | ``` 98 | 99 | # Features 100 | 101 | What's the difference between this and \<alternatives\>? 
102 | Well, unlike alternatives and homebrew solutions, this package: 103 | - Has progressbar! 104 | - Does not crash when stopped using Ctrl+C or "Stop" button in Jupyter 105 | - Works in Wandows 106 | - Continues working if stumbled upon occasional exception (i.e. you won't have to rerun whole process just because record #45673 out of 100M is broken) 107 | - Properly works with Series 108 | 109 | # What's under the hood? 110 | 111 | This package uses `multiprocessing` to launch new threads and processes. It means that there is no GIL-circumvention 112 | logic. Thus, all GIL-related quirks are present. For example, you might not get expected speed-up if your functions 113 | do not spend much time in I/O. 114 | 115 | # Installation 116 | 117 | Run 118 | 119 | `pip3 install git+https://github.com/rampeer/py-parallelize --user` 120 | 121 | or 122 | 123 | `sudo pip3 install git+https://github.com/rampeer/py-parallelize` 124 | -------------------------------------------------------------------------------- /pyparallelize/__init__.py: -------------------------------------------------------------------------------- 1 | name = "pyparallelize" 2 | __all__ = ['parallelize', 'StoppableThread', 'pfor'] 3 | 4 | from .threaded import StoppableThread, parallelize, pfor 5 | -------------------------------------------------------------------------------- /pyparallelize/threaded.py: -------------------------------------------------------------------------------- 1 | import multiprocessing 2 | import os 3 | import sys 4 | from multiprocessing import Queue 5 | from queue import Empty 6 | from threading import Thread, Lock 7 | from time import time 8 | from typing import Callable, Iterable 9 | from warnings import warn 10 | 11 | import numpy as np 12 | import pandas as pd 13 | 14 | fork_available = hasattr(os, "fork") 15 | 16 | 17 | class StoppableThread(Thread): 18 | def __init__(self, fun: Callable, items: Iterable, 19 | callback: Callable = None, callback_each: int = 1, 20 | 
continue_on_exception: bool = False, exception_impute=None, exception_callback: Callable = None): 21 | super().__init__() 22 | self.callback = callback 23 | self.callback_each = callback_each 24 | self.fun = fun 25 | self.items = items 26 | self.running = False 27 | self.current_index = 0 28 | self.results = [] 29 | self.continue_on_exception = continue_on_exception 30 | self.exception_impute = exception_impute 31 | self.exception = None 32 | self.exception_callback = exception_callback 33 | 34 | def run(self): 35 | self.running = True 36 | self.results = [] 37 | for self.current_index, item in enumerate(self.items): 38 | if not self.running: 39 | break 40 | try: 41 | self.results.append(self.fun(item)) 42 | except Exception as e: 43 | if not self.continue_on_exception: 44 | self.exception = e 45 | self.exception_callback() 46 | break 47 | self.results.append(self.exception_impute) 48 | warn("%s processing element %s" % (repr(sys.exc_info()[1]), str(item))) 49 | if self.callback is not None: 50 | if self.current_index % self.callback_each == 0: 51 | self.callback() 52 | self.running = False 53 | 54 | 55 | def parallelize(items: Iterable, fun: Callable, thread_count: int = None, progressbar: bool = True, 56 | progressbar_tick: int = 1, continue_on_exception: bool = True, exception_impute=None, 57 | display_eta: bool = True): 58 | """ 59 | This function iterates (in multithreaded fashion) over `items` and calls `fun` for each item. 60 | :param items: items to process. 61 | :param fun: function to apply to each `items` element. 62 | :param progressbar: should progressbar be displayed? 63 | :param progressbar_tick: how often should we update progressbar? 64 | :param thread_count: how many threads should be allocated? If None, this parameter will be chosen automatically. 
65 | :param continue_on_exception: if True, it will print warning if `fun` fails on some element, instead of halting 66 | :param exception_impute: which value should be put into output when `fun` throws an exception? 67 | :param display_eta: Should an estimation of remaining time be displayed? 68 | """ 69 | 70 | if thread_count is None: 71 | thread_count = multiprocessing.cpu_count() 72 | 73 | lock = Lock() 74 | 75 | def _progressbar_callback(): 76 | def report(): 77 | lock.acquire() 78 | total = int(sum([len(t.items) for t in threads])) 79 | current = int(sum([t.current_index + 1.0 if len(t.items) > 0 else 0 for t in threads])) 80 | 81 | if current > 0: 82 | eta = (time() - start_time) / current * (total - current) 83 | else: 84 | eta = 0 85 | message = "[{0: <40}] {1} / {2} ({3: .2%})".format( 86 | "#" * int(current / max(total, 1) * 40), 87 | current, 88 | total, 89 | current / max(total, 1)) 90 | if display_eta: 91 | message += " (ETA: {0}s) ".format(round(eta)) 92 | print(message, end="\r", file=sys.stderr, flush=True) 93 | lock.release() 94 | 95 | return report 96 | 97 | def _stop_all_threads(): 98 | for t in threads: 99 | t.running = False 100 | 101 | items_split = np.array_split(items, thread_count) 102 | if progressbar: 103 | callback = _progressbar_callback() 104 | else: 105 | callback = None 106 | threads = [StoppableThread(fun, x, 107 | callback, progressbar_tick, 108 | continue_on_exception, exception_impute, _stop_all_threads) for x in items_split] 109 | start_time = time() 110 | for t in threads: 111 | t.start() 112 | try: 113 | for t in threads: 114 | t.join() 115 | except KeyboardInterrupt: 116 | print("Interrupting threads...") 117 | _stop_all_threads() 118 | # We have to wait for all threads to process their current elements 119 | for t in threads: 120 | t.join() 121 | if callback is not None: 122 | callback() 123 | print("\n") 124 | 125 | # Any exceptions? 
126 | for t in threads: 127 | if t.exception is not None: 128 | raise t.exception 129 | 130 | collected_results = [item for thread in threads for item in thread.results] 131 | 132 | if isinstance(items, pd.Series): 133 | return pd.Series(list(collected_results), index=items.index) 134 | else: 135 | return collected_results 136 | 137 | 138 | def pfor(items: Iterable, process_count: int = None, progressbar: bool = True, 139 | progressbar_tick: int = 1, i_know_what_im_doing: bool = False) -> Iterable: 140 | """ 141 | This function is supposed to be used in conjunction with `for`. It effectively executes `for` body in multithreaded 142 | fashion: 143 | ``` 144 | for x in pfor(range(10)): 145 | # Something useful. It will be executed in parallel. 146 | pass 147 | ``` 148 | :param items: Iterate over what? 149 | :param progressbar: should progressbar be displayed? 150 | :param progressbar_tick: how often should we update progressbar? 151 | :param process_count: how many processes should be allocated? If None, this parameter will be chosen automatically. 152 | :param i_know_what_im_doing: use of this function required `for` body to use `multithreading.Manager`'s lists and 153 | dicts because they are shared across processes: 154 | ``` 155 | from multithreading import Manager 156 | with Manager() as m: 157 | l = m.list() 158 | ``` 159 | tick this flag if you already did that. Otherwise, a warning will be displayed. 160 | """ 161 | if not fork_available: 162 | raise Exception("No os.fork function available. 
Probably, you are using Windows, that does not support it.\n" 163 | "Please use `parallelize` instead (or switch to other OS ;)") 164 | if process_count is None: 165 | process_count = multiprocessing.cpu_count() 166 | if not i_know_what_im_doing: 167 | warn("Please note that processes do not share memory.\n" 168 | "Therefore, you have to use multiprocessing lists and dicts\n" 169 | "as they are shared across processes:\n\n" 170 | "from multiprocessing import Manager\n" 171 | "with Manager() as m:\n" 172 | "\tl = m.list()\n" 173 | "\tfor x in pfor(range(10)):" 174 | "\t\tl.append(x ** 2)" 175 | "\tprint(l)\n\n") 176 | 177 | lock = Lock() 178 | 179 | q = Queue() 180 | item_count = 0 181 | for i in items: 182 | item_count += 1 183 | q.put(i) 184 | 185 | def report(): 186 | lock.acquire() 187 | current = item_count - q.qsize() 188 | message = "[{0: <40}] {1} / {2} ({3: .2%})".format( 189 | "#" * int(current / item_count * 40), 190 | current, 191 | item_count, 192 | current / item_count) 193 | print(message, end="\r", file=sys.stderr, flush=True) 194 | lock.release() 195 | 196 | pids = [] 197 | 198 | def _stop_all_processes(): 199 | for pid in pids: 200 | os.kill(pid, 9) 201 | 202 | try: 203 | report() 204 | for _ in range(process_count): 205 | pid = os.fork() 206 | if pid == 0: 207 | try: 208 | ticks = 0 209 | while True: 210 | ticks += 1 211 | item = q.get(block=False) 212 | yield item 213 | if progressbar: 214 | if ticks % progressbar_tick == 0: 215 | report() 216 | except GeneratorExit: 217 | lock.acquire() 218 | print( 219 | "An exception occured when processing element < %s > " % (str(item))) 220 | print("Unfortunately, exception cannot be printed because ") 221 | print("processes die on stumbling upon exception, and there is no way to recover it. 
") 222 | lock.release() 223 | break 224 | except Empty: 225 | pass 226 | finally: 227 | os._exit(0) 228 | else: 229 | pids.append(pid) 230 | 231 | for pid in pids: 232 | os.waitpid(pid, 0) 233 | report() 234 | except KeyboardInterrupt: 235 | print("Interrupting threads...") 236 | _stop_all_processes() 237 | for pid in pids: 238 | os.waitpid(pid, 0) 239 | q = Queue() 240 | finally: 241 | print("\n") 242 | if not q.empty(): 243 | print("Element queue is not empty. Apparently, all processes died. Probably, something is wrong " 244 | "with your data or code.") 245 | -------------------------------------------------------------------------------- /screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rampeer/py-parallelize/130387a968fdd1b7f85f2ba83823878abaad5c05/screenshot.png -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | setuptools.setup( 4 | name='pyparallelize', 5 | version='0.1', 6 | packages=setuptools.find_packages(), 7 | license='AS-IS', 8 | long_description=open('README.md').read(), requires=['numpy'] 9 | ) 10 | -------------------------------------------------------------------------------- /tests/GIL_timing.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from time import sleep, time 3 | 4 | import numpy as np 5 | 6 | from pyparallelize import parallelize 7 | 8 | 9 | class MyTestCase(unittest.TestCase): 10 | @staticmethod 11 | def allocation_fun(x): 12 | return np.random.normal(x, 1.0, (1000, 1000)).sum() 13 | 14 | @staticmethod 15 | def pure_io_fun(x): 16 | return sleep(0.1) 17 | 18 | def mixed_fun(self, x): 19 | x = np.random.normal(0.0, 1e-5, (1000, 1000)) 20 | for _ in range(10): 21 | x = np.multiply(x, x) 22 | 23 | def common_struct_fun(self, x): 24 | return 
np.add(self.a, self.b).sum() 25 | 26 | def time_that(self, N, fun): 27 | x = [0] * N 28 | 29 | # Parallelized execution 30 | start = time() 31 | parallelize(x, fun, progressbar=False) 32 | duration_multithreaded = time() - start 33 | print(f"Multi-threaded execuition: {duration_multithreaded} seconds") 34 | 35 | # Single-thread execution 36 | start = time() 37 | list([fun(i) for i in x]) 38 | duration_single = time() - start 39 | print(f"Single-threaded execuition: {duration_single} seconds") 40 | 41 | def test_funcs(self): 42 | self.a = np.random.normal(0.0, 1.0, (30, 30)) 43 | self.b = np.random.normal(0.0, 1.0, (30, 30)) 44 | 45 | print("\nFunction that allocates memory and does some calculations") 46 | self.time_that(100, self.mixed_fun) 47 | 48 | print("\nFunction that use common struct") 49 | self.time_that(100000, self.common_struct_fun) 50 | 51 | print("\nFunction that spends time in I/O") 52 | self.time_that(50, self.pure_io_fun) 53 | 54 | print("\nFunction that just allocates memory") 55 | self.time_that(100, self.allocation_fun) 56 | 57 | 58 | if __name__ == '__main__': 59 | unittest.main() 60 | -------------------------------------------------------------------------------- /tests/tests.py: -------------------------------------------------------------------------------- 1 | import multiprocessing 2 | import unittest 3 | from time import sleep, time 4 | 5 | from pyparallelize import parallelize 6 | 7 | 8 | class MyTestCase(unittest.TestCase): 9 | def test_something(self): 10 | x = [1, 2, 3, 4] 11 | y = parallelize(x, lambda x: x * x) 12 | self.assertEqual(y, [1, 4, 9, 16]) 13 | 14 | def test_exceptions(self): 15 | x = [1, 2, 0, 5] 16 | 17 | def fun(x): 18 | if x == 0: 19 | raise ZeroDivisionError 20 | else: 21 | return 10 / x 22 | 23 | y = parallelize(x, fun) 24 | self.assertEqual(y, [10, 5, None, 2]) 25 | 26 | try: 27 | y = parallelize(x, fun, continue_on_exception=False) 28 | self.assertTrue(False) 29 | except ZeroDivisionError: 30 | pass 31 | 32 | obj 
= object() 33 | y = parallelize(x, fun, exception_impute=obj) 34 | self.assertEqual(y[2], obj) 35 | 36 | def test_empty(self): 37 | e = parallelize([], lambda x: x) 38 | self.assertEqual(len(e), 0) 39 | 40 | def test_order(self): 41 | def fun(x): 42 | sleep(x) 43 | return x 44 | x = [0.5, 0.3, 0.2, 0.6, 0.4, 0.1, 0.5, 0.1, 0.2, 0.5, 0.6] 45 | y = parallelize(x, fun) 46 | self.assertEqual(x, y) 47 | 48 | def test_series(self): 49 | import pandas as pd 50 | x = pd.Series([1, 2, 3], index=["a", "b", "c"]) 51 | y = parallelize(x, lambda p: p * 2) 52 | y_true = x.apply(lambda p: p * 2) 53 | self.assertTrue((y == y_true).all()) 54 | self.assertTrue((y.index == y_true.index).all()) 55 | 56 | def test_speed_increase(self): 57 | x = [0.1, 0.1, 0.1, 0.1] 58 | 59 | # Single-thread execution 60 | start = time() 61 | list([sleep(i) for i in x]) 62 | duration_single = time() - start 63 | 64 | # Parallelized execution 65 | start = time() 66 | def fun(x): 67 | sleep(x) 68 | return x 69 | y = parallelize(x, fun) 70 | duration_multithreaded = time() - start 71 | 72 | if multiprocessing.cpu_count() > 1: 73 | # Strictly speaking, this might fail occasionally when 74 | # some heavy process starts eating CPU right after single-threaded variant is executed. 
75 | self.assertLess(duration_multithreaded, duration_single) 76 | 77 | 78 | if __name__ == '__main__': 79 | unittest.main() 80 | -------------------------------------------------------------------------------- /tests/tests_for.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from multiprocessing import Manager 3 | from pyparallelize.threaded import pfor 4 | 5 | 6 | class MyTestCase(unittest.TestCase): 7 | def test_pfor(self): 8 | with Manager() as m: 9 | l = m.list() 10 | for x in pfor(range(10), i_know_what_im_doing=True): 11 | l.append(x ** 2) 12 | print(l) 13 | self.assertEqual(set(l), set([x**2 for x in range(10)])) 14 | 15 | def test_resilience(self): 16 | with Manager() as m: 17 | l = m.list() 18 | for x in pfor(range(10), i_know_what_im_doing=True): 19 | # An 1/0 exception will be thrown on first element. 20 | l.append(1 / x) 21 | print(l) 22 | self.assertEqual(set(l), set([1 / x for x in range(1, 10)])) 23 | 24 | 25 | if __name__ == '__main__': 26 | unittest.main() 27 | --------------------------------------------------------------------------------