├── .gitignore ├── README.md ├── asyncpool └── __init__.py ├── example.py └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | .idea/ 103 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # AsyncPool 2 | 3 | Asyncio coroutine worker pool. No more juggling bounded semaphores and annoying timeouts, and allows you to run through millions of pieces of data efficiently. 4 | 5 | Adapted from the awesome worker pool found at https://gist.github.com/thehesiod/7081ab165b9a0d4de2e07d321cc2391d 6 | 7 | # Installation 8 | 9 | ``` 10 | pip install asyncpool 11 | ``` 12 | 13 | # Example Usage 14 | 15 | ```python 16 | import asyncpool 17 | import logging 18 | import asyncio 19 | 20 | async def example_coro(initial_number, result_queue): 21 | print("Processing Value! -> {} * 2 = {}".format(initial_number, initial_number * 2)) 22 | await asyncio.sleep(1) 23 | await result_queue.put(initial_number * 2) 24 | 25 | async def result_reader(queue): 26 | while True: 27 | value = await queue.get() 28 | if value is None: 29 | break 30 | print("Got value! -> {}".format(value)) 31 | 32 | async def run(): 33 | result_queue = asyncio.Queue() 34 | 35 | reader_future = asyncio.ensure_future(result_reader(result_queue), loop=loop) 36 | 37 | # Start a worker pool with 10 coroutines, invokes `example_coro` and waits for it to complete or 5 minutes to pass. 38 | async with asyncpool.AsyncPool(loop, num_workers=10, name="ExamplePool", 39 | logger=logging.getLogger("ExamplePool"), 40 | worker_co=example_coro, max_task_time=300, 41 | log_every_n=10) as pool: 42 | for i in range(50): 43 | await pool.push(i, result_queue) 44 | 45 | await result_queue.put(None) 46 | await reader_future 47 | 48 | loop = asyncio.get_event_loop() 49 | 50 | loop.run_until_complete(run()) 51 | 52 | ``` 53 | -------------------------------------------------------------------------------- /asyncpool/__init__.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from datetime import datetime, timezone 3 | 4 | def utc_now(): 5 | # utcnow returns a naive datetime, so we have to set the timezone manually 6 | return datetime.utcnow().replace(tzinfo=timezone.utc) 7 | 8 | class Terminator: 9 | pass 10 | 11 | class AsyncPool: 12 | def __init__(self, loop, num_workers: int, name: str, logger, worker_co, load_factor: int = 1, 13 | job_accept_duration: int = None, max_task_time: int = None, return_futures: bool = False, 14 | raise_on_join: bool = False, log_every_n: int = None, expected_total=None): 15 | """ 16 | This class will create `num_workers` asyncio tasks to work against a queue of 17 | `num_workers * load_factor` items of back-pressure (IOW we will block after such 18 | number of items of work is in the queue). `worker_co` will be called 19 | against each item retrieved from the queue. If any exceptions are raised out of 20 | worker_co, self.exceptions will be set to True. 21 | @param loop: asyncio loop to use 22 | @param num_workers: number of async tasks which will pull from the internal queue 23 | @param name: name of the worker pool (used for logging) 24 | @param logger: logger to use 25 | @param worker_co: async coroutine to call when an item is retrieved from the queue 26 | @param load_factor: multiplier used for number of items in queue 27 | @param job_accept_duration: maximum number of seconds from first push to last push before a TimeoutError will be thrown. 28 | Set to None for no limit. Note this does not get reset on aenter/aexit. 29 | @param max_task_time: maximum time allowed for each task before a CancelledError is raised in the task. 30 | Set to None for no limit. 31 | @param return_futures: set to reture to return a future for each `push` (imposes CPU overhead) 32 | @param raise_on_join: raise on join if any exceptions have occurred, default is False 33 | @param log_every_n: (optional) set to number of `push`s each time a log statement should be printed (default does not print every-n pushes) 34 | @param expected_total: (optional) expected total number of jobs (used for `log_event_n` logging) 35 | @return: instance of AsyncWorkerPool 36 | """ 37 | loop = loop or asyncio.get_event_loop() 38 | self._loop = loop 39 | self._num_workers = num_workers 40 | self._logger = logger 41 | self._queue = asyncio.Queue(num_workers * load_factor) 42 | self._workers = None 43 | self._exceptions = False 44 | self._job_accept_duration = job_accept_duration 45 | self._first_push_dt = None 46 | self._max_task_time = max_task_time 47 | self._return_futures = return_futures 48 | self._raise_on_join = raise_on_join 49 | self._name = name 50 | self._worker_co = worker_co 51 | self._total_queued = 0 52 | self._log_every_n = log_every_n 53 | self._expected_total = expected_total 54 | 55 | async def _worker_loop(self): 56 | while True: 57 | got_obj = False 58 | future = None 59 | 60 | try: 61 | item = await self._queue.get() 62 | got_obj = True 63 | 64 | if item.__class__ is Terminator: 65 | break 66 | 67 | future, args, kwargs = item 68 | # the wait_for will cancel the task (task sees CancelledError) and raises a TimeoutError from here 69 | # so be wary of catching TimeoutErrors in this loop 70 | result = await asyncio.wait_for(self._worker_co(*args, **kwargs), self._max_task_time, loop=self._loop) 71 | 72 | if future: 73 | future.set_result(result) 74 | except (KeyboardInterrupt, MemoryError, SystemExit) as e: 75 | if future: 76 | future.set_exception(e) 77 | self._exceptions = True 78 | raise 79 | except BaseException as e: 80 | self._exceptions = True 81 | 82 | if future: 83 | # don't log the failure when the client is receiving the future 84 | future.set_exception(e) 85 | else: 86 | self._logger.exception('Worker call failed') 87 | finally: 88 | if got_obj: 89 | self._queue.task_done() 90 | 91 | @property 92 | def exceptions(self): 93 | return self._exceptions 94 | 95 | @property 96 | def total_queued(self): 97 | return self._total_queued 98 | 99 | async def __aenter__(self): 100 | self.start() 101 | return self 102 | 103 | async def __aexit__(self, exc_type, exc_val, exc_tb): 104 | await self.join() 105 | 106 | async def push(self, *args, **kwargs) -> asyncio.Future: 107 | """ Method to push work to `worker_co` passed to `__init__`. 108 | :param args: position arguments to be passed to `worker_co` 109 | :param kwargs: keyword arguments to be passed to `worker_co` 110 | :return: future of result """ 111 | if self._first_push_dt is None: 112 | self._first_push_dt = utc_now() 113 | 114 | if self._job_accept_duration is not None and (utc_now() - self._first_push_dt) > self._job_accept_duration: 115 | raise TimeoutError("Maximum lifetime of {} seconds of AsyncWorkerPool: {} exceeded".format(self._job_accept_duration, self._name)) 116 | 117 | future = asyncio.futures.Future(loop=self._loop) if self._return_futures else None 118 | await self._queue.put((future, args, kwargs)) 119 | self._total_queued += 1 120 | 121 | if self._log_every_n is not None and (self._total_queued % self._log_every_n) == 0: 122 | self._logger.info("pushed {}/{} items to {} AsyncWorkerPool".format(self._total_queued, self._expected_total, self._name)) 123 | 124 | return future 125 | 126 | def start(self): 127 | """ Will start up worker pool and reset exception state """ 128 | assert self._workers is None 129 | self._exceptions = False 130 | 131 | self._workers = [asyncio.ensure_future(self._worker_loop(), loop=self._loop) for _ in range(self._num_workers)] 132 | 133 | async def join(self): 134 | # no-op if workers aren't running 135 | if not self._workers: 136 | return 137 | 138 | self._logger.debug('Joining {}'.format(self._name)) 139 | # The Terminators will kick each worker from being blocked against the _queue.get() and allow 140 | # each one to exit 141 | for _ in range(self._num_workers): 142 | await self._queue.put(Terminator()) 143 | 144 | try: 145 | await asyncio.gather(*self._workers, loop=self._loop) 146 | self._workers = None 147 | except: 148 | self._logger.exception('Exception joining {}'.format(self._name)) 149 | raise 150 | finally: 151 | self._logger.debug('Completed {}'.format(self._name)) 152 | 153 | if self._exceptions and self._raise_on_join: 154 | raise Exception("Exception occurred in pool {}".format(self._name)) -------------------------------------------------------------------------------- /example.py: -------------------------------------------------------------------------------- 1 | import asyncpool 2 | import logging 3 | import asyncio 4 | 5 | async def example_coro(initial_number, result_queue): 6 | print("Processing Value! -> {} * 2 = {}".format(initial_number, initial_number * 2)) 7 | await asyncio.sleep(1) 8 | await result_queue.put(initial_number * 2) 9 | 10 | async def result_reader(queue): 11 | while True: 12 | value = await queue.get() 13 | if value is None: 14 | break 15 | print("Got value! -> {}".format(value)) 16 | 17 | async def run(): 18 | 19 | result_queue = asyncio.Queue() 20 | 21 | reader_future = asyncio.ensure_future(result_reader(result_queue), loop=loop) 22 | 23 | # Start a worker pool with 10 coroutines, invokes `example_coro` and waits for it to complete or 5 minutes to pass. 24 | async with asyncpool.AsyncPool(loop, num_workers=10, name="ExamplePool", 25 | logger=logging.getLogger("ExamplePool"), 26 | worker_co=example_coro, max_task_time=300, 27 | log_every_n=10) as pool: 28 | for i in range(50): 29 | await pool.push(i, result_queue) 30 | 31 | await result_queue.put(None) 32 | await reader_future 33 | 34 | loop = asyncio.get_event_loop() 35 | 36 | loop.run_until_complete(run()) -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from setuptools.command.test import test as TestCommand 3 | import os 4 | import sys 5 | 6 | py_version = sys.version_info[:2] 7 | 8 | if py_version < (3, 3): 9 | raise Exception("AsyncPool requires Python >= 3.3.") 10 | 11 | long_description = """ 12 | AsyncPool is a asyncio-based coroutine worker pool, intended to process through large amounts of jobs 13 | efficiently and with explicit timeouts. 14 | """ 15 | 16 | setup( 17 | name='asyncpool', 18 | version='1.0', 19 | url='http://github.com/calidog/asyncpool/', 20 | author='Ryan Sears', 21 | author_email='ryan@calidog.io', 22 | description='Async coroutine worker pool', 23 | long_description=long_description, 24 | packages=['asyncpool'], 25 | include_package_data=True, 26 | license = "MIT", 27 | classifiers = [ 28 | "License :: OSI Approved :: MIT License", 29 | "Topic :: Internet :: WWW/HTTP", 30 | "Topic :: Software Development :: Testing", 31 | "Environment :: Console", 32 | "Operating System :: MacOS :: MacOS X", 33 | "Operating System :: POSIX", 34 | 'Programming Language :: Python :: 3', 35 | 'Programming Language :: Python :: 3.4', 36 | 'Programming Language :: Python :: 3.5', 37 | ], 38 | ) --------------------------------------------------------------------------------