├── .gitignore
├── LICENSE
├── README.md
├── collapsing_thread_pool_executor
    ├── __init__.py
    ├── collapsing_thread_pool_executor.py
    └── collapsing_thread_pool_executor_test.py
├── requirements.txt
├── setup.cfg
└── setup.py
/.gitignore:
--------------------------------------------------------------------------------
1 | .pytest*
2 | .vscode*
3 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2018 Edward Beech
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # CollapsingThreadPoolExecutor
2 | 
3 | The CollapsingThreadPoolExecutor is inspired by, and compatible with, the ThreadPoolExecutor from the
4 | "futures" module. It differs in that idle worker threads are kept on a stack, so the same worker (or small set of workers) ends up doing all the work, and workers that remain idle are destroyed.
5 | 
6 | ## How to install
7 | 
8 |     $ pip install collapsing-thread-pool-executor
9 | 
10 | ## How to develop
11 | 
12 | **Prerequisites**
13 | 
14 | * python3 w/ pip
15 | * python2 w/ pip
16 | * virtualenvwrapper
17 | * entr
18 | 
19 | **Set up the environments**
20 | 
21 |     $ mkvirtualenv -p `which python2.7` collapsing-thread-pool-executor-py2
22 |     $ pip install .
23 |     $ pip install -r requirements.txt
24 | 
25 |     $ mkvirtualenv -p `which python3` collapsing-thread-pool-executor-py3
26 |     $ pip install .
27 |     $ pip install -r requirements.txt
28 | 
29 | **Watch the tests**
30 | 
31 |     # watch python2 tests in one window
32 |     $ workon collapsing-thread-pool-executor-py2
33 |     $ find ./ -name '*.py' | entr -c py.test -v --log-level=DEBUG collapsing_thread_pool_executor
34 | 
35 |     # watch python3 tests in one window
36 |     $ workon collapsing-thread-pool-executor-py3
37 |     $ find ./ -name '*.py' | entr -c py.test -v --log-level=DEBUG collapsing_thread_pool_executor
38 | 
39 | ## Examples
40 | 
41 | The example below executes `some_task()` 100 times; since each call to `some_task()` takes about a second and the pool is allowed 10 workers, the whole run should take roughly 10 seconds.
42 | 
43 |     import time
44 | 
45 |     from collapsing_thread_pool_executor import CollapsingThreadPoolExecutor
46 | 
47 |     def some_task():
48 |         time.sleep(1)
49 | 
50 |     # all arguments are optional
51 |     pool = CollapsingThreadPoolExecutor(
52 |         max_workers=10,
53 |         thread_name_prefix='SomePool',
54 |         permitted_thread_age_in_seconds=60,
55 |     )
56 | 
57 |     for i in range(0, 100):
58 |         pool.submit(some_task)
59 | 
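60 | Because the pool implements the standard `Executor` interface, `submit()` returns an ordinary `concurrent.futures` `Future` and the pool can be used as a context manager. The example below is a minimal sketch (the `square()` task is made up purely for illustration); it collects results with `as_completed()`:
61 | 
62 |     import time
63 | 
64 |     from concurrent.futures import as_completed
65 | 
66 |     from collapsing_thread_pool_executor import CollapsingThreadPoolExecutor
67 | 
68 |     def square(x):
69 |         time.sleep(0.1)
70 |         return x * x
71 | 
72 |     # the pool is a context manager; shutdown(wait=True) is called on leaving the block
73 |     with CollapsingThreadPoolExecutor(max_workers=4) as pool:
74 |         futures = [pool.submit(square, i) for i in range(10)]
75 |         for future in as_completed(futures):
76 |             print(future.result())
77 | 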
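78 | The "collapsing" behaviour itself can also be observed: idle workers are kept on a stack and re-used most-recently-first, so a lightly loaded pool tends to funnel its work through the same thread, and workers left idle for longer than `permitted_thread_age_in_seconds` are destroyed. The sketch below (the `report()` task is again illustrative) prints which worker threads actually ran the tasks; the exact thread names will differ from run to run:
79 | 
80 |     import threading
81 |     import time
82 | 
83 |     from collapsing_thread_pool_executor import CollapsingThreadPoolExecutor
84 | 
85 |     def report():
86 |         time.sleep(0.1)
87 |         return threading.current_thread().name
88 | 
89 |     pool = CollapsingThreadPoolExecutor(
90 |         max_workers=10,
91 |         thread_name_prefix='SomePool',
92 |         permitted_thread_age_in_seconds=60,
93 |     )
94 | 
95 |     # submit tasks one at a time and record which worker ran each one
96 |     names = set()
97 |     for i in range(0, 20):
98 |         names.add(pool.submit(report).result())
99 | 
100 |     print(names)
101 | 
102 | On most runs the printed set contains only one or two thread names, even though the pool was allowed up to 10 workers.
103 | 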
--------------------------------------------------------------------------------
/collapsing_thread_pool_executor/__init__.py:
--------------------------------------------------------------------------------
1 | try: # Python2
2 |     from collapsing_thread_pool_executor import CollapsingThreadPoolExecutor
3 | except BaseException: # Python3
4 |     from collapsing_thread_pool_executor.collapsing_thread_pool_executor import CollapsingThreadPoolExecutor
5 | 
6 | # stop editors like PyCharm from optimizing this away (as it's not used in this module)
7 | _ = CollapsingThreadPoolExecutor
8 | 
--------------------------------------------------------------------------------
/collapsing_thread_pool_executor/collapsing_thread_pool_executor.py:
--------------------------------------------------------------------------------
1 | import atexit
2 | import threading
3 | import weakref
4 | from concurrent.futures import _base # noqa
5 | from logging import getLogger
6 | from uuid import uuid4
7 | 
8 | try: # Python3
9 |     import queue
10 | except Exception: # Python2
11 |     import Queue as queue # noqa
12 | 
13 | try: # Python2
14 |     from concurrent.futures.thread import cpu_count # noqa
15 | except BaseException: # Python3
16 |     from multiprocessing import cpu_count
17 | 
18 | # for the clean shutdown piece
19 | _workers = weakref.WeakSet()
20 | _shutdown = False
21 | _thread_pools = weakref.WeakSet()
22 | 
23 | 
24 | # for the clean shutdown piece
25 | def _python_exit():
26 |     global _shutdown
27 | 
28 |     _shutdown = True
29 | 
30 |     for w in _workers:
31 |         w.work_item_manager.work_item = None
32 |         w.work_item_available_event.set()
33 | 
34 |     for tp in _thread_pools:
35 |         tp.shutdown()
36 | 
37 | 
38 | atexit.register(_python_exit) # for the clean shutdown piece
39 | 
40 | 
41 | class _WorkItem(object):
42 |     def __init__(self, future, fn, args, kwargs):
43 |         self.future = future
44 |         self.fn = fn
45 |         self.args = args
46 |         self.kwargs = kwargs
47 | 
48 |     def run(self):
49 |         if not self.future.set_running_or_notify_cancel():
50 |             return
51 | 
52 |         try:
53 |             result = self.fn(*self.args, **self.kwargs)
54 |         except BaseException as exc:
55 |             self.future.set_exception(exc)
56 |         else:
57 |             self.future.set_result(result)
58 | 
59 | 
60 | class _WorkItemManager(object):
61 |     def __init__(self):
62 |         self._lock = threading.Lock()
63 |         self._work_item = None
64 | 
65 |     @property
66 |     def work_item(self):
67 |         with self._lock:
68 |             return self._work_item
69 | 
70 |     @work_item.setter
71 |     def work_item(self, work_item):
72 |         with self._lock:
73 |             self._work_item = work_item
74 | 
75 | 
76 | class _Worker(threading.Thread):
77 |     def __init__(self, executor_reference, work_item_manager, work_item_available_event, worker_available_callback,
78 |                  timeout, name):
79 |         super(_Worker, self).__init__(
80 |             name=name
81 |         )
82 | 
83 |         self._executor_reference = executor_reference
84 |         self._work_item_manager = work_item_manager
85 |         self._work_item_available_event = work_item_available_event
86 |         self._worker_available_callback = worker_available_callback
87 |         self._timeout = timeout
88 | 
89 |     @property
90 |     def work_item_manager(self):
91 |         return self._work_item_manager
92 | 
93 |     @property
94 |     def work_item_available_event(self):
95 |         return self._work_item_available_event
96 | 
97 |     def run(self):
98 |         try:
99 |             while True:
100 |                 # declare this thread as available
101 |                 self._worker_available_callback(self)
102 | 
103 |                 # wait until task or shutdown on timeout
104 |                 work_available = self._work_item_available_event.wait(timeout=self._timeout)
105 | 
106 |                 self._work_item_available_event.clear()
107 | 
108 |                 if work_available:
109 |                     work_item = self._work_item_manager.work_item
110 |                     if work_item is not None:
111 |                         self._work_item_manager.work_item = None
112 |                     else: # shut down this thread if no work was given
113 |                         return
114 | 
115 |                 if work_item is not None: # do the work
116 |                     work_item.run() # noqa
117 |                     del work_item # Delete references to object. See issue16284
118 |                     continue
119 | 
120 |                 # this path only executes if the work_item was None (pool shutdown commanded)
121 |                 executor = self._executor_reference()
122 |                 # Exit if:
123 |                 # - The interpreter is shutting down OR
124 |                 # - The executor that owns the worker has been collected OR
125 |                 # - The executor that owns the worker has been shutdown.
126 |                 if _shutdown or executor is None or executor._shutdown:
127 |                     return
128 | 
129 |                 del executor
130 |         except BaseException:
131 |             _base.LOGGER.critical('Exception in worker', exc_info=True)
132 | 
133 | 
134 | # based on concurrent.futures.thread.ThreadPoolExecutor
135 | class CollapsingThreadPoolExecutor(_base.Executor):
136 |     def __init__(self, max_workers=None, thread_name_prefix=None,
137 |                  permitted_thread_age_in_seconds=30, logger=None):
138 |         if max_workers is None:
139 |             # Use this number because ThreadPoolExecutor is often
140 |             # used to overlap I/O instead of CPU work.
141 |             max_workers = (cpu_count() or 1) * 5
142 |         if max_workers <= 0:
143 |             raise ValueError("max_workers must be greater than 0")
144 | 
145 |         self._max_workers = max_workers
146 |         self._thread_name_prefix = thread_name_prefix or '{0}'.format(hex(id(self))[2:])
147 |         self._permitted_thread_age_in_seconds = permitted_thread_age_in_seconds
148 | 
149 |         self._logger = logger if logger is not None else getLogger(self.__class__.__name__)
150 | 
151 |         self._work_queue = queue.Queue()
152 |         self._workers = set()
153 |         self._workers_lock = threading.Lock()
154 |         self._available_workers_queue = queue.LifoQueue()
155 | 
156 |         self._shutdown = False
157 |         self._shutdown_lock = threading.Lock()
158 | 
159 |         self._cleanup_thread_shutdown_queue = queue.Queue()
160 |         self._cleanup_threads_lock = threading.Lock()
161 |         self._cleanup_thread = threading.Thread(
162 |             target=self._cleanup_threads
163 |         )
164 |         self._cleanup_thread.daemon = True
165 |         self._cleanup_thread.start()
166 | 
167 |         self._work_queue_thread = threading.Thread(
168 |             target=self._handle_work_queue,
169 |         )
170 |         self._work_queue_thread.daemon = True
171 |         self._work_queue_thread.start()
172 |         self._work_queue_finished = False
173 | 
174 |         _thread_pools.add(self)
175 | 
176 |     def _worker_available(self, worker):
177 |         if self._work_queue_finished:
178 |             # wake the worker to exit right away
179 |             worker.work_item_available_event.set()
180 |         else:
181 |             self._available_workers_queue.put(worker)
182 | 
183 |     def _cleanup_threads(self):
184 |         last_num_workers = -1
185 |         while True:
186 |             with self._shutdown_lock:
187 |                 if self._shutdown:
188 |                     return
189 | 
190 |             dead_workers = []
191 |             with self._workers_lock:
192 |                 for w in self._workers:
193 |                     if w.ident and not w.is_alive():
194 |                         dead_workers += [w]
195 | 
196 | for w in dead_workers: 197 | self._workers.remove(w) 198 | self._logger.debug('removed {0}'.format(w)) 199 | 200 | num_workers = len(self._workers) 201 | 202 | for w in dead_workers: 203 | self._logger.debug('joining {0}'.format(w)) 204 | w.join() 205 | self._logger.debug('joined {0}'.format(w)) 206 | 207 | if num_workers != last_num_workers: 208 | last_num_workers = num_workers 209 | self._logger.debug('{0} workers running'.format( 210 | num_workers 211 | )) 212 | 213 | # makes for an interruptable sleep 214 | try: 215 | self._cleanup_thread_shutdown_queue.get( 216 | timeout=self._permitted_thread_age_in_seconds) 217 | return 218 | except queue.Empty: 219 | pass 220 | 221 | def _handle_work_queue(self): 222 | while True: 223 | with self._shutdown_lock: 224 | if self._shutdown: 225 | return 226 | 227 | # wait for some work 228 | try: 229 | work_item = self._work_queue.get(timeout=5) 230 | if work_item is None: # shutdown commanded 231 | # wake all the workers so they exit quickly 232 | self._work_queue_finished = True 233 | try: 234 | while True: 235 | w = self._available_workers_queue.get_nowait() 236 | if w: 237 | w.work_item_available_event.set() 238 | except queue.Empty: 239 | pass 240 | return 241 | except queue.Empty: 242 | continue 243 | 244 | # wait for a worker 245 | wait = False 246 | worker = None 247 | while worker is None: 248 | try: 249 | w = self._available_workers_queue.get_nowait() if not wait else self._available_workers_queue.get( 250 | timeout=5 251 | ) 252 | except queue.Empty: 253 | wait = self._adjust_thread_count() 254 | continue 255 | 256 | if w is None: # shutdown commanded 257 | return 258 | elif w.ident and not w.is_alive(): # dead worker 259 | continue 260 | 261 | worker = w 262 | 263 | break 264 | 265 | # give the work_item to the worker 266 | worker.work_item_manager.work_item = work_item 267 | 268 | # notify it of work to be done 269 | worker.work_item_available_event.set() 270 | 271 | def _adjust_thread_count(self): 272 | # When the executor gets lost, the weakref callback will wake up 273 | # the worker threads. 
274 | def weakref_cb(_, q=self._work_queue): 275 | q.put(None) 276 | 277 | with self._workers_lock: 278 | num_workers = len(self._workers) 279 | 280 | if num_workers == self._max_workers: 281 | return False 282 | 283 | thread_name = '{0}_{1}'.format(self._thread_name_prefix, uuid4()) 284 | 285 | work_item_manager = _WorkItemManager() 286 | work_item_available_event = threading.Event() 287 | work_item_available_event.clear() 288 | 289 | w = _Worker( 290 | weakref.ref(self, weakref_cb), 291 | work_item_manager, 292 | work_item_available_event, 293 | self._worker_available, 294 | self._permitted_thread_age_in_seconds, 295 | name=thread_name, 296 | ) 297 | w.daemon = True 298 | 299 | w.start() 300 | 301 | self._logger.debug('added {0}'.format(w)) 302 | 303 | with self._workers_lock: 304 | self._workers.add(w) 305 | 306 | _workers.add(w) 307 | 308 | def submit(self, fn, *args, **kwargs): 309 | with self._shutdown_lock: 310 | if self._shutdown: 311 | raise RuntimeError('cannot schedule new futures after shutdown') 312 | 313 | f = _base.Future() 314 | w = _WorkItem(f, fn, args, kwargs) 315 | 316 | self._work_queue.put(w) 317 | 318 | return f 319 | 320 | submit.__doc__ = _base.Executor.submit.__doc__ 321 | 322 | def shutdown(self, wait=True): 323 | self._logger.debug('setting shutdown flag') 324 | with self._shutdown_lock: 325 | self._shutdown = True 326 | 327 | self._logger.debug('shutting down work queue') 328 | self._work_queue.put(None) 329 | 330 | self._logger.debug('shutting down work queue thread') 331 | self._available_workers_queue.put(None) 332 | 333 | self._logger.debug('shutting down cleanup thread') 334 | self._cleanup_thread_shutdown_queue.put(1) 335 | 336 | self._logger.debug('joining cleanup thread') 337 | self._cleanup_thread.join() 338 | self._logger.debug('joined cleanup thread') 339 | 340 | if wait: 341 | with self._workers_lock: 342 | for w in self._workers: 343 | self._logger.debug('joining {0}'.format(w)) 344 | w.join() 345 | self._logger.debug('joined {0}'.format(w)) 346 | 347 | shutdown.__doc__ = _base.Executor.shutdown.__doc__ 348 | -------------------------------------------------------------------------------- /collapsing_thread_pool_executor/collapsing_thread_pool_executor_test.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import time 3 | import unittest 4 | from concurrent.futures import as_completed 5 | 6 | from hamcrest import assert_that, less_than_or_equal_to, greater_than_or_equal_to 7 | 8 | from collapsing_thread_pool_executor import CollapsingThreadPoolExecutor 9 | 10 | 11 | def short_sleep(): 12 | time.sleep(0.1) 13 | 14 | 15 | def long_sleep(): 16 | time.sleep(1) 17 | 18 | 19 | class CollapsingThreadPoolExecutorTest(unittest.TestCase): 20 | def setUp(self): 21 | self._subject = CollapsingThreadPoolExecutor( 22 | max_workers=10, 23 | thread_name_prefix='Test', 24 | permitted_thread_age_in_seconds=1 25 | ) 26 | 27 | def test_some_fast_work(self): 28 | before = datetime.datetime.now() 29 | 30 | futures = [] 31 | for i in range(0, 20): 32 | futures += [self._subject.submit(short_sleep)] 33 | 34 | for future in as_completed(futures): 35 | _ = future.result() 36 | 37 | after = datetime.datetime.now() 38 | 39 | assert_that( 40 | (after - before).total_seconds(), 41 | less_than_or_equal_to(0.3) 42 | ) 43 | 44 | def test_some_slow_work(self): 45 | before = datetime.datetime.now() 46 | 47 | futures = [] 48 | for i in range(0, 20): 49 | futures += [self._subject.submit(long_sleep)] 50 | 51 | for future in 
as_completed(futures): 52 | _ = future.result() 53 | 54 | after = datetime.datetime.now() 55 | 56 | assert_that( 57 | (after - before).total_seconds(), 58 | greater_than_or_equal_to(1.9) 59 | ) 60 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | mock==4.0.2; python_version > '2.7' 2 | PyHamcrest==2.0.2; python_version > '2.7' 3 | 4 | mock==3.0.5; python_version <= '2.7' 5 | PyHamcrest==1.10.1; python_version <= '2.7' 6 | 7 | pytest==4.6.11 8 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bdist_wheel] 2 | universal=0 3 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | """A setuptools based setup module. 2 | See: 3 | https://packaging.python.org/en/latest/distributing.html 4 | https://github.com/pypa/sampleproject 5 | """ 6 | 7 | import sys 8 | # To use a consistent encoding 9 | from codecs import open 10 | from os import path 11 | 12 | # Always prefer setuptools over distutils 13 | from setuptools import find_packages, setup 14 | 15 | here = path.abspath(path.dirname(__file__)) 16 | 17 | # Get the long description from the README file 18 | with open(path.join(here, 'README.md'), encoding='utf-8') as f: 19 | long_description = f.read() 20 | 21 | install_requires = [ 22 | 'futures>=3.2.0,<4.0.0', 23 | ] if sys.version_info <= (3, 2) else [] 24 | 25 | setup( 26 | name='collapsing-thread-pool-executor', 27 | 28 | # Versions should comply with PEP440. For a discussion on single-sourcing 29 | # the version across setup.py and the project code, see 30 | # https://packaging.python.org/en/latest/single_source_version.html 31 | version='2019.11', 32 | 33 | description='A thread pool that auto-manages the number of worker threads and is compatible with the "futures" module ThreadPoolExecutor interface.', 34 | long_description=long_description, 35 | long_description_content_type="text/markdown", 36 | 37 | # The project's main homepage. 38 | url='https://github.com/initialed85/collapsing-thread-pool-executor', 39 | 40 | # Author details 41 | author='Edward Beech', 42 | author_email='initialed85@gmail.com', 43 | 44 | # Choose your license 45 | license='MIT', 46 | 47 | # See https://pypi.python.org/pypi?%3Aaction=list_classifiers 48 | classifiers=[ 49 | # How mature is this project? Common values are 50 | # 3 - Alpha 51 | # 4 - Beta 52 | # 5 - Production/Stable 53 | 'Development Status :: 3 - Alpha', 54 | 55 | # Indicate who your project is intended for 56 | 'Intended Audience :: Developers', 57 | 'Topic :: Software Development :: Libraries', 58 | 59 | # Pick your license as you wish (should match "license" above) 60 | 'License :: OSI Approved :: MIT License', 61 | 62 | # Specify the Python versions you support here. In particular, ensure 63 | # that you indicate whether you support Python 2, Python 3 or both. 64 | 'Programming Language :: Python :: 2.7', 65 | 'Programming Language :: Python :: 3.6', 66 | ], 67 | 68 | # What does your project relate to? 69 | keywords='sample setuptools development', 70 | 71 | # You can just specify the packages manually here if your project is 72 | # simple. Or you can use find_packages(). 
73 | packages=find_packages(exclude=['contrib', 'docs', 'tests']), 74 | 75 | # Alternatively, if you want to distribute just a my_module.py, uncomment 76 | # this: 77 | # py_modules=["my_module"], 78 | 79 | # List run-time dependencies here. These will be installed by pip when 80 | # your project is installed. For an analysis of "install_requires" vs pip's 81 | # requirements files see: 82 | # https://packaging.python.org/en/latest/requirements.html 83 | install_requires=install_requires, 84 | 85 | # List additional groups of dependencies here (e.g. development 86 | # dependencies). You can install these using the following syntax, 87 | # for example: 88 | # $ pip install -e .[dev,test] 89 | extras_require={ 90 | 'dev': ['check-manifest'], 91 | 'test': ['coverage'], 92 | }, 93 | 94 | # If there are data files included in your packages that need to be 95 | # installed, specify them here. If using Python 2.6 or less, then these 96 | # have to be included in MANIFEST.in as well. 97 | package_data={ 98 | }, 99 | 100 | # Although 'package_data' is the preferred approach, in some case you may 101 | # need to place data files outside of your packages. See: 102 | # http://docs.python.org/3.4/distutils/setupscript.html#installing-additional-files # noqa 103 | # In this case, 'data_file' will be installed into '/my_data' 104 | data_files=[ 105 | ('', ['README.md']) 106 | ], 107 | 108 | # To provide executable scripts, use entry points in preference to the 109 | # "scripts" keyword. Entry points provide cross-platform support and allow 110 | # pip to create the appropriate form of executable for the target platform. 111 | entry_points={ 112 | }, 113 | ) 114 | --------------------------------------------------------------------------------