├── tests ├── __init__.py └── test_futurepool.py ├── .gitignore ├── futurepool ├── __init__.py └── futurepool.py ├── docs └── index.md ├── LICENCE.md ├── README.md ├── pyproject.toml ├── mkdocs.yml └── poetry.lock /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .history 2 | __pycache__ 3 | .venv 4 | dist 5 | site 6 | .cache 7 | .pytest_cache -------------------------------------------------------------------------------- /futurepool/__init__.py: -------------------------------------------------------------------------------- 1 | from .futurepool import FuturePool # noqa: F401 2 | 3 | __version__ = "1.0.0" 4 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | # FuturePool 2 | 3 | :::futurepool.futurepool 4 | options: 5 | inherited_members: true -------------------------------------------------------------------------------- /LICENCE.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Michal Karol 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # FuturePool [![PyPI - Version](https://img.shields.io/pypi/v/futurepool?style=for-the-badge)](https://pypi.org/project/futurepool/) 2 | 3 | FuturePool is a package that brings the well-known concept of the multiprocessing Pool to the async/await world, resulting in an async worker pool library. It allows for easy translation from multiprocessing to async/await while keeping the core principle: a fixed number of workers. FuturePool also allows for more flexible usage by providing starimap/starimap_unordered. 4 | 5 | FuturePool was created to handle web scraping, where a fixed number of workers is used in order not to overwhelm a website with connections and to comply with the website's requirements. It was later extended to handle generic scenarios and published. 6 | 7 | ## License 8 | MIT 9 | 10 | ## Example 11 | To see the library docs, visit [https://MichalKarol.github.io/futurepool/](https://MichalKarol.github.io/futurepool/). 
12 | 13 | Example translation from multiprocessing to FuturePool 14 | 15 | ```python 16 | # multiprocessing 17 | from multiprocessing import Pool 18 | from time import sleep 19 | 20 | def pool_fn(i): 21 | sleep(i) 22 | return i 23 | 24 | with Pool(2) as p: 25 | result = p.map(pool_fn, range(10)) 26 | ``` 27 | 28 | ```python 29 | # FuturePool 30 | from futurepool import FuturePool 31 | from asyncio import sleep 32 | 33 | async def async_pool_fn(i): 34 | await sleep(i) 35 | return i 36 | 37 | async with FuturePool(2) as fp: 38 | result = await fp.map(async_pool_fn, range(10)) 39 | ``` 40 | 41 | ## Author 42 | Michał Karol 43 | [Mastodon](https://mastodon.pl/@mkarol) 44 | [Github](https://github.com/MichalKarol) -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "futurepool" 3 | version = "1.0.1" 4 | description = "FuturePool is a package that introduce known concept of multiprocessing Pool to the async/await world, resulting in async workers pool library. It allows for easy translation from multiprocessing to async/await, while keeping the core principle - specified number of workers. FuturePool allows for more flexible usage by providing starimap/starimap_unordered." 
5 | authors = ["Michal Karol "] 6 | license = "MIT License" 7 | readme = "README.md" 8 | keywords = [ 9 | "async/await", 10 | "worker", 11 | "pool", 12 | "scrappig", 13 | "future", 14 | "future pool", 15 | "async pool", 16 | ] 17 | classifiers = [ 18 | "Development Status :: 5 - Production/Stable", 19 | "Intended Audience :: Developers", 20 | "Framework :: AsyncIO", 21 | "Topic :: Software Development :: Libraries", 22 | "License :: OSI Approved :: MIT License", 23 | "Programming Language :: Python :: 3", 24 | "Programming Language :: Python :: 3.10", 25 | ] 26 | 27 | [tool.poetry.urls] 28 | Homepage = "https://michalkarol.github.io/futurepool/" 29 | Documentation = "https://michalkarol.github.io/futurepool/" 30 | Repository = "https://github.com/MichalKarol/futurepool" 31 | Issues = "https://github.com/MichalKarol/futurepool/issues" 32 | Releases = "https://github.com/MichalKarol/futurepool/releases" 33 | 34 | [tool.poetry.dependencies] 35 | python = "^3.10" 36 | asyncio = "^3.4.3" 37 | ruff = "^0.7.0" 38 | pytest = "^8.3.3" 39 | pytest-asyncio = "^0.24.0" 40 | 41 | [tool.pytest.ini_options] 42 | asyncio_default_fixture_loop_scope = "function" 43 | 44 | [tool.ruff.lint] 45 | extend-select = ["I"] 46 | 47 | [build-system] 48 | requires = ["poetry-core>=1.0.0"] 49 | build-backend = "poetry.core.masonry.api" 50 | -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: FuturePool 2 | 3 | nav: 4 | - Home: index.md 5 | 6 | plugins: 7 | - search 8 | - privacy 9 | - mkdocstrings: 10 | handlers: 11 | python: 12 | options: 13 | heading_level: 2 14 | parameter_headings: true 15 | signature_crossrefs: true 16 | show_root_toc_entry: false 17 | show_symbol_type_heading: true 18 | show_symbol_type_toc: true 19 | members_order: alphabetical 20 | separate_signature: true 21 | show_signature_annotations: true 22 | docstring_options: 23 | 
import asyncio
import itertools
from datetime import datetime
from typing import Any, Iterable, TypeVar

import pytest

from futurepool import FuturePool, __version__

T = TypeVar("T")

# Duration (seconds) of one simulated unit of work and the slack allowed
# on top of it before a test is considered to have timed out.
BASE_TIME = 1
THRESHOLD = 1


async def throwing_async_fn(nbr: int):
    """Raise immediately for ``nbr == 1``; otherwise sleep BASE_TIME and return ``nbr``."""
    if nbr == 1:
        raise Exception("New exception")
    await asyncio.sleep(BASE_TIME)
    return nbr


async def good_async_fn(nbr: int, _: int = 0):
    """Sleep BASE_TIME seconds and return ``nbr`` (second argument is ignored)."""
    await asyncio.sleep(BASE_TIME)
    return nbr


async def unordered_good_async_fn(nbr: int, _: int = 0):
    """Sleep ``BASE_TIME + nbr`` seconds so that larger inputs finish later."""
    await asyncio.sleep(BASE_TIME + nbr)
    return nbr


def test_version():
    assert __version__ == "1.0.0"


class SavingIterator:
    """Iterator wrapper that records when each item was pulled from the source."""

    def __init__(self, items: Iterable[Any]):
        # iter() makes the wrapper work for any iterable (list, range, ...),
        # not only for objects that are already iterators.
        self.items = iter(items)
        # (timestamp, item) for every item handed out so far.
        self.yields = list[tuple[datetime, Any]]()

    def __iter__(self):
        return self

    def __next__(self):
        item = next(self.items)
        self.yields.append((datetime.now(), item))
        return item


@pytest.mark.asyncio
async def test_map():
    async with FuturePool(3) as fp:
        future = fp.map(good_async_fn, range(3))
        result = await asyncio.wait_for(future, BASE_TIME + THRESHOLD)
        assert result == [0, 1, 2]


@pytest.mark.asyncio
async def test_map_propagates_exception():
    # The exception raised by the async function must surface to the awaiter.
    async with FuturePool(3) as fp:
        with pytest.raises(Exception, match="New exception"):
            await asyncio.wait_for(
                fp.map(throwing_async_fn, range(3)), BASE_TIME + THRESHOLD
            )


@pytest.mark.asyncio
async def test_map_lazy():
    async with FuturePool(2) as fp:
        it = SavingIterator(iter(range(3)))
        result = await asyncio.wait_for(
            fp.map(good_async_fn, it), 2 * BASE_TIME + THRESHOLD
        )
        assert result == [0, 1, 2]
        # The first two items are pulled together when the workers start.
        # (later - earlier) keeps the difference non-negative so the bound
        # actually constrains something.
        first_gap = (it.yields[1][0] - it.yields[0][0]).total_seconds()
        assert 0 <= first_gap < THRESHOLD
        # The third item is only pulled once a worker has finished an item.
        assert (it.yields[2][0] - it.yields[1][0]).seconds == BASE_TIME


@pytest.mark.asyncio
async def test_starmap():
    async with FuturePool(3) as fp:
        result = await asyncio.wait_for(
            fp.starmap(good_async_fn, zip(range(3), range(3))),
            BASE_TIME + THRESHOLD,
        )
        assert result == [0, 1, 2]


@pytest.mark.asyncio
async def test_imap():
    async with FuturePool(3) as fp:
        iterator = fp.imap(good_async_fn, range(10000))
        result = await asyncio.wait_for(
            asyncio.gather(*itertools.islice(iterator, 3)), BASE_TIME + THRESHOLD
        )
        assert result == [0, 1, 2]


@pytest.mark.asyncio
async def test_imap_workers():
    # Two iterators created from the same pool share its workers.
    async with FuturePool(3) as fp:
        iterator_1 = fp.imap(good_async_fn, range(9, 0, -1))
        iterator_2 = fp.imap(good_async_fn, range(0, 9, 1))

        async def get_results():
            start = datetime.now()
            a = await next(iterator_1)
            b = await next(iterator_2)
            end = datetime.now()
            return (a, b, (end - start).seconds)

        (a, b, time) = await asyncio.wait_for(get_results(), 4 * BASE_TIME + THRESHOLD)
        assert (a, b) == (9, 0)
        assert time <= 4 * BASE_TIME


@pytest.mark.asyncio
async def test_imap_async():
    async with FuturePool(3) as fp:
        result = []
        async for i in fp.imap_async(good_async_fn, range(10000)):
            result.append(i)
            if len(result) == 3:
                break
        assert result == [0, 1, 2]


@pytest.mark.asyncio
async def test_starimap():
    async with FuturePool(3) as fp:
        iterator = fp.starimap(good_async_fn, zip(range(10000), range(10000)))
        result = await asyncio.wait_for(
            asyncio.gather(*itertools.islice(iterator, 3)), BASE_TIME + THRESHOLD
        )
        assert result == [0, 1, 2]


@pytest.mark.asyncio
async def test_starimap_async():
    async with FuturePool(3) as fp:
        result = []
        async for i in fp.starimap_async(
            good_async_fn, zip(range(10000), range(10000))
        ):
            result.append(i)
            if len(result) == 3:
                break
        assert result == [0, 1, 2]


@pytest.mark.asyncio
async def test_imap_unordered():
    async with FuturePool(3) as fp:
        items = [2, 1, 0]
        iterator = fp.imap_unordered(unordered_good_async_fn, items)
        result = await asyncio.wait_for(
            asyncio.gather(*itertools.islice(iterator, 3)),
            BASE_TIME + max(items[:3]) + THRESHOLD,
        )
        # Results arrive in completion order: the shortest sleep first.
        assert result == [0, 1, 2]


@pytest.mark.asyncio
async def test_imap_unordered_single():
    async with FuturePool(1) as fp:
        items = [2, 1, 0]
        iterator = fp.imap_unordered(unordered_good_async_fn, items)
        result = await asyncio.wait_for(
            asyncio.gather(*itertools.islice(iterator, 3)),
            3 * BASE_TIME + sum(items[:3]) + THRESHOLD,
        )
        # A single worker processes items sequentially, so input order is kept.
        assert result == [2, 1, 0]


@pytest.mark.asyncio
async def test_imap_unordered_async():
    async with FuturePool(3) as fp:
        result = []
        async for i in fp.imap_unordered_async(unordered_good_async_fn, [2, 1, 0]):
            result.append(i)
            if len(result) == 3:
                break
        assert result == [0, 1, 2]


@pytest.mark.asyncio
async def test_starimap_unordered():
    async with FuturePool(3) as fp:
        items = [2, 1, 0]
        iterator = fp.starimap_unordered(
            unordered_good_async_fn, zip(items, range(10000))
        )
        result = await asyncio.wait_for(
            asyncio.gather(*itertools.islice(iterator, 3)),
            BASE_TIME + max(items[:3]) + THRESHOLD,
        )
        assert result == [0, 1, 2]


@pytest.mark.asyncio
async def test_starimap_unordered_async():
    async with FuturePool(3) as fp:
        result = []
        async for i in fp.starimap_unordered_async(
            unordered_good_async_fn, zip([2, 1, 0], range(10000))
        ):
            result.append(i)
            if len(result) == 3:
                break
        assert result == [0, 1, 2]
2 | 3 | [[package]] 4 | name = "asyncio" 5 | version = "3.4.3" 6 | description = "reference implementation of PEP 3156" 7 | optional = false 8 | python-versions = "*" 9 | files = [ 10 | {file = "asyncio-3.4.3-cp33-none-win32.whl", hash = "sha256:b62c9157d36187eca799c378e572c969f0da87cd5fc42ca372d92cdb06e7e1de"}, 11 | {file = "asyncio-3.4.3-cp33-none-win_amd64.whl", hash = "sha256:c46a87b48213d7464f22d9a497b9eef8c1928b68320a2fa94240f969f6fec08c"}, 12 | {file = "asyncio-3.4.3-py3-none-any.whl", hash = "sha256:c4d18b22701821de07bd6aea8b53d21449ec0ec5680645e5317062ea21817d2d"}, 13 | {file = "asyncio-3.4.3.tar.gz", hash = "sha256:83360ff8bc97980e4ff25c964c7bd3923d333d177aa4f7fb736b019f26c7cb41"}, 14 | ] 15 | 16 | [[package]] 17 | name = "colorama" 18 | version = "0.4.6" 19 | description = "Cross-platform colored terminal text." 20 | optional = false 21 | python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" 22 | files = [ 23 | {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, 24 | {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, 25 | ] 26 | 27 | [[package]] 28 | name = "exceptiongroup" 29 | version = "1.2.2" 30 | description = "Backport of PEP 654 (exception groups)" 31 | optional = false 32 | python-versions = ">=3.7" 33 | files = [ 34 | {file = "exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b"}, 35 | {file = "exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc"}, 36 | ] 37 | 38 | [package.extras] 39 | test = ["pytest (>=6)"] 40 | 41 | [[package]] 42 | name = "iniconfig" 43 | version = "2.0.0" 44 | description = "brain-dead simple config-ini parsing" 45 | optional = false 46 | python-versions = ">=3.7" 47 | files = [ 48 | {file = "iniconfig-2.0.0-py3-none-any.whl", hash 
= "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, 49 | {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, 50 | ] 51 | 52 | [[package]] 53 | name = "packaging" 54 | version = "24.1" 55 | description = "Core utilities for Python packages" 56 | optional = false 57 | python-versions = ">=3.8" 58 | files = [ 59 | {file = "packaging-24.1-py3-none-any.whl", hash = "sha256:5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124"}, 60 | {file = "packaging-24.1.tar.gz", hash = "sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002"}, 61 | ] 62 | 63 | [[package]] 64 | name = "pluggy" 65 | version = "1.5.0" 66 | description = "plugin and hook calling mechanisms for python" 67 | optional = false 68 | python-versions = ">=3.8" 69 | files = [ 70 | {file = "pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669"}, 71 | {file = "pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1"}, 72 | ] 73 | 74 | [package.extras] 75 | dev = ["pre-commit", "tox"] 76 | testing = ["pytest", "pytest-benchmark"] 77 | 78 | [[package]] 79 | name = "pytest" 80 | version = "8.3.3" 81 | description = "pytest: simple powerful testing with Python" 82 | optional = false 83 | python-versions = ">=3.8" 84 | files = [ 85 | {file = "pytest-8.3.3-py3-none-any.whl", hash = "sha256:a6853c7375b2663155079443d2e45de913a911a11d669df02a50814944db57b2"}, 86 | {file = "pytest-8.3.3.tar.gz", hash = "sha256:70b98107bd648308a7952b06e6ca9a50bc660be218d53c257cc1fc94fda10181"}, 87 | ] 88 | 89 | [package.dependencies] 90 | colorama = {version = "*", markers = "sys_platform == \"win32\""} 91 | exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""} 92 | iniconfig = "*" 93 | packaging = "*" 94 | pluggy = ">=1.5,<2" 95 | tomli = {version = ">=1", markers = "python_version < 
\"3.11\""} 96 | 97 | [package.extras] 98 | dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] 99 | 100 | [[package]] 101 | name = "pytest-asyncio" 102 | version = "0.24.0" 103 | description = "Pytest support for asyncio" 104 | optional = false 105 | python-versions = ">=3.8" 106 | files = [ 107 | {file = "pytest_asyncio-0.24.0-py3-none-any.whl", hash = "sha256:a811296ed596b69bf0b6f3dc40f83bcaf341b155a269052d82efa2b25ac7037b"}, 108 | {file = "pytest_asyncio-0.24.0.tar.gz", hash = "sha256:d081d828e576d85f875399194281e92bf8a68d60d72d1a2faf2feddb6c46b276"}, 109 | ] 110 | 111 | [package.dependencies] 112 | pytest = ">=8.2,<9" 113 | 114 | [package.extras] 115 | docs = ["sphinx (>=5.3)", "sphinx-rtd-theme (>=1.0)"] 116 | testing = ["coverage (>=6.2)", "hypothesis (>=5.7.1)"] 117 | 118 | [[package]] 119 | name = "ruff" 120 | version = "0.7.1" 121 | description = "An extremely fast Python linter and code formatter, written in Rust." 
122 | optional = false 123 | python-versions = ">=3.7" 124 | files = [ 125 | {file = "ruff-0.7.1-py3-none-linux_armv6l.whl", hash = "sha256:cb1bc5ed9403daa7da05475d615739cc0212e861b7306f314379d958592aaa89"}, 126 | {file = "ruff-0.7.1-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:27c1c52a8d199a257ff1e5582d078eab7145129aa02721815ca8fa4f9612dc35"}, 127 | {file = "ruff-0.7.1-py3-none-macosx_11_0_arm64.whl", hash = "sha256:588a34e1ef2ea55b4ddfec26bbe76bc866e92523d8c6cdec5e8aceefeff02d99"}, 128 | {file = "ruff-0.7.1-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:94fc32f9cdf72dc75c451e5f072758b118ab8100727168a3df58502b43a599ca"}, 129 | {file = "ruff-0.7.1-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:985818742b833bffa543a84d1cc11b5e6871de1b4e0ac3060a59a2bae3969250"}, 130 | {file = "ruff-0.7.1-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:32f1e8a192e261366c702c5fb2ece9f68d26625f198a25c408861c16dc2dea9c"}, 131 | {file = "ruff-0.7.1-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:699085bf05819588551b11751eff33e9ca58b1b86a6843e1b082a7de40da1565"}, 132 | {file = "ruff-0.7.1-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:344cc2b0814047dc8c3a8ff2cd1f3d808bb23c6658db830d25147339d9bf9ea7"}, 133 | {file = "ruff-0.7.1-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4316bbf69d5a859cc937890c7ac7a6551252b6a01b1d2c97e8fc96e45a7c8b4a"}, 134 | {file = "ruff-0.7.1-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:79d3af9dca4c56043e738a4d6dd1e9444b6d6c10598ac52d146e331eb155a8ad"}, 135 | {file = "ruff-0.7.1-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:c5c121b46abde94a505175524e51891f829414e093cd8326d6e741ecfc0a9112"}, 136 | {file = "ruff-0.7.1-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:8422104078324ea250886954e48f1373a8fe7de59283d747c3a7eca050b4e378"}, 137 | {file = 
import asyncio
import os
from asyncio import Future, Task, get_running_loop
from collections import deque
from typing import (
    Any,
    AsyncIterator,
    Awaitable,
    Callable,
    Iterable,
    Iterator,
    TypeVar,
)

T = TypeVar("T")
U = TypeVar("U")


class FuturePool:
    """
    Class representing a pool of async workers to simplify working with async functions that need to be restrained.
    Common use is fetching data from sites, where in order to not influence the website performance, limit on active connections is set.

    All iterators created from one pool share the same semaphore, so at most
    `number_of_workers` calls of the mapped functions run at the same time.
    """

    class IteratorToAsyncIterator:
        """Adapter turning an iterator of awaitables into an async iterator of their results."""

        def __init__(self, iterator: Iterator[Awaitable[T]]):
            self.iterator = iterator

        def __aiter__(self):
            return self

        async def __anext__(self) -> T:
            future = next(self.iterator, None)
            # Compare against the sentinel explicitly instead of relying on
            # the truthiness of the yielded awaitable.
            if future is None:
                raise StopAsyncIteration()
            return await future

    def __init__(self, number_of_workers: int = (os.cpu_count() or 1)):
        """
        Create a pool.

        Must be called while an event loop is running, because the running loop is captured here.

        Args:
            number_of_workers: Maximum number of concurrently running calls. Must be positive.
        """
        assert number_of_workers > 0, "Number of workers must be a positive number"
        self.number_of_workers = number_of_workers
        self.loop = get_running_loop()
        # One permit per concurrently running call, shared by every iterator of this pool.
        self.workers_lock = asyncio.Semaphore(self.number_of_workers)
        # Worker tasks that are currently alive; each worker adds and removes itself.
        self.tasks = set[Task]()
        # Serializes the shutdown performed in __aexit__.
        self.tasks_lock = asyncio.Lock()

    async def __aenter__(self):
        return self

    async def __aexit__(self, type, value, traceback):
        """
        Wait for the calls that are currently running, then cancel all worker tasks.

        Items that were not started yet are dropped. Returns False, so an
        exception raised inside the `async with` body is never suppressed.
        """
        if not self.loop.is_running():
            return False
        acquired = 0
        try:
            # A worker holds one permit while it runs a call, so owning every
            # permit means no call is in flight any more.
            for _ in range(self.number_of_workers):
                await self.workers_lock.acquire()
                acquired += 1
            async with self.tasks_lock:
                # Snapshot: workers remove themselves from the set while they
                # are being cancelled.
                pending = list(self.tasks)
                for task in pending:
                    task.cancel()
                await asyncio.gather(*pending, return_exceptions=True)
        finally:
            # Hand back exactly what was taken, even if shutdown was interrupted.
            for _ in range(acquired):
                self.workers_lock.release()
        return False

    def _get_iterator_(
        self,
        fn: Callable[..., Awaitable[U]],
        iterable: Iterable[Iterable[Any]],
        ordered: bool,
    ) -> Iterator[Awaitable[U]]:
        """
        Build the lazy iterator of futures that backs every public mapping method.

        Args:
            fn: Async function called as `fn(*args)` for each element of `iterable`.
            iterable: Source of argument tuples; consumed lazily.
            ordered: If True the n-th future receives the result of the n-th element,
                otherwise futures are resolved in the order in which calls finish.

        Workers are started on the first `next()` call of the returned iterator.
        """
        iterator = enumerate(iterable)
        # Every future handed out (or prepared) so far, in creation order.
        futures = list[Future[U]]()
        # Arguments pulled from the iterable that no worker has picked up yet.
        args = deque[tuple[int, Iterable[Any]]]()
        # Unordered mode only: futures that did not receive an outcome yet.
        not_finished_futures = deque[Future[U]]()

        def add_task():
            # Pull one more element from the source and prepare its future.
            arg_tuple = next(iterator, None)
            if arg_tuple is None:
                return False
            args.append(arg_tuple)
            future = self.loop.create_future()
            futures.append(future)
            if not ordered:
                not_finished_futures.append(future)
            return True

        def claim_future(index: int) -> Future:
            # Future that should receive the outcome of the call for `index`.
            if ordered:
                return futures[index]
            return not_finished_futures.popleft()

        async def worker():
            current = asyncio.current_task()
            self.tasks.add(current)
            try:
                while True:
                    async with self.workers_lock:
                        if not args:
                            break
                        index, arg = args.popleft()
                        try:
                            result = await fn(*arg)
                        except asyncio.CancelledError:
                            # Propagate the cancellation to the consumer instead
                            # of leaving the future pending forever.
                            claim_future(index).cancel()
                            raise
                        except Exception as error:
                            future = claim_future(index)
                            # The consumer may have cancelled the future already
                            # (e.g. a wait_for timeout); then the outcome is dropped
                            # and the worker keeps serving the remaining items.
                            if not future.done():
                                future.set_exception(error)
                        else:
                            future = claim_future(index)
                            if not future.done():
                                future.set_result(result)
                        # Keep the queue topped up so this worker has something to do next.
                        add_task()
            finally:
                # Runs on normal exit, on cancellation and on errors alike.
                self.tasks.discard(current)

        def create_workers():
            # Never start more workers than there are items to process.
            for _ in range(self.number_of_workers):
                if not add_task():
                    return
                self.loop.create_task(worker())

        class FutureIterator:
            def __init__(self):
                # Index of the next future to hand out.
                self.current = 0

            def __iter__(self):
                return self

            def __next__(self) -> Awaitable[U]:
                if not futures:
                    create_workers()

                # The consumer is ahead of the workers: prepare one more item.
                if len(futures) == self.current:
                    if not add_task():
                        raise StopIteration()
                future = futures[self.current]
                self.current += 1
                return future

        return FutureIterator()

    def map(
        self, fn: Callable[[T], Awaitable[U]], iterable: Iterable[T]
    ) -> Awaitable[list[U]]:
        """Parallel equivalent of standard map function. Applies provided fn on each item in iterable
        utilizing number_of_workers workers. Function returns Future of all results.

        Example:
        ``` python
        async with FuturePool(3) as fp:
            result = await fp.map(async_fn, range(3))
        ```

        """
        return self.starmap(fn, ((arg,) for arg in iterable))

    def starmap(
        self, fn: Callable[..., Awaitable[U]], iterable: Iterable[Iterable[Any]]
    ) -> Awaitable[list[U]]:
        """
        Like `map()` except that the elements of the iterable are expected to be iterables that are unpacked as arguments.
        Hence an iterable of `[(1,2), (3, 4)]` results in `[func(1,2), func(3,4)]`.

        Example:
        ``` python
        async with FuturePool(3) as fp:
            result = await fp.starmap(async_fn_with_2_args, zip(range(3), range(3)))
        ```
        """

        async def lazy_resolve():
            return [await future for future in self._get_iterator_(fn, iterable, True)]

        return lazy_resolve()

    def imap(
        self, fn: Callable[[T], Awaitable[U]], iterable: Iterable[T]
    ) -> Iterator[Awaitable[U]]:
        """
        Lazy equivalent of `map()`. Returns iterator of futures.

        Examples:
        ``` python
        async with FuturePool(3) as fp:
            iterator = fp.imap(async_fn, range(10000))
            a = await next(iterator)
            b = await next(iterator)
            c = await next(iterator)

        async with FuturePool(3) as fp:
            for future in fp.imap(async_fn, range(10000)):
                result = await future
                do_sth(result)
        ```
        """

        return self._get_iterator_(fn, ((arg,) for arg in iterable), True)

    def imap_async(
        self, fn: Callable[[T], Awaitable[U]], iterable: Iterable[T]
    ) -> AsyncIterator[U]:
        """
        Lazy async equivalent of `map()`. Returns async iterator of U.

        Example:
        ``` python
        async with FuturePool(3) as fp:
            async for result in fp.imap_async(async_fn, range(10000)):
                do_sth(result)
        ```
        """

        return FuturePool.IteratorToAsyncIterator(self.imap(fn, iterable))

    def imap_unordered(
        self, fn: Callable[[T], Awaitable[U]], iterable: Iterable[T]
    ) -> Iterator[Awaitable[U]]:
        """
        The same as `imap()` except that the ordering of the results from the returned iterator should be considered arbitrary.
        (Only when there is only one worker is the order guaranteed to be 'correct'.)

        Examples:
        ``` python
        async with FuturePool(3) as fp:
            iterator = fp.imap_unordered(async_fn_that_takes_variable_time, range(10000))
            a = await next(iterator)  # could be async_fn_that_takes_variable_time(0) or async_fn_that_takes_variable_time(1) or ...
            b = await next(iterator)  # could be async_fn_that_takes_variable_time(0) or async_fn_that_takes_variable_time(1) or ...
            c = await next(iterator)  # could be async_fn_that_takes_variable_time(0) or async_fn_that_takes_variable_time(1) or ...

        async with FuturePool(3) as fp:
            for future in fp.imap_unordered(async_fn_that_takes_variable_time, range(10000)):
                result = await future  # could be async_fn_that_takes_variable_time(0) or async_fn_that_takes_variable_time(1) or ...
                do_sth(result)
        ```
        """
        return self._get_iterator_(fn, ((arg,) for arg in iterable), False)

    def imap_unordered_async(
        self, fn: Callable[[T], Awaitable[U]], iterable: Iterable[T]
    ) -> AsyncIterator[U]:
        """
        The same as `imap_async()` except that the ordering of the results from the returned iterator should be considered arbitrary.
        (Only when there is only one worker is the order guaranteed to be 'correct'.)

        Example:
        ``` python
        async with FuturePool(3) as fp:
            async for result in fp.imap_unordered_async(async_fn_that_takes_variable_time, range(10000)):
                # result could be async_fn_that_takes_variable_time(0) or async_fn_that_takes_variable_time(1) or ...
                do_sth(result)
        ```
        """
        return FuturePool.IteratorToAsyncIterator(self.imap_unordered(fn, iterable))

    def starimap(
        self, fn: Callable[..., Awaitable[U]], iterable: Iterable[Iterable[Any]]
    ) -> Iterator[Awaitable[U]]:
        """
        Like `imap()` except that the elements of the iterable are expected to be iterables that are unpacked as arguments.

        Example:
        ``` python
        async with FuturePool(3) as fp:
            iterator = fp.starimap(async_fn_with_2_args, zip(range(10000), range(10000)))
            a = await next(iterator)
            b = await next(iterator)
            c = await next(iterator)

        async with FuturePool(3) as fp:
            for future in fp.starimap(async_fn_with_2_args, zip(range(10000), range(10000))):
                result = await future
                do_sth(result)
        ```
        """

        return self._get_iterator_(fn, iterable, True)

    def starimap_async(
        self, fn: Callable[..., Awaitable[U]], iterable: Iterable[Iterable[Any]]
    ) -> AsyncIterator[U]:
        """
        Lazy async equivalent of `starimap()`. Returns async iterator of U.

        Example:
        ``` python
        async with FuturePool(3) as fp:
            async for result in fp.starimap_async(async_fn_with_2_args, zip(range(10000), range(10000))):
                do_sth(result)
        ```
        """

        return FuturePool.IteratorToAsyncIterator(self.starimap(fn, iterable))

    def starimap_unordered(
        self, fn: Callable[..., Awaitable[U]], iterable: Iterable[Iterable[Any]]
    ) -> Iterator[Awaitable[U]]:
        """
        The same as `starimap()` except that the ordering of the results from the returned iterator should be considered arbitrary.
        (Only when there is only one worker is the order guaranteed to be 'correct'.)

        Examples:
        ``` python
        async with FuturePool(3) as fp:
            iterator = fp.starimap_unordered(async_fn_that_takes_variable_time_with_2_args, zip(range(10000), range(10000)))
            a = await next(iterator)  # could be async_fn_that_takes_variable_time_with_2_args(0, 0) or async_fn_that_takes_variable_time_with_2_args(1, 1) or ...
            b = await next(iterator)  # could be async_fn_that_takes_variable_time_with_2_args(0, 0) or async_fn_that_takes_variable_time_with_2_args(1, 1) or ...
            c = await next(iterator)  # could be async_fn_that_takes_variable_time_with_2_args(0, 0) or async_fn_that_takes_variable_time_with_2_args(1, 1) or ...

        async with FuturePool(3) as fp:
            for future in fp.starimap_unordered(async_fn_that_takes_variable_time_with_2_args, zip(range(10000), range(10000))):
                result = await future  # could be async_fn_that_takes_variable_time_with_2_args(0, 0) or async_fn_that_takes_variable_time_with_2_args(1, 1) or ...
                do_sth(result)
        ```
        """
        return self._get_iterator_(fn, iterable, False)

    def starimap_unordered_async(
        self, fn: Callable[..., Awaitable[U]], iterable: Iterable[Iterable[Any]]
    ) -> AsyncIterator[U]:
        """
        The same as `starimap_async()` except that the ordering of the results from the returned iterator should be considered arbitrary.
        (Only when there is only one worker is the order guaranteed to be 'correct'.)

        Example:
        ``` python
        async with FuturePool(3) as fp:
            async for result in fp.starimap_unordered_async(async_fn_that_takes_variable_time_with_2_args, zip(range(10000), range(10000))):
                # result could be async_fn_that_takes_variable_time_with_2_args(0, 0) or async_fn_that_takes_variable_time_with_2_args(1, 1) or ...
                do_sth(result)
        ```
        """
        return FuturePool.IteratorToAsyncIterator(self.starimap_unordered(fn, iterable))