├── t
│   ├── __init__.py
│   └── integration
│       ├── __init__.py
│       ├── conftest.py
│       ├── tasks.py
│       └── test_batches.py
├── .github
│   ├── CODEOWNERS
│   ├── FUNDING.yml
│   └── workflows
│       └── main.yml
├── .coveragerc
├── docs
│   ├── history.rst
│   ├── _ext
│   │   └── celerydocs.py
│   ├── api.rst
│   ├── examples
│   │   ├── counter.py
│   │   ├── bulk_insert.py
│   │   ├── last_seen.py
│   │   └── github_api.py
│   ├── make.bat
│   ├── Makefile
│   ├── how_it_works.rst
│   ├── examples.rst
│   ├── conf.py
│   └── index.rst
├── requirements
│   ├── docs.txt
│   ├── test.txt
│   └── pkgutils.txt
├── .git-blame-ignore-revs
├── .readthedocs.yaml
├── .gitignore
├── pyproject.toml
├── RELEASING.rst
├── tox.ini
├── setup.cfg
├── README.rst
├── celery_batches
│   ├── trace.py
│   └── __init__.py
├── LICENSE
└── CHANGELOG.rst

/t/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/t/integration/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/.github/CODEOWNERS:
--------------------------------------------------------------------------------
1 | * @clokep
2 | 
--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | github: clokep
2 | 
--------------------------------------------------------------------------------
/.coveragerc:
--------------------------------------------------------------------------------
1 | [run]
2 | include =
3 |     celery_batches/*
4 |     t/*
--------------------------------------------------------------------------------
/docs/history.rst:
--------------------------------------------------------------------------------
1 | .. _history:
2 | 
3 | .. include:: ../CHANGELOG.rst
--------------------------------------------------------------------------------
/requirements/docs.txt:
--------------------------------------------------------------------------------
1 | sphinx_celery==2.0.0
2 | Sphinx>=3.0.0
3 | furo>=2022.2.23
--------------------------------------------------------------------------------
/requirements/test.txt:
--------------------------------------------------------------------------------
1 | pytest-celery~=0.0.0
2 | pytest~=6.2
3 | coverage
4 | pytest-timeout
--------------------------------------------------------------------------------
/requirements/pkgutils.txt:
--------------------------------------------------------------------------------
1 | pyupgrade==2.31.1
2 | flake8==4.0.1
3 | isort==5.10.1
4 | black==24.3.0
5 | mypy==0.942
--------------------------------------------------------------------------------
/.git-blame-ignore-revs:
--------------------------------------------------------------------------------
1 | # Run black.
2 | 4dd0f9d4f457e603929ca584009fbf11dd54a165
3 | # Run isort.
4 | 64791c455263a12cf5e148f9aadb18a91b3f26b2
--------------------------------------------------------------------------------
/.readthedocs.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | version: 2
3 | formats: all
4 | 
5 | build:
6 |   os: ubuntu-20.04
7 |   tools:
8 |     python: "3.10"
9 | 
10 | python:
11 |   install:
12 |     - method: pip
13 |       path: .
14 |     - requirements: requirements/docs.txt
15 | 
--------------------------------------------------------------------------------
/docs/_ext/celerydocs.py:
--------------------------------------------------------------------------------
1 | # This is partially copied from celery/docs/_ext/celerydocs.py.
2 | 
3 | 
4 | def setup(app):
5 |     app.add_crossref_type(
6 |         directivename="sig",
7 |         rolename="sig",
8 |         indextemplate="pair: %s; sig",
9 |     )
10 | 
11 |     return {"parallel_read_safe": True}
--------------------------------------------------------------------------------
/docs/api.rst:
--------------------------------------------------------------------------------
1 | ###
2 | API
3 | ###
4 | 
5 | .. currentmodule:: celery_batches
6 | 
7 | .. automodule:: celery_batches
8 | 
9 |     **API**
10 | 
11 |     .. autoclass:: Batches
12 |         :members:
13 |         :undoc-members:
14 | 
15 |     .. autoclass:: SimpleRequest
16 |         :members:
17 |         :undoc-members:
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | 
3 | # Build related.
4 | _build
5 | build
6 | dist
7 | celery_batches.egg-info
8 | 
9 | # Testing related.
10 | .cache
11 | .pytest_cache
12 | .tox
13 | 
14 | # Coverage related.
15 | .coverage
16 | htmlcov
17 | 
18 | # Editor related.
19 | .idea
20 | .python-version
21 | 
22 | # virtualenvs
23 | .*env*/
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools>=42.0.0", "wheel"]
3 | build-backend = "setuptools.build_meta"
4 | 
5 | [tool.mypy]
6 | warn_unused_configs = true
7 | strict = false
8 | warn_return_any = true
9 | follow_imports = "normal"
10 | show_error_codes = true
11 | disallow_untyped_defs = true
12 | ignore_missing_imports = true
13 | warn_unreachable = true
14 | no_implicit_optional = true
15 | files = [
16 |     "celery_batches",
17 |     "t",
18 | ]
--------------------------------------------------------------------------------
/docs/examples/counter.py:
--------------------------------------------------------------------------------
1 | from collections import Counter
2 | 
3 | from celery_batches import Batches
4 | 
5 | from celery import Celery
6 | 
7 | app = Celery("counter")
8 | 
9 | 
10 | # Flush after 100 messages, or 10 seconds.
11 | @app.task(base=Batches, flush_every=100, flush_interval=10)
12 | def count_click(requests):
13 |     """Count the number of times each URL is requested."""
14 |     counts = Counter(request.kwargs["url"] for request in requests)
15 |     for url, count in counts.items():
16 |         print(f">>> Clicks: {url} -> {count}")
--------------------------------------------------------------------------------
/docs/examples/bulk_insert.py:
--------------------------------------------------------------------------------
1 | from celery_batches import Batches
2 | 
3 | from celery import Celery
4 | 
5 | from my_app import MyModel
6 | 
7 | app = Celery("bulk_insert")
8 | 
9 | 
10 | @app.task(base=Batches, flush_every=100, flush_interval=10)
11 | def bulk_insert(requests):
12 |     """Insert many rows into a database at once instead of individually."""
13 |     data = []
14 |     for request in requests:
15 |         data.append(MyModel(**request.kwargs))
16 | 
17 |     # Create all the new rows at once.
18 |     MyModel.objects.bulk_create(data)
19 | 
--------------------------------------------------------------------------------
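The ``bulk_insert`` task above is enqueued like any other Celery task; each call
buffers one row until a flush is triggered. A calling-side sketch, assuming a
hypothetical import path and hypothetical ``name``/``value`` fields on ``MyModel``::

    from my_app.tasks import bulk_insert

    for i in range(250):
        # Each call buffers one request; with flush_every=100 the worker
        # performs three bulk_create() calls for these 250 rows
        # (100 + 100, then the remaining 50 when the interval elapses).
        bulk_insert.delay(name=f"row-{i}", value=i)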
/t/integration/conftest.py:
--------------------------------------------------------------------------------
1 | import os
2 | from typing import Any, Dict
3 | 
4 | import pytest
5 | from _pytest.fixtures import SubRequest
6 | 
7 | TEST_BROKER = os.environ.get("TEST_BROKER", "memory://")
8 | TEST_BACKEND = os.environ.get("TEST_BACKEND", "cache+memory://")
9 | 
10 | 
11 | @pytest.fixture(scope="session", params=[1, 2])
12 | def celery_config(request: SubRequest) -> Dict[str, Any]:
13 |     return {
14 |         "broker_url": TEST_BROKER,
15 |         "result_backend": TEST_BACKEND,
16 |         # Test both protocol 1 and 2 via the parameterized fixture.
17 |         "task_protocol": request.param,
18 |     }
--------------------------------------------------------------------------------
/RELEASING.rst:
--------------------------------------------------------------------------------
1 | Releasing celery-batches
2 | ========================
3 | 
4 | 1. Bump the version in ``setup.cfg`` and ``CHANGELOG.rst``.
5 | 2. Double check the trove classifiers in ``setup.cfg`` (they should match the
6 |    supported Python versions in ``README.rst`` and ``tox.ini``).
7 | 3. Make a git commit.
8 | 4. Create a git tag: ``git tag ``
9 | 5. Push to GitHub: ``git push origin main`` & ``git push --tags``
10 | 6. Build the package via ``python -m build``.
11 | 7. Run twine checks: ``twine check dist/*``
12 | 8. Upload to PyPI: ``twine upload dist/*``
13 | 9. Create a new release on GitHub: https://github.com/clokep/celery-batches/releases/new
--------------------------------------------------------------------------------
/docs/examples/last_seen.py:
--------------------------------------------------------------------------------
1 | from celery_batches import Batches
2 | 
3 | from celery import Celery
4 | 
5 | from my_app import User
6 | 
7 | app = Celery("last_seen")
8 | 
9 | 
10 | @app.task(base=Batches, flush_every=100, flush_interval=10)
11 | def last_seen(requests):
12 |     """De-duplicate incoming arguments to only do a task once per input."""
13 |     # Generate a map of unique args -> requests.
14 |     last_seen = {}
15 |     for request in requests:
16 |         user_id, when = request.args
17 |         if user_id not in last_seen or last_seen[user_id] < when:
18 |             last_seen[user_id] = when
19 | 
20 |     # Update the datastore once per user.
21 |     for user_id, when in last_seen.items():
22 |         User.objects.filter(id=user_id).update(last_logged_in=when)
--------------------------------------------------------------------------------
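Since ``last_seen`` unpacks positional ``(user_id, when)`` arguments, a caller
would look like the following sketch (the import path is hypothetical)::

    from datetime import datetime, timezone

    from my_app.tasks import last_seen

    # Repeated calls for the same user collapse into a single UPDATE
    # carrying the newest timestamp once the batch flushes.
    last_seen.delay(123, datetime.now(timezone.utc))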
/docs/examples/github_api.py:
--------------------------------------------------------------------------------
1 | import json
2 | from urllib.request import urlopen
3 | 
4 | from celery_batches import Batches
5 | 
6 | from celery import Celery
7 | 
8 | app = Celery("github_api")
9 | 
10 | emoji_endpoint = "https://api.github.com/emojis"
11 | 
12 | 
13 | @app.task(base=Batches, flush_every=100, flush_interval=10)
14 | def check_emoji(requests):
15 |     """Check if the requested emoji are supported by GitHub."""
16 |     supported_emoji = get_supported_emoji()
17 |     # Use mark_as_done to manually store response data for each request.
18 |     for request in requests:
19 |         response = request.args[0] in supported_emoji
20 |         app.backend.mark_as_done(request.id, response, request=request)
21 | 
22 | 
23 | def get_supported_emoji():
24 |     """Fetch the supported GitHub emojis."""
25 |     response = urlopen(emoji_endpoint)
26 |     # The response is a map of emoji name to image.
27 |     return set(json.load(response))
--------------------------------------------------------------------------------
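Because ``check_emoji`` stores a result per request via ``mark_as_done``,
callers can wait on the returned ``AsyncResult`` as usual; a sketch (a result
backend must be configured for this to work)::

    result = check_emoji.delay("heart")
    # Resolves once the batch containing this request has been flushed.
    print(result.get(timeout=30))  # True if GitHub supports :heart: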
/docs/make.bat:
--------------------------------------------------------------------------------
1 | @ECHO OFF
2 | 
3 | pushd %~dp0
4 | 
5 | REM Command file for Sphinx documentation
6 | 
7 | if "%SPHINXBUILD%" == "" (
8 | 	set SPHINXBUILD=sphinx-build
9 | )
10 | set SOURCEDIR=.
11 | set BUILDDIR=_build
12 | set SPHINXPROJ=CeleryBatches
13 | 
14 | if "%1" == "" goto help
15 | 
16 | %SPHINXBUILD% >NUL 2>NUL
17 | if errorlevel 9009 (
18 | 	echo.
19 | 	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
20 | 	echo.installed, then set the SPHINXBUILD environment variable to point
21 | 	echo.to the full path of the 'sphinx-build' executable. Alternatively you
22 | 	echo.may add the Sphinx directory to PATH.
23 | 	echo.
24 | 	echo.If you don't have Sphinx installed, grab it from
25 | 	echo.http://sphinx-doc.org/
26 | 	exit /b 1
27 | )
28 | 
29 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
30 | goto end
31 | 
32 | :help
33 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
34 | 
35 | :end
36 | popd
37 | 
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 | 
4 | # You can set these variables from the command line.
5 | SPHINXOPTS    =
6 | SPHINXBUILD   = sphinx-build
7 | SPHINXPROJ    = CeleryBatches
8 | SOURCEDIR     = .
9 | BUILDDIR      = _build
10 | 
11 | # Custom variables we added (not from sphinx itself)
12 | HTTP_PORT = 8038  # For usage with "make serve", "make serve HTTP_PORT=8999"
13 | 
14 | # Put it first so that "make" without argument is like "make help".
15 | help:
16 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
17 | 
18 | .PHONY: help Makefile
19 | 
20 | # Catch-all target: route all unknown targets to Sphinx using the new
21 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
22 | %: Makefile
23 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
24 | 
25 | # Custom tasks we added (not from sphinx itself)
26 | serve:
27 | 	python -m http.server ${HTTP_PORT} --directory _build/html
--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
1 | [tox]
2 | envlist =
3 |     {pypy3,3.9,3.10}-celery{52,main}-unit,
4 |     # Celery 5.3 added support for Python 3.11.
5 |     3.11-celery{53,main}-unit,
6 |     # Celery 5.4 added support for Python 3.12.
7 |     3.12-celery{54,main}-unit,
8 |     # Celery 5.5 added support for Python 3.13.
9 |     3.13-celery{55,main}-unit,
10 |     # Integration tests.
11 |     3.10-celery52-integration-{rabbitmq,redis},
12 |     flake8
13 | isolated_build = True
14 | 
15 | [gh-actions]
16 | python =
17 |     pypy-3: pypy3
18 |     3.9: 3.9
19 |     3.10: 3.10
20 |     3.11: 3.11
21 |     3.12: 3.12
22 |     3.13: 3.13
23 | 
24 | [testenv]
25 | deps=
26 |     -r{toxinidir}/requirements/test.txt
27 |     celery52: celery>=5.2.0,<5.3
28 |     celery53: celery>=5.3.0,<5.4
29 |     celery54: celery>=5.4.0,<5.5
30 |     celery55: celery>=5.5.0,<5.6
31 |     celerymain: https://codeload.github.com/celery/celery/zip/main
32 | 
33 |     # By default celery (via kombu) installs py-amqp.
34 |     redis: celery[redis]
35 | sitepackages = False
36 | recreate = False
37 | commands =
38 |     coverage run -m pytest --timeout=60
39 |     coverage html
40 | setenv =
41 |     redis: TEST_BROKER=redis://
42 |     redis: TEST_BACKEND=redis://
43 | 
44 |     rabbitmq: TEST_BROKER=pyamqp://
45 |     rabbitmq: TEST_BACKEND=rpc
--------------------------------------------------------------------------------
/t/integration/tasks.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 | 
3 | from celery_batches import Batches, SimpleRequest
4 | 
5 | from celery import shared_task
6 | from celery.utils.log import get_task_logger
7 | 
8 | logger = get_task_logger(__name__)
9 | 
10 | 
11 | @shared_task(base=Batches, flush_every=2, flush_interval=0.1)
12 | def add(requests: List[SimpleRequest]) -> int:
13 |     """
14 |     Add the arguments of each task.
15 | 
16 |     Marks the result of each task as the sum.
17 |     """
18 |     from celery import current_app
19 | 
20 |     result = 0
21 |     for request in requests:
22 |         result += sum(request.args) + sum(request.kwargs.values())
23 | 
24 |     for request in requests:
25 |         current_app.backend.mark_as_done(request.id, result, request=request)
26 | 
27 |     # TODO For EagerResults to work.
28 |     return result
29 | 
30 | 
31 | @shared_task(base=Batches, flush_every=2, flush_interval=0.1)
32 | def cumadd(requests: List[SimpleRequest]) -> None:
33 |     """
34 |     Calculate the cumulative sum of the first argument of each task.
35 | 
36 |     Marks the result of each task as the sum at that point.
37 |     """
38 |     from celery import current_app
39 | 
40 |     result = 0
41 |     for request in requests:
42 |         result += request.args[0]
43 |         current_app.backend.mark_as_done(request.id, result, request=request)
44 | 
--------------------------------------------------------------------------------
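One subtlety of the ``add`` task above, relied on by the integration tests
later in this dump: every request in a flushed batch is marked done with the
same total. A quick illustration::

    result_1 = add.delay(1)
    result_2 = add.delay(3)

    # flush_every=2 triggers an immediate flush; both requests are marked
    # done with the batch total, so both results resolve to 4.
    assert result_1.get() == 4
    assert result_2.get() == 4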
/docs/how_it_works.rst:
--------------------------------------------------------------------------------
1 | How it works
2 | ############
3 | 
4 | celery-batches makes no changes to how tasks are created or sent to the broker,
5 | but operates on the workers to process multiple tasks at once. Exactly how tasks
6 | are processed depends on the configuration, but the explanation below assumes the
7 | default `"prefork" configuration`_ of a celery worker (the explanation doesn't
8 | change significantly if the gevent, eventlet, or threads worker pools are used,
9 | but the math is different).
10 | 
11 | As background, Celery workers have a "main" process which fetches tasks from the
12 | broker. By default it fetches the ":setting:`worker_prefetch_multiplier` x :setting:`worker_concurrency`"
13 | number of tasks (if available). For example, if the prefetch multiplier is 100 and the
14 | concurrency is 4, it attempts to fetch up to 400 items from the broker's queue.
15 | Once in memory the worker deserializes the messages and runs whatever their
16 | :attr:`~celery.app.task.Task.Strategy` is -- for a normal celery
17 | :class:`~celery.app.task.Task` this passes the tasks to the workers in the
18 | processing pool one at a time. (This is the :func:`~celery.worker.strategy.default` strategy.)
19 | 
20 | The :class:`~celery_batches.Batches` task provides a different strategy which instructs
21 | the "main" celery worker process to queue tasks in memory until either
22 | the :attr:`~celery_batches.Batches.flush_interval` or :attr:`~celery_batches.Batches.flush_every`
23 | is reached, then passes that list of tasks to the worker in the processing pool
24 | together.
25 | 
26 | .. _"prefork" configuration: https://docs.celeryq.dev/en/stable/userguide/workers.html#concurrency
27 | 
--------------------------------------------------------------------------------
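To make the prefetch math above concrete, a configuration sketch (the values
are illustrative, not recommendations)::

    app.conf.worker_prefetch_multiplier = 100

    # Started with: celery -A proj worker --concurrency=4
    # The main process may prefetch up to 100 * 4 = 400 messages, so a
    # Batches task with flush_every <= 400 can fill an entire batch from
    # the prefetched buffer alone.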
/docs/examples.rst:
--------------------------------------------------------------------------------
1 | ########
2 | Examples
3 | ########
4 | 
5 | Below are some simple examples of using ``Batches`` tasks. Note that the
6 | examples do not fully configure the :class:`~celery.Celery` instance, which depends
7 | on your setup (e.g. which broker/backend you're planning to use).
8 | 
9 | Simple Example
10 | ##############
11 | 
12 | A click counter that flushes the buffer every 100 messages, and every
13 | 10 seconds. Does not do anything with the data, but can easily be modified
14 | to store it in a database.
15 | 
16 | .. literalinclude:: examples/counter.py
17 |     :language: python
18 | 
19 | Then you can ask for a click to be counted by doing::
20 | 
21 |     >>> count_click.delay(url='http://example.com')
22 | 
23 | Database example
24 | ################
25 | 
26 | It can be useful to batch together tasks to reduce database updates (in situations
27 | where a missed update is not important), e.g. updating the last seen time of a user:
28 | 
29 | .. literalinclude:: examples/last_seen.py
30 |     :language: python
31 | 
32 | Bulk inserting/updating data
33 | ############################
34 | 
35 | It can also be useful to bulk insert data as quickly as possible, even when the discrete data items come from separate tasks.
36 | 
37 | .. literalinclude:: examples/bulk_insert.py
38 |     :language: python
39 | 
40 | Example returning results
41 | #########################
42 | 
43 | An interface to the GitHub API that avoids requesting the API endpoint for each
44 | task. It flushes the buffer every 100 messages, and every 10 seconds.
45 | 
46 | .. literalinclude:: examples/github_api.py
47 |     :language: python
48 | 
49 | Using the API is done as follows::
50 | 
51 |     >>> result = check_emoji.delay('celery')
52 |     >>> assert result.get() is False
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [metadata]
2 | name = celery-batches
3 | version = 0.10
4 | description = Experimental task class that buffers messages and processes them as a list.
5 | long_description = file: README.rst
6 | long_description_content_type = text/x-rst
7 | author = Patrick Cloke
8 | author_email = clokep@patrick.cloke.us
9 | url = https://github.com/clokep/celery-batches
10 | keywords = task, job, queue, distributed, messaging, actor
11 | license = BSD
12 | license_files = LICENSE
13 | classifiers =
14 |     Development Status :: 3 - Alpha
15 |     License :: OSI Approved :: BSD License
16 |     Topic :: System :: Distributed Computing
17 |     Topic :: Software Development :: Object Brokering
18 |     Programming Language :: Python
19 |     Programming Language :: Python :: 3 :: Only
20 |     Programming Language :: Python :: 3
21 |     Programming Language :: Python :: 3.9
22 |     Programming Language :: Python :: 3.10
23 |     Programming Language :: Python :: 3.11
24 |     Programming Language :: Python :: 3.12
25 |     Programming Language :: Python :: 3.13
26 |     Programming Language :: Python :: Implementation :: CPython
27 |     Programming Language :: Python :: Implementation :: PyPy
28 |     Operating System :: OS Independent
29 | project_urls =
30 |     Documentation = https://celery-batches.readthedocs.io
31 |     Release notes = https://github.com/clokep/celery-batches/blob/main/CHANGELOG.rst
32 |     Source = https://github.com/clokep/celery-batches
33 |     Funding = https://github.com/sponsors/clokep
34 |     Tracker = https://github.com/clokep/celery-batches/issues
35 | 
36 | [options]
37 | packages =
38 |     celery_batches
39 | install_requires = celery>=5.0,<5.6
40 | python_requires = >=3.9
41 | 
42 | [flake8]
43 | extend-ignore = E203
44 | max-line-length = 88
45 | 
46 | [isort]
47 | profile = black
48 | default_section = THIRDPARTY
49 | sections=FUTURE,STDLIB,FIRSTPARTY,CELERY,THIRDPARTY,LOCALFOLDER
50 | known_celery=billiard,celery,kombu,vine
--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | from importlib import metadata
4 | 
5 | sys.path.insert(0, os.path.abspath(".."))
6 | sys.path.insert(0, os.path.abspath("./_ext"))
7 | 
8 | extensions = [
9 |     "sphinx.ext.autodoc",
10 |     "sphinx.ext.intersphinx",
11 |     "sphinx_celery.setting_crossref",
12 |     "celerydocs",
13 | ]
14 | 
15 | templates_path = ["_templates"]
16 | 
17 | source_suffix = ".rst"
18 | 
19 | master_doc = "index"
20 | 
21 | project = "Celery Batches"
22 | copyright = "2017, Percipient Networks; 2020-, Patrick Cloke"
23 | author = "Patrick Cloke"
24 | 
25 | release = metadata.version("celery-batches")
26 | version = ".".join(release.split(".")[0:2])
27 | 
28 | language = "en"
29 | 
30 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
31 | 
32 | pygments_style = "sphinx"
33 | 
34 | html_theme = "furo"
35 | 
36 | html_sidebars = {
37 |     "**": [
38 |         "sidebar/scroll-start.html",
39 |         "sidebar/brand.html",
40 |         "sidebar/search.html",
41 |         "sidebar/navigation.html",
42 |         "sidebar/ethical-ads.html",
43 |         "sidebar/scroll-end.html",
44 |     ]
45 | }
46 | 
47 | htmlhelp_basename = "CeleryBatchesdoc"
"CeleryBatchesdoc" 48 | 49 | latex_documents = [ 50 | ( 51 | master_doc, 52 | "CeleryBatches.tex", 53 | "Celery Batches Documentation", 54 | "Percipient Networks", 55 | "manual", 56 | ), 57 | ] 58 | 59 | man_pages = [(master_doc, "celerybatches", "Celery Batches Documentation", [author], 1)] 60 | 61 | texinfo_documents = [ 62 | ( 63 | master_doc, 64 | "CeleryBatches", 65 | "Celery Batches Documentation", 66 | author, 67 | "CeleryBatches", 68 | "One line description of project.", 69 | "Miscellaneous", 70 | ), 71 | ] 72 | 73 | intersphinx_mapping = { 74 | "python": ("https://docs.python.org/dev/", None), 75 | "kombu": ("https://docs.celeryq.dev/projects/kombu/en/main/", None), 76 | "celery": ("https://docs.celeryq.dev/en/main/", None), 77 | } 78 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | Celery Batches 2 | ============== 3 | 4 | .. image:: https://img.shields.io/pypi/v/celery-batches.svg 5 | :target: https://pypi.org/project/celery-batches/ 6 | 7 | .. image:: https://github.com/clokep/celery-batches/actions/workflows/main.yml/badge.svg 8 | :target: https://github.com/clokep/celery-batches/actions/workflows/main.yml 9 | 10 | .. image:: https://readthedocs.org/projects/celery-batches/badge/?version=latest 11 | :target: https://celery-batches.readthedocs.io/en/latest/?badge=latest 12 | :alt: Documentation Status 13 | 14 | Celery Batches provides a ``Task`` class that allows processing of multiple 15 | Celery task calls together as a list. The buffer of tasks calls is flushed on a 16 | timer and based on the number of queued tasks. 17 | 18 | Some potential use-cases for batching of task calls include: 19 | 20 | * De-duplicating tasks. 21 | * Accumulating / only handling the latest task with similar arguments. 22 | * Bulk inserting / updating of data. 23 | * Tasks with expensive setup that can run across a range of arguments. 24 | 25 | What do I need? 26 | =============== 27 | 28 | celery-batches version runs on, 29 | 30 | - Python (3.9, 3.10, 3.11, 3.12, 3.13) 31 | - PyPy (3.10, 3.11) 32 | 33 | And is tested with Celery ~= 5.0. 34 | 35 | If you're running an older version of Python, you need to be running 36 | an older version of celery-batches, the last version supporting each 37 | Python version is listed below: 38 | 39 | - Python 2.7: celery-batches 0.3. 40 | - Python 3.4: celery-batches 0.2. 41 | - Python 3.5: celery-batches 0.3. 42 | - Python 3.6: celery-batches 0.5. 43 | - Python 3.7: celery-batches 0.7. 44 | - Python 3.8: celery-batches 0.9. 45 | 46 | If you're running an older version of Celery, you need to be running 47 | an older version of celery-batches: 48 | 49 | - Celery < 4.0: Use `celery.contrib.batches` instead. 50 | - Celery 4.0 - 4.3: celery-batches 0.3. 51 | - Celery 4.4: celery-batches 0.7. 52 | - Celery 5.0 - 5.1: celery-batches 0.9. 53 | 54 | History 55 | ======= 56 | 57 | Celery Batches was distributed as part of Celery (as ``celery.contrib.batches``) 58 | until Celery 4.0. This project updates the Batches code to maintain compatiblity 59 | with newer versions of Celery and other fixes. See the Changelog for details. 60 | 61 | Additionally, this repository includes the full history of the code from 62 | ``celery.contrib.batches``, but rewritten to the ``celery_batches/__init__.py`` 63 | file. 
/celery_batches/trace.py:
--------------------------------------------------------------------------------
1 | """Trace task execution.
2 | 
3 | This module defines how the task execution is traced:
4 | errors are recorded, handlers are applied and so on.
5 | 
6 | Mimics some of the functionality found in celery.app.trace.trace_task.
7 | """
8 | 
9 | from typing import TYPE_CHECKING, Any, List, Tuple
10 | 
11 | from celery import signals, states
12 | from celery._state import _task_stack
13 | from celery.app.task import Context
14 | from celery.utils.log import get_logger
15 | from kombu.utils.uuid import uuid
16 | 
17 | if TYPE_CHECKING:
18 |     from celery_batches import Batches, SimpleRequest
19 | 
20 | logger = get_logger(__name__)
21 | 
22 | send_prerun = signals.task_prerun.send
23 | send_postrun = signals.task_postrun.send
24 | send_success = signals.task_success.send
25 | SUCCESS = states.SUCCESS
26 | FAILURE = states.FAILURE
27 | 
28 | 
29 | def apply_batches_task(
30 |     task: "Batches", args: Tuple[List["SimpleRequest"]], loglevel: int, logfile: None
31 | ) -> Any:
32 |     request_stack = task.request_stack
33 |     push_request = request_stack.push
34 |     pop_request = request_stack.pop
35 |     push_task = _task_stack.push
36 |     pop_task = _task_stack.pop
37 | 
38 |     prerun_receivers = signals.task_prerun.receivers
39 |     postrun_receivers = signals.task_postrun.receivers
40 |     success_receivers = signals.task_success.receivers
41 | 
42 |     # Corresponds to multiple requests, so generate a new UUID.
43 |     task_id = uuid()
44 | 
45 |     push_task(task)
46 |     task_request = Context(loglevel=loglevel, logfile=logfile)
47 |     push_request(task_request)
48 | 
49 |     try:
50 |         # -*- PRE -*-
51 |         if prerun_receivers:
52 |             send_prerun(sender=task, task_id=task_id, task=task, args=args, kwargs={})
53 | 
54 |         # -*- TRACE -*-
55 |         try:
56 |             result = task(*args)
57 |             state = SUCCESS
58 |         except Exception as exc:
59 |             result = None
60 |             state = FAILURE
61 |             logger.error("Error: %r", exc, exc_info=True)
62 |         else:
63 |             if success_receivers:
64 |                 send_success(sender=task, result=result)
65 |     finally:
66 |         try:
67 |             if postrun_receivers:
68 |                 send_postrun(
69 |                     sender=task,
70 |                     task_id=task_id,
71 |                     task=task,
72 |                     args=args,
73 |                     kwargs={},
74 |                     retval=result,
75 |                     state=state,
76 |                 )
77 |         finally:
78 |             pop_task()
79 |             pop_request()
80 | 
81 |     return result
82 | 
--------------------------------------------------------------------------------
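Note that ``apply_batches_task`` above fires the task signals once per flushed
batch, not once per buffered request. A sketch of observing that from
application code::

    from celery import signals

    @signals.task_postrun.connect
    def on_batch_done(sender=None, retval=None, state=None, **kwargs):
        # Fires once per flush for a Batches task; retval is whatever the
        # batch body returned.
        print(f"{sender.name} finished a batch: {state} -> {retval}")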
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2020- Patrick Cloke & contributors. All rights reserved.
2 | Copyright (c) 2017-2020 Percipient Networks. All rights reserved.
3 | Copyright (c) 2015-2016 Ask Solem & contributors. All rights reserved.
4 | Copyright (c) 2012-2014 GoPivotal, Inc. All rights reserved.
5 | Copyright (c) 2009, 2010, 2011, 2012 Ask Solem, and individual contributors. All rights reserved.
6 | 
7 | Celery is licensed under The BSD License (3 Clause, also known as
8 | the new BSD license). The license is an OSI approved Open Source
9 | license and is GPL-compatible(1).
10 | 
11 | The license text can also be found here:
12 | http://www.opensource.org/licenses/BSD-3-Clause
13 | 
14 | License
15 | =======
16 | 
17 | Redistribution and use in source and binary forms, with or without
18 | modification, are permitted provided that the following conditions are met:
19 |     * Redistributions of source code must retain the above copyright
20 |       notice, this list of conditions and the following disclaimer.
21 |     * Redistributions in binary form must reproduce the above copyright
22 |       notice, this list of conditions and the following disclaimer in the
23 |       documentation and/or other materials provided with the distribution.
24 |     * Neither the name of Ask Solem, nor the
25 |       names of its contributors may be used to endorse or promote products
26 |       derived from this software without specific prior written permission.
27 | 
28 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
29 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
30 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
31 | PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL Ask Solem OR CONTRIBUTORS
32 | BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 | POSSIBILITY OF SUCH DAMAGE.
39 | 
40 | Documentation License
41 | =====================
42 | 
43 | The documentation portion of Celery (the rendered contents of the
44 | "docs" directory of a software distribution or checkout) is supplied
45 | under the "Creative Commons Attribution-ShareAlike 4.0
46 | International" (CC BY-SA 4.0) License as described by
47 | https://creativecommons.org/licenses/by-sa/4.0/
48 | 
49 | Footnotes
50 | =========
51 | (1) A GPL-compatible license makes it possible to
52 |     combine Celery with other software that is released
53 |     under the GPL, it does not mean that we're distributing
54 |     Celery under the GPL license. The BSD license, unlike the GPL,
55 |     let you distribute a modified version without making your
56 |     changes open source.
57 | 
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
1 | ##############
2 | celery-batches
3 | ##############
4 | 
5 | celery-batches is a task class that buffers messages and processes them as a list. Task
6 | requests are buffered in memory (on a worker) until either the flush count or
7 | flush interval is reached. Once the requests are flushed, they are sent to the
8 | task as a list of :class:`~celery_batches.SimpleRequest` instances.
9 | 
10 | Some potential use-cases for batching of task calls include:
11 | 
12 | * De-duplicating tasks.
13 | * Accumulating / only handling the latest task with similar arguments.
14 | * Bulk inserting / updating of data.
15 | * Tasks with expensive setup that can run across a range of arguments.
16 | 
17 | For the :class:`~celery_batches.Batches` task to work properly you must configure
18 | :setting:`worker_prefetch_multiplier` to zero, or some value where the final
19 | multiplied value is higher than :attr:`~celery_batches.Batches.flush_every`.
20 | 
21 | .. warning::
22 | 
23 |     Celery will attempt to continually pull all data from a queue into memory if
24 |     :setting:`worker_prefetch_multiplier` is set to zero. This can cause excessive
25 |     resource consumption on both Celery workers and the broker when used with a
26 |     deep queue.
27 | 
28 |     In the future we hope to add the ability to direct batching tasks
29 |     to a channel with different QoS requirements than the task channel.
30 | 
31 | Returning results
32 | #################
33 | 
34 | It is possible to return a result for each task request by calling ``mark_as_done``
35 | on your results backend. Returning a value from the :class:`~celery_batches.Batches`
36 | task call is only used to provide values to signals and does not populate into the
37 | results backend.
38 | 
39 | .. note::
40 | 
41 |     If you don't have an ``app`` instance then use the current app proxy
42 |     instead:
43 | 
44 |     .. code-block:: python
45 | 
46 |         from celery import current_app
47 |         current_app.backend.mark_as_done(request.id, response, request=request)
48 | 
49 | Retrying tasks
50 | ##############
51 | 
52 | In order to retry a failed task, the task must be re-executed with the original
53 | task :attr:`~celery.worker.request.Request.id`, see the example below:
54 | 
55 | .. code-block:: python
56 | 
57 |     @app.task(base=Batches, flush_every=100, flush_interval=10)
58 |     def flaky_task(requests):
59 |         for request in requests:
60 |             # Do something that might fail.
61 |             try:
62 |                 response = might_fail(*request.args, **request.kwargs)
63 |             except TemporaryError:
64 |                 # Retry the task 10 seconds from now with the same arguments and task_id.
65 |                 flaky_task.apply_async(
66 |                     args=request.args,
67 |                     kwargs=request.kwargs,
68 |                     countdown=10,
69 |                     task_id=request.id,
70 |                 )
71 |             else:
72 |                 app.backend.mark_as_done(request.id, response, request=request)
73 | 
74 | Note that the retried task is still bound by the flush rules of the :class:`~celery_batches.Batches`
75 | task; it is used as a lower bound and will not run *before* that timeout. In the
76 | example above it will run between 10 - 20 seconds from now, assuming no other
77 | tasks are in the queue.
78 | 
79 | .. toctree::
80 |     :hidden:
81 | 
82 |     examples
83 |     api
84 |     how_it_works
85 |     history
--------------------------------------------------------------------------------
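Putting the two index.rst recipes together — a task that stores one result per
request, and a caller that waits on it — as a sketch (``squares`` is a
hypothetical task; ``app`` is the Celery app from the earlier examples)::

    @app.task(base=Batches, flush_every=100, flush_interval=10)
    def squares(requests):
        for request in requests:
            value = request.args[0] ** 2
            app.backend.mark_as_done(request.id, value, request=request)

    # Caller side:
    result = squares.delay(3)
    assert result.get(timeout=10) == 9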
/.github/workflows/main.yml:
--------------------------------------------------------------------------------
1 | name: CI
2 | 
3 | on:
4 |   push:
5 |     branches: [ main ]
6 |   pull_request:
7 |     branches: [ main ]
8 | 
9 |   workflow_dispatch:
10 | 
11 | jobs:
12 |   lint:
13 |     name: "Lint"
14 |     runs-on: ubuntu-latest
15 | 
16 |     steps:
17 |       - uses: actions/checkout@v2
18 | 
19 |       - uses: "actions/setup-python@v3"
20 |         with:
21 |           python-version: "3.9"
22 | 
23 |       - name: "Install dependencies"
24 |         run: python -m pip install -r requirements/pkgutils.txt
25 | 
26 |       - name: "Run pyupgrade"
27 |         run: pyupgrade --py37-plus **/*.py
28 | 
29 |       - name: "Run flake8"
30 |         run: flake8
31 | 
32 |       - name: "Run isort"
33 |         run: isort --check .
34 | 
35 |       - name: "Run black"
36 |         run: black --check .
37 | 
38 |       - name: "Run mypy"
39 |         run: mypy
40 | 
41 |   docs:
42 |     name: "Build Documentation"
43 |     runs-on: ubuntu-latest
44 | 
45 |     steps:
46 |       - uses: actions/checkout@v2
47 | 
48 |       - uses: "actions/setup-python@v3"
49 |         with:
50 |           python-version: "3.9"
51 | 
52 |       - name: "Install dependencies"
53 |         run: |
54 |           python -m pip install -r requirements/docs.txt
55 |           python -m pip install .
56 | 
57 |       - name: "Run Sphinx"
58 |         run: sphinx-build -W --keep-going -b html docs _build
59 | 
60 |   tests:
61 |     name: "Python ${{ matrix.python-version }}"
62 |     needs:
63 |       - lint
64 |       - docs
65 |     runs-on: ubuntu-latest
66 | 
67 |     strategy:
68 |       matrix:
69 |         python-version: ["3.9", "3.10", "3.11", "3.12", "3.13", "pypy-3.10", "pypy-3.11"]
70 | 
71 |     steps:
72 |       - uses: actions/checkout@v2
73 | 
74 |       - uses: "actions/setup-python@v2"
75 |         with:
76 |           python-version: "${{ matrix.python-version }}"
77 | 
78 |       - name: "Install dependencies"
79 |         run: |
80 |           set -xe
81 |           python -VV
82 |           python -m site
83 |           python -m pip install --upgrade pip setuptools wheel
84 |           python -m pip install --upgrade "tox<4" "tox-gh-actions<3"
85 | 
86 |       - name: "Run tox targets for ${{ matrix.python-version }}"
87 |         env:
88 |           TOX_SKIP_ENV: ".*celerymain.*|.*integration.*"
89 |         run: "python -m tox"
90 | 
91 |       - name: "Run tox targets for ${{ matrix.python-version }} for celery main"
92 |         env:
93 |           TOX_SKIP_ENV: ".*celery[^m].*|.*integration.*"
94 |         run: "python -m tox"
95 |         continue-on-error: true
96 | 
97 |   integration-tests:
98 |     name: "Integration tests"
99 |     needs: lint
100 |     runs-on: ubuntu-latest
101 | 
102 |     services:
103 |       rabbitmq:
104 |         image: rabbitmq
105 |         # Set health checks to wait until rabbitmq has started
106 |         options: >-
107 |           --health-cmd "rabbitmq-diagnostics -q status"
108 |           --health-interval 10s
109 |           --health-timeout 5s
110 |           --health-retries 5
111 |         ports:
112 |           - 5672:5672
113 |       redis:
114 |         image: redis
115 |         # Set health checks to wait until redis has started
116 |         options: >-
117 |           --health-cmd "redis-cli ping"
118 |           --health-interval 10s
119 |           --health-timeout 5s
120 |           --health-retries 5
121 |         ports:
122 |           - 6379:6379
123 | 
124 |     steps:
125 |       - uses: actions/checkout@v2
126 | 
127 |       - uses: "actions/setup-python@v2"
128 |         with:
129 |           python-version: "3.x"
130 | 
131 |       - name: "Install dependencies"
132 |         run: |
133 |           set -xe
134 |           python -VV
135 |           python -m site
136 |           python -m pip install --upgrade pip setuptools wheel
137 |           python -m pip install --upgrade "tox<4" "tox-gh-actions<3"
138 | 
139 |       - name: "Run tox targets"
140 |         env:
141 |           TOX_SKIP_ENV: ".*unit.*|flake8"
142 |         run: "python -m tox"
143 | 
--------------------------------------------------------------------------------
/CHANGELOG.rst:
--------------------------------------------------------------------------------
1 | .. :changelog:
2 | 
3 | Changelog
4 | #########
5 | 
6 | 0.10 (2025-04-04)
7 | =================
8 | 
9 | Bugfixes
10 | --------
11 | 
12 | * Fix not resetting the count of pending tasks when flushing tasks due to
13 |   the flush interval being reached. Contributed by `@FilipKarpinski `_.
14 |   (`#95 `_)
15 | 
16 | Improvements
17 | ------------
18 | 
19 | * Improve documentation. (`#92 `_)
20 | 
21 | Maintenance
22 | -----------
23 | 
24 | * Support Celery 5.5. (`#97 `_)
25 | * Drop support for Celery < 5.2. (`#92 `_)
26 | * Support Python 3.13. (`#97 `_)
27 | * Drop support for Python 3.8. (`#97 `_)
28 | 
29 | 
30 | 0.9 (2024-06-03)
31 | ================
32 | 
33 | Improvements
34 | ------------
35 | 
36 | * Call the task received signal for ``Batches`` task. (`#85 `_)
37 | 
38 | Maintenance
39 | -----------
40 | 
41 | * Limit test requirements to ``pytest-celery~=0.0.0`` for an incompatibility with 1.0.0.
42 |   (`#90 `_)
43 | * Bump black to 24.3.0. (`#86 `_)
44 | * Support Celery 5.4. (`#91 `_)
45 | * Support Python 3.12. (`#91 `_)
46 | 
47 | 
48 | 0.8.1 (2023-06-27)
49 | ==================
50 | 
51 | Bugfixes
52 | --------
53 | 
54 | * Fix installing celery-batches with Celery 5.3. Contributed by `@stegayet `_.
55 |   (`#80 `_)
56 | 
57 | 
58 | 0.8 (2023-06-27)
59 | ================
60 | 
61 | Improvements
62 | ------------
63 | 
64 | * Support passing through the `request_dict` from the original Celery request.
65 |   Contributed by `@montasaurus `_.
66 |   (`#71 `_)
67 | 
68 | Maintenance
69 | -----------
70 | 
71 | * Support Celery 5.3. Contributed by `@stegayet `_.
72 |   (`#68 `_,
73 |   `#75 `_,
74 |   `#77 `_)
75 | * Drop support for Celery < 5.0. (`#78 `_)
76 | * Support Python 3.11. (`#75 `_)
77 | * Drop support for Python 3.7. (`#77 `_)
78 | 
79 | 
80 | 0.7 (2022-05-02)
81 | ================
82 | 
83 | Improvements
84 | ------------
85 | 
86 | * Support passing multiple or keyword arguments by disabling Celery's ``typing``
87 |   feature for ``Batches`` tasks. (`#39 `_)
88 | * Support |using a custom Request class|_ for ``Batches`` tasks.
89 |   (`#63 `_)
90 | * Support calling tasks with an ``eta`` or ``countdown`` specified. Contributed by
91 |   `@weetster `_.
92 |   (`#59 `_)
93 | 
94 | Bugfixes
95 | --------
96 | 
97 | * Handle "hybrid" messages that have moved between Celery versions. Port
98 |   `celery/celery#4358 `_ to celery-batches.
99 |   (`#64 `_)
100 | * Fix task ETA issues when timezone is defined in configuration. Port
101 |   `celery/celery#3867 `_ to celery-batches.
102 |   (`#64 `_)
103 | 
104 | .. |using a custom Request class| replace:: using a custom ``Request`` class
105 | .. _using a custom Request class: https://docs.celeryq.dev/en/stable/userguide/tasks.html#requests-and-custom-requests
106 | 
107 | Maintenance
108 | -----------
109 | 
110 | * Fix running of tests via tox. (`#40 `_,
111 |   `#58 `_)
112 | * Simplify tests. (`#56 `_,
113 |   `#60 `_)
114 | * Improve PyPI metadata. (`#43 `_,
115 |   `#52 `_)
116 | * Ignore virtualenvs in `.gitignore`. Contributed by `Tony Narlock `_.
117 |   (`#44 `_)
118 | * Update README badges to include PyPI and GitHub Actions (instead of Travis CI).
119 |   Contributed by `Tony Narlock `_.
120 |   (`#47 `_)
121 | * Update copyright information. Contributed by `Tony Narlock `_.
122 |   (`#46 `_)
123 | * Improve documentation. Contributed by `Tony Narlock `_.
124 |   (`#45 `_,
125 |   `#49 `_,
126 |   `#50 `_,
127 |   `#55 `_)
128 | * Document use-cases and include more examples. (`#65 `_)
129 | * Run the unit tests against RabbitMQ & Redis brokers/backends.
130 |   (`#57 `_)
131 | * Run `black `_, `isort `_,
132 |   `flake8 `_, `pyupgrade `_,
133 |   and `mypy `_.
134 |   (`#61 `_,
135 |   `#62 `_)
136 | 
137 | 
138 | 0.6 (2021-12-30)
139 | ================
140 | 
141 | Bugfixes
142 | --------
143 | 
144 | * Fix a bug when passing a ``request`` to ``mark_as_done`` with Celery 5.1.0.
145 |   (`#32 `_)
146 | 
147 | Maintenance
148 | -----------
149 | 
150 | * Clean-up and re-organize code. (`#31 `_)
151 | * Support Celery 5.2. (`#36 `_)
152 | * Support Python 3.10. (`#37 `_)
153 | * Drop support for Python 3.6. (`#36 `_)
154 | * Changed packaging to use setuptools declarative config in ``setup.cfg``.
155 |   (`#37 `_)
156 | 
157 | 
158 | 0.5 (2021-05-24)
159 | ================
160 | 
161 | Bugfixes
162 | --------
163 | 
164 | * Fix storing of results in the
165 |   `RPC Result Backend `_.
166 |   (`#30 `_)
167 | 
168 | Maintenance
169 | -----------
170 | 
171 | * Support Celery 5.1. (`#27 `_)
172 | * Clean-up unnecessary code. (`#28 `_)
173 | * CI improvements. (`#25 `_)
174 | 
175 | 
176 | 0.4 (2020-11-30)
177 | ================
178 | 
179 | Maintenance
180 | -----------
181 | 
182 | * Support Celery 5.0. (`#21 `_)
183 | * Drop support for Celery < 4.4. (`#21 `_)
184 | * Drop support for Python < 3.6. (`#21 `_)
185 | 
186 | 
187 | 0.3 (2020-01-29)
188 | ================
189 | 
190 | Improvements
191 | ------------
192 | 
193 | * Properly set the ``current_task`` when running ``Batches`` tasks. (`#4 `_)
194 | * Call the success signal after a successful run of the ``Batches`` task. (`#6 `_)
195 | * Support running tasks eagerly via the ``Task.apply()`` method. This causes
196 |   the task to execute with a batch of a single item. Contributed by
197 |   `@scalen `_. (`#16 `_,
198 |   `#18 `_)
199 | 
200 | Maintenance
201 | -----------
202 | 
203 | * Improved documentation. Contributed by
204 |   `@nijel `_. (`#3 `_,
205 |   `#7 `_)
206 | * Support Celery 4.2, 4.3, and 4.4. (`#12 `_,
207 |   `#14 `_,
208 |   `#19 `_)
209 | * Support Python 3.7 and 3.8. (`#19 `_)
210 | * Drop support for Python 3.4. (`#19 `_)
211 | * CI improvements. (`#5 `_,
212 |   `#11 `_,
213 |   `#13 `_)
214 | 
215 | 
216 | 0.2 (2018-04-20)
217 | ================
218 | 
219 | Improvements
220 | ------------
221 | 
222 | * Add support for protocol v2. (`#1 `_)
223 | 
224 | Maintenance
225 | -----------
226 | 
227 | * Add tests. (`#1 `_,
228 |   `#2 `_)
229 | * Fix some documentation issues. (`#1 `_)
230 | 
231 | 
232 | 0.1 (2018-03-23)
233 | ================
234 | 
235 | Improvements
236 | ------------
237 | 
238 | * ``Batches`` tasks now call pre- and post-run signals.
239 | 
240 | Maintenance
241 | -----------
242 | 
243 | * The initial released version; includes changes to make it a separate package,
244 |   etc.
245 | 
246 | 
247 | celery-final
248 | ============
249 | 
250 | * The final version of ``celery.contrib.batches`` before it was removed in
251 |   |4b3ab708778a3772d24bb39142b7e9d5b94c488b|_.
252 | 
253 | .. |4b3ab708778a3772d24bb39142b7e9d5b94c488b| replace:: ``4b3ab708778a3772d24bb39142b7e9d5b94c488b``
254 | .. _4b3ab708778a3772d24bb39142b7e9d5b94c488b: https://github.com/celery/celery/commit/4b3ab708778a3772d24bb39142b7e9d5b94c488b
255 | 
--------------------------------------------------------------------------------
/t/integration/test_batches.py:
--------------------------------------------------------------------------------
1 | from datetime import datetime, timedelta
2 | from time import sleep
3 | from typing import Any, Callable, List, Optional, Union
4 | 
5 | from celery_batches import Batches, SimpleRequest
6 | 
7 | from celery import Celery, signals, states
8 | from celery.app.task import Task
9 | from celery.contrib.testing.tasks import ping
10 | from celery.contrib.testing.worker import TestWorkController
11 | from celery.result import allow_join_result
12 | from celery.utils.dispatch import Signal
13 | from celery.worker.consumer.consumer import Consumer
14 | from celery.worker.request import Request
15 | 
16 | import pytest
17 | 
18 | from .tasks import add, cumadd
19 | 
20 | 
21 | class SignalCounter:
22 |     def __init__(
23 |         self,
24 |         signal: Signal,
25 |         expected_calls: int,
26 |         callback: Optional[Callable[..., None]] = None,
27 |     ):
28 |         self.signal = signal
29 |         signal.connect(self)
30 |         self.calls = 0
31 |         self.expected_calls = expected_calls
32 |         self.callback = callback
33 | 
34 |     def __call__(self, sender: Union[Task, str, Consumer], **kwargs: Any) -> None:
35 |         if isinstance(sender, Task):
36 |             task_name = sender.name
37 |         elif isinstance(sender, Consumer):
38 |             assert self.signal == signals.task_received
39 |             task_name = kwargs["request"].name
40 |         else:
41 |             task_name = sender
42 | 
43 |         # Ignore pings, those are used to ensure the worker processes tasks.
44 |         if task_name == "celery.ping":
45 |             return
46 | 
47 |         self.calls += 1
48 | 
49 |         # Call the "real" signal, if necessary.
50 |         if self.callback:
51 |             self.callback(sender, **kwargs)
52 | 
53 |     def assert_calls(self) -> None:
54 |         assert (
55 |             self.calls == self.expected_calls
56 |         ), f"Signal {self.signal.name} called incorrect number of times."
57 | 
58 | 
59 | def _wait_for_ping(ping_task_timeout: float = 10.0) -> None:
60 |     """
61 |     Wait for the celery worker to respond to a ping.
62 | 
63 |     This should ensure that any other running tasks are done.
64 |     """
65 |     with allow_join_result():
66 |         assert ping.delay().get(timeout=ping_task_timeout) == "pong"
67 | 
68 | 
69 | @pytest.mark.usefixtures("depends_on_current_app")
70 | def test_always_eager(celery_app: Celery) -> None:
71 |     """The batch task runs immediately, in the same thread."""
72 |     celery_app.conf.task_always_eager = True
73 |     result = add.delay(1)
74 | 
75 |     # An EagerResult that resolves to 1 should be returned.
76 |     assert result.get() == 1
77 | 
78 | 
79 | def test_apply() -> None:
80 |     """The batch task runs immediately, in the same thread."""
81 |     result = add.apply(args=(1,))
82 | 
83 |     # An EagerResult that resolves to 1 should be returned.
84 |     assert result.get() == 1
85 | 
86 | 
87 | def test_flush_interval(celery_app: Celery, celery_worker: TestWorkController) -> None:
88 |     """The batch task runs after the flush interval has elapsed."""
89 | 
90 |     if not celery_app.conf.broker_url.startswith("memory"):
91 |         raise pytest.skip("Flaky on live brokers")
92 | 
93 |     result = add.delay(1)
94 | 
95 |     # The flush interval is 0.1 second, this is longer.
96 |     sleep(0.2)
97 | 
98 |     # Let the worker work.
99 |     _wait_for_ping()
100 | 
101 |     assert result.get() == 1
102 | 
103 | 
104 | def test_flush_interval_resets_counter(
105 |     celery_app: Celery, celery_worker: TestWorkController
106 | ) -> None:
107 |     """Flush counter is reset after flush is triggered by interval."""
108 | 
109 |     if not celery_app.conf.broker_url.startswith("memory"):
110 |         raise pytest.skip("Flaky on live brokers")
111 | 
112 |     result_1 = add.delay(1)
113 | 
114 |     # The flush interval is 0.1 second, this is longer.
115 |     sleep(2)
116 | 
117 |     # Let the worker work.
118 |     _wait_for_ping()
119 | 
120 |     assert result_1.get() == 1
121 | 
122 |     # Run next task, it should not execute as counter was reset
123 |     result_2 = add.delay(2)
124 | 
125 |     # The flush interval is 0.1 second, this is shorter.
126 |     sleep(0.01)
127 |     _wait_for_ping()
128 | 
129 |     assert result_2.state == states.PENDING
130 | 
131 | 
132 | def test_flush_calls(celery_worker: TestWorkController) -> None:
133 |     """The batch task runs after two calls."""
134 |     result_1 = add.delay(1)
135 |     result_2 = add.delay(3)
136 | 
137 |     # Let the worker work.
138 |     _wait_for_ping()
139 | 
140 |     assert result_1.get() == 4
141 |     assert result_2.get() == 4
142 | 
143 | 
144 | def test_multi_arg(celery_worker: TestWorkController) -> None:
145 |     """The batch task runs after two calls."""
146 |     result_1 = add.delay(1, 2)
147 |     result_2 = add.delay(3, 4)
148 | 
149 |     # Let the worker work.
150 |     _wait_for_ping()
151 | 
152 |     assert result_1.get() == 10
153 |     assert result_2.get() == 10
154 | 
155 | 
156 | def test_kwarg(celery_worker: TestWorkController) -> None:
157 |     """The batch task runs after two calls."""
158 |     result_1 = add.delay(a=1, b=2)
159 |     result_2 = add.delay(a=3, b=4)
160 | 
161 |     # Let the worker work.
162 |     _wait_for_ping()
163 | 
164 |     assert result_1.get() == 10
165 |     assert result_2.get() == 10
166 | 
167 | 
168 | def test_result(celery_worker: TestWorkController) -> None:
169 |     """Each task call can return a result."""
170 |     result_1 = cumadd.delay(1)
171 |     result_2 = cumadd.delay(2)
172 | 
173 |     # Let the worker work.
174 |     _wait_for_ping()
175 | 
176 |     assert result_1.get(timeout=3) == 1
177 |     assert result_2.get(timeout=3) == 3
178 | 
179 | 
180 | def test_signals(celery_app: Celery, celery_worker: TestWorkController) -> None:
181 |     """Ensure that Celery signals run for the batch task."""
182 |     # Configure a SignalCounter for each task signal.
183 |     checks = (
184 |         # Each task request gets published separately.
185 |         (signals.before_task_publish, 2),
186 |         (signals.after_task_publish, 2),
187 |         (signals.task_sent, 2),
188 |         (signals.task_received, 2),
189 |         # The Batch task only runs a single time.
190 |         (signals.task_prerun, 1),
191 |         (signals.task_postrun, 1),
192 |         (signals.task_success, 1),
193 |         # Other task signals are not implemented.
194 |         (signals.task_retry, 0),
195 |         (signals.task_failure, 0),
196 |         (signals.task_revoked, 0),
197 |         (signals.task_internal_error, 0),
198 |         (signals.task_unknown, 0),
199 |         (signals.task_rejected, 0),
200 |     )
201 |     signal_counters = []
202 |     for sig, expected_count in checks:
203 |         counter = SignalCounter(sig, expected_count)
204 |         signal_counters.append(counter)
205 | 
206 |     # The batch runs after 2 task calls.
207 |     result_1 = add.delay(1)
208 |     result_2 = add.delay(3)
209 | 
210 |     # Let the worker work.
211 |     _wait_for_ping()
212 | 
213 |     # Should still have the correct result.
214 |     assert result_1.get() == 4
215 |     assert result_2.get() == 4
216 | 
217 |     for counter in signal_counters:
218 |         counter.assert_calls()
219 | 
220 | 
221 | def test_current_task(celery_app: Celery, celery_worker: TestWorkController) -> None:
222 |     """Ensure the current_task is properly set when running the task."""
223 | 
224 |     def signal(sender: Union[Task, str], **kwargs: Any) -> None:
225 |         assert celery_app.current_task.name == "t.integration.tasks.add"
226 | 
227 |     counter = SignalCounter(signals.task_prerun, 1, signal)
228 | 
229 |     # The batch runs after 2 task calls.
230 |     result_1 = add.delay(1)
231 |     result_2 = add.delay(3)
232 | 
233 |     # Let the worker work.
234 |     _wait_for_ping()
235 | 
236 |     # Should still have the correct result.
237 |     assert result_1.get() == 4
238 |     assert result_2.get() == 4
239 | 
240 |     counter.assert_calls()
241 | 
242 | 
243 | def test_acks_early(celery_app: Celery, celery_worker: TestWorkController) -> None:
244 |     """Ensure that acking early works properly."""
245 |     # Setup a new task and track which Requests are acked.
246 |     acked = []
247 | 
248 |     class AckRequest(Request):
249 |         def acknowledge(self) -> None:
250 |             acked.append(self.id)
251 | 
252 |     @celery_app.task(
253 |         base=Batches, flush_every=2, flush_interval=0.1, Request=AckRequest
254 |     )
255 |     def acks(requests: List[SimpleRequest]) -> None:
256 |         # The tasks are acked before running.
257 |         assert acked == [result_1.id, result_2.id]
258 | 
259 |     # The task is acking before completion.
260 |     assert acks.acks_late is False
261 | 
262 |     # Register the task with the worker.
263 |     celery_worker.consumer.update_strategies()
264 | 
265 |     # Call the tasks, they should ack before flush.
266 |     result_1 = acks.delay()
267 |     result_2 = acks.delay()
268 | 
269 |     assert acked == []
270 | 
271 |     # Let the worker work.
272 |     _wait_for_ping()
273 | 
274 |     # The results are still acked after running.
275 |     assert acked == [result_1.id, result_2.id]
276 | 
277 | 
278 | def test_acks_late(celery_app: Celery, celery_worker: TestWorkController) -> None:
279 |     """Ensure that acking late works properly."""
280 |     # Setup a new task and track which Requests are acked.
281 |     acked = []
282 | 
283 |     class AckRequest(Request):
284 |         def acknowledge(self) -> None:
285 |             acked.append(self.id)
286 | 
287 |     @celery_app.task(
288 |         base=Batches,
289 |         acks_late=True,
290 |         flush_every=2,
291 |         flush_interval=0.1,
292 |         Request=AckRequest,
293 |     )
294 |     def acks(requests: List[SimpleRequest]) -> None:
295 |         # When the tasks are running, nothing is acked.
296 |         assert acked == []
297 | 
298 |     # The task is acking after completion.
299 |     assert acks.acks_late is True
300 | 
301 |     # Register the task with the worker.
302 |     celery_worker.consumer.update_strategies()
303 | 
304 |     # Call the tasks, they should not ack before flush.
305 |     result_1 = acks.delay()
306 |     result_2 = acks.delay()
307 | 
308 |     assert acked == []
309 | 
310 |     # Let the worker work.
311 |     _wait_for_ping()
312 | 
313 |     # After the tasks are done, both results are acked.
314 |     assert acked == [result_1.id, result_2.id]
315 | 
316 | 
317 | def test_countdown(celery_app: Celery, celery_worker: TestWorkController) -> None:
318 |     """Ensure that countdowns work properly.
319 | 
320 |     The batch task handles only the first request initially (as the second request
321 |     is not ready). A subsequent call handles the second request.
322 |     """
323 | 
324 |     if not celery_app.conf.broker_url.startswith("memory"):
325 |         raise pytest.skip("Flaky on live brokers")
326 | 
327 |     result_1 = add.apply_async(args=(1,))
328 |     # The countdown is longer than the flush interval + first sleep, but shorter
329 |     # than the flush interval + first sleep + second sleep.
330 |     result_2 = add.apply_async(args=(2,), countdown=3)
331 | 
332 |     # The flush interval is 0.1 seconds and the retry interval is 0.5 seconds,
333 |     # this is longer.
334 |     sleep(1)
335 | 
336 |     # Let the worker work.
337 |     _wait_for_ping()
338 | 
339 |     assert result_1.get() == 1
340 |     assert result_2.state == states.PENDING
341 | 
342 |     sleep(3)
343 | 
344 |     assert result_2.get() == 2
345 | 
346 | 
347 | def test_eta(celery_app: Celery, celery_worker: TestWorkController) -> None:
348 |     """Ensure that ETAs work properly."""
349 | 
350 |     if not celery_app.conf.broker_url.startswith("memory"):
351 |         raise pytest.skip("Flaky on live brokers")
352 | 
353 |     result_1 = add.apply_async(args=(1,))
354 |     # The ETA is later than the flush interval + first sleep, but earlier
355 |     # than the flush interval + first sleep + second sleep.
356 |     result_2 = add.apply_async(args=(2,), eta=datetime.utcnow() + timedelta(seconds=3))
357 | 
358 |     # The flush interval is 0.1 seconds and the retry interval is 0.5 seconds,
359 |     # this is longer.
360 |     sleep(1)
361 | 
362 |     # Let the worker work.
363 |     _wait_for_ping()
364 | 
365 |     assert result_1.get() == 1
366 |     assert result_2.state == states.PENDING
367 | 
368 |     sleep(3)
369 | 
370 |     assert result_2.get() == 2
371 | 
--------------------------------------------------------------------------------
/celery_batches/__init__.py:
--------------------------------------------------------------------------------
1 | from itertools import count, filterfalse, tee
2 | from queue import Empty, Queue
3 | from time import monotonic
4 | from typing import (
5 |     Any,
6 |     Callable,
7 |     Collection,
8 |     Dict,
9 |     Iterable,
10 |     NoReturn,
11 |     Optional,
12 |     Set,
13 |     Tuple,
14 |     TypeVar,
15 | )
16 | 
17 | from celery_batches.trace import apply_batches_task
18 | 
19 | from celery import VERSION as CELERY_VERSION
20 | from celery import signals
21 | from celery.app import Celery
22 | from celery.app.task import Task
23 | from celery.concurrency.base import BasePool
24 | from celery.utils.imports import symbol_by_name
25 | from celery.utils.log import get_logger
26 | from celery.utils.nodenames import gethostname
27 | from celery.utils.time import timezone
28 | from celery.worker.consumer import Consumer
29 | from celery.worker.request import Request, create_request_cls
30 | from celery.worker.strategy import hybrid_to_proto2, proto1_to_proto2
31 | from kombu.asynchronous.timer import Timer, to_timestamp
32 | from kombu.message import Message
33 | from kombu.utils.uuid import uuid
34 | from vine import promise
35 | 
36 | __all__ = ["Batches"]
37 | 
38 | logger = get_logger(__name__)
39 | 
40 | 
41 | T = TypeVar("T")
42 | 
43 | 
44 | def consume_queue(queue: "Queue[T]") -> Iterable[T]:
45 |     """Iterator yielding all immediately available items in a
46 |     :class:`Queue.Queue`.
47 | 
48 |     The iterator stops as soon as the queue raises :exc:`Queue.Empty`.
49 | 50 | *Examples* 51 | 52 | >>> q = Queue() 53 | >>> for i in range(4): q.put(i) 54 | >>> list(consume_queue(q)) 55 | [0, 1, 2, 3] 56 | >>> list(consume_queue(q)) 57 | [] 58 | 59 | """ 60 | get = queue.get_nowait 61 | while 1: 62 | try: 63 | yield get() 64 | except Empty: 65 | break 66 | 67 | 68 | def partition( 69 | predicate: Callable[[T], bool], iterable: Iterable[T] 70 | ) -> Tuple[Iterable[T], Iterable[T]]: 71 | "Use a predicate to partition entries into false entries and true entries." 72 | t1, t2 = tee(iterable) 73 | return filterfalse(predicate, t1), filter(predicate, t2) 74 | 75 | 76 | class SimpleRequest: 77 | """ 78 | A request to execute a task. 79 | 80 | A list of :class:`~celery_batches.SimpleRequest` instances is provided to the 81 | batch task during execution. 82 | 83 | This must be pickleable (if using the prefork pool), but generally should 84 | have the same properties as :class:`~celery.worker.request.Request`. 85 | """ 86 | 87 | #: task id 88 | id = None 89 | 90 | #: task name 91 | name = None 92 | 93 | #: positional arguments 94 | args: Tuple[Any, ...] = () 95 | 96 | #: keyword arguments 97 | kwargs: Dict[Any, Any] = {} 98 | 99 | #: message delivery information. 100 | delivery_info = None 101 | 102 | #: worker node name 103 | hostname = None 104 | 105 | #: whether the result of this request should be ignored 106 | ignore_result = None 107 | 108 | #: used by the rpc backend when failures are reported by the parent process 109 | reply_to = None 110 | 111 | #: used similarly to reply_to 112 | correlation_id = None 113 | 114 | #: includes all of the original request headers 115 | request_dict: Optional[Dict[str, Any]] = {} 116 | 117 | #: TODO 118 | chord = None 119 | 120 | def __init__( 121 | self, 122 | id: str, 123 | name: str, 124 | args: Tuple[Any, ...], 125 | kwargs: Dict[Any, Any], 126 | delivery_info: dict, 127 | hostname: str, 128 | ignore_result: bool, 129 | reply_to: Optional[str], 130 | correlation_id: Optional[str], 131 | request_dict: Optional[Dict[str, Any]], 132 | ): 133 | self.id = id 134 | self.name = name 135 | self.args = args 136 | self.kwargs = kwargs 137 | self.delivery_info = delivery_info 138 | self.hostname = hostname 139 | self.ignore_result = ignore_result 140 | self.reply_to = reply_to 141 | self.correlation_id = correlation_id 142 | self.request_dict = request_dict 143 | 144 | @classmethod 145 | def from_request(cls, request: Request) -> "SimpleRequest": 146 | # Support both protocol v1 and v2. 147 | args, kwargs, embed = request._payload 148 | # Celery 5.1.0 added an ignore_result option. 149 | ignore_result = getattr(request, "ignore_result", False) 150 | return cls( 151 | request.id, 152 | request.name, 153 | args, 154 | kwargs, 155 | request.delivery_info, 156 | request.hostname, 157 | ignore_result, 158 | request.reply_to, 159 | request.correlation_id, 160 | request.request_dict, 161 | ) 162 | 163 | 164 | class Batches(Task): 165 | abstract = True 166 | 167 | # Disable typing since batch tasks take only a single item 168 | # (the list of SimpleRequest objects), but when calling the task it should be 169 | # possible to provide more arguments. 170 | # 171 | # This unfortunately pushes more onto the user to ensure that each call to 172 | # a batch task is using the expected signature. 173 | typing = False 174 | 175 | #: Maximum number of messages in the buffer. 176 | flush_every = 10 177 | 178 | #: Timeout in seconds before buffer is flushed anyway.
179 | flush_interval = 30 180 | 181 | def __init__(self) -> None: 182 | self._buffer: Queue[Request] = Queue() 183 | self._pending: Queue[Request] = Queue() 184 | self._count = count(1) 185 | self._tref: Optional[Timer] = None 186 | self._pool: Optional[BasePool] = None 187 | 188 | def run(self, *args: Any, **kwargs: Any) -> NoReturn: 189 | raise NotImplementedError("must implement run(requests)") 190 | 191 | def Strategy(self, task: "Batches", app: Celery, consumer: Consumer) -> Callable: 192 | # See celery.worker.strategy.default for inspiration. 193 | # 194 | # This adds to a buffer at the end, instead of executing the task as 195 | # the default strategy does. 196 | # 197 | # See Batches._do_flush for ETA handling. 198 | self._pool = consumer.pool 199 | 200 | hostname = consumer.hostname 201 | connection_errors = consumer.connection_errors 202 | 203 | eventer = consumer.event_dispatcher 204 | 205 | Request = symbol_by_name(task.Request) 206 | # Celery 5.1 added the app argument to create_request_cls. 207 | if CELERY_VERSION < (5, 1): 208 | Req = create_request_cls(Request, task, consumer.pool, hostname, eventer) 209 | else: 210 | Req = create_request_cls( 211 | Request, task, consumer.pool, hostname, eventer, app=app 212 | ) 213 | 214 | timer = consumer.timer 215 | put_buffer = self._buffer.put 216 | flush_buffer = self._do_flush 217 | 218 | def task_message_handler( 219 | message: Message, 220 | body: Optional[Dict[str, Any]], 221 | ack: promise, 222 | reject: promise, 223 | callbacks: Set, 224 | **kw: Any, 225 | ) -> None: 226 | if body is None and "args" not in message.payload: 227 | body, headers, decoded, utc = ( 228 | message.body, 229 | message.headers, 230 | False, 231 | app.uses_utc_timezone(), 232 | ) 233 | else: 234 | if "args" in message.payload: 235 | body, headers, decoded, utc = hybrid_to_proto2( 236 | message, message.payload 237 | ) 238 | else: 239 | body, headers, decoded, utc = proto1_to_proto2(message, body) 240 | 241 | req = Req( 242 | message, 243 | on_ack=ack, 244 | on_reject=reject, 245 | app=app, 246 | hostname=hostname, 247 | eventer=eventer, 248 | task=task, 249 | body=body, 250 | headers=headers, 251 | decoded=decoded, 252 | utc=utc, 253 | connection_errors=connection_errors, 254 | ) 255 | put_buffer(req) 256 | 257 | signals.task_received.send(sender=consumer, request=req) 258 | 259 | if self._tref is None: # first request starts flush timer. 260 | self._tref = timer.call_repeatedly(self.flush_interval, flush_buffer) 261 | 262 | if not next(self._count) % self.flush_every: 263 | flush_buffer() 264 | 265 | return task_message_handler 266 | 267 | def apply( 268 | self, 269 | args: Optional[Tuple[Any, ...]] = None, 270 | kwargs: Optional[dict] = None, 271 | *_args: Any, 272 | **options: Any, 273 | ) -> Any: 274 | """ 275 | Execute the task synchronously as a batch of size 1. 276 | 277 | Arguments: 278 | args: positional arguments passed on to the task. kwargs: keyword arguments passed on to the task. 279 | Returns: 280 | celery.result.EagerResult: pre-evaluated result.
281 | """ 282 | request = SimpleRequest( 283 | id=options.get("task_id", uuid()), 284 | name="batch request", 285 | args=args or (), 286 | kwargs=kwargs or {}, 287 | delivery_info={ 288 | "is_eager": True, 289 | "exchange": options.get("exchange"), 290 | "routing_key": options.get("routing_key"), 291 | "priority": options.get("priority"), 292 | }, 293 | hostname=gethostname(), 294 | ignore_result=options.get("ignore_result", False), 295 | reply_to=None, 296 | correlation_id=None, 297 | request_dict={}, 298 | ) 299 | 300 | return super().apply(([request],), {}, *_args, **options) 301 | 302 | def _do_flush(self) -> None: 303 | logger.debug("Batches: Wake-up to flush buffers...") 304 | 305 | ready_requests = [] 306 | app = self.app 307 | to_system_tz = timezone.to_system 308 | now = monotonic() 309 | 310 | all_requests = list(consume_queue(self._buffer)) + list( 311 | consume_queue(self._pending) 312 | ) 313 | for req in all_requests: 314 | # Similar to logic in celery.worker.strategy.default. 315 | if req.eta: 316 | try: 317 | if req.utc: 318 | eta = to_timestamp(to_system_tz(req.eta)) 319 | else: 320 | eta = to_timestamp(req.eta, app.timezone) 321 | except (OverflowError, ValueError) as exc: 322 | logger.error( 323 | "Couldn't convert ETA %r to timestamp: %r. Task: %r", 324 | req.eta, 325 | exc, 326 | req.info(safe=True), 327 | exc_info=True, 328 | ) 329 | req.reject(requeue=False) 330 | continue 331 | 332 | if eta <= now: 333 | # The ETA has elapsed; the request is ready. 334 | ready_requests.append(req) 335 | else: 336 | # The ETA has not elapsed; add to the pending queue. 337 | self._pending.put(req) 338 | else: 339 | # The request does not have an ETA; it is ready immediately. 340 | ready_requests.append(req) 341 | 342 | if len(ready_requests) > 0: 343 | logger.debug("Batches: Ready buffer complete: %s", len(ready_requests)) 344 | self.flush(ready_requests) 345 | self._count = count(self._pending.qsize() + 1) 346 | 347 | if not ready_requests and self._pending.qsize() == 0: 348 | logger.debug("Batches: Canceling timer: Nothing in buffers.") 349 | if self._tref: 350 | self._tref.cancel() # cancel timer. 351 | self._tref = None 352 | 353 | def flush(self, requests: Collection[Request]) -> Any: 354 | acks_early, acks_late = partition(lambda r: bool(r.task.acks_late), requests) 355 | 356 | # Ensure the requests can be serialized using pickle for the prefork pool. 357 | serializable_requests = ([SimpleRequest.from_request(r) for r in requests],) 358 | 359 | def on_accepted(pid: int, time_accepted: float) -> None: 360 | for req in acks_early: 361 | req.acknowledge() 362 | 363 | def on_return(result: Optional[Any]) -> None: 364 | for req in acks_late: 365 | req.acknowledge() 366 | 367 | return self._pool.apply_async( 368 | apply_batches_task, 369 | (self, serializable_requests, 0, None), 370 | accept_callback=on_accepted, 371 | callback=on_return, 372 | ) 373 | --------------------------------------------------------------------------------
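A minimal usage sketch of the code above (illustrative only: the app name, broker/backend URLs, task name, and flush values are assumptions, not files from this repository). A Batches task receives a list of SimpleRequest objects, one per original call, and can record a per-invocation result through the result backend:

    from celery import Celery
    from celery_batches import Batches

    app = Celery("example", broker="memory://", backend="cache+memory://")

    @app.task(base=Batches, flush_every=50, flush_interval=5)
    def double(requests):
        # `requests` is a list of SimpleRequest objects, one per .delay() call.
        for request in requests:
            result = request.args[0] * 2
            # Store a result per original task invocation so callers can .get() it.
            app.backend.mark_as_done(request.id, result, request=request)

    # Callers use the normal Celery API; the worker buffers up to 50 calls
    # (flush_every) or flushes after 5 seconds (flush_interval), whichever
    # comes first:
    # double.delay(21)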