├── tests ├── __init__.py ├── contrib │ ├── __init__.py │ ├── test_django.py │ ├── test_datadog.py │ └── test_flask.py ├── functional │ ├── __init__.py │ └── test_concurrency.py ├── docker-compose.yml ├── test_signals.py ├── conftest.py ├── test_utils.py ├── test_queuey.py ├── test_memory_brokers.py ├── test_engine.py ├── test_worker.py ├── test_job.py ├── test_brokers.py └── test_task.py ├── doc ├── _static │ └── .gitkeep ├── hacking │ ├── internals.rst │ └── contributing.rst ├── user │ ├── jobs.rst │ ├── install.rst │ ├── queues.rst │ ├── engine.rst │ ├── faq.rst │ ├── asyncio.rst │ ├── signals.rst │ ├── production.rst │ ├── tasks.rst │ ├── design.rst │ └── integrations.rst ├── index.rst ├── Makefile └── conf.py ├── spinach ├── brokers │ ├── __init__.py │ ├── redis_scripts │ │ ├── __init__.py │ │ ├── flush.lua │ │ ├── deregister.lua │ │ ├── remove_job_from_running.lua │ │ ├── enqueue_job.lua │ │ ├── set_concurrency_keys.lua │ │ ├── register_periodic_tasks.lua │ │ ├── get_jobs_from_queue.lua │ │ ├── enqueue_jobs_from_dead_broker.lua │ │ └── move_future_jobs.lua │ ├── base.py │ └── memory.py ├── contrib │ ├── __init__.py │ ├── spinachd │ │ ├── management │ │ │ ├── __init__.py │ │ │ └── commands │ │ │ │ ├── __init__.py │ │ │ │ └── spinach.py │ │ ├── __init__.py │ │ ├── signals.py │ │ ├── settings.py │ │ ├── mail.py │ │ ├── tasks.py │ │ └── apps.py │ ├── datadog.py │ ├── sentry_sdk_spinach.py │ └── flask_spinach.py ├── __init__.py ├── exc.py ├── const.py ├── signals.py ├── utils.py ├── queuey.py ├── worker.py ├── job.py ├── engine.py └── task.py ├── pytest.ini ├── .coveragerc ├── .gitignore ├── .editorconfig ├── examples ├── flaskapp.py ├── periodic.py ├── quickstart.py ├── queues.py └── asyncio_workers.py ├── tox.ini ├── .github └── workflows │ ├── python-publish.yml │ └── tests.yml ├── LICENSE ├── README.rst └── setup.py /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /doc/_static/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /spinach/brokers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /spinach/contrib/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/contrib/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/functional/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | threadleak = True -------------------------------------------------------------------------------- /spinach/brokers/redis_scripts/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /spinach/contrib/spinachd/management/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /spinach/contrib/spinachd/management/commands/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | relative_files = True 3 | include = 4 | spinach/* -------------------------------------------------------------------------------- /spinach/contrib/spinachd/__init__.py: -------------------------------------------------------------------------------- 1 | from .apps import spin 2 | 3 | default_app_config = 'spinach.contrib.spinachd.apps.SpinachdConfig' 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .pyc 2 | __pycache__ 3 | 4 | spinach.egg-info 5 | .cache 6 | dist 7 | build 8 | doc/_build 9 | 10 | .coverage 11 | .pytest_cache 12 | 13 | tags 14 | -------------------------------------------------------------------------------- /spinach/brokers/redis_scripts/flush.lua: -------------------------------------------------------------------------------- 1 | local namespace = ARGV[1] 2 | 3 | local pattern = string.format("%s/*", namespace) 4 | 5 | for _, key in ipairs(redis.call('keys', pattern)) do 6 | redis.call('del', key) 7 | end 8 | -------------------------------------------------------------------------------- /tests/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '2.1' 2 | services: 3 | redis: 4 | image: docker.io/redis:latest 5 | command: "--appendonly yes" 6 | ports: 7 | - 6379:6379 8 | volumes: 9 | - /tmp/redis-data:/data 10 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | # top-most EditorConfig file 2 | root = true 3 | 4 | [*.{py,rst}] 5 | charset = utf-8 6 | insert_final_newline = true 7 | indent_style = space 8 | indent_size = 4 9 | trim_trailing_whitespace = true 10 | 11 | [*.py] 12 | max_line_length = 79 13 | 14 | [*.rst] 15 | max_line_length = 99 16 | -------------------------------------------------------------------------------- /spinach/__init__.py: -------------------------------------------------------------------------------- 1 | from .brokers.memory import MemoryBroker 2 | from .brokers.redis import RedisBroker 3 | from .const import VERSION 4 | from .engine import Engine 5 | from .task import Tasks, Batch, RetryException, AbortException 6 | from .worker import ThreadWorkers, AsyncioWorkers 7 | 8 | __version__ = VERSION 9 | -------------------------------------------------------------------------------- /spinach/brokers/redis_scripts/deregister.lua: -------------------------------------------------------------------------------- 1 | local broker_id = ARGV[1] 2 | local all_brokers_hash_key = ARGV[2] 3 | local all_brokers_zset_key = ARGV[3] 4 | 5 | -- Remove the broker from the list of brokers 6 | redis.call('hdel', all_brokers_hash_key, broker_id) 7 | redis.call('zrem', all_brokers_zset_key, broker_id) 8 | -------------------------------------------------------------------------------- /doc/hacking/internals.rst: 
-------------------------------------------------------------------------------- 1 | .. _internals: 2 | 3 | Internals 4 | ========= 5 | 6 | This page provides the basic information needed to start reading and modifying the source code of 7 | Spinach. It presents how it works inside and how the project is designed. 8 | 9 | .. todo:: Document how Spinach works internally 10 | -------------------------------------------------------------------------------- /spinach/exc.py: -------------------------------------------------------------------------------- 1 | class SpinachError(Exception): 2 | """Base class for other Spinach exceptions.""" 3 | 4 | 5 | class UnknownTask(SpinachError): 6 | """Task name is not registered with the Engine.""" 7 | 8 | 9 | class InvalidJobSignatureError(SpinachError): 10 | """Job does not have proper arguments to execute the task function.""" 11 | -------------------------------------------------------------------------------- /examples/flaskapp.py: -------------------------------------------------------------------------------- 1 | from flask import Flask 2 | from spinach.contrib.flask_spinach import Spinach 3 | 4 | app = Flask(__name__) 5 | spinach = Spinach(app) 6 | 7 | 8 | @spinach.task(name='say_hello') 9 | def say_hello(): 10 | print('Hello from a task') 11 | 12 | 13 | @app.route('/') 14 | def home(): 15 | spinach.schedule('say_hello') 16 | return 'Hello from HTTP' 17 | -------------------------------------------------------------------------------- /examples/periodic.py: -------------------------------------------------------------------------------- 1 | from datetime import timedelta 2 | 3 | from spinach import Engine, MemoryBroker 4 | 5 | spin = Engine(MemoryBroker()) 6 | every_5_sec = timedelta(seconds=5) 7 | 8 | 9 | @spin.task(name='make_coffee', periodicity=every_5_sec) 10 | def make_coffee(): 11 | print("Making coffee...") 12 | 13 | 14 | print('Starting workers, ^C to quit') 15 | spin.start_workers() 16 | -------------------------------------------------------------------------------- /examples/quickstart.py: -------------------------------------------------------------------------------- 1 | from spinach import Engine, MemoryBroker 2 | 3 | spin = Engine(MemoryBroker()) 4 | 5 | 6 | @spin.task(name='compute') 7 | def compute(a, b): 8 | print('Computed {} + {} = {}'.format(a, b, a + b)) 9 | 10 | 11 | # Schedule a job to be executed ASAP 12 | spin.schedule(compute, 5, 3) 13 | 14 | print('Starting workers, ^C to quit') 15 | spin.start_workers() 16 | -------------------------------------------------------------------------------- /doc/user/jobs.rst: -------------------------------------------------------------------------------- 1 | .. _jobs: 2 | 3 | Jobs 4 | ==== 5 | 6 | A :class:`Job` represents a specific execution of a task. To make an analogy with Python, 7 | a :class:`Task` gets instantiated into many :class:`Job`, like a `class` that gets instantiated 8 | into many `objects`. 9 | 10 | 11 | Job 12 | --- 13 | 14 | .. autoclass:: spinach.job.Job() 15 | 16 | 17 | Job Status 18 | ---------- 19 | 20 | .. 
autoclass:: spinach.job.JobStatus 21 | :members: 22 | :undoc-members: 23 | -------------------------------------------------------------------------------- /spinach/contrib/spinachd/signals.py: -------------------------------------------------------------------------------- 1 | from django.db import reset_queries, close_old_connections 2 | 3 | from spinach import signals 4 | 5 | from .apps import spin 6 | 7 | 8 | @signals.job_started.connect_via(spin.namespace) 9 | def job_started(*args, job=None, **kwargs): 10 | reset_queries() 11 | close_old_connections() 12 | 13 | 14 | @signals.job_finished.connect_via(spin.namespace) 15 | def job_finished(*args, job=None, **kwargs): 16 | close_old_connections() 17 | -------------------------------------------------------------------------------- /spinach/contrib/spinachd/settings.py: -------------------------------------------------------------------------------- 1 | from django.conf import settings 2 | 3 | from spinach import RedisBroker 4 | from spinach.const import DEFAULT_NAMESPACE 5 | 6 | 7 | SPINACH_BROKER = getattr(settings, 'SPINACH_BROKER', RedisBroker()) 8 | SPINACH_NAMESPACE = getattr(settings, 'SPINACH_NAMESPACE', DEFAULT_NAMESPACE) 9 | SPINACH_ACTUAL_EMAIL_BACKEND = getattr( 10 | settings, 11 | 'SPINACH_ACTUAL_EMAIL_BACKEND', 12 | 'django.core.mail.backends.smtp.EmailBackend' 13 | ) 14 | SPINACH_CLEAR_SESSIONS_PERIODICITY = getattr( 15 | settings, 16 | 'SPINACH_CLEAR_SESSIONS_PERIODICITY', 17 | None 18 | ) 19 | -------------------------------------------------------------------------------- /examples/queues.py: -------------------------------------------------------------------------------- 1 | import time 2 | import logging 3 | 4 | from spinach import Engine, MemoryBroker 5 | 6 | 7 | logging.basicConfig( 8 | format='%(asctime)s - %(threadName)s %(levelname)s: %(message)s', 9 | level=logging.DEBUG 10 | ) 11 | spin = Engine(MemoryBroker()) 12 | 13 | 14 | @spin.task(name='fast', queue='high-priority') 15 | def fast(): 16 | time.sleep(1) 17 | 18 | 19 | @spin.task(name='slow', queue='low-priority') 20 | def slow(): 21 | time.sleep(10) 22 | 23 | 24 | spin.schedule(slow) 25 | spin.schedule(fast) 26 | 27 | spin.start_workers(number=1, queue='high-priority', stop_when_queue_empty=True) 28 | -------------------------------------------------------------------------------- /spinach/brokers/redis_scripts/remove_job_from_running.lua: -------------------------------------------------------------------------------- 1 | local running_jobs_key = ARGV[1] 2 | local max_concurrency_key = ARGV[2] 3 | local current_concurrency_key = ARGV[3] 4 | local job_json = ARGV[4] 5 | 6 | local job = cjson.decode(job_json) 7 | 8 | -- Remove the job from the list of running jobs. 9 | redis.call('hdel', running_jobs_key, job['id']) 10 | 11 | -- Decrement current concurrency if max concurrency set on the Task. 
12 | local max_concurrency = tonumber(redis.call('hget', max_concurrency_key, job['task_name'])) 13 | if max_concurrency ~= nil and max_concurrency ~= -1 then 14 | redis.call('hincrby', current_concurrency_key, job['task_name'], -1) 15 | end 16 | -------------------------------------------------------------------------------- /spinach/const.py: -------------------------------------------------------------------------------- 1 | VERSION = '0.0.25' 2 | 3 | DEFAULT_QUEUE = 'spinach' 4 | DEFAULT_NAMESPACE = 'spinach' 5 | DEFAULT_MAX_RETRIES = 0 6 | DEFAULT_ENQUEUE_JOB_RETRIES = 4 7 | DEFAULT_WORKER_NUMBER = 5 8 | 9 | FUTURE_JOBS_KEY = '_future-jobs' 10 | RUNNING_JOBS_KEY = '_running-jobs-on-broker-{}' 11 | NOTIFICATIONS_KEY = '_notifications' 12 | PERIODIC_TASKS_HASH_KEY = '_periodic_tasks_hash' 13 | PERIODIC_TASKS_QUEUE_KEY = '_periodic_tasks_queue' 14 | ALL_BROKERS_HASH_KEY = '_all_brokers_hash' 15 | ALL_BROKERS_ZSET_KEY = '_all_brokers_zset' 16 | MAX_CONCURRENCY_KEY = '_max_concurrency' 17 | CURRENT_CONCURRENCY_KEY = '_current_concurrency' 18 | 19 | WAIT_FOR_EVENT_MAX_SECONDS = 60 20 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py3, pep8 3 | 4 | [testenv] 5 | basepython = python3 6 | envdir = 7 | py3: {toxworkdir}/py3 8 | pep8: {toxworkdir}/py3 9 | usedevelop = True 10 | allowlist_externals = 11 | docker-compose 12 | deps = 13 | pytest 14 | pytest-cov 15 | pytest-threadleak 16 | pycodestyle 17 | flake8 18 | flask 19 | django 20 | 21 | [testenv:pep8] 22 | commands = 23 | pycodestyle --ignore=E252,W503,W504 spinach tests 24 | 25 | [testenv:py3] 26 | commands_pre = docker-compose -f {toxinidir}/tests/docker-compose.yml up -d 27 | commands = pytest tests {posargs} 28 | commands_post = docker-compose -f {toxinidir}/tests/docker-compose.yml down 29 | -------------------------------------------------------------------------------- /.github/workflows/python-publish.yml: -------------------------------------------------------------------------------- 1 | name: Upload Python Package 2 | 3 | on: 4 | release: 5 | types: [created] 6 | 7 | jobs: 8 | deploy: 9 | 10 | runs-on: ubuntu-latest 11 | 12 | steps: 13 | - uses: actions/checkout@v4 14 | - name: Set up Python 15 | uses: actions/setup-python@v5 16 | with: 17 | python-version: '3.x' 18 | - name: Install dependencies 19 | run: | 20 | python -m pip install --upgrade pip 21 | pip install setuptools wheel twine 22 | - name: Build and publish 23 | env: 24 | TWINE_USERNAME: __token__ 25 | TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} 26 | run: | 27 | python setup.py sdist bdist_wheel 28 | twine upload dist/* 29 | -------------------------------------------------------------------------------- /doc/user/install.rst: -------------------------------------------------------------------------------- 1 | .. _install: 2 | 3 | Installation 4 | ============ 5 | 6 | Prerequisites 7 | ------------- 8 | 9 | Spinach is written in Python 3; before using it, make sure you have a Python 3.8+ 10 | interpreter on your system. 11 | 12 | Pip 13 | --- 14 | 15 | If you are familiar with the Python ecosystem, you won't be surprised that Spinach can be installed 16 | with:: 17 | 18 | $ pip install spinach 19 | 20 | That's it, you can call it a day! 21 | 22 | From Source 23 | ----------- 24 | 25 | Spinach is developed on GitHub; you can find the code at `NicolasLM/spinach 26 | <https://github.com/NicolasLM/spinach>`_.
27 | 28 | You can clone the public repository:: 29 | 30 | $ git clone https://github.com/NicolasLM/spinach.git 31 | 32 | Once you have the sources, simply install it with:: 33 | 34 | $ cd spinach 35 | $ pip install -e . 36 | -------------------------------------------------------------------------------- /tests/test_signals.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import Mock 2 | 3 | import pytest 4 | 5 | from spinach import signals 6 | 7 | 8 | def test_signal_no_receiver(): 9 | signals.job_started.send() 10 | 11 | 12 | def test_signal_multiple_send_args(): 13 | with pytest.raises(TypeError): 14 | signals.job_started.send('foo', 'bar') 15 | 16 | 17 | def test_signal_with_explicit_sender(): 18 | sender = object() 19 | mock_receiver = Mock(spec={}) 20 | signals.job_started.connect(mock_receiver) 21 | 22 | signals.job_started.send(sender) 23 | mock_receiver.assert_called_once_with(sender) 24 | 25 | 26 | def test_signal_receiver_exception(): 27 | mock_receiver = Mock(spec={}, side_effect=RuntimeError) 28 | signals.job_started.connect(mock_receiver) 29 | 30 | signals.job_started.send() 31 | 32 | 33 | def test_signal_repr(): 34 | assert repr(signals.job_started) == 'SafeNamedSignal "job_started"' 35 | -------------------------------------------------------------------------------- /tests/functional/test_concurrency.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import time 3 | 4 | from spinach.brokers.memory import MemoryBroker 5 | from spinach.brokers.redis import RedisBroker 6 | from spinach.engine import Engine 7 | 8 | 9 | @pytest.fixture(params=[MemoryBroker, RedisBroker]) 10 | def spin(request): 11 | broker = request.param 12 | spin = Engine(broker(), namespace='tests') 13 | yield spin 14 | 15 | 16 | def test_concurrency_limit(spin): 17 | count = 0 18 | 19 | @spin.task(name='do_something', max_retries=10, max_concurrency=1) 20 | def do_something(index): 21 | nonlocal count 22 | assert index == count 23 | count += 1 24 | 25 | for i in range(0, 5): 26 | spin.schedule(do_something, i) 27 | 28 | # Start two workers; test that only one job runs at once as per the 29 | # Task definition. 30 | spin.start_workers(number=2, block=True, stop_when_queue_empty=True) 31 | assert count == 5 32 | -------------------------------------------------------------------------------- /examples/asyncio_workers.py: -------------------------------------------------------------------------------- 1 | import aiohttp 2 | from spinach import Engine, MemoryBroker, Batch, AsyncioWorkers 3 | 4 | spin = Engine(MemoryBroker()) 5 | 6 | 7 | @spin.task(name='get_pokemon_name') 8 | async def get_pokemon_name(pokemon_id: int): 9 | """Call an HTTP API to retrieve a pokemon name by its ID.""" 10 | url = f'https://pokeapi.co/api/v2/pokemon/{pokemon_id}' 11 | async with aiohttp.ClientSession() as session: 12 | async with session.get(url) as response: 13 | pokemon = await response.json() 14 | 15 | print(f'Pokemon #{pokemon_id} is {pokemon["name"]}') 16 | 17 | 18 | # Schedule a batch of 150 tasks to retrieve the name of the 19 | # first 150 pokemons. 
20 | batch = Batch() 21 | for pokemon_id in range(1, 151): 22 | batch.schedule(get_pokemon_name, pokemon_id) 23 | spin.schedule_batch(batch) 24 | 25 | # Start the asyncio workers and process the tasks 26 | spin.start_workers( 27 | number=256, 28 | workers_class=AsyncioWorkers, 29 | stop_when_queue_empty=True 30 | ) 31 | -------------------------------------------------------------------------------- /doc/user/queues.rst: -------------------------------------------------------------------------------- 1 | .. _queues: 2 | 3 | Queues 4 | ====== 5 | 6 | Queues are an optional feature that allows directing a set of tasks to specific workers. 7 | 8 | Queues are useful when different tasks have different usage patterns, for instance one task being 9 | fast and high priority while another task is slow and low-priority. To prevent the slow task from 10 | blocking the execution of the fast one, each task can be attached to its own queue: 11 | 12 | .. literalinclude:: ../../examples/queues.py 13 | 14 | The task decorator accepts an optional queue name that binds the task to a specific queue. 15 | Likewise, passing a queue name to `start_workers` restricts workers to executing only tasks of this 16 | particular queue. 17 | 18 | .. note:: By default all tasks and all workers use the ``spinach`` queue 19 | 20 | .. note:: Namespaces and queues are different concepts. While queues share the same Spinach 21 | :class:`Engine`, namespaces make two Spinach Engines invisible to each other while still using 22 | the same broker. 23 | -------------------------------------------------------------------------------- /tests/contrib/test_django.py: -------------------------------------------------------------------------------- 1 | import django 2 | import django.conf 3 | from django.core.mail import send_mail 4 | from django.core.management import call_command 5 | 6 | from spinach import MemoryBroker 7 | 8 | 9 | # capsys fixture allows to capture stdout 10 | def test_django_app(capsys): 11 | django.conf.settings.configure( 12 | LOGGING_CONFIG=None, 13 | INSTALLED_APPS=('spinach.contrib.spinachd',), 14 | EMAIL_BACKEND='spinach.contrib.spinachd.mail.BackgroundEmailBackend', 15 | SPINACH_BROKER=MemoryBroker(), 16 | SPINACH_ACTUAL_EMAIL_BACKEND='django.core.mail.backends.' 
17 | 'console.EmailBackend' 18 | ) 19 | django.setup() 20 | 21 | from spinach.contrib.spinachd import spin 22 | spin.schedule('spinachd:clear_expired_sessions') 23 | send_mail('Subject', 'Hello from email', 'sender@example.com', 24 | ['receiver@example.com']) 25 | 26 | call_command('spinach', '--stop-when-queue-empty') 27 | 28 | captured = capsys.readouterr() 29 | assert 'Hello from email' in captured.out 30 | -------------------------------------------------------------------------------- /tests/contrib/test_datadog.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import Mock 2 | import time 3 | 4 | import pytest 5 | 6 | from spinach import Engine, MemoryBroker, Tasks 7 | from spinach.contrib.datadog import register_datadog 8 | 9 | 10 | @pytest.fixture 11 | def spin(): 12 | tasks = Tasks() 13 | 14 | @tasks.task(name='success') 15 | def success(): 16 | return 17 | 18 | @tasks.task(name='fail') 19 | def fail(): 20 | raise RuntimeError('failing task') 21 | 22 | s = Engine(MemoryBroker(), namespace='tests-datadog') 23 | s.attach_tasks(tasks) 24 | s.start_workers(number=1, block=False) 25 | yield s 26 | s.stop_workers() 27 | 28 | 29 | def test_datadog(spin): 30 | mock_tracer = Mock() 31 | mock_span = Mock() 32 | mock_tracer.current_root_span.return_value = mock_span 33 | 34 | register_datadog(tracer=mock_tracer, namespace='tests-datadog') 35 | 36 | spin.schedule('success') 37 | time.sleep(0.1) 38 | mock_tracer.trace.assert_called_once_with( 39 | 'spinach.task', service='spinach', span_type='worker', 40 | resource='success' 41 | ) 42 | mock_span.finish.assert_called_once_with() 43 | mock_span.set_traceback.assert_not_called() 44 | 45 | spin.schedule('fail') 46 | time.sleep(0.1) 47 | mock_span.set_traceback.assert_called_once_with() 48 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: Run tests 2 | 3 | on: ["push", "pull_request"] 4 | 5 | jobs: 6 | 7 | test: 8 | 9 | runs-on: ubuntu-latest 10 | strategy: 11 | matrix: 12 | python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13", "pypy3.10"] 13 | 14 | services: 15 | redis: 16 | image: redis 17 | ports: 18 | - 6379:6379 19 | 20 | steps: 21 | - uses: actions/checkout@v4 22 | 23 | - name: Set up Python ${{ matrix.python-version }} 24 | uses: actions/setup-python@v5 25 | with: 26 | python-version: ${{ matrix.python-version }} 27 | 28 | - name: Install dependencies 29 | run: | 30 | python -m pip install --upgrade pip 31 | pip install -e .[tests] 32 | 33 | - name: Lint 34 | run: | 35 | pycodestyle --ignore=E252,W503,W504 spinach tests 36 | 37 | - name: Test with pytest 38 | run: | 39 | pytest -v --cov=spinach tests/ 40 | 41 | - name: Coveralls 42 | uses: AndreMiras/coveralls-python-action@develop 43 | with: 44 | parallel: true 45 | 46 | 47 | coveralls_finish: 48 | needs: test 49 | runs-on: ubuntu-latest 50 | steps: 51 | - name: Coveralls Finished 52 | uses: AndreMiras/coveralls-python-action@develop 53 | with: 54 | parallel-finished: true 55 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2017-2021, Nicolas Le Manchet 2 | All rights reserved. 
3 | 4 | Redistribution and use in source and binary forms, with or without modification, 5 | are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | 2. Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 15 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 16 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 17 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR 18 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 19 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 20 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 21 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 23 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 | -------------------------------------------------------------------------------- /doc/user/engine.rst: -------------------------------------------------------------------------------- 1 | .. _engine: 2 | 3 | Engine 4 | ====== 5 | 6 | The Spinach :class:`Engine` is what connects tasks, jobs, brokers and workers together. 7 | 8 | It is possible, but unusual, to have multiple Engines running in the same Python interpreter. 9 | 10 | .. autoclass:: spinach.engine.Engine 11 | :members: 12 | 13 | Namespace 14 | --------- 15 | 16 | Namespaces identify and isolate multiple Spinach engines running on the same Python 17 | interpreter and/or sharing the same Redis server. 18 | 19 | Having multiple engines on the same interpreter is rare but can happen when using the Flask 20 | integration with an app factory. In this case using different namespaces is important to prevent 21 | signals sent from one engine from being received by another engine. 22 | 23 | When multiple Spinach Engines use the same Redis server, for example when production and staging 24 | share the same database, different namespaces must be used to make sure they do not step on each 25 | other's toes. 26 | 27 | The production application would contain:: 28 | 29 | spin = Engine(RedisBroker(), namespace='prod') 30 | 31 | While the staging application would contain:: 32 | 33 | spin = Engine(RedisBroker(), namespace='stg') 34 | 35 | .. note:: Using different Redis database numbers (0, 1, 2...) for different environments is not 36 | enough as Redis pubsubs are shared among databases. Namespaces solve this problem.
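For illustration, a minimal sketch of two namespaced Engines sharing the same Redis server (the task name used here is only a placeholder)::

    from spinach import Engine, RedisBroker

    prod = Engine(RedisBroker(), namespace='prod')
    stg = Engine(RedisBroker(), namespace='stg')

    @prod.task(name='send_report')
    def send_report():
        ...

    # Jobs scheduled on the 'prod' Engine are only picked up by workers
    # started from the 'prod' Engine; the 'stg' Engine never sees them.
    prod.schedule(send_report)
    prod.start_workers()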
37 | -------------------------------------------------------------------------------- /spinach/contrib/spinachd/mail.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import pickle 3 | from typing import List 4 | import zlib 5 | 6 | from django.core.mail import EmailMessage 7 | from django.core.mail.backends.base import BaseEmailBackend 8 | 9 | from .tasks import tasks, send_emails 10 | 11 | 12 | class BackgroundEmailBackend(BaseEmailBackend): 13 | 14 | def send_messages(self, messages): 15 | msg_count = 0 16 | for message in messages: 17 | message.message() # .message() triggers header validation 18 | msg_count += 1 19 | messages = serialize_email_messages(messages) 20 | tasks.schedule(send_emails, messages) 21 | 22 | return msg_count 23 | 24 | 25 | def serialize_email_messages(messages: List[EmailMessage]): 26 | """Serialize EmailMessages to be passed as task argument. 27 | 28 | Pickle is used because serializing an EmailMessage to json can be a bit 29 | tricky and would probably break if Django modifies the structure of the 30 | object in the future. 31 | """ 32 | return [ 33 | base64.b64encode(zlib.compress(pickle.dumps(m, protocol=4))).decode() 34 | for m in messages 35 | ] 36 | 37 | 38 | def deserialize_email_messages(messages: List[str]): 39 | """Deserialize EmailMessages passed as task argument.""" 40 | return [ 41 | pickle.loads(zlib.decompress(base64.b64decode(m))) 42 | for m in messages 43 | ] 44 | -------------------------------------------------------------------------------- /spinach/contrib/spinachd/tasks.py: -------------------------------------------------------------------------------- 1 | from importlib import import_module 2 | from logging import getLogger 3 | from typing import List 4 | 5 | from django.apps import apps 6 | from django.conf import settings 7 | from django.core.mail import get_connection 8 | 9 | from spinach import Tasks 10 | 11 | from .settings import ( 12 | SPINACH_ACTUAL_EMAIL_BACKEND, 13 | SPINACH_CLEAR_SESSIONS_PERIODICITY as PERIODICITY 14 | ) 15 | 16 | tasks = Tasks() 17 | logger = getLogger(__name__) 18 | 19 | 20 | @tasks.task(name='spinachd:send_emails') 21 | def send_emails(messages: List[str]): 22 | from .mail import deserialize_email_messages 23 | messages = deserialize_email_messages(messages) 24 | connection = get_connection(SPINACH_ACTUAL_EMAIL_BACKEND) 25 | logger.info('Sending %d emails using %s', len(messages), 26 | SPINACH_ACTUAL_EMAIL_BACKEND) 27 | connection.send_messages(messages) 28 | 29 | 30 | @tasks.task(name='spinachd:clear_expired_sessions', periodicity=PERIODICITY) 31 | def clear_expired_sessions(): 32 | if not apps.is_installed('django.contrib.sessions'): 33 | logger.info('django.contrib.sessions not installed, ' 34 | 'not clearing expired sessions') 35 | return 36 | 37 | engine = import_module(settings.SESSION_ENGINE) 38 | try: 39 | engine.SessionStore.clear_expired() 40 | except NotImplementedError: 41 | logger.info("Session engine '%s' doesn't support clearing " 42 | "expired sessions", settings.SESSION_ENGINE) 43 | -------------------------------------------------------------------------------- /spinach/contrib/spinachd/management/commands/spinach.py: -------------------------------------------------------------------------------- 1 | from django.core.management.base import BaseCommand 2 | 3 | from spinach.const import DEFAULT_QUEUE, DEFAULT_WORKER_NUMBER 4 | from spinach.contrib.datadog import register_datadog_if_module_patched 5 | 6 | from ...apps import spin 7 | 
8 | 9 | class Command(BaseCommand): 10 | help = 'Run Spinach workers' 11 | 12 | def add_arguments(self, parser): 13 | parser.add_argument( 14 | '--threads', 15 | dest='threads', 16 | type=int, 17 | default=DEFAULT_WORKER_NUMBER, 18 | help='Number of worker threads to launch' 19 | ) 20 | parser.add_argument( 21 | '--queue', 22 | dest='queue', 23 | default=DEFAULT_QUEUE, 24 | help='Queue to consume' 25 | ) 26 | parser.add_argument( 27 | '--stop-when-queue-empty', 28 | dest='stop_when_queue_empty', 29 | default=False, 30 | action='store_true', 31 | help='Stop workers once the queue is empty' 32 | ) 33 | 34 | def handle(self, *args, **options): 35 | # Use the Datadog integration if Datadog is already used 36 | # to trace Django. 37 | register_datadog_if_module_patched( 38 | 'django', 39 | namespace=spin.namespace 40 | ) 41 | 42 | spin.start_workers( 43 | number=options['threads'], 44 | queue=options['queue'], 45 | stop_when_queue_empty=options['stop_when_queue_empty'] 46 | ) 47 | -------------------------------------------------------------------------------- /spinach/brokers/redis_scripts/enqueue_job.lua: -------------------------------------------------------------------------------- 1 | -- idempotency protected script, do not remove comment 2 | local idempotency_token = ARGV[1] 3 | local notifications = ARGV[2] 4 | local running_jobs_key = ARGV[3] 5 | local namespace = ARGV[4] 6 | local future_jobs = ARGV[5] 7 | local max_concurrency_key = ARGV[6] 8 | local current_concurrency_key = ARGV[7] 9 | local from_failure = ARGV[8] 10 | 11 | -- jobs starting at ARGV[9] 12 | 13 | if not redis.call('set', idempotency_token, 'true', 'EX', 3600, 'NX') then 14 | redis.log(redis.LOG_WARNING, "Not reprocessing script") 15 | return -1 16 | end 17 | 18 | for i=9, #ARGV do 19 | local job_json = ARGV[i] 20 | local job = cjson.decode(job_json) 21 | if tonumber(from_failure) == 1 then 22 | -- job is being requeued after a failure, decrement its concurrency 23 | local max_concurrency = tonumber(redis.call('hget', max_concurrency_key, job['task_name'])) 24 | if max_concurrency ~= nil and max_concurrency ~= -1 then 25 | redis.call('hincrby', current_concurrency_key, job['task_name'], -1) 26 | end 27 | end 28 | if job["status"] == 2 then 29 | -- job status is queued 30 | local queue = string.format("%s/%s", namespace, job["queue"]) 31 | redis.call('rpush', queue, job_json) 32 | else 33 | -- job status is waiting 34 | local at_timestamp = job["at"] + 1 -- approximation to avoid starting a job before its real "at" date 35 | redis.call('zadd', future_jobs, at_timestamp, job_json) 36 | end 37 | redis.call('hdel', running_jobs_key, job["id"]) 38 | end 39 | 40 | redis.call('publish', notifications, '') 41 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime, timezone 2 | 3 | import pytest 4 | 5 | 6 | _now = datetime(2017, 9, 2, 8, 50, 56, 482169, timezone.utc) 7 | 8 | 9 | @pytest.fixture 10 | def patch_now(monkeypatch): 11 | """Patch datetime.datetime. 12 | 13 | It is not possible to patch it like a normal Python object, so the 14 | reference is replaced completely by a custom class. 15 | The test function can get and set the fake time with get_now() and 16 | set_now(). 
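    For illustration, a test using this fixture might look like the
    following (hypothetical test function; get_now and set_now are
    imported from this module):

        def test_at_a_fixed_time(patch_now):
            set_now(datetime(2017, 9, 2, 9, 0, 0))
            assert get_now().hour == 9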
17 | """ 18 | global _now 19 | 20 | # Reset the time before each test 21 | _now = datetime(2017, 9, 2, 8, 50, 56, 482169, timezone.utc) 22 | 23 | class MyDatetime: 24 | @classmethod 25 | def now(cls, tz=None): 26 | # All code within Spinach shall create TZ aware datetime 27 | assert tz == timezone.utc 28 | return _now 29 | 30 | @classmethod 31 | def fromtimestamp(cls, *args, **kwargs): 32 | return datetime.fromtimestamp(*args, **kwargs) 33 | 34 | monkeypatch.setattr('spinach.brokers.base.datetime', MyDatetime) 35 | monkeypatch.setattr('spinach.brokers.redis.datetime', MyDatetime) 36 | monkeypatch.setattr('spinach.job.datetime', MyDatetime) 37 | monkeypatch.setattr('spinach.engine.datetime', MyDatetime) 38 | monkeypatch.setattr('spinach.task.datetime', MyDatetime) 39 | 40 | 41 | def get_now() -> datetime: 42 | return _now 43 | 44 | 45 | def set_now(now: datetime): 46 | global _now 47 | if now.tzinfo is None: 48 | # Make it a TZ aware datetime here for convenience to avoid over 49 | # verbose tests 50 | now = now.replace(tzinfo=timezone.utc) 51 | _now = now 52 | -------------------------------------------------------------------------------- /spinach/brokers/redis_scripts/set_concurrency_keys.lua: -------------------------------------------------------------------------------- 1 | local max_concurrency_key = ARGV[1] 2 | local current_concurrency_key = ARGV[2] 3 | -- tasks to register starting at ARGV[3] 4 | 5 | 6 | local function contains(t, e) 7 | return t[e] 8 | end 9 | 10 | local old_max_values = redis.call('hkeys', max_concurrency_key) 11 | local old_current_values = redis.call('hkeys', current_concurrency_key) 12 | 13 | local new_task_names = {} 14 | 15 | for i=3, #ARGV do 16 | local task_json = ARGV[i] 17 | local task = cjson.decode(task_json) 18 | local max_concurrency = tonumber(task["max_concurrency"]) 19 | if max_concurrency ~= -1 then 20 | new_task_names[task["name"]] = true 21 | 22 | -- Override max_concurrency whatever it is already set to, if 23 | -- anything. 24 | redis.call('hset', max_concurrency_key, task["name"], max_concurrency) 25 | -- Check to see if current_concurrency exists before initialising 26 | -- it. 27 | if redis.call('hexists', current_concurrency_key, task["name"]) == 0 then 28 | redis.call('hset', current_concurrency_key, task["name"], 0) 29 | end 30 | end 31 | end 32 | 33 | -- Delete concurrency keys for Tasks that no longer exist. 34 | for i=1, #old_max_values do 35 | local old_task_name = old_max_values[i] 36 | if not contains(new_task_names, old_task_name) then 37 | redis.call('hdel', max_concurrency_key, old_task_name) 38 | end 39 | end 40 | 41 | for i=1, #old_current_values do 42 | local old_task_name = old_current_values[i] 43 | if not contains(new_task_names, old_task_name) then 44 | redis.call('hdel', current_concurrency_key, old_task_name) 45 | end 46 | end 47 | -------------------------------------------------------------------------------- /spinach/contrib/spinachd/apps.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | 3 | from django.apps import AppConfig 4 | from django.utils.module_loading import module_has_submodule 5 | 6 | from spinach import Tasks, Engine 7 | 8 | from .settings import SPINACH_BROKER, SPINACH_NAMESPACE 9 | 10 | spin = Engine(broker=SPINACH_BROKER, namespace=SPINACH_NAMESPACE) 11 | 12 | 13 | class SpinachdConfig(AppConfig): 14 | name = 'spinach.contrib.spinachd' 15 | verbose_name = 'Django Spinach' 16 | 17 | def ready(self): 18 | from . 
import signals # noqa 19 | 20 | for discovered_module in autodiscover_modules('tasks'): 21 | try: 22 | module_tasks = discovered_module.tasks 23 | except AttributeError: 24 | continue 25 | 26 | if isinstance(module_tasks, Tasks): 27 | spin.attach_tasks(module_tasks) 28 | 29 | 30 | def autodiscover_modules(*args): 31 | from django.apps import apps 32 | 33 | imported_modules = list() 34 | for app_config in apps.get_app_configs(): 35 | for module_to_search in args: 36 | # Attempt to import the app's module. 37 | try: 38 | path = '%s.%s' % (app_config.name, module_to_search) 39 | imported_modules.append(importlib.import_module(path)) 40 | except Exception: 41 | # Decide whether to bubble up this error. If the app just 42 | # doesn't have the module in question, we can ignore the error 43 | # attempting to import it, otherwise we want it to bubble up. 44 | if module_has_submodule(app_config.module, module_to_search): 45 | raise 46 | 47 | return imported_modules 48 | -------------------------------------------------------------------------------- /spinach/brokers/redis_scripts/register_periodic_tasks.lua: -------------------------------------------------------------------------------- 1 | local now = ARGV[1] 2 | local periodic_tasks_hash = ARGV[2] 3 | local periodic_tasks_queue = ARGV[3] 4 | -- tasks to register starting at ARGV[4] 5 | 6 | 7 | local function contains(t, e) 8 | return t[e] 9 | end 10 | 11 | 12 | local old_task_names = redis.call('hkeys', periodic_tasks_hash) 13 | local new_task_names = {} 14 | 15 | for i=4, #ARGV do 16 | local task_json = ARGV[i] 17 | local task = cjson.decode(task_json) 18 | local next_event_time = now + task["periodicity"] 19 | new_task_names[task["name"]] = true 20 | 21 | if redis.call('hexists', periodic_tasks_hash, task["name"]) == 0 then 22 | -- the periodic task is new, add it to the queue 23 | redis.call('zadd', periodic_tasks_queue, next_event_time, task["name"]) 24 | else 25 | local existing_task_json = redis.call('hget', periodic_tasks_hash, task["name"]) 26 | local existing_task = cjson.decode(existing_task_json) 27 | if existing_task["periodicity"] ~= task["periodicity"] then 28 | -- the periodic task already existed but the periodicity changed 29 | -- so it is reset 30 | redis.call('zadd', periodic_tasks_queue, next_event_time, task["name"]) 31 | end 32 | end 33 | 34 | -- unconditionnally override the task in the hash 35 | redis.call('hset', periodic_tasks_hash, task["name"], task_json) 36 | end 37 | 38 | 39 | for i=1, #old_task_names do 40 | local old_task_name = old_task_names[i] 41 | if not contains(new_task_names, old_task_name) then 42 | redis.call('hdel', periodic_tasks_hash, old_task_name) 43 | redis.call('zrem', periodic_tasks_queue, old_task_name) 44 | end 45 | end 46 | 47 | -------------------------------------------------------------------------------- /tests/contrib/test_flask.py: -------------------------------------------------------------------------------- 1 | from flask import Flask 2 | import pytest 3 | 4 | from spinach import MemoryBroker, Tasks 5 | from spinach.contrib.flask_spinach import Spinach 6 | 7 | 8 | @pytest.fixture 9 | def app(): 10 | app = Flask('testing') 11 | app.config['TESTING'] = True 12 | app.config['SPINACH_BROKER'] = MemoryBroker() 13 | return app 14 | 15 | 16 | def test_flask_extension(app): 17 | spinach = Spinach(app) 18 | 19 | @spinach.task(name='say_hello') 20 | def say_hello(): 21 | print('Hello from a task') 22 | 23 | @app.route('/') 24 | def home(): 25 | spinach.schedule('say_hello') 26 | return 'Hello 
from HTTP' 27 | 28 | client = app.test_client() 29 | response = client.get('/') 30 | assert response.status_code == 200 31 | assert response.data == b'Hello from HTTP' 32 | 33 | runner = app.test_cli_runner() 34 | result = runner.invoke(args=['spinach', '--stop-when-queue-empty']) 35 | assert result.output == 'Hello from a task\n' 36 | 37 | 38 | def test_flask_extension_app_factory(app): 39 | spinach = Spinach() 40 | tasks = Tasks() 41 | 42 | @tasks.task(name='foo') 43 | def foo(): 44 | return 'foo' 45 | 46 | spinach.init_app(app) 47 | spinach.register_tasks(app, tasks) 48 | 49 | with app.app_context(): 50 | assert spinach.execute('foo') == 'foo' 51 | 52 | 53 | def test_flask_extension_not_init(app): 54 | spinach = Spinach() 55 | 56 | # RuntimeError for being outside application context 57 | with pytest.raises(RuntimeError): 58 | spinach.spin 59 | 60 | # RuntimeError for having not initialized the extension 61 | with pytest.raises(RuntimeError): 62 | spinach.register_tasks(app, None) 63 | 64 | # RuntimeError for having not initialized the extension 65 | with app.app_context(): 66 | with pytest.raises(RuntimeError): 67 | spinach.spin 68 | -------------------------------------------------------------------------------- /doc/user/faq.rst: -------------------------------------------------------------------------------- 1 | .. _faq: 2 | 3 | FAQ 4 | === 5 | 6 | Should I use Spinach? 7 | --------------------- 8 | 9 | Spinach was designed from the ground up to be reliable. It is built using proven technologies 10 | (Redis, Python queues, thread pools...), is heavily tested and in my experience just works. 11 | 12 | The project has been around for long enough that I am now confident it is a good option among task 13 | frameworks. If after reading this documentation you feel like giving it a try, go for it! 14 | 15 | Threads are not enough, can I use Processes? 16 | -------------------------------------------- 17 | 18 | Threading is the only concurrency primitive, but it is possible to run many processes each 19 | containing one worker thread. This will open more connections to Redis, but Redis is known to 20 | support thousands of concurrent connections so this should not be a problem. 21 | 22 | The best approach to achieve this is to rely on an init system like systemd, supervisord or docker. 23 | The init system will be responsible for spawning the correct number of processes and making sure 24 | they are properly restarted if they terminate prematurely. 25 | 26 | Writing this init system yourself in Python using the multiprocessing module is possible, but it 27 | must not import your actual application using Spinach. This is because mixing threads and forks in 28 | a single interpreter is a minefield. In any case you are probably better off using a battle-tested init 29 | system. 30 | 31 | How do I get in touch? 32 | ---------------------- 33 | 34 | Bug reports and feature requests can be sent on `GitHub <https://github.com/NicolasLM/spinach>`_. 35 | 36 | For help with integrating Spinach into a project or giving feedback, there is the IRC channel 37 | **#spinach** on `irc.libera.chat <https://libera.chat/>`_. 38 | 39 | What is the licence? 40 | -------------------- 41 | 42 | Spinach is released under the :download:`BSD license <../../LICENSE>`.
43 | 44 | -------------------------------------------------------------------------------- /spinach/brokers/redis_scripts/get_jobs_from_queue.lua: -------------------------------------------------------------------------------- 1 | local queue = ARGV[1] 2 | local running_jobs_key = ARGV[2] 3 | local job_status_running = tonumber(ARGV[3]) 4 | local max_jobs = tonumber(ARGV[4]) 5 | local max_concurrency_key = ARGV[5] 6 | local current_concurrency_key = ARGV[6] 7 | 8 | local jobs = {} 9 | local jobs_to_re_add = {} 10 | local num_jobs_to_re_add = 0 11 | 12 | local i = 1 13 | repeat 14 | 15 | local job_json = redis.call('lpop', queue) 16 | if not job_json then 17 | break 18 | end 19 | 20 | local job = cjson.decode(job_json) 21 | local max_concurrency = tonumber(redis.call('hget', max_concurrency_key, job['task_name'])) 22 | local current_concurrency = tonumber(redis.call('hget', current_concurrency_key, job['task_name'])) 23 | 24 | if max_concurrency ~= nil and max_concurrency ~= -1 and current_concurrency >= max_concurrency then 25 | -- The max concurrency limit was reached on this Task; re-add the 26 | -- job(s) to the front of the queue after the loop finishes. 27 | num_jobs_to_re_add = num_jobs_to_re_add + 1 28 | jobs_to_re_add[num_jobs_to_re_add] = job_json 29 | else 30 | job["status"] = job_status_running 31 | local job_json = cjson.encode(job) 32 | 33 | -- track the running job 34 | redis.call('hset', running_jobs_key, job["id"], job_json) 35 | -- If tracking concurrency, bump the current value. 36 | if max_concurrency ~= nil and max_concurrency ~= -1 then 37 | redis.call('hincrby', current_concurrency_key, job['task_name'], 1) 38 | end 39 | 40 | jobs[i] = job_json 41 | i = i + 1 42 | end 43 | 44 | until i > max_jobs 45 | 46 | -- Re-add any jobs that were popped but could not be run due to 47 | -- max_concurrency limits. Loop in reverse order to keep the same 48 | -- original ordering! 49 | for i = #jobs_to_re_add, 1, -1 do 50 | redis.call('lpush', queue, jobs_to_re_add[i]) 51 | end 52 | 53 | return cjson.encode(jobs) 54 | -------------------------------------------------------------------------------- /doc/user/asyncio.rst: -------------------------------------------------------------------------------- 1 | .. _asyncio: 2 | 3 | Asyncio 4 | ======= 5 | 6 | Spinach allows defining and running tasks as asyncio coroutines. In this mode the worker is a single 7 | thread that runs all tasks asynchronously. This allows for greater concurrency as well as 8 | compatibility with the asyncio ecosystem. 9 | 10 | Creating async tasks 11 | -------------------- 12 | 13 | To define an asynchronous task, just prefix its definition with the ``async`` keyword:: 14 | 15 | @spin.task(name='compute') 16 | async def compute(a, b): 17 | await asyncio.sleep(1) 18 | print('Computed {} + {} = {}'.format(a, b, a + b)) 19 | 20 | To run the workers in asynchronous mode, pass the ``AsyncioWorkers`` class to ``start_workers``:: 21 | 22 | from spinach import AsyncioWorkers 23 | 24 | spin.start_workers(number=256, workers_class=AsyncioWorkers) 25 | 26 | When using the asyncio workers, the ``number`` argument can be set quite high because each worker 27 | is just a coroutine, consuming a negligible amount of resources. 28 | 29 | Scheduling jobs 30 | --------------- 31 | 32 | Because internally only workers are asyncio aware, jobs are still sent to Redis using a blocking 33 | socket. This means that to schedule jobs from asynchronous code, care must be taken to send jobs 34 | from outside the event loop.
This can be achieved using `asyncio.to_thread 35 | <https://docs.python.org/3/library/asyncio-task.html#asyncio.to_thread>`_:: 36 | 37 | await asyncio.to_thread(spin.schedule, compute, 2, 4) 38 | 39 | Code scheduling a lot of jobs should use :ref:`batches <batches>` to improve performance. 40 | 41 | Example 42 | ------- 43 | 44 | .. literalinclude:: ../../examples/asyncio_workers.py 45 | 46 | 47 | .. note:: If an application defines both sync and async tasks, each kind of task should go in its 48 | own :ref:`queue <queues>` so that sync tasks are picked by threaded workers and async tasks by 49 | asyncio workers. 50 | 51 | .. note:: Not all contrib :ref:`integrations <integrations>` may work with asynchronous workers. 52 | -------------------------------------------------------------------------------- /doc/index.rst: -------------------------------------------------------------------------------- 1 | Spinach 2 | ======= 3 | 4 | Release v\ |version|. (:ref:`Installation <install>`) 5 | 6 | Spinach is a Redis task queue for Python 3 heavily inspired by Celery and RQ. 7 | 8 | Distinctive features: 9 | 10 | - Threaded and asyncio workers 11 | - At-least-once or at-most-once delivery per task 12 | - Periodic tasks without an additional process 13 | - Concurrency limits on queued jobs 14 | - Scheduling of tasks in batch 15 | - Embeddable workers for easier testing 16 | - Integrations with :ref:`Flask, Django, Logging, Sentry and Datadog 17 | <integrations>` 18 | - See :ref:`design choices <design>` for more details 19 | 20 | Installation:: 21 | 22 | pip install spinach 23 | 24 | Quickstart 25 | 26 | .. literalinclude:: ../examples/quickstart.py 27 | 28 | The :class:`Engine` is the central part of Spinach: it lets you define tasks, schedule jobs to 29 | execute in the background and start background workers. :ref:`More details <engine>`. 30 | 31 | The Broker is the backend that background workers use to retrieve jobs to execute. Spinach provides 32 | two brokers: MemoryBroker for development and RedisBroker for production. 33 | 34 | The :meth:`Engine.task` decorator is used to register tasks. It requires at least a `name` to 35 | identify the task, but other options can be given to customize how the task behaves. :ref:`More 36 | details <tasks>`. 37 | 38 | Background jobs can then be scheduled by using either the task name or the task function:: 39 | 40 | spin.schedule('compute', 5, 3) # identify a task by its name 41 | spin.schedule(compute, 5, 3) # identify a task by its function 42 | 43 | Getting started with spinach: 44 | 45 | .. toctree:: 46 | :maxdepth: 1 47 | 48 | user/install 49 | user/tasks 50 | user/jobs 51 | user/engine 52 | user/queues 53 | user/asyncio 54 | user/integrations 55 | user/signals 56 | user/production 57 | user/design 58 | user/faq 59 | 60 | Hacking guide: 61 | 62 | .. toctree:: 63 | :maxdepth: 1 64 | 65 | hacking/contributing 66 | hacking/internals 67 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | Spinach 2 | ======= 3 | 4 | .. image:: https://github.com/NicolasLM/spinach/workflows/Run%20tests/badge.svg?branch=master 5 | :target: https://github.com/NicolasLM/spinach/actions 6 | .. image:: https://coveralls.io/repos/github/NicolasLM/spinach/badge.svg?branch=master 7 | :target: https://coveralls.io/github/NicolasLM/spinach?branch=master 8 | .. image:: https://readthedocs.org/projects/spinach/badge/?version=latest 9 | :target: http://spinach.readthedocs.io/en/latest/?badge=latest 10 | ..
image:: https://img.shields.io/badge/IRC-irc.libera.chat-1e72ff.svg?style=flat 11 | :target: https://kiwiirc.com/nextclient/irc.libera.chat:+6697/#spinach 12 | 13 | Redis task queue for Python 3 heavily inspired by Celery and RQ. 14 | 15 | Distinctive features: 16 | 17 | - Threaded and asyncio workers 18 | - At-least-once or at-most-once delivery per task 19 | - Periodic tasks without an additional process 20 | - Concurrency limits on queued jobs 21 | - Scheduling of tasks in batch 22 | - Integrations with `Flask, Django, Logging, Sentry and Datadog 23 | `_ 24 | - Embeddable workers for easier testing 25 | - See `design choices 26 | `_ for more 27 | details 28 | 29 | Quickstart 30 | ---------- 31 | 32 | Install Spinach with pip:: 33 | 34 | pip install spinach 35 | 36 | Create a task and schedule a job to be executed now: 37 | 38 | .. code:: python 39 | 40 | from spinach import Engine, MemoryBroker 41 | 42 | spin = Engine(MemoryBroker()) 43 | 44 | 45 | @spin.task(name='compute') 46 | def compute(a, b): 47 | print('Computed {} + {} = {}'.format(a, b, a + b)) 48 | 49 | 50 | # Schedule a job to be executed ASAP 51 | spin.schedule(compute, 5, 3) 52 | 53 | print('Starting workers, ^C to quit') 54 | spin.start_workers() 55 | 56 | Documentation 57 | ------------- 58 | 59 | The documentation is at `https://spinach.readthedocs.io 60 | `_. 61 | 62 | IRC channel for online discussions **#spinach** on `irc.libera.chat 63 | `_. 64 | 65 | License 66 | ------- 67 | 68 | BSD 2-clause 69 | 70 | -------------------------------------------------------------------------------- /spinach/contrib/datadog.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from spinach import signals 4 | 5 | 6 | def register_datadog(tracer=None, namespace: Optional[str]=None, 7 | service: str='spinach'): 8 | """Register the Datadog integration. 9 | 10 | :param tracer: optionally use a custom ddtrace Tracer instead of the global 11 | one. 12 | :param namespace: optionally only register the Datadog integration for a 13 | particular Spinach :class:`Engine` 14 | :param service: Datadog service associated with the trace, defaults to 15 | `spinach` 16 | """ 17 | if tracer is None: 18 | from ddtrace import tracer 19 | 20 | @signals.job_started.connect_via(namespace) 21 | def job_started(namespace, job, **kwargs): 22 | tracer.trace( 23 | 'spinach.task', service=service, span_type='worker', 24 | resource=job.task_name 25 | ) 26 | 27 | @signals.job_finished.connect_via(namespace) 28 | def job_finished(namespace, job, **kwargs): 29 | root_span = tracer.current_root_span() 30 | for attr in job.__slots__: 31 | root_span.set_tag(attr, getattr(job, attr)) 32 | root_span.finish() 33 | 34 | @signals.job_failed.connect_via(namespace) 35 | def job_failed(namespace, job, **kwargs): 36 | root_span = tracer.current_root_span() 37 | root_span.set_traceback() 38 | 39 | @signals.job_schedule_retry.connect_via(namespace) 40 | def job_schedule_retry(namespace, job, **kwargs): 41 | root_span = tracer.current_root_span() 42 | root_span.set_traceback() 43 | 44 | 45 | def register_datadog_if_module_patched(module: str, *args, **kwargs) -> bool: 46 | """Register the datadog integration if ddtrace is already used. 47 | 48 | This can be used to enable datadog for Spinach only if datadog 49 | is enabled for Django. 
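    For example, the Django management command bundled with Spinach enables it
    with ``register_datadog_if_module_patched('django', namespace=spin.namespace)``.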
50 | 51 | :param module: Name of the module that must already be patched 52 | :return: boolean telling if the integration was registered 53 | """ 54 | try: 55 | from ddtrace.monkey import get_patched_modules 56 | except ImportError: 57 | return False 58 | 59 | if module not in get_patched_modules(): 60 | return False 61 | 62 | register_datadog(*args, **kwargs) 63 | return True 64 | -------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- 1 | from datetime import timedelta 2 | import threading 3 | from unittest.mock import Mock, patch, ANY 4 | 5 | import pytest 6 | from redis import ConnectionError 7 | 8 | from spinach import utils 9 | 10 | 11 | def test_human_duration(): 12 | assert utils.human_duration(0.00001) == '0 ms' 13 | assert utils.human_duration(0.001) == '1 ms' 14 | assert utils.human_duration(0.25) == '250 ms' 15 | assert utils.human_duration(1) == '1 s' 16 | assert utils.human_duration(2500) == '2500 s' 17 | 18 | 19 | @patch('spinach.utils.time.monotonic') 20 | @patch('spinach.utils.exponential_backoff', return_value=timedelta()) 21 | def test_run_forever(_, mock_monotonic): 22 | must_stop = threading.Event() 23 | logger = Mock() 24 | mock_monotonic.side_effect = [0, 0, 800, 0, 0, 0] 25 | call_count = 0 26 | 27 | def func(): 28 | nonlocal call_count 29 | call_count += 1 30 | 31 | if call_count == 1: 32 | return 33 | elif call_count == 2: 34 | raise RuntimeError('Foo') 35 | elif call_count == 3: 36 | raise ConnectionError('Bar') 37 | elif call_count == 4: 38 | must_stop.set() 39 | return 40 | 41 | utils.run_forever(func, must_stop, logger) 42 | assert call_count == 4 43 | logger.exception.assert_called_once_with(ANY, ANY) 44 | logger.warning.assert_called_once_with(ANY, ANY, ANY) 45 | assert must_stop.is_set() 46 | 47 | 48 | @patch('spinach.utils.time.sleep') 49 | @patch('spinach.utils.exponential_backoff', return_value=timedelta()) 50 | def test_call_with_retry(_, mock_sleep): 51 | logger = Mock() 52 | call_count = 0 53 | 54 | def func(): 55 | nonlocal call_count 56 | call_count += 1 57 | 58 | if call_count in (1, 2): 59 | raise ValueError('Foo') 60 | elif call_count == 3: 61 | raise RuntimeError('Foo') 62 | 63 | with pytest.raises(RuntimeError): 64 | utils.call_with_retry(func, (RuntimeError, ValueError), 3, logger) 65 | 66 | 67 | def test_exponential_backoff(): 68 | assert ( 69 | timedelta(seconds=3) <= utils.exponential_backoff(1) 70 | <= timedelta(seconds=6) 71 | ) 72 | assert utils.exponential_backoff(10000) <= timedelta(minutes=20) 73 | assert utils.exponential_backoff(10000, cap=60) <= timedelta(minutes=1) 74 | -------------------------------------------------------------------------------- /doc/user/signals.rst: -------------------------------------------------------------------------------- 1 | .. _signals: 2 | 3 | Signals 4 | ======= 5 | 6 | Signals are events broadcasted when something happens in Spinach, like a job starting or a worker 7 | shutting down. 8 | 9 | Subscribing to signals allows your code to react to internal events in a composable and reusable 10 | way. 
11 | 12 | Subscribing to signals 13 | ---------------------- 14 | 15 | Subscribing to a signal is done via its ``connect`` decorator:: 16 | 17 | from spinach import signals 18 | 19 | @signals.job_started.connect 20 | def job_started(namespace, job, **kwargs): 21 | print('Job {} started'.format(job)) 22 | 23 | The first argument given to your function is always the namespace of your Spinach :class:`Engine`, 24 | the following arguments depend on the signal itself. 25 | 26 | Subscribing to signals of a specific Spinach Engine 27 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 28 | 29 | As your application gets bigger you may end up running multiple Engines in the same interpreter. 30 | The ``connect_via`` decorator allows to subscribe to the signals sent by a specific Spinach 31 | :class:`Engine`:: 32 | 33 | from spinach import Engine, MemoryBroker, signals 34 | 35 | foo_spin = Engine(MemoryBroker(), namespace='foo') 36 | bar_spin = Engine(MemoryBroker(), namespace='bar') 37 | 38 | @signals.job_started.connect_via(foo_spin.namespace) 39 | def job_started(namespace, job, **kwargs): 40 | print('Job {} started on Foo'.format(job)) 41 | 42 | In this example only signals sent by the `foo` :class:`Engine` will be received. 43 | 44 | Available signals 45 | ----------------- 46 | 47 | .. autodata:: spinach.signals.job_started 48 | .. autodata:: spinach.signals.job_finished 49 | .. autodata:: spinach.signals.job_schedule_retry 50 | .. autodata:: spinach.signals.job_failed 51 | .. autodata:: spinach.signals.worker_started 52 | .. autodata:: spinach.signals.worker_terminated 53 | 54 | Tips 55 | ---- 56 | 57 | Received objects 58 | ~~~~~~~~~~~~~~~~ 59 | 60 | Objects received via signals should not be modified in handlers as it could break something in 61 | Spinach internals. 62 | 63 | Exceptions 64 | ~~~~~~~~~~ 65 | 66 | If your receiving function raises an exception while processing a signal, this exception will be 67 | logged in the ``spinach.signals`` logger. 68 | 69 | Going further 70 | ~~~~~~~~~~~~~ 71 | 72 | Have a look at the `blinker documentation `_ for other ways using 73 | signals. 74 | -------------------------------------------------------------------------------- /spinach/brokers/redis_scripts/enqueue_jobs_from_dead_broker.lua: -------------------------------------------------------------------------------- 1 | local dead_broker_id = ARGV[1] 2 | local running_jobs_key = ARGV[2] 3 | local all_brokers_hash_key = ARGV[3] 4 | local all_brokers_zset_key = ARGV[4] 5 | local namespace = ARGV[5] 6 | local notifications = ARGV[6] 7 | local max_concurrency_key = ARGV[7] 8 | local current_concurrency_key = ARGV[8] 9 | 10 | local num_enqueued_jobs = 0 11 | local i = 1 12 | local failed_jobs = {} 13 | 14 | -- Get all jobs that were running on the broker before it died 15 | local jobs_json = redis.call('hvals', running_jobs_key) 16 | 17 | for _, job_json in ipairs(jobs_json) do 18 | local job = cjson.decode(job_json) 19 | -- `max_retries == 0` jobs are non-idempotent, do not re-run them 20 | if job["max_retries"] > 0 and job["retries"] < job["max_retries"] then 21 | job["retries"] = job["retries"] + 1 22 | -- Set job status to queued: 23 | -- A major difference between retrying a job failing in a worker and 24 | -- a failing from a dead broker is that the dead broker one is 25 | -- automatically put in the queue, there is not waiting state because 26 | -- the backoff is not taken into account. Most likely the broker was 27 | -- dead for a while before it was noticed, this acts as the backoff. 
28 | job["status"] = 2 29 | 30 | -- Serialize the job so that it can be put in the queue 31 | local job_json = cjson.encode(job) 32 | 33 | -- Decrement the current concurrency if we are tracking 34 | -- concurrency on the Task. 35 | local max_concurrency = tonumber(redis.call('hget', max_concurrency_key, job['task_name'])) 36 | if max_concurrency ~= nil and max_concurrency ~= -1 then 37 | redis.call('hincrby', current_concurrency_key, job['task_name'], -1) 38 | end 39 | 40 | -- Enqueue the job 41 | local queue = string.format("%s/%s", namespace, job["queue"]) 42 | redis.call('rpush', queue, job_json) 43 | num_enqueued_jobs = num_enqueued_jobs + 1 44 | else 45 | -- Keep track of jobs that exceeded the max_retries (or were not 46 | -- retryable) 47 | failed_jobs[i] = job_json 48 | i = i + 1 49 | end 50 | 51 | redis.call('hdel', running_jobs_key, job["id"]) 52 | end 53 | 54 | -- Remove the broker from the list of brokers 55 | redis.call('hdel', all_brokers_hash_key, dead_broker_id) 56 | redis.call('zrem', all_brokers_zset_key, dead_broker_id) 57 | 58 | if num_enqueued_jobs > 0 then 59 | redis.call('publish', notifications, '') 60 | end 61 | 62 | return {num_enqueued_jobs, failed_jobs} 63 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | from codecs import open 3 | from os import path 4 | 5 | here = path.abspath(path.dirname(__file__)) 6 | 7 | with open(path.join(here, 'README.rst'), encoding='utf-8') as f: 8 | long_description = f.read() 9 | 10 | with open(path.join(here, 'LICENSE'), encoding='utf-8') as f: 11 | long_description += f.read() 12 | 13 | with open(path.join(here, 'spinach', 'const.py'), encoding='utf-8') as fp: 14 | version = dict() 15 | exec(fp.read(), version) 16 | version = version['VERSION'] 17 | 18 | setup( 19 | name='spinach', 20 | version=version, 21 | description='Modern Redis task queue for Python 3', 22 | long_description=long_description, 23 | url='https://github.com/NicolasLM/spinach', 24 | author='Nicolas Le Manchet', 25 | author_email='nicolas@lemanchet.fr', 26 | license='BSD 2-clause', 27 | # See https://pypi.python.org/pypi?%3Aaction=list_classifiers 28 | classifiers=[ 29 | 'Development Status :: 4 - Beta', 30 | 'Intended Audience :: Developers', 31 | 'Topic :: Software Development :: Libraries', 32 | 'Topic :: System :: Distributed Computing', 33 | 'License :: OSI Approved :: BSD License', 34 | 'Natural Language :: English', 35 | 'Programming Language :: Python :: 3', 36 | 'Programming Language :: Python :: 3 :: Only', 37 | 'Programming Language :: Python :: 3.8', 38 | 'Programming Language :: Python :: 3.9', 39 | 'Programming Language :: Python :: 3.10', 40 | 'Programming Language :: Python :: 3.11', 41 | 'Programming Language :: Python :: 3.12', 42 | 'Programming Language :: Python :: 3.13', 43 | ], 44 | keywords='task queue jobs redis', 45 | 46 | packages=find_packages(include=('spinach', 'spinach.*')), 47 | install_requires=[ 48 | 'redis', 49 | 'blinker' 50 | ], 51 | 52 | extras_require={ 53 | 'tests': [ 54 | 'pytest', 55 | 'pytest-cov', 56 | 'pytest-threadleak', 57 | 'pycodestyle', 58 | 'flask', 59 | 'django' 60 | ], 61 | }, 62 | 63 | package_data={ 64 | 'spinach.brokers.redis_scripts': [ 65 | 'deregister.lua', 66 | 'enqueue_job.lua', 67 | 'enqueue_jobs_from_dead_broker.lua', 68 | 'flush.lua', 69 | 'get_jobs_from_queue.lua', 70 | 'move_future_jobs.lua', 71 | 
'register_periodic_tasks.lua', 72 | 'remove_job_from_running.lua', 73 | 'set_concurrency_keys.lua', 74 | ], 75 | }, 76 | ) 77 | -------------------------------------------------------------------------------- /doc/hacking/contributing.rst: -------------------------------------------------------------------------------- 1 | .. _contributing: 2 | 3 | Contributing 4 | ============ 5 | 6 | This page contains the few guidelines and conventions used in the code base. 7 | 8 | Pull requests 9 | ------------- 10 | 11 | The development of Spinach happens on GitHub, the main repository is 12 | `https://github.com/NicolasLM/spinach `_. To contribute to 13 | Spinach: 14 | 15 | * Fork ``NicolasLM/spinach`` 16 | * Clone your fork 17 | * Create a feature branch ``git checkout -b my_feature`` 18 | * Commit your changes 19 | * Push your changes to your fork ``git push origin my_feature`` 20 | * Create a GitHub pull request against ``NicolasLM/spinach``'s master branch 21 | 22 | .. note:: Avoid including multiple commits in your pull request, unless it adds value to a future 23 | reader. If you need to modify a commit, ``git commit --amend`` is your friend. Write 24 | a meaningful commit message, see `How to write a commit message 25 | `_. 26 | 27 | Python sources 28 | -------------- 29 | 30 | The code base follows `pep8 `_ guidelines with lines 31 | wrapping at the 79th character. You can verify that the code follows the conventions with:: 32 | 33 | $ pycodestyle --ignore=E252,W503,W504 spinach tests 34 | 35 | Running tests is an invaluable help when adding a new feature or when refactoring. Try to add the 36 | proper test cases in ``tests/`` together with your patch. The test suite can be run with pytest:: 37 | 38 | $ pytest tests 39 | 40 | Because the Redis broker tests require a running Redis server, there is also a convenience 41 | `tox.ini` that runs all the tests and pep8 checks for you after starting Redis in a container via 42 | docker-compose. Simply running:: 43 | 44 | $ tox 45 | 46 | will build a virtualenv, install Spinach and its dependencies into it, start the Redis server in 47 | the container, and run tests and pycodestyle, tearing down the Redis server container when done. 48 | 49 | Compatibility 50 | ------------- 51 | 52 | Spinach runs on all versions of Python starting from 3.6. Tests are run via GitHub actions to 53 | ensure that. 54 | 55 | Documentation sources 56 | --------------------- 57 | 58 | Documentation is located in the ``doc`` directory of the repository. It is written in 59 | `reStructuredText `_ and built 60 | with `Sphinx `_. 61 | 62 | If you modify the docs, make sure it builds without errors:: 63 | 64 | $ cd doc/ 65 | $ make html 66 | 67 | The generated HTML pages should land in ``doc/_build/html``. 
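To catch broken references early, the documentation build can also be made to treat warnings as
errors. This assumes the standard Sphinx Makefile, which forwards extra options through
``SPHINXOPTS``::

    $ make html SPHINXOPTS="-W"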
68 | -------------------------------------------------------------------------------- /tests/test_queuey.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | 3 | import pytest 4 | 5 | from spinach.queuey import Queuey 6 | 7 | 8 | def test_sync(): 9 | q = Queuey(2) 10 | q.put_sync(1) 11 | q.put_sync(2) 12 | assert len(q._items) == 2 13 | assert len(q._putters) == 0 14 | assert len(q._getters) == 0 15 | 16 | assert q.get_sync() == 1 17 | assert q.get_sync() == 2 18 | assert len(q._items) == 0 19 | assert len(q._putters) == 0 20 | assert len(q._getters) == 0 21 | 22 | 23 | def test_async(): 24 | q = Queuey(2) 25 | loop = asyncio.get_event_loop() 26 | loop.run_until_complete(q.put_async(1)) 27 | loop.run_until_complete(q.put_async(2)) 28 | assert len(q._items) == 2 29 | assert len(q._putters) == 0 30 | assert len(q._getters) == 0 31 | 32 | assert loop.run_until_complete(q.get_async()) == 1 33 | assert loop.run_until_complete(q.get_async()) == 2 34 | assert len(q._items) == 0 35 | assert len(q._putters) == 0 36 | assert len(q._getters) == 0 37 | 38 | 39 | def test_noblock(): 40 | q = Queuey(1) 41 | item, future_get = q._get_noblock() 42 | assert item is None 43 | assert future_get is not None 44 | assert future_get.done() is False 45 | 46 | future_put = q._put_noblock(1) 47 | assert future_put is None 48 | assert future_get.done() is True 49 | assert future_get.result() == 1 50 | 51 | future_put = q._put_noblock(2) 52 | assert future_put is None 53 | 54 | future_put = q._put_noblock(3) 55 | assert future_put is not None 56 | assert future_put.done() is False 57 | 58 | item, future_get = q._get_noblock() 59 | assert item == 2 60 | assert future_get is None 61 | assert future_put.done() is True 62 | 63 | item, future_get = q._get_noblock() 64 | assert item == 3 65 | assert future_get is None 66 | 67 | 68 | def test_max_unfinished_queue(): 69 | q = Queuey(maxsize=2) 70 | assert q.empty() 71 | assert q.available_slots() == 2 72 | 73 | q.put_sync(None) 74 | assert not q.full() 75 | assert not q.empty() 76 | assert q.available_slots() == 1 77 | 78 | q.put_sync(None) 79 | assert q.full() 80 | assert q.available_slots() == 0 81 | 82 | q.get_sync() 83 | assert q.full() 84 | assert q.available_slots() == 0 85 | 86 | q.task_done() 87 | assert not q.full() 88 | assert q.available_slots() == 1 89 | 90 | q.get_sync() 91 | assert not q.empty() 92 | assert q.available_slots() == 1 93 | 94 | q.task_done() 95 | assert q.empty() 96 | assert q.available_slots() == 2 97 | 98 | 99 | def test_too_many_task_done(): 100 | q = Queuey(10) 101 | with pytest.raises(ValueError): 102 | q.task_done() 103 | -------------------------------------------------------------------------------- /spinach/contrib/sentry_sdk_spinach.py: -------------------------------------------------------------------------------- 1 | from sentry_sdk.integrations import Integration 2 | from sentry_sdk.scope import Scope, ScopeType 3 | 4 | from spinach import signals 5 | 6 | 7 | class SpinachIntegration(Integration): 8 | """Register the Sentry SDK integration. 9 | 10 | Exceptions making jobs fail are sent to Sentry and performance 11 | tracing of Spinach tasks is enabled. 
12 | 13 | :param send_retries: whether to also send to Sentry exceptions resulting 14 | in a job being retried 15 | """ 16 | 17 | identifier = 'spinach' 18 | 19 | def __init__(self, send_retries: bool=False): 20 | self.send_retries = send_retries 21 | 22 | @staticmethod 23 | def setup_once(): 24 | signals.job_started.connect(_job_started) 25 | signals.job_finished.connect(_job_finished) 26 | signals.job_failed.connect(_job_failed) 27 | signals.job_schedule_retry.connect(_job_schedule_retry) 28 | 29 | 30 | def _job_started(namespace, job, **kwargs): 31 | 32 | current_scope = Scope(ty=ScopeType.CURRENT) 33 | Scope.set_current_scope(current_scope) 34 | 35 | isolation_scope = Scope(ty=ScopeType.ISOLATION) 36 | Scope.set_isolation_scope(isolation_scope) 37 | 38 | isolation_scope.transaction = job.task_name 39 | isolation_scope.clear_breadcrumbs() 40 | for attr in job.__slots__: 41 | isolation_scope.set_extra(attr, getattr(job, attr)) 42 | 43 | # Transactions and spans are for tracing 44 | transaction = isolation_scope.start_transaction( 45 | op='task', 46 | name=job.task_name 47 | ) 48 | # Transaction are meant to be used as a context manager, 49 | # but this does not fit the signals based approach well so 50 | # pretend that we use a context manager. 51 | transaction.__enter__() 52 | 53 | 54 | def _job_finished(namespace, job, **kwargs): 55 | isolation_scope = Scope.get_isolation_scope() 56 | for attr in job.__slots__: 57 | isolation_scope.set_extra(attr, getattr(job, attr)) 58 | transaction = isolation_scope.transaction 59 | if transaction is not None: 60 | transaction.__exit__(None, None, None) 61 | Scope.set_current_scope(None) 62 | Scope.set_isolation_scope(None) 63 | 64 | 65 | def _job_failed(namespace, job, **kwargs): 66 | scope = Scope.get_isolation_scope() 67 | for attr in job.__slots__: 68 | scope.set_extra(attr, getattr(job, attr)) 69 | scope.capture_exception() 70 | if scope.transaction is not None: 71 | scope.transaction.set_status("internal_error") 72 | 73 | 74 | def _job_schedule_retry(namespace, job, **kwargs): 75 | scope = Scope.get_isolation_scope() 76 | for attr in job.__slots__: 77 | scope.set_extra(attr, getattr(job, attr)) 78 | integration = scope.get_client().get_integration(SpinachIntegration) 79 | if integration is None: 80 | return 81 | 82 | if integration.send_retries: 83 | scope.capture_exception() 84 | -------------------------------------------------------------------------------- /tests/test_memory_brokers.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime, timezone 2 | import pytest 3 | 4 | from spinach.brokers.memory import MemoryBroker 5 | from spinach.job import Job, JobStatus 6 | from spinach.task import Task 7 | 8 | 9 | @pytest.fixture 10 | def broker(): 11 | broker = MemoryBroker() 12 | broker.namespace = 'tests' 13 | broker.start() 14 | yield broker 15 | broker.stop() 16 | 17 | 18 | def test_namespace(): 19 | broker = MemoryBroker() 20 | 21 | with pytest.raises(RuntimeError): 22 | assert broker.namespace 23 | 24 | broker.namespace = 'tests' 25 | assert broker.namespace == 'tests' 26 | assert broker._to_namespaced('bar') == 'tests/bar' 27 | 28 | with pytest.raises(RuntimeError): 29 | broker.namespace = 'foo' 30 | 31 | 32 | def test_get_queues(broker): 33 | assert broker._queues == {} 34 | 35 | queue = broker._get_queue('bar') 36 | assert broker._queues == { 37 | 'tests/bar': queue 38 | } 39 | 40 | assert broker._get_queue('bar') is queue 41 | 42 | 43 | def 
test_get_jobs_from_queue_limits_concurrency(broker): 44 | task = Task(print, 'foo', 'q1', 10, None, max_concurrency=1) 45 | broker.set_concurrency_keys([task]) 46 | job1 = Job('foo', 'q1', datetime.now(timezone.utc), 10) 47 | job2 = Job('foo', 'q1', datetime.now(timezone.utc), 10) 48 | broker.enqueue_jobs([job1, job2]) 49 | 50 | # Try to get one more than it allows. 51 | jobs = broker.get_jobs_from_queue('q1', 2) 52 | assert len(jobs) == 1 53 | 54 | 55 | def test_get_jobs_from_queue_re_adds_jobs_if_over_limit(broker): 56 | task = Task(print, 'foo', 'q1', 10, None, max_concurrency=1) 57 | broker.set_concurrency_keys([task]) 58 | job1 = Job('foo', 'q1', datetime.now(timezone.utc), 10) 59 | job2 = Job('foo', 'q1', datetime.now(timezone.utc), 10) 60 | broker.enqueue_jobs([job1, job2]) 61 | 62 | # Try to get one more than it allows. 63 | [running_job] = broker.get_jobs_from_queue('q1', 2) 64 | 65 | # Pop what's left in the broker's Queue and inspect it. 66 | job_json_string = broker._get_queue('q1').get(block=False) 67 | queued_job = Job.deserialize(job_json_string) 68 | assert queued_job != running_job 69 | 70 | 71 | def test_decrements_concurrency_count_when_job_ends(broker): 72 | task = Task(print, 'foo', 'q1', 10, None, max_concurrency=1) 73 | broker.set_concurrency_keys([task]) 74 | job1 = Job('foo', 'q1', datetime.now(timezone.utc), 10) 75 | job2 = Job('foo', 'q1', datetime.now(timezone.utc), 10) 76 | broker.enqueue_jobs([job1, job2]) 77 | 78 | # Start the first job. 79 | running_jobs = broker.get_jobs_from_queue('q1', 2) 80 | assert 1 == len(running_jobs) 81 | 82 | # No more can start. 83 | assert 0 == len(broker.get_jobs_from_queue('q1', 2)) 84 | 85 | # Complete the first job. 86 | broker.remove_job_from_running(running_jobs[0]) 87 | 88 | # Start second job now first has finished. 
89 | assert 1 == len(broker.get_jobs_from_queue('q1', 2)) 90 | -------------------------------------------------------------------------------- /spinach/brokers/redis_scripts/move_future_jobs.lua: -------------------------------------------------------------------------------- 1 | local namespace = ARGV[1] 2 | local future_jobs = ARGV[2] 3 | local notifications = ARGV[3] 4 | local now = ARGV[4] 5 | local job_status_queued = tonumber(ARGV[5]) 6 | local periodic_tasks_hash = ARGV[6] 7 | local periodic_tasks_queue = ARGV[7] 8 | local all_brokers_hash_key = ARGV[8] 9 | local all_brokers_zset_key = ARGV[9] 10 | local broker_info_json = ARGV[10] 11 | local broker_dead_threshold_seconds = ARGV[11] 12 | -- uuids starting at ARGV[12] 13 | -- lua in Redis cannot generate random UUIDs, so they are generated in Python 14 | -- and passed with each calls 15 | 16 | -- Register the current broker keepalive 17 | local broker_info = cjson.decode(broker_info_json) 18 | redis.call('hset', all_brokers_hash_key, broker_info["id"], broker_info_json) 19 | redis.call('zadd', all_brokers_zset_key, broker_info["last_seen_at"], broker_info["id"]) 20 | 21 | -- Get IDs of brokers that were not seen for a long time 22 | local dead_brokers_id = redis.call('zrangebyscore', all_brokers_zset_key, '-inf', now - broker_dead_threshold_seconds, 'LIMIT', 0, 10) 23 | 24 | -- Get the future jobs that are due 25 | -- Limit to fetching 1000 of them to avoid the script to take too long 26 | local jobs_json = redis.call('zrangebyscore', future_jobs, '-inf', now, 'LIMIT', 0, 1000) 27 | local jobs_moved = 0 28 | 29 | -- Create jobs from due periodic tasks 30 | local number_of_uuids = #ARGV + 1 - 12 -- as uuids start at ARGV[12] 31 | local task_names = redis.call('zrangebyscore', periodic_tasks_queue, '-inf', now, 'LIMIT', 0, number_of_uuids) 32 | for i, task_name in ipairs(task_names) do 33 | 34 | local task_json = redis.call('hget', periodic_tasks_hash, task_name) 35 | 36 | -- the key task_name may not exist in the hash if the periodic task was deleted 37 | if task_json == false then 38 | redis.call('zrem', periodic_tasks_queue, task_name) 39 | else 40 | local task = cjson.decode(task_json) 41 | local job = {} 42 | job["id"] = ARGV[12 + i - 1] 43 | job["status"] = job_status_queued 44 | job["task_name"] = task_name 45 | job["queue"] = task["queue"] 46 | job["max_retries"] = task["max_retries"] 47 | job["retries"] = 0 48 | job["at"] = tonumber(now) 49 | job["at_us"] = 0 50 | job["task_args"] = {} 51 | job["task_kwargs"] = {} 52 | table.insert(jobs_json, cjson.encode(job)) 53 | 54 | local next_event_time = job["at"] + task["periodicity"] 55 | redis.call('zrem', periodic_tasks_queue, task_name) 56 | redis.call('zadd', periodic_tasks_queue, next_event_time, task_name) 57 | end 58 | end 59 | 60 | for i, job_json in ipairs(jobs_json) do 61 | local job = cjson.decode(job_json) 62 | local queue = string.format("%s/%s", namespace, job["queue"]) 63 | job["status"] = job_status_queued 64 | local job_json_updated = cjson.encode(job) 65 | redis.call('rpush', queue, job_json_updated) 66 | redis.call('zrem', future_jobs, job_json) 67 | jobs_moved = jobs_moved + 1 68 | end 69 | 70 | if jobs_moved > 0 then 71 | redis.call('publish', notifications, '') 72 | end 73 | 74 | return {jobs_moved, dead_brokers_id} 75 | 76 | -------------------------------------------------------------------------------- /spinach/signals.py: -------------------------------------------------------------------------------- 1 | from logging import getLogger 2 | 3 | import blinker 
4 | 5 | logger = getLogger(__name__) 6 | 7 | __all__ = [ 8 | 'job_started', 'job_finished', 'job_schedule_retry', 'job_failed', 9 | 'worker_started', 'worker_terminated' 10 | ] 11 | 12 | 13 | class SafeNamedSignal(blinker.NamedSignal): 14 | """Named signal for misbehaving receivers.""" 15 | 16 | def send(self, *sender, **kwargs): 17 | """Emit this signal on behalf of `sender`, passing on kwargs. 18 | 19 | This is an extension of `Signal.send` that changes one thing: 20 | Exceptions raised in calling the receiver are logged but do not fail 21 | """ 22 | if len(sender) == 0: 23 | sender = None 24 | elif len(sender) > 1: 25 | raise TypeError('send() accepts only one positional argument, ' 26 | '%s given' % len(sender)) 27 | else: 28 | sender = sender[0] 29 | 30 | if not self.receivers: 31 | return [] 32 | 33 | rv = list() 34 | for receiver in self.receivers_for(sender): 35 | try: 36 | rv.append((receiver, receiver(sender, **kwargs))) 37 | except Exception: 38 | logger.exception('Error while dispatching signal "{}" ' 39 | 'to receiver'.format(self.name)) 40 | return rv 41 | 42 | def __repr__(self): 43 | return 'SafeNamedSignal "{}"'.format(self.name) 44 | 45 | 46 | # Added signals but also be documented in doc/user/signals.rst 47 | job_started = SafeNamedSignal('job_started', doc='''\ 48 | Sent by a worker when a job starts being executed. 49 | 50 | Signal handlers receive: 51 | 52 | - `namespace` Spinach namespace 53 | - `job` :class:`Job` being executed 54 | ''') 55 | 56 | job_finished = SafeNamedSignal('job_finished', doc='''\ 57 | Sent by a worker when a job finishes execution. 58 | 59 | The signal is sent no matter the outcome, even if the job fails or gets 60 | rescheduled for retry. 61 | 62 | Signal handlers receive: 63 | 64 | - `namespace` Spinach namespace 65 | - `job` :class:`Job` being executed 66 | ''') 67 | 68 | job_schedule_retry = SafeNamedSignal('job_schedule_retry', doc='''\ 69 | Sent by a worker when a job gets rescheduled for retry. 70 | 71 | Signal handlers receive: 72 | 73 | - `namespace` Spinach namespace 74 | - `job` :class:`Job` being executed 75 | - `err` exception that made the job retry 76 | ''') 77 | 78 | job_failed = SafeNamedSignal('job_failed', doc='''\ 79 | Sent by a worker when a job failed. 80 | 81 | A failed job will not be retried. 82 | 83 | Signal handlers receive: 84 | 85 | - `namespace` Spinach namespace 86 | - `job` :class:`Job` being executed 87 | - `err` exception that made the job fail 88 | ''') 89 | 90 | worker_started = SafeNamedSignal('worker_started', doc='''\ 91 | Sent by a worker when it starts. 92 | 93 | Signal handlers receive: 94 | 95 | - `namespace` Spinach namespace 96 | - `worker_name` name of the worker starting 97 | ''') 98 | 99 | worker_terminated = SafeNamedSignal('worker_terminated', doc='''\ 100 | Sent by a worker when it shutdowns. 
101 | 102 | Signal handlers receive: 103 | 104 | - `namespace` Spinach namespace 105 | - `worker_name` name of the worker shutting down 106 | ''') 107 | -------------------------------------------------------------------------------- /spinach/utils.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | from datetime import timedelta 3 | from logging import Logger 4 | import random 5 | import signal 6 | from threading import Event 7 | import time 8 | from typing import Callable 9 | 10 | from redis import ConnectionError, TimeoutError 11 | 12 | 13 | def human_duration(duration_seconds: float) -> str: 14 | """Convert a duration in seconds into a human friendly string.""" 15 | if duration_seconds < 0.001: 16 | return '0 ms' 17 | if duration_seconds < 1: 18 | return '{} ms'.format(int(duration_seconds * 1000)) 19 | return '{} s'.format(int(duration_seconds)) 20 | 21 | 22 | def run_forever(func: Callable, must_stop: Event, logger: Logger, 23 | *args, **kwargs): 24 | attempt = 0 25 | while not must_stop.is_set(): 26 | 27 | start = time.monotonic() 28 | try: 29 | func(*args, **kwargs) 30 | except Exception as e: 31 | 32 | # Reset the attempt counter if `func` ran for 10 minutes without 33 | # an error 34 | if int(time.monotonic() - start) > 600: 35 | attempt = 1 36 | else: 37 | attempt += 1 38 | 39 | delay = exponential_backoff(attempt, cap=120) 40 | if isinstance(e, (ConnectionError, TimeoutError)): 41 | logger.warning('Connection issue: %s. Retrying in %s', e, 42 | delay) 43 | else: 44 | logger.exception('Unexpected error. Retrying in %s', delay) 45 | 46 | must_stop.wait(delay.total_seconds()) 47 | 48 | 49 | def call_with_retry(func: Callable, exceptions, max_retries: int, 50 | logger: Logger, *args, **kwargs): 51 | """Call a function and retry it on failure.""" 52 | attempt = 0 53 | while True: 54 | try: 55 | return func(*args, **kwargs) 56 | except exceptions as e: 57 | attempt += 1 58 | if attempt >= max_retries: 59 | raise 60 | 61 | delay = exponential_backoff(attempt, cap=60) 62 | logger.warning('%s: retrying in %s', e, delay) 63 | time.sleep(delay.total_seconds()) 64 | 65 | 66 | def exponential_backoff(attempt: int, cap: int=1200) -> timedelta: 67 | """Calculate a delay to retry using an exponential backoff algorithm. 68 | 69 | It is an exponential backoff with random jitter to prevent failures 70 | from being retried at the same time. It is a good fit for most 71 | applications. 72 | 73 | :arg attempt: the number of attempts made 74 | :arg cap: maximum delay, defaults to 20 minutes 75 | """ 76 | base = 3 77 | temp = min(base * 2 ** attempt, cap) // 2 78 | return timedelta(seconds=temp + random.randint(0, temp)) 79 | 80 | 81 | @contextlib.contextmanager 82 | def handle_sigterm(): 83 | """Handle SIGTERM like a normal SIGINT (KeyboardInterrupt). 84 | 85 | By default Docker sends a SIGTERM for stopping containers, giving them 86 | time to terminate before getting killed. If a process does not catch this 87 | signal and does nothing, it just gets killed. 88 | 89 | Handling SIGTERM like SIGINT allows to gracefully terminate both 90 | interactively with ^C and with `docker stop`. 91 | 92 | This context manager restores the default SIGTERM behavior when exiting. 
93 | """ 94 | original_sigterm_handler = signal.getsignal(signal.SIGTERM) 95 | signal.signal(signal.SIGTERM, signal.default_int_handler) 96 | try: 97 | yield 98 | finally: 99 | signal.signal(signal.SIGTERM, original_sigterm_handler) 100 | -------------------------------------------------------------------------------- /doc/user/production.rst: -------------------------------------------------------------------------------- 1 | .. _production: 2 | 3 | Running in Production 4 | ===================== 5 | 6 | Advices to read before deploying an application using Spinach to production. 7 | 8 | Spinach 9 | ------- 10 | 11 | Since by default Spinach executes jobs in a separate threads, the user's code must be thread-safe. 12 | This is usually quite easy to achieve on a traditional web application because frameworks like 13 | Flask or Django make that straightforward. 14 | 15 | Tasks should not store state in the process between invocations. Instead all state must be stored 16 | in an external system, like a database or a cache. This advice also applies to `views` in a web 17 | application. 18 | 19 | Redis 20 | ----- 21 | 22 | Most Spinach features are implemented as Lua scripts running inside Redis. Having a solid 23 | installation of Redis is the key to Spinach reliability. 24 | 25 | To ensure that no tasks are lost or duplicated, Redis must be configured with persistence enabled. 26 | It is recommended to use AOF persistence (``appendonly yes``) instead of periodic RDB dumps. The 27 | default of fsync every second (``appendfsync everysec``) is a good trade-off between performance 28 | and security against sudden power failures. 29 | 30 | Using Redis as a task queue is very different from using it as a cache. If an application uses 31 | Redis for both task queue and cache, it is recommended to have two separated Redis servers. One 32 | would be configured with persistence and without eviction while the other would have no persistence 33 | but would evict keys when running low on memory. 34 | 35 | Finally standard security practices apply: Redis should not accept connections from the Internet 36 | and it should require a password even when connecting locally. 37 | 38 | System 39 | ------ 40 | 41 | If the application is deployed on multiple servers it is important that their clocks be 42 | approximately synchronized. This is because Spinach uses the system time to know when a job should 43 | start. Running an `ntp` daemon is highly recommended. 44 | 45 | Workers should be started by an init system that will restart them if they get killed or if the 46 | host reboots. 47 | 48 | To gracefully shutdown a worker, it is recommended to send it a `SIGINT` or a `SIGTERM` and let it 49 | finish its running jobs. If the worker gets killed before it terminates gracefully, non-retryable 50 | jobs will be lost and retryable jobs will be rescheduled automatically after the worker is 51 | identified as dead, which takes 30 minutes by default. This is important if Spinach workers run in 52 | docker containers because docker gives 10 seconds to a container to finish before killing it. 
53 | 54 | Production Checklist 55 | -------------------- 56 | 57 | Spinach: 58 | 59 | - Tasks that are NOT safe to be retried have their `max_retries` set to `0` 60 | - Tasks that are safe to be retried have their `max_retries` set to a positive 61 | number 62 | - Retries happen after an exponential delay with randomized jitter (the 63 | default) 64 | - Task `args` and `kwargs` are JSON serializable and small in size 65 | - Jobs are sent in :class:`Batch` to the broker when multiple jobs are to be 66 | scheduled at once 67 | - The user's code is thread-safe when using the default threaded workers 68 | - Tasks do not store state in the process between invocations 69 | - Logging is configured and exceptions are sent to Sentry, see 70 | :doc:`integrations` 71 | - Different queues are used if tasks have different usage pattens, see 72 | :doc:`queues` 73 | - Different namespaces are used if multiple Spinach applications share the same 74 | Redis server, see :doc:`engine` 75 | 76 | Redis: 77 | 78 | - Redis uses AOF persistence 79 | - Redis does not evict keys when running low on memory 80 | - The Redis server used by Spinach is not also used as a cache 81 | - Connections are secured by a long password 82 | - Connections are encrypted if they go through the public Internet 83 | 84 | System: 85 | 86 | - Servers have their clock synchronized by ntp 87 | - Workers get restarted by an init system if they get killed 88 | -------------------------------------------------------------------------------- /tests/test_engine.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import Mock, ANY, patch 2 | 3 | import pytest 4 | 5 | from spinach import Engine, MemoryBroker, Batch, Tasks 6 | from spinach.exc import UnknownTask 7 | 8 | from .conftest import get_now 9 | 10 | 11 | @pytest.fixture 12 | def spin(): 13 | s = Engine(MemoryBroker(), namespace='tests') 14 | s.start_workers(number=1, block=False) 15 | yield s 16 | s.stop_workers() 17 | 18 | 19 | spin_2 = spin 20 | 21 | 22 | def test_schedule_unknown_task(spin): 23 | with pytest.raises(UnknownTask): 24 | spin.schedule('foo_task') 25 | 26 | 27 | @patch('spinach.engine.logger') 28 | def test_attach_tasks(mock_logger, spin, spin_2): 29 | tasks = Tasks() 30 | tasks.add(print, 'foo_task') 31 | 32 | spin.attach_tasks(tasks) 33 | mock_logger.warning.assert_not_called() 34 | assert tasks._spin is spin 35 | assert spin._tasks.tasks == tasks.tasks 36 | 37 | spin.attach_tasks(tasks) 38 | mock_logger.warning.assert_not_called() 39 | assert tasks._spin is spin 40 | assert spin._tasks.tasks == tasks.tasks 41 | 42 | spin_2.attach_tasks(tasks) 43 | mock_logger.warning.assert_called_once_with(ANY) 44 | assert tasks._spin is spin_2 45 | assert spin_2._tasks.tasks == tasks.tasks 46 | 47 | 48 | def test_schedule_at(patch_now): 49 | now = get_now() 50 | 51 | tasks = Tasks() 52 | tasks.add(Mock(), 'bar_task') 53 | 54 | broker = Mock() 55 | 56 | s = Engine(broker, namespace='tests') 57 | s.attach_tasks(tasks) 58 | 59 | job = s.schedule_at('bar_task', now, three=True) 60 | 61 | bar_job = broker.enqueue_jobs.call_args[0][0][0] 62 | assert bar_job == job 63 | assert bar_job.task_name == 'bar_task' 64 | assert bar_job.at == now 65 | assert bar_job.task_args == () 66 | assert bar_job.task_kwargs == {'three': True} 67 | 68 | 69 | def test_schedule(patch_now): 70 | now = get_now() 71 | 72 | tasks = Tasks() 73 | tasks.add(print, 'foo_task') 74 | 75 | broker = Mock() 76 | 77 | s = Engine(broker, namespace='tests') 78 | 
s.attach_tasks(tasks) 79 | 80 | job1 = s.schedule('foo_task', 1, 2) 81 | 82 | foo_job = broker.enqueue_jobs.call_args[0][0][0] 83 | assert foo_job == job1 84 | assert foo_job.task_name == 'foo_task' 85 | assert foo_job.at == now 86 | assert foo_job.task_args == (1, 2) 87 | assert foo_job.task_kwargs == {} 88 | 89 | 90 | def test_schedule_batch(patch_now): 91 | now = get_now() 92 | 93 | tasks = Tasks() 94 | tasks.add(Mock(), 'foo_task') 95 | tasks.add(Mock(), 'bar_task') 96 | 97 | broker = Mock() 98 | 99 | s = Engine(broker, namespace='tests') 100 | s.attach_tasks(tasks) 101 | 102 | batch = Batch() 103 | batch.schedule('foo_task', 1, 2) 104 | batch.schedule_at('bar_task', now, three=True) 105 | jobs = s.schedule_batch(batch) 106 | 107 | broker.enqueue_jobs.assert_called_once_with([ANY, ANY]) 108 | 109 | foo_job = broker.enqueue_jobs.call_args[0][0][0] 110 | assert foo_job in jobs 111 | assert foo_job.task_name == 'foo_task' 112 | assert foo_job.at == now 113 | assert foo_job.task_args == (1, 2) 114 | assert foo_job.task_kwargs == {} 115 | 116 | bar_job = broker.enqueue_jobs.call_args[0][0][1] 117 | assert bar_job in jobs 118 | assert bar_job.task_name == 'bar_task' 119 | assert bar_job.at == now 120 | assert bar_job.task_args == () 121 | assert bar_job.task_kwargs == {'three': True} 122 | 123 | 124 | def test_execute(spin): 125 | func = Mock() 126 | tasks = Tasks() 127 | tasks.add(func, 'foo_task') 128 | spin.attach_tasks(tasks) 129 | 130 | spin.execute('foo_task') 131 | func.assert_called_once_with() 132 | 133 | 134 | def test_start_workers_twice(spin): 135 | with pytest.raises(RuntimeError): 136 | spin.start_workers() 137 | 138 | 139 | def test_start_workers_blocking(): 140 | spin = Engine(MemoryBroker(), namespace='tests') 141 | spin.start_workers(number=1, block=True, stop_when_queue_empty=True) 142 | assert not spin._must_stop.is_set() 143 | -------------------------------------------------------------------------------- /tests/test_worker.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime, timezone 2 | from unittest.mock import Mock, ANY 3 | import time 4 | import sys 5 | 6 | import pytest 7 | 8 | from spinach import signals 9 | from spinach.worker import ThreadWorkers, AsyncioWorkers 10 | from spinach.job import Job 11 | 12 | 13 | # Spinach does not support AsyncIO on Python 3.6 14 | workers_to_test = [ThreadWorkers] 15 | if sys.version_info >= (3, 7): 16 | workers_to_test.append(AsyncioWorkers) 17 | 18 | 19 | @pytest.fixture(params=workers_to_test) 20 | def workers(request): 21 | workers = request.param(2, 'tests') 22 | yield workers 23 | workers.stop() 24 | 25 | 26 | @pytest.fixture 27 | def job(): 28 | task_func = Mock() 29 | job = Job('foo_task', 'foo_queue', datetime.now(timezone.utc), 10, 30 | task_args=(1, 2), task_kwargs={'foo': 'bar'}) 31 | job.task_func = task_func 32 | 33 | return job, task_func 34 | 35 | 36 | def wait_for_queue_empty(workers: ThreadWorkers, timeout=10): 37 | for _ in range(timeout * 10): 38 | if workers._in_queue.empty(): 39 | return 40 | 41 | time.sleep(0.1) 42 | 43 | raise RuntimeError('Queue did not get empty after {}s'.format(timeout)) 44 | 45 | 46 | def test_job_execution(workers, job): 47 | job, task_func = job 48 | assert workers.can_accept_job() 49 | 50 | workers.submit_job(job) 51 | wait_for_queue_empty(workers) 52 | 53 | # Executed function raised no error 54 | task_func.assert_called_once_with(*job.task_args, **job.task_kwargs) 55 | assert workers.out_queue.get() is job 56 | 
assert workers.can_accept_job() 57 | 58 | 59 | def test_job_execution_exception(workers, job): 60 | job, task_func = job 61 | 62 | # Executed function raised an error 63 | error = RuntimeError('Error') 64 | task_func.side_effect = error 65 | 66 | workers.submit_job(job) 67 | wait_for_queue_empty(workers) 68 | 69 | task_func.assert_called_once_with(*job.task_args, **job.task_kwargs) 70 | assert workers.out_queue.get() is job 71 | 72 | 73 | def test_submit_job_shutdown_workers(workers, job): 74 | job, task_func = job 75 | workers.stop() 76 | with pytest.raises(RuntimeError): 77 | workers.submit_job(job) 78 | 79 | 80 | def test_start_0_workers(): 81 | with pytest.raises(ValueError): 82 | ThreadWorkers(0, 'tests') 83 | 84 | 85 | @pytest.mark.parametrize('number', [1, 5]) 86 | def test_start_stop_n_workers(number): 87 | workers = ThreadWorkers(number, 'tests') 88 | assert workers._in_queue.maxsize == number 89 | assert len(workers._threads) == number 90 | for thread in workers._threads: 91 | assert 'tests-worker-' in thread.name 92 | 93 | workers.stop() 94 | 95 | 96 | def test_worker_signals(job): 97 | job, task_func = job 98 | 99 | mock_job_started_receiver = Mock(spec={}) 100 | signals.job_started.connect(mock_job_started_receiver) 101 | 102 | mock_job_finished_receiver = Mock(spec={}) 103 | signals.job_finished.connect(mock_job_finished_receiver) 104 | 105 | mock_worker_started_receiver = Mock(spec={}) 106 | signals.worker_started.connect(mock_worker_started_receiver) 107 | 108 | mock_worker_terminated_receiver = Mock(spec={}) 109 | signals.worker_terminated.connect(mock_worker_terminated_receiver) 110 | 111 | ns = 'tests' 112 | workers = ThreadWorkers(1, ns) 113 | workers.submit_job(job) 114 | wait_for_queue_empty(workers) 115 | workers.stop() 116 | 117 | mock_job_started_receiver.assert_called_once_with(ns, job=ANY) 118 | mock_job_finished_receiver.assert_called_once_with(ns, job=ANY) 119 | mock_worker_started_receiver.assert_called_once_with( 120 | ns, worker_name='tests-worker-1' 121 | ) 122 | mock_worker_terminated_receiver.assert_called_once_with( 123 | ns, worker_name='tests-worker-1' 124 | ) 125 | 126 | 127 | def test_can_accept_job(workers, job): 128 | job, _ = job 129 | assert workers.available_slots == 2 130 | 131 | workers.submit_job(job) 132 | workers.submit_job(job) 133 | assert workers.available_slots == 0 134 | 135 | wait_for_queue_empty(workers) 136 | assert workers.available_slots == 2 137 | -------------------------------------------------------------------------------- /tests/test_job.py: -------------------------------------------------------------------------------- 1 | import copy 2 | from datetime import datetime, timedelta, timezone 3 | 4 | import pytest 5 | 6 | from spinach import RetryException, AbortException 7 | from spinach.job import Job, JobStatus, advance_job_status 8 | from spinach.exc import InvalidJobSignatureError 9 | 10 | from .conftest import get_now, set_now 11 | 12 | 13 | @pytest.fixture 14 | def job(patch_now): 15 | job = Job('foo_task', 'foo_queue', get_now(), 5, 16 | task_args=(1, 2), task_kwargs={'foo': 'bar'}) 17 | return job 18 | 19 | 20 | def test_serialization(job): 21 | job.status = JobStatus.QUEUED 22 | job.retries = 2 23 | job_json = job.serialize() 24 | assert Job.deserialize(job_json) == job 25 | 26 | 27 | def test_at_timestamp(job): 28 | assert job.at_timestamp == 1504342257 29 | 30 | 31 | def test_should_start(job): 32 | # Exact moment job should start 33 | assert job.should_start 34 | 35 | # A bit later 36 | set_now(get_now() + 
timedelta(minutes=1)) 37 | assert job.should_start 38 | 39 | # A bit earlier 40 | set_now(get_now() - timedelta(minutes=2)) 41 | assert not job.should_start 42 | 43 | 44 | def test_should_retry(job): 45 | job.max_retries = 0 46 | job.retries = 0 47 | assert not job.should_retry 48 | 49 | job.max_retries = 10 50 | job.retries = 0 51 | assert job.should_retry 52 | 53 | job.max_retries = float('+inf') 54 | job.retries = 93593956 55 | assert job.should_retry 56 | 57 | job.max_retries = 10 58 | job.retries = 10 59 | assert not job.should_retry 60 | 61 | 62 | def test_repr(job): 63 | assert str(job.id) in repr(job) 64 | assert job.task_name in repr(job) 65 | assert 'NOT_SET' in repr(job) 66 | 67 | 68 | def test_task_func(job): 69 | assert job.task_func is None 70 | job.task_func = print 71 | assert job.task_func is print 72 | 73 | 74 | def test_eq(job): 75 | assert job == job 76 | assert job != print 77 | 78 | job_2 = copy.deepcopy(job) 79 | job_2.task_name = 'bar_task' 80 | assert job != job_2 81 | 82 | 83 | def test_at_timezone_naive(): 84 | now_naive = datetime.utcnow() 85 | job = Job('foo_task', 'foo_queue', now_naive, 5, 86 | task_args=(1, 2), task_kwargs={'foo': 'bar'}) 87 | assert job.at.tzinfo is timezone.utc 88 | 89 | 90 | def test_advance_job_status(job): 91 | now = job.at 92 | job.max_retries = 0 93 | 94 | job.status = JobStatus.RUNNING 95 | advance_job_status('namespace', job, 1.0, None) 96 | assert job.status is JobStatus.SUCCEEDED 97 | 98 | job.status = JobStatus.RUNNING 99 | advance_job_status('namespace', job, 1.0, RuntimeError('Error')) 100 | assert job.status is JobStatus.FAILED 101 | 102 | job.status = JobStatus.RUNNING 103 | job.max_retries = 10 104 | advance_job_status('namespace', job, 1.0, RuntimeError('Error')) 105 | assert job.status is JobStatus.NOT_SET 106 | assert job.at > now 107 | 108 | job.status = JobStatus.RUNNING 109 | job.max_retries = 10 110 | advance_job_status('namespace', job, 1.0, 111 | RetryException('Must retry', at=now)) 112 | assert job.status is JobStatus.NOT_SET 113 | assert job.at == now 114 | 115 | job.status = JobStatus.RUNNING 116 | job.max_retries = 0 117 | advance_job_status('namespace', job, 1.0, 118 | RetryException('Must retry', at=now)) 119 | assert job.status is JobStatus.FAILED 120 | 121 | # The job should have retried twice due to previous tests, ensure that 122 | # an AbortException tops off the retry counter 123 | job.status = JobStatus.RUNNING 124 | job.max_retries = 10 125 | assert job.retries == 2 126 | advance_job_status('namespace', job, 1.0, AbortException('kaboom')) 127 | assert job.retries == 10 128 | assert job.status is JobStatus.FAILED 129 | 130 | 131 | def test_check_signature(job): 132 | def compatible_func(a, b, foo=None): 133 | pass 134 | 135 | def incompatible_func(a, bar=None): 136 | pass 137 | 138 | job.task_func = compatible_func 139 | assert job.check_signature() is None 140 | 141 | job.task_func = incompatible_func 142 | with pytest.raises(InvalidJobSignatureError): 143 | job.check_signature() 144 | -------------------------------------------------------------------------------- /spinach/queuey.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from collections import deque 3 | from concurrent.futures import Future 4 | import threading 5 | from typing import Tuple, Optional, Any, Deque 6 | 7 | 8 | class Queuey: 9 | """Hybrid queue allowing to interface sync and async(io) code. 
10 | 11 | It is widely inspired by a talk by David Beazley on the subject: 12 | https://www.youtube.com/watch?v=x1ndXuw7S0s 13 | 14 | One big difference with a normal queue is that even with a maxsize 15 | set to a fixed number, this queue can still end up taking an 16 | infinite amount of memory since pending get/put operation are kept 17 | as futures. 18 | 19 | It is an alternative to the 3rd-party Janus library which had 20 | shortcomings when used in Spinach: 21 | - Janus queues have to be created in an asyncio coroutine, turning 22 | the creation of the queue in the Workers class into a strange dance. 23 | - It was not obvious to me how to implement showing the queue as full 24 | if there are unfinished tasks. 25 | - It adds a few dependencies only needed by a fractions of users, adds a 26 | ton of code for something that should be simple. 27 | """ 28 | 29 | def __init__(self, maxsize: int): 30 | self.maxsize = maxsize 31 | self._mutex = threading.Lock() 32 | self._items: Deque[Any] = deque() 33 | self._getters: Deque[Future] = deque() 34 | self._putters: Deque[Tuple[Any, Future]] = deque() 35 | self._unfinished_tasks = 0 36 | 37 | def _get_noblock(self) -> Tuple[Optional[Any], Optional[Future]]: 38 | with self._mutex: 39 | if self._items: 40 | if self._putters: 41 | # About to remove one item from the queue which means 42 | # that a new spot will be available. Since there are 43 | # putters waiting, wake up one and take its item. 44 | item, put_fut = self._putters.popleft() 45 | self._items.append(item) 46 | put_fut.set_result(True) 47 | return self._items.popleft(), None 48 | 49 | else: 50 | fut = Future() 51 | self._getters.append(fut) 52 | return None, fut 53 | 54 | def _put_noblock(self, item: Any) -> Optional[Future]: 55 | with self._mutex: 56 | if len(self._items) < self.maxsize: 57 | self._items.append(item) 58 | self._unfinished_tasks += 1 59 | if self._getters: 60 | self._getters.popleft().set_result(self._items.popleft()) 61 | else: 62 | fut = Future() 63 | self._putters.append((item, fut)) 64 | return fut 65 | 66 | def get_sync(self) -> Any: 67 | item, fut = self._get_noblock() 68 | if fut: 69 | item = fut.result() 70 | return item 71 | 72 | def put_sync(self, item: Any) -> None: 73 | fut = self._put_noblock(item) 74 | if fut is None: 75 | return 76 | 77 | fut.result() 78 | 79 | async def get_async(self) -> Any: 80 | item, fut = self._get_noblock() 81 | if fut: 82 | item = await asyncio.wait_for(asyncio.wrap_future(fut), None) 83 | return item 84 | 85 | async def put_async(self, item: Any) -> None: 86 | fut = self._put_noblock(item) 87 | if fut is None: 88 | return 89 | 90 | await asyncio.wait_for(asyncio.wrap_future(fut), None) 91 | 92 | def task_done(self) -> None: 93 | """Indicate that a formerly enqueued task is complete. 94 | 95 | Raises a ValueError if called more times than there were items 96 | placed in the queue. 
97 | """ 98 | with self._mutex: 99 | unfinished = self._unfinished_tasks - 1 100 | if unfinished < 0: 101 | raise ValueError('task_done() called too many times') 102 | 103 | self._unfinished_tasks = unfinished 104 | 105 | def empty(self) -> bool: 106 | with self._mutex: 107 | return self._unfinished_tasks == 0 108 | 109 | def full(self) -> bool: 110 | with self._mutex: 111 | return self.maxsize <= self._unfinished_tasks 112 | 113 | def available_slots(self) -> int: 114 | with self._mutex: 115 | return self.maxsize - self._unfinished_tasks 116 | -------------------------------------------------------------------------------- /tests/test_brokers.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime, timedelta, timezone 2 | from unittest.mock import patch 3 | import uuid 4 | 5 | import pytest 6 | 7 | from spinach import const 8 | from spinach.brokers.memory import MemoryBroker 9 | from spinach.brokers.redis import RedisBroker 10 | from spinach.job import Job, JobStatus 11 | from spinach.task import Task 12 | from .conftest import get_now, set_now 13 | 14 | 15 | @pytest.fixture(params=[MemoryBroker, RedisBroker]) 16 | def broker(request): 17 | broker = request.param() 18 | broker.namespace = 'tests' 19 | broker.must_stop_periodicity = 0.01 20 | broker.flush() 21 | broker.start() 22 | yield broker 23 | broker.stop() 24 | broker.flush() 25 | 26 | 27 | def test_normal_job(broker): 28 | job = Job('foo_task', 'foo_queue', datetime.now(timezone.utc), 0, 29 | task_args=(1, 2), task_kwargs={'foo': 'bar'}) 30 | broker.enqueue_jobs([job]) 31 | assert job.status == JobStatus.QUEUED 32 | 33 | job.status = JobStatus.RUNNING 34 | assert broker.get_jobs_from_queue('foo_queue', 5) == [job] 35 | assert broker.get_jobs_from_queue('foo_queue', 1) == [] 36 | 37 | 38 | def test_future_job(broker, patch_now): 39 | assert broker.next_future_job_delta is None 40 | assert broker.move_future_jobs() == 0 41 | 42 | job = Job('foo_task', 'foo_queue', get_now() + timedelta(minutes=10), 0, 43 | task_args=(1, 2), task_kwargs={'foo': 'bar'}) 44 | 45 | broker.enqueue_jobs([job]) 46 | assert job.status == JobStatus.WAITING 47 | assert broker.get_jobs_from_queue('foo_queue', 5) == [] 48 | assert broker.next_future_job_delta == 600 49 | assert broker.move_future_jobs() == 0 50 | 51 | set_now(datetime(2017, 9, 2, 9, 00, 56, 482169)) 52 | assert broker.next_future_job_delta == 0 53 | assert broker.move_future_jobs() == 1 54 | 55 | job.status = JobStatus.RUNNING 56 | assert broker.get_jobs_from_queue('foo_queue', 5) == [job] 57 | 58 | 59 | def test_wait_for_events_no_future_job(broker): 60 | with patch.object(broker, '_something_happened') as mock_sh: 61 | mock_sh.wait.return_value = False 62 | broker.wait_for_event() 63 | mock_sh.wait.assert_called_once_with( 64 | timeout=const.WAIT_FOR_EVENT_MAX_SECONDS 65 | ) 66 | mock_sh.clear.assert_not_called() 67 | 68 | mock_sh.wait.return_value = True 69 | broker.wait_for_event() 70 | mock_sh.clear.assert_called_once_with() 71 | 72 | 73 | @pytest.mark.parametrize('delta,timeout', [ 74 | (timedelta(weeks=10), const.WAIT_FOR_EVENT_MAX_SECONDS), 75 | (timedelta(seconds=5), 5) 76 | ]) 77 | def test_wait_for_events_with_future_job(broker, patch_now, delta, timeout): 78 | broker.enqueue_jobs( 79 | [Job('foo_task', 'foo_queue', get_now() + delta, 0)] 80 | ) 81 | with patch.object(broker, '_something_happened') as mock_sh: 82 | broker.wait_for_event() 83 | mock_sh.wait.assert_called_once_with(timeout=timeout) 84 | 85 | 86 | def 
test_flush(broker): 87 | broker.enqueue_jobs([ 88 | Job('t1', 'q1', get_now(), 0), 89 | Job('t2', 'q2', get_now() + timedelta(seconds=10), 0) 90 | ]) 91 | broker.flush() 92 | assert broker.get_jobs_from_queue('q1', 1) == [] 93 | assert broker.next_future_job_delta is None 94 | 95 | 96 | def test_enqueue_jobs_from_dead_broker(broker): 97 | # Marking a broker that doesn't exist as dead 98 | broker_id = uuid.UUID('62664577-cf89-4f6a-ab16-4e20ec8fe4c2') 99 | assert broker.enqueue_jobs_from_dead_broker(broker_id) == (0, []) 100 | 101 | 102 | def test_get_broker_info(broker): 103 | info = broker._get_broker_info() 104 | assert 'id' in info 105 | assert 'name' in info 106 | assert 'started_at' in info 107 | assert 'last_seen_at' in info 108 | assert info['namespace'] == broker.namespace 109 | 110 | 111 | def test_get_all_brokers(broker): 112 | broker.move_future_jobs() # Manually trigger registration 113 | all_brokers = broker.get_all_brokers() 114 | assert len(all_brokers) == 1 115 | assert isinstance(all_brokers, list) 116 | assert isinstance(all_brokers[0], dict) 117 | 118 | 119 | def test_repr(broker): 120 | assert broker.__class__.__name__ in repr(broker) 121 | assert str(broker._id) in repr(broker) 122 | 123 | 124 | def test_no_periodic_tasks(broker): 125 | broker.register_periodic_tasks([]) 126 | assert broker.inspect_periodic_tasks() == [] 127 | 128 | 129 | def test_periodic_tasks(broker): 130 | tasks = [ 131 | Task(print, 'foo', 'q1', 0, timedelta(seconds=5)), 132 | Task(print, 'bar', 'q1', 0, timedelta(seconds=10)) 133 | ] 134 | broker.register_periodic_tasks(tasks) 135 | 136 | r = broker.inspect_periodic_tasks() 137 | assert r[0][1] == 'foo' 138 | assert r[1][1] == 'bar' 139 | assert r[0][0] == r[1][0] - 5 140 | -------------------------------------------------------------------------------- /spinach/contrib/flask_spinach.py: -------------------------------------------------------------------------------- 1 | import click 2 | import flask 3 | 4 | import spinach 5 | from spinach import signals 6 | from spinach.const import DEFAULT_WORKER_NUMBER, DEFAULT_QUEUE 7 | 8 | 9 | class Spinach: 10 | def __init__(self, app=None): 11 | self.app = app 12 | if app is not None: 13 | self.init_app(app) 14 | 15 | def init_app(self, app): 16 | app.config.setdefault('SPINACH_BROKER', spinach.RedisBroker()) 17 | app.config.setdefault('SPINACH_NAMESPACE', app.name) 18 | 19 | app.extensions['spinach'] = spinach.Engine( 20 | broker=app.config['SPINACH_BROKER'], 21 | namespace=app.config['SPINACH_NAMESPACE'] 22 | ) 23 | namespace = app.extensions['spinach'].namespace 24 | 25 | @app.cli.command(name='spinach', help='Run Spinach workers') 26 | @click.option('--stop-when-queue-empty', is_flag=True, default=False, 27 | help='Stop workers once the queue is empty') 28 | @click.option('--queue', default=DEFAULT_QUEUE, 29 | help='Queue to consume') 30 | @click.option('--threads', default=DEFAULT_WORKER_NUMBER, 31 | help='Number of worker threads to launch') 32 | def spinach_run_workers(threads, queue, stop_when_queue_empty): 33 | self.spin.start_workers( 34 | number=threads, 35 | queue=queue, 36 | stop_when_queue_empty=stop_when_queue_empty 37 | ) 38 | 39 | @signals.job_started.connect_via(namespace) 40 | def job_started(*args, job=None, **kwargs): 41 | if not flask.has_app_context(): 42 | ctx = app.app_context() 43 | ctx.push() 44 | flask.g.spinach_ctx = ctx 45 | self.job_started(job) 46 | 47 | @signals.job_finished.connect_via(namespace) 48 | def job_finished(*args, job=None, **kwargs): 49 | self.job_finished(job) 
50 | try: 51 | flask.g.spinach_ctx.pop() 52 | except AttributeError: 53 | # This means we didn't create the context. Ignore. 54 | pass 55 | 56 | @signals.job_failed.connect_via(namespace) 57 | def job_failed(*args, job=None, err=None, **kwargs): 58 | if not flask.has_app_context(): 59 | ctx = app.app_context() 60 | ctx.push() 61 | flask.g.spinach_ctx = ctx 62 | self.job_failed(job, err) 63 | 64 | @classmethod 65 | def job_started(cls, *args, job=None, **kwargs): 66 | """Callback for subclasses to receive job_started signals. 67 | 68 | There's no guarantee of ordering for Signal's callbacks, 69 | so use this callback instead to make sure the app context 70 | was pushed. 71 | """ 72 | pass 73 | 74 | @classmethod 75 | def job_finished(cls, *args, job=None, **kwargs): 76 | """Callback for subclasses to receive job_finished signals. 77 | 78 | There's no guarantee of ordering for Signal's callbacks, 79 | so use this callback instead to make sure the app context 80 | was pushed. 81 | """ 82 | pass 83 | 84 | @classmethod 85 | def job_failed(cls, *args, job=None, err=None, **kwargs): 86 | """Callback for subclasses to receive job_failed signals. 87 | 88 | There's no guarantee of ordering for Signal's callbacks, 89 | so use this callback instead to make sure the app context 90 | was pushed. If the signal is called as part of dead broker 91 | detection, you will need to use this as normal signals may 92 | not have been called with the app context pushed. 93 | """ 94 | pass 95 | 96 | @property 97 | def spin(self): 98 | if self.app is not None: 99 | return self.app.extensions['spinach'] 100 | 101 | try: 102 | return flask.current_app.extensions['spinach'] 103 | except (AttributeError, TypeError, KeyError): 104 | raise RuntimeError('Spinach extension not initialized. ' 105 | 'Did you forget to call init_app?') 106 | 107 | def register_tasks(self, app, tasks): 108 | try: 109 | app.extensions['spinach'].attach_tasks(tasks) 110 | except KeyError: 111 | raise RuntimeError('Spinach extension not initialized. ' 112 | 'Did you forget to call init_app?') 113 | 114 | # Convenience access to common Engine attributes and methods 115 | 116 | @property 117 | def task(self): 118 | return self.spin.task 119 | 120 | @property 121 | def execute(self): 122 | return self.spin.execute 123 | 124 | @property 125 | def schedule(self): 126 | return self.spin.schedule 127 | 128 | @property 129 | def schedule_at(self): 130 | return self.spin.schedule_at 131 | 132 | @property 133 | def schedule_batch(self): 134 | return self.spin.schedule_batch 135 | -------------------------------------------------------------------------------- /doc/user/tasks.rst: -------------------------------------------------------------------------------- 1 | .. _tasks: 2 | 3 | Tasks 4 | ===== 5 | 6 | A task is a unit of code, usually a function, to be executed in the background on remote workers. 7 | 8 | To define a task:: 9 | 10 | from spinach import Tasks 11 | 12 | tasks = Tasks() 13 | 14 | @tasks.task(name='add') 15 | def add(a, b): 16 | print('Computed {} + {} = {}'.format(a, b, a + b)) 17 | 18 | .. note:: The `args` and `kwargs` of a task must be JSON serializable. 19 | 20 | Retries 21 | ------- 22 | 23 | Spinach knows two kinds of tasks: the ones that can be retried safely (idempotent tasks) and the 24 | ones that cannot be retried safely (non-idempotent tasks). Since Spinach cannot guess if a task's 25 | code is safe to be retried multiple times, it must be annotated when the task is created. 26 | 27 | .. 
27 | .. note:: Whether a task is retryable or not affects the behavior of jobs in case of normal errors
28 |    during their execution but also when a worker catastrophically dies (power outage, OOM
29 |    killed...).
30 | 
31 | Non-Retryable Tasks
32 | ~~~~~~~~~~~~~~~~~~~
33 | 
34 | Spinach assumes that by default tasks are not safe to be retried (tasks are assumed to have side
35 | effects).
36 | 
37 | These tasks are defined with `max_retries=0` (the default)::
38 | 
39 |     @tasks.task(name='foo')
40 |     def foo(a, b):
41 |         pass
42 | 
43 | - use at-most-once delivery
44 | - it is guaranteed that the job will not run multiple times
45 | - it is guaranteed that the job will not run simultaneously in multiple workers
46 | - the job is not automatically retried in case of errors
47 | - the job may never even start in very rare conditions
48 | 
49 | Retryable Tasks
50 | ~~~~~~~~~~~~~~~
51 | 
52 | Idempotent tasks can be executed multiple times without changing the result beyond the initial
53 | execution. It is a nice property to have, and most tasks should try to be idempotent to gracefully
54 | recover from errors.
55 | 
56 | Retryable tasks are defined with a positive `max_retries` value::
57 | 
58 |     @tasks.task(name='foo', max_retries=10)
59 |     def foo(a, b):
60 |         pass
61 | 
62 | - use at-least-once delivery
63 | - the job is automatically retried, up to `max_retries` times, in case of errors
64 | - the job may be executed more than once
65 | - the job may be executed simultaneously in multiple workers in very rare
66 |   conditions
67 | 
68 | When a worker catastrophically dies, it is detected as dead after 30 minutes of inactivity and the
69 | retryable jobs that were running on it are rescheduled automatically.
70 | 
71 | Retrying
72 | ~~~~~~~~
73 | 
74 | When a retryable task is being executed, it will be retried when it encounters an unexpected
75 | exception::
76 | 
77 |     @tasks.task(name='foo', max_retries=10)
78 |     def foo(a, b):
79 |         l = [0, 1, 2]
80 |         print(l[100])  # Raises IndexError
81 | 
82 | To allow the system to recover gracefully, a default backoff strategy is applied.
83 | 
84 | .. autofunction:: spinach.utils.exponential_backoff
85 | 
86 | To be more explicit, a task can also raise a :class:`RetryException`, which allows precise control
87 | over when it should be retried::
88 | 
89 |     from spinach import RetryException
90 | 
91 |     @tasks.task(name='foo', max_retries=10)
92 |     def foo(a, b):
93 |         if status_code == 429:
94 |             raise RetryException(
95 |                 'Should retry in 10 minutes',
96 |                 at=datetime.now(tz=timezone.utc) + timedelta(minutes=10)
97 |             )
98 | 
99 | 
100 | .. autoclass:: spinach.task.RetryException
101 | 
102 | A task can also raise an :class:`AbortException` for short-circuit behavior:
103 | 
104 | .. autoclass:: spinach.task.AbortException
105 | 
106 | Limiting task concurrency
107 | -------------------------
108 | 
109 | If a task is idempotent, it may also have a limit on the number of concurrent jobs spawned across
110 | all workers. These types of tasks are defined with a positive `max_concurrency` value::
111 | 
112 |     @tasks.task(name='foo', max_retries=10, max_concurrency=1)
113 |     def foo(a, b):
114 |         pass
115 | 
116 | With this definition, no more than one instance of the Task will ever be spawned as a running Job,
117 | no matter how many are queued and waiting to run.
118 | 
119 | 
120 | Periodic tasks
121 | --------------
122 | 
123 | Tasks marked as periodic get automatically scheduled. To run a task every 5 seconds:
124 | 
125 | .. 
literalinclude:: ../../examples/periodic.py 126 | 127 | Periodic tasks get scheduled by the workers themselves, there is no need to run an additional 128 | process only for that. Of course having multiple workers on multiple machine is fine and will not 129 | result in duplicated tasks. 130 | 131 | Periodic tasks run at most every `period`. If the system scheduling periodic tasks gets delayed, 132 | nothing compensates for the time lost. This has the added benefit of periodic tasks not being 133 | scheduled if all the workers are down for a prolonged amount of time. When they get back online, 134 | workers won't have a storm of periodic tasks to execute. 135 | 136 | Tasks Registry 137 | -------------- 138 | 139 | Before being attached to a Spinach :class:`Engine`, tasks are created inside a :class:`Tasks` 140 | registry. 141 | 142 | Attaching tasks to a :class:`Tasks` registry instead of directly to the :class:`Engine` allows to 143 | compose large applications in smaller units independent from each other, the same way a Django 144 | project is composed of many small Django apps. 145 | 146 | This may seem cumbersome for trivial applications, like the examples in this documentation or some 147 | single-module projects, so those can create tasks directly on the :class:`Engine` using:: 148 | 149 | 150 | spin = Engine(MemoryBroker()) 151 | 152 | @spin.task(name='fast') 153 | def fast(): 154 | time.sleep(1) 155 | 156 | .. note:: Creating tasks directly in the :class:`Engine` is a bit like creating a Flask app 157 | globally instead of using an `app factory`: it works until a change introduces a circular 158 | import. Its usage should really be limited to tiny projects. 159 | 160 | .. autoclass:: spinach.task.Tasks 161 | :members: 162 | 163 | .. _batch: 164 | 165 | Batch 166 | ----- 167 | 168 | .. autoclass:: spinach.task.Batch 169 | :members: 170 | -------------------------------------------------------------------------------- /spinach/brokers/base.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from datetime import datetime, timezone 3 | from logging import getLogger 4 | import platform 5 | import threading 6 | import time 7 | from typing import Optional, Iterable, List, Tuple, Dict, Union 8 | import uuid 9 | 10 | from ..job import Job 11 | from ..task import Task 12 | from ..const import WAIT_FOR_EVENT_MAX_SECONDS 13 | 14 | logger = getLogger('spinach.broker') 15 | 16 | 17 | class Broker(ABC): 18 | 19 | def __init__(self): 20 | # Event that is set whenever: 21 | # - a job is enqueued in the main queue (to allow to fetch it) 22 | # - a job has been finished (to allow to fetch a new one) 23 | # - a future job is put in the waiting queue (to move it) 24 | # - the broker is stopping 25 | # It allows the Engine to wait for these things. 
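        # The broker sets this event when any of the above happens;
        # wait_for_event() clears it again once a waiter wakes up.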
26 | self._something_happened = threading.Event() 27 | 28 | self._namespace = None 29 | self._id = uuid.uuid4() 30 | self._broker_info = { 31 | 'id': str(self._id), 32 | 'name': platform.node(), 33 | 'started_at': int(time.time()) 34 | } 35 | 36 | def wait_for_event(self): 37 | next_future_job_delta = self.next_future_job_delta 38 | if next_future_job_delta is None: 39 | next_future_job_delta = WAIT_FOR_EVENT_MAX_SECONDS 40 | 41 | next_future_periodic_delta = self.next_future_periodic_delta 42 | if next_future_periodic_delta is None: 43 | next_future_periodic_delta = WAIT_FOR_EVENT_MAX_SECONDS 44 | 45 | timeout = min( 46 | next_future_job_delta, 47 | next_future_periodic_delta, 48 | WAIT_FOR_EVENT_MAX_SECONDS 49 | ) 50 | if self._something_happened.wait(timeout=timeout): 51 | self._something_happened.clear() 52 | 53 | def start(self): 54 | """Start the broker. 55 | 56 | Only needed by arbiter. 57 | """ 58 | 59 | def stop(self): 60 | """Stop the broker. 61 | 62 | Only needed by arbiter. 63 | """ 64 | self._something_happened.set() 65 | 66 | @property 67 | def namespace(self) -> str: 68 | if not self._namespace: 69 | raise RuntimeError('Namespace must be set before using the broker') 70 | 71 | return self._namespace 72 | 73 | @namespace.setter 74 | def namespace(self, value: str): 75 | if self._namespace: 76 | raise RuntimeError('The namespace can only be set once') 77 | 78 | self._namespace = value 79 | self._broker_info['namespace'] = value 80 | 81 | def _to_namespaced(self, value: str) -> str: 82 | return '{}/{}'.format(self.namespace, value) 83 | 84 | @abstractmethod 85 | def register_periodic_tasks(self, tasks: Iterable[Task]): 86 | """Register tasks that need to be scheduled periodically.""" 87 | 88 | @abstractmethod 89 | def set_concurrency_keys(self, tasks: Iterable[Task]): 90 | """Register concurrency data for Tasks. 91 | 92 | Set up anything in the Broker that is required to track 93 | concurrency on Tasks, where a Task defines max_concurrency. 94 | """ 95 | 96 | @abstractmethod 97 | def is_queue_empty(self, queue: str) -> bool: 98 | """Return True if the provided queue is empty.""" 99 | 100 | @abstractmethod 101 | def inspect_periodic_tasks(self) -> List[Tuple[int, str]]: 102 | """Get the next periodic task schedule. 103 | 104 | Used only for debugging and during tests. 105 | """ 106 | 107 | @abstractmethod 108 | def enqueue_jobs(self, jobs: Iterable[Job], from_failure: bool): 109 | """Enqueue a batch of jobs.""" 110 | 111 | @abstractmethod 112 | def remove_job_from_running(self, job: Job): 113 | """Remove a job from the list of running ones.""" 114 | 115 | @abstractmethod 116 | def get_jobs_from_queue(self, queue: str, max_jobs: int) -> List[Job]: 117 | """Get jobs from a queue.""" 118 | 119 | @abstractmethod 120 | def move_future_jobs(self) -> int: 121 | """Perform periodic management of the broker and the queues. 122 | 123 | This method originally only moved future jobs, but it expanded to 124 | perform other actions related to maintenance of brokers' data: 125 | - Moves ready jobs from the future queue to their normal queues 126 | - Enqueue periodic tasks that are due 127 | - Perform broker keepalive 128 | 129 | Note: This method may be called very often. In the future it would be 130 | preferable to decouple it from the retrieval of jobs from the queue. 
131 | 132 | :returns the number of jobs moved 133 | """ 134 | 135 | @abstractmethod 136 | def _get_next_future_job(self) -> Optional[Job]: 137 | """Get the next future job.""" 138 | 139 | @property 140 | def next_future_job_delta(self) -> Optional[float]: 141 | """Give the amount of seconds before the next future job is due.""" 142 | job = self._get_next_future_job() 143 | if not job: 144 | return None 145 | return (job.at - datetime.now(timezone.utc)).total_seconds() 146 | 147 | @property 148 | @abstractmethod 149 | def next_future_periodic_delta(self) -> Optional[float]: 150 | """Give the amount of seconds before the next periodic task is due.""" 151 | 152 | @abstractmethod 153 | def flush(self): 154 | """Delete everything in the namespace.""" 155 | 156 | @abstractmethod 157 | def get_all_brokers(self) -> List[Dict[str, Union[None, str, int]]]: 158 | """Return all registered brokers.""" 159 | 160 | @abstractmethod 161 | def enqueue_jobs_from_dead_broker( 162 | self, dead_broker_id: uuid.UUID 163 | ) -> Tuple[int, list]: 164 | """Re-enqueue the jobs that were running on a broker. 165 | 166 | Only jobs that can be retired are moved back to the queue, the others 167 | are lost as expected. 168 | 169 | Both the current broker and the dead one must use the same namespace. 170 | 171 | This method is called automatically on brokers that are identified 172 | as dead by Spinach but it can also be used by user's code. 173 | If someone has a better system to detect dead processes (monitoring, 174 | Consul, etcd...) this method can be called with the ID of the dead 175 | broker to re-enqueue jobs before Spinach notices that the broker is 176 | actually dead, which takes 30 minutes by default. 177 | 178 | :param dead_broker_id: UUID of the dead broker. 179 | :return: Number of jobs that were moved back to the queue. 180 | """ 181 | 182 | def _get_broker_info(self) -> Dict[str, Union[None, str, int]]: 183 | rv = self._broker_info.copy() 184 | rv['last_seen_at'] = int(time.time()) 185 | return rv 186 | 187 | def __repr__(self): 188 | return '<{}: {}>'.format(self.__class__.__name__, self._id) 189 | -------------------------------------------------------------------------------- /doc/user/design.rst: -------------------------------------------------------------------------------- 1 | .. _design: 2 | 3 | Design choices 4 | ============== 5 | 6 | I have used the Celery task queue for a long time and while it is a rock solid piece of software, 7 | there are some design decisions that just drive me crazy. 8 | 9 | This page presents and explains the key design decisions behind Spinach. It can be summed up as: 10 | explicit is better than implicit. Spinach makes sure that it does not provide any convenient 11 | feature that can backfire in more complex usages. 12 | 13 | Threaded & asynchronous workers 14 | ------------------------------- 15 | 16 | Spinach workers are either threaded or asynchronous while other task queues like Celery or RQ rely 17 | on processes by default. 18 | 19 | Threaded and asynchronous workers work best with IO bound tasks: tasks that make requests to other 20 | services, query a database or read files. If your tasks are CPU bound, meaning that you do heavy 21 | computations in Python, a process based worker will be more efficient. 22 | 23 | Tasks in a typical web application are more often than not IO bound. The choice of threads or 24 | coroutines as unit of concurrency is a sensible one. 
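
As an illustration, here is a minimal sketch of the two flavors. It only uses the public names
shown elsewhere in this documentation (``Engine``, ``MemoryBroker``, ``AsyncioWorkers``), and it
assumes that ``start_workers`` accepts a ``workers_class`` argument for selecting coroutine based
workers; treat that keyword as an assumption rather than a reference::

    from spinach import Engine, MemoryBroker, AsyncioWorkers

    spin = Engine(MemoryBroker(), namespace='demo')

    @spin.task(name='fetch')
    async def fetch(url):
        ...  # replace with an awaitable HTTP call

    spin.schedule('fetch', 'https://example.com')

    # Threaded workers are the default; workers_class is assumed here to be
    # the switch that selects the asyncio workers, which await the coroutine
    # tasks instead of calling them in a thread.
    spin.start_workers(number=5, workers_class=AsyncioWorkers,
                       stop_when_queue_empty=True)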
25 | 
26 | Threads and coroutines also have the advantage of being lighter than processes: a system can handle
27 | more threads than processes before resources get exhausted.
28 | 
29 | Fork
30 | ~~~~
31 | 
32 | Another reason why Spinach does not use processes for concurrency is that the ``fork`` system
33 | call used to create the workers is a very special one. It has Copy-On-Write semantics that are
34 | unfamiliar to many Python developers.
35 | 
36 | On the other hand, thread-safety is a better understood problem in Python, with the standard library
37 | providing most of the solutions needed to write thread-safe programs.
38 | 
39 | Not relying on ``fork`` also makes Spinach compatible with Windows.
40 | 
41 | Embeddable workers
42 | ------------------
43 | 
44 | As workers are just threads, they are easily embeddable in any other Python process. This opens the
45 | door to two nice usages:
46 | 
47 | During automated tests, a worker can be launched that processes jobs exactly like a normal worker
48 | would in production. What is more, by using an in-memory broker there is no need to have a Redis
49 | server running during tests.
50 | 
51 | For small web projects, the task workers can be launched from the same process as the web
52 | application. As the application gets bigger, the workers can be moved to a separate process very
53 | easily.
54 | 
55 | Logging
56 | -------
57 | 
58 | One issue I have with Celery is the way it handles logging: the framework tries to be too smart,
59 | resulting in great pain when the logging setup gets more complex.
60 | 
61 | That is why Spinach keeps it simple: as a well behaved library it uses the standard `logging module
62 | <https://docs.python.org/3/library/logging.html>`_ and writes logs in its own loggers.
63 | 
64 | The choice of what to do with these log records is up to the final user.
65 | 
66 | Jobs scheduled for the future
67 | -----------------------------
68 | 
69 | Spinach has full support for jobs that need to be executed in the future. These jobs go to
70 | a special queue until they are ready to be launched. At that time they are moved to a normal queue
71 | where they are picked up by a worker.
72 | 
73 | Celery emulates this behavior by immediately sending the task to a worker and waiting there until
74 | the time has come to execute it. It means tasks cannot be scheduled much in advance without wasting
75 | resources in the worker.
76 | 
77 | Periodic jobs
78 | -------------
79 | 
80 | One annoying thing with Celery is that you can launch as many distributed workers as you want, but
81 | there must be one and only one Celery beat process running in the cluster at a time.
82 | 
83 | This approach does not work well with containerized applications that run in a cluster that often
84 | redeploys and moves containers around.
85 | 
86 | All Spinach workers are part of the system that schedules periodic jobs, so there is no need to have
87 | a pet in the cattle farm.
88 | 
89 | Only two brokers
90 | ----------------
91 | 
92 | Spinach lets the user pick between the in-memory broker for local development and the Redis broker
93 | for production. Both support exactly the same set of features.
94 | 
95 | Redis was chosen because it is an incredibly versatile database. With Lua scripting it becomes
96 | possible to develop entirely new patterns which are essential to create a useful and reliable task
97 | queue.
98 | 
99 | Other services like Google PubSub, Amazon SQS or AMQP are very opinionated and not as versatile as
100 | Redis, making them difficult to use within Spinach without cutting down on features.
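
To make the two points above concrete — workers that embed in any process and an in-memory broker
with the same feature set — here is a small sketch of a test that processes jobs without Redis and
without a separate worker process. Only names shown elsewhere in this documentation are used; the
test function itself is hypothetical::

    from spinach import Engine, MemoryBroker

    spin = Engine(MemoryBroker(), namespace='tests')

    @spin.task(name='add')
    def add(a, b):
        print('{} + {} = {}'.format(a, b, a + b))

    def test_add_job():
        spin.schedule('add', 1, 2)
        # Drain the queue with an embedded worker, then return.
        spin.start_workers(number=1, stop_when_queue_empty=True)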
101 | 
102 | Namespace
103 | ---------
104 | 
105 | Multiple Spinach applications (production, staging...) can use the same Redis database without
106 | interfering with each other.
107 | 
108 | Likewise, a single interpreter can run multiple Spinach applications without them interfering with
109 | each other.
110 | 
111 | Minimize import side-effects
112 | ----------------------------
113 | 
114 | Spinach encourages users to write applications that have minimal side-effects when imported. There
115 | is no global state that gets created or modified when importing or using Spinach.
116 | 
117 | The user is free to use Spinach in a scoped fashion or to declare everything globally.
118 | 
119 | This makes it possible for a single interpreter to run multiple Spinach applications without them
120 | interfering with each other, which is particularly useful for running automated tests.
121 | 
122 | No worker entrypoint
123 | --------------------
124 | 
125 | Celery has this ``celery worker`` entrypoint that can be launched from the command line to load an
126 | application and spawn the workers.
127 | 
128 | The problem I often face is that I never know if a setting should be defined in my code as part of
129 | the app setup or as a flag of this command line.
130 | 
131 | Moreover, command line flags and application settings often have slightly different names, making
132 | things more confusing.
133 | 
134 | Spinach thus makes it foolproof: you are responsible for configuring the Spinach app through your
135 | Python code. You can read settings from environment variables, from a file or anything else
136 | possible in Python.
137 | 
138 | It is then easy to create your own entrypoint to launch the workers.
139 | 
140 | Schedule tasks in batch
141 | -----------------------
142 | 
143 | A pattern that is used frequently with task queues is to periodically scan all entities and
144 | schedule an individual task for each entity that needs further work. For instance, closing the
145 | accounts of members who haven't logged in for a year.
146 | 
147 | With Celery this results in having to do as many round-trips to the broker as there are tasks to
148 | schedule. There are some workarounds but they just move the problem elsewhere.
149 | 
150 | Spinach supports sending tasks to the broker in batch to avoid this overhead.
151 | 
152 | Written for the Cloud
153 | ---------------------
154 | 
155 | Latency between workers and Redis can be high, for example when they are deployed in two separate
156 | regions. Spinach leverages Lua scripting in Redis to avoid unnecessary round-trips by batching
157 | calls as much as possible.
158 | 
159 | In a cloud environment network connections can get dropped and packets get lost. Spinach retries
160 | failed actions after applying an exponential backoff with randomized jitter to avoid the thundering
161 | herd problem when the network gets back to normal.
162 | 
163 | Workers are expected to be deployed in containers, probably managed by an orchestrator like
164 | Kubernetes or Nomad that often scales and shuffles containers around. Workers can join and leave the
165 | cluster at any time without impacting the ability to process jobs.
166 | 
167 | Because worker processes can die unexpectedly (power loss, OOM killed, extended network outage...),
168 | Spinach tries to detect dead workers and reschedule the jobs that were running on them if the jobs
169 | are safe to be retried.
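
If an external system (monitoring, Consul, etcd...) already knows that a worker is gone, the
built-in detection delay can be skipped by re-enqueuing its jobs explicitly. The sketch below only
relies on the broker's ``enqueue_jobs_from_dead_broker`` method; how the dead broker's UUID reaches
your code is left to your monitoring and is assumed here::

    import uuid

    from spinach import Engine, RedisBroker

    broker = RedisBroker()
    spin = Engine(broker, namespace='production')

    def on_worker_declared_dead(dead_broker_id: str):
        # Both brokers must share the same namespace; only retryable jobs
        # are moved back to their queues.
        num_requeued, _ = broker.enqueue_jobs_from_dead_broker(
            uuid.UUID(dead_broker_id))
        print('Re-enqueued {} jobs'.format(num_requeued))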
170 | -------------------------------------------------------------------------------- /spinach/worker.py: -------------------------------------------------------------------------------- 1 | from abc import ABC 2 | import asyncio 3 | from logging import getLogger 4 | try: 5 | from queue import SimpleQueue 6 | except ImportError: 7 | # For Python 3.6 8 | from queue import Queue as SimpleQueue 9 | import sys 10 | import threading 11 | import time 12 | from typing import List 13 | 14 | from . import signals 15 | from .job import Job, advance_job_status 16 | from .queuey import Queuey 17 | 18 | logger = getLogger(__name__) 19 | 20 | 21 | class BaseWorkers(ABC): 22 | """Base class for Spinach workers. 23 | 24 | The Workers class receives jobs from the Engine via the `submit_job` 25 | method and sends back results to the Engine via the `out_queue`. 26 | 27 | Children classes are responsible for taking jobs from the in_queue, 28 | executing them and putting results in the out_queue. 29 | """ 30 | 31 | def __init__(self, num_workers: int, namespace: str): 32 | if num_workers <= 0: 33 | raise ValueError('num_workers must be at least 1') 34 | 35 | self._num_workers = num_workers 36 | self._namespace = namespace.format(namespace) 37 | 38 | # List containing worker threads 39 | self._threads: List[threading.Thread] = list() 40 | 41 | # in_queue receives Job objects to execute 42 | # out_queue send Job objects after execution 43 | self._in_queue = Queuey(maxsize=self._num_workers) 44 | self.out_queue = SimpleQueue() 45 | 46 | # The event exists only to stop accepting jobs, workers are terminated 47 | # via the poison pill 48 | self._must_stop = threading.Event() 49 | self.poison_pill = object() 50 | 51 | def submit_job(self, job: Job): 52 | if self._must_stop.is_set(): 53 | raise RuntimeError('Cannot submit job: workers are shutting down') 54 | 55 | self._in_queue.put_sync(job) 56 | 57 | @property 58 | def available_slots(self) -> int: 59 | """Number of jobs the :class:`BaseWorkers` can accept. 60 | 61 | It may be racy, but it should not be a problem here as jobs are 62 | only submitted by a single thread (the arbiter). 63 | """ 64 | return self._in_queue.available_slots() 65 | 66 | def can_accept_job(self) -> bool: 67 | return self.available_slots > 0 68 | 69 | def stop(self): 70 | if self._must_stop.is_set(): 71 | logger.warning('Workers are already shutting down') 72 | return 73 | 74 | logger.info('Stopping workers %s', self._namespace) 75 | self._must_stop.set() 76 | self._in_queue.put_sync(self.poison_pill) 77 | for thread in self._threads: 78 | thread.join() 79 | self.out_queue.put(self.poison_pill) 80 | logger.debug('All workers %s stopped', self._namespace) 81 | 82 | 83 | class ThreadWorkers(BaseWorkers): 84 | """Thread pool based workers. 85 | 86 | Launches a pool of `num_workers` threads, each executing a single job 87 | at once. 
88 | """ 89 | 90 | def __init__(self, num_workers: int, namespace: str): 91 | super().__init__(num_workers, namespace) 92 | for i in range(1, self._num_workers + 1): 93 | thread = threading.Thread( 94 | target=self._worker_func, 95 | name='{}-worker-{}'.format(self._namespace, i) 96 | ) 97 | thread.start() 98 | self._threads.append(thread) 99 | 100 | def _worker_func(self): 101 | worker_name = threading.current_thread().name 102 | logger.debug('Worker %s started', worker_name) 103 | signals.worker_started.send(self._namespace, worker_name=worker_name) 104 | 105 | while True: 106 | item = self._in_queue.get_sync() 107 | 108 | if item is self.poison_pill: 109 | self._in_queue.task_done() 110 | self._in_queue.put_sync(self.poison_pill) 111 | break 112 | 113 | job = item 114 | logger.info('Starting execution of %s', job) 115 | signals.job_started.send(self._namespace, job=job) 116 | start_time = time.monotonic() 117 | try: 118 | job.task_func(*job.task_args, **job.task_kwargs) 119 | except Exception as e: 120 | duration = time.monotonic() - start_time 121 | advance_job_status(self._namespace, job, duration, e) 122 | else: 123 | duration = time.monotonic() - start_time 124 | advance_job_status(self._namespace, job, duration, None) 125 | finally: 126 | signals.job_finished.send(self._namespace, job=job) 127 | self.out_queue.put(job) 128 | self._in_queue.task_done() 129 | 130 | logger.debug('Worker %s terminated', worker_name) 131 | signals.worker_terminated.send(self._namespace, 132 | worker_name=worker_name) 133 | 134 | 135 | class AsyncioWorkers(BaseWorkers): 136 | """Asyncio based workers. 137 | 138 | Launches a single thread that runs `num_workers` asyncio coroutines at 139 | once. The sync part of Spinach (Engine, Broker...) interfaces with 140 | the asyncio loop in the worker thread via Janus queues that can be used 141 | both from sync and async code. 142 | """ 143 | 144 | def __init__(self, num_workers: int, namespace: str): 145 | # Python 3.6 misses a few asyncio features that make it a pain to 146 | # support. Projects using asyncio are most likely already using the 147 | # latest version of Python. 
148 | if sys.version_info < (3, 7): 149 | raise Exception("Spinach asyncio workers require Python 3.7+") 150 | 151 | super().__init__(num_workers, namespace) 152 | self._threads.append(threading.Thread( 153 | target=self._sync_interface_func, 154 | name='{}-asyncio-worker'.format(self._namespace) 155 | )) 156 | self._threads[0].start() 157 | 158 | def _sync_interface_func(self): 159 | worker_name = threading.current_thread().name 160 | logger.debug('Worker %s started', worker_name) 161 | signals.worker_started.send(self._namespace, worker_name=worker_name) 162 | 163 | asyncio.run(self._async_interface_func()) 164 | 165 | logger.debug('Worker %s terminated', worker_name) 166 | signals.worker_terminated.send(self._namespace, 167 | worker_name=worker_name) 168 | 169 | async def _async_interface_func(self): 170 | worker_futures = list() 171 | for _ in range(1, self._num_workers + 1): 172 | worker_futures.append( 173 | asyncio.ensure_future(self._worker_func()) 174 | ) 175 | 176 | await asyncio.gather(*worker_futures) 177 | loop = asyncio.get_running_loop() 178 | await loop.shutdown_default_executor() 179 | 180 | async def _worker_func(self): 181 | while True: 182 | item = await self._in_queue.get_async() 183 | 184 | if item is self.poison_pill: 185 | self._in_queue.task_done() 186 | await self._in_queue.put_async(self.poison_pill) 187 | break 188 | 189 | job = item 190 | logger.info('Starting execution of %s', job) 191 | signals.job_started.send(self._namespace, job=job) 192 | start_time = time.monotonic() 193 | try: 194 | await job.task_func(*job.task_args, **job.task_kwargs) 195 | except Exception as e: 196 | duration = time.monotonic() - start_time 197 | advance_job_status(self._namespace, job, duration, e) 198 | else: 199 | duration = time.monotonic() - start_time 200 | advance_job_status(self._namespace, job, duration, None) 201 | finally: 202 | signals.job_finished.send(self._namespace, job=job) 203 | self.out_queue.put(job) 204 | self._in_queue.task_done() 205 | -------------------------------------------------------------------------------- /doc/user/integrations.rst: -------------------------------------------------------------------------------- 1 | .. _integrations: 2 | 3 | Integrations 4 | ============ 5 | 6 | Integration with third-party libraries and frameworks. 7 | 8 | Logging 9 | ------- 10 | 11 | Spinach uses the standard Python `logging package 12 | `_. Its logger prefix is ``spinach``. Spinach does 13 | nothing else besides creating its loggers and emitting log records. The user is responsible for 14 | configuring logging before starting workers. 15 | 16 | For simple applications it is enough to use:: 17 | 18 | import logging 19 | 20 | logging.basicConfig( 21 | format='%(asctime)s - %(threadName)s %(levelname)s: %(message)s', 22 | level=logging.DEBUG 23 | ) 24 | 25 | More complex applications will probably use `dictConfig 26 | `_. 27 | 28 | Flask 29 | ----- 30 | 31 | The Flask integration follows the spirit of Flask very closely, it provides two ways of getting 32 | started: a single module approach for minial applications and an application factory approach for 33 | more scalable code. 34 | 35 | The Spinach extension for Flask pushes an application context for the duration of the tasks, which 36 | means that it plays well with other extensions like Flask-SQLAlchemy and doesn't require extra 37 | precautions. 38 | 39 | Single Module 40 | ~~~~~~~~~~~~~ 41 | 42 | .. 
literalinclude:: ../../examples/flaskapp.py 43 | 44 | Application Factory 45 | ~~~~~~~~~~~~~~~~~~~ 46 | 47 | This more complex layout includes an Application Factory ``create_app`` and an imaginary ``auth`` 48 | Blueprint containing routes and tasks. 49 | 50 | ``app.py``:: 51 | 52 | from flask import Flask 53 | from spinach import RedisBroker 54 | from spinach.contrib.flask_spinach import Spinach 55 | 56 | spinach = Spinach() 57 | 58 | 59 | def create_app(): 60 | app = Flask(__name__) 61 | app.config['SPINACH_BROKER'] = RedisBroker() 62 | spinach.init_app(app) 63 | 64 | from . import auth 65 | app.register_blueprint(auth.blueprint) 66 | spinach.register_tasks(app, auth.tasks) 67 | 68 | return app 69 | 70 | ``auth.py``:: 71 | 72 | from flask import Blueprint, jsonify 73 | from spinach import Tasks 74 | 75 | from .app import spinach 76 | 77 | 78 | blueprint = Blueprint('auth', __name__) 79 | tasks = Tasks() 80 | 81 | 82 | @blueprint.route('/') 83 | def create_user(): 84 | spinach.schedule('send_welcome_email') 85 | return jsonify({'user_id': 42}) 86 | 87 | 88 | @tasks.task(name='send_welcome_email') 89 | def send_welcome_email(): 90 | print('Sending email...') 91 | 92 | Running workers 93 | ~~~~~~~~~~~~~~~ 94 | 95 | Workers can be launched from the Flask CLI:: 96 | 97 | $ FLASK_APP=examples.flaskapp flask spinach 98 | 99 | The working queue and the number of threads can be changed with:: 100 | 101 | $ FLASK_APP=examples.flaskapp flask spinach --queue high-priority --threads 20 102 | 103 | .. note:: When in development mode, Flask uses its reloader to automatically restart the process 104 | when the code changes. When having periodic tasks defined, using the MemoryBroker and Flask 105 | reloader users may see their periodic tasks scheduled each time the code changes. If this is 106 | a problem, users are encouraged to switch to the RedisBroker for development. 107 | 108 | Configuration 109 | ~~~~~~~~~~~~~ 110 | 111 | - ``SPINACH_BROKER``, default ``spinach.RedisBroker()`` 112 | - ``SPINACH_NAMESPACE``, defaults to the Flask app name 113 | 114 | Django 115 | ------ 116 | 117 | A Django application is available for integrating Spinach into Django projects. 118 | 119 | To get started, add the application ``spinach.contrib.spinachd`` to ``settings.py``:: 120 | 121 | INSTALLED_APPS = ( 122 | ... 123 | 'spinach.contrib.spinachd', 124 | ) 125 | 126 | On startup, Spinach will look for a ``tasks.py`` module in all installed applications. For instance 127 | ``polls/tasks.py``:: 128 | 129 | from spinach import Tasks 130 | 131 | from .models import Question 132 | 133 | tasks = Tasks() 134 | 135 | 136 | @tasks.task(name='polls:close_poll') 137 | def close_poll(question_id: int): 138 | Question.objects.get(pk=question_id).delete() 139 | 140 | Tasks can be easily scheduled from views:: 141 | 142 | from .models import Question 143 | from .tasks import tasks 144 | 145 | def close_poll_view(request, question_id): 146 | question = get_object_or_404(Question, pk=question_id) 147 | tasks.schedule('polls:close_poll', question.id) 148 | 149 | Users of the Django Datadog app get their jobs reported to Datadog APM automatically in task 150 | workers. 
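
Periodic tasks can live in these ``tasks.py`` modules as well. The sketch below assumes that the
``periodicity`` argument is accepted by the ``Tasks.task`` decorator and uses a hypothetical
``closed`` field on the ``Question`` model; adjust both to your project::

    from datetime import timedelta

    from spinach import Tasks

    from .models import Question

    tasks = Tasks()


    @tasks.task(name='polls:purge_closed_polls',
                periodicity=timedelta(hours=24))
    def purge_closed_polls():
        # Runs on the workers roughly once a day, no extra beat process needed.
        Question.objects.filter(closed=True).delete()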
151 | 152 | Running workers 153 | ~~~~~~~~~~~~~~~ 154 | 155 | Workers can be launched from ``manage.py``:: 156 | 157 | $ python manage.py spinach 158 | 159 | The working queue and the number of threads can be changed with:: 160 | 161 | $ python manage.py spinach --queue high-priority --threads 20 162 | 163 | Sending emails in the background 164 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 165 | 166 | The Spinach app provides an ``EMAIL_BACKEND`` allowing to send emails as background tasks. To use 167 | it simply add it to ``settings.py``:: 168 | 169 | EMAIL_BACKEND = 'spinach.contrib.spinachd.mail.BackgroundEmailBackend' 170 | SPINACH_ACTUAL_EMAIL_BACKEND = 'django.core.mail.backends.smtp.EmailBackend' 171 | 172 | Emails can then be sent using regular Django functions:: 173 | 174 | from django.core.mail import send_mail 175 | 176 | send_mail('Subject', 'Content', 'sender@example.com', ['receiver@example.com']) 177 | 178 | Periodically clearing expired sessions 179 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 180 | 181 | Projects using ``django.contrib.sessions`` must remove expired session from the database from time 182 | to time. Django comes with a management command to do that manually, but this can be automated. 183 | 184 | Spinach provides a periodic task, disabled by default, to do that. To enable it give it 185 | a periodicity in ``settings.py``. For instance to clear sessions once per week:: 186 | 187 | from datetime import timedelta 188 | 189 | SPINACH_CLEAR_SESSIONS_PERIODICITY = timedelta(weeks=1) 190 | 191 | Configuration 192 | ~~~~~~~~~~~~~ 193 | 194 | - ``SPINACH_BROKER``, default ``spinach.RedisBroker()`` 195 | - ``SPINACH_NAMESPACE``, default ``spinach`` 196 | - ``SPINACH_ACTUAL_EMAIL_BACKEND``, default 197 | ``django.core.mail.backends.smtp.EmailBackend`` 198 | - ``SPINACH_CLEAR_SESSIONS_PERIODICITY``, default ``None`` (disabled) 199 | 200 | Sentry 201 | ------ 202 | 203 | With the Sentry integration, failing jobs can be automatically reported to `Sentry 204 | `_ with full traceback, log breadcrumbs and job information. Moreover 205 | performance tracing of task is enabled. 206 | 207 | The Sentry integration requires `Sentry SDK 208 | `_:: 209 | 210 | pip install sentry_sdk 211 | 212 | It then just needs to be registered before starting workers:: 213 | 214 | import sentry_sdk 215 | 216 | from spinach.contrib.sentry_sdk_spinach import SpinachIntegration 217 | 218 | sentry_sdk.init( 219 | dsn="https://sentry_dsn/42", 220 | integrations=[SpinachIntegration(send_retries=False)] 221 | ) 222 | 223 | 224 | Datadog 225 | ------- 226 | 227 | With the Datadog integration, all jobs are automatically reported to Datadog APM. 228 | 229 | The integration requires `ddtrace `_, the Datadog APM client 230 | for Python:: 231 | 232 | pip install ddtrace 233 | 234 | The integration just needs to be registered before starting workers:: 235 | 236 | from spinach.contrib.datadog import register_datadog 237 | 238 | register_datadog() 239 | 240 | spin = Engine(MemoryBroker()) 241 | spin.start_workers() 242 | 243 | This only installs the integration with Spinach, other libraries still need to be patched by 244 | ddtrace. It is recommended to run your application patched as explained in the ddtrace 245 | documentation. 246 | 247 | .. 
autofunction:: spinach.contrib.datadog.register_datadog 248 | -------------------------------------------------------------------------------- /spinach/brokers/memory.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime, timezone 2 | from logging import getLogger 3 | from queue import Queue, Empty 4 | import sched 5 | import threading 6 | import time 7 | from typing import Optional, Iterable, List, Tuple, Dict, Union 8 | import uuid 9 | 10 | from .base import Broker 11 | from ..job import Job, JobStatus 12 | from ..task import Task 13 | 14 | logger = getLogger('spinach.broker') 15 | 16 | 17 | class MemoryBroker(Broker): 18 | 19 | def __init__(self): 20 | super().__init__() 21 | self._lock = threading.RLock() 22 | self._queues = dict() 23 | self._future_jobs = list() 24 | self._running_jobs = list() 25 | self._scheduler = sched.scheduler() 26 | self._max_concurrency_keys = dict() 27 | self._cur_concurrency_keys = dict() 28 | 29 | def _get_queue(self, queue_name: str): 30 | queue_name = self._to_namespaced(queue_name) 31 | with self._lock: 32 | try: 33 | return self._queues[queue_name] 34 | except KeyError: 35 | queue = Queue() 36 | self._queues[queue_name] = queue 37 | return queue 38 | 39 | def enqueue_jobs(self, jobs: Iterable[Job], from_failure: bool=False): 40 | """Enqueue a batch of jobs.""" 41 | for job in jobs: 42 | with self._lock: 43 | if from_failure: 44 | max_concurrency = self._max_concurrency_keys[ 45 | job.task_name 46 | ] 47 | if max_concurrency is not None: 48 | self._cur_concurrency_keys[job.task_name] -= 1 49 | if job.should_start: 50 | job.status = JobStatus.QUEUED 51 | queue = self._get_queue(job.queue) 52 | queue.put(job.serialize()) 53 | else: 54 | with self._lock: 55 | job.status = JobStatus.WAITING 56 | self._future_jobs.append(job.serialize()) 57 | self._future_jobs.sort(key=lambda j: Job.deserialize(j).at) 58 | self._something_happened.set() 59 | 60 | def move_future_jobs(self) -> int: 61 | num_jobs_moved = 0 62 | with self._lock: 63 | job = self._get_next_future_job() 64 | 65 | while job and job.should_start: 66 | job.status = JobStatus.QUEUED 67 | queue = self._get_queue(job.queue) 68 | queue.put(job.serialize()) 69 | self._future_jobs.pop(0) 70 | num_jobs_moved += 1 71 | 72 | job = self._get_next_future_job() 73 | 74 | if num_jobs_moved < 0: 75 | # At least one job got enqueued so the flag must be set 76 | self._something_happened.set() 77 | 78 | # Create jobs from due periodic tasks 79 | self._scheduler.run(blocking=False) 80 | 81 | return num_jobs_moved 82 | 83 | def set_concurrency_keys(self, tasks: Iterable[Task]): 84 | for task in tasks: 85 | self._max_concurrency_keys[task.name] = task.max_concurrency 86 | self._cur_concurrency_keys[task.name] = 0 87 | 88 | def register_periodic_tasks(self, tasks: Iterable[Task]): 89 | """Register tasks that need to be scheduled periodically.""" 90 | for task in tasks: 91 | self._scheduler.enter( 92 | int(task.periodicity.total_seconds()), 93 | 0, 94 | self._schedule_periodic_task, 95 | argument=(task,) 96 | ) 97 | 98 | def _schedule_periodic_task(self, task: Task): 99 | at = datetime.now(timezone.utc) 100 | job = Job(task.name, task.queue, at, task.max_retries) 101 | self.enqueue_jobs([job]) 102 | self._scheduler.enter( 103 | int(task.periodicity.total_seconds()), 104 | 0, 105 | self._schedule_periodic_task, 106 | argument=(task,) 107 | ) 108 | 109 | @property 110 | def next_future_periodic_delta(self) -> Optional[float]: 111 | """Give the amount of seconds 
before the next periodic task is due.""" 112 | try: 113 | next_event = self._scheduler.queue[0] 114 | except IndexError: 115 | return None 116 | 117 | now = time.monotonic() 118 | next_event_time = next_event[0] 119 | if next_event_time < now: 120 | return 0 121 | 122 | return next_event_time - now 123 | 124 | def inspect_periodic_tasks(self) -> List[Tuple[int, str]]: 125 | """Get the next periodic task schedule. 126 | 127 | Used only for debugging and during tests. 128 | """ 129 | return [ 130 | (int(e.time), e.argument[0].name) 131 | for e in self._scheduler.queue 132 | ] 133 | 134 | def _get_next_future_job(self) -> Optional[Job]: 135 | with self._lock: 136 | try: 137 | return Job.deserialize(self._future_jobs[0]) 138 | except IndexError: 139 | return None 140 | 141 | def is_queue_empty(self, queue: str): 142 | return self._get_queue(queue).qsize() == 0 143 | 144 | def get_jobs_from_queue(self, queue: str, max_jobs: int) -> List[Job]: 145 | """Get jobs from a queue.""" 146 | rv = list() 147 | jobs_to_re_add = list() 148 | with self._lock: 149 | while len(rv) < max_jobs: 150 | try: 151 | job_json_string = self._get_queue(queue).get(block=False) 152 | except Empty: 153 | break 154 | 155 | job = Job.deserialize(job_json_string) 156 | max_concurrency = self._max_concurrency_keys.get(job.task_name) 157 | cur_concurrency = self._cur_concurrency_keys.get(job.task_name) 158 | if ( 159 | max_concurrency is not None and 160 | cur_concurrency >= max_concurrency 161 | ): 162 | jobs_to_re_add.append(job_json_string) 163 | 164 | else: 165 | job.status = JobStatus.RUNNING 166 | rv.append(job) 167 | if max_concurrency is not None: 168 | self._cur_concurrency_keys[job.task_name] += 1 169 | 170 | # Re-add jobs that could not be run due to max_concurrency 171 | # limits. Queue does not have a way to insert at the front, so 172 | # sadly they go straight to the back again. Given that 173 | # MemoryBroker is generally only used for testing, this should 174 | # not be a great hardship. 175 | logger.debug( 176 | "Re-adding %s jobs due to concurrency limits", 177 | len(jobs_to_re_add) 178 | ) 179 | for job in jobs_to_re_add: 180 | self._get_queue(queue).put(job) 181 | 182 | return rv 183 | 184 | def flush(self): 185 | with self._lock: 186 | self._queues = dict() 187 | self._future_jobs = list() 188 | 189 | def get_all_brokers(self) -> List[Dict[str, Union[None, str, int]]]: 190 | # A memory broker is not connected to any other broker 191 | return [self._get_broker_info()] 192 | 193 | def enqueue_jobs_from_dead_broker( 194 | self, dead_broker_id: uuid.UUID 195 | ) -> Tuple[int, list]: 196 | # A memory broker cannot be dead 197 | return 0, [] 198 | 199 | def remove_job_from_running(self, job: Job): 200 | """Remove a job from the list of running ones. 201 | 202 | Easy, the memory broker doesn't track running jobs. If the broker dies 203 | there is nothing we can do. 204 | 205 | We still need to decrement the current_concurrency count, 206 | however, if it exists. 
207 | """ 208 | with self._lock: 209 | max_concurrency = self._max_concurrency_keys[job.task_name] 210 | if max_concurrency is not None: 211 | self._cur_concurrency_keys[job.task_name] -= 1 212 | 213 | self._something_happened.set() 214 | -------------------------------------------------------------------------------- /tests/test_task.py: -------------------------------------------------------------------------------- 1 | import copy 2 | from datetime import timedelta 3 | from unittest import mock 4 | 5 | import pytest 6 | 7 | from spinach.task import (Task, Tasks, Batch, RetryException) 8 | from spinach import const, exc 9 | 10 | from .conftest import get_now 11 | 12 | 13 | @pytest.fixture 14 | def task(): 15 | return Task(print, 'write_to_stdout', 'foo_queue', 0, None, None) 16 | 17 | 18 | def test_task(task): 19 | assert task.func is print 20 | assert task.name == 'write_to_stdout' 21 | assert task.queue == 'foo_queue' 22 | assert task.max_retries == 0 23 | assert task.periodicity is None 24 | 25 | assert 'print' in repr(task) 26 | assert 'write_to_stdout' in repr(task) 27 | assert 'foo_queue' in repr(task) 28 | assert '0' in repr(task) 29 | 30 | 31 | def test_task_eq(task): 32 | assert task == task 33 | assert task != print 34 | 35 | task_2 = copy.deepcopy(task) 36 | task_2.name = 'read_from_stdout' 37 | assert task != task_2 38 | 39 | 40 | def test_task_repr(task): 41 | task_repr = repr(task) 42 | expected = 'Task({}, {}, {}, {}, {}, {})'.format( 43 | task.func, task.name, task.queue, task.max_retries, 44 | task.periodicity, task.max_concurrency, 45 | ) 46 | assert task_repr == expected 47 | 48 | 49 | def test_tasks_add(task): 50 | tasks = Tasks() 51 | tasks.add(print, 'write_to_stdout', queue='foo_queue') 52 | assert tasks.tasks == { 53 | 'write_to_stdout': task 54 | } 55 | 56 | with pytest.raises(ValueError): 57 | tasks.add(print, 'write_to_stdout', queue='bar_queue') 58 | with pytest.raises(ValueError): 59 | tasks.add(print, queue='bar_queue') 60 | with pytest.raises(ValueError): 61 | tasks.add(print, name='internal', queue='_internal_queue') 62 | assert tasks.tasks == { 63 | 'write_to_stdout': task 64 | } 65 | 66 | 67 | def test_task_serialize(task): 68 | expected = ( 69 | '{"max_concurrency": -1, "max_retries": 0, ' 70 | '"name": "write_to_stdout", ' 71 | '"periodicity": null, "queue": "foo_queue"}' 72 | ) 73 | assert task.serialize() == expected 74 | 75 | task.periodicity = timedelta(minutes=5) 76 | expected = ( 77 | '{"max_concurrency": -1, "max_retries": 0, ' 78 | '"name": "write_to_stdout", ' 79 | '"periodicity": 300, "queue": "foo_queue"}' 80 | ) 81 | assert task.serialize() == expected 82 | 83 | 84 | def test_tasks_queues_and_max_retries(): 85 | # Constant default 86 | tasks = Tasks() 87 | tasks.add(print, 'write_to_stdout') 88 | assert tasks.tasks['write_to_stdout'].queue == const.DEFAULT_QUEUE 89 | assert (tasks.tasks['write_to_stdout'].max_retries == 90 | const.DEFAULT_MAX_RETRIES) 91 | 92 | # Tasks has a default 93 | tasks = Tasks(queue='tasks_default', max_retries=10) 94 | tasks.add(print, 'write_to_stdout') 95 | assert tasks.tasks['write_to_stdout'].queue == 'tasks_default' 96 | assert tasks.tasks['write_to_stdout'].max_retries == 10 97 | 98 | # Task added with an explicit value 99 | tasks = Tasks(queue='tasks_default', max_retries=10) 100 | tasks.add(print, 'write_to_stdout', queue='task_queue', max_retries=20) 101 | assert tasks.tasks['write_to_stdout'].queue == 'task_queue' 102 | assert tasks.tasks['write_to_stdout'].max_retries == 20 103 | 104 | 105 | def 
test_tasks_decorator(): 106 | 107 | tasks = Tasks(queue='tasks_queue') 108 | 109 | @tasks.task(name='foo') 110 | def foo(): 111 | pass 112 | 113 | @tasks.task(name='bar', queue='task_queue', max_retries=20) 114 | def bar(): 115 | pass 116 | 117 | assert 'foo' in str(tasks.tasks['foo'].func) 118 | assert foo.task_name == 'foo' 119 | assert tasks.tasks['foo'].name == 'foo' 120 | assert tasks.tasks['foo'].queue == 'tasks_queue' 121 | assert tasks.tasks['foo'].max_retries == const.DEFAULT_MAX_RETRIES 122 | 123 | assert 'bar' in str(tasks.tasks['bar'].func) 124 | assert bar.task_name == 'bar' 125 | assert tasks.tasks['bar'].name == 'bar' 126 | assert tasks.tasks['bar'].queue == 'task_queue' 127 | assert tasks.tasks['bar'].max_retries == 20 128 | 129 | 130 | def test_task_function_can_be_called(): 131 | 132 | tasks = Tasks() 133 | 134 | @tasks.task(name='foo') 135 | def foo(a, b=2): 136 | return a + b 137 | 138 | assert foo(40) == 42 139 | assert foo(40, 3) == 43 140 | 141 | 142 | def test_tasks_update(): 143 | tasks_1, tasks_2 = Tasks(), Tasks() 144 | 145 | tasks_1.add(print, 'write_to_stdout', queue='foo_queue') 146 | tasks_2.update(tasks_1) 147 | assert tasks_1.tasks == tasks_2.tasks 148 | 149 | tasks_2.add(print, 'bar') 150 | assert tasks_1.tasks != tasks_2.tasks 151 | 152 | 153 | def test_tasks_names(): 154 | tasks = Tasks() 155 | assert tasks.names == [] 156 | tasks.add(print, 'foo') 157 | tasks.add(print, 'bar') 158 | assert sorted(tasks.names) == ['bar', 'foo'] 159 | 160 | 161 | def test_tasks_get_by_name(): 162 | tasks = Tasks() 163 | tasks.add(print, 'foo') 164 | 165 | r = tasks.get('foo') 166 | assert isinstance(r, Task) 167 | assert r.name == 'foo' 168 | assert r.func == print 169 | 170 | 171 | def test_tasks_get_by_function(): 172 | tasks = Tasks() 173 | 174 | @tasks.task(name='foo') 175 | def foo(): 176 | pass 177 | 178 | r = tasks.get(foo) 179 | assert isinstance(r, Task) 180 | assert r.name == 'foo' 181 | assert r.func == foo 182 | 183 | 184 | def test_tasks_get_by_task_object(task): 185 | tasks = Tasks() 186 | tasks._tasks[task.name] = task 187 | 188 | r = tasks.get(task) 189 | assert isinstance(r, Task) 190 | assert r.name == task.name 191 | assert r.func == task.func 192 | 193 | 194 | def test_tasks_get_by_unknown_or_wrong_object(): 195 | tasks = Tasks() 196 | with pytest.raises(exc.UnknownTask): 197 | tasks.get('foo') 198 | with pytest.raises(exc.UnknownTask): 199 | tasks.get(None) 200 | with pytest.raises(exc.UnknownTask): 201 | tasks.get(object()) 202 | with pytest.raises(exc.UnknownTask): 203 | tasks.get(b'foo') 204 | with pytest.raises(exc.UnknownTask): 205 | tasks.get(RuntimeError) 206 | 207 | 208 | def test_tasks_scheduling(task): 209 | tasks = Tasks() 210 | tasks.add(print, 'write_to_stdout', queue='foo_queue') 211 | batch = Batch() 212 | 213 | with pytest.raises(RuntimeError): 214 | tasks.schedule('write_to_stdout') 215 | with pytest.raises(RuntimeError): 216 | tasks.schedule_at('write_to_stdout', get_now()) 217 | with pytest.raises(RuntimeError): 218 | tasks.schedule_batch(batch) 219 | 220 | spin = mock.Mock() 221 | job = mock.sentinel 222 | spin.schedule.return_value = job 223 | spin.schedule_at.return_value = job 224 | spin.schedule_batch.return_value = job 225 | tasks._spin = spin 226 | 227 | retjob = tasks.schedule('write_to_stdout') 228 | spin.schedule.assert_called_once_with('write_to_stdout') 229 | assert retjob == job 230 | 231 | retjob = tasks.schedule_at('write_to_stdout', get_now()) 232 | spin.schedule_at.assert_called_once_with('write_to_stdout', 
get_now()) 233 | assert retjob == job 234 | 235 | tasks.schedule_batch(batch) 236 | spin.schedule_batch.assert_called_once_with(batch) 237 | 238 | 239 | def test_retry_exception(): 240 | r = RetryException('Foo') 241 | assert str(r) == 'Foo' 242 | assert r.at is None 243 | 244 | r = RetryException('Bar', get_now()) 245 | assert str(r) == 'Bar' 246 | assert r.at is get_now() 247 | 248 | 249 | def test_batch(patch_now): 250 | now = get_now() 251 | batch = Batch() 252 | batch.schedule('foo_task', 1, 2) 253 | batch.schedule_at('bar_task', now, three=True) 254 | 255 | assert batch.jobs_to_create == [ 256 | ('foo_task', now, (1, 2), {}), 257 | ('bar_task', now, (), {'three': True}) 258 | ] 259 | 260 | 261 | def test_task_raises_for_concurrency_without_idempotency(): 262 | with pytest.raises(ValueError) as e: 263 | task = Task( 264 | print, 'foo', 'foo_queue', max_retries=None, 265 | periodicity=None, max_concurrency=1 266 | ) 267 | assert "max_retries must be set if max_concurrency is set" in str(e) 268 | with pytest.raises(ValueError) as e: 269 | task = Task( 270 | print, 'foo', 'foo_queue', max_retries=0, 271 | periodicity=None, max_concurrency=1 272 | ) 273 | assert "max_retries must be set if max_concurrency is set" in str(e) 274 | 275 | 276 | def test_task_raises_if_max_concurrency_less_than_one(): 277 | with pytest.raises(ValueError) as e: 278 | task = Task( 279 | print, 'foo', 'foo_queue', max_retries=1, 280 | periodicity=None, max_concurrency=0 281 | ) 282 | assert "max_concurrency must be greater than zero" in str(e) 283 | -------------------------------------------------------------------------------- /doc/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # User-friendly check for sphinx-build 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don\'t have Sphinx installed, grab it from http://sphinx-doc.org/) 13 | endif 14 | 15 | # Internal variables. 16 | PAPEROPT_a4 = -D latex_paper_size=a4 17 | PAPEROPT_letter = -D latex_paper_size=letter 18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 19 | # the i18n builder cannot share the environment and doctrees with the others 20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 
21 | 22 | .PHONY: help 23 | help: 24 | @echo "Please use \`make ' where is one of" 25 | @echo " html to make standalone HTML files" 26 | @echo " dirhtml to make HTML files named index.html in directories" 27 | @echo " singlehtml to make a single large HTML file" 28 | @echo " pickle to make pickle files" 29 | @echo " json to make JSON files" 30 | @echo " htmlhelp to make HTML files and a HTML help project" 31 | @echo " qthelp to make HTML files and a qthelp project" 32 | @echo " applehelp to make an Apple Help Book" 33 | @echo " devhelp to make HTML files and a Devhelp project" 34 | @echo " epub to make an epub" 35 | @echo " epub3 to make an epub3" 36 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 37 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 38 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 39 | @echo " text to make text files" 40 | @echo " man to make manual pages" 41 | @echo " texinfo to make Texinfo files" 42 | @echo " info to make Texinfo files and run them through makeinfo" 43 | @echo " gettext to make PO message catalogs" 44 | @echo " changes to make an overview of all changed/added/deprecated items" 45 | @echo " xml to make Docutils-native XML files" 46 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 47 | @echo " linkcheck to check all external links for integrity" 48 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 49 | @echo " coverage to run coverage check of the documentation (if enabled)" 50 | @echo " dummy to check syntax errors of document sources" 51 | 52 | .PHONY: clean 53 | clean: 54 | rm -rf $(BUILDDIR)/* 55 | 56 | .PHONY: html 57 | html: 58 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 59 | @echo 60 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 61 | 62 | .PHONY: dirhtml 63 | dirhtml: 64 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 65 | @echo 66 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 67 | 68 | .PHONY: singlehtml 69 | singlehtml: 70 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 71 | @echo 72 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 73 | 74 | .PHONY: pickle 75 | pickle: 76 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 77 | @echo 78 | @echo "Build finished; now you can process the pickle files." 79 | 80 | .PHONY: json 81 | json: 82 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 83 | @echo 84 | @echo "Build finished; now you can process the JSON files." 85 | 86 | .PHONY: htmlhelp 87 | htmlhelp: 88 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 89 | @echo 90 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 91 | ".hhp project file in $(BUILDDIR)/htmlhelp." 92 | 93 | .PHONY: qthelp 94 | qthelp: 95 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 96 | @echo 97 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 98 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 99 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/Sauna.qhcp" 100 | @echo "To view the help file:" 101 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/Sauna.qhc" 102 | 103 | .PHONY: applehelp 104 | applehelp: 105 | $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp 106 | @echo 107 | @echo "Build finished. The help book is in $(BUILDDIR)/applehelp." 108 | @echo "N.B. 
You won't be able to view it unless you put it in" \ 109 | "~/Library/Documentation/Help or install it in your application" \ 110 | "bundle." 111 | 112 | .PHONY: devhelp 113 | devhelp: 114 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 115 | @echo 116 | @echo "Build finished." 117 | @echo "To view the help file:" 118 | @echo "# mkdir -p $$HOME/.local/share/devhelp/Sauna" 119 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/Sauna" 120 | @echo "# devhelp" 121 | 122 | .PHONY: epub 123 | epub: 124 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 125 | @echo 126 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 127 | 128 | .PHONY: epub3 129 | epub3: 130 | $(SPHINXBUILD) -b epub3 $(ALLSPHINXOPTS) $(BUILDDIR)/epub3 131 | @echo 132 | @echo "Build finished. The epub3 file is in $(BUILDDIR)/epub3." 133 | 134 | .PHONY: latex 135 | latex: 136 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 137 | @echo 138 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 139 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 140 | "(use \`make latexpdf' here to do that automatically)." 141 | 142 | .PHONY: latexpdf 143 | latexpdf: 144 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 145 | @echo "Running LaTeX files through pdflatex..." 146 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 147 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 148 | 149 | .PHONY: latexpdfja 150 | latexpdfja: 151 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 152 | @echo "Running LaTeX files through platex and dvipdfmx..." 153 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 154 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 155 | 156 | .PHONY: text 157 | text: 158 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 159 | @echo 160 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 161 | 162 | .PHONY: man 163 | man: 164 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 165 | @echo 166 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 167 | 168 | .PHONY: texinfo 169 | texinfo: 170 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 171 | @echo 172 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 173 | @echo "Run \`make' in that directory to run these through makeinfo" \ 174 | "(use \`make info' here to do that automatically)." 175 | 176 | .PHONY: info 177 | info: 178 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 179 | @echo "Running Texinfo files through makeinfo..." 180 | make -C $(BUILDDIR)/texinfo info 181 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 182 | 183 | .PHONY: gettext 184 | gettext: 185 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 186 | @echo 187 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 188 | 189 | .PHONY: changes 190 | changes: 191 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 192 | @echo 193 | @echo "The overview file is in $(BUILDDIR)/changes." 194 | 195 | .PHONY: linkcheck 196 | linkcheck: 197 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 198 | @echo 199 | @echo "Link check complete; look for any errors in the above output " \ 200 | "or in $(BUILDDIR)/linkcheck/output.txt." 
201 |
202 | .PHONY: doctest
203 | doctest:
204 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
205 | @echo "Testing of doctests in the sources finished, look at the " \
206 | "results in $(BUILDDIR)/doctest/output.txt."
207 |
208 | .PHONY: coverage
209 | coverage:
210 | $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage
211 | @echo "Testing of coverage in the sources finished, look at the " \
212 | "results in $(BUILDDIR)/coverage/python.txt."
213 |
214 | .PHONY: xml
215 | xml:
216 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
217 | @echo
218 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml."
219 |
220 | .PHONY: pseudoxml
221 | pseudoxml:
222 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
223 | @echo
224 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
225 |
226 | .PHONY: dummy
227 | dummy:
228 | $(SPHINXBUILD) -b dummy $(ALLSPHINXOPTS) $(BUILDDIR)/dummy
229 | @echo
230 | @echo "Build finished. Dummy builder generates no files."
231 |
--------------------------------------------------------------------------------
/spinach/job.py:
--------------------------------------------------------------------------------
1 | from datetime import datetime, timezone
2 | import enum
3 | import inspect
4 | import json
5 | from logging import getLogger
6 | import math
7 | from typing import Optional
8 | import uuid
9 |
10 | from . import signals
11 | from .exc import InvalidJobSignatureError
12 | from .task import RetryException, AbortException
13 | from .utils import human_duration, exponential_backoff
14 |
15 | logger = getLogger(__name__)
16 |
17 |
18 | class JobStatus(enum.Enum):
19 | """Possible status of a :class:`Job`.
20 |
21 | Life-cycle:
22 |
23 | - Newly created jobs first get the status `NOT_SET`
24 | - Future jobs are then set to `WAITING` until they are ready to be `QUEUED`
25 | - Jobs starting immediately get the status `QUEUED` directly when they are
26 | received by the broker
27 | - Jobs are set to `RUNNING` when a worker starts their execution
28 | - if the job terminates without error it is set to `SUCCEEDED`
29 | - if the job terminates with an error and can be retried it is set to
30 | `WAITING` until it is ready to be queued again
31 | - if the job terminates with an error and cannot be retried it is set
32 | to `FAILED` forever
33 |
34 | See :doc:`signals` to be notified of some of these status transitions.
35 | """
36 |
37 | NOT_SET = 0 #: Job created but not scheduled yet
38 | WAITING = 1 #: Job is scheduled to start in the future
39 | QUEUED = 2 #: Job is in a queue, ready to be picked by a worker
40 | RUNNING = 3 #: Job is being executed
41 | SUCCEEDED = 4 #: Job is finished, execution was successful
42 | FAILED = 5 #: Job failed and will not be retried
43 |
44 |
45 | class Job:
46 | """Represent the execution of a :class:`Task` by background workers.
47 |
48 | The :class:`Job` class should not be instantiated by the user; instead,
49 | jobs are automatically created when they are scheduled.
50 | 51 | :ivar id: UUID of the job 52 | :ivar status: :class:`JobStatus` 53 | :ivar task_name: string name of the task 54 | :ivar queue: string name of the queue 55 | :ivar at: timezone aware `datetime` representing the date at which the job 56 | should start 57 | :ivar max_retries: int representing how many times a failing job should be 58 | retried 59 | :ivar retries: int representing how many times the job was already executed 60 | :ivar task_args: optional tuple containing args passed to the task 61 | :ivar task_kwargs: optional dict containing kwargs passed to the task 62 | """ 63 | 64 | __slots__ = ['id', 'status', 'task_name', 'queue', 'at', 'max_retries', 65 | 'retries', 'task_args', 'task_kwargs', 'task_func'] 66 | 67 | def __init__(self, task_name: str, queue: str, at: datetime, 68 | max_retries: int, 69 | task_args: Optional[tuple]=None, 70 | task_kwargs: Optional[dict]=None): 71 | self.id = uuid.uuid4() 72 | self.status = JobStatus.NOT_SET 73 | self.task_name = task_name 74 | self.queue = queue 75 | self.max_retries = max_retries 76 | self.retries = 0 77 | 78 | if at.tzinfo is None: 79 | # TZ naive datetime, make it a TZ aware datetime by assuming it 80 | # contains UTC time 81 | self.at = at.replace(tzinfo=timezone.utc) 82 | logger.debug('Job created from a naive datetime, assuming UTC') 83 | else: 84 | # TZ aware datetime, store it in its UTC representation 85 | self.at = at.astimezone(timezone.utc) 86 | 87 | self.task_args = task_args if task_args else tuple() 88 | self.task_kwargs = task_kwargs if task_kwargs else dict() 89 | 90 | # Populated by Spinach arbiter before passing to a worker 91 | self.task_func = None 92 | 93 | @property 94 | def should_retry(self) -> bool: 95 | return self.retries < self.max_retries 96 | 97 | @property 98 | def should_start(self) -> bool: 99 | return datetime.now(timezone.utc) >= self.at 100 | 101 | @property 102 | def at_timestamp(self) -> Optional[int]: 103 | return int(math.ceil(self.at.timestamp())) 104 | 105 | def serialize(self): 106 | return json.dumps({ 107 | 'id': str(self.id), 108 | 'status': self.status.value, 109 | 'task_name': self.task_name, 110 | 'queue': self.queue, 111 | 'max_retries': self.max_retries, 112 | 'retries': self.retries, 113 | 'at': int(self.at.timestamp()), # seconds component 114 | 'at_us': self.at.microsecond, # microseconds component 115 | 'task_args': self.task_args, 116 | 'task_kwargs': self.task_kwargs 117 | }, sort_keys=True) 118 | 119 | @classmethod 120 | def deserialize(cls, job_json_string: str): 121 | job_dict = json.loads(job_json_string) 122 | at = datetime.fromtimestamp(job_dict['at'], tz=timezone.utc) 123 | at = at.replace(microsecond=job_dict['at_us']) 124 | job = Job( 125 | job_dict['task_name'], 126 | job_dict['queue'], 127 | at, 128 | job_dict['max_retries'], 129 | task_args=tuple(job_dict['task_args']), 130 | task_kwargs=job_dict['task_kwargs'], 131 | ) 132 | job.id = uuid.UUID(job_dict['id']) 133 | job.status = JobStatus(job_dict['status']) 134 | job.retries = job_dict['retries'] 135 | return job 136 | 137 | def check_signature(self): 138 | """Check if a job has the correct params to be executed. 139 | 140 | This can be used to prevent the scheduling of a job that will fail 141 | during execution because its arguments do not match the task function. 
142 | 143 | :raises InvalidJobSignatureError: Job arguments do not match the task 144 | """ 145 | if self.task_func is None: 146 | raise ValueError( 147 | 'Cannot verify signature until a task function is assigned' 148 | ) 149 | 150 | try: 151 | sig = inspect.signature(self.task_func) 152 | sig.bind(*self.task_args, **self.task_kwargs) 153 | except TypeError as e: 154 | msg = 'Arguments of job not compatible with task {}: {}'.format( 155 | self.task_name, e 156 | ) 157 | raise InvalidJobSignatureError(msg) 158 | except ValueError: 159 | logger.info('Cannot verify job signature, assuming it is correct') 160 | 161 | def __repr__(self): 162 | return 'Job <{} {} {}>'.format( 163 | self.task_name, self.status.name, self.id 164 | ) 165 | 166 | def __eq__(self, other): 167 | for attr in self.__slots__: 168 | try: 169 | if not getattr(self, attr) == getattr(other, attr): 170 | return False 171 | 172 | except AttributeError: 173 | return False 174 | 175 | return True 176 | 177 | 178 | def advance_job_status(namespace: str, job: Job, duration: float, 179 | err: Optional[Exception]): 180 | """Advance the status of a job depending on its execution. 181 | 182 | This function is called after a job has been executed. It calculates its 183 | next status and calls the appropriate signals. 184 | """ 185 | duration = human_duration(duration) 186 | if not err: 187 | job.status = JobStatus.SUCCEEDED 188 | logger.info('Finished execution of %s in %s', job, duration) 189 | return 190 | 191 | if isinstance(err, AbortException): 192 | job.retries = job.max_retries 193 | logger.error( 194 | 'Fatal error during execution of %s after %s, canceling retries', 195 | job, duration, exc_info=err 196 | ) 197 | 198 | if job.should_retry: 199 | job.status = JobStatus.NOT_SET 200 | job.retries += 1 201 | if isinstance(err, RetryException) and err.at is not None: 202 | job.at = err.at 203 | else: 204 | job.at = (datetime.now(timezone.utc) + 205 | exponential_backoff(job.retries)) 206 | 207 | signals.job_schedule_retry.send(namespace, job=job, err=err) 208 | 209 | log_args = ( 210 | job.retries, job.max_retries + 1, job, duration, 211 | human_duration( 212 | (job.at - datetime.now(tz=timezone.utc)).total_seconds() 213 | ) 214 | ) 215 | if isinstance(err, RetryException): 216 | logger.info('Retry requested during execution %d/%d of %s ' 217 | 'after %s, retry in %s', *log_args) 218 | else: 219 | logger.warning('Error during execution %d/%d of %s after %s, ' 220 | 'retry in %s', *log_args, exc_info=err) 221 | 222 | return 223 | 224 | job.status = JobStatus.FAILED 225 | signals.job_failed.send(namespace, job=job, err=err) 226 | if not isinstance(err, AbortException): 227 | logger.error( 228 | 'Error during execution %d/%d of %s after %s', 229 | job.max_retries + 1, job.max_retries + 1, job, duration, 230 | exc_info=err 231 | ) 232 | -------------------------------------------------------------------------------- /doc/conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Spinach documentation build configuration file, created by 5 | # sphinx-quickstart on Fri May 6 22:17:12 2016. 6 | # 7 | # This file is execfile()d with the current directory set to its 8 | # containing dir. 9 | # 10 | # Note that not all possible configuration values are present in this 11 | # autogenerated file. 12 | # 13 | # All configuration values have a default; values that are commented out 14 | # serve to show the default. 
15 | 16 | import sys 17 | import os 18 | 19 | # If extensions (or modules to document with autodoc) are in another directory, 20 | # add these directories to sys.path here. If the directory is relative to the 21 | # documentation root, use os.path.abspath to make it absolute, like shown here. 22 | sys.path.insert(0, os.path.abspath('..')) 23 | 24 | import spinach 25 | 26 | # -- General configuration ------------------------------------------------ 27 | 28 | # If your documentation needs a minimal Sphinx version, state it here. 29 | # needs_sphinx = '1.0' 30 | 31 | # Add any Sphinx extension module names here, as strings. They can be 32 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 33 | # ones. 34 | extensions = [ 35 | 'sphinx.ext.autodoc', 36 | 'sphinx.ext.githubpages', 37 | 'sphinx.ext.intersphinx', 38 | 'sphinx.ext.todo' 39 | ] 40 | 41 | intersphinx_mapping = { 42 | 'python': ('https://docs.python.org/3', None) 43 | } 44 | 45 | # Add any paths that contain templates here, relative to this directory. 46 | templates_path = ['_templates'] 47 | 48 | # The suffix(es) of source filenames. 49 | # You can specify multiple suffix as a list of string: 50 | # source_suffix = ['.rst', '.md'] 51 | source_suffix = '.rst' 52 | 53 | # The encoding of source files. 54 | # source_encoding = 'utf-8-sig' 55 | 56 | # The master toctree document. 57 | master_doc = 'index' 58 | 59 | # General information about the project. 60 | project = 'Spinach' 61 | copyright = '2017-2021, Nicolas Le Manchet' 62 | author = 'Nicolas Le Manchet' 63 | 64 | # The version info for the project you're documenting, acts as replacement for 65 | # |version| and |release|, also used in various other places throughout the 66 | # built documents. 67 | # 68 | # The short X.Y version. 69 | version = spinach.__version__ 70 | # The full version, including alpha/beta/rc tags. 71 | release = spinach.__version__ 72 | 73 | # The language for content autogenerated by Sphinx. Refer to documentation 74 | # for a list of supported languages. 75 | # 76 | # This is also used if you do content translation via gettext catalogs. 77 | # Usually you set "language" from the command line for these cases. 78 | language = None 79 | 80 | # There are two options for replacing |today|: either, you set today to some 81 | # non-false value, then it is used: 82 | #today = '' 83 | # Else, today_fmt is used as the format for a strftime call. 84 | #today_fmt = '%B %d, %Y' 85 | 86 | # List of patterns, relative to source directory, that match files and 87 | # directories to ignore when looking for source files. 88 | # This patterns also effect to html_static_path and html_extra_path 89 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 90 | 91 | # The reST default role (used for this markup: `text`) to use for all 92 | # documents. 93 | #default_role = None 94 | 95 | # If true, '()' will be appended to :func: etc. cross-reference text. 96 | #add_function_parentheses = True 97 | 98 | # If true, the current module name will be prepended to all description 99 | # unit titles (such as .. function::). 100 | #add_module_names = True 101 | 102 | # If true, sectionauthor and moduleauthor directives will be shown in the 103 | # output. They are ignored by default. 104 | #show_authors = False 105 | 106 | # The name of the Pygments (syntax highlighting) style to use. 107 | pygments_style = 'sphinx' 108 | 109 | # A list of ignored prefixes for module index sorting. 
110 | #modindex_common_prefix = [] 111 | 112 | # If true, keep warnings as "system message" paragraphs in the built documents. 113 | #keep_warnings = False 114 | 115 | # If true, `todo` and `todoList` produce output, else they produce nothing. 116 | todo_include_todos = True 117 | 118 | 119 | # -- Options for HTML output ---------------------------------------------- 120 | 121 | # The theme to use for HTML and HTML Help pages. See the documentation for 122 | # a list of builtin themes. 123 | html_theme = 'alabaster' 124 | 125 | # Theme options are theme-specific and customize the look and feel of a theme 126 | # further. For a list of options available for each theme, see the 127 | # documentation. 128 | html_theme_options = { 129 | 'description': 'Modern Redis task queue for Python 3', 130 | 'github_user': 'NicolasLM', 131 | 'github_repo': 'spinach', 132 | 'github_type': 'star', 133 | 'github_button': True 134 | } 135 | 136 | # Add any paths that contain custom themes here, relative to this directory. 137 | #html_theme_path = [] 138 | 139 | # The name for this set of Sphinx documents. 140 | # " v documentation" by default. 141 | #html_title = 'Spinach v0.0.1' 142 | 143 | # A shorter title for the navigation bar. Default is the same as html_title. 144 | #html_short_title = None 145 | 146 | # The name of an image file (relative to this directory) to place at the top 147 | # of the sidebar. 148 | #html_logo = None 149 | 150 | # The name of an image file (relative to this directory) to use as a favicon of 151 | # the docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 152 | # pixels large. 153 | #html_favicon = None 154 | 155 | # Add any paths that contain custom static files (such as style sheets) here, 156 | # relative to this directory. They are copied after the builtin static files, 157 | # so a file named "default.css" will overwrite the builtin "default.css". 158 | html_static_path = ['_static'] 159 | 160 | # Add any extra paths that contain custom files (such as robots.txt or 161 | # .htaccess) here, relative to this directory. These files are copied 162 | # directly to the root of the documentation. 163 | #html_extra_path = [] 164 | 165 | # If not None, a 'Last updated on:' timestamp is inserted at every page 166 | # bottom, using the given strftime format. 167 | # The empty string is equivalent to '%b %d, %Y'. 168 | #html_last_updated_fmt = None 169 | 170 | # If true, SmartyPants will be used to convert quotes and dashes to 171 | # typographically correct entities. 172 | #html_use_smartypants = True 173 | 174 | # Custom sidebar templates, maps document names to template names. 175 | html_sidebars = { 176 | '**': ['about.html', 'navigation.html', 'searchbox.html'] 177 | } 178 | 179 | # Additional templates that should be rendered to pages, maps page names to 180 | # template names. 181 | #html_additional_pages = {} 182 | 183 | # If false, no module index is generated. 184 | #html_domain_indices = True 185 | 186 | # If false, no index is generated. 187 | #html_use_index = True 188 | 189 | # If true, the index is split into individual pages for each letter. 190 | #html_split_index = False 191 | 192 | # If true, links to the reST sources are added to the pages. 193 | html_show_sourcelink = False 194 | 195 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 196 | #html_show_sphinx = True 197 | 198 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 
199 | #html_show_copyright = True 200 | 201 | # If true, an OpenSearch description file will be output, and all pages will 202 | # contain a tag referring to it. The value of this option must be the 203 | # base URL from which the finished HTML is served. 204 | #html_use_opensearch = '' 205 | 206 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 207 | #html_file_suffix = None 208 | 209 | # Language to be used for generating the HTML full-text search index. 210 | # Sphinx supports the following languages: 211 | # 'da', 'de', 'en', 'es', 'fi', 'fr', 'h', 'it', 'ja' 212 | # 'nl', 'no', 'pt', 'ro', 'r', 'sv', 'tr', 'zh' 213 | #html_search_language = 'en' 214 | 215 | # A dictionary with options for the search language support, empty by default. 216 | # 'ja' uses this config value. 217 | # 'zh' user can custom change `jieba` dictionary path. 218 | #html_search_options = {'type': 'default'} 219 | 220 | # The name of a javascript file (relative to the configuration directory) that 221 | # implements a search results scorer. If empty, the default will be used. 222 | #html_search_scorer = 'scorer.js' 223 | 224 | # Output file base name for HTML help builder. 225 | htmlhelp_basename = 'Spinachdoc' 226 | 227 | # -- Options for LaTeX output --------------------------------------------- 228 | 229 | latex_elements = { 230 | # The paper size ('letterpaper' or 'a4paper'). 231 | #'papersize': 'letterpaper', 232 | 233 | # The font size ('10pt', '11pt' or '12pt'). 234 | #'pointsize': '10pt', 235 | 236 | # Additional stuff for the LaTeX preamble. 237 | #'preamble': '', 238 | 239 | # Latex figure (float) alignment 240 | #'figure_align': 'htbp', 241 | } 242 | 243 | # Grouping the document tree into LaTeX files. List of tuples 244 | # (source start file, target name, title, 245 | # author, documentclass [howto, manual, or own class]). 246 | latex_documents = [ 247 | (master_doc, 'Spinach.tex', 'Spinach Documentation', 248 | 'Nicolas Le Manchet', 'manual'), 249 | ] 250 | 251 | # The name of an image file (relative to this directory) to place at the top of 252 | # the title page. 253 | #latex_logo = None 254 | 255 | # For "manual" documents, if this is true, then toplevel headings are parts, 256 | # not chapters. 257 | #latex_use_parts = False 258 | 259 | # If true, show page references after internal links. 260 | #latex_show_pagerefs = False 261 | 262 | # If true, show URL addresses after external links. 263 | #latex_show_urls = False 264 | 265 | # Documents to append as an appendix to all manuals. 266 | #latex_appendices = [] 267 | 268 | # If false, no module index is generated. 269 | #latex_domain_indices = True 270 | 271 | 272 | # -- Options for manual page output --------------------------------------- 273 | 274 | # One entry per manual page. List of tuples 275 | # (source start file, name, description, authors, manual section). 276 | man_pages = [ 277 | (master_doc, 'spinach', 'Spinach Documentation', 278 | [author], 1) 279 | ] 280 | 281 | # If true, show URL addresses after external links. 282 | #man_show_urls = False 283 | 284 | 285 | # -- Options for Texinfo output ------------------------------------------- 286 | 287 | # Grouping the document tree into Texinfo files. 
List of tuples 288 | # (source start file, target name, title, author, 289 | # dir menu entry, description, category) 290 | texinfo_documents = [ 291 | (master_doc, 'Spinach', 'Spinach Documentation', 292 | author, 'Spinach', 'Modern Redis task queue for Python 3.', 293 | 'Miscellaneous'), 294 | ] 295 | 296 | # Documents to append as an appendix to all manuals. 297 | #texinfo_appendices = [] 298 | 299 | # If false, no module index is generated. 300 | #texinfo_domain_indices = True 301 | 302 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 303 | #texinfo_show_urls = 'footnote' 304 | 305 | # If true, do not generate a @detailmenu in the "Top" node's menu. 306 | #texinfo_no_detailmenu = False 307 | -------------------------------------------------------------------------------- /spinach/engine.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime, timezone 2 | from logging import getLogger 3 | import threading 4 | from typing import Iterable, Type 5 | 6 | from .task import Tasks, Batch, Schedulable 7 | from .utils import run_forever, handle_sigterm 8 | from .job import Job, JobStatus, advance_job_status 9 | from .brokers.base import Broker 10 | from .const import DEFAULT_QUEUE, DEFAULT_NAMESPACE, DEFAULT_WORKER_NUMBER 11 | from .worker import BaseWorkers, ThreadWorkers 12 | from . import exc 13 | 14 | 15 | logger = getLogger(__name__) 16 | 17 | 18 | class Engine: 19 | """Spinach Engine coordinating a broker with workers. 20 | 21 | This class does the orchestration of all components, it is the one that 22 | starts and terminates the whole machinery. 23 | 24 | The Engine can be run in two modes: 25 | 26 | - client: synchronously submits jobs. 27 | - worker: asynchronously executes jobs. 28 | 29 | Submitting jobs is quite easy, so running the Engine in client mode doesn't 30 | require spawning any thread. 31 | 32 | Executing jobs however is a bit more involved, so running the Engine in 33 | worker mode ends up spawning a few threads: 34 | 35 | - a few worker threads: they are only responsible for executing the task 36 | function and advancing the job status once it is finished. 37 | - a result notifier thread: sends back the result of job executions to the 38 | Broker backend, acts basically as a client. 39 | - an arbiter thread: fetches jobs from the Broker and gives them to the 40 | workers as well as doing some periodic bookkeeping. 41 | - a Broker subscriber thread: receives notifications from the backend when 42 | something happens, typically a job is enqueued. 43 | - the process main thread: starts all the above threads, then does nothing 44 | waiting for the signal to terminate the threads it started. 45 | 46 | This means that a Spinach worker process has at least 5 threads. 47 | 48 | :arg broker: instance of a :class:`Broker` 49 | :arg namespace: name of the namespace used by the Engine. When different 50 | Engines use the same Redis server, they must use different namespaces to 51 | isolate themselves. 
52 | """
53 |
54 | def __init__(self, broker: Broker, namespace: str=DEFAULT_NAMESPACE):
55 | self._broker = broker
56 | self._broker.namespace = namespace
57 | self._namespace = namespace
58 |
59 | self._tasks = Tasks()
60 | self.task = self._tasks.task
61 | self._reset()
62 |
63 | def _reset(self):
64 | """Initialization that must happen before the arbiter is (re)started"""
65 | self._arbiter = None
66 | self._workers = None
67 | self._working_queue = None
68 | self._must_stop = threading.Event()
69 |
70 | @property
71 | def namespace(self) -> str:
72 | """Namespace the Engine uses."""
73 | return self._namespace
74 |
75 | def attach_tasks(self, tasks: Tasks):
76 | """Attach a set of tasks.
77 |
78 | A task cannot be scheduled or executed before it is attached to an
79 | Engine.
80 |
81 | >>> tasks = Tasks()
82 | >>> spin.attach_tasks(tasks)
83 | """
84 | if tasks._spin is not None and tasks._spin is not self:
85 | logger.warning('Tasks already attached to a different Engine')
86 | self._tasks.update(tasks)
87 | tasks._spin = self
88 |
89 | def execute(self, task: Schedulable, *args, **kwargs):
90 | return self._tasks.get(task).func(*args, **kwargs)
91 |
92 | def schedule(self, task: Schedulable, *args, **kwargs) -> Job:
93 | """Schedule a job to be executed as soon as possible.
94 |
95 | :arg task: the task or its name to execute in the background
96 | :arg args: args to be passed to the task function
97 | :arg kwargs: kwargs to be passed to the task function
98 |
99 | :return: The Job that was created and scheduled.
100 | """
101 | at = datetime.now(timezone.utc)
102 | return self.schedule_at(task, at, *args, **kwargs)
103 |
104 | def schedule_at(
105 | self, task: Schedulable, at: datetime, *args, **kwargs
106 | ) -> Job:
107 | """Schedule a job to be executed in the future.
108 |
109 | :arg task: the task or its name to execute in the background
110 | :arg at: date at which the job should start. It is advised to pass a
111 | timezone aware datetime to lift any ambiguity. However if a
112 | timezone naive datetime is given, it will be assumed to
113 | contain UTC time.
114 | :arg args: args to be passed to the task function
115 | :arg kwargs: kwargs to be passed to the task function
116 |
117 | :return: The Job that was created and scheduled.
118 | """
119 | task = self._tasks.get(task)
120 | job = Job(task.name, task.queue, at, task.max_retries, task_args=args,
121 | task_kwargs=kwargs)
122 | job.task_func = task.func
123 | job.check_signature()
124 | self._broker.enqueue_jobs([job])
125 | return job
126 |
127 | def schedule_batch(self, batch: Batch) -> Iterable[Job]:
128 | """Schedule many jobs at once.
129 |
130 | Scheduling jobs in batches allows enqueueing them quickly by avoiding
131 | round-trips to the broker.
132 |
133 | :arg batch: :class:`Batch` instance containing jobs to schedule
134 |
135 | :return: The Jobs that were created and scheduled.
136 | """ 137 | jobs = list() 138 | for task, at, args, kwargs in batch.jobs_to_create: 139 | task = self._tasks.get(task) 140 | job = Job( 141 | task.name, task.queue, at, task.max_retries, 142 | task_args=args, task_kwargs=kwargs 143 | ) 144 | job.task_func = task.func 145 | job.check_signature() 146 | jobs.append(job) 147 | 148 | self._broker.enqueue_jobs(jobs) 149 | return jobs 150 | 151 | def _arbiter_func(self, stop_when_queue_empty=False): 152 | logger.debug('Arbiter started') 153 | self._register_periodic_tasks() 154 | self._broker.set_concurrency_keys( 155 | [task for task in self._tasks.tasks.values()] 156 | ) 157 | while not self._must_stop.is_set(): 158 | 159 | self._broker.move_future_jobs() 160 | 161 | received_jobs = 0 162 | available_slots = self._workers.available_slots 163 | logger.debug("Available slots: %s", available_slots) 164 | if available_slots > 0: 165 | logger.debug("Getting jobs from queue %s", self._working_queue) 166 | jobs = self._broker.get_jobs_from_queue( 167 | self._working_queue, available_slots 168 | ) 169 | for job in jobs: 170 | logger.debug("Received job: %s", job) 171 | received_jobs += 1 172 | try: 173 | job.task_func = self._tasks.get(job.task_name).func 174 | except exc.UnknownTask as err: 175 | # This is slightly cheating, when a task is unknown 176 | # it doesn't go to workers but is still sent to the 177 | # workers out_queue so that it is processed by the 178 | # notifier. 179 | advance_job_status(self.namespace, job, 0.0, err) 180 | self._workers.out_queue.put(job) 181 | else: 182 | self._workers.submit_job(job) 183 | 184 | if (stop_when_queue_empty and available_slots > 0 185 | and received_jobs == 0 186 | and self._broker.is_queue_empty(self._working_queue)): 187 | logger.info("Stopping workers because queue '%s' is empty", 188 | self._working_queue) 189 | self.stop_workers(_join_arbiter=False) 190 | logger.debug('Arbiter terminated') 191 | return 192 | 193 | logger.debug('Received %s jobs, now waiting for events', 194 | received_jobs) 195 | self._broker.wait_for_event() 196 | 197 | logger.debug('Arbiter terminated') 198 | 199 | def start_workers(self, number: int = DEFAULT_WORKER_NUMBER, 200 | queue: str = DEFAULT_QUEUE, block: bool = True, 201 | stop_when_queue_empty=False, 202 | workers_class: Type[BaseWorkers] = ThreadWorkers): 203 | """Start the worker threads. 204 | 205 | :arg number: number of workers to launch, each job running uses one 206 | worker. 207 | :arg queue: name of the queue to consume, see :doc:`queues`. 208 | :arg block: whether to block the calling thread until a signal arrives 209 | and workers get terminated. 210 | :arg stop_when_queue_empty: automatically stop the workers when the 211 | queue is empty. Useful mostly for one-off scripts and testing. 
212 | :arg workers_class: Class to change the behavior of workers,
213 | defaults to threaded workers
214 | """
215 | if self._arbiter or self._workers:
216 | raise RuntimeError('Workers are already running')
217 |
218 | self._working_queue = queue
219 |
220 | tasks_names = '\n'.join(
221 | [' - ' + task.name for task in self._tasks.tasks.values()
222 | if task.queue == self._working_queue]
223 | )
224 | logger.info('Starting %d workers on queue "%s" with tasks:\n%s',
225 | number, self._working_queue, tasks_names)
226 |
227 | # Start the broker
228 | self._broker.start()
229 |
230 | # Start workers
231 | self._workers = workers_class(
232 | num_workers=number,
233 | namespace=self.namespace,
234 | )
235 |
236 | # Start the result notifier
237 | self._result_notifier = threading.Thread(
238 | target=run_forever,
239 | args=(self._result_notifier_func, self._must_stop, logger),
240 | name='{}-result-notifier'.format(self.namespace)
241 | )
242 | self._result_notifier.start()
243 |
244 | # Start the arbiter
245 | self._arbiter = threading.Thread(
246 | target=run_forever,
247 | args=(self._arbiter_func, self._must_stop, logger,
248 | stop_when_queue_empty),
249 | name='{}-arbiter'.format(self.namespace)
250 | )
251 | self._arbiter.start()
252 |
253 | if block:
254 | with handle_sigterm():
255 | try:
256 | self._arbiter.join()
257 | except KeyboardInterrupt:
258 | self.stop_workers()
259 | except AttributeError:
260 | # Arbiter thread starts and stops immediately when run with
261 | # `stop_when_queue_empty` and queue is already empty.
262 | pass
263 |
264 | def stop_workers(self, _join_arbiter=True):
265 | """Stop the workers and wait for them to terminate."""
266 | # _join_arbiter is used internally when the arbiter is shutting down
267 | # the full engine itself. This is because the arbiter thread cannot
268 | # join itself.
269 | self._must_stop.set()
270 | self._workers.stop()
271 | self._result_notifier.join()
272 | self._broker.stop()
273 | if _join_arbiter:
274 | self._arbiter.join()
275 | self._reset()
276 |
277 | def _result_notifier_func(self):
278 | logger.debug('Result notifier started')
279 |
280 | while True:
281 | job = self._workers.out_queue.get()
282 | if job is self._workers.poison_pill:
283 | break
284 |
285 | if job.status in (JobStatus.SUCCEEDED, JobStatus.FAILED):
286 | self._broker.remove_job_from_running(job)
287 | elif job.status is JobStatus.NOT_SET:
288 | self._broker.enqueue_jobs([job], from_failure=True)
289 | else:
290 | raise RuntimeError('Received job with an incorrect status')
291 |
292 | logger.debug('Result notifier terminated')
293 |
294 | def _register_periodic_tasks(self):
295 | periodic_tasks = [task for task in self._tasks.tasks.values()
296 | if task.periodicity]
297 | self._broker.register_periodic_tasks(periodic_tasks)
298 |
--------------------------------------------------------------------------------
/spinach/task.py:
--------------------------------------------------------------------------------
1 | from datetime import datetime, timezone, timedelta
2 | import functools
3 | import json
4 | from typing import Iterable, Optional, Callable, List, TYPE_CHECKING, Union
5 | from numbers import Number
6 |
7 | from .
import const, exc 8 | 9 | if TYPE_CHECKING: 10 | from .job import Job 11 | 12 | 13 | class Task: 14 | 15 | __slots__ = [ 16 | 'func', 'name', 'queue', 'max_retries', 'periodicity', 17 | 'max_concurrency', 18 | ] 19 | 20 | def __init__(self, func: Callable, name: str, queue: str, 21 | max_retries: Number, periodicity: Optional[timedelta], 22 | max_concurrency: Optional[int]=None): 23 | self.func = func 24 | self.name = name 25 | self.queue = queue 26 | self.max_retries = max_retries 27 | self.periodicity = periodicity 28 | self.max_concurrency = max_concurrency 29 | 30 | # Prevent initialisation with max_concurrency set and 31 | # max_retries not set. 32 | if max_concurrency is not None: 33 | if max_retries is None or max_retries == 0: 34 | raise ValueError( 35 | "max_retries must be set if max_concurrency is set" 36 | ) 37 | if max_concurrency < 1: 38 | raise ValueError("max_concurrency must be greater than zero") 39 | 40 | def serialize(self): 41 | periodicity = (int(self.periodicity.total_seconds()) 42 | if self.periodicity else None) 43 | return json.dumps({ 44 | 'name': self.name, 45 | 'queue': self.queue, 46 | 'max_retries': self.max_retries, 47 | 'periodicity': periodicity, 48 | 'max_concurrency': self.max_concurrency or -1, 49 | }, sort_keys=True) 50 | 51 | @property 52 | def task_name(self): 53 | return self.name 54 | 55 | def __repr__(self): 56 | return 'Task({}, {}, {}, {}, {}, {})'.format( 57 | self.func, self.name, self.queue, self.max_retries, 58 | self.periodicity, self.max_concurrency, 59 | ) 60 | 61 | def __eq__(self, other): 62 | for attr in self.__slots__: 63 | try: 64 | if not getattr(self, attr) == getattr(other, attr): 65 | return False 66 | except AttributeError: 67 | return False 68 | return True 69 | 70 | 71 | Schedulable = Union[str, Callable, Task] 72 | 73 | 74 | class Tasks: 75 | """Registry for tasks to be used by Spinach. 76 | 77 | :arg queue: default queue for tasks 78 | :arg max_retries: default retry policy for tasks 79 | :arg periodicity: for periodic tasks, delay between executions as a 80 | timedelta 81 | :arg max_concurrency: maximum number of simultaneous Jobs that can be 82 | started for this Task. Requires max_retries to be also set. 83 | """ 84 | # This class is not thread-safe because it doesn't need to be used 85 | # concurrently. 
86 | 87 | def __init__(self, queue: Optional[str]=None, 88 | max_retries: Optional[Number]=None, 89 | periodicity: Optional[timedelta]=None, 90 | max_concurrency: Optional[int]=None): 91 | self._tasks = {} 92 | self.queue = queue 93 | self.max_retries = max_retries 94 | self.periodicity = periodicity 95 | self.max_concurrency = max_concurrency 96 | self._spin = None 97 | 98 | def update(self, tasks: 'Tasks'): 99 | self._tasks.update(tasks.tasks) 100 | 101 | @property 102 | def names(self) -> List[str]: 103 | return list(self._tasks.keys()) 104 | 105 | @property 106 | def tasks(self) -> dict: 107 | return self._tasks 108 | 109 | def get(self, name: Schedulable) -> Task: 110 | try: 111 | task_name = name.task_name 112 | except AttributeError: 113 | task_name = name 114 | task = self._tasks.get(task_name) 115 | if task is not None: 116 | return task 117 | 118 | raise exc.UnknownTask( 119 | 'Unknown task "{}", known tasks: {}'.format(name, self.names) 120 | ) 121 | 122 | def task(self, func: Optional[Callable]=None, name: Optional[str]=None, 123 | queue: Optional[str]=None, max_retries: Optional[Number]=None, 124 | periodicity: Optional[timedelta]=None, 125 | max_concurrency: Optional[int]=None): 126 | """Decorator to register a task function. 127 | 128 | :arg name: name of the task, used later to schedule jobs 129 | :arg queue: queue of the task, the default is used if not provided 130 | :arg max_retries: maximum number of retries, the default is used if 131 | not provided 132 | :arg periodicity: for periodic tasks, delay between executions as a 133 | timedelta 134 | :arg max_concurrency: maximum number of simultaneous Jobs that can be 135 | started for this Task. Requires max_retries to be also set. 136 | 137 | >>> tasks = Tasks() 138 | >>> @tasks.task(name='foo') 139 | >>> def foo(): 140 | ... pass 141 | """ 142 | if func is None: 143 | return functools.partial(self.task, name=name, queue=queue, 144 | max_retries=max_retries, 145 | periodicity=periodicity, 146 | max_concurrency=max_concurrency) 147 | 148 | self.add(func, name=name, queue=queue, max_retries=max_retries, 149 | periodicity=periodicity, max_concurrency=max_concurrency) 150 | 151 | # Add an attribute to the function to be able to conveniently use it as 152 | # spin.schedule(function) instead of spin.schedule('task_name') 153 | func.task_name = name 154 | 155 | return func 156 | 157 | def add(self, func: Callable, name: Optional[str]=None, 158 | queue: Optional[str]=None, max_retries: Optional[Number]=None, 159 | periodicity: Optional[timedelta]=None, 160 | max_concurrency: Optional[int]=None): 161 | """Register a task function. 162 | 163 | :arg func: a callable to be executed 164 | :arg name: name of the task, used later to schedule jobs 165 | :arg queue: queue of the task, the default is used if not provided 166 | :arg max_retries: maximum number of retries, the default is used if 167 | not provided 168 | :arg periodicity: for periodic tasks, delay between executions as a 169 | timedelta 170 | :arg max_concurrency: maximum number of simultaneous Jobs that can be 171 | started for this Task. Requires max_retries to be also set. 
172 |
173 | >>> tasks = Tasks()
174 | >>> tasks.add(lambda x: x, name='do_nothing')
175 | """
176 | if not name:
177 | raise ValueError('Each Spinach task needs a name')
178 | if name in self._tasks:
179 | raise ValueError('A task named {} already exists'.format(name))
180 |
181 | if queue is None:
182 | if self.queue:
183 | queue = self.queue
184 | else:
185 | queue = const.DEFAULT_QUEUE
186 |
187 | if max_retries is None:
188 | if self.max_retries:
189 | max_retries = self.max_retries
190 | else:
191 | max_retries = const.DEFAULT_MAX_RETRIES
192 |
193 | if periodicity is None:
194 | periodicity = self.periodicity
195 | if max_concurrency is None:
196 | max_concurrency = self.max_concurrency
197 |
198 | if queue and queue.startswith('_'):
199 | raise ValueError('Queues starting with "_" are reserved by '
200 | 'Spinach for internal use')
201 |
202 | self._tasks[name] = Task(
203 | func, name, queue, max_retries, periodicity, max_concurrency
204 | )
205 |
206 | def _require_attached_tasks(self):
207 | if self._spin is None:
208 | raise RuntimeError(
209 | 'Cannot execute tasks until the tasks have been attached to '
210 | 'a Spinach Engine.'
211 | )
212 |
213 | def schedule(self, task: Schedulable, *args, **kwargs) -> "Job":
214 | """Schedule a job to be executed as soon as possible.
215 |
216 | :arg task: the task or its name to execute in the background
217 | :arg args: args to be passed to the task function
218 | :arg kwargs: kwargs to be passed to the task function
219 |
220 | :return: The Job that was created and scheduled.
221 |
222 | This method can only be used once tasks have been attached to a
223 | Spinach :class:`Engine`.
224 | """
225 | self._require_attached_tasks()
226 | return self._spin.schedule(task, *args, **kwargs)
227 |
228 | def schedule_at(
229 | self, task: Schedulable, at: datetime, *args, **kwargs
230 | ) -> "Job":
231 | """Schedule a job to be executed in the future.
232 |
233 | :arg task: the task or its name to execute in the background
234 | :arg at: Date at which the job should start. It is advised to pass a
235 | timezone aware datetime to lift any ambiguity. However if a
236 | timezone naive datetime is given, it will be assumed to
237 | contain UTC time.
238 | :arg args: args to be passed to the task function
239 | :arg kwargs: kwargs to be passed to the task function
240 |
241 | :return: The Job that was created and scheduled.
242 |
243 | This method can only be used once tasks have been attached to a
244 | Spinach :class:`Engine`.
245 | """
246 | self._require_attached_tasks()
247 | return self._spin.schedule_at(task, at, *args, **kwargs)
248 |
249 | def schedule_batch(self, batch: 'Batch') -> Iterable["Job"]:
250 | """Schedule many jobs at once.
251 |
252 | Scheduling jobs in batches allows enqueueing them quickly by avoiding
253 | round-trips to the broker.
254 |
255 | :arg batch: :class:`Batch` instance containing jobs to schedule
256 |
257 | :return: The Jobs that were created and scheduled.
258 | """
259 | self._require_attached_tasks()
260 | return self._spin.schedule_batch(batch)
261 |
262 |
263 | class Batch:
264 | """Container allowing many jobs to be scheduled at once.
265 |
266 | Batching the scheduling of jobs avoids making many round-trips
267 | to the broker, reducing the overhead and the chance of errors associated
268 | with network calls.
269 |
270 | In this example 100 jobs are sent to Redis in one call:
271 |
272 | >>> batch = Batch()
273 | >>> for i in range(100):
274 | ... batch.schedule('compute', i)
275 | ...
276 | >>> spin.schedule_batch(batch)
277 |
278 | Once the :class:`Batch` is passed to the :class:`Engine` it should be
279 | disposed of and not be reused.
280 | """
281 |
282 | def __init__(self):
283 | self.jobs_to_create = list()
284 |
285 | def schedule(self, task: Schedulable, *args, **kwargs):
286 | """Add a job to be executed ASAP to the batch.
287 |
288 | :arg task: the task or its name to execute in the background
289 | :arg args: args to be passed to the task function
290 | :arg kwargs: kwargs to be passed to the task function
291 | """
292 | at = datetime.now(timezone.utc)
293 | self.schedule_at(task, at, *args, **kwargs)
294 |
295 | def schedule_at(self, task: Schedulable, at: datetime, *args, **kwargs):
296 | """Add a job to be executed in the future to the batch.
297 |
298 | :arg task: the task or its name to execute in the background
299 | :arg at: Date at which the job should start. It is advised to pass a
300 | timezone aware datetime to lift any ambiguity. However if a
301 | timezone naive datetime is given, it will be assumed to
302 | contain UTC time.
303 | :arg args: args to be passed to the task function
304 | :arg kwargs: kwargs to be passed to the task function
305 | """
306 | self.jobs_to_create.append((task, at, args, kwargs))
307 |
308 |
309 | class RetryException(Exception):
310 | """Exception raised in a task to indicate that the job should be retried.
311 |
312 | Even if this exception is raised, the `max_retries` defined in the task
313 | still applies.
314 |
315 | :arg at: Optional date at which the job should be retried. If it is not
316 | given, the job will be retried after a randomized exponential backoff.
317 | It is advised to pass a timezone aware datetime to lift any
318 | ambiguity. However if a timezone naive datetime is given, it will
319 | be assumed to contain UTC time.
320 | """
321 |
322 | def __init__(self, message, at: Optional[datetime]=None):
323 | super().__init__(message)
324 | self.at = at
325 |
326 |
327 | class AbortException(Exception):
328 | """Exception raised in a task to indicate that the job should NOT be
329 | retried.
330 |
331 | If this exception is raised, all retry attempts are stopped immediately.
332 | """
333 |
--------------------------------------------------------------------------------
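Usage sketch (not a file from the repository): the snippet below is a minimal, illustrative example of the public API listed above — Tasks, Engine, Batch, RetryException and the MemoryBroker exported by spinach/__init__.py. The 'compute' task name, the namespace and the argument values are made up for the example.

# Illustrative sketch of the Spinach API, assuming the public names exported
# by spinach/__init__.py; not part of the repository sources.
from datetime import datetime, timedelta, timezone

from spinach import Batch, Engine, MemoryBroker, RetryException, Tasks

tasks = Tasks()


@tasks.task(name='compute', max_retries=3)
def compute(x):
    # Raising RetryException reschedules the job, within max_retries.
    if x < 0:
        raise RetryException(
            'negative input, retrying later',
            at=datetime.now(timezone.utc) + timedelta(seconds=30),
        )
    print(x * 2)


spin = Engine(MemoryBroker(), namespace='example')
spin.attach_tasks(tasks)

# Schedule a single job, then a batch of jobs in one broker round-trip.
spin.schedule(compute, 21)
batch = Batch()
for i in range(10):
    batch.schedule('compute', i)
spin.schedule_batch(batch)

# Run workers until the queue is drained (useful for one-off scripts).
spin.start_workers(number=2, stop_when_queue_empty=True)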