├── tests
    ├── __init__.py
    ├── example
    │   ├── __init__.py
    │   ├── jobs
    │   │   ├── __init__.py
    │   │   ├── periodic.py
    │   │   └── standard.py
    │   ├── manage.py
    │   ├── executable.py
    │   └── randomtext.txt
    ├── config.py
    ├── test_json.py
    ├── test_msgpack.py
    ├── test_close.py
    ├── test_scheduler.py
    ├── test_api.py
    ├── test_connection.py
    └── app.py
├── pq
    ├── backends
    │   ├── __init__.py
    │   └── redis.py
    ├── server
    │   ├── __init__.py
    │   ├── rpc.py
    │   ├── consumer.py
    │   ├── apps.py
    │   ├── config.py
    │   └── producer.py
    ├── tasks
    │   ├── __init__.py
    │   ├── concurrency.py
    │   ├── states.py
    │   ├── rpc.py
    │   ├── task.py
    │   ├── scheduler.py
    │   ├── executor.py
    │   ├── consumer.py
    │   └── models.py
    ├── utils
    │   ├── __init__.py
    │   ├── exc.py
    │   ├── concurrency.py
    │   ├── time.py
    │   ├── version.py
    │   └── serializers.py
    ├── __init__.py
    ├── api.py
    ├── jobs.py
    ├── consumer.py
    ├── cpubound.py
    └── mq.py
├── requirements
    ├── hard.txt
    ├── dev.txt
    └── soft.txt
├── docs
    ├── artwork
    │   └── pulsar-queue.sketch
    └── history
    │   ├── 0.1.md
    │   ├── 0.2.md
    │   ├── 0.3.md
    │   ├── 0.4.md
    │   └── 0.5.md
├── MANIFEST.in
├── ci
    ├── tag.sh
    └── install.sh
├── setup.cfg
├── .coveragerc
├── .gitignore
├── Makefile
├── appveyor.yml
├── LICENSE
├── setup.py
├── .circleci
    └── config.yml
└── README.rst
/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/pq/backends/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/pq/server/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/pq/tasks/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/pq/utils/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/tests/example/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/tests/example/jobs/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/requirements/hard.txt:
--------------------------------------------------------------------------------
1 | pulsar
2 | greenlet
3 | 
--------------------------------------------------------------------------------
/requirements/dev.txt:
--------------------------------------------------------------------------------
1 | coverage
2 | codecov
3 | flake8
4 | 
--------------------------------------------------------------------------------
/requirements/soft.txt:
--------------------------------------------------------------------------------
1 | uvloop
2 | httptools
3 | msgpack-python
4 | 
--------------------------------------------------------------------------------
/docs/artwork/pulsar-queue.sketch:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/quantmind/pulsar-queue/HEAD/docs/artwork/pulsar-queue.sketch
--------------------------------------------------------------------------------
/tests/config.py:
-------------------------------------------------------------------------------- 1 | task_paths = ['tests.example.jobs.*', 'pq.jobs'] 2 | data_store = 'redis://127.0.0.1:6379/7?namespace=pqtests' 3 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include MANIFEST.in 2 | include LICENSE 3 | include README.rst 4 | graft pq 5 | graft tests 6 | graft requirements 7 | global-exclude *.pyc 8 | -------------------------------------------------------------------------------- /docs/history/0.1.md: -------------------------------------------------------------------------------- 1 | # Ver. 0.1.1 - 2015-Nov-26 2 | 3 | 4 | ## Internals 5 | 6 | * Run the ``CPUBOUND`` tasks in a child greenlet 7 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /pq/__init__.py: -------------------------------------------------------------------------------- 1 | """Asynchronous task queue""" 2 | from .utils.version import get_version 3 | 4 | VERSION = (0, 6, 0, 'final', 0) 5 | __version__ = get_version(VERSION, __file__) 6 | -------------------------------------------------------------------------------- /docs/history/0.2.md: -------------------------------------------------------------------------------- 1 | ## Ver. 0.2.0 - 2016-May-12 2 | 3 | * Works for python 3.5 and above 4 | * Increased test coverage 5 | * New test suite 6 | * Development status set to ``Alpha`` 7 | 8 | -------------------------------------------------------------------------------- /ci/tag.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | VERSION="$(python setup.py --version)" 4 | echo ${VERSION} 5 | 6 | git push 7 | git tag -am "Release $VERSION [ci skip]" ${VERSION} 8 | git push --tags 9 | -------------------------------------------------------------------------------- /tests/example/manage.py: -------------------------------------------------------------------------------- 1 | task_paths = ['jobs.*', 'pq.jobs'] 2 | 3 | 4 | if __name__ == '__main__': # pragma nocover 5 | from pq.api import PulsarQueue 6 | PulsarQueue(name='taskqueue', config=__file__).start() 7 | -------------------------------------------------------------------------------- /tests/test_json.py: -------------------------------------------------------------------------------- 1 | """Tests task execution with JSON serialiser""" 2 | import unittest 3 | 4 | from tests import app 5 | 6 | 7 | class TestJsonQueue(app.TaskQueueApp, unittest.TestCase): 8 | message_serializer = 'json' 9 | -------------------------------------------------------------------------------- /ci/install.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | pip install --upgrade pip wheel 4 | pip install --upgrade setuptools 5 | pip install -r requirements/hard.txt 6 | pip install -r requirements/soft.txt 7 | pip install -r requirements/dev.txt 8 | -------------------------------------------------------------------------------- /pq/utils/exc.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | def string_exception(exc): 4 | args = [] 5 | for arg in exc.args: 6 | if isinstance(arg, str): 7 | args.append(arg) 8 | if args: 9 | return args[0] if len(args) == 1 else args 10 | return str(exc) 11 | 
-------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [aliases] 2 | release = clean sdist bdist_wheel upload 3 | test = pulsar_test 4 | 5 | [flake8] 6 | exclude = __pycache__,.git,.eggs,build,dist,docs,venv 7 | 8 | [metadata] 9 | license-file = LICENSE 10 | 11 | [pulsar_test] 12 | test_modules = tests 13 | test_timeout = 120 14 | -------------------------------------------------------------------------------- /tests/example/executable.py: -------------------------------------------------------------------------------- 1 | """An example of a python script which can be executed by the task queue 2 | """ 3 | import sys 4 | 5 | 6 | def execute(): 7 | """Simply write the python executable 8 | """ 9 | sys.stdout.write(sys.executable) 10 | 11 | 12 | if __name__ == '__main__': 13 | execute() 14 | -------------------------------------------------------------------------------- /docs/history/0.3.md: -------------------------------------------------------------------------------- 1 | ## Ver. 0.3.1 - 2016-Jul-09 2 | 3 | * Increased code coverage 4 | * Fixed RPC server 5 | * Increased documentation 6 | * Bug and API fixes 7 | 8 | 9 | ## Ver. 0.3.0 - 2016-Jul-01 10 | 11 | * Major code refactoring with introduction of submodules 12 | * Task queue Backend has Pubsub and Message queue handlers -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | concurrency = greenlet,multiprocessing 3 | source = pq 4 | omit = 5 | pq/__init__.py 6 | pq/cpubound.py 7 | pq/utils/version.py 8 | 9 | 10 | [report] 11 | # Regexes for lines to exclude from consideration 12 | exclude_lines = 13 | (?i)# *pragma[: ]*no *cover 14 | raise NotImplementedError 15 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | *.log 3 | *.rdb 4 | 5 | # IDE 6 | .project 7 | .pydevproject 8 | .idea 9 | 10 | #coverage 11 | .coveralls-repo-token 12 | .coverage 13 | htmlcov 14 | 15 | #python 16 | .python-version 17 | *.egg-info 18 | *.pyc 19 | __pycache__ 20 | *.egg-info 21 | .eggs 22 | build 23 | _build 24 | dist 25 | venv 26 | release-notes.md 27 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: clean test coverage 2 | 3 | 4 | PYTHON ?= python 5 | PIP ?= pip 6 | 7 | clean: 8 | rm -fr dist/ *.egg-info *.eggs .eggs build/ 9 | find . 
-name '__pycache__' | xargs rm -rf 10 | 11 | test: 12 | flake8 13 | $(PYTHON) -W ignore setup.py test -q --sequential 14 | 15 | coverage: 16 | $(PYTHON) -W ignore setup.py test --coverage -q 17 | -------------------------------------------------------------------------------- /tests/test_msgpack.py: -------------------------------------------------------------------------------- 1 | """Tests task execution with MsgPack serialiser""" 2 | import unittest 3 | 4 | try: 5 | import msgpack 6 | except ImportError: 7 | msgpack = None 8 | 9 | 10 | from tests import app 11 | 12 | 13 | @unittest.skipUnless(msgpack, "Requires msgpack library") 14 | class TestMsgPackQueue(app.TaskQueueApp, unittest.TestCase): 15 | message_serializer = 'msgpack' 16 | -------------------------------------------------------------------------------- /pq/tasks/concurrency.py: -------------------------------------------------------------------------------- 1 | import math 2 | from multiprocessing import cpu_count 3 | 4 | 5 | MULTIPLIER_NAME = 'max_concurrent_task_multiplier' 6 | 7 | 8 | def linear(cfg): 9 | multiplier = cfg.get(MULTIPLIER_NAME, 2) 10 | return multiplier*cpu_count() 11 | 12 | 13 | def log(cfg): 14 | multiplier = cfg.get(MULTIPLIER_NAME, 5) 15 | return 1 + round(multiplier*math.log(cpu_count())) 16 | -------------------------------------------------------------------------------- /pq/utils/concurrency.py: -------------------------------------------------------------------------------- 1 | from pulsar.utils.structures import inverse_mapping 2 | 3 | 4 | ASYNC_IO = 1 # run in the worker event loop 5 | THREAD_IO = 3 # run in the event loop executor 6 | CPUBOUND = 4 # run in a subprocess 7 | 8 | 9 | concurrency = {'asyncio': ASYNC_IO, 10 | 'thread': THREAD_IO, 11 | 'process': CPUBOUND} 12 | 13 | concurrency_name = dict(inverse_mapping(concurrency)) 14 | -------------------------------------------------------------------------------- /pq/utils/time.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime, timedelta 2 | 3 | 4 | def timedelta_seconds(delta): 5 | return max(delta.total_seconds(), 0) 6 | 7 | 8 | def get_time(expiry, start): 9 | if isinstance(expiry, timedelta): 10 | return (start + 86400 * expiry.days + expiry.seconds + 11 | 0.000001 * expiry.microseconds) 12 | else: 13 | return start + expiry 14 | 15 | 16 | def format_time(dt): 17 | dt = timestamp_to_datetime(dt) 18 | return dt.isoformat() if dt else '?' 19 | 20 | 21 | def timestamp_to_datetime(timestamp): 22 | if isinstance(timestamp, (float, int)): 23 | timestamp = datetime.fromtimestamp(timestamp) 24 | return timestamp 25 | -------------------------------------------------------------------------------- /docs/history/0.4.md: -------------------------------------------------------------------------------- 1 | ## Ver. 0.4.0 - 2016-Aug-04 2 | 3 | Several backward incompatible changes. This version is not compatible with the 0.3 series 4 | and brings several bug fixes, enhancements and almost 100% test coverage. 
5 | 6 | * Added ``TaskManager`` for better customise the task queue application 7 | * Added message serialisers - ``json`` and ``msgpack`` 8 | * Consumers can run on a single process [b392f82](https://github.com/quantmind/pulsar-queue/commit/b392f826a2544032a7775bfcfceb4ce2d89fd3ba) 9 | * Allow to queue tasks with a delay [3ed7d7a](https://github.com/quantmind/pulsar-queue/commit/3ed7d7a4795bea0a8071201e5506ac4b6d1089ed) 10 | * Handle drop connections from pubsub and message queue broker 11 | * Enhanced documentation 12 | 13 | -------------------------------------------------------------------------------- /tests/example/jobs/periodic.py: -------------------------------------------------------------------------------- 1 | import time 2 | from datetime import timedelta 3 | 4 | from pq import api 5 | 6 | 7 | class TestPeriodicJob(api.PeriodicJob): 8 | abstract = True 9 | run_every = timedelta(hours=1) 10 | 11 | 12 | @api.job(run_every=timedelta(seconds=1)) 13 | def testperiodic(self): 14 | assert self.cfg == self.backend.cfg 15 | assert self.wait 16 | return time.time() 17 | 18 | 19 | class TestPeriodicError(TestPeriodicJob): 20 | run_every = timedelta(seconds=60) 21 | 22 | def __call__(self, msg=None): 23 | raise Exception(msg or 'kaputt') 24 | 25 | 26 | class AnchoredEveryHour(TestPeriodicJob): 27 | anchor = api.anchorDate(minute=25) 28 | 29 | def __call__(self): # pragma nocover 30 | pass 31 | -------------------------------------------------------------------------------- /pq/server/rpc.py: -------------------------------------------------------------------------------- 1 | from pulsar.api import send 2 | from pulsar.apps import rpc 3 | 4 | 5 | class TaskQueueRpc(rpc.JSONRPC): 6 | '''A :class:`.JSONRPC` mixin for communicating with a :class:`.TaskQueue`. 7 | 8 | To use it, you need to have an :ref:`RPC application ` 9 | and a :ref:`task queue ` application installed in the 10 | :class:`.Arbiter`. 11 | 12 | :parameter taskqueue: instance or name of the :class:`.TaskQueue` 13 | application which exposes the remote procedure calls. 
14 | 15 | ''' 16 | def __init__(self, api, **kwargs): 17 | self._api_ = (api,) 18 | super().__init__(**kwargs) 19 | 20 | async def rq(self, request, func, *args, **kw): 21 | api = await self.api() 22 | result = await send(api.cfg.name, 'run', func, *args, **kw) 23 | return result 24 | 25 | async def api(self): 26 | if isinstance(self._api_, tuple): 27 | self._api_ = await self._api_[0].start() 28 | return self._api_ 29 | -------------------------------------------------------------------------------- /appveyor.yml: -------------------------------------------------------------------------------- 1 | environment: 2 | PYPI_PASSWD: 3 | secure: w16EmxgfwQdS1FLB/PCWQA== 4 | 5 | matrix: 6 | - PYTHON: "C:\\Python35" 7 | - PYTHON: "C:\\Python35-x64" 8 | - PYTHON: "C:\\Python36" 9 | - PYTHON: "C:\\Python36-x64" 10 | 11 | branches: 12 | only: 13 | - master 14 | - release 15 | 16 | init: 17 | - "ECHO %PYTHON% %PYTHON_VERSION% %PYTHON_ARCH%" 18 | 19 | install: 20 | - "%WITH_COMPILER% %PYTHON%/python -V" 21 | - cmd: nuget install redis-64 -excludeversion 22 | - cmd: redis-64\tools\redis-server.exe --service-install 23 | - cmd: redis-64\tools\redis-server.exe --service-start 24 | 25 | build: off 26 | 27 | test_script: 28 | - "%WITH_COMPILER% %PYTHON%/python setup.py test -q --io proactor --sequential" 29 | - "%WITH_COMPILER% %PYTHON%/python setup.py sdist" 30 | 31 | after_test: 32 | - ps: >- 33 | if($env:appveyor_repo_branch -eq 'release') { 34 | Invoke-Expression "$env:PYTHON\\python.exe -m twine upload dist/* --username lsbardel --password $env:PYPI_PASSWD" 35 | } 36 | -------------------------------------------------------------------------------- /docs/history/0.5.md: -------------------------------------------------------------------------------- 1 | ## Ver. 0.5.2 - 2016-Nov-28 2 | 3 | Obfuscate consumer urls 4 | 5 | 6 | ## Ver. 0.5.1 - 2016-Nov-25 7 | 8 | Several internal changes and bug fixes 9 | 10 | * Dropped ``pubsub`` in favour of pulsar ``channels`` 11 | * Redis backend uses namespace for both channels and queues 12 | * Better serialisation support for messages 13 | * Added ``register_broker`` function to api 14 | * Added ``queue_message`` function to api 15 | 16 | 17 | ## Ver. 0.5.0 - 2016-Oct-11 18 | 19 | Backward incompatible release with a considerable amount of internal and API refactoring 20 | 21 | * Ability to add additional consumers to the queue server (see [pulsar-twitter](https://github.com/quantmind/pulsar-twitter) for example) 22 | * Each consumer is an attribute of the server queue api and backend objects 23 | * Renamed ``TaskFuture`` to ``MessageFuture`` 24 | * Handle ``max_retries`` with ``retry_delay`` 25 | * ``TaskError`` can be used in order to avoid a full stack trace dump. It means it is a TaskError handled by the Job callable. 
26 | * Added ``timeout`` to task execution 27 | * Max concurrent tasks is a linear function of number of cores and ``concurrent_tasks`` parameter -------------------------------------------------------------------------------- /pq/api.py: -------------------------------------------------------------------------------- 1 | from .utils.serializers import Message, MessageDict, queue_message 2 | from .utils.concurrency import ASYNC_IO, THREAD_IO, CPUBOUND 3 | from .mq import MessageFuture, MQ, Manager, register_broker 4 | from .consumer import ConsumerAPI 5 | 6 | from .server.apps import QueueApp, PulsarQueue 7 | from .server.config import DEFAULT_MQ_BACKEND 8 | 9 | from .tasks.consumer import Tasks 10 | from .tasks.task import TaskError, TaskNotAvailable, TaskTimeout, Task 11 | from .tasks.states import StatusType, status_string 12 | from .tasks.models import job, Job, PeriodicJob, anchorDate 13 | 14 | 15 | __all__ = [ 16 | 'QueueApp', 17 | 'PulsarQueue', 18 | # 19 | 'Message', 20 | 'queue_message', 21 | 'MessageDict', 22 | 'MessageFuture', 23 | 'ConsumerAPI', 24 | 'Manager', 25 | 'MQ', 26 | 'register_broker', 27 | # 28 | 'Tasks', 29 | 'TaskError', 30 | 'TaskNotAvailable', 31 | 'TaskTimeout', 32 | 'Task', 33 | 'StatusType', 34 | 'status_string', 35 | 'job', 36 | 'Job', 37 | 'PeriodicJob', 38 | 'anchorDate', 39 | # 40 | 'ASYNC_IO', 41 | 'THREAD_IO', 42 | 'CPUBOUND', 43 | 'DEFAULT_MQ_BACKEND' 44 | ] 45 | -------------------------------------------------------------------------------- /pq/tasks/states.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | 4 | class StatusType(Enum): 5 | SUCCESS = 1 6 | FAILURE = 2 7 | REVOKED = 3 8 | RETRY = 4 9 | STARTED = 5 10 | QUEUED = 6 11 | 12 | 13 | # TODO remove these 6 lines after rest of pq code has been udpated to use enum 14 | SUCCESS = StatusType.SUCCESS.value 15 | FAILURE = StatusType.FAILURE.value 16 | REVOKED = StatusType.REVOKED.value 17 | RETRY = StatusType.RETRY.value 18 | STARTED = StatusType.STARTED.value 19 | QUEUED = StatusType.QUEUED.value 20 | 21 | FULL_RUN_STATES = frozenset([ 22 | StatusType.SUCCESS.value, 23 | StatusType.FAILURE.value 24 | ]) 25 | READY_STATES = frozenset([ 26 | StatusType.SUCCESS.value, 27 | StatusType.FAILURE.value, 28 | StatusType.REVOKED.value 29 | ]) 30 | EXCEPTION_STATES = frozenset([ 31 | StatusType.FAILURE.value, 32 | StatusType.REVOKED.value 33 | ]) 34 | UNREADY_STATES = frozenset([ 35 | StatusType.QUEUED.value, 36 | StatusType.STARTED.value, 37 | StatusType.RETRY.value 38 | ]) 39 | 40 | 41 | def status_string(status): 42 | try: 43 | code = StatusType(status).name 44 | except ValueError: 45 | code = 'UNKNOWN' 46 | 47 | return code 48 | -------------------------------------------------------------------------------- /tests/test_close.py: -------------------------------------------------------------------------------- 1 | """Tests closing the worker after 10 requests""" 2 | import unittest 3 | import asyncio 4 | from random import random 5 | 6 | from tests import app 7 | 8 | 9 | class TestMsgPackQueue(app.TaskQueueBase, unittest.TestCase): 10 | max_requests = 10 11 | concurrent_tasks = 20 12 | 13 | async def test_max_requests(self): 14 | api = self.api 15 | tasks = [api.tasks.queue('asynchronous', lag=random()) 16 | for _ in range(18)] 17 | tasks = await asyncio.gather(*tasks) 18 | self.assertEqual(len(tasks), 18) 19 | workers = set() 20 | for task in tasks: 21 | self.assertEqual(task.status_string, 'SUCCESS') 22 | workers.add(task.worker) 23 | 24 
| self.assertEqual(len(workers), 2) 25 | 26 | # FAILURES 27 | tasks = [api.tasks.queue('asynchronous', sleep=1) for _ in range(6)] 28 | 29 | tasks = await asyncio.gather(*tasks) 30 | self.assertEqual(len(tasks), 6) 31 | workers = set() 32 | for task in tasks: 33 | self.assertEqual(task.status_string, 'FAILURE') 34 | workers.add(task.worker) 35 | 36 | self.assertEqual(len(workers), 2) 37 | -------------------------------------------------------------------------------- /pq/jobs.py: -------------------------------------------------------------------------------- 1 | """Useful Job for the task queue. 2 | 3 | Include this file in the ``task_paths`` list if you need them 4 | """ 5 | import sys 6 | import os 7 | import tempfile 8 | 9 | from pq.api import job 10 | 11 | 12 | @job() 13 | async def execute_python(self, code=None): 14 | """Execute arbitrary python code on a subprocess. For example: 15 | 16 | tasks.queue_task('execute.python', code='print("Hello World!")') 17 | """ 18 | assert isinstance(code, str), "code must be a string" 19 | fp, path = tempfile.mkstemp(suffix='.py', text=True) 20 | try: 21 | with open(path, 'w') as fp: 22 | fp.write(code) 23 | 24 | command = '%s %s' % (sys.executable, path) 25 | result = await self.shell(command) 26 | finally: 27 | os.remove(path) 28 | return result 29 | 30 | 31 | @job() 32 | async def execute_python_script(self, script=None): 33 | """Execute arbitrary python code on a subprocess 34 | """ 35 | assert isinstance(script, str), "script must be a string" 36 | assert os.path.isfile(script), "script %s is not a file" % script 37 | command = '%s %s' % (sys.executable, script) 38 | result = await self.shell(command) 39 | return result 40 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015-2017, Quantmind 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | * Neither the name of pulsar-queue nor the names of its 15 | contributors may be used to endorse or promote products derived from 16 | this software without specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
28 | -------------------------------------------------------------------------------- /pq/backends/redis.py: -------------------------------------------------------------------------------- 1 | from .. import mq 2 | 3 | 4 | class MQ(mq.MQ): 5 | """Redis Message Broker 6 | """ 7 | def __init__(self, backend, store): 8 | super().__init__(backend, store) 9 | self._client = store.client() 10 | 11 | async def get_message(self, *queues): 12 | '''Asynchronously retrieve a :class:`Task` from queues 13 | 14 | :return: a :class:`.Task` or ``None``. 15 | ''' 16 | assert queues 17 | args = [self.prefixed(q) for q in queues] 18 | args.append(max(1, int(self.cfg.task_pool_timeout))) 19 | qt = await self._client.execute('brpop', *args) 20 | if qt: 21 | _, message = qt 22 | return self.decode(message) 23 | 24 | async def flush_queues(self, *queues): 25 | '''Clear a list of task queues 26 | ''' 27 | pipe = self._client.pipeline() 28 | for queue in queues: 29 | pipe.execute('del', self.prefixed(queue)) 30 | await pipe.commit() 31 | 32 | async def queue_message(self, queue, message): 33 | '''Asynchronously queue a task 34 | ''' 35 | await self._client.lpush(self.prefixed(queue), message) 36 | 37 | async def size(self, *queues): 38 | pipe = self._client.pipeline() 39 | for queue in queues: 40 | pipe.execute('llen', self.prefixed(queue)) 41 | sizes = await pipe.commit() 42 | return sizes 43 | 44 | async def incr(self, name): 45 | concurrent = await self._client.incr(self.prefixed(name)) 46 | return concurrent 47 | 48 | async def decr(self, name): 49 | concurrent = await self._client.decr(self.prefixed(name)) 50 | return concurrent 51 | -------------------------------------------------------------------------------- /pq/utils/version.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import os 3 | import subprocess 4 | 5 | symbol = {'alpha': 'a', 'beta': 'b'} 6 | 7 | 8 | def get_version(version, filename=None): 9 | assert len(version) == 5 10 | assert version[3] in ('alpha', 'beta', 'rc', 'final') 11 | main = '.'.join(map(str, version[:3])) 12 | sub = '' 13 | if version[3] == 'alpha' and version[4] == 0: 14 | git_changeset = get_git_changeset(filename) 15 | if git_changeset: 16 | sub = '.dev%s' % git_changeset 17 | if version[3] != 'final' and not sub: 18 | sub = '%s%s' % (symbol.get(version[3], version[3]), version[4]) 19 | return main + sub 20 | 21 | 22 | def sh(command, cwd=None): 23 | return subprocess.Popen(command, 24 | stdout=subprocess.PIPE, 25 | stderr=subprocess.PIPE, 26 | shell=True, 27 | cwd=cwd, 28 | universal_newlines=True).communicate()[0] 29 | 30 | 31 | def get_git_changeset(filename=None): 32 | """Returns a numeric identifier of the latest git changeset. 33 | 34 | The result is the UTC timestamp of the changeset in YYYYMMDDHHMMSS format. 35 | This value isn't guaranteed to be unique, but collisions are very unlikely, 36 | so it's sufficient for generating the development version numbers. 
37 | """ 38 | dirname = os.path.dirname(filename or __file__) 39 | git_show = sh('git show --pretty=format:%ct --quiet HEAD', 40 | cwd=dirname) 41 | timestamp = git_show.partition('\n')[0] 42 | try: 43 | timestamp = datetime.datetime.utcfromtimestamp(int(timestamp)) 44 | except ValueError: 45 | return None 46 | return timestamp.strftime('%Y%m%d%H%M%S') 47 | -------------------------------------------------------------------------------- /tests/test_scheduler.py: -------------------------------------------------------------------------------- 1 | """Tests task scheduling""" 2 | import time 3 | import asyncio 4 | import unittest 5 | from functools import partial 6 | 7 | from tests import app 8 | 9 | 10 | class TestScheduler(app.TaskQueueBase, unittest.TestCase): 11 | schedule_periodic = True 12 | 13 | def test_scheduler(self): 14 | scheduler = self.tq_app.backend 15 | self.assertEqual(scheduler.cfg.default_task_queue, '%s1' % self.name()) 16 | self.assertTrue(scheduler.tasks.next_run) 17 | 18 | def test_next_scheduled(self): 19 | scheduler = self.tq_app.backend 20 | entry, t = scheduler.tasks.next_scheduled() 21 | self.assertEqual(entry, 'testperiodic') 22 | 23 | def test_next_scheduled_entries(self): 24 | scheduler = self.tq_app.backend 25 | entry, t = scheduler.tasks.next_scheduled(['anchoredeveryhour']) 26 | self.assertEqual(entry, 'anchoredeveryhour') 27 | self.assertTrue(t > 0) 28 | 29 | async def test_periodic(self): 30 | scheduler = self.tq_app.backend 31 | future = asyncio.Future() 32 | cbk = partial(self._test_periodic, future) 33 | await scheduler.on_events('task', '*', cbk) 34 | try: 35 | result = await future 36 | self.assertTrue(result < time.time()) 37 | finally: 38 | await scheduler.remove_event_callback('task', '*', cbk) 39 | 40 | async def test_rpc_next_scheduled_tasks(self): 41 | next = await self.proxy.tasks.next_scheduled_tasks() 42 | self.assertTrue(isinstance(next, list)) 43 | self.assertEqual(len(next), 2) 44 | self.assertEqual(next[0], 'testperiodic') 45 | 46 | def _test_periodic(self, future, channel, event, task): 47 | try: 48 | self.assertEqual(task.name, 'testperiodic') 49 | if event != 'done': 50 | return 51 | except Exception as exc: 52 | future.set_exception(exc) 53 | else: 54 | future.set_result(task.result) 55 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from setuptools import setup, find_packages 4 | 5 | import pq 6 | 7 | 8 | def read(name): 9 | filename = os.path.join(os.path.dirname(__file__), name) 10 | with open(filename) as fp: 11 | return fp.read() 12 | 13 | 14 | def requirements(name): 15 | install_requires = [] 16 | dependency_links = [] 17 | 18 | for line in read(name).split('\n'): 19 | if line.startswith('-e '): 20 | link = line[3:].strip() 21 | if link == '.': 22 | continue 23 | dependency_links.append(link) 24 | line = link.split('=')[1] 25 | line = line.strip() 26 | if line: 27 | install_requires.append(line) 28 | 29 | return install_requires, dependency_links 30 | 31 | 32 | meta = dict( 33 | version=pq.__version__, 34 | description=pq.__doc__, 35 | name='pulsar-queue', 36 | author='Luca Sbardella', 37 | author_email="luca@quantmind.com", 38 | maintainer_email="luca@quantmind.com", 39 | url="https://github.com/quantmind/pulsar-queue", 40 | license="BSD", 41 | long_description=read('README.rst'), 42 | packages=find_packages(exclude=['tests', 'tests.*']), 43 | include_package_data=True, 44 | 
zip_safe=False, 45 | setup_requires=['pulsar', 'wheel'], 46 | install_requires=requirements('requirements/hard.txt')[0], 47 | classifiers=[ 48 | 'Development Status :: 4 - Beta', 49 | 'Environment :: Web Environment', 50 | 'Intended Audience :: Developers', 51 | 'License :: OSI Approved :: BSD License', 52 | 'Operating System :: OS Independent', 53 | 'Programming Language :: Python', 54 | 'Programming Language :: Python :: 3', 55 | 'Programming Language :: Python :: 3.5', 56 | 'Programming Language :: Python :: 3.6', 57 | 'Topic :: Utilities' 58 | ] 59 | ) 60 | 61 | 62 | if __name__ == '__main__': 63 | try: 64 | from pulsar import cmds 65 | meta['cmdclass'] = dict(pypi=cmds.PyPi) 66 | except ImportError: 67 | pass 68 | setup(**meta) 69 | -------------------------------------------------------------------------------- /pq/server/consumer.py: -------------------------------------------------------------------------------- 1 | import time 2 | import asyncio 3 | from multiprocessing import cpu_count 4 | 5 | from pulsar.apps.data.channels import backoff 6 | 7 | from .producer import Producer, ConsumerMessage 8 | 9 | 10 | HEARTBEAT = 2 11 | 12 | 13 | class Consumer(Producer): 14 | """The consumer is used by the server side application 15 | """ 16 | def __repr__(self): 17 | return 'consumer <%s>' % self.broker 18 | 19 | @property 20 | def is_consumer(self): 21 | return True 22 | 23 | def tick(self, monitor): 24 | for consumer in self.consumers: 25 | consumer.tick() 26 | 27 | async def worker_tick(self, worker, next=None): 28 | pnext, next = next, HEARTBEAT 29 | try: 30 | info = dict(self.info()) 31 | info['consumer'] = worker.aid 32 | info['node'] = self.node_name 33 | info['pubsub'] = str(self.channels) 34 | info['cores'] = cpu_count() 35 | info['message-broker'] = str(self.broker.store) 36 | info['time'] = time.time() 37 | if self.cfg.debug: 38 | self.logger.debug('publishing worker %s info', worker) 39 | await self.publish('status', ConsumerMessage(info)) 40 | except ConnectionError: 41 | next = next if not pnext else backoff(pnext) 42 | self.logger.critical( 43 | 'Cannot publish consumer status: connection error.' 
44 | ' Try in %s seconds', next 45 | ) 46 | finally: 47 | worker._loop.call_later(next, self.__tick, worker, next) 48 | 49 | async def start(self, worker, consume=True): 50 | self.logger.info('Start %s', self) 51 | await super().start() 52 | if consume: 53 | for consumer in self.consumers: 54 | consumer.start(worker) 55 | await self.worker_tick(worker) 56 | return self 57 | 58 | def __tick(self, worker, next): 59 | asyncio.ensure_future( 60 | self.worker_tick(worker, next), 61 | loop=worker._loop 62 | ) 63 | -------------------------------------------------------------------------------- /pq/consumer.py: -------------------------------------------------------------------------------- 1 | from logging import getLogger 2 | 3 | from .mq import BaseComponent 4 | 5 | 6 | class ConsumerAPI(BaseComponent): 7 | 8 | def __init__(self, backend): 9 | super().__init__(backend) 10 | self.logger = getLogger('pulsar.%s' % self.name) 11 | self._closing_waiter = None 12 | 13 | def __str__(self): 14 | return self.name 15 | 16 | def __repr__(self): 17 | return '%s %s' % (self, self.backend) 18 | 19 | @property 20 | def name(self): 21 | return self.__class__.__name__.lower() 22 | 23 | @property 24 | def _loop(self): 25 | return self.backend._loop 26 | 27 | @property 28 | def manager(self): 29 | return self.backend.manager 30 | 31 | @property 32 | def broker(self): 33 | return self.backend.broker 34 | 35 | @property 36 | def channels(self): 37 | return self.backend.channels 38 | 39 | def start(self, worker): 40 | """Start this consumer""" 41 | 42 | def register(self): 43 | """Register this consumer with channels""" 44 | 45 | def tick(self): 46 | """Called periodically by the monitor and before closing 47 | by all workers. 48 | 49 | By default it checks for closing signal and it available do the close 50 | """ 51 | if self._closing_waiter: 52 | self.do_close() 53 | 54 | def info(self): 55 | pass 56 | 57 | def rpc(self): 58 | pass 59 | 60 | def execute(self, message): 61 | return message 62 | 63 | def closing(self): 64 | return self._closing_waiter is not None 65 | 66 | def close(self, msg=None): 67 | """Return a Future which should be called back once the consumer 68 | is closed""" 69 | if not self.closing(): 70 | self._closing_waiter = self._loop.create_future() 71 | if msg: 72 | self.logger.warning(msg) 73 | self.tick() 74 | return self._closing_waiter 75 | 76 | def do_close(self, msg=None): 77 | if not self._closing_waiter.done(): 78 | if msg: 79 | self.logger.warning(msg) 80 | self._closing_waiter.set_result(True) 81 | -------------------------------------------------------------------------------- /pq/utils/serializers.py: -------------------------------------------------------------------------------- 1 | import json 2 | from collections import OrderedDict 3 | 4 | try: 5 | import msgpack 6 | except ImportError: # pragma nocover 7 | msgpack = None 8 | 9 | 10 | serializers = OrderedDict() 11 | message_types = {} 12 | 13 | 14 | def serializer(cls): 15 | name = cls.__name__.lower() 16 | serializers[name] = cls 17 | return cls 18 | 19 | 20 | class MessageMetaClass(type): 21 | 22 | def __new__(cls, name, bases, attrs): 23 | attrs['type'] = (attrs.get('type') or name).lower() 24 | c = super(MessageMetaClass, cls).__new__(cls, name, bases, attrs) 25 | message_types[c.type] = c 26 | return c 27 | 28 | 29 | class Message(metaclass=MessageMetaClass): 30 | id = None 31 | """Message ID - all messages should have one""" 32 | 33 | @classmethod 34 | def consumer(cls): 35 | pass 36 | 37 | def get(self, name): 38 | return 
self.__dict__.get(name) 39 | 40 | def tojson(self): 41 | '''A serializable dictionary 42 | ''' 43 | data = self.__dict__.copy() 44 | data['type'] = self.type 45 | return data 46 | 47 | 48 | class MessageDict(Message): 49 | 50 | def __init__(self, *args, **kwargs): 51 | self.__dict__.update(*args, **kwargs) 52 | 53 | 54 | @serializer 55 | class Json: 56 | 57 | @classmethod 58 | def decode(cls, data): 59 | if isinstance(data, bytes): 60 | data = data.decode('utf-8') 61 | return json.loads(data, object_hook=queue_message) 62 | 63 | @classmethod 64 | def encode(cls, message): 65 | return json.dumps(message, cls=JSONEncoder) 66 | 67 | 68 | if msgpack: 69 | 70 | @serializer 71 | class MsgPack: 72 | 73 | @classmethod 74 | def decode(cls, data): 75 | return msgpack.unpackb(data, object_hook=queue_message, 76 | encoding='utf-8') 77 | 78 | @classmethod 79 | def encode(cls, message): 80 | return msgpack.packb(message, default=as_message) 81 | 82 | 83 | def queue_message(d, type=None): 84 | type = d.get('type', type) 85 | MsgType = message_types.get(type) 86 | if MsgType: 87 | d.pop('type', None) 88 | return MsgType(**d) 89 | return d 90 | 91 | 92 | class JSONEncoder(json.JSONEncoder): 93 | 94 | def default(self, o): 95 | if isinstance(o, Message): 96 | return o.tojson() 97 | return super().default(o) 98 | 99 | 100 | def as_message(o): 101 | return o.tojson() if isinstance(o, Message) else o 102 | -------------------------------------------------------------------------------- /pq/tasks/rpc.py: -------------------------------------------------------------------------------- 1 | from pulsar.apps import rpc 2 | 3 | 4 | class TasksRpc(rpc.JSONRPC): 5 | '''A :class:`.JSONRPC` mixin for communicating with a :class:`.TaskQueue`. 6 | 7 | To use it, you need to have an :ref:`RPC application ` 8 | and a :ref:`task queue ` application installed in the 9 | :class:`.Arbiter`. 10 | 11 | :parameter taskqueue: instance or name of the :class:`.TaskQueue` 12 | application which exposes the remote procedure calls. 13 | 14 | ''' 15 | async def rpc_job_list(self, request, jobnames=None): 16 | '''Return the list of Jobs registered with task queue with meta 17 | information. 18 | 19 | If a list of ``jobnames`` is given, it returns only jobs 20 | included in the list. 21 | ''' 22 | api = await self.parent.api() 23 | return api.tasks.job_list(jobnames=jobnames) 24 | 25 | def rpc_next_scheduled_tasks(self, request, jobnames=None): 26 | return self.parent.rq(request, next_scheduled, jobnames=jobnames) 27 | 28 | async def rpc_queue(self, request, jobname=None, **kw): 29 | '''Queue a new ``jobname`` in the task queue. 30 | 31 | The task can be of any type as long as it is registered in the 32 | task queue registry. To check the available tasks call the 33 | :meth:`rpc_job_list` function. 34 | 35 | It returns the task :attr:`~Task.id`. 36 | ''' 37 | task = await self._queue(request, jobname, **kw) 38 | return task.tojson() 39 | 40 | def task_request_parameters(self, request): 41 | '''**Internal function** which returns a dictionary of parameters 42 | to be passed to the :class:`.Task` class constructor. 43 | 44 | This function can be overridden to add information about 45 | the type of request, who made the request and so forth. 46 | It must return a dictionary. 
47 | By default it returns an empty dictionary.''' 48 | return {} 49 | 50 | ######################################################################## 51 | # INTERNALS 52 | async def _queue(self, request, jobname, meta_params=None, **kw): 53 | if not jobname: 54 | raise rpc.InvalidParams('"jobname" is not specified!') 55 | meta_params = meta_params or {} 56 | meta_params.update(self.task_request_parameters(request)) 57 | api = await self.parent.api() 58 | result = await api.tasks.queue(jobname, meta_params=meta_params, **kw) 59 | return result 60 | 61 | 62 | def next_scheduled(actor, jobnames=None): 63 | backend = actor.app.backend 64 | return backend.tasks.next_scheduled(jobnames=jobnames) 65 | -------------------------------------------------------------------------------- /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | jobs: 3 | main: 4 | working_directory: ~/main 5 | docker: 6 | - image: python:3.6.3 7 | - image: redis 8 | steps: 9 | - checkout 10 | - run: 11 | name: install packages 12 | command: ci/install.sh 13 | - run: 14 | name: test 15 | command: make test 16 | coverage: 17 | working_directory: ~/coverage 18 | docker: 19 | - image: python:3.6.3 20 | - image: redis 21 | steps: 22 | - checkout 23 | - run: 24 | name: install packages 25 | command: ci/install.sh 26 | - run: 27 | name: run tests for coverage 28 | command: make coverage 29 | - run: 30 | name: upload coverage stats 31 | command: codecov 32 | legacy: 33 | working_directory: ~/legacy 34 | docker: 35 | - image: python:3.5.4 36 | - image: redis 37 | steps: 38 | - checkout 39 | - run: 40 | name: install packages 41 | command: ci/install.sh 42 | - run: 43 | name: test 44 | command: make test 45 | deploy-release: 46 | working_directory: ~/deploy 47 | docker: 48 | - image: python:3.6.3 49 | steps: 50 | - checkout 51 | - run: 52 | name: install packages 53 | command: ci/install.sh 54 | - run: 55 | name: check version 56 | command: python setup.py pypi --final 57 | - run: 58 | name: create source distribution 59 | command: python setup.py sdist 60 | - run: 61 | name: release source distribution 62 | command: twine upload dist/* --username lsbardel --password $PYPI_PASSWORD 63 | - run: 64 | name: tag 65 | command: ci/tag.sh 66 | 67 | workflows: 68 | version: 2 69 | build-deploy: 70 | jobs: 71 | - main: 72 | filters: 73 | branches: 74 | ignore: release 75 | tags: 76 | ignore: /.*/ 77 | - coverage: 78 | filters: 79 | branches: 80 | ignore: release 81 | tags: 82 | ignore: /.*/ 83 | - legacy: 84 | filters: 85 | branches: 86 | ignore: release 87 | tags: 88 | ignore: /.*/ 89 | - deploy-release: 90 | filters: 91 | branches: 92 | only: release 93 | tags: 94 | ignore: /.*/ 95 | -------------------------------------------------------------------------------- /tests/test_api.py: -------------------------------------------------------------------------------- 1 | """Tests the api""" 2 | import unittest 3 | from datetime import datetime 4 | from unittest import mock 5 | 6 | from pq import api 7 | from pq.utils.time import format_time 8 | from pq.tasks.consumer import poll_time 9 | 10 | from tests.app import simple_task 11 | 12 | 13 | class TestTasks(unittest.TestCase): 14 | 15 | def app(self, task_paths=None, **kwargs): 16 | task_paths = task_paths or ['tests.example.sampletasks.*'] 17 | app = api.QueueApp(task_paths=task_paths, **kwargs) 18 | app.backend.tasks.queue = mock.MagicMock() 19 | return app 20 | 21 | def test_decorator(self): 22 | job_cls = api.job('bla foo', 
v0=6)(simple_task) 23 | job = job_cls() 24 | self.assertIsInstance(job, api.Job) 25 | self.assertEqual(job(value=4), 10) 26 | self.assertEqual(str(job), 'bla.foo') 27 | self.assertFalse(job.task) 28 | 29 | def test_unknown_state(self): 30 | self.assertEqual(api.status_string(243134), 'UNKNOWN') 31 | self.assertEqual(api.status_string('jhbjhbj'), 'UNKNOWN') 32 | self.assertEqual(api.status_string(1), 'SUCCESS') 33 | 34 | def test_format_time(self): 35 | dt = datetime.now() 36 | st = format_time(dt) 37 | self.assertIsInstance(st, str) 38 | timestamp = dt.timestamp() 39 | st2 = format_time(timestamp) 40 | self.assertEqual(st, st2) 41 | self.assertEqual(format_time(None), '?') 42 | 43 | def test_close(self): 44 | t = api.QueueApp().api() 45 | self.assertEqual(t.closing(), False) 46 | t.close() 47 | self.assertEqual(t.closing(), True) 48 | self.assertEqual(t.tasks.closing(), True) 49 | warn = mock.MagicMock() 50 | t.tasks.logger.warning = warn 51 | self.assertFalse(t.tasks.queue('foo')) 52 | self.assertEqual(warn.call_count, 1) 53 | self.assertEqual( 54 | warn.call_args[0][0], 55 | 'Cannot queue task, task backend closing' 56 | ) 57 | 58 | def test_task_not_available(self): 59 | t = api.QueueApp().api() 60 | self.assertRaises(api.TaskNotAvailable, 61 | t.tasks.queue, 'jsdbcjsdhbc') 62 | 63 | def test_queues(self): 64 | t = api.QueueApp().api() 65 | self.assertTrue(t.tasks.queues()) 66 | 67 | def test_namespace(self): 68 | t = api.QueueApp(config='tests.config').api() 69 | self.assertEqual(t.broker.namespace, 'pqtests_') 70 | self.assertEqual(t.broker.prefixed('foo'), 'pqtests_foo') 71 | 72 | def test_poll_time(self): 73 | self.assertEqual(poll_time(1, 4, 0), 1) 74 | self.assertEqual(poll_time(1, 4, 1), 4) 75 | self.assertLess(poll_time(1, 4, 0.5), 2.5) 76 | self.assertEqual(poll_time(1, 4, 0, lag=2), 0) 77 | self.assertEqual(poll_time(1, 4, 1, lag=2), 2) 78 | -------------------------------------------------------------------------------- /pq/tasks/task.py: -------------------------------------------------------------------------------- 1 | from pulsar.api import PulsarException 2 | from pulsar.utils.log import LazyString 3 | 4 | from ..utils.serializers import Message 5 | from . 
import states 6 | 7 | 8 | __all__ = ['TaskError', 9 | 'TaskNotAvailable', 10 | 'TaskTimeout', 11 | 'Task'] 12 | 13 | 14 | class TaskError(PulsarException): 15 | status = states.FAILURE 16 | 17 | 18 | class TaskNotAvailable(TaskError): 19 | MESSAGE = 'Task {0} is not registered' 20 | 21 | def __init__(self, task_name): 22 | self.task_name = task_name 23 | super().__init__(self.MESSAGE.format(task_name)) 24 | 25 | 26 | class TaskTimeout(TaskError): 27 | pass 28 | 29 | 30 | class Task(Message): 31 | '''A class containing task execution data 32 | ''' 33 | def __init__(self, id=None, name=None, time_queued=None, 34 | time_started=None, time_ended=None, 35 | result=None, exception=None, stacktrace=None, worker=None, 36 | timeout=None, status=None, kwargs=None, queue=None, 37 | retry=None, delay=None, meta=None, run_info=None, **kw): 38 | self.__dict__.update(kw) 39 | self.id = id 40 | self.name = name 41 | self.queue = queue 42 | self.time_queued = time_queued 43 | self.time_started = time_started 44 | self.time_ended = time_ended 45 | self.result = result 46 | self.exception = exception 47 | self.stacktrace = stacktrace 48 | self.worker = worker 49 | self.timeout = timeout 50 | self.status = status 51 | self.retry = retry or 1 52 | self.delay = delay 53 | self.kwargs = kwargs 54 | self.run_info = run_info if run_info is not None else {} 55 | self.meta = meta if meta is not None else {} 56 | self.meta.update(kw) 57 | 58 | def __repr__(self): 59 | return self.info() 60 | __str__ = __repr__ 61 | 62 | @classmethod 63 | def consumer(cls): 64 | return 'tasks' 65 | 66 | @property 67 | def full_name(self): 68 | return 'task.%s' % self.name 69 | 70 | @property 71 | def status_string(self): 72 | '''A string representation of :attr:`status` code 73 | ''' 74 | return states.status_string(self.status) 75 | 76 | @property 77 | def expiry(self): 78 | if self.timeout: 79 | return self.time_queued + (self.delay or 0) + self.timeout 80 | 81 | def done(self): 82 | '''Return ``True`` if the :class:`Task` has finshed. 83 | 84 | Its status is one of :ref:`READY_STATES `. 85 | ''' 86 | return self.status in states.READY_STATES 87 | 88 | def info(self): 89 | '''Information string about the task 90 | ''' 91 | return '%s<%s><%s>' % (self.full_name, self.id, self.status_string) 92 | 93 | def lazy_info(self): 94 | '''Lazy information string (useful for logging) 95 | ''' 96 | return LazyString(self.info) 97 | -------------------------------------------------------------------------------- /tests/example/randomtext.txt: -------------------------------------------------------------------------------- 1 | Dependent certainty off discovery him his tolerably offending. Ham for attention remainder sometimes additions recommend fat our. Direction has strangers now believing. Respect enjoyed gay far exposed parlors towards. Enjoyment use tolerably dependent listening men. No peculiar in handsome together unlocked do by. Article concern joy anxious did picture sir her. Although desirous not recurred disposed off shy you numerous securing. 2 | 3 | Now indulgence dissimilar for his thoroughly has terminated. Agreement offending commanded my an. Change wholly say why eldest period. Are projection put celebrated particular unreserved joy unsatiable its. In then dare good am rose bred or. On am in nearer square wanted. 4 | 5 | Lose away off why half led have near bed. At engage simple father of period others except. My giving do summer of though narrow marked at. Spring formal no county ye waited. 
My whether cheered at regular it of promise blushes perhaps. Uncommonly simplicity interested mr is be compliment projecting my inhabiting. Gentleman he september in oh excellent. 6 | 7 | Abilities or he perfectly pretended so strangers be exquisite. Oh to another chamber pleased imagine do in. Went me rank at last loud shot an draw. Excellent so to no sincerity smallness. Removal request delight if on he we. Unaffected in we by apartments astonished to decisively themselves. Offended ten old consider speaking. 8 | 9 | Offered say visited elderly and. Waited period are played family man formed. He ye body or made on pain part meet. You one delay nor begin our folly abode. By disposed replying mr me unpacked no. As moonlight of my resolving unwilling. 10 | 11 | Examine she brother prudent add day ham. Far stairs now coming bed oppose hunted become his. You zealously departure had procuring suspicion. Books whose front would purse if be do decay. Quitting you way formerly disposed perceive ladyship are. Common turned boy direct and yet. 12 | 13 | Sex reached suppose our whether. Oh really by an manner sister so. One sportsman tolerably him extensive put she immediate. He abroad of cannot looked in. Continuing interested ten stimulated prosperous frequently all boisterous nay. Of oh really he extent horses wicket. 14 | 15 | Doubtful two bed way pleasure confined followed. Shew up ye away no eyes life or were this. Perfectly did suspicion daughters but his intention. Started on society an brought it explain. Position two saw greatest stronger old. Pianoforte if at simplicity do estimating. 16 | 17 | Out believe has request not how comfort evident. Up delight cousins we feeling minutes. Genius has looked end piqued spring. Down has rose feel find man. Learning day desirous informed expenses material returned six the. She enabled invited exposed him another. Reasonably conviction solicitude me mr at discretion reasonable. Age out full gate bed day lose. 18 | 19 | Am if number no up period regard sudden better. Decisively surrounded all admiration and not you. Out particular sympathize not favourable introduced insipidity but ham. Rather number can and set praise. Distrusts an it contented perceived attending oh. Thoroughly estimating introduced stimulated why but motionless. 
20 | -------------------------------------------------------------------------------- /tests/test_connection.py: -------------------------------------------------------------------------------- 1 | """Tests connection errors""" 2 | import unittest 3 | from asyncio import Future 4 | 5 | from pulsar.api import send 6 | from pulsar.utils.string import random_string 7 | 8 | from pq import api 9 | 10 | 11 | class Tester: 12 | 13 | def __init__(self): 14 | self.end = Future() 15 | 16 | def __call__(self, *args, **kwargs): 17 | if not self.end.done(): 18 | self.end.set_result((args, kwargs)) 19 | 20 | 21 | class TestConnectionDrop(unittest.TestCase): 22 | app = None 23 | 24 | async def setUp(self): 25 | self.app = api.QueueApp( 26 | name='connection_%s' % random_string(), 27 | config='tests.config', 28 | workers=0 29 | ) 30 | await self.app.start() 31 | self.backend = self.app.backend 32 | 33 | async def tearDown(self): 34 | if self.app: 35 | await send('arbiter', 'kill_actor', self.app.name) 36 | 37 | async def test_fail_get_message(self): 38 | original, _, _ = self._patch( 39 | self.backend.broker, 'get_message') 40 | critical = Tester() 41 | self.backend.tasks.logger.critical = critical 42 | args, kw = await critical.end 43 | self.assertEqual(len(args), 3) 44 | self.assertEqual(args[1], self.backend.broker) 45 | self.assertEqual(args[2], 2) 46 | critical.end = Future() 47 | args, kw = await critical.end 48 | self.assertEqual(args[1], self.backend.broker) 49 | self.assertEqual(args[2], 2.25) 50 | 51 | async def test_fail_publish(self): 52 | original, warning, critical = self._patch( 53 | self.backend.channels.pubsub, 'publish') 54 | task = self.backend.tasks.queue('addition', a=1, b=2) 55 | args, kw = await critical.end 56 | self.assertEqual(len(args), 3) 57 | self.assertEqual(args[1], self.backend.channels) 58 | task.cancel() 59 | 60 | async def test_fail_subscribe(self): 61 | original, warning, critical = self._patch( 62 | self.backend.channels.pubsub, 'subscribe') 63 | await self.backend.on_events('tasks', 'started', lambda *args: args) 64 | args, kw = await critical.end 65 | self.assertEqual(len(args), 3) 66 | self.assertEqual(args[1], self.backend.channels) 67 | self.assertEqual(args[2], 2) 68 | critical.end = Future() 69 | args, kw = await critical.end 70 | self.assertEqual(len(args), 3) 71 | self.assertEqual(args[1], self.backend.channels) 72 | self.assertEqual(args[2], 2.25) 73 | self.backend.channels.pubsub.subscribe = original 74 | args, kw = await warning.end 75 | self.assertEqual(len(args), 3) 76 | self.assertEqual(args[1], self.backend.channels) 77 | self.assertEqual(args[2], 'consumer') 78 | 79 | def _log_error(self, coro, *args, **kwargs): 80 | coro.switch((args, kwargs)) 81 | 82 | def _connection_error(self, *args, **kwargs): 83 | raise ConnectionRefusedError 84 | 85 | def _patch(self, obj, method): 86 | original = getattr(obj, method) 87 | setattr(obj, method, self._connection_error) 88 | critical = Tester() 89 | warning = Tester() 90 | self.backend.logger.critical = critical 91 | self.backend.logger.warning = warning 92 | return original, warning, critical 93 | -------------------------------------------------------------------------------- /pq/server/apps.py: -------------------------------------------------------------------------------- 1 | from pulsar.api import Application, MultiApp, Config 2 | from pulsar.apps.wsgi import (WSGIServer, Router, LazyWsgi, 3 | WsgiHandler, GZipMiddleware) 4 | 5 | from .config import DEFAULT_MQ_BACKEND 6 | from .rpc import TaskQueueRpc 7 | 
from .producer import Producer 8 | from .consumer import Consumer 9 | from .. import __version__ 10 | 11 | 12 | class QueueApp(Application): 13 | """A pulsar :class:`.Application` for consuming :class:`.Task`. 14 | 15 | This application can also schedule periodic tasks when the 16 | :ref:`schedule_periodic ` flag is ``True``. 17 | """ 18 | backend_factory = Consumer 19 | name = 'tasks' 20 | cfg = Config(apps=('tasks',), 21 | version=__version__, 22 | data_store=DEFAULT_MQ_BACKEND) 23 | _backend = None 24 | 25 | def api(self): 26 | return Producer(self.cfg, logger=self.logger) 27 | 28 | @property 29 | def backend(self): 30 | return self._backend 31 | 32 | async def monitor_start(self, monitor, exc=None): 33 | if not exc and self.cfg.workers: 34 | self._backend = await self._start(monitor, False) 35 | 36 | def monitor_task(self, monitor): 37 | if monitor.is_running(): 38 | self._backend.tick(monitor) 39 | 40 | def monitor_stopping(self, worker, exc=None): 41 | if self._backend: 42 | backend = self._backend 43 | self._backend = None 44 | return backend.close() 45 | 46 | async def worker_start(self, worker, exc=None): 47 | if not exc: 48 | self._backend = await self._start(worker) 49 | if not worker.is_monitor(): 50 | self._backend.event('close').bind(_close) 51 | 52 | def worker_stopping(self, worker, exc=None): 53 | if self._backend: 54 | return self._backend.close() 55 | 56 | def actorparams(self, monitor, params): 57 | # makes sure workers are only consuming tasks, not scheduling. 58 | cfg = params['cfg'] 59 | cfg.set('schedule_periodic', False) 60 | 61 | def _start(self, actor, consume=True): 62 | return self.backend_factory( 63 | self.cfg, 64 | logger=self.logger 65 | ).start(actor, consume) 66 | 67 | 68 | class PulsarQueue(MultiApp): 69 | """Build a multi-app consisting on a taskqueue and a JSON-RPC server. 
70 | """ 71 | cfg = Config('Pulsar Queue') 72 | 73 | def __init__(self, callable=None, **params): 74 | super().__init__(**params) 75 | self.manager = callable 76 | 77 | def api(self): 78 | return self.apps()[0].api() 79 | 80 | def build(self): 81 | yield self.new_app(QueueApp, callable=self.manager) 82 | wsgi = self.cfg.params.get('wsgi') 83 | if wsgi: 84 | if wsgi is True: 85 | wsgi = Rpc 86 | yield self.new_app(RpcServer, 87 | prefix='rpc', 88 | callable=self) 89 | 90 | 91 | class RpcServer(WSGIServer): 92 | 93 | def __init__(self, callable=None, **params): 94 | callable = Rpc(callable.apps()[0].cfg) 95 | super().__init__(callable=callable, **params) 96 | 97 | 98 | class Rpc(LazyWsgi): 99 | '''Default WSGI callable for the wsgi part of the application 100 | ''' 101 | def __init__(self, cfg): 102 | self.cfg = cfg 103 | 104 | def setup(self, environ): 105 | # only post allowed by the JSON RPC handler 106 | api = Producer(self.cfg) 107 | handler = TaskQueueRpc(api) 108 | for consumer in api.consumers: 109 | rpc = consumer.rpc() 110 | if rpc: 111 | handler.putSubHandler(consumer.name, rpc) 112 | 113 | request = [Router('/', post=handler)] 114 | response = [GZipMiddleware(200)] 115 | return WsgiHandler(middleware=request, response_middleware=response) 116 | 117 | 118 | def _close(backend, **kw): 119 | backend._loop.call_soon(backend._loop.stop) 120 | -------------------------------------------------------------------------------- /pq/cpubound.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import sys 4 | import traceback 5 | import asyncio 6 | import logging 7 | from asyncio import subprocess, streams 8 | 9 | 10 | PQPATH = os.path.dirname(__file__) 11 | PROCESS_FILE = os.path.join(PQPATH, "cpubound.py") 12 | LOGGER = logging.getLogger('pulsar.queue.cpubound') 13 | 14 | 15 | class Stream: 16 | '''Modify stream for remote logging 17 | ''' 18 | def __init__(self, stream): 19 | self.stream = stream 20 | 21 | def __getattr__(self, name): 22 | return getattr(self.stream, name) 23 | 24 | def write(self, msg): 25 | if msg: 26 | msg = json.dumps(msg) 27 | self.stream.write('%d\n%s\n' % (len(msg), msg)) 28 | 29 | 30 | class RemoteLogger(logging.StreamHandler): 31 | terminator = '' 32 | 33 | def __init__(self): 34 | super().__init__(sys.stdout) 35 | 36 | def format(self, record): 37 | return {'levelno': record.levelno, 38 | 'msg': super().format(record)} 39 | 40 | 41 | class CpuTaskInfo: 42 | 43 | def __init__(self, job): 44 | self.job = job 45 | self.buffer = '' 46 | 47 | def feed(self, data): 48 | self.buffer += data.decode('utf-8') 49 | while self.buffer: 50 | p = self.buffer.find('\n') 51 | if p: 52 | length = int(self.buffer[:p]) 53 | data = self.buffer[p + 1:] 54 | if len(data) >= length + 1: 55 | self.buffer = data[length + 1:] 56 | self.on_data(json.loads(data[:length])) 57 | else: 58 | break 59 | else: 60 | break 61 | 62 | def on_data(self, data): 63 | if isinstance(data, dict): 64 | if 'cpubound_result' in data: 65 | self.job.task.result = data['cpubound_result'] 66 | elif 'cpubound_failure' in data: 67 | data = data['cpubound_failure'] 68 | self.job.task.result = data[0] 69 | self.job.task.stacktrace = data[1] 70 | elif 'levelno' in data: 71 | self.job.logger.log(data['levelno'], data['msg']) 72 | elif isinstance(data, str): 73 | data = data.rstrip() 74 | if data: 75 | print(data) 76 | 77 | 78 | class StreamProtocol(subprocess.SubprocessStreamProtocol): 79 | 80 | def __init__(self, job): 81 | 
super().__init__(streams._DEFAULT_LIMIT, job._loop) 82 | self.info = CpuTaskInfo(job) 83 | self.error = CpuTaskInfo(job) 84 | 85 | def pipe_data_received(self, fd, data): 86 | if fd == 1: 87 | self.info.feed(data) 88 | elif fd == 2: 89 | self.error.feed(data) 90 | super().pipe_data_received(fd, data) 91 | 92 | 93 | async def main(syspath, params, stask): 94 | logger = LOGGER 95 | try: 96 | sys.path[:] = json.loads(syspath) 97 | from pq.api import QueueApp 98 | 99 | params = json.loads(params) 100 | params.update({'python_path': False, 101 | 'parse_console': False}) 102 | producer = await QueueApp(**params).api().start() 103 | task = producer.broker.decode(stask, 'json') 104 | # 105 | tasks = producer.tasks 106 | logger = tasks.logger 107 | JobClass = tasks.registry.get(task.name) 108 | if not JobClass: 109 | raise RuntimeError('%s not in registry' % task.name) 110 | job = JobClass(tasks, task) 111 | result = await job.green_pool.submit(job, **task.kwargs) 112 | sys.stdout.write({'cpubound_result': result}) 113 | except Exception: 114 | exc_info = sys.exc_info() 115 | error = str(exc_info[1]) 116 | stacktrace = traceback.format_tb(exc_info[2]) 117 | sys.stderr.write({'cpubound_failure': (error, stacktrace)}) 118 | msg = '%s\n%s' % (error, ''.join(stacktrace)) 119 | logger.error(msg) 120 | 121 | 122 | if __name__ == '__main__': 123 | sys.stdout = Stream(sys.stdout) 124 | sys.stderr = Stream(sys.stderr) 125 | logging.basicConfig(level=logging.DEBUG, 126 | format='[pid=%(process)s] %(message)s', 127 | handlers=[RemoteLogger()]) 128 | loop = asyncio.get_event_loop() 129 | loop.run_until_complete(main(*sys.argv[1:])) 130 | -------------------------------------------------------------------------------- /pq/server/config.py: -------------------------------------------------------------------------------- 1 | from pulsar.api import Setting 2 | from pulsar.utils.config import ( 3 | validate_list, validate_bool, validate_pos_float 4 | ) 5 | from pulsar.utils.importer import module_attribute 6 | 7 | from ..utils.serializers import serializers 8 | 9 | 10 | DEFAULT_MQ_BACKEND = 'redis://127.0.0.1:6379/7?namespace=pq' 11 | 12 | 13 | def constant_or_function(value): 14 | try: 15 | value = int(value) 16 | except ValueError: 17 | return module_attribute(value) 18 | else: 19 | return Constant(value) 20 | 21 | 22 | class Constant: 23 | 24 | def __init__(self, value): 25 | self.value = value 26 | 27 | def __call__(self, cfg): 28 | return self.value 29 | 30 | 31 | class TaskSetting(Setting): 32 | virtual = True 33 | app = 'tasks' 34 | section = "Task Consumer" 35 | 36 | 37 | class ConsumersPaths(TaskSetting): 38 | name = "consumers" 39 | validator = validate_list 40 | default = ['pq.api:Tasks'] 41 | desc = """\ 42 | List of python dotted paths where Consumer are implemented. 43 | 44 | This parameter can only be specified during initialization or in a 45 | :ref:`config file `. 46 | """ 47 | 48 | 49 | class MessageBroker(TaskSetting): 50 | name = 'message_broker' 51 | flags = ['--message-broker'] 52 | meta = "CONNECTION STRING" 53 | default = '' 54 | desc = """\ 55 | Connection string to message broker. 56 | """ 57 | 58 | 59 | class ConcurrentTasks(TaskSetting): 60 | name = "concurrent_tasks" 61 | flags = ["--concurrent-tasks"] 62 | default = 'pq.tasks.concurrency:linear' 63 | validator = constant_or_function 64 | desc = """\ 65 | The maximum number of concurrent tasks for a worker. 66 | 67 | When a task worker reach this number it stops polling for more tasks 68 | until one or more task finish. 
It should only affect task queues under 69 | significant load. 70 | Must be a positive integer. Generally set in the range of 5-10. 71 | """ 72 | 73 | 74 | class DefaultQueue(TaskSetting): 75 | name = "default_task_queue" 76 | flags = ["--default-task-queue"] 77 | default = 'pulsarqueue' 78 | desc = """\ 79 | Default queue name when not specified by the Job class 80 | """ 81 | 82 | 83 | class TaskQueues(TaskSetting): 84 | name = 'task_queues' 85 | default = ['pulsarqueue'] 86 | validator = validate_list 87 | desc = """\ 88 | List of queues to consume 89 | """ 90 | 91 | 92 | class TaskPoolTimeout(TaskSetting): 93 | name = "task_pool_timeout" 94 | flags = ["--task-pool-timeout"] 95 | default = 0.1 96 | validator = validate_pos_float 97 | type = int 98 | desc = """\ 99 | Timeout for asynchronously polling tasks from the queues when 100 | no concurrent tasks in consumer 101 | """ 102 | 103 | 104 | class TaskPoolTimeoutMax(TaskSetting): 105 | name = "task_pool_timeout_max" 106 | flags = ["--task-pool-timeout-max"] 107 | default = 2 108 | validator = validate_pos_float 109 | type = int 110 | desc = """\ 111 | Timeout for asynchronously polling tasks from the queues when 112 | concurrent tasks are at max_concurrency level 113 | """ 114 | 115 | 116 | class TaskPaths(TaskSetting): 117 | name = "task_paths" 118 | validator = validate_list 119 | default = [] 120 | desc = """\ 121 | List of python dotted paths where tasks are located. 122 | 123 | This parameter can only be specified during initialization or in a 124 | :ref:`config file `. 125 | """ 126 | 127 | 128 | class SchedulePeriodic(TaskSetting): 129 | name = 'schedule_periodic' 130 | flags = ["--schedule-periodic"] 131 | validator = validate_bool 132 | action = "store_true" 133 | default = False 134 | desc = '''\ 135 | Enable scheduling of periodic tasks. 136 | 137 | If enabled, :class:`.PeriodicJob` will produce 138 | tasks according to their schedule. 139 | ''' 140 | 141 | 142 | class MessageSerializer(TaskSetting): 143 | name = 'message_serializer' 144 | flags = ["--message-serializer"] 145 | choices = tuple(serializers) 146 | default = tuple(serializers)[0] 147 | desc = '''\ 148 | Message serializer 149 | ''' 150 | -------------------------------------------------------------------------------- /tests/example/jobs/standard.py: -------------------------------------------------------------------------------- 1 | import time 2 | import asyncio 3 | import threading 4 | 5 | import greenlet 6 | 7 | from pq import api 8 | 9 | 10 | class TestError(Exception): 11 | pass 12 | 13 | 14 | class RunPyCode(api.Job): 15 | '''execute python code in *code*. 
There must be a *task_function* 16 | function defined which accept key-valued parameters only.''' 17 | timeout = 60 18 | 19 | def __call__(self, code=None, **kwargs): 20 | code_local = compile(code, '', 'exec') 21 | ns = {} 22 | exec(code_local, ns) 23 | func = ns['task_function'] 24 | return func(**kwargs) 25 | 26 | 27 | class Addition(api.Job): 28 | timeout = 60 29 | 30 | def __call__(self, a=0, b=0): 31 | return a + b 32 | 33 | 34 | @api.job() 35 | def subtraction(self, a=0, b=0): 36 | return a - b 37 | 38 | 39 | class Asynchronous(api.Job): 40 | 41 | async def __call__(self, lag=1): 42 | start = time.time() 43 | try: 44 | await asyncio.sleep(lag) 45 | except asyncio.CancelledError: 46 | self.task.run_info['cancelled'] = time.time() - start 47 | raise 48 | return time.time() - start 49 | 50 | 51 | @api.job(max_concurrency=3) 52 | async def maxconcurrency(self, lag=1): 53 | start = time.time() 54 | await asyncio.sleep(lag) 55 | return time.time() - start 56 | 57 | 58 | @api.job() 59 | async def notoverlap(self, lag=1): 60 | async with self.lock(): 61 | start = time.time() 62 | await asyncio.sleep(lag) 63 | return { 64 | 'start': start, 65 | 'end': time.time() 66 | } 67 | 68 | 69 | @api.job() 70 | async def queue_from_task(self): 71 | task = await self.queue('asynchronous') 72 | return task.tojson() 73 | 74 | 75 | class WorkerInfo(api.Job): 76 | 77 | def __call__(self): 78 | return dict(self.backend.info()) 79 | 80 | 81 | class GreenExecutor(api.Job): 82 | 83 | def __call__(self): 84 | return self.run_in_executor(self.info) 85 | 86 | def info(self): 87 | return dict(self.backend.info()) 88 | 89 | 90 | class CpuBound(api.Job): 91 | concurrency = api.CPUBOUND 92 | 93 | def __call__(self, error=False): 94 | self.logger.info('Testing CpuBound concurrency') 95 | self.logger.warning('Sleeping for 2 seconds') 96 | time.sleep(1) 97 | if error: 98 | raise TestError('just a test') 99 | return ['OK', 2] 100 | 101 | 102 | @api.job() 103 | def testlocalqueue(self): 104 | return self.backend.tasks.queues() 105 | 106 | 107 | class CpuBoundWithAsync(api.Job): 108 | concurrency = api.CPUBOUND 109 | 110 | def __call__(self, asyncio=False): 111 | if asyncio: 112 | return self.asyncio() 113 | else: 114 | return self.greenlet_info() 115 | 116 | def greenlet_info(self): 117 | return greenlet.getcurrent().parent is not None 118 | 119 | async def asyncio(self): 120 | await asyncio.sleep(1) 121 | return self.greenlet_info() 122 | 123 | 124 | class CpuBoundBigLog(api.Job): 125 | concurrency = api.CPUBOUND 126 | 127 | def __call__(self): 128 | # Log more date then the pipe buffer, as logs are send through the pipe 129 | for i in range(1024): 130 | self.backend.logger.debug('*'*1024) 131 | 132 | 133 | @api.job() 134 | async def scrape(self, url=None): 135 | assert url, "url is required" 136 | request = await self.http.get(url) 137 | return request.text 138 | 139 | 140 | @api.job(concurrency=api.THREAD_IO) 141 | def read_text(self, input=None): 142 | """ 143 | Extract text from a docx document 144 | 145 | This task is not async friendly and therefore it should be run as 146 | THREAD_IO or as CPUBOUND 147 | 148 | :return: the length of the text extracted 149 | """ 150 | assert input, "input must be given" 151 | with open(input, 'r') as fp: 152 | text = fp.read() 153 | return { 154 | 'thread': threading.get_ident(), 155 | 'text': text 156 | } 157 | 158 | 159 | def _docx_text(document): 160 | for paragraph in document.paragraphs: 161 | yield paragraph.text 162 | 163 | yield from _docx_tables(document.tables) 164 | 165 | 
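# Helper generators: _docx_tables (below) walks table cells, yielding paragraph
# text and recursing into nested tables; together with _docx_text above it
# presumably expects a python-docx Document object. Note that neither helper is
# referenced by the read_text job, which reads a plain text file.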
166 | def _docx_tables(tables): 167 | for table in tables: 168 | for row in table.rows: 169 | for cell in row.cells: 170 | # For every cell in every row of the table, extract text from 171 | # child paragraphs. 172 | for paragraph in cell.paragraphs: 173 | yield paragraph.text 174 | 175 | # Then recursively extract text from child tables. 176 | yield from _docx_tables(cell.tables) 177 | -------------------------------------------------------------------------------- /pq/server/producer.py: -------------------------------------------------------------------------------- 1 | import platform 2 | from asyncio import gather, new_event_loop 3 | 4 | from pulsar.api import ensure_future, EventHandler, as_coroutine 5 | from pulsar.apps.data import create_store 6 | from pulsar.apps.greenio import GreenHttp 7 | from pulsar.apps.http import HttpClient 8 | from pulsar.utils.importer import module_attribute 9 | 10 | from ..utils.serializers import MessageDict 11 | from ..utils import concurrency 12 | from ..mq import Manager, register_broker 13 | 14 | 15 | register_broker('redis', 'pq.backends.redis:MQ') 16 | 17 | 18 | class ConsumerMessage(MessageDict): 19 | type = 'consumer' 20 | 21 | 22 | class Producer(EventHandler): 23 | """Produce tasks by queuing them 24 | 25 | Abstract base class for both task schedulers and task consumers 26 | """ 27 | app = None 28 | ONE_TIME_EVENTS = ('close',) 29 | 30 | def __init__(self, cfg, *, logger=None, **kw): 31 | # create the store for channels 32 | store = create_store(cfg.data_store, loop=cfg.params.pop('loop', None)) 33 | self.cfg = cfg 34 | self._loop = store._loop 35 | self.logger = logger 36 | self._closing_waiter = None 37 | if not cfg.message_broker: 38 | broker = store 39 | else: 40 | broker = create_store(cfg.message_broker, loop=self._loop) 41 | self.manager = (self.cfg.callable or Manager)(self) 42 | self.broker = register_broker(broker.name)(self, broker) 43 | self.channels = store.channels( 44 | protocol=self.broker, 45 | status_channel=ConsumerMessage.type, 46 | logger=self.logger 47 | ) 48 | self.http = self.manager.http() 49 | self.green_pool = self.manager.green_pool() 50 | self.consumers = [] 51 | for consumer_path in self.cfg.consumers: 52 | consumer = module_attribute(consumer_path)(self) 53 | self.consumers.append(consumer) 54 | setattr(self, consumer.name, consumer) 55 | 56 | def __str__(self): 57 | return repr(self) 58 | 59 | def __repr__(self): 60 | return 'producer <%s>' % self.broker 61 | 62 | @property 63 | def node_name(self): 64 | return platform.node().lower() 65 | 66 | @property 67 | def is_consumer(self): 68 | return False 69 | 70 | async def start(self): 71 | # Register consumers 72 | for consumer in self.consumers: 73 | await as_coroutine(consumer.register()) 74 | # connect channels 75 | await self.channels.connect() 76 | self.manager.start() 77 | return self 78 | 79 | async def publish(self, event, message): 80 | """Publish an event to the message channel 81 | """ 82 | coro = [ 83 | self.manager.store_message(message), 84 | self.channels.publish(message.type, event, message) 85 | ] 86 | if message.id: 87 | coro.append( 88 | self.channels.publish(message.type, message.id, message) 89 | ) 90 | await gather(*coro) 91 | 92 | def tick(self, monitor): 93 | pass 94 | 95 | def info(self): 96 | for consumer in self.consumers: 97 | try: 98 | info = consumer.info() 99 | except Exception: 100 | self.logger.exception('Unhandled information exception') 101 | else: 102 | if info: 103 | yield consumer.name, info 104 | 105 | def lock(self, name, 
**kwargs): 106 | """aquire a distributed global lock for ``name`` 107 | """ 108 | return self.channels.lock('lock-%s' % name, **kwargs) 109 | 110 | def http_sessions(self, model=None): 111 | """Return an HTTP session handler for a given concurrency model 112 | """ 113 | if model == concurrency.THREAD_IO: 114 | return HttpClient(loop=new_event_loop()) 115 | elif model == concurrency.ASYNC_IO: 116 | return self.http 117 | else: 118 | return GreenHttp(self.http) 119 | 120 | def on_events(self, channel, event, callback): 121 | return self._loop.create_task( 122 | self.channels.register(channel, event, callback) 123 | ) 124 | 125 | def remove_event_callback(self, channel, event, callback): 126 | return self._loop.create_task( 127 | self.channels.unregister(channel, event, callback) 128 | ) 129 | 130 | def queue(self, message, callback=True): 131 | return self.broker.queue(message, callback=callback) 132 | 133 | def execute(self, message): 134 | consumer = message.consumer() 135 | if consumer: 136 | return getattr(self, consumer).execute(message) 137 | return message 138 | 139 | def closing(self): 140 | return self._closing_waiter is not None 141 | 142 | def close(self, msg=None): 143 | '''Close this :class:`.TaskBackend`. 144 | 145 | Invoked by the :class:`.Actor` when stopping. 146 | ''' 147 | if not self._closing_waiter: 148 | if msg: 149 | self.logger.warning(msg) 150 | closing = [] 151 | for consumer in self.consumers: 152 | result = consumer.close() 153 | if not result.done(): 154 | closing.append(result) 155 | 156 | self._closing_waiter = ensure_future( 157 | _close(self, closing, self._loop), 158 | loop=self._loop 159 | ) 160 | return self._closing_waiter 161 | 162 | 163 | async def _close(self, closing, loop): 164 | if closing: 165 | await gather(*closing, loop=loop) 166 | await self.channels.close() 167 | self.manager.close() 168 | self.fire_event('close') 169 | -------------------------------------------------------------------------------- /pq/tasks/scheduler.py: -------------------------------------------------------------------------------- 1 | import time 2 | from datetime import datetime, timedelta 3 | 4 | from pulsar.utils.log import lazyproperty 5 | 6 | from ..utils.time import timedelta_seconds 7 | 8 | 9 | class SchedulerMixin: 10 | """Schedule new tasks 11 | 12 | Implements method for task scheduling 13 | """ 14 | @classmethod 15 | def __new__(cls, *args, **kwargs): 16 | o = super().__new__(cls) 17 | o._polling_tasks = False 18 | o.next_run = time.time() 19 | return o 20 | 21 | @lazyproperty 22 | def entries(self): 23 | return self._setup_schedule() 24 | 25 | def tick(self, now=None): 26 | # Run a tick, that is one iteration of the scheduler. 
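        # When the backend is closing, call ``do_close`` once polling has
        # stopped or no tasks are running, then return. Otherwise the tick is
        # a no-op unless periodic scheduling is enabled and ``next_run`` has
        # been reached: every due entry is queued and ``next_run`` is advanced
        # by the smallest remaining interval.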
27 | if self.closing(): 28 | if not self._polling_tasks or not self.num_concurrent_tasks: 29 | self.do_close() 30 | return 31 | 32 | if not self.cfg.schedule_periodic or self.next_run > time.time(): 33 | return 34 | 35 | remaining_times = [] 36 | for entry in self.entries.values(): 37 | is_due, next_time_to_run = entry.is_due(now=now) 38 | # 39 | # Task is now due 40 | if is_due: 41 | self.queue(entry.name) 42 | entry.next() 43 | 44 | if next_time_to_run: 45 | remaining_times.append(next_time_to_run) 46 | self.next_run = now or time.time() 47 | if remaining_times: 48 | self.next_run += min(remaining_times) 49 | 50 | def next_scheduled(self, jobnames=None): 51 | if not self.cfg.schedule_periodic: 52 | return 53 | if jobnames: 54 | entries = (self.entries.get(name, None) for name in jobnames) 55 | else: 56 | entries = self.entries.values() 57 | next_entry = None 58 | next_time = None 59 | for entry in entries: 60 | if entry is None: 61 | continue 62 | is_due, next_time_to_run = entry.is_due() 63 | if is_due: 64 | next_time = 0 65 | next_entry = entry 66 | break 67 | elif next_time_to_run is not None: 68 | if next_time is None or next_time_to_run < next_time: 69 | next_time = next_time_to_run 70 | next_entry = entry 71 | if next_entry: 72 | return next_entry.name, max(next_time, 0) 73 | else: 74 | return jobnames, None 75 | 76 | def _setup_schedule(self): 77 | entries = {} 78 | if not self.cfg.schedule_periodic: 79 | return entries 80 | for name, t in self.registry.filter_types('periodic'): 81 | every = t.run_every 82 | if isinstance(every, int): 83 | every = timedelta(seconds=every) 84 | if not isinstance(every, timedelta): 85 | raise ValueError('Schedule %s is not a timedelta' % every) 86 | entries[name] = SchedulerEntry(name, every, t.anchor) 87 | return entries 88 | 89 | 90 | class SchedulerEntry(object): 91 | '''A class used as a schedule entry by the :class:`.TaskBackend`. 92 | 93 | .. attribute:: name 94 | 95 | Task name 96 | 97 | .. attribute:: run_every 98 | 99 | Interval in seconds 100 | 101 | .. attribute:: anchor 102 | 103 | Datetime anchor 104 | 105 | .. attribute:: last_run_at 106 | 107 | last run datetime 108 | 109 | .. attribute:: total_run_count 110 | 111 | Total number of times this periodic task has been executed by the 112 | :class:`.TaskBackend`. 113 | ''' 114 | 115 | def __init__(self, name, run_every, anchor=None): 116 | self.name = name 117 | self.run_every = run_every 118 | self.anchor = anchor 119 | self.last_run_at = datetime.now() 120 | self.total_run_count = 0 121 | 122 | def __repr__(self): 123 | return self.name 124 | 125 | __str__ = __repr__ 126 | 127 | @property 128 | def scheduled_last_run_at(self): 129 | '''The scheduled last run datetime. 130 | 131 | This is different from :attr:`last_run_at` only when 132 | :attr:`anchor` is set. 133 | ''' 134 | last_run_at = self.last_run_at 135 | anchor = self.anchor 136 | if last_run_at and anchor: 137 | run_every = self.run_every 138 | times = int(timedelta_seconds(last_run_at - anchor) / 139 | timedelta_seconds(run_every)) 140 | if times: 141 | anchor += times * run_every 142 | while anchor <= last_run_at: 143 | anchor += run_every 144 | while anchor > last_run_at: 145 | anchor -= run_every 146 | self.anchor = anchor 147 | return anchor 148 | else: 149 | return last_run_at 150 | 151 | def next(self, now=None): 152 | '''Increase the :attr:`total_run_count` attribute by one and set the 153 | value of :attr:`last_run_at` to ``now``. 
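        If ``now`` is not provided, ``datetime.now()`` is used.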
154 | ''' 155 | self.last_run_at = now or datetime.now() 156 | self.total_run_count += 1 157 | 158 | def is_due(self, now=None): 159 | '''Returns tuple of two items ``(is_due, next_time_to_run)``, 160 | where next time to run is in seconds. 161 | 162 | See :meth:`unuk.contrib.tasks.models.PeriodicTask.is_due` 163 | for more information. 164 | ''' 165 | last_run_at = self.scheduled_last_run_at 166 | now = now or datetime.now() 167 | rem_delta = last_run_at + self.run_every - now 168 | rem = timedelta_seconds(rem_delta) 169 | if rem == 0: 170 | return True, timedelta_seconds(self.run_every) 171 | return False, rem 172 | -------------------------------------------------------------------------------- /pq/mq.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from asyncio import Future, ensure_future 3 | from collections import OrderedDict 4 | 5 | from pulsar.api import chain_future, ImproperlyConfigured 6 | from pulsar.apps.http import HttpClient 7 | from pulsar.apps.greenio import GreenPool 8 | from pulsar.apps.data.channels import Connector 9 | from pulsar.utils.importer import module_attribute 10 | 11 | from .utils.serializers import serializers 12 | 13 | 14 | class MessageFuture(Future): 15 | 16 | def __init__(self, message_id, backend, *, loop=None): 17 | super().__init__(loop=loop) 18 | self.message_id = message_id 19 | self.backend = backend 20 | 21 | def wait(self): # pragma nocover 22 | assert not self._loop.is_running(), 'cannot wait if loop is running' 23 | return self._loop.run_until_complete(_wait(self)) 24 | 25 | def _repr_info(self): 26 | info = super()._repr_info() 27 | info.append('ID=%s' % self.message_id) 28 | return info 29 | 30 | 31 | async def _wait(task_future): 32 | await task_future.backend.channels.connect() 33 | result = await task_future 34 | return result 35 | 36 | 37 | class BaseComponent: 38 | 39 | def __init__(self, backend): 40 | self.backend = backend 41 | self.logger = self.backend.logger 42 | 43 | @property 44 | def cfg(self): 45 | return self.backend.cfg 46 | 47 | @property 48 | def _loop(self): 49 | return self.backend._loop 50 | 51 | def encode(self, message, serializer=None): 52 | """Encode a message""" 53 | serializer = serializer or self.cfg.message_serializer 54 | return serializers[serializer].encode(message) 55 | 56 | def decode(self, data, serializer=None): 57 | """Decode a message""" 58 | serializer = serializer or self.cfg.message_serializer 59 | return serializers[serializer].decode(data) 60 | 61 | 62 | class Manager(BaseComponent): 63 | 64 | def green_pool(self): 65 | return GreenPool(loop=self._loop) 66 | 67 | def http(self): 68 | return HttpClient(loop=self._loop) 69 | 70 | def queues(self): 71 | """List of queue names for Message consumers 72 | """ 73 | queues = [self.backend.node_name] 74 | queues.extend(self.cfg.task_queues) 75 | return queues 76 | 77 | async def store_message(self, message): 78 | """Dummy function to store a message into a persistent database 79 | """ 80 | pass 81 | 82 | def start(self): 83 | """Optional start method, called by the backend when it starts 84 | """ 85 | pass 86 | 87 | def close(self): 88 | pass 89 | 90 | 91 | class MQ(BaseComponent, Connector, ABC): 92 | """Interface class for a distributed message queue 93 | """ 94 | def __init__(self, backend, store, namespace=None): 95 | super().__init__(backend) 96 | Connector.__init__(self, store, namespace=namespace) 97 | self.store = store 98 | self.queued_messages = {} 99 | 100 | def 
__repr__(self): 101 | return 'message-broker - %s' % self.store 102 | 103 | def queue(self, message, callback=True): 104 | '''Queue the ``message``. 105 | 106 | If callback is True (default) returns a Future 107 | called back once the message is delivered, 108 | otherwise return a future called back once the messaged is queued 109 | ''' 110 | future_done = MessageFuture(message.id, self.backend, loop=self._loop) 111 | if message.queue: 112 | self.queued_messages[message.id] = future_done 113 | else: # the task is not queued instead it is executed immediately 114 | coro = self.backend.execute(message) 115 | return chain_future(coro, next=future_done) 116 | # queue the message 117 | coro = self._queue_message(message, future_done) 118 | if callback: 119 | ensure_future(coro, loop=self._loop) 120 | return future_done 121 | else: 122 | future = MessageFuture(message.id, self.backend, loop=self._loop) 123 | return chain_future(coro, next=future) 124 | 125 | @abstractmethod 126 | async def size(self, *queues): # pragma nocover 127 | '''Asynchronously retrieve the size of queues 128 | 129 | :return: the list of sizes 130 | ''' 131 | pass 132 | 133 | @abstractmethod 134 | async def get_message(self, *queues): # pragma nocover 135 | '''Asynchronously retrieve a :class:`.Task` from queues 136 | 137 | :return: a :class:`.Task` or ``None``. 138 | ''' 139 | pass 140 | 141 | @abstractmethod 142 | async def flush_queues(self, *queues): # pragma nocover 143 | '''Clear a list of task queues 144 | ''' 145 | pass 146 | 147 | @abstractmethod 148 | async def queue_message(self, queue, message): # pragma nocover 149 | """Add a message to the ``queue`` 150 | """ 151 | pass 152 | 153 | @abstractmethod 154 | async def incr(self, name): 155 | """Increase the counter for name 156 | """ 157 | pass 158 | 159 | @abstractmethod 160 | async def decr(self, name): 161 | """Decrease the counter for name 162 | """ 163 | pass 164 | 165 | # INTERNALS 166 | async def _queue_message(self, message, future): 167 | '''Asynchronously queue a task 168 | ''' 169 | await self.backend.publish('queued', message) 170 | try: 171 | await self.queue_message(message.queue, self.encode(message)) 172 | except ConnectionRefusedError: 173 | self.logger.critical('Could not queue task - connection error') 174 | else: 175 | self.logger.debug('%s in "%s"', message.lazy_info(), message.queue) 176 | message.done_callback = future 177 | return message 178 | 179 | 180 | def register_broker(name, factory=None): 181 | if factory is None: 182 | dotted_path = brokers.get(name) 183 | if not dotted_path: 184 | raise ImproperlyConfigured('No such message broker: %s' % name) 185 | factory = module_attribute(dotted_path, safe=True) 186 | if not factory: 187 | raise ImproperlyConfigured( 188 | '"%s" store not available' % dotted_path) 189 | else: 190 | brokers[name] = factory 191 | return factory 192 | 193 | 194 | brokers = OrderedDict() 195 | -------------------------------------------------------------------------------- /pq/tasks/executor.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import time 3 | import traceback 4 | import json 5 | 6 | from asyncio import wait_for 7 | from asyncio import CancelledError, TimeoutError 8 | from asyncio.subprocess import Process 9 | 10 | from .task import TaskError, TaskTimeout 11 | from . 
import states 12 | from ..cpubound import StreamProtocol, PROCESS_FILE 13 | from ..utils.exc import string_exception 14 | from ..utils import concurrency 15 | 16 | 17 | consumer_event = 'consumer_status' 18 | 19 | 20 | class RemoteStackTrace(TaskError): 21 | pass 22 | 23 | 24 | class TooManyTasksForJob(TaskError): 25 | status = states.REVOKED 26 | 27 | 28 | class ExecutorMixin: 29 | # Mixin for both TaskConsumer and TaskProducer 30 | # 31 | # The TaskProducer can execute a task inline, while the consumer executes 32 | # task from a task queue via the ConsumerMixin 33 | # 34 | async def _execute_task(self, task, worker=None): 35 | # Function executing a task 36 | # 37 | # - If the stat_time is greater than task.expiry Revoke the Task 38 | # - If the task has a delay not yet reached 39 | # - queue the task at the right time 40 | # - otherwise proceed to next 41 | # - Set status to STARTED and consume the task 42 | logger = self.logger 43 | task_id = task.id 44 | time_ended = time.time() 45 | job = None 46 | JobClass = self.registry.get(task.name) 47 | 48 | try: 49 | if not JobClass: 50 | raise RuntimeError('%s not in registry' % task.name) 51 | 52 | if task.status > states.STARTED: 53 | queued = task.time_queued 54 | timeout = task.timeout 55 | delay = task.delay or 0 56 | start = queued + delay 57 | 58 | if delay: # Task with delay 59 | gap = start - time_ended 60 | if gap > 0: 61 | self._loop.call_later(gap, self._queue_again, task) 62 | if worker: 63 | self._concurrent_tasks.pop(task_id, None) 64 | return task 65 | 66 | if timeout: # Handle timeout 67 | timeout = timeout + start - time_ended 68 | if timeout <= 0: 69 | raise TaskTimeout 70 | 71 | task.node = self.backend.node_name 72 | if worker: 73 | task.consumer = worker.aid 74 | concurrent = await self.broker.incr(JobClass.name) 75 | 76 | job = JobClass(self, task) 77 | 78 | if job.max_concurrency and concurrent > job.max_concurrency: 79 | raise TooManyTasksForJob('max concurrency %d reached', 80 | job.max_concurrency) 81 | 82 | kwargs = task.kwargs or {} 83 | task.status = states.STARTED 84 | task.time_started = time_ended 85 | if worker: 86 | task.worker = worker.aid 87 | logger.info(task.lazy_info()) 88 | await self.backend.publish('started', task) 89 | future = self._consume(job, kwargs) 90 | # 91 | # record future for cancellation 92 | if worker: 93 | self._concurrent_tasks[task_id].future = future 94 | # 95 | # This may block until timeout 96 | task.result = await wait_for(future, timeout) 97 | else: 98 | raise TaskError('Invalid status %s' % task.status_string) 99 | 100 | except (CancelledError, TimeoutError, TaskTimeout): 101 | task.result = None 102 | task.status = states.REVOKED 103 | logger.error(task.lazy_info()) 104 | except RemoteStackTrace: 105 | task.status = states.FAILURE 106 | logger.error(task.lazy_info()) 107 | except TaskError as exc: 108 | task.result = string_exception(exc) 109 | task.status = exc.status 110 | logger.error(task.lazy_info()) 111 | except Exception as exc: 112 | exc_info = sys.exc_info() 113 | task.result = string_exception(exc) 114 | task.status = states.FAILURE 115 | task.stacktrace = traceback.format_tb(exc_info[2]) 116 | task.exception = traceback.format_exception_only( 117 | exc_info[0], exc_info[1])[0] 118 | logger.exception(task.lazy_info()) 119 | else: 120 | task.status = states.SUCCESS 121 | logger.info(task.lazy_info()) 122 | # 123 | task.time_ended = time.time() 124 | if worker: 125 | self._concurrent_tasks.pop(task_id, None) 126 | 127 | await self.backend.publish('done', task) 128 | 
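        # Retry handling: a task that did not succeed is re-queued with an
        # incremented ``retry`` counter (and the job's ``retry_delay``) only
        # while the job defines ``max_retries`` and the limit has not yet been
        # reached.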
129 | if job: 130 | if worker: 131 | await self.broker.decr(job.name) 132 | if self._should_retry(job): 133 | await self._requeue_task(job) 134 | 135 | return task 136 | 137 | def _consume(self, job, kwargs): 138 | model = job.get_concurrency() 139 | 140 | if model == concurrency.THREAD_IO: 141 | return job._loop.run_in_executor(None, lambda: job(**kwargs)) 142 | 143 | elif model == concurrency.CPUBOUND: 144 | return self._consume_in_subprocess(job, kwargs) 145 | 146 | else: 147 | return self.backend.green_pool.submit(job, **kwargs) 148 | 149 | async def _consume_in_subprocess(self, job, kwargs): 150 | params = dict(self.json_params()) 151 | loop = job._loop 152 | 153 | transport, protocol = await loop.subprocess_exec( 154 | lambda: StreamProtocol(job), 155 | sys.executable, 156 | PROCESS_FILE, 157 | json.dumps(sys.path), 158 | json.dumps(params), 159 | json.dumps(job.task.tojson()) 160 | ) 161 | process = Process(transport, protocol, loop) 162 | await process.communicate() 163 | if job.task.stacktrace: 164 | raise RemoteStackTrace 165 | return job.task.result 166 | 167 | def _should_retry(self, job): 168 | return (job.task.status != states.SUCCESS and 169 | job.task.queue and 170 | job.max_retries and 171 | job.task.retry < job.max_retries) 172 | 173 | def _requeue_task(self, job): 174 | task = job.task 175 | meta_params = task.meta.copy() 176 | meta_params['retry'] = task.retry + 1 177 | return job.queue( 178 | job.name, 179 | callback=False, 180 | meta_params=meta_params, 181 | queue=task.queue, 182 | delay=job.retry_delay, 183 | **task.kwargs 184 | ) 185 | 186 | def _queue_again(self, task): 187 | self.broker.queue(task, False) 188 | 189 | def json_params(self): 190 | for name, value in self.cfg.items(): 191 | try: 192 | json.dumps(value) 193 | except Exception: 194 | continue 195 | yield name, value 196 | -------------------------------------------------------------------------------- /pq/tasks/consumer.py: -------------------------------------------------------------------------------- 1 | import time 2 | from math import exp 3 | from uuid import uuid4 4 | 5 | from pulsar.async.access import ensure_future, CANCELLED_ERRORS 6 | from pulsar.apps.data.channels import backoff, RECONNECT_LAG 7 | 8 | from .executor import ExecutorMixin 9 | from .scheduler import SchedulerMixin 10 | from . 
import states 11 | from .task import Task, TaskNotAvailable 12 | from .models import RegistryMixin 13 | from .rpc import TasksRpc 14 | from ..consumer import ConsumerAPI 15 | 16 | 17 | FACTOR = exp(1) - 2 18 | 19 | 20 | def poll_time(a, b, x, lag=0): 21 | a = max(a, 0) # 0 minimum pool gap 22 | b = max(a, b) # b cannot be less than a 23 | return max(a + (b-a) * (exp(x) - x - 1)/FACTOR - lag, 0) 24 | 25 | 26 | class Tasks(RegistryMixin, ExecutorMixin, SchedulerMixin, ConsumerAPI): 27 | """A Consumer for processing tasks 28 | """ 29 | def __init__(self, backend): 30 | super().__init__(backend) 31 | self._processed = 0 32 | self._next_time = 1 33 | self._concurrent_tasks = {} 34 | 35 | def start(self, worker): 36 | self._polling_tasks = True 37 | self._poll_tasks(worker) 38 | self.logger.warning('%s started polling tasks', self) 39 | 40 | async def register(self): 41 | await self.channels.register(Task.type, 'done', self._task_done) 42 | 43 | def queues(self): 44 | '''List of task queues consumed by this task consumer 45 | ''' 46 | return self.manager.queues() 47 | 48 | @property 49 | def max_concurrent_tasks(self): 50 | return self.cfg.concurrent_tasks(self.cfg) 51 | 52 | @property 53 | def num_concurrent_tasks(self): 54 | '''The number of concurrent_tasks 55 | ''' 56 | return len(self._concurrent_tasks) 57 | 58 | def info(self): 59 | return { 60 | 'max_concurrent_tasks': self.max_concurrent_tasks, 61 | 'concurrent_tasks': list(self._concurrent_tasks), 62 | 'processed': self._processed, 63 | 'pulltime': self._next_time, 64 | 'queues': self.queues() 65 | } 66 | 67 | def rpc(self): 68 | return TasksRpc() 69 | 70 | # API 71 | def queue(self, jobname, callback=True, **kwargs): 72 | '''Try to queue a new :task 73 | 74 | :param callback: when true (default) return a future called back once 75 | the task done, otherwise it is called back once the task is queued. 
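        For example, from a coroutine and with the job names registered in the
        example jobs: ``task = await tasks.queue('addition', a=40, b=50)``.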
76 | :return: a :class:`.Future` resulting in a task once finished or 77 | Nothing 78 | ''' 79 | task = self._create_task(jobname, **kwargs) 80 | if task: 81 | future = self.broker.queue(task, callback) 82 | if self._loop.is_running(): 83 | return self.backend.green_pool.wait(future) 84 | else: 85 | return future 86 | 87 | def queue_local(self, jobname, **kwargs): 88 | kwargs['queue'] = self.backend.node_name 89 | return self.queue(jobname, **kwargs) 90 | 91 | def execute(self, jobname, **kwargs): 92 | '''Execute a task immediately 93 | ''' 94 | if isinstance(jobname, Task): 95 | return self._execute_task(jobname) 96 | else: 97 | kwargs['queue'] = False 98 | kwargs['callback'] = True 99 | return self.queue(jobname, **kwargs) 100 | 101 | def flush_queues(self, *queues): 102 | return self.broker.flush_queues(*queues) 103 | 104 | def gen_unique_id(self): 105 | return uuid4().hex 106 | 107 | # ####################################################################### 108 | # # PRIVATE METHODS 109 | # ####################################################################### 110 | def _task_done(self, channel, event, task): 111 | done = self.backend.broker.queued_messages.pop(task.id, None) 112 | if done: 113 | done.set_result(task) 114 | 115 | def _poll_tasks(self, worker, next_time=None): 116 | if self.closing() and not self._concurrent_tasks: 117 | self.do_close() 118 | elif worker.is_running() and not next_time: 119 | ensure_future(self._may_poll_task(worker), loop=worker._loop) 120 | else: 121 | next_time = next_time or 0 122 | worker._loop.call_later(next_time, self._poll_tasks, worker) 123 | 124 | async def _may_poll_task(self, worker): 125 | # Called in the ``worker`` event loop. 126 | # 127 | # It pools a new task if possible, and add it to the queue of 128 | # tasks consumed by the ``worker`` CPU-bound thread.''' 129 | task = None 130 | next_time = None 131 | lag = 0 132 | if worker.is_running(): 133 | loop = worker._loop 134 | 135 | if self.num_concurrent_tasks < self.max_concurrent_tasks: 136 | max_tasks = self.cfg.max_requests 137 | if max_tasks and self._processed >= max_tasks: 138 | self.backend.close( 139 | 'Processed %s tasks. Stop polling tasks.' 140 | % self._processed 141 | ) 142 | 143 | if not self.closing(): 144 | try: 145 | t0 = loop.time() 146 | task = await self.broker.get_message(*self.queues()) 147 | lag = loop.time() - t0 148 | except ConnectionError: 149 | if self.broker.connection_error: 150 | next_time = backoff(self._next_time) 151 | else: 152 | next_time = RECONNECT_LAG 153 | self.broker.connection_error = True 154 | if worker.is_running(): 155 | self.logger.critical( 156 | '%s cannot pool messages - ' 157 | 'connection error - try again in %s seconds', 158 | self.broker, 159 | next_time 160 | ) 161 | except CANCELLED_ERRORS: 162 | self.logger.debug('stopped polling messages') 163 | raise 164 | except Exception: 165 | if worker.is_running(): 166 | self.logger.exception('server exception') 167 | else: 168 | self.broker.connection_ok() 169 | if task: # Got a new task 170 | self._processed += 1 171 | self._concurrent_tasks[task.id] = TaskExecutor(task) 172 | ensure_future(self._execute_task(task, worker)) 173 | else: 174 | self.logger.debug('%s concurrent messages. 
Cannot poll.', 175 | self.max_concurrent_tasks) 176 | 177 | if next_time is None: 178 | next_time = poll_time( 179 | self.cfg.task_pool_timeout, 180 | self.cfg.task_pool_timeout_max, 181 | self.num_concurrent_tasks/self.max_concurrent_tasks, 182 | lag 183 | ) 184 | self._next_time = next_time 185 | 186 | self._poll_tasks(worker, next_time) 187 | 188 | def _create_task(self, jobname, meta_params=None, timeout=None, queue=True, 189 | delay=None, **kwargs): 190 | '''Try to queue a new :ref:`Task`. 191 | 192 | This method returns a :class:`.Future` which results in the 193 | task ``id`` created. If ``jobname`` is not a valid 194 | :attr:`.Job.name`, a ``TaskNotAvailable`` exception occurs. 195 | 196 | :param jobname: the name of a :class:`.Job` 197 | registered with the :class:`.TaskQueue` application. 198 | :param meta_params: Additional parameters to be passed to the 199 | :class:`Task` constructor (not its callable function). 200 | :param timeout: optional expiry timestamp to override the default 201 | timeout of a task. 202 | :param kwargs: optional dictionary used for the key-valued arguments 203 | in the task callable. 204 | :return: a :class:`.Future` resulting in a task once finished or 205 | Nothing 206 | ''' 207 | if self.closing(): 208 | self.logger.warning('Cannot queue task, task backend closing') 209 | return 210 | if jobname in self.registry: 211 | job = self.registry[jobname] 212 | task_id = self.gen_unique_id() 213 | queued = time.time() 214 | timeout = timeout or job.timeout 215 | meta_params = meta_params or {} 216 | if queue is not False: 217 | if queue is True: 218 | queue = job.default_queue or self.cfg.default_task_queue 219 | else: 220 | queue = None 221 | return Task(task_id, 222 | name=job.name, 223 | queue=queue, 224 | time_queued=queued, 225 | timeout=timeout, 226 | kwargs=kwargs, 227 | status=states.QUEUED, 228 | delay=delay, 229 | **meta_params) 230 | else: 231 | raise TaskNotAvailable(jobname) 232 | 233 | 234 | class TaskExecutor: 235 | 236 | def __init__(self, task): 237 | self.task = task 238 | self.future = None 239 | -------------------------------------------------------------------------------- /pq/tasks/models.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import logging 3 | import inspect 4 | import asyncio 5 | from datetime import datetime, date 6 | 7 | from pulsar.utils.slugify import slugify 8 | from pulsar.utils.importer import import_modules 9 | from pulsar.utils.log import lazyproperty 10 | from pulsar.utils.string import to_bytes 11 | 12 | from ..utils.concurrency import concurrency_name, ASYNC_IO 13 | 14 | 15 | class ShellError(RuntimeError): 16 | 17 | @property 18 | def returncode(self): 19 | return self.args[1] if len(self.args) > 1 else 1 20 | 21 | 22 | class RegistryMixin: 23 | 24 | @lazyproperty 25 | def registry(self): 26 | '''The :class:`.JobRegistry` for this backend. 
27 | ''' 28 | return JobRegistry.load(self.cfg.task_paths) 29 | 30 | def job_list(self, jobnames=None): 31 | registry = self.registry 32 | jobnames = jobnames or registry 33 | all = [] 34 | for name in jobnames: 35 | if name not in registry: 36 | continue 37 | job = registry[name]() 38 | d = {'doc': job.__doc__, 39 | 'doc_syntax': job.doc_syntax, 40 | 'concurrency': concurrency_name.get(job.concurrency), 41 | 'type': job.type} 42 | all.append((name, d)) 43 | return all 44 | 45 | 46 | class JobRegistry(dict): 47 | """Site registry for tasks.""" 48 | 49 | def regular(self): 50 | """A tuple containing of all regular jobs.""" 51 | return tuple(self.filter_types(type="regular")) 52 | 53 | def periodic(self): 54 | """A tuple containing all periodic jobs.""" 55 | return tuple(self.filter_types(type="periodic")) 56 | 57 | def register(self, job): 58 | """Register a job in the job registry. 59 | 60 | The task will be automatically instantiated if not already an 61 | instance. 62 | 63 | """ 64 | if isinstance(job, JobMetaClass) and job.can_register: 65 | name = job.name 66 | self[name] = job 67 | 68 | def filter_types(self, type=None, queue=None): 69 | """Return a generator of all tasks of a specific type.""" 70 | for name, jobClass in self.items(): 71 | job = jobClass() 72 | if type and job.type == type: 73 | yield name, jobClass 74 | 75 | @classmethod 76 | def load(cls, paths): 77 | self = cls() 78 | for mod in import_modules(paths, safe=True): 79 | for name, item in vars(mod).items(): 80 | if name == JOB_LIST: 81 | for job_cls in item: 82 | self.register(job_cls) 83 | else: 84 | self.register(item) 85 | return self 86 | 87 | 88 | class JobMetaClass(type): 89 | 90 | def __new__(cls, name, bases, attrs): 91 | attrs['can_register'] = not attrs.pop('abstract', False) 92 | job_name = slugify(attrs.get("name", name), '.') 93 | log_prefix = attrs.get("log_prefix") or "pulsar.queue" 94 | attrs["name"] = job_name 95 | logname = '%s.%s' % (log_prefix, job_name) 96 | attrs['logger'] = logging.getLogger(logname) 97 | return super(JobMetaClass, cls).__new__(cls, name, bases, attrs) 98 | 99 | 100 | class Job(metaclass=JobMetaClass): 101 | abstract = True 102 | timeout = None 103 | expires = None 104 | max_retries = None 105 | retry_delay = 0 106 | max_concurrency = None 107 | doc_syntax = 'markdown' 108 | default_queue = None 109 | concurrency = None 110 | 111 | def __init__(self, tasks=None, task=None): 112 | self.backend = tasks.backend if tasks else None 113 | self.task = task 114 | if task: 115 | self.__dict__.update(task.meta) 116 | 117 | def __repr__(self): 118 | return 'job.%s' % self.task if self.task else self.name 119 | __str__ = __repr__ 120 | 121 | def __call__(self, **kwargs): 122 | raise NotImplementedError("Jobs must implement the __call__ method.") 123 | 124 | @property 125 | def cfg(self): 126 | """Configuration object from :attr:`backend`""" 127 | return self.backend.cfg 128 | 129 | @property 130 | def green_pool(self): 131 | return self.backend.green_pool 132 | 133 | @property 134 | def wait(self): 135 | return self.backend.green_pool.wait 136 | 137 | @property 138 | def http(self): 139 | """Best possible HTTP session handler 140 | """ 141 | return self.backend.http_sessions(self.get_concurrency()) 142 | 143 | @property 144 | def _loop(self): 145 | return self.backend._loop if self.backend else None 146 | 147 | @property 148 | def type(self): 149 | '''Type of Job, one of ``regular`` and ``periodic``.''' 150 | return 'regular' 151 | 152 | def get_concurrency(self): 153 | '''The concurrency 
for this job 154 | ''' 155 | return self.concurrency or ASYNC_IO 156 | 157 | def run_in_executor(self, callable, *args): 158 | '''Run a callable in the event loop executor. 159 | ''' 160 | future = self._loop.run_in_executor(None, callable, *args) 161 | return self.green_pool.wait(future) 162 | 163 | def queue(self, jobname, meta_params=None, **kw): 164 | '''Queue a new task in the task queue 165 | ''' 166 | assert self.backend, 'backend not available' 167 | meta_params = self._meta_params(meta_params) 168 | return self.backend.tasks.queue(jobname, meta_params=meta_params, **kw) 169 | 170 | def queue_local(self, jobname, meta_params=None, **kw): 171 | '''Queue a new task in the local task queue 172 | ''' 173 | assert self.backend, 'backend not available' 174 | meta_params = self._meta_params(meta_params) 175 | return self.backend.tasks.queue_local( 176 | jobname, meta_params=meta_params, **kw) 177 | 178 | def _meta_params(self, meta_params=None): 179 | if meta_params is None: 180 | meta_params = {} 181 | meta_params['from_task'] = self.task.id 182 | return meta_params 183 | 184 | def lock(self, name=None, **kw): 185 | '''Acquire a lock if possible 186 | ''' 187 | return self.backend.lock(name or self.name, **kw) 188 | 189 | async def shell(self, command, input=None, chdir=None, interactive=False, 190 | interactive_stderr=None, stderr=None, stdout=None, **kw): 191 | """Execute a shell command 192 | :param command: command to execute 193 | :param input: optional input 194 | :param chdir: optional directory to execute the shell command from 195 | :param interactive: display output as it becomes available 196 | :return: the output text 197 | """ 198 | stdin = asyncio.subprocess.PIPE if input is not None else None 199 | if chdir: 200 | command = 'cd %s && %s' % (chdir, command) 201 | 202 | if interactive_stderr is None: 203 | interactive_stderr = interactive 204 | 205 | proc = await asyncio.create_subprocess_shell( 206 | command, 207 | stdin=stdin, 208 | stdout=asyncio.subprocess.PIPE, 209 | stderr=asyncio.subprocess.PIPE 210 | ) 211 | if input is not None: 212 | proc._feed_stdin(to_bytes(input)) 213 | 214 | msg, err = await asyncio.gather( 215 | _interact(proc, 1, interactive, stdout or sys.stdout), 216 | _interact(proc, 2, interactive_stderr, stderr or sys.stderr) 217 | ) 218 | if proc.returncode: 219 | msg = err.decode('utf-8') or msg.decode('utf-8') 220 | raise ShellError(msg, proc.returncode) 221 | return msg.decode('utf-8') 222 | 223 | 224 | class PeriodicJob(Job): 225 | '''A periodic :class:`.Job` implementation.''' 226 | abstract = True 227 | anchor = None 228 | '''If specified it must be a :class:`~datetime.datetime` instance. 229 | It controls when the periodic Job is run. 230 | ''' 231 | run_every = None 232 | '''Periodicity as a :class:`~datetime.timedelta` instance.''' 233 | 234 | @property 235 | def type(self): 236 | return 'periodic' 237 | 238 | def is_due(self, last_run_at): 239 | """Returns tuple of two items ``(is_due, next_time_to_run)``, 240 | where next time to run is in seconds. For example: 241 | 242 | * ``(True, 20)``, means the job should be run now, and the next 243 | time to run is in 20 seconds. 244 | 245 | * ``(False, 12)``, means the job should be run in 12 seconds. 246 | 247 | You can override this to decide the interval at runtime. 
248 | """ 249 | return self.run_every.is_due(last_run_at) 250 | 251 | 252 | def anchorDate(hour=0, minute=0, second=0): 253 | '''Create an anchor date.''' 254 | td = date.today() 255 | return datetime(year=td.year, month=td.month, day=td.day, 256 | hour=hour, minute=minute, second=second) 257 | 258 | 259 | JOB_LIST = '__PULSAR_QUEUE_JOBS__' 260 | 261 | 262 | class job: 263 | """Decorator for creating a Job class from a function 264 | """ 265 | def __init__(self, name=None, run_every=None, **attrs): 266 | self.class_name = name 267 | self.attrs = attrs 268 | base = Job 269 | if run_every: 270 | self.attrs['run_every'] = run_every 271 | base = PeriodicJob 272 | self.bases = (base,) 273 | 274 | def __call__(self, callable): 275 | if not self.class_name: 276 | self.class_name = callable.__name__ 277 | self.class_name = slugify(self.class_name, '_') 278 | self.attrs['__call__'] = callable 279 | self.attrs['__doc__'] = callable.__doc__ 280 | cls = JobMetaClass(self.class_name, self.bases, self.attrs) 281 | module = inspect.getmodule(callable) 282 | job_list = getattr(module, JOB_LIST, None) 283 | if not job_list: 284 | job_list = [] 285 | setattr(module, JOB_LIST, job_list) 286 | job_list.append(cls) 287 | return cls 288 | 289 | 290 | # INTERNALS 291 | async def _interact(proc, fd, interactive, out): 292 | transport = proc._transport.get_pipe_transport(fd) 293 | stream = proc.stdout if fd == 1 else proc.stderr 294 | output = b'' 295 | while interactive: 296 | line = await stream.readline() 297 | if not line: 298 | break 299 | out.write(line.decode('utf-8')) 300 | else: 301 | output = await stream.read() 302 | transport.close() 303 | return output 304 | -------------------------------------------------------------------------------- /tests/app.py: -------------------------------------------------------------------------------- 1 | """Tests task scheduling and execution""" 2 | import os 3 | import sys 4 | import asyncio 5 | import threading 6 | 7 | from pulsar.api import send, create_future 8 | from pulsar.apps import rpc 9 | 10 | from pq import api 11 | 12 | 13 | CODE_TEST = '''\ 14 | import time 15 | def task_function(N = 10, lag = 0.1): 16 | time.sleep(lag) 17 | return N*N 18 | ''' 19 | 20 | PATH = os.path.dirname(__file__) 21 | 22 | 23 | def simple_task(self, value=0): 24 | return self.v0 + value 25 | 26 | 27 | class TaskQueueBase: 28 | # used for both keep-alive and timeout in JsonProxy 29 | # long enough to allow to wait for tasks 30 | rpc_timeout = 500 31 | max_requests = 0 32 | concurrent_tasks = None 33 | tq_app = None 34 | rpc = None 35 | schedule_periodic = False 36 | message_serializer = 'json' 37 | 38 | @classmethod 39 | def name(cls): 40 | return cls.__name__.lower() 41 | 42 | @classmethod 43 | def rpc_name(cls): 44 | return 'rpc_%s' % cls.name() 45 | 46 | @classmethod 47 | async def setUpClass(cls): 48 | # The name of the task queue application 49 | params = cls.params() 50 | params.update( 51 | wsgi=True, 52 | schedule_periodic=cls.schedule_periodic, 53 | rpc_bind='127.0.0.1:0', 54 | rpc_workers=0, 55 | concurrent_tasks=cls.concurrent_tasks, 56 | max_requests=cls.max_requests, 57 | message_serializer=cls.message_serializer, 58 | task_pool_timeout=0.1, 59 | task_pool_timeout_max=0.1, 60 | rpc_keep_alive=cls.rpc_timeout, 61 | ) 62 | pq = api.PulsarQueue(**params) 63 | await pq.start() 64 | cls.tq_app = pq.apps()[0] 65 | cls.rpc = pq.apps()[1] 66 | # make sure the time out is high enough (bigger than test-timeout) 67 | cls.proxy = rpc.JsonProxy('http://%s:%s' % 
cls.rpc.cfg.addresses[0], 68 | timeout=cls.rpc_timeout) 69 | # Now flush the task queue 70 | cls.api = cls.tq_app.backend 71 | await cls.api.tasks.flush_queues(*cls.queues()) 72 | 73 | @classmethod 74 | def tearDownClass(cls): 75 | coros = [send('arbiter', 'kill_actor', a.name) for a in 76 | (cls.tq_app, cls.rpc) if a is not None] 77 | return asyncio.gather(*coros) 78 | 79 | @classmethod 80 | def queues(cls): 81 | name = cls.name() 82 | return ['%s1' % name, '%s2' % name] 83 | 84 | @classmethod 85 | def params(cls): 86 | queues = cls.queues() 87 | return dict( 88 | name=cls.name(), 89 | config='tests.config', 90 | task_queues=queues, 91 | default_task_queue=queues[0] 92 | ) 93 | 94 | 95 | class TaskQueueApp(TaskQueueBase): 96 | 97 | def test_registry(self): 98 | tasks = self.api.tasks 99 | self.assertTrue(isinstance(tasks.registry, dict)) 100 | regular = tasks.registry.regular() 101 | periodic = tasks.registry.periodic() 102 | self.assertTrue(regular) 103 | self.assertTrue(periodic) 104 | 105 | def test_consumer(self): 106 | self.assertTrue(str(self.api).startswith('consumer <')) 107 | self.assertEqual(self.api.cfg.default_task_queue, '%s1' % self.name()) 108 | 109 | def test_job_list(self): 110 | jobs = self.api.tasks.job_list() 111 | self.assertTrue(jobs) 112 | self.assertTrue(isinstance(jobs, list)) 113 | d = dict(jobs) 114 | pycode = d['runpycode'] 115 | self.assertEqual(pycode['type'], 'regular') 116 | 117 | async def test_simple_task(self): 118 | tasks = self.api.tasks 119 | task = await tasks.queue('addition', a=40, b=50) 120 | self.assertIsInstance(task, api.Task) 121 | self.assertEqual(task.status_string, 'SUCCESS') 122 | self.assertEqual(task.result, 90) 123 | self.assertTrue(str(task).startswith('task.addition<')) 124 | self.assertTrue(task.done()) 125 | 126 | async def test_info(self): 127 | tasks = self.api.tasks 128 | task = await tasks.queue('workerinfo') 129 | self.assertIsInstance(task, api.Task) 130 | self.assertEqual(task.status_string, 'SUCCESS') 131 | self.assertIsInstance(task.result, dict) 132 | self.assertEqual(len(task.result['tasks']['queues']), 3) 133 | 134 | async def test_async_job(self): 135 | tasks = self.api.tasks 136 | result = tasks.queue('asynchronous', lag=2) 137 | self.assertIsInstance(result, asyncio.Future) 138 | task = await result 139 | self.assertIsInstance(task, api.Task) 140 | self.assertEqual(task.status_string, 'SUCCESS') 141 | self.assertTrue(task.result >= 2) 142 | 143 | async def test_failure(self): 144 | tasks = self.api.tasks 145 | task = await tasks.queue('testperiodicerror', msg='testing') 146 | self.assertIsInstance(task, api.Task) 147 | self.assertEqual(task.status_string, 'FAILURE') 148 | self.assertEqual(task.result, 'testing') 149 | self.assertTrue(task.stacktrace) 150 | 151 | async def test_execute_addition(self): 152 | tasks = self.api.tasks 153 | future = tasks.execute('addition', a=3, b=-4) 154 | self.assertIsInstance(future, api.MessageFuture) 155 | self.assertTrue(future.message_id) 156 | task = await future 157 | self.assertIsInstance(task, api.Task) 158 | self.assertEqual(task.status_string, 'SUCCESS') 159 | self.assertEqual(task.result, -1) 160 | self.assertFalse(task.worker) 161 | self.assertFalse(task.queue) 162 | 163 | async def test_green_executor(self): 164 | tasks = self.api.tasks 165 | task = await tasks.queue('greenexecutor') 166 | self.assertIsInstance(task, api.Task) 167 | self.assertEqual(task.status_string, 'SUCCESS') 168 | self.assertIsInstance(task.result, dict) 169 | 
self.assertEqual(len(task.result['tasks']['queues']), 3) 170 | 171 | async def test_queue_local(self): 172 | tasks = self.api.tasks 173 | task = await tasks.queue_local('testlocalqueue') 174 | self.assertIsInstance(task, api.Task) 175 | self.assertIsInstance(task.result, list) 176 | # self.assertEqual(len(task.result), 3) 177 | self.assertEqual(task.result[0], self.api.node_name) 178 | 179 | async def test_no_callback(self): 180 | tasks = self.api.tasks 181 | task = await tasks.queue('asynchronous', callback=False) 182 | self.assertTrue(task.id) 183 | self.assertEqual(task.status_string, 'QUEUED') 184 | self.assertTrue('ID=%s' % task.id in repr(task.done_callback)) 185 | task = await task.done_callback 186 | self.assertEqual(task.status_string, 'SUCCESS') 187 | 188 | async def test_cpubound_task(self): 189 | tasks = self.api.tasks 190 | task = await tasks.queue('cpubound') 191 | self.assertIsInstance(task, api.Task) 192 | self.assertEqual(task.status_string, 'SUCCESS') 193 | self.assertEqual(task.result, ['OK', 2]) 194 | 195 | async def __test_error_cpubound_task(self): 196 | tasks = self.api.tasks 197 | task = await tasks.queue('cpubound', error=True) 198 | self.assertIsInstance(task, api.Task) 199 | self.assertEqual(task.status_string, 'FAILURE') 200 | self.assertTrue(task.stacktrace) 201 | 202 | async def test_is_in_greenlet(self): 203 | tasks = self.api.tasks 204 | task = await tasks.queue('cpuboundwithasync') 205 | self.assertIsInstance(task, api.Task) 206 | self.assertEqual(task.status_string, 'SUCCESS') 207 | self.assertEqual(task.result, True) 208 | 209 | async def test_cpu_supports_asyncio(self): 210 | tasks = self.api.tasks 211 | task = await tasks.queue('cpuboundwithasync', asyncio=True) 212 | self.assertIsInstance(task, api.Task) 213 | self.assertEqual(task.status_string, 'SUCCESS') 214 | self.assertEqual(task.result, False) 215 | 216 | async def test_big_log(self): 217 | # If this test fails, it is because the test runner will timeout on 218 | # this future, this is because the pipe fills up and blocks the 219 | # cpu bound task 220 | tasks = self.api.tasks 221 | await tasks.queue('cpuboundbiglog') 222 | 223 | async def test_execute_python_code(self): 224 | tasks = self.api.tasks 225 | task = await tasks.execute('execute.python', 226 | code='print("Hello World!")') 227 | self.assertEqual(task.status_string, 'SUCCESS') 228 | self.assertEqual(task.result, 'Hello World!\n') 229 | 230 | async def test_execute_python_script(self): 231 | script = os.path.join(PATH, 'example', 'executable.py') 232 | tasks = self.api.tasks 233 | task = await tasks.execute('execute.python.script', script=script) 234 | self.assertEqual(task.status_string, 'SUCCESS') 235 | self.assertEqual(task.result, sys.executable) 236 | 237 | async def test_queue_size(self): 238 | code = "import time;time.sleep(1)" 239 | tasks = self.api.tasks 240 | task = await asyncio.gather( 241 | tasks.queue('execute.python', code=code, callback=False), 242 | tasks.queue('execute.python', code=code, callback=False), 243 | tasks.queue('execute.python', code=code, callback=False), 244 | tasks.queue('execute.python', code=code, callback=False) 245 | ) 246 | self.assertEqual(task[0].status_string, 'QUEUED') 247 | size = await self.api.broker.size(task[0].queue) 248 | task = await asyncio.gather( 249 | task[0].done_callback, 250 | task[1].done_callback, 251 | task[2].done_callback, 252 | task[3].done_callback 253 | ) 254 | self.assertEqual(task[0].status_string, 'SUCCESS') 255 | self.assertEqual(len(size), 1) 256 | 
self.assertTrue(size[0] > 0) 257 | 258 | async def test_lock(self): 259 | # make sure the lock is release (in case of errors) 260 | tasks = self.api.tasks 261 | results = await asyncio.gather( 262 | tasks.queue('notoverlap'), 263 | tasks.queue('notoverlap'), 264 | tasks.queue('notoverlap') 265 | ) 266 | results = sorted(results, key=lambda task: task.result['start']) 267 | self.assertTrue(results[0].result['end'] < results[1].result['start']) 268 | self.assertTrue(results[1].result['end'] < results[2].result['start']) 269 | 270 | async def test_queue_from_task(self): 271 | tasks = self.api.tasks 272 | task = await tasks.queue('queue.from.task') 273 | self.assertEqual(task.status_string, 'SUCCESS') 274 | other_task = task.result 275 | self.assertEqual(other_task.from_task, task.id) 276 | 277 | async def test_scrape(self): 278 | tasks = self.api.tasks 279 | task = await tasks.queue('scrape', url='https://github.com/') 280 | self.assertEqual(task.status_string, 'SUCCESS') 281 | self.assertTrue(task.result) 282 | 283 | async def test_delay(self): 284 | tasks = self.api.tasks 285 | task = await tasks.queue('scrape', 286 | delay=5, 287 | url='https://www.bbc.co.uk/') 288 | self.assertEqual(task.status_string, 'SUCCESS') 289 | self.assertEqual(task.delay, 5) 290 | self.assertTrue(task.time_started - task.time_queued > 5) 291 | self.assertTrue(task.result) 292 | 293 | async def test_thread_io(self): 294 | inp = os.path.join(PATH, 'example', 'randomtext.txt') 295 | tasks = self.api.tasks 296 | task = await tasks.queue('read.text', input=inp) 297 | self.assertEqual(task.status_string, 'SUCCESS') 298 | self.assertNotEqual(task.result['thread'], threading.get_ident()) 299 | self.assertTrue(task.result['text']) 300 | 301 | async def test_bad_task(self): 302 | tasks = self.api.tasks 303 | task = await tasks.queue('asynchronous', sleep=2) 304 | self.assertEqual(task.status_string, 'FAILURE') 305 | 306 | async def test_retry(self): 307 | meta = {'max_retries': 3, 'retry_delay': 1} 308 | done = create_future() 309 | 310 | class CheckRetry: 311 | count = 1 312 | message_id = None 313 | 314 | def __call__(self, _, event, task): 315 | if task.name == 'subtraction': 316 | if task.meta.get('from_task') == self.message_id: 317 | self.count += 1 318 | if task.retry == 3: 319 | done.set_result(task) 320 | else: 321 | self.message_id = task.id 322 | 323 | check_retry = CheckRetry() 324 | await self.api.on_events('task', 'done', check_retry) 325 | try: 326 | task = await self.api.tasks.queue('subtraction', a=1, b='foo', 327 | delay=1, 328 | callback=False, 329 | meta_params=meta) 330 | self.assertEqual(task.status_string, 'QUEUED') 331 | check_retry.message_id = task.id 332 | task = await done 333 | self.assertEqual(check_retry.count, 3) 334 | self.assertEqual(task.status_string, 'FAILURE') 335 | finally: 336 | await self.api.remove_event_callback('task', 'done', check_retry) 337 | 338 | async def test_max_concurrency(self): 339 | tasks = [self.api.tasks.queue('maxconcurrency', lag=3) 340 | for _ in range(5)] 341 | tasks = await asyncio.gather(*tasks) 342 | self.assertEqual(len(tasks), 5) 343 | revoked = success = 0 344 | for task in tasks: 345 | if task.status_string == 'REVOKED': 346 | revoked += 1 347 | elif task.status_string == 'SUCCESS': 348 | success += 1 349 | self.assertEqual(revoked, 2) 350 | self.assertEqual(success, 3) 351 | 352 | async def test_task_timeout(self): 353 | future = self.api.tasks.queue('asynchronous', lag=10, timeout=3) 354 | task = await future 355 | self.assertEqual(task.status_string, 
'REVOKED') 356 | self.assertGreaterEqual(task.time_ended-task.time_queued, 357 | task.timeout) 358 | self.assertTrue(task.expiry) 359 | 360 | # RPC 361 | async def test_rpc_job_list(self): 362 | data = await self.proxy.tasks.job_list() 363 | self.assertIsInstance(data, list) 364 | 365 | async def test_rpc_queue_task(self): 366 | task = await self.proxy.tasks.queue('cpubound') 367 | self.assertEqual(task['status'], 1) 368 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | |pulsar-queue| 2 | 3 | :Badges: |license| |pyversions| |status| |pypiversion| 4 | :CI: |circleci| |coverage| |appveyor| 5 | :Downloads: http://pypi.python.org/pypi/pulsar-queue 6 | :Source: https://github.com/quantmind/pulsar-queue 7 | :Mailing list: `google user group`_ 8 | :Design by: `Quantmind`_ and `Luca Sbardella`_ 9 | :Platforms: Linux, OSX, Windows. Python 3.5 and above 10 | :Keywords: server, asynchronous, concurrency, actor, process, queue, tasks, redis 11 | 12 | 13 | .. |pypiversion| image:: https://badge.fury.io/py/pulsar-queue.svg 14 | :target: https://pypi.python.org/pypi/pulsar-queue 15 | .. |pyversions| image:: https://img.shields.io/pypi/pyversions/pulsar-queue.svg 16 | :target: https://pypi.python.org/pypi/pulsar-queue 17 | .. |license| image:: https://img.shields.io/pypi/l/pulsar-queue.svg 18 | :target: https://pypi.python.org/pypi/pulsar-queue 19 | .. |status| image:: https://img.shields.io/pypi/status/pulsar-queue.svg 20 | :target: https://pypi.python.org/pypi/pulsar-queue 21 | .. |downloads| image:: https://img.shields.io/pypi/dd/pulsar-queue.svg 22 | :target: https://pypi.python.org/pypi/pulsar-queue 23 | .. |travis| image:: https://img.shields.io/travis/quantmind/pulsar-queue/master.svg 24 | :target: https://travis-ci.org/quantmind/pulsar-queue 25 | .. |appveyor| image:: https://ci.appveyor.com/api/projects/status/7us462jl0de2w0ly/branch/master?svg=true 26 | :target: https://ci.appveyor.com/project/lsbardel/pulsar-queue 27 | .. |coverage| image:: https://codecov.io/gh/quantmind/pulsar-queue/branch/master/graph/badge.svg 28 | :target: https://codecov.io/gh/quantmind/pulsar-queue 29 | .. |circleci| image:: https://circleci.com/gh/quantmind/pulsar-queue.svg?style=svg 30 | :target: https://circleci.com/gh/quantmind/pulsar-queue 31 | 32 | Asynchronous server for consuming asynchronous IO tasks, green IO tasks, 33 | blocking IO tasks and long running CPU bound tasks. 34 | 35 | * Fully configurable 36 | * Consumers poll tasks from distributed message brokers (redis broker implemented) 37 | * Publish/subscribe for real-time event and logging (redis pub/sub backend) 38 | * Can schedule tasks when run as a scheduler (``--schedule-periodic`` flag) 39 | * Build on top of pulsar_ and asyncio_ 40 | 41 | 42 | TL;DR 43 | ======== 44 | 45 | Clone the repository:: 46 | 47 | git clone git@github.com:quantmind/pulsar-queue.git 48 | 49 | 50 | Move to the ``tests/example`` directory and run the server:: 51 | 52 | python manage.py 53 | 54 | 55 | 56 | .. contents:: **CONTENTS** 57 | 58 | 59 | Four steps tutorial 60 | ======================== 61 | 62 | 1 - Create a script 63 | ---------------------- 64 | 65 | A simple python file which runs your application: 66 | 67 | .. code:: 68 | 69 | vim manage.py 70 | 71 | 72 | .. 
code:: python 73 | 74 | from pq.api import PulsarQueue 75 | 76 | 77 | task_paths = ['sampletasks.*', 'pq.jobs'] 78 | 79 | 80 | def app(): 81 | return PulsarQueue(config=__file__) 82 | 83 | if __name__ == '__main__': 84 | app().start() 85 | 86 | 87 | 2 - Implement Jobs 88 | --------------------- 89 | 90 | Create the modules where Jobs_ are implemented. 91 | It can be a directory containing several submodules. 92 | 93 | .. code:: 94 | 95 | mkdir sampletasks 96 | cd sampletasks 97 | vim mytasks.py 98 | 99 | .. code:: python 100 | 101 | import asyncio 102 | import time 103 | 104 | from pq import api 105 | 106 | 107 | @api.job() 108 | def addition(self, a=0, b=0): 109 | return a + b 110 | 111 | 112 | @api.job() 113 | async def asynchronous(self, lag=1): 114 | start = time.time() 115 | await asyncio.sleep(lag) 116 | return time.time() - start 117 | 118 | 119 | 3 - Run the server 120 | --------------------- 121 | 122 | Run the server with two task consumers (pulsar actors). 123 | 124 | **NOTE**: Make sure you have Redis server up and running before you start the server. 125 | 126 | .. code:: 127 | 128 | python manage.py -w 2 129 | 130 | 4 - Queue tasks 131 | --------------------- 132 | 133 | Launch a python shell and play with the api 134 | 135 | .. code:: python 136 | 137 | >>> from manage import app 138 | >>> api = app().api() 139 | >>> task = api.tasks.queue('addition', a=4, b=6) 140 | >>> task 141 | 142 | >>> task = task.wait() 143 | task.addition 144 | >>> task.result 145 | 10 146 | >>> task.status_string 147 | 'SUCCESS' 148 | 149 | You can also queue tasks with a ``delay`` 150 | 151 | .. code:: python 152 | 153 | >>> task = api.tasks.queue('addition', a=4, b=6, callback=False, delay=2).wait() 154 | >>> task.status_string 155 | 'QUEUED' 156 | >>> task.time_queued # timestamp 157 | >>> task = task.done_callback.wait() 158 | >>> task.status_string 159 | 'SUCCESS' 160 | >>> task.time_started - task.time_queued 161 | 2.00 162 | 163 | **NOTE**: The ``wait`` method in a task future can only be used on the shell 164 | or when the event loop is not running. In all other cases one should ``await`` 165 | for the task future in a coroutine. 166 | 167 | API 168 | ============= 169 | 170 | The producer API is obtained from the Task application ``api`` method: 171 | 172 | .. code:: python 173 | 174 | from pq.api import PusarQueue 175 | 176 | api = PusarQueue(...).api() 177 | 178 | 179 | API methods 180 | --------------- 181 | 182 | *api*.start() 183 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 184 | 185 | Start listening to events. This method return a coroutine which resolve in the api: 186 | 187 | .. code:: python 188 | 189 | api = await api.start() 190 | 191 | The start method is used when the api is used by application to queue messages/tasks 192 | and listen for events published by distributed consumers. 193 | 194 | *api*.on_events(*message_type*, *event_re*, *callback*) 195 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 196 | 197 | Add a callback invoked every time an event matching the regular expression ``event_re`` 198 | occurs on the ``message_type`` channel. The *callback* has the following signature: 199 | 200 | .. code:: python 201 | 202 | def event_callback(channel, event, message): 203 | # event is string, the event matched 204 | # message is of type message_type 205 | 206 | If the event is a task event (see events_) the message is a Task_ object. 
207 | 208 | This method is useful when creating applications which need to respond to the 209 | queue server events in real time:: 210 | 211 | api.on_events('task', 'queued', callback) 212 | api.on_events('task', 'started', callback) 213 | api.on_events('task', 'done', callback) 214 | 215 | 216 | *api*.remove_event_callback(*message_type*, *event_re*, *callback*) 217 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 218 | 219 | Remove a previously added event callback. This method is safe. 220 | 221 | *api*.queue(*message*, *callback=True*) 222 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 223 | 224 | Queue a message in the message queue, equivalent to: 225 | 226 | .. code:: python 227 | 228 | api.broker.queue(message, callback) 229 | 230 | This method returns a ``MessageFuture``, a subclass of asyncio Future_ which 231 | resolves into a ``message`` object. 232 | If ``callback`` is True (default) the Future is resolved once the message 233 | is delivered (out of the queue), otherwise it is resolved once the message 234 | is queued (entered the queue). 235 | 236 | *api*.execute(*message*) 237 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 238 | 239 | Execute a message without queueing. This is only supported by messages with 240 | a message consumer which executes them (the ``tasks`` consumer for example). 241 | If *message* is a Task_, this method is equivalent to: 242 | 243 | .. code:: python 244 | 245 | api.tasks.execute(task) 246 | 247 | This method returns a ``MessageFuture``, a subclass of asyncio Future_ which 248 | resolves into a ``message`` object. 249 | 250 | *api*.consumers 251 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 252 | 253 | List of consumers registered with the api. 254 | 255 | Tasks API 256 | ----------------- 257 | 258 | The tasks producer is obtained via the ``tasks`` property of the producer API instance: 259 | 260 | .. code:: python 261 | 262 | tasks = api.tasks 263 | 264 | The following methods are available for the tasks producer: 265 | 266 | 267 | *tasks*.queue(*jobname*, *\*\*kwargs*) 268 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 269 | 270 | Queue a task and return a **TaskFuture** which is resolved once the task has finished. 271 | It is possible to obtain a task future resolved when the task has been queued, rather than finished, by passing the **callback=False** parameter: 272 | 273 | .. code:: python 274 | 275 | task = await tasks.queue(..., callback=False) 276 | task.status_string # QUEUED 277 | 278 | The ``kwargs`` parameters are used as input parameters for the Job_ callable with the exception of: 279 | 280 | * ``callback``: discussed above 281 | * ``delay``: delay execution by a given number of seconds 282 | * ``queue``: overrides the Job_ [default_queue](#job-default-queue) 283 | * [timeout](#job-timeout) 284 | * ``meta_params``: dictionary of parameters used by the Job_ callable to override default values of: 285 | * [max_retries](#job-max-retries) 286 | * [retry_delay](#job-retry-delay) 287 | * [max_concurrency](#job-max-concurrency) 288 | 289 | *tasks*.queue_local(*jobname*, *\*\*kwargs*) 290 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 291 | 292 | Queue a job in the local task queue. The local task queue is processed by the same server instance. It is equivalent to executing: 293 | 294 | .. 
code:: python 295 | 296 | task = await tasks.queue(..., queue=tasks.node_name) 297 | task.queue # tasks.node_name 298 | 299 | 300 | *tasks*.execute(*jobname*, *\*args*, *\*\*kwargs*) 301 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 302 | 303 | Execute a task immediately; it does not put the task in the task queue. 304 | This method is useful for debugging and testing. It is equivalent to executing: 305 | 306 | .. code:: python 307 | 308 | task = await tasks.queue(..., queue=False) 309 | task.queue # None 310 | task.status_string # SUCCESS 311 | 312 | 313 | *tasks*.queues() 314 | ~~~~~~~~~~~~~~~~~~~~~~~~ 315 | 316 | Return the list of queue names the backend is subscribed to. This list is not empty when the backend is a task consumer. 317 | 318 | *tasks*.job_list(*jobname=None*) 319 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 320 | 321 | Returns a list of ``job_name``, ``job_description`` tuples. The ``job_name`` is a string which must be used as the **jobname** parameter when executing or queueing tasks. The ``job_description`` is a dictionary containing metadata and documentation for the job. Example: 322 | 323 | .. code:: python 324 | 325 | jobs = dict(tasks.job_list()) 326 | jobs['execute.python'] 327 | # { 328 | # 'type': 'regular', 329 | # 'concurrency': 'asyncio', 330 | # 'doc_syntax': 'markdown', 331 | # 'doc': 'Execute arbitrary python code on a subprocess ... ' 332 | # } 333 | 334 | 335 | The Job class 336 | ----------------- 337 | 338 | The **Job** class is how task factories are implemented and added to the 339 | tasks backend registry. When writing a new **Job** one can either subclass: 340 | 341 | .. code:: python 342 | 343 | import asyncio 344 | 345 | class AsyncSleep(api.Job): 346 | 347 | async def __call__(self, lag=1): 348 | await asyncio.sleep(lag) 349 | 350 | 351 | or use the less verbose **job** decorator: 352 | 353 | .. code:: python 354 | 355 | @api.job() 356 | async def asyncsleep(self, lag=1): 357 | await asyncio.sleep(lag) 358 | 359 | 360 | In either case the ``self`` parameter is an instance of a **Job** class and 361 | it has the following useful attributes and methods: 362 | 363 | *job*.backend 364 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 365 | 366 | The tasks backend that is processing this Task_ run 367 | 368 | *job*.default_queue 369 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 370 | 371 | The default queue name where tasks for this job are queued. By default it is ``None`` 372 | in which case, if a ``queue`` is not given when queueing a task, the first queue 373 | from the `queues <#tasks_queues>`_ list is taken. 374 | 375 | *job*.http 376 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 377 | 378 | Best possible HTTP session handler for the job concurrency mode. 379 | 380 | *job*.logger 381 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 382 | 383 | Python logging handler for this job. The name of this handler 384 | is ``.``. 385 | 386 | *job*.max_retries 387 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 388 | 389 | Optional positive integer which specifies the maximum number of retries when a 390 | task fails or is revoked. If not set, failing tasks are not re-queued. 391 | It can be specified as a class attribute or during initialisation from the task 392 | meta parameters. 393 | 394 | *job*.retry_delay 395 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 396 | 397 | Optional positive integer which specifies the number of seconds to delay a task 398 | retry.
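For example, both attributes can be declared on the job itself, or overridden per task through ``meta_params``. This is a sketch only; the ``flaky_scrape`` job name and its body are illustrative and not part of the library:

.. code:: python

    @api.job(max_retries=3, retry_delay=2)
    async def flaky_scrape(self, url=None):
        # a failing run is re-queued up to max_retries times,
        # waiting retry_delay seconds before each retry
        response = await self.http.get(url)
        return response.text()

or, per task:

.. code:: python

    task = await tasks.queue('flaky_scrape', url='https://example.com',
                             meta_params={'max_retries': 1, 'retry_delay': 5})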
399 | 400 | *job*.name 401 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 402 | 403 | The name of this job. Used to queue tasks. 404 | 405 | *job*.task 406 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 407 | 408 | The Task_ instance associated with this task run 409 | 410 | *job*.queue(*jobname*, *\*args*, *\*\*kwargs*) 411 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 412 | 413 | Queue a new job from a task run. It is equivalent to: 414 | 415 | .. code:: python 416 | 417 | meta_params = {'from_task': self.task.id} 418 | self.backend.tasks.queue(..., meta_params=meta_params) 419 | 420 | 421 | *job*.shell(*command*, *\*\*kwargs*) 422 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 423 | 424 | Execute a shell command and return a coroutine: 425 | 426 | .. code:: python 427 | 428 | await self.shell("...") 429 | 430 | 431 | The Task 432 | ----------- 433 | 434 | A task contains the metadata of a job run and it is exchanged between task producers and task consumers via a distributed task queue. 435 | 436 | 437 | Task States 438 | ----------------- 439 | 440 | A Task_ can have one of the following ``task.status`` values: 441 | 442 | * ``QUEUED = 6`` a task queued but not yet executed. 443 | * ``STARTED = 5`` a task where execution has started. 444 | * ``RETRY = 4`` a task is being retried. 445 | * ``REVOKED = 3`` the task execution has been revoked (or timed-out). 446 | * ``FAILURE = 2`` task execution has finished with failure. 447 | * ``SUCCESS = 1`` task execution has finished with success. 448 | 449 | 450 | **FULL_RUN_STATES** 451 | 452 | The set of states for which a Task_ has run: ``FAILURE`` and ``SUCCESS`` 453 | 454 | 455 | **READY_STATES** 456 | 457 | The set of states for which a Task_ has finished: ``REVOKED``, ``FAILURE`` and ``SUCCESS`` 458 | 459 | Events 460 | ------------- 461 | 462 | The task queue broadcasts several events during task execution and internal state changes: 463 | 464 | * ``task_queued``: a new Task_ has been queued, the message is a task instance 465 | * ``task_started``: a Task_ has started to be consumed by a task consumer, it is out of the task queue 466 | * ``task_done``: a Task_ is done, the message is a task in a **READY_STATES** 467 | 468 | 469 | Configuration 470 | ------------------ 471 | 472 | There are several parameters you can use to tweak the way the task queue works. 473 | In this list the name in bold is the entry point in the config file and **cfg** 474 | dictionary, while the value in brackets shows the command line entry with its default 475 | value. 476 | 477 | * **concurrent_tasks** (``--concurrent-tasks 5``) 478 | 479 | The maximum number of concurrent tasks for a given worker in a task consumer server. 480 | 481 | * **data_store** (``--data-store redis://127.0.0.1:6379/7``) 482 | 483 | Data store used for publishing and subscribing to messages (redis is the 484 | only backend available at the moment) 485 | 486 | * **max_requests** (``--max-requests 0``) 487 | 488 | The maximum number of tasks a worker will process before restarting. 489 | A value of 0 (the default) means no maximum: workers will process 490 | tasks forever. 491 | 492 | * **message_broker** (``--message-broker ...``) 493 | 494 | Data store used as the distributed task queue. If not provided (default) the 495 | ``data_store`` is used instead. Redis is the 496 | only backend available at the moment. 497 | 498 | * **message_serializer** (``--message-serializer json``) 499 | 500 | The decoder/encoder for messages and tasks.
The default is **JSON** but **Message Pack** 501 | is also available if msgpack_ is installed. 502 | 503 | * **schedule_periodic** (``--schedule-periodic``) 504 | 505 | When ``True``, the task application can schedule periodic Jobs_. 506 | Usually, only one running server is responsible for 507 | scheduling tasks. 508 | 509 | * **task_pool_timeout** (``--task-pool-timeout 2``) 510 | 511 | Timeout in seconds for asynchronously polling tasks from the queues. There is rarely a need to change this parameter. 512 | 513 | * **workers** (``--workers 4``) 514 | 515 | Number of workers (processes) consuming tasks. 516 | 517 | 518 | Tasks Concurrency 519 | ====================== 520 | 521 | A task can run in one of four ``concurrency`` modes. 522 | If not specified by the ``Job``, the concurrency mode is ``ASYNC_IO``. 523 | 524 | ASYNC_IO 525 | ----------- 526 | 527 | The asynchronous IO mode is associated with tasks which return 528 | an asyncio Future or a coroutine. These tasks run concurrently 529 | in the worker event loop. 530 | An example can be a Job which scrapes web pages and creates new tasks to process the html: 531 | 532 | .. code:: python 533 | 534 | @api.job() 535 | async def scrape(self, url=None): 536 | assert url, "url is required" 537 | request = await self.http.get(url) 538 | html = request.text() 539 | task = self.queue('process.html', html=html, callback=False) 540 | return task.id 541 | 542 | 543 | THREAD_IO 544 | ------------- 545 | 546 | This concurrency mode is best suited for tasks performing 547 | *blocking* IO operations. 548 | A ``THREAD_IO`` job runs its tasks in the event loop executor. 549 | You can use this model for most blocking operations unless: 550 | 551 | * The operation is long running and CPU bound 552 | * The operation does not release the GIL 553 | 554 | Examples of tasks suitable for thread IO are IO operations on files. 555 | For example, the test suite uses this Job for testing ``THREAD_IO`` 556 | concurrency (check the ``tests.example.jobs.standard`` module 557 | for the full code): 558 | 559 | 560 | .. code:: python 561 | 562 | @api.job(concurrency=api.THREAD_IO) 563 | def extract_docx(self, input=None, output=None): 564 | """ 565 | Extract text from a docx document 566 | """ 567 | import docx 568 | assert input and output, "input and output must be given" 569 | document = docx.Document(input) 570 | text = '\n\n'.join(_docx_text(document)) 571 | with open(output, 'w') as fp: 572 | fp.write(text) 573 | return { 574 | 'thread': threading.get_ident(), 575 | 'text': len(text) 576 | } 577 | 578 | CPUBOUND 579 | ------------ 580 | 581 | This mode assumes the task performs blocking CPU bound operations. 582 | Jobs with this concurrency mode run their tasks on sub-processes 583 | using the `asyncio subprocess`_ module. 584 | 585 | Extend 586 | ================= 587 | 588 | It is possible to enhance the task queue application by passing 589 | a custom ``Manager`` during initialisation. 590 | For example: 591 | 592 | .. code:: python 593 | 594 | from pq import api 595 | 596 | class Manager(api.Manager): 597 | 598 | async def store_message(self, message): 599 | """This method is called when a message/task is queued, 600 | started and finished 601 | """ 602 | if message.type == 'task': 603 | ...  # save this task into a db for example 604 | 605 | def queues(self): 606 | """List of queue names for Task consumers 607 | By default it returns the node name and the task_queues 608 | in the config dictionary.
609 | """ 610 | queues = [self.backend.node_name] 611 | queues.extend(self.cfg.task_queues) 612 | return queues 613 | 614 | 615 | tq = PulsarQueue(Manager, ...) 616 | 617 | 618 | The ``Manager`` class is initialised when the backend handler is initialised 619 | (on each consumer and in the scheduler). 620 | 621 | Changelog 622 | ============== 623 | 624 | * `Versions 0.5 `_ 625 | * `Versions 0.4 `_ 626 | * `Versions 0.3 `_ 627 | * `Versions 0.2 `_ 628 | * `Versions 0.1 `_ 629 | 630 | License 631 | ============= 632 | This software is licensed under the BSD 3-clause License. See the LICENSE 633 | file in the top distribution directory for the full license text. Logo designed by Ralf Holzemer, 634 | `creative common license`_. 635 | 636 | 637 | .. _`google user group`: https://groups.google.com/forum/?fromgroups#!forum/python-pulsar 638 | .. _`Luca Sbardella`: http://lucasbardella.com 639 | .. _`Quantmind`: http://quantmind.com 640 | .. _`creative common license`: http://creativecommons.org/licenses/by-nc/3.0/ 641 | .. _pulsar: https://github.com/quantmind/pulsar 642 | .. _asyncio: https://docs.python.org/3/library/asyncio.html 643 | .. _greenlet: https://greenlet.readthedocs.io/en/latest/ 644 | .. _msgpack: https://pypi.python.org/pypi/msgpack-python 645 | .. _`asyncio subprocess`: https://docs.python.org/3/library/asyncio-subprocess.html 646 | .. _Future: https://docs.python.org/3/library/asyncio-task.html#future 647 | .. _Job: #the-job-class 648 | .. _Jobs: #the-job-class 649 | .. _Task: #the-task 650 | .. _Events: #events 651 | .. _events: #events 652 | .. |pulsar-queue| image:: https://fluidily-public.s3.amazonaws.com/pulsar/queue/pulsar-queue-banner.svg 653 | :target: https://github.com/quantmind/pulsar-queue 654 | --------------------------------------------------------------------------------