├── requirements.pip ├── .gitignore ├── celeryconfig.py.example ├── setup.py ├── test_tasks.py ├── tests.py ├── README.rst └── celery_tasktree.py /requirements.pip: -------------------------------------------------------------------------------- 1 | celery 2 | nose 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.py[co] 2 | .*.swp 3 | /celery_tasktree.egg-info 4 | /.project 5 | /.pydevproject 6 | /.settings 7 | /celeryconfig.py 8 | /env 9 | -------------------------------------------------------------------------------- /celeryconfig.py.example: -------------------------------------------------------------------------------- 1 | # see http://docs.celeryproject.org/en/latest/getting-started/first-steps-with-celery.html#configuration 2 | # 3 | # Rename this file to celeryconfig.py 4 | # Command for test: 5 | # celeryd --loglevel=INFO 6 | BROKER_HOST = 'localhost' 7 | BROKER_PORT = 5672 8 | BROKER_USER = 'myuser' 9 | BROKER_PASSWORD = 'mypassword' 10 | BROKER_VHOST = 'myvhost' 11 | CELERY_IMPORTS = ("test_tasks", ) 12 | CELERY_RESULT_BACKEND = "amqp" 13 | CELERY_AMQP_TASK_RESULT_EXPIRES = 1000 14 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os 3 | from distutils.core import setup 4 | 5 | def read(fname): 6 | try: 7 | return open(os.path.join(os.path.dirname(__file__), fname)).read() 8 | except: 9 | return '' 10 | 11 | setup( 12 | name='celery-tasktree', 13 | version='0.3.4', 14 | description='Celery Tasktree module', 15 | author='NetAngels team', 16 | author_email='info@netangels.ru', 17 | url='https://github.com/NetAngels/celery-tasktree', 18 | long_description = read('README.rst'), 19 | license = 'BSD License', 20 | py_modules=['celery_tasktree'], 21 | classifiers=( 22 | 'Development Status :: 4 - Beta', 23 | 'Intended Audience :: Developers', 24 | 'License :: OSI Approved :: BSD License', 25 | 'Programming Language :: Python', 26 | 'Topic :: Software Development :: Libraries :: Python Modules', 27 | ), 28 | ) 29 | -------------------------------------------------------------------------------- /test_tasks.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from celery.task import Task 3 | from celery_tasktree import task_with_callbacks, run_with_callbacks 4 | import os 5 | 6 | 7 | @task_with_callbacks 8 | def mkdir(directory): 9 | """ Create directory. 10 | 11 | We return CreateDirectoryResult object intentionally, so that 12 | task_with_callbacks decorator can add async_result attribute to this one. 13 | """ 14 | os.mkdir(directory) 15 | return CreateDirectoryResult(True) 16 | 17 | 18 | class MkdirTask(Task): 19 | 20 | @run_with_callbacks 21 | def run(self, directory): 22 | os.mkdir(directory) 23 | return CreateDirectoryResult(True) 24 | 25 | 26 | class CreateDirectoryResult(object): 27 | def __init__(self, created): 28 | self.created = created 29 | def __bool__(self): 30 | return bool(self.created) 31 | def __str__(self): 32 | return '%s <%s>' % (id(self), self.created) 33 | -------------------------------------------------------------------------------- /tests.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | How to run these tests. 4 | ------------------------ 5 | 6 | 1. Install celery, then copy ``celeryconfig.py.example`` to ``celeryconfig.py`` 7 | and tune the configuration file. Follow celery "getting started" guide: 8 | http://docs.celeryproject.org/en/latest/getting-started/index.html 9 | 2. Launch celeryd as ``celeryd --loglevel=INFO``. 10 | Make sure that tasks "test_tasks.mkdir" and "test_tasks.MkdirTask" are found. 11 | 3. Run tests with ``nosetests`` command. 12 | 13 | """ 14 | import os 15 | from celery_tasktree import * 16 | from nose.tools import * 17 | from test_tasks import mkdir, MkdirTask 18 | 19 | def setup(): 20 | for dir in 'd1/2/1 d1/2/2 d1/2 d1/3 d1 d0/1/2 d0/1 d0/2 d0'.split(): 21 | if os.path.isdir(dir): 22 | os.rmdir(dir) 23 | 24 | @with_setup(setup, setup) 25 | def test_task_tree(): 26 | """ 27 | Check TaskTree execution order. 28 | 29 | Following tree of tasks is created:: 30 | 31 | d 0 32 | d 1 - d 1.1 33 | ` d 1.2 - d 1.2.1 34 | ` d 1.3 ` d 1.2.2 35 | """ 36 | tree = TaskTree() 37 | 38 | # this set of tasks created in the right order should create all these 39 | # files 40 | node0 = tree.add_task(mkdir, args=['d0']) 41 | node1 = tree.add_task(mkdir, args=['d1']) 42 | node12 = node1.add_task(mkdir, args=['d1/2']) 43 | node13 = node1.add_task(mkdir, args=['d1/3']) 44 | node121 = node12.add_task(mkdir, args=['d1/2/1']) 45 | node122 = node12.add_task(mkdir, args=['d1/2/2']) 46 | 47 | # check that tree is build correctly 48 | eq_(tree.children, [node0, node1]) 49 | eq_(node1.children, [node12, node13]) 50 | eq_(node12.children, [node121, node122]) 51 | eq_(node13.children, []) 52 | 53 | # run tasks and wait for the f0 and f1 task result 54 | async_res = tree.apply_async() 55 | f0_res, f1_res = async_res.join() 56 | eq_(f0_res.created, True) 57 | eq_(f1_res.created, True) 58 | 59 | # wait for the 1.1, 1.2, 1.3 task result 60 | f11_res, f12_res = f1_res.async_result.join() 61 | eq_(f11_res.created, True) 62 | eq_(f12_res.created, True) 63 | 64 | # wait for 1.2.1 and 1.2.2 tasks 65 | f121_res, f122_res = f11_res.async_result.join() 66 | eq_(f121_res.created, True) 67 | eq_(f122_res.created, True) 68 | 69 | # check that all files were created 70 | ok_(os.path.isdir('d1/2/1')) 71 | ok_(os.path.isdir('d1/2/2')) 72 | ok_(os.path.isdir('d1/3')) 73 | 74 | 75 | @with_setup(setup, setup) 76 | def test_task_already_contains_callback(): 77 | tree = TaskTree() 78 | task0 = mkdir.subtask(args=['d0/1']) 79 | node0 = tree.add_task(mkdir, args=['d0'], kwargs=dict(callback=task0)) 80 | node01 = node0.add_task(mkdir, args=['d0/2']) 81 | async_res = tree.apply_async() 82 | (f0_res,) = async_res.join() 83 | eq_(f0_res.created, True) 84 | f01_res, f02_res = f0_res.async_result.join() 85 | eq_(f01_res.created, True) 86 | eq_(f02_res.created, True) 87 | 88 | @with_setup(setup, setup) 89 | def test_task_subclass(): 90 | tree = TaskTree() 91 | node0 = tree.add_task(MkdirTask, args=['d0']) 92 | node01 = node0.add_task(MkdirTask, args=['d0/1']) 93 | tree.apply_and_join() 94 | ok_(os.path.isdir('d0')) 95 | ok_(os.path.isdir('d0/1')) 96 | 97 | @with_setup(setup, setup) 98 | def test_push_and_pop(): 99 | tree = TaskTree() 100 | tree.push(mkdir, args=('d0',)) 101 | tree.push(mkdir, args=('d0/abc/def',)) 102 | tree.pop() 103 | tree.push(mkdir, args=('d0/1',)) 104 | tree.push(mkdir, args=('d0/1/2',)) 105 | [res0, res1, res2] = tree.apply_and_join() 106 | 107 | def test_empty_task_tree(): 108 | tree = TaskTree() 109 | results = tree.apply_and_join() 110 | eq_(results, []) 111 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | Celery tasktree module 2 | ====================== 3 | 4 | celery-tasktree is a module which helps to execute trees of celery tasks 5 | asynchronously in a particular order. Tasktree comes to the rescue when the 6 | number of tasks and dependencies grows and when a naive callback-based approach 7 | becomes hard to understand and maintain. 8 | 9 | Usage sample 10 | ------------- 11 | 12 | :: 13 | 14 | from celery_tasktree import task_with_callbacks, TaskTree 15 | 16 | @task_with_callbacks 17 | def some_action(...): 18 | ... 19 | 20 | def execute_actions(): 21 | tree = TaskTree() 22 | task0 = tree.add_task(some_action, args=[...], kwargs={...}) 23 | task1 = tree.add_task(some_action, args=[...], kwargs={...}) 24 | task10 = task1.add_task(some_action, args=[...], kwargs={...}) 25 | task11 = task1.add_task(some_action, args=[...], kwargs={...}) 26 | task110 = task11.add_task(some_action, args=[...], kwargs={...}) 27 | async_result = tree.apply_async() 28 | return async_result 29 | 30 | 31 | Decorator named ``task_with_callbacks`` should be used instead of simple celery 32 | ``task`` decorator. 33 | 34 | According to the code: 35 | 36 | - task0 and task1 are executed simultaniously 37 | - task10 and task11 are executed simultaniously after task1 38 | - task110 is executed after task11 39 | 40 | Things to be noted: 41 | 42 | - There is no way to stop propagation of the execution and there is no way to 43 | pass extra arguments from an ancestor to a child task. In short, there in only one 44 | kind of dependency between tasks: the dependency of execution order. 45 | - If the subtask (function) return value is an object, then a property named 46 | "async_result" will be added to that object so that it will be possible to 47 | use ``join()`` to gather the ordered task results. To extend the previous example:: 48 | 49 | async_result = execute_actions() 50 | task0_result, task1_result = async_result.join() 51 | task10_result, task11_result = task1_result.async_result.join() 52 | task110_result = task11_result.async_result.join() 53 | 54 | Subclassing `celery.task.Task` with callbacks 55 | ---------------------------------------------- 56 | 57 | Decorating functions with ``@task`` decorator is the easiest, but not the only 58 | one way to create new ``Task`` subclasses. Sometimes it is more convenient to 59 | subclass the generic ``celery.task.Task`` class and re-define its ``run()`` method. 60 | To make such a class compatible with TaskTree, ``run`` should be wrapped with 61 | ``celery_tasktree.run_with_callbacks`` decorator. The example below 62 | illustrates this approach:: 63 | 64 | from celery.task import Task 65 | from celery_tasktree import run_with_callbacks, TaskTree 66 | 67 | class SomeActionTask(Task): 68 | 69 | @run_with_callbacks 70 | def run(self, ...): 71 | ... 72 | 73 | def execute_actions(): 74 | tree = TaskTree() 75 | task0 = tree.add_task(SomeActionTask, args=[...], kwargs={...}) 76 | task01 = task0.add_task(SomeActionTask, args=[...], kwargs={...}) 77 | tree.apply_async() 78 | 79 | 80 | Using TaskTree as a simple queue 81 | ----------------------------------- 82 | 83 | In many cases a fully fledged tree of tasks would be overkill for you. All you 84 | need is to add two or more tasks to a queue to make sure that they will be 85 | executed in order. To allow this TaskTree has ``push()`` and ``pop()`` 86 | methods which in fact are nothing but wrappers around ``add_task()``. 87 | The ``push()`` method adds a new task as a child to the perviously created one 88 | whereas ``pop()`` removes and returns the task from the tail of the task stack. 89 | Usage sample looks like:: 90 | 91 | # create the tree 92 | tree = TaskTree() 93 | # push a number of tasks into it 94 | tree.push(action1, args=[...], kwargs={...}) 95 | tree.push(action2, args=[...], kwargs={...}) 96 | tree.push(actionX, args=[...], kwargs={...}) 97 | tree.pop() # get back action X from the queue 98 | tree.push(action3, args=[...], kwargs={...}) 99 | # apply asynchronously 100 | tree.apply_async() 101 | 102 | Actions will be executed in order ``action1 -> action2 -> action3``. 103 | 104 | 105 | Task with callbacks outside TaskTree 106 | --------------------------------------- 107 | 108 | The ``task_with_callbacks`` decorator can be useful in itself. It decorates 109 | functions the same way the ordinary ``task`` celery decorator does, but also 110 | adds an optional ``callback`` parameter. 111 | 112 | Callback can be a subtask or a list of subtasks (not the TaskSet). Behind the 113 | scenes, when a task with a callback is invoked, it executes the function's main code, 114 | then builds a TaskSet, invokes it asynchronously and attaches the 115 | ``TaskSetResut`` as the attribute named ``async_result`` to the function's return 116 | value. 117 | 118 | Simple example is provided below:: 119 | 120 | from celery_tasktree import task_with_callbacks 121 | 122 | @task_with_callbacks 123 | def some_action(...): 124 | ... 125 | 126 | cb1 = some_action.subtask(...) 127 | cb2 = some_action.subtask(...) 128 | async_result = some_action.delay(..., callback=[cb1, cb2]) 129 | main_result = async_result.wait() 130 | cb1_result, cb2_result = main_result.async_result.join() 131 | -------------------------------------------------------------------------------- /celery_tasktree.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from celery.task import task 3 | from celery import group as celery_group 4 | from functools import wraps 5 | 6 | 7 | class TaskTree(object): 8 | 9 | def __init__(self): 10 | self.children = [] 11 | self.last_node = self 12 | 13 | def add_task(self, func, args=None, kwargs=None): 14 | if args is None: 15 | args = [] 16 | if kwargs is None: 17 | kwargs = {} 18 | node = TaskTreeNode(func, args, kwargs) 19 | self.children.append(node) 20 | node.parent = self 21 | return node 22 | 23 | def push(self, func, args=None, kwargs=None): 24 | self.last_node = self.last_node.add_task(func, args, kwargs) 25 | return self.last_node 26 | 27 | def pop(self): 28 | if self.last_node == self: 29 | raise IndexError('pop from empty stack') 30 | parent = self.last_node.parent 31 | parent.children.remove(self.last_node) 32 | self.last_node = parent 33 | 34 | def apply_async(self): 35 | tasks = [] 36 | for node in self.children: 37 | func = node.func 38 | args = node.args 39 | kwargs = node.kwargs 40 | callback = kwargs.pop('callback', []) 41 | if not isinstance(callback, (list, tuple)): 42 | callback = [callback] 43 | subtasks = node._get_child_tasks() 44 | callback += subtasks 45 | kwargs = dict(callback=callback, **kwargs) 46 | _task = func.subtask(args=args, kwargs=kwargs) 47 | tasks.append(_task) 48 | taskset = celery_group(*tasks) 49 | result = taskset.apply_async() 50 | return result 51 | 52 | def apply_and_join(self): 53 | """ Execute tasks asynchronously and wait for the latest result. 54 | 55 | Method can be useful in conjunction with pop()/push() methods. In such 56 | a case method returns a list of results in the order which corresponds 57 | to the order of nodes being pushed. 58 | """ 59 | return join_tree(self.apply_async()) 60 | 61 | 62 | def join_tree(async_result): 63 | """ Join to all async results in the tree """ 64 | output = [] 65 | results = async_result.join() 66 | if not results: 67 | return output 68 | first_result = results[0] 69 | while True: 70 | output.append(first_result) 71 | if not getattr(first_result, 'async_result', None): 72 | break 73 | first_result = first_result.async_result.join()[0] 74 | return output 75 | 76 | 77 | class TaskTreeNode(object): 78 | 79 | def __init__(self, func, args=None, kwargs=None): 80 | self.parent = None 81 | if args is None: 82 | args = [] 83 | if kwargs is None: 84 | kwargs = {} 85 | self.func = func 86 | self.args = args 87 | self.kwargs = kwargs 88 | self.children = [] 89 | 90 | def add_task(self, func, args=None, kwargs=None): 91 | if args is None: 92 | args = [] 93 | if kwargs is None: 94 | kwargs = {} 95 | node = TaskTreeNode(func, args, kwargs) 96 | self.children.append(node) 97 | node.parent = self 98 | return node 99 | 100 | def _get_child_tasks(self): 101 | tasks = [] 102 | for node in self.children: 103 | func = node.func 104 | args = node.args 105 | kwargs = node.kwargs 106 | callback = kwargs.pop('callback', []) 107 | if not isinstance(callback, (list, tuple)): 108 | callback = [callback] 109 | subtasks = node._get_child_tasks() 110 | callback += subtasks 111 | kwargs = dict(callback=callback, **kwargs) 112 | _task = func.subtask(args=args, kwargs=kwargs) 113 | tasks.append(_task) 114 | return tasks 115 | 116 | 117 | def task_with_callbacks(func, **options): 118 | """ decorator "task with callbacks" 119 | 120 | Callback or list of callbacks which go to function in "callbacks" kwarg, 121 | will be executed after the function, regardless of the subtask's return 122 | status. 123 | 124 | If subtask (function) result is an object, then a property named 125 | "async_result" will be added to that object so that it will be possible to 126 | join() for that result. 127 | """ 128 | return task(run_with_callbacks(func), **options) 129 | 130 | 131 | def run_with_callbacks(func): 132 | """Decorator "run with callbacks" 133 | 134 | Function is useful as decorator for :meth:`run` method of tasks which are 135 | subclasses of generic :class:`celery.task.Task` and are expected to be used 136 | with callbacks. 137 | """ 138 | @wraps(func) 139 | def wrapper(*args, **kwargs): 140 | callback = kwargs.pop('callback', None) 141 | retval = func(*args, **kwargs) 142 | async_result = _exec_callbacks(callback) 143 | try: 144 | retval.async_result = async_result 145 | except AttributeError: 146 | pass 147 | return retval 148 | return wrapper 149 | 150 | 151 | def _exec_callbacks(callback): 152 | """ Exec the callback or list of callbacks. Return asyncronous results as 153 | the TaskSetResult object. 154 | """ 155 | async_result = None 156 | if callback: 157 | if not isinstance(callback, (list, tuple)): # not iterable 158 | callback = [callback,] 159 | taskset = celery_group(*callback) 160 | async_result = taskset.apply_async() 161 | return async_result 162 | --------------------------------------------------------------------------------