├── .gitignore ├── .travis.yml ├── LICENSE ├── README.md ├── setup.py └── taskmap ├── __init__.py ├── logparse.py ├── taskmap.py ├── taskmap_test.py └── tgraph.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | .static_storage/ 56 | .media/ 57 | local_settings.py 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "3.5" 4 | - "3.6" 5 | 6 | # command to install dependencies 7 | install: 8 | - pip install --editable ./ 9 | - pip install pytest 10 | 11 | # command to run tests 12 | script: 13 | - py.test 14 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Noam Finkelstein 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
[![Build Status](https://travis-ci.org/n-s-f/taskmap.svg?branch=master)](https://travis-ci.org/n-s-f/taskmap)

# taskmap

This library facilitates keeping track of dependencies between python functions,
and running them asynchronously and/or in parallel.

## Overview

There are many libraries in python that help run pipelines in parallel while
keeping track of dependencies between tasks, notably
[joblib](https://pythonhosted.org/joblib/) and
[luigi](https://github.com/spotify/luigi).

taskmap provides a way to easily handle coroutines in task pipelines. Many kinds
of pipelines will have a significant number of tasks that are IO bound. Running
these tasks in parallel will still leave lots of unused processing time.

That's where taskmap comes in. It's designed to help you get the most out of a
single machine. You specify the dependency graph for your tasks (which are just
native python functions or coroutines), and optionally which ones are IO bound.
The tasks can then be run asynchronously and in parallel, making sure that no core
sits unused while there are tasks it could be running.

Because taskmap keeps track of the dependency graph, it is easy to rerun only
failed tasks. It's also possible to change the functions corresponding to tasks
and rerun only those changed tasks and their children. You can then cache your
results, so that later you can pick up where you left off.

## Installation

```
pip install taskmap
```

## Quick Start

This example demonstrates the major use case of the taskmap library.

```.py
import taskmap
import asyncio
import time

# simulates io waits with asyncio.sleep
async def io_bound_a(): await asyncio.sleep(1); return 'io_a'
async def io_bound_b(x): await asyncio.sleep(1); return x + ' io_b'

# simulates cpu usage with time.sleep
async def cpu_bound_a(x): time.sleep(1); return x + ' cpu_a'
async def cpu_bound_b(): time.sleep(1); return 'cpu_b'

def test_async_parallel_demo():
    # given
    funcs = {
        'io_bound_a': io_bound_a,
        'io_bound_b': io_bound_b,
        'cpu_bound_a': cpu_bound_a,
        'cpu_bound_b': cpu_bound_b,
    }

    dependencies = {
        'io_bound_a': [],
        'io_bound_b': ['cpu_bound_b'],
        'cpu_bound_a': ['io_bound_a'],
        'cpu_bound_b': [],
    }

    io_bound = ['io_bound_a', 'io_bound_b']
    graph = taskmap.create_graph(funcs, dependencies, io_bound=io_bound)

    # when
    graph = taskmap.run_parallel_async(graph, nprocs=2)

    # then
    assert graph.results['io_bound_a'] == 'io_a'
    assert graph.results['io_bound_b'] == 'cpu_b io_b'
    assert graph.results['cpu_bound_a'] == 'io_a cpu_a'
    assert graph.results['cpu_bound_b'] == 'cpu_b'
```

More examples can be found in the tests.
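If all of your tasks are plain functions, the same graph API runs them
synchronously as well. A minimal sketch, adapted from the tests:

```.py
import taskmap

def a(): return 5
def b(x): return x + 10
def c(x, y): return x + y + 20

funcs = {'a': a, 'b': b, 'c': c}
dependencies = {'c': ['a', 'b'], 'b': ['a'], 'a': []}

graph = taskmap.create_graph(funcs, dependencies)
graph = taskmap.run(graph)

# each task receives the results of its dependencies, in order
assert graph.results == {'a': 5, 'b': 15, 'c': 40}
```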
## API

### Creating and Running the Graph

#### create_graph(funcs, dependencies, io_bound=None, done=None, results=None, name='taskmap', logging_config=None)

Creates the dependency graph.

`dependencies`: a dictionary that maps task names to a list of dependencies. The
results of those dependencies will be fed into the function in the order in
which they appear. Tasks that return `None` will not have their results fed into
the tasks that depend on them.

`funcs`: a dictionary that maps the names of the tasks to functions. Each
function should accept the same number of arguments as it has dependencies that
return non-`None` values.

`io_bound`: a list of the names of the tasks that are io bound. These will be
picked up first, so that the cpu bound tasks can be executed while waiting on
results from e.g. network or database calls.

`done`: a list of the names of tasks that are already done. These tasks will not
be run if any of the `run*(graph)` functions are called with this graph. This is
a way to run only part of a dependency graph without changing the code that
creates the `dependencies` or `funcs` arguments.

`results`: a dictionary mapping the names of tasks to their results. This is
useful if the tasks listed in the `done` argument have results that their
children need passed to them.

`name`: the name of the graph. It is used to name the graph's loggers and any
log file they write.

`logging_config`: a dictionary with the optional keys `'write'`, which specifies
whether the log is written to disk (default false), and `'level'`, which sets
the logging severity (default `logging.DEBUG`). If two graphs are created with
the same name, only the logging config from the first is used.

This function will throw for a dependency dictionary with cyclic dependencies,
or if there are functions that are depended on but are not present as keys in
the dependencies dictionary.

Note that for coroutines, `functools.partial` will not work. If you need to
create partial functions to use as tasks, you can use `partial` from the `paco`
library.
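Together, `done` and `results` let a graph pick up where a previous run left
off. A minimal sketch, adapted from the tests:

```.py
import taskmap

def a(): return 5
def b(x): return x + 10

funcs = {'a': a, 'b': b}
dependencies = {'b': ['a'], 'a': []}

# 'a' is already done; its cached result is fed to 'b' instead of rerunning it
graph = taskmap.create_graph(funcs, dependencies, done=['a'], results={'a': 5})
graph = taskmap.run(graph)

assert graph.results['b'] == 15
```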
#### taskmap.run_parallel_async(graph, nprocs=None, sleep=0.2, raise_errors=False)

Runs the graph asynchronously across multiple cores. All tasks must be python
coroutines. This can be used when tasks are bottlenecked by both io and cpu.

`sleep` determines how long each process waits between checks to see if a new
task has become available.

`nprocs` is how many processes are used in parallel. Defaults to half of the
available cores.

All of the `run*` functions also accept a `raise_errors` flag. When it is true,
a task failure raises an exception in the caller instead of only being recorded
in the graph's results.

#### taskmap.run_async(graph, sleep=0.2, raise_errors=False)

Runs all coroutines on a single core. This can be used if all tasks are
bottlenecked by io.

#### taskmap.run_parallel(graph, nprocs=None, sleep=0.2, raise_errors=False)

The tasks must be normal python functions; they are run in parallel but not
asynchronously. This can be used if all tasks are cpu bottlenecked. `nprocs`
defaults to one less than the number of available cores.

#### taskmap.run(graph, raise_errors=False)

All tasks must be normal python functions and are run synchronously in a single
process.

### Handling Failed Tasks

#### taskmap.reset_failed_tasks(graph)

taskmap marks tasks that throw an exception as 'failed'. This function allows
you to rebuild a graph to only run the tasks that have failed and their
children. A common pattern is:

```.py
result_graph = taskmap.run_parallel_async(graph)
# failures abound

new_graph = taskmap.reset_failed_tasks(result_graph)

# make a fix (e.g. make sure DB is available)
new_result_graph = taskmap.run_parallel_async(new_graph)
```

#### taskmap.reset_tasks(graph, tasks)

This function allows you to rebuild a graph to only run a subset of tasks, and
their children. This is useful if you change some of the tasks in the `funcs`
dict and want to rerun those tasks and the tasks that depend on their outcomes.
This can be because there was a bug in the task, or simply because you want to
alter the behavior.

```.py
result_graph = taskmap.run_parallel_async(graph)

# change the function corresponding to some task name
result_graph.funcs['some_func'] = new_task

new_graph = taskmap.reset_tasks(result_graph, ['some_func'])
new_result_graph = taskmap.run_parallel_async(new_graph)
```

### Manipulating the Graph

#### taskmap.mark_as_done(graph, tasks)

Marks the given tasks as done, so that subsequent `run*` calls will skip them.

#### taskmap.mark_as_done_except(graph, tasks)

Marks every task other than the given ones as done, so that only the given
tasks will be run.

### Parsing Logs

#### taskmap.in\_progress(path\_to\_log)

Parses a taskmap log file and returns a dictionary with two sets of task names:
`'in_progress'` (started but not finished) and `'queued'` (queued but not yet
started).
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
from setuptools import setup, find_packages

setup(
    name='taskmap',
    version='0.0.7',
    description='Dependency graph with parallel asynchronous task runner',
    url='https://github.com/n-s-f/taskmap',
    packages=find_packages(exclude=['contrib', 'docs', 'tests']),
    install_requires=['multiprocess', 'multiprocessing-logging'],
)
--------------------------------------------------------------------------------
/taskmap/__init__.py:
--------------------------------------------------------------------------------
from .taskmap import (run_task, run_task_async, run, run_parallel, run_async,
                      run_parallel_async)

from .tgraph import (create_graph, get_ready_tasks, mark_as_done,
                     mark_as_done_except, get_all_children, reset_tasks,
                     reset_failed_tasks)

from .logparse import in_progress
--------------------------------------------------------------------------------
/taskmap/logparse.py:
--------------------------------------------------------------------------------
def in_progress(path_to_log):
    with open(path_to_log, 'r') as f:
        log = f.readlines()

    queued = []
    started = []
    finished = []
    for line in log:
        words = line.strip().split(' ')

        if 'starting' in words:
            started.append(words[-1])
        elif 'finished' in words:
            finished.append(words[-1])
        elif 'queueing' in words:
            queued.append(words[-1])

    return {
        'in_progress': set(started) - set(finished),
        'queued': set(queued) - set(started),
    }
--------------------------------------------------------------------------------
/taskmap/taskmap.py:
--------------------------------------------------------------------------------
from taskmap import tgraph

import os
import time
import asyncio
import logging
import traceback
import multiprocess as mp


def log(graph):
    return logging.getLogger('{}-worker'.format(graph.name))


def mlog(graph):
    return logging.getLogger('{}-manager'.format(graph.name))


def run_task(graph, task, raise_errors=False):
    graph = tgraph.mark_as_in_progress(graph, task)
    args = get_task_args(graph, task)
    log(graph).info('pid {}: starting task {}'.format(os.getpid(),
task)) 23 | 24 | try: 25 | result = graph.funcs[task](*args) 26 | return task_success(graph, task, result) 27 | 28 | except Exception as error: 29 | graph = task_error(graph, task, error) 30 | if raise_errors: 31 | raise 32 | return graph 33 | 34 | 35 | async def run_task_async(graph, task, raise_errors=False): 36 | graph = tgraph.mark_as_in_progress(graph, task) 37 | args = get_task_args(graph, task) 38 | log(graph).info('pid {}: starting task {}'.format(os.getpid(), task)) 39 | 40 | try: 41 | result = await asyncio.coroutine(graph.funcs[task])(*args) 42 | return task_success(graph, task, result) 43 | 44 | except Exception as error: 45 | graph = task_error(graph, task, error) 46 | if raise_errors: 47 | raise 48 | return graph 49 | 50 | 51 | def task_success(graph, task, result): 52 | log(graph).info('pid {}: finished task {}'.format(os.getpid(), task)) 53 | graph.results[task] = result 54 | return tgraph.mark_as_done(graph, task) 55 | 56 | 57 | def task_error(graph, task, error): 58 | tb = traceback.format_exc() 59 | msg = 'pid {}: failed task {}: stack {}'.format(os.getpid(), task, tb) 60 | log(graph).exception(msg, {'exc_info': error}) 61 | graph.results[task] = error 62 | graph = tgraph.mark_as_done(graph, task) 63 | return mark_children_as_incomplete(graph, task) 64 | 65 | 66 | def run(graph, raise_errors=False): 67 | while not tgraph.all_done(graph): 68 | ready = tgraph.get_ready_tasks(graph) 69 | for task in ready: 70 | log(graph).info('pid {}: claiming task {}'.format(os.getpid(), task)) 71 | graph = run_task(graph, task, raise_errors) 72 | return graph 73 | 74 | 75 | def run_parallel(graph, nprocs=None, sleep=0.2, raise_errors=False): 76 | nprocs = nprocs or mp.cpu_count() - 1 77 | with mp.Manager() as manager: 78 | graph = tgraph.create_parallel_compatible_graph(graph, manager) 79 | with mp.Pool(nprocs) as pool: 80 | 81 | exception_q = mp.Queue(10) 82 | 83 | def error_callback(exception): 84 | exception_q.put_nowait(exception) 85 | pool.terminate() 86 | 87 | while not tgraph.all_done(graph): 88 | for task in tgraph.get_ready_tasks(graph, reverse=False): 89 | graph = tgraph.mark_as_in_progress(graph, task) 90 | mlog(graph).info( 91 | 'pid {}: assigning task {}'.format(os.getpid(), task)) 92 | pool.apply_async( 93 | run_task, args=(graph, task, raise_errors), 94 | error_callback=error_callback 95 | ) 96 | time.sleep(sleep) 97 | 98 | if not exception_q.empty(): 99 | raise exception_q.get() 100 | 101 | return tgraph.recover_values_from_manager(graph) 102 | 103 | 104 | def exception_handler(loop, context): 105 | # workaround for the fact that asyncio will not let you stop on exceptions 106 | # for tasks added to the loop after it has already started running 107 | loop.stop() 108 | 109 | 110 | def run_async(graph, sleep=0.2, coro=None, raise_errors=False): 111 | ioq = asyncio.Queue(len(graph.funcs.keys())) 112 | cpuq = asyncio.Queue(len(graph.funcs.keys())) 113 | loop = asyncio.new_event_loop() 114 | loop.set_exception_handler(exception_handler) 115 | coros = asyncio.gather( 116 | queue_loader(graph, ioq, cpuq, sleep), 117 | scheduler(graph, sleep, ioq, cpuq, loop, raise_errors), 118 | loop=loop) 119 | 120 | try: 121 | loop.run_until_complete(coros) 122 | except Exception as error: 123 | raise RuntimeError('An async task has failed. 
Please check your logs') 124 | finally: 125 | loop.close() 126 | 127 | return graph 128 | 129 | 130 | def run_parallel_async(graph, nprocs=None, sleep=0.2, raise_errors=False): 131 | if nprocs == 1: 132 | return run_async(graph, sleep=sleep, raise_errors=raise_errors) 133 | 134 | nprocs = nprocs or mp.cpu_count() // 2 135 | 136 | with mp.Manager() as manager: 137 | graph = tgraph.create_parallel_compatible_graph(graph, manager) 138 | 139 | ioq = mp.Queue(len(graph.funcs.keys())) 140 | cpuq = mp.Queue(len(graph.funcs.keys())) 141 | 142 | procs = [mp.Process(target=run_scheduler, 143 | args=(graph, sleep, ioq, cpuq, raise_errors)) 144 | for _ in range(nprocs)] 145 | for proc in procs: 146 | proc.start() 147 | 148 | while not tgraph.all_done(graph): 149 | for task in tgraph.get_ready_tasks(graph): 150 | graph = tgraph.mark_as_in_progress(graph, task) 151 | mlog(graph).info( 152 | 'pid {}: queueing task {}'.format(os.getpid(), task)) 153 | if task in graph.io_bound: 154 | ioq.put(task) 155 | else: 156 | cpuq.put(task) 157 | 158 | time.sleep(sleep) 159 | 160 | if raise_errors and sum(not p.exitcode for p in procs): 161 | raise RuntimeError('An async task has failed. Please check your logs') 162 | 163 | return tgraph.recover_values_from_manager(graph) 164 | 165 | 166 | def run_scheduler(graph, sleep, ioq, cpuq, raise_errors=False): 167 | loop = asyncio.new_event_loop() 168 | loop.set_exception_handler(exception_handler) 169 | try: 170 | loop.run_until_complete( 171 | scheduler(graph, sleep, ioq, cpuq, loop, raise_errors)) 172 | except Exception as error: 173 | raise RuntimeError('An async task has failed. Please check your logs') 174 | finally: 175 | loop.close() 176 | 177 | 178 | # TODO: scheduler can be improved 179 | async def scheduler(graph, sleep, ioq, cpuq, loop, raise_errors): 180 | while not tgraph.all_done(graph): 181 | try: 182 | task = ioq.get_nowait() 183 | log(graph).info( 184 | 'pid {}: dequeueing task {}'.format(os.getpid(), task)) 185 | asyncio.ensure_future( 186 | run_task_async(graph, task, raise_errors), loop=loop) 187 | except Exception: 188 | try: 189 | task = cpuq.get_nowait() 190 | log(graph).info( 191 | 'pid {}: dequeueing task {}'.format(os.getpid(), task)) 192 | asyncio.ensure_future( 193 | run_task_async(graph, task, raise_errors), loop=loop) 194 | # don't put two cpu intensive tasks on the same core without waiting 195 | await asyncio.sleep(sleep) 196 | except Exception: 197 | await asyncio.sleep(sleep) 198 | 199 | 200 | async def queue_loader(graph, ioq, cpuq, sleep): 201 | while not tgraph.all_done(graph): 202 | for task in tgraph.get_ready_tasks(graph): 203 | graph = tgraph.mark_as_in_progress(graph, task) 204 | log(graph).info( 205 | 'pid {}: queueing task {}'.format(os.getpid(), task)) 206 | 207 | if task in graph.io_bound: 208 | await ioq.put(task) 209 | else: 210 | await cpuq.put(task) 211 | 212 | await asyncio.sleep(sleep) 213 | 214 | 215 | def mark_children_as_incomplete(graph, task): 216 | children = tgraph.get_all_children(graph, task) 217 | 218 | if not children: 219 | return graph 220 | 221 | log(graph).info('pid {}: marking children {} of failed task {}'.format( 222 | os.getpid(), children, task)) 223 | 224 | msg = 'Ancestor task {} failed; task not run'.format(task) 225 | for child in children: 226 | graph.results[child] = msg 227 | tgraph.mark_as_done(graph, child) 228 | return graph 229 | 230 | 231 | def get_task_args(graph, task): 232 | return [ 233 | graph.results.get(dep) for dep in graph.dependencies[task] 234 | if graph.results.get(dep) is not 
None 235 | ] 236 | -------------------------------------------------------------------------------- /taskmap/taskmap_test.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import logging 3 | import taskmap 4 | import pytest 5 | import time 6 | import os 7 | 8 | # disable logging during tests 9 | logging.disable(logging.CRITICAL) 10 | 11 | 12 | def a(): 13 | return 5 14 | 15 | 16 | def b(x): 17 | return x + 10 18 | 19 | 20 | def c(x, y): 21 | return x + y + 20 22 | 23 | 24 | def test_graph_ready(): 25 | # given 26 | dependencies = { 27 | 'a': {'b', 'c'}, 28 | 'b': {'c'}, 29 | 'c': set(), 30 | } 31 | 32 | funcs = { 33 | 'a': a, 34 | 'b': b, 35 | 'c': c, 36 | } 37 | 38 | graph = taskmap.create_graph(funcs, dependencies) 39 | 40 | # when 41 | results = taskmap.get_ready_tasks(graph) 42 | 43 | # then 44 | assert results == ['c'] 45 | 46 | 47 | def test_graph_ordered_ready(): 48 | # given 49 | dependencies = {'a': set(), 'b': set()} 50 | funcs = {'a': a, 'b': b} 51 | io_bound = ['a'] 52 | graph = taskmap.create_graph(funcs, dependencies, io_bound=io_bound) 53 | 54 | # when 55 | results = taskmap.get_ready_tasks(graph) 56 | 57 | # then 58 | assert results == ['a', 'b'] 59 | 60 | 61 | def test_tasks_can_be_marked_done(): 62 | # given 63 | funcs = {'a': a, 'b': b} 64 | dependencies = {'a': ['b'], 'b': []} 65 | 66 | # when 67 | graph = taskmap.create_graph(funcs, dependencies, done=['b']) 68 | 69 | # then 70 | assert taskmap.get_ready_tasks(graph) == ['a'] 71 | 72 | 73 | def test_cached_results_are_used(): 74 | # given 75 | funcs = {'a': a, 'b': b} 76 | dependencies = {'b': ['a'], 'a': []} 77 | results = {'a': 5} 78 | 79 | graph = taskmap.create_graph( 80 | funcs, dependencies, done=['a'], results=results) 81 | 82 | # when 83 | graph = taskmap.run(graph) 84 | 85 | # then 86 | assert graph.results['b'] == 15 87 | 88 | 89 | def test_graph_ready_after_task_completed(): 90 | # given 91 | dependencies = { 92 | 'a': {'b', 'c'}, 93 | 'b': {'c'}, 94 | 'c': set(), 95 | } 96 | 97 | funcs = { 98 | 'a': a, 99 | 'b': b, 100 | 'c': c, 101 | } 102 | 103 | graph = taskmap.create_graph(funcs, dependencies) 104 | ready = taskmap.get_ready_tasks(graph) 105 | 106 | # when 107 | for func in ready: 108 | taskmap.mark_as_done(graph, func) 109 | 110 | results = taskmap.get_ready_tasks(graph) 111 | 112 | # then 113 | assert results == ['b'] 114 | 115 | 116 | def test_mark_as_done_except(): 117 | # given 118 | dependencies = { 119 | 'a': {'b', 'c'}, 120 | 'b': {'c'}, 121 | 'c': set(), 122 | } 123 | 124 | funcs = { 125 | 'a': a, 126 | 'b': b, 127 | 'c': c, 128 | } 129 | 130 | graph = taskmap.create_graph(funcs, dependencies) 131 | graph = taskmap.mark_as_done_except(graph, ['c']) 132 | 133 | results = taskmap.get_ready_tasks(graph) 134 | 135 | # then 136 | assert results == ['c'] 137 | 138 | 139 | def test_cyclic_dependency(): 140 | # given 141 | dependencies = { 142 | 'a': {'b'}, 143 | 'b': {'c'}, 144 | 'c': {'a'}, 145 | } 146 | 147 | funcs = { 148 | 'a': a, 149 | 'b': b, 150 | 'c': c, 151 | } 152 | 153 | # then 154 | with pytest.raises(ValueError): 155 | 156 | # when 157 | taskmap.create_graph(funcs, dependencies) 158 | 159 | 160 | def test_absent_tasks(): 161 | # given 162 | dependencies = { 163 | 'a': {'b', 'c'}, 164 | } 165 | 166 | funcs = { 167 | 'a': a, 168 | 'b': b, 169 | 'c': c, 170 | } 171 | 172 | # then 173 | with pytest.raises(ValueError): 174 | 175 | # when 176 | taskmap.create_graph(funcs, dependencies) 177 | 178 | 179 | def test_all_names_are_funcs(): 
180 | # given 181 | dependencies = {'d': ['a'], 'a': []} 182 | 183 | funcs = {'a': a, 'b': b, 'c': c} 184 | 185 | # then 186 | with pytest.raises(ValueError): 187 | 188 | # when 189 | taskmap.create_graph(funcs, dependencies) 190 | 191 | 192 | def test_logging_no_write(): 193 | # given 194 | dependencies = {'a': []} 195 | funcs = {'a': a} 196 | logging_config = {'write': False} 197 | 198 | # when 199 | taskmap.create_graph(funcs, dependencies, name='name', logging_config=logging_config) 200 | 201 | 202 | def test_logging_filename_change(): 203 | # given 204 | dependencies = {'a': []} 205 | funcs = {'a': a} 206 | name = 'test-taskmap-name' 207 | graph = taskmap.create_graph(funcs, dependencies, name=name, 208 | logging_config={'write': True}) 209 | 210 | # when 211 | graph = taskmap.run(graph) 212 | 213 | # then 214 | assert any(name in f for f in os.listdir('./')) 215 | 216 | 217 | def test_default_logging_severity_level(): 218 | dependencies = {'a': []} 219 | funcs = {'a': a} 220 | name = 'test-taskmap-default-level' 221 | taskmap.create_graph(funcs, dependencies, name=name) 222 | 223 | manager_logger_name = '{}-manager'.format(name) 224 | worker_logger_name = '{}-worker'.format(name) 225 | 226 | assert logging.getLogger(manager_logger_name).level == logging.DEBUG 227 | assert logging.getLogger(worker_logger_name).level == logging.DEBUG 228 | 229 | 230 | def test_explicit_logging_severity_level(): 231 | dependencies = {'a': []} 232 | funcs = {'a': a} 233 | name = 'test-taskmap-explicit-level' 234 | taskmap.create_graph(funcs, dependencies, name=name, 235 | logging_config={'level': logging.ERROR}) 236 | 237 | manager_logger_name = '{}-manager'.format(name) 238 | worker_logger_name = '{}-worker'.format(name) 239 | 240 | assert logging.getLogger(manager_logger_name).level == logging.ERROR 241 | assert logging.getLogger(worker_logger_name).level == logging.ERROR 242 | 243 | 244 | def test_run_pass_args(): 245 | # given 246 | dependencies = { 247 | 'c': ['a', 'b'], 248 | 'b': ['a'], 249 | 'a': [], 250 | } 251 | 252 | funcs = { 253 | 'a': a, 254 | 'b': b, 255 | 'c': c, 256 | } 257 | 258 | graph = taskmap.create_graph(funcs, dependencies) 259 | 260 | # when 261 | graph = taskmap.run(graph) 262 | 263 | # then 264 | assert graph.results == {'a': 5, 'b': 15, 'c': 40} 265 | 266 | 267 | error = RuntimeError('some error') 268 | 269 | 270 | def d(): 271 | raise error 272 | 273 | 274 | def test_sync_error_handling(): 275 | # given 276 | dependencies = { 277 | 'c': ['d'], 278 | 'd': [], 279 | } 280 | 281 | funcs = { 282 | 'd': d, 283 | 'c': c, 284 | } 285 | 286 | # when 287 | graph = taskmap.create_graph(funcs, dependencies) 288 | 289 | result = taskmap.run(graph) 290 | result_parallel = taskmap.run_parallel(graph, nprocs=2, sleep=.001) 291 | 292 | # then 293 | expected = { 294 | 'd': error, 295 | 'c': 'Ancestor task d failed; task not run', 296 | } 297 | assert result.results['c'] == expected['c'] 298 | assert result.results['d'].__class__ == expected['d'].__class__ 299 | assert result.results['d'].args == expected['d'].args 300 | 301 | assert result_parallel.results['c'] == expected['c'] 302 | assert result_parallel.results['d'].__class__ == expected['d'].__class__ 303 | assert result_parallel.results['d'].args == expected['d'].args 304 | 305 | 306 | def test_sync_error_raise_errors(): 307 | # given 308 | dependencies = {'c': ['d'], 'd': []} 309 | funcs = {'d': d, 'c': c} 310 | 311 | # when 312 | graph = taskmap.create_graph(funcs, dependencies) 313 | with pytest.raises(RuntimeError, match='some 
error'):
        taskmap.run(graph, raise_errors=True)

    graph = taskmap.create_graph(funcs, dependencies)
    with pytest.raises(RuntimeError, match='some error'):
        taskmap.run_parallel(graph, raise_errors=True)


async def control():
    return 5


async def e():
    raise error


async def g(er):
    return er


def test_async_error_handling():
    # given
    dependencies = {
        'g': ['e'],
        'e': [],
        'control': [],
    }

    funcs = {
        'e': e,
        'g': g,
        'control': control,
    }

    # when
    graph = taskmap.create_graph(funcs.copy(), dependencies.copy())
    graph = taskmap.run_async(graph, sleep=.001)

    graph_parallel = taskmap.create_graph(funcs.copy(), dependencies.copy())
    graph_parallel = taskmap.run_parallel_async(graph_parallel, nprocs=2, sleep=.001)

    # then
    expected = {
        'e': error,
        'control': 5,
        'g': 'Ancestor task e failed; task not run',
    }

    assert graph.results['g'] == expected['g']
    assert graph.results['e'].__class__ == expected['e'].__class__
    assert graph.results['e'].args == expected['e'].args
    assert graph.results['control'] == 5

    assert graph_parallel.results['g'] == expected['g']
    assert graph_parallel.results['e'].__class__ == expected['e'].__class__
    assert graph_parallel.results['e'].args == expected['e'].args
    assert graph_parallel.results['control'] == 5


def test_async_error_raise_errors():
    # given
    funcs = {'e': e, 'control': control}
    dependencies = {'e': [], 'control': []}

    # when
    graph = taskmap.create_graph(funcs, dependencies)
    with pytest.raises(RuntimeError, match='check your logs'):
        taskmap.run_async(graph, raise_errors=True)

    graph = taskmap.create_graph(funcs, dependencies)
    with pytest.raises(RuntimeError, match='check your logs'):
        taskmap.run_parallel_async(graph, raise_errors=True)


def test_rebuilding_graph_from_failure():
    # given
    dependencies = {
        'c': ['e'],
        'e': [],
        'w': [],
    }

    funcs = {
        'e': e,
        'c': c,
        'w': w,
    }

    graph = taskmap.create_graph(funcs.copy(), dependencies.copy())
    graph = taskmap.run_parallel_async(graph, nprocs=2, sleep=.001)

    # when
    new_graph = taskmap.reset_failed_tasks(graph)

    # then
    assert new_graph.done == ['w']


def test_get_all_children():
    # given
    dependencies = {
        'd': ['a'],
        'c': ['b'],
        'b': ['a'],
        'a': [],
    }

    funcs = {
        'a': a,
        'b': b,
        'c': c,
        'd': d,
    }

    graph = taskmap.create_graph(funcs, dependencies)

    # when
    a_children = taskmap.get_all_children(graph, 'a')
    b_children = taskmap.get_all_children(graph, 'b')
    c_children = taskmap.get_all_children(graph, 'c')

    # then
    assert a_children == {'b', 'c', 'd'}
    assert b_children == {'c'}
    assert c_children == set()


def long_task():
    time.sleep(.02)
    return 5


def test_run_parallel():
    # given
    dependencies = {
        'c': ['long_task', 'b'],
        'b': ['long_task'],
        'long_task': [],
    }

    funcs = {
        'long_task': long_task,
        'b': b,
        'c': c,
    }

    graph = taskmap.create_graph(funcs, dependencies)
    # when
    graph = taskmap.run_parallel(graph, nprocs=2, sleep=.001)

    # then
    assert graph.results == {'long_task': 5, 'b': 15, 'c': 40}


async def ab(x):
    return x + 10


async def ac(x, y):
    return x + y + 20


async def along_task():
    await asyncio.sleep(.02)
    return 5


def test_run_async():
    # given
    dependencies = {
        'ac': ['along_task', 'ab'],
        'ab': ['along_task'],
        'along_task': [],
    }

    funcs = {
        'along_task': along_task,
        'ab': ab,
        'ac': ac,
    }

    graph = taskmap.create_graph(funcs, dependencies)

    # when
    graph = taskmap.run_async(graph, sleep=0.001)

    # then
    assert graph.results == {'along_task': 5, 'ab': 15, 'ac': 40}


def test_run_parallel_async():
    # given
    dependencies = {
        'ac': ['along_task', 'ab'],
        'ab': ['along_task'],
        'along_task': [],
    }

    funcs = {
        'along_task': along_task,
        'ab': ab,
        'ac': ac,
    }

    graph = taskmap.create_graph(funcs, dependencies)

    # when
    graph = taskmap.run_parallel_async(graph, nprocs=2, sleep=.001)

    # then
    assert graph.results == {'along_task': 5, 'ab': 15, 'ac': 40}


async def x():
    await asyncio.sleep(.4)
    return 5


async def y():
    await asyncio.sleep(.4)
    return 5


def test_async_speed():
    # given
    funcs = {'x': x, 'y': y}
    dependencies = {'x': [], 'y': []}
    graph = taskmap.create_graph(funcs, dependencies)

    # when
    start = time.time()
    taskmap.run_async(graph, sleep=0.001)
    end = time.time()

    # then
    assert end - start < .8


def v():
    time.sleep(.4)
    return 5


def u():
    time.sleep(.4)
    return 5


def test_parallel_speed():
    # given
    funcs = {'x': u, 'y': v}
    dependencies = {'x': [], 'y': []}
    graph = taskmap.create_graph(funcs, dependencies)

    # when
    start = time.time()
    taskmap.run_parallel(graph, nprocs=2, sleep=.001)
    end = time.time()

    # then
    assert end - start < .8


async def r():
    await asyncio.sleep(.4)


async def t():
    await asyncio.sleep(.4)


async def w():
    time.sleep(.4)


async def p():
    time.sleep(.4)


def test_async_parallel_speed():
    # given
    funcs = {'r': r, 't': t, 'w': w, 'p': p}
    dependencies = {'r': [], 't': [], 'w': [], 'p': []}
    graph = taskmap.create_graph(funcs, dependencies, io_bound=['r', 't'])

    # when
    start = time.time()
    taskmap.run_parallel_async(graph, nprocs=2, sleep=.0001)
    end = time.time()

    # then
    assert end - start < .8


async def io_bound_a(): await asyncio.sleep(.4); return 'io_a'
async def io_bound_b(x): await asyncio.sleep(.4); return x + ' io_b'
async def cpu_bound_a(x): time.sleep(.4); return x + ' cpu_a'
async def cpu_bound_b(): time.sleep(.4); return 'cpu_b'


def test_async_parallel_demo():
    # given
    funcs = {
        'io_bound_a': io_bound_a,
        'io_bound_b': io_bound_b,
        'cpu_bound_a': cpu_bound_a,
        'cpu_bound_b': cpu_bound_b,
    }

    dependencies = {
        'io_bound_a': [],
        'io_bound_b': ['cpu_bound_b'],
        'cpu_bound_a': ['io_bound_a'],
        'cpu_bound_b': [],
    }

    io_bound = ['io_bound_a', 'io_bound_b']
    graph = taskmap.create_graph(funcs, dependencies, io_bound=io_bound)

    # when
    start = time.time()
    graph = taskmap.run_parallel_async(graph, nprocs=2, sleep=.001)
    end = time.time()

    # then
    assert end - start < 1.2
    assert graph.results['io_bound_a'] == 'io_a'
    assert graph.results['io_bound_b'] == 'cpu_b io_b'
    assert graph.results['cpu_bound_a'] == 'io_a cpu_a'
    assert graph.results['cpu_bound_b'] == 'cpu_b'
--------------------------------------------------------------------------------
/taskmap/tgraph.py:
--------------------------------------------------------------------------------
import logging
import datetime as dt
import multiprocessing_logging as mplogging

from itertools import chain
from operator import contains
from functools import partial
from collections import namedtuple

Graph = namedtuple('graph', [
    'funcs', 'dependencies', 'done', 'results', 'in_progress', 'lock',
    'io_bound', 'name'
])


def reset_failed_tasks(graph):
    """
    create a new graph based on the outcomes of a previous run.
    if there were errors, only the failed tasks and their children will
    be marked as not done, so only they will be rerun. otherwise the new
    graph will have nothing left to run
    """
    failed_tasks = set([
        task for task, res in graph.results.items()
        if isinstance(res, Exception)
    ])

    return reset_tasks(graph, failed_tasks)


def reset_tasks(graph, tasks):
    children = set(chain(*[get_all_children(graph, task) for task in tasks]))
    rerun = children | set(tasks)

    for task in rerun:
        if task in graph.done:
            graph.results[task] = None
            graph.done.remove(task)

    return graph


def create_graph(funcs, dependencies, io_bound=None, done=None, results=None,
                 name='taskmap', logging_config=None):
    """
    logging_config is expected to be a dictionary. the keys can be 'name',
    which names the loggers to be used, 'write', which specifies whether
    the log is written to disk, and 'level', which sets the logging
    severity. Note if two graphs with the same name are
    created, only the logging config from the first will be used.
49 | """ 50 | defaults = {'name': name, 'write': False} 51 | setup_loggers({**defaults, **(logging_config or {})}) 52 | 53 | dependencies = {task: list(deps) for task, deps in dependencies.items()} 54 | io_bound = io_bound or [] 55 | done = done or [] 56 | results = results or {} 57 | 58 | check_all_tasks_present(dependencies) 59 | check_cyclic_dependency(dependencies) 60 | check_all_keys_are_funcs(funcs, dependencies) 61 | 62 | return Graph( 63 | funcs=funcs, 64 | dependencies=dependencies, 65 | in_progress=[], 66 | done=list(done), 67 | results=results, 68 | lock=0, 69 | io_bound=io_bound, 70 | name=name 71 | ) 72 | 73 | 74 | def check_cyclic_dependency(dependencies): 75 | ancestry = dict() 76 | 77 | for task, parents in dependencies.items(): 78 | already_seen = set() 79 | ancestry[task] = set() 80 | 81 | while parents: 82 | if task in parents: 83 | raise ValueError('Cyclic dependency: task %s' % task) 84 | 85 | already_seen.update(parents) 86 | ancestry[task].update(parents) 87 | 88 | new_parents = set() 89 | for parent in parents: 90 | new_parents.update(ancestry.get(parent, dependencies[parent])) 91 | 92 | parents = new_parents - already_seen 93 | 94 | 95 | def check_all_tasks_present(deps): 96 | absent_tasks = set(chain(*deps.values())) - set(deps.keys()) 97 | 98 | if absent_tasks: 99 | msg = ' '.join([ 100 | 'Tasks {} are depended upon, but are not present as', 101 | 'keys in dependencies dictionary.' 102 | ]) 103 | raise ValueError(msg.format(absent_tasks)) 104 | 105 | 106 | def check_all_keys_are_funcs(funcs, dependencies): 107 | vacuous_names = set(dependencies.keys()) - set(funcs.keys()) 108 | if vacuous_names: 109 | msg = ' '.join([ 110 | 'Tasks {} are listed in the dependencies dict, but do', 111 | 'not correspond to functions in the funcs dict.' 
112 | ]) 113 | raise ValueError(msg.format(vacuous_names)) 114 | 115 | 116 | def get_all_children(graph, task): 117 | all_children = set() 118 | new_children = {k for k, v in graph.dependencies.items() if task in v} 119 | while new_children: 120 | all_children.update(new_children) 121 | new_children = { 122 | k 123 | for child in new_children for k, v in graph.dependencies.items() 124 | if child in v 125 | } 126 | new_children = new_children - all_children 127 | 128 | return all_children 129 | 130 | 131 | def get_ready_tasks(graph, reverse=True): 132 | done = set(graph.done) or set() 133 | in_progress = graph.in_progress or set() 134 | ready = set() 135 | for task, deps in graph.dependencies.items(): 136 | if not set(deps) - done: 137 | ready.add(task) 138 | ready = list(ready - done - set(in_progress)) 139 | key = partial(contains, graph.io_bound) 140 | return sorted(ready, key=key, reverse=reverse) 141 | 142 | 143 | def mark_as_done_except(graph, task): 144 | if type(task) == str: 145 | task = [task] 146 | 147 | all_tasks = graph.dependencies.keys() 148 | 149 | for t in set(all_tasks) - set(graph.done) - set(task): 150 | graph.done.append(t) 151 | 152 | return graph 153 | 154 | 155 | def mark_as_done(graph, task): 156 | if type(task) == str: 157 | task = [task] 158 | 159 | for t in set(task) - set(graph.done): 160 | graph.done.append(t) 161 | 162 | return graph 163 | 164 | 165 | def mark_as_in_progress(graph, task): 166 | graph.in_progress.append(task) 167 | return graph 168 | 169 | 170 | def all_done(graph): 171 | return set(graph.done) == set(graph.dependencies.keys()) 172 | 173 | 174 | def setup_loggers(config): 175 | name = config.get('name', 'taskmap') 176 | level = config.get('level', logging.DEBUG) 177 | 178 | if logging.getLogger('{}-manager'.format(name)).handlers: 179 | # we've already configured these loggers 180 | return 181 | 182 | mlogger = logging.getLogger('{}-manager'.format(name)) 183 | mlogger.setLevel(level) 184 | 185 | logger = logging.getLogger('{}-worker'.format(name)) 186 | logger.setLevel(level) 187 | 188 | formatter = logging.Formatter( 189 | '%(asctime)s - %(name)s - %(levelname)s - %(message)s') 190 | 191 | ch = logging.StreamHandler() 192 | ch.setFormatter(formatter) 193 | ch.setLevel(level) 194 | logger.addHandler(ch) 195 | mlogger.addHandler(ch) 196 | 197 | if config.get('write', True): 198 | now = dt.datetime.now() 199 | logname_frmt = '{}{}.log'.format(name, now.strftime('%m-%d-%Y:%H.%M.%S')) 200 | fh = logging.FileHandler(logname_frmt) 201 | fh.setLevel(level) 202 | fh.setFormatter(formatter) 203 | logger.addHandler(fh) 204 | mlogger.addHandler(fh) 205 | 206 | mplogging.install_mp_handler(logger) 207 | 208 | 209 | def create_parallel_compatible_graph(graph, manager): 210 | return Graph( 211 | funcs=manager.dict(graph.funcs), 212 | dependencies=manager.dict(graph.dependencies), 213 | done=manager.list(graph.done), 214 | results=manager.dict(graph.results), 215 | in_progress=manager.list(), 216 | lock=manager.Value(int, 0), 217 | io_bound=manager.list(graph.io_bound), 218 | name=graph.name) 219 | 220 | 221 | def recover_values_from_manager(graph): 222 | return Graph( 223 | lock=0, 224 | in_progress=[], 225 | done=list(graph.done), 226 | funcs=dict(graph.funcs), 227 | results=dict(graph.results), 228 | io_bound=list(graph.io_bound), 229 | dependencies=dict(graph.dependencies), 230 | name=graph.name) 231 | --------------------------------------------------------------------------------