├── .gitignore ├── .travis.yml ├── LICENSE ├── README.rst ├── docs ├── Makefile ├── api.rst ├── conf.py ├── index.rst ├── installation.rst ├── make.bat ├── overview.rst ├── quickstart.rst ├── tasks.rst └── tutorial │ ├── index.rst │ ├── setup.rst │ └── simple.rst ├── examples ├── __init__.py ├── abort.py ├── bash.py ├── branching.py ├── chunking_dag.py ├── data_store.py ├── decision.py ├── multi_data.py ├── parallel.py ├── parameters.py ├── queues.py ├── sequence.py ├── simple.py ├── stop.py ├── sub_dag.py └── timing.py ├── lightflow-complete.sh ├── lightflow.cfg ├── lightflow ├── __init__.py ├── config.py ├── logger.py ├── models │ ├── __init__.py │ ├── action.py │ ├── dag.py │ ├── dag_signal.py │ ├── datastore.py │ ├── exceptions.py │ ├── mongo_proxy.py │ ├── parameters.py │ ├── signal.py │ ├── task.py │ ├── task_context.py │ ├── task_data.py │ ├── task_parameters.py │ ├── task_signal.py │ ├── utils.py │ └── workflow.py ├── queue │ ├── __init__.py │ ├── app.py │ ├── const.py │ ├── event.py │ ├── jobs.py │ ├── models.py │ ├── pickle.py │ └── worker.py ├── scripts │ ├── __init__.py │ └── cli.py ├── tasks │ ├── __init__.py │ ├── bash_task.py │ └── python_task.py ├── version.py ├── workers.py └── workflows.py ├── meta.yaml ├── requirements-dev.txt ├── setup.py ├── tests ├── __init__.py ├── conftest.py ├── fixtures │ └── workflows │ │ ├── dag_present_workflow.py │ │ ├── no_dag_workflow.py │ │ └── parameters_workflow.py ├── test_base_task.py ├── test_bash_task.py ├── test_exceptions.py ├── test_task_data.py ├── test_workflow.py └── test_workflows.py └── tox.ini /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by .ignore support plugin (hsz.mobi) 2 | ### Python template 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | env/ 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .coverage 43 | .coverage.* 44 | .cache 45 | nosetests.xml 46 | coverage.xml 47 | *,cover 48 | .hypothesis/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | 58 | # Flask instance folder 59 | instance/ 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # IPython Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # dotenv 80 | .env 81 | 82 | # virtualenv 83 | venv/ 84 | ENV/ 85 | 86 | # Spyder project settings 87 | .spyderproject 88 | 89 | # Rope project settings 90 | .ropeproject 91 | 92 | # PyCharm 93 | .idea/ 94 | 95 | # Visual Studio Code 96 | .vscode/ 97 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "3.5" 4 | - "3.6" 5 | install: 6 | - pip install -r requirements-dev.txt 7 | - pip install . 8 | script: pytest -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2017, Synchrotron Light Source Australia Pty Ltd 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | 2. Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | 3. Neither the name of the copyright holder nor the names of its contributors 15 | may be used to endorse or promote products derived from this software without 16 | specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 19 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 20 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | Lightflow - a lightweight, distributed workflow system 2 | ====================================================== 3 | 4 | .. 
image:: https://travis-ci.org/AustralianSynchrotron/Lightflow.svg?branch=master 5 | :target: https://travis-ci.org/AustralianSynchrotron/Lightflow 6 | 7 | .. image:: https://readthedocs.org/projects/lightflow/badge/?version=latest 8 | :target: http://lightflow.readthedocs.io/en/latest 9 | :alt: Documentation Status 10 | 11 | Lightflow is a Python 3.5+ library and command-line tool for executing workflows, 12 | composed of individual tasks, in a distributed fashion. It is based on Celery and 13 | provides task dependencies, data exchange between tasks and an intuitive description of workflows. 14 | 15 | 16 | Dependencies 17 | ------------ 18 | 19 | Python 20 | ^^^^^^ 21 | Lightflow is written in Python 3 and requires Python 3.5 or higher. 22 | 23 | Operating system 24 | ^^^^^^^^^^^^^^^^ 25 | Lightflow is being developed and tested on Linux, with Debian and RedHat being the main platforms. 26 | 27 | redis 28 | ^^^^^ 29 | The redis database is required by Lightflow as a communication broker between tasks. 30 | It is also used as the default broker for the Celery queuing system, but could be replaced 31 | with any other supported Celery broker. 32 | 33 | MongoDB 34 | ^^^^^^^ 35 | Lightflow makes use of MongoDB for storing persistent data during a workflow run that can be accessed 36 | by all tasks. 37 | 38 | 39 | Getting started 40 | --------------- 41 | 42 | The following getting started guide assumes a redis database running on ``localhost`` and port ``6379`` 43 | as well as a MongoDB database running on ``localhost`` and port ``27017``. 44 | 45 | Install Lightflow from PyPi:: 46 | 47 | pip install lightflow 48 | 49 | 50 | Create a default configuration file and copy the provided example workflows to a local directory of your choice:: 51 | 52 | lightflow config default . 53 | lightflow config examples . 54 | 55 | 56 | If you like, list all available example workflows:: 57 | 58 | lightflow workflow list 59 | 60 | 61 | In order to execute a workflow, start a worker that consumes jobs from the workflow, dag and task queues. 62 | Then start a workflow from the list of available examples. The following example starts the workflow ``simple``:: 63 | 64 | lightflow worker start 65 | lightflow workflow start simple 66 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = python -msphinx 7 | SPHINXPROJ = Lightflow 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /docs/api.rst: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Workers 5 | ------- 6 | The ``workers`` module provides the API functions for starting, stopping and managing workers. 7 | 8 | Methods 9 | ^^^^^^^ 10 | .. automodule:: lightflow.workers 11 | :members: 12 | 13 | Return Classes 14 | ^^^^^^^^^^^^^^ 15 | .. 
autoclass:: lightflow.queue.models.WorkerStats 16 | :members: 17 | 18 | .. autoclass:: lightflow.queue.models.QueueStats 19 | :members: 20 | 21 | 22 | Workflows 23 | --------- 24 | The ``workflows`` module provides the API functions for starting, stopping and monitoring workflows. 25 | 26 | Methods 27 | ^^^^^^^ 28 | .. automodule:: lightflow.workflows 29 | :members: 30 | 31 | Return Classes 32 | ^^^^^^^^^^^^^^ 33 | .. autoclass:: lightflow.queue.models.JobStats 34 | :members: 35 | 36 | 37 | Config 38 | ------ 39 | The configuration of Lightflow is passed to the API functions via an instance of the ``Config`` class. The configuration is described as 40 | a YAML structure and can be loaded from a file. The ``Config`` class contains a default configuration, which means that you only need 41 | to specify the settings in the config file that you would like to change. 42 | 43 | .. autoclass:: lightflow.Config 44 | :members: 45 | :inherited-members: 46 | 47 | 48 | Task Data 49 | --------- 50 | .. autoclass:: lightflow.models.MultiTaskData 51 | :members: 52 | 53 | 54 | Persistent Data Store 55 | --------------------- 56 | .. autoclass:: lightflow.models.DataStoreDocument 57 | :members: 58 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Lightflow documentation build configuration file, created by 5 | # sphinx-quickstart on Thu Jun 22 09:54:31 2017. 6 | # 7 | # This file is execfile()d with the current directory set to its 8 | # containing dir. 9 | # 10 | # Note that not all possible configuration values are present in this 11 | # autogenerated file. 12 | # 13 | # All configuration values have a default; values that are commented out 14 | # serve to show the default. 15 | 16 | # If extensions (or modules to document with autodoc) are in another directory, 17 | # add these directories to sys.path here. If the directory is relative to the 18 | # documentation root, use os.path.abspath to make it absolute, like shown here. 19 | # 20 | import os 21 | import sys 22 | import time 23 | import datetime 24 | import pkg_resources 25 | 26 | sys.path.append(os.path.dirname(__file__)) 27 | 28 | BUILD_DATE = datetime.datetime.utcfromtimestamp(int(os.environ.get('SOURCE_DATE_EPOCH', time.time()))) 29 | 30 | 31 | # -- General configuration ------------------------------------------------ 32 | 33 | # If your documentation needs a minimal Sphinx version, state it here. 34 | # 35 | # needs_sphinx = '1.0' 36 | 37 | # Add any Sphinx extension module names here, as strings. They can be 38 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 39 | # ones. 40 | extensions = [ 41 | 'sphinx.ext.autodoc', 42 | 'sphinx.ext.napoleon', 43 | 'sphinx.ext.coverage', 44 | 'sphinx.ext.viewcode'] 45 | 46 | # Add any paths that contain templates here, relative to this directory. 47 | templates_path = ['_templates'] 48 | 49 | # The suffix(es) of source filenames. 50 | # You can specify multiple suffix as a list of string: 51 | # 52 | # source_suffix = ['.rst', '.md'] 53 | source_suffix = '.rst' 54 | 55 | # The master toctree document. 56 | master_doc = 'index' 57 | 58 | # General information about the project. 
59 | project = 'Lightflow' 60 | copyright = u'2017 - {}, Software Engineering Group Australian Synchrotron'.format(BUILD_DATE.year) 61 | author = 'Software Engineering Group Australian Synchrotron' 62 | 63 | # The version info for the project you're documenting, acts as replacement for 64 | # |version| and |release|, also used in various other places throughout the 65 | # built documents. 66 | # 67 | try: 68 | release = pkg_resources.get_distribution('Lightflow').version 69 | except pkg_resources.DistributionNotFound: 70 | print('Lightflow must be available in order to build the documentation') 71 | sys.exit(1) 72 | 73 | version = '.'.join(release.split('.')[:2]) 74 | 75 | # The language for content autogenerated by Sphinx. Refer to documentation 76 | # for a list of supported languages. 77 | # 78 | # This is also used if you do content translation via gettext catalogs. 79 | # Usually you set "language" from the command line for these cases. 80 | language = None 81 | 82 | # List of patterns, relative to source directory, that match files and 83 | # directories to ignore when looking for source files. 84 | # This patterns also effect to html_static_path and html_extra_path 85 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 86 | 87 | # The name of the Pygments (syntax highlighting) style to use. 88 | pygments_style = 'sphinx' 89 | 90 | # If true, `todo` and `todoList` produce output, else they produce nothing. 91 | todo_include_todos = False 92 | 93 | 94 | # -- Options for HTML output ---------------------------------------------- 95 | 96 | # The theme to use for HTML and HTML Help pages. See the documentation for 97 | # a list of builtin themes. 98 | # 99 | html_theme = 'alabaster' 100 | 101 | # Theme options are theme-specific and customize the look and feel of a theme 102 | # further. For a list of options available for each theme, see the 103 | # documentation. 104 | # 105 | # html_theme_options = {} 106 | 107 | # Add any paths that contain custom static files (such as style sheets) here, 108 | # relative to this directory. They are copied after the builtin static files, 109 | # so a file named "default.css" will overwrite the builtin "default.css". 110 | html_static_path = ['_static'] 111 | 112 | 113 | # -- Options for HTMLHelp output ------------------------------------------ 114 | 115 | # Output file base name for HTML help builder. 116 | htmlhelp_basename = 'Lightflowdoc' 117 | 118 | 119 | # -- Options for LaTeX output --------------------------------------------- 120 | 121 | latex_elements = { 122 | # The paper size ('letterpaper' or 'a4paper'). 123 | # 124 | # 'papersize': 'letterpaper', 125 | 126 | # The font size ('10pt', '11pt' or '12pt'). 127 | # 128 | # 'pointsize': '10pt', 129 | 130 | # Additional stuff for the LaTeX preamble. 131 | # 132 | # 'preamble': '', 133 | 134 | # Latex figure (float) alignment 135 | # 136 | # 'figure_align': 'htbp', 137 | } 138 | 139 | # Grouping the document tree into LaTeX files. List of tuples 140 | # (source start file, target name, title, 141 | # author, documentclass [howto, manual, or own class]). 142 | latex_documents = [ 143 | (master_doc, 'Lightflow.tex', 'Lightflow Documentation', 144 | 'Software Engineering Australian Synchrotron Software Group', 'manual'), 145 | ] 146 | 147 | 148 | # -- Options for manual page output --------------------------------------- 149 | 150 | # One entry per manual page. List of tuples 151 | # (source start file, name, description, authors, manual section). 
152 | man_pages = [ 153 | (master_doc, 'lightflow', 'Lightflow Documentation', 154 | [author], 1) 155 | ] 156 | 157 | 158 | # -- Options for Texinfo output ------------------------------------------- 159 | 160 | # Grouping the document tree into Texinfo files. List of tuples 161 | # (source start file, target name, title, author, 162 | # dir menu entry, description, category) 163 | texinfo_documents = [ 164 | (master_doc, 'Lightflow', 'Lightflow Documentation', 165 | author, 'Lightflow', 'One line description of project.', 166 | 'Miscellaneous'), 167 | ] 168 | 169 | 170 | 171 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | Welcome to Lightflow! 2 | ===================== 3 | 4 | Lightflow is a Python 3.5+ library and command-line tool for executing workflows, 5 | composed of individual tasks, in a distributed fashion. It is based on Celery and 6 | provides task dependencies, data exchange between tasks and an intuitive description of workflows. 7 | 8 | 9 | User's Guide 10 | ------------ 11 | .. toctree:: 12 | :maxdepth: 2 13 | 14 | installation 15 | quickstart 16 | tutorial/index 17 | 18 | 19 | Tasks 20 | ----- 21 | .. toctree:: 22 | :maxdepth: 2 23 | 24 | tasks 25 | 26 | 27 | API Reference 28 | ------------- 29 | .. toctree:: 30 | :maxdepth: 2 31 | 32 | api 33 | -------------------------------------------------------------------------------- /docs/installation.rst: -------------------------------------------------------------------------------- 1 | .. _installation: 2 | 3 | Installation 4 | ============ 5 | 6 | One of the key goals when developing Lightflow was to keep the infrastructure dependencies as small as possible. 7 | Lightflow does not require special file systems, job scheduler systems or special hardware. It runs on most Linux distributions 8 | and is known to work on MacOSX as well as on the Ubuntu subsystem of Windows 10. Apart from Python 3.5+, the only dependencies 9 | of Lightflow are a running redis and MongoDB database. 10 | 11 | 12 | Python 13 | ------ 14 | Lightflow requires Python 3.5 or higher. It has been developed and tested with both a native Python installation as well as Miniconda/Anaconda. 15 | 16 | Lightflow's main Python dependencies are: 17 | 18 | - `Celery `_ - for queuing and managing jobs and running workers 19 | - `NetworkX `_ - for building and interrogating the directed acyclic graphs of a workflow 20 | - `cloudpickle `_ - for exchanging data between tasks running on distributed hosts 21 | - `Click `_ - for the command line client 22 | - `ruamel.yaml `_ - for reading the configuration file 23 | 24 | These dependencies are installed during the installation of Lightflow automatically. 25 | 26 | 27 | redis 28 | ----- 29 | Redis is an in-memory key-value database and is required by Lightflow as a communication broker between tasks. It is also used as the default 30 | broker for the Celery queuing system, but could be replaced with any other supported Celery broker. 31 | 32 | You can either download redis from the `offical redis website `_ or install it via the package 33 | manager of your distribution. By default, the redis server runs on ``localhost`` and port ``6379``. The :ref:`quickstart` as well as the :ref:`tutorial` 34 | assume you are running redis using these defaults. 
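If you want to make sure that redis is actually up and reachable before moving on, a quick sanity check is to ping it. This assumes the ``redis-cli`` command line client, which ships with most redis packages, is available on your machine::

    $ redis-cli ping
    PONG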
35 | 36 | 37 | MongoDB 38 | ------- 39 | MongoDB is a popular document-oriented database and is used by Lightflow for storing data that should persist during a workflow run. 40 | 41 | You can either download MongoDB from the `official MongoDB website `_ or install it via the package 42 | manager of your distribution: 43 | 44 | - `RedHat `_ 45 | - `Debian `_ 46 | - `Ubuntu `_ 47 | 48 | By default, MongoDB runs on ``localhost`` and port ``27017``. The :ref:`quickstart` as well as the :ref:`tutorial` 49 | assume you are running MongoDB using these defaults. 50 | 51 | 52 | Lightflow 53 | --------- 54 | After having redis and MongoDB running, installing Lightflow is a breeze. It is available from PyPI and can be installed with:: 55 | 56 | pip install lightflow 57 | 58 | This will install the Lightflow libraries, command line client and example workflows. 59 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=python -msphinx 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | set SPHINXPROJ=Lightflow 13 | 14 | if "%1" == "" goto help 15 | 16 | %SPHINXBUILD% >NUL 2>NUL 17 | if errorlevel 9009 ( 18 | echo. 19 | echo.The Sphinx module was not found. Make sure you have Sphinx installed, 20 | echo.then set the SPHINXBUILD environment variable to point to the full 21 | echo.path of the 'sphinx-build' executable. Alternatively you may add the 22 | echo.Sphinx directory to PATH. 23 | echo. 24 | echo.If you don't have Sphinx installed, grab it from 25 | echo.http://sphinx-doc.org/ 26 | exit /b 1 27 | ) 28 | 29 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 30 | goto end 31 | 32 | :help 33 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 34 | 35 | :end 36 | popd 37 | -------------------------------------------------------------------------------- /docs/overview.rst: -------------------------------------------------------------------------------- 1 | Overview 2 | ======== 3 | 4 | Introduction 5 | ------------ 6 | 7 | Concepts 8 | -------- 9 | 10 | Lightflow models a workflow as a set of individual tasks arranged as a directed acyclic graph (DAG). 11 | This specification encodes the direction that data flows as well as dependencies between tasks. 12 | Each workflow consists of one or more DAGs. Lightflow employs a worker-based queuing system, in which 13 | workers consume individual tasks. In order to avoid single points of failure, such as a central daemon 14 | often found in other workflow tools, the queuing system is also used to manage and monitor workflows and DAGs. -------------------------------------------------------------------------------- /docs/quickstart.rst: -------------------------------------------------------------------------------- 1 | .. _quickstart: 2 | 3 | Quickstart 4 | ========== 5 | 6 | You can't wait to start using Lightflow or have no time to follow the tutorial? No problem, just spend a few minutes with 7 | this quickstart guide and you are on your way to using Lightflow. 8 | 9 | This quickstart guide assumes that you have a redis database running on ``localhost`` and port ``6379``, 10 | a MongoDB database running on ``localhost`` and port ``27017`` as well as Lightflow installed on your system. 
If you haven't
installed the database systems and Lightflow yet, no problem, just follow the :ref:`installation` guide.


Configuration and examples
--------------------------

Create an empty directory in your preferred location. We will use this directory in the following to store the configuration file and
the example workflows. Lightflow has no restrictions on where this directory should be located or what its name should be.

The first step is to create the global configuration file for Lightflow. This file contains, among other settings, the connection
information for redis and MongoDB. The quickest and easiest way to generate a default configuration file is to use the Lightflow command line interface.
Make sure you are located in the directory you created earlier and enter::

    $ lightflow config default .

This will create a configuration file called ``lightflow.cfg`` containing a default configuration. If you are running redis or MongoDB on
a host or port different from the defaults mentioned above, edit the appropriate settings in the configuration file. You can find more
information about the configuration file in the section Configuration.

Lightflow ships with a number of examples that demonstrate various features of the system. We will copy these examples into a subfolder called ``examples``
inside your current directory. This will allow you to modify the examples as you see fit or use them as a starting point for your own workflows.
The command line interface offers a quick and easy way to copy the examples::

    $ lightflow config examples .

Now you will find a subfolder ``examples`` in your directory containing all example workflows. If you like, you can list all available example workflows
together with a short description. Make sure you are located in the folder containing the configuration file, then enter::

    $ lightflow workflow list


Start the workers
-----------------

Lightflow uses a worker-based scheme. This means a workflow adds jobs onto a central queue from which a number of workers consume jobs and execute them.
In order for Lightflow to run a workflow, it needs at least one running worker (obviously). You can start a worker with::

    $ lightflow worker start

This will start a worker, which then waits for the first job to be added to the queue. You can start as many workers as you like, but for the quickstart
guide one worker is enough.

.. admonition:: A recommended setup for multiple workers

    What is special about Lightflow, in comparison with other workflow systems, is that it also uses workers for running the workflow itself. This means there
    is no central daemon and thus no single point of failure. Lightflow uses three queues for running a workflow: two queues, labelled ``workflow`` and ``dag``, for
    managing the workflows, and one queue, labelled ``task``, for executing the individual tasks of a workflow. A typical setup would consist of one worker
    dealing with workflow-related jobs, thus consuming jobs only from the ``workflow`` and ``dag`` queues, and one or more workers executing the actual tasks.

    You can use the ``-q`` argument of the command line interface to restrict the queues a worker consumes jobs from.
For the recommended setup 61 | discussed above you would start one worker with:: 62 | 63 | $ lightflow worker start -q workflow,dag 64 | 65 | and at least one more worker with:: 66 | 67 | $ lightflow worker start -q task 68 | 69 | 70 | Run a workflow 71 | -------------- 72 | 73 | With at least one worker running, we are ready to run our first workflow. You can pick any example workflow you like and run it. In the following we 74 | will run the most basic of all workflows, the ``simple`` workflow. You might need a second terminal in order to run the workflow as the first one 75 | is occupied running our worker. In your second terminal enter:: 76 | 77 | $ lightflow workflow start simple 78 | 79 | This will send the workflow ``simple`` to the queue. Our worker will pick up the workflow and run it. The default logging level is very verbose so you 80 | will see the worker print out a lot of information as it executes the workflow. 81 | 82 | 83 | Where to go from here 84 | --------------------- 85 | 86 | Congratulations, you have finished the quickstart guide. A good place to continue is to have a look at the documented example workflows. They are a great 87 | starting point for exploring the features of Lightflow. Alternatively, head over to the tutorial section for a more structured introduction to Lightflow. 88 | -------------------------------------------------------------------------------- /docs/tasks.rst: -------------------------------------------------------------------------------- 1 | 2 | Python task 3 | ----------- 4 | 5 | The ``PythonTask`` is the most basic and most flexible task in Lightflow. It allows you to execute almost arbitrary Python code in your task. 6 | The only requirement is that the Python code can be serialised and deserialised safely. 7 | 8 | .. autoclass:: lightflow.tasks.PythonTask 9 | 10 | 11 | Bash task 12 | ---------- 13 | 14 | The ``BashTask`` provides an easy to use task for executing bash commands. It allows you to capture and process the standard and error output 15 | of the bash command either in 'real-time' or once the process has completed as a file object. 16 | 17 | .. autoclass:: lightflow.tasks.BashTask 18 | -------------------------------------------------------------------------------- /docs/tutorial/index.rst: -------------------------------------------------------------------------------- 1 | .. _tutorial: 2 | 3 | Tutorial 4 | ======== 5 | Welcome to the Lightflow tutorial! This tutorial will guide you step by step through the development of a workflow. The emphasis is on showing you how to 6 | model and implement typical workflow elements with Lightflow and to demonstrate the features Lightflow has to offer. Therefore, the workflow we are going 7 | to create does nothing particularly sophisticated. So don't expect too much. We will push numbers around and perform some basic math operations on them. 8 | 9 | Let's go! While we recommend that you follow the tutorial steps in order, you don't have to. Each tutorial step introduces a specific concept or feature 10 | of Lightflow, so feel free to jump directly to the step that interests you most. 11 | 12 | .. toctree:: 13 | :maxdepth: 2 14 | 15 | setup 16 | simple 17 | -------------------------------------------------------------------------------- /docs/tutorial/setup.rst: -------------------------------------------------------------------------------- 1 | .. 
_tutorial-setup: 2 | 3 | Step 0: Setup 4 | ============= 5 | In this step we will set up the environment for the tutorial and create an empty workflow. We assume that you followed the :ref:`installation` guide 6 | and have a redis database running on ``localhost`` and port ``6379``, a MongoDB database running on ``localhost`` and port ``27017`` 7 | as well as Lightflow installed on your system. 8 | 9 | To test whether you have installed Lightflow correctly, enter the following into a terminal:: 10 | 11 | $ lightflow 12 | 13 | This calls the command line interface of Lightflow and will print the available commands and options. 14 | 15 | 16 | Configuration file 17 | ------------------ 18 | Start by creating an empty directory with your preferred name (e.g. ``lightflow_tutorial``) in a location of your choice (e.g. your home directory). 19 | This will be our working directory for the tutorial containing the lightflow configuration file and our workflow file. Lightflow has no restrictions on where 20 | this directory should be located and what it is called. 21 | 22 | Next, we will create a configuration file. Lightflow uses the configuration file for storing settings such as the connection information 23 | for redis and MongoDB, and the location of your workflow files. To make things easier, we equipped the command line interface with a command to generate 24 | a default configuration file for you. Make sure you are located in the directory you created earlier, then enter:: 25 | 26 | $ lightflow config default . 27 | 28 | This creates a configuration file called ``lightflow.cfg`` containing the default settings into the current directory. 29 | 30 | Let's have a look into the configuration file. Open ``lightflow.cfg`` with your editor of choice. The configuration file uses the YAML format 31 | and is broken up into several sections. 32 | 33 | .. admonition:: Non-default redis or MongoDB 34 | 35 | If you are running redis on a different host or port from the default mentioned above, change the host 36 | and port settings in the ``celery`` as well as ``signal`` sections in the configuration file. 37 | If your MongoDB configuration deviates from the default, edit the host and port fields in the ``store`` section. 38 | 39 | We will focus on the first field labelled ``workflows``. This field contains a list of paths where Lightflow should look for workflow files. 40 | The paths can either be relative to the configuration file or absolute paths. By default, Lightflow expects to find workflow files in a sub-directory 41 | called ``examples``, located in the same directory as your configuration file. However, we would like our tutorial workflow file to 42 | live in its own directory called ``tutorial``. Therefore, edit the configuration file by changing ``examples`` to ``tutorial``:: 43 | 44 | workflows: 45 | - ./tutorial 46 | 47 | Save the file and exit your editor. 48 | 49 | 50 | Tutorial folder 51 | --------------- 52 | 53 | Before we can move on we have to create the ``tutorial`` folder for our tutorial workflow file of course. In the same 54 | directory as your configuration file, create a sub-directory called ``tutorial``:: 55 | 56 | $ mkdir tutorial 57 | 58 | Now you are ready to write your first workflow! Head over to :ref:`tutorial-simple` in our tutorial and learn how to write your first workflow. 
59 | -------------------------------------------------------------------------------- /docs/tutorial/simple.rst: -------------------------------------------------------------------------------- 1 | .. _tutorial-simple: 2 | 3 | Step 1: A simple workflow 4 | ========================= 5 | In this section of the tutorial we will write our first workflow. It will consist of two tasks that are executed in order. Each task will print 6 | a message so you can track the execution of the tasks. At the end of this section you will have learned how to create tasks, arrange their execution 7 | order and run a workflow. 8 | 9 | 10 | Workflow file 11 | ------------- 12 | In Lightflow, workflows are defined using Python. This means you don't have to learn another language and you can use your favorite Python libraries and modules. 13 | Typically you would have a single Python file describing the entire workflow, but for complex workflows you can, of course, split the workflow definition 14 | into multiple files. For this tutorial, we will only have a single workflow file. 15 | 16 | Change into the ``tutorial`` directory and create an empty file called ``tutorial01.py``. This file will contain the workflow for this step of the tutorial. 17 | Your directory structure should look like this:: 18 | 19 | /lightflow_tutorial 20 | lightflow.cfg 21 | /tutorial 22 | tutorial01.py 23 | 24 | 25 | Create two tasks 26 | ------------------ 27 | Let's get started with our workflow. First, we will create the two tasks for our small workflow. Open the workflow file you just created with your editor of choice. 28 | At the top of the file import the PythonTask class:: 29 | 30 | from lightflow.tasks import PythonTask 31 | 32 | Lightflow is shipped with two task classes: the ``PythonTask`` and the ``BashTask``. The ``PythonTask`` allows you to execute Python code in your task, while 33 | the ``BashTask`` provides an easy to use task for executing bash commands. In this tutorial we will use the ``PythonTask`` for all our tasks as it is the most 34 | flexible type of task. You can pretty much do whatever you like during the execution of a ``PythonTask``. 35 | 36 | Next, create the two tasks for our workflow. We are going to be boring here and call the first task ``first_task`` and the second task ``second_task``:: 37 | 38 | first_task = PythonTask(name='first_task', 39 | callback=print_first) 40 | 41 | second_task = PythonTask(name='second_task', 42 | callback=print_second) 43 | 44 | The first argument ``name`` defines a name for the task so you can track the task more easily. We are using the name of the object here, but you can name the 45 | task whatever you think is appropriate. The second argument ``callback`` is a callable that is being run when the task is executed. This is the 'body' of the task 46 | and you are free to execute your own Python code here. In the spirit of boring names for our tutorial, we have named the callables: ``print_first`` and 47 | ``print_second``. Of course, we haven't defined the callables yet, so let's do this next. 48 | 49 | 50 | Implement the callables 51 | ----------------------- 52 | We will use functions as the callables for our ``PythonTask`` objects. The functions take a specific form and look like this:: 53 | 54 | def print_first(data, store, signal, context): 55 | print('This is the first task') 56 | 57 | Add this code above your task instantiations. A callable for a ``PythonTask`` has four arguments. We will cover all four arguments in more detail 58 | in the following tutorial steps. 
So for now, you can safely ignore them. All we do in the body of the function is to print a simple string. 59 | 60 | The callable for the second task is pretty much the same, we only change the name and the string that is printed:: 61 | 62 | def print_second(data, store, signal, context): 63 | print('This is the second task') 64 | 65 | At this point we have the task objects that should be run and the code that should be executed for each task. We haven't defined the order in which 66 | we want the tasks to be run yet. This will happen in the next step. 67 | 68 | 69 | Arrange the tasks in a sequence 70 | ------------------------------- 71 | In Lightflow tasks are arranged in a Directed Acyclic Graph, or 'DAG' for short. While this might sound complicated, what it means is that all you do is to 72 | define the dependencies between the tasks, thereby building a network (also called graph) of tasks. The 'directed' captures the fact that the dependencies impose 73 | a direction on the graph. In our case, we want the ``first_task`` to be run before the ``second_task``. Lightflow does not allow for loops in the task graph, 74 | represented by the word 'acyclic'. For example, you are not allowed to set up a graph in which you start with ``first_task`` then run ``second_task`` followed 75 | by running ``first_task`` again. 76 | 77 | In Lightflow the ``Dag`` class takes care of running the tasks in the correct order. Import the ``Dag`` class at the top of your workflow file with:: 78 | 79 | from lightflow.models import Dag 80 | 81 | Next, below your task object instantiations at the bottom of your workflow, create an object of the ``Dag`` class:: 82 | 83 | d = Dag('main_dag') 84 | 85 | You have to provide a single argument, which is the name you would like to give to the Dag. 86 | 87 | The ``Dag`` class provides the function ``define()`` for setting up the task graph. This is where the magic happens. Lightflow uses a Python dictionary 88 | in order to specify the arrangement of the tasks. The **key:value** relationship of a dictionary is mapped to a **parent:child** relationship for tasks, 89 | thereby defining the dependencies between tasks. For our simple, two task workflow the graph definition looks like this:: 90 | 91 | d.define({ 92 | first_task: second_task 93 | }) 94 | 95 | That's it! You have defined our first workflow and are now ready to run it. 96 | 97 | 98 | The complete workflow 99 | --------------------- 100 | Here is the complete workflow for this tutorial including a few comments:: 101 | 102 | from lightflow.models import Dag 103 | from lightflow.tasks import PythonTask 104 | 105 | 106 | # the callback functions for the task 107 | def print_first(data, store, signal, context): 108 | print('This is the first task') 109 | 110 | def print_second(data, store, signal, context): 111 | print('This is the second task') 112 | 113 | 114 | # create the two task objects 115 | first_task = PythonTask(name='first_task', 116 | callback=print_first) 117 | 118 | second_task = PythonTask(name='second_task', 119 | callback=print_second) 120 | 121 | # create the main DAG 122 | d = Dag('main_dag') 123 | 124 | # set up the graph of the DAG, in which the first_task has 125 | # to be executed first, followed by the second_task. 126 | d.define({ 127 | first_task: second_task 128 | }) 129 | 130 | 131 | Document the workflow 132 | --------------------- 133 | This step is optional, but highly recommended as it will help you remembering what the workflow does. 
We will add a title and a short description 134 | to the workflow. At the top of your workflow file add the following docstring:: 135 | 136 | """ Tutorial 1: a sequence of two tasks 137 | 138 | This workflow uses two tasks in order to demonstrate 139 | the basics of a workflow definition in Lightflow. 140 | """ 141 | 142 | Lightflow uses the first line of the docstring when listing all available workflows. Give it a go by changing to the directory where the configuration 143 | file is located and enter:: 144 | 145 | $ lightflow workflow list 146 | tutorial01 Tutorial 1: a sequence of two tasks 147 | 148 | Lightflow will list your workflow together with the short description you gave it. 149 | 150 | 151 | Start a worker 152 | -------------- 153 | Lightflow uses a worker based scheme. This means a workflow adds jobs onto a central queue from which a number of workers consume jobs and execute them. 154 | In order for Lightflow to run our workflow, it needs at least one running worker. Start a worker with:: 155 | 156 | $ lightflow worker start 157 | 158 | This will start a worker, which then waits for the first job to be added to the queue. You can start as many workers as you like, but for now one worker 159 | is enough. 160 | 161 | 162 | Run the workflow 163 | ---------------- 164 | With at least one worker running, we are ready to run our first workflow. You might need a second terminal in order to run the workflow as the first one 165 | is occupied running our worker. In your second terminal enter:: 166 | 167 | $ lightflow workflow start tutorial01 168 | 169 | This will send our workflow to the queue. The worker will pick up the workflow and run it. The default logging level is very verbose so you 170 | will see the worker print out a lot of information as it executes the workflow. 171 | 172 | You will see how the ``first_task`` is being executed first and prints the string "This is the first task", then followed by the ``second_task`` and the 173 | string "This is the second task". 174 | 175 | Congratulations! You completed the first tutorial successfully. 176 | -------------------------------------------------------------------------------- /examples/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AustralianSynchrotron/lightflow/dc53dbc1d961e20fb144273baca258060705c03e/examples/__init__.py -------------------------------------------------------------------------------- /examples/abort.py: -------------------------------------------------------------------------------- 1 | """ Abort the running workflow upon an error in a task 2 | 3 | The following workflow stores the file names of three images into an array by the 4 | first task. The second task checks whether there are at least 5 image file names in the 5 | array, and as this is not the case aborts the workflow gracefully. The abort is 6 | accomplished by using the Abort exception. 
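(The exception class is named AbortWorkflow; it is imported from lightflow.models and raised in the check_data callback below.)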
7 | 8 | """ 9 | 10 | from lightflow.models import Dag, AbortWorkflow 11 | from lightflow.tasks import PythonTask 12 | 13 | 14 | # the callback function for the task that stores the array of three image file names 15 | def collect_data(data, store, signal, context): 16 | data['images'] = ['img_001.tif', 'img_002.tif', 'img_003.tif'] 17 | 18 | 19 | # the callback function for the task that checks the number of stored file names 20 | def check_data(data, store, signal, context): 21 | if len(data['images']) < 5: 22 | raise AbortWorkflow('At least 5 images are required') 23 | 24 | 25 | # create the main DAG 26 | d = Dag('main_dag') 27 | 28 | # create the two tasks for storing and checking data 29 | collect_task = PythonTask(name='collect_task', 30 | callback=collect_data) 31 | 32 | check_task = PythonTask(name='check_task', 33 | callback=check_data) 34 | 35 | # set up the graph of the DAG 36 | d.define({ 37 | collect_task: check_task 38 | }) 39 | -------------------------------------------------------------------------------- /examples/bash.py: -------------------------------------------------------------------------------- 1 | """ Use the BashTask to execute bash commands and shell scripts 2 | 3 | This workflow demonstrates the use of the BashTask. A simple bash command is executed 4 | and each line in stdout is displayed as well as counted. The full output is captured as 5 | well and displayed after the process completed. 6 | 7 | """ 8 | 9 | from lightflow.models import Dag 10 | from lightflow.tasks import BashTask 11 | 12 | 13 | # this callback is executed after the process was started and before the stdout and stderr 14 | # readers are started. Set the line counter to zero. 15 | def proc_start(pid, data, store, signal, context): 16 | data['num_lines'] = 0 17 | 18 | 19 | # this callback is called for each line of stdout. Print each line and increase 20 | # the line counter. 21 | def proc_stdout(line, data, store, signal, context): 22 | print(line.rstrip()) 23 | data['num_lines'] += 1 24 | 25 | 26 | # this callback is called after the process completed. Print the line counter and the 27 | # full output of stdout and stderr. 28 | def proc_end(return_code, stdout_file, stderr_file, data, store, signal, context): 29 | print('\n') 30 | print('Process return code: {}'.format(return_code)) 31 | print('Number lines: {}'.format(data['num_lines'])) 32 | print('\n') 33 | print('stdout:\n{}\n'.format(stdout_file.read().decode())) 34 | print('stderr:\n{}\n'.format(stderr_file.read().decode())) 35 | 36 | 37 | # create the main DAG and the bash task. Please note how the output of stderr is being 38 | # handled by the stdout callback. 39 | d = Dag('main_dag') 40 | 41 | proc_task = BashTask(name='proc_task', 42 | command='for i in `seq 1 10`; do echo "This is line $i"; done', 43 | capture_stdout=True, 44 | capture_stderr=True, 45 | callback_stdout=proc_stdout, 46 | callback_stderr=proc_stdout, 47 | callback_process=proc_start, 48 | callback_end=proc_end) 49 | 50 | # this DAG has only a single task 51 | d.define({ 52 | proc_task: None 53 | }) 54 | -------------------------------------------------------------------------------- /examples/branching.py: -------------------------------------------------------------------------------- 1 | """ Select task branches dynamically and wait for their completion 2 | 3 | The workflow also demonstrates the use of the 'limit' parameter in the returned 4 | Action of the branch_task to select which successor task, and thus which lane, 5 | will be processed. 
In the example below lane 1 and lane 2 will run in parallel, 6 | while lane 3 is skipped. 7 | 8 | 9 | The graph is as following: 10 | 11 | /-> lane1_print_task \ 12 | put_task -> branch_task --> lane2_print_task --> join_task 13 | \-> lane3_print_task / 14 | 15 | """ 16 | 17 | from lightflow.models import Dag, Action 18 | from lightflow.tasks import PythonTask 19 | 20 | 21 | # the callback function for the task that stores the value 5 22 | def put_data(data, store, signal, context): 23 | print('Task {task_name} being run in DAG {dag_name} ' 24 | 'for workflow {workflow_name} ({workflow_id})'.format(**context.to_dict())) 25 | 26 | data['value'] = 5 27 | 28 | 29 | # the callback function for the branch task that limits the successor tasks to the 30 | # print tasks in lane 1 and lane 2. The successor tasks can be specified by either their 31 | # name or the task object itself. Both methods are shown here. 32 | def branch_with_limit(data, store, signal, context): 33 | return Action(data, limit=[lane1_print_task, 'lane2_print_task']) 34 | 35 | 36 | # the callback function for tasks that print the data 37 | def print_value(data, store, signal, context): 38 | print('Task {} and value {}'.format(context.task_name, data['value'])) 39 | 40 | 41 | # create the main DAG 42 | d = Dag('main_dag') 43 | 44 | # task for storing the data 45 | put_task = PythonTask(name='put_task', 46 | callback=put_data) 47 | 48 | # task that limits the branching to certain successor tasks 49 | branch_task = PythonTask(name='branch_task', 50 | callback=branch_with_limit) 51 | 52 | # first task, first lane, simply prints the value stored in the put_task 53 | lane1_print_task = PythonTask(name='lane1_print_task', 54 | callback=print_value) 55 | 56 | # first task, second lane, simply prints the value stored in the put_task 57 | lane2_print_task = PythonTask(name='lane2_print_task', 58 | callback=print_value) 59 | 60 | # first task, third lane, simply prints the value stored in the put_task 61 | lane3_print_task = PythonTask(name='lane3_print_task', 62 | callback=print_value) 63 | 64 | # joins all three lanes together and waits for the predecessor tasks to finish processing 65 | join_task = PythonTask(name='t_join_me', 66 | callback=print_value) 67 | 68 | # set up the graph of the DAG as illustrated above. Please note how a list of tasks 69 | # defines tasks that are run in parallel (branched out). 
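# Each dictionary key is a parent task and its value the child task or list of child
# tasks; a child task is only run once its parent task has finished.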
70 | d.define({put_task: branch_task, 71 | branch_task: [lane1_print_task, lane2_print_task, lane3_print_task], 72 | lane1_print_task: join_task, 73 | lane2_print_task: join_task, 74 | lane3_print_task: join_task}) 75 | -------------------------------------------------------------------------------- /examples/chunking_dag.py: -------------------------------------------------------------------------------- 1 | from lightflow.models import Dag 2 | from lightflow.tasks import PythonTask, ChunkingTask 3 | 4 | 5 | def make_list(data, store, signal, context): 6 | print(context.task_name) 7 | data['my_list'] = ['asdf_0001.dat', 'asdf_0002.dat', 'sdfa_0001.dat', 'sdfa_0002.dat', 'sdfa_0003.dat', 8 | 'blah_0001.dat', '|', 'blah_0002.dat', 'blah2_0001.dat'] 9 | 10 | 11 | def print_list(data, store, signal, context): 12 | print(context.task_name) 13 | print('==================================') 14 | print(data['my_list']) 15 | print('==================================') 16 | 17 | 18 | print_dag = Dag('print_dag', autostart=False) 19 | 20 | print_list_task = PythonTask(name='print_list', 21 | callback=print_list) 22 | 23 | print_dag.define({print_list_task: None}) 24 | 25 | 26 | chunk_dag = Dag('chunk_dag') 27 | 28 | make_list_task = PythonTask(name='make_list', 29 | callback=make_list) 30 | 31 | chunk_task = ChunkingTask(name='chunk_me', dag_name='print_dag', force_consecutive=True, flush_on_end=False, 32 | match_pattern='(?P[0-9A-Za-z]*)_', in_key='my_list') 33 | 34 | chunk_task2 = ChunkingTask(name='chunk_me', dag_name='print_dag', force_consecutive=True, flush_on_end=False, 35 | match_pattern='[0-9A-Za-z]*_', in_key='my_list') 36 | 37 | chunk_dag.define({make_list_task: [chunk_task, chunk_task2]}) 38 | -------------------------------------------------------------------------------- /examples/data_store.py: -------------------------------------------------------------------------------- 1 | """ Keep data during a workflow run in the persistent data store 2 | 3 | Data that should be kept during a workflow run can be saved into the persistent 4 | data store. This data is deleted as soon as the workflow run ends, but is available 5 | to all tasks during the lifetime of the workflow. 6 | 7 | The data store provides methods to store and retrieve single values or append values 8 | to a list. This can even be done asynchronously from different tasks at the same time. 9 | 10 | The key under which the data is being stored supports a hierarchical structure using 11 | the dot notation. 12 | 13 | This workflow stores different types of data in the persistent data store and modifies 14 | them. 15 | 16 | """ 17 | 18 | from lightflow.models import Dag 19 | from lightflow.tasks import PythonTask 20 | 21 | import numpy as np 22 | 23 | 24 | # the callback function to store data in the persistent data store. It stores a single 25 | # integer value in 'number', a single integer value into the hierarchical key 26 | # 'buffer' -> 'observable' and a numpy array into 'image'. Additionally it adds an integer 27 | # value to a list in 'sample' -> 'spectra'. 28 | def store_data(data, store, signal, context): 29 | store.set('number', 5) 30 | store.set('buffer.observable', 20) 31 | store.set('image', np.ones((100, 100))) 32 | store.push('sample.spectra', 7) 33 | 34 | 35 | # the callback function for the task that retrieves and prints the 'number' and 'image' 36 | # values then modifies the 'number' value and creates a new list of 'filenames'. 
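# Note: store.get() reads back the values that store_data() saved above under the same keys.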
37 | def modify_data(data, store, signal, context): 38 | number = store.get('number') 39 | print('The number is: {}'.format(number)) 40 | 41 | img = store.get('image') 42 | print('The dimension of the image is: {}'.format(img.shape)) 43 | 44 | store.set('number', number * 10) 45 | store.push('filenames', 'file_a.spec') 46 | 47 | 48 | # the callback function for the task that adds another filename to the list. 49 | def add_filename(data, store, signal, context): 50 | store.push('filenames', 'file_b.spec') 51 | 52 | 53 | # the callback function for the task that adds a nested list to the list of filenames and 54 | # then extends the list of filenames with two more entries. 55 | def add_more_filenames(data, store, signal, context): 56 | store.push('filenames', ['nested_a', 'nested_b']) 57 | store.extend('filenames', ['file_c.spec', 'file_d.spec']) 58 | 59 | 60 | # create the main DAG 61 | d = Dag('main_dag') 62 | 63 | # create the tasks that call the functions above 64 | store_task = PythonTask(name='store_task', 65 | callback=store_data) 66 | 67 | modify_task = PythonTask(name='modify_task', 68 | callback=modify_data) 69 | 70 | add_filename_task = PythonTask(name='add_filename_task', 71 | callback=add_filename) 72 | 73 | add_more_filename_task = PythonTask(name='add_more_filename_task', 74 | callback=add_more_filenames) 75 | 76 | # set up the graph of the DAG, in which the store_task and modify_task are called 77 | # in sequence while the add_filename_task and add_more_filename_task are run in parallel. 78 | d.define({ 79 | store_task: modify_task, 80 | modify_task: [add_filename_task, add_more_filename_task] 81 | }) 82 | -------------------------------------------------------------------------------- /examples/decision.py: -------------------------------------------------------------------------------- 1 | """ Use branching to implement a dynamic decision graph 2 | 3 | This workflow demonstrates how to use branching in order to change direction in a path 4 | dynamically. Based on a random number, the graph will either call the small_number_task 5 | or the large_number_task. 

"""

from random import random

from lightflow.models import Dag, Action
from lightflow.tasks import PythonTask


# decide which route to go based on a random number
def decide_on_successor(data, store, signal, context):
    data['number'] = random()
    if data['number'] < 0.5:
        return Action(data, limit=[small_number_task])
    else:
        return Action(data, limit=[large_number_task])


# the callback function for the small number route
def print_small_number(data, store, signal, context):
    print('Small number: {}'.format(data['number']))


# the callback function for the large number route
def print_large_number(data, store, signal, context):
    print('Large number: {}'.format(data['number']))


# task definitions
decision_task = PythonTask(name='decision_task',
                           callback=decide_on_successor)

small_number_task = PythonTask(name='small_number_task',
                               callback=print_small_number)

large_number_task = PythonTask(name='large_number_task',
                               callback=print_large_number)


# create the main DAG
d = Dag('main_dag')

d.define({decision_task: [small_number_task, large_number_task]})
--------------------------------------------------------------------------------
/examples/multi_data.py:
--------------------------------------------------------------------------------
""" Manage multiple datasets in the task input using indices and aliases

The put_task stores the value 5 into the data and passes this value on to
print_task_1, square_task, multiply_task and subtract_task. The square_task
squares the value (now 25), prints it and passes it on to the multiply_task. The input to the
multiply_task now consists of two datasets: one from the put_task with ['value']==5 and one from
the square_task with ['value']==25. Since multiplication is a commutative operation,
the multiply_task does not care about the order of the datasets and will simply multiply
both datasets regardless of their order. The result (['value']==125) of the multiplication
is passed on to the subtract_task. Again, the subtract_task gets two datasets as input:
the dataset from the put_task with ['value']==5 and the dataset from the multiply_task
with ['value']==125. This time the order in which the subtraction is executed matters.
Thus, the data passed from the put_task to the subtract_task is given the alias 'first',
while the data from the multiply_task is labelled 'second'. The desired result is -120,
so the subtract_task accesses the first dataset by its alias 'first' and subtracts from it
the value of the second dataset, accessed via its alias 'second'.
17 | 18 | 19 | The schematic for the graph is as follows: 20 | 21 | /-> print_task_1 22 | | 23 | put_task -|-> square_task -> print_task_2 24 | | | 25 | | v 26 | |-> multiply_task -> print_task_3 27 | | | 28 | | v 29 | \-> subtract_task -> print_task_4 30 | """ 31 | 32 | from lightflow.models import Dag 33 | from lightflow.tasks import PythonTask 34 | 35 | 36 | # store the value 5 under the key 'value' 37 | def put_data(data, store, signal, context): 38 | data['value'] = 5 39 | 40 | 41 | # print the name of the task and the current value 42 | def print_data(data, store, signal, context): 43 | print(context.task_name, 'The value is:', data['value']) 44 | 45 | 46 | # square the current value 47 | def square_data(data, store, signal, context): 48 | data['value'] = data['value']**2 49 | 50 | 51 | # multiply the value from the first dataset and the second dataset. Since the default 52 | # dataset has never been changed, the default dataset is still the first (index==0) 53 | # dataset in the list of all datasets. The second dataset is referenced by its index==1. 54 | def multiply_data(data, store, signal, context): 55 | data['value'] = data['value'] * data.get_by_index(1)['value'] 56 | 57 | 58 | # subtract two values by using the aliases of the two datasets and different functions 59 | # for illustration purposes: get_by_alias() and the shorthand notation ([alias]) 60 | def subtract_data(data, store, signal, context): 61 | data['value'] = data.get_by_alias('first')['value'] - data('second')['value'] 62 | 63 | 64 | # create the main DAG based on the diagram above 65 | d = Dag('main_dag') 66 | 67 | put_task = PythonTask(name='put_task', callback=put_data) 68 | square_task = PythonTask(name='square_task', callback=square_data) 69 | multiply_task = PythonTask(name='multiply_task', callback=multiply_data) 70 | subtract_task = PythonTask(name='subtract_task', callback=subtract_data) 71 | 72 | print_task_1 = PythonTask(name='print_task_1', callback=print_data) 73 | print_task_2 = PythonTask(name='print_task_2', callback=print_data) 74 | print_task_3 = PythonTask(name='print_task_3', callback=print_data) 75 | print_task_4 = PythonTask(name='print_task_4', callback=print_data) 76 | 77 | 78 | d.define({put_task: {print_task_1: None, 79 | square_task: None, 80 | multiply_task: None, 81 | subtract_task: 'first'}, 82 | square_task: [print_task_2, multiply_task], 83 | multiply_task: {print_task_3: None, 84 | subtract_task: 'second'}, 85 | subtract_task: print_task_4}) 86 | -------------------------------------------------------------------------------- /examples/parallel.py: -------------------------------------------------------------------------------- 1 | """ Process tasks in parallel with branches and wait for their completion 2 | 3 | This workflow shows how to run tasks in parallel by branching into multiple lanes. A join 4 | task waits for the tasks in the lanes to finish. 
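In the DAG definition the fan-out is expressed by mapping a task to a list of successor tasks, while the join is simply a task that appears as the successor of several tasks and only starts once all of its predecessors have finished. A minimal sketch of the pattern with placeholder task objects:

    d.define({branch: [work_a, work_b],
              work_a: join,
              work_b: join})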
5 | 6 | 7 | The graph is as following: 8 | 9 | /-> lane1_print_task \ 10 | branch_task --> lane2_print_task --> join_task 11 | \-> lane3_print_task / 12 | 13 | """ 14 | 15 | from lightflow.models import Dag 16 | from lightflow.tasks import PythonTask 17 | 18 | 19 | # the callback function for the tasks 20 | def print_info(data, store, signal, context): 21 | print('Task {task_name} being run in DAG {dag_name} ' 22 | 'for workflow {workflow_name} ({workflow_id})'.format(**context.to_dict())) 23 | 24 | 25 | # create the main DAG 26 | d = Dag('main_dag') 27 | 28 | # task that limits the branching to certain successor tasks 29 | branch_task = PythonTask(name='branch_task', 30 | callback=print_info) 31 | 32 | # first task, first lane 33 | lane1_print_task = PythonTask(name='lane1_print_task', 34 | callback=print_info) 35 | 36 | # first task, second lane 37 | lane2_print_task = PythonTask(name='lane2_print_task', 38 | callback=print_info) 39 | 40 | # first task, third lane 41 | lane3_print_task = PythonTask(name='lane3_print_task', 42 | callback=print_info) 43 | 44 | # joins all three lanes together and waits for the predecessor tasks to finish processing 45 | join_task = PythonTask(name='t_join_me', 46 | callback=print_info) 47 | 48 | # set up the graph of the DAG as illustrated above. Please note how a list of tasks 49 | # defines tasks that are run in parallel (branched out). 50 | d.define({branch_task: [lane1_print_task, lane2_print_task, lane3_print_task], 51 | lane1_print_task: join_task, 52 | lane2_print_task: join_task, 53 | lane3_print_task: join_task}) 54 | -------------------------------------------------------------------------------- /examples/parameters.py: -------------------------------------------------------------------------------- 1 | """ Demonstration of user provided workflow parameters 2 | 3 | Parameters allow a workflow to ingest data upon its execution and thus allow 4 | the customization of the workflow by users without changes to the workflow code. 5 | 6 | On the command line, parameters are specified as argname=value pairs. 7 | 8 | Workflow parameters are stored into the persistent data store and can be retrieved from 9 | there as the code below shows. 10 | 11 | """ 12 | 13 | from lightflow.models import Parameters, Option, Dag 14 | from lightflow.tasks import PythonTask 15 | 16 | 17 | # This workflow takes four parameters, three optional and one mandatory. All parameters 18 | # without a default value are considered mandatory. In the example below, if the 19 | # 'filepath' parameter is not specified the workflow will not start and an error message 20 | # will be printed on the command line. Additionally, each parameter can have a help text 21 | # and a type. If a type is given, the user provided value is automatically converted 22 | # to this type. 
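# As an illustration only (the exact command line is an assumption based on the
# argname=value convention described above and the start command shown in
# examples/queues.py), this workflow might be launched with:
#
#   lightflow workflow start parameters filepath=/tmp/data.txt iterations=3
#
# 'filepath' has to be provided since it has no default value, while any option
# that is omitted falls back to its default.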
23 | parameters = Parameters([ 24 | Option('filepath', help='Specify a file path', type=str), 25 | Option('recursive', default=True, help='Run recursively', type=bool), 26 | Option('iterations', default=1, help='The number of iterations', type=int), 27 | Option('threshold', default=0.4, help='The threshold value', type=float) 28 | ]) 29 | 30 | 31 | # the callback function that prints the value of the filepath parameter 32 | def print_filepath(data, store, signal, context): 33 | print('The filepath is:', store.get('filepath')) 34 | 35 | 36 | # the callback function that prints the value of the iterations parameter 37 | def print_iterations(data, store, signal, context): 38 | print('Number of iterations:', store.get('iterations')) 39 | 40 | 41 | # create the main DAG 42 | d = Dag('main_dag') 43 | 44 | # task for printing the value of the filepath parameter 45 | print_filepath_task = PythonTask(name='print_filepath_task', 46 | callback=print_filepath) 47 | 48 | # task for printing the value of the iterations parameter 49 | print_iterations_task = PythonTask(name='print_iterations_task', 50 | callback=print_iterations) 51 | 52 | # set up the graph of the DAG, in which the print_filepath_task has to be executed first, 53 | # followed by the print_iterations_task. 54 | d.define({ 55 | print_filepath_task: print_iterations_task 56 | }) 57 | -------------------------------------------------------------------------------- /examples/queues.py: -------------------------------------------------------------------------------- 1 | """ Workflow of two tasks showing custom queues for workflows, dags and tasks 2 | 3 | By default a workflow is scheduled to a queue with the name 'workflow', a DAG to a 4 | queue with the name 'dag' and a task to a queue with the name 'task'. In order to have 5 | a worker consume workflows, DAGs and tasks, start it with: 6 | 7 | lightflow worker start 8 | 9 | If you like to specify the queue a worker takes jobs from, use the -q argument. 10 | For instance a worker started with: 11 | 12 | lightflow worker start -q workflow 13 | 14 | will only run workflows. However you are not restricted to the three default queue names. 15 | By changing the queue names a workflow, dag or task is scheduled to allows you to route 16 | jobs to specific workers. 17 | 18 | For this example workflow, start three workers as following: 19 | 20 | lightflow worker start -q main 21 | 22 | lightflow worker start -q graph,task 23 | 24 | lightflow worker start -q high_memory 25 | 26 | The first worker only consumes jobs from the 'main' queue, which we will use to run 27 | workflows. The second worker consumes jobs sent to the 'graph' and 'task' queues. We will 28 | route all DAGs to the 'graph' queue, while all tasks without a custom queue name will end 29 | up in the default 'task' queue. For the third worker we assume it runs on a special host 30 | for memory demanding tasks. So we will route our large memory consuming tasks to this 31 | worker. 32 | 33 | Start the workflow with: 34 | 35 | lightflow workflow start -q main queues 36 | 37 | In the output of the workers you will see how the workflow is being processed on the 38 | worker consuming the 'main' queue, the DAG and the print_task on the second worker, and 39 | the print_memory task on the third worker. 
40 | 41 | """ 42 | 43 | from lightflow.models import Dag 44 | from lightflow.tasks import PythonTask 45 | 46 | 47 | # the callback function for the tasks that simply prints the context 48 | def print_text(data, store, signal, context): 49 | print('Task {task_name} being run in DAG {dag_name} ' 50 | 'for workflow {workflow_name} ({workflow_id})'.format(**context.to_dict())) 51 | 52 | 53 | # create the main DAG and have it scheduled on the 'graph' queue 54 | d = Dag('main_dag', queue='graph') 55 | 56 | # create the two task, where the first task is executed on the default 'task' queue 57 | # while the second task is processed on the 'high_memory' queue 58 | print_task = PythonTask(name='print_task', 59 | callback=print_text) 60 | 61 | print_memory = PythonTask(name='print_memory', 62 | callback=print_text, 63 | queue='high_memory') 64 | 65 | # set up the graph of the DAG, in which the print_task has to be executed first, 66 | # followed by the print_memory task. 67 | d.define({ 68 | print_task: print_memory 69 | }) 70 | -------------------------------------------------------------------------------- /examples/sequence.py: -------------------------------------------------------------------------------- 1 | """ A sequence of tasks incrementing a number 2 | 3 | This workflow arranges 3 tasks in a row. Each task calls the same callable, 4 | which increments a number and prints the current number and some information. 5 | 6 | """ 7 | 8 | from lightflow.models import Dag 9 | from lightflow.tasks import PythonTask 10 | 11 | 12 | # the callback function for all tasks 13 | def inc_number(data, store, signal, context): 14 | print('Task {task_name} being run in DAG {dag_name} ' 15 | 'for workflow {workflow_name} ({workflow_id}) ' 16 | 'on {worker_hostname}'.format(**context.to_dict())) 17 | 18 | if 'value' not in data: 19 | data['value'] = 0 20 | 21 | data['value'] = data['value'] + 1 22 | print('This is task #{}'.format(data['value'])) 23 | 24 | 25 | # create the main DAG 26 | d = Dag('main_dag') 27 | 28 | # create the 3 tasks that increment a number 29 | task_1 = PythonTask(name='task_1', 30 | callback=inc_number) 31 | 32 | task_2 = PythonTask(name='task_2', 33 | callback=inc_number) 34 | 35 | task_3 = PythonTask(name='task_3', 36 | callback=inc_number) 37 | 38 | 39 | # set up the graph of the DAG as a linear sequence of tasks 40 | d.define({ 41 | task_1: task_2, 42 | task_2: task_3 43 | }) 44 | -------------------------------------------------------------------------------- /examples/simple.py: -------------------------------------------------------------------------------- 1 | """ Simple workflow of two tasks exchanging data 2 | 3 | The first task (put_task) stores the value 5 in the key 'value', that is then read 4 | and displayed by the second task (print_task). 
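Assuming a worker that consumes the default queues is running (see examples/queues.py for the worker commands), the workflow can be started with:

    lightflow workflow start simple

The task data written by put_task travels along the graph edge and arrives as the data argument of the print_value callback.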
5 | 6 | """ 7 | 8 | from lightflow.models import Dag 9 | from lightflow.tasks import PythonTask 10 | 11 | 12 | # the callback function for the task that stores the value 5 13 | def put_data(data, store, signal, context): 14 | print('Task {task_name} being run in DAG {dag_name} ' 15 | 'for workflow {workflow_name} ({workflow_id}) ' 16 | 'on {worker_hostname}'.format(**context.to_dict())) 17 | 18 | data['value'] = 5 19 | 20 | 21 | # the callback function for the task that prints the data 22 | def print_value(data, store, signal, context): 23 | print('The value is: {}'.format(data['value'])) 24 | 25 | 26 | # create the main DAG 27 | d = Dag('main_dag') 28 | 29 | # create the two tasks for storing and retrieving data 30 | put_task = PythonTask(name='put_task', 31 | callback=put_data) 32 | 33 | print_task = PythonTask(name='print_task', 34 | callback=print_value) 35 | 36 | # set up the graph of the DAG, in which the put_task has to be executed first, 37 | # followed by the print_task. 38 | d.define({ 39 | put_task: print_task 40 | }) 41 | -------------------------------------------------------------------------------- /examples/stop.py: -------------------------------------------------------------------------------- 1 | """ Stop the execution of a task from callback functions 2 | 3 | This workflow shows how to stop a task using the Stop exception from any callback. In the 4 | example below, 'start_task' is executed first and then branches into three paths. The 5 | first path consists of a bash task that enumerates the numbers 1 to 10. As soon as 6 | number 5 is reached it raises the Stop exception from its stdout callback function. This 7 | leads to an immediate stop of the bash process and skips the successor 'print_task_1'. 8 | The second path, running in parallel, executes 'stop_noskip_task', which raises a 9 | Stop exception but with the 'skip_successors' flag set to False, meaning that the task 10 | is stopped immediately but 'print_task_2' will be executed. The third path is similar 11 | to the second path but will skip 'print_task_3'. 
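Note that StopTask only affects the task raising it and, depending on the skip_successors flag, its successor tasks; raising AbortWorkflow instead stops the entire workflow (see examples/abort.py). The two flavours used below boil down to:

    raise StopTask('stop, but still run the successors', skip_successors=False)
    raise StopTask('stop and skip the successors')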
12 | """ 13 | 14 | from lightflow.models import Dag, StopTask 15 | from lightflow.tasks import PythonTask, BashTask 16 | 17 | 18 | # callback function for the start task 19 | def start_all(data, store, signal, context): 20 | print('Starting DAG {}'.format(context.dag_name)) 21 | 22 | 23 | # callback function that is called for each new line of the stdout of the bash process 24 | def bash_stdout(line, data, store, signal, context): 25 | if int(line) == 5: 26 | raise StopTask('Reached line number 5') 27 | else: 28 | print('Content of current line is {}'.format(line)) 29 | 30 | 31 | # callback function for a task that immediately stops but will not affect successor tasks 32 | def stop_noskip(data, store, signal, context): 33 | raise StopTask('Stop task {} but not successor tasks'.format(context.task_name), 34 | skip_successors=False) 35 | 36 | 37 | # callback function for a task that immediately stops and also skips its successor tasks 38 | def stop(data, store, signal, context): 39 | raise StopTask('Stop task {} and all successor tasks'.format(context.task_name)) 40 | 41 | 42 | # callback for printing the current task context 43 | def print_context(data, store, signal, context): 44 | print('Task {task_name} being run in DAG {dag_name} ' 45 | 'for workflow {workflow_name} ({workflow_id})'.format(**context.to_dict())) 46 | 47 | 48 | # create the main DAG 49 | d = Dag('main_dag') 50 | 51 | 52 | start_task = PythonTask(name='start_task', 53 | callback=start_all) 54 | 55 | bash_task = BashTask(name='bash_task', 56 | command='for i in `seq 1 10`; do echo "$i"; done', 57 | callback_stdout=bash_stdout) 58 | 59 | stop_noskip_task = PythonTask(name='stop_noskip_task', 60 | callback=stop_noskip) 61 | 62 | stop_task = PythonTask(name='stop_task', 63 | callback=stop) 64 | 65 | print_task_1 = PythonTask(name='print_task_1', 66 | callback=print_context) 67 | 68 | print_task_2 = PythonTask(name='print_task_2', 69 | callback=print_context) 70 | 71 | print_task_3 = PythonTask(name='print_task_3', 72 | callback=print_context) 73 | 74 | 75 | # set up the graph of the DAG with a start task and three paths with different stop 76 | # conditions. 77 | d.define({ 78 | start_task: [bash_task, stop_noskip_task, stop_task], 79 | bash_task: print_task_1, 80 | stop_noskip_task: print_task_2, 81 | stop_task: print_task_3 82 | }) 83 | -------------------------------------------------------------------------------- /examples/sub_dag.py: -------------------------------------------------------------------------------- 1 | """ Implement dynamic workflows by calling other dags from within a task 2 | 3 | In order to change the workflow at runtime, a task can request the execution of another 4 | dag via the start_dag function of the signal system. 5 | 6 | This example requires the numpy module to be installed and available to the workers 7 | as well as to the workflow. 8 | 9 | """ 10 | 11 | from time import sleep 12 | import numpy as np 13 | 14 | from lightflow.models import Dag 15 | from lightflow.tasks import PythonTask 16 | 17 | 18 | # the callback function for the init task 19 | def print_name(data, store, signal, context): 20 | print('Task {task_name} being run in DAG {dag_name} ' 21 | 'for workflow {workflow_name} ({workflow_id})'.format(**context.to_dict())) 22 | 23 | 24 | # this callback function starts five dags. For each dag the function waits a second, 25 | # then creates a numpy array and stores it into the data that is then passed to the 26 | # sub dag. 
The dag that should be started can either be given by its name or the dag 27 | # object itself. The names of the created dags are recorded and the task waits for 28 | # all created dags to be completed. 29 | def start_sub_dag(data, store, signal, context): 30 | dag_names = [] 31 | for i in range(5): 32 | sleep(1) 33 | data['image'] = np.ones((100, 100)) 34 | started_dag = signal.start_dag(sub_dag, data=data) 35 | dag_names.append(started_dag) 36 | 37 | signal.join_dags(dag_names) 38 | 39 | 40 | # this callback function prints the dimensions of the received numpy array 41 | def sub_dag_print(data, store, signal, context): 42 | print('Received an image with dimensions: {}'.format(data['image'].shape)) 43 | 44 | 45 | init_task = PythonTask(name='init_task', 46 | callback=print_name) 47 | 48 | call_dag_task = PythonTask(name='call_dag_task', 49 | callback=start_sub_dag) 50 | 51 | # create the main dag that runs the init task first, followed by the call_dag task. 52 | main_dag = Dag('main_dag') 53 | main_dag.define({ 54 | init_task: call_dag_task 55 | }) 56 | 57 | 58 | # create the tasks for the sub dag that simply prints the shape of the numpy array 59 | # passed down from the main dag. 60 | print_task = PythonTask(name='print_task', 61 | callback=sub_dag_print) 62 | 63 | # create the sub dag that is being called by the main dag. In order to stop the 64 | # system from automatically starting the dag when the workflow is run, set the autostart 65 | # parameter to false. 66 | sub_dag = Dag('sub_dag', autostart=False) 67 | 68 | sub_dag.define({ 69 | print_task: None 70 | }) 71 | -------------------------------------------------------------------------------- /examples/timing.py: -------------------------------------------------------------------------------- 1 | """ Retrieve the duration for all tasks from the store meta section 2 | 3 | The last task in a list of tasks interrogates the log in the persistent data store 4 | in order to print the run time for each task. 
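The timing information is kept in the meta section of the data store under a key derived from the DAG name. The lookup used below boils down to the following sketch (the field names are those written by the system; 'duration' only appears once a task has finished):

    dag_log = store.get(key='log.main_dag.tasks',
                        section=DataStoreDocumentSection.Meta)
    for task_name, fields in dag_log.items():
        print(task_name, 'ran on', fields['worker'])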
5 | 6 | """ 7 | from time import sleep 8 | from random import random 9 | 10 | from lightflow.models import Dag, DataStoreDocumentSection 11 | from lightflow.tasks import PythonTask 12 | 13 | 14 | # the callback for all sleep tasks 15 | def random_sleep(data, store, signal, context): 16 | sleep(random() * 4) 17 | 18 | 19 | # the callback function for the task that prints the run times 20 | def print_times(data, store, signal, context): 21 | dag_log = store.get(key='log.{}.tasks'.format(context.dag_name), 22 | section=DataStoreDocumentSection.Meta) 23 | for task, fields in dag_log.items(): 24 | # The print task has not finished yet, so there is no duration available 25 | if task != context.task_name: 26 | print(task, 'on', fields['worker'], 'took', fields['duration'], 'seconds') 27 | else: 28 | print(task, 'on', fields['worker'], 'is still running') 29 | 30 | 31 | # create the main DAG 32 | d = Dag('main_dag') 33 | 34 | # create the sleep tasks 35 | sleep_task_1 = PythonTask(name='sleep_task_1', callback=random_sleep) 36 | sleep_task_2 = PythonTask(name='sleep_task_2', callback=random_sleep) 37 | sleep_task_3 = PythonTask(name='sleep_task_3', callback=random_sleep) 38 | 39 | # create the print task 40 | print_task = PythonTask(name='print_task', callback=print_times) 41 | 42 | # set up the DAG 43 | d.define({ 44 | sleep_task_1: sleep_task_2, 45 | sleep_task_2: sleep_task_3, 46 | sleep_task_3: print_task 47 | }) 48 | -------------------------------------------------------------------------------- /lightflow-complete.sh: -------------------------------------------------------------------------------- 1 | _lightflow_completion() { 2 | COMPREPLY=( $( env COMP_WORDS="${COMP_WORDS[*]}" \ 3 | COMP_CWORD=$COMP_CWORD \ 4 | _LIGHTFLOW_COMPLETE=complete $1 ) ) 5 | return 0 6 | } 7 | 8 | complete -F _lightflow_completion -o default lightflow; 9 | -------------------------------------------------------------------------------- /lightflow.cfg: -------------------------------------------------------------------------------- 1 | 2 | workflows: 3 | - ./examples 4 | 5 | libraries: [] 6 | 7 | celery: 8 | broker_url: redis://localhost:6379/0 9 | result_backend: redis://localhost:6379/0 10 | worker_concurrency: 8 11 | result_expires: 0 12 | worker_send_task_events: True 13 | worker_prefetch_multiplier: 1 14 | 15 | signal: 16 | host: localhost 17 | port: 6379 18 | password: null 19 | database: 0 20 | polling_time: 0.5 21 | 22 | store: 23 | host: localhost 24 | port: 27017 25 | database: lightflow 26 | username: null 27 | password: null 28 | auth_source: admin 29 | auth_mechanism: null 30 | connect_timeout: 30000 31 | 32 | graph: 33 | workflow_polling_time: 0.5 34 | dag_polling_time: 0.5 35 | 36 | cli: 37 | time_format: '%d/%m/%Y %H:%M:%S' 38 | 39 | extensions: {} 40 | 41 | logging: 42 | version: 1 43 | disable_existing_loggers: false 44 | formatters: 45 | verbose: 46 | format: '[%(asctime)s][%(levelname)s] %(name)s %(filename)s:%(funcName)s:%(lineno)d | %(message)s' 47 | datefmt: '%d/%m/%Y %H:%M:%S' 48 | simple: 49 | (): 'colorlog.ColoredFormatter' 50 | format: '%(log_color)s[%(asctime)s][%(levelname)s] %(blue)s%(processName)s%(reset)s | %(message)s' 51 | datefmt: '%d/%m/%Y %H:%M:%S' 52 | handlers: 53 | console: 54 | class: logging.StreamHandler 55 | level: INFO 56 | formatter: simple 57 | loggers: 58 | celery: 59 | handlers: 60 | - console 61 | level: INFO 62 | 63 | root: 64 | handlers: 65 | - console 66 | level: INFO 67 | -------------------------------------------------------------------------------- 
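The configuration shown above can also be loaded programmatically through the Config class defined in lightflow/config.py below. A minimal sketch, assuming the file sits in the current directory and that the configured workflow directories (./examples) actually exist, since a missing directory raises a ConfigLoadError:

    from lightflow.config import Config

    config = Config.from_file('lightflow.cfg')
    print(config.celery['broker_url'])      # redis://localhost:6379/0
    print(config.workflow_polling_time)     # 0.5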
/lightflow/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from .config import Config 3 | default_config = Config.default() 4 | -------------------------------------------------------------------------------- /lightflow/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import ruamel.yaml as yaml 4 | 5 | from lightflow.models.exceptions import ConfigLoadError, ConfigFieldError 6 | 7 | 8 | LIGHTFLOW_CONFIG_ENV = 'LIGHTFLOW_CONFIG' 9 | LIGHTFLOW_CONFIG_NAME = 'lightflow.cfg' 10 | 11 | 12 | def expand_env_var(env_var): 13 | """ Expands, potentially nested, environment variables. 14 | 15 | Args: 16 | env_var (str): The environment variable that should be expanded. 17 | 18 | Returns: 19 | str: The fully expanded environment variable. 20 | """ 21 | if not env_var: 22 | return env_var 23 | while True: 24 | interpolated = os.path.expanduser(os.path.expandvars(str(env_var))) 25 | if interpolated == env_var: 26 | return interpolated 27 | else: 28 | env_var = interpolated 29 | 30 | 31 | class Config: 32 | """ Hosts the global configuration. 33 | 34 | The configuration is read from a structured YAML file or a dictionary. 35 | The location of the file can either be specified directly, is given in 36 | the environment variable LIGHTFLOW_CONFIG_ENV, is looked for in the 37 | current execution directory or in the home directory of the user. 38 | """ 39 | def __init__(self): 40 | self._config = None 41 | 42 | @classmethod 43 | def from_file(cls, filename, *, strict=True): 44 | """ Create a new Config object from a configuration file. 45 | 46 | Args: 47 | filename (str): The location and name of the configuration file. 48 | strict (bool): If true raises a ConfigLoadError when the configuration 49 | cannot be found. 50 | 51 | Returns: 52 | An instance of the Config class. 53 | 54 | Raises: 55 | ConfigLoadError: If the configuration cannot be found. 56 | """ 57 | config = cls() 58 | config.load_from_file(filename, strict=strict) 59 | return config 60 | 61 | def load_from_file(self, filename=None, *, strict=True): 62 | """ Load the configuration from a file. 63 | 64 | The location of the configuration file can either be specified directly in the 65 | parameter filename or is searched for in the following order: 66 | 67 | 1. In the environment variable given by LIGHTFLOW_CONFIG_ENV 68 | 2. In the current execution directory 69 | 3. In the user's home directory 70 | 71 | Args: 72 | filename (str): The location and name of the configuration file. 73 | strict (bool): If true raises a ConfigLoadError when the configuration 74 | cannot be found. 75 | 76 | Raises: 77 | ConfigLoadError: If the configuration cannot be found. 
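Example (a minimal sketch; the path is a placeholder):

            config = Config()
            config.load_from_file('/path/to/lightflow.cfg')

            # or rely on the search order described above, e.g. on $LIGHTFLOW_CONFIG
            config.load_from_file()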
78 | """ 79 | self.set_to_default() 80 | 81 | if filename: 82 | self._update_from_file(filename) 83 | else: 84 | if LIGHTFLOW_CONFIG_ENV not in os.environ: 85 | if os.path.isfile(os.path.join(os.getcwd(), LIGHTFLOW_CONFIG_NAME)): 86 | self._update_from_file( 87 | os.path.join(os.getcwd(), LIGHTFLOW_CONFIG_NAME)) 88 | elif os.path.isfile(expand_env_var('~/{}'.format(LIGHTFLOW_CONFIG_NAME))): 89 | self._update_from_file( 90 | expand_env_var('~/{}'.format(LIGHTFLOW_CONFIG_NAME))) 91 | else: 92 | if strict: 93 | raise ConfigLoadError('Could not find the configuration file.') 94 | else: 95 | self._update_from_file(expand_env_var(os.environ[LIGHTFLOW_CONFIG_ENV])) 96 | 97 | self._update_python_paths() 98 | 99 | def load_from_dict(self, conf_dict=None): 100 | """ Load the configuration from a dictionary. 101 | 102 | Args: 103 | conf_dict (dict): Dictionary with the configuration. 104 | """ 105 | self.set_to_default() 106 | self._update_dict(self._config, conf_dict) 107 | self._update_python_paths() 108 | 109 | def to_dict(self): 110 | """ Returns a copy of the internal configuration as a dictionary. """ 111 | return dict(self._config) 112 | 113 | @property 114 | def workflows(self): 115 | """ Return the workflow folders """ 116 | return self._config.get('workflows') 117 | 118 | @property 119 | def data_store(self): 120 | """ Return the data store settings """ 121 | return self._config.get('store') 122 | 123 | @property 124 | def signal(self): 125 | """ Return the signal system settings """ 126 | return self._config.get('signal') 127 | 128 | @property 129 | def logging(self): 130 | """ Return the logging settings """ 131 | return self._config.get('logging') 132 | 133 | @property 134 | def celery(self): 135 | """ Return the celery settings """ 136 | return self._config.get('celery') 137 | 138 | @property 139 | def cli(self): 140 | """ Return the cli settings """ 141 | return self._config.get('cli') 142 | 143 | @property 144 | def extensions(self): 145 | """ Return the custom settings of extensions """ 146 | if 'extensions' not in self._config: 147 | raise ConfigFieldError( 148 | 'The extensions section is missing in the configuration') 149 | return self._config.get('extensions') 150 | 151 | @property 152 | def workflow_polling_time(self): 153 | """ Return the waiting time between status checks of the running dags (sec) """ 154 | if 'graph' not in self._config: 155 | raise ConfigFieldError('The graph section is missing in the configuration') 156 | return self._config.get('graph').get('workflow_polling_time') 157 | 158 | @property 159 | def dag_polling_time(self): 160 | """ Return the waiting time between status checks of the running tasks (sec) """ 161 | if 'graph' not in self._config: 162 | raise ConfigFieldError('The graph section is missing in the configuration') 163 | return self._config.get('graph').get('dag_polling_time') 164 | 165 | def set_to_default(self): 166 | """ Overwrite the configuration with the default configuration. """ 167 | self._config = yaml.safe_load(self.default()) 168 | 169 | def _update_from_file(self, filename): 170 | """ Helper method to update an existing configuration with the values from a file. 171 | 172 | Loads a configuration file and replaces all values in the existing configuration 173 | dictionary with the values from the file. 174 | 175 | Args: 176 | filename (str): The path and name to the configuration file. 
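Only the keys present in the file override the corresponding defaults; the merge performed by _update_dict is recursive. The same behaviour can be seen with load_from_dict, for example (the hostname is a placeholder and the default workflow directories are assumed to exist):

            config = Config()
            config.load_from_dict({'store': {'host': 'db.example.com'}})
            # config.data_store['host'] is now 'db.example.com', while all other
            # 'store' defaults (port, database, ...) are kept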
177 | """ 178 | if os.path.exists(filename): 179 | try: 180 | with open(filename, 'r') as config_file: 181 | yaml_dict = yaml.safe_load(config_file.read()) 182 | if yaml_dict is not None: 183 | self._update_dict(self._config, yaml_dict) 184 | except IsADirectoryError: 185 | raise ConfigLoadError( 186 | 'The specified configuration file is a directory not a file') 187 | else: 188 | raise ConfigLoadError('The config file {} does not exist'.format(filename)) 189 | 190 | def _update_dict(self, to_dict, from_dict): 191 | """ Recursively merges the fields for two dictionaries. 192 | 193 | Args: 194 | to_dict (dict): The dictionary onto which the merge is executed. 195 | from_dict (dict): The dictionary merged into to_dict 196 | """ 197 | for key, value in from_dict.items(): 198 | if key in to_dict and isinstance(to_dict[key], dict) and \ 199 | isinstance(from_dict[key], dict): 200 | self._update_dict(to_dict[key], from_dict[key]) 201 | else: 202 | to_dict[key] = from_dict[key] 203 | 204 | def _update_python_paths(self): 205 | """ Append the workflow and libraries paths to the PYTHONPATH. """ 206 | for path in self._config['workflows'] + self._config['libraries']: 207 | if os.path.isdir(os.path.abspath(path)): 208 | if path not in sys.path: 209 | sys.path.append(path) 210 | else: 211 | raise ConfigLoadError( 212 | 'Workflow directory {} does not exist'.format(path)) 213 | 214 | @staticmethod 215 | def default(): 216 | """ Returns the default configuration. """ 217 | return ''' 218 | workflows: 219 | - ./examples 220 | 221 | libraries: [] 222 | 223 | celery: 224 | broker_url: redis://localhost:6379/0 225 | result_backend: redis://localhost:6379/0 226 | worker_concurrency: 8 227 | result_expires: 0 228 | worker_send_task_events: True 229 | worker_prefetch_multiplier: 1 230 | 231 | signal: 232 | host: localhost 233 | port: 6379 234 | password: null 235 | database: 0 236 | polling_time: 0.5 237 | 238 | store: 239 | host: localhost 240 | port: 27017 241 | database: lightflow 242 | username: null 243 | password: null 244 | auth_source: admin 245 | auth_mechanism: null 246 | connect_timeout: 30000 247 | 248 | graph: 249 | workflow_polling_time: 0.5 250 | dag_polling_time: 0.5 251 | 252 | cli: 253 | time_format: '%d/%m/%Y %H:%M:%S' 254 | 255 | extensions: {} 256 | 257 | logging: 258 | version: 1 259 | disable_existing_loggers: false 260 | formatters: 261 | verbose: 262 | format: '[%(asctime)s][%(levelname)s] %(name)s %(filename)s:%(funcName)s:%(lineno)d | %(message)s' 263 | datefmt: '%d/%m/%Y %H:%M:%S' 264 | simple: 265 | (): 'colorlog.ColoredFormatter' 266 | format: '%(log_color)s[%(asctime)s][%(levelname)s] %(blue)s%(processName)s%(reset)s | %(message)s' 267 | datefmt: '%d/%m/%Y %H:%M:%S' 268 | handlers: 269 | console: 270 | class: logging.StreamHandler 271 | level: INFO 272 | formatter: simple 273 | loggers: 274 | celery: 275 | handlers: 276 | - console 277 | level: INFO 278 | 279 | root: 280 | handlers: 281 | - console 282 | level: INFO 283 | ''' 284 | -------------------------------------------------------------------------------- /lightflow/logger.py: -------------------------------------------------------------------------------- 1 | from celery.utils.log import get_task_logger 2 | 3 | 4 | def get_logger(name): 5 | """ Helper function to return a valid logger object 6 | 7 | Args: 8 | name (str): The name of the logger. Typically: __name__. 
9 | 10 | Returns: 11 | Logger: A logger object for sending messages to the logging system 12 | """ 13 | return get_task_logger(name) 14 | -------------------------------------------------------------------------------- /lightflow/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .workflow import Workflow 2 | from .parameters import Parameters, Option 3 | from .dag import Dag 4 | from .dag_signal import DagSignal 5 | from .task import BaseTask, TaskStatus 6 | from .task_parameters import TaskParameters 7 | from .task_signal import TaskSignal 8 | from .action import Action 9 | from .task_data import TaskData, MultiTaskData 10 | from .datastore import DataStore, DataStoreDocumentSection, DataStoreDocument 11 | from .signal import Server, Client 12 | from .exceptions import AbortWorkflow, StopTask 13 | 14 | 15 | __all__ = [ 16 | 'Workflow', 17 | 'Parameters', 'Option', 18 | 'Dag', 19 | 'DagSignal', 20 | 'BaseTask', 'TaskStatus', 21 | 'TaskParameters', 22 | 'TaskSignal', 23 | 'Action', 24 | 'TaskData', 'MultiTaskData', 25 | 'DataStore', 'DataStoreDocumentSection', 'DataStoreDocument', 26 | 'Server', 'Client', 27 | 'AbortWorkflow', 'StopTask', 28 | ] 29 | -------------------------------------------------------------------------------- /lightflow/models/action.py: -------------------------------------------------------------------------------- 1 | from copy import copy 2 | 3 | 4 | class Action: 5 | """ The class for the action object that is returned by each task. 6 | 7 | The action object encapsulates the information that is returned by a task to the 8 | system. It contains the data that should be passed on to the successor tasks and 9 | a list of immediate successor tasks that should be executed. The latter allows 10 | to limit the execution of successor tasks. 11 | """ 12 | def __init__(self, data, limit=None): 13 | """ Initialise the Action object. 14 | 15 | Args: 16 | data (MultiTaskData): The processed data from the task that should be passed 17 | on to successor tasks. 18 | limit (list): A list of names of all immediate successor tasks that 19 | should be executed. 20 | """ 21 | self._data = data 22 | self._limit = limit 23 | 24 | @property 25 | def data(self): 26 | """ Returns the data object. """ 27 | return self._data 28 | 29 | @property 30 | def limit(self): 31 | """ Returns the list of tasks that should be executed. """ 32 | return self._limit 33 | 34 | def copy(self): 35 | """ Return a copy of the Action object. """ 36 | return copy(self) 37 | -------------------------------------------------------------------------------- /lightflow/models/dag_signal.py: -------------------------------------------------------------------------------- 1 | from .signal import Request 2 | 3 | 4 | class DagSignal: 5 | """ Class to wrap the construction and sending of signals into easy to use methods """ 6 | def __init__(self, client, dag_name): 7 | """ Initialise the dag signal convenience class. 8 | 9 | Args: 10 | client (Client): A reference to a signal client object. 11 | dag_name (str): The name of the dag sending this signal. 12 | """ 13 | self._client = client 14 | self._dag_name = dag_name 15 | 16 | def stop_workflow(self): 17 | """ Send a stop signal to the workflow. 18 | 19 | Upon receiving the stop signal, the workflow will not queue any new dags. 20 | Furthermore it will make the stop signal available to the dags, which will 21 | then stop queueing new tasks. 
As soon as all active tasks have finished 22 | processing, the workflow will terminate. 23 | 24 | Returns: 25 | bool: True if the signal was sent successfully. 26 | """ 27 | return self._client.send(Request(action='stop_workflow')).success 28 | 29 | @property 30 | def is_stopped(self): 31 | """ Check whether the dag received a stop signal from the workflow. 32 | 33 | As soon as the dag receives a stop signal, no new tasks will be queued 34 | and the dag will wait for the active tasks to terminate. 35 | 36 | Returns: 37 | bool: True if the dag should be stopped. 38 | """ 39 | resp = self._client.send( 40 | Request( 41 | action='is_dag_stopped', 42 | payload={'dag_name': self._dag_name} 43 | ) 44 | ) 45 | return resp.payload['is_stopped'] 46 | -------------------------------------------------------------------------------- /lightflow/models/exceptions.py: -------------------------------------------------------------------------------- 1 | 2 | class LightflowException(RuntimeError): 3 | """ Lightflow base class for all exceptions. """ 4 | def __init__(self, message=''): 5 | self.message = message 6 | 7 | def __str__(self): 8 | return self.message 9 | 10 | def __repr__(self): 11 | return "".format(self.message) 12 | 13 | 14 | class ConfigLoadError(RuntimeError): 15 | """ Raise this if there is a configuration loading error. """ 16 | pass 17 | 18 | 19 | class ConfigOverwriteError(RuntimeError): 20 | pass 21 | 22 | 23 | class ConfigNotDefinedError(RuntimeError): 24 | pass 25 | 26 | 27 | class ConfigFieldError(RuntimeError): 28 | pass 29 | 30 | 31 | class WorkflowArgumentError(RuntimeError): 32 | pass 33 | 34 | 35 | class WorkflowImportError(RuntimeError): 36 | pass 37 | 38 | 39 | class WorkflowDefinitionError(RuntimeError): 40 | def __init__(self, workflow_name, graph_name): 41 | """ Initialize the exception for invalid workflow definitions. 42 | 43 | Args: 44 | workflow_name (str): The name of the workflow that contains an invalid 45 | definition. 46 | graph_name (str): The name of the dag that is invalid. 47 | """ 48 | self.workflow_name = workflow_name 49 | self.graph_name = graph_name 50 | 51 | 52 | class DirectedAcyclicGraphInvalid(RuntimeError): 53 | def __init__(self, graph_name): 54 | """ Initialize the exception for invalid directed acyclic graphs. 55 | 56 | Args: 57 | graph_name (str): The name of the dag that is invalid. 
58 | """ 59 | self.graph_name = graph_name 60 | 61 | 62 | class DirectedAcyclicGraphUndefined(RuntimeError): 63 | pass 64 | 65 | 66 | class DataInvalidIndex(RuntimeError): 67 | pass 68 | 69 | 70 | class DataInvalidAlias(RuntimeError): 71 | pass 72 | 73 | 74 | class DataStoreNotConnected(RuntimeError): 75 | pass 76 | 77 | 78 | class DataStoreIDExists(RuntimeError): 79 | pass 80 | 81 | 82 | class DataStoreIDInvalid(RuntimeError): 83 | pass 84 | 85 | 86 | class DataStoreGridfsIdInvalid(RuntimeError): 87 | pass 88 | 89 | 90 | class DataStoreDecodeUnknownType(RuntimeError): 91 | pass 92 | 93 | 94 | class TaskReturnActionInvalid(RuntimeError): 95 | pass 96 | 97 | 98 | class RequestActionUnknown(RuntimeError): 99 | pass 100 | 101 | 102 | class RequestFailed(RuntimeError): 103 | pass 104 | 105 | 106 | class DagNameUnknown(RuntimeError): 107 | pass 108 | 109 | 110 | class EventTypeUnknown(RuntimeError): 111 | pass 112 | 113 | 114 | class JobEventTypeUnsupported(RuntimeError): 115 | pass 116 | 117 | 118 | class WorkerEventTypeUnsupported(RuntimeError): 119 | pass 120 | 121 | 122 | class JobStatInvalid(RuntimeError): 123 | pass 124 | 125 | 126 | class AbortWorkflow(LightflowException): 127 | pass 128 | 129 | 130 | class StopTask(LightflowException): 131 | def __init__(self, message='', *, skip_successors=True): 132 | super().__init__(message) 133 | self.skip_successors = skip_successors 134 | -------------------------------------------------------------------------------- /lightflow/models/mongo_proxy.py: -------------------------------------------------------------------------------- 1 | import time 2 | from itertools import count 3 | import pymongo 4 | from pymongo import MongoClient 5 | from pymongo.errors import AutoReconnect 6 | import gridfs 7 | from gridfs import GridFS 8 | 9 | from lightflow.logger import get_logger 10 | 11 | logger = get_logger(__name__) 12 | 13 | 14 | # if the connection to MongoDB got lost, try reconnecting for 5 minutes 15 | WAIT_TIME = 300 16 | 17 | 18 | def get_methods(*objs): 19 | """ Return the names of all callable attributes of an object""" 20 | return set( 21 | attr 22 | for obj in objs 23 | for attr in dir(obj) 24 | if not attr.startswith('_') and callable(getattr(obj, attr)) 25 | ) 26 | 27 | 28 | class MongoExecutable: 29 | """ Wrapper class for catching and handling reconnect exceptions in pymongo calls. 30 | 31 | The provided callable is executed and if the pymongo library raises an AutoReconnect 32 | exception, another call is attempted. This is repeated until WAIT_TIME is reached. 33 | """ 34 | def __init__(self, method): 35 | """ Initialize the MongoExecutable. 36 | 37 | Args: 38 | method (callable): The function that should be called and for which 39 | reconnection attempts should be tried. 40 | """ 41 | self._method = method 42 | 43 | def __call__(self, *args, **kwargs): 44 | """ Call the method and handle the AutoReconnect exception gracefully """ 45 | start_time = time.time() 46 | 47 | for attempt in count(): 48 | try: 49 | return self._method(*args, **kwargs) 50 | except AutoReconnect: 51 | duration = time.time() - start_time 52 | 53 | if duration >= WAIT_TIME: 54 | break 55 | 56 | logger.warning( 57 | 'Reconnecting to MongoDB, attempt {} ({:.3f} seconds elapsed)'. 58 | format(attempt, duration)) 59 | 60 | time.sleep(self.calc_sleep(attempt)) 61 | 62 | return self._method(*args, **kwargs) 63 | 64 | def calc_sleep(self, attempt): 65 | """ Calculate the sleep time based on the number of past attempts. 
66 | 67 | The sleep time grows exponentially with the attempts up to a maximum 68 | of 10 seconds. 69 | 70 | Args: 71 | attempt (int): The number of reconnection attempts. 72 | 73 | Returns: 74 | int: The number of seconds to sleep before trying the next attempt. 75 | """ 76 | return min(10, pow(2, attempt)) 77 | 78 | def __dir__(self): 79 | return dir(self._method) 80 | 81 | def __str__(self): 82 | return str(self._method) 83 | 84 | def __repr__(self): 85 | return repr(self._method) 86 | 87 | 88 | class MongoReconnectProxy: 89 | """ Proxy for catching AutoReconnect exceptions in function calls of another class """ 90 | 91 | def __init__(self, obj, methods): 92 | """ Initialize the MongoReconnectProxy. 93 | 94 | Args: 95 | obj: The object for which all calls should be wrapped in the AutoReconnect 96 | exception handling block. 97 | methods (set): The list of method names that should be wrapped. 98 | """ 99 | self._unproxied_object = obj 100 | self._methods = methods 101 | 102 | @property 103 | def unproxied_object(self): 104 | """ Return the unproxied object """ 105 | return self._unproxied_object 106 | 107 | def __getitem__(self, key): 108 | """ Return proxy for the object method named 'key'. """ 109 | item = self._unproxied_object[key] 110 | if callable(item): 111 | return MongoReconnectProxy(item, self._methods) 112 | return item 113 | 114 | def __getattr__(self, key): 115 | """ Depending on the type of attribute return an Executable or Proxy object. """ 116 | attr = getattr(self._unproxied_object, key) 117 | if callable(attr): 118 | if key in self._methods: 119 | return MongoExecutable(attr) 120 | else: 121 | return MongoReconnectProxy(attr, self._methods) 122 | return attr 123 | 124 | def __call__(self, *args, **kwargs): 125 | return self._unproxied_object(*args, **kwargs) 126 | 127 | def __dir__(self): 128 | return dir(self._unproxied_object) 129 | 130 | def __str__(self): 131 | return str(self._unproxied_object) 132 | 133 | def __repr__(self): 134 | return repr(self._unproxied_object) 135 | 136 | 137 | class MongoClientProxy(MongoReconnectProxy): 138 | """ Proxy for catching AutoReconnect exceptions in function calls of the MongoClient 139 | 140 | Specialization of the MongoReconnectProxy class for the MongoClient class. 141 | """ 142 | def __init__(self, obj): 143 | super().__init__(obj, 144 | get_methods(pymongo.collection.Collection, 145 | pymongo.database.Database, 146 | MongoClient, 147 | pymongo)) 148 | 149 | 150 | class GridFSProxy(MongoReconnectProxy): 151 | """ Proxy for catching AutoReconnect exceptions in function calls of the GridFS class 152 | 153 | Specialization of the MongoReconnectProxy class for the GridFS class. 154 | """ 155 | def __init__(self, obj): 156 | super().__init__(obj, 157 | get_methods(gridfs, GridFS)) 158 | -------------------------------------------------------------------------------- /lightflow/models/parameters.py: -------------------------------------------------------------------------------- 1 | from .exceptions import WorkflowArgumentError 2 | 3 | 4 | class Option: 5 | """ A single option which is required to run the workflow. 6 | 7 | The option is checked against the provided arguments to the workflow and, 8 | if available, its provided value is stored in the data store for use within 9 | the workflow. 10 | """ 11 | def __init__(self, name, default=None, help=None, type=str): 12 | """ Initialise the workflow option. 13 | 14 | Args: 15 | name (str): The name of the option under which the value will be stored. 
16 | default: The default value that should be used when no value is specified. 17 | Set to None to make this a non-optional option. 18 | help (str): A short help string for this option. 19 | type: The type of the option. Supported types are: str, int, float, bool 20 | """ 21 | self._name = name 22 | self._default = default 23 | self._help = help 24 | self._type = type 25 | 26 | @property 27 | def name(self): 28 | """ Returns the name of the option. 29 | 30 | Returns: 31 | str: the name of the option. 32 | """ 33 | return self._name 34 | 35 | @property 36 | def default(self): 37 | """ Return the default value of the option. 38 | 39 | Returns: 40 | str: the default value of the option 41 | """ 42 | return self._default 43 | 44 | @property 45 | def type(self): 46 | """ Return the type of the option. 47 | 48 | Returns: 49 | type: the type of the option. 50 | """ 51 | return self._type 52 | 53 | @property 54 | def help(self): 55 | """ Return the help text of the option. 56 | 57 | Returns: 58 | str: the help text of the option. 59 | """ 60 | return self._help 61 | 62 | def convert(self, value): 63 | """ Convert the specified value to the type of the option. 64 | 65 | Args: 66 | value: The value that should be converted. 67 | 68 | Returns: 69 | The value with the type given by the option. 70 | """ 71 | if self._type is str: 72 | return str(value) 73 | elif self._type is int: 74 | try: 75 | return int(value) 76 | except (UnicodeError, ValueError): 77 | raise WorkflowArgumentError('Cannot convert {} to int'.format(value)) 78 | elif self._type is float: 79 | try: 80 | return float(value) 81 | except (UnicodeError, ValueError): 82 | raise WorkflowArgumentError('Cannot convert {} to float'.format(value)) 83 | elif self._type is bool: 84 | if isinstance(value, bool): 85 | return bool(value) 86 | value = value.lower() 87 | if value in ('true', '1', 'yes', 'y'): 88 | return True 89 | elif value in ('false', '0', 'no', 'n'): 90 | return False 91 | raise WorkflowArgumentError('Cannot convert {} to bool'.format(value)) 92 | else: 93 | return value 94 | 95 | 96 | class Parameters(list): 97 | """ A list of options that the workflow requires in order to run. """ 98 | 99 | def check_missing(self, args): 100 | """ Returns the names of all options that are required but were not specified. 101 | 102 | All options that don't have a default value are required in order to run the 103 | workflow. 104 | 105 | Args: 106 | args (dict): A dictionary of the provided arguments that is checked for 107 | missing options. 108 | 109 | Returns: 110 | list: A list with the names of the options that are missing from the 111 | provided arguments. 112 | """ 113 | return [opt.name for opt in self 114 | if (opt.name not in args) and (opt.default is None)] 115 | 116 | def consolidate(self, args): 117 | """ Consolidate the provided arguments. 118 | 119 | If the provided arguments have matching options, this performs a type conversion. 120 | For any option that has a default value and is not present in the provided 121 | arguments, the default value is added. 122 | 123 | Args: 124 | args (dict): A dictionary of the provided arguments. 125 | 126 | Returns: 127 | dict: A dictionary with the type converted and with default options enriched 128 | arguments. 
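Example (a minimal sketch; the path is a placeholder):

            params = Parameters([Option('filepath'),
                                 Option('iterations', default=1, type=int)])

            params.check_missing({})                                   # ['filepath']
            params.consolidate({'filepath': '/tmp/a.txt', 'iterations': '3'})
            # {'filepath': '/tmp/a.txt', 'iterations': 3}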
129 | """ 130 | result = dict(args) 131 | 132 | for opt in self: 133 | if opt.name in result: 134 | result[opt.name] = opt.convert(result[opt.name]) 135 | else: 136 | if opt.default is not None: 137 | result[opt.name] = opt.convert(opt.default) 138 | 139 | return result 140 | -------------------------------------------------------------------------------- /lightflow/models/signal.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import uuid 3 | from time import sleep 4 | from redis import StrictRedis 5 | 6 | SIGNAL_REDIS_PREFIX = 'lightflow' 7 | 8 | 9 | class SignalConnection: 10 | """ The connection to the redis signal broker database. 11 | 12 | Args: 13 | host (str): The host of the redis database. 14 | port (int): The port of the redis database. 15 | database (int): The number of the database. 16 | password (str): Optional password for the redis database. 17 | auto_connect (bool): Set to True to connect to the redis broker database. 18 | polling_time (float): The polling time for signal requests in seconds. 19 | """ 20 | def __init__(self, host, port, database, *, password=None, auto_connect=False, 21 | polling_time=0.5): 22 | self._host = host 23 | self._port = port 24 | self._database = database 25 | self._password = password 26 | self._polling_time = polling_time 27 | 28 | self._connection = None 29 | if auto_connect: 30 | self.connect() 31 | 32 | @property 33 | def is_connected(self): 34 | """ Returns the status of the signal connection. """ 35 | return self._connection is not None 36 | 37 | @property 38 | def connection(self): 39 | """ Returns the connection object or None if the connection is not open. """ 40 | return self._connection 41 | 42 | @property 43 | def polling_time(self): 44 | """ Returns the polling time for signal requests in seconds. """ 45 | return self._polling_time 46 | 47 | def connect(self): 48 | """ Connects to the redis database. """ 49 | self._connection = StrictRedis( 50 | host=self._host, 51 | port=self._port, 52 | db=self._database, 53 | password=self._password) 54 | 55 | 56 | class Request: 57 | """ The request that is sent from a client to the server. 58 | 59 | This implements a custom request protocol with: 60 | - action: A string representing the requested action that should be 61 | executed by the server. 62 | - payload: A dictionary with data that is available to the action. 63 | The content depends on the type of action. 64 | - uid: A unique ID that is used to tag the response that follows this request. 65 | """ 66 | def __init__(self, action, *, payload=None): 67 | """ Initialise the request object. 68 | 69 | Args: 70 | action (str): A string representing the requested action that should be 71 | executed by the server. 72 | payload (dict): A dictionary with data that is available to the action. 73 | """ 74 | self.action = action 75 | self.payload = payload if payload is not None else {} 76 | self.uid = uuid.uuid4() 77 | 78 | 79 | class Response: 80 | """ The response that is sent from the server to the client. 81 | 82 | This implements a custom response protocol with: 83 | - success: Specifies whether the request was successful. 84 | - payload: A dictionary with response data. The content depends 85 | on the type of response. 86 | - uid: A unique ID that matches the id of the initial request. 87 | """ 88 | def __init__(self, success, uid, *, payload=None): 89 | """ Initialise the response object. 90 | 91 | Args: 92 | success (bool): True if the request was successful. 
93 | uid (str): Unique response id. 94 | payload (dict): A dictionary with the response data. 95 | """ 96 | self.success = success 97 | self.uid = uid 98 | self.payload = payload if payload is not None else {} 99 | 100 | 101 | class Server: 102 | """ The server for the signal system, listening for requests from clients. 103 | 104 | This implementation retrieves requests from a list stored in redis. Each request 105 | is implemented using the Request class and stored as a pickled object. The response 106 | is stored under a unique response id, so the client can pick up the response. 107 | """ 108 | def __init__(self, connection, request_key): 109 | """ Initialises the signal server. 110 | 111 | Args: 112 | connection: Reference to a signal connection object. 113 | request_key (str): The key under which the list of requests is stored. 114 | """ 115 | self._connection = connection 116 | self._request_key = '{}:{}'.format(SIGNAL_REDIS_PREFIX, request_key) 117 | 118 | def receive(self): 119 | """ Returns a single request. 120 | 121 | Takes the first request from the list of requests and returns it. If the list 122 | is empty, None is returned. 123 | 124 | Returns: 125 | Response: If a new request is available a Request object is returned, 126 | otherwise None is returned. 127 | """ 128 | pickled_request = self._connection.connection.lpop(self._request_key) 129 | return pickle.loads(pickled_request) if pickled_request is not None else None 130 | 131 | def send(self, response): 132 | """ Send a response back to the client that issued a request. 133 | 134 | Args: 135 | response (Response): Reference to the response object that should be sent. 136 | """ 137 | self._connection.connection.set('{}:{}'.format(SIGNAL_REDIS_PREFIX, response.uid), 138 | pickle.dumps(response)) 139 | 140 | def restore(self, request): 141 | """ Push the request back onto the queue. 142 | 143 | Args: 144 | request (Request): Reference to a request object that should be pushed back 145 | onto the request queue. 146 | """ 147 | self._connection.connection.rpush(self._request_key, pickle.dumps(request)) 148 | 149 | def clear(self): 150 | """ Deletes the list of requests from the redis database. """ 151 | self._connection.connection.delete(self._request_key) 152 | 153 | 154 | class Client: 155 | """ The client for the signal system, sending requests to the server. 156 | 157 | This implementation sends requests to a list stored in redis. Each request 158 | is implemented using the Request class and stored as a pickled object. The response 159 | from the server is stored under the unique response id. 160 | """ 161 | def __init__(self, connection, request_key): 162 | """ Initialises the signal client. 163 | 164 | Args: 165 | connection: Reference to a signal connection object. 166 | request_key (str): The key under which the list of requests is stored. 167 | """ 168 | self._connection = connection 169 | self._request_key = '{}:{}'.format(SIGNAL_REDIS_PREFIX, request_key) 170 | 171 | def send(self, request): 172 | """ Send a request to the server and wait for its response. 173 | 174 | Args: 175 | request (Request): Reference to a request object that is sent to the server. 176 | 177 | Returns: 178 | Response: The response from the server to the request. 
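Example (a minimal sketch; host, database number and request key are placeholders, and a Server must be answering requests on the same key for the call to return):

            connection = SignalConnection('localhost', 6379, 0, auto_connect=True)
            client = Client(connection, request_key='request')
            response = client.send(Request(action='stop_workflow'))
            if response.success:
                ...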
179 | """ 180 | self._connection.connection.rpush(self._request_key, pickle.dumps(request)) 181 | resp_key = '{}:{}'.format(SIGNAL_REDIS_PREFIX, request.uid) 182 | 183 | while True: 184 | if self._connection.polling_time > 0.0: 185 | sleep(self._connection.polling_time) 186 | 187 | response_data = self._connection.connection.get(resp_key) 188 | if response_data is not None: 189 | self._connection.connection.delete(resp_key) 190 | break 191 | 192 | return pickle.loads(response_data) 193 | -------------------------------------------------------------------------------- /lightflow/models/task.py: -------------------------------------------------------------------------------- 1 | from .action import Action 2 | from .task_data import MultiTaskData 3 | from .exceptions import TaskReturnActionInvalid, AbortWorkflow, StopTask 4 | from lightflow.queue import DefaultJobQueueName 5 | 6 | 7 | class TaskState: 8 | """ Constants for flagging the current state of the task. """ 9 | Init = 1 10 | Waiting = 2 11 | Running = 3 12 | Completed = 4 13 | Stopped = 5 14 | Aborted = 6 15 | 16 | 17 | class TaskStatus: 18 | """ Constants for flagging the status of the task after it completed running. """ 19 | Success = 1 20 | Stopped = 2 21 | Aborted = 3 22 | Error = 4 23 | 24 | 25 | class BaseTask: 26 | """ The base class for all tasks. 27 | 28 | Tasks should inherit from this class and implement the run() method. 29 | """ 30 | def __init__(self, name, *, queue=DefaultJobQueueName.Task, 31 | callback_init=None, callback_finally=None, 32 | force_run=False, propagate_skip=True): 33 | """ Initialize the base task. 34 | 35 | The dag_name and workflow_name attributes are filled at runtime. 36 | 37 | Args: 38 | name (str): The name of the task. 39 | queue (str): Name of the queue the task should be scheduled to. 40 | callback_init (callable): A callable that is called shortly before the task 41 | is run. The definition is: 42 | def (data, store, signal, context) 43 | where data the task data, store the workflow 44 | data store, signal the task signal and 45 | context the task context. 46 | callback_finally (callable): A callable that is always called at the end of 47 | a task, regardless whether it completed 48 | successfully, was stopped or was aborted. 49 | The definition is: 50 | def (status, data, store, signal, context) 51 | where status specifies whether the task was 52 | success: TaskStatus.Success 53 | stopped: TaskStatus.Stopped 54 | aborted: TaskStatus.Aborted 55 | raised exception: TaskStatus.Error 56 | data the task data, store the workflow 57 | data store, signal the task signal and 58 | context the task context. 59 | force_run (bool): Run the task even if it is flagged to be skipped. 60 | propagate_skip (bool): Propagate the skip flag to the next task. 61 | """ 62 | self._name = name 63 | self._queue = queue 64 | self._callback_init = callback_init 65 | self._callback_finally = callback_finally 66 | self._force_run = force_run 67 | self._propagate_skip = propagate_skip 68 | 69 | self._skip = False 70 | self._state = TaskState.Init 71 | self._celery_result = None 72 | 73 | self.workflow_name = None 74 | self.dag_name = None 75 | 76 | @property 77 | def name(self): 78 | """ Returns the name of the task. """ 79 | return self._name 80 | 81 | @property 82 | def queue(self): 83 | """ Returns the queue the task should be scheduled to. """ 84 | return self._queue 85 | 86 | @property 87 | def has_to_run(self): 88 | """ Returns whether the task has to run, even if the DAG would skip it. 
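This reflects the force_run flag given to the constructor. For instance, a task created as PythonTask(name='always_run', callback=my_callback, force_run=True) would report True here and run even when the DAG has flagged it to be skipped (PythonTask forwarding the flag to this base class is an assumption here).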
""" 89 | return self._force_run 90 | 91 | @property 92 | def propagate_skip(self): 93 | """ Returns whether the skip flag should be propagated to the successor tasks. """ 94 | return self._propagate_skip 95 | 96 | @property 97 | def is_waiting(self): 98 | """ Internal state: returns whether the task is waiting in the DAG to be run. """ 99 | return self._state == TaskState.Waiting 100 | 101 | @property 102 | def is_running(self): 103 | """ Internal state: returns whether the task is currently running. """ 104 | return self._state == TaskState.Running 105 | 106 | @property 107 | def is_completed(self): 108 | """ Internal state: returns whether the task has completed successfully. """ 109 | return self._state == TaskState.Completed 110 | 111 | @property 112 | def is_stopped(self): 113 | """ Internal state: returns whether the task was stopped. """ 114 | return self._state == TaskState.Stopped 115 | 116 | @property 117 | def is_aborted(self): 118 | """ Internal state: returns whether the task was aborted. """ 119 | return self._state == TaskState.Aborted 120 | 121 | @property 122 | def is_skipped(self): 123 | """ Internal state: returns whether the task was skipped. """ 124 | return self._skip 125 | 126 | @is_skipped.setter 127 | def is_skipped(self, value): 128 | """ Set whether the task has been skipped. 129 | 130 | Args: 131 | value (bool): Set to True if the tasked was skipped. 132 | """ 133 | self._skip = value 134 | 135 | @property 136 | def state(self): 137 | """ Returns the internal state of the task. """ 138 | return self._state 139 | 140 | @state.setter 141 | def state(self, state): 142 | """ Sets the internal state of the task. 143 | 144 | Args: 145 | state (TaskState): The new state of the task 146 | """ 147 | self._state = state 148 | 149 | @property 150 | def celery_pending(self): 151 | """ Celery state: returns whether the task is queued. """ 152 | if self.has_celery_result: 153 | return self.celery_result.state == "PENDING" 154 | else: 155 | return False 156 | 157 | @property 158 | def celery_completed(self): 159 | """ Celery state: returns whether the execution of the task has completed. """ 160 | if self.has_celery_result: 161 | return self.celery_result.ready() 162 | else: 163 | return False 164 | 165 | @property 166 | def celery_failed(self): 167 | """ Celery state: returns whether the execution of the task failed. """ 168 | if self.has_celery_result: 169 | return self.celery_result.failed() 170 | else: 171 | return False 172 | 173 | @property 174 | def celery_state(self): 175 | """ Returns the current celery state of the task as a string. """ 176 | if self.has_celery_result: 177 | return self.celery_result.state 178 | else: 179 | return "NOT_QUEUED" 180 | 181 | @property 182 | def has_celery_result(self): 183 | """ Returns whether the task has a result from celery. 184 | 185 | This indicates that the task is either queued, running or finished. 186 | """ 187 | return self.celery_result is not None 188 | 189 | @property 190 | def celery_result(self): 191 | """ Returns the celery result object for this task. """ 192 | return self._celery_result 193 | 194 | @celery_result.setter 195 | def celery_result(self, result): 196 | """ Sets the celery result object for this task. 197 | 198 | Args: 199 | result (AsyncResult): The result of the celery queuing call. 200 | """ 201 | self._celery_result = result 202 | 203 | def clear_celery_result(self): 204 | """ Removes the task's celery result from the result backend. 
""" 205 | if self.has_celery_result: 206 | self._celery_result.forget() 207 | 208 | def _run(self, data, store, signal, context, *, 209 | success_callback=None, stop_callback=None, abort_callback=None): 210 | """ The internal run method that decorates the public run method. 211 | 212 | This method makes sure data is being passed to and from the task. 213 | 214 | Args: 215 | data (MultiTaskData): The data object that has been passed from the 216 | predecessor task. 217 | store (DataStoreDocument): The persistent data store object that allows the 218 | task to store data for access across the current 219 | workflow run. 220 | signal (TaskSignal): The signal object for tasks. It wraps the construction 221 | and sending of signals into easy to use methods. 222 | context (TaskContext): The context in which the tasks runs. 223 | success_callback: This function is called when the task completed successfully 224 | stop_callback: This function is called when a StopTask exception was raised. 225 | abort_callback: This function is called when an AbortWorkflow exception 226 | was raised. 227 | 228 | Raises: 229 | TaskReturnActionInvalid: If the return value of the task is not 230 | an Action object. 231 | 232 | Returns: 233 | Action: An Action object containing the data that should be passed on 234 | to the next task and optionally a list of successor tasks that 235 | should be executed. 236 | """ 237 | if data is None: 238 | data = MultiTaskData() 239 | data.add_dataset(self._name) 240 | 241 | try: 242 | if self._callback_init is not None: 243 | self._callback_init(data, store, signal, context) 244 | 245 | result = self.run(data, store, signal, context) 246 | 247 | if self._callback_finally is not None: 248 | self._callback_finally(TaskStatus.Success, data, store, signal, context) 249 | 250 | if success_callback is not None: 251 | success_callback() 252 | 253 | # the task should be stopped and optionally all successor tasks skipped 254 | except StopTask as err: 255 | if self._callback_finally is not None: 256 | self._callback_finally(TaskStatus.Stopped, data, store, signal, context) 257 | 258 | if stop_callback is not None: 259 | stop_callback(exc=err) 260 | 261 | result = Action(data, limit=[]) if err.skip_successors else None 262 | 263 | # the workflow should be stopped immediately 264 | except AbortWorkflow as err: 265 | if self._callback_finally is not None: 266 | self._callback_finally(TaskStatus.Aborted, data, store, signal, context) 267 | 268 | if abort_callback is not None: 269 | abort_callback(exc=err) 270 | 271 | result = None 272 | signal.stop_workflow() 273 | 274 | # catch any other exception, call the finally callback, then re-raise 275 | except: 276 | if self._callback_finally is not None: 277 | self._callback_finally(TaskStatus.Error, data, store, signal, context) 278 | 279 | signal.stop_workflow() 280 | raise 281 | 282 | # handle the returned data (either implicitly or as an returned Action object) by 283 | # flattening all, possibly modified, input datasets in the MultiTask data down to 284 | # a single output dataset. 285 | if result is None: 286 | data.flatten(in_place=True) 287 | data.add_task_history(self.name) 288 | return Action(data) 289 | else: 290 | if not isinstance(result, Action): 291 | raise TaskReturnActionInvalid() 292 | 293 | result.data.flatten(in_place=True) 294 | result.data.add_task_history(self.name) 295 | return result 296 | 297 | def run(self, data, store, signal, context, **kwargs): 298 | """ The main run method of a task. 
299 | 300 | Implement this method in inherited classes. 301 | 302 | Args: 303 | data (MultiTaskData): The data object that has been passed from the 304 | predecessor task. 305 | store (DataStoreDocument): The persistent data store object that allows the 306 | task to store data for access across the current 307 | workflow run. 308 | signal (TaskSignal): The signal object for tasks. It wraps the construction 309 | and sending of signals into easy to use methods. 310 | context (TaskContext): The context in which the tasks runs. 311 | 312 | Returns: 313 | Action: An Action object containing the data that should be passed on 314 | to the next task and optionally a list of successor tasks that 315 | should be executed. 316 | """ 317 | pass 318 | -------------------------------------------------------------------------------- /lightflow/models/task_context.py: -------------------------------------------------------------------------------- 1 | 2 | class TaskContext: 3 | """ This class contains information about the context the task is running in. """ 4 | 5 | def __init__(self, task_name, dag_name, workflow_name, workflow_id, worker_hostname): 6 | """ Initialize the task context object. 7 | 8 | Args: 9 | task_name (str): The name of the task. 10 | dag_name (str): The name of the DAG the task was started from. 11 | workflow_name (str): The name of the workflow the task was started from. 12 | workflow_id (str): The id of the workflow this task is member of. 13 | worker_hostname (str): The name of the worker executing this task. 14 | """ 15 | self.task_name = task_name 16 | self.dag_name = dag_name 17 | self.workflow_name = workflow_name 18 | self.workflow_id = workflow_id 19 | self.worker_hostname = worker_hostname 20 | 21 | def to_dict(self): 22 | """ Return the task context content as a dictionary. """ 23 | return { 24 | 'task_name': self.task_name, 25 | 'dag_name': self.dag_name, 26 | 'workflow_name': self.workflow_name, 27 | 'workflow_id': self.workflow_id, 28 | 'worker_hostname': self.worker_hostname 29 | } 30 | -------------------------------------------------------------------------------- /lightflow/models/task_data.py: -------------------------------------------------------------------------------- 1 | from copy import deepcopy 2 | 3 | from .exceptions import DataInvalidIndex, DataInvalidAlias 4 | 5 | 6 | class TaskData: 7 | """ This class represents a single dataset that is passed between tasks. 8 | 9 | It behaves like a dictionary but also contains a history of all tasks that have 10 | contributed to this dataset. 11 | 12 | Args: 13 | data (dict): A dictionary with the initial data that should be stored. 14 | task_history (list): A list of task names that have contributed to this data. 15 | """ 16 | def __init__(self, data=None, *, task_history=None): 17 | self._data = data if data is not None else {} 18 | self._task_history = task_history if task_history is not None else [] 19 | 20 | def add_task_history(self, task_name): 21 | """ Add a task name to the list of tasks that have contributed to this dataset. 22 | 23 | Args: 24 | task_name (str): The name of the task that contributed. 25 | """ 26 | self._task_history.append(task_name) 27 | 28 | @property 29 | def data(self): 30 | """ Return the data of this dataset. """ 31 | return self._data 32 | 33 | @property 34 | def task_history(self): 35 | """ Return the list of task names that have contributed to this dataset. 
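TaskContext is a plain value object; the names below are made up, but the dictionary view is exactly what to_dict() returns.

from lightflow.models.task_context import TaskContext

context = TaskContext(task_name='collect', dag_name='main_dag',
                      workflow_name='example_workflow', workflow_id='abc123',
                      worker_hostname='worker-01')

assert context.to_dict() == {
    'task_name': 'collect',
    'dag_name': 'main_dag',
    'workflow_name': 'example_workflow',
    'workflow_id': 'abc123',
    'worker_hostname': 'worker-01',
}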
""" 36 | return self._task_history 37 | 38 | def get(self, key, default=None): 39 | """ Access a single value in the dataset by its key 40 | 41 | Args: 42 | key (str): The key under which the value is stored. 43 | default: Return this value if the key cannot be found. 44 | 45 | Returns: 46 | object: The value that is stored under the specified key. 47 | """ 48 | return self._data.get(key, default) 49 | 50 | def set(self, key, value): 51 | """ Change the value of a field in the dataset. 52 | 53 | Args: 54 | key (str): The key pointing to the value that should be changed. 55 | value: The new value that should be set. 56 | """ 57 | self._data[key] = value 58 | 59 | def merge(self, dataset): 60 | """ Merge the specified dataset on top of the existing data. 61 | 62 | This replaces all values in the existing dataset with the values from the 63 | given dataset. 64 | 65 | Args: 66 | dataset (TaskData): A reference to the TaskData object that should be merged 67 | on top of the existing object. 68 | """ 69 | def merge_data(source, dest): 70 | for key, value in source.items(): 71 | if isinstance(value, dict): 72 | merge_data(value, dest.setdefault(key, {})) 73 | else: 74 | dest[key] = value 75 | return dest 76 | 77 | merge_data(dataset.data, self._data) 78 | 79 | for h in dataset.task_history: 80 | if h not in self._task_history: 81 | self._task_history.append(h) 82 | 83 | def __deepcopy__(self, memo): 84 | """ Copy the object. """ 85 | return TaskData(data=deepcopy(self._data, memo), 86 | task_history=self._task_history[:]) 87 | 88 | def __getitem__(self, item): 89 | """ Access a single value in the dataset by its key. """ 90 | return self._data[item] 91 | 92 | def __setitem__(self, key, value): 93 | """ Change the value of a field in the dataset. """ 94 | self._data[key] = value 95 | 96 | def __delitem__(self, key): 97 | """ Delete a field in the dataset. """ 98 | del self._data[key] 99 | 100 | def __contains__(self, item): 101 | """ Checks whether the item is present in the dataset """ 102 | return item in self._data 103 | 104 | def __repr__(self): 105 | """ Return a representation of the object. """ 106 | return '{}({})'.format(self.__class__.__name__, self._data) 107 | 108 | def __str__(self): 109 | """ Return a string of the data. """ 110 | return str(self._data) 111 | 112 | 113 | class MultiTaskData: 114 | """ Manages multiple TaskData datasets and their aliases. 115 | 116 | This class implements the data object that is being passed between tasks. It consists 117 | of one or more TaskData datasets in order to accommodate multiple inputs to a single 118 | task. Each dataset can be accessed by its index or by one or more aliases. There is 119 | a default dataset, which is used whenever the user does not specify the exact dataset 120 | to work with. 121 | 122 | Args: 123 | dataset (TaskData): An initial TaskData dataset. 124 | aliases (list): A list of aliases for the initial dataset. 125 | """ 126 | 127 | def __init__(self, *, dataset=None, aliases=None): 128 | self._datasets = [] if dataset is None else [dataset] 129 | self._aliases = {} if aliases is None else {a: 0 for a in aliases} 130 | self._default_index = 0 131 | 132 | @property 133 | def default_index(self): 134 | """ Return the index of the default dataset. """ 135 | return self._default_index 136 | 137 | @property 138 | def default_dataset(self): 139 | """ Return the default dataset. 140 | 141 | Returns: 142 | TaskData: A reference to the default dataset. 
143 | """ 144 | return self.get_by_index(self._default_index) 145 | 146 | def add_dataset(self, task_name, dataset=None, *, aliases=None): 147 | """ Add a new dataset to the MultiTaskData. 148 | 149 | Args: 150 | task_name (str): The name of the task from which the dataset was received. 151 | dataset (TaskData): The dataset that should be added. 152 | aliases (list): A list of aliases that should be registered with the dataset. 153 | """ 154 | self._datasets.append(dataset if dataset is not None else TaskData()) 155 | last_index = len(self._datasets) - 1 156 | self._aliases[task_name] = last_index 157 | 158 | if aliases is not None: 159 | for alias in aliases: 160 | self._aliases[alias] = last_index 161 | 162 | if len(self._datasets) == 1: 163 | self._default_index = 0 164 | 165 | def add_alias(self, alias, index): 166 | """ Add an alias pointing to the specified index. 167 | 168 | Args: 169 | alias (str): The alias that should point to the given index. 170 | index (int): The index of the dataset for which an alias should be added. 171 | 172 | Raises: 173 | DataInvalidIndex: If the index does not represent a valid dataset. 174 | """ 175 | if index >= len(self._datasets): 176 | raise DataInvalidIndex('A dataset with index {} does not exist'.format(index)) 177 | self._aliases[alias] = index 178 | 179 | def flatten(self, in_place=True): 180 | """ Merge all datasets into a single dataset. 181 | 182 | The default dataset is the last dataset to be merged, as it is considered to be 183 | the primary source of information and should overwrite all existing fields with 184 | the same key. 185 | 186 | Args: 187 | in_place (bool): Set to ``True`` to replace the existing datasets with the 188 | merged one. If set to ``False``, will return a new MultiTaskData 189 | object containing the merged dataset. 190 | 191 | Returns: 192 | MultiTaskData: If the in_place flag is set to False. 193 | """ 194 | new_dataset = TaskData() 195 | 196 | for i, dataset in enumerate(self._datasets): 197 | if i != self._default_index: 198 | new_dataset.merge(dataset) 199 | 200 | new_dataset.merge(self.default_dataset) 201 | 202 | # point all aliases to the new, single dataset 203 | new_aliases = {alias: 0 for alias, _ in self._aliases.items()} 204 | 205 | # replace existing datasets or return a new MultiTaskData object 206 | if in_place: 207 | self._datasets = [new_dataset] 208 | self._aliases = new_aliases 209 | self._default_index = 0 210 | else: 211 | return MultiTaskData(dataset=new_dataset, aliases=list(new_aliases.keys())) 212 | 213 | def set_default_by_alias(self, alias): 214 | """ Set the default dataset by its alias. 215 | 216 | After changing the default dataset, all calls without explicitly specifying the 217 | dataset by index or alias will be redirected to this dataset. 218 | 219 | Args: 220 | alias (str): The alias of the dataset that should be made the default. 221 | 222 | Raises: 223 | DataInvalidAlias: If the alias does not represent a valid dataset. 224 | """ 225 | if alias not in self._aliases: 226 | raise DataInvalidAlias('A dataset with alias {} does not exist'.format(alias)) 227 | 228 | self._default_index = self._aliases[alias] 229 | 230 | def set_default_by_index(self, index): 231 | """ Set the default dataset by its index. 232 | 233 | After changing the default dataset, all calls without explicitly specifying the 234 | dataset by index or alias will be redirected to this dataset. 235 | 236 | Args: 237 | index (int): The index of the dataset that should be made the default. 
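The interplay of aliases, the default dataset and flatten() is subtle, so here is a compact illustration; note how the default dataset is merged last and therefore wins on conflicting keys:

from lightflow.models.task_data import MultiTaskData, TaskData

data = MultiTaskData()
data.add_dataset('task_a', TaskData(data={'x': 1, 'shared': 'a'}))
data.add_dataset('task_b', TaskData(data={'y': 2, 'shared': 'b'}),
                 aliases=['calib'])

# dataset access by alias, default dataset access by key
assert data('calib')['y'] == 2
assert data['x'] == 1                 # 'task_a' became the default dataset

# flatten() merges everything into one dataset; the default dataset wins
merged = data.flatten(in_place=False)
assert merged['shared'] == 'a'
assert merged['x'] == 1 and merged['y'] == 2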
238 | 239 | Raises: 240 | DataInvalidIndex: If the index does not represent a valid dataset. 241 | """ 242 | if index >= len(self._datasets): 243 | raise DataInvalidIndex('A dataset with index {} does not exist'.format(index)) 244 | 245 | self._default_index = index 246 | 247 | def get_by_alias(self, alias): 248 | """ Return a dataset by its alias. 249 | 250 | Args: 251 | alias (str): The alias of the dataset that should be returned. 252 | 253 | Raises: 254 | DataInvalidAlias: If the alias does not represent a valid dataset. 255 | """ 256 | if alias not in self._aliases: 257 | raise DataInvalidAlias('A dataset with alias {} does not exist'.format(alias)) 258 | 259 | return self.get_by_index(self._aliases[alias]) 260 | 261 | def get_by_index(self, index): 262 | """ Return a dataset by its index. 263 | 264 | Args: 265 | index (int): The index of the dataset that should be returned. 266 | 267 | Raises: 268 | DataInvalidIndex: If the index does not represent a valid dataset. 269 | """ 270 | if index >= len(self._datasets): 271 | raise DataInvalidIndex('A dataset with index {} does not exist'.format(index)) 272 | 273 | return self._datasets[index] 274 | 275 | def add_task_history(self, task_name): 276 | """ Add a task name to the list of tasks that have contributed to all datasets. 277 | 278 | Args: 279 | task_name (str): The name of the task that contributed. 280 | """ 281 | for dataset in self._datasets: 282 | dataset.add_task_history(task_name) 283 | 284 | def __getitem__(self, item): 285 | """ Access a single value in the default dataset by its key. """ 286 | return self.default_dataset[item] 287 | 288 | def __setitem__(self, key, value): 289 | """ Change the value of a field in the default dataset. """ 290 | self.default_dataset[key] = value 291 | 292 | def __delitem__(self, key): 293 | """ Delete a field in the default dataset. """ 294 | del self.default_dataset[key] 295 | 296 | def __contains__(self, item): 297 | """ Checks whether the item is present in the dataset """ 298 | return item in self.default_dataset 299 | 300 | def __call__(self, alias): 301 | """ Shorthand notation for accessing a dataset by its alias. """ 302 | return self.get_by_alias(alias) 303 | 304 | def __iter__(self): 305 | """ Forward iteration requests to the internal list of datasets. """ 306 | return iter(self._datasets) 307 | -------------------------------------------------------------------------------- /lightflow/models/task_parameters.py: -------------------------------------------------------------------------------- 1 | 2 | class TaskParameters(dict): 3 | """ A class to store a mix of callable and native data type parameters for tasks. 4 | 5 | A single parameter can either be a callable returning a native data type or the 6 | native data type itself. This allows tasks do dynamically change their parameters 7 | based on the data flowing into the task or data in the data_store. The structure 8 | of the callable has to be either: 9 | 10 | my_method(data, data_store) 11 | or 12 | lambda data, data_store: 13 | 14 | Tasks that implement parameters create an object of the class in their __init__() 15 | method and populate it with the tasks attributes. In their run() method tasks then 16 | have to call the eval(data, data_store) method in order to evaluate any callables. 17 | """ 18 | def __init__(self, *args, **kwargs): 19 | """ Initialise the class by passing any arguments down to the dict base type. 
""" 20 | super().__init__(*args, **kwargs) 21 | self.update(*args, **kwargs) 22 | 23 | def __getattr__(self, key): 24 | """ Return the parameter value for a key using attribute-style dot notation. 25 | 26 | Args: 27 | key (str): The key that points to the parameter value that should be returned. 28 | 29 | Returns: 30 | str: The parameter value stored under the specified key. 31 | """ 32 | if key in self: 33 | return self[key] 34 | else: 35 | raise AttributeError() 36 | 37 | def __setattr__(self, key, value): 38 | """ Assign a parameter value to a key using attribute-style dot notation. 39 | 40 | Args: 41 | key (str): The key to which the parameter value should be assigned. 42 | value: The parameter value that should be assigned to the key. 43 | """ 44 | self[key] = value 45 | 46 | def __delattr__(self, key): 47 | """ Delete a parameter from the dictionary. 48 | 49 | Args: 50 | key (str): The key to the entry that should be deleted. 51 | 52 | Raise: 53 | AttributeError: if the key does not exist. 54 | """ 55 | if key in self: 56 | del self[key] 57 | else: 58 | raise AttributeError() 59 | 60 | def eval(self, data, data_store, *, exclude=None): 61 | """ Return a new object in which callable parameters have been evaluated. 62 | 63 | Native types are not touched and simply returned, while callable methods are 64 | executed and their return value is returned. 65 | 66 | Args: 67 | data (MultiTaskData): The data object that has been passed from the 68 | predecessor task. 69 | data_store (DataStore): The persistent data store object that allows the task 70 | to store data for access across the current workflow 71 | run. 72 | exclude (list): List of key names as strings that should be excluded from 73 | the evaluation. 74 | 75 | Returns: 76 | TaskParameters: A new TaskParameters object with the callable parameters 77 | replaced by their return value. 78 | """ 79 | exclude = [] if exclude is None else exclude 80 | 81 | result = {} 82 | for key, value in self.items(): 83 | if key in exclude: 84 | continue 85 | 86 | if value is not None and callable(value): 87 | result[key] = value(data, data_store) 88 | else: 89 | result[key] = value 90 | return TaskParameters(result) 91 | 92 | def eval_single(self, key, data, data_store): 93 | """ Evaluate the value of a single parameter taking into account callables . 94 | 95 | Native types are not touched and simply returned, while callable methods are 96 | executed and their return value is returned. 97 | 98 | Args: 99 | key (str): The name of the parameter that should be evaluated. 100 | data (MultiTaskData): The data object that has been passed from the 101 | predecessor task. 102 | data_store (DataStore): The persistent data store object that allows the task 103 | to store data for access across the current workflow 104 | run. 105 | 106 | """ 107 | if key in self: 108 | value = self[key] 109 | if value is not None and callable(value): 110 | return value(data, data_store) 111 | else: 112 | return value 113 | else: 114 | raise AttributeError() 115 | -------------------------------------------------------------------------------- /lightflow/models/task_signal.py: -------------------------------------------------------------------------------- 1 | from .dag import Dag 2 | from .signal import Request 3 | from .task_data import MultiTaskData 4 | 5 | 6 | class TaskSignal: 7 | """ Class to wrap the construction and sending of signals into easy to use methods.""" 8 | def __init__(self, client, dag_name): 9 | """ Initialise the task signal convenience class. 
10 | 11 | Args: 12 | client (Client): A reference to a signal client object. 13 | dag_name (str): The name of the dag the task belongs to. 14 | """ 15 | self._client = client 16 | self._dag_name = dag_name 17 | 18 | def start_dag(self, dag, *, data=None): 19 | """ Schedule the execution of a dag by sending a signal to the workflow. 20 | 21 | Args: 22 | dag (Dag, str): The dag object or the name of the dag that should be started. 23 | data (MultiTaskData): The data that should be passed on to the new dag. 24 | 25 | Returns: 26 | str: The name of the successfully started dag. 27 | """ 28 | return self._client.send( 29 | Request( 30 | action='start_dag', 31 | payload={'name': dag.name if isinstance(dag, Dag) else dag, 32 | 'data': data if isinstance(data, MultiTaskData) else None} 33 | ) 34 | ).payload['dag_name'] 35 | 36 | def join_dags(self, names=None): 37 | """ Wait for the specified dags to terminate. 38 | 39 | This function blocks until the specified dags terminate. If no dags are specified 40 | wait for all dags of the workflow, except the dag of the task calling this signal, 41 | to terminate. 42 | 43 | Args: 44 | names (list): The names of the dags that have to terminate. 45 | 46 | Returns: 47 | bool: True if all the signal was sent successfully. 48 | """ 49 | return self._client.send( 50 | Request( 51 | action='join_dags', 52 | payload={'names': names} 53 | ) 54 | ).success 55 | 56 | def stop_dag(self, name=None): 57 | """ Send a stop signal to the specified dag or the dag that hosts this task. 58 | 59 | Args: 60 | name str: The name of the dag that should be stopped. If no name is given the 61 | dag that hosts this task is stopped. 62 | 63 | Upon receiving the stop signal, the dag will not queue any new tasks and wait 64 | for running tasks to terminate. 65 | 66 | Returns: 67 | bool: True if the signal was sent successfully. 68 | """ 69 | return self._client.send( 70 | Request( 71 | action='stop_dag', 72 | payload={'name': name if name is not None else self._dag_name} 73 | ) 74 | ).success 75 | 76 | def stop_workflow(self): 77 | """ Send a stop signal to the workflow. 78 | 79 | Upon receiving the stop signal, the workflow will not queue any new dags. 80 | Furthermore it will make the stop signal available to the dags, which will 81 | then stop queueing new tasks. As soon as all active tasks have finished 82 | processing, the workflow will terminate. 83 | 84 | Returns: 85 | bool: True if the signal was sent successfully. 86 | """ 87 | return self._client.send(Request(action='stop_workflow')).success 88 | 89 | @property 90 | def is_stopped(self): 91 | """ Check whether the task received a stop signal from the workflow. 92 | 93 | Tasks can use the stop flag to gracefully terminate their work. This is 94 | particularly important for long running tasks and tasks that employ an 95 | infinite loop, such as trigger tasks. 96 | 97 | Returns: 98 | bool: True if the task should be stopped. 99 | """ 100 | resp = self._client.send( 101 | Request( 102 | action='is_dag_stopped', 103 | payload={'dag_name': self._dag_name} 104 | ) 105 | ) 106 | return resp.payload['is_stopped'] 107 | -------------------------------------------------------------------------------- /lightflow/models/utils.py: -------------------------------------------------------------------------------- 1 | 2 | def find_indices(lst, element): 3 | """ Returns the indices for all occurrences of 'element' in 'lst'. 4 | 5 | Args: 6 | lst (list): List to search. 7 | element: Element to find. 
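A typical consumer of TaskSignal is a long-running, trigger-style task that polls until the workflow tells it to stop and hands work off to another dag. The sketch below is written as a PythonTask callback (its signature is documented further down in lightflow/tasks/python_task.py); the dag name 'processing_dag' and the polling check are hypothetical.

import time

def watch_for_data(data, store, signal, context):
    """ Poll for new work until the workflow sends a stop signal. """
    while not signal.is_stopped:
        # ... check an instrument, a directory, a message queue, etc. ...
        new_work_available = False   # placeholder for a real check

        if new_work_available:
            # hand the current data off to another dag of the same workflow
            signal.start_dag('processing_dag', data=data)

        time.sleep(1)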
8 | 9 | Returns: 10 | list: List of indices or values 11 | """ 12 | result = [] 13 | offset = -1 14 | while True: 15 | try: 16 | offset = lst.index(element, offset+1) 17 | except ValueError: 18 | return result 19 | result.append(offset) 20 | -------------------------------------------------------------------------------- /lightflow/queue/__init__.py: -------------------------------------------------------------------------------- 1 | from .const import JobExecPath, JobStatus, JobType, JobEventName, DefaultJobQueueName 2 | 3 | __all__ = ['JobExecPath', 'JobStatus', 'JobType', 'JobEventName', 'DefaultJobQueueName'] 4 | -------------------------------------------------------------------------------- /lightflow/queue/app.py: -------------------------------------------------------------------------------- 1 | import logging.config 2 | from kombu import Queue 3 | from celery import Celery 4 | from celery.result import AsyncResult 5 | from celery.signals import setup_logging, task_postrun 6 | from functools import partial 7 | 8 | from lightflow.queue.const import DefaultJobQueueName 9 | from lightflow.queue.pickle import patch_celery 10 | from lightflow.models.exceptions import ConfigOverwriteError 11 | 12 | 13 | LIGHTFLOW_INCLUDE = ['lightflow.queue.jobs', 'lightflow.models'] 14 | 15 | 16 | def create_app(config): 17 | """ Create a fully configured Celery application object. 18 | 19 | Args: 20 | config (Config): A reference to a lightflow configuration object. 21 | 22 | Returns: 23 | Celery: A fully configured Celery application object. 24 | """ 25 | 26 | # configure the celery logging system with the lightflow settings 27 | setup_logging.connect(partial(_initialize_logging, config), weak=False) 28 | task_postrun.connect(partial(_cleanup_workflow, config), weak=False) 29 | 30 | # patch Celery to use cloudpickle instead of pickle for serialisation 31 | patch_celery() 32 | 33 | # create the main celery app and load the configuration 34 | app = Celery('lightflow') 35 | app.conf.update(**config.celery) 36 | 37 | # overwrite user supplied settings to make sure celery works with lightflow 38 | app.conf.update( 39 | task_serializer='pickle', 40 | accept_content=['pickle'], 41 | result_serializer='pickle', 42 | task_default_queue=DefaultJobQueueName.Task 43 | ) 44 | 45 | if isinstance(app.conf.include, list): 46 | app.conf.include.extend(LIGHTFLOW_INCLUDE) 47 | else: 48 | if len(app.conf.include) > 0: 49 | raise ConfigOverwriteError( 50 | 'The content in the include config will be overwritten') 51 | app.conf.include = LIGHTFLOW_INCLUDE 52 | 53 | return app 54 | 55 | 56 | def _initialize_logging(config, **kwargs): 57 | """ Hook into the logging system of celery. 58 | 59 | Connects the local logging system to the celery logging system such that both systems 60 | can coexist next to each other. 61 | 62 | Args: 63 | config (Config): Reference to the configuration object from which the 64 | logging settings are retrieved. 65 | **kwargs: Keyword arguments from the hook. 66 | """ 67 | logging.config.dictConfig(config.logging) 68 | 69 | 70 | def _cleanup_workflow(config, task_id, args, **kwargs): 71 | """ Cleanup the results of a workflow when it finished. 72 | 73 | Connects to the postrun signal of Celery. If the signal was sent by a workflow, 74 | remove the result from the result backend. 75 | 76 | Args: 77 | task_id (str): The id of the task. 78 | args (tuple): The arguments the task was started with. 79 | **kwargs: Keyword arguments from the hook. 
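create_app() only needs an object that exposes celery and logging attributes (the data_store and signal sections are used by other parts of the queue package). The sketch below uses a SimpleNamespace as a stand-in for lightflow's Config class (lightflow/config.py), which is normally populated from lightflow.cfg; the broker URL is an assumption.

from types import SimpleNamespace

from lightflow.queue.app import create_app

config = SimpleNamespace(
    celery={'broker_url': 'redis://localhost:6379/0',
            'result_backend': 'redis://localhost:6379/0',
            'result_expires': 0},
    logging={'version': 1, 'disable_existing_loggers': False},
)

celery_app = create_app(config)

# the lightflow-specific settings always win over user supplied ones
assert celery_app.conf.task_serializer == 'pickle'
assert celery_app.conf.task_default_queue == 'task'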
80 | """ 81 | from lightflow.models import Workflow 82 | if isinstance(args[0], Workflow): 83 | if config.celery['result_expires'] == 0: 84 | AsyncResult(task_id).forget() 85 | -------------------------------------------------------------------------------- /lightflow/queue/const.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | class JobExecPath: 4 | Workflow = 'lightflow.queue.jobs.execute_workflow' 5 | Dag = 'lightflow.queue.jobs.execute_dag' 6 | Task = 'lightflow.queue.jobs.execute_task' 7 | 8 | 9 | class JobStatus: 10 | Active = 0 11 | Registered = 1 12 | Reserved = 2 13 | Scheduled = 3 14 | 15 | 16 | class JobType: 17 | Workflow = 'workflow' 18 | Dag = 'dag' 19 | Task = 'task' 20 | 21 | 22 | class JobEventName: 23 | Started = 'task-lightflow-started' 24 | Succeeded = 'task-lightflow-succeeded' 25 | Stopped = 'task-lightflow-stopped' 26 | Aborted = 'task-lightflow-aborted' 27 | 28 | 29 | class DefaultJobQueueName: 30 | Workflow = 'workflow' 31 | Dag = 'dag' 32 | Task = 'task' 33 | -------------------------------------------------------------------------------- /lightflow/queue/event.py: -------------------------------------------------------------------------------- 1 | import threading 2 | from queue import Queue 3 | 4 | from .const import JobEventName 5 | from .models import JobStartedEvent, JobSucceededEvent, JobStoppedEvent, JobAbortedEvent 6 | from lightflow.models.exceptions import (EventTypeUnknown, JobEventTypeUnsupported, 7 | WorkerEventTypeUnsupported) 8 | 9 | 10 | def event_stream(app, *, filter_by_prefix=None): 11 | """ Generator function that returns celery events. 12 | 13 | This function turns the callback based celery event handling into a generator. 14 | 15 | Args: 16 | app: Reference to a celery application object. 17 | filter_by_prefix (str): If not None, only allow events that have a type that 18 | starts with this prefix to yield an generator event. 19 | 20 | Returns: 21 | generator: A generator that returns celery events. 22 | 23 | """ 24 | q = Queue() 25 | 26 | def handle_event(event): 27 | if filter_by_prefix is None or\ 28 | (filter_by_prefix is not None and 29 | event['type'].startswith(filter_by_prefix)): 30 | q.put(event) 31 | 32 | def receive_events(): 33 | with app.connection() as connection: 34 | recv = app.events.Receiver(connection, handlers={ 35 | '*': handle_event 36 | }) 37 | 38 | recv.capture(limit=None, timeout=None, wakeup=True) 39 | 40 | t = threading.Thread(target=receive_events) 41 | t.start() 42 | 43 | while True: 44 | yield q.get(block=True) 45 | 46 | 47 | def create_event_model(event): 48 | """ Factory function that turns a celery event into an event object. 49 | 50 | Args: 51 | event (dict): A dictionary that represents a celery event. 52 | 53 | Returns: 54 | object: An event object representing the received event. 55 | 56 | Raises: 57 | JobEventTypeUnsupported: If an unsupported celery job event was received. 58 | WorkerEventTypeUnsupported: If an unsupported celery worker event was received. 59 | EventTypeUnknown: If an unknown event type (neither job nor worker) was received. 
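Putting event_stream() and create_event_model() together gives a small monitor loop. This needs a running broker and a lightflow worker emitting the custom events; the config stand-in mirrors the create_app sketch above, and the prefix filter matches the 'task-lightflow-*' names from JobEventName.

from types import SimpleNamespace

from lightflow.models.exceptions import JobEventTypeUnsupported
from lightflow.queue.app import create_app
from lightflow.queue.event import create_event_model, event_stream

config = SimpleNamespace(celery={'broker_url': 'redis://localhost:6379/0'},
                         logging={'version': 1})
celery_app = create_app(config)

# only let the custom lightflow job events through
for event in event_stream(celery_app, filter_by_prefix='task-lightflow'):
    try:
        model = create_event_model(event)
    except JobEventTypeUnsupported:
        continue
    print(model.event, model.type, model.name, model.workflow_id)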
60 | """ 61 | if event['type'].startswith('task'): 62 | factory = { 63 | JobEventName.Started: JobStartedEvent, 64 | JobEventName.Succeeded: JobSucceededEvent, 65 | JobEventName.Stopped: JobStoppedEvent, 66 | JobEventName.Aborted: JobAbortedEvent 67 | } 68 | if event['type'] in factory: 69 | return factory[event['type']].from_event(event) 70 | else: 71 | raise JobEventTypeUnsupported( 72 | 'Unsupported event type {}'.format(event['type'])) 73 | elif event['type'].startswith('worker'): 74 | raise WorkerEventTypeUnsupported( 75 | 'Unsupported event type {}'.format(event['type'])) 76 | else: 77 | raise EventTypeUnknown('Unknown event type {}'.format(event['type'])) 78 | -------------------------------------------------------------------------------- /lightflow/queue/jobs.py: -------------------------------------------------------------------------------- 1 | import celery 2 | from datetime import datetime 3 | from functools import partial 4 | 5 | from lightflow.logger import get_logger 6 | from lightflow.models.task_signal import TaskSignal 7 | from lightflow.models.task_context import TaskContext 8 | from lightflow.models.dag_signal import DagSignal 9 | from lightflow.models.datastore import DataStore, DataStoreDocumentSection 10 | from lightflow.models.signal import Server, Client, SignalConnection 11 | from .const import JobType, JobEventName 12 | 13 | logger = get_logger(__name__) 14 | 15 | 16 | @celery.task(bind=True) 17 | def execute_workflow(self, workflow, workflow_id=None): 18 | """ Celery task (aka job) that runs a workflow on a worker. 19 | 20 | This celery task starts, manages and monitors the dags that make up a workflow. 21 | 22 | Args: 23 | self (Task): Reference to itself, the celery task object. 24 | workflow (Workflow): Reference to the workflow object that is being used to 25 | start, manage and monitor dags. 26 | workflow_id (string): If a workflow ID is provided the workflow run will use 27 | this ID, if not a new ID will be auto generated. 
28 | """ 29 | start_time = datetime.utcnow() 30 | 31 | logger.info('Running workflow <{}>'.format(workflow.name)) 32 | data_store = DataStore(**self.app.user_options['config'].data_store, 33 | auto_connect=True) 34 | 35 | # create a unique workflow id for this run 36 | if data_store.exists(workflow_id): 37 | logger.info('Using existing workflow ID: {}'.format(workflow_id)) 38 | else: 39 | workflow_id = data_store.add(payload={ 40 | 'name': workflow.name, 41 | 'queue': workflow.queue, 42 | 'start_time': start_time 43 | }) 44 | logger.info('Created workflow ID: {}'.format(workflow_id)) 45 | 46 | # send custom celery event that the workflow has been started 47 | self.send_event(JobEventName.Started, 48 | job_type=JobType.Workflow, 49 | name=workflow.name, 50 | queue=workflow.queue, 51 | time=start_time, 52 | workflow_id=workflow_id, 53 | duration=None) 54 | 55 | # create server for inter-task messaging 56 | signal_server = Server(SignalConnection(**self.app.user_options['config'].signal, 57 | auto_connect=True), 58 | request_key=workflow_id) 59 | 60 | # store job specific meta information wth the job 61 | self.update_state(meta={'name': workflow.name, 62 | 'type': JobType.Workflow, 63 | 'workflow_id': workflow_id, 64 | 'queue': workflow.queue, 65 | 'start_time': start_time, 66 | 'arguments': workflow.provided_arguments}) 67 | 68 | # run the DAGs in the workflow 69 | workflow.run(config=self.app.user_options['config'], 70 | data_store=data_store, 71 | signal_server=signal_server, 72 | workflow_id=workflow_id) 73 | 74 | end_time = datetime.utcnow() 75 | duration = (end_time - start_time).total_seconds() 76 | 77 | # update data store with provenance information 78 | store_doc = data_store.get(workflow_id) 79 | store_doc.set(key='end_time', value=end_time, 80 | section=DataStoreDocumentSection.Meta) 81 | store_doc.set(key='duration', value=duration, 82 | section=DataStoreDocumentSection.Meta) 83 | 84 | # send custom celery event that the workflow has succeeded 85 | event_name = JobEventName.Succeeded if not workflow.is_stopped \ 86 | else JobEventName.Aborted 87 | 88 | self.send_event(event_name, 89 | job_type=JobType.Workflow, 90 | name=workflow.name, 91 | queue=workflow.queue, 92 | time=end_time, 93 | workflow_id=workflow_id, 94 | duration=duration) 95 | 96 | logger.info('Finished workflow <{}>'.format(workflow.name)) 97 | 98 | 99 | @celery.task(bind=True) 100 | def execute_dag(self, dag, workflow_id, data=None): 101 | """ Celery task that runs a single dag on a worker. 102 | 103 | This celery task starts, manages and monitors the individual tasks of a dag. 104 | 105 | Args: 106 | self (Task): Reference to itself, the celery task object. 107 | dag (Dag): Reference to a Dag object that is being used to start, manage and 108 | monitor tasks. 109 | workflow_id (string): The unique ID of the workflow run that started this dag. 110 | data (MultiTaskData): An optional MultiTaskData object that is being passed to 111 | the first tasks in the dag. This allows the transfer of 112 | data from dag to dag. 
113 | """ 114 | start_time = datetime.utcnow() 115 | logger.info('Running DAG <{}>'.format(dag.name)) 116 | 117 | store_doc = DataStore(**self.app.user_options['config'].data_store, 118 | auto_connect=True).get(workflow_id) 119 | store_loc = 'log.{}'.format(dag.name) 120 | 121 | # update data store with provenance information 122 | store_doc.set(key='{}.start_time'.format(store_loc), value=start_time, 123 | section=DataStoreDocumentSection.Meta) 124 | 125 | # send custom celery event that the dag has been started 126 | self.send_event(JobEventName.Started, 127 | job_type=JobType.Dag, 128 | name=dag.name, 129 | queue=dag.queue, 130 | time=start_time, 131 | workflow_id=workflow_id, 132 | duration=None) 133 | 134 | # store job specific meta information wth the job 135 | self.update_state(meta={'name': dag.name, 136 | 'queue': dag.queue, 137 | 'type': JobType.Dag, 138 | 'workflow_id': workflow_id}) 139 | 140 | # run the tasks in the DAG 141 | signal = DagSignal(Client(SignalConnection(**self.app.user_options['config'].signal, 142 | auto_connect=True), 143 | request_key=workflow_id), dag.name) 144 | dag.run(config=self.app.user_options['config'], 145 | workflow_id=workflow_id, 146 | signal=signal, 147 | data=data) 148 | 149 | end_time = datetime.utcnow() 150 | duration = (end_time - start_time).total_seconds() 151 | 152 | # update data store with provenance information 153 | store_doc.set(key='{}.end_time'.format(store_loc), value=end_time, 154 | section=DataStoreDocumentSection.Meta) 155 | store_doc.set(key='{}.duration'.format(store_loc), value=duration, 156 | section=DataStoreDocumentSection.Meta) 157 | 158 | # send custom celery event that the dag has succeeded 159 | event_name = JobEventName.Succeeded if not signal.is_stopped else JobEventName.Aborted 160 | self.send_event(event_name, 161 | job_type=JobType.Dag, 162 | name=dag.name, 163 | queue=dag.queue, 164 | time=end_time, 165 | workflow_id=workflow_id, 166 | duration=duration) 167 | 168 | logger.info('Finished DAG <{}>'.format(dag.name)) 169 | 170 | 171 | @celery.task(bind=True) 172 | def execute_task(self, task, workflow_id, data=None): 173 | """ Celery task that runs a single task on a worker. 174 | 175 | Args: 176 | self (Task): Reference to itself, the celery task object. 177 | task (BaseTask): Reference to the task object that performs the work 178 | in its run() method. 179 | workflow_id (string): The unique ID of the workflow run that started this task. 180 | data (MultiTaskData): An optional MultiTaskData object that contains the data 181 | that has been passed down from upstream tasks. 
182 | """ 183 | start_time = datetime.utcnow() 184 | 185 | store_doc = DataStore(**self.app.user_options['config'].data_store, 186 | auto_connect=True).get(workflow_id) 187 | store_loc = 'log.{}.tasks.{}'.format(task.dag_name, task.name) 188 | 189 | def handle_callback(message, event_type, exc=None): 190 | msg = '{}: {}'.format(message, str(exc)) if exc is not None else message 191 | 192 | # set the logging level 193 | if event_type == JobEventName.Stopped: 194 | logger.warning(msg) 195 | elif event_type == JobEventName.Aborted: 196 | logger.error(msg) 197 | else: 198 | logger.info(msg) 199 | 200 | current_time = datetime.utcnow() 201 | 202 | # store provenance information about a task 203 | if event_type != JobEventName.Started: 204 | duration = (current_time - start_time).total_seconds() 205 | 206 | store_doc.set(key='{}.end_time'.format(store_loc), 207 | value=current_time, 208 | section=DataStoreDocumentSection.Meta) 209 | 210 | store_doc.set(key='{}.duration'.format(store_loc), 211 | value=duration, 212 | section=DataStoreDocumentSection.Meta) 213 | else: 214 | # store provenance information about a task 215 | store_doc.set(key='{}.start_time'.format(store_loc), 216 | value=start_time, 217 | section=DataStoreDocumentSection.Meta) 218 | 219 | store_doc.set(key='{}.worker'.format(store_loc), 220 | value=self.request.hostname, 221 | section=DataStoreDocumentSection.Meta) 222 | 223 | store_doc.set(key='{}.queue'.format(store_loc), 224 | value=task.queue, 225 | section=DataStoreDocumentSection.Meta) 226 | duration = None 227 | 228 | # send custom celery event 229 | self.send_event(event_type, 230 | job_type=JobType.Task, 231 | name=task.name, 232 | queue=task.queue, 233 | time=current_time, 234 | workflow_id=workflow_id, 235 | duration=duration) 236 | 237 | # store job specific meta information wth the job 238 | self.update_state(meta={'name': task.name, 239 | 'queue': task.queue, 240 | 'type': JobType.Task, 241 | 'workflow_id': workflow_id}) 242 | 243 | # send start celery event 244 | handle_callback('Start task <{}>'.format(task.name), JobEventName.Started) 245 | 246 | # run the task and capture the result 247 | return task._run( 248 | data=data, 249 | store=store_doc, 250 | signal=TaskSignal(Client( 251 | SignalConnection(**self.app.user_options['config'].signal, auto_connect=True), 252 | request_key=workflow_id), 253 | task.dag_name), 254 | context=TaskContext(task.name, task.dag_name, task.workflow_name, 255 | workflow_id, self.request.hostname), 256 | success_callback=partial(handle_callback, 257 | message='Complete task <{}>'.format(task.name), 258 | event_type=JobEventName.Succeeded), 259 | stop_callback=partial(handle_callback, 260 | message='Stop task <{}>'.format(task.name), 261 | event_type=JobEventName.Stopped), 262 | abort_callback=partial(handle_callback, 263 | message='Abort workflow <{}> by task <{}>'.format( 264 | task.workflow_name, task.name), 265 | event_type=JobEventName.Aborted)) 266 | -------------------------------------------------------------------------------- /lightflow/queue/models.py: -------------------------------------------------------------------------------- 1 | from celery.result import AsyncResult 2 | 3 | from lightflow.models.exceptions import JobStatInvalid 4 | 5 | 6 | class BrokerStats: 7 | """ Represents the broker information returned from celery. 8 | 9 | Args: 10 | hostname (str): The broker hostname. 11 | port (int): The broker port. 12 | transport (str): The transport protocol of the broker. 13 | virtual_host (str): The virtual host, e.g. 
the database number in redis. 14 | """ 15 | def __init__(self, hostname, port, transport, virtual_host): 16 | self.hostname = hostname 17 | self.port = port 18 | self.transport = transport 19 | self.virtual_host = virtual_host 20 | 21 | @classmethod 22 | def from_celery(cls, broker_dict): 23 | """ Create a BrokerStats object from the dictionary returned by celery. 24 | 25 | Args: 26 | broker_dict (dict): The dictionary as returned by celery. 27 | 28 | Returns: 29 | BrokerStats: A fully initialized BrokerStats object. 30 | """ 31 | return BrokerStats( 32 | hostname=broker_dict['hostname'], 33 | port=broker_dict['port'], 34 | transport=broker_dict['transport'], 35 | virtual_host=broker_dict['virtual_host'] 36 | ) 37 | 38 | def to_dict(self): 39 | """ Return a dictionary of the broker stats. 40 | 41 | Returns: 42 | dict: Dictionary of the stats. 43 | """ 44 | return { 45 | 'hostname': self.hostname, 46 | 'port': self.port, 47 | 'transport': self.transport, 48 | 'virtual_host': self.virtual_host 49 | } 50 | 51 | 52 | class QueueStats: 53 | """ Represents the queue information returned from celery. 54 | 55 | Args: 56 | name (str): The name of the queue. 57 | routing_key (str): The routing key of the queue. 58 | """ 59 | def __init__(self, name, routing_key): 60 | self.name = name 61 | self.routing_key = routing_key 62 | 63 | @classmethod 64 | def from_celery(cls, queue_dict): 65 | """ Create a QueueStats object from the dictionary returned by celery. 66 | 67 | Args: 68 | queue_dict (dict): The dictionary as returned by celery. 69 | 70 | Returns: 71 | QueueStats: A fully initialized QueueStats object. 72 | """ 73 | return QueueStats( 74 | name=queue_dict['name'], 75 | routing_key=queue_dict['routing_key'] 76 | ) 77 | 78 | def to_dict(self): 79 | """ Return a dictionary of the queue stats. 80 | 81 | Returns: 82 | dict: Dictionary of the stats. 83 | """ 84 | return { 85 | 'name': self.name, 86 | 'routing_key': self.routing_key 87 | } 88 | 89 | 90 | class WorkerStats: 91 | """ Represents the worker information returned from celery. 92 | 93 | Args: 94 | name (str): The name of the worker. 95 | broker (BrokerStats): A reference to a BrokerStats Object the worker is using. 96 | pid (int): The PID of the worker. 97 | process_pids (int): The PIDs of the concurrent task processes. 98 | concurrency (int): The number of concurrent processes. 99 | job_count (int): The number of jobs this worker has processed so far. 100 | queues (list): A list of QueueStats objects that represent the queues this 101 | worker is listening on. 102 | """ 103 | def __init__(self, name, broker, pid, process_pids, 104 | concurrency, job_count, queues): 105 | self.name = name 106 | self.broker = broker 107 | self.pid = pid 108 | self.process_pids = process_pids 109 | self.concurrency = concurrency 110 | self.job_count = job_count 111 | self.queues = queues 112 | 113 | @classmethod 114 | def from_celery(cls, name, worker_dict, queues): 115 | """ Create a WorkerStats object from the dictionary returned by celery. 116 | 117 | Args: 118 | name (str): The name of the worker. 119 | worker_dict (dict): The dictionary as returned by celery. 120 | queues (list): A list of QueueStats objects that represent the queues this 121 | worker is listening on. 122 | 123 | Returns: 124 | WorkerStats: A fully initialized WorkerStats object. 
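These stats classes are thin adapters over celery's inspect API. The sketch below assumes that the dictionaries returned by inspect().stats() and inspect().active_queues() carry the keys that from_celery() reads (broker, pid, pool, name, routing_key); it also reuses the SimpleNamespace config stand-in from the create_app sketch, so treat it as illustrative rather than the verified wiring used by lightflow's own CLI.

from types import SimpleNamespace

from lightflow.queue.app import create_app
from lightflow.queue.models import QueueStats, WorkerStats

config = SimpleNamespace(celery={'broker_url': 'redis://localhost:6379/0'},
                         logging={'version': 1})
celery_app = create_app(config)

inspect = celery_app.control.inspect()
stats = inspect.stats() or {}
active_queues = inspect.active_queues() or {}

workers = [
    WorkerStats.from_celery(
        name=name,
        worker_dict=worker_dict,
        queues=[QueueStats.from_celery(q)
                for q in active_queues.get(name, [])])
    for name, worker_dict in stats.items()
]

for worker in workers:
    print(worker.to_dict())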
125 | """ 126 | return WorkerStats( 127 | name=name, 128 | broker=BrokerStats.from_celery(worker_dict['broker']), 129 | pid=worker_dict['pid'], 130 | process_pids=worker_dict['pool']['processes'], 131 | concurrency=worker_dict['pool']['max-concurrency'], 132 | job_count=worker_dict['pool']['writes']['total'], 133 | queues=queues 134 | ) 135 | 136 | def to_dict(self): 137 | """ Return a dictionary of the worker stats. 138 | 139 | Returns: 140 | dict: Dictionary of the stats. 141 | """ 142 | return { 143 | 'name': self.name, 144 | 'broker': self.broker.to_dict(), 145 | 'pid': self.pid, 146 | 'process_pids': self.process_pids, 147 | 'concurrency': self.concurrency, 148 | 'job_count': self.job_count, 149 | 'queues': [q.to_dict() for q in self.queues] 150 | } 151 | 152 | 153 | class JobStats: 154 | """ Represents the job (=celery task) information returned from celery. 155 | 156 | Args: 157 | name (str): The name of the job. 158 | job_id (str): The internal ID of the job. 159 | job_type (str): The type of the job (workflow, dag, task). 160 | queue (str): The name of the queue the job was scheduled to. 161 | workflow_id (str): The id of the workflow that started this job. 162 | start_time (datetime): The time the job was started in UTC. 163 | arguments (dict): The provided arguments to a workflow. 164 | acknowledged (bool): True of the job was acknowledged by the message system. 165 | func_name (str): The name of the function that represents this job. 166 | hostname (str): The name of the host this job runs on. 167 | worker_name (str): The name of the worker this job runs on. 168 | worker_pid (int): The pid of the process this jobs runs on. 169 | routing_key (str): The routing key for this job. 170 | """ 171 | def __init__(self, name, job_id, job_type, queue, workflow_id, start_time, arguments, 172 | acknowledged, func_name, hostname, worker_name, worker_pid, routing_key): 173 | self.name = name 174 | self.id = job_id 175 | self.type = job_type 176 | self.workflow_id = workflow_id 177 | self.queue = queue 178 | self.start_time = start_time 179 | self.arguments = arguments 180 | self.acknowledged = acknowledged 181 | self.func_name = func_name 182 | self.hostname = hostname 183 | self.worker_name = worker_name 184 | self.worker_pid = worker_pid 185 | self.routing_key = routing_key 186 | 187 | @classmethod 188 | def from_celery(cls, worker_name, job_dict, celery_app): 189 | """ Create a JobStats object from the dictionary returned by celery. 190 | 191 | Args: 192 | worker_name (str): The name of the worker this jobs runs on. 193 | job_dict (dict): The dictionary as returned by celery. 194 | celery_app: Reference to a celery application object. 195 | 196 | Returns: 197 | JobStats: A fully initialized JobStats object. 
198 | """ 199 | if not isinstance(job_dict, dict) or 'id' not in job_dict: 200 | raise JobStatInvalid('The job description is missing important fields.') 201 | 202 | async_result = AsyncResult(id=job_dict['id'], app=celery_app) 203 | a_info = async_result.info if isinstance(async_result.info, dict) else None 204 | 205 | return JobStats( 206 | name=a_info.get('name', '') if a_info is not None else '', 207 | job_id=job_dict['id'], 208 | job_type=a_info.get('type', '') if a_info is not None else '', 209 | workflow_id=a_info.get('workflow_id', '') if a_info is not None else '', 210 | queue=a_info.get('queue', '') if a_info is not None else '', 211 | start_time=a_info.get('start_time', None) if a_info is not None else None, 212 | arguments=a_info.get('arguments', {}) if a_info is not None else {}, 213 | acknowledged=job_dict['acknowledged'], 214 | func_name=job_dict['type'], 215 | hostname=job_dict['hostname'], 216 | worker_name=worker_name, 217 | worker_pid=job_dict['worker_pid'], 218 | routing_key=job_dict['delivery_info']['routing_key'] 219 | ) 220 | 221 | def to_dict(self): 222 | """ Return a dictionary of the job stats. 223 | 224 | Returns: 225 | dict: Dictionary of the stats. 226 | """ 227 | return { 228 | 'name': self.name, 229 | 'id': self.id, 230 | 'type': self.type, 231 | 'workflow_id': self.workflow_id, 232 | 'queue': self.queue, 233 | 'start_time': self.start_time, 234 | 'arguments': self.arguments, 235 | 'acknowledged': self.acknowledged, 236 | 'func_name': self.func_name, 237 | 'hostname': self.hostname, 238 | 'worker_name': self.worker_name, 239 | 'worker_pid': self.worker_pid, 240 | 'routing_key': self.routing_key 241 | } 242 | 243 | 244 | class JobEvent: 245 | """ The base class for job events from celery. 246 | 247 | Args: 248 | uuid (str): The internal event id. 249 | job_type (str): The type of job that caused this event (workflow, dag, task). 250 | event_type (str): The internal event type name. 251 | queue (str): The name of the queue the job was scheduled to. 252 | hostname (str): The name of the host on which the job is running. 253 | pid (int): The pid of the process that runs the job. 254 | name (str): The name of the workflow, dag or task that caused this event. 255 | workflow_id (str): The id of the workflow that hosts this job. 256 | event_time (datetime): The time when the event was triggered. 257 | duration (float, None): The duration it took to execute the job. 258 | """ 259 | def __init__(self, uuid, job_type, event_type, queue, hostname, pid, 260 | name, workflow_id, event_time, duration): 261 | self.uuid = uuid 262 | self.type = job_type 263 | self.event = event_type 264 | self.queue = queue 265 | self.hostname = hostname 266 | self.pid = pid 267 | self.name = name 268 | self.workflow_id = workflow_id 269 | self.event_time = event_time 270 | self.duration = duration 271 | 272 | @classmethod 273 | def from_event(cls, event): 274 | """ Create a JobEvent object from the event dictionary returned by celery. 275 | 276 | Args: 277 | event (dict): The dictionary as returned by celery. 278 | 279 | Returns: 280 | JobEvent: A fully initialized JobEvent object. 
281 | """ 282 | return cls( 283 | uuid=event['uuid'], 284 | job_type=event['job_type'], 285 | event_type=event['type'], 286 | queue=event['queue'], 287 | hostname=event['hostname'], 288 | pid=event['pid'], 289 | name=event['name'], 290 | workflow_id=event['workflow_id'], 291 | event_time=event['time'], 292 | duration=event['duration'] 293 | ) 294 | 295 | 296 | class JobStartedEvent(JobEvent): 297 | """ This event is triggered when a new job starts running. """ 298 | def __init__(self, uuid, job_type, event_type, queue, hostname, pid, 299 | name, workflow_id, event_time, duration): 300 | super().__init__(uuid, job_type, event_type, queue, hostname, pid, 301 | name, workflow_id, event_time, duration) 302 | 303 | 304 | class JobSucceededEvent(JobEvent): 305 | """ This event is triggered when a job completed successfully. """ 306 | def __init__(self, uuid, job_type, event_type, queue, hostname, pid, 307 | name, workflow_id, event_time, duration): 308 | super().__init__(uuid, job_type, event_type, queue, hostname, pid, 309 | name, workflow_id, event_time, duration) 310 | 311 | 312 | class JobStoppedEvent(JobEvent): 313 | """ This event is triggered when a job was stopped. """ 314 | def __init__(self, uuid, job_type, event_type, queue, hostname, pid, 315 | name, workflow_id, event_time, duration): 316 | super().__init__(uuid, job_type, event_type, queue, hostname, pid, 317 | name, workflow_id, event_time, duration) 318 | 319 | 320 | class JobAbortedEvent(JobEvent): 321 | """ This event is triggered when a job was aborted. """ 322 | def __init__(self, uuid, job_type, event_type, queue, hostname, pid, 323 | name, workflow_id, event_time, duration): 324 | super().__init__(uuid, job_type, event_type, queue, hostname, pid, 325 | name, workflow_id, event_time, duration) 326 | -------------------------------------------------------------------------------- /lightflow/queue/pickle.py: -------------------------------------------------------------------------------- 1 | """Patch Celery to use cloudpickle instead of pickle. 2 | 3 | This file is based on the file '_patch_celery.py' of the cesium project. 4 | Copyright (C) 2016, the cesium team. 5 | 6 | The project can be found at: https://github.com/cesium-ml/cesium 7 | """ 8 | import cloudpickle 9 | import kombu.serialization as serialization 10 | from io import BytesIO 11 | 12 | 13 | def cloudpickle_loads(s, load=cloudpickle.load): 14 | """ Decode the byte stream into Python objects using cloudpickle. """ 15 | return load(BytesIO(s)) 16 | 17 | 18 | def cloudpickle_dumps(obj, dumper=cloudpickle.dumps): 19 | """ Encode Python objects into a byte stream using cloudpickle. """ 20 | return dumper(obj, protocol=serialization.pickle_protocol) 21 | 22 | 23 | def patch_celery(): 24 | """ Monkey patch Celery to use cloudpickle instead of pickle. 
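The practical difference between the two picklers: the standard library pickler serializes functions by reference and therefore rejects lambdas and other callables that cannot be looked up by name, whereas cloudpickle serializes them by value. A quick, self-contained check using the two helpers defined above:

import pickle

from lightflow.queue.pickle import cloudpickle_dumps, cloudpickle_loads

scale = 3
transform = lambda x: x * scale

try:
    pickle.dumps(transform)                    # pickles by reference -> fails
except (pickle.PicklingError, AttributeError):
    print('the standard pickler cannot serialize the lambda')

payload = cloudpickle_dumps(transform)         # serialized by value
assert cloudpickle_loads(payload)(2) == 6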
""" 25 | registry = serialization.registry 26 | serialization.pickle = cloudpickle 27 | registry.unregister('pickle') 28 | registry.register('pickle', cloudpickle_dumps, cloudpickle_loads, 29 | content_type='application/x-python-serialize', 30 | content_encoding='binary') 31 | 32 | import celery.worker as worker 33 | import celery.concurrency.asynpool as asynpool 34 | worker.state.pickle = cloudpickle 35 | asynpool._pickle = cloudpickle 36 | 37 | import billiard.common 38 | billiard.common.pickle = cloudpickle 39 | billiard.common.pickle_dumps = cloudpickle_dumps 40 | billiard.common.pickle_loads = cloudpickle_loads 41 | -------------------------------------------------------------------------------- /lightflow/queue/worker.py: -------------------------------------------------------------------------------- 1 | from celery.result import AsyncResult 2 | from celery.bootsteps import StartStopStep 3 | 4 | from lightflow.models.signal import Client, Request, SignalConnection 5 | 6 | 7 | class WorkerLifecycle(StartStopStep): 8 | """ Class that manages the lifecycle of a worker. """ 9 | 10 | def stop(self, consumer): 11 | """ This function is called when the worker received a request to terminate. 12 | 13 | Upon the termination of the worker, the workflows for all running jobs are 14 | stopped gracefully. 15 | 16 | Args: 17 | consumer (Consumer): Reference to the consumer object that handles messages 18 | from the broker. 19 | """ 20 | stopped_workflows = [] 21 | for request in [r for r in consumer.controller.state.active_requests]: 22 | job = AsyncResult(request.id) 23 | 24 | workflow_id = job.result['workflow_id'] 25 | if workflow_id not in stopped_workflows: 26 | client = Client( 27 | SignalConnection(**consumer.app.user_options['config'].signal, 28 | auto_connect=True), 29 | request_key=workflow_id) 30 | client.send(Request(action='stop_workflow')) 31 | 32 | stopped_workflows.append(workflow_id) 33 | -------------------------------------------------------------------------------- /lightflow/scripts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AustralianSynchrotron/lightflow/dc53dbc1d961e20fb144273baca258060705c03e/lightflow/scripts/__init__.py -------------------------------------------------------------------------------- /lightflow/tasks/__init__.py: -------------------------------------------------------------------------------- 1 | from .bash_task import BashTask 2 | from .python_task import PythonTask 3 | 4 | 5 | __all__ = ['BashTask', 'PythonTask'] 6 | -------------------------------------------------------------------------------- /lightflow/tasks/python_task.py: -------------------------------------------------------------------------------- 1 | from lightflow.models import BaseTask, Action 2 | from lightflow.queue import DefaultJobQueueName 3 | 4 | 5 | class PythonTask(BaseTask): 6 | """ The Python task executes a user-defined python method. 7 | 8 | Args: 9 | name (str): The name of the task. 10 | callback (callable): A reference to the Python method that should be called by 11 | the task as soon as it is run. It has to have the following definition:: 12 | 13 | (data, store, signal, context) -> None, Action 14 | 15 | with the parameters: 16 | 17 | - **data** (:class:`.MultiTaskData`): The data object that has been passed\ 18 | from the predecessor task. 
19 | - **store** (:class:`.DataStoreDocument`): The persistent data store object\ 20 | that allows the task to store data for access across the current\ 21 | workflow run. 22 | - **signal** (*TaskSignal*): The signal object for tasks. It wraps\ 23 | the construction and sending of signals into easy to use methods. 24 | - **context** (*TaskContext*): The context in which the tasks runs. 25 | 26 | queue (str): Name of the queue the task should be scheduled to. Defaults to 27 | the general task queue. 28 | callback_init (callable): An optional callable that is called shortly 29 | before the task is run. The definition is:: 30 | 31 | (data, store, signal, context) -> None 32 | 33 | with the parameters: 34 | 35 | - **data** (:class:`.MultiTaskData`): The data object that has been passed\ 36 | from the predecessor task. 37 | - **store** (:class:`.DataStoreDocument`): The persistent data store object\ 38 | that allows the task to store data for access across the current\ 39 | workflow run. 40 | - **signal** (*TaskSignal*): The signal object for tasks. It wraps\ 41 | the construction and sending of signals into easy to use methods. 42 | - **context** (*TaskContext*): The context in which the tasks runs. 43 | 44 | callback_finally (callable): An optional callable that is always called 45 | at the end of a task, regardless whether it completed successfully, 46 | was stopped or was aborted. The definition is:: 47 | 48 | (status, data, store, signal, context) -> None 49 | 50 | with the parameters: 51 | 52 | - **status** (*TaskStatus*): The current status of the task. It can\ 53 | be one of the following: 54 | 55 | - ``TaskStatus.Success`` -- task was successful 56 | - ``TaskStatus.Stopped`` -- task was stopped 57 | - ``TaskStatus.Aborted`` -- task was aborted 58 | - ``TaskStatus.Error`` -- task raised an exception 59 | 60 | - **data** (:class:`.MultiTaskData`): The data object that has been passed\ 61 | from the predecessor task. 62 | - **store** (:class:`.DataStoreDocument`): The persistent data store object\ 63 | that allows the task to store data for access across the current\ 64 | workflow run. 65 | - **signal** (*TaskSignal*): The signal object for tasks. It wraps\ 66 | the construction and sending of signals into easy to use methods. 67 | - **context** (*TaskContext*): The context in which the tasks runs. 68 | 69 | force_run (bool): Run the task even if it is flagged to be skipped. 70 | propagate_skip (bool): Propagate the skip flag to the next task. 71 | """ 72 | def __init__(self, name, callback=None, *, queue=DefaultJobQueueName.Task, 73 | callback_init=None, callback_finally=None, 74 | force_run=False, propagate_skip=True): 75 | super().__init__(name, queue=queue, 76 | callback_init=callback_init, callback_finally=callback_finally, 77 | force_run=force_run, propagate_skip=propagate_skip) 78 | self._callback = callback 79 | 80 | def run(self, data, store, signal, context, **kwargs): 81 | """ The main run method of the Python task. 82 | 83 | Args: 84 | data (:class:`.MultiTaskData`): The data object that has been passed from the 85 | predecessor task. 86 | store (:class:`.DataStoreDocument`): The persistent data store object that allows the 87 | task to store data for access across the current workflow run. 88 | signal (TaskSignal): The signal object for tasks. It wraps the construction 89 | and sending of signals into easy to use methods. 90 | context (TaskContext): The context in which the tasks runs. 
91 | 92 | Returns: 93 | Action: An Action object containing the data that should be passed on 94 | to the next task and optionally a list of successor tasks that 95 | should be executed. 96 | """ 97 | if self._callback is not None: 98 | result = self._callback(data, store, signal, context, **kwargs) 99 | return result if result is not None else Action(data) 100 | -------------------------------------------------------------------------------- /lightflow/version.py: -------------------------------------------------------------------------------- 1 | __version__ = '1.11.1' 2 | -------------------------------------------------------------------------------- /lightflow/workers.py: -------------------------------------------------------------------------------- 1 | from uuid import uuid4 2 | 3 | from .models.datastore import DataStore 4 | 5 | from .queue.app import create_app 6 | from .queue.worker import WorkerLifecycle 7 | from .queue.models import WorkerStats, QueueStats 8 | 9 | 10 | def start_worker(queues, config, *, name=None, celery_args=None, check_datastore=True): 11 | """ Start a worker process. 12 | 13 | Args: 14 | queues (list): List of queue names this worker accepts jobs from. 15 | config (Config): Reference to the configuration object from which the 16 | settings for the worker are retrieved. 17 | name (string): Unique name for the worker. The hostname template variables from 18 | Celery can be used. If not given, a unique name is created. 19 | celery_args (list): List of additional Celery worker command line arguments. 20 | Please note that this depends on the version of Celery used and might change. 21 | Use with caution. 22 | check_datastore (bool): Set to True to check whether the data store is available 23 | prior to starting the worker. 24 | """ 25 | celery_app = create_app(config) 26 | 27 | if check_datastore: 28 | with DataStore(**config.data_store, 29 | auto_connect=True, handle_reconnect=False) as ds: 30 | celery_app.user_options['datastore_info'] = ds.server_info 31 | 32 | argv = [ 33 | 'worker', 34 | '-n={}'.format(uuid4() if name is None else name), 35 | '--queues={}'.format(','.join(queues)) 36 | ] 37 | 38 | argv.extend(celery_args or []) 39 | 40 | celery_app.steps['consumer'].add(WorkerLifecycle) 41 | celery_app.user_options['config'] = config 42 | celery_app.worker_main(argv) 43 | 44 | 45 | def stop_worker(config, *, worker_ids=None): 46 | """ Stop a worker process. 47 | 48 | Args: 49 | config (Config): Reference to the configuration object from which the 50 | settings for the worker are retrieved. 51 | worker_ids (list): An optional list of ids for the worker that should be stopped. 52 | """ 53 | if worker_ids is not None and not isinstance(worker_ids, list): 54 | worker_ids = [worker_ids] 55 | 56 | celery_app = create_app(config) 57 | celery_app.control.shutdown(destination=worker_ids) 58 | 59 | 60 | def list_workers(config, *, filter_by_queues=None): 61 | """ Return a list of all available workers. 62 | 63 | Args: 64 | config (Config): Reference to the configuration object from which the 65 | settings are retrieved. 66 | filter_by_queues (list): Restrict the returned workers to workers that listen to 67 | at least one of the queue names in this list. 68 | 69 | Returns: 70 | list: A list of WorkerStats objects. 
71 | """ 72 | celery_app = create_app(config) 73 | worker_stats = celery_app.control.inspect().stats() 74 | queue_stats = celery_app.control.inspect().active_queues() 75 | 76 | if worker_stats is None: 77 | return [] 78 | 79 | workers = [] 80 | for name, w_stat in worker_stats.items(): 81 | queues = [QueueStats.from_celery(q_stat) for q_stat in queue_stats[name]] 82 | 83 | add_worker = filter_by_queues is None 84 | if not add_worker: 85 | for queue in queues: 86 | if queue.name in filter_by_queues: 87 | add_worker = True 88 | break 89 | 90 | if add_worker: 91 | workers.append(WorkerStats.from_celery(name, w_stat, queues)) 92 | 93 | return workers 94 | -------------------------------------------------------------------------------- /lightflow/workflows.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | 4 | from .models import Workflow 5 | from .models.signal import Client, Request, SignalConnection 6 | from .models.exceptions import (WorkflowImportError, 7 | JobEventTypeUnsupported, JobStatInvalid, 8 | DirectedAcyclicGraphInvalid, WorkflowDefinitionError) 9 | 10 | from .queue.app import create_app 11 | from .queue.models import JobStats 12 | from .queue.event import event_stream, create_event_model 13 | from .queue.const import JobExecPath, JobStatus, JobType, DefaultJobQueueName 14 | 15 | 16 | def start_workflow(name, config, *, queue=DefaultJobQueueName.Workflow, 17 | clear_data_store=True, store_args=None): 18 | """ Start a single workflow by sending it to the workflow queue. 19 | 20 | Args: 21 | name (str): The name of the workflow that should be started. Refers to the 22 | name of the workflow file without the .py extension. 23 | config (Config): Reference to the configuration object from which the 24 | settings for the workflow are retrieved. 25 | queue (str): Name of the queue the workflow should be scheduled to. 26 | clear_data_store (bool): Remove any documents created during the workflow 27 | run in the data store after the run. 28 | store_args (dict): Dictionary of additional arguments that are ingested into the 29 | data store prior to the execution of the workflow. 30 | Returns: 31 | str: The ID of the workflow job. 32 | Raises: 33 | WorkflowArgumentError: If the workflow requires arguments to be set in store_args 34 | that were not supplied to the workflow. 35 | WorkflowImportError: If the import of the workflow fails. 36 | """ 37 | try: 38 | wf = Workflow.from_name(name, 39 | queue=queue, 40 | clear_data_store=clear_data_store, 41 | arguments=store_args) 42 | except DirectedAcyclicGraphInvalid as e: 43 | raise WorkflowDefinitionError(workflow_name=name, 44 | graph_name=e.graph_name) 45 | 46 | celery_app = create_app(config) 47 | result = celery_app.send_task(JobExecPath.Workflow, 48 | args=(wf,), queue=queue, routing_key=queue) 49 | return result.id 50 | 51 | 52 | def stop_workflow(config, *, names=None): 53 | """ Stop one or more workflows. 54 | 55 | Args: 56 | config (Config): Reference to the configuration object from which the 57 | settings for the workflow are retrieved. 58 | names (list): List of workflow names, workflow ids or workflow job ids for the 59 | workflows that should be stopped. If all workflows should be 60 | stopped, set it to None. 61 | 62 | Returns: 63 | tuple: A tuple of the workflow jobs that were successfully stopped and the ones 64 | that could not be stopped. 
65 | """ 66 | jobs = list_jobs(config, filter_by_type=JobType.Workflow) 67 | 68 | if names is not None: 69 | filtered_jobs = [] 70 | for job in jobs: 71 | if (job.id in names) or (job.name in names) or (job.workflow_id in names): 72 | filtered_jobs.append(job) 73 | else: 74 | filtered_jobs = jobs 75 | 76 | success = [] 77 | failed = [] 78 | for job in filtered_jobs: 79 | client = Client(SignalConnection(**config.signal, auto_connect=True), 80 | request_key=job.workflow_id) 81 | 82 | if client.send(Request(action='stop_workflow')).success: 83 | success.append(job) 84 | else: 85 | failed.append(job) 86 | 87 | return success, failed 88 | 89 | 90 | def list_workflows(config): 91 | """ List all available workflows. 92 | 93 | Returns a list of all workflows that are available from the paths specified 94 | in the config. A workflow is defined as a Python file with at least one DAG. 95 | 96 | Args: 97 | config (Config): Reference to the configuration object from which the 98 | settings are retrieved. 99 | 100 | Returns: 101 | list: A list of workflows. 102 | """ 103 | workflows = [] 104 | for path in config.workflows: 105 | filenames = glob.glob(os.path.join(os.path.abspath(path), '*.py')) 106 | 107 | for filename in filenames: 108 | module_name = os.path.splitext(os.path.basename(filename))[0] 109 | workflow = Workflow() 110 | try: 111 | workflow.load(module_name, validate_arguments=False, strict_dag=True) 112 | workflows.append(workflow) 113 | except DirectedAcyclicGraphInvalid as e: 114 | raise WorkflowDefinitionError(workflow_name=module_name, 115 | graph_name=e.graph_name) 116 | except WorkflowImportError: 117 | continue 118 | 119 | return workflows 120 | 121 | 122 | def list_jobs(config, *, status=JobStatus.Active, 123 | filter_by_type=None, filter_by_worker=None): 124 | """ Return a list of Celery jobs. 125 | 126 | Args: 127 | config (Config): Reference to the configuration object from which the 128 | settings are retrieved. 129 | status (JobStatus): The status of the jobs that should be returned. 130 | filter_by_type (list): Restrict the returned jobs to the types in this list. 131 | filter_by_worker (list): Only return jobs that were registered, reserved or are 132 | running on the workers given in this list of worker names. Using 133 | this option will increase the performance. 134 | 135 | Returns: 136 | list: A list of JobStats. 
137 | """ 138 | celery_app = create_app(config) 139 | 140 | # option to filter by the worker (improves performance) 141 | if filter_by_worker is not None: 142 | inspect = celery_app.control.inspect( 143 | destination=filter_by_worker if isinstance(filter_by_worker, list) 144 | else [filter_by_worker]) 145 | else: 146 | inspect = celery_app.control.inspect() 147 | 148 | # get active, registered or reserved jobs 149 | if status == JobStatus.Active: 150 | job_map = inspect.active() 151 | elif status == JobStatus.Registered: 152 | job_map = inspect.registered() 153 | elif status == JobStatus.Reserved: 154 | job_map = inspect.reserved() 155 | elif status == JobStatus.Scheduled: 156 | job_map = inspect.scheduled() 157 | else: 158 | job_map = None 159 | 160 | if job_map is None: 161 | return [] 162 | 163 | result = [] 164 | for worker_name, jobs in job_map.items(): 165 | for job in jobs: 166 | try: 167 | job_stats = JobStats.from_celery(worker_name, job, celery_app) 168 | 169 | if (filter_by_type is None) or (job_stats.type == filter_by_type): 170 | result.append(job_stats) 171 | except JobStatInvalid: 172 | pass 173 | 174 | return result 175 | 176 | 177 | def events(config): 178 | """ Return a generator that yields workflow events. 179 | 180 | For every workflow event that is sent from celery this generator yields an event 181 | object. 182 | 183 | Args: 184 | config (Config): Reference to the configuration object from which the 185 | settings are retrieved. 186 | 187 | Returns: 188 | generator: A generator that returns workflow events. 189 | 190 | """ 191 | celery_app = create_app(config) 192 | 193 | for event in event_stream(celery_app, filter_by_prefix='task'): 194 | try: 195 | yield create_event_model(event) 196 | except JobEventTypeUnsupported: 197 | pass 198 | -------------------------------------------------------------------------------- /meta.yaml: -------------------------------------------------------------------------------- 1 | package: 2 | name: lightflow 3 | version: {{ GIT_DESCRIBE_TAG }} 4 | 5 | source: 6 | path: . 7 | 8 | requirements: 9 | build: 10 | - click 11 | - python 12 | - setuptools 13 | - setuptools_scm 14 | - celery 15 | - colorlog 16 | - networkx 17 | - pymongo 18 | - pytz 19 | - ruamel.yaml 20 | - cloudpickle 21 | - redis 22 | - redis-py 23 | 24 | run: 25 | - python 26 | - setuptools 27 | - mongodb 28 | - redis 29 | - celery 30 | - click 31 | - colorlog 32 | - networkx 33 | - pymongo 34 | - pytz 35 | - ruamel.yaml 36 | - cloudpickle 37 | - redis-py 38 | 39 | build: 40 | entry_points: 41 | - lightflow = lightflow.scripts.cli:cli 42 | script: python setup.py install 43 | number: {{ GIT_DESCRIBE_NUMBER }} 44 | 45 | test: 46 | requires: 47 | - pytest 48 | source_files: 49 | - tests/* 50 | commands: 51 | - pytest 52 | 53 | about: 54 | home: https://github.com/AustralianSynchrotron/Lightflow 55 | licence: BSD-3 56 | license_file: LICENSE 57 | summary: A lightweight, distributed workflow system 58 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | pytest 2 | pytest-cov 3 | flake8 4 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | """ 2 | Lightflow 3 | --------- 4 | 5 | Lightflow is a lightweight, distributed workflow system. 
6 | 7 | It is based on a directed acyclic graph structure, with tasks as nodes and arbitrary data 8 | flowing between tasks. 9 | 10 | """ 11 | 12 | from setuptools import setup, find_packages 13 | import re 14 | 15 | with open('lightflow/version.py') as file: 16 | version = re.search(r"__version__ = '(.*)'", file.read()).group(1) 17 | 18 | setup( 19 | name='Lightflow', 20 | version=version, 21 | description='A lightweight, distributed workflow system', 22 | long_description=__doc__, 23 | url='https://github.com/AustralianSynchrotron/Lightflow', 24 | 25 | author='The Australian Synchrotron Software Group', 26 | author_email='python@synchrotron.org.au', 27 | 28 | classifiers=[ 29 | 'Development Status :: 3 - Alpha', 30 | 'Topic :: Scientific/Engineering', 31 | 'Intended Audience :: Developers', 32 | 'Intended Audience :: Science/Research', 33 | 'Natural Language :: English', 34 | 'Intended Audience :: Developers', 35 | 'Programming Language :: Python :: 3', 36 | 'Programming Language :: Python :: 3.5', 37 | 'Programming Language :: Python :: 3.6', 38 | ], 39 | 40 | packages=find_packages(exclude=['tests']), 41 | 42 | install_requires=[ 43 | 'celery>=4.2.1', 44 | 'Click>=7.0', 45 | 'colorlog>=4.0.2', 46 | 'networkx>=2.2', 47 | 'pymongo>=3.7.2', 48 | 'pytz>=2018.7', 49 | 'redis>=3.0.1', 50 | 'ruamel.yaml>=0.15.83', 51 | 'cloudpickle>=0.6.1' 52 | ], 53 | 54 | entry_points={ 55 | 'console_scripts': [ 56 | 'lightflow=lightflow.scripts.cli:cli', 57 | ], 58 | }, 59 | ) 60 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AustralianSynchrotron/lightflow/dc53dbc1d961e20fb144273baca258060705c03e/tests/__init__.py -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import create_autospec 2 | 3 | import pytest 4 | 5 | from lightflow.models.task_data import MultiTaskData 6 | from lightflow.models.datastore import DataStoreDocument 7 | from lightflow.models.task_signal import TaskSignal 8 | from lightflow.models.task_context import TaskContext 9 | 10 | 11 | @pytest.fixture 12 | def data_mock(): 13 | yield create_autospec(MultiTaskData, instance=True) 14 | 15 | 16 | @pytest.fixture 17 | def store_mock(): 18 | yield create_autospec(DataStoreDocument, instance=True) 19 | 20 | 21 | @pytest.fixture 22 | def signal_mock(): 23 | m = create_autospec(TaskSignal, instance=True) 24 | m.configure_mock(is_stopped=False) 25 | yield m 26 | 27 | 28 | @pytest.fixture 29 | def context_mock(): 30 | yield create_autospec(TaskContext, instance=True) 31 | -------------------------------------------------------------------------------- /tests/fixtures/workflows/dag_present_workflow.py: -------------------------------------------------------------------------------- 1 | """The docstring""" 2 | 3 | from lightflow.models import Dag 4 | 5 | 6 | d = Dag('dag_present') 7 | -------------------------------------------------------------------------------- /tests/fixtures/workflows/no_dag_workflow.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AustralianSynchrotron/lightflow/dc53dbc1d961e20fb144273baca258060705c03e/tests/fixtures/workflows/no_dag_workflow.py -------------------------------------------------------------------------------- 
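Note: the fixture workflows above and below declare at most a bare Dag, which is all the workflow-loading tests need. For orientation, a minimal workflow that actually wires a task into a DAG would look roughly like the sketch below. This is a hypothetical illustration rather than a file in the repository: the PythonTask constructor and its (data, store, signal, context) callback contract follow lightflow/tasks/python_task.py above, while the dict-based d.define(...) schema and all names (say_hello, hello_task, hello_dag) are assumptions.

from lightflow.models import Dag
from lightflow.tasks import PythonTask


def say_hello(data, store, signal, context):
    # Callback signature as documented for PythonTask; returning None lets
    # PythonTask.run() forward the incoming data via Action(data).
    print('hello from a lightflow task')


hello_task = PythonTask(name='hello_task', callback=say_hello)

d = Dag('hello_dag')
d.define({hello_task: None})  # assumed dict schema: task -> successor(s), or None for no successors

Because PythonTask.run() returns Action(data) whenever the callback yields nothing, a callback only needs an explicit return value when it wants to replace the data or restrict the successor tasks.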
/tests/fixtures/workflows/parameters_workflow.py: -------------------------------------------------------------------------------- 1 | from lightflow.models import Parameters, Option, Dag 2 | 3 | 4 | parameters = Parameters([ 5 | Option('required_arg'), 6 | ]) 7 | 8 | 9 | d = Dag('dag') 10 | -------------------------------------------------------------------------------- /tests/test_base_task.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import Mock, create_autospec, call 2 | 3 | import pytest # noqa 4 | 5 | from lightflow.models.task import BaseTask, TaskState, TaskStatus 6 | from lightflow.queue import DefaultJobQueueName 7 | from lightflow.models.task_data import MultiTaskData 8 | from lightflow.models.exceptions import AbortWorkflow, StopTask, TaskReturnActionInvalid 9 | from lightflow.models.action import Action 10 | 11 | 12 | @pytest.fixture 13 | def task(): 14 | yield BaseTask('task-name') 15 | 16 | 17 | class CeleryResultMock: 18 | def __init__(self, *, state=None, ready=False, failed=False): 19 | self.state = state 20 | self._ready = ready 21 | self._failed = failed 22 | self._forget_called = False 23 | 24 | def ready(self): 25 | return self._ready 26 | 27 | def failed(self): 28 | return self._failed 29 | 30 | def forget(self): 31 | self._forget_called = True 32 | 33 | 34 | def test_base_task_properties(task): 35 | assert task.name == 'task-name' 36 | assert task.state == TaskState.Init 37 | assert task.queue == DefaultJobQueueName.Task 38 | assert task.has_to_run is False 39 | assert task.propagate_skip is True 40 | assert task.is_waiting is False 41 | assert task.is_running is False 42 | assert task.is_completed is False 43 | assert task.is_stopped is False 44 | assert task.is_aborted is False 45 | assert task.is_skipped is False 46 | assert task.celery_pending is False 47 | assert task.celery_completed is False 48 | assert task.celery_failed is False 49 | assert task.celery_state == 'NOT_QUEUED' 50 | assert task.has_celery_result is False 51 | 52 | 53 | def test_base_task_skipped_setter(task): 54 | task.is_skipped = True 55 | assert task.is_skipped is True 56 | 57 | 58 | def test_base_task_state_setter(task): 59 | task.state = TaskState.Waiting 60 | assert task.state == TaskState.Waiting 61 | 62 | 63 | def test_base_task_celery_pending(task): 64 | task.celery_result = CeleryResultMock(state='PENDING') 65 | assert task.celery_pending is True 66 | 67 | 68 | def test_base_task_celery_completed(task): 69 | task.celery_result = CeleryResultMock(ready=True) 70 | assert task.celery_completed is True 71 | 72 | 73 | def test_base_task_celery_failed(task): 74 | task.celery_result = CeleryResultMock(failed=True) 75 | assert task.celery_failed is True 76 | 77 | 78 | def test_base_task_celery_state(task): 79 | task.celery_result = CeleryResultMock(state='PENDING') 80 | assert task.celery_state == 'PENDING' 81 | 82 | 83 | def test_base_task_clear_result(task): 84 | celery_result = CeleryResultMock() 85 | task.celery_result = celery_result 86 | task.clear_celery_result() 87 | assert celery_result._forget_called is True 88 | 89 | 90 | def test_run_calls_callbacks(data_mock, store_mock, signal_mock, context_mock): 91 | init_cb = Mock() 92 | finally_cb = Mock() 93 | success_cb = Mock() 94 | stop_cb = Mock() 95 | abort_cb = Mock() 96 | task = BaseTask('task-name', callback_init=init_cb, callback_finally=finally_cb) 97 | task._run(data_mock, store_mock, signal_mock, context_mock, 98 | success_callback=success_cb, stop_callback=stop_cb, 
abort_callback=abort_cb) 99 | assert init_cb.call_args == call(data_mock, store_mock, signal_mock, context_mock) 100 | assert finally_cb.call_args == call(TaskStatus.Success, data_mock, store_mock, signal_mock, context_mock) 101 | assert success_cb.called is True 102 | assert stop_cb.called is False 103 | assert abort_cb.called is False 104 | 105 | 106 | def test_run_calls_callback_finally_on_error(data_mock, store_mock, signal_mock, context_mock): 107 | 108 | class FailingTask(BaseTask): 109 | def run(self, *args, **kwargs): 110 | raise Exception() 111 | 112 | finally_cb = Mock() 113 | success_cb = Mock() 114 | stop_cb = Mock() 115 | abort_cb = Mock() 116 | task = FailingTask('task-name', callback_finally=finally_cb) 117 | with pytest.raises(Exception): 118 | task._run(data_mock, store_mock, signal_mock, context_mock, 119 | success_callback=success_cb, stop_callback=stop_cb, 120 | abort_callback=abort_cb) 121 | assert finally_cb.call_args == call(TaskStatus.Error, data_mock, store_mock, signal_mock, context_mock) 122 | assert success_cb.called is False 123 | assert stop_cb.called is False 124 | assert abort_cb.called is False 125 | 126 | 127 | def test_run_calls_callback_finally_on_stop_task(data_mock, store_mock, signal_mock, context_mock): 128 | 129 | class StoppingTask(BaseTask): 130 | def run(self, *args, **kwargs): 131 | raise StopTask() 132 | 133 | finally_cb = Mock() 134 | success_cb = Mock() 135 | stop_cb = Mock() 136 | abort_cb = Mock() 137 | task = StoppingTask('task-name', callback_finally=finally_cb) 138 | task._run(data_mock, store_mock, signal_mock, context_mock, 139 | success_callback=success_cb, stop_callback=stop_cb, abort_callback=abort_cb) 140 | assert finally_cb.call_args == call(TaskStatus.Stopped, data_mock, store_mock, signal_mock, context_mock) 141 | assert success_cb.called is False 142 | assert stop_cb.called is True 143 | assert abort_cb.called is False 144 | 145 | 146 | def test_run_calls_callback_finally_on_abort_workflow(data_mock, store_mock, signal_mock, context_mock): 147 | 148 | class AbortingTask(BaseTask): 149 | def run(self, *args, **kwargs): 150 | raise AbortWorkflow() 151 | 152 | finally_cb = Mock() 153 | success_cb = Mock() 154 | stop_cb = Mock() 155 | abort_cb = Mock() 156 | task = AbortingTask('task-name', callback_finally=finally_cb) 157 | task._run(data_mock, store_mock, signal_mock, context_mock, 158 | success_callback=success_cb, stop_callback=stop_cb, abort_callback=abort_cb) 159 | assert finally_cb.call_args == call(TaskStatus.Aborted, data_mock, store_mock, signal_mock, context_mock) 160 | assert success_cb.called is False 161 | assert stop_cb.called is False 162 | assert abort_cb.called is True 163 | 164 | 165 | def test_run_handles_invalid_result(data_mock, store_mock, signal_mock, context_mock): 166 | 167 | class InvalidResultTask(BaseTask): 168 | def run(self, *args, **kwargs): 169 | return 'whoops' 170 | 171 | with pytest.raises(TaskReturnActionInvalid): 172 | InvalidResultTask('task-name')._run(data_mock, store_mock, signal_mock, context_mock) 173 | 174 | 175 | def test_run_handles_action_response(data_mock, store_mock, signal_mock, context_mock): 176 | 177 | run_result = Action(create_autospec(MultiTaskData, instance=True)) 178 | 179 | class Task(BaseTask): 180 | def run(self, *args, **kwargs): 181 | return run_result 182 | 183 | result = Task('task-name')._run(data_mock, store_mock, signal_mock, context_mock) 184 | assert result == run_result 185 | 186 | 187 | def test_run_handles_no_data(store_mock, signal_mock, context_mock): 188 
| result = BaseTask('task-name')._run(None, store_mock, signal_mock, context_mock) 189 | assert result.data is not None 190 | -------------------------------------------------------------------------------- /tests/test_bash_task.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from unittest.mock import Mock, call, patch 3 | 4 | import pytest # noqa 5 | 6 | from lightflow.tasks.bash_task import BashTask 7 | from lightflow.models.exceptions import StopTask, AbortWorkflow 8 | 9 | 10 | def test_bash_task_executes_command(tmpdir, data_mock, store_mock, signal_mock, context_mock): 11 | tmp_file_path = Path(str(tmpdir.mkdir('bash-task').join('target.txt'))) 12 | callback = Mock() 13 | command = 'echo ok > {target_path}'.format(target_path=tmp_file_path) 14 | task = BashTask('task-name', command, callback_process=callback) 15 | task.run(data_mock, store_mock, signal_mock, context_mock) 16 | assert callback.called is True 17 | assert tmp_file_path.open().read().strip() == 'ok' 18 | 19 | 20 | def test_bash_task_handles_sleep(data_mock, store_mock, signal_mock, context_mock): 21 | supplied_return_code = None 22 | 23 | def end_callback(return_code, *args): 24 | nonlocal supplied_return_code 25 | supplied_return_code = return_code 26 | 27 | task = BashTask('task-name', 'sleep 0.5', callback_end=end_callback) 28 | task.run(data_mock, store_mock, signal_mock, context_mock) 29 | assert supplied_return_code == 0 30 | 31 | 32 | def test_bash_task_calls_stdout_callback(data_mock, store_mock, signal_mock, context_mock): 33 | stdout_callback, stderr_callback = Mock(), Mock() 34 | task = BashTask('task-name', 'echo line1; echo line2', callback_stdout=stdout_callback, 35 | callback_stderr=stderr_callback) 36 | task.run(data_mock, store_mock, signal_mock, context_mock) 37 | assert stdout_callback.call_args_list == [ 38 | call('line1\n', data_mock, store_mock, signal_mock, context_mock), 39 | call('line2\n', data_mock, store_mock, signal_mock, context_mock), 40 | ] 41 | assert stderr_callback.called is False 42 | 43 | 44 | def test_bash_task_calls_stderr_callback(data_mock, store_mock, signal_mock, context_mock): 45 | stdout_callback, stderr_callback = Mock(), Mock() 46 | task = BashTask('task-name', 'invalid-command-blerg', callback_stdout=stdout_callback, 47 | callback_stderr=stderr_callback) 48 | task.run(data_mock, store_mock, signal_mock, context_mock) 49 | stderr_callback_args = stderr_callback.call_args[0] 50 | assert 'not found' in stderr_callback_args[0] 51 | assert stderr_callback_args[1:] == (data_mock, store_mock, signal_mock, context_mock) 52 | assert stdout_callback.called is False 53 | 54 | 55 | def test_bash_task_captures_io_to_file(data_mock, store_mock, signal_mock, context_mock): 56 | 57 | supplied_return_code = None 58 | stdout_file_contents = None 59 | stderr_file_contents = None 60 | 61 | def end_callback(return_code, stdout_file, stderr_file, data, store, signal, context): 62 | nonlocal supplied_return_code, stdout_file_contents, stderr_file_contents 63 | supplied_return_code = return_code 64 | stdout_file_contents = stdout_file.read() 65 | stderr_file_contents = stderr_file.read() 66 | 67 | task = BashTask('task-name', 'echo ok; invalid-command-blerg', 68 | capture_stdout=True, capture_stderr=True, 69 | callback_end=end_callback) 70 | task.run(data_mock, store_mock, signal_mock, context_mock) 71 | assert supplied_return_code > 0 72 | assert stdout_file_contents == b'ok\n' 73 | assert b'not found' in stderr_file_contents 74 
| 75 | 76 | @pytest.mark.parametrize('ExceptionType', [StopTask, AbortWorkflow]) 77 | @patch('lightflow.tasks.bash_task.Popen') 78 | def test_terminates_the_process_if_stopped_or_aborted(PopenMock, ExceptionType, data_mock, store_mock, 79 | signal_mock, context_mock): 80 | 81 | def process_callback(*args): 82 | raise ExceptionType() 83 | 84 | task = BashTask('task-name', 'echo ok', callback_process=process_callback) 85 | with pytest.raises(ExceptionType): 86 | task.run(data_mock, store_mock, signal_mock, context_mock) 87 | assert PopenMock.return_value.terminate.called is True 88 | -------------------------------------------------------------------------------- /tests/test_exceptions.py: -------------------------------------------------------------------------------- 1 | from lightflow.models.exceptions import LightflowException 2 | 3 | 4 | def test_exception_str_and_repr(): 5 | exc = LightflowException(message='the-message') 6 | assert 'the-message' in str(exc) 7 | assert 'the-message' in repr(exc) 8 | -------------------------------------------------------------------------------- /tests/test_task_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AustralianSynchrotron/lightflow/dc53dbc1d961e20fb144273baca258060705c03e/tests/test_task_data.py -------------------------------------------------------------------------------- /tests/test_workflow.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from pathlib import Path 3 | 4 | import pytest # noqa 5 | 6 | from lightflow.models.workflow import Workflow 7 | from lightflow.models.exceptions import WorkflowImportError, WorkflowArgumentError 8 | 9 | 10 | @pytest.fixture(autouse=True) 11 | def add_workflow_path(): 12 | path = str(Path(__file__).parent / 'fixtures/workflows') 13 | sys.path.append(path) 14 | yield 15 | sys.path.remove(path) 16 | 17 | 18 | def test_load_workflow_for_missing_name(): 19 | with pytest.raises(WorkflowImportError): 20 | Workflow().load('invalid_name_workflow') 21 | 22 | 23 | def test_load_workflow_with_no_dag(): 24 | Workflow().load('no_dag_workflow', strict_dag=False) 25 | with pytest.raises(WorkflowImportError): 26 | Workflow().load('no_dag_workflow', strict_dag=True) 27 | 28 | 29 | def test_load_workflow_with_dag(): 30 | wf = Workflow() 31 | wf.load('dag_present_workflow', strict_dag=True) 32 | assert wf.name == 'dag_present_workflow' 33 | assert wf.docstring == 'The docstring' 34 | assert len(wf.parameters) == 0 35 | 36 | 37 | def test_load_workflow_with_no_arguments(): 38 | with pytest.raises(WorkflowArgumentError): 39 | Workflow().load('parameters_workflow') 40 | 41 | 42 | def test_load_workflow_with_missing_arguments(): 43 | with pytest.raises(WorkflowArgumentError): 44 | Workflow().load('parameters_workflow', arguments={}) 45 | 46 | 47 | def test_load_workflow_with_all_arguments(): 48 | wf = Workflow() 49 | wf.load('parameters_workflow', arguments={'required_arg': 'ok'}) 50 | assert wf.parameters[0].name == 'required_arg' 51 | 52 | 53 | def test_load_workflow_when_validate_arguments_is_false(): 54 | Workflow().load('parameters_workflow', validate_arguments=False) 55 | 56 | 57 | def test_workflow_from_name_constructor(): 58 | wf = Workflow.from_name('parameters_workflow', arguments={'required_arg': 'ok'}) 59 | assert wf.parameters[0].name == 'required_arg' 60 | -------------------------------------------------------------------------------- /tests/test_workflows.py: 
-------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | from lightflow.workflows import list_workflows 4 | from lightflow.config import Config 5 | 6 | 7 | def test_list_workflows_when_no_workflow_dirs_in_config(): 8 | config = Config() 9 | config.load_from_dict({'workflows': []}) 10 | assert list_workflows(config) == [] 11 | 12 | 13 | def test_list_workflows_handles_missing_parameters(): 14 | config = Config() 15 | workflows_path = str(Path(__file__).parent / 'fixtures/workflows') 16 | config.load_from_dict({'workflows': [workflows_path]}) 17 | assert 'parameters_workflow' in {wf.name for wf in list_workflows(config)} 18 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py36, flake, coverage 3 | 4 | [testenv] 5 | deps = -rrequirements-dev.txt 6 | commands = pytest 7 | 8 | [testenv:flake] 9 | commands = flake8 10 | 11 | [testenv:coverage] 12 | commands = pytest --cov=lightflow --cov-report html 13 | 14 | [flake8] 15 | max-line-length = 120 16 | 17 | [pytest] 18 | addopts = --strict 19 | markers = 20 | wip: Work in progress 21 | --------------------------------------------------------------------------------
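Since tox.ini passes --strict to pytest, only markers registered under [pytest] are accepted, which is why the wip marker is declared there. Applying the marker looks roughly like the sketch below; the test itself is a hypothetical placeholder, not part of the suite.

import pytest


@pytest.mark.wip  # registered in tox.ini's [pytest] section, so it passes the --strict check
def test_feature_still_in_progress():
    assert True  # placeholder body for illustration only

Work-in-progress tests can then be selected with pytest -m wip, or excluded with pytest -m "not wip".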