├── requirements.txt ├── docs ├── pyfi16.ico ├── ui-design.odp ├── .vscode │ └── settings.json ├── images │ ├── favicon.ico │ ├── favicon.xcf │ ├── favicon2.ico │ ├── flow.svg │ ├── pyfi-white.svg │ └── pyfi-mono.svg ├── problem-solution.odp ├── api │ ├── REST │ │ └── index.rst │ ├── CLI │ │ └── index.rst │ ├── python │ │ ├── index.rst │ │ ├── lambda │ │ │ └── index.rst │ │ ├── objects │ │ │ └── index.rst │ │ └── decorators │ │ │ └── index.rst │ ├── index.rst │ └── ORM │ │ └── index.rst ├── discord │ └── index.rst ├── install │ └── index.rst ├── servers │ ├── api │ │ └── index.rst │ ├── web │ │ └── index.rst │ └── index.rst ├── overview │ ├── images │ │ ├── platform1.png │ │ ├── scheduler1.png │ │ ├── scheduler2.png │ │ ├── architecture1.png │ │ ├── architecture2.png │ │ ├── architecture3.png │ │ └── architecture4.png │ └── index.rst ├── tutorials │ ├── examples │ │ └── index.rst │ └── index.rst ├── database │ ├── index.rst │ └── datamodel │ │ └── index.rst ├── stack │ ├── containers │ │ ├── nginx │ │ │ └── index.rst │ │ ├── docker │ │ │ └── index.rst │ │ ├── flower │ │ │ └── index.rst │ │ ├── kibana │ │ │ └── index.rst │ │ ├── pgadmin │ │ │ └── index.rst │ │ ├── redis │ │ │ └── index.rst │ │ ├── elasticsearch │ │ │ └── index.rst │ │ ├── insights │ │ │ └── index.rst │ │ ├── portainer │ │ │ └── index.rst │ │ ├── postgres │ │ │ └── index.rst │ │ ├── rabbitmq │ │ │ └── index.rst │ │ └── index.rst │ └── index.rst ├── _static │ └── css │ │ └── override.css ├── CLI │ ├── index.rst │ └── examples │ │ └── index.rst ├── ui │ └── index.rst ├── Makefile ├── make.bat ├── index.rst ├── usecases │ └── index.rst ├── architecture │ └── index.rst ├── dataflows │ └── index.rst ├── conf.py ├── designgoals │ └── index.rst └── quickstart │ └── index.rst ├── screens ├── flower.png ├── kibana.png ├── layer1.png ├── layer2.png ├── layer3.png ├── layer4.png ├── pyfi1.png ├── pyfi2.png ├── pyfi3.png ├── pyfi4.png ├── pyfi5.png ├── pyfi6.png ├── pyfi7.png ├── pyfi8.png ├── redis.png ├── 
amplify.png ├── execution.png ├── pgadmin.png ├── portainer.png ├── rabbitmq.png ├── scaling.png ├── screen14.png ├── screen15.png ├── screen16.png ├── socket1.png ├── socket2.png ├── techstack.png ├── architecture1.png ├── architecture3.png ├── pyfi-data-model.png └── pyfi.svg └── README.md /requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx-click 2 | -------------------------------------------------------------------------------- /docs/pyfi16.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/radiantone/pyfi/HEAD/docs/pyfi16.ico -------------------------------------------------------------------------------- /docs/ui-design.odp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/radiantone/pyfi/HEAD/docs/ui-design.odp -------------------------------------------------------------------------------- /screens/flower.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/radiantone/pyfi/HEAD/screens/flower.png -------------------------------------------------------------------------------- /screens/kibana.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/radiantone/pyfi/HEAD/screens/kibana.png -------------------------------------------------------------------------------- /screens/layer1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/radiantone/pyfi/HEAD/screens/layer1.png -------------------------------------------------------------------------------- /screens/layer2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/radiantone/pyfi/HEAD/screens/layer2.png 
-------------------------------------------------------------------------------- /screens/layer3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/radiantone/pyfi/HEAD/screens/layer3.png -------------------------------------------------------------------------------- /screens/layer4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/radiantone/pyfi/HEAD/screens/layer4.png -------------------------------------------------------------------------------- /screens/pyfi1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/radiantone/pyfi/HEAD/screens/pyfi1.png -------------------------------------------------------------------------------- /screens/pyfi2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/radiantone/pyfi/HEAD/screens/pyfi2.png -------------------------------------------------------------------------------- /screens/pyfi3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/radiantone/pyfi/HEAD/screens/pyfi3.png -------------------------------------------------------------------------------- /screens/pyfi4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/radiantone/pyfi/HEAD/screens/pyfi4.png -------------------------------------------------------------------------------- /screens/pyfi5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/radiantone/pyfi/HEAD/screens/pyfi5.png -------------------------------------------------------------------------------- /screens/pyfi6.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/radiantone/pyfi/HEAD/screens/pyfi6.png -------------------------------------------------------------------------------- /screens/pyfi7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/radiantone/pyfi/HEAD/screens/pyfi7.png -------------------------------------------------------------------------------- /screens/pyfi8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/radiantone/pyfi/HEAD/screens/pyfi8.png -------------------------------------------------------------------------------- /screens/redis.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/radiantone/pyfi/HEAD/screens/redis.png -------------------------------------------------------------------------------- /docs/.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "restructuredtext.confPath": "${workspaceFolder}" 3 | } -------------------------------------------------------------------------------- /screens/amplify.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/radiantone/pyfi/HEAD/screens/amplify.png -------------------------------------------------------------------------------- /screens/execution.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/radiantone/pyfi/HEAD/screens/execution.png -------------------------------------------------------------------------------- /screens/pgadmin.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/radiantone/pyfi/HEAD/screens/pgadmin.png -------------------------------------------------------------------------------- /screens/portainer.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/radiantone/pyfi/HEAD/screens/portainer.png -------------------------------------------------------------------------------- /screens/rabbitmq.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/radiantone/pyfi/HEAD/screens/rabbitmq.png -------------------------------------------------------------------------------- /screens/scaling.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/radiantone/pyfi/HEAD/screens/scaling.png -------------------------------------------------------------------------------- /screens/screen14.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/radiantone/pyfi/HEAD/screens/screen14.png -------------------------------------------------------------------------------- /screens/screen15.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/radiantone/pyfi/HEAD/screens/screen15.png -------------------------------------------------------------------------------- /screens/screen16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/radiantone/pyfi/HEAD/screens/screen16.png -------------------------------------------------------------------------------- /screens/socket1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/radiantone/pyfi/HEAD/screens/socket1.png -------------------------------------------------------------------------------- /screens/socket2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/radiantone/pyfi/HEAD/screens/socket2.png 
-------------------------------------------------------------------------------- /screens/techstack.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/radiantone/pyfi/HEAD/screens/techstack.png -------------------------------------------------------------------------------- /docs/images/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/radiantone/pyfi/HEAD/docs/images/favicon.ico -------------------------------------------------------------------------------- /docs/images/favicon.xcf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/radiantone/pyfi/HEAD/docs/images/favicon.xcf -------------------------------------------------------------------------------- /docs/images/favicon2.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/radiantone/pyfi/HEAD/docs/images/favicon2.ico -------------------------------------------------------------------------------- /docs/problem-solution.odp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/radiantone/pyfi/HEAD/docs/problem-solution.odp -------------------------------------------------------------------------------- /screens/architecture1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/radiantone/pyfi/HEAD/screens/architecture1.png -------------------------------------------------------------------------------- /screens/architecture3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/radiantone/pyfi/HEAD/screens/architecture3.png -------------------------------------------------------------------------------- /screens/pyfi-data-model.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/radiantone/pyfi/HEAD/screens/pyfi-data-model.png -------------------------------------------------------------------------------- /docs/api/REST/index.rst: -------------------------------------------------------------------------------- 1 | 2 | REST 3 | ================================ 4 | .. toctree:: 5 | :maxdepth: 2 6 | -------------------------------------------------------------------------------- /docs/discord/index.rst: -------------------------------------------------------------------------------- 1 | 2 | Discord 3 | ================================ 4 | .. toctree:: 5 | :maxdepth: 2 6 | -------------------------------------------------------------------------------- /docs/install/index.rst: -------------------------------------------------------------------------------- 1 | 2 | Install 3 | ================================ 4 | .. toctree:: 5 | :maxdepth: 2 6 | -------------------------------------------------------------------------------- /docs/servers/api/index.rst: -------------------------------------------------------------------------------- 1 | 2 | API 3 | ================================ 4 | .. toctree:: 5 | :maxdepth: 2 6 | -------------------------------------------------------------------------------- /docs/servers/web/index.rst: -------------------------------------------------------------------------------- 1 | 2 | Web 3 | ================================ 4 | .. 
toctree:: 5 | :maxdepth: 2 6 | -------------------------------------------------------------------------------- /docs/overview/images/platform1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/radiantone/pyfi/HEAD/docs/overview/images/platform1.png -------------------------------------------------------------------------------- /docs/overview/images/scheduler1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/radiantone/pyfi/HEAD/docs/overview/images/scheduler1.png -------------------------------------------------------------------------------- /docs/overview/images/scheduler2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/radiantone/pyfi/HEAD/docs/overview/images/scheduler2.png -------------------------------------------------------------------------------- /docs/overview/images/architecture1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/radiantone/pyfi/HEAD/docs/overview/images/architecture1.png -------------------------------------------------------------------------------- /docs/overview/images/architecture2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/radiantone/pyfi/HEAD/docs/overview/images/architecture2.png -------------------------------------------------------------------------------- /docs/overview/images/architecture3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/radiantone/pyfi/HEAD/docs/overview/images/architecture3.png -------------------------------------------------------------------------------- /docs/overview/images/architecture4.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/radiantone/pyfi/HEAD/docs/overview/images/architecture4.png -------------------------------------------------------------------------------- /docs/tutorials/examples/index.rst: -------------------------------------------------------------------------------- 1 | 2 | Examples 3 | ================================ 4 | .. toctree:: 5 | :maxdepth: 2 6 | -------------------------------------------------------------------------------- /docs/database/index.rst: -------------------------------------------------------------------------------- 1 | 2 | Database 3 | ================================ 4 | .. toctree:: 5 | :maxdepth: 2 6 | 7 | datamodel/index -------------------------------------------------------------------------------- /docs/stack/containers/nginx/index.rst: -------------------------------------------------------------------------------- 1 | 2 | Nginx 3 | ================================ 4 | .. toctree:: 5 | :maxdepth: 2 6 | 7 | -------------------------------------------------------------------------------- /docs/stack/containers/docker/index.rst: -------------------------------------------------------------------------------- 1 | 2 | Docker 3 | ================================ 4 | .. toctree:: 5 | :maxdepth: 2 6 | 7 | 8 | -------------------------------------------------------------------------------- /docs/stack/containers/flower/index.rst: -------------------------------------------------------------------------------- 1 | 2 | Flower 3 | ================================ 4 | .. toctree:: 5 | :maxdepth: 2 6 | 7 | 8 | -------------------------------------------------------------------------------- /docs/stack/containers/kibana/index.rst: -------------------------------------------------------------------------------- 1 | 2 | Kibana 3 | ================================ 4 | .. 
toctree:: 5 | :maxdepth: 2 6 | 7 | 8 | -------------------------------------------------------------------------------- /docs/stack/containers/pgadmin/index.rst: -------------------------------------------------------------------------------- 1 | 2 | pgAdmin 3 | ================================ 4 | .. toctree:: 5 | :maxdepth: 2 6 | 7 | 8 | -------------------------------------------------------------------------------- /docs/stack/containers/redis/index.rst: -------------------------------------------------------------------------------- 1 | 2 | Redis 3 | ================================ 4 | .. toctree:: 5 | :maxdepth: 2 6 | 7 | 8 | -------------------------------------------------------------------------------- /docs/tutorials/index.rst: -------------------------------------------------------------------------------- 1 | 2 | Tutorials 3 | ================================ 4 | .. toctree:: 5 | :maxdepth: 2 6 | 7 | examples/index -------------------------------------------------------------------------------- /docs/api/CLI/index.rst: -------------------------------------------------------------------------------- 1 | 2 | CLI 3 | ================================ 4 | .. toctree:: 5 | :maxdepth: 2 6 | 7 | See section on :ref:`CLI` 8 | -------------------------------------------------------------------------------- /docs/stack/containers/elasticsearch/index.rst: -------------------------------------------------------------------------------- 1 | 2 | ElasticSearch 3 | ================================ 4 | .. toctree:: 5 | :maxdepth: 2 6 | 7 | -------------------------------------------------------------------------------- /docs/stack/containers/insights/index.rst: -------------------------------------------------------------------------------- 1 | 2 | Insights 3 | ================================ 4 | .. 
toctree:: 5 | :maxdepth: 2 6 | 7 | 8 | -------------------------------------------------------------------------------- /docs/stack/containers/portainer/index.rst: -------------------------------------------------------------------------------- 1 | 2 | Portainer 3 | ================================ 4 | .. toctree:: 5 | :maxdepth: 2 6 | 7 | 8 | -------------------------------------------------------------------------------- /docs/stack/containers/postgres/index.rst: -------------------------------------------------------------------------------- 1 | 2 | Postgres 3 | ================================ 4 | .. toctree:: 5 | :maxdepth: 2 6 | 7 | 8 | -------------------------------------------------------------------------------- /docs/stack/containers/rabbitmq/index.rst: -------------------------------------------------------------------------------- 1 | 2 | RabbitMQ 3 | ================================ 4 | .. toctree:: 5 | :maxdepth: 2 6 | 7 | 8 | -------------------------------------------------------------------------------- /docs/servers/index.rst: -------------------------------------------------------------------------------- 1 | 2 | Servers 3 | ================================ 4 | .. toctree:: 5 | :maxdepth: 2 6 | 7 | web/index 8 | api/index 9 | -------------------------------------------------------------------------------- /docs/api/python/index.rst: -------------------------------------------------------------------------------- 1 | 2 | Python 3 | ================================ 4 | .. toctree:: 5 | :maxdepth: 2 6 | 7 | decorators/index 8 | objects/index 9 | lambda/index -------------------------------------------------------------------------------- /docs/database/datamodel/index.rst: -------------------------------------------------------------------------------- 1 | 2 | Data Model 3 | ================================ 4 | .. toctree:: 5 | :maxdepth: 2 6 | 7 | 8 | .. 
image:: ../../../screens/pyfi-data-model.png -------------------------------------------------------------------------------- /docs/_static/css/override.css: -------------------------------------------------------------------------------- 1 | .wy-nav-content { 2 | max-width: 100% !important; 3 | } 4 | 5 | a:visited { 6 | color: #728e9b; 7 | } 8 | 9 | .icon { 10 | color: #728e9b; 11 | } -------------------------------------------------------------------------------- /docs/api/index.rst: -------------------------------------------------------------------------------- 1 | 2 | API 3 | ================================ 4 | .. toctree:: 5 | :maxdepth: 2 6 | 7 | CLI/index 8 | python/index 9 | ORM/index 10 | REST/index 11 | -------------------------------------------------------------------------------- /docs/CLI/index.rst: -------------------------------------------------------------------------------- 1 | .. _CLI: 2 | 3 | CLI 4 | ================================ 5 | 6 | .. click:: pyfi.cli:cli 7 | :prog: flow 8 | :show-nested: 9 | 10 | .. toctree:: 11 | :maxdepth: 2 12 | 13 | examples/index 14 | -------------------------------------------------------------------------------- /docs/stack/containers/index.rst: -------------------------------------------------------------------------------- 1 | 2 | Containers 3 | ================================ 4 | .. toctree:: 5 | :maxdepth: 2 6 | 7 | docker/index 8 | elasticsearch/index 9 | flower/index 10 | insights/index 11 | kibana/index 12 | nginx/index 13 | pgadmin/index 14 | portainer/index 15 | postgres/index 16 | rabbitmq/index 17 | redis/index 18 | -------------------------------------------------------------------------------- /docs/stack/index.rst: -------------------------------------------------------------------------------- 1 | 2 | Stack 3 | ================================ 4 | .. toctree:: 5 | :maxdepth: 2 6 | 7 | containers/index 8 | 9 | 10 | .. image:: ../../screens/techstack.png 11 | 12 | .. 
image:: ../../screens/amplify.png 13 | .. image:: ../../screens/flower.png 14 | .. image:: ../../screens/kibana.png 15 | .. image:: ../../screens/pgadmin.png 16 | .. image:: ../../screens/portainer.png 17 | .. image:: ../../screens/rabbitmq.png 18 | .. image:: ../../screens/redis.png -------------------------------------------------------------------------------- /docs/ui/index.rst: -------------------------------------------------------------------------------- 1 | 2 | UI 3 | ================================ 4 | .. toctree:: 5 | :maxdepth: 2 6 | 7 | .. image:: ../../screens/pyfi1.png 8 | .. image:: ../../screens/pyfi2.png 9 | .. image:: ../../screens/pyfi3.png 10 | .. image:: ../../screens/pyfi4.png 11 | .. image:: ../../screens/pyfi5.png 12 | .. image:: ../../screens/pyfi6.png 13 | .. image:: ../../screens/pyfi7.png 14 | .. image:: ../../screens/pyfi8.png 15 | .. image:: ../../screens/screen14.png 16 | .. image:: ../../screens/screen15.png 17 | .. image:: ../../screens/screen16.png -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. ElasticCode documentation master file, created by 2 | sphinx-quickstart on Thu Sep 2 05:56:06 2021. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Documentation Outline 7 | ================================ 8 | .. toctree:: 9 | :caption: Introduction 10 | :maxdepth: 2 11 | 12 | overview/index 13 | designgoals/index 14 | usecases/index 15 | install/index 16 | quickstart/index 17 | 18 | .. toctree:: 19 | :caption: Data Flows 20 | :maxdepth: 2 21 | 22 | dataflows/index 23 | 24 | .. 
toctree:: 25 | :caption: Technology 26 | :maxdepth: 2 27 | 28 | architecture/index 29 | database/index 30 | servers/index 31 | CLI/index 32 | ui/index 33 | api/index 34 | stack/index.rst 35 | 36 | .. toctree:: 37 | :caption: Learning 38 | :maxdepth: 2 39 | 40 | tutorials/index 41 | 42 | .. toctree:: 43 | :caption: Support 44 | :maxdepth: 2 45 | 46 | discord/index 47 | 48 | Indices and tables 49 | ================== 50 | 51 | * :ref:`genindex` 52 | * :ref:`modindex` 53 | * :ref:`search` 54 | -------------------------------------------------------------------------------- /docs/api/python/lambda/index.rst: -------------------------------------------------------------------------------- 1 | 2 | Lambda 3 | ================================ 4 | .. toctree:: 5 | :maxdepth: 2 6 | 7 | 8 | .. code-block:: python 9 | :caption: ElasticCode Python Lambda API 10 | 11 | from pyfi.client.api import funnel, parallel, pipeline 12 | from pyfi.client.example.api import do_something_p as do_something 13 | from pyfi.client.example.api import do_this_p as do_this 14 | 15 | """ 16 | An example app on top of pyfi. 
References existing infrastructure and then runs complex workflows and parallel operations on it 17 | """ 18 | _pipeline = pipeline( 19 | [ 20 | do_something("One"), 21 | do_something("Two"), 22 | parallel( 23 | [ 24 | do_this("Four"), 25 | do_this("Five"), 26 | ] 27 | ), 28 | do_this("Three"), 29 | ] 30 | ) 31 | print(_pipeline().get()) 32 | _parallel = parallel([_pipeline, do_something("Six"), do_something("Seven")]) 33 | 34 | _funnel = funnel( 35 | [do_something("Eight"), _parallel, do_something("Nine")], do_something("A") 36 | ) 37 | 38 | _funnel2 = funnel([_parallel, do_something("Ten")], do_something("B")) 39 | 40 | _funnel3 = funnel([_funnel, _funnel2]) 41 | 42 | result = _funnel3(do_something("Eleven")) 43 | print("FUNNEL: ", result.get()) 44 | -------------------------------------------------------------------------------- /docs/usecases/index.rst: -------------------------------------------------------------------------------- 1 | 2 | Use Cases 3 | ================================ 4 | .. toctree:: 5 | :maxdepth: 2 6 | 7 | There are a wide variety of use-cases ElasticCode can address, a few of which are listed below. 8 | 9 | 1. **Enterprise Workflow Automation** - ElasticCode can design and execute dynamic workflows across heterogenous enterprise, leveraging a variety of data sources and services. 10 | 2. **High Performance Computing** - ElasticCode's support for real-time streaming compute and parallel workflow execution lends itself to big-data and compute intensive tasks. 11 | 3. **Enterprise DevOps** - DevOps involves automated and repeatable pipelines for building software assets. ElasticCode's flow models and distributed compute is a perfect fit for custom DevOps. 12 | 4. **IoT and Factory Automation** - Orchestrating across connected devices or machinery in a factory is easy to model with ElasticCode due to it's dynamic and ad hoc workflow capability. Custom scripting allows for easy integration into existing device APIs. 13 | 5. 
**AI & Machine Learning Modelling** - Generating effective AI models requires obtaining and cleaning data from various sources, feature extraction, merging and training epochs. This is naturally a multi-step process that can be done visually with ElasticCode's visual modelling tools. 14 | 6. **Simulation** - Simulation seeks to model real world processes and given a set of inputs, determine or predict certain target variables. These models are typically designed as a network of connected dependencies or entities along with environmental conditions that affect the simulation. 15 | 7. **Decision Systems & Analytics** - State-transition modelling is a technique used at major companies that have to make important stochastic financial decisions using key business metrics. ElasticCode's visual modeling and streaming compute capability allow for such models to be easily designed and customized, fully integrating into company databases, spreadsheets, accounting systems or other data sources. -------------------------------------------------------------------------------- /docs/architecture/index.rst: -------------------------------------------------------------------------------- 1 | 2 | Architecture 3 | ================================ 4 | .. toctree:: 5 | :maxdepth: 2 6 | 7 | ElasticCode is a scalable, high-performance network architecture that separates concerns across layers. Each layer has best-of-breed components that manage the responsibility of that layer. 8 | The slides below show the different layers and their responsibilities, starting with the bottom-most layer. 9 | 10 | Managed Compute 11 | --------------- 12 | 13 | ElasticCode takes a different approach to staging and executing python code on its network. Other frameworks or libraries allow you to define your functions in your execution environment and serialize that code to remote workers for execution. Obviously that has some serious security implications in a *shared, managed compute environment*.
So ElasticCode does not allow this. Rather, you request ElasticCode to mount your code through a secure git repository URL. This becomes *the contract* between you and ElasticCode and allows ElasticCode to securely load your code into its network. 14 | 15 | This approach also allows administrators to control white and blacklists for what repositories of code it trusts. 16 | 17 | Code Isolation 18 | -------------- 19 | 20 | Each ElasticCode worker that mounts a git repository will create a virtual environment for that code and execute the repository's *setup.py* to install the code in that virtual environment. This is beneficial for a number of reasons, but most importantly it keeps the environment for the mounted code separate from the ElasticCode agent's python environment. 21 | 22 | Layered Design 23 | -------------- 24 | 25 | ElasticCode is a distributed, scalable architecture and as such it is a relationship between connected hardware & service layers interacting as a whole. 26 | 27 | .. image:: ../../screens/layer1.png 28 | :align: center 29 | .. image:: ../../screens/layer2.png 30 | :align: center 31 | .. image:: ../../screens/layer3.png 32 | :align: center 33 | .. image:: ../../screens/layer4.png 34 | :align: center -------------------------------------------------------------------------------- /docs/dataflows/index.rst: -------------------------------------------------------------------------------- 1 | 2 | Data Flows 3 | ================================ 4 | .. toctree:: 5 | :maxdepth: 2 6 | 7 | ElasticCode provides a unique and easy way to deploy distributed data flows (sometimes called workflows). These flows are constructed by using the ElasticCode object model and linking them together. 8 | 9 | To review the ElasticCode object model, we have the following taxonomy used in an ElasticCode network.
10 | 11 | - Nodes 12 | - Agents 13 | - Workers 14 | - Processors 15 | - Sockets 16 | - Tasks 17 | -Arguments 18 | 19 | For a given processor, multiple sockets can be exposed that allow incoming requests to different functions (tasks) within the processors python module code. 20 | Links between outputs of one socket and inputs of another are established using Plugs. 21 | 22 | Each Plug has a source socket and a target socket, such that when the function associated with the source socket completes, its output is used as input to the target socket function. 23 | These requests persist on a queue and execute in an orderly fashion to not stress resources. Since processors are bound to one or more CPUs, they can service requests in parallel but will only execute requests when resources are free to do so. 24 | 25 | Because functions are coupled into data flows using loose coupling, you are able to change the topology of your data flow anytime. Execution will follow the path of the current dataflow. 26 | 27 | When connecting a Plug to a target Socket, you can specify a specific argument for the target function that the plug is connected to. 28 | 29 | For example, consider this target function: 30 | 31 | .. code-block:: python 32 | 33 | def add_two(one, two): 34 | return one+two 35 | 36 | *diagram* 37 | 38 | It has two arguments `one` and `two` by name. You might have a data flow with two separate inputs to `add_two` where one plug satisfies the `one` argument and the other plug satisfies the `two` argument. 39 | In this design,`add_two` will only trigger once both arguments have `arrived` at the socket. This means arguments can arrive at different times and different orders. -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 
2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 12 | # 13 | import os 14 | import sys 15 | sys.path.insert(0, os.path.abspath('..')) 16 | 17 | 18 | # -- Project information ----------------------------------------------------- 19 | 20 | project = 'ElasticCode' 21 | copyright = '2022-2026, elasticcode.ai' 22 | author = 'Darren Govoni' 23 | 24 | # The full version, including alpha/beta/rc tags 25 | release = '0.0.1' 26 | 27 | 28 | # -- General configuration --------------------------------------------------- 29 | 30 | # Add any Sphinx extension module names here, as strings. They can be 31 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 32 | # ones. 33 | extensions = ['sphinx_click.ext'] 34 | 35 | 36 | # Add any paths that contain templates here, relative to this directory. 37 | templates_path = ['_templates'] 38 | 39 | # List of patterns, relative to source directory, that match files and 40 | # directories to ignore when looking for source files. 41 | # This pattern also affects html_static_path and html_extra_path. 42 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 43 | 44 | 45 | # -- Options for HTML output ------------------------------------------------- 46 | 47 | # The theme to use for HTML and HTML Help pages. See the documentation for 48 | # a list of builtin themes. 
49 | # 50 | #html_theme = 'alabaster' 51 | html_theme = 'sphinx_rtd_theme' 52 | #html_theme = 'sphinx_material' 53 | 54 | # Add any paths that contain custom static files (such as style sheets) here, 55 | # relative to this directory. They are copied after the builtin static files, 56 | # so a file named "default.css" will overwrite the builtin "default.css". 57 | html_static_path = ['_static'] 58 | html_logo = 'images/flow.svg' 59 | #html_favicon = 'pyfi16.ico' 60 | html_favicon = 'images/favicon2.ico' 61 | 62 | html_theme_options = { 63 | 64 | 'logo_only': True, 65 | 'display_version': True, 66 | 'prev_next_buttons_location': 'bottom', 67 | 'style_external_links': False, 68 | 'vcs_pageview_mode': '', 69 | 'style_nav_header_background': '#abbcc3', 70 | # Toc options 71 | 'collapse_navigation': True, 72 | 'sticky_navigation': True, 73 | 'navigation_depth': 4, 74 | 'includehidden': True, 75 | 'titles_only': False 76 | } 77 | ''' 78 | html_theme_options = { 79 | 'repo_url': 'https://github.com/radiantone/sphinx-material', 80 | 'repo_name': 'Material for Sphinx', 81 | 'html_minify': True, 82 | 'css_minify': True, 83 | 'nav_title': 'Welcome to ElasticCode', 84 | 'globaltoc_depth': 2 85 | } 86 | ''' 87 | 88 | 89 | html_css_files = [ 90 | 'css/override.css', 91 | ] 92 | -------------------------------------------------------------------------------- /docs/api/python/objects/index.rst: -------------------------------------------------------------------------------- 1 | 2 | Objects 3 | ================================ 4 | .. toctree:: 5 | :maxdepth: 2 6 | 7 | 8 | .. 
code-block:: python 9 | :caption: ElasticCode Python Object API 10 | import json 11 | 12 | from pyfi.client.api import Plug, Processor, Socket 13 | from pyfi.client.user import USER 14 | from pyfi.db.model import AlchemyEncoder 15 | 16 | # Log in a user first 17 | print("USER", USER) 18 | # Create a processor 19 | processor = Processor( 20 | name="proc1", 21 | beat=True, 22 | user=USER, 23 | module="pyfi.processors.sample", 24 | branch="main", 25 | concurrency=6, 26 | gitrepo="https://user:key@github.com/radiantone/pyfi-processors#egg=pyfi-processor", 27 | ) 28 | 29 | processor2 = Processor( 30 | name="proc2", 31 | user=USER, 32 | module="pyfi.processors.sample", 33 | hostname="agent1", 34 | concurrency=6, 35 | branch="main", 36 | gitrepo="https://user:key@github.com/radiantone/pyfi-processors#egg=pyfi-processor", 37 | ) 38 | 39 | processor3 = Processor( 40 | name="proc3", 41 | user=USER, 42 | module="pyfi.processors.sample", 43 | hostname="agent2", 44 | concurrency=6, 45 | branch="main", 46 | gitrepo="https://user:pword@github.com/radiantone/pyfi-processors#egg=pyfi-processor", 47 | ) 48 | 49 | # Create a socket on the processor to receive requests for the do_something python function(task) 50 | do_something = Socket( 51 | name="pyfi.processors.sample.do_something", 52 | user=USER, 53 | interval=5, 54 | processor=processor, 55 | queue={"name": "pyfi.queue1"}, 56 | task="do_something", 57 | ) 58 | 59 | print(json.dumps(do_something.socket, indent=4, cls=AlchemyEncoder)) 60 | # Create a socket on the processor to receive requests for the do_this python function(task) 61 | do_this = Socket( 62 | name="pyfi.processors.sample.do_this", 63 | user=USER, 64 | processor=processor2, 65 | queue={"name": "pyfi.queue2"}, 66 | task="do_this", 67 | ) 68 | do_this2 = Socket( 69 | name="pyfi.processors.sample.do_this", 70 | user=USER, 71 | processor=processor3, 72 | queue={"name": "pyfi.queue3"}, 73 | task="do_this", 74 | ) 75 | 76 | do_something2 = Socket( 77 | 
name="proc2.do_something", 78 | user=USER, 79 | processor=processor2, 80 | queue={"name": "pyfi.queue1"}, 81 | task="do_something", 82 | ) 83 | 84 | # Create a plug that connects one processor to a socket of another 85 | plug = Plug( 86 | name="plug1", 87 | processor=processor, 88 | user=USER, 89 | source=do_something, 90 | queue={"name": "pyfi.queue3"}, 91 | target=do_this, 92 | ) 93 | 94 | -------------------------------------------------------------------------------- /docs/designgoals/index.rst: -------------------------------------------------------------------------------- 1 | Goals 2 | ================================ 3 | .. toctree:: 4 | :maxdepth: 2 5 | 6 | As the name suggests, ElasticCode is a spiritual offshoot of Apache NIFI except built using a python stack for running python (and other scripting languages) processors. 7 | However, ElasticCode is designed to be more broad in terms of design and scope which we will discuss below. 8 | 9 | Some important design goals for this technology are: 10 | 11 | 1. **Fault-Tolerant** - ElasticCode runs as a distributed network of logical compute processors that have redundancy and load-balancing built in. 12 | 2. **At-Scale** - This phrase is important. It indicates that the logical constructs (e.g. pyfi processors) run at the scale of the hardware (e.g. CPU processors), meaning there is a 1-1 correlation (physical mapping) between hardware processors and pyfi processors. 13 | 3. **Secure** - All the functional components in ElasticCode (database, broker, storage, cache) have security built in. 14 | 4. **Dynamic** - The topology and behavior of a ElasticCode network can be adjusted and administered in real-time without taking down the entire network. Because ElasticCode is not a single VM controlling everything, you can add/remove update components without negatively impacting the functionality of the system. 15 | 5. 
**Distributed** - As was mentioned above, everything in ElasticCode is inherently distributed, down to the processors. There is no physical centralization of any kind. 16 | 6. **Performance** - ElasticCode is built on mature technology stack that is capable of high-throughput message traffic. 17 | 7. **Reliability** - The distributed queue paradigm used by ElasticCode allows for every processor in your dataflow to consume and acknowledge message traffic from its inbound queues and write to outbound queues. These durable queues persist while processors consume messages off them. 18 | 8. **Scalability** - Processors can scale across CPUs, Machines and networks, consuming message traffic off the same or multiple persistent queues. In fact, ElasticCode can auto-scale processors to accommodate the swell of tasks arriving on a queue. In addition, flow processors will be automatically balanced across physical locations to evenly distribute computational load and reduce local resource contention. 19 | 9. **Pluggable Backends** - ElasticCode supports various implementations of backend components such as message (e.g. RabbitMQ, SQS) or result storage (SQL, Redis, S3) in addition to allowing you to implement an entire backend (behind the SQL database) yourself. 20 | 10. **Real-time Metrics** - ElasticCode processors will support real-time broadcasting of data throughput metrics via subscription web-sockets. This will allow for all kinds of custom integrations and front-end visualizations to see what the network is doing. 21 | 11. **Data Analysis** - One of the big goals for ElasticCode is to save important data metrics about the flows and usages so it can be mined by predictive AI models later. This will give your organization key insights into the movement patterns of data. 22 | 12. **GIT Integration** - All the code used by processors can be pulled from your own git repositories giving you instant integration into existing devops and CM processes. 
ElasticCode will let you select which repo and commit version you want a processor to execute code from in your flows. -------------------------------------------------------------------------------- /docs/api/python/decorators/index.rst: -------------------------------------------------------------------------------- 1 | 2 | Decorators 3 | ================================ 4 | .. toctree:: 5 | :maxdepth: 2 6 | 7 | 8 | .. code-block:: python 9 | :caption: ElasticCode Python decorator API 10 | """ 11 | Decorator API for Flow. Defines network from plain old classes and methods. 12 | """ 13 | import os 14 | 15 | from pyfi.client.api import ProcessorBase 16 | from pyfi.client.decorators import plug, processor, socket 17 | 18 | 19 | @processor( 20 | name="proc2", 21 | gitrepo=os.environ["GIT_REPO"], 22 | module="pyfi.processors.sample", 23 | concurrency=1, 24 | ) 25 | class ProcessorB(ProcessorBase): 26 | """Description""" 27 | 28 | @socket( 29 | name="proc2.do_this", 30 | processor="proc2", 31 | arguments=True, 32 | queue={"name": "sockq2"}, 33 | ) 34 | def do_this(message): 35 | from random import randrange 36 | 37 | print("Do this!", message) 38 | message = "Do this String: " + str(message) 39 | graph = { 40 | "tag": {"name": "tagname", "value": "tagvalue"}, 41 | "name": "distance", 42 | "value": randrange(50), 43 | } 44 | return {"message": message, "graph": graph} 45 | 46 | 47 | @processor( 48 | name="proc1", 49 | gitrepo=os.environ["GIT_REPO"], 50 | module="pyfi.processors.sample", 51 | concurrency=7, 52 | ) 53 | class ProcessorA(ProcessorBase): 54 | """Description""" 55 | 56 | def get_message(self): 57 | return "Self message!" 
58 | 59 | @plug( 60 | name="plug1", 61 | target="proc2.do_this", # Must be defined above already (prevents cycles) 62 | queue={ 63 | "name": "queue1", 64 | "message_ttl": 300000, 65 | "durable": True, 66 | "expires": 200, 67 | }, 68 | ) 69 | @socket( 70 | name="proc1.do_something", 71 | processor="proc1", 72 | beat=False, 73 | interval=15, 74 | queue={"name": "sockq1"}, 75 | ) 76 | def do_something(message): 77 | """do_something""" 78 | from random import randrange 79 | 80 | message = "TEXT:" + str(message) 81 | graph = { 82 | "tag": {"name": "tagname", "value": "tagvalue"}, 83 | "name": "temperature", 84 | "value": randrange(10), 85 | } 86 | return {"message": message, "graph": graph} 87 | 88 | 89 | @processor( 90 | name="proc3", 91 | gitrepo=os.environ["GIT_REPO"], 92 | module="pyfi.processors.sample", 93 | concurrency=1, 94 | ) 95 | class ProcessorC(ProcessorBase): 96 | """Description""" 97 | 98 | def get_message(self): 99 | return "Self message!" 100 | 101 | @plug( 102 | name="plug2", 103 | target="proc2.do_this", # Must be defined above already (prevents cycles) 104 | queue={ 105 | "name": "queue2", 106 | "message_ttl": 300000, 107 | "durable": True, 108 | "expires": 200, 109 | }, 110 | ) 111 | @socket( 112 | name="proc3.do_something", 113 | processor="proc3", 114 | beat=False, 115 | interval=5, 116 | queue={"name": "sockq3"}, 117 | ) 118 | def do_something(message): 119 | """do_something""" 120 | from random import randrange 121 | 122 | message = "TEXT2:" + str(message) 123 | graph = { 124 | "tag": {"name": "tagname", "value": "tagvalue"}, 125 | "name": "temperature", 126 | "value": randrange(10), 127 | } 128 | return {"message": message, "graph": graph} 129 | 130 | 131 | if __name__ == "__main__": 132 | print("Network created.") 133 | 134 | 135 | 136 | -------------------------------------------------------------------------------- /docs/images/flow.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 16 | 36 
| 38 | 42 | 46 | 50 | 54 | 58 | 62 | 66 | 70 | 74 | flow 86 | 90 | lambda 102 | 103 | 104 | -------------------------------------------------------------------------------- /screens/pyfi.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 16 | 36 | 38 | 42 | 46 | 50 | 54 | 58 | 62 | 66 | 70 | 74 | 78 | 82 | 86 | 87 | 88 | -------------------------------------------------------------------------------- /docs/images/pyfi-white.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 16 | 36 | 38 | 42 | 46 | 50 | 54 | 58 | 62 | 66 | 70 | 74 | 78 | 82 | 86 | 87 | 88 | -------------------------------------------------------------------------------- /docs/images/pyfi-mono.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 16 | 36 | 38 | 42 | 46 | 50 | 54 | 58 | 62 | 66 | 70 | 74 | 78 | 81 | 85 | 89 | 90 | 91 | 92 | -------------------------------------------------------------------------------- /docs/quickstart/index.rst: -------------------------------------------------------------------------------- 1 | 2 | Quickstart 3 | ================================ 4 | .. toctree:: 5 | :maxdepth: 2 6 | 7 | 8 | Bringing up the Stack 9 | --------------------- 10 | 11 | .. code-block:: bash 12 | 13 | $ docker-compose up 14 | 15 | Configuring Lambda Flow 16 | ----------------------- 17 | 18 | .. code-block:: bash 19 | 20 | $ flow --config 21 | Database connection URI [postgresql://postgres:pyfi101@phoenix:5432/pyfi]: 22 | Result backend URI [redis://localhost]: 23 | Message broker URI [pyamqp://localhost]: 24 | Configuration file created at /home/user/pyfi.ini 25 | 26 | Initialize the Database 27 | ----------------------- 28 | 29 | .. 
code-block:: bash 30 | 31 | $ flow db init 32 | Enabling security on table action 33 | Enabling security on table event 34 | Enabling security on table flow 35 | Enabling security on table jobs 36 | Enabling security on table log 37 | Enabling security on table privilege 38 | Enabling security on table queue 39 | Enabling security on table queuelog 40 | Enabling security on table role 41 | Enabling security on table scheduler 42 | Enabling security on table settings 43 | Enabling security on table task 44 | Enabling security on table user 45 | Enabling security on table node 46 | Enabling security on table processor 47 | Enabling security on table role_privileges 48 | Enabling security on table user_privileges 49 | Enabling security on table user_roles 50 | Enabling security on table agent 51 | Enabling security on table plug 52 | Enabling security on table socket 53 | Enabling security on table call 54 | Enabling security on table plugs_queues 55 | Enabling security on table plugs_source_sockets 56 | Enabling security on table plugs_target_sockets 57 | Enabling security on table sockets_queues 58 | Enabling security on table worker 59 | Enabling security on table calls_events 60 | Database create all schemas done. 61 | 62 | The Flow CLI 63 | ------------ 64 | 65 | .. code-block:: bash 66 | 67 | $ flow 68 | Usage: flow [OPTIONS] COMMAND [ARGS]... 69 | 70 | Flow CLI for managing the pyfi network 71 | 72 | Options: 73 | --debug Debug switch 74 | -d, --db TEXT Database URI 75 | --backend TEXT Task queue backend 76 | --broker TEXT Message broker URI 77 | -i, --ini TEXT Flow .ini configuration file 78 | -c, --config Configure pyfi 79 | --help Show this message and exit. 
80 | 81 | Commands: 82 | add Add an object to the database 83 | agent Run pyfi agent 84 | api API server admin 85 | db Database operations 86 | delete Delete an object from the database 87 | listen Listen to a processor output 88 | ls List database objects and their relations 89 | node Node management operations 90 | proc Run or manage processors 91 | scheduler Scheduler management commands 92 | task Pyfi task management 93 | update Update a database object 94 | web Web server admin 95 | whoami Database login user 96 | worker Run pyfi worker 97 | 98 | Creating Your First Flow 99 | ------------------------ 100 | 101 | Let's look at the sequence of CLI commands needed to build out our flow infrastructure and execute a task. From scratch! 102 | First thing we do below is create a queue. This provides the persistent message broker the definition it needs to allocate a ``message queue`` by the same name for holding task messages. 103 | 104 | Next we create a processor, which refers to our gitrepo and defines the module within that codebase we want to expose. It also defines the host where the processor should be run, but that is optional. 105 | We specific a concurrency value of 5 that indicates *the scale* for our processor. This means it will seek to occupy 5 CPUs, allowing it to run in parallel and respond to high-volume message traffic better. 106 | 107 | Then we create sockets and attach them to our processor. The socket tells pyfi what specific python function we want to receive messages for and what queue it should use. Lastly, it indicates what processor to be attached to. 108 | 109 | Finally, we can run our task and get the result. 110 | 111 | .. 
code-block:: bash 112 | 113 | $ flow add queue -n pyfi.queue1 -t direct 114 | $ flow add processor -n proc1 -g https://github.com/radiantone/pyfi-processors -m pyfi.processors.sample -h localhost -c 5 115 | $ flow add socket -n pyfi.processors.sample.do_something -q pyfi.queue1 -pn proc1 -t do_something 116 | $ flow add socket -n pyfi.processors.sample.do_this -q pyfi.queue1 -pn proc1 -t do_this 117 | $ flow task run --socket pyfi.processors.sample.do_this --data "['some data']" 118 | Do this: ['some data'] 119 | 120 | Creating Sockets 121 | ^^^^^^^^^^^^^^^^ 122 | Sockets represent addressable endpoints for python functions hosted by a processor. Remember, the processor points to a gitrepo and defines a python module within that repo. 123 | The socket defines the task (or python function) within the processor python module. Thus, a single processor can have many sockets associated with it. Sockets also declare a queue they will use to pull their requests from. 124 | This allows calls to tasks to be durable and reliable. 125 | 126 | The following extract from the above flow defines a socket, gives it a name ``pyfi.processors.sample.do_something``, declares the queue ``pyfi.queue1``, associates it with processor named ``proc1`` and represents the python function/task ``do_something``. 127 | 128 | .. code-block:: bash 129 | 130 | $ flow add socket -n pyfi.processors.sample.do_something -q pyfi.queue1 -pn proc1 -t do_something 131 | 132 | Defining Socket Functions 133 | ^^^^^^^^^^^^^^^^^^^^^^^^^ 134 | 135 | Once you've built out your flow and infrastructure to support it, you can create convenient types that represent your python functions via the Socket class. 136 | 137 | For the parallel flow above, we import the .p (or partial) signature from this file, which comes from our Socket we created earlier named ``pyfi.processors.sample.do_something``. 
138 | Remember, the socket captures the module (from its parent Processor) and function name within that module you want to run. Think of it like an endpoint with a queue in front of it. 139 | 140 | We take one step further in the file below and rename Socket class to Function simply as a linguistic preference in this context. 141 | 142 | .. code-block:: python 143 | 144 | from pyfi.client.api import Socket as Function 145 | 146 | do_something = Function(name='pyfi.processors.sample.do_something') 147 | do_something_p = do_something.p 148 | 149 | do_this = Function(name='pyfi.processors.sample.do_this') 150 | do_this_p = do_this.p 151 | 152 | Once we've created our function definitions above, we can use them like normal python functions as in the parallel workflow below! 153 | 154 | Executing Socket Functions 155 | ^^^^^^^^^^^^^^^^^^^^^^^^^^ 156 | 157 | Executing socket functions from python is very easy. Since we can create the socket ahead of time, we only need to refer to it by name as above. 158 | 159 | .. code-block:: python 160 | 161 | from pyfi.client.examples.api import do_something_p as do_something 162 | 163 | do_something("Some text!") 164 | 165 | The just invoke the function reference as you normally would. If you are using the function within a parallel API structure such as ``parallel``, ``pipeline``, ``funnel`` etc then you should use the ``partial`` (.p, _p) version of the function signature. 166 | This allows ElasticCode to add arguments to the task when it is invoked. The invocation is deferred so it doesn't happen at the time you declare your workflow. The reason is because your task will execute on thos remote CPU at a time when the workflow reaches that task. 167 | So the .p partial is a ``signature`` for your task in that respect. 168 | 169 | Running a Parallel Workflow 170 | --------------------------- 171 | 172 | .. 
code-block:: python 173 | 174 | from pyfi.client.api import parallel, pipeline, funnel 175 | from pyfi.client.example.api import do_something_p as do_something 176 | 177 | # Create a pipeline that executes tasks sequentially, passing result to next task 178 | _pipeline = pipeline([ 179 | do_something("One"), 180 | do_something("Two"), 181 | # Create a parallel structure that executes tasks in parallel and returns the 182 | # result list 183 | parallel([ 184 | do_something("Four"), 185 | do_something("Five"), 186 | ]), 187 | do_something("Three")]) 188 | 189 | # Create another parallel structure using the above pipeline as one of its tasks 190 | _parallel = parallel([ 191 | _pipeline, 192 | do_something("Six"), 193 | do_something("Seven")]) 194 | 195 | # Create a funnel structure that executes all its tasks passing the result to the 196 | # single, final task 197 | _funnel = funnel([ 198 | do_something("Eight"), 199 | _parallel, 200 | do_something("Nine")]) 201 | 202 | # Gather the result from the _funnel and send it to do_something("Four") 203 | print("FUNNEL: ", _funnel(do_something("Four")).get()) 204 | 205 | -------------------------------------------------------------------------------- /docs/overview/index.rst: -------------------------------------------------------------------------------- 1 | Overview 2 | ================================ 3 | .. toctree:: 4 | :maxdepth: 2 5 | 6 | ElasticCode is a distributed data flow and computation system that runs on transactional messaging infrastructure. It implements the concept of a NVM Networked-Virtual-Machine by distributing logic over networked hardware CPU/GPU processors. 
7 | 8 | It offers applications and users the following benefits: 9 | 10 | - **Persistent Task & Workflow Execution** - Tasks & Workflows persist within the network 11 | - **Reliable Task Execution** - Tasks execution survives failure anomalies, restarts, hardware faults 12 | - **Simplified Workflow Functions** - Parallel, Pipeline, Funnel 13 | - **Powerful Compute Composition** - Build at-scale data and compute flows using CLI, UI or API 14 | - **Streaming Compute** - Real-time streaming compute data flows 15 | - **Secure & Trusted Code Execution** - No client-side code marshalling or serialization. Code is loaded from the network side via git repositories into isolated virtual environments 16 | - **Micro-Scheduling** - Efficient task scheduling and 100% hardware utilization 17 | - **Next-Gen User Interface** - Quickly build out at-scale HPC data flows with simple and intuitive interfaces. 18 | 19 | As a platform, ElasticCode is designed so you can build rich, high-performance applications, services and scripts on top. Doing this provides the transparent benefits listed above and makes building powerful compute applications fast and easy. 20 | 21 | .. figure:: images/platform1.png 22 | :align: center 23 | 24 | ElasticCode Platform Architecture 25 | 26 | Managed Compute environment 27 | --------------------------- 28 | 29 | The entire ElasticCode stack, as a whole, provides a complete "Managed Compute Platform" (MCP) with specialized tooling to support different layers of concern, such as: 30 | 31 | - Hardware Compute Resources 32 | - Compute Availability & Scheduling 33 | - Distributed Flows 34 | - Python Processors 35 | + Modules & Functions 36 | - Data Logging and Streaming 37 | + Real-time & Historical Metrics 38 | 39 | ElasticCode is designed as a single, extensible platform for building reliable & persistent computational workflows. It relieves developers from having to know where and when tasks get executed or having to configure client side services. 
40 | In addition, ElasticCode's multiple API's are designed for users (of all kinds) to build complex, fully-distributed HPC apps and sharable workflows. The platform nature of ElasticCode sets it apart from other libraries and frameworks that only tackle part of the big picture. 41 | 42 | Simple, Parallel Workflows 43 | -------------------------- 44 | 45 | ElasticCode exposes simple APIs that make writing powerful, distributed workflows fast and easy. A few examples below. 46 | 47 | .. code-block:: python 48 | :caption: Python API Complex Workflow With pipeline & parallel functions 49 | 50 | from pyfi.client.api import parallel, pipeline, funnel 51 | from pyfi.client.example.api import do_something_p as do_something 52 | 53 | # Create a pipeline that executes tasks sequentially, passing result to next task 54 | _pipeline = pipeline([ 55 | do_something("One"), 56 | do_something("Two"), 57 | # Create a parallel structure that executes tasks in parallel and returns the 58 | # result list 59 | parallel([ 60 | do_something("Four"), 61 | do_something("Five"), 62 | ]), 63 | do_something("Three")]) 64 | 65 | # Create another parallel structure using the above pipeline as one of its tasks 66 | _parallel = parallel([ 67 | _pipeline, 68 | do_something("Six"), 69 | do_something("Seven")]) 70 | 71 | # Create a funnel structure that executes all its tasks passing the result to the 72 | # single, final task 73 | _funnel = funnel([ 74 | do_something("Eight"), 75 | _parallel, 76 | do_something("Nine")]) 77 | 78 | # Gather the result from the _funnel and send it to do_something("Four") 79 | print("FUNNEL: ", _funnel(do_something("Four")).get()) 80 | 81 | .. code-block:: bash 82 | :caption: ElasticCode CLI: Build a distributed, reliable ElasticCode network using simple commands, and then execute a task. 
83 | 84 | # Build out the infrastructure 85 | pyfi add queue -n pyfi.queue1 -t direct 86 | pyfi add processor -n proc1 -g https://github.com/radiantone/pyfi-processors -m pyfi.processors.sample 87 | 88 | # Add sockets (not POSIX sockets!) that receive incoming task requests with -c concurrency factors (i.e. # of CPUs occupied) 89 | pyfi add socket -n pyfi.processors.sample.do_something -q pyfi.queue1 -pn proc1 -t do_something -c 5 90 | pyfi add socket -n pyfi.processors.sample.do_this -q pyfi.queue1 -pn proc1 -t do_this -c 8 91 | 92 | # Execute a task (can re-run only this after network is built) 93 | pyfi task run --socket pyfi.processors.sample.do_something --data "['some data']" 94 | 95 | .. code-block:: bash 96 | :caption: ElasticCode in bash using pipes. Compose pipeline workflows and run parallel tasks using piped output. 97 | 98 | # Create alias' for the run task commands 99 | alias pyfi.processors.sample.do_something="pyfi task run -s pyfi.processors.sample.do_something" 100 | alias pyfi.processors.sample.do_this="pyfi task run -s pyfi.processors.sample.do_this" 101 | 102 | # Pipe some output from stdin to a task 103 | echo "HI THERE!" | pyfi.processors.sample.do_something 104 | 105 | # Pipe some text to a task, then append some new text to that output, then send that to final task, do_this 106 | echo "HI THERE!" | pyfi.processors.sample.do_something | echo "$(cat -) add some text" | pyfi.processors.sample.do_this 107 | 108 | # Echo a string as input to two different processors and they run in parallel 109 | echo "HI THERE!" | tee -a >(pyfi.processors.sample.do_something) tee -a >(pyfi.processors.sample.do_this) 110 | 111 | .. 
code-block:: bash 112 | :caption: Easily list out the call graph for any task in your workflow to see where the parallelism occurred 113 | 114 | $ pyfi ls call --id 033cf3d3-a0fa-492d-af0a-f51cf5f58d49 -g 115 | pyfi.processors.sample.do_something 116 | └─────────────────────────────────────────────┐ 117 | pyfi.processors.sample.do_something 118 | ┌──────────────────────┴───────────────────┬───────────────────────────────────────┐ 119 | pyfi.processors.sample.do_something pyfi.processors.sample.do_something pyfi.processors.sample.do_something 120 | 121 | Persistent, Reliable Tasks 122 | -------------------------- 123 | 124 | Task calls in your workflows are completely persisent, meaning they are stored in the ElasticCode network (database) and delivered to the task at the soonest possible time. 125 | This depends when the processor hosting the task is available and free to do the compute, but the task call will remain active until it has completed. If the task worker fails for any reason, the task can be retried on another node. 126 | These qualities of service are completely invisible to the application or user script. 127 | 128 | High Level Architecture 129 | ----------------------- 130 | ElasticCode's high level architecture can be seen in the diagram below. Central to the architecture is the **ElasticCode Model Database** which stores the relational meta-model for the ElasticCode compute network. This database provides the *single source of truth* for the runtime operation of the distributed architecture. 131 | Equally as important is the **reliable message broker** which is the heart of ElasticCode's execution model. Workflows execute functions just like any other python code, but those functions trigger persistent requests for ElasticCode to execute a remote task when the compute resources are available. The message broker handles all the runtime orchestration with compute nodes to carry out tasks. 132 | 133 | .. 
figure:: images/architecture2.png 134 | :align: center 135 | 136 | ElasticCode System Architecture 137 | 138 | Virtual Processors 139 | ------------------ 140 | 141 | ElasticCode introduces the notion of **virtual processors** that network together to form a reliable and distributed mesh topology for executing compute tasks. 142 | 143 | ElasticCode Processors are object abstractions that capture the location, version and definition of python modules and functions via your own git repository. This trusted code model is important as it establishes the contract between your code, ElasticCode and virtual processors where the code is to be executed. 144 | This relationship must be strong and well-defined. 145 | 146 | Via the various ElasticCode interfaces (CLI, API, Python etc) you define ElasticCode virtual processors. Agents (a kind of ElasticCode service) running across your network will deploy them and receive tasks to execute their code. 147 | 148 | This type of service (or task) mesh architecture allows for fine-grained scalability characteristics that benefit the peformance and stability of the network. 149 | 150 | At Scale Design 151 | --------------- 152 | 153 | ElasticCode is designed to operate "at scale", which means there is a one-to-one correspondence between logic compute units (processors) and physical compute units (CPU cores). 154 | This provides a number of obvious and inherent benefits such as hardware redundancy, high-availability, fault-tolerance, fail-over, performance and ease of maintenance. 155 | 156 | .. figure:: images/architecture3.png 157 | :align: center 158 | 159 | ElasticCode At-Scale Task/CPU Architecture 160 | 161 | Event Driven 162 | ------------ 163 | ElasticCode is an event driven architecture from the bottom (data) to the top (ui). This design approach allows it to scale smoothly and not overconsume resources. 
Messages and notifications are sent when they are available which eliminates the need for *long polling* or similar resource intensive designs. 164 | Because ElasticCode is purely event driven, when there are no events, ElasticCode is respectful of system resources and can idle - allowing kernel schedulers and other system tasks to operate as needed. 165 | 166 | Message-Oriented Execution Graphs 167 | --------------------------------- 168 | ElasticCode differs from other dataflow engines in that it is fully distributed and runs "at-scale" across heterogeneous infrastructure and computational resources. 169 | 170 | It establishes a logical directed-graph (DG) overlay network across compute nodes and executes your custom processor scripts (python, node, bash). 171 | 172 | Using the power of reliable, transactional messaging, compute tasks are never lost, discarded or undone. Fault tolerance and load-balancing are intrinsic qualities of ElasticCode and not tacked on as a separate process, which itself would be a failure point. 173 | 174 | Execution Stack 175 | --------------- 176 | 177 | There are various layers within ElasticCode that allow it to scale seamless and expose simple APIs that do powerful things behind the scenes. A quick glance at the lifecycle of a ElasticCode python task is below. 178 | Various qualities of service are offered by each layer, most of which are implied during a task invocation. 179 | 180 | .. image:: images/architecture4.png 181 | :align: center 182 | 183 | Micro-Scheduling 184 | ---------------- 185 | 186 | ElasticCode uses a scheduling design that will allow tasks to fully utilize the available CPUs in the ElasticCode network, if processors are created in the ElasticCode database. ElasticCode will never consume more resources than what is requested in its database. 
187 | Although traditional batch scheduling design allows for blocks of compute resources to be dedicated to one task or flow, it comes at the expense of resource utilization and wait time for other requests. Micro-scheduling seeks to remedy this situation and provide better compute efficiency which means higher task throughput and more satisfied users. 188 | 189 | .. image:: images/scheduler1.png 190 | .. image:: images/scheduler2.png 191 | 192 | A True Elastic Compute Platform 193 | ------------------------------- 194 | ElasticCode provides a set of interacting compute layers that control the location and execution of managed code assets. 195 | With ElasticCode, code modules and functions can be loaded at multiple locations and invoked from clients without knowledge of where those functions are or how those functions are executed. 196 | 197 | Redundant code (processors) loaded into a ElasticCode network will be able to respond to higher volume of data and requests and thus can scale at will, individually. 198 | 199 | Functional tasks (processors hosting code) are fronted by durable queues that deliver reliable invocations when those functions are present on the network, regardless of their exact location. 200 | This allows the system to be resilient to hardware or network changes, as well as influence by schedulers that might change the location of functions (processors) to re-balance the resources across the network. 201 | 202 | All of this underlying management, hardware arriving and departing, services starting and stopping, processors moving from one host to another (or failing), is completely invisibile to the applications and clients using the system. To them, function calls will always, eventually be executed, if not immediately, in the near future when compute resources allow it. 203 | 204 | System Benefits 205 | --------------- 206 | 207 | The ElasticCode platform provides numerous benefits, only some of which are below. 
208 | 209 | - **A single, purpose-built platform** that addresses end-to-end managed compute from the CPU to the end user. Compared to cobbled together frameworks. 210 | - **Data flow and data streaming support** 211 | - **Real-time observable data** across your compute resources 212 | - **DevOps out-of-the-box** - ElasticCode integrates directly with GIT allowing your existing code management practices to be used. 213 | - **Elastic, At-Scale** - ElasticCode is an elastic infrastructure, meaning that it scales up and down on-the-fly. Code can be moved across hardware locations at any time without data loss. 214 | - **Extensible** - ElasticCode is designed to be extended and specialized to your needs. Both the UI and the core platform is open and leverages modern framework design patterns to easily build on top of. 215 | 216 | 217 | Ecosystem of Supported Roles 218 | ---------------------------- 219 | The ElasticCode compute environment is a seamless collaboration across disciplines with powerful, out-of-the-box tooling for everyone to manage their concerns, independent of the whole. 220 | Let's quantify the previous sentence some. Let's say you are in the middle of running a lengthy workflow, but elsewhere in the grid, hardware administrators need to replace hardware some of your tasks might be running on. 221 | With ElasticCode, your workflow would simply pause if it cannot find an active ElasticCode processor hosting the task (python function) it needs and when the hardware admins bring new hardware online, the ElasticCode agents resume and your workflow would continue running where it left off, seamlessly. 222 | Sounds amazing but it's true! 223 | 224 | Some of the roles that might participate in a ElasticCode network, directly or indirectly. 
225 | 226 | - Hardware Admins 227 | - Infrastructure Admins 228 | - Compute Admins 229 | - Data Admins 230 | - Code Repository Owners 231 | - End Users 232 | 233 | Powerful, Next-Gen UI 234 | --------------------- 235 | 236 | ElasticCode's user interface is a powerful, next-gen no-code application that enpowers anyone to create fast, parallel workflows across ElasticCode's distributed task mesh. 237 | 238 | 239 | .. image:: ../../screens/pyfi1.png 240 | .. image:: ../../screens/pyfi5.png 241 | .. image:: ../../screens/pyfi7.png -------------------------------------------------------------------------------- /docs/CLI/examples/index.rst: -------------------------------------------------------------------------------- 1 | 2 | Examples 3 | ================================ 4 | .. toctree:: 5 | :maxdepth: 2 6 | 7 | 8 | .. code-block:: bash 9 | :caption: The 'flow' command is the single command for building and managing a ElasticCode network. 10 | 11 | $ flow 12 | Usage: flow [OPTIONS] COMMAND [ARGS]... 13 | 14 | CLI for creating & managing flow networks 15 | 16 | Options: 17 | --debug Debug switch 18 | -d, --db TEXT Database URI 19 | --backend TEXT Task queue backend 20 | --broker TEXT Message broker URI 21 | -a, --api TEXT Message broker API URI 22 | -u, --user TEXT Message broker API user 23 | -p, --password TEXT Message broker API password 24 | -i, --ini TEXT flow .ini configuration file 25 | -c, --config Configure pyfi 26 | --help Show this message and exit. 
27 | 28 | Commands: 29 | add Add an object to the database 30 | agent Commands for remote agent management 31 | api API server admin 32 | compose Manage declarative infrastructure files 33 | db Database operations 34 | delete Delete an object from the database 35 | listen Listen to a processor output 36 | login Log into flow CLI 37 | logout Logout current user 38 | ls List database objects and their relations 39 | network Network operations 40 | node Node management operations 41 | proc Run or manage processors 42 | scheduler Scheduler management commands 43 | task Pyfi task management 44 | update Update a database object 45 | user User commands 46 | web Web server admin 47 | whoami Database login user 48 | worker Run pyfi worker 49 | 50 | 51 | Database 52 | ----------------- 53 | .. code-block:: bash 54 | :caption: Flow database sub-commands 55 | 56 | $ flow db 57 | Usage: flow db [OPTIONS] COMMAND [ARGS]... 58 | 59 | Database operations 60 | 61 | Options: 62 | --help Show this message and exit. 63 | 64 | Commands: 65 | drop Drop all database tables 66 | init Initialize database tables 67 | json Dump the database to JSON 68 | migrate Perform database migration/upgrade 69 | rebuild Drop and rebuild database tables 70 | 71 | 72 | Objects 73 | ------------------------- 74 | 75 | There are numerous objects within an ElasticCode network. Some are infrastructure related, others are service related. Using the ElasticCode CLI you create, update and manage these objects in the database, which acts as a **single source of truth** for the entire ElasticCode network. 76 | All the deployed ElasticCode services (e.g. agents) *react* to changes in the ElasticCode database. So you could say that ElasticCode is *reactive* on a distributed, network-scale. 77 | 78 | Some of the system objects and CLI commands are shown below. 79 | 80 | Queues 81 | ------ 82 | 83 | .. 
code-block:: bash 84 | :caption: Add a queue to the database 85 | 86 | $ flow add queue --help 87 | Usage: flow add queue [OPTIONS] 88 | 89 | Add queue object to the database 90 | 91 | Options: 92 | -n, --name TEXT [required] 93 | -t, --type [topic|direct|fanout] 94 | [default: direct; required] 95 | --help Show this message and exit. 96 | 97 | 98 | Processors 99 | ----------------------- 100 | 101 | .. code-block:: bash 102 | :caption: Add a processor to the database 103 | 104 | $ flow add processor --help 105 | Usage: flow add processor [OPTIONS] 106 | 107 | Add processor to the database 108 | 109 | Options: 110 | -n, --name TEXT Name of this processor [required] 111 | -m, --module TEXT Python module (e.g. some.module.path 112 | [required] 113 | -h, --hostname TEXT Target server hostname 114 | -w, --workers INTEGER Number of worker tasks 115 | -r, --retries INTEGER Number of retries to invoke this processor 116 | -g, --gitrepo TEXT Git repo URI [required] 117 | -c, --commit TEXT Git commit id for processor code 118 | -rs, --requested_status TEXT The requested status for this processor 119 | -b, --beat Enable the beat scheduler 120 | -br, --branch TEXT Git branch to be used for checkouts 121 | -p, --password TEXT Password to access this processor 122 | -rq, --requirements TEXT requirements.txt file 123 | -e, --endpoint TEXT API endpoint path 124 | -a, --api BOOLEAN Has an API endpoint 125 | -cs, --cpus INTEGER Number of CPUs for default deployment 126 | -d, --deploy Enable the beat scheduler 127 | -mp, --modulepath TEXT Relative repo path to python module file 128 | --help Show this message and exit. 129 | 130 | 131 | .. code-block:: bash 132 | :caption: Specific processor subcommands 133 | 134 | $ flow proc 135 | Usage: flow proc [OPTIONS] COMMAND [ARGS]... 136 | 137 | Run or manage processors 138 | 139 | Options: 140 | --id TEXT ID of processor 141 | --help Show this message and exit. 
142 | 143 | Commands: 144 | pause Pause a processor 145 | remove Remove a processor 146 | restart Start a processor 147 | resume Pause a processor 148 | start Start a processor 149 | stop Stop a processor 150 | 151 | 152 | Calls 153 | ----- 154 | 155 | .. code-block:: bash 156 | :caption: Call subcommands 157 | 158 | $ flow ls calls --help 159 | Usage: flow ls calls [OPTIONS] 160 | 161 | List calls 162 | 163 | Options: 164 | -p, --page INTEGER 165 | -r, --rows INTEGER 166 | -u, --unfinished 167 | -a, --ascend 168 | -i, --id 169 | -t, --tracking 170 | -tk, --task 171 | --help Show this message and exit. 172 | 173 | 174 | 175 | .. code-block:: bash 176 | :caption: flow ls calls 177 | 178 | $ flow ls calls 179 | +------+-----+-------------------------------------+--------------------------------------+----------+----------------------------+-------------------------------------+----------------------------+----------------------------+----------+ 180 | | Page | Row | Name | ID | Owner | Last Updated | Socket | Started | Finished | State | 181 | +------+-----+-------------------------------------+--------------------------------------+----------+----------------------------+-------------------------------------+----------------------------+----------------------------+----------+ 182 | | 1 | 1 | pyfi.processors.sample.do_this | e3f73300-f3fd-4230-ba11-258d4f5a17f4 | postgres | 2021-09-13 19:30:19.933346 | pyfi.processors.sample.do_this | 2021-09-13 19:30:19.903573 | 2021-09-13 19:30:19.932491 | finished | 183 | | 1 | 2 | pyfi.processors.sample.do_something | e3bf09c5-ae45-4772-b301-c394acae3c4e | postgres | 2021-09-13 19:30:19.885993 | pyfi.processors.sample.do_something | 2021-09-13 19:30:19.847282 | 2021-09-13 19:30:19.885440 | finished | 184 | | 1 | 3 | pyfi.processors.sample.do_this | a58de16a-1b92-4acb-81c1-92e81cb6ea56 | postgres | 2021-09-13 19:29:49.944219 | pyfi.processors.sample.do_this | 2021-09-13 19:29:49.917225 | 2021-09-13 19:29:49.943415 | finished | 
185 | | 1 | 4 | pyfi.processors.sample.do_something | 58df162a-ac2e-40b7-9e27-635c61a4d9a7 | postgres | 2021-09-13 19:29:49.868975 | pyfi.processors.sample.do_something | 2021-09-13 19:29:49.820097 | 2021-09-13 19:29:49.868109 | finished | 186 | | 1 | 5 | pyfi.processors.sample.do_this | 60d8b91d-1b8b-433c-a289-5704856d37d1 | postgres | 2021-09-13 19:29:19.907705 | pyfi.processors.sample.do_this | 2021-09-13 19:29:19.880742 | 2021-09-13 19:29:19.906931 | finished | 187 | | 1 | 6 | pyfi.processors.sample.do_something | 66c78849-9052-48d0-ae62-59942d544096 | postgres | 2021-09-13 19:29:19.861880 | pyfi.processors.sample.do_something | 2021-09-13 19:29:19.824456 | 2021-09-13 19:29:19.861330 | finished | 188 | | 1 | 7 | pyfi.processors.sample.do_this | e5189a71-9805-492e-a8d7-e5eb2b8d68d3 | postgres | 2021-09-13 19:28:49.873301 | pyfi.processors.sample.do_this | 2021-09-13 19:28:49.842724 | 2021-09-13 19:28:49.872176 | finished | 189 | | 1 | 8 | pyfi.processors.sample.do_something | 35fd3635-743a-4015-acfe-c5a8f62ef65d | postgres | 2021-09-13 19:28:49.812921 | pyfi.processors.sample.do_something | 2021-09-13 19:28:49.789503 | 2021-09-13 19:28:49.812406 | finished | 190 | | 1 | 9 | pyfi.processors.sample.do_this | 4136ebe2-ee96-4b74-ba0e-33d8c5974252 | postgres | 2021-09-13 19:28:19.830508 | pyfi.processors.sample.do_this | 2021-09-13 19:28:19.805839 | 2021-09-13 19:28:19.829667 | finished | 191 | | 1 | 10 | pyfi.processors.sample.do_something | 707f18c5-5708-4c70-81fb-ca0afb30e28b | postgres | 2021-09-13 19:28:19.789542 | pyfi.processors.sample.do_something | 2021-09-13 19:28:19.764792 | 2021-09-13 19:28:19.788999 | finished | 192 | +------+-----+-------------------------------------+--------------------------------------+----------+----------------------------+-------------------------------------+----------------------------+----------------------------+----------+ 193 | Page 1 of 383 of 3830 total records 194 | 195 | .. 
code-block:: bash 196 | :caption: flow ls call --help 197 | 198 | $ flow ls call --help 199 | Usage: flow ls call [OPTIONS] 200 | 201 | List details about a call record 202 | 203 | Options: 204 | --id TEXT ID of call 205 | -n, --name TEXT Name of call 206 | -r, --result Include result of call 207 | -t, --tree Show forward call tree 208 | -g, --graph Show complete call graph 209 | -f, --flow Show all calls in a workflow 210 | --help Show this message and exit. 211 | 212 | .. code-block:: bash 213 | :caption: flow ls call --id e3bf09c5-ae45-4772-b301-c394acae3c4e 214 | 215 | $ flow ls call --id e3bf09c5-ae45-4772-b301-c394acae3c4e 216 | +-------------------------------------+--------------------------------------+----------+----------------------------+-------------------------------------+----------------------------+----------------------------+----------+ 217 | | Name | ID | Owner | Last Updated | Socket | Started | Finished | State | 218 | +-------------------------------------+--------------------------------------+----------+----------------------------+-------------------------------------+----------------------------+----------------------------+----------+ 219 | | pyfi.processors.sample.do_something | e3bf09c5-ae45-4772-b301-c394acae3c4e | postgres | 2021-09-13 19:30:19.885993 | pyfi.processors.sample.do_something | 2021-09-13 19:30:19.847282 | 2021-09-13 19:30:19.885440 | finished | 220 | +-------------------------------------+--------------------------------------+----------+----------------------------+-------------------------------------+----------------------------+----------------------------+----------+ 221 | Provenance 222 | +--------------------------------------+-------------+-------------+ 223 | | Task | Task Parent | Flow Parent | 224 | +--------------------------------------+-------------+-------------+ 225 | | a13ba1e7-78f9-4644-9c29-696dfd89e9e4 | None | None | 226 | +--------------------------------------+-------------+-------------+ 227 | 
Events 228 | +----------+--------------------------------------+----------+----------------------------+-----------------------------------------------------+ 229 | | Name | ID | Owner | Last Updated | Note | 230 | +----------+--------------------------------------+----------+----------------------------+-----------------------------------------------------+ 231 | | received | 8e8845d5-cd32-40d9-93c7-e95f7500926c | postgres | 2021-09-13 19:30:19.844512 | Received task pyfi.processors.sample.do_something | 232 | | prerun | a2507cd1-1d72-4ad1-be74-375aac29f1c4 | postgres | 2021-09-13 19:30:19.874789 | Prerun for task pyfi.processors.sample.do_something | 233 | | postrun | f8b5ff03-e0e3-467d-9257-a682f0865581 | postgres | 2021-09-13 19:30:19.886504 | Postrun for task | 234 | +----------+--------------------------------------+----------+----------------------------+-----------------------------------------------------+ 235 | 236 | .. code-block:: bash 237 | :caption: flow ls call --id e3bf09c5-ae45-4772-b301-c394acae3c4e --tree 238 | 239 | $ flow ls call --id e3bf09c5-ae45-4772-b301-c394acae3c4e --tree 240 | pyfi.processors.sample.do_something 241 | └────────────────────┐ 242 | pyfi.processors.sample.do_this 243 | 244 | 245 | Listening 246 | --------- 247 | The listen command allows you to listen to the pubsub channels associated with queues and processors. A kind of *network sniffer* that displays in real-time the various message traffic, compute results, lifecycle events etc. 248 | You can provide your own custom class to receive the results which is designed to provide a simple and loosely coupled mechanism for system integrations. 249 | 250 | .. code-block:: bash 251 | :caption: Messages will be displayed as they are generated within the network. 
252 | 253 | $ flow listen --help 254 | Usage: flow listen [OPTIONS] 255 | 256 | Listen to a processor output 257 | 258 | Options: 259 | -n, --name TEXT Name of processor [required] 260 | -c, --channel TEXT Listen channel (e.g. task, log, etc) [required] 261 | -a, --adaptor TEXT Adaptor class function (e.g. my.module.class.function) 262 | --help Show this message and exit. 263 | $ flow listen --name pyfi.queue1.proc1 --channel task 264 | Listening to pyfi.queue1.proc1 265 | {'type': 'psubscribe', 'pattern': None, 'channel': b'pyfi.queue1.proc1.task', 'data': 1} 266 | {'type': 'pmessage', 'pattern': b'pyfi.queue1.proc1.task', 'channel': b'pyfi.queue1.proc1.task', 'data': b'{"channel": "task", "state": "received", "date": "2021-09-13 19:37:20.094443", "room": "pyfi.queue1.proc1"}'} 267 | {'type': 'pmessage', 'pattern': b'pyfi.queue1.proc1.task', 'channel': b'pyfi.queue1.proc1.task', 'data': b'{"channel": "task", "state": "running", "date": "2021-09-13 19:37:20.108668", "room": "pyfi.queue1.proc1"}'} 268 | {'type': 'pmessage', 'pattern': b'pyfi.queue1.proc1.task', 'channel': b'pyfi.queue1.proc1.task', 'data': b'{"module": "pyfi.processors.sample", "date": "2021-09-13 19:37:20.133327", "resultkey": "celery-task-meta-b3feb181-484d-4b98-aba8-daabd07ee3d1", "message": "{\\"module\\": \\"pyfi.processors.sample\\", \\"date\\": \\"2021-09-13 19:37:20.133327\\", \\"resultkey\\": \\"celery-task-meta-b3feb181-484d-4b98-aba8-daabd07ee3d1\\", \\"message\\": \\"\\\\\\"\\\\\\\\\\\\\\"Message:Hello World!\\\\\\\\\\\\\\"\\\\\\"\\", \\"channel\\": \\"task\\", \\"room\\": \\"pyfi.queue1.proc1\\", \\"task\\": \\"do_something\\"}", "channel": "task", "room": "pyfi.queue1.proc1", "task": "do_something", "state": "postrun"}'} 269 | 270 | Running an Agent 271 | ---------------- 272 | 273 | .. code-block:: bash 274 | :caption: FLOW agent subcommand 275 | 276 | $ flow agent 277 | Usage: flow agent [OPTIONS] COMMAND [ARGS]... 
278 | 279 | Run flow agent 280 | 281 | Options: 282 | --help Show this message and exit. 283 | 284 | Commands: 285 | start Run pyfi agent server 286 | 287 | 288 | .. code-block:: bash 289 | :caption: Flow agent subcommand 290 | 291 | $ flow agent start --help 292 | Usage: flow agent start [OPTIONS] 293 | 294 | Start an agent 295 | 296 | Options: 297 | -p, --port INTEGER Healthcheck port 298 | --clean Remove work directories before launch 299 | -b, --backend TEXT Message backend URI 300 | -r, --broker TEXT Message broker URI 301 | -n, --name TEXT Hostname for this agent to use 302 | -c, --config TEXT Config module.object import (e.g. 303 | path.to.module.MyConfigClass 304 | -q, --queues Run the queue monitor only 305 | -u, --user TEXT Run the worker as user 306 | -po, --pool INTEGER Process pool for message dispatches 307 | -cp, --cpus INTEGER Number of CPUs 308 | -s, --size INTEGER Maximum number of messages on worker internal 309 | queue 310 | -h, --host TEXT Remote hostname to start the agent via ssh 311 | -wp, --workerport INTEGER Healthcheck port for worker 312 | --help Show this message and exit. 313 | 314 | 315 | Roles & Users 316 | ------------- 317 | 318 | .. code-block:: bash 319 | :caption: FLOW user, role and privilege subcommands 320 | 321 | $ flow add user --help 322 | Usage: flow add user [OPTIONS] 323 | 324 | Add user object to the database 325 | 326 | Options: 327 | -n, --name TEXT [required] 328 | -e, --email TEXT [required] 329 | -p, --password TEXT [required] 330 | --help Show this message and exit. 331 | 332 | $ flow add role --help 333 | Usage: flow add role [OPTIONS] 334 | 335 | Add role object to the database 336 | 337 | Options: 338 | -n, --name TEXT [required] 339 | --help Show this message and exit. 
340 | 341 | $ flow add privilege --help 342 | Usage: flow add privilege [OPTIONS] 343 | 344 | Add privilege to the database 345 | 346 | Options: 347 | -u, --user TEXT 348 | -n, --name TEXT [required] 349 | -r, --role TEXT 350 | --help Show this message and exit. 351 | 352 | 353 | .. code-block:: bash 354 | :caption: Creating a user 355 | 356 | $ flow add user 357 | Name: joe 358 | Email: joe@xyz 359 | Password: 12345 360 | CREATE USER joe WITH PASSWORD '12345' 361 | User "joe" added 362 | 363 | .. code-block:: bash 364 | :caption: Creating a role 365 | 366 | $ flow add role -n developer 367 | bc15ee9d-a208-43a9-82d2-bf0810dc4380:developer:2021-09-15 21:50:40.714192 368 | 369 | .. code-block:: bash 370 | :caption: Adding a privilege to a user 371 | 372 | $ flow add privilege -u joe -n ADD_PROCESSOR 373 | Privilege added 374 | 375 | .. code-block:: bash 376 | :caption: List a user with role_privileges 377 | 378 | $ flow ls user -n joe 379 | +------+--------------------------------------+----------+---------+ 380 | | Name | ID | Owner | Email | 381 | +------+--------------------------------------+----------+---------+ 382 | | joe | a8dcf9bb-c821-4d44-82f5-828dceb4cb23 | postgres | joe@xyz | 383 | +------+--------------------------------------+----------+---------+ 384 | Privileges 385 | +------+---------------+----------------------------+----------+ 386 | | Name | Right | Last Updated | By | 387 | +------+---------------+----------------------------+----------+ 388 | | joe | ADD_PROCESSOR | 2021-09-15 21:46:48.611286 | postgres | 389 | +------+---------------+----------------------------+----------+ 390 | 391 | .. code-block:: bash 392 | :caption: Adding a privilege to a role 393 | 394 | $ flow add privilege -r developer -n ADD_PROCESSOR 395 | Privilege added 396 | 397 | .. 
code-block:: bash 398 | :caption: Adding a role to a user 399 | 400 | 401 | Privileges & Rights 402 | --------------------------- 403 | 404 | A **right** is an atomic string that names a particular **privilege**. It only becomes a privilege when it's associated with a user. 405 | When it's just **a name** we call it a *right*. 406 | 407 | .. code-block:: python 408 | :caption: Available Rights 409 | 410 | rights = ['ALL', 411 | 'CREATE', 412 | 'READ', 413 | 'UPDATE', 414 | 'DELETE', 415 | 416 | 'DB_DROP', 417 | 'DB_INIT', 418 | 419 | 'START_AGENT', 420 | 421 | 'RUN_TASK', 422 | 'CANCEL_TASK', 423 | 424 | 'START_PROCESSOR', 425 | 'STOP_PROCESSOR', 426 | 'PAUSE_PROCESSOR', 427 | 'RESUME_PROCESSOR', 428 | 'LOCK_PROCESSOR', 429 | 'UNLOCK_PROCESSOR', 430 | 'VIEW_PROCESSOR', 431 | 'VIEW_PROCESSOR_CONFIG', 432 | 'VIEW_PROCESSOR_CODE', 433 | 'EDIT_PROCESSOR_CONFIG', 434 | 'EDIT_PROCESSOR_CODE' 435 | 436 | 'LS_PROCESSORS', 437 | 'LS_USERS', 438 | 'LS_USER', 439 | 'LS_PLUGS', 440 | 'LS_SOCKETS', 441 | 'LS_QUEUES', 442 | 'LS_AGENTS', 443 | 'LS_NODES', 444 | 'LS_SCHEDULERS', 445 | 'LS_WORKERS', 446 | 447 | 'ADD_PROCESSOR', 448 | 'ADD_AGENT', 449 | 'ADD_NODE', 450 | 'ADD_PLUG', 451 | 'ADD_PRIVILEGE', 452 | 'ADD_QUEUE', 453 | 'ADD_ROLE', 454 | 'ADD_SCHEDULER', 455 | 'ADD_SOCKET', 456 | 'ADD_USER', 457 | 458 | 'UPDATE_PROCESSOR', 459 | 'UPDATE_AGENT', 460 | 'UPDATE_NODE', 461 | 'UPDATE_PLUG', 462 | 'UPDATE_PRIVILEGE', 463 | 'UPDATE_QUEUE', 464 | 'UPDATE_ROLE', 465 | 'UPDATE_SCHEDULER', 466 | 'UPDATE_SOCKET', 467 | 'UPDATE_USER', 468 | 469 | 'DELETE_PROCESSOR', 470 | 'DELETE_AGENT', 471 | 'DELETE_NODE', 472 | 'DELETE_PLUG', 473 | 'DELETE_PRIVILEGE', 474 | 'DELETE_QUEUE', 475 | 'DELETE_ROLE', 476 | 'DELETE_SCHEDULER', 477 | 'DELETE_SOCKET', 478 | 'DELETE_USER', 479 | 480 | 'READ_PROCESSOR', 481 | 'READ_AGENT', 482 | 'READ_NODE', 483 | 'READ_PLUG', 484 | 'READ_PRIVILEGE', 485 | 'READ_QUEUE', 486 | 'READ_ROLE', 487 | 'READ_SCHEDULER', 488 | 'READ_SOCKET', 489 | 'READ_USER' 490 | ] 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![logo](./screens/pyfi.svg) 2 | 3 | [![Documentation Status](https://readthedocs.org/projects/pyfi/badge/?version=latest)](https://pyfi.readthedocs.io/en/latest/?badge=latest) 4 | 5 | A distributed data flow and computation system that runs on transactional messaging infrastructure. PYFI is designed to operate as a NVM Networked-Virtual-Machine by implementing distributed logic over networked hardware CPU/GPU processors and is suitable for all kinds of computational tasks. 6 | 7 | The entire PYFI stack, as a whole, provides a complete "Managed Compute Platform" (MCP) with specialized, best-of-breed tooling to support [different layers](#network-layers) of concern, such as: 8 | * Hardware Compute Resources 9 | * Compute Availability & Scheduling 10 | * Distributed Flows 11 | * Python Processors 12 | * Modules & Functions 13 | * Data Logging and Streaming 14 | * Real-time & Historical Metrics 15 | 16 | With the PYFI compute environment different roles & responsibilities exist: 17 | * Hardware Admins 18 | * Infrastructure Admins 19 | * Compute Admins 20 | * Data Admins 21 | * Code Repository Owners 22 | * End Users 23 | 24 | PYFI provides the necessary tooling for all these adminstrators and users to work together smoothly across heterogenous compute environments. 25 | 26 | *NOTE: This repo will initially contain the core design documentation for PYFI but eventually I will commit the code here. Currently looking for sponsors to support the effort. If curious or interested, please reach out to me at darren@ontrenet.com.* 27 | ## Introduction 28 | PYFI differs from other dataflow engines in that it is fully distributed and runs "at-scale" across heterogeneous infrastructure and computational resources. 
29 | 30 | It establishes a logical directed-graph (DG) overlay network across compute nodes and executes your custom processor scripts (python, node, bash). 31 | 32 | Using the power of reliable, transactional messaging, compute tasks are never lost, discarded or undone. Fault tolerance and load-balancing are intrinsic qualities of PYFI and not tacked on as a separate process, which itself would be a failure point. 33 | 34 | ### A True Elastic Compute Platform 35 | 36 | PYFI provides a set of interacting compute layers that control the location and execution of managed code assets. 37 | With PYFI, code modules and functions can be loaded at multiple locations and invoked from clients without knowledge of where those functions are or how those functions are called. 38 | 39 | Redundant code (processors) loaded into a PYFI network will be able to respond to higher volume of data and requests and thus can scale at will, individually. 40 | 41 | Functional tasks (processors hosting code) are fronted by durable queues that deliver reliable invocations when those functions are present on the network, regardless of their exact location. 42 | This allows the system to be resilient to hardware or network changes, as well as influence by schedulers that might change the location of functions (processors) to re-balance the resources across the network. 43 | 44 | All of this underlying management, hardware arriving and departing, services starting and stopping, processors moving from one host to another (or failing), is completely invisibile to the applications and clients using the system. To them, function calls will always, eventually be executed, if not immediately, in the near future when compute resources allow it. 45 | 46 | ### Benefits 47 | 48 | The PYFI platform provides numerous benefits: 49 | 50 | * **A single, purpose-built platform** that addresses end-to-end managed compute from the CPU to the end user. Compared to cobbled together frameworks. 
51 | * **Data flow and data streaming support** 52 | * **Real-time observable data** across your compute resources 53 | * **DevOps out-of-the-box** - PYFI integrates directly with GIT allowing your existing code management practices to be used. 54 | * **Elastic, At-Scale** - PYFI is an elastic infrastructure, meaning that it scales up and down on-the-fly. Code can be moved across hardware locations at any time without data loss. 55 | * **Extensible** - PYFI is designed to be extended and specialized to your needs. Both the UI and the core platform is open and leverages modern framework design patterns to easily build on top of. 56 | 57 | ## Outline 58 | 59 | * [Introduction](#introduction) 60 | * [High Level Architecture](#high-level-architecture) 61 | * [At-Scale Design](#at-scale-design) 62 | * [Tech Stack](#tech-stack) 63 | * [Design Goals](#design-goals) 64 | * [Detailed Architecture](#detailed-architecture) 65 | * [Network Layers](#network-layers) 66 | * [Execution Stack](#execution-stack) 67 | * [Why A SQL Database?](#why-a-sql-database) 68 | * [Data Model](#data-model) 69 | * [Security Model](#security-model) 70 | * [Logical Processors](#logical-processors) 71 | * [Command Line Interface](#command-line-interface) 72 | * [System Objects](#system-objects) 73 | * [Nodes](#nodes) 74 | * [Agents](#agents) 75 | * [Processors](#processors) 76 | * [Workers](#workers) 77 | * [Tasks](#tasks) 78 | * [Building Dataflows](#building-dataflows) 79 | * [Stack Tools](#stack-tools) 80 | 81 | ## High-Level Architecture 82 | The following diagram shows one cross-section of the current *reference implementation* of PYFI. Since everything behind the database can be implemented in a variety of ways, this architecture is not absolute. 
83 | 84 | ![architecture1](./screens/architecture1.png) 85 | ### At Scale Design 86 | 87 | PYFI is designed to operate "at scale", which means there is a one-to-one correspondence between logical compute units (processors) and physical compute units (CPU cores). 88 | This provides a number of obvious and inherent benefits such as hardware redundancy, high-availability, fault-tolerance, fail-over, performance and ease of maintenance. 89 | 90 | Below is a diagram that compares the scaling architecture of Apache NIFI to PYFI. Whereas NIFI uses a coarse-grained, flow-level scaling design, PYFI uses a fine-grained, processor-level scale design. 91 | 92 | ![atscale](./screens/scaling.png) 93 | 94 | ## Tech Stack 95 | The following diagram shows the technology stack for the reference implementation. It uses entirely FOSS software that is mature, open and in most cases supported by a commercial entity. 96 | All of these components provide instant, out-of-the-box functionality that contributes to the PYFI system ensemble and have proven their usefulness in enterprise production settings. 97 | 98 | *NOTE: You are not forced to use any of these tools and can use other compatible tools, make your own, or replace the backend component entirely* 99 | 100 | ![techstack](./screens/techstack.png) 101 | ## Design Goals 102 | 103 | As the name suggests, PYFI is a spiritual offshoot of [Apache NIFI](https://nifi.apache.org/) except built using a python stack for running python (and other scripting languages) processors. 104 | However, PYFI is designed to be more broad in terms of design and scope which we will discuss below. 105 | 106 | Some important design goals for this technology are: 107 | 108 | 1. **Fault-Tolerant** - PYFI runs as a distributed network of logical compute processors that have redundancy and load-balancing built in. 109 | 2. **At-Scale** - This phrase is important. It indicates that the logical constructs (e.g.
pyfi processors) run at the scale of the hardware (e.g. CPU processors), meaning there is a 1-1 correlation (physical mapping) between hardware processors and pyfi processors. 110 | 3. **Secure** - All the functional components in PYFI (database, broker, storage, cache) have security built in. 111 | 4. **Dynamic** - The topology and behavior of a PYFI network can be adjusted and administered in real-time without taking down the entire network. Because PYFI is not a single VM controlling everything, you can add/remove update components without negatively impacting the functionality of the system. 112 | 5. **Distributed** - As was mentioned above, everything in PYFI is inherently distributed, down to the processors. There is no physical centralization of any kind. 113 | 6. **Performance** - PYFI is built on mature technology stack that is capable of high-throughput message traffic. 114 | 7. **Reliability** - The distributed queue paradigm used by PYFI allows for every processor in your dataflow to consume and acknowledge message traffic from its inbound queues and write to outbound queues. These durable queues persist while processors consume messages off them. 115 | 8. **Scalability** - Processors can scale across CPUs, Machines and networks, consuming message traffic off the same or multiple persistent queues. In fact, PYFI can auto-scale processors to accommodate the swell of tasks arriving on a queue. In addition, pyfi processors will be automatically balanced across physical locations to evenly distribute computational load and reduce local resource contention. 116 | 9. **Pluggable Backends** - PYFI supports various implementations of backend components such as message (e.g. RabbitMQ, SQS) or result storage (SQL, Redis, S3) in addition to allowing you to implement an entire backend (behind the SQL database) yourself. 117 | 10. **Real-time Metrics** - PYFI processors will support real-time broadcasting of data throughput metrics via subscription web-sockets. 
This will allow for all kinds of custom integrations and front-end visualizations to see what the network is doing. 118 | 11. **Data Analysis** - One of the big goals for PYFI is to save important data metrics about the flows and usages so it can be mined by predictive AI models later. This will give your organization key insights into the movement patterns of data. 119 | 12. **GIT Integration** - All the code used by processors can be pulled from your own git repositories giving you instant integration into existing devops and CM processes. PYFI will let you select which repo and commit version you want a processor to execute code from in your flows. 120 | 121 | ## Detailed Architecture 122 | PYFI is a scalable, high-performance network architecture that separates concerns across layers. Each layer has best-of-breed components that manage the responsibility of that layer. 123 | The slides below show the different layers and their responsibilities, starting with the bottom-most layer. 124 | 125 | ### Managed Compute 126 | 127 | PYFI takes a different approach to staging and executing python code on its network. Other frameworks or libraries allow you to define your functions in your execution environment and serialize that code to remote workers for execution. Obviously that has some serious security implications in a *shared, managed compute environment*. So PYFI does not allow this. Rather, you request PYFI to mount your code through a secure git repository URL. This becomes *the contract* between you and PYFI and allows PYFI to securely load your code into its network. 128 | 129 | This approach also allows administrators to control white and blacklists for what repositories of code it trusts. 130 | 131 | #### Code Isolation 132 | 133 | Each PYFI worker that mounts a git repository will create a virtual environment for that code and execute the repository's *setup.py* to install the code in that virtual environment.
This is beneficial for a number of reasons, but most importantly it keeps the environment for the mounted code separate from the PYFI agent's python environment. 134 | ### Network Layers 135 | 136 | PYFI is a distributed, scalable architecture and as such it is a relationship between connected hardware & service layers interacting as a whole. 137 | 138 | ![layer1](./screens/layer1.png) 139 | ![layer2](./screens/layer2.png) 140 | ![layer3](./screens/layer3.png) 141 | ![layer4](./screens/layer4.png) 142 | 143 | ### Execution Stack 144 | The following diagram shows the traversal of PYFI programs through various stages from the top-level compute API to its destination python function (task). 145 | 146 | ![stack](./screens/execution.png) 147 | 148 | ### Why a SQL Database? 149 | The center of the PYFI architecture is an enterprise-grade transactional database that maintains the relational models used by the PYFI network. 150 | 151 | Some of you might be asking why a SQL database is the center abstraction point of PYFI; SQL databases have been around for decades! Let me explain. 152 | 153 | There are some important enterprise qualities we want from the logical database that governs the structure and behavior of a PYFI network. 154 | 155 | * ***Constraints*** - PYFI data models should adhere to logical constraints that maintain the integrity of the network design. This prevents any errors in the data model that might cause the network to not perform. It also protects against any errors introduced by humans. 156 | * ***Transactions*** - Similar to the nature of the message/task layer, we want to provide transactional semantics to the data layer so sets of logical changes can be applied in an atomic fashion. This ensures your network is not caught in an inconsistent (or partial) state when making design changes.
157 | * ***Security*** - Row level security is built into the database and allows us to control who is able to see what without having to implement a weaker form of this at the application layer. By design, the pyfi stack captures access control semantics ***all the way down to the data**.* 158 | * ***Scaling*** - SQL databases such as Postgres have mature scaling mechanics that allow them to cluster and scale appropriately. 159 | * ***Administration*** - Mature tools exist to administer and manage SQL databases that don't need to be reinvented. 160 | 161 | Decoupling the PYFI physical network from the logical model through a transactional database allows for future implementation-independence of a particular PYFI network. 162 | All the existing PYFI CLI tools that operate on the database will continue to work as is, if you choose to implement a different backend. 163 | 164 | ### Data Model 165 | 166 | The data model is the system abstraction behind which the PYFI reference implementation operates. Services monitor the data models and reflect the semantics of the data in the PYFI network state. 167 | 168 | For example, if a processor model *requested_status* is changed to "STOPPED" then the agent responsible for that processor will stop the processor and put its *status* field to "STOPPED". 169 | 170 | Simply put, the PYFI network "reacts" to the current state of the database. 171 | 172 | ![datamodel](./screens/pyfi-data-model.png) 173 | 174 | ### Security Model 175 | 176 | PYFI uses a fine-grained access control model for user actions against the data model (via UI or CLI or API). At the database level this is also enforced with RLS (Row Level Security) features of Postgres (or your database of choice). 177 | It is vital to the security model of PYFI to implement access control all the way through the stack down to the row data. 178 | 179 | Using the CLI you can add and remove privileges for individual users and view their current privileges.
180 | 181 | ```python 182 | $ pyfi add privilege --user darren --name DELETE 183 | 184 | # You can only add privileges that are named in the list of rights further below. Trying to add something not in this list will result in an error. 185 | 186 | $ pyfi ls user --name darren 187 | +--------+--------------------------------------+----------+------------+ 188 | | Name | ID | Owner | Email | 189 | +--------+--------------------------------------+----------+------------+ 190 | | darren | a725b5ff-bb60-401a-a79a-7bfcb87dfc93 | postgres | d@some.com | 191 | +--------+--------------------------------------+----------+------------+ 192 | Privileges 193 | +--------+--------+----------------------------+----------+ 194 | | Name | Right | Last Updated | By | 195 | +--------+--------+----------------------------+----------+ 196 | | darren | CREATE | 2021-08-18 08:59:42.749164 | postgres | 197 | | darren | DELETE | 2021-08-19 08:36:29.922190 | postgres | 198 | +--------+--------+----------------------------+----------+ 199 | ``` 200 | 201 | Here is an initial list of the privileges ("rights") that can be assigned to a user. 
202 | 203 | ```python 204 | 205 | rights = [ 'ALL', 206 | 'CREATE', 207 | 'READ', 208 | 'UPDATE', 209 | 'DELETE', 210 | 211 | 'DB_DROP', 212 | 'DB_INIT', 213 | 214 | 'START_AGENT', 215 | 216 | 'RUN_TASK', 217 | 'CANCEL_TASK', 218 | 219 | 'UPDATE_PROCESSOR', 220 | 'DELETE_PROCESSOR', 221 | 'START_PROCESSOR', 222 | 'STOP_PROCESSOR', 223 | 'PAUSE_PROCESSOR', 224 | 'RESUME_PROCESSOR', 225 | 'LOCK_PROCESSOR', 226 | 'UNLOCK_PROCESSOR', 227 | 'VIEW_PROCESSOR', 228 | 'VIEW_PROCESSOR_CONFIG', 229 | 'VIEW_PROCESSOR_CODE', 230 | 'EDIT_PROCESSOR_CONFIG', 231 | 'EDIT_PROCESSOR_CODE', 232 | 233 | 'LS_PROCESSORS', 234 | 'LS_USERS', 235 | 'LS_USER', 236 | 'LS_PLUGS', 237 | 'LS_SOCKETS', 238 | 'LS_QUEUES', 239 | 'LS_AGENTS', 240 | 'LS_NODES', 241 | 'LS_SCHEDULERS', 242 | 'LS_WORKERS', 243 | 244 | 'ADD_PROCESSOR', 245 | 'ADD_AGENT', 246 | 'ADD_NODE', 247 | 'ADD_PLUG', 248 | 'ADD_PRIVILEGE', 249 | 'ADD_QUEUE', 250 | 'ADD_ROLE', 251 | 'ADD_SCHEDULER', 252 | 'ADD_SOCKET', 253 | 'ADD_USER', 254 | 255 | 'UPDATE_PROCESSOR', 256 | 'UPDATE_AGENT', 257 | 'UPDATE_NODE', 258 | 'UPDATE_PLUG', 259 | 'UPDATE_PRIVILEGE', 260 | 'UPDATE_QUEUE', 261 | 'UPDATE_ROLE', 262 | 'UPDATE_SCHEDULER', 263 | 'UPDATE_SOCKET', 264 | 'UPDATE_USER', 265 | 266 | 'DELETE_PROCESSOR', 267 | 'DELETE_AGENT', 268 | 'DELETE_NODE', 269 | 'DELETE_PLUG', 270 | 'DELETE_PRIVILEGE', 271 | 'DELETE_QUEUE', 272 | 'DELETE_ROLE', 273 | 'DELETE_SCHEDULER', 274 | 'DELETE_SOCKET', 275 | 'DELETE_USER', 276 | 277 | 'READ_PROCESSOR', 278 | 'READ_AGENT', 279 | 'READ_NODE', 280 | 'READ_PLUG', 281 | 'READ_PRIVILEGE', 282 | 'READ_QUEUE', 283 | 'READ_ROLE', 284 | 'READ_SCHEDULER', 285 | 'READ_SOCKET', 286 | 'READ_USER' 287 | ] 288 | ``` 289 | 290 | ### Logical Processors 291 | 292 | PYFI implements the notion of a *logical processor* that acts as a one-to-one proxy to physical hardware processor cores. PYFI processors have a few interesting traits, only a handful of which are listed below.
293 | * **Reliable** - Messages (or method calls) on PYFI processors are durable and reliable, surviving faults, reboots or restarts. 294 | * **Load Balanced** - PYFI processors run natively as a cluster of distributed processes that automatically balance invocation load. 295 | * **High Availability** - For the same reasons PYFI processors are reliable and durable, they also become highly-available. 296 | * **Hardware Independent** - PYFI processors are hardware independent and also server independent. They can be relocated from one server to another at any time, even while they are running, without data loss. 297 | 298 | Because of the logical nature of PYFI processors, PYFI can offer the user a truly scalable and elastic compute paradigm with powerful visual tools. 299 | 300 | #### Introduction to Processors & Sockets 301 | 302 | PYFI processors reference python modules that are stored in specific git repositories. When PYFI launches a processor, that git repository is loaded into an isolated virtualenv for that processor. 303 | Individual functions within that module are referenced by *sockets* that are attached to the processor. 304 | 305 | The socket is the logical abstraction of an inbound task invocation, a durable queue and a python function. Client code or other processors can send data to specific processor sockets which are then dispatched to the associated python function and the result is either returned or forwarded to the next socket attached in the data flow (which is to say putting the request on its queue).
306 | 307 | #### ***Processors & Sockets Using the GUI*** 308 | 309 | ![socket1](./screens/socket1.png) 310 | ![socket2](./screens/socket2.png) 311 | #### ***Processors & Sockets Using the CLI*** 312 | 313 | ```bash 314 | pyfi add queue -n pyfi.queue1 -t direct 315 | pyfi add processor -n proc1 -g https://github.com/radiantone/pyfi-processors -m pyfi.processors.sample 316 | pyfi add socket -n proc1.socket1 -q pyfi.queue1 -pn proc1 -t do_something 317 | pyfi add socket -n proc1.socket2 -q pyfi.queue1 -pn proc1 -t do_this 318 | ``` 319 | #### ***Processors & Sockets Using the API*** 320 | 321 | ```python 322 | from pyfi.client.api import Processor, Socket, Queue 323 | 324 | # Create a processor 325 | processor = Processor(name='proc1', module='pyfi.processors.sample', branch='main', 326 | gitrepo='https://github.com/radiantone/pyfi-processors') 327 | 328 | # Create a socket for that processor 329 | do_something = Socket(name='proc1.socket1', processor=processor, queue={ 330 | 'name': 'pyfi.queue1'}, task='do_something') 331 | 332 | do_this = Socket(name='proc1.socket2', processor=processor, queue={ 333 | 'name': 'pyfi.queue1'}, task='do_this') 334 | 335 | # Send a message to a socket 336 | do_something("Hello World !") 337 | 338 | do_this("Do this!!") 339 | ``` 340 | 341 | #### Parallel Compute & Workflow API 342 | This example builds on top of the previous client API built into PYFI and let's you define a simple and flexible API based on your processor functions, so it looks like *plain-old-python*. A key goal with this API is that the design of the workflow (which is to say its structure) should be *obvious* just by looking at the code. 343 | 344 | We also introduce parallel workflow constructs such as *pipeline*, *parallel* and *funnel* here but will talk about them in more detail later on. 345 | 346 | 347 | NOTE: The functions *do_something* and *do_this* are actual (but contrived) python functions connected to a Processor via git repository. 
Merely used for examples. 348 | View them [here](https://github.com/radiantone/pyfi-processors/blob/main/pyfi/processors/sample.py). 349 | ```python 350 | """ Example""" 351 | from pyfi.client.api import parallel, pipeline, funnel 352 | 353 | # Function API over your processor models 354 | from pyfi.client.example.api import do_something_p as do_something, do_this_p as do_this 355 | 356 | # Durable, reliable, parallel, distributed workflows 357 | _pipeline = pipeline([ 358 | do_something("One"), 359 | do_something("Two"), 360 | parallel([ 361 | do_something("Four"), 362 | do_something("Five"), 363 | ]), 364 | do_something("Three")]) 365 | 366 | _parallel = parallel([ 367 | _pipeline, 368 | do_something("Two"), 369 | do_something("Three")]) 370 | 371 | _funnel = funnel([ 372 | do_something("One"), 373 | _parallel, 374 | do_this("Three")]) 375 | 376 | print("FUNNEL: ", _funnel(do_this("Four")).get()) 377 | ``` 378 | 379 | What's interesting about the above code is that the function calls are fully durable and reliable, meaning that they are persistent and if a particular worker fails, the task is retried elsewhere. 380 | In addition, if the compute resources are not available at the time of execution, the workflow will wait until the PYFI environment finishes executing all the code, which can occur at different times. 381 | 382 | Even if the backend were to suffer hardware failures or reboots, the above script would eventually finish and produce its result, all transparently. 383 | You get all these *qualities of service* for free in PYFI. 384 | 385 | #### Shell Based Workflows 386 | The example below shows how you can use shell pipes to create pipelines. 387 | 388 | ```bash 389 | # Create alias' for the run task commands 390 | alias pyfi.processors.sample.do_something="pyfi task run -s pyfi.processors.sample.do_something" 391 | alias pyfi.processors.sample.do_this="pyfi task run -s pyfi.processors.sample.do_this" 392 | 393 | echo "HI THERE!" 
| pyfi.processors.sample.do_something 394 | 395 | # Add a 'string' the output of a processor and then flow that into a different processor 396 | echo "HI THERE!" | pyfi.processors.sample.do_something | echo "$(cat -)string" | pyfi.processors.sample.do_this 397 | 398 | # Echo a string as input to two different processors and they run in parallel 399 | echo "HI THERE!" | tee -a >(pyfi.processors.sample.do_something) tee -a >(pyfi.processors.sample.do_this) 400 | ``` 401 | ## Command Line Interface 402 | 403 | One of the design goals for PYFI was to allow both Graphical and Command line User Interfaces. A CLI will open up access to various server-side automations, devops pipelines and human sysops that can interact with the PYFI network through a remote console. 404 | 405 | All constructs within PYFI can be created, deleted, updated or otherwise managed via the CLI as well as the GUI, again, adhering to the principle that architecture is logically designed and loosely coupled in ways that enable more freedom and technology independence if desired. 
406 | 407 | Here is a sample script that builds a distributed flow using just the CLI 408 | 409 | ```bash 410 | 411 | pyfi add node -n node1 -h phoenix 412 | pyfi add node -n node2 -h radiant 413 | pyfi add node -n node3 -h miko 414 | pyfi add scheduler --name sched1 415 | 416 | pyfi scheduler -n sched1 add --node node1 417 | pyfi scheduler -n sched1 add --node node2 418 | pyfi scheduler -n sched1 add --node node3 419 | 420 | pyfi add processor -n proc1 -g https://github.com/radiantone/pyfi-processors -m pyfi.processors.sample -t do_something 421 | pyfi add queue -n pyfi.queue1 -t direct 422 | pyfi add queue -n pyfi.queue2 -t direct 423 | pyfi add queue -n pyfi.queue3 -t direct 424 | 425 | pyfi add outlet -n proc1.outlet1 -q pyfi.queue1 -pn proc1 426 | pyfi add plug -n plug1 -q pyfi.queue2 -pn proc1 427 | pyfi add plug -n plug3 -q pyfi.queue3 -pn proc1 428 | pyfi add processor -n proc2 -g https://github.com/radiantone/pyfi-processors -m pyfi.processors.sample -t do_this -h radiant 429 | pyfi add outlet -n proc2.outlet1 -q pyfi.queue2 -pn proc2 430 | 431 | pyfi add processor -n proc4 -g https://github.com/radiantone/pyfi-processors -m pyfi.processors.sample -t do_something -h radiant 432 | pyfi add outlet -n proc4.outlet1 -q pyfi.queue1 -pn proc4 433 | 434 | pyfi add processor -n proc3 -g https://github.com/radiantone/pyfi-processors -m pyfi.processors.sample -t do_this -h miko 435 | pyfi add outlet -n proc3.outlet1 -q pyfi.queue3 -pn proc3 436 | 437 | ``` 438 | 439 | Here are some sample help screens from the CLI. 440 | 441 | **Top level pyfi help screen** 442 | 443 | ```bash 444 | $ pyfi 445 | Usage: pyfi [OPTIONS] COMMAND [ARGS]... 446 | 447 | PYFI CLI for managing the pyfi network 448 | 449 | Options: 450 | --debug Debug switch 451 | -d, --db TEXT Database URI 452 | -i, --ini TEXT PYFI .ini configuration file 453 | -c, --config Configure pyfi 454 | --help Show this message and exit. 
455 | 456 | Commands: 457 | add Add an object to the database 458 | agent Run pyfi agent 459 | api API server admin 460 | db Database operations 461 | delete Delete an object from the database 462 | ls List database objects 463 | node Node management operations 464 | proc Run or manage processors 465 | scheduler Scheduler management commands 466 | task PYFI task management 467 | update Update a database object 468 | web Web server admin 469 | ``` 470 | **Adding various objects to the PYFI network database** 471 | ```bash 472 | $ pyfi add 473 | Usage: pyfi add [OPTIONS] COMMAND [ARGS]... 474 | 475 | Add an object to the database 476 | 477 | Options: 478 | --id TEXT ID of object being added 479 | --help Show this message and exit. 480 | 481 | Commands: 482 | agent Add agent object to the database 483 | node Add node object to the database 484 | outlet Add outlet to a processor 485 | plug Add plug to a processor 486 | processor Add processor to the database 487 | queue Add queue object to the database 488 | role Add role object to the database 489 | scheduler Add scheduler object to the database 490 | user Add user object to the database 491 | ``` 492 | 493 | **Running & managing distributed processors** 494 | ```bash 495 | $ pyfi proc 496 | Usage: pyfi proc [OPTIONS] COMMAND [ARGS]... 497 | 498 | Run or manage processors 499 | 500 | Options: 501 | --id TEXT ID of processor 502 | --help Show this message and exit. 503 | 504 | Commands: 505 | pause Pause a processor 506 | remove Remove a processor 507 | restart Start a processor 508 | resume Pause a processor 509 | start Start a processor 510 | stop Stop a processor 511 | ``` 512 | 513 | **Listing objects in the database** 514 | ```bash 515 | $ pyfi ls 516 | Usage: pyfi ls [OPTIONS] COMMAND [ARGS]... 517 | 518 | List database objects 519 | 520 | Options: 521 | --help Show this message and exit. 
522 | 523 | Commands: 524 | agents List agents 525 | nodes List queues 526 | outlets List outlets 527 | plugs List agents 528 | processors List processors 529 | queues List queues 530 | schedulers List queues 531 | users List users 532 | workers List workers 533 | ``` 534 | ### Advanced UI 535 | 536 | PYFI uses a custom built, modern User Interface derived from the core design of NIFI but extended in meaningful ways. You can preview the PYFI UI in the [pyfi-ui](https://github.com/radiantone/pyfi-ui) repository. 537 | 538 | **Real Time Streaming Analytics** 539 | ![screen1](./screens/pyfi7.png) 540 | 541 | **Real Time Coding** 542 | ![screen1](./screens/pyfi8.png) 543 | 544 | **Advanced Workflows with Embedded Subflows** 545 | ![screen1](./screens/screen16.png) 546 | 547 | ## Stack Tools 548 | 549 | The follow section shows screenshots of the tech stack UI tools. PYFI uses best-of-breed components in its stack and does not try to reinvent the wheel or tackle all the needs itself (being average in all of them). 550 | 551 | ### pgAdmin 552 | 553 | pgadmin is the UI for postgres. 
554 | 555 | ![pgadmin](./screens/pgadmin.png) 556 | 557 | ### Portainer 558 | 559 | Manage your docker container stack 560 | 561 | ![portainer](./screens/portainer.png) 562 | 563 | ### Redis Insights 564 | 565 | Manage your cache and task results datastore 566 | 567 | ![portainer](./screens/redis.png) 568 | 569 | ### RabbitMQ Admin UI 570 | 571 | Manage your message broker and queues 572 | 573 | ![rabbitmq](./screens/rabbitmq.png) 574 | 575 | ### Flower 576 | 577 | Manage your task queues 578 | 579 | ![flower](./screens/flower.png) 580 | 581 | ### Kibana 582 | 583 | Build dashboards from your logs and long-term persistence 584 | 585 | ![kibana](./screens/kibana.png) 586 | 587 | ### Amplify 588 | 589 | Monitor your network reverse proxy (NGINX) 590 | 591 | ![amplify](./screens/amplify.png) -------------------------------------------------------------------------------- /docs/api/ORM/index.rst: -------------------------------------------------------------------------------- 1 | 2 | ORM 3 | ================================ 4 | .. toctree:: 5 | :maxdepth: 2 6 | 7 | 8 | .. 
code-block:: python 9 | :caption: ElasticCode Python SQL Model API 10 | """ 11 | Class database model definitions 12 | """ 13 | import json 14 | from datetime import datetime 15 | from typing import Any, Optional 16 | 17 | from oso import Oso 18 | from sqlalchemy import ( 19 | Boolean, 20 | Column, 21 | DateTime, 22 | Enum, 23 | Float, 24 | ForeignKey, 25 | Integer, 26 | LargeBinary, 27 | String, 28 | Table, 29 | Text, 30 | and_, 31 | literal_column, 32 | ) 33 | from sqlalchemy.dialects.postgresql import DOUBLE_PRECISION 34 | from sqlalchemy.ext.compiler import compiles 35 | from sqlalchemy.ext.declarative import DeclarativeMeta, declared_attr 36 | from sqlalchemy.orm import declarative_base, foreign, relationship 37 | from sqlalchemy.schema import CreateColumn 38 | 39 | Base: Any = declarative_base(name="Base") 40 | 41 | oso = Oso() 42 | 43 | 44 | @compiles(CreateColumn, "postgresql") 45 | def use_identity(element, compiler, **kw): 46 | text = compiler.visit_create_column(element, **kw) 47 | text = text.replace("SERIAL", "INT GENERATED BY DEFAULT AS IDENTITY") 48 | return text 49 | 50 | 51 | class AlchemyEncoder(json.JSONEncoder): 52 | def default(self, obj): 53 | from datetime import datetime 54 | 55 | if isinstance(obj.__class__, DeclarativeMeta): 56 | # an SQLAlchemy class 57 | fields = {} 58 | for field in [ 59 | x for x in dir(obj) if not x.startswith("_") and x != "metadata" 60 | ]: 61 | data = obj.__getattribute__(field) 62 | try: 63 | # this will fail on non-encodable values, like other classes 64 | if type(data) is datetime: 65 | data = str(data) 66 | json.dumps(data) 67 | fields[field] = data 68 | except TypeError: 69 | fields[field] = None 70 | # a json-encodable dict 71 | return fields 72 | 73 | return json.JSONEncoder.default(self, obj) 74 | 75 | 76 | class HasLogins(object): 77 | @declared_attr 78 | def logins(cls): 79 | return relationship( 80 | "LoginModel", 81 | order_by="desc(LoginModel.created)", 82 | primaryjoin=lambda: 
and_(foreign(LoginModel.user_id) == cls.id), 83 | lazy="select", 84 | ) 85 | 86 | 87 | class HasLogs(object): 88 | @declared_attr 89 | def logs(cls): 90 | return relationship( 91 | "LogModel", 92 | order_by="desc(LogModel.created)", 93 | primaryjoin=lambda: and_( 94 | foreign(LogModel.oid) == cls.id, 95 | LogModel.discriminator == cls.__name__, 96 | ), 97 | lazy="select", 98 | ) 99 | 100 | 101 | class BaseModel(Base): 102 | """ 103 | Docstring 104 | """ 105 | 106 | __abstract__ = True 107 | 108 | id = Column( 109 | String(40), 110 | autoincrement=False, 111 | default=literal_column("uuid_generate_v4()"), 112 | unique=True, 113 | primary_key=True, 114 | ) 115 | name = Column(String(80), unique=True, nullable=False, primary_key=True) 116 | owner = Column(String(40), default=literal_column("current_user")) 117 | 118 | status = Column(String(20), nullable=False, default="ready") 119 | requested_status = Column(String(40), default="ready") 120 | 121 | enabled = Column(Boolean) 122 | created = Column(DateTime, default=datetime.now, nullable=False) 123 | lastupdated = Column( 124 | DateTime, default=datetime.now, onupdate=datetime.now, nullable=False 125 | ) 126 | 127 | def __repr__(self): 128 | return json.dumps(self, cls=AlchemyEncoder) 129 | 130 | 131 | class LogModel(Base): 132 | """ 133 | Docstring 134 | """ 135 | 136 | __tablename__ = "log" 137 | 138 | id = Column( 139 | String(40), 140 | autoincrement=False, 141 | default=literal_column("uuid_generate_v4()"), 142 | unique=True, 143 | primary_key=True, 144 | ) 145 | 146 | user_id = Column(String, ForeignKey("users.id"), nullable=False) 147 | user = relationship("UserModel", lazy=True) 148 | 149 | public = Column(Boolean, default=False) 150 | created = Column(DateTime, default=datetime.now, nullable=False) 151 | oid = Column(String(40), primary_key=True) 152 | discriminator = Column(String(40)) 153 | text = Column(String(80), nullable=False) 154 | source = Column(String(40), nullable=False) 155 | 156 | def 
__repr__(self): 157 | return json.dumps(self, cls=AlchemyEncoder) 158 | 159 | 160 | rights = [ 161 | "ALL", 162 | "CREATE", 163 | "READ", 164 | "UPDATE", 165 | "DELETE", 166 | "DB_DROP", 167 | "DB_INIT", 168 | "START_AGENT", 169 | "RUN_TASK", 170 | "CANCEL_TASK", 171 | "START_PROCESSOR", 172 | "STOP_PROCESSOR", 173 | "PAUSE_PROCESSOR", 174 | "RESUME_PROCESSOR", 175 | "LOCK_PROCESSOR", 176 | "UNLOCK_PROCESSOR", 177 | "VIEW_PROCESSOR", 178 | "VIEW_PROCESSOR_CONFIG", 179 | "VIEW_PROCESSOR_CODE", 180 | "EDIT_PROCESSOR_CONFIG", 181 | "EDIT_PROCESSOR_CODE", "LS_PROCESSORS", 182 | "LS_USERS", 183 | "LS_USER", 184 | "LS_PLUGS", 185 | "LS_SOCKETS", 186 | "LS_QUEUES", 187 | "LS_AGENTS", 188 | "LS_NODES", 189 | "LS_SCHEDULERS", 190 | "LS_WORKERS", 191 | "ADD_PROCESSOR", 192 | "ADD_AGENT", 193 | "ADD_NODE", 194 | "ADD_PLUG", 195 | "ADD_PRIVILEGE", 196 | "ADD_QUEUE", 197 | "ADD_ROLE", 198 | "ADD_SCHEDULER", 199 | "ADD_SOCKET", 200 | "ADD_USER", 201 | "UPDATE_PROCESSOR", 202 | "UPDATE_AGENT", 203 | "UPDATE_NODE", 204 | "UPDATE_PLUG", 205 | "UPDATE_ROLE", 206 | "UPDATE_SCHEDULER", 207 | "UPDATE_SOCKET", 208 | "UPDATE_USER", 209 | "DELETE_PROCESSOR", 210 | "DELETE_AGENT", 211 | "DELETE_NODE", 212 | "DELETE_PLUG", 213 | "DELETE_PRIVILEGE", 214 | "DELETE_QUEUE", 215 | "DELETE_ROLE", 216 | "DELETE_SCHEDULER", 217 | "DELETE_SOCKET", 218 | "DELETE_USER", 219 | "READ_PROCESSOR", 220 | "READ_AGENT", 221 | "READ_NODE", 222 | "READ_LOG", 223 | "READ_PLUG", 224 | "READ_PRIVILEGE", 225 | "READ_QUEUE", 226 | "READ_ROLE", 227 | "READ_SCHEDULER", 228 | "READ_SOCKET", 229 | "READ_USER", 230 | ] 231 | 232 | 233 | class PrivilegeModel(BaseModel): 234 | """ 235 | Docstring 236 | """ 237 | 238 | __tablename__ = "privilege" 239 | 240 | right = Column("right", Enum(*rights, name="right")) 241 | 242 | 243 | role_privileges = Table( 244 | "role_privileges", 245 | Base.metadata, 246 | Column("role_id", ForeignKey("role.id")), 247 | Column("privilege_id", ForeignKey("privilege.id")), 248 | ) 249 | 250 | 
class RoleModel(BaseModel):
    """
    A named role that groups a set of privileges for assignment to users.
    """

    __tablename__ = "role"

    # Privileges granted by this role (many-to-many via role_privileges).
    privileges = relationship(
        "PrivilegeModel", secondary=role_privileges, lazy="subquery"
    )


# Privileges explicitly revoked from a user (overrides role grants).
user_privileges_revoked = Table(
    "user_privileges_revoked",
    Base.metadata,
    Column("user_id", ForeignKey("users.id")),
    Column("privilege_id", ForeignKey("privilege.id")),
)

# Privileges granted directly to a user.
user_privileges = Table(
    "user_privileges",
    Base.metadata,
    Column("user_id", ForeignKey("users.id")),
    Column("privilege_id", ForeignKey("privilege.id")),
)

# Roles assigned to a user.
user_roles = Table(
    "user_roles",
    Base.metadata,
    Column("user_id", ForeignKey("users.id")),
    Column("role_id", ForeignKey("role.id")),
)


class UserModel(HasLogins, BaseModel):
    """
    A platform user with credentials, direct/revoked privileges and roles.
    """

    __tablename__ = "users"
    email = Column(String(120), unique=True, nullable=False)
    password = Column(String(60), unique=False, nullable=False)
    # NOTE(review): "clear" appears to store a cleartext counterpart of the
    # password (it is written alongside it) — confirm intent; storing
    # cleartext credentials is a security risk.
    clear = Column(String(60), unique=False, nullable=False)

    # Privileges granted directly to this user.
    privileges = relationship(
        "PrivilegeModel", secondary=user_privileges, lazy="subquery"
    )

    # Privileges explicitly revoked from this user.
    revoked = relationship(
        "PrivilegeModel", secondary=user_privileges_revoked, lazy="subquery"
    )

    roles = relationship("RoleModel", secondary=user_roles, lazy="subquery")


# Closed value sets used for Enum columns below.
socket_types = ["RESULT", "ERROR"]

plug_types = ["RESULT", "ERROR"]

schedule_types = ["CRON", "INTERVAL"]

strategies = ["BALANCED", "EFFICIENT"]


class FileModel(BaseModel):
    """
    A stored file (e.g. a saved flow or code artifact) plus its versions.
    """

    __tablename__ = "file"

    path = Column(String(120))
    filename = Column(String(80))
    collection = Column(String(80))
    code = Column(Text)
    type = Column(String(40))
    icon = Column(String(40))
    # NOTE(review): back_populates="file" expects VersionModel.file to
    # declare back_populates="versions", which it does not — confirm this
    # mapping configures cleanly.
    versions = relationship(
        "VersionModel", back_populates="file", cascade="all, delete-orphan"
    )


# Many-to-many association between flows and their versions.
flows_versions = Table(
    "flows_versions",
    Base.metadata,
    Column("flow_id", ForeignKey("flow.id"), primary_key=True),
    Column("version_id", ForeignKey("versions.id"), primary_key=True),
)


class FlowModel(BaseModel):
    """
    A flow model
    """

    __tablename__ = "flow"

    # Collection of processors within this flow. A processor can reside
    # in multiple flows at once
    processors = relationship("ProcessorModel", lazy=True)

    # File reference for this flow. i.e. it's saved state
    file_id = Column(String, ForeignKey("file.id"), nullable=False)
    file = relationship(
        "FileModel", lazy=True, cascade="all, delete-orphan", single_parent=True
    )

    # List of versions associated with this flow
    versions = relationship("VersionModel", secondary=flows_versions, lazy=True)


class AgentModel(BaseModel):
    """
    An agent process running on a node; owns the workers it spawns.
    """

    __tablename__ = "agent"
    hostname = Column(String(60))
    cpus = Column(Integer)
    port = Column(Integer)
    pid = Column(Integer)

    # Workers managed by this agent; deleted along with it.
    workers = relationship(
        "WorkerModel", backref="agent", lazy=True, cascade="all, delete-orphan"
    )

    node_id = Column(String(40), ForeignKey("node.id"), nullable=False)


class ActionModel(BaseModel):
    """
    An action request with parameters, addressed to a target entity.
    """

    __tablename__ = "action"
    params = Column(String(80))

    # host, worker, processor, queue, or all
    target = Column(String(20), nullable=False)


class WorkerModel(BaseModel):
    """
    A worker process executing tasks for a processor under an agent.
    """

    __tablename__ = "worker"
    backend = Column(String(40), nullable=False)
    broker = Column(String(40), nullable=False)
    concurrency = Column(Integer)
    process = Column(Integer)
    port = Column(Integer)
    hostname = Column(String(60))

    workerdir = Column(String(256))

    processor = relationship("ProcessorModel")
    processor_id = Column(
        String(40), ForeignKey("processor.id", ondelete="CASCADE"), nullable=False
    )

    deployment_id = Column(String(40), ForeignKey("deployment.id"), nullable=True)

    deployment = relationship("DeploymentModel", back_populates="worker")

    agent_id = Column(String(40), ForeignKey("agent.id"), nullable=False)

    # agent = relationship("AgentModel", back_populates="worker")


class ContainerModel(BaseModel):
    """
    Reference to a runtime container by its external container id.
    """

    __tablename__ = "container"

    container_id = Column(String(80), unique=True, nullable=False)


class VersionModel(Base):
    """
    A saved version of a flow: a snapshot of its serialized state and file.
    """

    __tablename__ = "versions"

    # UUID primary key generated by Postgres (uuid_generate_v4()).
    id = Column(
        String(40),
        autoincrement=False,
        default=literal_column("uuid_generate_v4()"),
        unique=True,
        primary_key=True,
    )
    name = Column(String(80), unique=False, nullable=False)
    file_id = Column(String, ForeignKey("file.id"), nullable=False)
    file = relationship(
        "FileModel", lazy=True, cascade="all, delete-orphan", single_parent=True
    )
    # Database-side owner, defaulting to the connected DB user.
    owner = Column(String(40), default=literal_column("current_user"))
    flow = Column(Text, unique=False, nullable=False)

    # Timestamp refreshed on every update of the row.
    version = Column(
        DateTime, default=datetime.now, onupdate=datetime.now, nullable=False
    )


class DeploymentModel(BaseModel):
    """
    A deployment: a processor placed on a host with a CPU allotment.
    """

    __tablename__ = "deployment"

    name = Column(String(80), unique=False, nullable=False)
    hostname = Column(String(80), nullable=False)
    cpus = Column(Integer, default=1, nullable=False)
    processor_id = Column(String(40), ForeignKey("processor.id"), nullable=False)

    # The single worker realizing this deployment, if any.
    worker = relationship(
        "WorkerModel", lazy=True, uselist=False, back_populates="deployment"
    )


class ProcessorModel(HasLogs, BaseModel):
    """
    A processor: a deployable unit of task execution bound to a git repo
    module, with scheduling, container and rate-limit configuration.
    """

    __tablename__ = "processor"

    module = Column(String(80), nullable=False)
    beat = Column(Boolean)
    gitrepo = Column(String(180))
    branch = Column(String(30), default="main")
    commit = Column(String(50), nullable=True)
    gittag = Column(String(50), nullable=True)
    retries = Column(Integer)
    concurrency = Column(Integer)
    receipt = Column(String(80), nullable=True)
    # NOTE(review): integer default on a String(10) column — confirm the
    # intended default is the string "60".
    ratelimit = Column(String(10), default=60)
    perworker = Column(Boolean, default=True)
    timelimit = Column(Integer)
    ignoreresult = Column(Boolean)
    serializer = Column(String(10))
    backend = Column(String(80))
    ackslate = Column(Boolean)
    trackstarted = Column(Boolean)
    disabled = Column(Boolean)
    retrydelay = Column(Integer)
    # BUGFIX: the original declared `password = Column(Boolean)` here, which
    # was silently shadowed by the `password` relationship defined further
    # down (the later class-body assignment replaces the earlier one before
    # the declarative mapper ever sees it).  The dead Column has been
    # removed; behavior is unchanged.
    requirements = Column(Text)
    endpoint = Column(Text)
    modulepath = Column(Text)
    icon = Column(Text)
    cron = Column(Text)
    hasapi = Column(Boolean)
    uistate = Column(Text)

    description = Column(Text(), nullable=True, default="Some description")
    container_image = Column(String(60))
    container_command = Column(String(180))
    container_version = Column(String(20), default="latest")
    use_container = Column(Boolean, default=False)
    detached = Column(Boolean, default=False)

    user_id = Column(String, ForeignKey("users.id"), nullable=False)
    user = relationship("UserModel", backref="processor", lazy=True)

    flow_id = Column(String(40), ForeignKey("flow.id"), nullable=True)

    # Optional password protecting this processor.
    password = relationship("PasswordModel", lazy=True, viewonly=True)
    password_id = Column(String, ForeignKey("passwords.id"), nullable=True)

    # Plugs, deployments and sockets owned by this processor; all are
    # removed when the processor is deleted.
    plugs = relationship(
        "PlugModel", backref="processor", lazy=True, cascade="all, delete-orphan"
    )

    deployments = relationship(
        "DeploymentModel", backref="processor", lazy=True, cascade="all, delete-orphan"
    )

    sockets = relationship(
        "SocketModel", backref="processor", lazy=True, cascade="all, delete-orphan"
    )


class JobModel(Base):
    """
    Persistent scheduler job state (id, next run time, pickled job state).
    """

    __tablename__ = "jobs"

    id = Column(String(200), primary_key=True)
    next_run_time = Column(DOUBLE_PRECISION)
    job_state = Column(LargeBinary)


class PasswordModel(BaseModel):
    """
    A stored password, optionally attached to a processor.
    """

    __tablename__ = "passwords"

    # UUID primary key generated by Postgres (uuid_generate_v4()).
    id = Column(
        String(40),
        autoincrement=False,
        default=literal_column("uuid_generate_v4()"),
        unique=True,
        primary_key=True,
    )
    password = Column(String(60), nullable=False)

    processor = relationship("ProcessorModel", lazy=True, uselist=False)


class NetworkModel(BaseModel):
    """
    A network: the top-level grouping of schedulers, queues and nodes
    owned by a user.
    """

    __tablename__ = "network"

    schedulers = relationship(
        "SchedulerModel", backref="network", lazy=True, cascade="all, delete"
    )

    queues = relationship(
        "QueueModel", backref="network", lazy=True, cascade="all, delete"
    )
    nodes = relationship(
        "NodeModel", backref="network", lazy=True, cascade="all, delete"
    )

    user_id = Column(String, ForeignKey("users.id"), nullable=False)
    user = relationship("UserModel", lazy=True)


class WorkModel(BaseModel):
    """
    A scheduled unit of work tied to a task (mirrors scheduler job state).
    """

    __tablename__ = "work"

    next_run_time = Column(DOUBLE_PRECISION)
    job_state = Column(LargeBinary)

    task_id = Column(String(40), ForeignKey("task.id"))
    task = relationship("TaskModel", single_parent=True)


# Many-to-many association between calls and their lifecycle events.
calls_events = Table(
    "calls_events",
    Base.metadata,
    Column("call_id", ForeignKey("call.id"), primary_key=True),
    Column("event_id", ForeignKey("event.id"), primary_key=True),
)


class CallModel(BaseModel):
    """
    A single invocation of a task through a socket, with tracking ids,
    lifecycle timestamps and associated events.
    """

    __tablename__ = "call"

    name = Column(String(80), unique=False, nullable=False)
    state = Column(String(10))
    parent = Column(String(80), nullable=True)
    taskparent = Column(String(80), nullable=True)
    resultid = Column(String(80))
    celeryid = Column(String(80))
    tracking = Column(String(80))
    argument = Column(String(40))

    task_id = Column(String(40), ForeignKey("task.id"), nullable=False)
    started = Column(DateTime, default=datetime.now, nullable=False)
    finished = Column(DateTime)

    socket_id = Column(String(40), ForeignKey("socket.id"), nullable=False)
    socket = relationship(
        "SocketModel", back_populates="call", lazy=True, uselist=False
    )

    # NOTE(review): EventModel.call back-populates this collection through a
    # direct FK while this side uses the calls_events secondary table —
    # confirm the asymmetric mapping is intentional.
    events = relationship(
        "EventModel", secondary=calls_events, lazy=True, cascade="all, delete"
    )


class SchedulerModel(BaseModel):
    """
    A scheduler assigning work to nodes within a network using a strategy.
    """

    __tablename__ = "scheduler"

    nodes = relationship("NodeModel", backref="scheduler", lazy=True)
    # One of the `strategies` values: BALANCED or EFFICIENT.
    strategy = Column("strategy", Enum(*strategies, name="strategies"))

    network_id = Column(String(40), ForeignKey("network.id"))


class SettingsModel(BaseModel):
    """
    A single named setting value.
    """

    __tablename__ = "settings"
    value = Column(String(80), nullable=False)


class NodeModel(BaseModel):
    """
    A compute node: a host with resource metrics, an agent, and membership
    in a network and (optionally) a scheduler.
    """

    __tablename__ = "node"
    hostname = Column(String(60))
    scheduler_id = Column(String(40), ForeignKey("scheduler.id"), nullable=True)

    # Memory metrics reported for this node.
    memsize = Column(String(60), default="NaN")
    freemem = Column(String(60), default="NaN")
    memused = Column(Float, default=0)

    # Disk and CPU metrics reported for this node.
    disksize = Column(String(60), default="NaN")
    diskusage = Column(String(60), default="NaN")
    cpus = Column(Integer, default=0)
    cpuload = Column(Float, default=0)

    network_id = Column(String(40), ForeignKey("network.id"))

    # The agent running on this node; removed with the node.
    agent = relationship(
        "AgentModel", backref="node", uselist=False, cascade="all, delete-orphan"
    )


# Many-to-many association between plugs and task arguments.
plugs_arguments = Table(
    "plugs_arguments",
    Base.metadata,
    Column("plug_id", ForeignKey("plug.id"), primary_key=True),
    Column("argument_id", ForeignKey("argument.id"), primary_key=True),
)


class ArgumentModel(BaseModel):
    """
    A named, positional argument of a task, deliverable through plugs.
    """

    __tablename__ = "argument"

    name = Column(String(60), nullable=False)
    position = Column(Integer, default=0)
    kind = Column(Integer)

    task_id = Column(String(40), ForeignKey("task.id"))

    user_id = Column(String, ForeignKey("users.id"), nullable=False)
    user = relationship("UserModel", lazy=True)
    plugs = relationship("PlugModel", backref="argument")


class TaskModel(BaseModel):
    """
    A task: a function within a git-repo module, optionally overridden or
    mixed in via inline source code.
    """

    __tablename__ = "task"

    module = Column(String(120), nullable=False, primary_key=True)
    gitrepo = Column(String(180), nullable=False, primary_key=True)
    """
    Tasks can also be mixed-in to the module loaded by the processor as new functions
    using the code field, which must contain a function
    """
    mixin = Column(Boolean, default=False)

    source = Column(Text)  # Repo module function code
    code = Column(Text)  # Source code override for task

    sockets = relationship("SocketModel", back_populates="task")

    arguments = relationship("ArgumentModel", backref="task")


class EventModel(BaseModel):
    """
    Events are linked to call objects: received, prerun, postrun
    """

    __tablename__ = "event"
    note = Column(String(80), nullable=False)
    name = Column(String(80), nullable=False)

    call_id = Column(String(40), ForeignKey("call.id"))
    call = relationship(
        "CallModel",
        back_populates="events",
        single_parent=True,
        cascade="all, delete-orphan",
    )


# Association between sockets and the queues they publish to.
sockets_queues = Table(
    "sockets_queues",
    Base.metadata,
    Column("socket_id", ForeignKey("socket.id")),
    Column("queue_id", ForeignKey("queue.id")),
)

# A plug's source sockets (where data comes from).
plugs_source_sockets = Table(
    "plugs_source_sockets",
    Base.metadata,
    Column("plug_id", ForeignKey("plug.id"), primary_key=True),
    Column("socket_id", ForeignKey("socket.id"), primary_key=True),
)
# A plug's target sockets (where data is delivered).
plugs_target_sockets = Table(
    "plugs_target_sockets",
    Base.metadata,
    Column("plug_id", ForeignKey("plug.id"), primary_key=True),
    Column("socket_id", ForeignKey("socket.id"), primary_key=True),
)


class GateModel(BaseModel):
    """
    An open/closed gate controlling a task.
    """

    __tablename__ = "gate"

    open = Column(Boolean)
    task_id = Column(String(40), ForeignKey("task.id"))


class SocketModel(BaseModel):
    """
    A socket: the invocation endpoint of a task on a processor, with
    optional cron/interval scheduling and connected plugs/queues.
    """

    __tablename__ = "socket"
    processor_id = Column(String(40), ForeignKey("processor.id"), nullable=False)

    # CRON or INTERVAL, per `schedule_types`.
    schedule_type = Column("schedule_type", Enum(*schedule_types, name="schedule_type"))

    scheduled = Column(Boolean)
    cron = Column(String(20))

    description = Column(Text(), nullable=True, default="Some description")
    interval = Column(Integer)
    task_id = Column(String(40), ForeignKey("task.id"))
    task = relationship(
        "TaskModel",
        back_populates="sockets",
        single_parent=True,
        cascade="delete, delete-orphan",
    )

    user_id = Column(String, ForeignKey("users.id"), nullable=False)
    user = relationship("UserModel", lazy=True)

    # Wait for all sourceplugs to deliver their data before invoking the task
    wait = Column(Boolean, default=False)

    # NOTE(review): PlugModel.source/target back-populate "sourceplugs" /
    # "targetplugs", but these two relationships declare no back_populates —
    # confirm the mapping configures as intended.
    sourceplugs = relationship("PlugModel", secondary=plugs_source_sockets)

    targetplugs = relationship("PlugModel", secondary=plugs_target_sockets)

    queue = relationship("QueueModel", secondary=sockets_queues, uselist=False)

    call = relationship(
        "CallModel", back_populates="socket", cascade="all, delete-orphan"
    )


# Association between plugs and the queues they transport data over.
plugs_queues = Table(
    "plugs_queues",
    Base.metadata,
    Column("plug_id", ForeignKey("plug.id")),
    Column("queue_id", ForeignKey("queue.id")),
)


class PlugModel(BaseModel):
    """
    A plug: a typed connection carrying a task's RESULT or ERROR output
    from a source socket to a target socket over a queue.
    """

    __tablename__ = "plug"

    type = Column("type", Enum(*plug_types, name="plug_type"), default="RESULT")

    processor_id = Column(String(40), ForeignKey("processor.id"), nullable=False)

    source = relationship(
        "SocketModel",
        back_populates="sourceplugs",
        secondary=plugs_source_sockets,
        uselist=False,
    )

    target = relationship(
        "SocketModel",
        back_populates="targetplugs",
        secondary=plugs_target_sockets,
        uselist=False,
    )
    argument_id = Column(String, ForeignKey("argument.id"))

    user_id = Column(String, ForeignKey("users.id"), nullable=False)
    user = relationship("UserModel", lazy=True)

    description = Column(Text(), nullable=True, default="Some description")
    queue = relationship("QueueModel", secondary=plugs_queues, uselist=False)


class QueueModel(BaseModel):
    """
    A message queue definition with broker-level durability, length and
    TTL settings, belonging to a network.
    """

    __tablename__ = "queue"
    qtype = Column(String(20), nullable=False, default="direct")
    durable = Column(Boolean, default=True)
    reliable = Column(Boolean, default=True)
    auto_delete = Column(Boolean, default=True)
    max_length = Column(Integer, default=-1)
    max_length_bytes = Column(Integer, default=-1)
    message_ttl = Column(Integer, default=3000)
    expires = Column(Integer, default=3000)

    network_id = Column(String(40), ForeignKey("network.id"))

class LoginModel(Base):
    """
    A login session record for a user: when it was created/updated, the
    login time, and a session token.
    """

    __tablename__ = "login"

    # UUID generated by Postgres (uuid_generate_v4()).
    id = Column(
        String(40),
        autoincrement=False,
        default=literal_column("uuid_generate_v4()"),
        unique=True,
        primary_key=True,
    )
    # Database-side owner, defaulting to the connected DB user.
    owner = Column(String(40), default=literal_column("current_user"))

    # Row lifecycle timestamps; lastupdated refreshes on every update.
    created = Column(DateTime, default=datetime.now, nullable=False)
    lastupdated = Column(
        DateTime, default=datetime.now, onupdate=datetime.now, nullable=False
    )
    login = Column(DateTime, default=datetime.now, nullable=False)
    # Session token, also a UUID.  NOTE(review): marked primary_key=True
    # alongside `id`, making (id, token) a composite primary key — confirm
    # that is intentional rather than unique-only.
    token = Column(
        String(40),
        autoincrement=False,
        default=literal_column("uuid_generate_v4()"),
        unique=True,
        primary_key=True,
    )

    user_id = Column(String, ForeignKey("users.id"), nullable=False)
    user = relationship("UserModel", lazy=True, overlaps="logins")