├── tests ├── __init__.py ├── utils │ ├── __init__.py │ ├── data.py │ ├── pod.jinja2 │ └── cwl.py ├── data │ ├── cwl │ │ └── example │ │ │ ├── config.yml │ │ │ ├── streamflow.yml │ │ │ └── main.cwl │ ├── sqlite │ │ └── sqlite.db │ └── deployment │ │ ├── docker-compose │ │ └── docker-compose.yml │ │ └── slurm │ │ └── docker-compose.yml ├── cwl-conformance │ ├── streamflow-docker.yml │ ├── streamflow-kubernetes.yml │ ├── streamflow-singularity.yml │ └── conftest.py └── test_database.py ├── streamflow ├── core │ ├── __init__.py │ ├── exception.py │ ├── provenance.py │ ├── context.py │ └── data.py ├── cwl │ ├── __init__.py │ ├── antlr │ │ └── __init__.py │ ├── requirement │ │ ├── __init__.py │ │ └── docker │ │ │ ├── schemas │ │ │ ├── no-container.json │ │ │ ├── docker.json │ │ │ ├── singularity.json │ │ │ ├── kubernetes.json │ │ │ └── kubernetes.jinja2 │ │ │ ├── __init__.py │ │ │ ├── nocontainer.py │ │ │ └── translator.py │ └── token.py ├── ext │ └── __init__.py ├── workflow │ ├── __init__.py │ ├── transformer.py │ └── port.py ├── recovery │ ├── policy │ │ └── __init__.py │ ├── schemas │ │ ├── dummy_failure_manager.json │ │ ├── dummy_checkpoint_manager.json │ │ ├── default_checkpoint_manager.json │ │ └── default_failure_manager.json │ ├── __init__.py │ └── checkpoint_manager.py ├── version.py ├── __init__.py ├── __main__.py ├── data │ ├── schemas │ │ └── data_manager.json │ └── __init__.py ├── deployment │ ├── filter │ │ ├── schemas │ │ │ └── shuffle.json │ │ ├── __init__.py │ │ └── shuffle.py │ ├── connector │ │ ├── schemas │ │ │ ├── ssh.json │ │ │ ├── docker.json │ │ │ ├── local.json │ │ │ ├── singularity.json │ │ │ ├── kubernetes.json │ │ │ ├── occam.json │ │ │ └── base │ │ │ │ └── kubernetes.json │ │ └── __init__.py │ ├── schemas │ │ └── deployment_manager.json │ ├── __init__.py │ ├── template.py │ └── stream.py ├── scheduling │ ├── policy │ │ ├── schemas │ │ │ └── data_locality.json │ │ ├── __init__.py │ │ └── data_locality.py │ ├── __init__.py │ └── schemas │ │ 
└── scheduler.json ├── persistence │ ├── __init__.py │ ├── schemas │ │ ├── sqlite.json │ │ └── sqlite.sql │ ├── utils.py │ └── base.py ├── provenance │ └── __init__.py ├── config │ ├── __init__.py │ ├── validator.py │ └── schema.py └── report.py ├── MANIFEST.in ├── examples ├── failure │ ├── cwl │ │ ├── data │ │ │ ├── extra_nums │ │ │ │ ├── extra0.txt │ │ │ │ ├── extra2.txt │ │ │ │ └── extra1.txt │ │ │ └── num_file.txt │ │ ├── config.yml │ │ ├── clt │ │ │ ├── find.cwl │ │ │ ├── combit.cwl │ │ │ ├── openit.cwl │ │ │ ├── scatter.cwl │ │ │ └── sumit.cwl │ │ ├── master.cwl │ │ └── main.cwl │ ├── environment │ │ └── helm │ │ │ └── failure │ │ │ ├── Chart.yaml │ │ │ ├── values.yaml │ │ │ ├── _helmignore │ │ │ └── templates │ │ │ ├── deployment.yaml │ │ │ └── _helpers.tpl │ └── streamflow.yml ├── flux │ ├── cwl │ │ ├── config.yml │ │ ├── clt │ │ │ ├── execute.cwl │ │ │ └── compile.cwl │ │ ├── main.cwl │ │ └── data │ │ │ └── cs.cxx │ ├── streamflow.yml │ ├── Dockerfile │ └── README.md ├── mpi │ ├── cwl │ │ ├── config.yml │ │ ├── clt │ │ │ ├── compile.cwl │ │ │ └── execute.cwl │ │ ├── main.cwl │ │ └── data │ │ │ └── cs.cxx │ ├── environment │ │ ├── helm │ │ │ └── openmpi │ │ │ │ ├── Chart.yaml │ │ │ │ ├── templates │ │ │ │ ├── secrets.yaml │ │ │ │ ├── deployment.yaml │ │ │ │ └── _helpers.tpl │ │ │ │ └── _helmignore │ │ ├── docker-compose │ │ │ ├── docker-compose.yml │ │ │ ├── id_rsa.pub │ │ │ └── id_rsa │ │ └── k8s │ │ │ └── deployment.yaml │ └── streamflow.yml └── munipack │ ├── environment │ ├── helm │ │ └── stacking │ │ │ ├── Chart.yaml │ │ │ ├── values.yaml │ │ │ ├── _helmignore │ │ │ └── templates │ │ │ ├── deployment.yaml │ │ │ └── _helpers.tpl │ └── occam │ │ └── occamfile.yml │ ├── cwl │ ├── clt │ │ ├── find.cwl │ │ ├── find_star.cwl │ │ ├── aperture_photometry.cwl │ │ ├── combine.cwl │ │ ├── cone_search.cwl │ │ ├── flattening.cwl │ │ ├── astrometry.cwl │ │ ├── dark.cwl │ │ ├── photo_correction.cwl │ │ ├── flat.cwl │ │ └── scatter.cwl │ ├── config.yml │ ├── 
master.cwl │ ├── download_data.sh │ ├── pre_correction.cwl │ └── stack.cwl │ └── streamflow.yml ├── docs ├── logo.png ├── source │ ├── images │ │ ├── streamflow-model.png │ │ └── streamflow_logo.png │ ├── ext │ │ ├── fault-tolerance.rst │ │ ├── binding-filter.rst │ │ ├── cwl-docker-translator.rst │ │ └── deployment-manager.rst │ ├── cwl │ │ ├── docker │ │ │ ├── docker.rst │ │ │ ├── kubernetes.rst │ │ │ ├── singularity.rst │ │ │ └── no-container.rst │ │ ├── cwl-runner.rst │ │ └── docker-requirement.rst │ ├── connector │ │ ├── container.rst │ │ ├── helm3.rst │ │ ├── docker.rst │ │ ├── kubernetes.rst │ │ ├── singularity.rst │ │ ├── docker-compose.rst │ │ ├── ssh.rst │ │ ├── queue-manager.rst │ │ ├── occam.rst │ │ ├── flux.rst │ │ ├── pbs.rst │ │ └── slurm.rst │ ├── _static │ │ └── theme_overrides.css │ ├── guide │ │ ├── install.rst │ │ ├── deployments.rst │ │ ├── cwl.rst │ │ ├── architecture.rst │ │ └── inspect.rst │ ├── advanced │ │ ├── multiple-targets.rst │ │ ├── port-targets.rst │ │ └── stacked-locations.rst │ └── index.rst ├── make.bat ├── Makefile └── .wci.yml ├── .flake8 ├── codecov.yml ├── .github ├── kind │ └── config.yaml ├── dependabot.yml ├── codeql │ └── config.yml └── workflows │ └── release.yml ├── helm └── chart │ ├── Chart.yaml │ ├── templates │ ├── NOTES.txt │ ├── serviceaccount.yaml │ ├── job.yaml │ └── _helpers.tpl │ ├── .helmignore │ └── values.yaml ├── antlr4-generator.sh ├── Makefile ├── tox.ini ├── CITATION.cff ├── Dockerfile ├── .dockerignore ├── .gitignore └── cwl-conformance-test.sh /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /streamflow/core/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /streamflow/cwl/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /streamflow/ext/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /streamflow/cwl/antlr/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /streamflow/workflow/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /streamflow/cwl/requirement/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /streamflow/recovery/policy/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE 2 | include README.md -------------------------------------------------------------------------------- /examples/failure/cwl/data/extra_nums/extra0.txt: -------------------------------------------------------------------------------- 1 | 0 2 | -------------------------------------------------------------------------------- /examples/failure/cwl/data/extra_nums/extra2.txt: -------------------------------------------------------------------------------- 1 | 2 2 | 
-------------------------------------------------------------------------------- /streamflow/version.py: -------------------------------------------------------------------------------- 1 | VERSION = "0.2.0.dev13" 2 | -------------------------------------------------------------------------------- /examples/failure/cwl/data/extra_nums/extra1.txt: -------------------------------------------------------------------------------- 1 | 1 2 | -------------------------------------------------------------------------------- /tests/data/cwl/example/config.yml: -------------------------------------------------------------------------------- 1 | message: Hello World! 2 | -------------------------------------------------------------------------------- /docs/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-unito/streamflow/HEAD/docs/logo.png -------------------------------------------------------------------------------- /streamflow/__init__.py: -------------------------------------------------------------------------------- 1 | from streamflow.version import VERSION 2 | 3 | __version__ = VERSION 4 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | exclude = streamflow/cwl/antlr 3 | max-line-length = 88 4 | extend-ignore = E203,E501 -------------------------------------------------------------------------------- /examples/flux/cwl/config.yml: -------------------------------------------------------------------------------- 1 | source_file: 2 | class: File 3 | path: data/cs.cxx 4 | num_processes: 2 5 | -------------------------------------------------------------------------------- /examples/mpi/cwl/config.yml: -------------------------------------------------------------------------------- 1 | source_file: 2 | class: File 3 | path: data/cs.cxx 4 | num_processes: 
2 5 | -------------------------------------------------------------------------------- /tests/data/sqlite/sqlite.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-unito/streamflow/HEAD/tests/data/sqlite/sqlite.db -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | codecov: 2 | require_ci_to_pass: true 3 | coverage: 4 | status: 5 | project: off 6 | patch: off 7 | -------------------------------------------------------------------------------- /docs/source/images/streamflow-model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-unito/streamflow/HEAD/docs/source/images/streamflow-model.png -------------------------------------------------------------------------------- /docs/source/images/streamflow_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-unito/streamflow/HEAD/docs/source/images/streamflow_logo.png -------------------------------------------------------------------------------- /tests/cwl-conformance/streamflow-docker.yml: -------------------------------------------------------------------------------- 1 | version: v1.0 2 | database: 3 | type: default 4 | config: 5 | connection: ":memory:" -------------------------------------------------------------------------------- /examples/failure/cwl/data/num_file.txt: -------------------------------------------------------------------------------- 1 | 100 43 2 | 101 3 | 102 342 3242 45 56 7 987 4 | 103 5 | 3241 6 | 321 4342 4245 7 | 333 4 8 | 103 343 434 9 | 104 -------------------------------------------------------------------------------- /tests/data/deployment/docker-compose/docker-compose.yml: 
-------------------------------------------------------------------------------- 1 | version: "3.8" 2 | services: 3 | alpine: 4 | image: alpine:3.16.2 5 | stdin_open: true -------------------------------------------------------------------------------- /.github/kind/config.yaml: -------------------------------------------------------------------------------- 1 | kind: Cluster 2 | apiVersion: kind.x-k8s.io/v1alpha4 3 | networking: 4 | disableDefaultCNI: true 5 | podSubnet: 192.168.0.0/16 -------------------------------------------------------------------------------- /streamflow/__main__.py: -------------------------------------------------------------------------------- 1 | """Default entrypoint for the streamflow module.""" 2 | 3 | import sys 4 | 5 | from streamflow import main 6 | 7 | main.main(sys.argv[1:]) 8 | -------------------------------------------------------------------------------- /helm/chart/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v2 2 | name: streamflow 3 | description: A Helm chart for StreamFlow 4 | type: application 5 | version: 0.2.0 6 | appVersion: latest 7 | -------------------------------------------------------------------------------- /examples/munipack/environment/helm/stacking/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | appVersion: "1.0" 3 | description: A Helm chart for stacking 4 | name: stacking 5 | version: 0.1.0 6 | -------------------------------------------------------------------------------- /examples/mpi/environment/helm/openmpi/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | appVersion: "1.0" 3 | description: A Helm chart for OpenMPI executions 4 | name: openmpi 5 | version: 0.1.0 6 | -------------------------------------------------------------------------------- /tests/data/cwl/example/streamflow.yml: 
-------------------------------------------------------------------------------- 1 | version: v1.0 2 | workflows: 3 | extract-and-compile: 4 | type: cwl 5 | config: 6 | file: main.cwl 7 | settings: config.yml 8 | -------------------------------------------------------------------------------- /examples/failure/cwl/config.yml: -------------------------------------------------------------------------------- 1 | num_file: 2 | class: File 3 | path: data/num_file.txt 4 | 5 | main_d: 6 | class: Directory 7 | path: data/extra_nums 8 | 9 | worker_n: 3 10 | -------------------------------------------------------------------------------- /examples/failure/environment/helm/failure/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | appVersion: "1.0" 3 | description: A Helm chart to simulate failures in workflow steps 4 | name: failure 5 | version: 0.1.0 6 | -------------------------------------------------------------------------------- /docs/source/ext/fault-tolerance.rst: -------------------------------------------------------------------------------- 1 | =============== 2 | Fault tolerance 3 | =============== 4 | 5 | CheckpointManager 6 | ================= 7 | 8 | WIP 9 | 10 | FailureManager 11 | ============== 12 | 13 | WIP -------------------------------------------------------------------------------- /examples/munipack/environment/occam/occamfile.yml: -------------------------------------------------------------------------------- 1 | debian: 2 | image: icolonne/streamflow-tests:munipack 3 | command: ["tail", "-f", "/dev/null"] 4 | nodes: 5 | - node22 6 | workdir: /archive/home/icolonne/munipack 7 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "github-actions" 4 | directory: "/" 5 | schedule: 6 | interval: "daily" 
7 | - package-ecosystem: "uv" 8 | directory: "/" 9 | schedule: 10 | interval: "daily" -------------------------------------------------------------------------------- /streamflow/data/schemas/data_manager.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://json-schema.org/draft/2020-12/schema", 3 | "$id": "https://streamflow.di.unito.it/schemas/data/data_manager.json", 4 | "type": "object", 5 | "properties": {}, 6 | "additionalProperties": false 7 | } -------------------------------------------------------------------------------- /tests/utils/data.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import tests 4 | 5 | 6 | def get_data_path(*args: str) -> Path: 7 | path = Path(tests.__file__).parent.joinpath("data") 8 | for arg in args: 9 | path = path.joinpath(arg) 10 | return path 11 | -------------------------------------------------------------------------------- /examples/failure/environment/helm/failure/values.yaml: -------------------------------------------------------------------------------- 1 | # Declare variables to be passed into your templates. 
2 | 3 | replicaCount: 2 4 | 5 | image: 6 | repository: mul8/sf-failure 7 | tag: latest 8 | pullPolicy: Always 9 | 10 | nameOverride: "" 11 | fullnameOverride: "" 12 | -------------------------------------------------------------------------------- /streamflow/deployment/filter/schemas/shuffle.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://json-schema.org/draft/2020-12/schema", 3 | "$id": "https://streamflow.di.unito.it/schemas/deployment/filter/shuffle.json", 4 | "type": "object", 5 | "properties": {}, 6 | "additionalProperties": false 7 | } -------------------------------------------------------------------------------- /examples/munipack/environment/helm/stacking/values.yaml: -------------------------------------------------------------------------------- 1 | # Declare variables to be passed into your templates. 2 | 3 | replicaCount: 2 4 | 5 | image: 6 | repository: gaet2/munipack-test 7 | tag: latest 8 | pullPolicy: Always 9 | 10 | nameOverride: "" 11 | fullnameOverride: "" 12 | -------------------------------------------------------------------------------- /streamflow/deployment/connector/schemas/ssh.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://json-schema.org/draft/2020-12/schema", 3 | "$id": "https://streamflow.di.unito.it/schemas/deployment/connector/ssh.json", 4 | "type": "object", 5 | "$ref": "base/ssh.json", 6 | "unevaluatedProperties": false 7 | } -------------------------------------------------------------------------------- /streamflow/scheduling/policy/schemas/data_locality.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://json-schema.org/draft/2020-12/schema", 3 | "$id": "https://streamflow.di.unito.it/schemas/scheduling/data_locality.json", 4 | "type": "object", 5 | "properties": {}, 6 | "additionalProperties": false 7 | } 
-------------------------------------------------------------------------------- /streamflow/deployment/schemas/deployment_manager.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://json-schema.org/draft/2020-12/schema", 3 | "$id": "https://streamflow.di.unito.it/schemas/deployment/depoloyment_manager.json", 4 | "type": "object", 5 | "properties": {}, 6 | "additionalProperties": false 7 | } -------------------------------------------------------------------------------- /streamflow/recovery/schemas/dummy_failure_manager.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://json-schema.org/draft/2020-12/schema", 3 | "$id": "https://streamflow.di.unito.it/schemas/recovery/dummy_failure_manager.json", 4 | "type": "object", 5 | "properties": {}, 6 | "additionalProperties": false 7 | } -------------------------------------------------------------------------------- /streamflow/recovery/schemas/dummy_checkpoint_manager.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://json-schema.org/draft/2020-12/schema", 3 | "$id": "https://streamflow.di.unito.it/schemas/recovery/dummy_checkpoint_manager.json", 4 | "type": "object", 5 | "properties": {}, 6 | "additionalProperties": false 7 | } -------------------------------------------------------------------------------- /streamflow/cwl/requirement/docker/schemas/no-container.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://json-schema.org/draft/2020-12/schema", 3 | "$id": "https://streamflow.di.unito.it/schemas/cwl/requirement/docker/no-container.json", 4 | "type": "object", 5 | "properties": {}, 6 | "additionalProperties": false 7 | } -------------------------------------------------------------------------------- /tests/data/cwl/example/main.cwl: 
-------------------------------------------------------------------------------- 1 | cwlVersion: v1.2 2 | class: ExpressionTool 3 | 4 | requirements: 5 | InlineJavascriptRequirement: {} 6 | 7 | inputs: 8 | message: string 9 | outputs: 10 | uppercase_message: string 11 | 12 | expression: | 13 | ${ return {"uppercase_message": inputs.message.toUpperCase()}; } -------------------------------------------------------------------------------- /streamflow/scheduling/policy/__init__.py: -------------------------------------------------------------------------------- 1 | from collections.abc import MutableMapping 2 | 3 | from streamflow.core.scheduling import Policy 4 | from streamflow.scheduling.policy.data_locality import DataLocalityPolicy 5 | 6 | policy_classes: MutableMapping[str, type[Policy]] = { 7 | "data_locality": DataLocalityPolicy 8 | } 9 | -------------------------------------------------------------------------------- /tests/cwl-conformance/streamflow-kubernetes.yml: -------------------------------------------------------------------------------- 1 | version: v1.0 2 | workflows: 3 | conformance-test: 4 | config: 5 | docker: 6 | - step: / 7 | deployment: 8 | type: kubernetes 9 | config: {} 10 | database: 11 | type: default 12 | config: 13 | connection: ":memory:" -------------------------------------------------------------------------------- /tests/cwl-conformance/streamflow-singularity.yml: -------------------------------------------------------------------------------- 1 | version: v1.0 2 | workflows: 3 | conformance-test: 4 | config: 5 | docker: 6 | - step: / 7 | deployment: 8 | type: singularity 9 | config: {} 10 | database: 11 | type: default 12 | config: 13 | connection: ":memory:" -------------------------------------------------------------------------------- /streamflow/cwl/requirement/docker/schemas/docker.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": 
"https://json-schema.org/draft/2020-12/schema", 3 | "$id": "https://streamflow.di.unito.it/schemas/cwl/requirement/docker/docker.json", 4 | "type": "object", 5 | "$ref": "/schemas/deployment/connector/base/docker.json", 6 | "unevaluatedProperties": false 7 | } -------------------------------------------------------------------------------- /streamflow/scheduling/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from collections.abc import MutableMapping 4 | 5 | from streamflow.core.scheduling import Scheduler 6 | from streamflow.scheduling.scheduler import DefaultScheduler 7 | 8 | scheduler_classes: MutableMapping[str, type[Scheduler]] = {"default": DefaultScheduler} 9 | -------------------------------------------------------------------------------- /streamflow/cwl/requirement/docker/schemas/singularity.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://json-schema.org/draft/2020-12/schema", 3 | "$id": "https://streamflow.di.unito.it/schemas/cwl/requirement/docker/singularity.json", 4 | "type": "object", 5 | "$ref": "/schemas/deployment/connector/base/singularity.json", 6 | "unevaluatedProperties": false 7 | } -------------------------------------------------------------------------------- /streamflow/data/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from collections.abc import MutableMapping 4 | 5 | from streamflow.core.data import DataManager 6 | from streamflow.data.manager import DefaultDataManager 7 | 8 | data_manager_classes: MutableMapping[str, type[DataManager]] = { 9 | "default": DefaultDataManager 10 | } 11 | -------------------------------------------------------------------------------- /helm/chart/templates/NOTES.txt: -------------------------------------------------------------------------------- 1 | 
1. Get the application URL by running these commands: 2 | export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "streamflow.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}") 3 | kubectl --namespace {{ .Release.Namespace }} exec -it $POD_NAME sh 4 | -------------------------------------------------------------------------------- /streamflow/persistence/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from collections.abc import MutableMapping 4 | 5 | from streamflow.core.persistence import Database 6 | from streamflow.persistence.sqlite import SqliteDatabase 7 | 8 | database_classes: MutableMapping[str, type[Database]] = { 9 | "default": SqliteDatabase, 10 | "sqlite": SqliteDatabase, 11 | } 12 | -------------------------------------------------------------------------------- /streamflow/deployment/filter/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from collections.abc import MutableMapping 4 | 5 | from streamflow.core.deployment import BindingFilter 6 | from streamflow.deployment.filter.shuffle import ShuffleBindingFilter 7 | 8 | binding_filter_classes: MutableMapping[str, type[BindingFilter]] = { 9 | "shuffle": ShuffleBindingFilter 10 | } 11 | -------------------------------------------------------------------------------- /streamflow/deployment/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from collections.abc import MutableMapping 4 | 5 | from streamflow.core.deployment import DeploymentManager 6 | from streamflow.deployment.manager import DefaultDeploymentManager 7 | 8 | deployment_manager_classes: MutableMapping[str, type[DeploymentManager]] = { 9 | "default": 
DefaultDeploymentManager 10 | } 11 | -------------------------------------------------------------------------------- /tests/utils/pod.jinja2: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Pod 3 | metadata: 4 | name: {{ name }} 5 | labels: 6 | name: {{ name }} 7 | spec: 8 | containers: 9 | - name: sf-test 10 | image: alpine:3.16.2 11 | stdin: true 12 | volumeMounts: 13 | - mountPath: /tmp/streamflow 14 | name: sf-volume 15 | volumes: 16 | - name: sf-volume 17 | emptyDir: {} 18 | -------------------------------------------------------------------------------- /examples/munipack/cwl/clt/find.cwl: -------------------------------------------------------------------------------- 1 | cwlVersion: v1.1 2 | class: CommandLineTool 3 | baseCommand: ["ls", "-la"] 4 | 5 | inputs: 6 | main_directory: 7 | type: Directory 8 | loadListing: shallow_listing 9 | inputBinding: 10 | position: 1 11 | 12 | outputs: 13 | files_array: 14 | type: File[] 15 | outputBinding: 16 | outputEval: '$(inputs.main_directory.listing)' 17 | 18 | -------------------------------------------------------------------------------- /examples/failure/cwl/clt/find.cwl: -------------------------------------------------------------------------------- 1 | cwlVersion: v1.1 2 | class: CommandLineTool 3 | baseCommand: [ "ls", "-la" ] 4 | 5 | inputs: 6 | main_directory: 7 | type: Directory 8 | loadListing: shallow_listing 9 | inputBinding: 10 | position: 1 11 | 12 | outputs: 13 | files_array: 14 | type: File[] 15 | outputBinding: 16 | outputEval: '$(inputs.main_directory.listing)' 17 | 18 | -------------------------------------------------------------------------------- /helm/chart/templates/serviceaccount.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.serviceAccount.create -}} 2 | apiVersion: v1 3 | kind: ServiceAccount 4 | metadata: 5 | name: {{ include "streamflow.serviceAccountName" . 
}} 6 | labels: 7 | {{- include "streamflow.labels" . | nindent 4 }} 8 | {{- with .Values.serviceAccount.annotations }} 9 | annotations: 10 | {{- toYaml . | nindent 4 }} 11 | {{- end }} 12 | {{- end -}} 13 | -------------------------------------------------------------------------------- /examples/mpi/environment/helm/openmpi/templates/secrets.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Secret 3 | metadata: 4 | name: {{ .Release.Name }}-ssh-key 5 | labels: 6 | {{ include "openmpi.labels" . | indent 4 }} 7 | data: 8 | id_rsa: {{ .Values.sshKey.id_rsa | b64enc | quote }} 9 | id_rsa.pub: {{ .Values.sshKey.id_rsa_pub | b64enc | quote }} 10 | authorized_keys: {{ .Values.sshKey.id_rsa_pub | b64enc | quote }} 11 | -------------------------------------------------------------------------------- /streamflow/provenance/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from collections.abc import MutableMapping 4 | 5 | from streamflow.core.provenance import ProvenanceManager 6 | from streamflow.provenance.run_crate import CWLRunCrateProvenanceManager 7 | 8 | prov_classes: MutableMapping[str, MutableMapping[str, type[ProvenanceManager]]] = { 9 | "run_crate": {"cwl": CWLRunCrateProvenanceManager} 10 | } 11 | -------------------------------------------------------------------------------- /examples/munipack/cwl/clt/find_star.cwl: -------------------------------------------------------------------------------- 1 | cwlVersion: v1.1 2 | class: CommandLineTool 3 | baseCommand: ["munipack", "find", "-O", "--mask"] 4 | arguments: 5 | - position: 1 6 | valueFrom: 'find_\\0' 7 | 8 | inputs: 9 | frames: 10 | type: File[] 11 | inputBinding: 12 | position: 2 13 | fwmh: int 14 | 15 | outputs: 16 | find_frames: 17 | type: File[] 18 | outputBinding: 19 | glob: "find_*.fits" 
-------------------------------------------------------------------------------- /examples/munipack/cwl/clt/aperture_photometry.cwl: -------------------------------------------------------------------------------- 1 | cwlVersion: v1.1 2 | class: CommandLineTool 3 | baseCommand: ["munipack", "aphot", "-O", "--mask"] 4 | arguments: 5 | - position: 1 6 | valueFrom: 'aphot_\\0' 7 | 8 | inputs: 9 | find_frames: 10 | type: File[] 11 | inputBinding: 12 | position: 2 13 | 14 | outputs: 15 | aphot_frames: 16 | type: File[] 17 | outputBinding: 18 | glob: "aphot_*.fits" 19 | -------------------------------------------------------------------------------- /examples/munipack/cwl/clt/combine.cwl: -------------------------------------------------------------------------------- 1 | cwlVersion: v1.1 2 | class: CommandLineTool 3 | baseCommand: ["munipack", "kombine"] 4 | arguments: 5 | - position: 1 6 | valueFrom: "./processed.fits" 7 | prefix: -o 8 | 9 | inputs: 10 | stack_frames: 11 | type: File[] 12 | inputBinding: 13 | position: 2 14 | 15 | outputs: 16 | processed_frame: 17 | type: File 18 | outputBinding: 19 | glob: "processed.fits" -------------------------------------------------------------------------------- /examples/flux/streamflow.yml: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env streamflow 2 | version: v1.0 3 | workflows: 4 | master: 5 | type: cwl 6 | config: 7 | file: cwl/main.cwl 8 | settings: cwl/config.yml 9 | bindings: 10 | - step: /compile 11 | target: 12 | deployment: dc-mpi 13 | - step: /execute 14 | target: 15 | deployment: dc-mpi 16 | deployments: 17 | dc-mpi: 18 | type: flux 19 | config: {} 20 | -------------------------------------------------------------------------------- /streamflow/recovery/schemas/default_checkpoint_manager.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://json-schema.org/draft/2020-12/schema", 3 | "$id": 
"https://streamflow.di.unito.it/schemas/recovery/default_checkpoint_manager.json", 4 | "type": "object", 5 | "properties": { 6 | "checkpoint_dir": { 7 | "type": "string", 8 | "description": "Directory where checkpoint data must be placed" 9 | } 10 | }, 11 | "additionalProperties": false 12 | } -------------------------------------------------------------------------------- /examples/flux/cwl/clt/execute.cwl: -------------------------------------------------------------------------------- 1 | cwlVersion: v1.1 2 | class: CommandLineTool 3 | requirements: 4 | ShellCommandRequirement: {} 5 | baseCommand: ["flux", "run"] 6 | stdout: mpi_output.log 7 | inputs: 8 | num_processes: 9 | type: int 10 | inputBinding: 11 | position: 1 12 | prefix: '-n' 13 | executable_file: 14 | type: File 15 | inputBinding: 16 | position: 4 17 | 18 | outputs: 19 | mpi_output: 20 | type: stdout 21 | -------------------------------------------------------------------------------- /helm/chart/.helmignore: -------------------------------------------------------------------------------- 1 | # Patterns to ignore when building packages. 2 | # This supports shell glob matching, relative path matching, and 3 | # negation (prefixed with !). Only one pattern per line. 4 | .DS_Store 5 | # Common VCS dirs 6 | .git/ 7 | .gitignore 8 | .bzr/ 9 | .bzrignore 10 | .hg/ 11 | .hgignore 12 | .svn/ 13 | # Common backup files 14 | *.swp 15 | *.bak 16 | *.tmp 17 | *.orig 18 | *~ 19 | # Various IDEs 20 | .project 21 | .idea/ 22 | *.tmproj 23 | .vscode/ 24 | -------------------------------------------------------------------------------- /examples/mpi/environment/helm/openmpi/_helmignore: -------------------------------------------------------------------------------- 1 | # Patterns to ignore when building packages. 2 | # This supports shell glob matching, relative path matching, and 3 | # negation (prefixed with !). Only one pattern per line. 
4 | .DS_Store 5 | # Common VCS dirs 6 | .git/ 7 | .gitignore 8 | .bzr/ 9 | .bzrignore 10 | .hg/ 11 | .hgignore 12 | .svn/ 13 | # Common backup files 14 | *.swp 15 | *.bak 16 | *.tmp 17 | *~ 18 | # Various IDEs 19 | .project 20 | .idea/ 21 | *.tmproj 22 | .vscode/ 23 | -------------------------------------------------------------------------------- /examples/failure/environment/helm/failure/_helmignore: -------------------------------------------------------------------------------- 1 | # Patterns to ignore when building packages. 2 | # This supports shell glob matching, relative path matching, and 3 | # negation (prefixed with !). Only one pattern per line. 4 | .DS_Store 5 | # Common VCS dirs 6 | .git/ 7 | .gitignore 8 | .bzr/ 9 | .bzrignore 10 | .hg/ 11 | .hgignore 12 | .svn/ 13 | # Common backup files 14 | *.swp 15 | *.bak 16 | *.tmp 17 | *~ 18 | # Various IDEs 19 | .project 20 | .idea/ 21 | *.tmproj 22 | .vscode/ 23 | -------------------------------------------------------------------------------- /examples/mpi/cwl/clt/compile.cwl: -------------------------------------------------------------------------------- 1 | cwlVersion: v1.1 2 | class: CommandLineTool 3 | baseCommand: ["mpicxx"] 4 | arguments: 5 | - position: 1 6 | valueFrom: '-O3' 7 | - position: 2 8 | valueFrom: '$(inputs.source_file.nameroot)' 9 | prefix: '-o' 10 | 11 | inputs: 12 | source_file: 13 | type: File 14 | inputBinding: 15 | position: 3 16 | 17 | outputs: 18 | executable_file: 19 | type: File 20 | outputBinding: 21 | glob: '$(inputs.source_file.nameroot)' -------------------------------------------------------------------------------- /examples/munipack/environment/helm/stacking/_helmignore: -------------------------------------------------------------------------------- 1 | # Patterns to ignore when building packages. 2 | # This supports shell glob matching, relative path matching, and 3 | # negation (prefixed with !). Only one pattern per line. 
4 | .DS_Store 5 | # Common VCS dirs 6 | .git/ 7 | .gitignore 8 | .bzr/ 9 | .bzrignore 10 | .hg/ 11 | .hgignore 12 | .svn/ 13 | # Common backup files 14 | *.swp 15 | *.bak 16 | *.tmp 17 | *~ 18 | # Various IDEs 19 | .project 20 | .idea/ 21 | *.tmproj 22 | .vscode/ 23 | -------------------------------------------------------------------------------- /examples/flux/cwl/clt/compile.cwl: -------------------------------------------------------------------------------- 1 | cwlVersion: v1.1 2 | class: CommandLineTool 3 | baseCommand: ["mpicxx"] 4 | arguments: 5 | - position: 1 6 | valueFrom: '-O3' 7 | - position: 2 8 | valueFrom: '$(inputs.source_file.nameroot)' 9 | prefix: '-o' 10 | 11 | inputs: 12 | source_file: 13 | type: File 14 | inputBinding: 15 | position: 3 16 | 17 | outputs: 18 | executable_file: 19 | type: File 20 | outputBinding: 21 | glob: '$(inputs.source_file.nameroot)' -------------------------------------------------------------------------------- /streamflow/scheduling/schemas/scheduler.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://json-schema.org/draft/2020-12/schema", 3 | "$id": "https://streamflow.di.unito.it/schemas/scheduling/scheduler.json", 4 | "type": "object", 5 | "properties": { 6 | "retry_delay": { 7 | "type": "integer", 8 | "title": "Retry delay", 9 | "description": "Time (in seconds) to wait before retrying to schedule a task.", 10 | "default": 0 11 | } 12 | }, 13 | "additionalProperties": false 14 | } -------------------------------------------------------------------------------- /examples/munipack/cwl/clt/cone_search.cwl: -------------------------------------------------------------------------------- 1 | cwlVersion: v1.1 2 | class: CommandLineTool 3 | baseCommand: ["munipack", "cone"] 4 | arguments: 5 | - position: 1 6 | prefix: -r 7 | valueFrom: '$(inputs.deg)' 8 | - position: 2 9 | valueFrom: '$(inputs.ra)' 10 | - position: 3 11 | valueFrom: '$(inputs.dec)' 12 | 13 | 
inputs: 14 | ra: float 15 | dec: float 16 | deg: float 17 | 18 | outputs: 19 | conefile: 20 | type: File 21 | outputBinding: 22 | glob: "cone.fits" 23 | -------------------------------------------------------------------------------- /docs/source/cwl/docker/docker.rst: -------------------------------------------------------------------------------- 1 | ========================= 2 | DockerCWLDockerTranslator 3 | ========================= 4 | 5 | The Docker :ref:`CWLDockerTranslator ` instantiates a :ref:`DockerConnector ` instance with the given configuration for every CWL :ref:`DockerRequirement ` specification in the selected subworkflow. 6 | 7 | .. jsonschema:: https://streamflow.di.unito.it/schemas/cwl/requirement/docker/docker.json 8 | :lift_description: true 9 | -------------------------------------------------------------------------------- /helm/chart/values.yaml: -------------------------------------------------------------------------------- 1 | replicaCount: 1 2 | 3 | image: 4 | repository: alphaunito/streamflow 5 | pullPolicy: Always 6 | 7 | args: ["streamflow", "version"] 8 | restartPolicy: OnFailure 9 | imagePullSecrets: [] 10 | nameOverride: "" 11 | fullnameOverride: "" 12 | 13 | serviceAccount: 14 | create: true 15 | annotations: {} 16 | name: 17 | 18 | podSecurityContext: {} 19 | 20 | securityContext: {} 21 | 22 | resources: {} 23 | 24 | nodeSelector: {} 25 | 26 | tolerations: [] 27 | 28 | affinity: {} 29 | -------------------------------------------------------------------------------- /tests/utils/cwl.py: -------------------------------------------------------------------------------- 1 | import cwl_utils.parser.cwl_v1_0 2 | import cwl_utils.parser.cwl_v1_1 3 | import cwl_utils.parser.cwl_v1_2 4 | 5 | 6 | def get_cwl_parser(version: str): 7 | match version: 8 | case "v1.0": 9 | return cwl_utils.parser.cwl_v1_0 10 | case "v1.1": 11 | return cwl_utils.parser.cwl_v1_1 12 | case "v1.2": 13 | return cwl_utils.parser.cwl_v1_2 14 | case _: 15 | raise 
ValueError(f"Unsupported CWL version {version}") 16 | -------------------------------------------------------------------------------- /streamflow/deployment/connector/schemas/docker.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://json-schema.org/draft/2020-12/schema", 3 | "$id": "https://streamflow.di.unito.it/schemas/deployment/connector/docker.json", 4 | "type": "object", 5 | "$ref": "base/docker.json", 6 | "properties": { 7 | "image": { 8 | "type": "string", 9 | "title": "Image", 10 | "description": "The name of the Docker image to run" 11 | } 12 | }, 13 | "required": [ 14 | "image" 15 | ], 16 | "unevaluatedProperties": false 17 | } -------------------------------------------------------------------------------- /docs/source/cwl/docker/kubernetes.rst: -------------------------------------------------------------------------------- 1 | ============================= 2 | KubernetesCWLDockerTranslator 3 | ============================= 4 | 5 | The Kubernetes :ref:`CWLDockerTranslator ` instantiates a :ref:`KubernetesConnector ` instance with the given configuration for every CWL :ref:`DockerRequirement ` specification in the selected subworkflow. 6 | 7 | .. 
jsonschema:: https://streamflow.di.unito.it/schemas/cwl/requirement/docker/kubernetes.json 8 | :lift_description: true 9 | -------------------------------------------------------------------------------- /examples/munipack/cwl/config.yml: -------------------------------------------------------------------------------- 1 | main_d: 2 | class: Directory 3 | path: data/main 4 | 5 | worker_n: 3 6 | 7 | dark_file_n: averageDark120.fits 8 | dark_flat_n: averageDark10.fits 9 | flat_file_n: masterFlat.fits 10 | 11 | dark_frames_d: 12 | class: Directory 13 | path: data/dark120 14 | 15 | dark_flat_frames_d: 16 | class: Directory 17 | path: data/dark10 18 | 19 | flat_frames_d: 20 | class: Directory 21 | path: data/flat 22 | 23 | fwmh_par: 7 24 | right_ascension: 111.205 25 | declination: 71.238 26 | degree: 0.3 27 | -------------------------------------------------------------------------------- /docs/source/cwl/docker/singularity.rst: -------------------------------------------------------------------------------- 1 | ============================== 2 | SingularityCWLDockerTranslator 3 | ============================== 4 | 5 | The Singularity :ref:`CWLDockerTranslator ` instantiates a :ref:`SingularityConnector ` instance with the given configuration for every CWL :ref:`DockerRequirement ` specification in the selected subworkflow. 6 | 7 | .. 
jsonschema:: https://streamflow.di.unito.it/schemas/cwl/requirement/docker/singularity.json 8 | :lift_description: true 9 | -------------------------------------------------------------------------------- /streamflow/deployment/connector/schemas/local.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://json-schema.org/draft/2020-12/schema", 3 | "$id": "https://streamflow.di.unito.it/schemas/deployment/connector/local.json", 4 | "type": "object", 5 | "properties": { 6 | "transferBufferSize": { 7 | "type": "integer", 8 | "title": "Transfer buffer size", 9 | "description": "Buffer size allocated for local and remote data transfers", 10 | "default": 65536, 11 | "minimum": 1 12 | } 13 | }, 14 | "additionalProperties": false 15 | } -------------------------------------------------------------------------------- /streamflow/deployment/connector/schemas/singularity.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://json-schema.org/draft/2020-12/schema", 3 | "$id": "https://streamflow.di.unito.it/schemas/deployment/connector/singularity.json", 4 | "type": "object", 5 | "$ref": "base/singularity.json", 6 | "properties": { 7 | "image": { 8 | "type": "string", 9 | "title": "Image", 10 | "description": "The name of the Singularity image to run" 11 | } 12 | }, 13 | "required": [ 14 | "image" 15 | ], 16 | "unevaluatedProperties": false 17 | } -------------------------------------------------------------------------------- /.github/codeql/config.yml: -------------------------------------------------------------------------------- 1 | name: "StreamFlow CodeQL configuration" 2 | queries: 3 | - uses: security-and-quality 4 | paths-ignore: 5 | - tests 6 | - streamflow/cwl/antlr 7 | query-filters: 8 | # Reason: this rule targets XSS, which is not a concern here 9 | - exclude: 10 | id: py/jinja2/autoescape-false 11 | # Reason: false positive on function body 
ellipsis (issue 11351) 12 | - exclude: 13 | id: py/ineffectual-statement 14 | # Reason: no support for the TYPE_CHECKING directive (issue 4258) 15 | - exclude: 16 | id: py/unsafe-cyclic-import 17 | -------------------------------------------------------------------------------- /examples/munipack/cwl/clt/flattening.cwl: -------------------------------------------------------------------------------- 1 | cwlVersion: v1.1 2 | class: ExpressionTool 3 | 4 | inputs: 5 | inputArray: 6 | type: 7 | type: array 8 | items: [{type: array, items: File}] 9 | 10 | outputs: 11 | flattenedArray: 12 | type: File[] 13 | 14 | expression: > 15 | ${ 16 | var flatArray= []; 17 | for (var i = 0; i < inputs.inputArray.length; i++) { 18 | for (var k = 0; k < inputs.inputArray[i].length; k++) { 19 | flatArray.push((inputs.inputArray[i])[k]); 20 | } 21 | } 22 | return { 'flattenedArray' : flatArray } 23 | } -------------------------------------------------------------------------------- /streamflow/core/exception.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | 4 | class ProcessorTypeError(Exception): 5 | pass 6 | 7 | 8 | class WorkflowException(Exception): 9 | pass 10 | 11 | 12 | class WorkflowDefinitionException(WorkflowException): 13 | pass 14 | 15 | 16 | class WorkflowExecutionException(WorkflowException): 17 | pass 18 | 19 | 20 | class WorkflowProvenanceException(WorkflowException): 21 | pass 22 | 23 | 24 | class FailureHandlingException(WorkflowException): 25 | pass 26 | 27 | 28 | class InvalidPluginException(Exception): 29 | pass 30 | -------------------------------------------------------------------------------- /examples/munipack/cwl/clt/astrometry.cwl: -------------------------------------------------------------------------------- 1 | cwlVersion: v1.1 2 | class: CommandLineTool 3 | baseCommand: ["munipack", "astrometry", "-O", "--mask"] 4 | arguments: 5 | - position: 1 6 | valueFrom: 'astro_\\0' 7 
| 8 | requirements: 9 | InitialWorkDirRequirement: 10 | listing: 11 | - '$(inputs.conefile)' 12 | 13 | inputs: 14 | conefile: File 15 | 16 | aphot_frame: 17 | type: File 18 | inputBinding: 19 | position: 2 20 | 21 | outputs: 22 | astrometry_frame: 23 | type: File 24 | outputBinding: 25 | glob: "astro_$(inputs.aphot_frame.nameroot).fits" -------------------------------------------------------------------------------- /streamflow/cwl/requirement/docker/schemas/kubernetes.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://json-schema.org/draft/2020-12/schema", 3 | "$id": "https://streamflow.di.unito.it/schemas/cwl/requirement/docker/kubernetes.json", 4 | "type": "object", 5 | "$ref": "/schemas/deployment/connector/base/kubernetes.json", 6 | "properties": { 7 | "file": { 8 | "type": "string", 9 | "description": "Path to a file containing a Jinja2 template, describing how the Docker container should be deployed on Kubernetes", 10 | "default": "./kubernetes.jinja2" 11 | } 12 | }, 13 | "unevaluatedProperties": false 14 | } -------------------------------------------------------------------------------- /examples/mpi/cwl/clt/execute.cwl: -------------------------------------------------------------------------------- 1 | cwlVersion: v1.1 2 | class: CommandLineTool 3 | requirements: 4 | ShellCommandRequirement: {} 5 | baseCommand: ["mpirun"] 6 | arguments: 7 | - position: 2 8 | valueFrom: '$STREAMFLOW_HOSTS' 9 | prefix: '--host' 10 | shellQuote: false 11 | - position: 3 12 | valueFrom: '--allow-run-as-root' 13 | stdout: mpi_output.log 14 | inputs: 15 | num_processes: 16 | type: int 17 | inputBinding: 18 | position: 1 19 | prefix: '-np' 20 | executable_file: 21 | type: File 22 | inputBinding: 23 | position: 4 24 | 25 | outputs: 26 | mpi_output: 27 | type: stdout -------------------------------------------------------------------------------- /examples/munipack/cwl/clt/dark.cwl: 
-------------------------------------------------------------------------------- 1 | cwlVersion: v1.1 2 | id: dark 3 | label: average dark frame 4 | class: CommandLineTool 5 | baseCommand: ["munipack", "dark"] 6 | arguments: 7 | - position: 1 8 | valueFrom: './$(inputs.dark_name)' 9 | prefix: -o 10 | - position: 2 11 | valueFrom: '$(inputs.dark_dir.listing)' 12 | 13 | inputs: 14 | dark_name: string 15 | dark_dir: 16 | type: Directory 17 | label: collection of dark frames 18 | loadListing: shallow_listing 19 | 20 | outputs: 21 | average_dark: 22 | type: File 23 | label: average dark frame 24 | outputBinding: 25 | glob: '$(inputs.dark_name)' -------------------------------------------------------------------------------- /antlr4-generator.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ANTLR4_VERSION="$(pip show antlr4-python3-runtime | grep Version | awk '{print $2}')" 4 | SCRIPT_DIRECTORY="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 5 | WORKDIR=$(mktemp -d) 6 | 7 | cd "${WORKDIR}" || exit 8 | curl -fsSLO "https://www.antlr.org/download/antlr-${ANTLR4_VERSION}-complete.jar" 9 | curl -fsSLO "https://raw.githubusercontent.com/antlr/grammars-v4/master/javascript/ecmascript/Python3/ECMAScript.g4" 10 | java -jar "antlr-${ANTLR4_VERSION}-complete.jar" -Dlanguage=Python3 ECMAScript.g4 11 | mv ./*.py "${SCRIPT_DIRECTORY}/streamflow/cwl/antlr/" 12 | rm -rf "${WORKDIR}" 13 | -------------------------------------------------------------------------------- /examples/flux/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM fluxrm/flux-sched:focal-v0.28.0 2 | 3 | # ubuntu base with Flux 4 | # this allows for easy development of Flux connector 5 | 6 | LABEL maintainer="Vanessasaurus <@vsoch>" 7 | 8 | USER root 9 | RUN apt-get update \ 10 | && python3 -m pip install IPython 11 | 12 | COPY . 
/code 13 | WORKDIR /code 14 | 15 | # Install in development mode in case container used for development 16 | RUN pip install -e . \ 17 | && pip install --upgrade --force-reinstall \ 18 | attrs \ 19 | lockfile \ 20 | && chown -R fluxuser /code 21 | 22 | # Ensure we enter the container as the fluxuser 23 | USER fluxuser 24 | -------------------------------------------------------------------------------- /examples/mpi/environment/docker-compose/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.8' 2 | 3 | services: 4 | openmpi: 5 | deploy: 6 | replicas: 2 7 | expose: 8 | - 2022 9 | image: everpeace/kube-openmpi:0.7.0 10 | stdin_open: true 11 | volumes: 12 | - type: bind 13 | source: ./id_rsa 14 | target: /ssh-key/openmpi/id_rsa 15 | read_only: true 16 | - type: bind 17 | source: ./id_rsa.pub 18 | target: /ssh-key/openmpi/id_rsa.pub 19 | read_only: true 20 | - type: bind 21 | source: ./id_rsa.pub 22 | target: /ssh-key/openmpi/authorized_keys 23 | read_only: true 24 | -------------------------------------------------------------------------------- /docs/source/cwl/docker/no-container.rst: -------------------------------------------------------------------------------- 1 | ============================== 2 | NoContainerCWLDockerTranslator 3 | ============================== 4 | 5 | The NoContainer :ref:`CWLDockerTranslator ` ignores the given configuration for every CWL :ref:`DockerRequirement ` specification in the selected subworkflow. The :ref:`LocalConnector ` is used by default, unless the step is explicitly bound to a different deployment. 6 | 7 | **WARNING:** Use this option with caution. The step execution may not work. The user must manually ensure that the execution environment is properly configured with all the required software dependencies. 
-------------------------------------------------------------------------------- /streamflow/deployment/connector/schemas/kubernetes.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://json-schema.org/draft/2020-12/schema", 3 | "$id": "https://streamflow.di.unito.it/schemas/deployment/connector/kubernetes.json", 4 | "type": "object", 5 | "$ref": "base/kubernetes.json", 6 | "properties": { 7 | "files": { 8 | "type": "array", 9 | "items": { 10 | "type": "string" 11 | }, 12 | "title": "Files", 13 | "description": "A list of yaml files to deploy. Files will be deployed in direct order and undeployed in reverse order" 14 | } 15 | }, 16 | "required": [ 17 | "files" 18 | ], 19 | "unevaluatedProperties": false 20 | } -------------------------------------------------------------------------------- /docs/source/connector/container.rst: -------------------------------------------------------------------------------- 1 | ================== 2 | ContainerConnector 3 | ================== 4 | 5 | The ``ContainerConnector`` is an abstract connector that serves as a base class to implement software container connectors (e.g., :ref:`Docker `, :ref:`Docker Compose `, and :ref:`Singularity `). It extends the abstract :ref:`ConnectorWrapper ` interface, allowing users to spawn software containers on top of local or remote execution environments using the :ref:`stacked locations ` mechanism. Plus, it prevents :ref:`BatchConnector ` instances to be wrapped as inner connectors. 
-------------------------------------------------------------------------------- /streamflow/recovery/schemas/default_failure_manager.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://json-schema.org/draft/2020-12/schema", 3 | "$id": "https://streamflow.di.unito.it/schemas/recovery/default_failure_manager.json", 4 | "type": "object", 5 | "properties": { 6 | "max_retries": { 7 | "type": "integer", 8 | "description": "Number of task retries after a failure, prior to abort the workflow execution." 9 | }, 10 | "retry_delay": { 11 | "type": "integer", 12 | "title": "Retry delay", 13 | "description": "Time (in seconds) to wait before scheduling the next retry.", 14 | "default": 0 15 | } 16 | }, 17 | "additionalProperties": false 18 | } -------------------------------------------------------------------------------- /docs/source/connector/helm3.rst: -------------------------------------------------------------------------------- 1 | ============== 2 | Helm3Connector 3 | ============== 4 | 5 | The `Helm v3 `_ connector can spawn complex, multi-container environments on a `Kubernetes `_ cluster. The deployment unit is the entire Helm release, while the binding unit is the single container in a ``Pod``. StreamFlow requires each container in a Helm release to have a unique ``name`` attribute, allowing an unambiguous identification. Finally, the scheduling unit is the single instance of a potentially replicated container in a ``ReplicaSet``. 6 | 7 | .. 
jsonschema:: https://streamflow.di.unito.it/schemas/deployment/connector/helm3.json 8 | :lift_description: true 9 | -------------------------------------------------------------------------------- /examples/munipack/cwl/master.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwl-runner 2 | cwlVersion: v1.1 3 | class: Workflow 4 | $namespaces: 5 | sf: "https://streamflow.org/cwl#" 6 | 7 | inputs: 8 | main_dir: Directory 9 | worker_number: int 10 | 11 | outputs: 12 | partitioned_dirs: 13 | type: Directory[] 14 | outputSource: scattering/directories 15 | 16 | steps: 17 | find_files: 18 | run: clt/find.cwl 19 | in: 20 | main_directory: main_dir 21 | out: [files_array] 22 | 23 | ####################################################### 24 | 25 | scattering: 26 | run: clt/scatter.cwl 27 | in: 28 | worker: worker_number 29 | files_array: find_files/files_array 30 | out: [directories] 31 | -------------------------------------------------------------------------------- /examples/failure/cwl/master.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwl-runner 2 | cwlVersion: v1.1 3 | class: Workflow 4 | $namespaces: 5 | sf: "https://streamflow.org/cwl#" 6 | 7 | inputs: 8 | main_dir: Directory 9 | worker_number: int 10 | 11 | outputs: 12 | partitioned_dirs: 13 | type: Directory[] 14 | outputSource: scattering/directories 15 | 16 | steps: 17 | find_files: 18 | run: clt/find.cwl 19 | in: 20 | main_directory: main_dir 21 | out: [ files_array ] 22 | 23 | ####################################################### 24 | 25 | scattering: 26 | run: clt/scatter.cwl 27 | in: 28 | worker: worker_number 29 | files_array: find_files/files_array 30 | out: [ directories ] 31 | -------------------------------------------------------------------------------- /streamflow/persistence/schemas/sqlite.json: -------------------------------------------------------------------------------- 1 | { 2 
| "$schema": "https://json-schema.org/draft/2020-12/schema", 3 | "$id": "https://streamflow.di.unito.it/schemas/persistence/sqlite.json", 4 | "type": "object", 5 | "properties": { 6 | "connection": { 7 | "type": "string", 8 | "title": "Connection", 9 | "description": "The path where the sqlite file resides" 10 | }, 11 | "timeout": { 12 | "type": "integer", 13 | "title": "Timeout", 14 | "description": "The time (in seconds) after which a thread waiting for the database lock raises an exception", 15 | "default": 20 16 | } 17 | }, 18 | "required": [ 19 | "connection" 20 | ], 21 | "additionalProperties": false 22 | } -------------------------------------------------------------------------------- /docs/source/_static/theme_overrides.css: -------------------------------------------------------------------------------- 1 | .wy-table-responsive table td, .wy-table-responsive table th { 2 | white-space: normal !important; 3 | } 4 | 5 | .wy-table-responsive { 6 | overflow: visible !important; 7 | } 8 | 9 | .jsonschema-table { 10 | border-left: none !important; 11 | border-right: none !important; 12 | } 13 | 14 | .jsonschema-table tr td { 15 | background-color: #fcfcfc !important; 16 | border-top: 1px solid #e1e4e5 !important; 17 | border-bottom: 1px solid #e1e4e5 !important; 18 | border-left: none !important; 19 | border-right: none !important; 20 | } 21 | 22 | .jsonschema-table tr:first-child td { 23 | text-align: center !important; 24 | text-transform: capitalize; 25 | font-weight: bold; 26 | } -------------------------------------------------------------------------------- /streamflow/deployment/filter/shuffle.py: -------------------------------------------------------------------------------- 1 | import random 2 | from collections.abc import MutableSequence 3 | from importlib.resources import files 4 | 5 | from streamflow.core.deployment import BindingFilter, Target 6 | from streamflow.core.workflow import Job 7 | 8 | 9 | class ShuffleBindingFilter(BindingFilter): 10 | 
async def get_targets( 11 | self, job: Job, targets: MutableSequence[Target] 12 | ) -> MutableSequence[Target]: 13 | random.shuffle(targets) 14 | return targets 15 | 16 | @classmethod 17 | def get_schema(cls) -> str: 18 | return ( 19 | files(__package__) 20 | .joinpath("schemas") 21 | .joinpath("shuffle.json") 22 | .read_text("utf-8") 23 | ) 24 | -------------------------------------------------------------------------------- /examples/mpi/environment/docker-compose/id_rsa.pub: -------------------------------------------------------------------------------- 1 | ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQDIT+72rlPxsLUPgph8vayIzvHVeDm6IebwKAGcg3nDUd5HibrZoymUKd+QTH6FLyp+Mt4R6lkQ7kdKWu87bq2U/LXXUbSDaLRE+yiu/ipHZPXuAnKUipEPZZUt0oAzDAC+tlFuWhpKHyxB6xktSys3lQxVwHT9X91EM9RhWauJkCvMbF0f0fGNB4rpaBKRiJF0QRVe6El8q8L54zzy6SEYH53wlNgJES0oNamjdN5Z9dKD7KXYjkNW33BbIpyGu32EPW2Ip2sGc2brswfLMduj5XgyvO+zPvK4oJ+RXJm5JM23HTN1VjoUKsRS5bk6++V2p216ZeSrFKTtu+hiMmROdZ/G7ftsvhZAQWSQpYVKiuXdZ6A7eSs1mFYp38864all+jT+mhjd+sB8zSbZ5rwBfdGmFq8wyhreJWtWf3PnqTxstJFT0YzhqG7hSHsM/gnjwjwG82pCKchHv9OVel4BsfzjmE4oGvok+RWuGhKT3unXRxTcwZ9gtvTNUc92GqEw3Cy4Y5LuqKXRQeP3U8E/lJBVlL5V+/rWn+x25oEbbmMKvqncxTubSNSmc5alGFFTIGwYF3BCr+RUj0ppbDSUHLbigMyGHFxLxldGD887uLztdHSjITRzfajjOCfitwPwV9TLXunqyOBDBiB7mrNwnEDySppg62CCAJ34COwqiQ== streamflow@mpi 2 | -------------------------------------------------------------------------------- /examples/mpi/environment/k8s/deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: openmpi 5 | spec: 6 | replicas: 2 7 | selector: 8 | matchLabels: 9 | app.kubernetes.io/name: openmpi 10 | template: 11 | metadata: 12 | labels: 13 | app.kubernetes.io/name: openmpi 14 | spec: 15 | containers: 16 | - name: openmpi 17 | image: "everpeace/kube-openmpi:0.7.0" 18 | imagePullPolicy: Always 19 | ports: 20 | - containerPort: 2022 21 | volumeMounts: 22 | - name: ssh-key 23 | mountPath: 
/ssh-key/openmpi 24 | volumes: 25 | - name: ssh-key 26 | secret: 27 | secretName: streamflow-ssh-key 28 | defaultMode: 256 -------------------------------------------------------------------------------- /examples/failure/cwl/clt/combit.cwl: -------------------------------------------------------------------------------- 1 | cwlVersion: v1.1 2 | class: CommandLineTool 3 | 4 | # run script: sum-exe 5 | # pars: 6 | # fst must be a operation: {open, sum, comb} 7 | # snd must be a error: {zero, loop} 8 | # with zero will throw a division by zero exception 9 | # with loop will enter in a infinity loop 10 | # trd must be: a integer [0, 100] 11 | # indicate the probability that will throw the error 12 | # following: all file paths necessary 13 | baseCommand: [ "sum-exe", "comb", "zero", "80" ] 14 | 15 | requirements: 16 | ToolTimeLimit: 17 | timelimit: 5 18 | 19 | inputs: 20 | files: 21 | type: File[] 22 | inputBinding: 23 | position: 1 24 | 25 | outputs: 26 | comb_file: 27 | type: File 28 | outputBinding: 29 | glob: "comb.txt" 30 | -------------------------------------------------------------------------------- /examples/munipack/cwl/clt/photo_correction.cwl: -------------------------------------------------------------------------------- 1 | cwlVersion: v1.1 2 | class: CommandLineTool 3 | baseCommand: ["munipack", "phcorr", "-O", "--mask"] 4 | arguments: 5 | - position: 1 6 | valueFrom: 'proc_\\0' 7 | 8 | inputs: 9 | dark_frame: 10 | type: File 11 | inputBinding: 12 | position: 2 13 | prefix: -dark 14 | separate: true 15 | 16 | master_flat_frame: 17 | type: File 18 | inputBinding: 19 | position: 3 20 | prefix: -flat 21 | separate: true 22 | 23 | to_correct: 24 | type: Directory 25 | loadListing: shallow_listing 26 | inputBinding: 27 | position: 4 28 | valueFrom: "$(inputs.to_correct.listing)" 29 | 30 | outputs: 31 | corrected: 32 | type: File[] 33 | outputBinding: 34 | glob: "proc*.fits" -------------------------------------------------------------------------------- 
/streamflow/recovery/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from collections.abc import MutableMapping 4 | 5 | from streamflow.core.recovery import CheckpointManager, FailureManager 6 | from streamflow.recovery.checkpoint_manager import ( 7 | DefaultCheckpointManager, 8 | DummyCheckpointManager, 9 | ) 10 | from streamflow.recovery.failure_manager import ( 11 | DefaultFailureManager, 12 | DummyFailureManager, 13 | ) 14 | 15 | checkpoint_manager_classes: MutableMapping[str, type[CheckpointManager]] = { 16 | "default": DefaultCheckpointManager, 17 | "dummy": DummyCheckpointManager, 18 | } 19 | 20 | failure_manager_classes: MutableMapping[str, type[FailureManager]] = { 21 | "default": DefaultFailureManager, 22 | "dummy": DummyFailureManager, 23 | } 24 | -------------------------------------------------------------------------------- /docs/source/connector/docker.rst: -------------------------------------------------------------------------------- 1 | =============== 2 | DockerConnector 3 | =============== 4 | 5 | The `Docker `_ connector can spawn one or more instances of a Docker container locally on the StreamFlow node. The units of deployment and binding for this connector correspond to the set of homogeneous container instances, while the unit of scheduling is the single instance. It extends the :ref:`ContainerConnector `, which inherits from the :ref:`ConnectorWrapper ` interface, allowing users to spawn Docker containers on top of local or remote execution environments using the :ref:`stacked locations ` mechanism. 6 | 7 | .. 
jsonschema:: https://streamflow.di.unito.it/schemas/deployment/connector/docker.json 8 | :lift_description: true -------------------------------------------------------------------------------- /tests/data/deployment/slurm/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3.8" 2 | services: 3 | slurmctld: 4 | image: alphaunito/slurmctld:21.08.5 5 | environment: 6 | SLURMD_HOSTNAME_PREFIX: ${COMPOSE_PROJECT_NAME}-slurmd 7 | SLURMD_NODES: 2 8 | hostname: slurmctld 9 | networks: 10 | - slurmnet 11 | volumes: 12 | - home:/home/hpcuser 13 | - munge:/etc/munge 14 | - mysql:/var/lib/mysql 15 | slurmd: 16 | image: alphaunito/slurmd:21.08.5 17 | deploy: 18 | mode: replicated 19 | replicas: 2 20 | environment: 21 | SLURMCTLD_HOSTNAME: slurmctld 22 | networks: 23 | - slurmnet 24 | volumes: 25 | - home:/home/hpcuser 26 | - munge:/etc/munge 27 | networks: 28 | slurmnet: 29 | volumes: 30 | home: 31 | munge: 32 | mysql: -------------------------------------------------------------------------------- /docs/source/connector/kubernetes.rst: -------------------------------------------------------------------------------- 1 | =================== 2 | KubernetesConnector 3 | =================== 4 | 5 | The `Kubernetes `_ connector can spawn complex, multi-container environments on a Kubernetes cluster. The deployment unit is a set of Kubernetes YAML files, which are deployed in the order they are written in the ``config`` section and undeployed in the reverse order. The binding unit is the single container in a ``Pod``. StreamFlow requires each container in a Kubernetes namespace to have a unique ``name`` attribute, allowing an unambiguous identification. Finally, the scheduling unit is the single instance of a potentially replicated container in a ``ReplicaSet``. 6 | 7 | .. 
jsonschema:: https://streamflow.di.unito.it/schemas/deployment/connector/kubernetes.json 8 | :lift_description: true -------------------------------------------------------------------------------- /examples/failure/cwl/clt/openit.cwl: -------------------------------------------------------------------------------- 1 | cwlVersion: v1.1 2 | class: CommandLineTool 3 | 4 | # run script: sum-exe 5 | # pars: 6 | # fst must be an operation: {open, sum, comb} 7 | # snd must be an error: {zero, loop} 8 | # with zero will throw a division by zero exception 9 | # with loop will enter an infinite loop 10 | # trd must be: an integer [0, 100] 11 | # indicate the probability that will throw the error 12 | # following: all file paths necessary 13 | baseCommand: [ "sum-exe", "open", "zero", "0" ] 14 | arguments: 15 | - position: 1 16 | valueFrom: '$(inputs.num_file.path)' 17 | 18 | requirements: 19 | ToolTimeLimit: 20 | timelimit: 5 21 | 22 | inputs: 23 | num_file: File 24 | 25 | outputs: 26 | nums: 27 | type: File[] 28 | outputBinding: 29 | glob: "nums*.txt" 30 | -------------------------------------------------------------------------------- /docs/source/connector/singularity.rst: -------------------------------------------------------------------------------- 1 | ===================== 2 | SingularityConnector 3 | ===================== 4 | 5 | The `Singularity `_ connector can spawn one or more instances of a Singularity container locally on the StreamFlow node. The units of deployment and binding for this connector correspond to the set of homogeneous container instances, while the unit of scheduling is the single instance. It extends the :ref:`ContainerConnector `, which inherits from the :ref:`ConnectorWrapper ` interface, allowing users to spawn Singularity containers on top of local or remote execution environments using the :ref:`stacked locations ` mechanism. 6 | 7 | ..
jsonschema:: https://streamflow.di.unito.it/schemas/deployment/connector/singularity.json 8 | :lift_description: true -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | codespell: 2 | codespell -w $(shell git ls-files | grep -v streamflow/cwl/antlr) 3 | 4 | codespell-check: 5 | codespell $(shell git ls-files | grep -v streamflow/cwl/antlr) 6 | 7 | coverage.xml: testcov 8 | coverage xml 9 | 10 | coverage-report: testcov 11 | coverage report 12 | 13 | flake8: 14 | flake8 --exclude streamflow/cwl/antlr streamflow tests 15 | 16 | format: 17 | isort streamflow tests 18 | black streamflow tests 19 | 20 | format-check: 21 | isort --check-only streamflow tests 22 | black --diff --check streamflow tests 23 | 24 | pyupgrade: 25 | pyupgrade --py3-only --py310-plus $(shell git ls-files | grep .py | grep -v streamflow/cwl/antlr) 26 | 27 | test: 28 | python -m pytest -rs ${PYTEST_EXTRA} 29 | 30 | testcov: 31 | python -m pytest -rs --cov --junitxml=junit.xml -o junit_family=legacy --cov-report= ${PYTEST_EXTRA} 32 | -------------------------------------------------------------------------------- /streamflow/cwl/requirement/docker/__init__.py: -------------------------------------------------------------------------------- 1 | from collections.abc import MutableMapping 2 | 3 | from streamflow.cwl.requirement.docker.docker import DockerCWLDockerTranslator 4 | from streamflow.cwl.requirement.docker.kubernetes import KubernetesCWLDockerTranslator 5 | from streamflow.cwl.requirement.docker.nocontainer import NoContainerCWLDockerTranslator 6 | from streamflow.cwl.requirement.docker.singularity import SingularityCWLDockerTranslator 7 | from streamflow.cwl.requirement.docker.translator import CWLDockerTranslator 8 | 9 | cwl_docker_translator_classes: MutableMapping[str, type[CWLDockerTranslator]] = { 10 | "default": DockerCWLDockerTranslator, 11 | "docker": 
DockerCWLDockerTranslator, 12 | "kubernetes": KubernetesCWLDockerTranslator, 13 | "none": NoContainerCWLDockerTranslator, 14 | "singularity": SingularityCWLDockerTranslator, 15 | } 16 | -------------------------------------------------------------------------------- /examples/munipack/cwl/clt/flat.cwl: -------------------------------------------------------------------------------- 1 | cwlVersion: v1.1 2 | id: flat 3 | label: master flat frame 4 | class: CommandLineTool 5 | baseCommand: ["munipack", "flat"] 6 | 7 | requirements: 8 | InitialWorkDirRequirement: 9 | listing: 10 | - $(inputs.dark_frame) 11 | 12 | inputs: 13 | flat_name: 14 | type: string 15 | inputBinding: 16 | position: 1 17 | prefix: -o 18 | 19 | dark_frame: 20 | type: File 21 | inputBinding: 22 | position: 2 23 | prefix: -dark 24 | 25 | flat_dir: 26 | type: Directory 27 | inputBinding: 28 | position: 3 29 | valueFrom: '$(inputs.flat_dir.listing)' 30 | loadListing: shallow_listing 31 | label: collection of flat frames 32 | 33 | outputs: 34 | master_flat: 35 | type: File 36 | label: master flat frame 37 | outputBinding: 38 | glob: '$(inputs.flat_name)' -------------------------------------------------------------------------------- /examples/failure/cwl/clt/scatter.cwl: -------------------------------------------------------------------------------- 1 | cwlVersion: v1.1 2 | class: ExpressionTool 3 | requirements: 4 | InlineJavascriptRequirement: { } 5 | inputs: 6 | worker: int 7 | files_array: File[] 8 | 9 | outputs: 10 | directories: 11 | type: Directory[] 12 | 13 | expression: > 14 | ${ 15 | var directories = []; 16 | var listings = new Array(inputs.worker); 17 | var current_listing = 0; 18 | 19 | for (var i = 0; i < listings.length; i++) 20 | listings[i] = []; 21 | 22 | for(var i = 0; i < inputs.files_array.length; i++){ 23 | listings[current_listing].push(inputs.files_array[i]); 24 | current_listing = (current_listing + 1) % inputs.worker; 25 | } 26 | 27 | for (var i = 0; i < listings.length; i++) 
28 | directories.push( {"class": "Directory", "basename": "dir" + i.toString(), "listing": listings[i]}); 29 | 30 | return {"directories": directories}; 31 | } -------------------------------------------------------------------------------- /examples/munipack/cwl/download_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | SCRIPT_DIRECTORY="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 3 | DATA_DIRECTORY="${SCRIPT_DIRECTORY}/data" 4 | 5 | #Download and untar data 6 | curl -O ftp://munipack.physics.muni.cz/pub/munipack/munipack-data-blazar.tar.gz 7 | tar -xzf munipack-data-blazar.tar.gz 8 | 9 | # Create directories 10 | mkdir -p "${DATA_DIRECTORY}/dark10" 11 | mkdir -p "${DATA_DIRECTORY}/dark120" 12 | mkdir -p "${DATA_DIRECTORY}/flat" 13 | mkdir -p "${DATA_DIRECTORY}/main" 14 | 15 | # Move data 16 | mv munipack-data-blazar/d10_*.fits "${DATA_DIRECTORY}/dark10" 17 | mv munipack-data-blazar/d120_*.fits "${DATA_DIRECTORY}/dark120" 18 | mv munipack-data-blazar/f10_*.fits "${DATA_DIRECTORY}/flat" 19 | mv munipack-data-blazar/0716_*.fits "${DATA_DIRECTORY}/main" 20 | 21 | # Remove unnecessary stuff 22 | rm -f munipack-data-blazar.tar.gz 23 | rm -rf munipack-data-blazar/ 24 | -------------------------------------------------------------------------------- /examples/munipack/cwl/clt/scatter.cwl: -------------------------------------------------------------------------------- 1 | cwlVersion: v1.1 2 | class: ExpressionTool 3 | requirements: 4 | InlineJavascriptRequirement: {} 5 | inputs: 6 | worker: int 7 | files_array: File[] 8 | 9 | outputs: 10 | directories: 11 | type: Directory[] 12 | 13 | expression: > 14 | ${ 15 | var directories = []; 16 | var listings = new Array(inputs.worker); 17 | var current_listing = 0; 18 | 19 | for (var i = 0; i < listings.length; i++) 20 | listings[i] = []; 21 | 22 | for(var i = 0; i < inputs.files_array.length; i++){ 23 | 
listings[current_listing].push(inputs.files_array[i]); 24 | current_listing = (current_listing + 1) % inputs.worker; 25 | } 26 | 27 | for (var i = 0; i < listings.length; i++) 28 | directories.push( {"class": "Directory", "basename": "dir" + i.toString(), "listing": listings[i]}); 29 | 30 | return {"directories": directories}; 31 | } -------------------------------------------------------------------------------- /examples/flux/cwl/main.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwl-runner 2 | cwlVersion: v1.1 3 | class: Workflow 4 | $namespaces: 5 | sf: "https://streamflow.org/cwl#" 6 | 7 | inputs: 8 | num_processes: int 9 | source_file: File 10 | 11 | outputs: 12 | result: 13 | type: File 14 | outputSource: execute/mpi_output 15 | 16 | steps: 17 | compile: 18 | run: clt/compile.cwl 19 | doc: | 20 | This step takes as input a C source file and compiles it with the MPI compiler. Its output is an executable 21 | linked with a proper MPI implementation. 22 | in: 23 | source_file: source_file 24 | out: [executable_file] 25 | 26 | ############################################################## 27 | 28 | execute: 29 | run: clt/execute.cwl 30 | doc: | 31 | This step runs the executable. 32 | in: 33 | executable_file: compile/executable_file 34 | num_processes: num_processes 35 | out: [mpi_output] 36 | -------------------------------------------------------------------------------- /docs/source/connector/docker-compose.rst: -------------------------------------------------------------------------------- 1 | ======================= 2 | DockerComposeConnector 3 | ======================= 4 | 5 | The `DockerCompose `_ connector can spawn complex, multi-container environments described in a Docker Compose file locally on the StreamFlow node. The entire set of ``services`` in the Docker Compose file constitutes the unit of deployment, while a single service is the unit of binding.
Finally, the single instance of a potentially replicated service is the unit of scheduling. It extends the :ref:`ContainerConnector `, which inherits from the :ref:`ConnectorWrapper ` interface, allowing users to spawn Docker containers on top of local or remote execution environments using the :ref:`stacked locations ` mechanism. 6 | 7 | .. jsonschema:: https://streamflow.di.unito.it/schemas/deployment/connector/docker-compose.json 8 | :lift_description: true 9 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/source/guide/install.rst: -------------------------------------------------------------------------------- 1 | ======= 2 | Install 3 | ======= 4 | 5 | You can install StreamFlow as a Python package with ``pip``, run it in a `Docker `_ container or deploy it on `Kubernetes `_ with `Helm `_. 
6 | 7 | Pip 8 | === 9 | 10 | The StreamFlow module is available on `PyPI `_, so you can install it using the following command:: 11 | 12 | pip install streamflow 13 | 14 | Please note that StreamFlow requires ``python >= 3.10`` to be installed on the system. Then you can execute your workflows through the StreamFlow CLI:: 15 | 16 | streamflow /path/to/streamflow.yml 17 | 18 | Docker 19 | ====== 20 | 21 | StreamFlow Docker images are available on `Docker Hub `_. To download the latest StreamFlow image, you can use the following command:: 22 | 23 | docker pull alphaunito/streamflow:latest 24 | -------------------------------------------------------------------------------- /examples/failure/cwl/clt/sumit.cwl: -------------------------------------------------------------------------------- 1 | cwlVersion: v1.1 2 | class: CommandLineTool 3 | 4 | # run script: sum-exe 5 | # pars: 6 | # fst must be a operation: {open, sum, comb} 7 | # snd must be a error: {zero, loop} 8 | # with zero will throw a division by zero exception 9 | # with loop will enter in a infinity loop 10 | # trd must be: a integer [0, 100] 11 | # indicate the probability that will throw the error 12 | # following: all file paths necessary 13 | baseCommand: [ "sum-exe", "sum", "zero", "0" ] 14 | 15 | requirements: 16 | ToolTimeLimit: 17 | timelimit: 5 18 | 19 | inputs: 20 | num_files: 21 | type: File[] 22 | inputBinding: 23 | position: 1 24 | 25 | extra_file: 26 | type: Directory 27 | loadListing: shallow_listing 28 | inputBinding: 29 | position: 2 30 | valueFrom: "$(inputs.extra_file.listing)" 31 | 32 | outputs: 33 | sum_file: 34 | type: File 35 | outputBinding: 36 | glob: "sum*.txt" 37 | -------------------------------------------------------------------------------- /examples/munipack/streamflow.yml: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env streamflow 2 | version: v1.0 3 | workflows: 4 | master: 5 | type: cwl 6 | config: 7 | file: cwl/main.cwl 
8 | settings: cwl/config.yml 9 | bindings: 10 | - step: /pre_correction 11 | target: 12 | deployment: helm-mf 13 | service: debian 14 | - step: /photo_correction 15 | target: 16 | deployment: helm-mf 17 | service: debian 18 | - step: /stacking 19 | target: 20 | deployment: helm-mf 21 | service: debian 22 | deployments: 23 | helm-mf: 24 | type: helm 25 | config: 26 | chart: environment/helm/stacking 27 | kubeconfig: /home/glassofwhiskey/.kube/config-streamflow 28 | releaseName: munipack-rel 29 | occam-mf: 30 | type: occam 31 | config: 32 | file: environment/occam/occamfile.yml 33 | sshKey: /home/glassofwhiskey/.ssh/keys/occam_rsa 34 | username: icolonne -------------------------------------------------------------------------------- /examples/failure/streamflow.yml: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env streamflow 2 | version: v1.0 3 | workflows: 4 | master: 5 | type: cwl 6 | config: 7 | file: cwl/main.cwl 8 | settings: cwl/config.yml 9 | bindings: 10 | - step: /open_step 11 | target: 12 | deployment: helm-failure 13 | service: debian 14 | - step: /sum_step 15 | target: 16 | deployment: helm-failure 17 | service: debian 18 | - step: /comb_step 19 | target: 20 | deployment: helm-failure 21 | service: debian 22 | deployments: 23 | helm-failure: 24 | type: helm 25 | config: 26 | chart: environment/helm/failure 27 | releaseName: sum-exe 28 | 29 | failureManager: 30 | enabled: True 31 | type: default 32 | config: 33 | max_retries: 50 34 | retry_delay: 5 35 | 36 | scheduler: 37 | type: default 38 | config: 39 | retry_delay: 10 40 | 41 | checkpointManager: 42 | enabled: False 43 | -------------------------------------------------------------------------------- /streamflow/cwl/requirement/docker/nocontainer.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from importlib.resources import files 4 | 5 | from streamflow.core.deployment 
import Target 6 | from streamflow.cwl.requirement.docker.translator import CWLDockerTranslator 7 | 8 | 9 | class NoContainerCWLDockerTranslator(CWLDockerTranslator): 10 | def __init__( 11 | self, 12 | config_dir: str, 13 | wrapper: bool, 14 | ): 15 | super().__init__(config_dir=config_dir, wrapper=wrapper) 16 | 17 | @classmethod 18 | def get_schema(cls) -> str: 19 | return ( 20 | files(__package__) 21 | .joinpath("schemas") 22 | .joinpath("no-container.json") 23 | .read_text("utf-8") 24 | ) 25 | 26 | def get_target( 27 | self, 28 | image: str, 29 | output_directory: str | None, 30 | network_access: bool, 31 | target: Target, 32 | ) -> Target: 33 | return target 34 | -------------------------------------------------------------------------------- /examples/failure/environment/helm/failure/templates/deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: {{ include "failure.fullname" . }} 5 | labels: 6 | {{ include "failure.labels" . | indent 4 }} 7 | spec: 8 | replicas: {{ .Values.replicaCount }} 9 | selector: 10 | matchLabels: 11 | app.kubernetes.io/name: {{ include "failure.name" . }} 12 | app.kubernetes.io/instance: {{ .Release.Name }} 13 | template: 14 | metadata: 15 | labels: 16 | app.kubernetes.io/name: {{ include "failure.name" . }} 17 | app.kubernetes.io/instance: {{ .Release.Name }} 18 | spec: 19 | containers: 20 | - name: debian 21 | image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" 22 | imagePullPolicy: {{ .Values.image.pullPolicy }} 23 | stdin: true 24 | {{ if .Values.command }} 25 | command: 26 | {{- range .Values.image.command }} 27 | - {{ . 
}} 28 | {{- end }} 29 | {{ end }} -------------------------------------------------------------------------------- /examples/munipack/environment/helm/stacking/templates/deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: {{ include "stacking.fullname" . }} 5 | labels: 6 | {{ include "stacking.labels" . | indent 4 }} 7 | spec: 8 | replicas: {{ .Values.replicaCount }} 9 | selector: 10 | matchLabels: 11 | app.kubernetes.io/name: {{ include "stacking.name" . }} 12 | app.kubernetes.io/instance: {{ .Release.Name }} 13 | template: 14 | metadata: 15 | labels: 16 | app.kubernetes.io/name: {{ include "stacking.name" . }} 17 | app.kubernetes.io/instance: {{ .Release.Name }} 18 | spec: 19 | containers: 20 | - name: debian 21 | image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" 22 | imagePullPolicy: {{ .Values.image.pullPolicy }} 23 | stdin: true 24 | {{ if .Values.image.command }} 25 | command: 26 | {{- range .Values.image.command }} 27 | - {{ . 
}} 28 | {{- end }} 29 | {{ end }} -------------------------------------------------------------------------------- /streamflow/config/__init__.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from collections.abc import MutableSequence 3 | from importlib.resources import files 4 | 5 | if sys.version_info < (3, 11): 6 | from importlib.abc import Traversable 7 | else: 8 | from importlib.resources.abc import Traversable 9 | 10 | 11 | ext_schemas: MutableSequence[Traversable] = [ 12 | files("streamflow.deployment.connector") 13 | .joinpath("schemas") 14 | .joinpath("base") 15 | .joinpath("docker.json"), 16 | files("streamflow.deployment.connector") 17 | .joinpath("schemas") 18 | .joinpath("base") 19 | .joinpath("kubernetes.json"), 20 | files("streamflow.deployment.connector") 21 | .joinpath("schemas") 22 | .joinpath("base") 23 | .joinpath("queue_manager.json"), 24 | files("streamflow.deployment.connector") 25 | .joinpath("schemas") 26 | .joinpath("base") 27 | .joinpath("singularity.json"), 28 | files("streamflow.deployment.connector") 29 | .joinpath("schemas") 30 | .joinpath("base") 31 | .joinpath("ssh.json"), 32 | ] 33 | -------------------------------------------------------------------------------- /tests/cwl-conformance/conftest.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import io 4 | import json 5 | import os 6 | from contextlib import redirect_stdout 7 | from typing import Any 8 | 9 | from cwltest import utils 10 | 11 | 12 | def pytest_cwl_execute_test( 13 | config: utils.CWLTestConfig, processfile: str, jobfile: str | None 14 | ) -> tuple[int, dict[str, Any] | None]: 15 | from streamflow.core.exception import WorkflowException 16 | from streamflow.cwl.runner import main 17 | 18 | this_directory = os.path.abspath(os.path.dirname(__file__)) 19 | args = [ 20 | "--streamflow-file", 21 | os.path.join(this_directory, 
"streamflow.yml"), 22 | "--outdir", 23 | config.outdir, 24 | processfile, 25 | ] 26 | if jobfile is not None: 27 | args.append(jobfile) 28 | 29 | try: 30 | f = io.StringIO() 31 | with redirect_stdout(f): 32 | result = main(args) 33 | out = f.getvalue() 34 | return result, json.loads(out) if out else {} 35 | except WorkflowException: 36 | return 1, {} 37 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | 22 | clean: 23 | @rm -fr build/ 24 | 25 | checksum: clean html 26 | @echo "Documentation checksum: " 27 | @find build/html/ \ 28 | -not -name 'searchindex.js' \ 29 | -not -name '*.woff' \ 30 | -not -name '*.woff2' \ 31 | -type f -print0 | \ 32 | sort -zd | \ 33 | xargs -r0 sha256sum | \ 34 | sha256sum | \ 35 | head -c 64 36 | @echo "" 37 | -------------------------------------------------------------------------------- /examples/mpi/streamflow.yml: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env streamflow 2 | version: v1.0 3 | workflows: 4 | master: 5 | type: cwl 6 | config: 7 | file: cwl/main.cwl 8 | settings: cwl/config.yml 9 | bindings: 10 | - step: /compile 11 | target: 12 | deployment: k8s-mpi 13 | service: openmpi 14 | - step: /execute 15 | target: 16 | deployment: k8s-mpi 17 | locations: 2 18 | service: openmpi 19 | deployments: 20 | dc-mpi: 21 | type: docker-compose 22 | config: 23 | files: 24 | - environment/docker-compose/docker-compose.yml 25 | compatibility: true 26 | projectName: openmpi 27 | helm-mpi: 28 | type: helm 29 | config: 30 | chart: environment/helm/openmpi 31 | kubeconfig: ~/.kube/config-streamflow 32 | releaseName: openmpi-rel 33 | workdir: /tmp 34 | k8s-mpi: 35 | type: kubernetes 36 | config: 37 | files: 38 | - environment/k8s/secrets.yaml 39 | - environment/k8s/deployment.yaml 40 | kubeconfig: ~/.kube/config-streamflow 41 | workdir: /tmp -------------------------------------------------------------------------------- /streamflow/cwl/requirement/docker/translator.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from abc import ABC, abstractmethod 4 | from collections.abc import MutableMapping 5 | from typing import Any 6 | 7 | from streamflow.core.config import Config 8 | from streamflow.core.context import 
SchemaEntity 9 | from streamflow.core.deployment import Target 10 | 11 | 12 | class CWLDockerTranslatorConfig(Config): 13 | def __init__( 14 | self, 15 | name: str, 16 | type: str, 17 | config: MutableMapping[str, Any], 18 | wrapper: bool = True, 19 | ): 20 | super().__init__(name, type, config) 21 | self.wrapper: bool = wrapper 22 | 23 | 24 | class CWLDockerTranslator(SchemaEntity, ABC): 25 | def __init__(self, config_dir: str, wrapper: bool): 26 | self.config_dir: str = config_dir 27 | self.wrapper: bool = wrapper 28 | 29 | @abstractmethod 30 | def get_target( 31 | self, 32 | image: str, 33 | output_directory: str | None, 34 | network_access: bool, 35 | target: Target, 36 | ) -> Target: ... 37 | -------------------------------------------------------------------------------- /examples/mpi/cwl/main.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwl-runner 2 | cwlVersion: v1.1 3 | class: Workflow 4 | $namespaces: 5 | sf: "https://streamflow.org/cwl#" 6 | 7 | inputs: 8 | num_processes: int 9 | source_file: File 10 | 11 | outputs: 12 | result: 13 | type: File 14 | outputSource: execute/mpi_output 15 | 16 | steps: 17 | compile: 18 | run: clt/compile.cwl 19 | doc: | 20 | This step takes as input a C source file and compiles it with the MPI compiler. Its output is an executable 21 | linked with a proper MPI implementation. 22 | in: 23 | source_file: source_file 24 | out: [executable_file] 25 | 26 | ############################################################## 27 | 28 | execute: 29 | run: clt/execute.cwl 30 | doc: | 31 | This step takes as input the MPI executable file and some mpirun configurations. It runs the MPI executable on 32 | multiple nodes with the aid of StreamFlow's `replicas` setting in the streamflow.yml file. 
33 | in: 34 | executable_file: compile/executable_file 35 | num_processes: num_processes 36 | out: [mpi_output] 37 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = 3 | bandit 4 | lint 5 | py3.{10,11,12,13,14}-unit 6 | skip_missing_interpreters = True 7 | 8 | [pytest] 9 | asyncio_default_fixture_loop_scope = session 10 | asyncio_default_test_loop_scope = session 11 | asyncio_mode = strict 12 | testpaths = tests 13 | 14 | [testenv] 15 | allowlist_externals = make 16 | commands = 17 | py3.{10,11,12,13,14}-unit: make coverage-report coverage.xml PYTEST_EXTRA={posargs} 18 | dependency_groups = 19 | py3.{10,11,12,13,14}-unit: test 20 | extras = report 21 | description = 22 | py3.{10,11,12,13,14}-unit: Run the unit tests 23 | passenv = 24 | CI 25 | GITHUB_* 26 | runner = uv-venv-lock-runner 27 | setenv = 28 | py3.{10,11,12,13,14}-unit: LC_ALL = C.UTF-8 29 | uv_sync_flags = --no-dev 30 | 31 | [testenv:bandit] 32 | commands = bandit -r streamflow 33 | dependency_groups = bandit 34 | description = Search for common security issues 35 | passenv = 36 | CI 37 | GITHUB_* 38 | 39 | [testenv:lint] 40 | allowlist_externals = make 41 | commands = make flake8 format-check codespell-check pyupgrade 42 | dependency_groups = lint 43 | description = Lint the Python code -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | message: "If you want to cite StreamFlow, please refer to the article below." 
3 | authors: 4 | - family-names: "Colonnelli" 5 | given-names: "Iacopo" 6 | orcid: "https://orcid.org/0000-0001-9290-2017" 7 | title: "StreamFlow" 8 | version: 0.2 9 | url: "https://github.com/alpha-unito/streamflow" 10 | preferred-citation: 11 | type: article 12 | authors: 13 | - family-names: "Colonnelli" 14 | given-names: "Iacopo" 15 | orcid: "https://orcid.org/0000-0001-9290-2017" 16 | - family-names: "Cantalupo" 17 | given-names: "Barbara" 18 | orcid: "https://orcid.org/0000-0001-7575-3902" 19 | - family-names: "Merelli" 20 | given-names: "Ivan" 21 | orcid: "https://orcid.org/0000-0003-3587-3680" 22 | - family-names: "Aldinucci" 23 | given-names: "Marco" 24 | orcid: "https://orcid.org/0000-0001-8788-0829" 25 | doi: 10.1109/TETC.2020.3019202 26 | journal: "IEEE Transactions on Emerging Topics in Computing" 27 | start: 1723 28 | end: 1737 29 | title: "StreamFlow: cross-breeding cloud with HPC" 30 | issue: 4 31 | volume: 9 32 | year: 2021 -------------------------------------------------------------------------------- /docs/source/connector/ssh.rst: -------------------------------------------------------------------------------- 1 | ============= 2 | SSHConnector 3 | ============= 4 | 5 | The `Secure SHell `_ (SSH) connector relies on SSH technology to connect with farms of independent, potentially heterogeneous remote nodes. A single SSH deployment can contain multiple nodes identified by their hostnames. Each hostname is supposed to point to a single node, and distinct hostnames must point to different nodes. 6 | 7 | A single deployment can contain multiple nodes, which represent the deployment unit. Note that SSH nodes are already active, reducing the "deployment" phase to opening an :ref:`SSH Connection `. Nodes in the same deployment are not supposed to be directly connected. Consequently, data transfers always involve the StreamFlow management node, adopting a two-step copy strategy. 
The binding unit and the scheduling unit coincide with the single SSH host. 8 | 9 | .. jsonschema:: https://streamflow.di.unito.it/schemas/deployment/connector/ssh.json 10 | :lift_description: true 11 | :lift_definitions: true 12 | :auto_reference: true 13 | :auto_target: true 14 | -------------------------------------------------------------------------------- /streamflow/persistence/utils.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from collections.abc import MutableSequence 3 | 4 | from streamflow.core.context import StreamFlowContext 5 | from streamflow.core.persistence import DatabaseLoadingContext 6 | from streamflow.core.workflow import Token 7 | 8 | 9 | async def load_depender_tokens( 10 | persistent_id: int, 11 | context: StreamFlowContext, 12 | loading_context: DatabaseLoadingContext, 13 | ) -> MutableSequence[Token]: 14 | return await asyncio.gather( 15 | *( 16 | asyncio.create_task(loading_context.load_token(context, row["depender"])) 17 | for row in await context.database.get_dependers(persistent_id) 18 | ) 19 | ) 20 | 21 | 22 | async def load_dependee_tokens( 23 | persistent_id: int, 24 | context: StreamFlowContext, 25 | loading_context: DatabaseLoadingContext, 26 | ) -> MutableSequence[Token]: 27 | return await asyncio.gather( 28 | *( 29 | asyncio.create_task(loading_context.load_token(context, row["dependee"])) 30 | for row in await context.database.get_dependees(persistent_id) 31 | ) 32 | ) 33 | -------------------------------------------------------------------------------- /streamflow/cwl/requirement/docker/schemas/kubernetes.jinja2: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Pod 3 | metadata: 4 | name: {{ name }} 5 | labels: 6 | name: {{ name }} 7 | spec: 8 | containers: 9 | - name: {{ name }} 10 | image: {{ image }} 11 | stdin: true 12 | {% if output_directory is not none %} 13 | workingDir: {{ output_directory }} 
14 | {% endif %} 15 | volumeMounts: 16 | - mountPath: /tmp/streamflow 17 | name: {{ name }}-sf-volume 18 | {% if output_directory is not none %} 19 | - mountPath: {{ output_directory }} 20 | name: {{ name }}-workdir 21 | {% endif %} 22 | volumes: 23 | - name: {{ name }}-sf-volume 24 | emptyDir: {} 25 | {% if output_directory is not none %} 26 | - name: {{ name }}-workdir 27 | emptyDir: {} 28 | {% endif %} 29 | {% if not network_access %} 30 | --- 31 | apiVersion: networking.k8s.io/v1 32 | kind: NetworkPolicy 33 | metadata: 34 | name: {{ name }}-network-policy 35 | spec: 36 | podSelector: 37 | matchLabels: 38 | name: {{ name }} 39 | policyTypes: 40 | - Ingress 41 | - Egress 42 | {% endif %} 43 | -------------------------------------------------------------------------------- /examples/munipack/cwl/pre_correction.cwl: -------------------------------------------------------------------------------- 1 | cwlVersion: v1.1 2 | label: Correction frames, dark and flat-field 3 | class: Workflow 4 | 5 | inputs: 6 | dark_file_name: string 7 | dark_flat_name: string 8 | flat_file_name: string 9 | dark_frames_dir: Directory 10 | dark_flat_frames_dir: Directory 11 | flat_frames_dir: Directory 12 | 13 | outputs: 14 | average_dark_frame: 15 | type: File 16 | outputSource: dark/average_dark 17 | master_flat_frame: 18 | type: File 19 | outputSource: flat/master_flat 20 | 21 | steps: 22 | dark: 23 | run: clt/dark.cwl 24 | in: 25 | dark_name: dark_file_name 26 | dark_dir: dark_frames_dir 27 | out: [average_dark] 28 | 29 | ####################################################### 30 | 31 | flat_dark: 32 | run: clt/dark.cwl 33 | in: 34 | dark_name: dark_flat_name 35 | dark_dir: dark_flat_frames_dir 36 | out: [average_dark] 37 | 38 | ####################################################### 39 | 40 | flat: 41 | run: clt/flat.cwl 42 | in: 43 | flat_name: flat_file_name 44 | dark_frame: flat_dark/average_dark 45 | flat_dir: flat_frames_dir 46 | out: [master_flat] 47 | 
-------------------------------------------------------------------------------- /streamflow/core/provenance.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from abc import ABC, abstractmethod 4 | from collections.abc import MutableMapping, MutableSequence 5 | from typing import TYPE_CHECKING 6 | 7 | from streamflow.core.persistence import DatabaseLoadingContext 8 | 9 | if TYPE_CHECKING: 10 | from streamflow.core.context import StreamFlowContext 11 | from streamflow.core.workflow import Workflow 12 | 13 | 14 | class ProvenanceManager(ABC): 15 | def __init__( 16 | self, 17 | context: StreamFlowContext, 18 | db_context: DatabaseLoadingContext, 19 | workflows: MutableSequence[Workflow], 20 | ) -> None: 21 | self.context: StreamFlowContext = context 22 | self.db_context: DatabaseLoadingContext = db_context 23 | self.workflows: MutableSequence[Workflow] = workflows 24 | 25 | @abstractmethod 26 | async def create_archive( 27 | self, 28 | outdir: str, 29 | filename: str | None, 30 | config: str | None, 31 | additional_files: MutableSequence[MutableMapping[str, str]] | None, 32 | additional_properties: MutableSequence[MutableMapping[str, str]] | None, 33 | ) -> None: ... 34 | -------------------------------------------------------------------------------- /examples/mpi/environment/helm/openmpi/templates/deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: {{ include "openmpi.fullname" . }} 5 | labels: 6 | {{ include "openmpi.labels" . | indent 4 }} 7 | spec: 8 | replicas: {{ .Values.replicaCount }} 9 | selector: 10 | matchLabels: 11 | app.kubernetes.io/name: {{ include "openmpi.name" . }} 12 | app.kubernetes.io/instance: {{ .Release.Name }} 13 | template: 14 | metadata: 15 | labels: 16 | app.kubernetes.io/name: {{ include "openmpi.name" . 
}} 17 | app.kubernetes.io/instance: {{ .Release.Name }} 18 | spec: 19 | containers: 20 | - name: openmpi 21 | image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" 22 | imagePullPolicy: {{ .Values.image.pullPolicy }} 23 | ports: 24 | - containerPort: 2022 25 | command: 26 | {{- range .Values.image.command }} 27 | - {{ . }} 28 | {{- end }} 29 | volumeMounts: 30 | - name: ssh-key 31 | mountPath: /ssh-key/openmpi 32 | volumes: 33 | - name: ssh-key 34 | secret: 35 | secretName: {{ .Release.Name }}-ssh-key 36 | defaultMode: 256 -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.14-slim AS builder 2 | ARG HELM_VERSION 3 | 4 | ENV VIRTUAL_ENV="/opt/streamflow" 5 | ENV PATH="${VIRTUAL_ENV}/bin:${PATH}" 6 | 7 | COPY ./pyproject.toml ./MANIFEST.in ./LICENSE ./README.md /build/ 8 | COPY ./streamflow /build/streamflow 9 | 10 | RUN apt update -y \ 11 | && apt install -y --no-install-recommends curl \ 12 | && curl -fsSL \ 13 | --retry 5 \ 14 | --retry-max-time 60 \ 15 | --connect-timeout 5 \ 16 | --max-time 10 \ 17 | https://git.io/get_helm.sh -o /tmp/get_helm.sh \ 18 | && chmod +x /tmp/get_helm.sh \ 19 | && /tmp/get_helm.sh --version ${HELM_VERSION} \ 20 | && cd /build \ 21 | && python -m venv ${VIRTUAL_ENV} \ 22 | && pip install . 
23 | 24 | FROM python:3.14-slim 25 | LABEL maintainer="iacopo.colonnelli@unito.it" 26 | 27 | ENV VIRTUAL_ENV="/opt/streamflow" 28 | ENV PATH="${VIRTUAL_ENV}/bin:${PATH}" 29 | 30 | COPY --from=builder ${VIRTUAL_ENV} ${VIRTUAL_ENV} 31 | COPY --from=builder /usr/local/bin/helm /usr/local/bin/helm 32 | 33 | RUN apt update -y \ 34 | && apt install -y --no-install-recommends nodejs \ 35 | && rm -rf /var/lib/apt/lists/* \ 36 | && mkdir -p /streamflow/results 37 | 38 | WORKDIR /streamflow/results 39 | 40 | CMD ["/bin/sh"] 41 | -------------------------------------------------------------------------------- /docs/.wci.yml: -------------------------------------------------------------------------------- 1 | name: StreamFlow 2 | icon: https://raw.githubusercontent.com/alpha-unito/streamflow/master/docs/logo.png 3 | headline: "Towards Cloud-HPC Continuum" 4 | description: "The StreamFlow framework is a container-native Workflow Management System (WMS) written in Python 3 and based on the Common Workflow Language (CWL) open standard. It has been designed around two main principles: allow the execution of tasks in multi-container environments, in order to support concurrent execution of multiple communicating tasks in a multi-agent ecosystem, and relax the requirement of a single shared data space, in order to allow for hybrid workflow executions on top of multi-cloud or hybrid cloud/HPC infrastructures." 
5 | language: Python 6 | documentation: 7 | general: https://streamflow.di.unito.it/documentation/latest/ 8 | installation: https://streamflow.di.unito.it/documentation/latest/install.html 9 | tutorial: https://streamflow.di.unito.it/documentation/latest/operations.html 10 | execution_environment: 11 | interfaces: 12 | - CommandLine 13 | resource_managers: 14 | - Local 15 | - SSH 16 | - Kubernetes 17 | - Docker 18 | - Docker Compose 19 | - Singularity 20 | - SLURM 21 | - PBS 22 | transfer_protocols: 23 | - SCP 24 | - WebSocket (Kubernetes) 25 | -------------------------------------------------------------------------------- /streamflow/deployment/connector/schemas/occam.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://json-schema.org/draft/2020-12/schema", 3 | "$id": "https://streamflow.di.unito.it/schemas/deployment/connector/occam.json", 4 | "type": "object", 5 | "properties": { 6 | "file": { 7 | "type": "string", 8 | "description": "Path to the file describing Occam environment" 9 | }, 10 | "hostname": { 11 | "type": "string", 12 | "description": "Hostname of Occam facility", 13 | "default": "occam.c3s.unito.it" 14 | }, 15 | "sshKey": { 16 | "type": "string", 17 | "description": "Path to the SSH key needed to connect with Occam environment" 18 | }, 19 | "sshKeyPassphraseFile": { 20 | "type": "string", 21 | "description": "Path to a file containing the passphrase protecting the SSH key" 22 | }, 23 | "transferBufferSize": { 24 | "type": "integer", 25 | "description": "Buffer size allocated for local and remote data transfers", 26 | "default": 65536 27 | }, 28 | "username": { 29 | "type": "string", 30 | "description": "Username needed to connect with Occam environment" 31 | } 32 | }, 33 | "required": [ 34 | "file", 35 | "sshKey", 36 | "username" 37 | ], 38 | "additionalProperties": false 39 | } -------------------------------------------------------------------------------- 
/streamflow/deployment/connector/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from collections.abc import MutableMapping 4 | 5 | from streamflow.core.deployment import Connector 6 | from streamflow.deployment.connector.container import ( 7 | DockerComposeConnector, 8 | DockerConnector, 9 | SingularityConnector, 10 | ) 11 | from streamflow.deployment.connector.kubernetes import ( 12 | Helm3Connector, 13 | KubernetesConnector, 14 | ) 15 | from streamflow.deployment.connector.local import LocalConnector 16 | from streamflow.deployment.connector.occam import OccamConnector 17 | from streamflow.deployment.connector.queue_manager import ( 18 | FluxConnector, 19 | PBSConnector, 20 | SlurmConnector, 21 | ) 22 | from streamflow.deployment.connector.ssh import SSHConnector 23 | 24 | connector_classes: MutableMapping[str, type[Connector]] = { 25 | "docker": DockerConnector, 26 | "docker-compose": DockerComposeConnector, 27 | "flux": FluxConnector, 28 | "helm": Helm3Connector, 29 | "helm3": Helm3Connector, 30 | "kubernetes": KubernetesConnector, 31 | "local": LocalConnector, 32 | "occam": OccamConnector, 33 | "pbs": PBSConnector, 34 | "singularity": SingularityConnector, 35 | "slurm": SlurmConnector, 36 | "ssh": SSHConnector, 37 | } 38 | -------------------------------------------------------------------------------- /docs/source/connector/queue-manager.rst: -------------------------------------------------------------------------------- 1 | ===================== 2 | QueueManagerConnector 3 | ===================== 4 | 5 | The ``QueueManagerConnector`` is an abstract connector that serves as a base class to implement High Performance Computing connectors, based on queue managers (e.g., :ref:`Slurm `, :ref:`PBS `, and :ref:`Flux `). It extends the :ref:`ConnectorWrapper ` interface, allowing users to offload jobs to local or remote queue managers. 
Plus, it extends the :ref:`BatchConnector ` interface. The underlying HPC facility is supposed to be constantly active, reducing the deployment phase to deploy the inner connector (e.g., to create an :ref:`SSH Connection ` pointing to an HPC login node). 6 | 7 | .. warning:: 8 | 9 | Note that in StreamFlow ``v0.1``, the ``QueueManagerConnector`` directly inherited from the :ref:`SSHConnector ` at the implementation level. Consequently, all the properties needed to open an SSH connection to the HPC login node (e.g., ``hostname``, ``username``, and ``sshKey``) were defined directly in the ``QueueManagerConnector``. This path is still supported by StreamFlow ``v0.2``, but it is deprecated and will be removed in StreamFlow ``v0.3``. 10 | 11 | 12 | .. jsonschema:: https://streamflow.di.unito.it/schemas/deployment/connector/base/queue_manager.json -------------------------------------------------------------------------------- /docs/source/connector/occam.rst: -------------------------------------------------------------------------------- 1 | ===================== 2 | OccamConnector 3 | ===================== 4 | 5 | The `Occam `_ SuperComputer is a High-Performance Computing (HPC) facility designed and managed in collaboration between the `University of Torino `_ (UniTO) and the `National Institute for Nuclear Physics `_ (INFN). 6 | 7 | It is different from standard HPC facilities for two main reasons. First, users can reserve computing nodes for specific time slots instead of relying on a batched interaction orchestrated by a queue manager. Second, the execution model is entirely based on unprivileged `Docker `_ containers. 8 | 9 | This connector allows StreamFlow to offload computation to multi-container environments deployed on the Occam facility. The deployment unit is a multi-container environment deployed on one or more computing nodes. Multi-container environments are described in a YAML file with a syntax similar to the ``service`` section of `Docker Compose `_. 
Users can pass this file to the connector through the ``file`` parameter. The unit of binding is the single top-level entry in the file, while the scheduling unit is the single container instance. 10 | 11 | .. jsonschema:: https://streamflow.di.unito.it/schemas/deployment/connector/occam.json 12 | :lift_description: true 13 | -------------------------------------------------------------------------------- /streamflow/persistence/base.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from abc import ABC 3 | from collections.abc import MutableMapping 4 | from typing import TYPE_CHECKING, Any 5 | 6 | from cachetools import LRUCache 7 | 8 | from streamflow.core.context import StreamFlowContext 9 | from streamflow.core.persistence import Database 10 | 11 | if TYPE_CHECKING: 12 | from cachetools import Cache 13 | 14 | 15 | class CachedDatabase(Database, ABC): 16 | def __init__(self, context: StreamFlowContext): 17 | super().__init__(context) 18 | self.deployment_cache: Cache[int, MutableMapping[str, Any]] = LRUCache( 19 | maxsize=sys.maxsize 20 | ) 21 | self.port_cache: Cache[int, MutableMapping[str, Any]] = LRUCache( 22 | maxsize=sys.maxsize 23 | ) 24 | self.step_cache: Cache[int, MutableMapping[str, Any]] = LRUCache( 25 | maxsize=sys.maxsize 26 | ) 27 | self.target_cache: Cache[int, MutableMapping[str, Any]] = LRUCache( 28 | maxsize=sys.maxsize 29 | ) 30 | self.filter_cache: Cache[int, MutableMapping[str, Any]] = LRUCache( 31 | maxsize=sys.maxsize 32 | ) 33 | self.token_cache: Cache[int, MutableMapping[str, Any]] = LRUCache( 34 | maxsize=sys.maxsize 35 | ) 36 | self.workflow_cache: Cache[int, MutableMapping[str, Any]] = LRUCache( 37 | maxsize=sys.maxsize 38 | ) 39 | -------------------------------------------------------------------------------- /docs/source/guide/deployments.rst: -------------------------------------------------------------------------------- 1 | ======================= 2 | Import your environment 
3 | ======================= 4 | 5 | StreamFlow relies on external specifications and tools to describe and orchestrate a remote execution environment. For example, a Kubernetes-based deployment can be described in Helm, while a resource reservation request on an HPC facility can be specified with Slurm or PBS files. 6 | 7 | This feature allows users to stick with the technologies they already know, or at least with production-grade tools that are solid, maintained and well-documented. Moreover, it adheres to the `infrastructure-as-code `_ principle, making execution environments easily portable and self-documented. 8 | 9 | The lifecycle management of each StreamFlow deployment is delegated to a specific implementation of the ``Connector`` interface. Connectors provided by default in the StreamFlow codebase are reported :ref:`here `, but users can add new connectors to the list by simply creating their implementation of the ``Connector`` interface. 10 | 11 | The following snippet contains a simple example of Docker deployment named ``docker-openjdk``, which instantiates a container from the ``openjdk:9.0.1-11-slim`` image. At runtime, StreamFlow creates a :ref:`DockerConnector ` instance to manage the container lifecycle. 12 | 13 | ..
code-block:: yaml 14 | 15 | docker-openjdk: 16 | type: docker 17 | config: 18 | image: openjdk:9.0.1-11-slim 19 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | # Git 2 | .git 3 | .gitignore 4 | 5 | # CI 6 | .codeclimate.yml 7 | .travis.yml 8 | .taskcluster.yml 9 | 10 | # Docker 11 | docker-compose.yml 12 | .docker 13 | 14 | # Byte-compiled / optimized / DLL files 15 | __pycache__/ 16 | */__pycache__/ 17 | */*/__pycache__/ 18 | */*/*/__pycache__/ 19 | *.py[cod] 20 | */*.py[cod] 21 | */*/*.py[cod] 22 | */*/*/*.py[cod] 23 | 24 | # C extensions 25 | *.so 26 | 27 | # Distribution / packaging 28 | .Python 29 | env/ 30 | build/ 31 | develop-eggs/ 32 | dist/ 33 | downloads/ 34 | eggs/ 35 | lib/ 36 | lib64/ 37 | parts/ 38 | sdist/ 39 | var/ 40 | *.egg-info/ 41 | .installed.cfg 42 | *.egg 43 | 44 | # PyInstaller 45 | # Usually these files are written by a python script from a template 46 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
47 | *.manifest 48 | *.spec 49 | 50 | # Installer logs 51 | pip-log.txt 52 | pip-delete-this-directory.txt 53 | 54 | # Unit test / coverage reports 55 | htmlcov/ 56 | .tox/ 57 | .coverage 58 | .cache 59 | nosetests.xml 60 | coverage.xml 61 | junit.xml 62 | 63 | # Translations 64 | *.mo 65 | *.pot 66 | 67 | # Django stuff: 68 | *.log 69 | 70 | # Sphinx documentation 71 | docs/_build/ 72 | 73 | # PyBuilder 74 | target/ 75 | 76 | # Virtual environment 77 | .env/ 78 | .venv/ 79 | venv/ 80 | 81 | # PyCharm 82 | .idea 83 | 84 | # Python mode for VIM 85 | .ropeproject 86 | */.ropeproject 87 | */*/.ropeproject 88 | */*/*/.ropeproject 89 | 90 | # Vim swap files 91 | *.swp 92 | */*.swp 93 | */*/*.swp 94 | */*/*/*.swp 95 | -------------------------------------------------------------------------------- /examples/munipack/cwl/stack.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwl-runner 2 | cwlVersion: v1.1 3 | class: Workflow 4 | $namespaces: 5 | sf: "https://streamflow.org/cwl#" 6 | 7 | requirements: 8 | ScatterFeatureRequirement: {} 9 | 10 | inputs: 11 | corrected_frames: File[] 12 | fwmh: int 13 | ra: float 14 | dec: float 15 | deg: float 16 | 17 | outputs: 18 | final_frame: 19 | type: File 20 | outputSource: combine/processed_frame 21 | 22 | steps: 23 | cone_search: 24 | run: clt/cone_search.cwl 25 | in: 26 | ra: ra 27 | dec: dec 28 | deg: deg 29 | out: [conefile] 30 | 31 | ####################################################### 32 | 33 | find_star: 34 | run: clt/find_star.cwl 35 | in: 36 | frames: corrected_frames 37 | fwmh: fwmh 38 | out: [find_frames] 39 | 40 | ####################################################### 41 | 42 | aperture_photometry: 43 | run: clt/aperture_photometry.cwl 44 | in: 45 | find_frames: find_star/find_frames 46 | out: [aphot_frames] 47 | 48 | 49 | ####################################################### 50 | 51 | astrometry: 52 | run: clt/astrometry.cwl 53 | scatter: aphot_frame 54 | in: 
55 | aphot_frame: aperture_photometry/aphot_frames 56 | conefile: cone_search/conefile 57 | out: [astrometry_frame] 58 | 59 | ####################################################### 60 | 61 | combine: 62 | run: clt/combine.cwl 63 | in: 64 | stack_frames: astrometry/astrometry_frame 65 | out: [processed_frame] -------------------------------------------------------------------------------- /helm/chart/templates/job.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: batch/v1 2 | kind: Job 3 | metadata: 4 | name: {{ include "streamflow.fullname" . }} 5 | labels: 6 | {{- include "streamflow.labels" . | nindent 4 }} 7 | annotations: 8 | "helm.sh/hook": post-install 9 | "helm.sh/hook-delete-policy": hook-succeeded 10 | spec: 11 | template: 12 | metadata: 13 | labels: 14 | {{- include "streamflow.selectorLabels" . | nindent 8 }} 15 | spec: 16 | {{- with .Values.imagePullSecrets }} 17 | imagePullSecrets: 18 | {{- toYaml . | nindent 8 }} 19 | {{- end }} 20 | serviceAccountName: {{ include "streamflow.serviceAccountName" . }} 21 | securityContext: 22 | {{- toYaml .Values.podSecurityContext | nindent 8 }} 23 | containers: 24 | - name: {{ .Chart.Name }} 25 | securityContext: 26 | {{- toYaml .Values.securityContext | nindent 12 }} 27 | image: "{{ .Values.image.repository }}:{{ .Chart.AppVersion }}" 28 | args: {{ .Values.args }} 29 | imagePullPolicy: {{ .Values.image.pullPolicy }} 30 | resources: 31 | {{- toYaml .Values.resources | nindent 12 }} 32 | restartPolicy: {{ .Values.restartPolicy }} 33 | {{- with .Values.nodeSelector }} 34 | nodeSelector: 35 | {{- toYaml . | nindent 8 }} 36 | {{- end }} 37 | {{- with .Values.affinity }} 38 | affinity: 39 | {{- toYaml . | nindent 8 }} 40 | {{- end }} 41 | {{- with .Values.tolerations }} 42 | tolerations: 43 | {{- toYaml . 
| nindent 8 }} 44 | {{- end }} 45 | -------------------------------------------------------------------------------- /streamflow/deployment/template.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from collections.abc import MutableMapping 4 | 5 | from jinja2 import Template 6 | 7 | 8 | class CommandTemplateMap: 9 | def __init__( 10 | self, 11 | default: str, 12 | template_map: MutableMapping[str, str] | None = None, 13 | ): 14 | self.templates: MutableMapping[str, Template] = { 15 | "__DEFAULT__": Template(default) 16 | } 17 | if template_map: 18 | for name, template in template_map.items(): 19 | self.templates[name] = Template(template) 20 | 21 | def get_command( 22 | self, 23 | command: str, 24 | template: str | None = None, 25 | environment: MutableMapping[str, str] | None = None, 26 | workdir: str | None = None, 27 | **kwargs, 28 | ) -> str: 29 | return self.templates[ 30 | ( 31 | template 32 | if template is not None and template in self.templates.keys() 33 | else "__DEFAULT__" 34 | ) 35 | ].render( 36 | streamflow_command=command, 37 | streamflow_environment=( 38 | " && ".join( 39 | [f'export {key}="{value}"' for (key, value) in environment.items()] 40 | ) 41 | if environment is not None 42 | else "" 43 | ), 44 | streamflow_workdir=workdir, 45 | **kwargs, 46 | ) 47 | 48 | def is_empty(self) -> bool: 49 | return len(self.templates) == 1 50 | -------------------------------------------------------------------------------- /docs/source/ext/binding-filter.rst: -------------------------------------------------------------------------------- 1 | ============= 2 | BindingFilter 3 | ============= 4 | 5 | StreamFlow lets users map steps to :ref:`multiple targets `. A ``BindingFilter`` object implements a strategy to manipulate and reorder the list of targets bound to a given step before the StreamFlow :ref:`Scheduler ` component evaluates them. 
The ``BindingFilter`` interface specified in the ``streamflow.core.deployment`` module contains a single ``get_targets`` method: 6 | 7 | .. code-block:: python 8 | 9 | async def get_targets( 10 | self, job: Job, targets: MutableSequence[Target] 11 | ) -> MutableSequence[Target]: 12 | ... 13 | 14 | The ``get_targets`` method receives a ``Job`` object and a list of ``Target`` objects, the list of targets specified by the user, and returns another list of ``Target`` objects. The :ref:`Scheduler ` component will evaluate the returned list of targets to find an allocation for the ``Job`` object. 15 | 16 | By default, if no ``BindingFilter`` is specified for a multi-target step binding, all the ``Target`` objects will be evaluated in the original order. In addition, StreamFlow defines a ``ShuffleBindingFilter`` implementation to randomise the evaluation order at any invocation. 17 | 18 | Implementations 19 | =============== 20 | 21 | ======= ========================================================= 22 | Type Class 23 | ======= ========================================================= 24 | shuffle streamflow.deployment.filter.shuffle.ShuffleBindingFilter 25 | ======= ========================================================= 26 | -------------------------------------------------------------------------------- /docs/source/advanced/multiple-targets.rst: -------------------------------------------------------------------------------- 1 | ================ 2 | Multiple targets 3 | ================ 4 | 5 | StreamFlow lets users map steps and ports to multiple targets in the :ref:`StreamFlow file `. A step bound to multiple locations can be scheduled on each of them at runtime. Plus, if a step encloses multiple instances (e.g., in a CWL ``scatter`` operation), they can run on different targets. 6 | 7 | The ``filters`` directive defines one or more strategies to select a target among the set of available ones (or among a subset) at runtime.
By default, all available targets will be evaluated in the order of appearance in the ``target`` directive. 8 | 9 | Users can select a given :ref:`BindingFilter ` implementation by specifying its name in the ``filters`` directive of a ``binding`` object. If multiple filters are declared, they are applied to the target list in the order of appearance. For example, to evaluate targets in random order at every allocation request, users can specify the following: 10 | 11 | .. code-block:: yaml 12 | 13 | workflows: 14 | example: 15 | type: cwl 16 | config: 17 | file: main.cwl 18 | settings: config.yml 19 | bindings: 20 | - step: /compile 21 | target: 22 | - deployment: first-deployment 23 | - deployment: second-deployment 24 | filters: 25 | - shuffle 26 | 27 | Conversely, a file or directory port bound to multiple locations can be retrieved from each of them at runtime. StreamFlow will always try to minimize the overhead of data transfers, using local data whenever possible. -------------------------------------------------------------------------------- /examples/mpi/environment/helm/openmpi/templates/_helpers.tpl: -------------------------------------------------------------------------------- 1 | {{/* vim: set filetype=mustache: */}} 2 | {{/* 3 | Expand the name of the chart. 4 | */}} 5 | {{- define "openmpi.name" -}} 6 | {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} 7 | {{- end -}} 8 | 9 | {{/* 10 | Create a default fully qualified app name. 11 | We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). 12 | If release name contains chart name it will be used as a full name. 
13 | */}} 14 | {{- define "openmpi.fullname" -}} 15 | {{- if .Values.fullnameOverride -}} 16 | {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}} 17 | {{- else -}} 18 | {{- $name := default .Chart.Name .Values.nameOverride -}} 19 | {{- if contains $name .Release.Name -}} 20 | {{- .Release.Name | trunc 63 | trimSuffix "-" -}} 21 | {{- else -}} 22 | {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} 23 | {{- end -}} 24 | {{- end -}} 25 | {{- end -}} 26 | 27 | {{/* 28 | Create chart name and version as used by the chart label. 29 | */}} 30 | {{- define "openmpi.chart" -}} 31 | {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}} 32 | {{- end -}} 33 | 34 | {{/* 35 | Common labels 36 | */}} 37 | {{- define "openmpi.labels" -}} 38 | app.kubernetes.io/name: {{ include "openmpi.name" . }} 39 | helm.sh/chart: {{ include "openmpi.chart" . }} 40 | app.kubernetes.io/instance: {{ .Release.Name }} 41 | {{- if .Chart.AppVersion }} 42 | app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} 43 | {{- end }} 44 | app.kubernetes.io/managed-by: {{ .Release.Service }} 45 | {{- end -}} 46 | -------------------------------------------------------------------------------- /examples/failure/environment/helm/failure/templates/_helpers.tpl: -------------------------------------------------------------------------------- 1 | {{/* vim: set filetype=mustache: */}} 2 | {{/* 3 | Expand the name of the chart. 4 | */}} 5 | {{- define "failure.name" -}} 6 | {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} 7 | {{- end -}} 8 | 9 | {{/* 10 | Create a default fully qualified app name. 11 | We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). 12 | If release name contains chart name it will be used as a full name. 
13 | */}} 14 | {{- define "failure.fullname" -}} 15 | {{- if .Values.fullnameOverride -}} 16 | {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}} 17 | {{- else -}} 18 | {{- $name := default .Chart.Name .Values.nameOverride -}} 19 | {{- if contains $name .Release.Name -}} 20 | {{- .Release.Name | trunc 63 | trimSuffix "-" -}} 21 | {{- else -}} 22 | {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} 23 | {{- end -}} 24 | {{- end -}} 25 | {{- end -}} 26 | 27 | {{/* 28 | Create chart name and version as used by the chart label. 29 | */}} 30 | {{- define "failure.chart" -}} 31 | {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}} 32 | {{- end -}} 33 | 34 | {{/* 35 | Common labels 36 | */}} 37 | {{- define "failure.labels" -}} 38 | app.kubernetes.io/name: {{ include "failure.name" . }} 39 | helm.sh/chart: {{ include "failure.chart" . }} 40 | app.kubernetes.io/instance: {{ .Release.Name }} 41 | {{- if .Chart.AppVersion }} 42 | app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} 43 | {{- end }} 44 | app.kubernetes.io/managed-by: {{ .Release.Service }} 45 | {{- end -}} 46 | -------------------------------------------------------------------------------- /examples/munipack/environment/helm/stacking/templates/_helpers.tpl: -------------------------------------------------------------------------------- 1 | {{/* vim: set filetype=mustache: */}} 2 | {{/* 3 | Expand the name of the chart. 4 | */}} 5 | {{- define "stacking.name" -}} 6 | {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} 7 | {{- end -}} 8 | 9 | {{/* 10 | Create a default fully qualified app name. 11 | We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). 12 | If release name contains chart name it will be used as a full name. 
13 | */}} 14 | {{- define "stacking.fullname" -}} 15 | {{- if .Values.fullnameOverride -}} 16 | {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}} 17 | {{- else -}} 18 | {{- $name := default .Chart.Name .Values.nameOverride -}} 19 | {{- if contains $name .Release.Name -}} 20 | {{- .Release.Name | trunc 63 | trimSuffix "-" -}} 21 | {{- else -}} 22 | {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} 23 | {{- end -}} 24 | {{- end -}} 25 | {{- end -}} 26 | 27 | {{/* 28 | Create chart name and version as used by the chart label. 29 | */}} 30 | {{- define "stacking.chart" -}} 31 | {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}} 32 | {{- end -}} 33 | 34 | {{/* 35 | Common labels 36 | */}} 37 | {{- define "stacking.labels" -}} 38 | app.kubernetes.io/name: {{ include "stacking.name" . }} 39 | helm.sh/chart: {{ include "stacking.chart" . }} 40 | app.kubernetes.io/instance: {{ .Release.Name }} 41 | {{- if .Chart.AppVersion }} 42 | app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} 43 | {{- end }} 44 | app.kubernetes.io/managed-by: {{ .Release.Service }} 45 | {{- end -}} 46 | -------------------------------------------------------------------------------- /streamflow/config/validator.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Iterable, MutableMapping 2 | from typing import Any 3 | 4 | from jsonschema import ValidationError 5 | from jsonschema.validators import validator_for 6 | from ruamel.yaml import YAML 7 | 8 | from streamflow.config.schema import SfSchema 9 | from streamflow.core.exception import WorkflowDefinitionException 10 | 11 | 12 | def handle_errors(errors: Iterable[ValidationError]) -> None: 13 | if not (errors := list(sorted(errors, key=str))): 14 | return 15 | raise WorkflowDefinitionException( 16 | "The StreamFlow configuration is invalid because:\n{error_msgs}".format( 17 | error_msgs="\n".join([f" - 
{err}" for err in errors]) 18 | ) 19 | ) 20 | 21 | 22 | class SfValidator: 23 | def __init__(self) -> None: 24 | super().__init__() 25 | self.schema: SfSchema = SfSchema() 26 | self.yaml = YAML(typ="safe") 27 | 28 | def validate_file(self, streamflow_file: str) -> MutableMapping[str, Any]: 29 | with open(streamflow_file) as f: 30 | streamflow_config = self.yaml.load(f) 31 | return self.validate(streamflow_config) 32 | 33 | def validate( 34 | self, streamflow_config: MutableMapping[str, Any] 35 | ) -> MutableMapping[str, Any]: 36 | if "version" not in streamflow_config: 37 | raise WorkflowDefinitionException( 38 | "The `version` clause is mandatory and should be equal to `v1.0`." 39 | ) 40 | config = self.schema.get_config(streamflow_config["version"]).contents 41 | cls = validator_for(config) 42 | validator = cls(config, registry=self.schema.registry) 43 | handle_errors(validator.iter_errors(streamflow_config)) 44 | return streamflow_config 45 | -------------------------------------------------------------------------------- /streamflow/workflow/transformer.py: -------------------------------------------------------------------------------- 1 | from abc import ABC 2 | 3 | from streamflow.core.exception import WorkflowDefinitionException 4 | from streamflow.core.workflow import Port 5 | from streamflow.workflow.step import Transformer 6 | 7 | 8 | class ManyToOneTransformer(Transformer, ABC): 9 | def add_output_port(self, name: str, port: Port) -> None: 10 | if not self.output_ports or port.name in self.output_ports: 11 | super().add_output_port(name, port) 12 | else: 13 | raise WorkflowDefinitionException( 14 | f"{self.name} step must contain a single output port." 15 | ) 16 | 17 | def get_output_name(self) -> str: 18 | return next(iter(self.output_ports)) 19 | 20 | async def run(self) -> None: 21 | if len(self.output_ports) != 1: 22 | raise WorkflowDefinitionException( 23 | f"{self.name} step must contain a single output port." 
24 | ) 25 | await super().run() 26 | 27 | 28 | class OneToOneTransformer(ManyToOneTransformer, ABC): 29 | def add_input_port(self, name: str, port: Port) -> None: 30 | if not self.input_ports: 31 | super().add_input_port(name, port) 32 | else: 33 | raise WorkflowDefinitionException( 34 | f"{self.name} step must contain a single input port." 35 | ) 36 | 37 | async def run(self): 38 | if len(self.input_ports) != 1: 39 | raise WorkflowDefinitionException( 40 | f"{self.name} step must contain a single input port." 41 | ) 42 | if len(self.output_ports) != 1: 43 | raise WorkflowDefinitionException( 44 | f"{self.name} step must contain a single output port." 45 | ) 46 | await super().run() 47 | -------------------------------------------------------------------------------- /docs/source/cwl/cwl-runner.rst: -------------------------------------------------------------------------------- 1 | ========== 2 | CWL Runner 3 | ========== 4 | 5 | CWL specifies a standard ``cwl-runner`` interface to execute CWL workflows from the command line. StreamFlow adheres to this interface, installing a ``cwl-runner`` executable in the user's ``$PATH``. In particular, a 6 | 7 | .. code-block:: bash 8 | 9 | cwl-runner processfile jobfile 10 | 11 | command is equivalent to a ``streamflow run`` command with the following ``streamflow.yml`` file: 12 | 13 | .. code-block:: yaml 14 | 15 | version: v1.0 16 | workflows: 17 | workflow_name: 18 | type: cwl 19 | config: 20 | file: processfile 21 | settings: jobfile 22 | 23 | In addition to the standard parameters, it is possible to pass a ``--streamflow-file`` argument to the ``cwl-runner`` CLI with the path of a StreamFlow file containing deployments and bindings (see the :ref:`StreamFlow file ` section). The ``workflows`` section of this file must have a single entry containing a list of ``bindings``. If present, the ``type`` and ``config`` entries will be ignored. Files containing multiple workflow entries will throw an exception. 
24 | 25 | For example, the workflow described :ref:`here ` can also be executed with the following command 26 | 27 | .. code-block:: bash 28 | 29 | cwl-runner --streamflow-file /path/to/streamflow.yml main.cwl config.cwl 30 | 31 | where the ``streamflow.yml`` fail contains these lines 32 | 33 | .. code-block:: yaml 34 | 35 | version: v1.0 36 | workflows: 37 | extract-and-compile: 38 | bindings: 39 | - step: /compile 40 | target: 41 | deployment: docker-openjdk 42 | 43 | deployments: 44 | docker-openjdk: 45 | type: docker 46 | config: 47 | image: openjdk:9.0.1-11-slim 48 | 49 | -------------------------------------------------------------------------------- /docs/source/advanced/port-targets.rst: -------------------------------------------------------------------------------- 1 | ============ 2 | Port targets 3 | ============ 4 | 5 | In the default case, when a workflow receives files or folders as initial input objects, StreamFlow looks for them in the local file system. Along the same line, whenever a workflow step produces input files or folders, StreamFlow searches them in the location where the step was executed. 6 | 7 | However, there are cases in which these assumptions are not valid. To correctly handle these cases, the user can specify port targets in the ``bindings`` list of a workflow. Port targets are similar to step targets described :ref:`here `, but bind ports instead of steps. 8 | 9 | In particular, a port binding contains a ``port`` directive referring to a specific input/output port in the workflow, a ``target`` directive referring to a deployment entry in the ``deployments`` section of the StreamFlow file, and a (mandatory) ``workdir`` entry identifies the base path where the data should be placed. 10 | 11 | Similarly to steps, ports are uniquely identified using a Posix-like path, where the port is mapped to a file, and the related step is mapped to a folder. Consider the following example, which refers to :ref:`this ` workflow: 12 | 13 | .. 
code-block:: yaml 14 | 15 | version: v1.0 16 | workflows: 17 | extract-and-compile: 18 | type: cwl 19 | config: 20 | file: main.cwl 21 | settings: config.yml 22 | bindings: 23 | - port: /compile/src 24 | target: 25 | deployment: hpc-slurm 26 | workdir: /archive/home/myuser 27 | 28 | deployments: 29 | hpc-slurm: 30 | type: slurm 31 | config: 32 | ... 33 | 34 | Here, the ``/compile/src`` path refers to the ``src`` port of the ``/compile`` step. StreamFlow will search for the file the ``src`` port requires directly on the remote ``hpc-slurm`` location in the ``/archive/home/myuser`` path. 35 | 36 | -------------------------------------------------------------------------------- /examples/flux/cwl/data/cs.cxx: -------------------------------------------------------------------------------- 1 | // Author: Marco Aldinucci 2 | // Date: 13 May 2010 3 | // Ex. 1-2, for PDS-physics class 2010 4 | 5 | #include "mpi.h" 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | enum messages {msg_tag,eos_tag}; 13 | 14 | 15 | static inline const double diffmsec(const struct timeval & a, 16 | const struct timeval & b) { 17 | long sec = (a.tv_sec - b.tv_sec); 18 | long usec = (a.tv_usec - b.tv_usec); 19 | 20 | if(usec < 0) { 21 | --sec; 22 | usec += 1000000; 23 | } 24 | return ((double)(sec*1000)+ (double)usec/1000.0); 25 | } 26 | 27 | int main( int argc, char **argv ) 28 | { 29 | int myid,numprocs,namelen; 30 | char processor_name[MPI_MAX_PROCESSOR_NAME]; 31 | double t0,t1; 32 | struct timeval wt1,wt0; 33 | // MPI_Wtime cannot be called here 34 | gettimeofday(&wt0,NULL); 35 | MPI_Init(&argc,&argv ); 36 | t0 = MPI_Wtime(); 37 | //gettimeofday(&wt0,NULL); 38 | MPI_Comm_size(MPI_COMM_WORLD,&numprocs); 39 | MPI_Comm_rank(MPI_COMM_WORLD,&myid); 40 | MPI_Get_processor_name(processor_name,&namelen); 41 | srand(time(NULL)); 42 | 43 | 44 | // This is the server code 45 | // Note - I don't understand how this example works - with the previous 46 | // it hung forever, so I 
reduced to just printing the server id and exiting. 47 | int n_eos = 0; 48 | std::cout << "Hello I'm the server with id " << myid << " on " << processor_name 49 | << " out of " << numprocs << " I'm the server\n"; 50 | 51 | MPI_Barrier(MPI_COMM_WORLD); 52 | t1 = MPI_Wtime(); 53 | //gettimeofday(&wt1,NULL); 54 | MPI_Finalize(); 55 | gettimeofday(&wt1,NULL); 56 | std::cout << "Total time (MPI) " << myid << " is " << t1-t0 << "\n"; 57 | std::cout << "Total time (gtd) " << myid << " is " << 58 | diffmsec(wt1,wt0)/1000 << "\n"; 59 | return 0; 60 | } 61 | -------------------------------------------------------------------------------- /tests/test_database.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from streamflow.core import utils 4 | from streamflow.core.context import StreamFlowContext 5 | from streamflow.workflow.step import ExecuteStep 6 | from tests.utils.workflow import create_workflow 7 | 8 | 9 | @pytest.mark.asyncio 10 | async def test_get_steps_queries(context: StreamFlowContext): 11 | """Test get_input_steps and get_output_steps queries""" 12 | workflow, (port_a, job_port, job_port_2, port_b, port_c) = await create_workflow( 13 | context, num_port=5 14 | ) 15 | step = workflow.create_step( 16 | cls=ExecuteStep, name=utils.random_name(), job_port=job_port 17 | ) 18 | step_2 = workflow.create_step( 19 | cls=ExecuteStep, name=utils.random_name(), job_port=job_port_2 20 | ) 21 | step.add_input_port("in", port_a) 22 | step.add_output_port("out", port_b) 23 | step_2.add_input_port("in2", port_b) 24 | step_2.add_output_port("out2", port_c) 25 | await workflow.save(context) 26 | 27 | input_steps_port_a = await context.database.get_input_steps(port_a.persistent_id) 28 | assert len(input_steps_port_a) == 0 29 | output_steps_port_a = await context.database.get_output_steps(port_a.persistent_id) 30 | assert len(output_steps_port_a) == 1 31 | assert output_steps_port_a[0]["step"] == step.persistent_id 32 | 
33 | input_steps_port_b = await context.database.get_input_steps(port_b.persistent_id) 34 | assert len(input_steps_port_b) == 1 35 | assert input_steps_port_b[0]["step"] == step.persistent_id 36 | output_steps_port_b = await context.database.get_output_steps(port_b.persistent_id) 37 | assert len(output_steps_port_b) == 1 38 | assert output_steps_port_b[0]["step"] == step_2.persistent_id 39 | 40 | input_steps_port_c = await context.database.get_input_steps(port_c.persistent_id) 41 | assert len(input_steps_port_c) == 1 42 | assert input_steps_port_c[0]["step"] == step_2.persistent_id 43 | output_steps_port_c = await context.database.get_output_steps(port_c.persistent_id) 44 | assert len(output_steps_port_c) == 0 45 | -------------------------------------------------------------------------------- /streamflow/config/schema.py: -------------------------------------------------------------------------------- 1 | from importlib.resources import files 2 | 3 | from streamflow.config import ext_schemas 4 | from streamflow.core.config import Schema 5 | from streamflow.cwl.requirement.docker import cwl_docker_translator_classes 6 | from streamflow.data import data_manager_classes 7 | from streamflow.deployment import deployment_manager_classes 8 | from streamflow.deployment.connector import connector_classes 9 | from streamflow.deployment.filter import binding_filter_classes 10 | from streamflow.persistence import database_classes 11 | from streamflow.recovery import checkpoint_manager_classes, failure_manager_classes 12 | from streamflow.scheduling import scheduler_classes 13 | from streamflow.scheduling.policy import policy_classes 14 | 15 | 16 | class SfSchema(Schema): 17 | def __init__(self) -> None: 18 | super().__init__( 19 | { 20 | "v1.0": "https://streamflow.di.unito.it/schemas/config/v1.0/config_schema.json" 21 | } 22 | ) 23 | for version in self.configs.keys(): 24 | self.add_schema( 25 | schema=files(__package__) 26 | .joinpath("schemas") 27 | .joinpath(version) 28 
| .joinpath("config_schema.json") 29 | .read_text("utf-8") 30 | ) 31 | self.inject_ext(binding_filter_classes, "bindingFilter") 32 | self.inject_ext(checkpoint_manager_classes, "checkpointManager") 33 | self.inject_ext(cwl_docker_translator_classes, "cwl/docker") 34 | self.inject_ext(database_classes, "database") 35 | self.inject_ext(data_manager_classes, "dataManager") 36 | self.inject_ext(connector_classes, "deployment") 37 | self.inject_ext(deployment_manager_classes, "deploymentManager") 38 | self.inject_ext(failure_manager_classes, "failureManager") 39 | self.inject_ext(policy_classes, "policy") 40 | self.inject_ext(scheduler_classes, "scheduler") 41 | for schema in ext_schemas: 42 | self.add_schema(schema.read_text("utf-8"), embed=True) 43 | self._registry = self.registry.crawl() 44 | -------------------------------------------------------------------------------- /helm/chart/templates/_helpers.tpl: -------------------------------------------------------------------------------- 1 | {{/* vim: set filetype=mustache: */}} 2 | {{/* 3 | Expand the name of the chart. 4 | */}} 5 | {{- define "streamflow.name" -}} 6 | {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} 7 | {{- end -}} 8 | 9 | {{/* 10 | Create a default fully qualified app name. 11 | We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). 12 | If release name contains chart name it will be used as a full name. 
13 | */}} 14 | {{- define "streamflow.fullname" -}} 15 | {{- if .Values.fullnameOverride -}} 16 | {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}} 17 | {{- else -}} 18 | {{- $name := default .Chart.Name .Values.nameOverride -}} 19 | {{- if contains $name .Release.Name -}} 20 | {{- .Release.Name | trunc 63 | trimSuffix "-" -}} 21 | {{- else -}} 22 | {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} 23 | {{- end -}} 24 | {{- end -}} 25 | {{- end -}} 26 | 27 | {{/* 28 | Create chart name and version as used by the chart label. 29 | */}} 30 | {{- define "streamflow.chart" -}} 31 | {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}} 32 | {{- end -}} 33 | 34 | {{/* 35 | Common labels 36 | */}} 37 | {{- define "streamflow.labels" -}} 38 | helm.sh/chart: {{ include "streamflow.chart" . }} 39 | {{ include "streamflow.selectorLabels" . }} 40 | {{- if .Chart.AppVersion }} 41 | app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} 42 | {{- end }} 43 | app.kubernetes.io/managed-by: {{ .Release.Service }} 44 | {{- end -}} 45 | 46 | {{/* 47 | Selector labels 48 | */}} 49 | {{- define "streamflow.selectorLabels" -}} 50 | app.kubernetes.io/name: {{ include "streamflow.name" . }} 51 | app.kubernetes.io/instance: {{ .Release.Name }} 52 | {{- end -}} 53 | 54 | {{/* 55 | Create the name of the service account to use 56 | */}} 57 | {{- define "streamflow.serviceAccountName" -}} 58 | {{- if .Values.serviceAccount.create -}} 59 | {{ default (include "streamflow.fullname" .) 
.Values.serviceAccount.name }} 60 | {{- else -}} 61 | {{ default "default" .Values.serviceAccount.name }} 62 | {{- end -}} 63 | {{- end -}} 64 | -------------------------------------------------------------------------------- /examples/failure/cwl/main.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwl-runner 2 | cwlVersion: v1.1 3 | class: Workflow 4 | $namespaces: 5 | sf: "https://streamflow.org/cwl#" 6 | 7 | requirements: 8 | ScatterFeatureRequirement: { } 9 | SubworkflowFeatureRequirement: { } 10 | 11 | inputs: 12 | num_file: File 13 | main_d: Directory 14 | worker_n: int 15 | 16 | outputs: 17 | result: 18 | type: File 19 | outputSource: comb_step/comb_file 20 | 21 | steps: 22 | distribute_files: 23 | run: master.cwl 24 | doc: | 25 | This step takes as inputs the directory which contains the text files with numbers. 26 | It distributes equally among directories (whose desidered number is worker_n). 27 | Its output is an array of worker_n Directories, each of them containing a certain 28 | number of the text files and each of them becoming an input directory for the worker node. 29 | in: 30 | main_dir: main_d 31 | worker_number: worker_n 32 | out: [ partitioned_dirs ] 33 | 34 | ############################################################## 35 | 36 | open_step: 37 | run: clt/openit.cwl 38 | doc: | 39 | This step takes as input a file that inside there are integers. 40 | Its output is an array of files, every files contains the same number of integers take 41 | from original file, the last file may have padding 42 | in: 43 | num_file: num_file 44 | out: 45 | [ nums ] 46 | 47 | ############################################################## 48 | 49 | sum_step: 50 | run: clt/sumit.cwl 51 | doc: | 52 | This step takes as input all files from step open_step and that given to it from step distribute_files. 
53 | Its output is a file with the sum of all numbers of his input files 54 | scatter: extra_file 55 | in: 56 | num_files: open_step/nums 57 | extra_file: distribute_files/partitioned_dirs 58 | out: 59 | [ sum_file ] 60 | 61 | ############################################################## 62 | 63 | comb_step: 64 | run: clt/combit.cwl 65 | doc: | 66 | This step takes as input all files from step sum_step. 67 | Its output is a only file that contains in every row the number inside the input files 68 | in: 69 | files: sum_step/sum_file 70 | out: 71 | [ comb_file ] 72 | -------------------------------------------------------------------------------- /docs/source/guide/cwl.rst: -------------------------------------------------------------------------------- 1 | =================== 2 | Write your workflow 3 | =================== 4 | 5 | StreamFlow relies on the `Common Workflow Language `_ (CWL) standard to describe workflows. In particular, it supports version ``v1.2`` of the standard, which introduces conditional execution of workflow steps. 6 | 7 | The reader is referred to the `official CWL documentation `_ to learn how the workflow description language works, as StreamFlow does not introduce any modification to the original specification. 8 | 9 | .. note:: 10 | StreamFlow supports all the features required by the CWL standard conformance, and nearly all optional features, for versions ``v1.0``, ``v1.1``, and ``v1.2``. For a complete overview of CWL conformance status, look :ref:`here `. 11 | 12 | The following snippet contain a simple example of CWL workflow, which extracts a Java source file from a tar archive and compiles it. 13 | 14 | .. 
code-block:: yaml 15 | 16 | cwlVersion: v1.2 17 | class: Workflow 18 | inputs: 19 | tarball: File 20 | name_of_file_to_extract: string 21 | 22 | outputs: 23 | compiled_class: 24 | type: File 25 | outputSource: compile/classfile 26 | 27 | steps: 28 | untar: 29 | run: 30 | class: CommandLineTool 31 | baseCommand: [tar, --extract] 32 | inputs: 33 | tarfile: 34 | type: File 35 | inputBinding: 36 | prefix: --file 37 | extractfile: string 38 | outputs: 39 | extracted_file: 40 | type: File 41 | outputBinding: 42 | glob: $(inputs.extractfile) 43 | in: 44 | tarfile: tarball 45 | extractfile: name_of_file_to_extract 46 | out: [extracted_file] 47 | 48 | compile: 49 | run: 50 | class: CommandLineTool 51 | baseCommand: javac 52 | arguments: ["-d", $(runtime.outdir)] 53 | inputs: 54 | src: 55 | type: File 56 | inputBinding: 57 | position: 1 58 | outputs: 59 | classfile: 60 | type: File 61 | outputBinding: 62 | glob: "*.class" 63 | in: 64 | src: untar/extracted_file 65 | out: [classfile] -------------------------------------------------------------------------------- /streamflow/cwl/token.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from collections.abc import MutableSequence 3 | from typing import Any 4 | 5 | from streamflow.core.context import StreamFlowContext 6 | from streamflow.core.data import DataType 7 | from streamflow.cwl import utils 8 | from streamflow.data.remotepath import StreamFlowPath 9 | from streamflow.workflow.token import FileToken 10 | 11 | 12 | async def _get_file_token_weight(context: StreamFlowContext, value: Any): 13 | weight = 0 14 | if "size" in value: 15 | weight = value["size"] 16 | else: 17 | if path := utils.get_path_from_token(value): 18 | data_locations = context.data_manager.get_data_locations( 19 | path=path, data_type=DataType.PRIMARY 20 | ) 21 | if data_locations: 22 | data_location = next(iter(data_locations)) 23 | path = StreamFlowPath( 24 | data_location.path, context=context, 
location=data_location.location 25 | ) 26 | weight = await (await path.resolve()).size() 27 | if "secondaryFiles" in value: 28 | weight += sum( 29 | await asyncio.gather( 30 | *( 31 | asyncio.create_task( 32 | _get_file_token_weight(context=context, value=sf) 33 | ) 34 | for sf in value["secondaryFiles"] 35 | ) 36 | ) 37 | ) 38 | return weight 39 | 40 | 41 | class CWLFileToken(FileToken): 42 | __slots__ = () 43 | 44 | async def get_paths(self, context: StreamFlowContext) -> MutableSequence[str]: 45 | paths = [] 46 | if isinstance(self.value, MutableSequence): 47 | for value in self.value: 48 | if path := utils.get_path_from_token(value): 49 | paths.append(path) 50 | for sf in value.get("secondaryFiles", []): 51 | if path := utils.get_path_from_token(sf): 52 | paths.append(path) 53 | elif self.value and (path := utils.get_path_from_token(self.value)): 54 | paths.append(path) 55 | for sf in self.value.get("secondaryFiles", []): 56 | if path := utils.get_path_from_token(sf): 57 | paths.append(path) 58 | return paths 59 | 60 | async def get_weight(self, context): 61 | return await _get_file_token_weight(context, self.value) 62 | -------------------------------------------------------------------------------- /streamflow/workflow/port.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import logging 4 | from collections.abc import Callable, MutableSequence 5 | 6 | from streamflow.core.deployment import Connector 7 | from streamflow.core.workflow import Job, Port, Token, Workflow 8 | from streamflow.log_handler import logger 9 | from streamflow.workflow.token import TerminationToken 10 | 11 | 12 | class ConnectorPort(Port): 13 | async def get_connector(self, consumer: str) -> Connector: 14 | token = await self.get(consumer) 15 | return self.workflow.context.deployment_manager.get_connector(token.value) 16 | 17 | def put_connector(self, connector_name: str): 18 | 
self.put(Token(value=connector_name)) 19 | 20 | 21 | class JobPort(Port): 22 | async def get_job(self, consumer: str) -> Job | None: 23 | token = await self.get(consumer) 24 | if isinstance(token, TerminationToken): 25 | return None 26 | else: 27 | return token.value 28 | 29 | def put_job(self, job: Job): 30 | self.put(Token(value=job)) 31 | 32 | 33 | class FilterTokenPort(Port): 34 | def __init__( 35 | self, 36 | workflow: Workflow, 37 | name: str, 38 | filter_function: Callable[[Token], bool] | None = None, 39 | ): 40 | super().__init__(workflow, name) 41 | self.filter_function: Callable[[Token], bool] = filter_function or ( 42 | lambda _: True 43 | ) 44 | 45 | def put(self, token: Token) -> None: 46 | if isinstance(token, TerminationToken) or self.filter_function(token): 47 | super().put(token) 48 | elif logger.isEnabledFor(logging.DEBUG): 49 | logger.debug(f"Port {self.name} skips {token.tag}") 50 | 51 | 52 | class InterWorkflowPort(Port): 53 | def __init__(self, workflow: Workflow, name: str): 54 | super().__init__(workflow, name) 55 | self.inter_ports: MutableSequence[tuple[Port, str | None]] = [] 56 | 57 | def add_inter_port(self, port: Port, border_tag: str | None = None) -> None: 58 | self.inter_ports.append((port, border_tag)) 59 | 60 | def put(self, token: Token) -> None: 61 | if not isinstance(token, TerminationToken): 62 | for port, border_tag in self.inter_ports: 63 | if border_tag is None or border_tag == token.tag: 64 | port.put(token) 65 | super().put(token) 66 | 67 | 68 | class InterWorkflowJobPort(InterWorkflowPort, JobPort): 69 | pass 70 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 
16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | junit.xml 50 | *.cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 
92 | #Pipfile.lock 93 | 94 | # celery beat schedule file 95 | celerybeat-schedule 96 | 97 | # SageMath parsed files 98 | *.sage.py 99 | 100 | # Environments 101 | .env 102 | .venv 103 | env/ 104 | venv/ 105 | ENV/ 106 | env.bak/ 107 | venv.bak/ 108 | 109 | # Spyder project settings 110 | .spyderproject 111 | .spyproject 112 | 113 | # Rope project settings 114 | .ropeproject 115 | 116 | # mkdocs documentation 117 | /site 118 | 119 | # mypy 120 | .mypy_cache/ 121 | .dmypy.json 122 | dmypy.json 123 | 124 | # Pyre type checker 125 | .pyre/ 126 | 127 | # IDE 128 | .idea 129 | 130 | # Mac OS X 131 | .DS_Store 132 | 133 | # StreamFlow 134 | .streamflow 135 | 136 | #SQLite 137 | *.db-shm 138 | *.db-wal 139 | -------------------------------------------------------------------------------- /examples/flux/README.md: -------------------------------------------------------------------------------- 1 | # Flux in StreamFlow 2 | 3 | From the root of the repository, build the container: 4 | 5 | ```bash 6 | $ docker build -f examples/flux/Dockerfile -t streamflow-flux . 7 | ``` 8 | 9 | Shell into the container! 10 | 11 | ```bash 12 | $ docker run -it streamflow-flux bash 13 | $ whoami 14 | ``` 15 | ```console 16 | # fluxuser 17 | ``` 18 | 19 | Start a flux instance: 20 | 21 | ```bash 22 | $ flux start --test-size=4 bash 23 | ``` 24 | 25 | Then go into the Flux example directory, and run the workflow. 26 | 27 | ```bash 28 | $ cd examples/flux 29 | $ streamflow run streamflow.yml 30 | ``` 31 | 32 | That will compile a program and run it and exit. Note that the original example 33 | uses `mpirun`, but since Flux has MPI bindings, we replace this with flux run. 
34 | You'll see the streamflow result printed to the screen: 35 | 36 | ```console 37 | 2023-04-02 19:35:18.426 INFO COMPLETED workflow execution 38 | { 39 | "result": { 40 | "basename": "mpi_output.log", 41 | "checksum": "sha1$8abcdbccb5d53018e69ac1c1849f50928a6c4669", 42 | "class": "File", 43 | "dirname": "/code/examples/flux/ecc301a4-6fad-4199-b792-c47caaf7a9da", 44 | "location": "file:///code/examples/flux/ecc301a4-6fad-4199-b792-c47caaf7a9da/mpi_output.log", 45 | "nameext": ".log", 46 | "nameroot": "mpi_output", 47 | "path": "/code/examples/flux/ecc301a4-6fad-4199-b792-c47caaf7a9da/mpi_output.log", 48 | "size": 271 49 | } 50 | } 51 | 2023-04-02 19:35:18.428 INFO UNDEPLOYING dc-mpi 52 | 2023-04-02 19:35:18.443 INFO COMPLETED undeployment of dc-mpi 53 | ``` 54 | 55 | And the output directory will be in your working directory: 56 | 57 | ```bash 58 | $ cat ecc301a4-6fad-4199-b792-c47caaf7a9da/mpi_output.log 59 | ``` 60 | ```console 61 | Hello I'm the server with id 1 on bff3d5c1b83d out of 2 I'm the server 62 | Hello I'm the server with id 0 on bff3d5c1b83d out of 2 I'm the server 63 | Total time (MPI) 1 is 0.000105146 64 | Total time (MPI) 0 is 0.000120071 65 | Total time (gtd) 1 is 0.299063 66 | Total time (gtd) 0 is 0.29899 67 | ``` 68 | 69 | ## Development 70 | 71 | To work in development mode, making changes on your local machine that 72 | persist in the container (you might also want to change the user to ROOT 73 | in the Dockerfile): 74 | 75 | ```bash 76 | $ docker run -it -v $PWD:/code streamflow-flux bash 77 | ``` 78 | 79 | Install in editable mode! 80 | 81 | ```bash 82 | $ pip install -e . 83 | ``` 84 | 85 | Note that for the example here, MPI doesn't like to be run as root, so you'll 86 | get an error. Also note that because the queue managers are run async, it's 87 | challenging to interactively develop. 
88 | -------------------------------------------------------------------------------- /streamflow/deployment/stream.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import asyncio.subprocess 4 | from abc import ABC 5 | from collections.abc import Coroutine 6 | from typing import Any, AsyncContextManager 7 | 8 | from streamflow.core.data import StreamWrapper 9 | 10 | 11 | class BaseStreamWrapper(StreamWrapper): 12 | def __init__(self, stream) -> None: 13 | super().__init__(stream) 14 | self.closed = False 15 | 16 | async def close(self) -> None: 17 | if self.closed: 18 | return 19 | self.closed = True 20 | await self.stream.close() 21 | 22 | async def read(self, size: int | None = None): 23 | return await self.stream.read(size) 24 | 25 | async def write(self, data: Any): 26 | return await self.stream.write(data) 27 | 28 | 29 | class StreamReaderWrapper(StreamWrapper): 30 | async def close(self) -> None: 31 | pass 32 | 33 | async def read(self, size: int | None = None): 34 | return await self.stream.read(size) 35 | 36 | async def write(self, data: Any): 37 | raise NotImplementedError 38 | 39 | 40 | class StreamWriterWrapper(StreamWrapper): 41 | async def close(self) -> None: 42 | self.stream.close() 43 | await self.stream.wait_closed() 44 | 45 | async def read(self, size: int | None = None): 46 | raise NotImplementedError 47 | 48 | async def write(self, data: Any): 49 | self.stream.write(data) 50 | await self.stream.drain() 51 | 52 | 53 | class SubprocessStreamWrapperContextManager(AsyncContextManager[StreamWrapper], ABC): 54 | def __init__(self, coro: Coroutine): 55 | self.coro: Coroutine = coro 56 | self.proc: asyncio.subprocess.Process | None = None 57 | self.stream: StreamWrapper | None = None 58 | 59 | 60 | class SubprocessStreamReaderWrapperContextManager( 61 | SubprocessStreamWrapperContextManager 62 | ): 63 | async def __aenter__(self): 64 | self.proc = await self.coro 65 | 
self.stream = StreamReaderWrapper(self.proc.stdout) 66 | return self.stream 67 | 68 | async def __aexit__(self, exc_type, exc_val, exc_tb): 69 | await self.proc.wait() 70 | if self.stream: 71 | await self.stream.close() 72 | 73 | 74 | class SubprocessStreamWriterWrapperContextManager( 75 | SubprocessStreamWrapperContextManager 76 | ): 77 | async def __aenter__(self): 78 | self.proc = await self.coro 79 | self.stream = StreamWriterWrapper(self.proc.stdin) 80 | return self.stream 81 | 82 | async def __aexit__(self, exc_type, exc_val, exc_tb): 83 | if self.stream: 84 | await self.stream.close() 85 | await self.proc.wait() 86 | -------------------------------------------------------------------------------- /streamflow/recovery/checkpoint_manager.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import asyncio 4 | import os 5 | import tempfile 6 | from collections.abc import MutableSequence 7 | from importlib.resources import files 8 | from typing import TYPE_CHECKING 9 | 10 | from streamflow.core import utils 11 | from streamflow.core.data import DataLocation 12 | from streamflow.core.deployment import ExecutionLocation, LocalTarget 13 | from streamflow.core.recovery import CheckpointManager 14 | from streamflow.core.utils import random_name 15 | 16 | if TYPE_CHECKING: 17 | from streamflow.core.context import StreamFlowContext 18 | 19 | 20 | class DefaultCheckpointManager(CheckpointManager): 21 | def __init__(self, context: StreamFlowContext, checkpoint_dir: str | None = None): 22 | super().__init__(context) 23 | self.checkpoint_dir = checkpoint_dir or os.path.join( 24 | os.path.realpath(tempfile.gettempdir()), 25 | "streamflow", 26 | "checkpoint", 27 | utils.random_name(), 28 | ) 29 | self.copy_tasks: MutableSequence[asyncio.Task[None]] = [] 30 | 31 | async def _async_local_copy(self, data_location: DataLocation) -> None: 32 | parent_directory = os.path.join(self.checkpoint_dir, 
random_name()) 33 | local_path = os.path.join(parent_directory, data_location.relpath) 34 | await self.context.data_manager.transfer_data( 35 | src_location=data_location.location, 36 | src_path=data_location.path, 37 | dst_locations=[ 38 | ExecutionLocation( 39 | deployment=LocalTarget.deployment_name, 40 | local=True, 41 | name="__LOCAL__", 42 | ) 43 | ], 44 | dst_path=local_path, 45 | ) 46 | 47 | async def close(self) -> None: 48 | pass 49 | 50 | @classmethod 51 | def get_schema(cls) -> str: 52 | return ( 53 | files(__package__) 54 | .joinpath("schemas") 55 | .joinpath("default_checkpoint_manager.json") 56 | .read_text("utf-8") 57 | ) 58 | 59 | def register(self, data_location: DataLocation) -> None: 60 | self.copy_tasks.append( 61 | asyncio.create_task(self._async_local_copy(data_location)) 62 | ) 63 | 64 | 65 | class DummyCheckpointManager(CheckpointManager): 66 | async def close(self) -> None: 67 | pass 68 | 69 | @classmethod 70 | def get_schema(cls) -> str: 71 | return ( 72 | files(__package__) 73 | .joinpath("schemas") 74 | .joinpath("dummy_checkpoint_manager.json") 75 | .read_text("utf-8") 76 | ) 77 | 78 | def register(self, data_location: DataLocation) -> None: 79 | pass 80 | -------------------------------------------------------------------------------- /docs/source/guide/architecture.rst: -------------------------------------------------------------------------------- 1 | ============ 2 | Architecture 3 | ============ 4 | 5 | .. image:: ../images/streamflow-model.png 6 | :alt: StreamFlow logical stack 7 | :width: 70% 8 | :align: center 9 | 10 | The StreamFlow logical stack has been specifically developed to orchestrate hybrid workflows on top of heterogeneous and geographically distributed architectures. 11 | 12 | StreamFlow input is composed of three main pieces: 13 | 14 | * A workflow description, i.e. a representation of your application as a graph. 
15 | * One or more deployment descriptions, which are `infrastructure-as-code `_ representation of your execution environments. 16 | * A StreamFlow file to bind each step of your workflow with the most suitable execution environment. 17 | 18 | The rest of the stack is devoted to the remote step execution management, providing automatic deployment and undeployment, data-transfers, data-locality based scheduling, fault-tolerance, etc. 19 | 20 | The environment stack 21 | ===================== 22 | 23 | Another distinctive feature of the StreamFlow WMS is the possibility to manage complex, multi-agent execution environments ensuring the *co-allocation* of multiple heterogeneous processing elements to execute a single workflow step. The main advantage is introducing a unique interface to a diverse ecosystem of distributed applications, from MPI clusters running on HPC to microservices deployed on Kubernetes. 24 | 25 | To provide enough flexibility, StreamFlow adopts a three-layered hierarchical representation of execution environments: 26 | 27 | * A **deployment** is an entire multi-agent infrastructure and constitutes the *unit of deployment*, i.e., all its components are always co-allocated while executing a step. 28 | * A **service** is a single agent type in a deployment and constitutes the *unit of binding*, i.e., each step of a workflow can be offloaded to a single service for execution. 29 | * A **location** is a single instance of a potentially replicated service and constitutes the *unit of scheduling*, i.e., each step of a workflow is offloaded to a configurable number of service locations to be processed. 30 | 31 | Workflow operations 32 | =================== 33 | 34 | You need three different components to run a hybrid workflow with StreamFlow: 35 | 36 | * A :ref:`workflow description `, i.e. a representation of your application as a graph. 37 | * One or more :ref:`deployment descriptions `, i.e. 
infrastructure-as-code representations of your execution environments. 38 | * A :ref:`StreamFlow file ` to bind each step of your workflow with the most suitable execution environment. 39 | 40 | StreamFlow will automatically take care of all the secondary aspects, like checkpointing, fault-tolerance, data movements, etc. -------------------------------------------------------------------------------- /streamflow/core/context.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import asyncio 4 | from abc import ABC, abstractmethod 5 | from collections.abc import MutableMapping 6 | from concurrent.futures import ProcessPoolExecutor 7 | from typing import TYPE_CHECKING, Any 8 | 9 | from streamflow.log_handler import logger 10 | 11 | if TYPE_CHECKING: 12 | from streamflow.core.data import DataManager 13 | from streamflow.core.deployment import DeploymentManager 14 | from streamflow.core.persistence import Database 15 | from streamflow.core.recovery import CheckpointManager, FailureManager 16 | from streamflow.core.scheduling import Scheduler 17 | 18 | 19 | class SchemaEntity(ABC): 20 | @classmethod 21 | @abstractmethod 22 | def get_schema(cls) -> str: ... 
23 | 24 | 25 | class StreamFlowContext: 26 | def __init__( 27 | self, 28 | config: MutableMapping[str, Any], 29 | checkpoint_manager_class: type[CheckpointManager], 30 | database_class: type[Database], 31 | data_manager_class: type[DataManager], 32 | deployment_manager_class: type[DeploymentManager], 33 | failure_manager_class: type[FailureManager], 34 | scheduler_class: type[Scheduler], 35 | ): 36 | self.config: MutableMapping[str, Any] = config 37 | self.checkpoint_manager: CheckpointManager = checkpoint_manager_class( 38 | context=self, **config.get("checkpointManager", {}).get("config", {}) 39 | ) 40 | self.database: Database = database_class( 41 | context=self, **config.get("database", {}).get("config", {}) 42 | ) 43 | self.data_manager: DataManager = data_manager_class( 44 | context=self, **config.get("dataManager", {}).get("config", {}) 45 | ) 46 | self.deployment_manager: DeploymentManager = deployment_manager_class( 47 | context=self, **config.get("deploymentManager", {}).get("config", {}) 48 | ) 49 | self.failure_manager: FailureManager = failure_manager_class( 50 | context=self, **config.get("failureManager", {}).get("config", {}) 51 | ) 52 | self.process_executor: ProcessPoolExecutor = ProcessPoolExecutor() 53 | self.scheduler: Scheduler = scheduler_class( 54 | context=self, **config.get("scheduler", {}).get("config", {}) 55 | ) 56 | 57 | async def close(self) -> None: 58 | try: 59 | await asyncio.gather( 60 | asyncio.create_task(self.checkpoint_manager.close()), 61 | asyncio.create_task(self.data_manager.close()), 62 | asyncio.create_task(self.deployment_manager.close()), 63 | asyncio.create_task(self.failure_manager.close()), 64 | asyncio.create_task(self.scheduler.close()), 65 | ) 66 | except Exception as e: 67 | logger.exception(e) 68 | finally: 69 | await self.database.close() 70 | -------------------------------------------------------------------------------- /docs/source/advanced/stacked-locations.rst: 
-------------------------------------------------------------------------------- 1 | ================= 2 | Stacked locations 3 | ================= 4 | 5 | StreamFlow supports the concept of stacked locations, adhering to the separation of concerns principle. This allows the user to describe complex execution environments, e.g., a :ref:`Singularity container ` launched by a :ref:`Slurm queue manager ` called through an :ref:`SSH connection `. 6 | 7 | Users can define stacked locations using the ``wraps`` property in the :ref:`StreamFlow file `. For example, consider a remote Slurm queue manager that can be contacted by connecting to the login node of an HPC facility using SSH. This is a typical configuration for HPC systems. Then a user can write: 8 | 9 | .. code-block:: yaml 10 | 11 | deployments: 12 | ssh-hpc: 13 | type: ssh 14 | config: 15 | ... 16 | slurm-hpc: 17 | type: slurm 18 | config: 19 | ... 20 | wraps: ssh-hpc 21 | 22 | .. warning:: 23 | 24 | Note that in StreamFlow ``v0.1``, the queue manager connectors (:ref:`Slurm ` and :ref:`PBS `) are inherited from the :ref:`SSHConnector ` at the implementation level. Consequently, all the properties needed to open an SSH connection to the HPC login node (e.g., ``hostname``, ``username``, and ``sshKey``) were defined directly in the ``config`` section of the queue manager deployment. This path is still supported by StreamFlow ``v0.2``, but it is deprecated and will be removed in StreamFlow ``v0.3``. 25 | 26 | Note that not all deployment types can wrap other locations. Indeed, only connectors extending the :ref:`ConnectorWrapper ` interface support the ``wraps`` directive. Specifying the ``wraps`` directive on a container type that does not support it will result in an error during StreamFlow initialization. Conversely, if no explicit ``wraps`` directive is specified for a :ref:`ConnectorWrapper `, it wraps the :ref:`LocalConnector `. 27 | 28 | The ``wraps`` directive only supports wrapping a single inner location. 
However, a single location can be wrapped by multiple deployment definitions. The :ref:`DeploymentManager ` component must guarantee the correct deployment and undeployment order for stacked locations. 29 | 30 | It is also possible to wrap a single `service` instead of generically wrapping the whole `deployment`. This feature can be helpful when dealing with complex deployments that describe entire microservices architectures. To do that, it is necessary to specify the target `service` name in the StreamFlow file as follows: 31 | 32 | .. code-block:: yaml 33 | 34 | deployments: 35 | slurm-compose: 36 | type: docker-compose 37 | config: 38 | ... 39 | slurm: 40 | type: slurm 41 | config: 42 | ... 43 | wraps: 44 | deployment: slurm-compose 45 | service: controller 46 | -------------------------------------------------------------------------------- /streamflow/persistence/schemas/sqlite.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE IF NOT EXISTS workflow 2 | ( 3 | id INTEGER PRIMARY KEY, 4 | name TEXT, 5 | params TEXT, 6 | status INTEGER, 7 | type TEXT, 8 | start_time INTEGER, 9 | end_time INTEGER 10 | ); 11 | 12 | CREATE TABLE IF NOT EXISTS step 13 | ( 14 | id INTEGER PRIMARY KEY, 15 | name TEXT, 16 | workflow INTEGER, 17 | status INTEGER, 18 | type TEXT, 19 | params TEXT, 20 | FOREIGN KEY (workflow) REFERENCES workflow (id) 21 | ); 22 | 23 | CREATE TABLE IF NOT EXISTS port 24 | ( 25 | id INTEGER PRIMARY KEY, 26 | name TEXT, 27 | workflow INTEGER, 28 | type TEXT, 29 | params TEXT, 30 | FOREIGN KEY (workflow) REFERENCES workflow (id) 31 | ); 32 | 33 | 34 | CREATE TABLE IF NOT EXISTS dependency 35 | ( 36 | step INTEGER, 37 | port INTEGER, 38 | type INTEGER, 39 | name TEXT, 40 | PRIMARY KEY (step, port, type, name), 41 | FOREIGN KEY (step) REFERENCES step (id), 42 | FOREIGN KEY (port) REFERENCES port (id) 43 | ); 44 | 45 | 46 | CREATE TABLE IF NOT EXISTS execution 47 | ( 48 | id INTEGER PRIMARY KEY, 49 | step 
INTEGER, 50 | job_token INTEGER, 51 | cmd TEXT, 52 | status INTEGER, 53 | start_time INTEGER, 54 | end_time INTEGER, 55 | FOREIGN KEY (step) REFERENCES step (id) 56 | ); 57 | 58 | 59 | CREATE TABLE IF NOT EXISTS token 60 | ( 61 | id INTEGER PRIMARY KEY, 62 | port INTEGER, 63 | tag TEXT, 64 | type TEXT, 65 | value BLOB, 66 | FOREIGN KEY (port) REFERENCES port (id) 67 | ); 68 | 69 | CREATE TABLE IF NOT EXISTS recoverable 70 | ( 71 | id INTEGER PRIMARY KEY, 72 | FOREIGN KEY (id) REFERENCES token (id) 73 | ); 74 | 75 | 76 | CREATE TABLE IF NOT EXISTS provenance 77 | ( 78 | dependee INTEGER, 79 | depender INTEGER, 80 | PRIMARY KEY (dependee, depender), 81 | FOREIGN KEY (dependee) REFERENCES token (id), 82 | FOREIGN KEY (depender) REFERENCES token (id) 83 | ); 84 | 85 | 86 | CREATE TABLE IF NOT EXISTS deployment 87 | ( 88 | id INTEGER PRIMARY KEY, 89 | name TEXT, 90 | type TEXT, 91 | config TEXT, 92 | external INTEGER, 93 | lazy INTEGER, 94 | scheduling_policy TEXT, 95 | workdir TEXT, 96 | wraps TEXT 97 | ); 98 | 99 | 100 | CREATE TABLE IF NOT EXISTS target 101 | ( 102 | id INTEGER PRIMARY KEY, 103 | deployment INTEGER, 104 | type TEXT, 105 | locations INTEGER, 106 | service TEXT, 107 | workdir TEXT, 108 | params TEXT, 109 | FOREIGN KEY (deployment) REFERENCES deployment (id) 110 | ); 111 | 112 | 113 | CREATE TABLE IF NOT EXISTS filter 114 | ( 115 | id INTEGER PRIMARY KEY, 116 | name TEXT, 117 | type TEXT, 118 | config TEXT 119 | ); -------------------------------------------------------------------------------- /docs/source/ext/cwl-docker-translator.rst: -------------------------------------------------------------------------------- 1 | =================== 2 | CWLDockerTranslator 3 | =================== 4 | 5 | StreamFlow relies on a ``CWLDockerTranslator`` object to convert a CWL `DockerRequirement `_ specification into a step binding on a given :ref:`Connector ` instance. By default, the :ref:`DockerCWLDockerTranslator ` is used to spawn a :ref:`DockerConnector `. 
6 | However, StreamFlow also supports translators for :ref:`Kubernetes `, :ref:`Singularity ` and :ref:`NoContainer `. This latter allows to execute the step without a container, even if the ``DockerRequirement`` feature is defined. More ``CWLDockerTranslators`` can be implemented by the community using the :ref:`plugins ` mechanism (see :ref:`here `). 7 | 8 | The ``CWLDockerTranslator`` interface is defined in the ``streamflow.cwl.requirement.docker.translator`` module and exposes a single public method ``get_target``: 9 | 10 | .. code-block:: python 11 | 12 | def get_target( 13 | self, 14 | image: str, 15 | output_directory: str | None, 16 | network_access: bool, 17 | target: Target, 18 | ) -> Target: 19 | ... 20 | 21 | The ``get_target`` method returns a ``Target`` object that contains an auto-generated ``DeploymentConfig`` that reflects the ``CWLDockerTranslator`` configuration. The ``target`` parameter contains the original ``Target`` object of the related step. If the ``Connector`` created by the ``CWLDockerTranslator`` extends the :ref:`ConnectorWrapper ` class and the ``wrapper`` directive is defined as ``True`` in the StreamFlow file, the newly created ``Target`` object wraps the original one. 22 | 23 | The other parameters derive from the CWL workflow specification. In particular, the ``image`` parameter points to the Docker image needed by the step. The ``output_directory`` parameter reflects the ``dockerOutputDirectory`` option of a CWL ``DockerRequirement``. The ``network_access`` parameter derives from the CWL `NetworkAccess `_ requirement. 
24 | 25 | Implementations 26 | =============== 27 | 28 | =================================================== ================================================================ 29 | Type Class 30 | =================================================== ================================================================ 31 | :ref:`docker ` streamflow.cwl.requirement.docker.DockerCWLDockerTranslator 32 | :ref:`kubernetes ` streamflow.cwl.requirement.docker.KubernetesCWLDockerTranslator 33 | :ref:`singularity ` streamflow.cwl.requirement.docker.SingularityCWLDockerTranslator 34 | =================================================== ================================================================ 35 | -------------------------------------------------------------------------------- /docs/source/cwl/docker-requirement.rst: -------------------------------------------------------------------------------- 1 | ====================== 2 | CWL Docker Requirement 3 | ====================== 4 | 5 | The CWL standard supports a ``DockerRequirement`` feature to execute one or more workflow steps inside a `Docker container `_. A CWL runner must then ensure that all input files are available inside the container and choose a specific Docker runner to deploy the container. For example, the following script invokes a `Node.js `_ command inside a Docker image called `node:slim `_: 6 | 7 | .. code-block:: yaml 8 | 9 | cwlVersion: v1.2 10 | class: CommandLineTool 11 | baseCommand: node 12 | requirements: 13 | DockerRequirement: 14 | dockerPull: node:slim 15 | inputs: 16 | src: 17 | type: File 18 | inputBinding: 19 | position: 1 20 | outputs: 21 | example_out: 22 | type: stdout 23 | stdout: output.txt 24 | 25 | By default, StreamFlow automatically maps a step with the ``DockerRequirement`` option onto a :ref:`Docker ` deployment with the specified image. This mapping is pretty much equivalent to the following ``streamflow.yml`` file: 26 | 27 | .. 
code-block:: yaml 28 | 29 | version: v1.0 30 | workflows: 31 | example: 32 | type: cwl 33 | config: 34 | file: processfile 35 | settings: jobfile 36 | bindings: 37 | - step: / 38 | target: 39 | deployment: docker-example 40 | 41 | deployments: 42 | docker-example: 43 | type: docker 44 | config: 45 | image: node:slim 46 | 47 | StreamFlow also supports the possibility to map a CWL ``DockerRequirement`` onto different types of connectors through the :ref:`CWLDockerTranslator ` extension point. In particular, the ``docker`` section of a workflow configuration can bind each step or subworkflow to a specific translator type, making it possible to convert a pure CWL workflow with ``DockerRequirement`` features into a hybrid workflow. The available translator types are: ``docker``, ``kubernetes``, ``none`` and ``singularity``. 48 | 49 | As an example, the following ``streamflow.yml`` file runs the above ``CommandLineTool`` using a :ref:`SingularityConnector ` instead of a :ref:`DockerConnector ` to spawn the container: 50 | 51 | .. code-block:: yaml 52 | 53 | version: v1.0 54 | workflows: 55 | example: 56 | type: cwl 57 | config: 58 | file: processfile 59 | settings: jobfile 60 | docker: 61 | - step: / 62 | deployment: 63 | type: singularity 64 | config: {} 65 | 66 | In detail, StreamFlow instantiates a :ref:`SingularityCWLDockerTranslator ` passing the content of the ``config`` field directly to the constructor. The translator is then in charge of generating a :ref:`SingularityConnector ` instance with the specified configuration for each CWL ``DockerRequirement`` configuration in the target subworkflow. 
-------------------------------------------------------------------------------- /streamflow/report.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import argparse 4 | import csv 5 | import os 6 | 7 | from streamflow.core.context import StreamFlowContext 8 | 9 | 10 | def _export_to_file(fig, args: argparse.Namespace, path: str) -> None: 11 | import plotly.io as pio 12 | 13 | if "html" in args.format: 14 | fullpath = f"{path}.html" 15 | pio.write_html(fig, file=fullpath) 16 | print(f"Report saved to {fullpath}") 17 | if "json" in args.format: 18 | fullpath = f"{path}.json" 19 | pio.write_json(fig, file=fullpath) 20 | print(f"Report saved to {fullpath}") 21 | for f in (f for f in args.format if f not in ["csv", "html", "json"]): 22 | fullpath = f"{path}.{f}" 23 | pio.write_image( 24 | fig, 25 | format=f, 26 | file=fullpath, 27 | ) 28 | print(f"Report saved to {fullpath}") 29 | 30 | 31 | async def create_report(context: StreamFlowContext, args: argparse.Namespace) -> None: 32 | import pandas as pd 33 | import plotly.express as px 34 | 35 | # Retrieve data 36 | path = os.path.abspath( 37 | os.path.join(args.outdir or os.getcwd(), args.name or "report") 38 | ) 39 | reports = [] 40 | workflows = [w.strip() for w in args.workflows.split(",")] 41 | for workflow in workflows: 42 | if wf_reports := [ 43 | r 44 | for r in await context.database.get_reports( 45 | workflow, last_only=not args.all 46 | ) 47 | if r 48 | ]: 49 | for report in wf_reports: 50 | reports.extend( 51 | [ 52 | { 53 | **row, 54 | **{ 55 | "name": ( 56 | workflow + row["name"] 57 | if len(workflows) > 1 58 | else row["name"] 59 | ) 60 | }, 61 | } 62 | for row in report 63 | ] 64 | ) 65 | # If workflow has no step, simply print a message and exit 66 | else: 67 | print( 68 | f"Workflow {workflow} did not execute any step: no report has been generated" 69 | ) 70 | # If output format is csv, print DataFrame 71 | if "csv" in 
args.format: 72 | with open(f"{path}.csv", "w") as f: 73 | writer = csv.DictWriter(f, reports[0].keys()) 74 | writer.writeheader() 75 | writer.writerows(reports) 76 | print(f"Report saved to {path}.csv") 77 | # Pre-process data 78 | df = pd.DataFrame(data=reports) 79 | df["id"] = df["id"].map(str) 80 | df["start_time"] = pd.to_datetime(df["start_time"]) 81 | df["end_time"] = pd.to_datetime(df["end_time"]) 82 | # Create chart 83 | fig = px.timeline( 84 | df, 85 | x_start="start_time", 86 | x_end="end_time", 87 | y="name" if args.group_by_step else "id", 88 | color="name", 89 | ) 90 | fig.update_yaxes(visible=False) 91 | # Export to file 92 | _export_to_file(fig, args, path) 93 | -------------------------------------------------------------------------------- /docs/source/ext/deployment-manager.rst: -------------------------------------------------------------------------------- 1 | ================= 2 | DeploymentManager 3 | ================= 4 | 5 | The ``DeploymentManager`` interface instantiates and manages :ref:`Connector ` objects for each ``deployment`` object described in a :ref:`StreamFlow file `. It is defined in the ``streamflow.core.deployment`` module and exposes several public methods: 6 | 7 | .. code-block:: python 8 | 9 | async def close(self) -> None: 10 | ... 11 | 12 | async def deploy( 13 | self, deployment_config: DeploymentConfig 14 | ) -> None: 15 | ... 16 | 17 | def get_connector( 18 | self, deployment_name: str 19 | ) -> Connector | None: 20 | ... 21 | 22 | async def undeploy( 23 | self, deployment_name: str 24 | ) -> None: 25 | ... 26 | 27 | async def undeploy_all(self) -> None: 28 | ... 29 | 30 | The ``deploy`` method instantiates a ``Connector`` object starting from the given ``DeploymentConfig`` object, which derives from the ``deployments`` section in the StreamFlow file. Then, it deploys the related execution environment by calling the ``deploy`` method of the ``Connector`` object. 
Note that if a deployment ``wraps`` another environment (see :ref:`here `), the wrapped environment must be deployed before the wrapper one. It is in charge of each ``DeploymentManager`` implementation to correctly manage these dependencies, potentially throwing a ``WorkflowDefinitionException`` in case of misspecifications (e.g., circular dependencies). Also, it is in charge of the ``DeploymentManager`` to correctly handle concurrent calls to the ``deploy`` method with the same target deployment, e.g., to avoid spurious multiple deployments of identical infrastructures. 31 | 32 | The ``get_connector`` method returns the ``Connector`` object related to the ``deployment_name`` input parameter or ``None`` if the environment has not been deployed yet. Note that calling ``get_connector`` before calling ``deploy`` or after calling ``undeploy`` on the related environment should always return ``None``. 33 | 34 | The ``undeploy`` method undeploys the target execution infrastructure, identified by the ``deployment_name`` input parameter, by calling the ``undeploy`` method of the related ``Connector`` object. Plus, it marks the ``Connector`` object as invalid. It is in charge of the ``DeploymentManager`` to correctly handle concurrent calls to the ``undeploy`` method with the same target deployment. 35 | 36 | The ``undeploy_all`` method undeploys all the active execution environments. It is equivalent to calling the ``undeploy`` method on each active deployment. StreamFlow always calls this method before terminating to clean up the execution interface. 37 | 38 | The ``close`` method receives no input parameter and does not return anything. It frees stateful resources potentially allocated during the object’s lifetime, e.g., network or database connections. 
39 | 40 | Implementations 41 | =============== 42 | 43 | ======= ====================================================== 44 | Type Class 45 | ======= ====================================================== 46 | default streamflow.deployment.manager.DefaultDeploymentManager 47 | ======= ====================================================== 48 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | ========== 2 | StreamFlow 3 | ========== 4 | 5 | The StreamFlow framework is a container-native Workflow Management System written in Python 3 and based on the `Common Workflow Language `_ (CWL) standard. 6 | 7 | It has been designed around two main principles: 8 | 9 | 1. Allowing the execution of tasks in **multi-container environments**, in order to support concurrent execution of multiple communicating tasks in a multi-agent ecosystem. 10 | 2. Relaxing the requirement of a single shared data space, in order to allow for **hybrid workflow** executions on top of multi-cloud or hybrid cloud/HPC infrastructures. 11 | 12 | StreamFlow source code is available on :repo:`GitHub <.>` under the LGPLv3 license. If you want to cite StreamFlow, please refer to this article: 13 | 14 | .. code-block:: text 15 | 16 | I. Colonnelli, B. Cantalupo, I. Merelli and M. Aldinucci, 17 | "StreamFlow: cross-breeding cloud with HPC," 18 | in IEEE Transactions on Emerging Topics in Computing, vol. 9, iss. 4, p. 1723-1737, 2021. 19 | doi: 10.1109/TETC.2020.3019202. 20 | 21 | For LaTeX users, the following BibTeX entry can be used: 22 | 23 | .. 
code-block:: bibtex 24 | 25 | @article{StreamFlow, 26 | author = {Iacopo Colonnelli and Barbara Cantalupo and Ivan Merelli and Marco Aldinucci}, 27 | doi = {10.1109/TETC.2020.3019202}, 28 | journal = {{IEEE} {T}ransactions on {E}merging {T}opics in {C}omputing}, 29 | title = {{StreamFlow}: cross-breeding cloud with {HPC}}, 30 | url = {https://doi.org/10.1109/TETC.2020.3019202}, 31 | volume = {9}, 32 | number = {4}, 33 | pages = {1723-1737}, 34 | year = {2021} 35 | } 36 | 37 | .. toctree:: 38 | :caption: Getting Started 39 | :hidden: 40 | 41 | guide/install.rst 42 | guide/architecture.rst 43 | guide/cwl.rst 44 | guide/deployments.rst 45 | guide/bind.rst 46 | guide/run.rst 47 | guide/inspect.rst 48 | 49 | .. toctree:: 50 | :caption: Advanced Features 51 | :hidden: 52 | 53 | advanced/multiple-targets.rst 54 | advanced/port-targets.rst 55 | advanced/stacked-locations.rst 56 | 57 | .. toctree:: 58 | :caption: CWL Standard 59 | :hidden: 60 | 61 | cwl/cwl-conformance.rst 62 | cwl/cwl-runner.rst 63 | cwl/docker-requirement.rst 64 | 65 | .. toctree:: 66 | :caption: Extension Points 67 | :hidden: 68 | 69 | ext/plugins.rst 70 | ext/binding-filter.rst 71 | ext/cwl-docker-translator.rst 72 | ext/connector.rst 73 | ext/data-manager.rst 74 | ext/database.rst 75 | ext/deployment-manager.rst 76 | ext/fault-tolerance.rst 77 | ext/scheduling.rst 78 | 79 | .. toctree:: 80 | :caption: Connectors 81 | :hidden: 82 | 83 | connector/container.rst 84 | connector/docker.rst 85 | connector/docker-compose.rst 86 | connector/flux.rst 87 | connector/helm3.rst 88 | connector/kubernetes.rst 89 | connector/occam.rst 90 | connector/pbs.rst 91 | connector/queue-manager.rst 92 | connector/singularity.rst 93 | connector/slurm.rst 94 | connector/ssh.rst 95 | 96 | .. 
In this way, users can offload jobs to local or remote Flux controllers using the :ref:`stacked locations ` mechanism.
Alternatively, users can pass Flux options directly from YAML using the other options of a :ref:`FluxService ` object.

As an example, suppose to have a Flux template script called ``batch.sh``, with the following content:

.. code-block:: bash

    #!/bin/bash

    #flux --nodes=1
    #flux --queue=queue_name

    {{streamflow_command}}

A Flux deployment configuration which uses the ``batch.sh`` file to spawn jobs can be written as follows:

.. code-block:: yaml

    deployments:
      flux-example:
        type: flux
        config:
          services:
            example:
              file: batch.sh

Alternatively, the same behaviour can be recreated by directly passing options through the YAML configuration, as follows:

.. code-block:: yaml

    deployments:
      flux-example:
        type: flux
        config:
          services:
            example:
              nodes: 1
              queue: queue_name
1-2, for PDS-physics class 2010 4 | 5 | #include "mpi.h" 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | enum messages {msg_tag,eos_tag}; 13 | 14 | 15 | static inline const double diffmsec(const struct timeval & a, 16 | const struct timeval & b) { 17 | long sec = (a.tv_sec - b.tv_sec); 18 | long usec = (a.tv_usec - b.tv_usec); 19 | 20 | if(usec < 0) { 21 | --sec; 22 | usec += 1000000; 23 | } 24 | return ((double)(sec*1000)+ (double)usec/1000.0); 25 | } 26 | 27 | int main( int argc, char **argv ) 28 | { 29 | int myid,numprocs,namelen; 30 | char processor_name[MPI_MAX_PROCESSOR_NAME]; 31 | double t0,t1; 32 | struct timeval wt1,wt0; 33 | // MPI_Wtime cannot be called here 34 | gettimeofday(&wt0,NULL); 35 | MPI_Init(&argc,&argv ); 36 | t0 = MPI_Wtime(); 37 | //gettimeofday(&wt0,NULL); 38 | MPI_Comm_size(MPI_COMM_WORLD,&numprocs); 39 | MPI_Comm_rank(MPI_COMM_WORLD,&myid); 40 | MPI_Get_processor_name(processor_name,&namelen); 41 | srand(time(NULL)); 42 | if (myid == 0) { 43 | // This is the server code 44 | int n_eos = 0; 45 | std::cout << "Hello I'm the server with id " << myid << " on " << processor_name 46 | << " out of " << numprocs << " I'm the server\n"; 47 | while (true) { 48 | MPI_Status status; 49 | int target; 50 | 51 | MPI_Recv(&target,1, MPI_INT, MPI_ANY_SOURCE,MPI_ANY_TAG, 52 | MPI_COMM_WORLD, &status); 53 | if (status.MPI_TAG==eos_tag) { 54 | std::cout << "EOS from " << status.MPI_SOURCE << " received\n"; 55 | if (++n_eos>=(numprocs-1)) break; 56 | } else { 57 | std::cout << "[server] Request from " << status.MPI_SOURCE << " : " 58 | << target << " --> " << target*target << "\n"; 59 | target *=target; 60 | MPI_Send(&target,1, MPI_INT,status.MPI_SOURCE,msg_tag,MPI_COMM_WORLD); 61 | } 62 | } 63 | } else { 64 | // This is the client code 65 | int request; 66 | int rep=0; 67 | int noreq = random()&11; 68 | std::cerr << "N. 
req " << noreq << "\n"; 69 | MPI_Status status; 70 | std::cout << "Hello I'm " << myid << " on " << processor_name 71 | << " out of " << numprocs << " I'm a client\n"; 72 | while (rep ((diffmsec(wt1,wt0)/1000)/50)) 94 | std::cout << "Why the two measurements are sensibly different?\n"; 95 | return 0; 96 | } 97 | -------------------------------------------------------------------------------- /docs/source/connector/pbs.rst: -------------------------------------------------------------------------------- 1 | ===================== 2 | PBSConnector 3 | ===================== 4 | 5 | The `PBS `_ connector allows offloading execution to High-Performance Computing (HPC) facilities orchestrated by the PBS queue manager. It extends the :ref:`QueueManagerConnector `, which inherits from the :ref:`ConnectorWrapper ` interface, allowing users to offload jobs to local or remote PBS controllers using the :ref:`stacked locations ` mechanism. The HPC facility is supposed to be constantly active, reducing the deployment phase to deploy the inner connector (e.g., to create an :ref:`SSH Connection ` pointing to an HPC login node). 6 | 7 | .. warning:: 8 | 9 | Note that in StreamFlow ``v0.1``, the ``QueueManagerConnector`` directly inherited from the :ref:`SSHConnector ` at the implementation level. Consequently, all the properties needed to open an SSH connection to the HPC login node (e.g., ``hostname``, ``username``, and ``sshKey``) were defined directly in the ``QueueManagerConnector``. This path is still supported by StreamFlow ``v0.2``, but it is deprecated and will be removed in StreamFlow ``v0.3``. 10 | 11 | Interaction with the PBS scheduler happens through a Bash script with ``#PBS`` directives. Users can pass the path of a custom script to the connector using the ``file`` attribute of the :ref:`PBSService ` configuration. This file is interpreted as a `Jinja2 `_ template and populated at runtime by the connector. 
Alternatively, users can pass PBS options directly from YAML using the other options of a :ref:`PBSService ` object. 12 | 13 | As an example, suppose to have a PBS template script called ``qsub.sh``, with the following content: 14 | 15 | .. code-block:: bash 16 | 17 | #!/bin/bash 18 | 19 | #PBS -l nodes=1 20 | #PBS -q queue_name 21 | #PBS -l mem=1gb 22 | 23 | {{streamflow_command}} 24 | 25 | A PBS deployment configuration which uses the ``qsub.sh`` file to spawn jobs can be written as follows: 26 | 27 | .. code-block:: yaml 28 | 29 | deployments: 30 | pbs-example: 31 | type: pbs 32 | config: 33 | services: 34 | example: 35 | file: qsub.sh 36 | 37 | Alternatively, the same behaviour can be recreated by directly passing options through the YAML configuration, as follows: 38 | 39 | .. code-block:: yaml 40 | 41 | deployments: 42 | pbs-example: 43 | type: pbs 44 | config: 45 | services: 46 | example: 47 | destination: queue_name 48 | resources: 49 | mem: 1gb 50 | nodes: 1 51 | 52 | Being passed directly to the ``qsub`` command line, the YAML options have higher priority than the file-based ones. 53 | 54 | .. warning:: 55 | 56 | Note that the ``file`` property in the upper configuration level, i.e., outside a ``service`` definition, is still supported in StreamFlow ``v0.2``, but it is deprecated and will be removed in StreamFlow ``v0.3``. 57 | 58 | The unit of binding is the entire HPC facility. In contrast, the scheduling unit is a single job placement in the PBS queue. Users can limit the maximum number of concurrently placed jobs by setting the ``maxConcurrentJobs`` parameter. 59 | 60 | .. 
The StreamFlow Command Line Interface (CLI) offers several features to inspect workflow runs and collect metadata from them.
code-block:: bash 27 | 28 | streamflow list 29 | 30 | For example: 31 | 32 | ================================ ================================ ========== 33 | START_TIME END_TIME STATUS 34 | ================================ ================================ ========== 35 | 2023-03-14T10:44:11.304081+00:00 2023-03-14T10:44:18.345231+00:00 FAILED 36 | 2023-03-14T10:45:28.305321+00:00 2023-03-14T10:46:21.274293+00:00 COMPLETED 37 | ================================ ================================ ========== 38 | 39 | Generate a report 40 | ================= 41 | 42 | To generate a timeline report of a workflow execution, use the following subcommand: 43 | 44 | .. code-block:: bash 45 | 46 | streamflow report 47 | 48 | By default, an interactive ``HTML`` report is generated, but users can specify a different format through the ``--format`` option. 49 | 50 | It is also possible to generate a single report from a list of workflows by passing a comma-separated list of workflow names, as follows 51 | 52 | .. code-block:: bash 53 | 54 | streamflow report ,,... 55 | 56 | Collect provenance data 57 | ======================= 58 | 59 | StreamFlow supports the `Workflow Run RO-Crate `_ provenance format, an `RO-Crate `_ profile for capturing the provenance of an execution of a computational workflow. 60 | 61 | To generate a provenance archive containing the last execution of a given workflow name (see :ref:`above `), use the following command: 62 | 63 | .. code-block:: bash 64 | 65 | streamflow prov 66 | 67 | The ``--all`` option can instead be used to include the whole history of workflow execution inside a single archive. 68 | 69 | The ``--name`` option defines the name of the archive. By default, the archive will take the workflow name as basename and ``.crate.zip`` as extension. 70 | 71 | The ``--outdir`` option states in which location the archive will be placed (by default, it will be created in the current directory). 
-------------------------------------------------------------------------------- /examples/mpi/environment/docker-compose/id_rsa: -------------------------------------------------------------------------------- 1 | -----BEGIN OPENSSH PRIVATE KEY----- 2 | b3BlbnNzaC1rZXktdjEAAAAABG5vbmUAAAAEbm9uZQAAAAAAAAABAAACFwAAAAdzc2gtcn 3 | NhAAAAAwEAAQAAAgEAyE/u9q5T8bC1D4KYfL2siM7x1Xg5uiHm8CgBnIN5w1HeR4m62aMp 4 | lCnfkEx+hS8qfjLeEepZEO5HSlrvO26tlPy111G0g2i0RPsorv4qR2T17gJylIqRD2WVLd 5 | KAMwwAvrZRbloaSh8sQesZLUsrN5UMVcB0/V/dRDPUYVmriZArzGxdH9HxjQeK6WgSkYiR 6 | dEEVXuhJfKvC+eM88ukhGB+d8JTYCREtKDWpo3TeWfXSg+yl2I5DVt9wWyKchrt9hD1tiK 7 | drBnNm67MHyzHbo+V4Mrzvsz7yuKCfkVyZuSTNtx0zdVY6FCrEUuW5OvvldqdtemXkqxSk 8 | 7bvoYjJkTnWfxu37bL4WQEFkkKWFSorl3WegO3krNZhWKd/POuGpZfo0/poY3frAfM0m2e 9 | a8AX3RphavMMoa3iVrVn9z56k8bLSRU9GM4ahu4Uh7DP4J48I8BvNqQinIR7/TlXpeAbH8 10 | 45hOKBr6JPkVrhoSk97p10cU3MGfYLb0zVHPdhqhMNwsuGOS7qil0UHj91PBP5SQVZS+Vf 11 | v61p/sduaBG25jCr6p3MU7m0jUpnOWpRhRUyBsGBdwQq/kVI9KaWw0lBy24oDMhhxcS8ZX 12 | Rg/PO7i87XR0oyE0c32o4zgn4rcD8FfUy17p6sjgQwYge5qzcJxA8kqaYOtgggCd+AjsKo 13 | kAAAdQGhsNOBobDTgAAAAHc3NoLXJzYQAAAgEAyE/u9q5T8bC1D4KYfL2siM7x1Xg5uiHm 14 | 8CgBnIN5w1HeR4m62aMplCnfkEx+hS8qfjLeEepZEO5HSlrvO26tlPy111G0g2i0RPsorv 15 | 4qR2T17gJylIqRD2WVLdKAMwwAvrZRbloaSh8sQesZLUsrN5UMVcB0/V/dRDPUYVmriZAr 16 | zGxdH9HxjQeK6WgSkYiRdEEVXuhJfKvC+eM88ukhGB+d8JTYCREtKDWpo3TeWfXSg+yl2I 17 | 5DVt9wWyKchrt9hD1tiKdrBnNm67MHyzHbo+V4Mrzvsz7yuKCfkVyZuSTNtx0zdVY6FCrE 18 | UuW5OvvldqdtemXkqxSk7bvoYjJkTnWfxu37bL4WQEFkkKWFSorl3WegO3krNZhWKd/POu 19 | GpZfo0/poY3frAfM0m2ea8AX3RphavMMoa3iVrVn9z56k8bLSRU9GM4ahu4Uh7DP4J48I8 20 | BvNqQinIR7/TlXpeAbH845hOKBr6JPkVrhoSk97p10cU3MGfYLb0zVHPdhqhMNwsuGOS7q 21 | il0UHj91PBP5SQVZS+Vfv61p/sduaBG25jCr6p3MU7m0jUpnOWpRhRUyBsGBdwQq/kVI9K 22 | aWw0lBy24oDMhhxcS8ZXRg/PO7i87XR0oyE0c32o4zgn4rcD8FfUy17p6sjgQwYge5qzcJ 23 | xA8kqaYOtgggCd+AjsKokAAAADAQABAAACADzvE+I2ZZLADlQJhhlsGXAEg8xMJkNYzMeB 24 | Y/wYgpHH9bYNT6mk2KEZm8hpfJNHJcX980+/2hgsY8NapiuOH3+S4D3/vMm8sBnLzTRuXK 25 | 
4bKDSeyfwlqrrkS90Ei7r7i7539416G0Uw9mWW/rGojBz8WiQ7x1rMLqLwoUp73JKcKLam 26 | wC1N0Az2nmsiHYPn51dV9QpJ3xDtrwIU/hVmlYqhE4p7vLwFXZc9u7zdcEYtYTyAYHLf09 27 | w5XBRdEwBpSGVRymAnjYcI0+YfL9+/6vCQfV/x2XjNTUick+3EHX8Ukv+BAoZPFJvN5qrQ 28 | 9ufaeJL/m24AUO85+HHZF3fHnHs3nchFAvQjcp+X93YxqvubuxHnoYA1PfeaFubEr+1WpU 29 | IzRUCo2pzbktAgVRQ4V0i6LSxkBBh/nmuOKPVqJBA8MT4fT9Y6wPY19h72B+zZ80FCQvDs 30 | +5JTt99PdPdwNPRPrGxdVr+hGhij5Rvu8gY9PHWT7PRuZPECx3bbaM8aWK3uOmAMwTw9fp 31 | VV7hWqBe+q5Mbyqpj03MWc8YWSBTPjaD6MjoET5Xp0c3dX9oudBhn6JndpGz0HY4ncDuvf 32 | jtv5L3ShQ+pWKIbUQH86d9g72/iNFZUf5i9JEOec8GuG3bWiuZJiZpxQN4R8VBDaB9NL31 33 | SvP1eWBGh+6nAaz6+BAAABAD0F18D1JlPtofwOcNOaIb7gafmvYSeFJsQ2dAxACmGeJHMu 34 | cr5coQBKBN3EqyKP4DbDmVR02gHxgvnltmtKZ6OQABGceoDKXJEWWfLu/h9PPco9zN3tco 35 | GXDxEKa/Q92wvUv0nbbSLvz02DNU90lg5sZ9PYlo7cDC59T0ubmuzdtghprf7K2Mm6Bzxi 36 | 0g2LbmBrhsUYPFjjFCM1dBCfA6xiHvRbhzuRTmCd2Rbq6r0jXYHNdvcUFzyGXsR7ak7Qoo 37 | eh3JnTO0XvjFS284tfmhlX4Y5vl7KG9/XG4ALk3oouG501BpHTLtJWH6C1vJEiD6vEUh6/ 38 | vfKcxRvPRTOdWbQAAAEBAO2sxBiQVGUPCxmwcna63c7bWkmIoz5b0U7ly6jlEotmLcWQ+K 39 | RpnZjnVnKZQoX7flqOjC+NNX3K9KdgubvuCmasz3/6Ogsbi4lYnTage+gBEILg01J/4/za 40 | XiVmpsiqoC206T5ecnnyR30I1fZYDY+DHQmoMXkxVg2sqV1mDe6cL8eGpYusVr7oXdv12E 41 | 8XTQ/sd8JHuv2j9fc2U1N9315is/2K6z1fFoj93q52CDJSUnQN39TOpYf15qhbUuM2grrm 42 | OL2X/zTTflxHoaMBx96XCOEqkEhwcSiCZ/PnYMCPAPbxmOtJBlpJ9Ni6CtAPXXofrMHzpw 43 | QOKWQaVszp5KsAAAEBANfBs+bxjWBWeaRTXv13P9vNHFqojAVqJElAsN10Z4iQ+W576JqF 44 | rP1iPsNFAaHwk1mh8C4CYs0LjoTDemmdxQNC2JNVE+Losd88n4w3zrMAqxzJFJsAkO7ADn 45 | 3P39Wzu9vL7rhoJYCmJdTWMd9WZnlHqtNgO/wVs9vrxCy2HO8arSpSEElL+VDyN41Vhdkp 46 | 0qlcI2BqIPVuXuP/P+HFHE++KA3LWkh/Dnvb+IWrC5gbD3aIUYl66LsIGG/cfri56dS5eX 47 | iDr0AMvqmoqdTg77IHok1AmB5FQJiJtXBSeq2bMfDsYIae2rAUML9e3/fvgw9mTFUHnUa0 48 | kusqrefGJZsAAAAaZ2xhc3NvZndoaXNrZXlAdGhlLW1hY2hpbmUB 49 | -----END OPENSSH PRIVATE KEY----- 50 | -------------------------------------------------------------------------------- /streamflow/scheduling/policy/data_locality.py: 
from __future__ import annotations

import asyncio
from collections.abc import MutableMapping
from importlib.resources import files
from typing import TYPE_CHECKING

from streamflow.core.context import StreamFlowContext
from streamflow.core.data import DataType
from streamflow.core.exception import WorkflowExecutionException
from streamflow.core.scheduling import Hardware, JobAllocation, Policy
from streamflow.workflow.token import FileToken

if TYPE_CHECKING:
    from streamflow.core.scheduling import AvailableLocation, LocationAllocation
    from streamflow.core.workflow import Job


class DataLocalityPolicy(Policy):
    """Scheduling policy that privileges locations already holding the job's input data.

    Input tokens are examined in decreasing weight order: the first free
    location that stores a PRIMARY copy of one of the job's input files is
    selected, so that the heaviest data transfers are avoided. When no
    data-related placement is possible, the first remaining free location
    is returned.
    """

    async def get_location(
        self,
        context: StreamFlowContext,
        job: Job,
        hardware_requirement: Hardware,
        available_locations: MutableMapping[str, AvailableLocation],
        jobs: MutableMapping[str, JobAllocation],
        locations: MutableMapping[str, MutableMapping[str, LocationAllocation]],
    ) -> AvailableLocation | None:
        """Select a location for ``job``, preferring data locality.

        :param context: the current StreamFlow context
        :param job: the job to be scheduled
        :param hardware_requirement: hardware requested by the job (not used
            by this policy)
        :param available_locations: free locations, keyed by location name
        :param jobs: current job allocations (not used by this policy)
        :param locations: current location allocations (not used by this policy)
        :return: the chosen location, or ``None`` if no location is available
        :raise WorkflowExecutionException: if ``available_locations`` mixes
            locations coming from multiple deployments
        """
        # Guard against an empty location map: the original code would have
        # raised StopIteration on `next(iter(deployments))` when the job had
        # FileToken inputs and no location was free
        if not available_locations:
            return None
        valid_locations = list(available_locations.keys())
        deployments = {loc.deployment for loc in available_locations.values()}
        if len(deployments) > 1:
            raise WorkflowExecutionException(
                f"Available locations coming from multiple deployments: {deployments}"
            )
        # Compute all token weights concurrently, keyed by input port name
        weights = dict(
            zip(
                job.inputs,
                await asyncio.gather(
                    *(
                        asyncio.create_task(t.get_weight(context))
                        for t in job.inputs.values()
                    )
                ),
                strict=True,
            )
        )
        # Process input tokens from the heaviest to the lightest
        for _, token in sorted(
            job.inputs.items(), key=lambda item: weights[item[0]], reverse=True
        ):
            related_locations = set()
            # For FileTokens, retrieve the locations holding a primary copy
            if isinstance(token, FileToken):
                for path in await token.get_paths(context):
                    related_locations.update(
                        loc.name
                        for loc in context.data_manager.get_data_locations(
                            path=path,
                            deployment=next(iter(deployments)),
                            data_type=DataType.PRIMARY,
                        )
                    )
            # Check if one of the related locations is free
            for current_location in related_locations:
                if current_location in valid_locations:
                    return available_locations[current_location]
        # If a data-related allocation is not possible, fall back to the first
        # remaining free location (equivalent to the original loop-and-return)
        if valid_locations:
            return available_locations[valid_locations[0]]
        # If there are no available locations, return None
        return None

    @classmethod
    def get_schema(cls) -> str:
        """Return the JSON Schema describing this policy's configuration."""
        return (
            files(__package__)
            .joinpath("schemas")
            .joinpath("data_locality.json")
            .read_text("utf-8")
        )
Interaction with the Slurm scheduler happens through a Bash script with ``#SBATCH`` directives.
warning:: 54 | 55 | Note that the ``file`` property in the upper configuration level, i.e., outside a ``service`` definition, is still supported in StreamFlow ``v0.2``, but it is deprecated and will be removed in StreamFlow ``v0.3``. 56 | 57 | The unit of binding is the entire HPC facility. In contrast, the scheduling unit is a single job placement in the Slurm queue. Users can limit the maximum number of concurrently placed jobs by setting the ``maxConcurrentJobs`` parameter. 58 | 59 | .. jsonschema:: https://streamflow.di.unito.it/schemas/deployment/connector/slurm.json 60 | :lift_description: true 61 | :lift_definitions: true 62 | :auto_reference: true 63 | :auto_target: true 64 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: "Release new version" 2 | on: 3 | workflow_run: 4 | workflows: 5 | - "CI Tests" 6 | branches: 7 | - master 8 | types: 9 | - completed 10 | jobs: 11 | docker: 12 | name: "Build Docker container" 13 | runs-on: ubuntu-24.04 14 | if: ${{ github.event.workflow_run.conclusion == 'success' }} 15 | steps: 16 | - uses: actions/checkout@v6 17 | - uses: docker/setup-qemu-action@v3 18 | - uses: docker/setup-buildx-action@v3 19 | - uses: docker/login-action@v3 20 | with: 21 | username: ${{ secrets.DOCKERHUB_USERNAME }} 22 | password: ${{ secrets.DOCKERHUB_TOKEN }} 23 | - name: "Get StreamFlow version" 24 | run: echo "STREAMFLOW_VERSION=$(cat streamflow/version.py | grep -oP '(?<=VERSION = \")(.*)(?=\")')" >> $GITHUB_ENV 25 | - name: "Check if Docker image already exists" 26 | run: echo "NEW_IMAGE=$(docker buildx imagetools inspect alphaunito/streamflow:${STREAMFLOW_VERSION} > /dev/null 2>&1; echo $?)" >> $GITHUB_ENV 27 | - name: "Build Docker image" 28 | if: ${{ env.NEW_IMAGE == 1 }} 29 | uses: docker/build-push-action@v6 30 | with: 31 | build-args: | 32 | HELM_VERSION=v3.19.0 33 | push: true 34 | 
tags: | 35 | alphaunito/streamflow:${{ env.STREAMFLOW_VERSION }} 36 | alphaunito/streamflow:latest 37 | github: 38 | name: "Create GitHub Release" 39 | runs-on: ubuntu-24.04 40 | permissions: 41 | contents: write 42 | if: ${{ github.event.workflow_run.conclusion == 'success' }} 43 | steps: 44 | - uses: actions/checkout@v6 45 | - name: "Get StreamFlow version" 46 | run: echo "STREAMFLOW_VERSION=$(cat streamflow/version.py | grep -oP '(?<=VERSION = \")(.*)(?=\")')" >> $GITHUB_ENV 47 | - name: "Check tag existence" 48 | uses: mukunku/tag-exists-action@v1.7.0 49 | id: check-tag 50 | with: 51 | tag: ${{ env.STREAMFLOW_VERSION }} 52 | - name: "Create Release" 53 | id: create-release 54 | uses: actions/create-release@v1 55 | if: ${{ steps.check-tag.outputs.exists == 'false' }} 56 | env: 57 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 58 | with: 59 | tag_name: ${{ env.STREAMFLOW_VERSION }} 60 | release_name: ${{ env.STREAMFLOW_VERSION }} 61 | draft: false 62 | prerelease: false 63 | pypi: 64 | name: "Publish on PyPI" 65 | runs-on: ubuntu-24.04 66 | environment: 67 | name: pypi 68 | url: https://pypi.org/project/streamflow 69 | permissions: 70 | id-token: write 71 | if: ${{ github.event.workflow_run.conclusion == 'success' }} 72 | steps: 73 | - uses: actions/checkout@v6 74 | - uses: actions/setup-python@v6 75 | with: 76 | python-version: "3.14" 77 | - name: "Get StreamFlow version" 78 | run: echo "STREAMFLOW_VERSION=$(cat streamflow/version.py | grep -oP '(?<=VERSION = \")(.*)(?=\")')" >> $GITHUB_ENV 79 | - name: "Get PyPI version" 80 | run: echo "PYPI_VERSION=$(pip index versions --pre streamflow | grep streamflow | sed 's/.*(\(.*\))/\1/')" >> $GITHUB_ENV 81 | - name: "Build Python packages" 82 | if: ${{ env.STREAMFLOW_VERSION != env.PYPI_VERSION }} 83 | run: | 84 | python -m pip install build --user 85 | python -m build --sdist --wheel --outdir dist/ . 
from __future__ import annotations

import asyncio
from abc import ABC, abstractmethod
from collections.abc import MutableSequence
from enum import Enum
from typing import TYPE_CHECKING

from streamflow.core.context import SchemaEntity

if TYPE_CHECKING:
    from typing import Any

    from streamflow.core.context import StreamFlowContext
    from streamflow.core.deployment import ExecutionLocation


class DataType(Enum):
    """Nature of a registered data object: a primary copy, a symbolic link, or an invalidated entry."""

    PRIMARY = 0
    SYMBOLIC_LINK = 1
    INVALID = 2


class DataLocation:
    """A piece of data residing at a given path on a concrete execution location.

    The ``available`` event is set when the data is ready to be consumed;
    passing ``available=True`` to the constructor sets it immediately.
    """

    __slots__ = (
        "available",
        "data_type",
        "location",
        "path",
        "relpath",
    )

    def __init__(
        self,
        location: ExecutionLocation,
        path: str,
        relpath: str,
        data_type: DataType,
        available: bool = False,
    ):
        # Event set when the data is ready; consumers can await it
        self.available: asyncio.Event = asyncio.Event()
        self.data_type: DataType = data_type
        self.location: ExecutionLocation = location
        self.path: str = path
        self.relpath: str = relpath
        if available:
            self.available.set()

    @property
    def deployment(self) -> str:
        # Name of the deployment hosting this location
        return self.location.deployment

    @property
    def name(self) -> str:
        # Name of the underlying execution location
        return self.location.name

    @property
    def service(self) -> str | None:
        # Optional service within the deployment (None when not set)
        return self.location.service

    @property
    def wraps(self) -> ExecutionLocation | None:
        # Inner location wrapped by this one, if any
        return self.location.wraps


class DataManager(SchemaEntity):
    """Abstract interface tracking data objects and their copies across locations.

    Implementations register paths and relations between data locations,
    answer locality queries, and transfer data between locations.
    """

    def __init__(self, context: StreamFlowContext):
        self.context: StreamFlowContext = context

    # Release all resources held by the manager
    @abstractmethod
    async def close(self) -> None: ...

    # Return the known locations of `path`, optionally filtered by
    # deployment, location name, and data type
    @abstractmethod
    def get_data_locations(
        self,
        path: str,
        deployment: str | None = None,
        location_name: str | None = None,
        data_type: DataType | None = None,
    ) -> MutableSequence[DataLocation]: ...

    # Return a location from which `path` can be retrieved towards
    # `dst_deployment`, or None when no suitable source exists
    @abstractmethod
    async def get_source_location(
        self, path: str, dst_deployment: str
    ) -> DataLocation | None: ...

    # Mark the copy of `path` on `location` as no longer valid
    @abstractmethod
    def invalidate_location(self, location: ExecutionLocation, path: str) -> None: ...

    # Register `path` on `location` and return the resulting DataLocation
    @abstractmethod
    def register_path(
        self,
        location: ExecutionLocation,
        path: str,
        relpath: str | None = None,
        data_type: DataType = DataType.PRIMARY,
    ) -> DataLocation: ...

    # Record a relation between a source and a destination data location
    @abstractmethod
    def register_relation(
        self, src_location: DataLocation, dst_location: DataLocation
    ) -> None: ...

    # Transfer `src_path` from `src_location` to `dst_path` on each
    # destination location; `writable` signals that the destination copies
    # may be modified afterwards
    @abstractmethod
    async def transfer_data(
        self,
        src_location: ExecutionLocation,
        src_path: str,
        dst_locations: MutableSequence[ExecutionLocation],
        dst_path: str,
        writable: bool = False,
    ) -> None: ...


class StreamWrapper(ABC):
    """Abstract asynchronous wrapper around a raw stream object."""

    def __init__(self, stream: Any):
        self.stream: Any = stream

    # Close the underlying stream
    @abstractmethod
    async def close(self) -> None: ...

    # Read up to `size` units from the stream (whole stream when size is
    # None; exact semantics are defined by concrete implementations)
    @abstractmethod
    async def read(self, size: int | None = None): ...

    # Write `data` to the underlying stream
    @abstractmethod
    async def write(self, data: Any): ...
# Version of the standard to test against
# Current options: v1.0, v1.1, v1.2, and v1.3
VERSION=${VERSION:-"v1.2"}

# Which commit of the standard's repo to use
# Defaults to the last commit of the main branch
COMMIT=${COMMIT:-"main"}

# Comma-separated list of test names that should be excluded from execution
# Defaults to "docker_entrypoint,modify_file_content"
EXCLUDE=${EXCLUDE:-"docker_entrypoint,modify_file_content"}

# Name of the CWLDockerTranslator plugin to use for test execution
# This parameter makes it possible to test automatic CWL requirement translators
DOCKER=${DOCKER:-"docker"}

# Additional arguments for the pytest command
# Defaults to none
# PYTEST_EXTRA=

# The directory where this script resides
SCRIPT_DIRECTORY="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"

# Choose the GitHub repository hosting the requested standard version
# (v1.0 lives in the legacy "common-workflow-language" repository)
if [[ "${VERSION}" = "v1.0" ]] ; then
  REPO="common-workflow-language"
else
  REPO="cwl-$VERSION"
fi

# Download and unpack the conformance suite archive from GitHub
# NOTE(review): these paths are relative to the current working directory,
# while later steps use ${SCRIPT_DIRECTORY} — the script assumes it is run
# from its own directory; confirm before relocating it
if [ ! -d "${REPO}-${COMMIT}" ] ; then
  if [ ! -f "${COMMIT}.tar.gz" ] ; then
    # Abort early on a failed download instead of failing later in tar/pytest
    wget "https://github.com/common-workflow-language/${REPO}/archive/${COMMIT}.tar.gz" || exit 1
  fi
  tar xzf "${COMMIT}.tar.gz" || exit 1
fi

# Setup environment: create/activate the venv, then install StreamFlow
# plus its test dependency group into it
venv cwl-conformance-venv
pip install -U setuptools wheel "pip>=25.1"
pip install "${SCRIPT_DIRECTORY}"
pip install --group test "${SCRIPT_DIRECTORY}"
if [[ "${VERSION}" = "v1.3" ]] ; then
  # v1.3 needs an unreleased cwl-utils patch, taken straight from PR #370
  pip uninstall -y cwl-utils
  pip install git+https://github.com/common-workflow-language/cwl-utils.git@refs/pull/370/head
fi

# Set conformance test filename (v1.0 keeps its tests in a versioned subfolder)
if [[ "${VERSION}" = "v1.0" ]] ; then
  CONFORMANCE_TEST="${SCRIPT_DIRECTORY}/${REPO}-${COMMIT}/${VERSION}/conformance_test_v1.0.yaml"
else
  CONFORMANCE_TEST="${SCRIPT_DIRECTORY}/${REPO}-${COMMIT}/conformance_tests.yaml"
fi
# Rename to the ".cwltest.yaml" suffix expected by the cwltest pytest plugin
mv "${CONFORMANCE_TEST}" "${CONFORMANCE_TEST%".yaml"}.cwltest.yaml"
CONFORMANCE_TEST="${CONFORMANCE_TEST%".yaml"}.cwltest.yaml"

# Build the pytest command incrementally
TEST_COMMAND="python -m pytest ${CONFORMANCE_TEST} -n auto -rs"
if [[ -n "${EXCLUDE}" ]] ; then
  TEST_COMMAND="${TEST_COMMAND} --cwl-exclude ${EXCLUDE}"
fi
TEST_COMMAND="${TEST_COMMAND} --cov --junitxml=junit.xml -o junit_family=legacy --cov-report= ${PYTEST_EXTRA}"

# Cleanup coverage artifacts from previous runs
# (fixed: a misplaced closing quote previously fused coverage.xml and
# junit.xml into one space-containing argument, so neither was removed)
rm -rf "${SCRIPT_DIRECTORY}/.coverage" "${SCRIPT_DIRECTORY}/coverage.xml" "${SCRIPT_DIRECTORY}/junit.xml"

# Run test: copy the conftest and the selected StreamFlow deployment config
# next to the conformance suite, then execute pytest
cp "${SCRIPT_DIRECTORY}/tests/cwl-conformance/conftest.py" "$(dirname "${CONFORMANCE_TEST}")/"
cp "${SCRIPT_DIRECTORY}/tests/cwl-conformance/streamflow-${DOCKER}.yml" "$(dirname "${CONFORMANCE_TEST}")/streamflow.yml"
bash -c "${TEST_COMMAND}"
RETURN_CODE=$?

# Coverage report (only when the test suite succeeded)
if [ "${RETURN_CODE}" -eq "0" ] ; then
  coverage report
  coverage xml
fi

# Cleanup the downloaded archive, the extracted suite, and the venv
rm -rf "${COMMIT}.tar.gz" "${SCRIPT_DIRECTORY}/${REPO}-${COMMIT}" "${SCRIPT_DIRECTORY}/cwl-conformance-venv"

# Exit with the pytest return code
exit "${RETURN_CODE}"
This is useful when StreamFlow runs directly inside a Kubernetes Pod", 15 | "default": false 16 | }, 17 | "kubeContext": { 18 | "type": "string", 19 | "title": "Kubernetes context", 20 | "description": "Name of the kubeconfig context to use" 21 | }, 22 | "kubeconfig": { 23 | "type": "string", 24 | "title": "Kubernetes config", 25 | "description": "Absolute path of the kubeconfig file to be used" 26 | }, 27 | "maxConcurrentConnections": { 28 | "type": "integer", 29 | "title": "Max concurrent connections", 30 | "description": "Maximum number of concurrent connections to open for a single Kubernetes client", 31 | "default": 4096 32 | }, 33 | "namespace": { 34 | "type": "string", 35 | "title": "Namespace", 36 | "description": "Namespace to deploy into" 37 | }, 38 | "locationsCacheSize": { 39 | "type": "integer", 40 | "title": "Locations cache size", 41 | "description": "Available locations cache size", 42 | "default": 10 43 | }, 44 | "locationsCacheTTL": { 45 | "type": "integer", 46 | "title": "Locations cache TTL", 47 | "description": "Available locations cache TTL (in seconds). When such cache expires, the connector performs a new request to check locations availability", 48 | "default": 10 49 | }, 50 | "resourcesCacheSize": { 51 | "type": "integer", 52 | "title": "Resources cache size", 53 | "deprecated": true, 54 | "description": "(**Deprecated.** Use locationsCacheSize.) Available resources cache size", 55 | "default": 10 56 | }, 57 | "resourcesCacheTTL": { 58 | "type": "integer", 59 | "title": "Resources cache TTL", 60 | "deprecated": true, 61 | "description": "(**Deprecated.** Use locationsCacheTTL.) Available resources cache TTL (in seconds). 
When such cache expires, the connector performs a new request to check resources availability", 62 | "default": 10 63 | }, 64 | "timeout": { 65 | "type": "integer", 66 | "title": "Timeout", 67 | "description": "Time (in seconds) to wait for any individual Kubernetes operation", 68 | "default": 60000 69 | }, 70 | "transferBufferSize": { 71 | "type": "integer", 72 | "title": "Transfer buffer size", 73 | "description": "Buffer size allocated for local and remote data transfers", 74 | "default": 33554431, 75 | "$comment": "Kubernetes Python client talks with its server counterpart, written in Golang, via Websocket protocol. The standard websocket package in Golang defines DefaultMaxPayloadBytes equal to 32 MB. Nevertheless, since kubernetes-client prepends channel number to the actual payload (which is always 0 for STDIN), we must reserve 1 byte for this purpose" 76 | }, 77 | "wait": { 78 | "type": "boolean", 79 | "title": "Wait", 80 | "description": "If set, will wait until all Pods, PVCs, Services, and minimum number of Pods of a Deployment are in a ready state before marking the deployment as successful. It will wait for as long as timeout", 81 | "default": true 82 | } 83 | } 84 | } --------------------------------------------------------------------------------