├── lambda ├── __init__.py ├── src │ ├── __init__.py │ ├── chooser │ │ ├── __init__.py │ │ ├── requirements.txt │ │ └── multichooser.py │ ├── common │ │ ├── __init__.py │ │ └── python │ │ │ ├── __init__.py │ │ │ ├── lambda_logs.py │ │ │ ├── substitutions.py │ │ │ ├── file_select.py │ │ │ └── repo_utils.py │ ├── compiler │ │ ├── __init__.py │ │ ├── pkg │ │ │ ├── __init__.py │ │ │ ├── native_step_resources.py │ │ │ ├── chooser_resources.py │ │ │ ├── compiler.py │ │ │ ├── enhanced_parallel_resources.py │ │ │ ├── subpipe_resources.py │ │ │ ├── util.py │ │ │ └── scatter_gather_resources.py │ │ ├── requirements.txt │ │ ├── handler.py │ │ └── compiler_cli.py │ ├── gather │ │ ├── __init__.py │ │ ├── requirements.txt │ │ └── gather.py │ ├── job_def │ │ ├── __init__.py │ │ ├── requirements.txt │ │ └── register.py │ ├── qc_checker │ │ ├── __init__.py │ │ ├── requirements.txt │ │ └── qc_checker.py │ ├── router │ │ ├── __init__.py │ │ └── job_router.py │ ├── scatter │ │ ├── __init__.py │ │ └── requirements.txt │ ├── subpipes │ │ ├── __init__.py │ │ ├── requirements.txt │ │ └── subpipes.py │ ├── initializer │ │ ├── __init__.py │ │ ├── requirements.txt │ │ └── initializer.py │ ├── notifications │ │ ├── __init__.py │ │ ├── requirements.txt │ │ └── notifications.py │ └── scatter_init │ │ ├── __init__.py │ │ └── scatter_init.py └── tests │ ├── __init__.py │ ├── chooser │ └── __init__.py │ ├── common │ ├── __init__.py │ ├── test_repo_utils.py │ ├── test_substitutions.py │ └── test_file_select.py │ ├── compiler │ ├── __init__.py │ ├── conftest.py │ ├── test_chooser_resources.py │ ├── test_state_machine_resources.py │ ├── test_util.py │ ├── test_subpipe_resources.py │ └── test_enhanced_parallel_resources.py │ ├── gather │ ├── __init__.py │ └── test_gather.py │ ├── job_def │ └── __init__.py │ ├── router │ └── __init__.py │ ├── scatter │ └── __init__.py │ ├── subpipes │ └── __init__.py │ ├── initializer │ └── __init__.py │ ├── notifications │ ├── __init__.py │ └── test_notifications.py │ ├── qc_checker │ └── __init__.py │ ├── scatter_init │ ├── __init__.py │ └── test_scatter_init.py │ └── requirements.txt ├── bclaw_runner ├── __init__.py ├── src │ ├── __init__.py │ ├── runner │ │ ├── __init__.py │ │ ├── preamble.py │ │ ├── string_subs.py │ │ ├── signal_trapper.py │ │ ├── qc_check.py │ │ ├── workspace.py │ │ ├── cache.py │ │ └── runner_main.py │ └── runner_cli.py ├── tests │ ├── __init__.py │ ├── test_signal_trapper.py │ ├── test_workspace.py │ ├── conftest.py │ ├── test_qc_check.py │ ├── test_cache.py │ └── test_string_subs.py ├── .dockerignore ├── requirements.txt ├── Dockerfile └── Dockerfile.alpine ├── MAINTAINERS ├── util └── bclaw_logs │ ├── lambda │ ├── __init__.py │ ├── src │ │ ├── __init__.py │ │ └── job_status.py │ └── tests │ │ ├── __init__.py │ │ └── test_job_status.py │ ├── sam_install.txt │ └── template.yaml ├── doc ├── tutorial │ ├── sf_exec_list.png │ ├── sf_exec_history.png │ ├── sf_visual_workflow.png │ └── bclaw_architecture2.png ├── resources │ ├── subpipes_step_functions_link1.png │ └── subpipes_step_functions_link2.png ├── qc.md ├── runtime_env.md ├── workflow_versions.md ├── quick-start.md ├── options_and_parameters.md ├── subpipes.md └── notifications.md ├── LICENSE ├── .gitignore ├── README.md ├── .github └── workflows │ └── installer.yaml ├── cloudformation └── bc_ecs_task_role.yaml └── CONTRIBUTING.md /lambda/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- 
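As a quick orientation to the tree above: the workflow compiler under lambda/src/compiler can also be run locally via compiler_cli.py (reproduced later in this listing). A minimal sketch, assuming it is run from the lambda/src/compiler directory with valid AWS credentials, the compiler's dependencies plus python-dotenv installed, and a .env file (or exported variables) supplying CORE_STACK_NAME and the *_ARN/*_NAME values the compiler reads (lambda/tests/compiler/conftest.py lists the set used by the tests); the file names here are hypothetical:

    python compiler_cli.py my_workflow.yaml my_cfn_template.yaml my_state_machine.json -v

The three positional arguments default to stdin, stdout, and stderr, so the compiled CloudFormation fragment and the generated Step Functions definition can also be captured with shell redirection.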
/lambda/src/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /bclaw_runner/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /bclaw_runner/src/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lambda/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /bclaw_runner/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lambda/src/chooser/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lambda/src/common/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lambda/src/compiler/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lambda/src/gather/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lambda/src/job_def/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lambda/src/qc_checker/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lambda/src/router/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lambda/src/scatter/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lambda/src/subpipes/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lambda/tests/chooser/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lambda/tests/common/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lambda/tests/compiler/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lambda/tests/gather/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /lambda/tests/job_def/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lambda/tests/router/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lambda/tests/scatter/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lambda/tests/subpipes/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /MAINTAINERS: -------------------------------------------------------------------------------- 1 | jetaba (Jack Tabaska) 2 | -------------------------------------------------------------------------------- /lambda/src/common/python/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lambda/src/compiler/pkg/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lambda/src/gather/requirements.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lambda/src/initializer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lambda/src/notifications/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lambda/src/qc_checker/requirements.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lambda/src/scatter_init/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lambda/src/subpipes/requirements.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lambda/tests/initializer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lambda/tests/notifications/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lambda/tests/qc_checker/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lambda/tests/scatter_init/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- 
/util/bclaw_logs/lambda/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /util/bclaw_logs/lambda/src/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /util/bclaw_logs/lambda/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lambda/src/job_def/requirements.txt: -------------------------------------------------------------------------------- 1 | boto3>=1.38.0 -------------------------------------------------------------------------------- /lambda/src/initializer/requirements.txt: -------------------------------------------------------------------------------- 1 | jmespath 2 | -------------------------------------------------------------------------------- /lambda/src/notifications/requirements.txt: -------------------------------------------------------------------------------- 1 | pyyaml 2 | -------------------------------------------------------------------------------- /bclaw_runner/.dockerignore: -------------------------------------------------------------------------------- 1 | **/__pycache__ 2 | *.pyc 3 | -------------------------------------------------------------------------------- /lambda/src/chooser/requirements.txt: -------------------------------------------------------------------------------- 1 | python-box[all]~=6.0 2 | -------------------------------------------------------------------------------- /lambda/src/scatter/requirements.txt: -------------------------------------------------------------------------------- 1 | jsonpath 2 | pyyaml 3 | -------------------------------------------------------------------------------- /lambda/src/compiler/requirements.txt: -------------------------------------------------------------------------------- 1 | humanfriendly 2 | pyyaml 3 | voluptuous 4 | -------------------------------------------------------------------------------- /bclaw_runner/src/runner/__init__.py: -------------------------------------------------------------------------------- 1 | from .runner_main import cli 2 | 3 | __all__ = ["cli"] 4 | -------------------------------------------------------------------------------- /doc/tutorial/sf_exec_list.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bayer-Group/BayerCLAW/HEAD/doc/tutorial/sf_exec_list.png -------------------------------------------------------------------------------- /doc/tutorial/sf_exec_history.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bayer-Group/BayerCLAW/HEAD/doc/tutorial/sf_exec_history.png -------------------------------------------------------------------------------- /doc/tutorial/sf_visual_workflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bayer-Group/BayerCLAW/HEAD/doc/tutorial/sf_visual_workflow.png -------------------------------------------------------------------------------- /bclaw_runner/src/runner_cli.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from runner import cli 3 | 4 | if __name__ == "__main__": 5 | 
sys.exit(cli()) 6 | -------------------------------------------------------------------------------- /doc/tutorial/bclaw_architecture2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bayer-Group/BayerCLAW/HEAD/doc/tutorial/bclaw_architecture2.png -------------------------------------------------------------------------------- /doc/resources/subpipes_step_functions_link1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bayer-Group/BayerCLAW/HEAD/doc/resources/subpipes_step_functions_link1.png -------------------------------------------------------------------------------- /doc/resources/subpipes_step_functions_link2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bayer-Group/BayerCLAW/HEAD/doc/resources/subpipes_step_functions_link2.png -------------------------------------------------------------------------------- /bclaw_runner/requirements.txt: -------------------------------------------------------------------------------- 1 | backoff 2 | boto3==1.34.38 3 | docker 4 | docopt 5 | jmespath 6 | more_itertools 7 | pytest 8 | pytest-mock 9 | requests 10 | -------------------------------------------------------------------------------- /lambda/tests/requirements.txt: -------------------------------------------------------------------------------- 1 | boto3==1.38.3 2 | humanfriendly 3 | jmespath 4 | jsonpath 5 | moto[all]==5.0.1 6 | pytest 7 | pytest-mock 8 | python-box[all]~=6.0 9 | pyyaml 10 | voluptuous 11 | -------------------------------------------------------------------------------- /util/bclaw_logs/sam_install.txt: -------------------------------------------------------------------------------- 1 | sam build -b ./build -s . 
-t template.yaml 2 | 3 | sam deploy \ 4 | --template-file build/template.yaml \ 5 | --stack-name bclaw-logs \ 6 | --resolve-s3 \ 7 | --capabilities CAPABILITY_IAM \ 8 | --profile bclaw-public 9 | -------------------------------------------------------------------------------- /bclaw_runner/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM public.ecr.aws/docker/library/python:3.10-slim AS base 2 | 3 | LABEL maintainer="jack.tabaska@bayer.com" 4 | 5 | # https://www.cynnovative.com/simple-multi-stage-docker-builds/ 6 | 7 | WORKDIR /bclaw_runner 8 | 9 | COPY requirements.txt ./ 10 | RUN pip install --no-cache-dir --upgrade pip && \ 11 | pip install --no-cache-dir -r requirements.txt 12 | 13 | COPY src src 14 | COPY __init__.py __init__.py 15 | 16 | FROM base AS test 17 | 18 | RUN pip install --no-cache-dir pytest moto requests_mock 19 | 20 | COPY tests tests 21 | RUN pytest -s -vvv tests/ 22 | 23 | FROM base AS build 24 | 25 | ENV PYTHONBUFFERED=1 26 | ENV PATH=/bclaw:$PATH 27 | -------------------------------------------------------------------------------- /bclaw_runner/src/runner/preamble.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | 4 | logger = logging.getLogger(__name__) 5 | 6 | 7 | def log_preamble(): 8 | logger.info(f"workflow_name={os.environ['BC_WORKFLOW_NAME']}") 9 | logger.info(f"step_name={os.environ['BC_STEP_NAME']}") 10 | logger.info(f"job_file=s3://{os.environ['BC_LAUNCH_BUCKET']}/{os.environ['BC_LAUNCH_KEY']}:{os.environ['BC_LAUNCH_VERSION']}") 11 | logger.info(f"sfn_execution_id={os.environ['BC_EXECUTION_ID']}") 12 | logger.info(f"branch={os.environ['BC_BRANCH_IDX']}") 13 | logger.info(f"batch_job_id={os.environ['AWS_BATCH_JOB_ID']}") 14 | logger.info(f"bclaw_version={os.environ['BC_VERSION']}") 15 | -------------------------------------------------------------------------------- /bclaw_runner/tests/test_signal_trapper.py: -------------------------------------------------------------------------------- 1 | import os 2 | import threading 3 | import time 4 | 5 | from ..src.runner.signal_trapper import signal_trapper 6 | 7 | 8 | def test_signal_trapper(mock_container_factory): 9 | pid = os.getpid() 10 | 11 | def trigger_signal(): 12 | time.sleep(1) 13 | os.kill(pid, 2) 14 | 15 | thread = threading.Thread(target=trigger_signal) 16 | thread.daemon = True 17 | thread.start() 18 | 19 | test_container = mock_container_factory(0, False) 20 | 21 | with signal_trapper(test_container): 22 | time.sleep(3) 23 | print("yo") 24 | 25 | assert test_container.exit_code == 99 # test_container.stop() was called 26 | -------------------------------------------------------------------------------- /bclaw_runner/Dockerfile.alpine: -------------------------------------------------------------------------------- 1 | FROM public.ecr.aws/docker/library/python:3.12-alpine AS base 2 | 3 | LABEL maintainer="jack.tabaska@bayer.com" 4 | 5 | # https://www.cynnovative.com/simple-multi-stage-docker-builds/ 6 | 7 | WORKDIR /bclaw_runner 8 | 9 | COPY requirements.txt ./ 10 | RUN pip install --no-cache-dir --upgrade pip && \ 11 | pip install --no-cache-dir -r requirements.txt 12 | 13 | COPY src src 14 | COPY __init__.py __init__.py 15 | 16 | FROM base AS test 17 | 18 | # https://github.com/pachisi456/alpine-pytest-docker 19 | 20 | RUN apk add --no-cache --virtual .build-deps \ 21 | build-base openssl-dev libffi-dev && \ 22 | pip install --no-cache-dir pytest moto[all]==5.0.1 requests_mock 
23 | 24 | COPY tests tests 25 | RUN pytest -s -vvv tests/ 26 | 27 | FROM base AS build 28 | 29 | ARG BC_VERSION_ARG 30 | ENV BC_VERSION=$BC_VERSION_ARG 31 | 32 | ENV PYTHONBUFFERED=1 33 | ENV PATH=/bclaw_runner:$PATH 34 | -------------------------------------------------------------------------------- /lambda/src/common/python/lambda_logs.py: -------------------------------------------------------------------------------- 1 | from contextlib import contextmanager 2 | import json 3 | import logging 4 | import os 5 | from textwrap import dedent 6 | 7 | 8 | def log_preamble(logger: logging.Logger, 9 | branch: str = "N/A", 10 | job_file_bucket: str = "N/A", 11 | job_file_key: str = "N/A", 12 | job_file_version: str = "N/A", 13 | sfn_execution_id: str = "N/A", 14 | step_name: str = "N/A", 15 | workflow_name: str = "N/A") -> None: 16 | logger.info(dedent(f"""---------- preamble ---------- 17 | {workflow_name=} 18 | {step_name=} 19 | job_file=s3://{job_file_bucket}/{job_file_key}:{job_file_version} 20 | {sfn_execution_id=} 21 | {branch=} 22 | bclaw_version={os.environ.get("BCLAW_VERSION", "N/A")} 23 | """)) 24 | 25 | 26 | def log_event(logger: logging.Logger, event: dict) -> None: 27 | logger.info("---------- event ----------" + json.dumps(event, indent=2)) 28 | -------------------------------------------------------------------------------- /bclaw_runner/src/runner/string_subs.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | import jmespath 3 | import re 4 | from typing import Any 5 | 6 | 7 | def lookup(m: re.Match, spec: dict) -> str: 8 | ret = jmespath.search(m.group(1), spec) 9 | if ret is None: 10 | ret = m.group(0) 11 | return str(ret) 12 | 13 | 14 | SUB_FINDER = re.compile(r"\${(.+?)}") 15 | 16 | def substitute(target: Any, spec: dict) -> Any: 17 | if isinstance(target, str): 18 | _lookup = partial(lookup, spec=spec) 19 | ret = SUB_FINDER.sub(_lookup, target) 20 | elif isinstance(target, list): 21 | ret = [substitute(v, spec) for v in target] 22 | elif isinstance(target, dict): 23 | ret = {k: substitute(v, spec) for k, v in target.items()} 24 | else: 25 | ret = target 26 | 27 | return ret 28 | 29 | 30 | def substitute_image_tag(image_spec: dict, sub_spec: dict) -> dict: 31 | name = image_spec["name"] 32 | parts = name.split("/") 33 | name_ver = parts.pop(-1) 34 | _lookup = partial(lookup, spec=sub_spec) 35 | subbed = SUB_FINDER.sub(_lookup, name_ver) 36 | 37 | ret = image_spec.copy() 38 | ret["name"] = "/".join(parts + [subbed]) 39 | return ret 40 | -------------------------------------------------------------------------------- /lambda/src/compiler/handler.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from pkg.compiler import compile_template 4 | 5 | logger = logging.getLogger() 6 | logger.setLevel(logging.INFO) 7 | 8 | 9 | def lambda_handler(event: dict, context: object) -> dict: 10 | # event = { 11 | # accountId: str 12 | # fragment: { 13 | # Repository: str 14 | # Parameters: {...} 15 | # Options: {...} 16 | # Steps: [] 17 | # } 18 | # region: str 19 | # params: {} # empty 20 | # requestId: uuid, 21 | # templateParameterValues: { 22 | # param1: value1 23 | # param2: value2 24 | # ... 
25 | # } 26 | # transformId: str 27 | # } 28 | logger.info(f"{event=}") 29 | ret = event.copy() 30 | 31 | try: 32 | ret["fragment"] = compile_template(event["fragment"], event["templateParameterValues"]) 33 | ret["status"] = "success" 34 | 35 | except Exception as e: 36 | # https://stackoverflow.com/questions/55190232/aws-cloudformation-transform-how-do-i-properly-return-an-error-message 37 | logger.exception("failed: ") 38 | ret["status"] = "failure" 39 | ret["errorMessage"] = str(e) 40 | 41 | return ret 42 | -------------------------------------------------------------------------------- /lambda/src/compiler/compiler_cli.py: -------------------------------------------------------------------------------- 1 | """ 2 | This CLI depends on having valid AWS credentials active, to query the account environment, 3 | and on the environment variable CORE_STACK_NAME, which defaults to 'bclaw-core' if not set. 4 | """ 5 | 6 | import argparse 7 | import logging 8 | import yaml 9 | import sys 10 | 11 | from dotenv import load_dotenv 12 | 13 | from pkg.compiler import compile_template 14 | 15 | if __name__ == "__main__": 16 | parser = argparse.ArgumentParser(description=__doc__) 17 | parser.add_argument("infile", type=argparse.FileType("r"), nargs="?", default=sys.stdin) 18 | parser.add_argument("cfn_file", type=argparse.FileType("w"), nargs="?", default=sys.stdout) 19 | parser.add_argument("sfn_file", type=argparse.FileType("w"), nargs="?", default=sys.stderr) 20 | parser.add_argument("--verbose", "-v", action="count") 21 | args = parser.parse_args() 22 | 23 | load_dotenv() 24 | 25 | logging.basicConfig(level=(logging.DEBUG if args.verbose else logging.INFO)) 26 | 27 | wf_spec = yaml.safe_load(args.infile) 28 | wf_spec.pop("Transform", None) 29 | 30 | result = compile_template(wf_spec, {}, state_machine_out=args.sfn_file) 31 | yaml.safe_dump(result, args.cfn_file) 32 | 33 | sys.exit(0) 34 | -------------------------------------------------------------------------------- /lambda/tests/compiler/conftest.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pytest 4 | 5 | 6 | @pytest.fixture(scope="session") 7 | def compiler_env(): 8 | os.environ.update({ 9 | "CORE_STACK_NAME": "bclaw-core", 10 | "CHOOSER_LAMBDA_ARN": "chooser_lambda_arn", 11 | "ECS_TASK_ROLE_ARN": "ecs_task_role_arn", 12 | "NOTIFICATIONS_LAMBDA_ARN": "notifications_lambda_arn", 13 | "GATHER_LAMBDA_ARN": "gather_lambda_arn", 14 | "ON_DEMAND_GPU_QUEUE_ARN": "on_demand_gpu_queue_arn", 15 | "ON_DEMAND_QUEUE_ARN": "on_demand_queue_arn", 16 | "INITIALIZER_LAMBDA_ARN": "initializer_lambda_arn", 17 | "JOB_DEF_LAMBDA_ARN": "job_def_lambda_arn", 18 | "LAUNCHER_BUCKET_NAME": "launcher_bucket_name", 19 | "LOG_RETENTION_DAYS": "99", 20 | "LOGGING_DESTINATION_ARN": "logging_destination_arn", 21 | "RESOURCE_BUCKET_NAME": "resource_bucket_name", 22 | "RUNNER_REPO_URI": "runner_repo_uri", 23 | "SCATTER_INIT_LAMBDA_ARN": "scatter_init_lambda_arn", 24 | "SCATTER_LAMBDA_ARN": "scatter_lambda_arn", 25 | "SOURCE_VERSION": "1234567", 26 | "SPOT_GPU_QUEUE_ARN": "spot_gpu_queue_arn", 27 | "SPOT_QUEUE_ARN": "spot_queue_arn", 28 | "STATES_EXECUTION_ROLE_ARN": "states_execution_role_arn", 29 | "SUBPIPES_LAMBDA_ARN": "subpipes_lambda_arn", 30 | }) 31 | -------------------------------------------------------------------------------- /bclaw_runner/src/runner/signal_trapper.py: -------------------------------------------------------------------------------- 1 | from contextlib import contextmanager 2 | 
import logging 3 | import signal 4 | 5 | from docker.models.containers import Container 6 | 7 | logger = logging.getLogger(__name__) 8 | 9 | SKIP = { 10 | # these signal handlers cannot be overridden 11 | signal.SIGKILL, 12 | signal.SIGSTOP, 13 | # these signals are typically ignored (https://man.netbsd.org/signal.7) 14 | signal.SIGURG, 15 | signal.SIGCONT, 16 | signal.SIGCHLD, 17 | signal.SIGIO, 18 | signal.SIGWINCH, 19 | # signal.SIGINFO, 20 | # signal.SIGPWR, 21 | } 22 | 23 | 24 | # https://stackoverflow.com/questions/2148888/python-trap-all-signals 25 | @contextmanager 26 | def signal_trapper(container: Container): 27 | def _handler(signal_number: int, _): 28 | logger.warning(f"received signal {signal.strsignal(signal_number)}") 29 | logger.warning("stopping subprocess") 30 | container.stop(timeout=5) 31 | 32 | original_handlers = {} 33 | try: 34 | logger.debug("setting new signal handlers") 35 | for sig in signal.valid_signals() - SKIP: 36 | if signal.getsignal(sig) is not signal.SIG_IGN: 37 | original_handlers[sig] = signal.signal(sig, _handler) 38 | yield 39 | finally: 40 | logger.debug("restoring signal handlers") 41 | for k, v in original_handlers.items(): 42 | signal.signal(k, v) 43 | -------------------------------------------------------------------------------- /lambda/src/compiler/pkg/native_step_resources.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from typing import Generator, List 3 | 4 | from . import state_machine_resources as sm 5 | from .util import Step, Resource, State 6 | 7 | 8 | def handle_native_step(step: Step, 9 | options: dict, 10 | map_depth: int) -> Generator[Resource, None, List[State]]: 11 | logger = logging.getLogger(__name__) 12 | logger.info(f"making native step {step.name}") 13 | 14 | step_type = step.spec["Type"] 15 | 16 | ret = step.spec.copy() 17 | 18 | if step_type == "Parallel": 19 | sub_branches = [] 20 | 21 | for branch in step.spec["Branches"]: 22 | sub_branch = yield from sm.make_branch(branch["steps"], options, depth=map_depth) 23 | sub_branches.append(sub_branch) 24 | 25 | ret.update({"Branches": sub_branches}) 26 | 27 | try: 28 | # if this native step was generated by the compiler, don't modify ResultPath or OutputPath 29 | ret.pop("_stet") 30 | 31 | except KeyError: 32 | if step_type not in {"Wait", "Succeed", "Fail"}: 33 | ret.update({"ResultPath": None}) 34 | 35 | if step_type != "Fail": 36 | ret.update({"OutputPath": "$"}) 37 | 38 | ret.pop("Next", None) 39 | ret.pop("End", None) 40 | 41 | if step_type not in {"Succeed", "Fail"}: 42 | ret.update(**step.next_or_end) 43 | 44 | return [State(step.name, ret)] 45 | -------------------------------------------------------------------------------- /lambda/src/qc_checker/qc_checker.py: -------------------------------------------------------------------------------- 1 | from contextlib import closing 2 | import json 3 | import logging 4 | 5 | import boto3 6 | 7 | # from lambda_logs import JSONFormatter, custom_lambda_logs 8 | 9 | logger = logging.getLogger() 10 | logger.setLevel(logging.INFO) 11 | # logger.handlers[0].setFormatter(JSONFormatter()) 12 | 13 | 14 | class QCFailed(Exception): 15 | def __init__(self, message: str): 16 | self.message = message 17 | 18 | 19 | def lambda_handler(event: dict, context: object): 20 | # with custom_lambda_logs(**event["logging"]): 21 | logger.info(f"event: {str(event)}") 22 | 23 | s3_path = f"{event['repo']}/{event['qc_result_file']}" 24 | bucket, key = s3_path.split("/", 3)[2:] 25 | 26 | s3 = 
boto3.client("s3") 27 | response = s3.get_object(Bucket=bucket, Key=key) 28 | with closing(response["Body"]) as fp: 29 | qc_object = json.load(fp) 30 | 31 | logger.info(f"input: {str(qc_object)}") 32 | 33 | result = eval(event["qc_expression"], globals(), qc_object) 34 | 35 | if result: 36 | logger.warning("failed QC check") 37 | sfn = boto3.client("stepfunctions") 38 | sfn.stop_execution( 39 | executionArn=event["execution_id"], 40 | error=f"Job {event['logging']['job_file_key']} failed QC check at step {event['logging']['step_name']}", 41 | cause=f"failed condition: {event['qc_expression']}" 42 | ) 43 | raise QCFailed(f"QC check failed ({event['qc_expression']})") 44 | else: 45 | logger.info("passed QC check") 46 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2020, Bayer AG 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | 3. Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 | -------------------------------------------------------------------------------- /lambda/src/compiler/pkg/chooser_resources.py: -------------------------------------------------------------------------------- 1 | import jmespath 2 | import logging 3 | import os 4 | from typing import List 5 | 6 | from voluptuous.error import Invalid 7 | 8 | from .util import Step, State, lambda_logging_block, lambda_retry 9 | 10 | 11 | def choice_spec(expr_str: str, next_step: str) -> dict: 12 | ret = { 13 | "Variable": "$.choice", 14 | "StringEquals": expr_str, 15 | "Next": next_step 16 | } 17 | return ret 18 | 19 | 20 | def handle_chooser_step(step: Step) -> List[State]: 21 | logger = logging.getLogger(__name__) 22 | logger.info(f"making chooser step {step.name}") 23 | 24 | if step.is_terminal: 25 | raise Invalid("chooser steps cannot be terminal") 26 | 27 | choice_step_name = f"{step.name}.choose" 28 | 29 | exprs = jmespath.search("choices[].if", step.spec) 30 | nexts = jmespath.search("choices[].next", step.spec) 31 | 32 | choices = [choice_spec(e, n) for e, n in zip(exprs, nexts)] 33 | 34 | task_step = { 35 | "Type": "Task", 36 | "Resource": os.environ["CHOOSER_LAMBDA_ARN"], 37 | "Parameters": { 38 | "repo.$": "$.repo.uri", 39 | **step.input_field, 40 | "expressions": exprs, 41 | **lambda_logging_block(step.name), 42 | }, 43 | **lambda_retry(), 44 | "ResultPath": "$.choice", 45 | "OutputPath": "$", 46 | "Next": choice_step_name, 47 | } 48 | 49 | choice_step = { 50 | "Type": "Choice", 51 | "Choices": choices, 52 | "Default": step.next, 53 | } 54 | 55 | ret = [ 56 | State(step.name, task_step), 57 | State(choice_step_name, choice_step), 58 | ] 59 | 60 | return ret 61 | -------------------------------------------------------------------------------- /lambda/src/common/python/substitutions.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | import jmespath 3 | import json 4 | import re 5 | from string import Template 6 | from typing import Any 7 | 8 | 9 | def _lookup(target: str, data: dict): 10 | ret0 = jmespath.search(target, data) 11 | 12 | if ret0 is None: 13 | raise RuntimeError(f"{target} not found in job data") 14 | elif isinstance(ret0, (dict, list)): 15 | ret = json.dumps(json.dumps(ret0)) 16 | else: 17 | ret = str(ret0) 18 | 19 | return ret 20 | 21 | 22 | JOB_DATA_FINDER = re.compile(r"\${(.+?)}") 23 | 24 | def substitute_job_data(subject: Any, job_data: dict): 25 | lookup = partial(_lookup, data=job_data) 26 | 27 | if isinstance(subject, str): 28 | result = JOB_DATA_FINDER.sub(lambda m: lookup(m.group(1)), subject) 29 | 30 | elif isinstance(subject, list): 31 | result = [substitute_job_data(v, job_data) for v in subject] 32 | 33 | elif isinstance(subject, dict): 34 | result = {k: substitute_job_data(v, job_data) for k, v in subject.items()} 35 | 36 | else: 37 | result = subject 38 | 39 | return result 40 | 41 | 42 | def substitute_into_filenames(subject: Any, subs: dict): 43 | if isinstance(subject, str): 44 | try: 45 | result = Template(subject).safe_substitute(subs) 46 | 47 | except KeyError: 48 | raise RuntimeError(f"unrecognized substitution in {subject}") 49 | 50 | elif isinstance(subject, list): 51 | result = [substitute_into_filenames(v, subs) for v in subject] 52 | 53 | elif isinstance(subject, dict): 54 | result = {k: substitute_into_filenames(v, subs) for k, v in subject.items()} 55 | 56 | else: 57 | result = subject 58 | 59 | return result 60 | 
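# Example behavior of the two substitution helpers above (a sketch; the keys,
# placeholder names, and file names below are hypothetical):
#
#   substitute_job_data("${job.sample}_aligned.bam", {"job": {"sample": "A1"}})
#     -> "A1_aligned.bam"           (${...} expressions are resolved with jmespath against the job data)
#   substitute_job_data("${job.missing}", {"job": {"sample": "A1"}})
#     -> raises RuntimeError        (unresolved job data lookups are treated as errors)
#
#   substitute_into_filenames("${scatter}/output.txt", {"scatter": "00042"})
#     -> "00042/output.txt"         (string.Template substitution)
#   substitute_into_filenames("${unknown}/output.txt", {"scatter": "00042"})
#     -> "${unknown}/output.txt"    (safe_substitute leaves unrecognized placeholders untouched)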
-------------------------------------------------------------------------------- /lambda/tests/scatter_init/test_scatter_init.py: -------------------------------------------------------------------------------- 1 | from contextlib import closing 2 | import json 3 | 4 | import boto3 5 | import moto 6 | import pytest 7 | 8 | from ...src.scatter_init.scatter_init import lambda_handler 9 | 10 | JOB_DATA_TEMPLATE = { 11 | "job": {"job": "data"}, 12 | "scatter": {}, 13 | "parent": {"file": "s3://test-bucket/repo/path/file.txt"} 14 | } 15 | 16 | 17 | @pytest.fixture(scope="module") 18 | def repo_bucket(): 19 | with moto.mock_aws(): 20 | yld = boto3.resource("s3", region_name="us-east-1").Bucket("test-bucket") 21 | yld.create() 22 | yld.put_object(Key="repo/path/Scatter/_JOB_DATA_", Body=json.dumps(JOB_DATA_TEMPLATE).encode("utf-8")) 23 | yield yld 24 | 25 | 26 | def test_lambda_handler(repo_bucket): 27 | event = { 28 | "index": "99", 29 | "repo": { 30 | "bucket": repo_bucket.name, 31 | "prefix": "repo/path/Scatter", 32 | "uri": "s3://this/is/not/used", 33 | }, 34 | "scatter": { 35 | "number": "88", 36 | "file": "s3://bucket/yada/yada/file.txt", 37 | }, 38 | "logging": {}, 39 | } 40 | 41 | result = lambda_handler(event, {}) 42 | expect = { 43 | "bucket": repo_bucket.name, 44 | "prefix": "repo/path/Scatter/00099", 45 | "uri": f"s3://{repo_bucket.name}/repo/path/Scatter/00099" 46 | } 47 | 48 | assert result == expect 49 | 50 | job_data_obj = boto3.resource("s3").Object(result["bucket"], f"{result['prefix']}/_JOB_DATA_") 51 | response = job_data_obj.get() 52 | with closing(response["Body"]) as fp: 53 | job_data = json.load(fp) 54 | 55 | expected_job_data = { 56 | "job": JOB_DATA_TEMPLATE["job"], 57 | "scatter": event["scatter"], 58 | "parent": JOB_DATA_TEMPLATE["parent"], 59 | } 60 | assert job_data == expected_job_data 61 | -------------------------------------------------------------------------------- /lambda/src/scatter_init/scatter_init.py: -------------------------------------------------------------------------------- 1 | from contextlib import closing 2 | import json 3 | import logging 4 | 5 | import boto3 6 | 7 | from lambda_logs import log_preamble, log_event 8 | from repo_utils import SYSTEM_FILE_TAG, Repo 9 | 10 | logger = logging.getLogger() 11 | logger.setLevel(logging.INFO) 12 | 13 | 14 | def lambda_handler(event: dict, context: object): 15 | # event = { 16 | # index: str, 17 | # repo: { 18 | # bucket: str 19 | # prefix: str 20 | # } 21 | # scatter: { 22 | # key: value 23 | # } 24 | # logging: { 25 | # branch: str 26 | # job_file_bucket: str 27 | # job_file_key: str 28 | # job_file_version: str 29 | # sfn_execution_id: str 30 | # step_name: str 31 | # workflow_name: str 32 | # } 33 | # ... 
34 | # } 35 | 36 | log_preamble(**event.pop("logging"), logger=logger) 37 | log_event(logger, event) 38 | 39 | s3 = boto3.resource("s3") 40 | 41 | # read job data template 42 | scatter_repo = Repo(event["repo"]) 43 | job_data_template = scatter_repo.qualify("_JOB_DATA_") 44 | obj = s3.Object(job_data_template.bucket, job_data_template.key) 45 | response = obj.get() 46 | with closing(response["Body"]) as fp: 47 | job_data = json.load(fp) 48 | 49 | # replace scatter field 50 | job_data["scatter"].update(event["scatter"]) 51 | 52 | # establish branch repo 53 | branch_repo = scatter_repo.sub_repo(f"{int(event['index']):05}") 54 | 55 | # write job data 56 | job_data_file = branch_repo.qualify("_JOB_DATA_") 57 | job_data_obj = s3.Object(job_data_file.bucket, job_data_file.key) 58 | job_data_obj.put(Body=json.dumps(job_data).encode("utf-8"), 59 | ServerSideEncryption="AES256", 60 | Tagging=SYSTEM_FILE_TAG) 61 | 62 | # return repo uri 63 | return dict(branch_repo) 64 | -------------------------------------------------------------------------------- /bclaw_runner/src/runner/qc_check.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import os 4 | from typing import Generator 5 | 6 | import boto3 7 | 8 | 9 | logger = logging.getLogger(__name__) 10 | 11 | class QCFailure(Exception): 12 | def __init__(self, message: str, failures: list): 13 | super().__init__(message) 14 | self.failures = failures 15 | 16 | 17 | def abort_execution(failed_expressions: list) -> None: 18 | logger.warning("aborting workflow execution") 19 | 20 | region = os.environ["AWS_DEFAULT_REGION"] 21 | acct = os.environ["AWS_ACCOUNT_ID"] 22 | wf_name = os.environ["BC_WORKFLOW_NAME"] 23 | exec_id = os.environ["BC_EXECUTION_ID"] 24 | step_name = os.environ["BC_STEP_NAME"] 25 | execution_arn = f"arn:aws:states:{region}:{acct}:execution:{wf_name}:{exec_id}" 26 | 27 | cause = "failed QC conditions: " + "; ".join(failed_expressions) 28 | 29 | sfn = boto3.client("stepfunctions") 30 | sfn.stop_execution( 31 | executionArn=execution_arn, 32 | error=f"Job {exec_id} failed QC check at step {step_name}", 33 | cause=cause 34 | ) 35 | 36 | def run_one_qc_check(qc_data: dict, qc_expression: str) -> bool: 37 | if result := eval(qc_expression, globals(), qc_data): 38 | logger.warning(f"failed QC check: {qc_expression}") 39 | else: 40 | logger.info(f"passed QC check: {qc_expression}") 41 | return result 42 | 43 | 44 | def run_all_qc_checks(checks: list) -> Generator[str, None, None]: 45 | for item in checks: 46 | qc_file = item["qc_result_file"] 47 | logger.info(f"{qc_file=}") 48 | 49 | with open(qc_file) as fp: 50 | qc_data = json.load(fp) 51 | 52 | for qc_expression in item["stop_early_if"]: 53 | if run_one_qc_check(qc_data, qc_expression): 54 | yld = f"{os.path.basename(qc_file)}: {qc_expression}" 55 | yield yld 56 | 57 | 58 | def do_checks(checks: list) -> None: 59 | if checks: 60 | logger.info("starting QC checks") 61 | if failures := list(run_all_qc_checks(checks)): 62 | logger.error(f"{len(failures)} QC checks failed") 63 | raise QCFailure("QC checks failed", failures) 64 | logger.info("QC checks finished") 65 | else: 66 | logger.info("no QC checks requested") 67 | -------------------------------------------------------------------------------- /lambda/src/common/python/file_select.py: -------------------------------------------------------------------------------- 1 | from contextlib import closing 2 | import csv 3 | import json 4 | import re 5 | 6 | import boto3 7 | 
from jsonpath import jsonpath 8 | import yaml 9 | 10 | # matches: 11 | # s3://(bucket)/(key/key/key.ext):(jsonpath) 12 | # s3://(bucket)/(key/key/key.ext) 13 | PARSER = re.compile(r"^s3://(.+?)/([^:]+)(?::(.+))?$") 14 | 15 | 16 | def read_json(body): 17 | ret = json.load(body) 18 | return ret 19 | 20 | 21 | def read_json_lines(body): 22 | ret = [json.loads(l) for l in body.iter_lines()] 23 | return ret 24 | 25 | 26 | def read_yaml(body): 27 | ret = yaml.load(body, Loader=yaml.SafeLoader) 28 | return ret 29 | 30 | 31 | def read_csv(body, delim=","): 32 | text = (l.decode("utf-8") for l in body.iter_lines()) 33 | ret = list(csv.DictReader(text, delimiter=delim)) 34 | return ret 35 | 36 | 37 | def slurp(body): 38 | ret = [l.decode("utf-8") for l in body.iter_lines()] 39 | return ret 40 | 41 | 42 | def stringify(item) -> str: 43 | if isinstance(item, (dict, list)): 44 | return json.dumps(item) 45 | else: 46 | return str(item) 47 | 48 | 49 | def select_file_contents(s3_path: str) -> list: 50 | bucket, key, selector = PARSER.fullmatch(s3_path).groups() 51 | 52 | s3 = boto3.client("s3") 53 | response = s3.get_object(Bucket=bucket, Key=key) 54 | with closing(response["Body"]) as fp: 55 | if selector is None: 56 | ret0 = slurp(fp) 57 | else: 58 | if key.endswith(".json"): 59 | contents = read_json(fp) 60 | elif key.endswith(".jsonl") or key.endswith(".ndjson"): 61 | contents = read_json_lines(fp) 62 | elif key.endswith(".yaml") or key.endswith(".yml"): 63 | contents = read_yaml(fp) 64 | elif key.endswith(".csv"): 65 | contents = read_csv(fp) 66 | elif key.endswith(".tsv") or key.endswith(".tab"): 67 | contents = read_csv(fp, delim="\t") 68 | else: 69 | contents = slurp(fp) 70 | 71 | ret0 = jsonpath(contents, selector) 72 | 73 | if not isinstance(ret0, list): 74 | raise AssertionError("selector did not create a list") 75 | 76 | ret = [stringify(i) for i in ret0] 77 | 78 | return ret 79 | -------------------------------------------------------------------------------- /bclaw_runner/src/runner/workspace.py: -------------------------------------------------------------------------------- 1 | from contextlib import contextmanager 2 | import json 3 | import logging 4 | import os 5 | import shutil 6 | from tempfile import mkdtemp, NamedTemporaryFile 7 | from typing import Generator 8 | 9 | from .dind import run_child_container 10 | 11 | logger = logging.getLogger(__name__) 12 | 13 | 14 | class UserCommandsFailed(Exception): 15 | def __init__(self, message: str, exit_code:int): 16 | super().__init__(message) 17 | self.exit_code = exit_code 18 | 19 | 20 | @contextmanager 21 | def workspace() -> Generator[str, None, None]: 22 | orig_path = os.getcwd() 23 | work_path = mkdtemp(dir=os.environ["BC_SCRATCH_PATH"]) 24 | 25 | logger.debug(f"workspace={work_path}") 26 | 27 | try: 28 | os.chdir(work_path) 29 | yield work_path 30 | 31 | finally: 32 | logger.debug("cleaning up workspace") 33 | os.chdir(orig_path) 34 | shutil.rmtree(work_path, ignore_errors=True) 35 | logger.debug("cleanup finished") 36 | 37 | 38 | def write_job_data_file(job_data: dict, dest_dir: str) -> str: 39 | with NamedTemporaryFile(prefix="job_data_", suffix=".json", dir=dest_dir, mode="w", delete=False) as fp: 40 | json.dump(job_data, fp) 41 | return fp.name 42 | 43 | 44 | def run_commands(image_spec: dict, commands: list, work_dir: str, job_data_file: str, shell_opt: str) -> None: 45 | script_file = "_commands.sh" 46 | 47 | with open(script_file, "w") as fp: 48 | for command in commands: 49 | print(command, file=fp) 50 | 51 | 
logger.info(f"shell option={shell_opt}") 52 | 53 | if shell_opt == "sh": 54 | shell_cmd = "sh -veu" 55 | elif shell_opt == "bash": 56 | shell_cmd = "bash -veuo pipefail" 57 | elif shell_opt == "sh-pipefail": 58 | shell_cmd = "sh -veuo pipefail" 59 | else: 60 | raise RuntimeError(f"unrecognized shell: {shell_opt}") 61 | 62 | os.chmod(script_file, 0o700) 63 | command = f"{shell_cmd} {script_file}" 64 | 65 | if (exit_code := run_child_container(image_spec, command, work_dir, job_data_file)) == 0: 66 | logger.info("command block succeeded") 67 | else: 68 | logger.error("command block failed") 69 | raise UserCommandsFailed(f"command block failed with exit code {exit_code}", exit_code) 70 | -------------------------------------------------------------------------------- /lambda/src/compiler/pkg/compiler.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | 4 | from . import state_machine_resources as sm 5 | from .util import Resource, substitute_params 6 | from .validation import workflow_schema 7 | 8 | logger = logging.getLogger() 9 | 10 | 11 | # remove this after everybody gets used to capitalized top level keys 12 | def _capitalize_top_level_keys(frag: dict) -> dict: 13 | ret = {k.capitalize(): v for k, v in frag.items()} 14 | return ret 15 | 16 | 17 | def compile_template(fragment: dict, param_values: dict, state_machine_out=None) -> dict: 18 | # normalize workflow spec 19 | capitalized_fragment = _capitalize_top_level_keys(fragment) 20 | subbed_fragment = substitute_params(param_values, capitalized_fragment) 21 | normalized_wf = workflow_schema(subbed_fragment) 22 | 23 | options = normalized_wf["Options"] 24 | repository = normalized_wf["Repository"].rstrip("/") 25 | steps = normalized_wf["Steps"] 26 | 27 | # create state machine and associated resources 28 | resources = {} 29 | curr_resource = Resource("fake", {}) 30 | for curr_resource in sm.handle_state_machine(steps, options, repository, state_machine_out): 31 | resources.update([curr_resource]) 32 | 33 | # the main state machine Resource should be the last thing yielded by sm.handle_state_machine 34 | state_machine = curr_resource 35 | sm.add_definition_substitutions(state_machine, resources) 36 | 37 | state_machine_version = sm.state_machine_version_rc(state_machine) 38 | state_machine_alias = sm.state_machine_alias_rc(state_machine_version) 39 | resources.update([state_machine_alias, state_machine_version]) 40 | 41 | # create cloudformation template fragment to return 42 | ret = { 43 | "AWSTemplateFormatVersion": "2010-09-09", 44 | "Resources": resources, 45 | "Outputs": { 46 | "LauncherBucketName": { 47 | "Value": os.environ["LAUNCHER_BUCKET_NAME"], 48 | }, 49 | "LauncherURI": { 50 | "Value": {"Fn::Sub": f"s3://{os.environ['LAUNCHER_BUCKET_NAME']}/${{AWS::StackName}}/"}, 51 | }, 52 | "StepFunctionsStateMachineArn": { 53 | "Value": {"Ref": state_machine.name}, 54 | }, 55 | }, 56 | } 57 | 58 | if "Parameters" in normalized_wf: 59 | ret["Parameters"] = normalized_wf["Parameters"] 60 | 61 | return ret 62 | -------------------------------------------------------------------------------- /doc/qc.md: -------------------------------------------------------------------------------- 1 | # The BayerCLAW language: Quality Control (QC) checks 2 | 3 | The BayerCLAW language provides a way to define quality control (QC) checks that can be applied to 4 | analysis results. 
These checks can be used to ensure that the results are consistent with the 5 | expectations of the user or the requirements of downstream processes. 6 | 7 | ## The qc_check block 8 | 9 | The `qc_check` block an optional batch step element used to define a QC check. It has the following 10 | structure: 11 | 12 | ```yaml 13 | qc_check: 14 | qc_result_file: 15 | stop_early_if: 16 | ``` 17 | 18 | The `qc_result_file` field specifies the path to the file containing the QC results. The file must 19 | be in the JSON format and contain a dictionary with the QC results. The dictionary keys are the 20 | names of the QC checks and the values are the results of the checks. 21 | 22 | The `stop_early_if` field specifies a condition that, if met, will cause workflow execution to be 23 | aborted. The conditions are Python expressions that yield a Boolean value. The expression can refer 24 | to the QC results using the dictionary keys as variables. 25 | 26 | You may provide multiple qc_check blocks in each batch step, and multiple conditions per qc_check 27 | block: 28 | 29 | ```yaml 30 | qc_check: 31 | - 32 | qc_result_file: qc_results1.json 33 | stop_early_if: 34 | - "mean_coverage < 0.30" 35 | - "total_length < 100" 36 | - 37 | qc_result_file: qc_results2.json 38 | stop_early_if: 39 | - "mean_coverage < 0.30" 40 | - "total_length < 100" 41 | ``` 42 | 43 | If any `stop_early_if` condition is met, the workflow execution will be aborted. 44 | 45 | Note that in the second example above, it is assumed that the `qc_results*` files are of the 46 | format: 47 | 48 | ```json5 49 | { 50 | "mean_coverage": 0.25, 51 | "total_length": 50, 52 | // other fields... 53 | } 54 | ``` 55 | 56 | so that the keys `mean_coverage` and `total_length` become variables in the `stop_early_if` conditions. 57 | 58 | ## Notifications 59 | 60 | To receive notifications of failed QC checks, you must subscribe to BayerCLAW's SNS topic. Workflow 61 | executions that fail due to a QC check will terminate in an ABORTED state, therefore to receive only 62 | notifications for failed QC checks, your subscription must include a filter policy like the following: 63 | 64 | ```json 65 | { 66 | "workflow_name": ["my_workflow"], 67 | "status": ["ABORTED"] 68 | } 69 | ``` 70 | 71 | See the [notifications document](notifications.md) for more information. 72 | -------------------------------------------------------------------------------- /util/bclaw_logs/lambda/src/job_status.py: -------------------------------------------------------------------------------- 1 | import datetime as dt 2 | # from datetime import datetime, timedelta 3 | import json 4 | import os 5 | 6 | import boto3 7 | from boto3.dynamodb.conditions import Attr 8 | from botocore.exceptions import ClientError 9 | 10 | 11 | def lambda_handler(event: dict, context: object) -> None: 12 | print(f"{event=}") 13 | 14 | dynamodb = boto3.resource("dynamodb") 15 | table = dynamodb.Table(os.environ["JOB_STATUS_TABLE"]) 16 | 17 | # todo: 18 | # should launcher put wf name in input object? 19 | # should eventbridge rule put something bclaw-specific in input object for recognition? 20 | 21 | try: 22 | wf_name = event["detail"]["stateMachineArn"].rsplit(":", 1)[-1] 23 | exec_id = event["detail"]["name"] 24 | job_status = event["detail"]["status"] 25 | 26 | input_obj = json.loads(event["detail"]["input"]) 27 | 28 | # HEY! 
this causes subpipe executions to look like superpipe executions 29 | # wf_name, job_file_name = input_obj["job_file"]["key"].split("/", 1) 30 | job_file_name = input_obj["job_file"]["key"].split("/", 1)[-1] 31 | job_file_version = input_obj["job_file"]["version"] 32 | 33 | time_str = event["time"] 34 | timestamp = dt.datetime.strptime(time_str, "%Y-%m-%dT%H:%M:%S%z") 35 | expiration = timestamp + dt.timedelta(days=int(os.environ["EXPIRATION_DAYS"])) 36 | 37 | item = { 38 | "Item": { 39 | "workflowName": wf_name, 40 | "executionId": exec_id, 41 | "jobFile": f"{job_file_name}#{job_file_version}", 42 | "status": job_status, 43 | "timestamp": int(timestamp.timestamp()), 44 | "expiration": int(expiration.timestamp()), 45 | } 46 | } 47 | 48 | # events might arrive out of order: this condition prevents 49 | # existing SUCCEEDED, FAILED, or ABORTED records in the table 50 | # from being overwritten by incoming RUNNING records 51 | if job_status == "RUNNING": 52 | item["ConditionExpression"] = ( 53 | Attr("status").not_exists() | 54 | Attr("status").eq("RUNNING") 55 | ) 56 | 57 | try: 58 | result = table.put_item(**item) 59 | print(str(result)) 60 | 61 | except ClientError as e: 62 | if e.response["Error"]["Code"] == "ConditionalCheckFailedException": 63 | pass 64 | 65 | except (KeyError, ValueError): 66 | print("not a bayerclaw execution") 67 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # custom 2 | *~ 3 | .idea/ 4 | .DS_Store 5 | defunct/ 6 | notes/ 7 | scratch/ 8 | cheez/ 9 | 10 | # Byte-compiled / optimized / DLL files 11 | __pycache__/ 12 | *.py[cod] 13 | *$py.class 14 | 15 | # C extensions 16 | *.so 17 | 18 | # Distribution / packaging 19 | .Python 20 | build/ 21 | develop-eggs/ 22 | dist/ 23 | downloads/ 24 | eggs/ 25 | .eggs/ 26 | lib/ 27 | lib64/ 28 | parts/ 29 | sdist/ 30 | var/ 31 | wheels/ 32 | pip-wheel-metadata/ 33 | share/python-wheels/ 34 | *.egg-info/ 35 | .installed.cfg 36 | *.egg 37 | MANIFEST 38 | 39 | # PyInstaller 40 | # Usually these files are written by a python script from a template 41 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 42 | *.manifest 43 | *.spec 44 | 45 | # Installer logs 46 | pip-log.txt 47 | pip-delete-this-directory.txt 48 | 49 | # Unit test / coverage reports 50 | htmlcov/ 51 | .tox/ 52 | .nox/ 53 | .coverage 54 | .coverage.* 55 | .cache 56 | nosetests.xml 57 | coverage.xml 58 | *.cover 59 | *.py,cover 60 | .hypothesis/ 61 | .pytest_cache/ 62 | 63 | # Translations 64 | *.mo 65 | *.pot 66 | 67 | # Django stuff: 68 | *.log 69 | local_settings.py 70 | db.sqlite3 71 | db.sqlite3-journal 72 | 73 | # Flask stuff: 74 | instance/ 75 | .webassets-cache 76 | 77 | # Scrapy stuff: 78 | .scrapy 79 | 80 | # Sphinx documentation 81 | docs/_build/ 82 | 83 | # PyBuilder 84 | target/ 85 | 86 | # Jupyter Notebook 87 | .ipynb_checkpoints 88 | 89 | # IPython 90 | profile_default/ 91 | ipython_config.py 92 | 93 | # pyenv 94 | .python-version 95 | 96 | # pipenv 97 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 98 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 99 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 100 | # install all needed dependencies. 101 | #Pipfile.lock 102 | 103 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 104 | __pypackages__/ 105 | 106 | # Celery stuff 107 | celerybeat-schedule 108 | celerybeat.pid 109 | 110 | # SageMath parsed files 111 | *.sage.py 112 | 113 | # Environments 114 | .env 115 | .venv 116 | env/ 117 | venv/ 118 | ENV/ 119 | env.bak/ 120 | venv.bak/ 121 | 122 | # Spyder project settings 123 | .spyderproject 124 | .spyproject 125 | 126 | # Rope project settings 127 | .ropeproject 128 | 129 | # mkdocs documentation 130 | /site 131 | 132 | # mypy 133 | .mypy_cache/ 134 | .dmypy.json 135 | dmypy.json 136 | 137 | # Pyre type checker 138 | .pyre/ 139 | -------------------------------------------------------------------------------- /lambda/src/gather/gather.py: -------------------------------------------------------------------------------- 1 | from contextlib import closing 2 | from itertools import groupby 3 | import json 4 | import logging 5 | from os.path import basename 6 | 7 | import boto3 8 | 9 | from lambda_logs import log_preamble, log_event 10 | from repo_utils import SYSTEM_FILE_TAG 11 | from substitutions import substitute_job_data 12 | 13 | logger = logging.getLogger() 14 | logger.setLevel(logging.INFO) 15 | 16 | 17 | def lambda_handler(event: dict, context: object): 18 | # event = { 19 | # outputs: str 20 | # repo: str 21 | # step_name: str 22 | # logging: { 23 | # branch: str 24 | # job_file_bucket: str 25 | # job_file_key: str 26 | # job_file_version: str 27 | # sfn_execution_id: str 28 | # step_name: str 29 | # workflow_name: str 30 | # } 31 | # } 32 | 33 | log_preamble(**event["logging"], logger=logger) 34 | log_event(logger, event) 35 | 36 | parent_outputs = json.loads(event["outputs"]) 37 | if parent_outputs: 38 | step_name = event["step_name"] 39 | 40 | parent_repo = event["repo"] 41 | parent_repo_bucket, parent_repo_prefix = parent_repo.split("/", 3)[2:] 42 | parent_job_data_key = f"{parent_repo_prefix}/_JOB_DATA_" 43 | 44 | response = boto3.resource("s3").Object(parent_repo_bucket, parent_job_data_key).get() 45 | with closing(response["Body"]) as fp: 46 | parent_job_data = json.load(fp) 47 | 48 | jobby_outputs = substitute_job_data(parent_outputs, parent_job_data) 49 | 50 | bucket = boto3.resource("s3").Bucket(parent_repo_bucket) 51 | prefix = f"{parent_repo_prefix}/{step_name}" 52 | scatter_output_objs = bucket.objects.filter(Prefix=prefix) 53 | scatter_output_uris = [f"s3://{o.bucket_name}/{o.key}" for o in scatter_output_objs] 54 | scatter_output_uris.sort(key=basename) 55 | 56 | filename2group = {k: list(g) for k, g in groupby(scatter_output_uris, key=basename)} 57 | manifest = {} 58 | for key, filename in jobby_outputs.items(): 59 | if filename in filename2group: 60 | manifest[key] = filename2group[filename] 61 | else: 62 | logger.warning(f"no files named {filename} found") 63 | manifest[key] = [] 64 | 65 | manifest_filename = f"{step_name}_manifest.json" 66 | manifest_path = f"{parent_repo}/{manifest_filename}" 67 | manifest_bucket, manifest_key = manifest_path.split("/", 3)[2:] 68 | manifest_obj = boto3.resource("s3").Object(manifest_bucket, manifest_key) 69 | manifest_obj.put(Body=json.dumps(manifest).encode("utf-8"), 70 | Tagging=SYSTEM_FILE_TAG) 71 | 72 | ret = {"manifest": manifest_filename} 73 | else: 74 | ret = {} 75 | 76 | return ret 77 | -------------------------------------------------------------------------------- /lambda/src/compiler/pkg/enhanced_parallel_resources.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | from typing import 
Generator, List 4 | 5 | from . import state_machine_resources as sm 6 | from .util import Step, Resource, State, lambda_logging_block, lambda_retry 7 | 8 | 9 | def handle_parallel_step(step: Step, 10 | options: dict, 11 | map_depth: int) -> Generator[Resource, None, List[State]]: 12 | logger = logging.getLogger(__name__) 13 | logger.info(f"making enhanced parallel step {step.name}") 14 | 15 | sfn_branches = [] 16 | 17 | for idx, branch in enumerate(step.spec["branches"], start=1): 18 | steps = branch["steps"] 19 | try: 20 | expression = branch["if"] 21 | next_step_name = next(iter(steps[0])) 22 | skip_step_name = f"{step.name}: skip_{idx}" 23 | 24 | # note: this creates two native-type steps in the BayerCLAW spec language. 25 | # They will be processed into Amazon States Language in the sm.make_branch() 26 | # call below. 27 | preamble = [ 28 | { 29 | f"{step.name}: {expression}?": { 30 | "Type": "Task", 31 | "Resource": os.environ["CHOOSER_LAMBDA_ARN"], 32 | "Parameters": { 33 | "repo.$": "$.repo.uri", 34 | **step.input_field, 35 | "expression": expression, 36 | **lambda_logging_block(step.name) 37 | }, 38 | **lambda_retry(), 39 | "Catch": [ 40 | { 41 | "ErrorEquals": ["ConditionFailed"], 42 | "Next": skip_step_name 43 | }, 44 | ], 45 | "ResultPath": None, 46 | "OutputPath": "$", 47 | "_stet": True, 48 | 49 | # don't have to do the next_or_end thing, per validation there 50 | # has to be a next step 51 | "Next": next_step_name, 52 | }, 53 | }, 54 | { 55 | skip_step_name: { 56 | "Type": "Succeed", 57 | "_stet": True, 58 | }, 59 | }, 60 | ] 61 | 62 | steps = preamble + steps 63 | 64 | except KeyError: 65 | pass 66 | 67 | sfn_branch = yield from sm.make_branch(steps, options, depth=map_depth) 68 | sfn_branches.append(sfn_branch) 69 | 70 | ret = { 71 | "Type": "Parallel", 72 | "Branches": sfn_branches, 73 | "ResultPath": None, 74 | "OutputPath": "$", 75 | **step.next_or_end, 76 | } 77 | 78 | return [State(step.name, ret)] 79 | -------------------------------------------------------------------------------- /lambda/tests/common/test_repo_utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import pytest 4 | 5 | from ...src.common.python.repo_utils import S3File, Repo 6 | 7 | 8 | def test_s3file(): 9 | result = S3File("bucket", "path/to/file.txt") 10 | assert result.bucket == "bucket" 11 | assert result.key == "path/to/file.txt" 12 | assert result == "s3://bucket/path/to/file.txt" 13 | 14 | 15 | def test_s3file_json_serialize(): 16 | s3file = S3File("bucket", "path/to/file.txt") 17 | result = json.dumps({"file": s3file}) 18 | expect = '{"file": "s3://bucket/path/to/file.txt"}' 19 | assert result == expect 20 | 21 | 22 | @pytest.mark.parametrize("repo_spec, expected_uri", [ 23 | ({"bucket": "repo-bucket", "prefix": "path/to/repo"}, "s3://repo-bucket/path/to/repo"), 24 | ({"bucket": "repo-bucket", "prefix": "path/to/repo", "uri": "s3://just/for/testing"}, "s3://just/for/testing") 25 | ]) 26 | def test_repo_init(repo_spec, expected_uri): 27 | result = Repo(repo_spec) 28 | assert result.bucket == repo_spec["bucket"] 29 | assert result.prefix == repo_spec["prefix"] 30 | assert result.uri == expected_uri 31 | 32 | 33 | def test_repo_from_uri(): 34 | uri = "s3://repo-bucket/path/to/repo" 35 | result = Repo.from_uri(uri) 36 | assert result.bucket == "repo-bucket" 37 | assert result.prefix == "path/to/repo" 38 | 39 | 40 | @pytest.mark.parametrize("spec, expected_bucket, expected_key", [ 41 | ("plain_filename.txt", "repo-bucket", 
"repo/prefix/plain_filename.txt"), 42 | ("s3://other-bucket/other/dir/filename.txt", "other-bucket", "other/dir/filename.txt") 43 | ]) 44 | def test_repo_qualify(spec, expected_bucket, expected_key): 45 | repo = Repo(bucket="repo-bucket", prefix="repo/prefix") 46 | result = repo.qualify(spec) 47 | assert isinstance(result, S3File) 48 | assert result.bucket == expected_bucket 49 | assert result.key == expected_key 50 | 51 | 52 | def test_repo_sub_repo(): 53 | repo = Repo(bucket="repo-bucket", prefix="repo/prefix") 54 | result = repo.sub_repo("sub-repo") 55 | assert isinstance(result, Repo) 56 | assert result.bucket == "repo-bucket" 57 | assert result.prefix == "repo/prefix/sub-repo" 58 | 59 | 60 | def test_repo_repr(): 61 | result = Repo(bucket="repo-bucket", prefix="repo/prefix") 62 | expect = "s3://repo-bucket/repo/prefix" 63 | assert str(result) == expect 64 | 65 | 66 | def test_repo_json_serialize(): 67 | repo = Repo(bucket="repo-bucket", prefix="repo/prefix") 68 | result = json.dumps({"repo": repo}, sort_keys=True) 69 | expect0 = { 70 | "repo": { 71 | "bucket": "repo-bucket", 72 | "prefix": "repo/prefix", 73 | "uri": "s3://repo-bucket/repo/prefix" 74 | } 75 | } 76 | expect = json.dumps(expect0, sort_keys=True) 77 | assert result == expect 78 | 79 | 80 | def test_repo_to_dict(): 81 | repo = Repo(bucket="repo-bucket", prefix="repo/prefix") 82 | result = dict(repo) 83 | expect = { 84 | "bucket": "repo-bucket", 85 | "prefix": "repo/prefix", 86 | "uri": "s3://repo-bucket/repo/prefix" 87 | } 88 | assert result == expect -------------------------------------------------------------------------------- /lambda/src/chooser/multichooser.py: -------------------------------------------------------------------------------- 1 | from contextlib import closing 2 | import json 3 | import logging 4 | import math 5 | import re 6 | from typing import Generator, Tuple, Any 7 | 8 | import boto3 9 | from box import Box, BoxList 10 | 11 | from lambda_logs import log_preamble, log_event 12 | from substitutions import substitute_job_data 13 | 14 | logger = logging.getLogger() 15 | logger.setLevel(logging.INFO) 16 | 17 | 18 | class ConditionFailed(Exception): 19 | pass 20 | 21 | 22 | def load_s3_object(repo: str, input_file: str) -> Any: 23 | if input_file.startswith("s3://"): 24 | s3_path = input_file 25 | else: 26 | s3_path = f"{repo}/{input_file}" 27 | 28 | logger.info(f"loading {s3_path}") 29 | s3 = boto3.client("s3") 30 | 31 | bucket, key = s3_path.split("/", 3)[2:] 32 | response = s3.get_object(Bucket=bucket, Key=key) 33 | with closing(response["Body"]) as fp: 34 | ret = json.load(fp) 35 | 36 | return ret 37 | 38 | 39 | def load_vals(inputs_json: str, repo: str) -> Generator[Tuple, None, None]: 40 | job_data = load_s3_object(repo, "_JOB_DATA_") 41 | yield "job", job_data["job"] 42 | 43 | inputs = json.loads(inputs_json) 44 | jobby_inputs = substitute_job_data(inputs, job_data) 45 | for name, input_file in jobby_inputs.items(): 46 | vals = load_s3_object(repo, input_file) 47 | yield name, vals 48 | 49 | if len(inputs) == 1 and isinstance(vals, dict): 50 | vals.pop(name, None) 51 | for name2, val2 in vals.items(): 52 | yield name2, val2 53 | 54 | 55 | def eval_this(expr: str, vals: dict): 56 | result = eval(expr, globals(), vals) 57 | return result 58 | 59 | 60 | def run_exprs(exprs: list, vals: dict): 61 | for expr in exprs: 62 | result = eval_this(expr, vals) 63 | logger.info(f"evaluating '{expr}': {result}") 64 | if result: 65 | logger.info(f"returning '{expr}'") 66 | return expr 67 | logger.info("no 
conditions evaluated True, returning null") 68 | return None 69 | 70 | 71 | def lambda_handler(event: dict, context: object): 72 | # event = { 73 | # repo: str 74 | # inputs: str # needs to be a json string for auto inputs compatibility 75 | # expressions: [str] | expression: str 76 | # logging: { 77 | # branch: str 78 | # job_file_bucket: str 79 | # job_file_key: str 80 | # job_file_version: str 81 | # sfn_execution_id: str 82 | # step_name: str 83 | # workflow_name: str 84 | # } 85 | # } 86 | 87 | log_preamble(**event.pop("logging"), logger=logger) 88 | log_event(logger, event) 89 | 90 | vals = Box(load_vals(event["inputs"], event["repo"])) 91 | 92 | if "expressions" in event: 93 | ret = run_exprs(event["expressions"], vals) 94 | return ret 95 | 96 | elif "expression" in event: 97 | result = eval_this(event["expression"], vals) 98 | if not result: 99 | raise ConditionFailed 100 | return event["expression"] 101 | -------------------------------------------------------------------------------- /bclaw_runner/src/runner/cache.py: -------------------------------------------------------------------------------- 1 | from concurrent.futures import ThreadPoolExecutor 2 | import fcntl 3 | import logging 4 | import os 5 | from typing import Dict, Tuple 6 | 7 | import backoff 8 | import boto3 9 | 10 | 11 | logger = logging.getLogger(__name__) 12 | logging.getLogger("backoff").setLevel(logging.ERROR) 13 | 14 | 15 | def _backoff_handler(details): 16 | name = details["kwargs"]["name_for_logging"] 17 | wait = details["wait"] 18 | logger.debug(f"failed to lock {name}, retrying in {wait} seconds") 19 | 20 | 21 | def _blocking_download(s3_object, dest_path: str, name_for_logging: str) -> None: 22 | if os.path.isfile(dest_path): 23 | logger.info(f"found {name_for_logging} in cache") 24 | else: 25 | logger.debug(f"acquiring a lock on {name_for_logging}") 26 | lock_path = f"{os.path.dirname(dest_path)}.lock" 27 | with open(lock_path, "w") as lfp: 28 | fcntl.flock(lfp, fcntl.LOCK_EX | fcntl.LOCK_NB) 29 | logger.debug(f"lock acquired for {name_for_logging}") 30 | s3_size = s3_object.content_length 31 | logger.info(f"downloading {name_for_logging} ({s3_size} bytes) to cache") 32 | os.makedirs(os.path.dirname(dest_path), exist_ok=True) 33 | s3_object.download_file(dest_path) 34 | local_size = os.path.getsize(dest_path) 35 | logger.info(f"{name_for_logging} ({s3_size} bytes) downloaded to cache ({local_size} bytes)") 36 | logger.debug(f"releasing lock on {name_for_logging}") 37 | fcntl.flock(lfp, fcntl.LOCK_UN) 38 | os.remove(lock_path) 39 | 40 | 41 | @backoff.on_exception(backoff.constant, 42 | BlockingIOError, 43 | interval=5, 44 | jitter=None, 45 | on_backoff=_backoff_handler) 46 | def _download_loop(s3_object, dest_path: str, *, name_for_logging: str) -> None: 47 | _blocking_download(s3_object, dest_path, name_for_logging) 48 | 49 | 50 | def _download_to_cache(item: Tuple[str, str]) -> Tuple[str, str]: 51 | session = boto3.Session() 52 | 53 | key, s3_path = item 54 | s3_bucket, s3_key = s3_path.split("/", 3)[2:] 55 | src = session.resource("s3").Object(s3_bucket, s3_key) 56 | 57 | cache_path = os.environ["BC_SCRATCH_PATH"] 58 | src_etag = src.e_tag.strip('"') # ETag comes wrapped in double quotes for some reason 59 | file_name = os.path.basename(s3_key) 60 | 61 | cached_file = f"{cache_path}/{src_etag}/{file_name}" 62 | 63 | _download_loop(src, cached_file, name_for_logging=file_name) 64 | 65 | return key, cached_file 66 | 67 | 68 | def get_reference_inputs(ref_spec: Dict[str, str]) -> Dict[str, str]: 69 | ret 
= {} 70 | 71 | if len(ref_spec) > 0: 72 | logger.info(f"caching references: {list(ref_spec.values())}") 73 | 74 | with ThreadPoolExecutor(max_workers=len(ref_spec)) as executor: 75 | result = list(executor.map(_download_to_cache, ref_spec.items())) 76 | 77 | for key, src in result: 78 | dst = ret[key] = os.path.basename(src) 79 | logger.info(f"linking cached {dst} to workspace") 80 | os.link(src, dst) 81 | 82 | return ret 83 | -------------------------------------------------------------------------------- /lambda/src/compiler/pkg/subpipe_resources.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import os 4 | from typing import List 5 | 6 | from .util import Step, State, lambda_logging_block, lambda_retry 7 | 8 | 9 | def file_submit_step(step: Step, run_subpipe_step_name: str) -> dict: 10 | ret = { 11 | "Type": "Task", 12 | "Resource": os.environ["SUBPIPES_LAMBDA_ARN"], 13 | "Parameters": { 14 | "repo.$": "$.repo.uri", 15 | "job_data": step.spec["job_data"], 16 | "submit": json.dumps(step.spec["submit"]), 17 | "step_name": step.name, 18 | **lambda_logging_block(step.name), 19 | }, 20 | **lambda_retry(), 21 | "ResultPath": "$.subpipe", 22 | "OutputPath": "$", 23 | "Next": run_subpipe_step_name, 24 | } 25 | 26 | return ret 27 | 28 | 29 | def run_subpipe_step(step: Step, retrieve_step_name: str) -> dict: 30 | state_machine_arn = step.spec["subpipe"] 31 | 32 | if not state_machine_arn.startswith("arn:"): 33 | state_machine_arn = "arn:aws:states:${AWSRegion}:${AWSAccountId}:stateMachine:" + state_machine_arn 34 | 35 | ret = { 36 | "Type": "Task", 37 | "Resource": "arn:aws:states:::states:startExecution.sync", 38 | "Parameters": { 39 | "Input": { 40 | "index": "main", 41 | "job_file.$": "$.job_file", 42 | "prev_outputs": {}, 43 | "repo.$": "$.subpipe.sub_repo", 44 | "share_id.$": "$.share_id", 45 | "AWS_STEP_FUNCTIONS_STARTED_BY_EXECUTION_ID.$": "$$.Execution.Id", 46 | }, 47 | # todo: this could get to be too long if you have nested subpipes 48 | # might be better to compute it in subpipe lambda 49 | "Name.$": f"States.Format('{{}}_{step.name}', $$.Execution.Name)", 50 | "StateMachineArn": state_machine_arn, 51 | }, 52 | "ResultPath": None, 53 | "OutputPath": "$", 54 | "Next": retrieve_step_name 55 | } 56 | 57 | return ret 58 | 59 | 60 | def file_retrieve_step(step: Step) -> dict: 61 | ret = { 62 | "Type": "Task", 63 | "Resource": os.environ["SUBPIPES_LAMBDA_ARN"], 64 | "Parameters": { 65 | "repo.$": "$.repo.uri", 66 | "retrieve": json.dumps(step.spec["retrieve"]), 67 | "subpipe": { 68 | "sub_repo.$": "$.subpipe.sub_repo.uri", 69 | }, 70 | **lambda_logging_block(step.name) 71 | }, 72 | **lambda_retry(), 73 | "ResultSelector": {}, 74 | "ResultPath": "$.prev_outputs", 75 | "OutputPath": "$", 76 | **step.next_or_end, 77 | } 78 | 79 | return ret 80 | 81 | 82 | def handle_subpipe(step: Step) -> List[State]: 83 | logger = logging.getLogger(__name__) 84 | logger.info(f"making subpipe step {step.name}") 85 | 86 | submit_step_name = step.name 87 | subpipe_step_name = f"{step.name}.subpipe" 88 | retrieve_step_name = f"{step.name}.retrieve" 89 | 90 | ret = [ 91 | State(submit_step_name, file_submit_step(step, subpipe_step_name)), 92 | State(subpipe_step_name, run_subpipe_step(step, retrieve_step_name)), 93 | State(retrieve_step_name, file_retrieve_step(step)), 94 | ] 95 | 96 | return ret 97 | -------------------------------------------------------------------------------- /README.md: 
-------------------------------------------------------------------------------- 1 | # Bayer CLoud Automated Workflows (BayerCLAW) 2 | 3 | BayerCLAW is a workflow orchestration system targeted at bioinformatics pipelines. 4 | A workflow consists of a sequence of computational steps, each of which is captured in a Docker container. 5 | Some steps may parallelize work across many executions of the same container (scatter/gather pattern). 6 | 7 | A workflow is described in a YAML file. 8 | The BayerCLAW compiler uses AWS CloudFormation to transform the workflow description into AWS resources used by the workflow. 9 | This includes an AWS StepFunctions state machine that represents the sequence of steps in the workflow. 10 | 11 | A workflow typically takes several parameters, such as sample IDs or paths to input files. 12 | Once the workflow definition has been deployed, the workflow can be executed by copying a JSON file with the 13 | execution parameters to a "launcher" S3 bucket, which is constructed by BayerCLAW. 14 | The workflow state machine uses AWS Batch to actually run the Docker containers, in the proper order. 15 | 16 | ## Documentation 17 | 18 | - [Quick start -- deploying a BayerCLAW workflow](doc/quick-start.md) 19 | - [Tutorial -- detailed example of writing, deploying, and debugging](doc/tutorial.md) 20 | 21 | - [Installing BayerCLAW into a new AWS account](doc/installation.md) 22 | - [The BayerCLAW language reference](doc/language.md) 23 | - [The BayerCLAW language -- scatter/gather](doc/scatter.md) 24 | - [The BayerCLAW language -- QC checks](doc/qc.md) 25 | - [The BayerCLAW language -- subpipes](doc/subpipes.md) 26 | - [Runtime environment and Docker guidelines](doc/runtime_env.md) for steps 27 | - [BayerCLAW notifications](doc/notifications.md) 28 | 29 | The [doc/](doc/) directory of this repo contains all the pages linked above. 30 | 31 | ## Key components of BayerCLAW 32 | 33 | ### The workflow definition 34 | 35 | The BayerCLAW workflow template is a JSON- or YAML-formatted file describing the processing steps of the pipeline. 36 | Here is an example of a very simple, one-step workflow: 37 | 38 | ```YAML 39 | Transform: BC2_Compiler 40 | 41 | Repository: s3://example-bucket/hello-world/${job.SAMPLE_ID} 42 | 43 | Steps: 44 | - hello: 45 | image: docker.io/library/ubuntu 46 | commands: 47 | - echo "Hello world! This is job ${job.SAMPLE_ID}!" 48 | ``` 49 | 50 | ### The repository 51 | 52 | The repository is a path within an S3 bucket where a given workflow stores its output files, such as `s3://generic-workflow-bucket/my-workflow-repo/`. 53 | The repo is typically parameterized with some job-specific unique ID, so that each execution of the workflow is kept separate. 54 | For example, `s3://generic-workflow-bucket/my-workflow-repo/job12345/` 55 | 56 | ### Job data file 57 | The job data file contains data needed for a single pipeline execution. 58 | This data must be encoded as a flat JSON object with string keys and string values. 59 | Even integer or float values should be quoted as strings. 60 | 61 | Copying the job data file into the launcher bucket will trigger an execution of the pipeline. 62 | Overwriting the job data file, even with the same contents, will trigger another execution. 
63 | 64 | #### Sample job data file 65 | ```json5 66 | { 67 | "SAMPLE_ID": "ABC123", 68 | "READS1": "s3://workflow-bucket/inputs/reads1.fq", 69 | "READS2": "s3://workflow-bucket/inputs/reads2.fq" 70 | } 71 | ``` 72 | -------------------------------------------------------------------------------- /doc/runtime_env.md: -------------------------------------------------------------------------------- 1 | # BayerCLAW runtime environment 2 | 3 | Each BayerCLAW step runs on AWS Batch, using the specified Docker image. 4 | 5 | The entry point is a program called `bclaw_runner`, which is hosted in its own Docker 6 | container and does not need to be baked into the user's Docker image. 7 | The runner is responsible for downloading inputs from S3, 8 | running the user-specified commands, and uploading the output to S3. 9 | BayerCLAW manages this; the runner should be basically invisible to users. 10 | 11 | If one user command exits with an error (non-zero exit code), 12 | the following commands in that step will not be run. 13 | However, any outputs will still be uploaded to S3. 14 | If the container exceeds its maximum allotted memory, 15 | all processes in the container will be killed immediately so no upload is possible. 16 | (Batch will typically report an error code 137 for out of memory.) 17 | 18 | Each Batch EC2 instance has a temporary EBS volume attached as scratch space. 19 | By default, each *instance* has a 1 TB scratch volume. 20 | However, multiple jobs may share a single instance, in which case they have to share the scratch space. 21 | AWS Batch controls how jobs are packed onto instances, and we are not aware of a way for users to control this. 22 | 23 | For each job, `bclaw_runner` will create a temporary directory on the scratch volume. 24 | User commands will be started in this directory. Inputs are downloaded to, and outputs are uploaded from, this directory. 25 | Before exiting, `bclaw_runner` will remove the directory, to free up space for future jobs that may run on this machine. 26 | 27 | # Environment variables 28 | 29 | The following environment variables are available in BayerCLAW Batch jobs: 30 | 31 | - `BC_BRANCH_IDX`: For jobs running inside of a Scatter step, this will be a number corresponding to the map index 32 | assigned by Step Functions. Outside of a Scatter step, this will always be `main`. 33 | - `BC_EXECUTION_ID`: The ID of the Step Functions execution that triggered this Batch job. You can use this to find 34 | the execution in the Step Functions console. 35 | - `BC_JOB_DATA_FILE`: This is a fully-qualified path to a JSON-formatted file containing the input job data. 36 | - `BC_STEP_NAME`: The name of the current workflow step. 37 | - `BC_WORKFLOW_NAME`: The workflow name. This is the same as the name of the workflow's CloudFormation stack. 38 | - `BC_WORKSPACE`: This is the fully-qualified path to the job's working directory. 39 | - `AWS_ACCOUNT_ID`: The ID of the AWS account the job is running in. 40 | - `AWS_DEFAULT_REGION`: The AWS region the job is running in. 41 | 42 | These can be incorporated into commands just as one would normally use environment variables, e.g.: 43 | 44 | ```bash 45 | do_something --cfg ${BC_JOB_DATA_FILE} ${input1} ${input2} 46 | ``` 47 | 48 | # Docker guidelines 49 | Docker limits the number of anonymous pull requests that a single IP address can perform against Docker Hub. 
Therefore, 50 | while you can use Docker Hub images for low-throughput workflows or for workflows in development, it is 51 | recommended that you store all of your Docker images in Amazon ECR for high-throughput production workflows. 52 | 53 | Docker images must not specify an ENTRYPOINT -- this prevents `bclaw_runner` from executing correctly. 54 | 55 | If the Docker image specifies a WORKDIR, it will be ignored when run under BayerCLAW. 56 | -------------------------------------------------------------------------------- /.github/workflows/installer.yaml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | branches: 4 | - installer 5 | workflow_dispatch: 6 | 7 | jobs: 8 | doIt: 9 | runs-on: ubuntu-latest 10 | permissions: 11 | contents: "read" 12 | id-token: "write" 13 | steps: 14 | - name: checkout 15 | uses: actions/checkout@v4 16 | with: 17 | fetch-depth: 0 18 | fetch-tags: true 19 | 20 | - name: setupPython 21 | uses: actions/setup-python@v5 22 | with: 23 | python-version: "3.10" 24 | 25 | - name: setupSam 26 | uses: aws-actions/setup-sam@v2 27 | with: 28 | use-installer: true 29 | token: ${{ secrets.GITHUB_TOKEN }} 30 | 31 | - name: getCreds 32 | uses: aws-actions/configure-aws-credentials@v4 33 | with: 34 | # role-to-assume: "arn:aws:iam::934778205429:role/jaxGithubActionsRole" 35 | role-to-assume: "arn:aws:iam::934778205429:role/jax-github-role-2025" 36 | aws-region: us-east-2 37 | 38 | - name: runTests 39 | run: | 40 | pip install -r lambda/tests/requirements.txt 41 | PYTHONPATH=${GITHUB_WORKSPACE}/lambda/src/common/python:$PYTHONPATH pytest -s -vvv lambda/tests/ 42 | 43 | - name: installCore 44 | id: install-core 45 | env: 46 | SUBNETS: "subnet-3ffe7854,subnet-b3b296ff,subnet-e1c63a9c" 47 | VPC_ID: "vpc-00cb556b" 48 | run: | 49 | export SOURCE_VERSION=$(git describe --tags) 50 | export UNIQIFIER=$(date | md5sum | head -c 16) 51 | 52 | sam build -b ./build -s . -t cloudformation/bc_core.yaml 53 | sam deploy \ 54 | --template-file build/template.yaml \ 55 | --stack-name bayerclaw2-core \ 56 | --resolve-s3 \ 57 | --capabilities "CAPABILITY_NAMED_IAM" "CAPABILITY_AUTO_EXPAND" \ 58 | --no-fail-on-empty-changeset \ 59 | --tags "bclaw:version=${SOURCE_VERSION}" \ 60 | --parameter-overrides \ 61 | AmiId=auto \ 62 | CompilerMacroName=BC2_Compiler \ 63 | ExistingBatchSubscriptionFilter=none \ 64 | GpuAmiId=auto \ 65 | InstallationName=bayerclaw2 \ 66 | LauncherBucketName=default \ 67 | LogRetentionDays=30 \ 68 | MaxvCpus=256 \ 69 | MinvCpus=0 \ 70 | RootVolumeSize=50 \ 71 | ScratchVolumeSize=100 \ 72 | SecurityGroups=auto \ 73 | SourceVersion=${SOURCE_VERSION} \ 74 | Subnets=${SUBNETS} \ 75 | Uniqifier=${UNIQIFIER} \ 76 | VpcId=${VPC_ID} 77 | 78 | echo "runner_image_tag=$(aws cloudformation describe-stacks --query "Stacks[?StackName=='bayerclaw2-core'][].Outputs[?OutputKey=='RunnerImageUri'].OutputValue" --output text)" >> $GITHUB_OUTPUT 79 | 80 | - name: loginToEcr 81 | id: login-to-ecr 82 | uses: aws-actions/amazon-ecr-login@v2 83 | 84 | - name: buildRunner 85 | working-directory: bclaw_runner 86 | env: 87 | DOCKER_BUILDKIT: 1 88 | RUNNER_IMAGE_TAG: ${{ steps.install-core.outputs.runner_image_tag }} 89 | run: | 90 | docker build --target test -f Dockerfile.alpine "." 91 | docker build --target build -t ${RUNNER_IMAGE_TAG} -f Dockerfile.alpine "." 
92 | docker push ${RUNNER_IMAGE_TAG} || true 93 | -------------------------------------------------------------------------------- /lambda/tests/compiler/test_chooser_resources.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import pytest 4 | 5 | from voluptuous.error import Invalid 6 | 7 | logging.basicConfig(level=logging.INFO) 8 | 9 | from ...src.compiler.pkg.chooser_resources import choice_spec, handle_chooser_step 10 | from ...src.compiler.pkg.util import Step, State, lambda_logging_block, lambda_retry 11 | 12 | 13 | def test_choice_spec(): 14 | condition = "x == 1" 15 | next_step = "step99" 16 | 17 | result = choice_spec(condition, next_step) 18 | expect = { 19 | "Variable": "$.choice", 20 | "StringEquals": condition, 21 | "Next": next_step, 22 | } 23 | assert result == expect 24 | 25 | 26 | def test_make_chooser_steps(compiler_env): 27 | spec = { 28 | "inputs": { 29 | "infile1": "file1.json", 30 | "infile2": "file2.json", 31 | }, 32 | "choices": [ 33 | { 34 | "if": "infile1.var1 == 1", 35 | "next": "step99", 36 | }, 37 | { 38 | "if": "infile2.var2 == 2", 39 | "next": "step98", 40 | }, 41 | { 42 | "if": "job.var3 == 3", 43 | "next": "step97", 44 | }, 45 | ] 46 | } 47 | 48 | test_step = Step("step_name", spec, "next_step") 49 | 50 | expected_task_spec = { 51 | "Type": "Task", 52 | "Resource": "chooser_lambda_arn", 53 | "Parameters": { 54 | "repo.$": "$.repo.uri", 55 | "inputs": json.dumps(spec["inputs"], separators=(",", ":")), 56 | "expressions": [ 57 | "infile1.var1 == 1", 58 | "infile2.var2 == 2", 59 | "job.var3 == 3", 60 | ], 61 | **lambda_logging_block("step_name") 62 | }, 63 | **lambda_retry(), 64 | "ResultPath": "$.choice", 65 | "OutputPath": "$", 66 | "Next": "step_name.choose", 67 | } 68 | 69 | expected_choice_spec = { 70 | "Type": "Choice", 71 | "Choices": [ 72 | { 73 | "Variable": "$.choice", 74 | "StringEquals": "infile1.var1 == 1", 75 | "Next": "step99", 76 | }, 77 | { 78 | "Variable": "$.choice", 79 | "StringEquals": "infile2.var2 == 2", 80 | "Next": "step98", 81 | }, 82 | { 83 | "Variable": "$.choice", 84 | "StringEquals": "job.var3 == 3", 85 | "Next": "step97", 86 | }, 87 | ], 88 | "Default": "next_step" 89 | } 90 | 91 | result = handle_chooser_step(test_step) 92 | assert len(result) == 2 93 | assert all(isinstance(s, State) for s in result) 94 | 95 | task_state = result[0] 96 | assert task_state.name == "step_name" 97 | assert task_state.spec == expected_task_spec 98 | 99 | choice_state = result[1] 100 | assert choice_state.name == "step_name.choose" 101 | assert choice_state.spec == expected_choice_spec 102 | 103 | 104 | def test_make_chooser_steps_terminal_state_fail(): 105 | test_step = Step("step_name", {"not": "used"}, "") 106 | with pytest.raises(Invalid, match="chooser steps cannot be terminal"): 107 | _ = handle_chooser_step(test_step) 108 | -------------------------------------------------------------------------------- /cloudformation/bc_ecs_task_role.yaml: -------------------------------------------------------------------------------- 1 | AWSTemplateFormatVersion: "2010-09-09" 2 | 3 | Parameters: 4 | RoleName: 5 | Type: String 6 | Default: "default" 7 | 8 | PolicyName: 9 | Type: String 10 | Default: "default" 11 | 12 | Conditions: 13 | UseDefaultRoleName: !Or [ !Equals [!Ref RoleName, "Default"], 14 | !Equals [!Ref RoleName, "default"] ] 15 | 16 | UseDefaultPolicyName: !Or [ !Equals [!Ref PolicyName, "Default"], 17 | !Equals [!Ref PolicyName, "default"] ] 18 | 19 | Resources: 20 | 
EcsTaskRole: 21 | Type: AWS::IAM::Role 22 | DeletionPolicy: Retain 23 | Properties: 24 | RoleName: 25 | !If [UseDefaultRoleName, !Ref AWS::NoValue, !Ref RoleName] 26 | AssumeRolePolicyDocument: 27 | Version: "2012-10-17" 28 | Statement: 29 | - Effect: "Allow" 30 | Principal: 31 | Service: 32 | - "ecs-tasks.amazonaws.com" 33 | Action: 34 | - "sts:AssumeRole" 35 | 36 | EcsTaskPolicy: 37 | Type: AWS::IAM::ManagedPolicy 38 | DeletionPolicy: Retain 39 | Properties: 40 | ManagedPolicyName: 41 | !If [UseDefaultPolicyName, !Ref AWS::NoValue, !Ref PolicyName] 42 | Roles: 43 | - !Ref EcsTaskRole 44 | PolicyDocument: 45 | Version: "2012-10-17" 46 | Statement: 47 | - Sid: "writeLogs" 48 | Effect: "Allow" 49 | Action: 50 | - "logs:CreateLogStream" 51 | - "logs:PutLogEvents" 52 | Resource: !Sub "arn:aws:logs:${AWS::Region}:${AWS::AccountId}:log-group:*" 53 | - Sid: "s3BucketAccess" 54 | Effect: "Allow" 55 | Action: 56 | - "s3:ListBucket" 57 | Resource: "arn:aws:s3:::*" 58 | - Sid: "s3ObjectAccess" 59 | Effect: "Allow" 60 | Action: 61 | - "s3:PutObject" 62 | - "s3:GetObject" 63 | - "s3:PutObjectAcl" 64 | - "s3:GetObjectTagging" 65 | - "s3:PutObjectTagging" 66 | - "s3:AbortMultipartUpload" 67 | Resource: "arn:aws:s3:::*/*" 68 | - Sid: "deleteControlObjects" 69 | Effect: "Allow" 70 | Action: 71 | - "s3:DeleteObject" 72 | Resource: "arn:aws:s3:::*/_control_/*" 73 | - Sid: "ecrAccess" 74 | Effect: "Allow" 75 | Action: 76 | - "ecr:GetAuthorizationToken" 77 | - "ecr:BatchCheckLayerAvailability" 78 | - "ecr:GetDownloadUrlForLayer" 79 | - "ecr:BatchGetImage" 80 | Resource: "*" 81 | - Sid: "ec2InstanceTagging" 82 | Effect: "Allow" 83 | Action: 84 | - "ec2:CreateTags" 85 | - "ec2:DeleteTags" 86 | Resource: "*" 87 | - Sid: "abortExecution" 88 | Effect: "Allow" 89 | Action: 90 | - "states:StopExecution" 91 | Resource: "*" 92 | - Sid: "getSecrets" 93 | Effect: "Allow" 94 | Action: 95 | - "secretsmanager:GetSecretValue" 96 | Resource: "*" 97 | 98 | Outputs: 99 | EcsTaskRoleArn: 100 | Value: !GetAtt EcsTaskRole.Arn 101 | 102 | EcsTaskPolicyArn: 103 | Value: !Ref EcsTaskPolicy 104 | -------------------------------------------------------------------------------- /lambda/src/common/python/repo_utils.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | import json 3 | 4 | 5 | SYSTEM_FILE_TAG = "bclaw.system=true" 6 | 7 | # @dataclass 8 | # class S3File: 9 | # bucket: str 10 | # key: str 11 | # 12 | # def __repr__(self): 13 | # return f"s3://{self.bucket}/{self.key}" 14 | # 15 | # # def __eq__(self, other): 16 | # # return self.bucket == other.bucket and self.key == other.key 17 | 18 | class S3File(str): 19 | def __new__(cls, bucket: str, key: str): 20 | return str.__new__(cls, f"s3://{bucket}/{key}") 21 | 22 | def __init__(self, bucket: str, key: str): 23 | self.bucket = bucket 24 | self.key = key 25 | 26 | 27 | # @dataclass 28 | # class Repo: 29 | # bucket: str 30 | # prefix: str 31 | # 32 | # @classmethod 33 | # def from_uri(cls, uri: str): 34 | # bucket, key = uri.split("/", 3)[2:] 35 | # return cls(bucket, key) 36 | # 37 | # def qualify(self, uri: str) -> S3File: 38 | # if uri.startswith("s3://"): 39 | # ret = S3File(*uri.split("/", 3)[2:]) 40 | # else: 41 | # wtf = f"{self.prefix}/{uri}" 42 | # ret = S3File(bucket=self.bucket, key=f"{self.prefix}/{uri}") 43 | # return ret 44 | # 45 | # def sub_repo(self, name): 46 | # ret = Repo(self.bucket, f"{self.prefix}/{name}") 47 | # return ret 48 | # 49 | # @property 50 | # def job_data_file(self) -> 
S3File: 51 | # return self.qualify("_JOB_DATA_") 52 | # 53 | # def __repr__(self): 54 | # return f"s3://{self.bucket}/{self.prefix}" 55 | # 56 | # # def __eq__(self, other): 57 | # # return self.bucket == other.bucket and self.prefix == other.prefix 58 | 59 | class Repo(dict): 60 | def __init__(self, *args, **kwargs): 61 | dict.__init__(self, *args, **kwargs) 62 | self.setdefault("uri", f"s3://{self.bucket}/{self.prefix}") 63 | 64 | @classmethod 65 | def from_uri(cls, uri: str): 66 | bucket, prefix = uri.split("/", 3)[2:] 67 | return cls(bucket=bucket, prefix=prefix) 68 | 69 | @property 70 | def bucket(self) -> str: 71 | return self["bucket"] 72 | 73 | @property 74 | def prefix(self) -> str: 75 | return self["prefix"] 76 | 77 | @property 78 | def uri(self) -> str: 79 | return self["uri"] 80 | 81 | def qualify(self, file_spec: str) -> S3File: 82 | if file_spec.startswith("s3://"): 83 | ret = S3File(*file_spec.split("/", 3)[2:]) 84 | else: 85 | ret = S3File(self.bucket, f"{self.prefix}/{file_spec}") 86 | return ret 87 | 88 | def sub_repo(self, name): 89 | ret = Repo(bucket=self.bucket, prefix=f"{self.prefix}/{name}") 90 | return ret 91 | 92 | def __repr__(self) -> str: 93 | return self.uri 94 | 95 | 96 | # https://stackoverflow.com/questions/51286748/make-the-python-json-encoder-support-pythons-new-dataclasses 97 | # class RepoEncoder(json.JSONEncoder): 98 | # def default(self, o): 99 | # if isinstance(o, (Repo, S3File)): 100 | # return str(o) 101 | # else: 102 | # return super().default(o) 103 | 104 | 105 | # class OtherEncoder(json.JSONEncoder): 106 | # def default(self, o): 107 | # if isinstance(o, Repo): 108 | # return {"bucket": o.bucket, "prefix": o.prefix, "uri": str(o)} 109 | # else: 110 | # return super().default(o) 111 | -------------------------------------------------------------------------------- /doc/workflow_versions.md: -------------------------------------------------------------------------------- 1 | # Versioned Workflows and Blue/Green Deployment 2 | 3 | BayerCLAW2 workflows are deployed using a Blue/Green method, which allows you to publish updated 4 | versions of your workflow without downtime, even when jobs are in progress. Blue/Green deployment 5 | also enables you to roll your workflows back to earlier versions if necessary. 6 | 7 | ## Step Function versions and aliases 8 | 9 | Blue/Green deployment is implemented through the use of Step Functions versions and aliases. 10 | When you compile a workflow of a given name, the resulting Step Function state machine receives 11 | a unique version number. The version number increases monotonically, is immutable, and will never 12 | be reused. Older versions of the state machine are not automatically deleted (although you may 13 | delete them manually), so any jobs running on a previous version will not be interrupted. 14 | 15 | During compilation, the newest version of a state machine also receives the alias `current`. The 16 | `current` alias points to the currently active version of the workflow state machine -- when you put 17 | a job data file into the launcher bucket, a job is triggered on the `current` state machine. 18 | 19 | See the AWS documentation for more information on Step Functions 20 | [versions](https://docs.aws.amazon.com/step-functions/latest/dg/concepts-state-machine-version.html) and 21 | [aliases](https://docs.aws.amazon.com/step-functions/latest/dg/concepts-state-machine-alias.html). 
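As a quick illustration (the region, account ID, and workflow name below are the same placeholders used in the examples that follow), you can list a workflow's state machine versions and see which version the `current` alias points to using the AWS CLI:

```bash
# list the versions that exist for a workflow's state machine
aws stepfunctions list-state-machine-versions \
    --state-machine-arn arn:aws:states:us-east-1:123456789012:stateMachine:my-workflow

# show the version that the "current" alias routes jobs to
aws stepfunctions describe-state-machine-alias \
    --state-machine-alias-arn arn:aws:states:us-east-1:123456789012:stateMachine:my-workflow:current
```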
22 | 23 | ## Rolling Back to Earlier Versions 24 | 25 | Faulty workflow deployments can be rolled back to a previous state by reassigning the `current` alias to 26 | the desired state machine version. To do so on the AWS console, navigate to the state machine's page and select 27 | the `Aliases` tab; then select the `current` alias and click `Edit`. You can select the desired version in 28 | the dropdown[1](#f1). 29 | 30 | Rollbacks may also be conducted using the AWS CLI: 31 | 32 | ```bash 33 | aws stepfunctions update-state-machine-alias \ 34 | --state-machine-alias-arn arn:aws:states:us-east-1:123456789012:stateMachine:my-workflow:current \ 35 | --routing-configuration stateMachineVersionArn=arn:aws:states:us-east-1:123456789012:stateMachine:my-workflow:2,weight=100 36 | ``` 37 | 38 | It is also possible to submit jobs directly to a previous version of a workflow. To do so, append a colon 39 | and version number to the workflow name in the launcher bucket path, for example: 40 | 41 | 's3://bclaw2-launcher-123456789012/**my-workflow:9**/job_data.json' 42 | 43 | If you assign a custom alias to a certain workflow version, you can submit jobs to that aliased version 44 | in a similar manner: 45 | 46 | 's3://bclaw2-launcher-123456789012/**my-workflow:my-alias**/job_data.json' 47 | 48 | ### *Important!* 49 | 50 | Proper workflow rollbacks depend critically on the use of versioned Docker images. If you rely on Docker's 51 | default `:latest` tag (or even on a mutable generic tag like `:prod`), BayerCLAW could roll back your 52 | workflow's structure, but continue to use buggy Docker images. Consider using a CI/CD system such 53 | as AWS' CodeBuild to build your Docker images upon each new release, and pass the fully-qualified 54 | image tag to BayerCLAW (using `aws cloudformation deploy --parameter-overrides...`) as Parameter values. 55 | 56 | 
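For example (a sketch only -- the `ImageTag` parameter name is hypothetical and must match a Parameter declared in your workflow template), such a deployment might look like:

```bash
aws cloudformation deploy \
    --template-file my-workflow.yaml \
    --stack-name my-workflow \
    --capabilities CAPABILITY_IAM \
    --parameter-overrides \
        ImageTag=123456789012.dkr.ecr.us-east-1.amazonaws.com/my-tool:1.2.3
```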
57 | 58 | 1 Note that you have the option to split incoming jobs between two 59 | state machine versions, assigning a percentage of traffic to each. This is not likely 60 | to be too useful but is still available.[↵](#a1) 61 | -------------------------------------------------------------------------------- /bclaw_runner/tests/test_workspace.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import logging 4 | import subprocess 5 | 6 | import pytest 7 | 8 | from ..src import runner 9 | from ..src.runner.workspace import workspace, write_job_data_file, run_commands, run_commands, UserCommandsFailed 10 | 11 | logging.basicConfig(level=logging.INFO) 12 | 13 | 14 | def test_workspace(monkeypatch, tmp_path): 15 | monkeypatch.setenv("BC_SCRATCH_PATH", str(tmp_path)) 16 | orig_dir = os.getcwd() 17 | 18 | with workspace() as wrk: 19 | assert os.path.isdir(wrk) 20 | assert os.getcwd() == wrk 21 | assert os.path.dirname(wrk) == str(tmp_path) 22 | 23 | assert os.getcwd() == orig_dir 24 | assert not os.path.isdir(wrk) 25 | 26 | 27 | def test_write_job_data_file(tmp_path): 28 | job_data = { 29 | "one": 1, 30 | "two": 2, 31 | "three": 3, 32 | } 33 | 34 | jdf = write_job_data_file(job_data, str(tmp_path)) 35 | 36 | assert os.path.exists(jdf) 37 | assert os.path.dirname(jdf) == str(tmp_path) 38 | 39 | with open(jdf) as fp: 40 | jdf_contents = json.load(fp) 41 | assert jdf_contents == job_data 42 | 43 | 44 | def fake_container(image_tag: str, command: str, work_dir: str, job_data_file) -> int: 45 | response = subprocess.run(command, shell=True) 46 | return response.returncode 47 | 48 | 49 | def test_run_commands(tmp_path, monkeypatch, caplog): 50 | caplog.set_level(logging.INFO) 51 | monkeypatch.setattr(runner.workspace, "run_child_container", fake_container) 52 | f = tmp_path / "test_success.out" 53 | 54 | commands = [ 55 | f"echo 'one' > {str(f)}", 56 | "z='two'", 57 | f"echo $z >> {str(f)}" 58 | ] 59 | 60 | os.chdir(tmp_path) 61 | response = run_commands("fake/image:tag", commands, tmp_path, "fake/job/data/file.json", "sh") 62 | 63 | assert "command block succeeded" in caplog.text 64 | assert f.exists() 65 | with f.open() as fp: 66 | lines = fp.readlines() 67 | assert lines == ["one\n", "two\n"] 68 | 69 | 70 | def test_exit_on_command_fail1(tmp_path, monkeypatch): 71 | monkeypatch.setattr(runner.workspace, "run_child_container", fake_container) 72 | f = tmp_path / "test_exit_on_command_fail.out" 73 | 74 | commands = [ 75 | f"echo 'one' > {str(f)}", 76 | "false", 77 | f"echo $z >> {str(f)}" 78 | ] 79 | 80 | os.chdir(tmp_path) 81 | with pytest.raises(UserCommandsFailed) as ucf: 82 | run_commands("fake/image:tag", commands, tmp_path, "fake/job/data/file.json", "sh") 83 | assert ucf.value.exit_code != 0 84 | 85 | assert f.exists() 86 | with f.open() as fp: 87 | lines = fp.readlines() 88 | assert lines == ["one\n"] 89 | 90 | 91 | def test_exit_on_undef_var1(tmp_path, monkeypatch): 92 | monkeypatch.setattr(runner.workspace, "run_child_container", fake_container) 93 | f = tmp_path / "test_exit_on_undef_var.out" 94 | 95 | commands = [ 96 | f"echo 'one' > {str(f)}", 97 | "echo $UNDEFINED_VAR", 98 | f"echo $z >> {str(f)}" 99 | ] 100 | 101 | os.chdir(tmp_path) 102 | with pytest.raises(UserCommandsFailed) as ucf: 103 | run_commands("fake/image:tag", commands, tmp_path, "fake/job/data/file.json", "sh") 104 | assert ucf.value.exit_code != 0 105 | 106 | assert f.exists() 107 | with f.open() as fp: 108 | lines = fp.readlines() 109 | assert lines == 
["one\n"] 110 | -------------------------------------------------------------------------------- /bclaw_runner/tests/conftest.py: -------------------------------------------------------------------------------- 1 | import io 2 | from typing import Optional 3 | 4 | import boto3 5 | from docker.errors import ImageNotFound 6 | import moto 7 | import pytest 8 | 9 | 10 | class MockImage: 11 | def __init__(self, name: str, source: str, auth: Optional[dict] = None): 12 | self.tags = [name] 13 | self.source = source 14 | self.auth = auth 15 | self.attrs = { 16 | "RepoDigests": [f"{name}@sha256:1234567890abcdef"], 17 | } 18 | 19 | 20 | class MockImages: 21 | @staticmethod 22 | def get(img_uri: str) -> MockImage: 23 | if img_uri == "local/image": 24 | return MockImage(img_uri, "local repo") 25 | else: 26 | raise ImageNotFound("not found message") 27 | 28 | @staticmethod 29 | def pull(img_uri: str, auth_config: dict) -> MockImage: 30 | if "dkr.ecr" in img_uri: 31 | return MockImage(img_uri, "ecr", auth_config) 32 | else: 33 | if auth_config: 34 | return MockImage(img_uri, "private repo", auth_config) 35 | else: 36 | return MockImage(img_uri, "public repo") 37 | 38 | 39 | class MockContainer: 40 | def __init__(self, exit_code): 41 | self.args = None 42 | self.kwargs = None 43 | self.exit_code = exit_code 44 | self.removed = False 45 | self.status = "created" 46 | 47 | def logs(self, *args, **kwargs) -> io.BytesIO: 48 | ret = io.BytesIO(b"line 1\nline 2\nline 3") 49 | return ret 50 | 51 | def stop(self, *args, **kwargs) -> None: 52 | self.exit_code = 99 53 | 54 | def wait(self, *args, **kwargs) -> dict: 55 | ret = {"StatusCode": self.exit_code} 56 | return ret 57 | 58 | def remove(self, *args, **kwargs) -> None: 59 | self.removed = True 60 | 61 | def reload(self): 62 | self.status = "running" 63 | 64 | 65 | class FailingContainer(MockContainer): 66 | def __init__(self, exit_code: int): 67 | super().__init__(exit_code) 68 | 69 | def logs(self, *args, **kwargs) -> io.BytesIO: 70 | raise RuntimeError("hey") 71 | 72 | 73 | @pytest.fixture(scope="function") 74 | def mock_container_factory(): 75 | def _ret(exit_code: int, logging_crash: bool): 76 | if logging_crash: 77 | return FailingContainer(exit_code) 78 | else: 79 | return MockContainer(exit_code) 80 | return _ret 81 | 82 | 83 | class MockContainers: 84 | def __init__(self, ret: MockContainer): 85 | self.ret = ret 86 | 87 | def run(self, *args, **kwargs) -> MockContainer: 88 | self.ret.args = args 89 | self.ret.kwargs = kwargs 90 | return self.ret 91 | 92 | 93 | class MockDockerClient(): 94 | def __init__(self, container: Optional[MockContainer] = None): 95 | self.images = MockImages() 96 | self.containers = MockContainers(container) 97 | 98 | def close(self): 99 | pass 100 | 101 | 102 | @pytest.fixture(scope="function") 103 | def mock_docker_client_factory(): 104 | def _ret(container: Optional[MockContainer] = None): 105 | return MockDockerClient(container) 106 | return _ret 107 | 108 | 109 | @pytest.fixture(scope="module") 110 | def mock_ec2_instance(): 111 | with moto.mock_aws(): 112 | ec2 = boto3.resource("ec2", region_name="us-east-1") 113 | instances = ec2.create_instances(ImageId="ami-12345", MinCount=1, MaxCount=1) 114 | yield instances[0] 115 | -------------------------------------------------------------------------------- /lambda/tests/gather/test_gather.py: -------------------------------------------------------------------------------- 1 | from contextlib import closing 2 | import json 3 | import logging 4 | 5 | import boto3 6 | import moto 
7 | import pytest 8 | 9 | from ...src.gather.gather import lambda_handler 10 | 11 | logging.basicConfig(level=logging.INFO) 12 | 13 | TEST_BUCKET = "test-bucket" 14 | JOB_DATA = {"job": {"job": "data"}, "parent": {}, "scatter": {}} 15 | 16 | 17 | @pytest.fixture(scope="module") 18 | def repo_bucket(): 19 | with moto.mock_aws(): 20 | yld = boto3.resource("s3", region_name="us-east-1").Bucket(TEST_BUCKET) 21 | yld.create() 22 | yld.put_object(Key="repo/path/_JOB_DATA_", Body=json.dumps(JOB_DATA).encode("utf-8")) 23 | 24 | yld.put_object(Key="repo/path/test-step/00000/output1", Body=b"00000.output1") 25 | yld.put_object(Key="repo/path/test-step/00000/output2", Body=b"00000.output2") 26 | yld.put_object(Key="repo/path/test-step/00000/zoutput2", Body=b"00000.zoutput2") 27 | yld.put_object(Key="repo/path/test-step/00000/unoutput", Body=b"00000.unoutput") 28 | 29 | yld.put_object(Key="repo/path/test-step/00001/output1", Body=b"00001.output1") 30 | # no output2 in subdir 00001 31 | yld.put_object(Key="repo/path/test-step/00001/unoutput", Body=b"00001.unoutput") 32 | 33 | yld.put_object(Key="repo/path/test-step/00002/output1", Body=b"00002.output1") 34 | yld.put_object(Key="repo/path/test-step/00002/output2", Body=b"00002.output2") 35 | yld.put_object(Key="repo/path/test-step/00002/unoutput", Body=b"00002.unoutput") 36 | 37 | yield yld 38 | 39 | 40 | def test_lambda_handler(caplog, repo_bucket): 41 | event = { 42 | "repo": f"s3://{repo_bucket.name}/repo/path", 43 | "outputs": json.dumps({"out1": "output1", "out2": "output2", "out3": "output3"}), 44 | "step_name": "test-step", 45 | "items": [ 46 | {"repo": f"s3://{repo_bucket.name}/repo/path/test-step/00000"}, 47 | {"repo": f"s3://{repo_bucket.name}/repo/path/test-step/00001"}, 48 | {"repo": f"s3://{repo_bucket.name}/repo/path/test-step/00002"}, 49 | ], 50 | "logging": { 51 | "step_name": "test-step", 52 | }, 53 | } 54 | 55 | expect = {"manifest": "test-step_manifest.json"} 56 | result = lambda_handler(event, {}) 57 | assert result == expect 58 | 59 | manifest_key = f"repo/path/{result['manifest']}" 60 | manifest_s3 = repo_bucket.Object(manifest_key) 61 | response = manifest_s3.get() 62 | with closing(response["Body"]) as fp: 63 | manifest = json.load(fp) 64 | 65 | expect = { 66 | "out1": [ 67 | f"s3://{repo_bucket.name}/repo/path/test-step/00000/output1", 68 | f"s3://{repo_bucket.name}/repo/path/test-step/00001/output1", 69 | f"s3://{repo_bucket.name}/repo/path/test-step/00002/output1", 70 | ], 71 | "out2": [ 72 | f"s3://{repo_bucket.name}/repo/path/test-step/00000/output2", 73 | f"s3://{repo_bucket.name}/repo/path/test-step/00002/output2", 74 | ], 75 | "out3": [], 76 | } 77 | assert manifest == expect 78 | 79 | assert "no files named output3 found" in caplog.text 80 | 81 | 82 | def test_lambda_handler_no_manifest(caplog, repo_bucket): 83 | event = { 84 | "repo": f"s3://{repo_bucket.name}/repo/path", 85 | "outputs": "{}", 86 | "results": ["fake", "results"], 87 | "logging": { 88 | "step_name": "test-step", 89 | }, 90 | } 91 | 92 | result = lambda_handler(event, {}) 93 | assert result == {} 94 | -------------------------------------------------------------------------------- /doc/quick-start.md: -------------------------------------------------------------------------------- 1 | # Quick start - creating and running a pipeline 2 | 3 | ## 1. Containerize your tools 4 | 5 | Because BayerCLAW runs jobs on AWS Batch, all the software for your pipeline must be built into Docker containers. 
6 | These can be stored in any Docker repository, but the default is the AWS Elastic Container Registry (ECR) in your AWS 7 | account. If you just specify a simple image name, like `ubuntu`, BayerCLAW will assume it is in ECR. 8 | To reference an image in the public DockerHub repo, you should specify `docker.io/library/ubuntu` (or whatever). 9 | 10 | ## 2. Choose an S3 repository location 11 | 12 | In addition to the **Docker** repository for your images, BayerCLAW uses an S3 location as a **file** repository. 13 | This bucket is not created for you by BayerCLAW, because it is intended to be the long-term home of your data. 14 | You should create this bucket yourself, with appropriate life-cycle policies and other settings, or use an existing bucket. 15 | 16 | You must NOT use a BayerCLAW launcher bucket as a repository; that one is ONLY for triggering new workflow executions. 17 | 18 | ## 3. Create a workflow template 19 | 20 | Use the [BayerCLAW Language References](language.md) to help you author your workflow. 21 | 22 | ## 4. Deploy the workflow 23 | 24 | Deploying a workflow creates a StepFunctions state machine and associated resources. 25 | Deployment happens through AWS CloudFormation, and can be done through the console or the command line. 26 | In this example, the workflow is named `bclaw-demo`: 27 | 28 | ``` 29 | # Please edit this name before using 30 | export MYSTACK=bclaw-demo 31 | 32 | aws cloudformation deploy --template-file bclaw-demo.yaml --stack-name $MYSTACK --capabilities CAPABILITY_IAM 33 | ``` 34 | 35 | If deployment fails, check the logs for the `bclawCompilerLambda` function in the AWS web console. 36 | You can modify the workflow template and re-run the `deploy` command to update the workflow definition. 37 | If for some reason you need to remove the workflow entirely, try: 38 | 39 | ``` 40 | aws cloudformation delete-stack --stack-name $MYSTACK 41 | aws cloudformation wait stack-delete-complete --stack-name $MYSTACK 42 | ``` 43 | 44 | ## 5. Launch a job 45 | 46 | Assuming BayerCLAW was installed under default parameters, you should find an S3 bucket named something 47 | like `bclaw-main-launcher-<account number>` in your account. BayerCLAW watches this bucket for new input files. 48 | 49 | To launch a BayerCLAW job, just copy a job file into the launcher bucket. The file must be placed into a 50 | folder with the same name as the workflow you want to run, e.g.: 51 | 52 | ``` 53 | aws s3 cp job.json s3://bclaw-main-launcher-123456789012/bclaw-demo/job.json 54 | ``` 55 | 56 | If you overwrite a file, *even with the same data*, it will trigger the workflow to run again. 57 | Best practice would be to give each job file a unique name -- preferably something based on the file's 58 | contents -- rather than `job.json` (see the [About job naming](#about-job-naming) section below). 59 | 60 | To monitor the job in the AWS web console, check the pages for Batch and StepFunctions. 61 | If a task fails, you will be able to see it in either place, and there will be links to CloudWatch Logs. 62 | 63 | ### About job naming 64 | BayerCLAW's StepFunction job names are derived from the name of the job data file. Some characters 65 | may be transformed or removed due to naming constraints. Therefore, if you submit a file named 66 | `my.input.json`, you may trigger a StepFunctions job named something like `my-input_Q7Pz7WYb`. 67 | The trailing `Q7Pz7WYb` is the first 8 characters of the file's version ID, assigned by S3. 
68 | StepFunction execution names are limited to 80 characters, so BayerCLAW may truncate very 69 | long file names. -------------------------------------------------------------------------------- /util/bclaw_logs/template.yaml: -------------------------------------------------------------------------------- 1 | AWSTemplateFormatVersion: "2010-09-09" 2 | Transform: AWS::Serverless-2016-10-31 3 | 4 | Resources: 5 | # dynamo db 6 | JobStatusTable: 7 | Type: AWS::DynamoDB::Table 8 | Properties: 9 | TableName: "bclawLogsTable" 10 | AttributeDefinitions: 11 | - 12 | AttributeName: "workflowName" 13 | AttributeType: "S" 14 | - 15 | AttributeName: "executionId" 16 | AttributeType: "S" 17 | - 18 | AttributeName: "timestamp" 19 | AttributeType: "N" 20 | - 21 | AttributeName: "jobFile" 22 | AttributeType: "S" 23 | KeySchema: 24 | - 25 | AttributeName: "workflowName" 26 | KeyType: "HASH" 27 | - 28 | AttributeName: "executionId" 29 | KeyType: "RANGE" 30 | LocalSecondaryIndexes: 31 | - 32 | IndexName: executionsByTimestamp 33 | KeySchema: 34 | - 35 | AttributeName: "workflowName" 36 | KeyType: "HASH" 37 | - 38 | AttributeName: "timestamp" 39 | KeyType: "RANGE" 40 | Projection: 41 | ProjectionType: INCLUDE 42 | NonKeyAttributes: 43 | - "executionId" 44 | - "jobFile" 45 | - "status" 46 | - 47 | IndexName: executionsByJobFile 48 | KeySchema: 49 | - 50 | AttributeName: "workflowName" 51 | KeyType: "HASH" 52 | - 53 | AttributeName: "jobFile" 54 | KeyType: "RANGE" 55 | Projection: 56 | ProjectionType: INCLUDE 57 | NonKeyAttributes: 58 | - "executionId" 59 | - "timestamp" 60 | - "status" 61 | TimeToLiveSpecification: 62 | AttributeName: "expiration" 63 | Enabled: true 64 | BillingMode: PAY_PER_REQUEST 65 | 66 | # job status lambda 67 | JobStatusLambda: 68 | Type: AWS::Serverless::Function 69 | Properties: 70 | Handler: job_status.lambda_handler 71 | Runtime: python3.10 72 | CodeUri: lambda/src 73 | Environment: 74 | Variables: 75 | JOB_STATUS_TABLE: !Ref JobStatusTable 76 | EXPIRATION_DAYS: "90" 77 | MemorySize: 128 78 | Timeout: 60 79 | Policies: 80 | - 81 | Version: "2012-10-17" 82 | Statement: 83 | - 84 | Effect: Allow 85 | Action: 86 | - "dynamodb:DescribeTable" 87 | - "dynamodb:PutItem" 88 | - "dynamodb:UpdateItem" 89 | Resource: !GetAtt JobStatusTable.Arn 90 | DeploymentPreference: 91 | Enabled: False 92 | 93 | JobStatusLambdaLogGroup: 94 | Type: AWS::Logs::LogGroup 95 | Properties: 96 | LogGroupName: !Sub "/aws/lambda/${JobStatusLambda}" 97 | RetentionInDays: 30 98 | 99 | # event bridge rule 100 | JobStatusSFNRule: 101 | Type: AWS::Events::Rule 102 | Properties: 103 | EventPattern: 104 | source: 105 | - aws.states 106 | detail-type: 107 | - Step Functions Execution Status Change 108 | State: ENABLED 109 | Targets: 110 | - 111 | Id: job-status-lambda 112 | Arn: !GetAtt JobStatusLambda.Arn 113 | 114 | # connector: event bridge -> lambda 115 | JobStatusSFNtoLambda: 116 | Type: AWS::Serverless::Connector 117 | Properties: 118 | Source: 119 | Id: JobStatusSFNRule 120 | Destination: 121 | Id: JobStatusLambda 122 | Permissions: 123 | - Write 124 | -------------------------------------------------------------------------------- /lambda/src/notifications/notifications.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import os 4 | 5 | import boto3 6 | import yaml 7 | 8 | logger = logging.getLogger() 9 | logger.setLevel(logging.INFO) 10 | 11 | 12 | def make_state_change_message(attributes: dict) -> str: 13 | status = 
attributes["status"]["StringValue"] 14 | workflow_name = attributes["workflow_name"]["StringValue"] 15 | execution_id = attributes["execution_id"]["StringValue"] 16 | job_file_bucket = attributes["job_file_bucket"]["StringValue"] 17 | job_file_key = attributes["job_file_key"]["StringValue"] 18 | job_file_version = attributes["job_file_version"]["StringValue"] 19 | 20 | details = { 21 | "details": { 22 | "workflow_name": workflow_name, 23 | "execution_id": execution_id, 24 | "job_status": status, 25 | "job_data": f"s3://{job_file_bucket}/{job_file_key}", 26 | "job_data_version": job_file_version, 27 | } 28 | } 29 | 30 | if status == "RUNNING": 31 | action = "has started" 32 | 33 | elif status == "SUCCEEDED": 34 | action = "has finished" 35 | 36 | elif status == "FAILED": 37 | action = "has failed" 38 | 39 | elif status == "ABORTED": 40 | action = "has been aborted" 41 | 42 | elif status == "TIMED_OUT": 43 | action = "has timed out" 44 | 45 | else: 46 | raise RuntimeError(f"status {status} not recognized") 47 | 48 | job_file_name = job_file_key.rsplit("/", 1)[-1] 49 | 50 | text = f"Job {execution_id} ('{job_file_name}') on workflow {workflow_name} {action}." 51 | message = yaml.safe_dump_all([text, details]) 52 | 53 | return message 54 | 55 | 56 | def make_message_attributes(event: dict) -> dict: 57 | input_obj = json.loads(event["detail"]["input"]) 58 | 59 | ret = { 60 | "status": { 61 | "DataType": "String", 62 | "StringValue": event["detail"]["status"], 63 | }, 64 | "workflow_name": { 65 | "DataType": "String", 66 | "StringValue": event["detail"]["stateMachineArn"].rsplit(":", 1)[-1], 67 | }, 68 | "execution_id": { 69 | "DataType": "String", 70 | "StringValue": event["detail"]["name"], 71 | }, 72 | "job_file_bucket": { 73 | "DataType": "String", 74 | "StringValue": input_obj["job_file"]["bucket"], 75 | }, 76 | "job_file_key": { 77 | "DataType": "String", 78 | "StringValue": input_obj["job_file"]["key"], 79 | }, 80 | "job_file_version": { 81 | "DataType": "String", 82 | "StringValue": input_obj["job_file"]["version"], 83 | }, 84 | } 85 | 86 | return ret 87 | 88 | 89 | def make_sns_payload(message: str, attributes: dict) -> dict: 90 | status = attributes["status"]["StringValue"] 91 | workflow_name = attributes["workflow_name"]["StringValue"] 92 | 93 | ret = { 94 | "TopicArn": os.environ["TOPIC_ARN"], 95 | "Message": message, 96 | "Subject": f"{workflow_name}: job {status.lower()}", 97 | "MessageAttributes": attributes, 98 | } 99 | return ret 100 | 101 | 102 | def lambda_handler(event: dict, context: object) -> dict: 103 | print(f"{event=}") 104 | 105 | try: 106 | attributes = make_message_attributes(event) 107 | message = make_state_change_message(attributes) 108 | payload = make_sns_payload(message, attributes) 109 | 110 | client = boto3.client("sns") 111 | response = client.publish(**payload) 112 | 113 | return response 114 | 115 | except KeyError: 116 | logger.warning("unable to parse BayerCLAW information from event") 117 | -------------------------------------------------------------------------------- /lambda/src/router/job_router.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import os 4 | import re 5 | 6 | import boto3 7 | 8 | from lambda_logs import log_preamble, log_event 9 | 10 | logger = logging.getLogger() 11 | logger.setLevel(logging.INFO) 12 | 13 | 14 | def get_state_machine_name(s3_key: str) -> (str, str, str): 15 | # (? 
str: 27 | # assume the filename extension is something uninformative like ".json" 28 | ret = os.path.splitext(key)[0] 29 | return ret 30 | 31 | 32 | def normalize(string: str) -> str: 33 | ret0 = re.sub(r"[^A-Za-z0-9]+", "-", string) 34 | ret = ret0.strip("-") 35 | return ret 36 | 37 | 38 | def make_execution_name(s3_path: str, version: str) -> str: 39 | # assumes the top level directory (= workflow name) has been stripped from s3_path 40 | norm_key = normalize(shorten_filename(s3_path)) 41 | norm_version = normalize(version) or "NULL" 42 | ret = f"{norm_key:.71}_{norm_version:.8}" 43 | return ret 44 | 45 | 46 | def get_state_machine_arn(state_machine_name: str, state_machine_version: str) -> str: 47 | region = os.environ["REGION"] 48 | acct_num = os.environ["ACCT_NUM"] 49 | ret = f"arn:aws:states:{region}:{acct_num}:stateMachine:{state_machine_name}:{state_machine_version}" 50 | return ret 51 | 52 | 53 | def lambda_handler(event: dict, context: object) -> None: 54 | # event = { 55 | # branch: str 56 | # job_file_bucket: str, 57 | # job_file_key: str, 58 | # job_file_version: str # empty string if launcher bucket versioning is suspended 59 | # } 60 | 61 | log_preamble(**event, logger=logger) 62 | log_event(logger, event) 63 | 64 | assert "_DIE_DIE_DIE_" not in event["job_file_key"] 65 | 66 | sfn = boto3.client("stepfunctions") 67 | 68 | try: 69 | # throws AttributeError if regex wasn't matched 70 | state_machine_name, state_machine_version, remainder = get_state_machine_name(event["job_file_key"]) 71 | 72 | exec_name = make_execution_name(remainder, event["job_file_version"]) 73 | logger.info(f"{exec_name=}") 74 | 75 | input_obj = { 76 | "job_file": { 77 | "bucket": event["job_file_bucket"], 78 | "key": event["job_file_key"], 79 | "version": event["job_file_version"], 80 | }, 81 | "index": event["branch"], 82 | } 83 | 84 | state_machine_arn = get_state_machine_arn(state_machine_name, state_machine_version) 85 | 86 | if "dry_run" not in event: 87 | response = sfn.start_execution( 88 | stateMachineArn=state_machine_arn, 89 | name=exec_name, 90 | input=json.dumps(input_obj) 91 | ) 92 | logger.info(f"{response=}") 93 | 94 | except AttributeError: 95 | logger.info("no workflow name found") 96 | 97 | except sfn.exceptions.ExecutionAlreadyExists: 98 | # duplicated s3 events are way more likely than bona fide name collisions 99 | logger.info(f"duplicate event: {exec_name}") 100 | 101 | # throws AccessDeniedException if state machine is not a bclaw workflow from this installation 102 | # throws StateMachineDoesNotExist if alias "current" does not exist on state machine -------------------------------------------------------------------------------- /lambda/tests/compiler/test_state_machine_resources.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from ...src.compiler.pkg.state_machine_resources import (make_initializer_step, make_step_list, 4 | state_machine_version_rc, 5 | state_machine_alias_rc, 6 | STATE_MACHINE_VERSION_NAME, 7 | STATE_MACHINE_ALIAS_NAME) 8 | from ...src.compiler.pkg.util import Step, Resource, lambda_logging_block, lambda_retry 9 | 10 | 11 | def test_make_initializer_step(compiler_env): 12 | repository = "s3://bucket/repo/path/${template}" 13 | 14 | result = make_initializer_step(repository) 15 | expect = { 16 | "Initialize": { 17 | "Type": "Task", 18 | "Resource": "initializer_lambda_arn", 19 | "Parameters": { 20 | "workflow_name": "${WorkflowName}", 21 | "repo_template": repository, 22 | "input_obj.$": "$", 23 | 
**lambda_logging_block("Initialize"), 24 | }, 25 | **lambda_retry(), 26 | "ResultPath": "$", 27 | "OutputPath": "$", 28 | "_stet": True, 29 | }, 30 | } 31 | 32 | assert result == expect 33 | 34 | 35 | def test_make_step_list(): 36 | steps = [ 37 | {"step1": {"data": "1"}}, 38 | {"step2": {"data": "2"}}, 39 | {"step3": {"data": "3", "next": "step5"}}, 40 | {"step4": {"data": "4", "end": True}}, 41 | {"step5": {"data": "5", "Next": "step7"}}, 42 | {"step6": {"data": "6", "End": True}}, 43 | {"step7": {"data": "7"}}, 44 | ] 45 | expected_nexts = [ 46 | "step2", 47 | "step3", 48 | "step5", 49 | "", 50 | "step7", 51 | "", 52 | "", 53 | ] 54 | 55 | results = make_step_list(steps) 56 | 57 | for orig, result, exp_next in zip(steps, results, expected_nexts): 58 | assert isinstance(result, Step) 59 | k, v = next(iter(orig.items())) 60 | assert result.name == k 61 | assert result.spec == v 62 | assert result.next == exp_next 63 | 64 | 65 | def test_state_machine_version_rc(): 66 | state_machine = Resource("stateMachineLogicalName", {}) 67 | result = state_machine_version_rc(state_machine) 68 | expect = Resource(STATE_MACHINE_VERSION_NAME, 69 | { 70 | "Type": "AWS::StepFunctions::StateMachineVersion", 71 | "UpdateReplacePolicy": "Retain", 72 | "Properties": { 73 | "Description": "No description", 74 | "StateMachineArn": {"Ref": "stateMachineLogicalName"}, 75 | "StateMachineRevisionId": {"Fn::GetAtt": ["stateMachineLogicalName", "StateMachineRevisionId"]}, 76 | }, 77 | }) 78 | assert result == expect 79 | 80 | 81 | def test_state_machine_alias_rc(): 82 | state_machine_version = Resource(STATE_MACHINE_VERSION_NAME, {}) 83 | result = state_machine_alias_rc(state_machine_version) 84 | expect = Resource(STATE_MACHINE_ALIAS_NAME, 85 | { 86 | "Type": "AWS::StepFunctions::StateMachineAlias", 87 | "Properties": { 88 | "Name": "current", 89 | "Description": "Current active version", 90 | "DeploymentPreference": { 91 | "StateMachineVersionArn": {"Ref": STATE_MACHINE_VERSION_NAME}, 92 | "Type": "ALL_AT_ONCE", 93 | }, 94 | }, 95 | }) 96 | assert result == expect 97 | -------------------------------------------------------------------------------- /lambda/src/compiler/pkg/util.py: -------------------------------------------------------------------------------- 1 | from datetime import timedelta 2 | import json 3 | import os 4 | import re 5 | from typing import Any, NamedTuple 6 | 7 | import boto3 8 | import jmespath 9 | 10 | 11 | class Step(NamedTuple): 12 | name: str 13 | spec: dict 14 | next: str 15 | 16 | @property 17 | def is_terminal(self) -> bool: 18 | return self.next == "" 19 | 20 | @property 21 | def next_or_end(self) -> dict: 22 | if self.is_terminal: 23 | return {"End": True} 24 | else: 25 | return {"Next": self.next} 26 | 27 | @property 28 | def input_field(self) -> dict: 29 | if self.spec["inputs"] is None: 30 | ret = {"inputs.$": "States.JsonToString($.prev_outputs)"} 31 | else: 32 | ret = {"inputs": json.dumps(self.spec["inputs"], separators=(",", ":"))} 33 | return ret 34 | 35 | 36 | class Resource(NamedTuple): 37 | name: str 38 | spec: dict 39 | 40 | 41 | class State(NamedTuple): 42 | name: str 43 | spec: dict 44 | 45 | 46 | def make_logical_name(s: str) -> str: 47 | words = (w.capitalize() for w in re.split(r"[\W_]+", s)) 48 | ret = "".join(words) 49 | return ret 50 | 51 | 52 | # given "${something}": 53 | # match.group(0) == "${something}" 54 | # match.group(1) == "something" 55 | PARAM_FINDER = re.compile(r"\${([A-Za-z0-9]+)}") 56 | 57 | def substitute_params(params: dict, target: Any): 58 | if 
isinstance(target, str): 59 | ret = PARAM_FINDER.sub(lambda m: str(params.get(m.group(1), m.group(0))), target) 60 | elif isinstance(target, list): 61 | ret = [substitute_params(params, v) for v in target] 62 | elif isinstance(target, dict): 63 | ret = {k: substitute_params(params, v) for k, v in target.items()} 64 | else: 65 | ret = target 66 | return ret 67 | 68 | 69 | def lambda_logging_block(step_name: str) -> dict: 70 | ret = { 71 | "logging": { 72 | "branch.$": "$.index", 73 | "job_file_bucket.$": "$.job_file.bucket", 74 | "job_file_key.$": "$.job_file.key", 75 | "job_file_version.$": "$.job_file.version", 76 | "sfn_execution_id.$": "$$.Execution.Name", 77 | "step_name": step_name, 78 | "workflow_name": "${WorkflowName}", 79 | }, 80 | } 81 | return ret 82 | 83 | 84 | def time_string_to_seconds(time: str) -> int: 85 | units = {"s": "seconds", "m": "minutes", "h": "hours", "d": "days", "w": "weeks"} 86 | count = int(time[:-1]) 87 | unit = units[time[-1]] 88 | td = timedelta(**{unit: count}) 89 | ret = td.seconds + 60 * 60 * 24 * td.days 90 | return ret 91 | 92 | 93 | def lambda_retry( 94 | max_attempts: int = 5, 95 | interval_seconds: int = 2, 96 | backoff_rate: float = 2.0, 97 | jitter_strategy: str = "FULL") -> dict: 98 | # https://docs.aws.amazon.com/step-functions/latest/dg/bp-lambda-serviceexception.html 99 | ret = { 100 | "Retry": [ 101 | { 102 | "ErrorEquals": [ 103 | "Lambda.ClientExecutionTimeoutException", 104 | "Lambda.ServiceException", 105 | "Lambda.AWSLambdaException", 106 | "Lambda.SdkClientException", 107 | "Lambda.TooManyRequestsException", 108 | ], 109 | "MaxAttempts": max_attempts, 110 | "IntervalSeconds": interval_seconds, 111 | "BackoffRate": backoff_rate, 112 | "JitterStrategy": jitter_strategy, 113 | }, 114 | ] 115 | } 116 | return ret 117 | 118 | 119 | def merge_params_and_options(params: dict, options: dict) -> dict: 120 | ret = params | options 121 | if ret["task_role"] is None: 122 | ret["task_role"] = params["task_role"] 123 | return ret 124 | -------------------------------------------------------------------------------- /lambda/tests/common/test_substitutions.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import pytest 4 | 5 | from ...src.common.python.substitutions import _lookup, substitute_job_data, substitute_into_filenames 6 | 7 | 8 | @pytest.mark.parametrize("target, expect", [ 9 | ("string", "string_value"), 10 | ("number", "99"), 11 | ("list", [1,2,3]), 12 | ("dict", {"a": 1,"b": 2}), 13 | ("boolean_T", "True"), 14 | ("boolean_F", "False"), 15 | ("empty_string", ""), 16 | ("zero", "0"), 17 | ]) 18 | def test_lookup(target, expect): 19 | data = { 20 | "string": "string_value", 21 | "number": 99, 22 | "list": [1, 2, 3], 23 | "dict": {"a": 1, "b": 2}, 24 | "boolean_T": True, 25 | "boolean_F": False, 26 | "empty_string": "", 27 | "zero": 0, 28 | } 29 | result = _lookup(target, data) 30 | assert isinstance(result, str) 31 | 32 | if isinstance(expect, str): 33 | assert result == expect 34 | else: 35 | result2 = json.loads(json.loads(result)) 36 | assert result2 == expect 37 | 38 | 39 | def test_lookup_fail(): 40 | data = {"a": 1, "b": 2} 41 | target = "z" 42 | with pytest.raises(RuntimeError, match="not found in job data"): 43 | _lookup(target, data) 44 | 45 | 46 | def test_substitute_job_data(): 47 | subject = { 48 | "a_string": "a ${job.value} b ${scatter.value} c ${parent.value}", 49 | "a_list": [ 50 | "e ${job.value}", 51 | "f ${scatter.value}", 52 | "g ${parent.value}", 53 | ], 54 | 
"a_dict": { 55 | "eh": "h ${job.value}", 56 | "bee": "i ${scatter.value}", 57 | "sea": "j ${parent.value}", 58 | }, 59 | } 60 | 61 | job_data = { 62 | "job": { 63 | "value": "one" 64 | }, 65 | "scatter": { 66 | "value": 2 67 | }, 68 | "parent": { 69 | "value": ["three"] 70 | }, 71 | } 72 | 73 | expect = { 74 | "a_string": 'a one b 2 c "[\\"three\\"]"', 75 | "a_list": [ 76 | "e one", 77 | "f 2", 78 | 'g "[\\"three\\"]"', 79 | ], 80 | "a_dict": { 81 | "eh": "h one", 82 | "bee": "i 2", 83 | "sea": 'j "[\\"three\\"]"', 84 | }, 85 | } 86 | 87 | result = substitute_job_data(subject, job_data) 88 | assert result == expect 89 | 90 | 91 | def test_substitute_into_filenames(): 92 | subject = { 93 | "file": "s3:/${bucket}/${path}/${name}.${ext}", 94 | "files": [ 95 | "s3:/${bucket}/${path}/${name}1.${ext}", 96 | "s3:/${bucket}/${path}/${name}2.${ext}", 97 | "s3:/${bucket}/${path}/${name}3.${ext}", 98 | ], 99 | "fileses": { 100 | "file_a": "s3:/${bucket}/${path}/${name}_a.${ext}", 101 | "file_b": "s3:/${bucket}/${path}/${name}_b.${ext}", 102 | "file_c": "s3:/${bucket}/${path}/${name}_c.${ext}", 103 | } 104 | } 105 | subs = { 106 | "bucket": "bucket_name", 107 | "path": "path/to/whatever", 108 | "name": "file_name", 109 | } 110 | expect = { 111 | "file": "s3:/bucket_name/path/to/whatever/file_name.${ext}", 112 | "files": [ 113 | "s3:/bucket_name/path/to/whatever/file_name1.${ext}", 114 | "s3:/bucket_name/path/to/whatever/file_name2.${ext}", 115 | "s3:/bucket_name/path/to/whatever/file_name3.${ext}", 116 | ], 117 | "fileses": { 118 | "file_a": "s3:/bucket_name/path/to/whatever/file_name_a.${ext}", 119 | "file_b": "s3:/bucket_name/path/to/whatever/file_name_b.${ext}", 120 | "file_c": "s3:/bucket_name/path/to/whatever/file_name_c.${ext}", 121 | } 122 | } 123 | result = substitute_into_filenames(subject, subs) 124 | assert result == expect 125 | -------------------------------------------------------------------------------- /lambda/tests/compiler/test_util.py: -------------------------------------------------------------------------------- 1 | import json 2 | import pytest 3 | 4 | from ...src.compiler.pkg.util import Step, make_logical_name, substitute_params, time_string_to_seconds, \ 5 | merge_params_and_options 6 | 7 | 8 | @pytest.mark.parametrize("next_step, expect", [ 9 | ("", True), 10 | ("not_terminal", False) 11 | ]) 12 | def test_step_is_terminal(next_step, expect): 13 | step = Step("name", {}, next_step) 14 | result = step.is_terminal 15 | assert result == expect 16 | 17 | 18 | @pytest.mark.parametrize("step, expect", [ 19 | (Step("name1", {"Other": "stuff"}, "next_step"), {"Next": "next_step"}), 20 | (Step("name2", {"Other": "stuff"}, ""), {"End": True}), 21 | ]) 22 | def test_step_next_or_end(step, expect): 23 | result = step.next_or_end 24 | assert result == expect 25 | 26 | 27 | @pytest.mark.parametrize("step, expect", [ 28 | (Step("name1", {"Other": "stuff", "inputs": {"file1": "one", "file2": "two"}}, ""), {"inputs": json.dumps({"file1": "one", "file2": "two"}, separators=(",", ":"))}), 29 | (Step("name2", {"Other": "stuff", "inputs": {}}, ""), {"inputs": json.dumps({})}), 30 | (Step("name3", {"Other": "stuff", "inputs": None}, ""), {"inputs.$": "States.JsonToString($.prev_outputs)"}) 31 | ]) 32 | def test_step_input_field(step, expect): 33 | result = step.input_field 34 | assert result == expect 35 | 36 | 37 | def test_make_logical_name(): 38 | orig_name = "a-name with++LOTS___of%wEiRd,,\n,,characters/that~will&NEVER(work)as\ta##LOGICAL!name12345" 39 | result = 
make_logical_name(orig_name) 40 | expect = "ANameWithLotsOfWeirdCharactersThatWillNeverWorkAsALogicalName12345" 41 | assert result == expect 42 | 43 | 44 | def test_substitute_params(): 45 | target = { 46 | "one": "${value1} ${value2} ${value3} ${skip_me} ${value1} again", 47 | "two": [ 48 | "eh ${value1}", 49 | "bee ${value2}", 50 | "sea ${value3}", 51 | "dee ${skip_me}" 52 | ], 53 | "three": { 54 | "k1": "double-u ${value1}", 55 | "k2": "ecks ${value2}", 56 | "k3": "why ${value3}", 57 | "k4": "zee ${skip_me}" 58 | }, 59 | "four": 99, 60 | } 61 | params = { 62 | "value1": "string", 63 | "value2": "${reference}", 64 | "value3": 42, 65 | "value4": "not used", 66 | } 67 | expect = { 68 | "one": "string ${reference} 42 ${skip_me} string again", 69 | "two": [ 70 | "eh string", 71 | "bee ${reference}", 72 | "sea 42", 73 | "dee ${skip_me}" 74 | ], 75 | "three": { 76 | "k1": "double-u string", 77 | "k2": "ecks ${reference}", 78 | "k3": "why 42", 79 | "k4": "zee ${skip_me}" 80 | }, 81 | "four": 99, 82 | } 83 | result = substitute_params(params, target) 84 | assert result == expect 85 | 86 | 87 | def test_substitute_params_empty_params(): 88 | params = {} 89 | target = "${one} ${two} ${three}" 90 | result = substitute_params(params, target) 91 | assert result == target 92 | 93 | 94 | @pytest.mark.parametrize("timestring, seconds", [ 95 | ("70s", 70), 96 | ("20 m", 1200), 97 | ("3h", 3600*3), 98 | ("2 d", 86400*2), 99 | ("1w", 86400*7) 100 | ]) 101 | def test_time_string_to_seconds(timestring, seconds): 102 | result = time_string_to_seconds(timestring) 103 | assert result == seconds 104 | 105 | 106 | @pytest.mark.parametrize("p_role, o_role, x_role", [ 107 | (None, None, None), 108 | (None, "opt_role", "opt_role"), 109 | ("parm_role", None, "parm_role"), 110 | ("parm_role", "opt_role", "opt_role"), 111 | ]) 112 | def test_merge_params_and_options(p_role, o_role, x_role): 113 | params = {"a": 1, "b": 2, "c": 3, "task_role": p_role} 114 | options = {"z": 9, "y": 8, "task_role": o_role} 115 | expect = {"a": 1, "b": 2, "c": 3, "z": 9, "y": 8, "task_role": x_role} 116 | result = merge_params_and_options(params, options) 117 | assert result == expect 118 | -------------------------------------------------------------------------------- /lambda/tests/common/test_file_select.py: -------------------------------------------------------------------------------- 1 | import json as j 2 | import os 3 | 4 | import boto3 5 | import moto 6 | import pytest 7 | 8 | from ...src.common.python.file_select import select_file_contents, read_json, read_yaml 9 | 10 | csv = b"""\ 11 | id,one,two,three,four 12 | a,11,12,13,14 13 | b,21,22,23,24 14 | c,31,32,33,34 15 | d,41,42,43,44 16 | """ 17 | 18 | json = b"""\ 19 | [ 20 | {"a":11, "b":12, "c":13, "d":14}, 21 | {"a":21, "b":22, "c":23, "d":24}, 22 | {"a":31, "b":32, "c":33, "d":34}, 23 | {"a":41, "b":42, "c":43, "d":44} 24 | ] 25 | """ 26 | 27 | jsonl = b"""\ 28 | {"a":11, "b":12, "c":13, "d":14} 29 | {"a":21, "b":22, "c":23, "d":24} 30 | {"a":31, "b":32, "c":33, "d":34} 31 | {"a":41, "b":42, "c":43, "d":44} 32 | """ 33 | 34 | tsv = b"""\ 35 | id one two three four 36 | a 11 12 13 14 37 | b 21 22 23 24 38 | c 31 32 33 34 39 | d 41 42 43 44 40 | """ 41 | 42 | txt = b"""\ 43 | row1 44 | row2 45 | row3 46 | row4 47 | row5 48 | """ 49 | 50 | yaml = b"""\ 51 | data: 52 | - a: 11 53 | b: 12 54 | c: 13 55 | d: 14 56 | - a: 21 57 | b: 22 58 | c: 23 59 | d: 24 60 | - a: 31 61 | b: 32 62 | c: 33 63 | d: 34 64 | - a: 41 65 | b: 42 66 | c: 43 67 | d: 44 68 | """ 69 | 70 | 71 | 
@pytest.fixture(scope="module") 72 | def src_bucket(): 73 | with moto.mock_aws(): 74 | yld = boto3.resource("s3", region_name="us-east-1").Bucket("test-bucket") 75 | yld.create() 76 | yld.put_object(Key="test-data/file.csv", Body=csv) 77 | yld.put_object(Key="test-data/file.json", Body=json) 78 | yld.put_object(Key="test-data/file.jsonl", Body=jsonl) 79 | yld.put_object(Key="test-data/file.tsv", Body=tsv) 80 | yld.put_object(Key="test-data/file.txt", Body=txt) 81 | yld.put_object(Key="test-data/file.yaml", Body=yaml) 82 | yield yld 83 | 84 | 85 | @pytest.mark.parametrize("query, expect", [ 86 | # ("s3://test-bucket/test-data/file.jsonl", ["14", "24", "34", "44"]), # select all "d" elements 87 | ("s3://test-bucket/test-data/file.json:$[*].d", ["14", "24", "34", "44"]), # select all "d" elements 88 | ("s3://test-bucket/test-data/file.jsonl:$[*].c", ["13", "23", "33", "43"]), # select all "c" elements 89 | ("s3://test-bucket/test-data/file.yaml:$.data[*].a", ["11", "21", "31", "41"]), # select all "a" elements 90 | ("s3://test-bucket/test-data/file.csv:$[*].three", ["13", "23", "33", "43"]), # select column "three" 91 | ("s3://test-bucket/test-data/file.csv:$[*].two", ["12", "22", "32", "42"]), # select column "two" 92 | ("s3://test-bucket/test-data/file.txt:$[2:4]", ["row3", "row4"]), # select lines 2 and 3 (zero-based) 93 | ("s3://test-bucket/test-data/file.txt", ["row1", "row2", "row3", "row4", "row5"]), # select all lines 94 | ]) 95 | def test_select_file_contents(src_bucket, query, expect): 96 | result = select_file_contents(query) 97 | print(str(result)) 98 | assert result == expect 99 | 100 | 101 | """ 102 | Tests: file_select.read_json(body) 103 | This converts a file with a JSON-like structure into JSON format 104 | """ 105 | # @pytest.mark.skip 106 | def test_read_json0(tmp_path): 107 | json_data = {"key": "value"} 108 | json_file = tmp_path / "test.json" 109 | with json_file.open(mode="w") as fp: 110 | j.dump(json_data, fp) 111 | 112 | # read in file contents in memory 113 | with json_file.open(mode="r") as json_content: 114 | response = read_json( 115 | body=json_content 116 | ) 117 | 118 | assert(response == json_data) 119 | 120 | 121 | """ 122 | Tests: file_select.read_yaml(body) 123 | Test the Conversion of YAML into a dictionary object 124 | """ 125 | #@pytest.mark.skip() 126 | def test_read_yaml0(): 127 | 128 | # expected response 129 | response_should_be = {'a': 1, 'b': {'c': 3, 'd': 4}} 130 | 131 | # test input value to function 132 | input_body = """ 133 | a: 1 134 | b: 135 | c: 3 136 | d: 4 137 | """ 138 | 139 | response = read_yaml ( 140 | body=input_body 141 | ) 142 | 143 | assert(response == response_should_be) 144 | -------------------------------------------------------------------------------- /bclaw_runner/tests/test_qc_check.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import boto3 4 | import moto 5 | import pytest 6 | 7 | from ..src.runner.qc_check import abort_execution, run_one_qc_check, run_all_qc_checks, do_checks, QCFailure 8 | 9 | QC_DATA_1 = { 10 | "a": 1, 11 | "b": 2. 
12 | } 13 | QC_DATA_2 = { 14 | "x": 99, 15 | "y": 98, 16 | } 17 | 18 | 19 | @pytest.fixture(scope="function") 20 | def mock_state_machine(): 21 | with moto.mock_aws(): 22 | iam = boto3.resource("iam", region_name="us-east-1") 23 | role = iam.create_role( 24 | RoleName="fakeRole", 25 | AssumeRolePolicyDocument="{}" 26 | ) 27 | 28 | sfn = boto3.client("stepfunctions", region_name="us-east-1") 29 | state_machine = sfn.create_state_machine( 30 | name="fakeStateMachine", 31 | definition="{}", 32 | roleArn=role.arn 33 | ) 34 | 35 | yield state_machine["stateMachineArn"] 36 | 37 | 38 | @pytest.fixture(scope="function") 39 | def mock_qc_data_files(mocker, request): 40 | qc_file1 = mocker.mock_open(read_data=json.dumps(QC_DATA_1)) 41 | qc_file2 = mocker.mock_open(read_data=json.dumps(QC_DATA_2)) 42 | ret = mocker.patch("builtins.open", qc_file1) 43 | ret.side_effect = [qc_file1.return_value, qc_file2.return_value] 44 | 45 | 46 | def test_abort_execution(mock_state_machine, monkeypatch): 47 | sfn = boto3.client("stepfunctions", region_name="us-east-1") 48 | sfn_execution = sfn.start_execution( 49 | stateMachineArn=mock_state_machine, 50 | name="fake_execution", 51 | input='{"in": "put"}' 52 | ) 53 | 54 | monkeypatch.setenv("AWS_DEFAULT_REGION", "us-east-1") 55 | monkeypatch.setenv("AWS_ACCOUNT_ID", "123456789012") 56 | monkeypatch.setenv("BC_WORKFLOW_NAME", "fakeStateMachine") 57 | monkeypatch.setenv("BC_EXECUTION_ID", "fake_execution") 58 | monkeypatch.setenv("BC_STEP_NAME", "test_step") 59 | 60 | abort_execution(["failure1", "failure2"]) 61 | 62 | execution_desc = sfn.describe_execution(executionArn=sfn_execution["executionArn"]) 63 | assert execution_desc["status"] == "ABORTED" 64 | 65 | 66 | @pytest.mark.parametrize("expression, expect", [ 67 | ("x == 1", True), 68 | ("x != 1", False), 69 | ]) 70 | def test_run_one_qc_check(expression, expect): 71 | qc_data = {"x": 1} 72 | result = run_one_qc_check(qc_data, expression) 73 | assert result == expect 74 | 75 | 76 | @pytest.mark.parametrize("fake1_cond, fake2_cond, expect", [ 77 | (["a>1"], ["x<99"], []), # all pass 78 | (["a>1", "b==2"], ["y<98"], ["fake1: b==2"]), # one fail 79 | (["b==1"], ["x==99", "y==98"], ["fake2: x==99", "fake2: y==98"]), # multi fail 80 | (["a==1", "b==2"], ["x==99", "y==98"], ["fake1: a==1", "fake1: b==2", "fake2: x==99", "fake2: y==98"]), # all fail 81 | ]) 82 | def test_run_all_qc_checks(fake1_cond, fake2_cond, expect, mock_qc_data_files): 83 | spec = [ 84 | { 85 | "qc_result_file": "fake1", 86 | "stop_early_if": fake1_cond, 87 | }, 88 | { 89 | "qc_result_file": "fake2", 90 | "stop_early_if": fake2_cond, 91 | }, 92 | ] 93 | 94 | result = list(run_all_qc_checks(spec)) 95 | assert result == expect 96 | 97 | 98 | @pytest.mark.parametrize("fake1_cond, fake2_cond, expect", [ 99 | (None, None, []), # no checks 100 | (["a>1"], ["x<99"], []), # all pass 101 | (["a>1", "b==2"], ["y<98"], ["fake1: b==2"]), # one fail 102 | (["b==1"], ["x==99", "y==98"], ["fake2: x==99", "fake2: y==98"]), # multi fail 103 | (["a==1", "b==2"], ["x==99", "y==98"], 104 | ["fake1: a==1", "fake1: b==2", "fake2: x==99", "fake2: y==98"]), # all fail 105 | ]) 106 | def test_do_checks(fake1_cond, fake2_cond, expect, mock_qc_data_files, mocker): 107 | mock_abort_execution = mocker.patch("bclaw_runner.src.runner.qc_check.abort_execution") 108 | 109 | if fake1_cond is None: 110 | spec = [] 111 | else: 112 | spec = [ 113 | { 114 | "qc_result_file": "fake1", 115 | "stop_early_if": fake1_cond, 116 | }, 117 | { 118 | "qc_result_file": "fake2", 119 | 
"stop_early_if": fake2_cond, 120 | }, 121 | ] 122 | 123 | if expect: 124 | with pytest.raises(QCFailure) as qcf: 125 | do_checks(spec) 126 | assert qcf.value.failures == expect 127 | else: 128 | do_checks(spec) 129 | -------------------------------------------------------------------------------- /bclaw_runner/tests/test_cache.py: -------------------------------------------------------------------------------- 1 | import fcntl 2 | import logging 3 | import os 4 | 5 | import boto3 6 | import moto 7 | import pytest 8 | 9 | logging.basicConfig(level=logging.INFO) 10 | 11 | from ..src.runner.cache import _blocking_download, _download_to_cache, get_reference_inputs 12 | 13 | TEST_BUCKET = "test-bucket" 14 | FILE1_CONTENT = "file one" 15 | FILE2_CONTENT = "file two" 16 | FILE3_CONTENT = "file three" 17 | 18 | 19 | @pytest.fixture(scope="module") 20 | def s3_bucket(): 21 | with moto.mock_aws(): 22 | boto3.client("s3", region_name="us-east-1").create_bucket(Bucket=TEST_BUCKET) 23 | yld = boto3.resource("s3", region_name="us-east-1").Bucket(TEST_BUCKET) 24 | yld.put_object(Key="some/path/file1", Body=FILE1_CONTENT.encode("utf-8")) 25 | yld.put_object(Key="other/path/file2", Body=FILE2_CONTENT.encode("utf-8")) 26 | yld.put_object(Key="one/more/path/file3", Body=FILE3_CONTENT.encode("utf-8")) 27 | yield yld 28 | 29 | 30 | def test_blocking_download(tmp_path, s3_bucket): 31 | src = s3_bucket.Object("some/path/file1") 32 | dst = f"{tmp_path}/file1" 33 | _blocking_download(src, dst, "file1") 34 | assert os.path.isfile(dst) 35 | 36 | 37 | def test_blocking_download_already_there(tmp_path, caplog): 38 | caplog.set_level(logging.INFO) 39 | dst = f"{tmp_path}/file99" 40 | open(dst, "w").close() 41 | _blocking_download("s3://does/not/exist", dst, "file99") 42 | assert "found file99 in cache" in caplog.text 43 | 44 | 45 | def test_blocking_download_blocked(tmp_path, s3_bucket): 46 | src = s3_bucket.Object("some/path/file1") 47 | dst = f"{tmp_path}/file1" 48 | lock_file = f"{os.path.dirname(dst)}.lock" 49 | with open(lock_file, "w") as lfp: 50 | fcntl.flock(lfp, fcntl.LOCK_EX | fcntl.LOCK_NB) 51 | with pytest.raises(BlockingIOError): 52 | _blocking_download(src, dst, "file1") 53 | os.remove(lock_file) 54 | 55 | 56 | def test_download_to_cache(monkeypatch, tmp_path, s3_bucket): 57 | monkeypatch.setenv("BC_SCRATCH_PATH", str(tmp_path)) 58 | 59 | src_etag = s3_bucket.Object("some/path/file1").e_tag.strip('"') 60 | 61 | result = _download_to_cache(("test_file", f"s3://{TEST_BUCKET}/some/path/file1")) 62 | expected = "test_file", f"{tmp_path}/{src_etag}/file1" 63 | assert result == expected 64 | 65 | key, cached_file = result 66 | assert os.path.isfile(cached_file) 67 | with open(cached_file) as fp: 68 | cached_content = fp.readline() 69 | assert cached_content == FILE1_CONTENT 70 | 71 | with open(cached_file, "a") as fp: 72 | print("extra content", file=fp) 73 | 74 | result2 = _download_to_cache(("test_file", f"s3://{TEST_BUCKET}/some/path/file1")) 75 | _, cached_file2 = result2 76 | with open(cached_file2) as fp2: 77 | cached_content2 = fp2.readline() 78 | assert cached_content2 == FILE1_CONTENT + "extra content\n" 79 | 80 | 81 | def test_get_reference_inputs(monkeypatch, tmp_path, s3_bucket): 82 | monkeypatch.setenv("BC_SCRATCH_PATH", str(tmp_path)) 83 | 84 | ref_spec = { 85 | "file1": f"s3://{TEST_BUCKET}/some/path/file1", 86 | "file2": f"s3://{TEST_BUCKET}/other/path/file2", 87 | "file3": f"s3://{TEST_BUCKET}/one/more/path/file3", 88 | } 89 | 90 | workspace = f"{str(tmp_path)}/workdir" 91 | 
os.makedirs(workspace) 92 | os.chdir(workspace) 93 | 94 | result = get_reference_inputs(ref_spec) 95 | expect = { 96 | "file1": "file1", 97 | "file2": "file2", 98 | "file3": "file3", 99 | } 100 | assert result == expect 101 | 102 | for file, expected_content in {"file1": FILE1_CONTENT, "file2": FILE2_CONTENT, "file3": FILE3_CONTENT}.items(): 103 | assert os.path.isfile(file) 104 | assert os.stat(file).st_nlink >= 2 # make sure the file is a hard link 105 | with open(file) as fp: 106 | content = fp.readline() 107 | assert content == expected_content 108 | 109 | 110 | def test_get_reference_inputs_fail(monkeypatch, tmp_path, s3_bucket): 111 | monkeypatch.setenv("BC_SCRATCH_PATH", str(tmp_path)) 112 | 113 | ref_spec = { 114 | "file1": f"s3://{TEST_BUCKET}/some/path/file1", 115 | "file2": f"s3://{TEST_BUCKET}/other/path/file2", 116 | "file3": f"s3://{TEST_BUCKET}/does/not/exist/file99", 117 | } 118 | 119 | workspace = f"{str(tmp_path)}/workdir" 120 | os.makedirs(workspace) 121 | os.chdir(workspace) 122 | 123 | with pytest.raises(Exception): 124 | _ = get_reference_inputs(ref_spec) 125 | -------------------------------------------------------------------------------- /doc/options_and_parameters.md: -------------------------------------------------------------------------------- 1 | # Workflow options and parameters 2 | 3 | ## Options 4 | 5 | The Options block of a BayerCLAW workflow template allows you to set values that affect how 6 | BayerCLAW itself operates when building and running your workflow. 7 | 8 | ### `shell` 9 | 10 | BayerCLAW provides the ability to choose which Unix shell to run Batch job commands 11 | under. You can specify the shell to use globally, using the setting in the `Options` block 12 | or for individual steps in the `compute` block. The choices for the `shell` setting are 13 | `sh`, `bash`, and `sh-pipefail`: 14 | 15 | | Choice | Shell | Shell options | Default? | 16 | |-------------|-------|----------------|----------| 17 | | sh | sh | -veu | yes | 18 | | bash | bash | -veuo pipefail | no | 19 | | sh-pipefail | sh | -veuo pipefail | no | 20 | 21 | Bourne shell (`sh`) is for all intents and purposes supported by all Unix implementations, 22 | so it is the default. The `bash` choice is provided mostly for backward compatibility 23 | but is still supported by most popular Linuxen. 24 | 25 | The shell options are based on the so-called [Bash Strict Mode](http://redsymbol.net/articles/unofficial-bash-strict-mode/) 26 | as an aid to debugging. Since the `pipefail` option is not included in the Bourne shell 27 | specification (as of June 2022), it is not included in the default shell options. Nevertheless, 28 | some `sh` implementations do provide a `pipefail` option, 29 | hence the `sh-pipefail` choice. To check whether `pipefail` is implemented in your favorite 30 | `sh`, use the command `sh -c "set -o"` and look for a `pipefail` entry in the resulting list. 31 | 32 | The `-v` shell option is used to echo each command before execution. Some users 33 | may prefer the similar `-x` option. The difference is that `-x` prints commands after 34 | variable substitution has happened, which can cause privileged information (passwords, 35 | etc.) to be exposed in the logs. With `-v`, commands are printed before variable substitution, 36 | and thus is the safer choice. 37 | 38 | ### `task_role` 39 | 40 | The `task_role` option allows you to override the IAM role that BayerCLAW will use to run your workflow. 
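For example, a minimal sketch of how this might look in a workflow template's `Options` block (the role ARN shown is a hypothetical placeholder):

```yaml
Options:
  # hypothetical IAM role granting any extra permissions your job steps need
  task_role: arn:aws:iam::123456789012:role/my-custom-batch-task-role
```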
41 | By default, BayerCLAW batch jobs run under an IAM role that provides access to a minimal set of AWS 42 | services (S3, EC2, ECR, CloudWatch logs). If your workflow has tasks that utilize other services, you can 43 | create a custom task role and provide its ARN through the `task_role` option. 44 | 45 | The global `task_role` setting can itself be overridden using the per-step `task_role` option. 46 | 47 | ### `versioned` ‼️ **DEPRECATED** 48 | 49 | BayerCLAW workflows are now always versioned. 50 | 51 | ## Parameters 52 | 53 | The Parameters block allows you to customize workflows without editing the template file. The basic Parameter 54 | definition format is described [here](./language.md#the-parameters-block). 55 | 56 | ### Setting parameters 57 | 58 | If you compile your workflow using the AWS CloudFormation console, you will be prompted for Parameter values on 59 | the `Specify stack details` page. If you use the AWS CLI, you can provide Parameter values using the 60 | `--parameter-overrides` option: 61 | 62 | ```bash 63 | aws cloudformation deploy \ 64 | --template-file my-template.yaml \ 65 | --stack-name my-workflow \ 66 | --capabilities CAPABILITY_IAM \ 67 | --parameter-overrides theKing="elvis" status="lives" 68 | ``` 69 | 70 | You can also provide Parameter values using a JSON file: 71 | 72 | ```bash 73 | aws cloudformation deploy \ 74 | --template-file my-template.yaml \ 75 | --stack-name my-workflow \ 76 | --capabilities CAPABILITY_IAM \ 77 | --parameter-overrides file:///path/more_path/parameters.json 78 | ``` 79 | 80 | where `parameters.json` contains: 81 | 82 | ```json5 83 | [ 84 | "theKing=elvis", 85 | "status=lives" 86 | ] 87 | ``` 88 | 89 | In addition, Parameter values can be retrieved from 90 | [AWS Systems Manager Parameter Store](https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/parameters-section-structure.html#aws-ssm-parameter-types). 91 | To do so, in the simplest case, declare your Parameter with the type `AWS::SSM::Parameter::Value<String>` and 92 | use the name of a parameter stored in Parameter Store as its default value. For example: 93 | 94 | ```yaml 95 | Parameters: 96 | storedParameter: 97 | Type: AWS::SSM::Parameter::Value<String> 98 | Default: myStoredParameter 99 | ``` 100 | 101 | This will retrieve the value of `myStoredParameter` from Parameter Store and use it in your workflow. Parameters 102 | used in this way must exist before compilation time. Parameter Store SecureString parameters are not supported. 103 | -------------------------------------------------------------------------------- /doc/subpipes.md: -------------------------------------------------------------------------------- 1 | # Using Subpipes in BayerCLAW 2 | 3 | ## Overview 4 | A BayerCLAW workflow can execute other BayerCLAW workflows as subpipes. 
Use cases for this include: 5 | - Creating reusable, modular workflows 6 | - Breaking large, complex workflows into manageable units 7 | - Enabling workflows to be developed and tested as separate, logical units 8 | 9 | ### How it works 10 | 11 | In general, the main workflow should create a new job data file that can be submitted to the subpipe: 12 | 13 | ```yaml 14 | - 15 | MakeNewJobData: 16 | image: docker.io/library/ubuntu 17 | commands: 18 | - "echo '{\"a\": \"eh\", \"b\": \"bee\", \"c\": \"sea\"}' > ${sub_job_data}" 19 | outputs: 20 | sub_job_data: sub_job.json 21 | - 22 | RunTheSubpipe: 23 | job_data: sub_job.json 24 | subpipe: my-subpipe 25 | ``` 26 | 27 | If the main workflow's job data file contains all of the information needed to run the subpipe, you may 28 | omit the subpipe step's `job_data` field, and the original job data file will be submitted directly to the 29 | subpipe. 30 | 31 | The subpipe step creates a repository for the subpipe (this will be in a folder inside of the main 32 | repository) where the subpipe will store its intermediate files. After the subpipe finishes, the 33 | main pipeline can optionally copy files out of the sub-repository into the main repository. 34 | 35 | There are no special requirements for the subpipe. It can be an ordinary BayerCLAW workflow -- however, 36 | the repository established by the main workflow overrides the repository designated in the subpipe's 37 | workflow definition. 38 | 39 | ## Calling a subpipe 40 | To invoke a subpipe, the parent pipeline must contain a *subpipe step*. 41 | 42 | ### Subpipe step syntax 43 | ```yaml 44 | SubpipeStepName: 45 | job_data: sub_job.json 46 | subpipe: my-subpipe-workflow 47 | retrieve: 48 | - filenameX.txt -> filenameY.txt 49 | - filenameZ.txt 50 | ``` 51 | The fields of the subpipe step are: 52 | - `job_data`: An S3 file that will be used to launch the subpipe. This may be the name of a file in the 53 | main workflow's repository, or a full S3 URI of a file that exists elsewhere. 54 | 55 | - `subpipe`: The name of the BayerCLAW workflow to be run as a subpipe. For testing purposes, you may also provide the 56 | Amazon Resource Name (ARN) of a Step Functions state machine that simulates the behavior of the real subpipe. 57 | 58 | - `retrieve`: A list of files to be copied from the subpipe's repository to the parent workflow's repository. 59 | Use the syntax `subpipe_filename -> parent_wf_filename` to rename the file, or just the name 60 | of the file if it does not need to be renamed. The `retrieve` field may be omitted if there are no files to 61 | copy into the parent workflow's repository. 62 | 63 | ### String substitution and file globs 64 | Values from the execution's job data file can be substituted into any filename in the `retrieve` 65 | field. For instance, this would be valid (though not really recommended): `${job.project_id}.txt -> ${job.sample_id}.txt`. 66 | 67 | Filename globbing is not available in subpipe steps. 68 | 69 | ### Subpipes and scatter/gather 70 | A subpipe may be invoked from inside of a scatter step. 
For instance, this is a small workflow that scatters 71 | over a set of sequence files, each branch passing a sequence file and a configuration file to a subpipe and 72 | collecting the .bam files produced: 73 | 74 | ```yaml 75 | DoScatter: 76 | scatter: 77 | contigs: contigs*.fa 78 | inputs: 79 | config: config.cfg 80 | steps: 81 | - 82 | RunSubpipe: 83 | # no "job_data" field here, were passing along the main job data file 84 | subpipe: sub-workflow 85 | retrieve: 86 | - output.bam 87 | outputs: 88 | bamfile: output.bam 89 | ``` 90 | 91 | While the `scatter` and `parent` variables from the scatter step are available to the subpipe 92 | step itself, *the workflow invoked by the subpipe will not have access to these values*. 93 | 94 | The sub-workflow, itself being an BayerCLAW workflow, may also contain its own scatter steps. 95 | 96 | ## Job tracking in the AWS console 97 | Although a subpipe call involves invoking a completely different workflow, AWS Step Functions makes it easy to track 98 | both executions through the AWS console. 99 | 100 | In the console, the parent pipeline execution will contain links to the subpipe execution under the 101 | `Execution event history` list: 102 | ![link to subpipe](resources/subpipes_step_functions_link1.png) 103 | 104 | And the subpipe execution console page will be linked back to the parent in the `Execution details` box: 105 | ![link to parent](resources/subpipes_step_functions_link2.png) 106 | 107 | Due to Step Functions execution naming restrictions, the subpipe execution will have a different name from the 108 | parent execution. 109 | -------------------------------------------------------------------------------- /util/bclaw_logs/lambda/tests/test_job_status.py: -------------------------------------------------------------------------------- 1 | import datetime as dt 2 | from decimal import Decimal 3 | import json 4 | 5 | import boto3 6 | from boto3.dynamodb.conditions import Key 7 | from moto import mock_dynamodb2 8 | import pytest 9 | 10 | from ..src.job_status import lambda_handler 11 | 12 | 13 | @pytest.fixture(scope="function") 14 | def ddb_table(): 15 | with mock_dynamodb2(): 16 | dynamodb = boto3.resource("dynamodb", region_name="us-east-1") 17 | yld = dynamodb.create_table( 18 | AttributeDefinitions=[ 19 | { 20 | "AttributeName": "workflowName", 21 | "AttributeType": "S", 22 | }, 23 | { 24 | "AttributeName": "executionId", 25 | "AttributeType": "S", 26 | }, 27 | ], 28 | TableName="testTable", 29 | KeySchema=[ 30 | { 31 | "AttributeName": "workflowName", 32 | "KeyType": "HASH", 33 | }, 34 | { 35 | "AttributeName": "executionId", 36 | "KeyType": "RANGE", 37 | } 38 | ], 39 | BillingMode="PAY_PER_REQUEST" 40 | ) 41 | 42 | yield yld 43 | 44 | 45 | @pytest.mark.parametrize("status", ["RUNNING", "SUCCEEDED", "FAILED", "ABORTED"]) 46 | def test_lambda_handler(status, ddb_table, monkeypatch): 47 | monkeypatch.setenv("JOB_STATUS_TABLE", "testTable") 48 | monkeypatch.setenv("EXPIRATION_DAYS", "90") 49 | monkeypatch.setenv("AWS_DEFAULT_REGION", "us-east-1") 50 | 51 | timestamp_str = "2023-02-24T15:18:13Z" 52 | 53 | event = { 54 | "time": timestamp_str, 55 | "detail": { 56 | "name": "12345678-1234...", 57 | "status": status, 58 | "input": json.dumps( 59 | { 60 | "job_file": { 61 | "key": "test-workflow/path/to/job.file", 62 | "version": "987654321", 63 | }, 64 | } 65 | ), 66 | }, 67 | } 68 | 69 | lambda_handler(event, {}) 70 | 71 | chek = ddb_table.query( 72 | KeyConditionExpression=Key("workflow_name").eq("test-workflow"), 73 | 
Select="ALL_ATTRIBUTES" 74 | ) 75 | 76 | timestamp_obj = dt.datetime.strptime(timestamp_str, "%Y-%m-%dT%H:%M:%S%z") 77 | expiration_obj = timestamp_obj + dt.timedelta(days=90) 78 | expected_timestamp = Decimal(int(timestamp_obj.timestamp())) 79 | expected_expiration = Decimal(int(expiration_obj.timestamp())) 80 | 81 | expect = { 82 | "executionId": "12345678-1234...", 83 | "workflowName": "test-workflow", 84 | "jobFile": "path/to/job.file#987654321", 85 | "status": status, 86 | "timestamp": expected_timestamp, 87 | "expiration": expected_expiration, 88 | } 89 | assert chek["Items"][0] == expect 90 | 91 | 92 | @pytest.mark.parametrize("old_status, new_status, expected_file_version", [ 93 | ("RUNNING", "RUNNING", "new"), 94 | ("SUCCEEDED", "RUNNING", "old"), 95 | ("FAILED", "RUNNING", "old"), 96 | ("ABORTED", "RUNNING", "old"), 97 | ("RUNNING", "SUCCEEDED", "new"), 98 | ("RUNNING", "FAILED", "new"), 99 | ("RUNNING", "ABORTED", "new"), 100 | ]) 101 | def test_lambda_handler_overwrite(old_status, new_status, expected_file_version, ddb_table, monkeypatch): 102 | monkeypatch.setenv("JOB_STATUS_TABLE", "testTable") 103 | monkeypatch.setenv("EXPIRATION_DAYS", "90") 104 | monkeypatch.setenv("AWS_DEFAULT_REGION", "us-east-1") 105 | 106 | ddb_table.put_item( 107 | Item={ 108 | "workflowName": "test-workflow", 109 | "executionId": "12345678-1234...", 110 | "status": old_status, 111 | "jobFile": "path/to/job.file#old" 112 | } 113 | ) 114 | 115 | event = { 116 | "time": "2021-05-28T17:53:54Z", 117 | "detail": { 118 | "name": "12345678-1234...", 119 | "status": new_status, 120 | "input": json.dumps( 121 | { 122 | "job_file": { 123 | "key": "test-workflow/path/to/job.file", 124 | # job file versions won't actually change during a run, this is just 125 | # a hack to check whether the record was overwritten 126 | "version": "new", 127 | }, 128 | } 129 | ), 130 | }, 131 | } 132 | 133 | lambda_handler(event, {}) 134 | 135 | chek = ddb_table.query( 136 | KeyConditionExpression=Key("workflow_name").eq("test-workflow"), 137 | Select="ALL_ATTRIBUTES" 138 | ) 139 | 140 | assert chek["Items"][0]["jobFile"].endswith("#" + expected_file_version) 141 | -------------------------------------------------------------------------------- /lambda/tests/compiler/test_subpipe_resources.py: -------------------------------------------------------------------------------- 1 | import json 2 | import textwrap 3 | 4 | import pytest 5 | import yaml 6 | 7 | from ...src.compiler.pkg.subpipe_resources import file_submit_step, run_subpipe_step, file_retrieve_step, handle_subpipe 8 | from ...src.compiler.pkg.util import Step, lambda_retry 9 | 10 | SUBMIT_BLOCK = { 11 | "submit": [ 12 | "file1.txt -> fileA.txt", 13 | "file2.txt", 14 | ], 15 | } 16 | 17 | 18 | @pytest.fixture(scope="module") 19 | def sample_subpipe_spec() -> dict: 20 | ret = { 21 | "job_data": "test_job_data.json", 22 | **SUBMIT_BLOCK, 23 | "subpipe": "arn:aws:states:us-east-1:123456789012:StateMachine:test-machine", 24 | "retrieve": [ 25 | "fileX.txt -> file3.txt", 26 | "fileY.txt", 27 | ], 28 | } 29 | return ret 30 | 31 | 32 | def test_file_submit_step(sample_subpipe_spec, compiler_env): 33 | test_step = Step("step_name", sample_subpipe_spec, "next_step_name") 34 | result = file_submit_step(test_step, "run_subpipe_step_name") 35 | expect = { 36 | "Type": "Task", 37 | "Resource": "subpipes_lambda_arn", 38 | "Parameters": { 39 | "repo.$": "$.repo.uri", 40 | "job_data": "test_job_data.json", 41 | "submit": json.dumps(SUBMIT_BLOCK["submit"]), 42 | "step_name": "step_name", 43 
| "logging": { 44 | "branch.$": "$.index", 45 | "job_file_bucket.$": "$.job_file.bucket", 46 | "job_file_key.$": "$.job_file.key", 47 | "job_file_version.$": "$.job_file.version", 48 | "sfn_execution_id.$": "$$.Execution.Name", 49 | "step_name": "step_name", 50 | "workflow_name": "${WorkflowName}", 51 | }, 52 | }, 53 | **lambda_retry(), 54 | "ResultPath": "$.subpipe", 55 | "OutputPath": "$", 56 | "Next": "run_subpipe_step_name", 57 | } 58 | assert result == expect 59 | 60 | 61 | def test_run_subpipe_step(sample_subpipe_spec): 62 | test_step = Step("step_name", sample_subpipe_spec, "next_step_name") 63 | result = run_subpipe_step(test_step, "retrieve_step_name") 64 | expect = { 65 | "Type": "Task", 66 | "Resource": "arn:aws:states:::states:startExecution.sync", 67 | "Parameters": { 68 | "Input": { 69 | "index": "main", 70 | "job_file.$": "$.job_file", 71 | "prev_outputs": {}, 72 | "repo.$": "$.subpipe.sub_repo", 73 | "share_id.$": "$.share_id", 74 | "AWS_STEP_FUNCTIONS_STARTED_BY_EXECUTION_ID.$": "$$.Execution.Id", 75 | }, 76 | "Name.$": "States.Format('{}_step_name', $$.Execution.Name)", 77 | "StateMachineArn": sample_subpipe_spec["subpipe"], 78 | }, 79 | "ResultPath": None, 80 | "OutputPath": "$", 81 | "Next": "retrieve_step_name" 82 | } 83 | assert result == expect 84 | 85 | 86 | @pytest.mark.parametrize("next_step_name, next_or_end", [ 87 | ("next_step", {"Next": "next_step"}), 88 | ("", {"End": True}), 89 | ]) 90 | def test_file_retrieve_step(next_step_name, next_or_end, sample_subpipe_spec, compiler_env): 91 | test_step = Step("step_name", sample_subpipe_spec, next_step_name) 92 | result = file_retrieve_step(test_step) 93 | expect = { 94 | "Type": "Task", 95 | "Resource": "subpipes_lambda_arn", 96 | "Parameters": { 97 | "repo.$": "$.repo.uri", 98 | "retrieve": json.dumps(sample_subpipe_spec["retrieve"]), 99 | "subpipe": { 100 | "sub_repo.$": "$.subpipe.sub_repo.uri", 101 | }, 102 | "logging": { 103 | "branch.$": "$.index", 104 | "job_file_bucket.$": "$.job_file.bucket", 105 | "job_file_key.$": "$.job_file.key", 106 | "job_file_version.$": "$.job_file.version", 107 | "sfn_execution_id.$": "$$.Execution.Name", 108 | "step_name": "step_name", 109 | "workflow_name": "${WorkflowName}", 110 | }, 111 | }, 112 | **lambda_retry(), 113 | "ResultSelector": {}, 114 | "ResultPath": "$.prev_outputs", 115 | "OutputPath": "$", 116 | **next_or_end 117 | } 118 | assert result == expect 119 | 120 | 121 | def test_handle_subpipe(sample_subpipe_spec, compiler_env): 122 | test_step = Step("step_name", sample_subpipe_spec, "next_step_name") 123 | states = handle_subpipe(test_step) 124 | assert len(states) == 3 125 | 126 | assert states[0].name == "step_name" 127 | assert states[0].spec["Next"] == "step_name.subpipe" 128 | 129 | assert states[1].name == "step_name.subpipe" 130 | assert states[1].spec["Next"] == "step_name.retrieve" 131 | 132 | assert states[2].name == "step_name.retrieve" 133 | assert states[2].spec["Next"] == "next_step_name" 134 | -------------------------------------------------------------------------------- /lambda/src/compiler/pkg/scatter_gather_resources.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import os 4 | import re 5 | from typing import Generator, List 6 | 7 | from . 
import state_machine_resources as sm 8 | from .util import Step, Resource, State, lambda_logging_block, lambda_retry 9 | 10 | 11 | def scatter_step(step: Step, map_step_name: str) -> dict: 12 | ret = { 13 | "Type": "Task", 14 | "Resource": os.environ["SCATTER_LAMBDA_ARN"], 15 | "Parameters": { 16 | "repo.$": "$.repo", 17 | "scatter": json.dumps(step.spec["scatter"]), 18 | **step.input_field, 19 | "outputs": json.dumps(step.spec["outputs"]), 20 | "step_name": step.name, 21 | **lambda_logging_block(step.name), 22 | }, 23 | **lambda_retry(), 24 | "ResultPath": "$.scatter", 25 | "Next": map_step_name 26 | } 27 | 28 | return ret 29 | 30 | 31 | def error_tolerance(spec) -> dict: 32 | # spec will have passed validation, so... 33 | if isinstance(spec, str): 34 | # if it's a string, it's a percentage between 0 and 100 and the last character is % 35 | ret = {"ToleratedFailurePercentage": int(spec[:-1])} 36 | else: 37 | # otherwise it's an int >= 0 38 | ret = {"ToleratedFailureCount": spec} 39 | return ret 40 | 41 | 42 | def map_step(step: Step, sub_branch: dict, gather_step_name: str) -> dict: 43 | label = re.sub(r"\W", "", step.name) 44 | 45 | ret = { 46 | "Type": "Map", 47 | "MaxConcurrency": step.spec["max_concurrency"], 48 | **error_tolerance(step.spec["error_tolerance"]), 49 | "Label": label[:40], 50 | "ItemReader": { 51 | "Resource": "arn:aws:states:::s3:getObject", 52 | "ReaderConfig": { 53 | "InputType": "CSV", 54 | "CSVHeaderLocation": "FIRST_ROW", 55 | }, 56 | "Parameters": { 57 | "Bucket.$": "$.scatter.items.bucket", 58 | "Key.$": "$.scatter.items.key", 59 | } 60 | }, 61 | "ItemSelector": { 62 | "index.$": "States.Format('{}', $$.Map.Item.Index)", # stringify the index 63 | "job_file.$": "$.job_file", 64 | "prev_outputs": {}, 65 | "scatter.$": "$$.Map.Item.Value", 66 | "repo.$": "$.scatter.repo", 67 | "share_id.$": "$.share_id" 68 | }, 69 | "ItemProcessor": { 70 | "ProcessorConfig": { 71 | "Mode": "DISTRIBUTED", 72 | "ExecutionType": "STANDARD" 73 | }, 74 | **sub_branch 75 | }, 76 | "ResultPath": None, 77 | "Next": gather_step_name, 78 | } 79 | 80 | return ret 81 | 82 | 83 | def scatter_init_step(parent_step_name: str) -> dict: 84 | step_name = f"{parent_step_name}.initialize" 85 | ret = { 86 | step_name: { 87 | "Type": "Task", 88 | "Resource": os.environ["SCATTER_INIT_LAMBDA_ARN"], 89 | "Parameters": { 90 | "index.$": "$.index", 91 | "repo.$": "$.repo", 92 | "scatter.$": "$.scatter", 93 | **lambda_logging_block(step_name) 94 | }, 95 | **lambda_retry(max_attempts=10), 96 | "ResultPath": "$.repo", 97 | "_stet": True, 98 | }, 99 | } 100 | return ret 101 | 102 | 103 | def gather_step(step: Step) -> dict: 104 | ret = { 105 | "Type": "Task", 106 | "Resource": os.environ["GATHER_LAMBDA_ARN"], 107 | "Parameters": { 108 | "repo.$": "$.repo.uri", 109 | "outputs": json.dumps(step.spec["outputs"]), 110 | "step_name": step.name, 111 | **lambda_logging_block(step.name), 112 | }, 113 | **lambda_retry(), 114 | "ResultPath": "$.prev_outputs", 115 | "OutputPath": "$", 116 | **step.next_or_end, 117 | } 118 | 119 | return ret 120 | 121 | 122 | def handle_scatter_gather(step: Step, 123 | options: dict, 124 | map_depth: int 125 | ) -> Generator[Resource, None, List[State]]: 126 | logger = logging.getLogger(__name__) 127 | logger.info(f"making scatter gather steps for {step.name}") 128 | 129 | if map_depth > 0: 130 | raise RuntimeError("Nested Scatter steps are not supported") 131 | 132 | sub_branch = yield from sm.make_branch([scatter_init_step(step.name)] + step.spec["steps"], 133 | options, depth=map_depth 
+ 1) 134 | 135 | scatter_step_name = step.name 136 | map_step_name = f"{step.name}.map" 137 | gather_step_name = f"{step.name}.gather" 138 | 139 | ret = [ 140 | State(scatter_step_name, scatter_step(step, map_step_name)), 141 | State(map_step_name, map_step(step, sub_branch, gather_step_name)), 142 | State(gather_step_name, gather_step(step)) 143 | ] 144 | 145 | return ret 146 | -------------------------------------------------------------------------------- /lambda/src/job_def/register.py: -------------------------------------------------------------------------------- 1 | """ 2 | When CloudFormation updates a Batch job definition, it will deactivate the old version automatically. This doesn't 3 | work well with blue/green deployments, where we want to keep the old version active in case a rollback is required. 4 | This lambda function will register a new version of the job definition without deactivating the old one. It is meant 5 | to be used as a custom resource in CloudFormation. 6 | """ 7 | 8 | from contextlib import contextmanager 9 | from dataclasses import dataclass, asdict, field 10 | import http.client 11 | import json 12 | import logging 13 | import os 14 | from typing import Generator 15 | import urllib.parse 16 | 17 | import boto3 18 | 19 | logger = logging.getLogger() 20 | logger.setLevel(logging.INFO) 21 | 22 | 23 | @dataclass() 24 | class Response: 25 | PhysicalResourceId: str 26 | StackId: str 27 | RequestId: str 28 | LogicalResourceId: str 29 | Status: str = "FAILED" 30 | Reason: str = "" 31 | NoEcho: bool = False 32 | Data: dict = field(default_factory=dict) 33 | 34 | def return_this(self, **kwargs): 35 | self.Data.update(**kwargs) 36 | 37 | 38 | def respond(url: str, body: dict): 39 | url_obj = urllib.parse.urlparse(url) 40 | body_json = json.dumps(body) 41 | 42 | https = http.client.HTTPSConnection(url_obj.hostname) 43 | https.request("PUT", url_obj.path + "?" 
+ url_obj.query, body_json) 44 | 45 | 46 | @contextmanager 47 | def responder(event, context, no_echo=False) -> Generator[Response, None, None]: 48 | response = Response( 49 | PhysicalResourceId=event.get("PhysicalResourceId"), 50 | StackId=event["StackId"], 51 | RequestId=event["RequestId"], 52 | LogicalResourceId=event["LogicalResourceId"], 53 | NoEcho=no_echo 54 | ) 55 | try: 56 | yield response 57 | logger.info("succeeded") 58 | response.Status = "SUCCESS" 59 | except: 60 | logger.exception("failed: ") 61 | response.Reason = f"see log group {context.log_group_name} / log stream {context.log_stream_name}" 62 | finally: 63 | logger.info(f"{asdict(response)=}") 64 | respond(event["ResponseURL"], asdict(response)) 65 | 66 | 67 | def edit_spec(spec: dict, wf_name: str, step_name: str, image: dict) -> dict: 68 | ret = spec.copy() 69 | ret["jobDefinitionName"] = f"{wf_name}_{step_name}" 70 | ret["containerProperties"]["environment"] += [{"name": "BC_WORKFLOW_NAME", "value": wf_name}, 71 | {"name": "BC_STEP_NAME", "value": step_name}, 72 | {"name": "AWS_DEFAULT_REGION", "value": os.environ["REGION"]}, 73 | {"name": "AWS_ACCOUNT_ID", "value": os.environ["ACCT_NUM"]}] 74 | ret["parameters"]["image"] = json.dumps(image, sort_keys=True, separators=(",", ":")) 75 | ret["tags"]["bclaw:workflow"] = wf_name 76 | return ret 77 | 78 | 79 | def lambda_handler(event: dict, context: object): 80 | # event[ResourceProperties] = { 81 | # workflowName: str 82 | # stepName: str 83 | # image: dict # str 84 | # spec: "{ 85 | # type: str 86 | # parameters: {str: str} 87 | # containerProperties: { 88 | # image: str 89 | # command: [str] 90 | # jobRoleArn: str 91 | # volumes: [dict] 92 | # environment: [{name: str, value: str}] 93 | # mountPoints: [dict] 94 | # resourceRequirements: [{value: str, type: str}] 95 | # } 96 | # consumableResourceProperties: dict 97 | # schedulingPriority: int 98 | # timeout: dict 99 | # propagateTags: bool 100 | # tags: dict 101 | # }" 102 | # } 103 | 104 | logger.info(f"{event=}") 105 | 106 | batch = boto3.client("batch") 107 | 108 | with responder(event, context) as cfn_response: 109 | if event["RequestType"] in ["Create", "Update"]: 110 | spec0 = json.loads(event["ResourceProperties"]["spec"]) 111 | spec = edit_spec(spec0, 112 | event["ResourceProperties"]["workflowName"], 113 | event["ResourceProperties"]["stepName"], 114 | event["ResourceProperties"]["image"]) 115 | logger.info(f"{spec=}") 116 | 117 | result = batch.register_job_definition(**spec) 118 | cfn_response.PhysicalResourceId = result["jobDefinitionArn"] 119 | cfn_response.return_this(Arn=result["jobDefinitionArn"]) 120 | 121 | else: 122 | # handle Delete requests 123 | try: 124 | if (job_def_id := event.get("PhysicalResourceId")) is not None: 125 | batch.deregister_job_definition(jobDefinition=job_def_id) 126 | else: 127 | logger.warning("no physical resource id found") 128 | except: 129 | logger.warning("deregistration failed: ") 130 | -------------------------------------------------------------------------------- /lambda/tests/compiler/test_enhanced_parallel_resources.py: -------------------------------------------------------------------------------- 1 | import json 2 | import textwrap 3 | 4 | import pytest 5 | import yaml 6 | 7 | from ...src.compiler.pkg.enhanced_parallel_resources import handle_parallel_step 8 | from ...src.compiler.pkg.util import Step, State, lambda_logging_block, lambda_retry 9 | 10 | 11 | @pytest.mark.parametrize("next_step_name, next_or_end", [ 12 | ("next_step", {"Next": "next_step"}), 13 | 
("", {"End": True}), 14 | ]) 15 | def test_handle_parallel_step_enhanced(next_step_name, next_or_end, compiler_env): 16 | spec_yaml = textwrap.dedent("""\ 17 | inputs: 18 | input1: file1.json 19 | input2: file2.json 20 | branches: 21 | - 22 | if: input1.qc == 1 23 | steps: 24 | - 25 | do_this: 26 | Type: Pass 27 | - 28 | do_that: 29 | Type: Pass 30 | - 31 | if: input2.qc == 2 32 | steps: 33 | - 34 | do_the_other: 35 | Type: Pass 36 | - 37 | steps: 38 | - 39 | always_do_this: 40 | Type: Pass 41 | - 42 | this_too: 43 | Type: Pass 44 | """) 45 | 46 | spec = yaml.safe_load(spec_yaml) 47 | options = {"wf": "params"} 48 | 49 | def helper(): 50 | step = Step("step_name", spec, next_step_name) 51 | 52 | result, *more = yield from handle_parallel_step(step, options, 0) 53 | assert len(more) == 0 54 | assert isinstance(result, State) 55 | assert result.spec["Type"] == "Parallel" 56 | assert result.spec["ResultPath"] is None 57 | assert result.spec["OutputPath"] == "$" 58 | assert next_or_end.items() <= result.spec.items() 59 | assert len(result.spec["Branches"]) == 3 60 | 61 | # branch "1" 62 | branch_1 = result.spec["Branches"][0] 63 | condition_1 = step.spec["branches"][0]["if"] 64 | check_step_name_1 = f"step_name: {condition_1}?" 65 | skip_step_name_1 = "step_name: skip_1" 66 | assert branch_1["StartAt"] == check_step_name_1 67 | assert set(branch_1["States"].keys()) == {check_step_name_1, skip_step_name_1, "do_this", "do_that"} 68 | 69 | expected_inputs = json.dumps(step.spec["inputs"], separators=(",", ":")) 70 | 71 | # -- check step 1 72 | check_1 = branch_1["States"][check_step_name_1] 73 | expect_1 = { 74 | "Type": "Task", 75 | "Resource": "chooser_lambda_arn", 76 | "Parameters": { 77 | "repo.$": "$.repo.uri", 78 | "inputs": expected_inputs, 79 | "expression": condition_1, 80 | **lambda_logging_block("step_name") 81 | }, 82 | **lambda_retry(), 83 | "Catch": [ 84 | { 85 | "ErrorEquals": ["ConditionFailed"], 86 | "Next": skip_step_name_1, 87 | }, 88 | ], 89 | "ResultPath": None, 90 | "OutputPath": "$", 91 | "Next": "do_this", 92 | } 93 | assert check_1 == expect_1 94 | 95 | # -- skip step 1 96 | skip_branch_1 = branch_1["States"][skip_step_name_1] 97 | assert skip_branch_1["Type"] == "Succeed" 98 | 99 | # branch "2" 100 | branch_2 = result.spec["Branches"][1] 101 | condition_2 = step.spec["branches"][1]["if"] 102 | check_step_name_2 = f"step_name: {condition_2}?" 
103 | skip_step_name_2 = "step_name: skip_2" 104 | assert branch_2["StartAt"] == check_step_name_2 105 | assert set(branch_2["States"].keys()) == {check_step_name_2, skip_step_name_2, "do_the_other"} 106 | 107 | # -- step check_2 108 | check_2 = branch_2["States"][check_step_name_2] 109 | expect_2 = { 110 | "Type": "Task", 111 | "Resource": "chooser_lambda_arn", 112 | "Parameters": { 113 | "repo.$": "$.repo.uri", 114 | "inputs": expected_inputs, 115 | "expression": condition_2, 116 | **lambda_logging_block("step_name") 117 | }, 118 | **lambda_retry(), 119 | "Catch": [ 120 | { 121 | "ErrorEquals": ["ConditionFailed"], 122 | "Next": skip_step_name_2, 123 | }, 124 | ], 125 | "ResultPath": None, 126 | "OutputPath": "$", 127 | "Next": "do_the_other", 128 | } 129 | assert check_2 == expect_2 130 | 131 | # -- step skip_branch_2 132 | skip_branch_2 = branch_2["States"][skip_step_name_2] 133 | assert skip_branch_2["Type"] == "Succeed" 134 | 135 | # branch "3" 136 | branch_3 = result.spec["Branches"][2] 137 | assert branch_3["StartAt"] == "always_do_this" 138 | assert set(branch_3["States"].keys()) == {"always_do_this", "this_too"} 139 | 140 | _ = list(helper()) 141 | -------------------------------------------------------------------------------- /lambda/src/initializer/initializer.py: -------------------------------------------------------------------------------- 1 | from contextlib import closing 2 | from functools import partial 3 | import json 4 | import logging 5 | import re 6 | 7 | import boto3 8 | import jmespath 9 | 10 | from lambda_logs import log_preamble, log_event 11 | from repo_utils import SYSTEM_FILE_TAG 12 | 13 | logger = logging.getLogger() 14 | logger.setLevel(logging.INFO) 15 | 16 | EXTENDED_JOB_DATA_FILE_NAME = "_JOB_DATA_" 17 | 18 | 19 | def read_s3_object(bucket: str, key: str, version: str) -> dict: 20 | s3 = boto3.client("s3") 21 | 22 | # raises "ClientError: An error occurred (NoSuchVersion)...The specified version does not exist." 
if file doesn't exist 23 | # raises "ClientError: An error occurred (InvalidArgument)...Invalid version id specified" if version doesn't exist 24 | response = s3.get_object(Bucket=bucket, Key=key, VersionId=version) 25 | 26 | with closing(response["Body"]) as fp: 27 | # this will raise JSONDecodeError for folder creation events (also empty 28 | # files & malformed JSON) 29 | ret = json.load(fp) 30 | return ret 31 | 32 | 33 | JOB_FINDER = re.compile(r"\${!?job.(.+?)}") 34 | 35 | def lookup(m: re.Match, job_data: dict) -> str: 36 | ret = jmespath.search(m.group(1), job_data) 37 | if ret is None: 38 | raise KeyError(f"'{m.group(1)}' not found in job data") 39 | return str(ret) 40 | 41 | 42 | def substitute_job_data(target: str, job_data: dict) -> str: 43 | _lookup = partial(lookup, job_data=job_data) 44 | ret = JOB_FINDER.sub(_lookup, target) 45 | return ret 46 | 47 | 48 | def check_recursive_launch(src_bucket: str, src_path: str, repo_bucket: str, repo_prefix: str) -> None: 49 | if src_bucket == repo_bucket: 50 | src_path_top_dir = src_path.split("/", 1)[0] 51 | repo_path_top_dir = repo_prefix.split("/", 1)[0] 52 | if src_path_top_dir == repo_path_top_dir: 53 | raise RuntimeError("repo cannot be in the launcher folder") 54 | 55 | 56 | def copy_job_data_to_repo(src_bucket: str, src_key: str, src_version: str, dst_bucket: str, dst_prefix: str) -> None: 57 | filename = src_key.rsplit("/", 1)[-1] 58 | dst_key = f"{dst_prefix}/{filename}" 59 | s3 = boto3.client("s3") 60 | s3.copy_object(CopySource={"Bucket": src_bucket, "Key": src_key, "VersionId": src_version}, 61 | Bucket=dst_bucket, Key=dst_key, 62 | Tagging=SYSTEM_FILE_TAG, 63 | TaggingDirective="REPLACE") 64 | 65 | 66 | def write_extended_job_data_object(raw_job_data: dict, dst_bucket: str, dst_prefix: str) -> None: 67 | job_data = { 68 | "job": raw_job_data, 69 | "scatter": {}, 70 | "parent": {}, 71 | } 72 | dst_key = f"{dst_prefix}/{EXTENDED_JOB_DATA_FILE_NAME}" 73 | s3 = boto3.client("s3") 74 | s3.put_object(Bucket=dst_bucket, Key=dst_key, 75 | Body=json.dumps(job_data).encode("utf-8"), 76 | Tagging=SYSTEM_FILE_TAG) 77 | 78 | 79 | def handle_s3_launch(event: dict) -> dict: 80 | src_bucket = event["input_obj"]["job_file"]["bucket"] 81 | src_key = event["input_obj"]["job_file"]["key"] 82 | src_version = event["input_obj"]["job_file"]["version"] 83 | 84 | # if bucket versioning is suspended,version will be an empty string 85 | job_data = read_s3_object(src_bucket, src_key, src_version) 86 | 87 | repo = substitute_job_data(event["repo_template"], job_data) 88 | repo_bucket, repo_prefix = repo.split("/", 3)[2:] 89 | 90 | check_recursive_launch(src_bucket, src_key, repo_bucket, repo_prefix) 91 | 92 | copy_job_data_to_repo(src_bucket, src_key, src_version, repo_bucket, repo_prefix) 93 | write_extended_job_data_object(job_data, repo_bucket, repo_prefix) 94 | 95 | share_id = re.sub(r"[\W_]+", "", event["workflow_name"]) 96 | 97 | ret = { 98 | "index": event["input_obj"]["index"], 99 | "job_file": { 100 | "bucket": src_bucket, 101 | "key": src_key, 102 | "version": src_version, 103 | }, 104 | "repo": { 105 | "bucket": repo_bucket, 106 | "prefix": repo_prefix, 107 | "uri": repo, 108 | }, 109 | "prev_outputs": {}, 110 | "share_id": share_id, 111 | } 112 | 113 | return ret 114 | 115 | 116 | def lambda_handler(event: dict, context: object) -> dict: 117 | # event = { 118 | # workflow_name: str 119 | # repo_template: str 120 | # input_obj: {} 121 | # logging: { 122 | # branch: str 123 | # job_file_bucket: str 124 | # job_file_key: str 125 | # 
job_file_version: str 126 | # sfn_execution_id: str 127 | # step_name: str 128 | # workflow_name: str 129 | # } 130 | # } 131 | 132 | log_preamble(**event.pop("logging"), logger=logger) 133 | log_event(logger, event) 134 | 135 | if "AWS_STEP_FUNCTIONS_STARTED_BY_EXECUTION_ID" in event["input_obj"]: 136 | # this is a subpipe execution...nothing to do but pass along the input object 137 | logger.info("subpipe launch detected") 138 | ret = event["input_obj"] 139 | 140 | else: 141 | logger.info(f"s3 launch detected") 142 | ret = handle_s3_launch(event) 143 | 144 | logger.info(f"returning {str(ret)}") 145 | return ret 146 | -------------------------------------------------------------------------------- /bclaw_runner/src/runner/runner_main.py: -------------------------------------------------------------------------------- 1 | """ 2 | run stuff 3 | 4 | Usage: 5 | bclaw_runner.py [options] 6 | 7 | Options: 8 | -c COMMANDS command 9 | -f JSON_STRING reference files 10 | -i JSON_STRING input files 11 | -k STRING step skip condition: output, rerun, none [default: none] 12 | -m JSON_STRING Docker image spec 13 | -o JSON_STRING output files 14 | -q JSON_STRING QC check spec 15 | -r S3_PATH repository path 16 | -s SHELL unix shell to run commands in (bash | sh | sh-pipefail) [default: sh] 17 | -t JSON_STRING global s3 tags 18 | -h show help 19 | --version show version 20 | """ 21 | 22 | from functools import partial, partialmethod 23 | import json 24 | import logging.config 25 | import os 26 | from typing import Dict, List 27 | 28 | from docopt import docopt 29 | 30 | from .cache import get_reference_inputs 31 | from .string_subs import substitute, substitute_image_tag 32 | from .preamble import log_preamble 33 | from .qc_check import do_checks, abort_execution, QCFailure 34 | from .repo import Repository, SkipExecution 35 | from .instance import get_imdsv2_token, tag_this_instance, spot_termination_checker 36 | from .workspace import workspace, write_job_data_file, run_commands, UserCommandsFailed 37 | 38 | logging.basicConfig(level=logging.INFO) 39 | logger = logging.getLogger(__name__) 40 | 41 | 42 | def main(commands: List[str], 43 | image_spec: dict, 44 | inputs: Dict[str, str], 45 | outputs: Dict[str, str | Dict], 46 | qc: List[dict], 47 | references: Dict[str, str], 48 | repo_path: str, 49 | shell: str, 50 | skip: str, 51 | tags: Dict[str, str]) -> int: 52 | exit_code = 0 53 | try: 54 | repo = Repository(repo_path) 55 | 56 | if skip == "rerun": 57 | repo.check_for_previous_run() 58 | elif skip == "output": 59 | repo.check_files_exist(list(outputs.values())) 60 | 61 | repo.clear_run_status() 62 | 63 | job_data_obj = repo.read_job_data() 64 | 65 | jobby_commands = substitute(commands, job_data_obj) 66 | jobby_inputs = substitute(inputs, job_data_obj) 67 | jobby_outputs = substitute(outputs, job_data_obj) # this will recurse down to s3_tags 68 | jobby_references = substitute(references, job_data_obj) 69 | jobby_tags = substitute(tags, job_data_obj) 70 | 71 | jobby_image_spec = substitute_image_tag(image_spec, job_data_obj) 72 | 73 | with workspace() as wrk: 74 | # download references, link to workspace 75 | local_references = get_reference_inputs(jobby_references) 76 | 77 | # download inputs -> returns local filenames 78 | local_inputs = repo.download_inputs(jobby_inputs) 79 | local_outputs = {k.rstrip("!"): v["name"] for k, v in jobby_outputs.items()} 80 | 81 | subbed_commands = substitute(jobby_commands, 82 | local_inputs | 83 | local_outputs | 84 | local_references) 85 | 86 | local_job_data = 
write_job_data_file(job_data_obj, wrk) 87 | 88 | try: 89 | run_commands(jobby_image_spec, subbed_commands, wrk, local_job_data, shell) 90 | do_checks(qc) 91 | 92 | finally: 93 | repo.upload_outputs(jobby_outputs, jobby_tags) 94 | 95 | except UserCommandsFailed as uce: 96 | logger.error(str(uce)) 97 | exit_code = uce.exit_code 98 | 99 | except QCFailure as qcf: 100 | logger.error(str(qcf)) 101 | abort_execution(qcf.failures) 102 | 103 | except SkipExecution as se: 104 | logger.info(str(se)) 105 | pass 106 | 107 | except Exception as e: 108 | logger.exception("bclaw_runner error: ") 109 | exit_code = 199 110 | 111 | else: 112 | repo.put_run_status() 113 | logger.info("runner finished") 114 | 115 | return exit_code 116 | 117 | 118 | def cli() -> int: 119 | log_preamble() 120 | logger.info("---------- bclaw_runner starting ----------") 121 | get_imdsv2_token() 122 | tag_this_instance() 123 | 124 | # create custom log level for user commands 125 | # https://stackoverflow.com/a/55276759 126 | logging.USER_CMD = logging.INFO + 5 # between INFO and WARNING 127 | logging.addLevelName(logging.USER_CMD, "USER_CMD") 128 | logging.Logger.user_cmd = partialmethod(logging.Logger.log, logging.USER_CMD) 129 | logging.user_cmd = partial(logging.log, logging.USER_CMD) 130 | 131 | with spot_termination_checker(): 132 | args = docopt(__doc__, version=os.environ["BC_VERSION"]) 133 | 134 | commands = json.loads(args["-c"]) 135 | image = json.loads(args["-m"]) 136 | inputs = json.loads(args["-i"]) 137 | outputs = json.loads(args["-o"]) 138 | qc = json.loads(args["-q"]) 139 | refs = json.loads(args["-f"]) 140 | repo = args["-r"] 141 | shell = args["-s"] 142 | skip = args["-k"] 143 | tags = json.loads(args["-t"]) 144 | 145 | ret = main(commands, image, inputs, outputs, qc, refs, repo, shell, skip, tags) 146 | return ret 147 | -------------------------------------------------------------------------------- /bclaw_runner/tests/test_string_subs.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | import pytest 4 | 5 | from ..src.runner.string_subs import lookup, substitute, substitute_image_tag 6 | 7 | 8 | @pytest.mark.parametrize("pattern, string, expect", [ 9 | (r"(one)", "one", "wun"), 10 | (r"(two)", "two", "2"), 11 | (r"(three)", "three", ""), 12 | (r"(four)", "four", "False"), 13 | (r"(not_found)", "not_found", "not_found") 14 | ]) 15 | def test_lookup(pattern, string, expect): 16 | spec = { 17 | "one": "wun", 18 | "two": 2, 19 | "three": "", 20 | "four": False, 21 | } 22 | match = re.match(pattern, string) 23 | result = lookup(match, spec) 24 | assert isinstance(result, str) 25 | assert result == expect 26 | 27 | 28 | def test_substitute_string(): 29 | subs = { 30 | "w": "was", 31 | "x": "am", 32 | "y": { 33 | "z": "very", 34 | "t": "singular", 35 | }, 36 | "p": ["exemplar", {"what": "model"}, "blueprint"], 37 | "q": ["modern", "major"], 38 | } 39 | target = "I ${x} the ${y.z} ${p[1].what} of a ${q} ${general}" 40 | result = substitute(target, subs) 41 | expect = "I am the very model of a ['modern', 'major'] ${general}" 42 | assert result == expect 43 | 44 | 45 | def test_substitute_nested(): 46 | subs = { 47 | "metadata": { 48 | "received": "2022-06-01", 49 | }, 50 | } 51 | target = "received on ${metadata.received}" 52 | result = substitute(target, subs) 53 | expect = "received on 2022-06-01" 54 | assert result == expect 55 | 56 | 57 | def test_substitute_falsy_values(): 58 | subs = { 59 | "job": { 60 | "boolean_T": True, 61 | "boolean_F": False, 62 | 
"null": None, 63 | "zero": 0, 64 | "empty_string": "", 65 | } 66 | } 67 | target = "command ${job.boolean_T} ${job.boolean_F} ${job.null} ${job.zero} <${job.empty_string}> ${job.not_found}" 68 | result = substitute(target, subs) 69 | expect = "command True False ${job.null} 0 <> ${job.not_found}" 70 | assert result == expect 71 | 72 | 73 | def test_substitute_recursion(): 74 | subs = { 75 | "a": 99, 76 | "b": "two", 77 | } 78 | target = { 79 | "one": [ 80 | { 81 | "three": "${a}", 82 | "four": "${b}", 83 | }, 84 | [ 85 | "${a}", 86 | "${b}", 87 | ], 88 | "${a} ${b}", 89 | ], 90 | "two": { 91 | "seven": { 92 | "five": "${a}", 93 | "six": "${b}", 94 | }, 95 | "eight": [ 96 | "${a}", 97 | "${b}", 98 | ], 99 | "nine": "${a} ${b}" 100 | }, 101 | } 102 | result = substitute(target, subs) 103 | expect = { 104 | "one": [ 105 | { 106 | "three": "99", 107 | "four": "two", 108 | }, 109 | [ 110 | "99", 111 | "two", 112 | ], 113 | "99 two", 114 | ], 115 | "two": { 116 | "seven": { 117 | "five": "99", 118 | "six": "two", 119 | }, 120 | "eight": [ 121 | "99", 122 | "two", 123 | ], 124 | "nine": "99 two" 125 | }, 126 | } 127 | assert result == expect 128 | 129 | 130 | @pytest.mark.parametrize("original, expect", [ 131 | ("docker.io/library/single:${sub}", "docker.io/library/single:tag"), 132 | ("no_${a}_registry:${sub}", "no_eh_registry:tag"), 133 | ("no_registry:no_subs", "no_registry:no_subs"), 134 | ("public.ecr.aws/docker/library/multi:${a}_${b}_${c}", "public.ecr.aws/docker/library/multi:eh_bee_sea"), 135 | ("123456789012.dkr.ecr.us-east-1.amazonaws.com/no:subs", "123456789012.dkr.ecr.us-east-1.amazonaws.com/no:subs"), 136 | ("123456789012.dkr.ecr.us-east-1.amazonaws.com/no_tags", "123456789012.dkr.ecr.us-east-1.amazonaws.com/no_tags"), 137 | ("myregistryhost:5000/fedora/httpd:${sub}", "myregistryhost:5000/fedora/httpd:tag"), # https://docs.docker.com/engine/reference/commandline/tag/#tag-an-image-for-a-private-repository 138 | ("probably:${a}/highly/${b}/illegal/${c}:${sub}", "probably:${a}/highly/${b}/illegal/sea:tag"), 139 | ]) 140 | def test_substitute_image_tag(original, expect): 141 | spec = { 142 | "sub": "tag", 143 | "a": "eh", 144 | "b": "bee", 145 | "c": "sea", 146 | } 147 | image_spec = {"name": original, "auth": "doesnt_change"} 148 | result = substitute_image_tag(image_spec, spec) 149 | assert result["name"] == expect 150 | assert result["auth"] == "doesnt_change" 151 | 152 | 153 | def test_substitute_tagged_output(): 154 | output_spec = { 155 | "name": "fake_${a}_filename", 156 | "s3_tags": { 157 | "tag1": "value_${a}", 158 | "tag2": "value_${b}", 159 | } 160 | } 161 | 162 | subs = { 163 | "a": 99, 164 | "b": "two", 165 | } 166 | 167 | expect = { 168 | "name": "fake_99_filename", 169 | "s3_tags": { 170 | "tag1": "value_99", 171 | "tag2": "value_two", 172 | } 173 | } 174 | 175 | result = substitute(output_spec, subs) 176 | assert result == expect 177 | -------------------------------------------------------------------------------- /lambda/src/subpipes/subpipes.py: -------------------------------------------------------------------------------- 1 | from concurrent.futures import ThreadPoolExecutor 2 | from contextlib import closing 3 | from functools import partial 4 | import json 5 | import logging 6 | import re 7 | 8 | import boto3 9 | 10 | from lambda_logs import log_preamble, log_event 11 | from repo_utils import SYSTEM_FILE_TAG 12 | from substitutions import substitute_job_data 13 | 14 | logger = logging.getLogger() 15 | logger.setLevel(logging.INFO) 16 | 17 | 18 | def 
get_s3_object(s3_uri: str) -> dict: 19 | logger.info(f"reading {s3_uri}") 20 | bucket, key = s3_uri.split("/", 3)[2:] 21 | obj = boto3.resource("s3").Object(bucket, key) 22 | response = obj.get() 23 | with closing(response["Body"]) as fp: 24 | ret = json.load(fp) 25 | return ret 26 | 27 | 28 | # this is only used to write the _JOB_DATA_ object in the subpipe repo 29 | def put_s3_object(s3_uri: str, body: bytes) -> None: 30 | logger.info(f"writing {s3_uri}") 31 | bucket, key = s3_uri.split("/", 3)[2:] 32 | obj = boto3.resource("s3").Object(bucket, key) 33 | obj.put(Body=body, Tagging=SYSTEM_FILE_TAG) 34 | 35 | 36 | def copy_file_impl(spec: str, src_repo_uri: str, dst_repo_uri: str) -> None: 37 | try: 38 | src_file, dst_file = re.split(r"\s*->\s*", spec) 39 | except ValueError: 40 | src_file = dst_file = spec 41 | 42 | src_uri = f"{src_repo_uri}/{src_file}" 43 | dst_uri = f"{dst_repo_uri}/{dst_file}" 44 | 45 | src_bucket, src_key = src_uri.split("/", 3)[2:] 46 | dst_bucket, dst_key = dst_uri.split("/", 3)[2:] 47 | 48 | logger.info(f"copying s3://{src_bucket}/{src_key} to s3://{dst_bucket}/{dst_key}") 49 | 50 | copy_src = { 51 | "Bucket": src_bucket, 52 | "Key": src_key 53 | } 54 | dst_obj = boto3.resource("s3").Object(dst_bucket, dst_key) 55 | dst_obj.copy(copy_src) 56 | 57 | 58 | def lambda_handler(event: dict, context: object) -> dict: 59 | # submit event = { 60 | # repo: str 61 | # job_data: str | None 62 | # submit: str 63 | # step_name: str 64 | # logging: { 65 | # branch: str 66 | # job_file_bucket: str 67 | # job_file_key: str 68 | # job_file_version: str 69 | # sfn_execution_id: str 70 | # step_name: str 71 | # workflow_name: str 72 | # } 73 | # } 74 | # retrieve event = { 75 | # repo: str 76 | # retrieve: str 77 | # subpipe: { 78 | # sub_repo: str 79 | # } 80 | # logging: { 81 | # branch: str 82 | # job_file_bucket: str 83 | # job_file_key: str 84 | # job_file_version: str 85 | # sfn_execution_id: str 86 | # step_name: str 87 | # workflow_name: str 88 | # } 89 | # } 90 | 91 | log_preamble(**event.pop("logging"), logger=logger) 92 | log_event(logger, event) 93 | 94 | parent_repo = event["repo"] 95 | parent_job_data = get_s3_object(f"{parent_repo}/_JOB_DATA_") 96 | 97 | if "submit" in event: 98 | # establish subpipe repo 99 | sub_repo = f"{parent_repo}/{event['step_name']}" 100 | logger.info(f"{sub_repo=}") 101 | 102 | if (sub_job_data_uri := event.get("job_data")) is not None: 103 | if not sub_job_data_uri.startswith("s3://"): 104 | sub_job_data_uri = f"{parent_repo}/{sub_job_data_uri}" 105 | logger.info(f"{sub_job_data_uri=}") 106 | sub_job_data = get_s3_object(sub_job_data_uri) 107 | 108 | else: 109 | logger.info("using parent job data for subpipe") 110 | sub_job_data = parent_job_data["job"] 111 | 112 | # create job data for subpipe 113 | sub_job_data = { 114 | "job": sub_job_data, 115 | "parent": {}, 116 | "scatter": {}, 117 | } 118 | 119 | # write job data to subpipe repo 120 | sub_job_data_dst = f"{sub_repo}/_JOB_DATA_" 121 | logger.info(f"writing job data to {sub_job_data_dst}") 122 | put_s3_object(sub_job_data_dst, json.dumps(sub_job_data).encode("utf-8")) 123 | 124 | # get submit strings -> spec strings 125 | spec_strings = json.loads(event["submit"]) 126 | 127 | src_repo_uri = parent_repo 128 | dst_repo_uri = sub_repo 129 | 130 | elif "retrieve" in event: 131 | # get retrieve strings -> spec strings 132 | spec_strings = json.loads(event["retrieve"]) 133 | 134 | # get subpipe repo 135 | src_repo_uri = sub_repo = event["subpipe"]["sub_repo"] 136 | dst_repo_uri = parent_repo 
137 | 138 | else: 139 | raise RuntimeError("unknown input type") 140 | 141 | if spec_strings: 142 | # substitute job data into spec strings 143 | subbed_specs = substitute_job_data(spec_strings, parent_job_data) 144 | 145 | # copy files from src repo to dest 146 | with ThreadPoolExecutor(max_workers=len(subbed_specs)) as executor: 147 | copy_file = partial(copy_file_impl, 148 | src_repo_uri=src_repo_uri, 149 | dst_repo_uri=dst_repo_uri) 150 | _ = list(executor.map(copy_file, subbed_specs)) 151 | 152 | # return sub repo 153 | sub_repo_bucket, sub_repo_prefix = sub_repo.split("/", 3)[2:] 154 | ret = { 155 | "sub_repo": { 156 | "bucket": sub_repo_bucket, 157 | "prefix": sub_repo_prefix, 158 | "uri": sub_repo, 159 | } 160 | } 161 | 162 | return ret 163 | -------------------------------------------------------------------------------- /doc/notifications.md: -------------------------------------------------------------------------------- 1 | # BayerCLAW notifications 2 | 3 | BayerCLAW is able to send notifications about job status to users through Amazon's Simple 4 | Notification Service (SNS). Through SNS, users may receive email or SMS text messages when 5 | a job is received for processing or when processing starts, finishes successfully, or 6 | fails. 7 | 8 | ### Subscribing to BayerCLAW notifications 9 | 10 | The default BayerCLAW notification topic is named `bayerclaw2-core-notifications`[1](#f1). 11 | All workflows send notifications to this topic. To receive notifications from a workflow, users must 12 | subscribe to this topic. Using the AWS console: 13 | 14 | 1. Navigate to Amazon SNS → Subscriptions → Create subscription. 15 | 2. Find the `bayerclaw2-core-notifications` topic in the Topic ARN search box. 16 | 3. Choose a protocol, such as Email or SMS. Other protocols, such as AWS Lambda or Amazon SQS, 17 | are also available to use for automation (see [Automation](#automation)). 18 | 4. Enter an endpoint: your email address or mobile number for SMS. 19 | 5. (Optional) To subscribe to a subset of messages, enter a filter policy. 20 | See [Filtering notifications](#filtering-notifications) for details. 21 | 6. Click `Create subscription`. 22 | 23 | AWS will send you an email or text message requesting confirmation of your subscription. You must 24 | accept to start receiving notifications. 25 | 26 | Notification messages will have a format similar to this: 27 | ```yaml 28 | Job input_file_09876543 ('input_file.json') on workflow sample-workflow has finished. 29 | --- 30 | details: 31 | workflow_name: sample-workflow 32 | execution_id: input_file_09876543 33 | job_status: SUCCEEDED 34 | job_data: s3://bclaw-main-launcher-123456789012/sample-workflow/input_file.json 35 | job_data_version: 09876543211234567890 36 | ``` 37 | 38 | See the [SNS documentation](https://docs.aws.amazon.com/sns/latest/dg/sns-create-subscribe-endpoint-to-topic.html) 39 | for more information on SNS subscriptions. 40 | 41 | ### Filtering notifications 42 | 43 | It is very unlikely that you will want to be notified of every event in every workflow that you run. 44 | SNS messages can be filtered based on attributes attached to the message. Filters are 45 | expressed as filter policies that are added to your subscription. 46 | 47 | BayerCLAW provides the following attributes for filtering messages: 48 | 49 | - `workflow_name`: The name of the workflow that sent the notification. 50 | - `status`: The value of the `job_status` detail as shown in the sample 51 | message above.
The possible values of `status` are: 52 | - RECEIVED: your job data file has been received by the workflow. 53 | - RUNNING: execution of your job has started. 54 | - SUCCEEDED: execution finished successfully. 55 | - FAILED: execution finished unsuccessfully. 56 | - ABORTED: the job was aborted, possibly on user request. 57 | - TIMED_OUT: if, somehow, your job manages to run for more than a year, you'll see this one... 58 | - `execution_id`: The ID of the Step Functions execution that sent the notification. 59 | - `job_file_bucket`, `job_file_key` and `job_file_version`: Together, these specify the job data file that 60 | launched the execution in question. 61 | 62 | Filter policies are JSON-formatted documents. 63 | As an example, a filter policy that only allows messages from jobs that failed or were aborted 64 | on workflow `sample-workflow` would look like this: 65 | 66 | ```json5 67 | { 68 | "workflow_name": ["sample-workflow"], 69 | "status": ["FAILED", "ABORTED"] 70 | } 71 | ``` 72 | 73 | For more information on SNS filter policies, see the AWS documentation 74 | [here](https://docs.aws.amazon.com/sns/latest/dg/sns-subscription-filter-policies.html) and 75 | [here](https://docs.aws.amazon.com/sns/latest/dg/message-filtering-apply.html). 76 | 77 | ### Automation 78 | 79 | Besides sending messages to users, SNS can be used to trigger AWS Lambda functions which can in turn 80 | launch follow-on processes or send the notifications on to services like Slack. To facilitate this, 81 | BayerCLAW notification messages are actually YAML-formatted data structures[2](#f2). 82 | 83 | In Python, a BayerCLAW message can be parsed using the [PyYAML package](https://pypi.org/project/PyYAML/) as follows: 84 | 85 | ```python 86 | import yaml 87 | ... 88 | result = list(yaml.safe_load_all(message)) 89 | ``` 90 | 91 | Using this command, a message that looks like this: 92 | 93 | ```yaml 94 | Job input_file_09876543 ('input_file.json') on workflow sample-workflow has finished. 95 | --- 96 | details: 97 | workflow_name: sample-workflow 98 | execution_id: input_file_09876543 99 | job_status: SUCCEEDED 100 | job_data: s3://bclaw-main-launcher-123456789012/sample-workflow/input_file.json 101 | job_data_version: 09876543211234567890 102 | ``` 103 | 104 | will become a data structure that looks like this: 105 | 106 | ```python 107 | [ 108 | "Job input_file_09876543 ('input_file.json') on workflow sample-workflow has finished.", 109 | { 110 | "details": { 111 | "workflow_name": "sample-workflow", 112 | "job_status": "SUCCEEDED", 113 | "execution_id": "input_file_09876543", 114 | "job_data": "s3://bclaw-main-launcher-123456789012/sample-workflow/input_file.json", 115 | "job_data_version": "09876543211234567890", 116 | } 117 | } 118 | ] 119 | ``` 120 | 121 |
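As a sketch of how this can be used for automation, the Lambda function below parses a BayerCLAW notification delivered through SNS and reacts to failed or aborted jobs. This is a minimal illustration rather than part of BayerCLAW itself: it assumes the standard SNS-to-Lambda event format and that PyYAML is packaged with the function, and what you do with the parsed details (here, just logging) is up to you.

```python
import logging

import yaml

logger = logging.getLogger()
logger.setLevel(logging.INFO)


def lambda_handler(event: dict, context: object) -> None:
    # an SNS-triggered Lambda receives a list of records, each wrapping one published message
    for record in event["Records"]:
        message = record["Sns"]["Message"]

        # a BayerCLAW notification is a pair of YAML documents: a summary string and a details mapping
        summary, body = list(yaml.safe_load_all(message))
        details = body["details"]

        if details["job_status"] in ("FAILED", "ABORTED"):
            # a real handler might page someone or post to Slack here
            logger.error(f"{details['workflow_name']} execution {details['execution_id']} "
                         f"ended with status {details['job_status']} (job data: {details['job_data']})")
        else:
            logger.info(summary)
```

Note that a function like this still needs its own subscription to the notification topic (with a filter policy, if desired) and permission for SNS to invoke it.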
122 | 123 | 1 If you have multiple BayerCLAW installations in your account, each installation will have 124 | a topic with a corresponding name[↵](#a1) 125 | 126 | 2 Technically, a pair of YAML documents: a bare string and a mapping. [↵](#a2) 127 | 128 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contribution Guidelines 2 | 3 | ## Pull requests are always welcome 4 | 5 | We're trying very hard to keep our systems simple, lean and focused. We don't want them to be everything for everybody. This means that we might decide 6 | against incorporating a new request. 7 | 8 | 9 | ## Create issues... 10 | 11 | Any significant change should be documented as a GitHub issue before anybody starts working on it. 12 | 13 | 14 | ### ...but check for existing issues first! 15 | 16 | Please take a moment to check that an issue doesn't already exist documenting your request. If it does, it never hurts to add a quick "+1" or "I need this too". This will help prioritize the most common requests. 17 | 18 | 19 | ## Conventions 20 | 21 | Fork the repository and make changes on your fork on a branch: 22 | 23 | 1. Create the right type of issue (defect, enhancement, test, etc) 24 | 2. Name the branch N-something where N is the number of the issue. 25 | 26 | Note that the maintainers work on branches in this repository. 27 | 28 | Work hard to ensure your pull request is valid. This includes code quality, clear naming, and including unit tests. Please read the Code Of Conduct at the bottom of this file. 29 | 30 | Pull request descriptions should be as clear as possible and include a reference to all the issues that they address. In GitHub, you can reference an 31 | issue by adding a line to your commit description that follows the format: 32 | 33 | `Fixes #N` 34 | 35 | where N is the issue number. 36 | 37 | 38 | ## Merge approval 39 | 40 | Repository maintainers will review the pull request and make sure it provides the correct level of code quality & correctness. 41 | 42 | 43 | 44 | ## How are decisions made? 45 | 46 | Short answer: with pull requests to this repository. 47 | 48 | All decisions, big and small, follow the same 3 steps: 49 | 50 | 1. Open a pull request. Anyone can do this. 51 | 52 | 2. Discuss the pull request. Anyone can do this. 53 | 54 | 3. Accept or refuse a pull request. The relevant maintainers do this (see below "Who decides what?") 55 | 56 | 1. Accepting pull requests 57 | 58 | 1. If the pull request appears to be ready to merge, approve it. 59 | 60 | 2. If the pull request has some small problems that need to be changed, make a comment addressing the issues. 61 | 62 | 3. If the changes needed to a PR are small, you can add a "LGTM once the following comments are addressed..." this will reduce needless back and forth. 63 | 64 | 4. If the PR only needs a few changes before being merged, any MAINTAINER can make a replacement PR that incorporates the existing commits and fixes the problems before a fast track merge. 65 | 66 | 2. Closing pull requests 67 | 68 | 1. If a PR appears to be abandoned, after having attempted to contact the original contributor, then a replacement PR may be made. Once the replacement PR is made, any contributor may close the original one. 69 | 70 | 2. If you are not sure if the pull request implements a good feature or you do not understand the purpose of the PR, ask the contributor to provide more documentation. 
If the contributor is not able to adequately explain the purpose of the PR, the PR may be closed by any MAINTAINER. 71 | 72 | 3. If a MAINTAINER feels that the pull request is sufficiently architecturally flawed, or if the pull request needs significantly more design discussion before being considered, the MAINTAINER should close the pull request with a short explanation of what discussion still needs to be had. It is important not to leave such pull requests open, as this will waste both the MAINTAINER's time and the contributor's time. It is not good to string a contributor along for weeks or months, having them make many changes to a PR that will eventually be rejected. 73 | 74 | 75 | ## Who decides what? 76 | 77 | All decisions are pull requests, and the relevant maintainers make decisions by accepting or refusing pull requests. Review and acceptance by anyone is 78 | denoted by adding a comment in the pull request: `LGTM`. However, only currently listed `MAINTAINERS` are counted towards the required majority. 79 | 80 | The maintainers will be listed in the MAINTAINERS file; all of these people will be employees of Bayer. 81 | 82 | 83 | ## I'm a maintainer, should I make pull requests too? 84 | 85 | Yes. Nobody should ever push to master directly. All changes should be made through a pull request. 86 | 87 | ## Code Of Conduct 88 | 89 | As contributors and maintainers of this project, we pledge to respect all people who contribute through reporting issues, posting feature requests, updating documentation, submitting pull requests or patches, and other activities. 90 | 91 | We are committed to making participation in this project a harassment-free experience for everyone, regardless of level of experience, gender, gender identity and expression, sexual orientation, disability, personal appearance, body size, race, ethnicity, age, or religion. 92 | 93 | Examples of unacceptable behavior by participants include the use of sexual language or imagery, derogatory comments or personal attacks, trolling, public or private harassment, insults, or other unprofessional conduct. 94 | 95 | Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct. Project maintainers who do not follow the Code of Conduct may be removed from the project team. 96 | 97 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by opening an issue or contacting one or more of the project maintainers.
98 | 99 | This Code of Conduct is adapted from the Contributor Covenant, version 1.0.0, available at https://www.contributor-covenant.org/version/1/0/0/code-of-conduct.html 100 | -------------------------------------------------------------------------------- /lambda/tests/notifications/test_notifications.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import boto3 4 | import moto 5 | import pytest 6 | import yaml 7 | 8 | from ...src.notifications.notifications import (make_state_change_message, make_message_attributes, 9 | make_sns_payload, lambda_handler) 10 | 11 | WORKFLOW_NAME = "test_workflow" 12 | 13 | REGION = "us-east-1" 14 | EXECUTION_NAME = "12345678-etc-etc" 15 | STATE_MACHINE_NAME = "testStateMachine" 16 | STATE_MACHINE_ARN = f"arn:aws:states:{REGION}:123456789012:stateMachine:{STATE_MACHINE_NAME}" 17 | EXECUTION_ARN = f"arn:aws:states:{REGION}:123456789012:execution:{STATE_MACHINE_NAME}:{EXECUTION_NAME}" 18 | 19 | LAUNCHER_BUCKET = "test-bucket" 20 | JOB_DATA_KEY = "path/to/job.json" 21 | JOB_DATA_VERSION = "1234567890" 22 | JOB_DATA_URI = f"s3://{LAUNCHER_BUCKET}/{JOB_DATA_KEY}" 23 | 24 | 25 | @pytest.fixture(scope="module") 26 | def state_change_event_factory(): 27 | input_obj = { 28 | "job_file": { 29 | "bucket": LAUNCHER_BUCKET, 30 | "key": JOB_DATA_KEY, 31 | "version": JOB_DATA_VERSION, 32 | }, 33 | "index": "main", 34 | } 35 | 36 | def _event_impl(status: str = "UNKNOWN") -> dict: 37 | ret = { 38 | "detail": { 39 | "executionArn": EXECUTION_ARN, 40 | "stateMachineArn": STATE_MACHINE_ARN, 41 | "name": EXECUTION_NAME, 42 | "status": status, 43 | "input": json.dumps(input_obj), 44 | "inputDetails": { 45 | "included": True, 46 | }, 47 | }, 48 | } 49 | return ret 50 | 51 | return _event_impl 52 | 53 | 54 | @pytest.mark.parametrize("status, action", [ 55 | ("RUNNING", "has started."), 56 | ("SUCCEEDED", "has finished."), 57 | ("FAILED", "has failed."), 58 | ("ABORTED", "has been aborted."), 59 | ("TIMED_OUT", "has timed out."), 60 | ]) 61 | def test_make_state_change_message(status, action): 62 | attributes = { 63 | "status": { 64 | "DataType": "String", 65 | "StringValue": status, 66 | }, 67 | "workflow_name": { 68 | "DataType": "String", 69 | "StringValue": WORKFLOW_NAME, 70 | }, 71 | "execution_id": { 72 | "DataType": "String", 73 | "StringValue": EXECUTION_NAME, 74 | }, 75 | "job_file_bucket": { 76 | "DataType": "String", 77 | "StringValue": LAUNCHER_BUCKET, 78 | }, 79 | "job_file_key": { 80 | "DataType": "String", 81 | "StringValue": JOB_DATA_KEY, 82 | }, 83 | "job_file_version": { 84 | "DataType": "String", 85 | "StringValue": JOB_DATA_VERSION, 86 | }, 87 | } 88 | 89 | expected_details = { 90 | "details": { 91 | "workflow_name": WORKFLOW_NAME, 92 | "execution_id": EXECUTION_NAME, 93 | "job_status": status, 94 | "job_data": JOB_DATA_URI, 95 | "job_data_version": JOB_DATA_VERSION, 96 | }, 97 | } 98 | 99 | message = make_state_change_message(attributes) 100 | text, details = yaml.safe_load_all(message) 101 | 102 | assert WORKFLOW_NAME in text 103 | assert EXECUTION_NAME in text 104 | assert "job.json" in text 105 | assert text.endswith(action) 106 | 107 | assert details == expected_details 108 | 109 | 110 | def test_make_message_attributes(state_change_event_factory): 111 | event = state_change_event_factory(status="FAKE_STATUS") 112 | result = make_message_attributes(event) 113 | expect = { 114 | "status": { 115 | "DataType": "String", 116 | "StringValue": "FAKE_STATUS", 117 | }, 118 | "workflow_name": { 119 | 
"DataType": "String", 120 | "StringValue": STATE_MACHINE_NAME, 121 | }, 122 | "execution_id": { 123 | "DataType": "String", 124 | "StringValue": EXECUTION_NAME, 125 | }, 126 | "job_file_bucket": { 127 | "DataType": "String", 128 | "StringValue": LAUNCHER_BUCKET, 129 | }, 130 | "job_file_key": { 131 | "DataType": "String", 132 | "StringValue": JOB_DATA_KEY, 133 | }, 134 | "job_file_version": { 135 | "DataType": "String", 136 | "StringValue": JOB_DATA_VERSION, 137 | }, 138 | } 139 | assert result == expect 140 | 141 | 142 | def test_make_sns_payload(state_change_event_factory, monkeypatch): 143 | monkeypatch.setenv("TOPIC_ARN", "arn:of:fake:topic") 144 | attributes = { 145 | "status": { 146 | "DataType": "String", 147 | "StringValue": "FAKE_STATUS" 148 | }, 149 | "workflow_name": { 150 | "DataType": "String", 151 | "StringValue": WORKFLOW_NAME 152 | } 153 | } 154 | result = make_sns_payload("test message", attributes) 155 | expect = { 156 | "TopicArn": "arn:of:fake:topic", 157 | "Message": "test message", 158 | "Subject": f"{WORKFLOW_NAME}: job fake_status", 159 | "MessageAttributes": attributes, 160 | } 161 | assert result == expect 162 | 163 | 164 | @moto.mock_aws 165 | def test_lambda_handler(monkeypatch, state_change_event_factory): 166 | monkeypatch.setenv("AWS_DEFAULT_REGION", "us-east-1") 167 | sns = boto3.client("sns") 168 | response0 = sns.create_topic(Name="test_topic") 169 | 170 | monkeypatch.setenv("TOPIC_ARN", response0["TopicArn"]) 171 | event = state_change_event_factory(status="SUCCEEDED") 172 | 173 | response = lambda_handler(event, {}) 174 | assert response["ResponseMetadata"]["HTTPStatusCode"] == 200 175 | --------------------------------------------------------------------------------