├── docs ├── source │ ├── _static │ │ └── .gitkeep │ ├── README.rst │ ├── welder.jpg │ ├── util.rst │ ├── core.rst │ ├── cli.rst │ ├── README │ ├── index.rst │ └── conf.py ├── make.bat └── Makefile ├── setup.cfg ├── welder.jpg ├── tests ├── test_data │ ├── echoer │ │ ├── tasks │ │ │ ├── second.txt │ │ │ └── first.txt │ │ ├── run │ │ └── values.json │ ├── pipewelder.json │ └── pipeline_definition.json ├── test_core.py └── test_cli.py ├── requirements.txt ├── .travis.yml ├── Makefile ├── NOTICE ├── .ppt-version ├── pipewelder ├── __init__.py ├── util.py ├── metadata.py ├── cli.py ├── connection.py ├── translator.py └── core.py ├── requirements-dev.txt ├── .editorconfig ├── .gitignore ├── LICENSE ├── MANIFEST.in ├── tox.ini ├── README.rst ├── pavement.py └── setup.py /docs/source/_static/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/source/README.rst: -------------------------------------------------------------------------------- 1 | ../../README.rst -------------------------------------------------------------------------------- /docs/source/welder.jpg: -------------------------------------------------------------------------------- 1 | ../../welder.jpg -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bdist_wheel] 2 | universal=1 3 | -------------------------------------------------------------------------------- /welder.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SimpleFinance/pipewelder/HEAD/welder.jpg -------------------------------------------------------------------------------- /tests/test_data/echoer/tasks/second.txt: -------------------------------------------------------------------------------- 1 | This text should also appear in the output directory. 2 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # Python 2.6 compatibility 2 | # argparse==1.2.1 3 | six==1.9.0 4 | boto==2.36.0 5 | -------------------------------------------------------------------------------- /docs/source/util.rst: -------------------------------------------------------------------------------- 1 | Pipewelder Util 2 | =============== 3 | 4 | .. automodule:: pipewelder.util 5 | :members: 6 | -------------------------------------------------------------------------------- /docs/source/core.rst: -------------------------------------------------------------------------------- 1 | Pipewelder Core API 2 | =================== 3 | 4 | .. automodule:: pipewelder.core 5 | :members: 6 | -------------------------------------------------------------------------------- /docs/source/cli.rst: -------------------------------------------------------------------------------- 1 | Pipewelder Command-Line Interface 2 | ================================= 3 | 4 | .. automodule:: pipewelder.cli 5 | :members: 6 | -------------------------------------------------------------------------------- /tests/test_data/echoer/tasks/first.txt: -------------------------------------------------------------------------------- 1 | This is the first task file for the Echoer pipeline. 2 | 3 | This text should appear in the output directory for this pipeline. 
4 | -------------------------------------------------------------------------------- /docs/source/README: -------------------------------------------------------------------------------- 1 | Run `sphinx-apidoc -o . ../../pipewelder' in this directory. 2 | 3 | This will generate `modules.rst' and `pipewelder.rst'. 4 | 5 | Then include `modules.rst' in your `index.rst' file. 6 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 2.7 3 | env: 4 | - TOXENV=py26 5 | - TOXENV=py27 6 | - TOXENV=py33 7 | - TOXENV=py34 8 | - TOXENV=pypy 9 | - TOXENV=docs 10 | install: 11 | - pip install -r requirements-dev.txt 12 | script: 13 | - tox 14 | -------------------------------------------------------------------------------- /tests/test_data/pipewelder.json: -------------------------------------------------------------------------------- 1 | { 2 | 3 | "defaults" : { 4 | "dirs" : ["*"], 5 | "region" : "us-west-2", 6 | "template" : "pipeline_definition.json" 7 | }, 8 | 9 | "dev" : { 10 | "values" : { 11 | "myEnv" : "dev" 12 | } 13 | } 14 | 15 | } 16 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | VENV := $(CURDIR)/venv 2 | export PATH := $(VENV)/bin:$(PATH) 3 | 4 | test: install 5 | paver test_all 6 | 7 | install: $(VENV) 8 | $(VENV)/bin/pip install -r requirements-dev.txt 9 | 10 | $(VENV): 11 | virtualenv $@ 12 | 13 | requirements.txt: 14 | pip freeze > $@ 15 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | Pipewelder 2 | ========== 3 | 4 | .. toctree:: 5 | :maxdepth: 2 6 | 7 | README 8 | core 9 | util 10 | cli 11 | 12 | .. only:: html 13 | 14 | Indices and tables 15 | ================== 16 | 17 | * :ref:`genindex` 18 | * :ref:`modindex` 19 | * :ref:`search` 20 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | Pipewelder 2 | Copyright 2015 Simple Finance Technology Corporation 3 | 4 | The banner image in the documentation is cropped from an original 5 | photo owned by the PEO ACWA: 6 | https://flic.kr/p/ejYqQe 7 | 8 | Package layout is based on a template by Sean Fisk: 9 | https://github.com/seanfisk/python-project-template 10 | -------------------------------------------------------------------------------- /.ppt-version: -------------------------------------------------------------------------------- 1 | # This file specifies the version of the Python Project Template 2 | # (https://github.com/seanfisk/python-project-template) from which 3 | # this project was created. It is here for the purposes of possibly 4 | # updating this project to use a newer version of the template. Please 5 | # do not remove this file. 
6 | df37ac91c8293f907ac755822702303d75afee3f 7 | -------------------------------------------------------------------------------- /pipewelder/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # flake8: noqa 3 | """ 4 | Scheduled task execution on top of AWS Data Pipeline 5 | """ 6 | import pipewelder.connection 7 | from pipewelder import metadata 8 | from pipewelder.core import * 9 | 10 | __version__ = metadata.version 11 | __author__ = metadata.authors[0] 12 | __license__ = metadata.license 13 | __copyright__ = metadata.copyright 14 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | # Runtime requirements 2 | --requirement requirements.txt 3 | 4 | # Testing 5 | pytest==2.6.4 6 | py==1.4.19 7 | mock==1.0.1 8 | tox==1.8.1 9 | 10 | # Linting 11 | flake8==2.3.0 12 | mccabe==0.3 13 | pep8==1.6.2 14 | pyflakes==0.8.1 15 | 16 | # Documentation 17 | Sphinx==1.2 18 | docutils==0.11 19 | Jinja2==2.7.1 20 | MarkupSafe==0.18 21 | Pygments==1.6 22 | 23 | # Miscellaneous 24 | Paver==1.2.3 25 | colorama==0.2.7 26 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | # -*- mode: conf-unix; -*- 2 | 3 | # EditorConfig is awesome: http://EditorConfig.org 4 | 5 | # top-most EditorConfig file 6 | root = true 7 | 8 | # defaults 9 | [*] 10 | insert_final_newline = true 11 | 12 | # 4 space indentation 13 | [*.{ini,py,py.tpl,rst}] 14 | indent_style = space 15 | indent_size = 4 16 | 17 | # 4-width tabbed indentation 18 | [*.{sh,bat.tpl,Makefile.tpl}] 19 | indent_style = tab 20 | indent_size = 4 21 | 22 | # and travis does its own thing 23 | [.travis.yml] 24 | indent_style = space 25 | indent_size = 2 26 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Emacs rope configuration 2 | .ropeproject 3 | .project 4 | .pydevproject 5 | .settings 6 | 7 | # pyenv version file 8 | .python-version 9 | 10 | # Python 11 | *.py[co] 12 | 13 | ## Packages 14 | *.egg 15 | *.egg-info 16 | dist 17 | build 18 | eggs 19 | parts 20 | bin 21 | var 22 | sdist 23 | deb_dist 24 | develop-eggs 25 | .installed.cfg 26 | 27 | ## Installer logs 28 | pip-log.txt 29 | 30 | ## Unit test / coverage reports 31 | .coverage 32 | .tox 33 | 34 | ## Translations 35 | *.mo 36 | 37 | ## paver generated files 38 | /paver-minilib.zip 39 | 40 | ## virtualenv 41 | /venv 42 | -------------------------------------------------------------------------------- /tests/test_data/echoer/run: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Print as a function on Python 2 as well (the project supports 2.6/2.7). 4 | from __future__ import print_function 5 | 6 | import os 7 | from glob import glob 8 | from shutil import copyfile 9 | 10 | OUTPUT_DIR = os.environ['OUTPUT1_STAGING_DIR'] 11 | 12 | 13 | def write_to_output_dir(task_path): 14 | basename = os.path.basename(task_path) 15 | destination = os.path.join(OUTPUT_DIR, basename) 16 | copyfile(task_path, destination) 17 | 18 | 19 | def main(): 20 | print("Echoer writes some files to", OUTPUT_DIR) 21 | for txtfile in glob("tasks/*.txt"): 22 | write_to_output_dir(txtfile) 23 | 24 | 25 | if __name__ == "__main__": 26 | main() 27 | --------------------------------------------------------------------------------
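The `run` script above only works inside Data Pipeline's staged execution environment, where the ShellCommandActivity has already made the pipeline directory the working directory and exported OUTPUT1_STAGING_DIR. A quick way to exercise it without AWS is to fake that environment; the following sketch is illustrative only (it is not part of the repository) and assumes it runs from the repository root on a POSIX system:

    #!/usr/bin/env python
    """Hypothetical harness: run a Pipewelder `run` script locally."""
    from __future__ import print_function

    import os
    import subprocess
    import tempfile

    pipeline_dir = "tests/test_data/echoer"
    env = dict(os.environ)
    # Data Pipeline exports this variable before invoking `run`.
    env["OUTPUT1_STAGING_DIR"] = tempfile.mkdtemp(prefix="pw-out-")

    # Mirror the template's ShellCommandActivity command:
    # (cd ${INPUT1_STAGING_DIR} && chmod +x run && ./run)
    subprocess.check_call("chmod +x run && ./run", shell=True,
                          cwd=pipeline_dir, env=env)
    print("Output files:", os.listdir(env["OUTPUT1_STAGING_DIR"]))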
/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015 Simple Finance Technology Corp. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | # Informational files 2 | include README.rst 3 | include welder.jpg 4 | include LICENSE 5 | include NOTICE 6 | 7 | # Include docs and tests. It's unclear whether convention dictates 8 | # including built docs. However, Sphinx doesn't include built docs, so 9 | # we are following their lead. 10 | graft docs 11 | prune docs/build 12 | graft tests 13 | 14 | # Exclude any compiled Python files (most likely grafted by the tests/ directory). 15 | global-exclude *.pyc 16 | 17 | # Setup-related things 18 | include pavement.py 19 | include requirements-dev.txt 20 | include requirements.txt 21 | include setup.py 22 | include tox.ini 23 | -------------------------------------------------------------------------------- /pipewelder/util.py: -------------------------------------------------------------------------------- 1 | import os 2 | import contextlib 3 | import json 4 | 5 | 6 | @contextlib.contextmanager 7 | def cd(new_path): 8 | """ 9 | Change to a different directory within a limited context. 10 | """ 11 | saved_path = os.getcwd() 12 | os.chdir(new_path) 13 | try: 14 | yield 15 | finally: 16 | # Restore the original directory even if the body raises. 17 | os.chdir(saved_path) 18 | 19 | 20 | def load_json(filename): 21 | with open(filename) as f: 22 | try: 23 | data = json.load(f) 24 | except ValueError as e: 25 | raise ValueError("Unable to parse '{0}' as json; {1}" 26 | .format(filename, e)) 27 | return data 28 | -------------------------------------------------------------------------------- /pipewelder/metadata.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Project metadata 3 | 4 | Information describing the project. 5 | """ 6 | 7 | # The package name, which is also the "UNIX name" for the project.
8 | package = 'pipewelder' 9 | project = "Pipewelder" 10 | project_no_spaces = project.replace(' ', '') 11 | version = '0.1.4' 12 | description = 'Scheduled task execution on top of AWS Data Pipeline' 13 | authors = ['Jeff Klukas'] 14 | authors_string = ', '.join(authors) 15 | emails = ['klukas@simple.com'] 16 | license = 'Apache V2.0' 17 | copyright = '2015 Simple Finance Technology Corporation' 18 | url = 'http://github.com/jklukas/pipewelder' 19 | -------------------------------------------------------------------------------- /tests/test_data/echoer/values.json: -------------------------------------------------------------------------------- 1 | { 2 | 3 | "values": { 4 | "myName" : "echoer", 5 | "myDescription" : "an example pipeline that simply prints tasks files to STDOUT", 6 | "myEnv" : "this will get replaced by pipewelder.json", 7 | "myS3InputDir": "s3://pipewelder-example/#{myEnv}/echoer/inputs", 8 | "myS3OutputDir": "s3://pipewelder-example/#{myEnv}/echoer/outputs", 9 | "myS3LogDir": "s3://pipewelder-example/#{myEnv}/echoer/logs", 10 | "myStartDateTime": "2015-01-01T00:00:02", 11 | "mySchedulePeriod": "15 minutes", 12 | "myTerminateAfter": "10 minutes", 13 | "myTags": [ 14 | "pipewelder-environment:dev" 15 | ] 16 | } 17 | 18 | } 19 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | # Tox (http://tox.testrun.org/) is a tool for running tests in 2 | # multiple virtualenvs. This configuration file will run the test 3 | # suite on all supported python versions. To use it, "pip install tox" 4 | # and then run "tox" from this directory. 5 | # 6 | # To run tox faster, check out Detox 7 | # (https://pypi.python.org/pypi/detox), which runs your tox runs in 8 | # parallel. To use it, "pip install detox" and then run "detox" from 9 | # this directory. 
10 | 11 | [tox] 12 | envlist = py26,py27,py33,py34,docs 13 | 14 | [testenv] 15 | deps = 16 | --no-deps 17 | --requirement 18 | {toxinidir}/requirements-dev.txt 19 | commands = paver test_all 20 | 21 | [testenv:docs] 22 | basepython = python 23 | commands = paver doc_html 24 | -------------------------------------------------------------------------------- /tests/test_core.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import pytest 4 | import os 5 | 6 | from pipewelder import core 7 | from datetime import datetime, timedelta 8 | 9 | import logging 10 | logging.basicConfig(level=logging.INFO) 11 | 12 | HERE = os.path.abspath(os.path.dirname(__file__)) 13 | DATA_DIR = os.path.join(HERE, 'test_data') 14 | 15 | 16 | def data_path(path): 17 | return os.path.join(DATA_DIR, path) 18 | 19 | 20 | def test_adjusted_to_future(): 21 | now = datetime.utcnow() 22 | timestamp = "{0}-01-01T00:00:00".format(now.year) 23 | adjusted = core.adjusted_to_future(timestamp, "1 days") 24 | target_dt = datetime(now.year, now.month, now.day) + timedelta(days=1) 25 | assert adjusted == target_dt.strftime(core.PIPELINE_DATETIME_FORMAT) 26 | 27 | 28 | @pytest.fixture 29 | def pipeline_description(): 30 | return { 31 | u'description': u'my description', 32 | u'fields': [ 33 | {u'key': u'@pipelineState', u'stringValue': u'PENDING'}, 34 | {u'key': u'@creationTime', u'stringValue': u'2015-02-11T21:17:10'}, 35 | {u'key': u'@sphere', u'stringValue': u'PIPELINE'}, 36 | {u'key': u'uniqueId', u'stringValue': u'pipeweldertest1'}, 37 | {u'key': u'@accountId', u'stringValue': u'543715240000'}, 38 | {u'key': u'description', u'stringValue': u'my description'}, 39 | {u'key': u'name', u'stringValue': u'Pipewelder test'}, 40 | {u'key': u'pipelineCreator', u'stringValue': u'AIDAIWZQRURDOOOOO'}, 41 | {u'key': u'@id', u'stringValue': u'df-07437251YGRXOY19OOOO'}, 42 | {u'key': u'@userId', u'stringValue': u'AIDAIWZQRURDXI4UKOOOO'}], 43 | u'name': u'Pipewelder test', 44 | u'pipelineId': u'df-07437251YGRXOY19OOOO', 45 | u'tags': [], 46 | } 47 | 48 | 49 | def test_pipeline_state(pipeline_description): 50 | state = core.fetch_field_value(pipeline_description, '@pipelineState') 51 | assert state == 'PENDING' 52 | -------------------------------------------------------------------------------- /tests/test_cli.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from pytest import raises 3 | 4 | # The parametrize function is generated, so this doesn't work: 5 | # 6 | # from pytest.mark import parametrize 7 | # 8 | import pytest 9 | parametrize = pytest.mark.parametrize # NOPEP8 10 | 11 | import os 12 | 13 | from pipewelder.cli import pipewelder_configs, main, metadata 14 | 15 | import logging 16 | logging.basicConfig(level=logging.INFO) 17 | 18 | HERE = os.path.abspath(os.path.dirname(__file__)) 19 | DATA_DIR = os.path.join(HERE, 'test_data') 20 | 21 | 22 | def data_path(path): 23 | return os.path.join(DATA_DIR, path) 24 | 25 | 26 | def test_pipewelder_configs(): 27 | configs = pipewelder_configs(data_path('pipewelder.json')) 28 | assert configs["dev"] == { 29 | "name": "dev", 30 | "dirs": ["echoer"], 31 | "region": "us-west-2", 32 | "template": "pipeline_definition.json", 33 | "values": { 34 | "myEnv": "dev" 35 | } 36 | } 37 | 38 | 39 | class TestMain(object): 40 | @parametrize('helparg', ['-h', '--help']) 41 | def test_help(self, helparg, capsys): 42 | with raises(SystemExit) as exc_info: 43 | main(['progname',
helparg]) 44 | out, err = capsys.readouterr() 45 | # Should have printed some sort of usage message. We don't 46 | # need to explicitly test the content of the message. 47 | assert 'usage' in out 48 | # Should have used the program name from the argument 49 | # vector. 50 | assert 'progname' in out 51 | # Should exit with zero return code. 52 | assert exc_info.value.code == 0 53 | 54 | @parametrize('versionarg', ['-V', '--version']) 55 | def test_version(self, versionarg, capsys): 56 | with raises(SystemExit) as exc_info: 57 | main(['progname', versionarg]) 58 | out, err = capsys.readouterr() 59 | # Should print out version. 60 | expected = '{0} {1}\n'.format(metadata.project, metadata.version) 61 | assert (out == expected or err == expected) 62 | # Should exit with zero return code. 63 | assert exc_info.value.code == 0 64 | -------------------------------------------------------------------------------- /tests/test_data/pipeline_definition.json: -------------------------------------------------------------------------------- 1 | { 2 | "objects" : [ 3 | { 4 | "id" : "Default", 5 | "scheduleType" : "cron", 6 | "failureAndRerunMode" : "CASCADE", 7 | "schedule" : { "ref" : "PipewelderSchedule" }, 8 | "pipelineLogUri" : "#{myS3LogDir}", 9 | "role" : "DataPipelineDefaultRole", 10 | "resourceRole" : "DataPipelineDefaultResourceRole" 11 | }, 12 | { 13 | "id" : "PipewelderShellCommandActivity", 14 | "command" : "(cd ${INPUT1_STAGING_DIR} && chmod +x run && ./run) > ${OUTPUT1_STAGING_DIR}/stdout.txt", 15 | "runsOn" : { "ref" : "PipewelderEC2Resource" }, 16 | "input" : { "ref" : "PipewelderS3InputLocation" }, 17 | "output" : { "ref" : "PipewelderS3OutputLocation" }, 18 | "type" : "ShellCommandActivity", 19 | "stage" : "true" 20 | }, 21 | { 22 | "id" : "PipewelderSchedule", 23 | "startDateTime" : "#{myStartDateTime}", 24 | "type" : "Schedule", 25 | "period" : "#{mySchedulePeriod}" 26 | }, 27 | { 28 | "id" : "PipewelderEC2Resource", 29 | "terminateAfter" : "#{myTerminateAfter}", 30 | "instanceType" : "t1.micro", 31 | "type" : "Ec2Resource" 32 | }, 33 | { 34 | "id" : "PipewelderS3InputLocation", 35 | "directoryPath" : "#{myS3InputDir}", 36 | "type" : "S3DataNode" 37 | }, 38 | { 39 | "id" : "PipewelderS3OutputLocation", 40 | "directoryPath" : "#{myS3OutputDir}/#{format(@scheduledStartTime, 'YYYY-MM-dd_HHmmss')}", 41 | "type" : "S3DataNode" 42 | } 43 | ], 44 | 45 | "parameters" : [ 46 | { 47 | "id": "myName", 48 | "description": "A unique name for this pipeline, passed to CreatePipeline", 49 | "type": "String" 50 | }, 51 | { 52 | "id": "myDescription", 53 | "description": "A description of this pipeline, passed to CreatePipeline", 54 | "type": "String" 55 | }, 56 | { 57 | "id": "myTags", 58 | "description": "A list of tag:value pairs, passed to CreatePipeline", 59 | "default": [], 60 | "type": "String", 61 | "isArray": "True" 62 | }, 63 | { 64 | "id": "myS3InputDir", 65 | "description": "S3 directory where the run executable lives, destination for Pipewelder 'upload' commands", 66 | "type": "AWS::S3::ObjectKey" 67 | }, 68 | { 69 | "id": "myS3OutputDir", 70 | "description": "S3 directory where output files are collected", 71 | "type": "AWS::S3::ObjectKey" 72 | }, 73 | { 74 | "id": "myS3LogDir", 75 | "description": "S3 log folder", 76 | "type": "AWS::S3::ObjectKey" 77 | }, 78 | { 79 | "id": "myStartDateTime", 80 | "description": "Instant for the first run; Pipewelder will add multiples of mySchedulePeriod to ensure this instant is in the future", 81 | "type": "String" 82 | }, 83 | { 84 | "id": 
"mySchedulePeriod", 85 | "description": "How often to run, such as '1 hours'", 86 | "type": "String" 87 | }, 88 | { 89 | "id": "myTerminateAfter", 90 | "default": "#{format(minusMinutes(#{mySchedulePeriod}, 10))}", 91 | "description": "duration after which the run should be terminated", 92 | "type": "String" 93 | } 94 | ] 95 | } 96 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | Pipewelder 2 | ========== 3 | 4 | .. figure:: welder.jpg 5 | :alt: A worker welding a pipe 6 | 7 | Pipewelder is a framework that provides a command-line tool and Python 8 | API to manage `AWS Data 9 | Pipeline `__ jobs from flat files. 10 | Simple uses it as a cron-like job scheduler. 11 | 12 | Source 13 | https://github.com/SimpleFinance/pipewelder 14 | 15 | Documentation 16 | http://pipewelder.readthedocs.org 17 | 18 | PyPI 19 | https://pypi.python.org/pypi/pipewelder 20 | 21 | Overview 22 | -------- 23 | 24 | Pipewelder aims to ease the task of scheduling jobs by defining very 25 | simple pipelines which are little more than an execution schedule, 26 | offloading most of the execution logic to files in S3. Pipewelder uses 27 | Data Pipeline's concept of `data 28 | staging `__ 29 | to pull input files from S3 at the beginning of execution and to upload 30 | output files back to S3 at the end of execution. 31 | 32 | If you follow Pipewelder's directory structure, all of your pipeline 33 | logic can live in version-controlled flat files. The included 34 | command-line interface gives you simple commands to validate your 35 | pipeline definitions, upload task definitions to S3, and activate your 36 | pipelines. 37 | 38 | Installation 39 | ------------ 40 | 41 | Pipewelder is available from `PyPI `__ via 42 | ``pip`` and is compatible with Python 2.6, 2.7, 3.3, and 3.4: 43 | 44 | :: 45 | 46 | pip install pipewelder 47 | 48 | The easiest way to get started is to clone the project from GitHub, copy 49 | the example project from Pipewelder's tests, and then modify to suit: 50 | 51 | .. code:: bash 52 | 53 | git clone https://github.com/SimpleFinance/pipewelder.git 54 | cp -r pipewelder/tests/test_data my-pipewelder-project 55 | 56 | If you're setting up Pipewelder and need help, feel free to email the 57 | author. 58 | 59 | Development 60 | ----------- 61 | 62 | To do development on Pipewelder, clone the repository and run ``make`` 63 | to install dependencies and run tests. 64 | 65 | Directory Structure 66 | ------------------- 67 | 68 | To use Pipewelder, you provide a template pipeline definition along with 69 | one or more directories that correspond to particular pipeline 70 | instances. The directory structure looks like this (see 71 | `test\_data `__ for a working example): 72 | 73 | :: 74 | 75 | pipeline_definition.json 76 | pipewelder.json <- optional configuration file 77 | my_first_pipeline/ 78 | run 79 | values.json 80 | tasks/ 81 | task1.sh 82 | task2.sh 83 | my_second_pipeline/ 84 | ... 85 | 86 | The ``values.json`` file in each pipeline directory specifies parameter 87 | values that are used modify the template definition including the S3 88 | paths for inputs, outputs, and logs. Some of these values are used 89 | directly by Pipewelder as well. 90 | 91 | A 92 | ```ShellCommandActivity`` `__ 93 | in the template definition simply looks for an executable file named 94 | ``run`` and executes it. ``run`` is the entry point for whatever work 95 | you want your pipeline to do. 
96 | 97 | Often, your ``run`` executable will be a wrapper script to execute a 98 | variety of similar tasks. When that's the case, use the ``tasks`` 99 | subdirectory to hold these definitions. These tasks could be text files, 100 | shell scripts, SQL code, or whatever else your ``run`` file expects. 101 | Pipewelder gives the ``tasks`` folder special treatment in that the CLI will 102 | make sure to remove existing task definitions when uploading files. 103 | 104 | Using the Command-Line Interface 105 | -------------------------------- 106 | 107 | The Pipewelder CLI should always be invoked from the top-level directory 108 | of your definitions (the directory where ``pipeline_definition.json`` 109 | lives). If your directory structure matches Pipewelder's expectations, 110 | it should work without further configuration. 111 | 112 | As you make changes to your template definition or ``values.json`` 113 | files, it can be useful to check whether AWS considers your definitions 114 | valid: 115 | 116 | :: 117 | 118 | $ pipewelder validate 119 | 120 | Once you've defined your pipelines, you'll need to upload the files to 121 | S3: 122 | 123 | :: 124 | 125 | $ pipewelder upload 126 | 127 | Finally, activate your pipelines: 128 | 129 | :: 130 | 131 | $ pipewelder activate 132 | 133 | Any time you change the ``values.json`` or ``pipeline_definition.json``, 134 | you'll need to run the ``activate`` subcommand again. Because active 135 | pipelines can't be modified, the ``activate`` command will delete the 136 | existing pipeline and create a new one in its place. The run history for 137 | the previous pipeline will be discarded. 138 | 139 | Acknowledgments 140 | --------------- 141 | 142 | Pipewelder's package structure is based on 143 | `python-project-template <https://github.com/seanfisk/python-project-template>`__. 144 | -------------------------------------------------------------------------------- /pipewelder/cli.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | The Pipewelder command-line interface. 5 | """ 6 | 7 | from __future__ import print_function 8 | 9 | import argparse 10 | import os 11 | import sys 12 | import boto.datapipeline 13 | 14 | from glob import glob 15 | 16 | from pipewelder import metadata, util, Pipewelder 17 | 18 | import logging 19 | logging.basicConfig(level="INFO") 20 | 21 | 22 | CONFIG_DEFAULTS = { 23 | "dirs": ["*"], 24 | "region": "", 25 | "template": "pipeline_definition.json", 26 | "values": {}, 27 | } 28 | 29 | 30 | def main(argv): 31 | """Program entry point.
32 | :param argv: command-line arguments 33 | :type argv: :class:`list` 34 | """ 35 | author_strings = [] 36 | for name, email in zip(metadata.authors, metadata.emails): 37 | author_strings.append('Author: {0} <{1}>'.format(name, email)) 38 | 39 | epilog = ''' 40 | Pipewelder {version} 41 | {authors} 42 | URL: <{url}> 43 | '''.format( 44 | project=metadata.project, 45 | version=metadata.version, 46 | authors='\n'.join(author_strings), 47 | url=metadata.url) 48 | 49 | parser = argparse.ArgumentParser( 50 | prog=argv[0], 51 | formatter_class=argparse.RawDescriptionHelpFormatter, 52 | description=metadata.description, 53 | epilog=epilog) 54 | parser.add_argument( 55 | '-V', '--version', 56 | action='version', 57 | version='{0} {1}'.format(metadata.project, metadata.version)) 58 | parser.add_argument( 59 | 'action', 60 | help="""Action to take: 61 | 'validate' pipeline definitions with AWS; 62 | 'put-definition' of pipelines to AWS; 63 | 'upload' pipeline files to myInputS3Dir; 64 | 'activate' defined pipelines (also puts definitions if needed); 65 | 'delete' pipelines from AWS 66 | """) 67 | parser.add_argument( 68 | '--group', 69 | default=None, 70 | help="Group within pipewelder.json to act on; defaults to all") 71 | 72 | args = parser.parse_args(args=argv[1:]) 73 | args.action = args.action.replace('-', '_') 74 | 75 | defaults = {} 76 | 77 | if 'AWS_ACCESS_KEY_ID' not in os.environ: 78 | parser.error("Must set AWS_ACCESS_KEY_ID") 79 | if 'AWS_SECRET_ACCESS_KEY' not in os.environ: 80 | parser.error("Must set AWS_SECRET_ACCESS_KEY") 81 | if 'AWS_DEFAULT_REGION' in os.environ: 82 | defaults['region'] = os.environ['AWS_DEFAULT_REGION'] 83 | 84 | config_path = (os.path.exists('pipewelder.json') and 85 | 'pipewelder.json' or None) 86 | configs = pipewelder_configs(config_path, defaults) 87 | print("Reading configuration from {0}".format(config_path)) 88 | 89 | for name, config in configs.items(): 90 | if args.group and args.group != name: 91 | continue 92 | if name == 'defaults': 93 | continue 94 | print("Acting on configuration '{0}'".format(name)) 95 | conn = boto.datapipeline.connect_to_region(config['region']) 96 | pw = build_pipewelder(conn, config) 97 | if not execute_pipewelder_action(pw, args.action): 98 | return 1 99 | 100 | return 0 101 | 102 | 103 | def entry_point(): 104 | """ 105 | Zero-argument entry point for use with setuptools/distribute. 106 | """ 107 | raise SystemExit(main(sys.argv)) 108 | 109 | 110 | def build_pipewelder(conn, config): 111 | """ 112 | Return a Pipewelder object defined by *config*. 113 | """ 114 | try: 115 | pw = Pipewelder(conn, config['template']) 116 | except IOError as e: 117 | print(e) 118 | raise SystemExit(1) # callers expect a Pipewelder, not an error code 119 | for d in config['dirs']: 120 | p = pw.add_pipeline(d) 121 | for k, v in config["values"].items(): 122 | p.values[k] = v 123 | return pw 124 | 125 | 126 | def execute_pipewelder_action(pw, action): 127 | return_value = call_method(pw, action) 128 | if not return_value: 129 | print("Failed '{0}' action" 130 | .format(action)) 131 | return return_value 132 | 133 | 134 | def pipewelder_configs(filename=None, defaults=None): 135 | """ 136 | Parse json from *filename* for Pipewelder object configurations. 137 | 138 | Returns a dict which maps config names to dicts of options.
139 | """ 140 | if filename is None: 141 | data = {"pipewelder": {}} 142 | dirname = os.path.abspath('.') 143 | else: 144 | dirname = os.path.dirname(os.path.abspath(filename)) 145 | data = util.load_json(filename) 146 | defaults = defaults or {} 147 | data_defaults = data.get('defaults', {}) 148 | defaults = dict(list(CONFIG_DEFAULTS.items()) + 149 | list(data_defaults.items()) + 150 | list(defaults.items())) 151 | outputs = {} 152 | for name in data: 153 | if name == 'defaults': 154 | continue 155 | this_config = dict(list(defaults.items()) + 156 | list(data[name].items())) 157 | dirs = [] 158 | with util.cd(dirname): 159 | for entry in this_config['dirs']: 160 | for item in glob(entry): 161 | if os.path.exists(os.path.join(item, 'values.json')): 162 | dirs.append(item) 163 | outputs[name] = { 164 | "name": name, 165 | "dirs": dirs, 166 | "region": this_config['region'], 167 | "template": this_config['template'], 168 | "values": this_config['values'], 169 | } 170 | return outputs 171 | 172 | 173 | def call_method(obj, name): 174 | """ 175 | Call the method *name* on *obj*. 176 | """ 177 | return getattr(obj, name)() 178 | 179 | 180 | if __name__ == '__main__': 181 | entry_point() 182 | -------------------------------------------------------------------------------- /pipewelder/connection.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # The code in this file is modified from: 4 | # https://github.com/boto/boto/blob/2.36.0/boto/datapipeline/layer1.py 5 | # 6 | # The original code carries the following license: 7 | # # Copyright (c) 2013 Amazon.com, Inc. or its affiliates. All Rights Reserved 8 | # # 9 | # # Permission is hereby granted, free of charge, to any person obtaining a 10 | # # copy of this software and associated documentation files (the 11 | # # "Software"), to deal in the Software without restriction, including 12 | # # without limitation the rights to use, copy, modify, merge, publish, dis- 13 | # # tribute, sublicense, and/or sell copies of the Software, and to permit 14 | # # persons to whom the Software is furnished to do so, subject to the fol- 15 | # # lowing conditions: 16 | # # 17 | # # The above copyright notice and this permission notice shall be included 18 | # # in all copies or substantial portions of the Software. 19 | # # 20 | # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 21 | # # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL- 22 | # # ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT 23 | # # SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 24 | # # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 25 | # # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 26 | # # IN THE SOFTWARE. 27 | 28 | """ 29 | A patch to the boto DataPipelineConnection object. 30 | 31 | As of boto 2.36.0, putting and validating pipeline parameters/values 32 | was not supported. 33 | """ 34 | 35 | import json 36 | 37 | from boto.datapipeline.layer1 import DataPipelineConnection 38 | 39 | 40 | def put_pipeline_definition(self, 41 | pipeline_objects, 42 | pipeline_id, 43 | parameter_objects=None, 44 | parameter_values=None): 45 | """ 46 | Adds tasks, schedules, and preconditions that control the 47 | behavior of the pipeline. You can use PutPipelineDefinition to 48 | populate a new pipeline or to update an existing pipeline that 49 | has not yet been activated. 
50 | """ 51 | params = { 52 | 'pipelineId': pipeline_id, 53 | 'pipelineObjects': pipeline_objects, 54 | } 55 | if parameter_objects is not None: 56 | params['parameterObjects'] = parameter_objects 57 | if parameter_values is not None: 58 | params['parameterValues'] = parameter_values 59 | return self.make_request(action='PutPipelineDefinition', 60 | body=json.dumps(params)) 61 | 62 | 63 | def validate_pipeline_definition(self, 64 | pipeline_objects, 65 | pipeline_id, 66 | parameter_objects=None, 67 | parameter_values=None): 68 | """ 69 | Tests the pipeline definition with a set of validation checks 70 | to ensure that it is well formed and can run without error. 71 | """ 72 | params = { 73 | 'pipelineId': pipeline_id, 74 | 'pipelineObjects': pipeline_objects, 75 | } 76 | if parameter_objects is not None: 77 | params['parameterObjects'] = parameter_objects 78 | if parameter_values is not None: 79 | params['parameterValues'] = parameter_values 80 | return self.make_request(action='ValidatePipelineDefinition', 81 | body=json.dumps(params)) 82 | 83 | 84 | def create_pipeline(self, name, unique_id, description=None, tags=None): 85 | """ 86 | Creates a new empty pipeline. When this action succeeds, you 87 | can then use the PutPipelineDefinition action to populate the 88 | pipeline. 89 | :type name: string 90 | :param name: The name of the new pipeline. You can use the same name 91 | for multiple pipelines associated with your AWS account, because 92 | AWS Data Pipeline assigns each new pipeline a unique pipeline 93 | identifier. 94 | :type unique_id: string 95 | :param unique_id: A unique identifier that you specify. This identifier 96 | is not the same as the pipeline identifier assigned by AWS Data 97 | Pipeline. You are responsible for defining the format and ensuring 98 | the uniqueness of this identifier. You use this parameter to ensure 99 | idempotency during repeated calls to CreatePipeline. For example, 100 | if the first call to CreatePipeline does not return a clear 101 | success, you can pass in the same unique identifier and pipeline 102 | name combination on a subsequent call to CreatePipeline. 103 | CreatePipeline ensures that if a pipeline already exists with the 104 | same name and unique identifier, a new pipeline will not be 105 | created. Instead, you'll receive the pipeline identifier from the 106 | previous attempt. The uniqueness of the name and unique identifier 107 | combination is scoped to the AWS account or IAM user credentials. 108 | :type description: string 109 | :param description: The description of the new pipeline. 
110 | """ 111 | params = { 112 | 'name': name, 113 | 'uniqueId': unique_id, 114 | } 115 | if description is not None: 116 | params['description'] = description 117 | if tags is not None: 118 | params['tags'] = tags 119 | return self.make_request(action='CreatePipeline', 120 | body=json.dumps(params)) 121 | 122 | 123 | DataPipelineConnection.put_pipeline_definition = ( 124 | put_pipeline_definition) 125 | DataPipelineConnection.validate_pipeline_definition = ( 126 | validate_pipeline_definition) 127 | DataPipelineConnection.create_pipeline = ( 128 | create_pipeline) 129 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | REM Command file for Sphinx documentation 4 | 5 | if "%SPHINXBUILD%" == "" ( 6 | set SPHINXBUILD=sphinx-build 7 | ) 8 | set BUILDDIR=build 9 | set SPHINXOPTS=-W 10 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% source 11 | set I18NSPHINXOPTS=%SPHINXOPTS% source 12 | if NOT "%PAPER%" == "" ( 13 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% 14 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% 15 | ) 16 | 17 | if "%1" == "" goto help 18 | 19 | if "%1" == "help" ( 20 | :help 21 | echo.Please use `make ^` where ^ is one of 22 | echo. html to make standalone HTML files 23 | echo. dirhtml to make HTML files named index.html in directories 24 | echo. singlehtml to make a single large HTML file 25 | echo. pickle to make pickle files 26 | echo. json to make JSON files 27 | echo. htmlhelp to make HTML files and a HTML help project 28 | echo. qthelp to make HTML files and a qthelp project 29 | echo. devhelp to make HTML files and a Devhelp project 30 | echo. epub to make an epub 31 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter 32 | echo. text to make text files 33 | echo. man to make manual pages 34 | echo. texinfo to make Texinfo files 35 | echo. gettext to make PO message catalogs 36 | echo. changes to make an overview over all changed/added/deprecated items 37 | echo. linkcheck to check all external links for integrity 38 | echo. doctest to run all doctests embedded in the documentation if enabled 39 | goto end 40 | ) 41 | 42 | if "%1" == "clean" ( 43 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i 44 | del /q /s %BUILDDIR%\* 45 | goto end 46 | ) 47 | 48 | if "%1" == "html" ( 49 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html 50 | if errorlevel 1 exit /b 1 51 | echo. 52 | echo.Build finished. The HTML pages are in %BUILDDIR%/html. 53 | goto end 54 | ) 55 | 56 | if "%1" == "dirhtml" ( 57 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml 58 | if errorlevel 1 exit /b 1 59 | echo. 60 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. 61 | goto end 62 | ) 63 | 64 | if "%1" == "singlehtml" ( 65 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml 66 | if errorlevel 1 exit /b 1 67 | echo. 68 | echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. 69 | goto end 70 | ) 71 | 72 | if "%1" == "pickle" ( 73 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle 74 | if errorlevel 1 exit /b 1 75 | echo. 76 | echo.Build finished; now you can process the pickle files. 77 | goto end 78 | ) 79 | 80 | if "%1" == "json" ( 81 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json 82 | if errorlevel 1 exit /b 1 83 | echo. 84 | echo.Build finished; now you can process the JSON files. 
85 | goto end 86 | ) 87 | 88 | if "%1" == "htmlhelp" ( 89 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp 90 | if errorlevel 1 exit /b 1 91 | echo. 92 | echo.Build finished; now you can run HTML Help Workshop with the ^ 93 | .hhp project file in %BUILDDIR%/htmlhelp. 94 | goto end 95 | ) 96 | 97 | if "%1" == "qthelp" ( 98 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp 99 | if errorlevel 1 exit /b 1 100 | echo. 101 | echo.Build finished; now you can run "qcollectiongenerator" with the ^ 102 | .qhcp project file in %BUILDDIR%/qthelp, like this: 103 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\Pipewelder.qhcp 104 | echo.To view the help file: 105 | echo.^> assistant -collectionFile %BUILDDIR%\qthelp\Pipewelder.qhc 106 | goto end 107 | ) 108 | 109 | if "%1" == "devhelp" ( 110 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp 111 | if errorlevel 1 exit /b 1 112 | echo. 113 | echo.Build finished. 114 | goto end 115 | ) 116 | 117 | if "%1" == "epub" ( 118 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub 119 | if errorlevel 1 exit /b 1 120 | echo. 121 | echo.Build finished. The epub file is in %BUILDDIR%/epub. 122 | goto end 123 | ) 124 | 125 | if "%1" == "latex" ( 126 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 127 | if errorlevel 1 exit /b 1 128 | echo. 129 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. 130 | goto end 131 | ) 132 | 133 | if "%1" == "text" ( 134 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text 135 | if errorlevel 1 exit /b 1 136 | echo. 137 | echo.Build finished. The text files are in %BUILDDIR%/text. 138 | goto end 139 | ) 140 | 141 | if "%1" == "man" ( 142 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man 143 | if errorlevel 1 exit /b 1 144 | echo. 145 | echo.Build finished. The manual pages are in %BUILDDIR%/man. 146 | goto end 147 | ) 148 | 149 | if "%1" == "texinfo" ( 150 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo 151 | if errorlevel 1 exit /b 1 152 | echo. 153 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. 154 | goto end 155 | ) 156 | 157 | if "%1" == "gettext" ( 158 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale 159 | if errorlevel 1 exit /b 1 160 | echo. 161 | echo.Build finished. The message catalogs are in %BUILDDIR%/locale. 162 | goto end 163 | ) 164 | 165 | if "%1" == "changes" ( 166 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes 167 | if errorlevel 1 exit /b 1 168 | echo. 169 | echo.The overview file is in %BUILDDIR%/changes. 170 | goto end 171 | ) 172 | 173 | if "%1" == "linkcheck" ( 174 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck 175 | if errorlevel 1 exit /b 1 176 | echo. 177 | echo.Link check complete; look for any errors in the above output ^ 178 | or in %BUILDDIR%/linkcheck/output.txt. 179 | goto end 180 | ) 181 | 182 | if "%1" == "doctest" ( 183 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest 184 | if errorlevel 1 exit /b 1 185 | echo. 186 | echo.Testing of doctests in the sources finished, look at the ^ 187 | results in %BUILDDIR%/doctest/output.txt. 188 | goto end 189 | ) 190 | 191 | :end 192 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 
5 | SPHINXOPTS = -W 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = build 9 | 10 | # Internal variables. 11 | PAPEROPT_a4 = -D latex_paper_size=a4 12 | PAPEROPT_letter = -D latex_paper_size=letter 13 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source 14 | # the i18n builder cannot share the environment and doctrees with the others 15 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source 16 | 17 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext 18 | 19 | help: 20 | @echo "Please use \`make ' where is one of" 21 | @echo " html to make standalone HTML files" 22 | @echo " dirhtml to make HTML files named index.html in directories" 23 | @echo " singlehtml to make a single large HTML file" 24 | @echo " pickle to make pickle files" 25 | @echo " json to make JSON files" 26 | @echo " htmlhelp to make HTML files and a HTML help project" 27 | @echo " qthelp to make HTML files and a qthelp project" 28 | @echo " devhelp to make HTML files and a Devhelp project" 29 | @echo " epub to make an epub" 30 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 31 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 32 | @echo " text to make text files" 33 | @echo " man to make manual pages" 34 | @echo " texinfo to make Texinfo files" 35 | @echo " info to make Texinfo files and run them through makeinfo" 36 | @echo " gettext to make PO message catalogs" 37 | @echo " changes to make an overview of all changed/added/deprecated items" 38 | @echo " linkcheck to check all external links for integrity" 39 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 40 | 41 | clean: 42 | -rm -rf $(BUILDDIR)/* 43 | 44 | html: 45 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 46 | @echo 47 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 48 | 49 | dirhtml: 50 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 51 | @echo 52 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 53 | 54 | singlehtml: 55 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 56 | @echo 57 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 58 | 59 | pickle: 60 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 61 | @echo 62 | @echo "Build finished; now you can process the pickle files." 63 | 64 | json: 65 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 66 | @echo 67 | @echo "Build finished; now you can process the JSON files." 68 | 69 | htmlhelp: 70 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 71 | @echo 72 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 73 | ".hhp project file in $(BUILDDIR)/htmlhelp." 74 | 75 | qthelp: 76 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 77 | @echo 78 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 79 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 80 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/Pipewelder.qhcp" 81 | @echo "To view the help file:" 82 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/Pipewelder.qhc" 83 | 84 | devhelp: 85 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 86 | @echo 87 | @echo "Build finished." 
88 | @echo "To view the help file:" 89 | @echo "# mkdir -p $HOME/.local/share/devhelp/Pipewelder" 90 | @echo "# ln -s $(BUILDDIR)/devhelp $HOME/.local/share/devhelp/Pipewelder" 91 | @echo "# devhelp" 92 | 93 | epub: 94 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 95 | @echo 96 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 97 | 98 | latex: 99 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 100 | @echo 101 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 102 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 103 | "(use \`make latexpdf' here to do that automatically)." 104 | 105 | latexpdf: 106 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 107 | @echo "Running LaTeX files through pdflatex..." 108 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 109 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 110 | 111 | text: 112 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 113 | @echo 114 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 115 | 116 | man: 117 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 118 | @echo 119 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 120 | 121 | texinfo: 122 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 123 | @echo 124 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 125 | @echo "Run \`make' in that directory to run these through makeinfo" \ 126 | "(use \`make info' here to do that automatically)." 127 | 128 | info: 129 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 130 | @echo "Running Texinfo files through makeinfo..." 131 | make -C $(BUILDDIR)/texinfo info 132 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 133 | 134 | gettext: 135 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 136 | @echo 137 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 138 | 139 | changes: 140 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 141 | @echo 142 | @echo "The overview file is in $(BUILDDIR)/changes." 143 | 144 | linkcheck: 145 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 146 | @echo 147 | @echo "Link check complete; look for any errors in the above output " \ 148 | "or in $(BUILDDIR)/linkcheck/output.txt." 149 | 150 | doctest: 151 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 152 | @echo "Testing of doctests in the sources finished, look at the " \ 153 | "results in $(BUILDDIR)/doctest/output.txt." 154 | -------------------------------------------------------------------------------- /pipewelder/translator.py: -------------------------------------------------------------------------------- 1 | # Copyright 2014 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). You 4 | # may not use this file except in compliance with the License. A copy of 5 | # the License is located at 6 | # 7 | # http://aws.amazon.com/apache2.0/ 8 | # 9 | # or in the "license" file accompanying this file. This file is 10 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF 11 | # ANY KIND, either express or implied. See the License for the specific 12 | # language governing permissions and limitations under the License. 
13 | import json 14 | 15 | 16 | class PipelineDefinitionError(Exception): 17 | def __init__(self, msg, definition): 18 | full_msg = ( 19 | "Error in pipeline definition: %s\n" % msg) 20 | super(PipelineDefinitionError, self).__init__(full_msg) 21 | self.msg = msg 22 | self.definition = definition 23 | 24 | 25 | def api_to_definition(definition): 26 | # When we're translating from api_response -> definition 27 | # we have to be careful *not* to mutate the existing 28 | # response as other code might need the original 29 | # api_response. 30 | if 'pipelineObjects' in definition: 31 | definition['objects'] = _api_to_objects_definition( 32 | definition.pop('pipelineObjects')) 33 | if 'parameterObjects' in definition: 34 | definition['parameters'] = _api_to_parameters_definition( 35 | definition.pop('parameterObjects')) 36 | if 'parameterValues' in definition: 37 | definition['values'] = _api_to_values_definition( 38 | definition.pop('parameterValues')) 39 | return definition 40 | 41 | 42 | def definition_to_api_objects(definition): 43 | if 'objects' not in definition: 44 | raise PipelineDefinitionError('Missing "objects" key', definition) 45 | api_elements = [] 46 | # To convert to the structure expected by the service, 47 | # we convert the existing structure to a list of dictionaries. 48 | # Each dictionary has a 'fields', 'id', and 'name' key. 49 | for element in definition['objects']: 50 | try: 51 | element_id = element.pop('id') 52 | except KeyError: 53 | raise PipelineDefinitionError('Missing "id" key of element: %s' % 54 | json.dumps(element), definition) 55 | api_object = {'id': element_id} 56 | # If a name is provided, then we use that for the name, 57 | # otherwise the id is used for the name. 58 | name = element.pop('name', element_id) 59 | api_object['name'] = name 60 | # Now we need the field list. Each element in the field list is a dict 61 | # with a 'key', 'stringValue'|'refValue' 62 | fields = [] 63 | for key, value in sorted(element.items()): 64 | fields.extend(_parse_each_field(key, value)) 65 | api_object['fields'] = fields 66 | api_elements.append(api_object) 67 | return api_elements 68 | 69 | 70 | def definition_to_api_parameters(definition): 71 | if 'parameters' not in definition: 72 | return None 73 | parameter_objects = [] 74 | for element in definition['parameters']: 75 | try: 76 | parameter_id = element.pop('id') 77 | except KeyError: 78 | raise PipelineDefinitionError('Missing "id" key of parameter: %s' % 79 | json.dumps(element), definition) 80 | parameter_object = {'id': parameter_id} 81 | # Now we need the attribute list.
Each element in the attribute list 82 | # is a dict with a 'key', 'stringValue' 83 | attributes = [] 84 | for key, value in sorted(element.items()): 85 | attributes.extend(_parse_each_field(key, value)) 86 | parameter_object['attributes'] = attributes 87 | parameter_objects.append(parameter_object) 88 | return parameter_objects 89 | 90 | 91 | def definition_to_parameter_values(definition): 92 | if 'values' not in definition: 93 | return None 94 | parameter_values = [] 95 | for key in definition['values']: 96 | parameter_values.extend( 97 | _convert_single_parameter_value(key, definition['values'][key])) 98 | 99 | return parameter_values 100 | 101 | 102 | def _parse_each_field(key, value): 103 | values = [] 104 | if isinstance(value, list): 105 | for item in value: 106 | values.append(_convert_single_field(key, item)) 107 | else: 108 | values.append(_convert_single_field(key, value)) 109 | return values 110 | 111 | 112 | def _convert_single_field(key, value): 113 | field = {'key': key} 114 | if isinstance(value, dict) and list(value.keys()) == ['ref']: 115 | field['refValue'] = value['ref'] 116 | else: 117 | field['stringValue'] = value 118 | return field 119 | 120 | 121 | def _convert_single_parameter_value(key, values): 122 | parameter_values = [] 123 | if isinstance(values, list): 124 | for each_value in values: 125 | parameter_value = {'id': key, 'stringValue': each_value} 126 | parameter_values.append(parameter_value) 127 | else: 128 | parameter_value = {'id': key, 'stringValue': values} 129 | parameter_values.append(parameter_value) 130 | return parameter_values 131 | 132 | 133 | def _api_to_objects_definition(api_response): 134 | pipeline_objects = [] 135 | for element in api_response: 136 | current = { 137 | 'id': element['id'], 138 | 'name': element['name'] 139 | } 140 | for field in element['fields']: 141 | key = field['key'] 142 | if 'stringValue' in field: 143 | value = field['stringValue'] 144 | else: 145 | value = {'ref': field['refValue']} 146 | _add_value(key, value, current) 147 | pipeline_objects.append(current) 148 | return pipeline_objects 149 | 150 | 151 | def _api_to_parameters_definition(api_response): 152 | parameter_objects = [] 153 | for element in api_response: 154 | current = { 155 | 'id': element['id'] 156 | } 157 | for attribute in element['attributes']: 158 | _add_value(attribute['key'], attribute['stringValue'], current) 159 | parameter_objects.append(current) 160 | return parameter_objects 161 | 162 | 163 | def _api_to_values_definition(api_response): 164 | pipeline_values = {} 165 | for element in api_response: 166 | _add_value(element['id'], element['stringValue'], pipeline_values) 167 | return pipeline_values 168 | 169 | 170 | def _add_value(key, value, current_map): 171 | if key not in current_map: 172 | current_map[key] = value 173 | elif isinstance(current_map[key], list): 174 | # Dupe keys result in values aggregating 175 | # into a list. 
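        # (e.g. repeated "myTags" entries combine into
        # {"myTags": [first_value, second_value]}).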
176 | current_map[key].append(value) 177 | else: 178 | converted_list = [current_map[key], value] 179 | current_map[key] = converted_list 180 | -------------------------------------------------------------------------------- /pavement.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import print_function 4 | 5 | import os 6 | import sys 7 | import time 8 | import subprocess 9 | 10 | from paver.easy import options, task, needs, consume_args 11 | from paver.setuputils import install_distutils_tasks 12 | 13 | # Import parameters from the setup file. 14 | sys.path.insert(0, os.path.abspath('.')) # NOPEP8 15 | from setup import ( 16 | setup_dict, get_project_files, print_success_message, 17 | print_failure_message, _lint, _test, _test_all, 18 | CODE_DIRECTORY, DOCS_DIRECTORY, TESTS_DIRECTORY, PYTEST_FLAGS) 19 | 20 | options(setup=setup_dict) 21 | 22 | install_distutils_tasks() 23 | 24 | # Miscellaneous helper functions 25 | 26 | 27 | def print_passed(): 28 | # generated on http://patorjk.com/software/taag/#p=display&f=Small&t=PASSED 29 | print_success_message(r''' ___ _ ___ ___ ___ ___ 30 | | _ \/_\ / __/ __| __| \ 31 | | _/ _ \\__ \__ \ _|| |) | 32 | |_|/_/ \_\___/___/___|___/ 33 | ''') 34 | 35 | 36 | def print_failed(): 37 | # generated on http://patorjk.com/software/taag/#p=display&f=Small&t=FAILED 38 | print_failure_message(r''' ___ _ ___ _ ___ ___ 39 | | __/_\ |_ _| | | __| \ 40 | | _/ _ \ | || |__| _|| |) | 41 | |_/_/ \_\___|____|___|___/ 42 | ''') 43 | 44 | 45 | class cwd(object): 46 | """Class used for temporarily changing directories. Can be thought of 47 | as a `pushd /my/dir' then a `popd' at the end. 48 | """ 49 | def __init__(self, newcwd): 50 | """:param newcwd: directory to make the cwd 51 | :type newcwd: :class:`str` 52 | """ 53 | self.newcwd = newcwd 54 | 55 | def __enter__(self): 56 | self.oldcwd = os.getcwd() 57 | os.chdir(self.newcwd) 58 | return os.getcwd() 59 | 60 | def __exit__(self, type_, value, traceback): 61 | # This acts like a `finally' clause: it will always be executed. 62 | os.chdir(self.oldcwd) 63 | 64 | 65 | # Task-related functions 66 | 67 | def _doc_make(*make_args): 68 | """Run make in sphinx' docs directory. 69 | 70 | :return: exit code 71 | """ 72 | if sys.platform == 'win32': 73 | # Windows 74 | make_cmd = ['make.bat'] 75 | else: 76 | # Linux, Mac OS X, and others 77 | make_cmd = ['make'] 78 | make_cmd.extend(make_args) 79 | 80 | # Account for a stupid Python "bug" on Windows: 81 | # 82 | with cwd(DOCS_DIRECTORY): 83 | retcode = subprocess.call(make_cmd) 84 | return retcode 85 | 86 | 87 | # Tasks 88 | 89 | @task 90 | @needs('doc_html', 'setuptools.command.sdist') 91 | def sdist(): 92 | """Build the HTML docs and the tarball.""" 93 | pass 94 | 95 | 96 | @task 97 | def test(): 98 | """Run the unit tests.""" 99 | raise SystemExit(_test()) 100 | 101 | 102 | @task 103 | def lint(): 104 | # This refuses to format properly when running `paver help' unless 105 | # this ugliness is used. 106 | ('Perform PEP8 style check, run PyFlakes, and run McCabe complexity ' 107 | 'metrics on the code.') 108 | raise SystemExit(_lint()) 109 | 110 | 111 | @task 112 | def test_all(): 113 | """Perform a style check and run all unit tests.""" 114 | retcode = _test_all() 115 | if retcode == 0: 116 | print_passed() 117 | else: 118 | print_failed() 119 | raise SystemExit(retcode) 120 | 121 | 122 | @task 123 | @consume_args 124 | def run(args): 125 | """Run the package's main script.
126 |     # The main script expects to get the called executable's name as
127 |     # argv[0]. However, paver doesn't provide that in args. Even if it did (or
128 |     # we dove into sys.argv), it wouldn't be useful because it would be paver's
129 |     # executable. So we just pass the package name in as the executable name,
130 |     # since it's close enough. This should never be seen by an end user
131 |     # installing through Setuptools anyway.
132 |     from pipewelder.cli import main
133 |     raise SystemExit(main([CODE_DIRECTORY] + args))
134 | 
135 | 
136 | @task
137 | def commit():
138 |     """Commit only if all the tests pass."""
139 |     if _test_all() == 0:
140 |         subprocess.check_call(['git', 'commit'])
141 |     else:
142 |         print_failure_message('\nTests failed, not committing.')
143 | 
144 | 
145 | @task
146 | def coverage():
147 |     """Run tests and show test coverage report."""
148 |     try:
149 |         import pytest_cov  # NOQA
150 |     except ImportError:
151 |         print_failure_message(
152 |             'Install the pytest coverage plugin to use this task, '
153 |             "i.e., `pip install pytest-cov'.")
154 |         raise SystemExit(1)
155 |     import pytest
156 |     pytest.main(PYTEST_FLAGS + [
157 |         '--cov', CODE_DIRECTORY,
158 |         '--cov-report', 'term-missing',
159 |         TESTS_DIRECTORY])
160 | 
161 | 
162 | @task  # NOQA
163 | def doc_watch():
164 |     """Watch for changes in the docs and rebuild HTML docs when changed."""
165 |     try:
166 |         from watchdog.events import FileSystemEventHandler
167 |         from watchdog.observers import Observer
168 |     except ImportError:
169 |         print_failure_message('Install the watchdog package to use this task, '
170 |                               "i.e., `pip install watchdog'.")
171 |         raise SystemExit(1)
172 | 
173 |     class RebuildDocsEventHandler(FileSystemEventHandler):
174 |         def __init__(self, base_paths):
175 |             self.base_paths = base_paths
176 | 
177 |         def dispatch(self, event):
178 |             """Dispatches events to the appropriate methods.
179 |             :param event: The event object representing the file system event.
180 |             :type event: :class:`watchdog.events.FileSystemEvent`
181 |             """
182 |             for base_path in self.base_paths:
183 |                 if event.src_path.endswith(base_path):
184 |                     super(RebuildDocsEventHandler, self).dispatch(event)
185 |                     # We found one that matches. We're done.
186 |                     return
187 | 
188 |         def on_modified(self, event):
189 |             print_failure_message('Modification detected. Rebuilding docs.')
190 |             # # Strip off the path prefix.
191 |             # import os
192 |             # if event.src_path[len(os.getcwd()) + 1:].startswith(
193 |             #         CODE_DIRECTORY):
194 |             # # sphinx-build doesn't always pick up changes on code files,
195 |             # # even though they are used to generate the documentation. As
196 |             # # a workaround, just clean before building.
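            # Rebuild the HTML docs; the clean-first workaround above is
            # left disabled.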
197 |             doc_html()
198 |             print_success_message('Docs have been rebuilt.')
199 | 
200 |     print_success_message(
201 |         'Watching for changes in project files, press Ctrl-C to cancel...')
202 |     handler = RebuildDocsEventHandler(get_project_files())
203 |     observer = Observer()
204 |     observer.schedule(handler, path='.', recursive=True)
205 |     observer.start()
206 |     try:
207 |         while True:
208 |             time.sleep(1)
209 |     except KeyboardInterrupt:
210 |         observer.stop()
211 |     observer.join()
212 | 
213 | 
214 | @task
215 | @needs('doc_html')
216 | def doc_open():
217 |     """Build the HTML docs and open them in a web browser."""
218 |     doc_index = os.path.join(DOCS_DIRECTORY, 'build', 'html', 'index.html')
219 |     if sys.platform == 'darwin':
220 |         # Mac OS X
221 |         subprocess.check_call(['open', doc_index])
222 |     elif sys.platform == 'win32':
223 |         # Windows
224 |         subprocess.check_call(['start', doc_index], shell=True)
225 |     elif sys.platform.startswith('linux'):
226 |         # All freedesktop-compatible desktops
227 |         subprocess.check_call(['xdg-open', doc_index])
228 |     else:
229 |         print_failure_message(
230 |             "Unsupported platform. Please open `{0}' manually.".format(
231 |                 doc_index))
232 | 
233 | 
234 | @task
235 | def get_tasks():
236 |     """Get all paver-defined tasks."""
237 |     from paver.tasks import environment
238 |     for t in environment.get_tasks():
239 |         print(t.shortname)
240 | 
241 | 
242 | @task
243 | def doc_html():
244 |     """Build the HTML docs."""
245 |     retcode = _doc_make('html')
246 | 
247 |     if retcode:
248 |         raise SystemExit(retcode)
249 | 
250 | 
251 | @task
252 | def doc_clean():
253 |     """Clean (delete) the built docs."""
254 |     retcode = _doc_make('clean')
255 | 
256 |     if retcode:
257 |         raise SystemExit(retcode)
258 | 
--------------------------------------------------------------------------------
/docs/source/conf.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # flake8: noqa
3 | 
4 | # This file is based upon the file generated by sphinx-quickstart. However,
5 | # where sphinx-quickstart hardcodes values in this file that you input, this
6 | # file has been changed to pull from your module's metadata module.
7 | #
8 | # This file is execfile()d with the current directory set to its containing
9 | # dir.
10 | #
11 | # Note that not all possible configuration values are present in this
12 | # autogenerated file.
13 | #
14 | # All configuration values have a default; values that are commented out
15 | # serve to show the default.
16 | 
17 | import os
18 | import sys
19 | 
20 | # If extensions (or modules to document with autodoc) are in another directory,
21 | # add these directories to sys.path here. If the directory is relative to the
22 | # documentation root, use os.path.abspath to make it absolute, like shown here.
23 | sys.path.insert(0, os.path.abspath('../..'))
24 | 
25 | # Import project metadata
26 | from pipewelder import metadata
27 | 
28 | # -- General configuration ----------------------------------------------------
29 | 
30 | # If your documentation needs a minimal Sphinx version, state it here.
31 | #needs_sphinx = '1.0'
32 | 
33 | # Add any Sphinx extension module names here, as strings. They can be
34 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
35 | extensions = ['sphinx.ext.autodoc', 'sphinx.ext.intersphinx', 36 | 'sphinx.ext.todo', 'sphinx.ext.coverage', 'sphinx.ext.viewcode'] 37 | 38 | # show todos 39 | todo_include_todos = True 40 | 41 | # Add any paths that contain templates here, relative to this directory. 42 | templates_path = ['_templates'] 43 | 44 | # The suffix of source filenames. 45 | source_suffix = '.rst' 46 | 47 | # The encoding of source files. 48 | #source_encoding = 'utf-8-sig' 49 | 50 | # The master toctree document. 51 | master_doc = 'index' 52 | 53 | # General information about the project. 54 | project = metadata.project 55 | copyright = metadata.copyright 56 | 57 | # The version info for the project you're documenting, acts as replacement for 58 | # |version| and |release|, also used in various other places throughout the 59 | # built documents. 60 | # 61 | # The short X.Y version. 62 | version = metadata.version 63 | # The full version, including alpha/beta/rc tags. 64 | release = metadata.version 65 | 66 | # The language for content autogenerated by Sphinx. Refer to documentation 67 | # for a list of supported languages. 68 | #language = None 69 | 70 | # There are two options for replacing |today|: either, you set today to some 71 | # non-false value, then it is used: 72 | #today = '' 73 | # Else, today_fmt is used as the format for a strftime call. 74 | #today_fmt = '%B %d, %Y' 75 | 76 | # List of patterns, relative to source directory, that match files and 77 | # directories to ignore when looking for source files. 78 | exclude_patterns = [] 79 | 80 | # The reST default role (used for this markup: `text`) to use for all 81 | # documents. 82 | #default_role = None 83 | 84 | # If true, '()' will be appended to :func: etc. cross-reference text. 85 | #add_function_parentheses = True 86 | 87 | # If true, the current module name will be prepended to all description 88 | # unit titles (such as .. function::). 89 | #add_module_names = True 90 | 91 | # If true, sectionauthor and moduleauthor directives will be shown in the 92 | # output. They are ignored by default. 93 | #show_authors = False 94 | 95 | # The name of the Pygments (syntax highlighting) style to use. 96 | pygments_style = 'sphinx' 97 | 98 | # A list of ignored prefixes for module index sorting. 99 | #modindex_common_prefix = [] 100 | 101 | 102 | # -- Options for HTML output -------------------------------------------------- 103 | 104 | # The theme to use for HTML and HTML Help pages. See the documentation for 105 | # a list of builtin themes. 106 | html_theme = 'default' 107 | 108 | # Theme options are theme-specific and customize the look and feel of a theme 109 | # further. For a list of options available for each theme, see the 110 | # documentation. 111 | #html_theme_options = {} 112 | 113 | # Add any paths that contain custom themes here, relative to this directory. 114 | #html_theme_path = [] 115 | 116 | # The name for this set of Sphinx documents. If None, it defaults to 117 | # " v documentation". 118 | #html_title = None 119 | 120 | # A shorter title for the navigation bar. Default is the same as html_title. 121 | #html_short_title = None 122 | 123 | # The name of an image file (relative to this directory) to place at the top 124 | # of the sidebar. 125 | #html_logo = None 126 | 127 | # The name of an image file (within the static path) to use as favicon of the 128 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 129 | # pixels large. 
130 | #html_favicon = None 131 | 132 | # Add any paths that contain custom static files (such as style sheets) here, 133 | # relative to this directory. They are copied after the builtin static files, 134 | # so a file named "default.css" will overwrite the builtin "default.css". 135 | html_static_path = ['_static'] 136 | 137 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 138 | # using the given strftime format. 139 | #html_last_updated_fmt = '%b %d, %Y' 140 | 141 | # If true, SmartyPants will be used to convert quotes and dashes to 142 | # typographically correct entities. 143 | #html_use_smartypants = True 144 | 145 | # Custom sidebar templates, maps document names to template names. 146 | #html_sidebars = {} 147 | 148 | # Additional templates that should be rendered to pages, maps page names to 149 | # template names. 150 | #html_additional_pages = {} 151 | 152 | # If false, no module index is generated. 153 | #html_domain_indices = True 154 | 155 | # If false, no index is generated. 156 | #html_use_index = True 157 | 158 | # If true, the index is split into individual pages for each letter. 159 | #html_split_index = False 160 | 161 | # If true, links to the reST sources are added to the pages. 162 | #html_show_sourcelink = True 163 | 164 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 165 | #html_show_sphinx = True 166 | 167 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 168 | #html_show_copyright = True 169 | 170 | # If true, an OpenSearch description file will be output, and all pages will 171 | # contain a tag referring to it. The value of this option must be the 172 | # base URL from which the finished HTML is served. 173 | #html_use_opensearch = '' 174 | 175 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 176 | #html_file_suffix = None 177 | 178 | # Output file base name for HTML help builder. 179 | htmlhelp_basename = metadata.project_no_spaces + 'doc' 180 | 181 | 182 | # -- Options for LaTeX output ------------------------------------------------- 183 | 184 | latex_elements = { 185 | # The paper size ('letterpaper' or 'a4paper'). 186 | #'papersize': 'letterpaper', 187 | 188 | # The font size ('10pt', '11pt' or '12pt'). 189 | #'pointsize': '10pt', 190 | 191 | # Additional stuff for the LaTeX preamble. 192 | #'preamble': '', 193 | } 194 | 195 | # Grouping the document tree into LaTeX files. List of tuples 196 | # (source start file, target name, title, author, 197 | # documentclass [howto/manual]). 198 | latex_documents = [ 199 | ('index', metadata.project_no_spaces + '.tex', 200 | metadata.project + ' Documentation', metadata.authors_string, 201 | 'manual'), 202 | ] 203 | 204 | # The name of an image file (relative to this directory) to place at the top of 205 | # the title page. 206 | #latex_logo = None 207 | 208 | # For "manual" documents, if this is true, then toplevel headings are parts, 209 | # not chapters. 210 | #latex_use_parts = False 211 | 212 | # If true, show page references after internal links. 213 | #latex_show_pagerefs = False 214 | 215 | # If true, show URL addresses after external links. 216 | #latex_show_urls = False 217 | 218 | # Documents to append as an appendix to all manuals. 219 | #latex_appendices = [] 220 | 221 | # If false, no module index is generated. 222 | #latex_domain_indices = True 223 | 224 | 225 | # -- Options for manual page output ------------------------------------------- 226 | 227 | # One entry per manual page. 
List of tuples
228 | # (source start file, name, description, authors, manual section).
229 | man_pages = [
230 |     ('index', metadata.package, metadata.project + ' Documentation',
231 |      metadata.authors_string, 1)
232 | ]
233 | 
234 | # If true, show URL addresses after external links.
235 | #man_show_urls = False
236 | 
237 | 
238 | # -- Options for Texinfo output -----------------------------------------------
239 | 
240 | # Grouping the document tree into Texinfo files. List of tuples
241 | # (source start file, target name, title, author,
242 | #  dir menu entry, description, category)
243 | texinfo_documents = [
244 |     ('index', metadata.project_no_spaces,
245 |      metadata.project + ' Documentation', metadata.authors_string,
246 |      metadata.project_no_spaces, metadata.description, 'Miscellaneous'),
247 | ]
248 | 
249 | # Documents to append as an appendix to all manuals.
250 | #texinfo_appendices = []
251 | 
252 | # If false, no module index is generated.
253 | #texinfo_domain_indices = True
254 | 
255 | # How to display URL addresses: 'footnote', 'no', or 'inline'.
256 | #texinfo_show_urls = 'footnote'
257 | 
258 | 
259 | # Example configuration for intersphinx: refer to the Python standard library.
260 | intersphinx_mapping = {
261 |     'python': ('http://docs.python.org/', None),
262 |     'boto': ('https://boto.readthedocs.org/en/latest/', None),
263 | }
264 | 
265 | # Extra local configuration. This is useful for placing the class description
266 | # in the class docstring and the __init__ parameter documentation in the
267 | # __init__ docstring. See
268 | # for more
269 | # information.
270 | autoclass_content = 'both'
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from __future__ import print_function
3 | 
4 | import os
5 | import sys
6 | import imp
7 | import subprocess
8 | 
9 | from setuptools import setup, find_packages
10 | from setuptools.command.test import test as TestCommand
11 | from distutils import spawn
12 | 
13 | # Python 2.6 subprocess.check_output compatibility. Thanks Greg Hewgill!
14 | if 'check_output' not in dir(subprocess):
15 |     def check_output(cmd_args, *args, **kwargs):
16 |         proc = subprocess.Popen(
17 |             cmd_args, *args,
18 |             stdout=subprocess.PIPE, stderr=subprocess.PIPE, **kwargs)
19 |         out, err = proc.communicate()
20 |         if proc.returncode != 0:
21 |             raise subprocess.CalledProcessError(proc.returncode, cmd_args)
22 |         return out
23 |     subprocess.check_output = check_output
24 | 
25 | try:
26 |     import colorama
27 |     colorama.init()  # Initialize colorama on Windows
28 | except ImportError:
29 |     # Don't require colorama just for running paver tasks. This allows us to
30 |     # run `paver install' without requiring the user to first have colorama
31 |     # installed.
32 |     pass
33 | 
34 | # Add the current directory to the module search path.
35 | sys.path.append('.')
36 | 
37 | # Constants
38 | CODE_DIRECTORY = 'pipewelder'
39 | DOCS_DIRECTORY = 'docs'
40 | TESTS_DIRECTORY = 'tests'
41 | PYTEST_FLAGS = ['--doctest-modules']
42 | 
43 | # Import metadata. Normally this would just be:
44 | #
45 | #     from pipewelder import metadata
46 | #
47 | # However, when we do this, we also import `pipewelder/__init__.py'. If this
48 | # imports names from some other modules and these modules have third-party
49 | # dependencies that need installing (which happens after this file is run), the
50 | # script will crash. What we do instead is to load the metadata module by path
51 | # instead, effectively side-stepping the dependency problem. Please make sure
52 | # metadata has no dependencies, otherwise they will need to be added to
53 | # the setup_requires keyword.
54 | metadata = imp.load_source(
55 |     'metadata', os.path.join(CODE_DIRECTORY, 'metadata.py'))
56 | 
57 | 
58 | # Miscellaneous helper functions
59 | 
60 | def get_project_files():
61 |     """Retrieve a list of project files, ignoring hidden files.
62 | 
63 |     :return: sorted list of project files
64 |     :rtype: :class:`list`
65 |     """
66 |     if is_git_project() and has_git():
67 |         return get_git_project_files()
68 | 
69 |     project_files = []
70 |     for top, subdirs, files in os.walk('.'):
71 |         # Prune hidden directories in place so os.walk skips them; removing
72 |         # items from a list while iterating over it would skip entries.
73 |         subdirs[:] = [d for d in subdirs if not d.startswith('.')]
74 | 
75 |         for f in files:
76 |             if f.startswith('.'):
77 |                 continue
78 |             project_files.append(os.path.join(top, f))
79 | 
80 |     return project_files
81 | 
82 | 
83 | def is_git_project():
84 |     return os.path.isdir('.git')
85 | 
86 | 
87 | def has_git():
88 |     return bool(spawn.find_executable("git"))
89 | 
90 | 
91 | def get_git_project_files():
92 |     """Retrieve a list of all non-ignored files, including untracked files,
93 |     excluding deleted files.
94 | 
95 |     :return: sorted list of git project files
96 |     :rtype: :class:`list`
97 |     """
98 |     cached_and_untracked_files = git_ls_files(
99 |         '--cached',  # All files cached in the index
100 |         '--others',  # Untracked files
101 |         # Exclude untracked files that would be excluded by .gitignore, etc.
102 |         '--exclude-standard')
103 |     uncommitted_deleted_files = git_ls_files('--deleted')
104 | 
105 |     # Since sorting of files in a set is arbitrary, return a sorted list to
106 |     # provide a well-defined order to tools like flake8, etc.
107 |     return sorted(cached_and_untracked_files - uncommitted_deleted_files)
108 | 
109 | 
110 | def git_ls_files(*cmd_args):
111 |     """Run ``git ls-files`` in the top-level project directory. Arguments go
112 |     directly to execution call.
113 | 
114 |     :return: set of file names
115 |     :rtype: :class:`set`
116 |     """
117 |     cmd = ['git', 'ls-files']
118 |     cmd.extend(cmd_args)
119 |     return set(subprocess.check_output(cmd).splitlines())
120 | 
121 | 
122 | def print_success_message(message):
123 |     """Print a message indicating success in green color to STDOUT.
124 | 
125 |     :param message: the message to print
126 |     :type message: :class:`str`
127 |     """
128 |     try:
129 |         import colorama
130 |         print(colorama.Fore.GREEN + message + colorama.Fore.RESET)
131 |     except ImportError:
132 |         print(message)
133 | 
134 | 
135 | def print_failure_message(message):
136 |     """Print a message indicating failure in red color to STDERR.
137 | 
138 |     :param message: the message to print
139 |     :type message: :class:`str`
140 |     """
141 |     try:
142 |         import colorama
143 |         print(colorama.Fore.RED + message + colorama.Fore.RESET,
144 |               file=sys.stderr)
145 |     except ImportError:
146 |         print(message, file=sys.stderr)
147 | 
148 | 
149 | def read(filename):
150 |     """Return the contents of a file.
151 | 
152 |     :param filename: file path
153 |     :type filename: :class:`str`
154 |     :return: the file's content
155 |     :rtype: :class:`str`
156 |     """
157 |     with open(os.path.join(os.path.dirname(__file__), filename)) as f:
158 |         return f.read()
159 | 
160 | 
161 | def _lint():
162 |     """Run lint and return an exit code."""
163 |     # Flake8 doesn't have an easy way to run checks using a Python function, so
164 |     # just fork off another process to do it.
165 | 166 | # Python 3 compat: 167 | # - The result of subprocess call outputs are byte strings, meaning we need 168 | # to pass a byte string to endswith. 169 | project_python_files = [filename for filename in get_project_files() 170 | if filename.endswith(b'.py')] 171 | retcode = subprocess.call( 172 | ['flake8', '--max-complexity=10'] + project_python_files) 173 | if retcode == 0: 174 | print_success_message('No style errors') 175 | return retcode 176 | 177 | 178 | def _test(): 179 | """Run the unit tests. 180 | 181 | :return: exit code 182 | """ 183 | # Make sure to import pytest in this function. For the reason, see here: 184 | # # NOPEP8 185 | import pytest 186 | # Run the doctests 187 | import doctest 188 | import pipewelder 189 | doctest.testmod(pipewelder.core) 190 | # This runs the unit tests. 191 | # It also runs doctest, but only on the modules in TESTS_DIRECTORY. 192 | return pytest.main(PYTEST_FLAGS + [TESTS_DIRECTORY]) 193 | 194 | 195 | def _test_all(): 196 | """Run lint and tests. 197 | 198 | :return: exit code 199 | """ 200 | return _lint() + _test() 201 | 202 | 203 | # The following code is to allow tests to be run with `python setup.py test'. 204 | # The main reason to make this possible is to allow tests to be run as part of 205 | # Setuptools' automatic run of 2to3 on the source code. The recommended way to 206 | # run tests is still `paver test_all'. 207 | # See 208 | # Code based on # NOPEP8 209 | class TestAllCommand(TestCommand): 210 | def finalize_options(self): 211 | TestCommand.finalize_options(self) 212 | # These are fake, and just set to appease distutils and setuptools. 213 | self.test_suite = True 214 | self.test_args = [] 215 | 216 | def run_tests(self): 217 | raise SystemExit(_test_all()) 218 | 219 | 220 | # define install_requires for specific Python versions 221 | python_version_specific_requires = [] 222 | 223 | # as of Python >= 2.7 and >= 3.2, the argparse module is maintained within 224 | # the Python standard library, otherwise we install it as a separate package 225 | if sys.version_info < (2, 7) or (3, 0) <= sys.version_info < (3, 3): 226 | python_version_specific_requires.append('argparse') 227 | 228 | 229 | # See here for more options: 230 | # 231 | setup_dict = dict( 232 | name=metadata.package, 233 | version=metadata.version, 234 | author=metadata.authors[0], 235 | author_email=metadata.emails[0], 236 | maintainer=metadata.authors[0], 237 | maintainer_email=metadata.emails[0], 238 | url=metadata.url, 239 | description=metadata.description, 240 | long_description=read('README.rst'), 241 | # Find a list of classifiers here: 242 | # 243 | classifiers=[ 244 | 'Development Status :: 4 - Beta', 245 | 'Environment :: Console', 246 | 'Intended Audience :: Developers', 247 | 'License :: OSI Approved :: Apache Software License', 248 | 'Natural Language :: English', 249 | 'Operating System :: OS Independent', 250 | 'Programming Language :: Python :: 2.6', 251 | 'Programming Language :: Python :: 2.7', 252 | 'Programming Language :: Python :: 3.3', 253 | 'Programming Language :: Python :: 3.4', 254 | 'Topic :: Software Development :: Libraries :: Python Modules', 255 | ], 256 | packages=find_packages(exclude=(TESTS_DIRECTORY,)), 257 | install_requires=[ 258 | 'boto', 259 | 'six' 260 | ] + python_version_specific_requires, 261 | # Allow tests to be run with `python setup.py test'. 
262 |     tests_require=[
263 |         'pytest',
264 |         'mock',
265 |         'flake8',
266 |     ],
267 |     cmdclass={'test': TestAllCommand},
268 |     zip_safe=False,  # don't use eggs
269 |     entry_points={
270 |         'console_scripts': [
271 |             'pipewelder = pipewelder.cli:entry_point'
272 |         ],
273 |     }
274 | )
275 | 
276 | 
277 | def main():
278 |     setup(**setup_dict)
279 | 
280 | 
281 | if __name__ == '__main__':
282 |     main()
283 | 
--------------------------------------------------------------------------------
/pipewelder/core.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | The core Pipewelder API.
4 | """
5 | 
6 | from __future__ import print_function
7 | 
8 | import re
9 | import os
10 | import logging
11 | import hashlib
12 | from copy import deepcopy
13 | from datetime import datetime, timedelta
14 | 
15 | from pipewelder import translator
16 | from boto import connect_s3
17 | from boto.s3.key import Key as S3Key
18 | 
19 | from pipewelder import util
20 | 
21 | import six
22 | if six.PY2:
23 |     from urlparse import urlparse
24 | else:
25 |     from urllib.parse import urlparse
26 | 
27 | PIPELINE_DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%S"
28 | PIPELINE_FREQUENCY_RE = re.compile(r'(?P<number>\d+) (?P<unit>\w+s)')
29 | PIPELINE_PARAM_RE = re.compile(r'\#\{(my[a-zA-Z0-9]+)\}')
30 | PIPEWELDER_STUB_PARAMS = {
31 |     'name': "Pipewelder validation stub",
32 |     'unique_id': 'stub',
33 |     "description": """
34 | This pipeline should always be in 'PENDING' status.
35 | It is used by Pipewelder to validate pipeline definitions.
36 | """.strip()
37 | }
38 | 
39 | 
40 | class Pipewelder(object):
41 |     """
42 |     A collection of Pipelines sharing a definition template.
43 |     """
44 |     def __init__(self, conn, template_path, s3_conn=None):
45 |         """
46 |         *conn* is a :class:`boto.datapipeline.layer1.DataPipelineConnection`
47 |         instance used to manipulate added pipelines,
48 |         *s3_conn* is a :class:`boto.s3.connection.S3Connection`
49 |         used to upload pipeline tasks to S3,
50 |         and *template_path* is the path to a local file containing the
51 |         template pipeline definition.
52 |         """
53 |         self.conn = conn
54 |         self.s3_conn = s3_conn
55 |         if self.s3_conn is None:
56 |             self.s3_conn = connect_s3()
57 |         template_path = os.path.normpath(template_path)
58 |         self.template = definition_from_file(template_path)
59 |         self.pipelines = {}
60 | 
61 |     def add_pipeline(self, dirpath):
62 |         """
63 |         Load a new :class:`Pipeline` object based on the files contained in
64 |         *dirpath*.
65 |         """
66 |         pipeline = Pipeline(self.conn, self.s3_conn, self.template, dirpath)
67 |         self.pipelines[pipeline.name] = pipeline
68 |         return pipeline
69 | 
70 |     def are_pipelines_valid(self):
71 |         """
72 |         Returns ``True`` if all pipeline definitions validate with AWS.
73 |         """
74 |         return all([p.is_valid() for p in self.pipelines.values()])
75 | 
76 |     def validate(self):
77 |         """
78 |         Synonym for :meth:`are_pipelines_valid`.
79 |         """
80 |         return self.are_pipelines_valid()
81 | 
82 |     def upload(self):
83 |         """
84 |         Upload files to S3 corresponding to each pipeline and its tasks.
85 | 
86 |         Returns ``True`` if successful.
87 |         """
88 |         return all([p.upload() for p in self.pipelines.values()])
89 | 
90 |     def delete(self):
91 |         """
92 |         Delete all pipeline definitions.
93 | 
94 |         Returns ``True`` if successful.
95 |         """
96 |         return all([p.delete() for p in self.pipelines.values()])
97 | 
98 |     def put_definition(self):
99 |         """
100 |         Put definitions for all pipelines.
101 | 
102 |         Returns ``True`` if successful.
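        Each pipeline is first created in AWS if it does not already
        exist.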
103 | """ 104 | return all([p.put_definition() for p in self.pipelines.values()]) 105 | 106 | def activate(self): 107 | """ 108 | Activate all pipeline definitions, 109 | deleting existing pipeline if needed. 110 | 111 | Returns ``True`` if successful. 112 | """ 113 | if not self.are_pipelines_valid(): 114 | logging.error("Not activating pipelines due to validation errors.") 115 | return False 116 | return all([p.activate() for p in self.pipelines.values()]) 117 | 118 | 119 | class Pipeline(object): 120 | """ 121 | A class defining a single pipeline definition and associated tasks. 122 | """ 123 | def __init__(self, conn, s3_conn, template, dirpath): 124 | """ 125 | Create a Pipeline based on definition dict *template*. 126 | 127 | *dirpath* is a directory containing a 'values.json' file, 128 | a 'run' executable, and a 'tasks' directory. 129 | *conn* is a DataPipelineConnection and *s3_conn* is an S3Connection. 130 | """ 131 | self.conn = conn 132 | self.s3_conn = s3_conn 133 | self.dirpath = os.path.normpath(dirpath) 134 | self.definition = template.copy() 135 | values_path = os.path.join(dirpath, 'values.json') 136 | decoded = util.load_json(values_path) 137 | self.values = decoded.get('values', {}) 138 | if 'myName' not in self.values: 139 | self.values['myName'] = os.path.basename(dirpath) 140 | # adjust the start timestamp to the future 141 | timestamp = self.values['myStartDateTime'] 142 | period = self.values['mySchedulePeriod'] 143 | adjusted_timestamp = adjusted_to_future(timestamp, period) 144 | self.values['myStartDateTime'] = adjusted_timestamp 145 | 146 | @property 147 | def name(self): 148 | return self._get_value('myName') 149 | 150 | @property 151 | def description(self): 152 | try: 153 | return self._get_value('myDescription') 154 | except ValueError: 155 | return None 156 | 157 | @property 158 | def tags(self): 159 | if 'myTags' not in self.values: 160 | return {} 161 | return dict(tag_expression.split(':') 162 | for tag_expression in self.values['myTags']) 163 | 164 | @property 165 | def unique_id(self): 166 | return hashlib.md5(self.name + str(self.tags)).hexdigest() 167 | 168 | def api_objects(self): 169 | """ 170 | Return a dict containing the pipeline objects in AWS API format. 171 | """ 172 | d = deepcopy(self.definition) 173 | return translator.definition_to_api_objects(d) 174 | 175 | def api_parameters(self): 176 | """ 177 | Return a dict containing the pipeline parameters in AWS API format. 178 | """ 179 | d = deepcopy(self.definition) 180 | return translator.definition_to_api_parameters(d) 181 | 182 | def api_values(self): 183 | """ 184 | Return a dict containing the pipeline param values in AWS API format. 185 | """ 186 | d = {'values': self.values} 187 | return translator.definition_to_parameter_values(d) 188 | 189 | def api_tags(self): 190 | """ 191 | Return a list containing the pipeline tags in AWS API format. 192 | """ 193 | tag_list = [{'key': k, 'value': v} 194 | for k, v in self.tags.items()] 195 | return tag_list 196 | 197 | def create(self): 198 | """ 199 | Create a pipeline in AWS if it does not already exist. 200 | 201 | Returns the pipeline id. 202 | """ 203 | response = self.conn.create_pipeline(self.name, self.unique_id, 204 | self.description, self.api_tags()) 205 | return response['pipelineId'] 206 | 207 | def is_valid(self): 208 | """ 209 | Returns ``True`` if the pipeline definition validates to AWS. 
210 | """ 211 | response = self.conn.create_pipeline(**PIPEWELDER_STUB_PARAMS) 212 | pipeline_id = response["pipelineId"] 213 | response = self.conn.validate_pipeline_definition( 214 | self.api_objects(), pipeline_id, 215 | self.api_parameters(), self.api_values()) 216 | self._log_validation_messages(response) 217 | if response['errored']: 218 | return False 219 | else: 220 | logging.info("Pipeline '{0}' is valid".format(self.name)) 221 | return True 222 | 223 | def upload(self): 224 | """ 225 | Uploads the contents of `dirpath` to S3. 226 | 227 | The destination path in S3 is determined by 'myS3InputDirectory' 228 | in the 'values.json' file for this pipeline. 229 | Existing contents of the 'tasks' subdirectory are deleted. 230 | 231 | Returns ``True`` if successful. 232 | """ 233 | s3_dir = self._get_value('myS3InputDir') 234 | bucket_path, input_dir = bucket_and_path(s3_dir) 235 | bucket = self.s3_conn.get_bucket(bucket_path) 236 | 237 | remote_task_path = os.path.join(input_dir, 'tasks') 238 | existing_task_keys = bucket.list(prefix=remote_task_path) 239 | existing_tasks = [key.name for key in existing_task_keys] 240 | bucket.delete_keys(existing_tasks) 241 | logging.info("Deleted from bucket '{0}': {1}" 242 | .format(bucket_path, existing_tasks)) 243 | 244 | with util.cd(self.dirpath): 245 | for root, dirs, files in os.walk('.'): 246 | for f in files: 247 | filepath = os.path.join(root, f) 248 | k = S3Key(bucket) 249 | k.key = os.path.normpath(os.path.join(input_dir, filepath)) 250 | k.set_contents_from_filename(filepath) 251 | logging.info('Copied {0} to {1}' 252 | .format(os.path.abspath(filepath), 253 | os.path.normpath( 254 | os.path.join(s3_dir, filepath)))) 255 | return True 256 | 257 | def delete(self): 258 | """ 259 | Delete this pipeline definition from AWS. 260 | 261 | Returns ``True`` if successful. 262 | """ 263 | pipeline_id = self.create() 264 | logging.info("Deleting pipeline with id {0}".format(pipeline_id)) 265 | self.conn.delete_pipeline(pipeline_id) 266 | return True 267 | 268 | def put_definition(self): 269 | """ 270 | Put this pipeline definition to AWS. 271 | 272 | Returns ``True`` if successful. 273 | """ 274 | pipeline_id = self.create() 275 | logging.info("Putting pipeline definition for {0}".format(pipeline_id)) 276 | self.conn.put_pipeline_definition(self.api_objects(), 277 | pipeline_id, 278 | self.api_parameters(), 279 | self.api_values()) 280 | return True 281 | 282 | def activate(self): 283 | """ 284 | Activate this pipeline definition in AWS. 285 | 286 | Deletes the existing pipeline if it has previously been activated. 287 | 288 | Returns ``True`` if successful. 
289 | """ 290 | pipeline_id = self.create() 291 | existing_definition = definition_from_id(self.conn, pipeline_id) 292 | state = state_from_id(self.conn, pipeline_id) 293 | if existing_definition == self.definition: 294 | return True 295 | elif state == 'PENDING': 296 | self.put_definition() 297 | else: 298 | self.delete() 299 | return self.activate() 300 | logging.info("Activating pipeline with id {0}".format(pipeline_id)) 301 | self.conn.activate_pipeline(pipeline_id) 302 | return True 303 | 304 | def _log_validation_messages(self, response): 305 | for container in response['validationWarnings']: 306 | logging.warning("Warnings in validation response for %s", 307 | container['id']) 308 | for message in container['warnings']: 309 | logging.warning(message) 310 | for container in response['validationErrors']: 311 | logging.error("Errors in validation response for %s", 312 | container['id']) 313 | for message in container['errors']: 314 | logging.error(message) 315 | 316 | def _get_value(self, key): 317 | if key in self.values: 318 | return self._parsed_via_parameters(self.values[key]) 319 | params = self.definition['parameters'] 320 | default = fetch_default(params, key) 321 | if default is None: 322 | raise ValueError("No value or default found for '{0}'" 323 | .format(key)) 324 | return self._parsed_via_parameters(default) 325 | 326 | def _parsed_via_parameters(self, expression): 327 | placeholders = re.findall(PIPELINE_PARAM_RE, expression) 328 | if not placeholders: 329 | return expression 330 | key = placeholders[0] 331 | value = self._get_value(key) 332 | placeholder = '#{' + key + '}' 333 | expression = expression.replace(placeholder, value) 334 | return self._parsed_via_parameters(expression) 335 | 336 | def _parsed_object(self, name): 337 | return parsed_object(self.conn, self.create(), name) 338 | 339 | def _parsed_location(self, name): 340 | obj = self._parsed_object(name) 341 | fetch_field_value(obj, 'directoryPath') 342 | 343 | 344 | def bucket_and_path(s3_uri): 345 | """ 346 | Return a bucket name and key path from *s3_uri*. 347 | 348 | >>> bucket_and_path('s3://pipewelder-bucket/pipewelder-test/inputs') 349 | ('pipewelder-bucket', 'pipewelder-test/inputs') 350 | """ 351 | uri = urlparse(s3_uri) 352 | return (uri.netloc, uri.path[1:]) 353 | 354 | 355 | def parse_period(period): 356 | """ 357 | Return a timedelta object parsed from string *period*. 358 | 359 | >>> parse_period("15 minutes") 360 | datetime.timedelta(0, 900) 361 | >>> parse_period("3 hours") 362 | datetime.timedelta(0, 10800) 363 | >>> parse_period("1 days") 364 | datetime.timedelta(1) 365 | """ 366 | parts = PIPELINE_FREQUENCY_RE.match(period) 367 | if not parts: 368 | raise ValueError("'{0}' cannot be parsed as a period".format(period)) 369 | parts = parts.groupdict() 370 | kwargs = {parts['unit']: int(parts['number'])} 371 | return timedelta(**kwargs) 372 | 373 | 374 | def adjusted_to_future(timestamp, period): 375 | """ 376 | Return *timestamp* string, adjusted to the future if necessary. 377 | 378 | If *timestamp* is in the future, it will be returned unchanged. 379 | If it's in the past, *period* will be repeatedly added until the 380 | result is in the future. 381 | 382 | All times are assumed to be in UTC. 
383 | 384 | >>> adjusted_to_future('2199-01-01T00:00:00', '1 days') 385 | '2199-01-01T00:00:00' 386 | """ 387 | dt = datetime.strptime(timestamp, PIPELINE_DATETIME_FORMAT) 388 | delta = parse_period(period) 389 | now = datetime.utcnow() 390 | while dt < now: 391 | dt += delta 392 | return dt.strftime(PIPELINE_DATETIME_FORMAT) 393 | 394 | 395 | def fetch_field_value(aws_response, field_name): 396 | """ 397 | Return a value nested within the 'fields' entry of dict *aws_response*. 398 | 399 | The returned value is the second item from a dict with 'key' *field_name*. 400 | 401 | >>> r = {'fields': [{'key': 'someKey', 'stringValue': 'someValue'}]} 402 | >>> fetch_field_value(r, 'someKey') 403 | 'someValue' 404 | """ 405 | for container in aws_response['fields']: 406 | if container['key'] == field_name: 407 | for (k, v) in container.items(): 408 | if k != 'key': 409 | return v 410 | raise ValueError("Did not find a field called {0} in response {1}" 411 | .format(field_name, aws_response)) 412 | 413 | 414 | def fetch_default(params, key): 415 | """ 416 | Return the default associated with *key* from parameter list *params*. 417 | 418 | If no default, returns None. 419 | >>> p = [{'type': 'String', 'id': 'myParam', 'default': 'foo'}] 420 | >>> fetch_default(p, 'myParam') 421 | 'foo' 422 | >>> p = [{'type': 'String', 'id': 'myParam'}] 423 | >>> fetch_default(p, 'myParam') 424 | """ 425 | for container in params: 426 | if container['id'] == key: 427 | if 'default' in container: 428 | return container['default'] 429 | return None 430 | 431 | 432 | def state_from_id(conn, pipeline_id): 433 | """ 434 | Return the *@pipelineState* string for object matching *pipeline_id*. 435 | 436 | *conn* is a DataPipelineConnection object. 437 | """ 438 | response = conn.describe_pipelines([pipeline_id]) 439 | description = response['pipelineDescriptionList'][0] 440 | return fetch_field_value(description, '@pipelineState') 441 | 442 | 443 | def definition_from_file(filename): 444 | """ 445 | Return a dict containing the contents of pipeline definition *filename*. 446 | """ 447 | return util.load_json(filename) 448 | 449 | 450 | def definition_from_id(conn, pipeline_id): 451 | """ 452 | Return a dict containing the definition of *pipeline_id*. 453 | 454 | *conn* is a DataPipelineConnection object. 455 | """ 456 | response = conn.get_pipeline_definition(pipeline_id) 457 | return translator.api_to_definition(response) 458 | 459 | 460 | def parsed_objects(conn, pipeline_id, object_ids): 461 | """ 462 | Return a list of object dicts as evaluated by Data Pipeline. 463 | """ 464 | response = conn.describe_objects(object_ids, pipeline_id, 465 | evaluate_expressions=True) 466 | return response['pipelineObjects'] 467 | 468 | 469 | def parsed_object(conn, pipeline_id, object_id): 470 | """ 471 | Return an object dict as evaluated by Data Pipeline. 472 | """ 473 | return parsed_objects(conn, pipeline_id, [object_id])[0] 474 | --------------------------------------------------------------------------------
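A minimal end-to-end usage sketch (not part of the repository; the
connection setup and file paths here are illustrative assumptions based
on the API in pipewelder/core.py):

    from boto.datapipeline import layer1
    from pipewelder import core

    # Credentials and region come from the usual boto configuration.
    conn = layer1.DataPipelineConnection()
    # Hypothetical template path holding the shared pipeline definition.
    pw = core.Pipewelder(conn, "pipeline_definition.json")
    # The directory must contain values.json, a run executable, and tasks/.
    pw.add_pipeline("my_pipeline_dir")
    if pw.validate():   # validate every added pipeline with AWS
        pw.upload()     # copy each pipeline directory to S3
        pw.activate()   # put definitions and activate the pipelines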