├── docs
│   ├── source
│   │   ├── _static
│   │   │   └── .gitkeep
│   │   ├── README.rst
│   │   ├── welder.jpg
│   │   ├── util.rst
│   │   ├── core.rst
│   │   ├── cli.rst
│   │   ├── README
│   │   ├── index.rst
│   │   └── conf.py
│   ├── make.bat
│   └── Makefile
├── setup.cfg
├── welder.jpg
├── tests
│   ├── test_data
│   │   ├── echoer
│   │   │   ├── tasks
│   │   │   │   ├── second.txt
│   │   │   │   └── first.txt
│   │   │   ├── run
│   │   │   └── values.json
│   │   ├── pipewelder.json
│   │   └── pipeline_definition.json
│   ├── test_core.py
│   └── test_cli.py
├── requirements.txt
├── .travis.yml
├── Makefile
├── NOTICE
├── .ppt-version
├── pipewelder
│   ├── __init__.py
│   ├── util.py
│   ├── metadata.py
│   ├── cli.py
│   ├── connection.py
│   ├── translator.py
│   └── core.py
├── requirements-dev.txt
├── .editorconfig
├── .gitignore
├── LICENSE
├── MANIFEST.in
├── tox.ini
├── README.rst
├── pavement.py
└── setup.py
/docs/source/_static/.gitkeep:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/docs/source/README.rst:
--------------------------------------------------------------------------------
1 | ../../README.rst
--------------------------------------------------------------------------------
/docs/source/welder.jpg:
--------------------------------------------------------------------------------
1 | ../../welder.jpg
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [bdist_wheel]
2 | universal=1
3 |
--------------------------------------------------------------------------------
/welder.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SimpleFinance/pipewelder/HEAD/welder.jpg
--------------------------------------------------------------------------------
/tests/test_data/echoer/tasks/second.txt:
--------------------------------------------------------------------------------
1 | This text should also appear in the output directory.
2 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | # Python 2.6 compatibility
2 | # argparse==1.2.1
3 | six==1.9.0
4 | boto==2.36.0
5 |
--------------------------------------------------------------------------------
/docs/source/util.rst:
--------------------------------------------------------------------------------
1 | Pipewelder Util
2 | ===============
3 |
4 | .. automodule:: pipewelder.util
5 | :members:
6 |
--------------------------------------------------------------------------------
/docs/source/core.rst:
--------------------------------------------------------------------------------
1 | Pipewelder Core API
2 | ===================
3 |
4 | .. automodule:: pipewelder.core
5 | :members:
6 |
--------------------------------------------------------------------------------
/docs/source/cli.rst:
--------------------------------------------------------------------------------
1 | Pipewelder Command-Line Interface
2 | =================================
3 |
4 | .. automodule:: pipewelder.cli
5 | :members:
6 |
--------------------------------------------------------------------------------
/tests/test_data/echoer/tasks/first.txt:
--------------------------------------------------------------------------------
1 | This is the first task file for the Echoer pipeline.
2 |
3 | This text should appear in the output directory for this pipeline.
4 |
--------------------------------------------------------------------------------
/docs/source/README:
--------------------------------------------------------------------------------
1 | Run `sphinx-apidoc -o . ../../pipewelder' in this directory.
2 |
3 | This will generate `modules.rst' and `pipewelder.rst'.
4 |
5 | Then include `modules.rst' in your `index.rst' file.
6 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: python
2 | python: 2.7
3 | env:
4 | - TOXENV=py26
5 | - TOXENV=py27
6 | - TOXENV=py33
7 | - TOXENV=py34
8 | - TOXENV=pypy
9 | - TOXENV=docs
10 | install:
11 | - pip install -r requirements-dev.txt
12 | script:
13 | - tox
14 |
--------------------------------------------------------------------------------
/tests/test_data/pipewelder.json:
--------------------------------------------------------------------------------
1 | {
2 |
3 | "defaults" : {
4 | "dirs" : ["*"],
5 | "region" : "us-west-2",
6 | "template" : "pipeline_definition.json"
7 | },
8 |
9 | "dev" : {
10 | "values" : {
11 | "myEnv" : "dev"
12 | }
13 | }
14 |
15 | }
16 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | VENV := $(CURDIR)/venv
2 | export PATH := $(VENV)/bin:$(PATH)
3 |
4 | test: install
5 | paver test_all
6 |
7 | install: $(VENV)
8 | $(VENV)/bin/pip install -r requirements-dev.txt
9 |
10 | $(VENV):
11 | virtualenv $@
12 |
13 | requirements.txt:
14 | pip freeze > $@
15 |
--------------------------------------------------------------------------------
/docs/source/index.rst:
--------------------------------------------------------------------------------
1 | Pipewelder
2 | ==========
3 |
4 | .. toctree::
5 | :maxdepth: 2
6 |
7 | README
8 | core
9 | util
10 | cli
11 |
12 | .. only:: html
13 |
14 | Indices and tables
15 | ==================
16 |
17 | * :ref:`genindex`
18 | * :ref:`modindex`
19 | * :ref:`search`
20 |
--------------------------------------------------------------------------------
/NOTICE:
--------------------------------------------------------------------------------
1 | Pipewelder
2 | Copyright 2015 Simple Finance Technology Corporation
3 |
4 | The banner image in the documentation is cropped from an original
5 | photo owned by the PEO ACWA:
6 | https://flic.kr/p/ejYqQe
7 |
8 | Package layout is based on a template by Sean Fisk:
9 | https://github.com/seanfisk/python-project-template
10 |
--------------------------------------------------------------------------------
/.ppt-version:
--------------------------------------------------------------------------------
1 | # This file specifies the version of the Python Project Template
2 | # (https://github.com/seanfisk/python-project-template) from which
3 | # this project was created. It is here for the purposes of possibly
4 | # updating this project to use a newer version of the template. Please
5 | # do not remove this file.
6 | df37ac91c8293f907ac755822702303d75afee3f
7 |
--------------------------------------------------------------------------------
/pipewelder/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # flake8: noqa
3 | """
4 | Scheduled task execution on top of AWS Data Pipeline
5 | """
6 | import pipewelder.connection
7 | from pipewelder import metadata
8 | from pipewelder.core import *
9 |
10 | __version__ = metadata.version
11 | __author__ = metadata.authors[0]
12 | __license__ = metadata.license
13 | __copyright__ = metadata.copyright
14 |
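15 | # Importing pipewelder.connection patches boto's DataPipelineConnection in
16 | # place (see connection.py); the star import from pipewelder.core re-exports
17 | # the public API, including the Pipewelder class used by cli.py.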
--------------------------------------------------------------------------------
/requirements-dev.txt:
--------------------------------------------------------------------------------
1 | # Runtime requirements
2 | --requirement requirements.txt
3 |
4 | # Testing
5 | pytest==2.6.4
6 | py==1.4.19
7 | mock==1.0.1
8 | tox==1.8.1
9 |
10 | # Linting
11 | flake8==2.3.0
12 | mccabe==0.3
13 | pep8==1.6.2
14 | pyflakes==0.8.1
15 |
16 | # Documentation
17 | Sphinx==1.2
18 | docutils==0.11
19 | Jinja2==2.7.1
20 | MarkupSafe==0.18
21 | Pygments==1.6
22 |
23 | # Miscellaneous
24 | Paver==1.2.3
25 | colorama==0.2.7
26 |
--------------------------------------------------------------------------------
/.editorconfig:
--------------------------------------------------------------------------------
1 | # -*- mode: conf-unix; -*-
2 |
3 | # EditorConfig is awesome: http://EditorConfig.org
4 |
5 | # top-most EditorConfig file
6 | root = true
7 |
8 | # defaults
9 | [*]
10 | insert_final_newline = true
11 |
12 | # 4 space indentation
13 | [*.{ini,py,py.tpl,rst}]
14 | indent_style = space
15 | indent_size = 4
16 |
17 | # 4-width tabbed indentation
18 | [*.{sh,bat.tpl,Makefile.tpl}]
19 | indent_style = tab
20 | indent_size = 4
21 |
22 | # and travis does its own thing
23 | [.travis.yml]
24 | indent_style = space
25 | indent_size = 2
26 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Emacs rope configuration
2 | .ropeproject
3 | .project
4 | .pydevproject
5 | .settings
6 |
7 | # pyenv version file
8 | .python-version
9 |
10 | # Python
11 | *.py[co]
12 |
13 | ## Packages
14 | *.egg
15 | *.egg-info
16 | dist
17 | build
18 | eggs
19 | parts
20 | bin
21 | var
22 | sdist
23 | deb_dist
24 | develop-eggs
25 | .installed.cfg
26 |
27 | ## Installer logs
28 | pip-log.txt
29 |
30 | ## Unit test / coverage reports
31 | .coverage
32 | .tox
33 |
34 | ## Translations
35 | *.mo
36 |
37 | ## paver generated files
38 | /paver-minilib.zip
39 |
40 | ## virtualenv
41 | /venv
42 |
--------------------------------------------------------------------------------
/tests/test_data/echoer/run:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | from __future__ import print_function  # Python 2 prints a tuple otherwise
3 | import os
4 | from glob import glob
5 | from shutil import copyfile
6 |
7 | OUTPUT_DIR = os.environ['OUTPUT1_STAGING_DIR']
8 |
9 |
10 | def write_to_output_dir(task_path):
11 | basename = os.path.basename(task_path)
12 | destination = os.path.join(OUTPUT_DIR, basename)
13 | copyfile(task_path, destination)
14 |
15 |
16 | def main():
17 | print("Echoer writes some files to", OUTPUT_DIR)
18 | for txtfile in glob("tasks/*.txt"):
19 | write_to_output_dir(txtfile)
20 |
21 |
22 | if __name__ == "__main__":
23 | main()
24 |
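25 | # When Data Pipeline runs this script, OUTPUT1_STAGING_DIR is synced back
26 | # to #{myS3OutputDir}/<scheduled start time> (see pipeline_definition.json),
27 | # so each run's copies of the task files land in a timestamped S3 directory.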
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2015 Simple Finance Technology Corp.
2 |
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 |
7 | http://www.apache.org/licenses/LICENSE-2.0
8 |
9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | # Informational files
2 | include README.rst
3 | include welder.jpg
4 | include LICENSE
5 | include NOTICE
6 |
7 | # Include docs and tests. It's unclear whether convention dictates
8 | # including built docs. However, Sphinx doesn't include built docs, so
9 | # we are following their lead.
10 | graft docs
11 | prune docs/build
12 | graft tests
13 |
14 | # Exclude any compile Python files (most likely grafted by tests/ directory).
15 | global-exclude *.pyc
16 |
17 | # Setup-related things
18 | include pavement.py
19 | include requirements-dev.txt
20 | include requirements.txt
21 | include setup.py
22 | include tox.ini
23 |
--------------------------------------------------------------------------------
/pipewelder/util.py:
--------------------------------------------------------------------------------
1 | import os
2 | import contextlib
3 | import json
4 |
5 |
6 | @contextlib.contextmanager
7 | def cd(new_path):
8 |     """
9 |     Change to a different directory within a limited context.
10 |     """
11 |     saved_path = os.getcwd()
12 |     os.chdir(new_path)
13 |     try:
14 |         yield
15 |     finally:
16 |         # Restore the original directory even if the caller raises.
17 |         os.chdir(saved_path)
18 |
19 |
20 | def load_json(filename):
21 |     """
22 |     Parse *filename* as JSON, raising a descriptive ValueError on failure.
23 |     """
24 |     with open(filename) as f:
25 |         try:
26 |             data = json.load(f)
27 |         except ValueError as e:
28 |             raise ValueError("Unable to parse '{0}' as json; {1}"
29 |                              .format(filename, e))
30 |     return data
31 |
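32 | # Illustrative usage (a sketch, not part of the original module):
33 | #
34 | #   with cd('tests/test_data'):
35 | #       config = load_json('pipewelder.json')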
--------------------------------------------------------------------------------
/pipewelder/metadata.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """Project metadata
3 |
4 | Information describing the project.
5 | """
6 |
7 | # The package name, which is also the "UNIX name" for the project.
8 | package = 'pipewelder'
9 | project = "Pipewelder"
10 | project_no_spaces = project.replace(' ', '')
11 | version = '0.1.4'
12 | description = 'Scheduled task execution on top of AWS Data Pipeline'
13 | authors = ['Jeff Klukas']
14 | authors_string = ', '.join(authors)
15 | emails = ['klukas@simple.com']
16 | license = 'Apache V2.0'
17 | copyright = '2015 Simple Finance Technology Corporation'
18 | url = 'http://github.com/jklukas/pipewelder'
19 |
--------------------------------------------------------------------------------
/tests/test_data/echoer/values.json:
--------------------------------------------------------------------------------
1 | {
2 |
3 | "values": {
4 | "myName" : "echoer",
5 | "myDescription" : "an example pipeline that simply prints tasks files to STDOUT",
6 | "myEnv" : "this will get replaced by pipewelder.json",
7 | "myS3InputDir": "s3://pipewelder-example/#{myEnv}/echoer/inputs",
8 | "myS3OutputDir": "s3://pipewelder-example/#{myEnv}/echoer/outputs",
9 | "myS3LogDir": "s3://pipewelder-example/#{myEnv}/echoer/logs",
10 | "myStartDateTime": "2015-01-01T00:00:02",
11 | "mySchedulePeriod": "15 minutes",
12 | "myTerminateAfter": "10 minutes",
13 | "myTags": [
14 | "pipewelder-environment:dev"
15 | ]
16 | }
17 |
18 | }
19 |
--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
1 | # Tox (http://tox.testrun.org/) is a tool for running tests in
2 | # multiple virtualenvs. This configuration file will run the test
3 | # suite on all supported python versions. To use it, "pip install tox"
4 | # and then run "tox" from this directory.
5 | #
6 | # To run tox faster, check out Detox
7 | # (https://pypi.python.org/pypi/detox), which runs your tox runs in
8 | # parallel. To use it, "pip install detox" and then run "detox" from
9 | # this directory.
10 |
11 | [tox]
12 | envlist = py26,py27,py33,py34,docs
13 |
14 | [testenv]
15 | deps =
16 | --no-deps
17 | --requirement
18 | {toxinidir}/requirements-dev.txt
19 | commands = paver test_all
20 |
21 | [testenv:docs]
22 | basepython = python
23 | commands = paver doc_html
24 |
--------------------------------------------------------------------------------
/tests/test_core.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import pytest
4 | import os
5 |
6 | from pipewelder import core
7 | from datetime import datetime, timedelta
8 |
9 | import logging
10 | logging.basicConfig(level=logging.INFO)
11 |
12 | HERE = os.path.abspath(os.path.dirname(__file__))
13 | DATA_DIR = os.path.join(HERE, 'test_data')
14 |
15 |
16 | def data_path(path):
17 | return os.path.join(DATA_DIR, path)
18 |
19 |
20 | def test_adjusted_to_future():
21 | now = datetime.utcnow()
22 | timestamp = "{0}-01-01T00:00:00".format(now.year)
23 | adjusted = core.adjusted_to_future(timestamp, "1 days")
24 |     target_dt = datetime(now.year, now.month, now.day) + timedelta(days=1)
25 | assert adjusted == target_dt.strftime(core.PIPELINE_DATETIME_FORMAT)
26 |
27 |
28 | @pytest.fixture
29 | def pipeline_description():
30 | return {
31 | u'description': u'my description',
32 | u'fields': [
33 | {u'key': u'@pipelineState', u'stringValue': u'PENDING'},
34 | {u'key': u'@creationTime', u'stringValue': u'2015-02-11T21:17:10'},
35 | {u'key': u'@sphere', u'stringValue': u'PIPELINE'},
36 | {u'key': u'uniqueId', u'stringValue': u'pipeweldertest1'},
37 | {u'key': u'@accountId', u'stringValue': u'543715240000'},
38 | {u'key': u'description', u'stringValue': u'my description'},
39 | {u'key': u'name', u'stringValue': u'Pipewelder test'},
40 | {u'key': u'pipelineCreator', u'stringValue': u'AIDAIWZQRURDOOOOO'},
41 | {u'key': u'@id', u'stringValue': u'df-07437251YGRXOY19OOOO'},
42 | {u'key': u'@userId', u'stringValue': u'AIDAIWZQRURDXI4UKOOOO'}],
43 | u'name': u'Pipewelder test',
44 | u'pipelineId': u'df-07437251YGRXOY19OOOO',
45 | u'tags': [],
46 | }
47 |
48 |
49 | def test_pipeline_state(pipeline_description):
50 | state = core.fetch_field_value(pipeline_description, '@pipelineState')
51 | assert state == 'PENDING'
52 |
--------------------------------------------------------------------------------
/tests/test_cli.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from pytest import raises
3 |
4 | # The parametrize function is generated, so this doesn't work:
5 | #
6 | # from pytest.mark import parametrize
7 | #
8 | import pytest
9 | parametrize = pytest.mark.parametrize # NOPEP8
10 |
11 | import os
12 |
13 | from pipewelder.cli import pipewelder_configs, main, metadata
14 |
15 | import logging
16 | logging.basicConfig(level=logging.INFO)
17 |
18 | HERE = os.path.abspath(os.path.dirname(__file__))
19 | DATA_DIR = os.path.join(HERE, 'test_data')
20 |
21 |
22 | def data_path(path):
23 | return os.path.join(DATA_DIR, path)
24 |
25 |
26 | def test_pipewelder_configs():
27 | configs = pipewelder_configs(data_path('pipewelder.json'))
28 | assert configs["dev"] == {
29 | "name": "dev",
30 | "dirs": ["echoer"],
31 | "region": "us-west-2",
32 | "template": "pipeline_definition.json",
33 | "values": {
34 | "myEnv": "dev"
35 | }
36 | }
37 |
38 |
39 | class TestMain(object):
40 | @parametrize('helparg', ['-h', '--help'])
41 | def test_help(self, helparg, capsys):
42 | with raises(SystemExit) as exc_info:
43 | main(['progname', helparg])
44 | out, err = capsys.readouterr()
45 | # Should have printed some sort of usage message. We don't
46 | # need to explicitly test the content of the message.
47 | assert 'usage' in out
48 | # Should have used the program name from the argument
49 | # vector.
50 | assert 'progname' in out
51 | # Should exit with zero return code.
52 | assert exc_info.value.code == 0
53 |
54 | @parametrize('versionarg', ['-V', '--version'])
55 | def test_version(self, versionarg, capsys):
56 | with raises(SystemExit) as exc_info:
57 | main(['progname', versionarg])
58 | out, err = capsys.readouterr()
59 | # Should print out version.
60 | expected = '{0} {1}\n'.format(metadata.project, metadata.version)
61 | assert (out == expected or err == expected)
62 | # Should exit with zero return code.
63 | assert exc_info.value.code == 0
64 |
--------------------------------------------------------------------------------
/tests/test_data/pipeline_definition.json:
--------------------------------------------------------------------------------
1 | {
2 | "objects" : [
3 | {
4 | "id" : "Default",
5 | "scheduleType" : "cron",
6 | "failureAndRerunMode" : "CASCADE",
7 | "schedule" : { "ref" : "PipewelderSchedule" },
8 | "pipelineLogUri" : "#{myS3LogDir}",
9 | "role" : "DataPipelineDefaultRole",
10 | "resourceRole" : "DataPipelineDefaultResourceRole"
11 | },
12 | {
13 | "id" : "PipewelderShellCommandActivity",
14 | "command" : "(cd ${INPUT1_STAGING_DIR} && chmod +x run && ./run) > ${OUTPUT1_STAGING_DIR}/stdout.txt",
15 | "runsOn" : { "ref" : "PipewelderEC2Resource" },
16 | "input" : { "ref" : "PipewelderS3InputLocation" },
17 | "output" : { "ref" : "PipewelderS3OutputLocation" },
18 | "type" : "ShellCommandActivity",
19 | "stage" : "true"
20 | },
21 | {
22 | "id" : "PipewelderSchedule",
23 | "startDateTime" : "#{myStartDateTime}",
24 | "type" : "Schedule",
25 | "period" : "#{mySchedulePeriod}"
26 | },
27 | {
28 | "id" : "PipewelderEC2Resource",
29 | "terminateAfter" : "#{myTerminateAfter}",
30 | "instanceType" : "t1.micro",
31 | "type" : "Ec2Resource"
32 | },
33 | {
34 | "id" : "PipewelderS3InputLocation",
35 | "directoryPath" : "#{myS3InputDir}",
36 | "type" : "S3DataNode"
37 | },
38 | {
39 | "id" : "PipewelderS3OutputLocation",
40 | "directoryPath" : "#{myS3OutputDir}/#{format(@scheduledStartTime, 'YYYY-MM-dd_HHmmss')}",
41 | "type" : "S3DataNode"
42 | }
43 | ],
44 |
45 | "parameters" : [
46 | {
47 | "id": "myName",
48 | "description": "A unique name for this pipeline, passed to CreatePipeline",
49 | "type": "String"
50 | },
51 | {
52 | "id": "myDescription",
53 | "description": "A description of this pipeline, passed to CreatePipeline",
54 | "type": "String"
55 | },
56 | {
57 | "id": "myTags",
58 | "description": "A list of tag:value pairs, passed to CreatePipeline",
59 | "default": [],
60 | "type": "String",
61 | "isArray": "True"
62 | },
63 | {
64 | "id": "myS3InputDir",
65 | "description": "S3 directory where the run executable lives, destination for Pipewelder 'upload' commands",
66 | "type": "AWS::S3::ObjectKey"
67 | },
68 | {
69 | "id": "myS3OutputDir",
70 | "description": "S3 directory where output files are collected",
71 | "type": "AWS::S3::ObjectKey"
72 | },
73 | {
74 | "id": "myS3LogDir",
75 | "description": "S3 log folder",
76 | "type": "AWS::S3::ObjectKey"
77 | },
78 | {
79 | "id": "myStartDateTime",
80 | "description": "Instant for the first run; Pipewelder will add multiples of mySchedulePeriod to ensure this instant is in the future",
81 | "type": "String"
82 | },
83 | {
84 | "id": "mySchedulePeriod",
85 | "description": "How often to run, such as '1 hours'",
86 | "type": "String"
87 | },
88 | {
89 | "id": "myTerminateAfter",
90 | "default": "#{format(minusMinutes(#{mySchedulePeriod}, 10))}",
91 | "description": "duration after which the run should be terminated",
92 | "type": "String"
93 | }
94 | ]
95 | }
96 |
--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
1 | Pipewelder
2 | ==========
3 |
4 | .. figure:: welder.jpg
5 | :alt: A worker welding a pipe
6 |
7 | Pipewelder is a framework that provides a command-line tool and Python
8 | API to manage `AWS Data Pipeline
9 | <http://aws.amazon.com/datapipeline/>`__ jobs from flat files.
10 | Simple uses it as a cron-like job scheduler.
11 |
12 | Source
13 | https://github.com/SimpleFinance/pipewelder
14 |
15 | Documentation
16 | http://pipewelder.readthedocs.org
17 |
18 | PyPI
19 | https://pypi.python.org/pypi/pipewelder
20 |
21 | Overview
22 | --------
23 |
24 | Pipewelder aims to ease the task of scheduling jobs by defining very
25 | simple pipelines which are little more than an execution schedule,
26 | offloading most of the execution logic to files in S3. Pipewelder uses
27 | Data Pipeline's concept of data staging (described in the AWS Data
28 | Pipeline Developer Guide)
29 | to pull input files from S3 at the beginning of execution and to upload
30 | output files back to S3 at the end of execution.
31 |
32 | If you follow Pipewelder's directory structure, all of your pipeline
33 | logic can live in version-controlled flat files. The included
34 | command-line interface gives you simple commands to validate your
35 | pipeline definitions, upload task definitions to S3, and activate your
36 | pipelines.
37 |
38 | Installation
39 | ------------
40 |
41 | Pipewelder is available from `PyPI <https://pypi.python.org/pypi/pipewelder>`__
42 | via ``pip`` and is compatible with Python 2.6, 2.7, 3.3, and 3.4:
43 |
44 | ::
45 |
46 | pip install pipewelder
47 |
48 | The easiest way to get started is to clone the project from GitHub, copy
49 | the example project from Pipewelder's tests, and then modify to suit:
50 |
51 | .. code:: bash
52 |
53 | git clone https://github.com/SimpleFinance/pipewelder.git
54 | cp -r pipewelder/tests/test_data my-pipewelder-project
55 |
56 | If you're setting up Pipewelder and need help, feel free to email the
57 | author.
58 |
59 | Development
60 | -----------
61 |
62 | To do development on Pipewelder, clone the repository and run ``make``
63 | to install dependencies and run tests.
64 |
65 | Directory Structure
66 | -------------------
67 |
68 | To use Pipewelder, you provide a template pipeline definition along with
69 | one or more directories that correspond to particular pipeline
70 | instances. The directory structure looks like this (see the
71 | ``tests/test_data`` directory for a working example):
72 |
73 | ::
74 |
75 | pipeline_definition.json
76 | pipewelder.json <- optional configuration file
77 | my_first_pipeline/
78 | run
79 | values.json
80 | tasks/
81 | task1.sh
82 | task2.sh
83 | my_second_pipeline/
84 | ...
85 |
86 | The ``values.json`` file in each pipeline directory specifies parameter
87 | values that are used to modify the template definition, including the
88 | S3 paths for inputs, outputs, and logs. Some of these values are used
89 | directly by Pipewelder as well.
90 |
91 | A ``ShellCommandActivity`` in the template definition simply looks
92 | for an executable file named ``run`` and executes it (see the AWS
93 | Data Pipeline documentation for details on this activity type).
94 | ``run`` is the entry point for whatever work you want your pipeline
95 | to do.
96 |
97 | Often, your ``run`` executable will be a wrapper script to execute a
98 | variety of similar tasks. When that's the case, use the ``tasks``
99 | subdirectory to hold these definitions. These tasks could be text files,
100 | shell scripts, SQL code, or whatever else your ``run`` file expects.
101 | Pipewelder gives the ``tasks`` folder special treatment: when uploading
102 | files, the CLI removes any existing task definitions from S3 first.
103 |
104 | Using the Command-Line Interface
105 | --------------------------------
106 |
107 | The Pipewelder CLI should always be invoked from the top-level directory
108 | of your definitions (the directory where ``pipeline_definition.json``
109 | lives). If your directory structure matches Pipewelder's expectations,
110 | it should work without further configuration.
111 |
112 | As you make changes to your template definition or ``values.json``
113 | files, it can be useful to check whether AWS considers your definitions
114 | valid:
115 |
116 | ::
117 |
118 | $ pipewelder validate
119 |
120 | Once you've defined your pipelines, you'll need to upload the files to
121 | S3:
122 |
123 | ::
124 |
125 | $ pipewelder upload
126 |
127 | Finally, activate your pipelines:
128 |
129 | ::
130 |
131 | $ pipewelder activate
132 |
133 | Any time you change the ``values.json`` or ``pipeline_definition.json``,
134 | you'll need to run the ``activate`` subcommand again. Because active
135 | pipelines can't be modified, the ``activate`` command will delete the
136 | existing pipeline and create a new one in its place. The run history for
137 | the previous pipeline will be discarded.
138 |
139 | Acknowledgments
140 | ---------------
141 |
142 | Pipewelder's package structure is based on
143 | `python-project-template <https://github.com/seanfisk/python-project-template>`__.
144 |
--------------------------------------------------------------------------------
/pipewelder/cli.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | """
4 | The Pipewelder command-line interface.
5 | """
6 |
7 | from __future__ import print_function
8 |
9 | import argparse
10 | import os
11 | import sys
12 | import boto.datapipeline
13 |
14 | from glob import glob
15 |
16 | from pipewelder import metadata, util, Pipewelder
17 |
18 | import logging
19 | logging.basicConfig(level="INFO")
20 |
21 |
22 | CONFIG_DEFAULTS = {
23 | "dirs": ["*"],
24 | "region": "",
25 | "template": "pipeline_definition.json",
26 | "values": [],
27 | }
28 |
29 |
30 | def main(argv):
31 | """Program entry point.
32 | :param argv: command-line arguments
33 | :type argv: :class:`list`
34 | """
35 | author_strings = []
36 | for name, email in zip(metadata.authors, metadata.emails):
37 | author_strings.append('Author: {0} <{1}>'.format(name, email))
38 |
39 | epilog = '''
40 | Pipewelder {version}
41 | {authors}
42 | URL: <{url}>
43 | '''.format(
44 | project=metadata.project,
45 | version=metadata.version,
46 | authors='\n'.join(author_strings),
47 | url=metadata.url)
48 |
49 | parser = argparse.ArgumentParser(
50 | prog=argv[0],
51 | formatter_class=argparse.RawDescriptionHelpFormatter,
52 | description=metadata.description,
53 | epilog=epilog)
54 | parser.add_argument(
55 | '-V', '--version',
56 | action='version',
57 | version='{0} {1}'.format(metadata.project, metadata.version))
58 | parser.add_argument(
59 | 'action',
60 | help="""Action to take:
61 | 'validate' pipeline definitions with AWS;
62 | 'put-definition' of pipelines to AWS;
63 |     'upload' pipeline files to myS3InputDir;
64 | 'activate' defined pipelines (also puts definitions if needed);
65 | 'delete' pipelines from AWS
66 | """)
67 | parser.add_argument(
68 | '--group',
69 | default=None,
70 | help="Group within pipewelder.json to act on; defaults to all")
71 |
72 | args = parser.parse_args(args=argv[1:])
73 | args.action = args.action.replace('-', '_')
74 |
75 | defaults = {}
76 |
77 | if 'AWS_ACCESS_KEY_ID' not in os.environ:
78 | parser.error("Must set AWS_ACCESS_KEY_ID")
79 | if 'AWS_SECRET_ACCESS_KEY' not in os.environ:
80 | parser.error("Must set AWS_SECRET_ACCESS_KEY")
81 | if 'AWS_DEFAULT_REGION' in os.environ:
82 | defaults['region'] = os.environ['AWS_DEFAULT_REGION']
83 |
84 |     config_path = ('pipewelder.json' if os.path.exists('pipewelder.json')
85 |                    else None)
86 | configs = pipewelder_configs(config_path, defaults)
87 | print("Reading configuration from {0}".format(config_path))
88 |
89 | for name, config in configs.items():
90 | if args.group and args.group != name:
91 | continue
92 | if name == 'defaults':
93 | continue
94 | print("Acting on configuration '{0}'".format(name))
95 | conn = boto.datapipeline.connect_to_region(config['region'])
96 | pw = build_pipewelder(conn, config)
97 | if not execute_pipewelder_action(pw, args.action):
98 | return 1
99 |
100 | return 0
101 |
102 |
103 | def entry_point():
104 | """
105 | Zero-argument entry point for use with setuptools/distribute.
106 | """
107 | raise SystemExit(main(sys.argv))
108 |
109 |
110 | def build_pipewelder(conn, config):
111 | """
112 | Return a Pipewelder object defined by *config*.
113 | """
114 | try:
115 | pw = Pipewelder(conn, config['template'])
116 | except IOError as e:
117 | print(e)
118 |         raise SystemExit(1)  # can't build a Pipewelder without the template
119 | for d in config['dirs']:
120 | p = pw.add_pipeline(d)
121 | for k, v in config["values"].items():
122 | p.values[k] = v
123 | return pw
124 |
125 |
126 | def execute_pipewelder_action(pw, action):
127 | return_value = call_method(pw, action)
128 | if not return_value:
129 | print("Failed '{0}' action"
130 | .format(action))
131 | return return_value
132 |
133 |
134 | def pipewelder_configs(filename=None, defaults=None):
135 | """
136 | Parse json from *filename* for Pipewelder object configurations.
137 |
138 | Returns a dict which maps config names to dicts of options.
139 | """
140 | if filename is None:
141 | data = {"pipewelder": {}}
142 | dirname = os.path.abspath('.')
143 | else:
144 | dirname = os.path.dirname(os.path.abspath(filename))
145 | data = util.load_json(filename)
146 | defaults = defaults or {}
147 | data_defaults = data.get('defaults', {})
148 | defaults = dict(list(CONFIG_DEFAULTS.items()) +
149 | list(data_defaults.items()) +
150 | list(defaults.items()))
151 | outputs = {}
152 | for name in data:
153 | if name == 'defaults':
154 | continue
155 | this_config = dict(list(defaults.items()) +
156 | list(data[name].items()))
157 | dirs = []
158 | with util.cd(dirname):
159 | for entry in this_config['dirs']:
160 | for item in glob(entry):
161 | if os.path.exists(os.path.join(item, 'values.json')):
162 | dirs.append(item)
163 | outputs[name] = {
164 | "name": name,
165 | "dirs": dirs,
166 | "region": this_config['region'],
167 | "template": this_config['template'],
168 | "values": this_config['values'],
169 | }
170 | return outputs
171 |
172 |
173 | def call_method(obj, name):
174 | """
175 | Call the method *name* on *obj*.
176 | """
177 | return getattr(obj, name)()
178 |
179 |
180 | if __name__ == '__main__':
181 | entry_point()
182 |
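183 | # For illustration, with the pipewelder.json under tests/test_data,
184 | # pipewelder_configs('pipewelder.json') returns:
185 | #
186 | #   {'dev': {'name': 'dev', 'dirs': ['echoer'], 'region': 'us-west-2',
187 | #            'template': 'pipeline_definition.json',
188 | #            'values': {'myEnv': 'dev'}}}
189 | #
190 | # where 'dirs' is the '*' glob expanded to directories holding a values.json.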
--------------------------------------------------------------------------------
/pipewelder/connection.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # The code in this file is modified from:
4 | # https://github.com/boto/boto/blob/2.36.0/boto/datapipeline/layer1.py
5 | #
6 | # The original code carries the following license:
7 | # # Copyright (c) 2013 Amazon.com, Inc. or its affiliates. All Rights Reserved
8 | # #
9 | # # Permission is hereby granted, free of charge, to any person obtaining a
10 | # # copy of this software and associated documentation files (the
11 | # # "Software"), to deal in the Software without restriction, including
12 | # # without limitation the rights to use, copy, modify, merge, publish, dis-
13 | # # tribute, sublicense, and/or sell copies of the Software, and to permit
14 | # # persons to whom the Software is furnished to do so, subject to the fol-
15 | # # lowing conditions:
16 | # #
17 | # # The above copyright notice and this permission notice shall be included
18 | # # in all copies or substantial portions of the Software.
19 | # #
20 | # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
21 | # # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
22 | # # ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
23 | # # SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
24 | # # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25 | # # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
26 | # # IN THE SOFTWARE.
27 |
28 | """
29 | A patch to the boto DataPipelineConnection object.
30 |
31 | As of boto 2.36.0, putting and validating pipeline parameters/values
32 | was not supported.
33 | """
34 |
35 | import json
36 |
37 | from boto.datapipeline.layer1 import DataPipelineConnection
38 |
39 |
40 | def put_pipeline_definition(self,
41 | pipeline_objects,
42 | pipeline_id,
43 | parameter_objects=None,
44 | parameter_values=None):
45 | """
46 | Adds tasks, schedules, and preconditions that control the
47 | behavior of the pipeline. You can use PutPipelineDefinition to
48 | populate a new pipeline or to update an existing pipeline that
49 | has not yet been activated.
50 | """
51 | params = {
52 | 'pipelineId': pipeline_id,
53 | 'pipelineObjects': pipeline_objects,
54 | }
55 | if parameter_objects is not None:
56 | params['parameterObjects'] = parameter_objects
57 | if parameter_values is not None:
58 | params['parameterValues'] = parameter_values
59 | return self.make_request(action='PutPipelineDefinition',
60 | body=json.dumps(params))
61 |
62 |
63 | def validate_pipeline_definition(self,
64 | pipeline_objects,
65 | pipeline_id,
66 | parameter_objects=None,
67 | parameter_values=None):
68 | """
69 | Tests the pipeline definition with a set of validation checks
70 | to ensure that it is well formed and can run without error.
71 | """
72 | params = {
73 | 'pipelineId': pipeline_id,
74 | 'pipelineObjects': pipeline_objects,
75 | }
76 | if parameter_objects is not None:
77 | params['parameterObjects'] = parameter_objects
78 | if parameter_values is not None:
79 | params['parameterValues'] = parameter_values
80 | return self.make_request(action='ValidatePipelineDefinition',
81 | body=json.dumps(params))
82 |
83 |
84 | def create_pipeline(self, name, unique_id, description=None, tags=None):
85 | """
86 | Creates a new empty pipeline. When this action succeeds, you
87 | can then use the PutPipelineDefinition action to populate the
88 | pipeline.
89 | :type name: string
90 | :param name: The name of the new pipeline. You can use the same name
91 | for multiple pipelines associated with your AWS account, because
92 | AWS Data Pipeline assigns each new pipeline a unique pipeline
93 | identifier.
94 | :type unique_id: string
95 | :param unique_id: A unique identifier that you specify. This identifier
96 | is not the same as the pipeline identifier assigned by AWS Data
97 | Pipeline. You are responsible for defining the format and ensuring
98 | the uniqueness of this identifier. You use this parameter to ensure
99 | idempotency during repeated calls to CreatePipeline. For example,
100 | if the first call to CreatePipeline does not return a clear
101 | success, you can pass in the same unique identifier and pipeline
102 | name combination on a subsequent call to CreatePipeline.
103 | CreatePipeline ensures that if a pipeline already exists with the
104 | same name and unique identifier, a new pipeline will not be
105 | created. Instead, you'll receive the pipeline identifier from the
106 | previous attempt. The uniqueness of the name and unique identifier
107 | combination is scoped to the AWS account or IAM user credentials.
108 | :type description: string
109 | :param description: The description of the new pipeline.
110 | """
111 | params = {
112 | 'name': name,
113 | 'uniqueId': unique_id,
114 | }
115 | if description is not None:
116 | params['description'] = description
117 | if tags is not None:
118 | params['tags'] = tags
119 | return self.make_request(action='CreatePipeline',
120 | body=json.dumps(params))
121 |
122 |
123 | DataPipelineConnection.put_pipeline_definition = (
124 | put_pipeline_definition)
125 | DataPipelineConnection.validate_pipeline_definition = (
126 | validate_pipeline_definition)
127 | DataPipelineConnection.create_pipeline = (
128 | create_pipeline)
129 |
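130 | # Illustrative usage (a sketch, not part of the original module): importing
131 | # this module is enough to apply the patches, after which the extended
132 | # signatures are available on any DataPipelineConnection:
133 | #
134 | #   import boto.datapipeline
135 | #   import pipewelder.connection  # noqa -- patches applied on import
136 | #   conn = boto.datapipeline.connect_to_region('us-west-2')
137 | #   conn.create_pipeline('my-pipeline', 'my-unique-id', description='example')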
--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
1 | @ECHO OFF
2 |
3 | REM Command file for Sphinx documentation
4 |
5 | if "%SPHINXBUILD%" == "" (
6 | set SPHINXBUILD=sphinx-build
7 | )
8 | set BUILDDIR=build
9 | set SPHINXOPTS=-W
10 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% source
11 | set I18NSPHINXOPTS=%SPHINXOPTS% source
12 | if NOT "%PAPER%" == "" (
13 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS%
14 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS%
15 | )
16 |
17 | if "%1" == "" goto help
18 |
19 | if "%1" == "help" (
20 | :help
21 | 	echo.Please use `make ^<target^>` where ^<target^> is one of
22 | echo. html to make standalone HTML files
23 | echo. dirhtml to make HTML files named index.html in directories
24 | echo. singlehtml to make a single large HTML file
25 | echo. pickle to make pickle files
26 | echo. json to make JSON files
27 | echo. htmlhelp to make HTML files and a HTML help project
28 | echo. qthelp to make HTML files and a qthelp project
29 | echo. devhelp to make HTML files and a Devhelp project
30 | echo. epub to make an epub
31 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter
32 | echo. text to make text files
33 | echo. man to make manual pages
34 | echo. texinfo to make Texinfo files
35 | echo. gettext to make PO message catalogs
36 | echo. changes to make an overview over all changed/added/deprecated items
37 | echo. linkcheck to check all external links for integrity
38 | echo. doctest to run all doctests embedded in the documentation if enabled
39 | goto end
40 | )
41 |
42 | if "%1" == "clean" (
43 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i
44 | del /q /s %BUILDDIR%\*
45 | goto end
46 | )
47 |
48 | if "%1" == "html" (
49 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html
50 | if errorlevel 1 exit /b 1
51 | echo.
52 | echo.Build finished. The HTML pages are in %BUILDDIR%/html.
53 | goto end
54 | )
55 |
56 | if "%1" == "dirhtml" (
57 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml
58 | if errorlevel 1 exit /b 1
59 | echo.
60 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml.
61 | goto end
62 | )
63 |
64 | if "%1" == "singlehtml" (
65 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml
66 | if errorlevel 1 exit /b 1
67 | echo.
68 | echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml.
69 | goto end
70 | )
71 |
72 | if "%1" == "pickle" (
73 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle
74 | if errorlevel 1 exit /b 1
75 | echo.
76 | echo.Build finished; now you can process the pickle files.
77 | goto end
78 | )
79 |
80 | if "%1" == "json" (
81 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json
82 | if errorlevel 1 exit /b 1
83 | echo.
84 | echo.Build finished; now you can process the JSON files.
85 | goto end
86 | )
87 |
88 | if "%1" == "htmlhelp" (
89 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp
90 | if errorlevel 1 exit /b 1
91 | echo.
92 | echo.Build finished; now you can run HTML Help Workshop with the ^
93 | .hhp project file in %BUILDDIR%/htmlhelp.
94 | goto end
95 | )
96 |
97 | if "%1" == "qthelp" (
98 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp
99 | if errorlevel 1 exit /b 1
100 | echo.
101 | echo.Build finished; now you can run "qcollectiongenerator" with the ^
102 | .qhcp project file in %BUILDDIR%/qthelp, like this:
103 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\Pipewelder.qhcp
104 | echo.To view the help file:
105 | echo.^> assistant -collectionFile %BUILDDIR%\qthelp\Pipewelder.qhc
106 | goto end
107 | )
108 |
109 | if "%1" == "devhelp" (
110 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp
111 | if errorlevel 1 exit /b 1
112 | echo.
113 | echo.Build finished.
114 | goto end
115 | )
116 |
117 | if "%1" == "epub" (
118 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub
119 | if errorlevel 1 exit /b 1
120 | echo.
121 | echo.Build finished. The epub file is in %BUILDDIR%/epub.
122 | goto end
123 | )
124 |
125 | if "%1" == "latex" (
126 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
127 | if errorlevel 1 exit /b 1
128 | echo.
129 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex.
130 | goto end
131 | )
132 |
133 | if "%1" == "text" (
134 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text
135 | if errorlevel 1 exit /b 1
136 | echo.
137 | echo.Build finished. The text files are in %BUILDDIR%/text.
138 | goto end
139 | )
140 |
141 | if "%1" == "man" (
142 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man
143 | if errorlevel 1 exit /b 1
144 | echo.
145 | echo.Build finished. The manual pages are in %BUILDDIR%/man.
146 | goto end
147 | )
148 |
149 | if "%1" == "texinfo" (
150 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo
151 | if errorlevel 1 exit /b 1
152 | echo.
153 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo.
154 | goto end
155 | )
156 |
157 | if "%1" == "gettext" (
158 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale
159 | if errorlevel 1 exit /b 1
160 | echo.
161 | echo.Build finished. The message catalogs are in %BUILDDIR%/locale.
162 | goto end
163 | )
164 |
165 | if "%1" == "changes" (
166 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes
167 | if errorlevel 1 exit /b 1
168 | echo.
169 | echo.The overview file is in %BUILDDIR%/changes.
170 | goto end
171 | )
172 |
173 | if "%1" == "linkcheck" (
174 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck
175 | if errorlevel 1 exit /b 1
176 | echo.
177 | echo.Link check complete; look for any errors in the above output ^
178 | or in %BUILDDIR%/linkcheck/output.txt.
179 | goto end
180 | )
181 |
182 | if "%1" == "doctest" (
183 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest
184 | if errorlevel 1 exit /b 1
185 | echo.
186 | echo.Testing of doctests in the sources finished, look at the ^
187 | results in %BUILDDIR%/doctest/output.txt.
188 | goto end
189 | )
190 |
191 | :end
192 |
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line.
5 | SPHINXOPTS = -W
6 | SPHINXBUILD = sphinx-build
7 | PAPER =
8 | BUILDDIR = build
9 |
10 | # Internal variables.
11 | PAPEROPT_a4 = -D latex_paper_size=a4
12 | PAPEROPT_letter = -D latex_paper_size=letter
13 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source
14 | # the i18n builder cannot share the environment and doctrees with the others
15 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source
16 |
17 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext
18 |
19 | help:
20 | @echo "Please use \`make ' where is one of"
21 | @echo " html to make standalone HTML files"
22 | @echo " dirhtml to make HTML files named index.html in directories"
23 | @echo " singlehtml to make a single large HTML file"
24 | @echo " pickle to make pickle files"
25 | @echo " json to make JSON files"
26 | @echo " htmlhelp to make HTML files and a HTML help project"
27 | @echo " qthelp to make HTML files and a qthelp project"
28 | @echo " devhelp to make HTML files and a Devhelp project"
29 | @echo " epub to make an epub"
30 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
31 | @echo " latexpdf to make LaTeX files and run them through pdflatex"
32 | @echo " text to make text files"
33 | @echo " man to make manual pages"
34 | @echo " texinfo to make Texinfo files"
35 | @echo " info to make Texinfo files and run them through makeinfo"
36 | @echo " gettext to make PO message catalogs"
37 | @echo " changes to make an overview of all changed/added/deprecated items"
38 | @echo " linkcheck to check all external links for integrity"
39 | @echo " doctest to run all doctests embedded in the documentation (if enabled)"
40 |
41 | clean:
42 | -rm -rf $(BUILDDIR)/*
43 |
44 | html:
45 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
46 | @echo
47 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
48 |
49 | dirhtml:
50 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
51 | @echo
52 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
53 |
54 | singlehtml:
55 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
56 | @echo
57 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
58 |
59 | pickle:
60 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
61 | @echo
62 | @echo "Build finished; now you can process the pickle files."
63 |
64 | json:
65 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
66 | @echo
67 | @echo "Build finished; now you can process the JSON files."
68 |
69 | htmlhelp:
70 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
71 | @echo
72 | @echo "Build finished; now you can run HTML Help Workshop with the" \
73 | ".hhp project file in $(BUILDDIR)/htmlhelp."
74 |
75 | qthelp:
76 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
77 | @echo
78 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \
79 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
80 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/Pipewelder.qhcp"
81 | @echo "To view the help file:"
82 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/Pipewelder.qhc"
83 |
84 | devhelp:
85 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
86 | @echo
87 | @echo "Build finished."
88 | @echo "To view the help file:"
89 | @echo "# mkdir -p $HOME/.local/share/devhelp/Pipewelder"
90 | @echo "# ln -s $(BUILDDIR)/devhelp $HOME/.local/share/devhelp/Pipewelder"
91 | @echo "# devhelp"
92 |
93 | epub:
94 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
95 | @echo
96 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub."
97 |
98 | latex:
99 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
100 | @echo
101 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
102 | @echo "Run \`make' in that directory to run these through (pdf)latex" \
103 | "(use \`make latexpdf' here to do that automatically)."
104 |
105 | latexpdf:
106 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
107 | @echo "Running LaTeX files through pdflatex..."
108 | $(MAKE) -C $(BUILDDIR)/latex all-pdf
109 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
110 |
111 | text:
112 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
113 | @echo
114 | @echo "Build finished. The text files are in $(BUILDDIR)/text."
115 |
116 | man:
117 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
118 | @echo
119 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man."
120 |
121 | texinfo:
122 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
123 | @echo
124 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
125 | @echo "Run \`make' in that directory to run these through makeinfo" \
126 | "(use \`make info' here to do that automatically)."
127 |
128 | info:
129 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
130 | @echo "Running Texinfo files through makeinfo..."
131 | make -C $(BUILDDIR)/texinfo info
132 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
133 |
134 | gettext:
135 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
136 | @echo
137 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
138 |
139 | changes:
140 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
141 | @echo
142 | @echo "The overview file is in $(BUILDDIR)/changes."
143 |
144 | linkcheck:
145 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
146 | @echo
147 | @echo "Link check complete; look for any errors in the above output " \
148 | "or in $(BUILDDIR)/linkcheck/output.txt."
149 |
150 | doctest:
151 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
152 | @echo "Testing of doctests in the sources finished, look at the " \
153 | "results in $(BUILDDIR)/doctest/output.txt."
154 |
--------------------------------------------------------------------------------
/pipewelder/translator.py:
--------------------------------------------------------------------------------
1 | # Copyright 2014 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License"). You
4 | # may not use this file except in compliance with the License. A copy of
5 | # the License is located at
6 | #
7 | # http://aws.amazon.com/apache2.0/
8 | #
9 | # or in the "license" file accompanying this file. This file is
10 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
11 | # ANY KIND, either express or implied. See the License for the specific
12 | # language governing permissions and limitations under the License.
13 | import json
14 |
15 |
16 | class PipelineDefinitionError(Exception):
17 | def __init__(self, msg, definition):
18 | full_msg = (
19 | "Error in pipeline definition: %s\n" % msg)
20 | super(PipelineDefinitionError, self).__init__(full_msg)
21 | self.msg = msg
22 | self.definition = definition
23 |
24 |
25 | def api_to_definition(definition):
26 | # When we're translating from api_response -> definition
27 | # we have to be careful *not* to mutate the existing
28 |     # response as other code might still need the original
29 | # api_response.
30 | if 'pipelineObjects' in definition:
31 | definition['objects'] = _api_to_objects_definition(
32 | definition.pop('pipelineObjects'))
33 | if 'parameterObjects' in definition:
34 | definition['parameters'] = _api_to_parameters_definition(
35 | definition.pop('parameterObjects'))
36 | if 'parameterValues' in definition:
37 | definition['values'] = _api_to_values_definition(
38 | definition.pop('parameterValues'))
39 | return definition
40 |
41 |
42 | def definition_to_api_objects(definition):
43 | if 'objects' not in definition:
44 | raise PipelineDefinitionError('Missing "objects" key', definition)
45 | api_elements = []
46 | # To convert to the structure expected by the service,
47 | # we convert the existing structure to a list of dictionaries.
48 | # Each dictionary has a 'fields', 'id', and 'name' key.
49 | for element in definition['objects']:
50 | try:
51 | element_id = element.pop('id')
52 | except KeyError:
53 | raise PipelineDefinitionError('Missing "id" key of element: %s' %
54 | json.dumps(element), definition)
55 | api_object = {'id': element_id}
56 | # If a name is provided, then we use that for the name,
57 | # otherwise the id is used for the name.
58 | name = element.pop('name', element_id)
59 | api_object['name'] = name
60 | # Now we need the field list. Each element in the field list is a dict
61 | # with a 'key', 'stringValue'|'refValue'
62 | fields = []
63 | for key, value in sorted(element.items()):
64 | fields.extend(_parse_each_field(key, value))
65 | api_object['fields'] = fields
66 | api_elements.append(api_object)
67 | return api_elements
68 |
69 |
70 | def definition_to_api_parameters(definition):
71 | if 'parameters' not in definition:
72 | return None
73 | parameter_objects = []
74 | for element in definition['parameters']:
75 | try:
76 | parameter_id = element.pop('id')
77 | except KeyError:
78 | raise PipelineDefinitionError('Missing "id" key of parameter: %s' %
79 | json.dumps(element), definition)
80 | parameter_object = {'id': parameter_id}
81 | # Now we need the attribute list. Each element in the attribute list
82 | # is a dict with a 'key', 'stringValue'
83 | attributes = []
84 | for key, value in sorted(element.items()):
85 | attributes.extend(_parse_each_field(key, value))
86 | parameter_object['attributes'] = attributes
87 | parameter_objects.append(parameter_object)
88 | return parameter_objects
89 |
90 |
91 | def definition_to_parameter_values(definition):
92 | if 'values' not in definition:
93 | return None
94 | parameter_values = []
95 | for key in definition['values']:
96 | parameter_values.extend(
97 | _convert_single_parameter_value(key, definition['values'][key]))
98 |
99 | return parameter_values
100 |
101 |
102 | def _parse_each_field(key, value):
103 | values = []
104 | if isinstance(value, list):
105 | for item in value:
106 | values.append(_convert_single_field(key, item))
107 | else:
108 | values.append(_convert_single_field(key, value))
109 | return values
110 |
111 |
112 | def _convert_single_field(key, value):
113 | field = {'key': key}
114 | if isinstance(value, dict) and list(value.keys()) == ['ref']:
115 | field['refValue'] = value['ref']
116 | else:
117 | field['stringValue'] = value
118 | return field
119 |
120 |
121 | def _convert_single_parameter_value(key, values):
122 | parameter_values = []
123 | if isinstance(values, list):
124 | for each_value in values:
125 | parameter_value = {'id': key, 'stringValue': each_value}
126 | parameter_values.append(parameter_value)
127 | else:
128 | parameter_value = {'id': key, 'stringValue': values}
129 | parameter_values.append(parameter_value)
130 | return parameter_values
131 |
132 |
133 | def _api_to_objects_definition(api_response):
134 | pipeline_objects = []
135 | for element in api_response:
136 | current = {
137 | 'id': element['id'],
138 | 'name': element['name']
139 | }
140 | for field in element['fields']:
141 | key = field['key']
142 | if 'stringValue' in field:
143 | value = field['stringValue']
144 | else:
145 | value = {'ref': field['refValue']}
146 | _add_value(key, value, current)
147 | pipeline_objects.append(current)
148 | return pipeline_objects
149 |
150 |
151 | def _api_to_parameters_definition(api_response):
152 | parameter_objects = []
153 | for element in api_response:
154 | current = {
155 | 'id': element['id']
156 | }
157 | for attribute in element['attributes']:
158 | _add_value(attribute['key'], attribute['stringValue'], current)
159 | parameter_objects.append(current)
160 | return parameter_objects
161 |
162 |
163 | def _api_to_values_definition(api_response):
164 | pipeline_values = {}
165 | for element in api_response:
166 | _add_value(element['id'], element['stringValue'], pipeline_values)
167 | return pipeline_values
168 |
169 |
170 | def _add_value(key, value, current_map):
171 | if key not in current_map:
172 | current_map[key] = value
173 | elif isinstance(current_map[key], list):
174 | # Dupe keys result in values aggregating
175 | # into a list.
176 | current_map[key].append(value)
177 | else:
178 | converted_list = [current_map[key], value]
179 | current_map[key] = converted_list
180 |
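181 | # Illustrative example (not part of the original module): a {'ref': ...}
182 | # value becomes a refValue field and everything else a stringValue:
183 | #
184 | #   >>> definition_to_api_objects(
185 | #   ...     {'objects': [{'id': 'Default', 'schedule': {'ref': 'Sched'}}]})
186 | #   [{'id': 'Default', 'name': 'Default',
187 | #     'fields': [{'key': 'schedule', 'refValue': 'Sched'}]}]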
--------------------------------------------------------------------------------
/pavement.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from __future__ import print_function
4 |
5 | import os
6 | import sys
7 | import time
8 | import subprocess
9 |
10 | from paver.easy import options, task, needs, consume_args
11 | from paver.setuputils import install_distutils_tasks
12 |
13 | # Import parameters from the setup file.
14 | sys.path.insert(0, os.path.abspath('.')) # NOPEP8
15 | from setup import (
16 | setup_dict, get_project_files, print_success_message,
17 | print_failure_message, _lint, _test, _test_all,
18 | CODE_DIRECTORY, DOCS_DIRECTORY, TESTS_DIRECTORY, PYTEST_FLAGS)
19 |
20 | options(setup=setup_dict)
21 |
22 | install_distutils_tasks()
23 |
24 | # Miscellaneous helper functions
25 |
26 |
27 | def print_passed():
28 | # generated on http://patorjk.com/software/taag/#p=display&f=Small&t=PASSED
29 | print_success_message(r''' ___ _ ___ ___ ___ ___
30 | | _ \/_\ / __/ __| __| \
31 | | _/ _ \\__ \__ \ _|| |) |
32 | |_|/_/ \_\___/___/___|___/
33 | ''')
34 |
35 |
36 | def print_failed():
37 | # generated on http://patorjk.com/software/taag/#p=display&f=Small&t=FAILED
38 | print_failure_message(r''' ___ _ ___ _ ___ ___
39 | | __/_\ |_ _| | | __| \
40 | | _/ _ \ | || |__| _|| |) |
41 | |_/_/ \_\___|____|___|___/
42 | ''')
43 |
44 |
45 | class cwd(object):
46 |     """Class used for temporarily changing directories. Can be thought of
47 | as a `pushd /my/dir' then a `popd' at the end.
48 | """
49 | def __init__(self, newcwd):
50 | """:param newcwd: directory to make the cwd
51 | :type newcwd: :class:`str`
52 | """
53 | self.newcwd = newcwd
54 |
55 | def __enter__(self):
56 | self.oldcwd = os.getcwd()
57 | os.chdir(self.newcwd)
58 | return os.getcwd()
59 |
60 | def __exit__(self, type_, value, traceback):
61 | # This acts like a `finally' clause: it will always be executed.
62 | os.chdir(self.oldcwd)
63 |
64 |
65 | # Task-related functions
66 |
67 | def _doc_make(*make_args):
68 |     """Run make in the Sphinx docs directory.
69 |
70 | :return: exit code
71 | """
72 | if sys.platform == 'win32':
73 | # Windows
74 | make_cmd = ['make.bat']
75 | else:
76 | # Linux, Mac OS X, and others
77 | make_cmd = ['make']
78 | make_cmd.extend(make_args)
79 |
80 | # Account for a stupid Python "bug" on Windows:
81 | #
82 | with cwd(DOCS_DIRECTORY):
83 | retcode = subprocess.call(make_cmd)
84 | return retcode
85 |
86 |
87 | # Tasks
88 |
89 | @task
90 | @needs('doc_html', 'setuptools.command.sdist')
91 | def sdist():
92 | """Build the HTML docs and the tarball."""
93 | pass
94 |
95 |
96 | @task
97 | def test():
98 | """Run the unit tests."""
99 | raise SystemExit(_test())
100 |
101 |
102 | @task
103 | def lint():
104 | # This refuses to format properly when running `paver help' unless
105 | # this ugliness is used.
106 | ('Perform PEP8 style check, run PyFlakes, and run McCabe complexity '
107 | 'metrics on the code.')
108 | raise SystemExit(_lint())
109 |
110 |
111 | @task
112 | def test_all():
113 | """Perform a style check and run all unit tests."""
114 | retcode = _test_all()
115 | if retcode == 0:
116 | print_passed()
117 | else:
118 | print_failed()
119 | raise SystemExit(retcode)
120 |
121 |
122 | @task
123 | @consume_args
124 | def run(args):
125 | """Run the package's main script. All arguments are passed to it."""
126 | # The main script expects to get the called executable's name as
127 | # argv[0]. However, paver doesn't provide that in args. Even if it did (or
128 | # we dove into sys.argv), it wouldn't be useful because it would be paver's
129 | # executable. So we just pass the package name in as the executable name,
130 | # since it's close enough. This should never be seen by an end user
131 | # installing through Setuptools anyway.
132 |     from pipewelder.cli import main
133 | raise SystemExit(main([CODE_DIRECTORY] + args))
134 |
135 |
136 | @task
137 | def commit():
138 | """Commit only if all the tests pass."""
139 | if _test_all() == 0:
140 | subprocess.check_call(['git', 'commit'])
141 | else:
142 | print_failure_message('\nTests failed, not committing.')
143 |
144 |
145 | @task
146 | def coverage():
147 | """Run tests and show test coverage report."""
148 | try:
149 | import pytest_cov # NOQA
150 | except ImportError:
151 | print_failure_message(
152 | 'Install the pytest coverage plugin to use this task, '
153 | "i.e., `pip install pytest-cov'.")
154 | raise SystemExit(1)
155 | import pytest
156 | pytest.main(PYTEST_FLAGS + [
157 | '--cov', CODE_DIRECTORY,
158 | '--cov-report', 'term-missing',
159 | TESTS_DIRECTORY])
160 |
161 |
162 | @task # NOQA
163 | def doc_watch():
164 | """Watch for changes in the docs and rebuild HTML docs when changed."""
165 | try:
166 | from watchdog.events import FileSystemEventHandler
167 | from watchdog.observers import Observer
168 | except ImportError:
169 | print_failure_message('Install the watchdog package to use this task, '
170 | "i.e., `pip install watchdog'.")
171 | raise SystemExit(1)
172 |
173 | class RebuildDocsEventHandler(FileSystemEventHandler):
174 | def __init__(self, base_paths):
175 | self.base_paths = base_paths
176 |
177 | def dispatch(self, event):
178 | """Dispatches events to the appropriate methods.
179 | :param event: The event object representing the file system event.
180 | :type event: :class:`watchdog.events.FileSystemEvent`
181 | """
182 | for base_path in self.base_paths:
183 | if event.src_path.endswith(base_path):
184 | super(RebuildDocsEventHandler, self).dispatch(event)
185 | # We found one that matches. We're done.
186 | return
187 |
188 | def on_modified(self, event):
189 | print_failure_message('Modification detected. Rebuilding docs.')
190 | # # Strip off the path prefix.
191 | # import os
192 | # if event.src_path[len(os.getcwd()) + 1:].startswith(
193 | # CODE_DIRECTORY):
194 | # # sphinx-build doesn't always pick up changes on code files,
195 | # # even though they are used to generate the documentation. As
196 | # # a workaround, just clean before building.
197 | doc_html()
198 | print_success_message('Docs have been rebuilt.')
199 |
200 | print_success_message(
201 | 'Watching for changes in project files, press Ctrl-C to cancel...')
202 | handler = RebuildDocsEventHandler(get_project_files())
203 | observer = Observer()
204 | observer.schedule(handler, path='.', recursive=True)
205 | observer.start()
206 | try:
207 | while True:
208 | time.sleep(1)
209 | except KeyboardInterrupt:
210 | observer.stop()
211 | observer.join()
212 |
213 |
214 | @task
215 | @needs('doc_html')
216 | def doc_open():
217 | """Build the HTML docs and open them in a web browser."""
218 | doc_index = os.path.join(DOCS_DIRECTORY, 'build', 'html', 'index.html')
219 | if sys.platform == 'darwin':
220 | # Mac OS X
221 | subprocess.check_call(['open', doc_index])
222 | elif sys.platform == 'win32':
223 | # Windows
224 | subprocess.check_call(['start', doc_index], shell=True)
225 |     elif sys.platform.startswith('linux'):
226 | # All freedesktop-compatible desktops
227 | subprocess.check_call(['xdg-open', doc_index])
228 | else:
229 | print_failure_message(
230 | "Unsupported platform. Please open `{0}' manually.".format(
231 | doc_index))
232 |
233 |
234 | @task
235 | def get_tasks():
236 | """Get all paver-defined tasks."""
237 | from paver.tasks import environment
238 | for t in environment.get_tasks():
239 | print(t.shortname)
240 |
241 |
242 | @task
243 | def doc_html():
244 | """Build the HTML docs."""
245 | retcode = _doc_make('html')
246 |
247 | if retcode:
248 | raise SystemExit(retcode)
249 |
250 |
251 | @task
252 | def doc_clean():
253 | """Clean (delete) the built docs."""
254 | retcode = _doc_make('clean')
255 |
256 | if retcode:
257 | raise SystemExit(retcode)
258 |
--------------------------------------------------------------------------------
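The cwd helper above behaves like a pushd/popd pair. A quick sketch of how it
is used (assuming pavement.py is importable from the project root):

    import os
    from pavement import cwd

    with cwd('docs'):
        # Inside the block the working directory is docs/ ...
        print(os.getcwd())
    # ... and it is restored afterwards, even if the body raised,
    # because __exit__ always runs os.chdir(self.oldcwd).
    print(os.getcwd())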
/docs/source/conf.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # flake8: noqa
3 |
4 | # This file is based upon the file generated by sphinx-quickstart. However,
5 | # where sphinx-quickstart hardcodes values in this file that you input, this
6 | # file has been changed to pull from your module's metadata module.
7 | #
8 | # This file is execfile()d with the current directory set to its containing
9 | # dir.
10 | #
11 | # Note that not all possible configuration values are present in this
12 | # autogenerated file.
13 | #
14 | # All configuration values have a default; values that are commented out
15 | # serve to show the default.
16 |
17 | import os
18 | import sys
19 |
20 | # If extensions (or modules to document with autodoc) are in another directory,
21 | # add these directories to sys.path here. If the directory is relative to the
22 | # documentation root, use os.path.abspath to make it absolute, like shown here.
23 | sys.path.insert(0, os.path.abspath('../..'))
24 |
25 | # Import project metadata
26 | from pipewelder import metadata
27 |
28 | # -- General configuration ----------------------------------------------------
29 |
30 | # If your documentation needs a minimal Sphinx version, state it here.
31 | #needs_sphinx = '1.0'
32 |
33 | # Add any Sphinx extension module names here, as strings. They can be
34 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
35 | extensions = ['sphinx.ext.autodoc', 'sphinx.ext.intersphinx',
36 | 'sphinx.ext.todo', 'sphinx.ext.coverage', 'sphinx.ext.viewcode']
37 |
38 | # show todos
39 | todo_include_todos = True
40 |
41 | # Add any paths that contain templates here, relative to this directory.
42 | templates_path = ['_templates']
43 |
44 | # The suffix of source filenames.
45 | source_suffix = '.rst'
46 |
47 | # The encoding of source files.
48 | #source_encoding = 'utf-8-sig'
49 |
50 | # The master toctree document.
51 | master_doc = 'index'
52 |
53 | # General information about the project.
54 | project = metadata.project
55 | copyright = metadata.copyright
56 |
57 | # The version info for the project you're documenting, acts as replacement for
58 | # |version| and |release|, also used in various other places throughout the
59 | # built documents.
60 | #
61 | # The short X.Y version.
62 | version = metadata.version
63 | # The full version, including alpha/beta/rc tags.
64 | release = metadata.version
65 |
66 | # The language for content autogenerated by Sphinx. Refer to documentation
67 | # for a list of supported languages.
68 | #language = None
69 |
70 | # There are two options for replacing |today|: either, you set today to some
71 | # non-false value, then it is used:
72 | #today = ''
73 | # Else, today_fmt is used as the format for a strftime call.
74 | #today_fmt = '%B %d, %Y'
75 |
76 | # List of patterns, relative to source directory, that match files and
77 | # directories to ignore when looking for source files.
78 | exclude_patterns = []
79 |
80 | # The reST default role (used for this markup: `text`) to use for all
81 | # documents.
82 | #default_role = None
83 |
84 | # If true, '()' will be appended to :func: etc. cross-reference text.
85 | #add_function_parentheses = True
86 |
87 | # If true, the current module name will be prepended to all description
88 | # unit titles (such as .. function::).
89 | #add_module_names = True
90 |
91 | # If true, sectionauthor and moduleauthor directives will be shown in the
92 | # output. They are ignored by default.
93 | #show_authors = False
94 |
95 | # The name of the Pygments (syntax highlighting) style to use.
96 | pygments_style = 'sphinx'
97 |
98 | # A list of ignored prefixes for module index sorting.
99 | #modindex_common_prefix = []
100 |
101 |
102 | # -- Options for HTML output --------------------------------------------------
103 |
104 | # The theme to use for HTML and HTML Help pages. See the documentation for
105 | # a list of builtin themes.
106 | html_theme = 'default'
107 |
108 | # Theme options are theme-specific and customize the look and feel of a theme
109 | # further. For a list of options available for each theme, see the
110 | # documentation.
111 | #html_theme_options = {}
112 |
113 | # Add any paths that contain custom themes here, relative to this directory.
114 | #html_theme_path = []
115 |
116 | # The name for this set of Sphinx documents. If None, it defaults to
117 | # "<project> v<release> documentation".
118 | #html_title = None
119 |
120 | # A shorter title for the navigation bar. Default is the same as html_title.
121 | #html_short_title = None
122 |
123 | # The name of an image file (relative to this directory) to place at the top
124 | # of the sidebar.
125 | #html_logo = None
126 |
127 | # The name of an image file (within the static path) to use as favicon of the
128 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
129 | # pixels large.
130 | #html_favicon = None
131 |
132 | # Add any paths that contain custom static files (such as style sheets) here,
133 | # relative to this directory. They are copied after the builtin static files,
134 | # so a file named "default.css" will overwrite the builtin "default.css".
135 | html_static_path = ['_static']
136 |
137 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
138 | # using the given strftime format.
139 | #html_last_updated_fmt = '%b %d, %Y'
140 |
141 | # If true, SmartyPants will be used to convert quotes and dashes to
142 | # typographically correct entities.
143 | #html_use_smartypants = True
144 |
145 | # Custom sidebar templates, maps document names to template names.
146 | #html_sidebars = {}
147 |
148 | # Additional templates that should be rendered to pages, maps page names to
149 | # template names.
150 | #html_additional_pages = {}
151 |
152 | # If false, no module index is generated.
153 | #html_domain_indices = True
154 |
155 | # If false, no index is generated.
156 | #html_use_index = True
157 |
158 | # If true, the index is split into individual pages for each letter.
159 | #html_split_index = False
160 |
161 | # If true, links to the reST sources are added to the pages.
162 | #html_show_sourcelink = True
163 |
164 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
165 | #html_show_sphinx = True
166 |
167 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
168 | #html_show_copyright = True
169 |
170 | # If true, an OpenSearch description file will be output, and all pages will
171 | # contain a <link> tag referring to it. The value of this option must be the
172 | # base URL from which the finished HTML is served.
173 | #html_use_opensearch = ''
174 |
175 | # This is the file name suffix for HTML files (e.g. ".xhtml").
176 | #html_file_suffix = None
177 |
178 | # Output file base name for HTML help builder.
179 | htmlhelp_basename = metadata.project_no_spaces + 'doc'
180 |
181 |
182 | # -- Options for LaTeX output -------------------------------------------------
183 |
184 | latex_elements = {
185 | # The paper size ('letterpaper' or 'a4paper').
186 | #'papersize': 'letterpaper',
187 |
188 | # The font size ('10pt', '11pt' or '12pt').
189 | #'pointsize': '10pt',
190 |
191 | # Additional stuff for the LaTeX preamble.
192 | #'preamble': '',
193 | }
194 |
195 | # Grouping the document tree into LaTeX files. List of tuples
196 | # (source start file, target name, title, author,
197 | # documentclass [howto/manual]).
198 | latex_documents = [
199 | ('index', metadata.project_no_spaces + '.tex',
200 | metadata.project + ' Documentation', metadata.authors_string,
201 | 'manual'),
202 | ]
203 |
204 | # The name of an image file (relative to this directory) to place at the top of
205 | # the title page.
206 | #latex_logo = None
207 |
208 | # For "manual" documents, if this is true, then toplevel headings are parts,
209 | # not chapters.
210 | #latex_use_parts = False
211 |
212 | # If true, show page references after internal links.
213 | #latex_show_pagerefs = False
214 |
215 | # If true, show URL addresses after external links.
216 | #latex_show_urls = False
217 |
218 | # Documents to append as an appendix to all manuals.
219 | #latex_appendices = []
220 |
221 | # If false, no module index is generated.
222 | #latex_domain_indices = True
223 |
224 |
225 | # -- Options for manual page output -------------------------------------------
226 |
227 | # One entry per manual page. List of tuples
228 | # (source start file, name, description, authors, manual section).
229 | man_pages = [
230 | ('index', metadata.package, metadata.project + ' Documentation',
231 | metadata.authors_string, 1)
232 | ]
233 |
234 | # If true, show URL addresses after external links.
235 | #man_show_urls = False
236 |
237 |
238 | # -- Options for Texinfo output -----------------------------------------------
239 |
240 | # Grouping the document tree into Texinfo files. List of tuples
241 | # (source start file, target name, title, author,
242 | # dir menu entry, description, category)
243 | texinfo_documents = [
244 | ('index', metadata.project_no_spaces,
245 | metadata.project + ' Documentation', metadata.authors_string,
246 | metadata.project_no_spaces, metadata.description, 'Miscellaneous'),
247 | ]
248 |
249 | # Documents to append as an appendix to all manuals.
250 | #texinfo_appendices = []
251 |
252 | # If false, no module index is generated.
253 | #texinfo_domain_indices = True
254 |
255 | # How to display URL addresses: 'footnote', 'no', or 'inline'.
256 | #texinfo_show_urls = 'footnote'
257 |
258 |
259 | # Example configuration for intersphinx: refer to the Python standard library.
260 | intersphinx_mapping = {
261 | 'python': ('http://docs.python.org/', None),
262 | 'boto': ('https://boto.readthedocs.org/en/latest/', None),
263 | }
264 |
265 | # Extra local configuration. This is useful for placing the class description
266 | # in the class docstring and the __init__ parameter documentation in the
267 | # __init__ docstring. See the Sphinx autodoc documentation on the
268 | # autoclass_content option for more
269 | # information.
270 | autoclass_content = 'both'
271 |
--------------------------------------------------------------------------------
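conf.py pulls everything project-specific from attributes of
pipewelder.metadata. A hypothetical minimal metadata module, with placeholder
values, just to show the shape that conf.py and setup.py expect:

    # pipewelder/metadata.py (sketch; every value below is a placeholder)
    package = 'pipewelder'
    project = 'Pipewelder'
    project_no_spaces = ''.join(project.split())
    version = '0.0.0'
    description = 'Placeholder one-line description'
    authors = ['Example Author']
    authors_string = ', '.join(authors)
    emails = ['author@example.com']
    copyright = '2015 ' + authors_string
    url = 'http://example.com/pipewelder'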
/setup.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from __future__ import print_function
3 |
4 | import os
5 | import sys
6 | import imp
7 | import subprocess
8 |
9 | from setuptools import setup, find_packages
10 | from setuptools.command.test import test as TestCommand
11 | from distutils import spawn
12 |
13 | # Python 2.6 subprocess.check_output compatibility. Thanks Greg Hewgill!
14 | if 'check_output' not in dir(subprocess):
15 | def check_output(cmd_args, *args, **kwargs):
16 | proc = subprocess.Popen(
17 | cmd_args, *args,
18 | stdout=subprocess.PIPE, stderr=subprocess.PIPE, **kwargs)
19 | out, err = proc.communicate()
20 | if proc.returncode != 0:
21 |             raise subprocess.CalledProcessError(proc.returncode, cmd_args)
22 | return out
23 | subprocess.check_output = check_output
24 |
25 | try:
26 | import colorama
27 | colorama.init() # Initialize colorama on Windows
28 | except ImportError:
29 | # Don't require colorama just for running paver tasks. This allows us to
30 | # run `paver install' without requiring the user to first have colorama
31 | # installed.
32 | pass
33 |
34 | # Add the current directory to the module search path.
35 | sys.path.append('.')
36 |
37 | # Constants
38 | CODE_DIRECTORY = 'pipewelder'
39 | DOCS_DIRECTORY = 'docs'
40 | TESTS_DIRECTORY = 'tests'
41 | PYTEST_FLAGS = ['--doctest-modules']
42 |
43 | # Import metadata. Normally this would just be:
44 | #
45 | # from pipewelder import metadata
46 | #
47 | # However, when we do this, we also import `pipewelder/__init__.py'. If this
48 | # imports names from some other modules and these modules have third-party
49 | # dependencies that need installing (which happens after this file is run), the
50 | # script will crash. What we do instead is to load the metadata module by path
51 | # instead, effectively side-stepping the dependency problem. Please make sure
52 | # metadata has no dependencies, otherwise they will need to be added to
53 | # the setup_requires keyword.
54 | metadata = imp.load_source(
55 | 'metadata', os.path.join(CODE_DIRECTORY, 'metadata.py'))
56 |
57 |
58 | # Miscellaneous helper functions
59 |
60 | def get_project_files():
61 | """Retrieve a list of project files, ignoring hidden files.
62 |
63 | :return: sorted list of project files
64 | :rtype: :class:`list`
65 | """
66 | if is_git_project() and has_git():
67 | return get_git_project_files()
68 |
69 | project_files = []
70 | for top, subdirs, files in os.walk('.'):
71 |         # Prune hidden directories in place; removing items from a list
72 |         # while iterating over it would skip entries.
73 |         subdirs[:] = [d for d in subdirs if not d.startswith('.')]
74 |
75 | for f in files:
76 | if f.startswith('.'):
77 | continue
78 | project_files.append(os.path.join(top, f))
79 |
80 | return project_files
81 |
82 |
83 | def is_git_project():
84 | return os.path.isdir('.git')
85 |
86 |
87 | def has_git():
88 | return bool(spawn.find_executable("git"))
89 |
90 |
91 | def get_git_project_files():
92 | """Retrieve a list of all non-ignored files, including untracked files,
93 | excluding deleted files.
94 |
95 | :return: sorted list of git project files
96 | :rtype: :class:`list`
97 | """
98 | cached_and_untracked_files = git_ls_files(
99 | '--cached', # All files cached in the index
100 | '--others', # Untracked files
101 | # Exclude untracked files that would be excluded by .gitignore, etc.
102 | '--exclude-standard')
103 | uncommitted_deleted_files = git_ls_files('--deleted')
104 |
105 | # Since sorting of files in a set is arbitrary, return a sorted list to
106 | # provide a well-defined order to tools like flake8, etc.
107 | return sorted(cached_and_untracked_files - uncommitted_deleted_files)
108 |
109 |
110 | def git_ls_files(*cmd_args):
111 | """Run ``git ls-files`` in the top-level project directory. Arguments go
112 | directly to execution call.
113 |
114 | :return: set of file names
115 | :rtype: :class:`set`
116 | """
117 | cmd = ['git', 'ls-files']
118 | cmd.extend(cmd_args)
119 | return set(subprocess.check_output(cmd).splitlines())
120 |
121 |
122 | def print_success_message(message):
123 | """Print a message indicating success in green color to STDOUT.
124 |
125 | :param message: the message to print
126 | :type message: :class:`str`
127 | """
128 | try:
129 | import colorama
130 | print(colorama.Fore.GREEN + message + colorama.Fore.RESET)
131 | except ImportError:
132 | print(message)
133 |
134 |
135 | def print_failure_message(message):
136 | """Print a message indicating failure in red color to STDERR.
137 |
138 | :param message: the message to print
139 | :type message: :class:`str`
140 | """
141 | try:
142 | import colorama
143 | print(colorama.Fore.RED + message + colorama.Fore.RESET,
144 | file=sys.stderr)
145 | except ImportError:
146 | print(message, file=sys.stderr)
147 |
148 |
149 | def read(filename):
150 | """Return the contents of a file.
151 |
152 | :param filename: file path
153 | :type filename: :class:`str`
154 | :return: the file's content
155 | :rtype: :class:`str`
156 | """
157 | with open(os.path.join(os.path.dirname(__file__), filename)) as f:
158 | return f.read()
159 |
160 |
161 | def _lint():
162 | """Run lint and return an exit code."""
163 | # Flake8 doesn't have an easy way to run checks using a Python function, so
164 | # just fork off another process to do it.
165 |
166 | # Python 3 compat:
167 | # - The result of subprocess call outputs are byte strings, meaning we need
168 | # to pass a byte string to endswith.
169 | project_python_files = [filename for filename in get_project_files()
170 | if filename.endswith(b'.py')]
171 | retcode = subprocess.call(
172 | ['flake8', '--max-complexity=10'] + project_python_files)
173 | if retcode == 0:
174 | print_success_message('No style errors')
175 | return retcode
176 |
177 |
178 | def _test():
179 | """Run the unit tests.
180 |
181 | :return: exit code
182 | """
183 | # Make sure to import pytest in this function. For the reason, see here:
184 | # # NOPEP8
185 | import pytest
186 | # Run the doctests
187 | import doctest
188 | import pipewelder
189 | doctest.testmod(pipewelder.core)
190 | # This runs the unit tests.
191 | # It also runs doctest, but only on the modules in TESTS_DIRECTORY.
192 | return pytest.main(PYTEST_FLAGS + [TESTS_DIRECTORY])
193 |
194 |
195 | def _test_all():
196 | """Run lint and tests.
197 |
198 | :return: exit code
199 | """
200 | return _lint() + _test()
201 |
202 |
203 | # The following code is to allow tests to be run with `python setup.py test'.
204 | # The main reason to make this possible is to allow tests to be run as part of
205 | # Setuptools' automatic run of 2to3 on the source code. The recommended way to
206 | # run tests is still `paver test_all'.
207 | # See
208 | # Code based on # NOPEP8
209 | class TestAllCommand(TestCommand):
210 | def finalize_options(self):
211 | TestCommand.finalize_options(self)
212 | # These are fake, and just set to appease distutils and setuptools.
213 | self.test_suite = True
214 | self.test_args = []
215 |
216 | def run_tests(self):
217 | raise SystemExit(_test_all())
218 |
219 |
220 | # define install_requires for specific Python versions
221 | python_version_specific_requires = []
222 |
223 | # as of Python >= 2.7 and >= 3.2, the argparse module is maintained within
224 | # the Python standard library, otherwise we install it as a separate package
225 | if sys.version_info < (2, 7) or (3, 0) <= sys.version_info < (3, 3):
226 | python_version_specific_requires.append('argparse')
227 |
228 |
229 | # See here for more options:
230 | #
231 | setup_dict = dict(
232 | name=metadata.package,
233 | version=metadata.version,
234 | author=metadata.authors[0],
235 | author_email=metadata.emails[0],
236 | maintainer=metadata.authors[0],
237 | maintainer_email=metadata.emails[0],
238 | url=metadata.url,
239 | description=metadata.description,
240 | long_description=read('README.rst'),
241 | # Find a list of classifiers here:
242 | #
243 | classifiers=[
244 | 'Development Status :: 4 - Beta',
245 | 'Environment :: Console',
246 | 'Intended Audience :: Developers',
247 | 'License :: OSI Approved :: Apache Software License',
248 | 'Natural Language :: English',
249 | 'Operating System :: OS Independent',
250 | 'Programming Language :: Python :: 2.6',
251 | 'Programming Language :: Python :: 2.7',
252 | 'Programming Language :: Python :: 3.3',
253 | 'Programming Language :: Python :: 3.4',
254 | 'Topic :: Software Development :: Libraries :: Python Modules',
255 | ],
256 | packages=find_packages(exclude=(TESTS_DIRECTORY,)),
257 | install_requires=[
258 | 'boto',
259 | 'six'
260 | ] + python_version_specific_requires,
261 | # Allow tests to be run with `python setup.py test'.
262 | tests_require=[
263 | 'pytest',
264 | 'mock',
265 | 'flake8',
266 | ],
267 | cmdclass={'test': TestAllCommand},
268 | zip_safe=False, # don't use eggs
269 | entry_points={
270 | 'console_scripts': [
271 | 'pipewelder = pipewelder.cli:entry_point'
272 | ],
273 | }
274 | )
275 |
276 |
277 | def main():
278 | setup(**setup_dict)
279 |
280 |
281 | if __name__ == '__main__':
282 | main()
283 |
--------------------------------------------------------------------------------
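A small illustration of the set arithmetic in get_git_project_files: files
deleted in the working tree but still cached in the index are subtracted from
the cached-plus-untracked set, and the result is sorted so tools like flake8
see a stable ordering. The file names below are made up; note that
git_ls_files returns byte strings, which is why _lint matches with
endswith(b'.py'):

    cached_and_untracked = {b'./setup.py', b'./pipewelder/core.py', b'./old.py'}
    uncommitted_deleted = {b'./old.py'}

    project_files = sorted(cached_and_untracked - uncommitted_deleted)
    # [b'./pipewelder/core.py', b'./setup.py']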
/pipewelder/core.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | The core Pipewelder API.
4 | """
5 |
6 | from __future__ import print_function
7 |
8 | import re
9 | import os
10 | import logging
11 | import hashlib
12 | from copy import deepcopy
13 | from datetime import datetime, timedelta
14 |
15 | from pipewelder import translator
16 | from boto import connect_s3
17 | from boto.s3.key import Key as S3Key
18 |
19 | from pipewelder import util
20 |
21 | import six
22 | if six.PY2:
23 | from urlparse import urlparse
24 | else:
25 | from urllib.parse import urlparse
26 |
27 | PIPELINE_DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%S"
28 | PIPELINE_FREQUENCY_RE = re.compile(r'(?P<number>\d+) (?P<unit>\w+s)')
29 | PIPELINE_PARAM_RE = re.compile(r'\#\{(my[a-zA-Z0-9]+)\}')
30 | PIPEWELDER_STUB_PARAMS = {
31 | 'name': "Pipewelder validation stub",
32 | 'unique_id': 'stub',
33 | "description": """
34 | This pipeline should always be in 'PENDING' status.
35 | It is used by Pipewelder to validate pipeline definitions.
36 | """.strip()
37 | }
38 |
39 |
40 | class Pipewelder(object):
41 | """
42 | A collection of Pipelines sharing a definition template.
43 | """
44 | def __init__(self, conn, template_path, s3_conn=None):
45 | """
46 | *conn* is a :class:`boto.datapipeline.layer1.DataPipelineConnection`
47 | instance used to manipulate added pipelines,
48 | *s3_conn* is a :class:`boto.s3.connection.S3Connection`
49 | used to upload pipeline tasks to S3,
50 | and *template_path* is the path to a local file containing the
51 | template pipeline definition.
52 | """
53 | self.conn = conn
54 | self.s3_conn = s3_conn
55 | if self.s3_conn is None:
56 | self.s3_conn = connect_s3()
57 | template_path = os.path.normpath(template_path)
58 | self.template = definition_from_file(template_path)
59 | self.pipelines = {}
60 |
61 | def add_pipeline(self, dirpath):
62 | """
63 | Load a new :class:`Pipeline` object based on the files contained in
64 | *dirpath*.
65 | """
66 | pipeline = Pipeline(self.conn, self.s3_conn, self.template, dirpath)
67 | self.pipelines[pipeline.name] = pipeline
68 | return pipeline
69 |
70 | def are_pipelines_valid(self):
71 | """
72 |         Returns ``True`` if all pipeline definitions validate with AWS.
73 | """
74 | return all([p.is_valid() for p in self.pipelines.values()])
75 |
76 | def validate(self):
77 | """
78 | Synonym for :meth:`are_pipelines_valid`.
79 | """
80 | return self.are_pipelines_valid()
81 |
82 | def upload(self):
83 | """
84 | Upload files to S3 corresponding to each pipeline and its tasks.
85 |
86 |         Returns ``True`` if successful.
87 | """
88 | return all([p.upload() for p in self.pipelines.values()])
89 |
90 | def delete(self):
91 | """
92 | Delete all pipeline definitions.
93 |
94 | Returns ``True`` if successful.
95 | """
96 | return all([p.delete() for p in self.pipelines.values()])
97 |
98 | def put_definition(self):
99 | """
100 | Puts definitions for all pipelines.
101 |
102 | Returns ``True`` if successful.
103 | """
104 | return all([p.put_definition() for p in self.pipelines.values()])
105 |
106 | def activate(self):
107 | """
108 | Activate all pipeline definitions,
109 |         deleting existing pipelines if needed.
110 |
111 | Returns ``True`` if successful.
112 | """
113 | if not self.are_pipelines_valid():
114 | logging.error("Not activating pipelines due to validation errors.")
115 | return False
116 | return all([p.activate() for p in self.pipelines.values()])
117 |
118 |
119 | class Pipeline(object):
120 | """
121 | A class defining a single pipeline definition and associated tasks.
122 | """
123 | def __init__(self, conn, s3_conn, template, dirpath):
124 | """
125 | Create a Pipeline based on definition dict *template*.
126 |
127 | *dirpath* is a directory containing a 'values.json' file,
128 | a 'run' executable, and a 'tasks' directory.
129 | *conn* is a DataPipelineConnection and *s3_conn* is an S3Connection.
130 | """
131 | self.conn = conn
132 | self.s3_conn = s3_conn
133 | self.dirpath = os.path.normpath(dirpath)
134 | self.definition = template.copy()
135 | values_path = os.path.join(dirpath, 'values.json')
136 | decoded = util.load_json(values_path)
137 | self.values = decoded.get('values', {})
138 | if 'myName' not in self.values:
139 | self.values['myName'] = os.path.basename(dirpath)
140 | # adjust the start timestamp to the future
141 | timestamp = self.values['myStartDateTime']
142 | period = self.values['mySchedulePeriod']
143 | adjusted_timestamp = adjusted_to_future(timestamp, period)
144 | self.values['myStartDateTime'] = adjusted_timestamp
145 |
146 | @property
147 | def name(self):
148 | return self._get_value('myName')
149 |
150 | @property
151 | def description(self):
152 | try:
153 | return self._get_value('myDescription')
154 | except ValueError:
155 | return None
156 |
157 | @property
158 | def tags(self):
159 | if 'myTags' not in self.values:
160 | return {}
161 | return dict(tag_expression.split(':')
162 | for tag_expression in self.values['myTags'])
163 |
164 | @property
165 | def unique_id(self):
166 |         return hashlib.md5((self.name + str(self.tags)).encode()).hexdigest()
167 |
168 | def api_objects(self):
169 | """
170 | Return a dict containing the pipeline objects in AWS API format.
171 | """
172 | d = deepcopy(self.definition)
173 | return translator.definition_to_api_objects(d)
174 |
175 | def api_parameters(self):
176 | """
177 | Return a dict containing the pipeline parameters in AWS API format.
178 | """
179 | d = deepcopy(self.definition)
180 | return translator.definition_to_api_parameters(d)
181 |
182 | def api_values(self):
183 | """
184 | Return a dict containing the pipeline param values in AWS API format.
185 | """
186 | d = {'values': self.values}
187 | return translator.definition_to_parameter_values(d)
188 |
189 | def api_tags(self):
190 | """
191 | Return a list containing the pipeline tags in AWS API format.
192 | """
193 | tag_list = [{'key': k, 'value': v}
194 | for k, v in self.tags.items()]
195 | return tag_list
196 |
197 | def create(self):
198 | """
199 | Create a pipeline in AWS if it does not already exist.
200 |
201 | Returns the pipeline id.
202 | """
203 | response = self.conn.create_pipeline(self.name, self.unique_id,
204 | self.description, self.api_tags())
205 | return response['pipelineId']
206 |
207 | def is_valid(self):
208 | """
209 |         Returns ``True`` if the pipeline definition validates with AWS.
210 | """
211 | response = self.conn.create_pipeline(**PIPEWELDER_STUB_PARAMS)
212 | pipeline_id = response["pipelineId"]
213 | response = self.conn.validate_pipeline_definition(
214 | self.api_objects(), pipeline_id,
215 | self.api_parameters(), self.api_values())
216 | self._log_validation_messages(response)
217 | if response['errored']:
218 | return False
219 | else:
220 | logging.info("Pipeline '{0}' is valid".format(self.name))
221 | return True
222 |
223 | def upload(self):
224 | """
225 | Uploads the contents of `dirpath` to S3.
226 |
227 |         The destination path in S3 is determined by 'myS3InputDir'
228 | in the 'values.json' file for this pipeline.
229 | Existing contents of the 'tasks' subdirectory are deleted.
230 |
231 | Returns ``True`` if successful.
232 | """
233 | s3_dir = self._get_value('myS3InputDir')
234 | bucket_path, input_dir = bucket_and_path(s3_dir)
235 | bucket = self.s3_conn.get_bucket(bucket_path)
236 |
237 | remote_task_path = os.path.join(input_dir, 'tasks')
238 | existing_task_keys = bucket.list(prefix=remote_task_path)
239 | existing_tasks = [key.name for key in existing_task_keys]
240 | bucket.delete_keys(existing_tasks)
241 | logging.info("Deleted from bucket '{0}': {1}"
242 | .format(bucket_path, existing_tasks))
243 |
244 | with util.cd(self.dirpath):
245 | for root, dirs, files in os.walk('.'):
246 | for f in files:
247 | filepath = os.path.join(root, f)
248 | k = S3Key(bucket)
249 | k.key = os.path.normpath(os.path.join(input_dir, filepath))
250 | k.set_contents_from_filename(filepath)
251 | logging.info('Copied {0} to {1}'
252 | .format(os.path.abspath(filepath),
253 | os.path.normpath(
254 | os.path.join(s3_dir, filepath))))
255 | return True
256 |
257 | def delete(self):
258 | """
259 | Delete this pipeline definition from AWS.
260 |
261 | Returns ``True`` if successful.
262 | """
263 | pipeline_id = self.create()
264 | logging.info("Deleting pipeline with id {0}".format(pipeline_id))
265 | self.conn.delete_pipeline(pipeline_id)
266 | return True
267 |
268 | def put_definition(self):
269 | """
270 | Put this pipeline definition to AWS.
271 |
272 | Returns ``True`` if successful.
273 | """
274 | pipeline_id = self.create()
275 | logging.info("Putting pipeline definition for {0}".format(pipeline_id))
276 | self.conn.put_pipeline_definition(self.api_objects(),
277 | pipeline_id,
278 | self.api_parameters(),
279 | self.api_values())
280 | return True
281 |
282 | def activate(self):
283 | """
284 | Activate this pipeline definition in AWS.
285 |
286 | Deletes the existing pipeline if it has previously been activated.
287 |
288 | Returns ``True`` if successful.
289 | """
290 | pipeline_id = self.create()
291 | existing_definition = definition_from_id(self.conn, pipeline_id)
292 | state = state_from_id(self.conn, pipeline_id)
293 | if existing_definition == self.definition:
294 | return True
295 | elif state == 'PENDING':
296 | self.put_definition()
297 | else:
298 | self.delete()
299 | return self.activate()
300 | logging.info("Activating pipeline with id {0}".format(pipeline_id))
301 | self.conn.activate_pipeline(pipeline_id)
302 | return True
303 |
304 | def _log_validation_messages(self, response):
305 | for container in response['validationWarnings']:
306 | logging.warning("Warnings in validation response for %s",
307 | container['id'])
308 | for message in container['warnings']:
309 | logging.warning(message)
310 | for container in response['validationErrors']:
311 | logging.error("Errors in validation response for %s",
312 | container['id'])
313 | for message in container['errors']:
314 | logging.error(message)
315 |
316 | def _get_value(self, key):
317 | if key in self.values:
318 | return self._parsed_via_parameters(self.values[key])
319 | params = self.definition['parameters']
320 | default = fetch_default(params, key)
321 | if default is None:
322 | raise ValueError("No value or default found for '{0}'"
323 | .format(key))
324 | return self._parsed_via_parameters(default)
325 |
326 | def _parsed_via_parameters(self, expression):
327 | placeholders = re.findall(PIPELINE_PARAM_RE, expression)
328 | if not placeholders:
329 | return expression
330 | key = placeholders[0]
331 | value = self._get_value(key)
332 | placeholder = '#{' + key + '}'
333 | expression = expression.replace(placeholder, value)
334 | return self._parsed_via_parameters(expression)
335 |
336 | def _parsed_object(self, name):
337 | return parsed_object(self.conn, self.create(), name)
338 |
339 | def _parsed_location(self, name):
340 | obj = self._parsed_object(name)
341 |         return fetch_field_value(obj, 'directoryPath')
342 |
343 |
344 | def bucket_and_path(s3_uri):
345 | """
346 | Return a bucket name and key path from *s3_uri*.
347 |
348 | >>> bucket_and_path('s3://pipewelder-bucket/pipewelder-test/inputs')
349 | ('pipewelder-bucket', 'pipewelder-test/inputs')
350 | """
351 | uri = urlparse(s3_uri)
352 | return (uri.netloc, uri.path[1:])
353 |
354 |
355 | def parse_period(period):
356 | """
357 | Return a timedelta object parsed from string *period*.
358 |
359 | >>> parse_period("15 minutes")
360 | datetime.timedelta(0, 900)
361 | >>> parse_period("3 hours")
362 | datetime.timedelta(0, 10800)
363 | >>> parse_period("1 days")
364 | datetime.timedelta(1)
365 | """
366 | parts = PIPELINE_FREQUENCY_RE.match(period)
367 | if not parts:
368 | raise ValueError("'{0}' cannot be parsed as a period".format(period))
369 | parts = parts.groupdict()
370 | kwargs = {parts['unit']: int(parts['number'])}
371 | return timedelta(**kwargs)
372 |
373 |
374 | def adjusted_to_future(timestamp, period):
375 | """
376 | Return *timestamp* string, adjusted to the future if necessary.
377 |
378 | If *timestamp* is in the future, it will be returned unchanged.
379 | If it's in the past, *period* will be repeatedly added until the
380 | result is in the future.
381 |
382 | All times are assumed to be in UTC.
383 |
384 | >>> adjusted_to_future('2199-01-01T00:00:00', '1 days')
385 | '2199-01-01T00:00:00'
386 | """
387 | dt = datetime.strptime(timestamp, PIPELINE_DATETIME_FORMAT)
388 | delta = parse_period(period)
389 | now = datetime.utcnow()
390 | while dt < now:
391 | dt += delta
392 | return dt.strftime(PIPELINE_DATETIME_FORMAT)
393 |
394 |
395 | def fetch_field_value(aws_response, field_name):
396 | """
397 | Return a value nested within the 'fields' entry of dict *aws_response*.
398 |
399 |     The value comes from the first non-'key' entry of the dict keyed by *field_name*.
400 |
401 | >>> r = {'fields': [{'key': 'someKey', 'stringValue': 'someValue'}]}
402 | >>> fetch_field_value(r, 'someKey')
403 | 'someValue'
404 | """
405 | for container in aws_response['fields']:
406 | if container['key'] == field_name:
407 | for (k, v) in container.items():
408 | if k != 'key':
409 | return v
410 | raise ValueError("Did not find a field called {0} in response {1}"
411 | .format(field_name, aws_response))
412 |
413 |
414 | def fetch_default(params, key):
415 | """
416 | Return the default associated with *key* from parameter list *params*.
417 |
418 | If no default, returns None.
419 | >>> p = [{'type': 'String', 'id': 'myParam', 'default': 'foo'}]
420 | >>> fetch_default(p, 'myParam')
421 | 'foo'
422 | >>> p = [{'type': 'String', 'id': 'myParam'}]
423 | >>> fetch_default(p, 'myParam')
424 | """
425 | for container in params:
426 | if container['id'] == key:
427 | if 'default' in container:
428 | return container['default']
429 | return None
430 |
431 |
432 | def state_from_id(conn, pipeline_id):
433 | """
434 | Return the *@pipelineState* string for object matching *pipeline_id*.
435 |
436 | *conn* is a DataPipelineConnection object.
437 | """
438 | response = conn.describe_pipelines([pipeline_id])
439 | description = response['pipelineDescriptionList'][0]
440 | return fetch_field_value(description, '@pipelineState')
441 |
442 |
443 | def definition_from_file(filename):
444 | """
445 | Return a dict containing the contents of pipeline definition *filename*.
446 | """
447 | return util.load_json(filename)
448 |
449 |
450 | def definition_from_id(conn, pipeline_id):
451 | """
452 | Return a dict containing the definition of *pipeline_id*.
453 |
454 | *conn* is a DataPipelineConnection object.
455 | """
456 | response = conn.get_pipeline_definition(pipeline_id)
457 | return translator.api_to_definition(response)
458 |
459 |
460 | def parsed_objects(conn, pipeline_id, object_ids):
461 | """
462 | Return a list of object dicts as evaluated by Data Pipeline.
463 | """
464 | response = conn.describe_objects(object_ids, pipeline_id,
465 | evaluate_expressions=True)
466 | return response['pipelineObjects']
467 |
468 |
469 | def parsed_object(conn, pipeline_id, object_id):
470 | """
471 | Return an object dict as evaluated by Data Pipeline.
472 | """
473 | return parsed_objects(conn, pipeline_id, [object_id])[0]
474 |
--------------------------------------------------------------------------------
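Putting the core API together, a hedged end-to-end sketch (the paths and
directory names are illustrative only, and boto credentials are assumed to be
configured in the environment):

    from boto.datapipeline import layer1
    from pipewelder.core import Pipewelder

    conn = layer1.DataPipelineConnection()
    pw = Pipewelder(conn, 'pipeline_definition.json')

    # Each added directory must contain values.json, a run executable,
    # and a tasks/ directory, as Pipeline.__init__ documents.
    pw.add_pipeline('pipelines/echoer')

    if pw.validate():   # every definition is checked against AWS first
        pw.upload()     # task files go to the 'myS3InputDir' location
        pw.activate()   # put definitions, then activate (recreating if needed)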