├── docs ├── source │ ├── _static │ │ └── .gitkeep │ ├── README.rst │ ├── welder.jpg │ ├── util.rst │ ├── core.rst │ ├── cli.rst │ ├── README │ ├── index.rst │ └── conf.py ├── make.bat └── Makefile ├── setup.cfg ├── welder.jpg ├── tests ├── test_data │ ├── echoer │ │ ├── tasks │ │ │ ├── second.txt │ │ │ └── first.txt │ │ ├── run │ │ └── values.json │ ├── pipewelder.json │ └── pipeline_definition.json ├── test_core.py └── test_cli.py ├── requirements.txt ├── .travis.yml ├── Makefile ├── NOTICE ├── .ppt-version ├── pipewelder ├── __init__.py ├── util.py ├── metadata.py ├── cli.py ├── connection.py ├── translator.py └── core.py ├── requirements-dev.txt ├── .editorconfig ├── .gitignore ├── LICENSE ├── MANIFEST.in ├── tox.ini ├── README.rst ├── pavement.py └── setup.py /docs/source/_static/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/source/README.rst: -------------------------------------------------------------------------------- 1 | ../../README.rst -------------------------------------------------------------------------------- /docs/source/welder.jpg: -------------------------------------------------------------------------------- 1 | ../../welder.jpg -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bdist_wheel] 2 | universal=1 3 | -------------------------------------------------------------------------------- /welder.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SimpleFinance/pipewelder/HEAD/welder.jpg -------------------------------------------------------------------------------- /tests/test_data/echoer/tasks/second.txt: -------------------------------------------------------------------------------- 1 | This text should also appear in the output directory. 2 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # Python 2.6 compatibility 2 | # argparse==1.2.1 3 | six==1.9.0 4 | boto==2.36.0 5 | -------------------------------------------------------------------------------- /docs/source/util.rst: -------------------------------------------------------------------------------- 1 | Pipewelder Util 2 | =============== 3 | 4 | .. automodule:: pipewelder.util 5 | :members: 6 | -------------------------------------------------------------------------------- /docs/source/core.rst: -------------------------------------------------------------------------------- 1 | Pipewelder Core API 2 | =================== 3 | 4 | .. automodule:: pipewelder.core 5 | :members: 6 | -------------------------------------------------------------------------------- /docs/source/cli.rst: -------------------------------------------------------------------------------- 1 | Pipewelder Command-Line Interface 2 | ================================= 3 | 4 | .. automodule:: pipewelder.cli 5 | :members: 6 | -------------------------------------------------------------------------------- /tests/test_data/echoer/tasks/first.txt: -------------------------------------------------------------------------------- 1 | This is the first task file for the Echoer pipeline. 2 | 3 | This text should appear in the output directory for this pipeline. 
4 | -------------------------------------------------------------------------------- /docs/source/README: -------------------------------------------------------------------------------- 1 | Run `sphinx-apidoc -o . ../../pipewelder' in this directory. 2 | 3 | This will generate `modules.rst' and `pipewelder.rst'. 4 | 5 | Then include `modules.rst' in your `index.rst' file. 6 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 2.7 3 | env: 4 | - TOXENV=py26 5 | - TOXENV=py27 6 | - TOXENV=py33 7 | - TOXENV=py34 8 | - TOXENV=pypy 9 | - TOXENV=docs 10 | install: 11 | - pip install -r requirements-dev.txt 12 | script: 13 | - tox 14 | -------------------------------------------------------------------------------- /tests/test_data/pipewelder.json: -------------------------------------------------------------------------------- 1 | { 2 | 3 | "defaults" : { 4 | "dirs" : ["*"], 5 | "region" : "us-west-2", 6 | "template" : "pipeline_definition.json" 7 | }, 8 | 9 | "dev" : { 10 | "values" : { 11 | "myEnv" : "dev" 12 | } 13 | } 14 | 15 | } 16 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | VENV := $(CURDIR)/venv 2 | export PATH := $(VENV)/bin:$(PATH) 3 | 4 | test: install 5 | paver test_all 6 | 7 | install: $(VENV) 8 | $(VENV)/bin/pip install -r requirements-dev.txt 9 | 10 | $(VENV): 11 | virtualenv $@ 12 | 13 | requirements.txt: 14 | pip freeze > $@ 15 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | Pipewelder 2 | ========== 3 | 4 | .. toctree:: 5 | :maxdepth: 2 6 | 7 | README 8 | core 9 | util 10 | cli 11 | 12 | .. only:: html 13 | 14 | Indices and tables 15 | ================== 16 | 17 | * :ref:`genindex` 18 | * :ref:`modindex` 19 | * :ref:`search` 20 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | Pipewelder 2 | Copyright 2015 Simple Finance Technology Corporation 3 | 4 | The banner image in the documentation is cropped from an original 5 | photo owned by the PEO ACWA: 6 | https://flic.kr/p/ejYqQe 7 | 8 | Package layout is based on a template by Sean Fisk: 9 | https://github.com/seanfisk/python-project-template 10 | -------------------------------------------------------------------------------- /.ppt-version: -------------------------------------------------------------------------------- 1 | # This file specifies the version of the Python Project Template 2 | # (https://github.com/seanfisk/python-project-template) from which 3 | # this project was created. It is here for the purposes of possibly 4 | # updating this project to use a newer version of the template. Please 5 | # do not remove this file. 
6 | df37ac91c8293f907ac755822702303d75afee3f 7 | -------------------------------------------------------------------------------- /pipewelder/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # flake8: noqa 3 | """ 4 | Scheduled task execution on top of AWS Data Pipeline 5 | """ 6 | import pipewelder.connection 7 | from pipewelder import metadata 8 | from pipewelder.core import * 9 | 10 | __version__ = metadata.version 11 | __author__ = metadata.authors[0] 12 | __license__ = metadata.license 13 | __copyright__ = metadata.copyright 14 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | # Runtime requirements 2 | --requirement requirements.txt 3 | 4 | # Testing 5 | pytest==2.6.4 6 | py==1.4.19 7 | mock==1.0.1 8 | tox==1.8.1 9 | 10 | # Linting 11 | flake8==2.3.0 12 | mccabe==0.3 13 | pep8==1.6.2 14 | pyflakes==0.8.1 15 | 16 | # Documentation 17 | Sphinx==1.2 18 | docutils==0.11 19 | Jinja2==2.7.1 20 | MarkupSafe==0.18 21 | Pygments==1.6 22 | 23 | # Miscellaneous 24 | Paver==1.2.3 25 | colorama==0.2.7 26 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | # -*- mode: conf-unix; -*- 2 | 3 | # EditorConfig is awesome: http://EditorConfig.org 4 | 5 | # top-most EditorConfig file 6 | root = true 7 | 8 | # defaults 9 | [*] 10 | insert_final_newline = true 11 | 12 | # 4 space indentation 13 | [*.{ini,py,py.tpl,rst}] 14 | indent_style = space 15 | indent_size = 4 16 | 17 | # 4-width tabbed indentation 18 | [*.{sh,bat.tpl,Makefile.tpl}] 19 | indent_style = tab 20 | indent_size = 4 21 | 22 | # and travis does its own thing 23 | [.travis.yml] 24 | indent_style = space 25 | indent_size = 2 26 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Emacs rope configuration 2 | .ropeproject 3 | .project 4 | .pydevproject 5 | .settings 6 | 7 | # pyenv version file 8 | .python-version 9 | 10 | # Python 11 | *.py[co] 12 | 13 | ## Packages 14 | *.egg 15 | *.egg-info 16 | dist 17 | build 18 | eggs 19 | parts 20 | bin 21 | var 22 | sdist 23 | deb_dist 24 | develop-eggs 25 | .installed.cfg 26 | 27 | ## Installer logs 28 | pip-log.txt 29 | 30 | ## Unit test / coverage reports 31 | .coverage 32 | .tox 33 | 34 | ## Translations 35 | *.mo 36 | 37 | ## paver generated files 38 | /paver-minilib.zip 39 | 40 | ## virtualenv 41 | /venv 42 | -------------------------------------------------------------------------------- /tests/test_data/echoer/run: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Print as a function on Python 2 as well (the project supports 2.6/2.7). 4 | from __future__ import print_function 5 | 6 | import os 7 | from glob import glob 8 | from shutil import copyfile 9 | 10 | OUTPUT_DIR = os.environ['OUTPUT1_STAGING_DIR'] 11 | 12 | 13 | def write_to_output_dir(task_path): 14 | basename = os.path.basename(task_path) 15 | destination = os.path.join(OUTPUT_DIR, basename) 16 | copyfile(task_path, destination) 17 | 18 | 19 | def main(): 20 | print("Echoer writes some files to", OUTPUT_DIR) 21 | for txtfile in glob("tasks/*.txt"): 22 | write_to_output_dir(txtfile) 23 | 24 | 25 | if __name__ == "__main__": 26 | main() 27 | --------------------------------------------------------------------------------
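The `run` script above only works inside Data Pipeline's staged execution environment, where the ShellCommandActivity has already made the pipeline directory the working directory and exported OUTPUT1_STAGING_DIR. A quick way to exercise it without AWS is to fake that environment; the following sketch is illustrative only (it is not part of the repository) and assumes it runs from the repository root on a POSIX system:

    #!/usr/bin/env python
    """Hypothetical harness: run a Pipewelder `run` script locally."""
    from __future__ import print_function

    import os
    import subprocess
    import tempfile

    pipeline_dir = "tests/test_data/echoer"
    env = dict(os.environ)
    # Data Pipeline exports this variable before invoking `run`.
    env["OUTPUT1_STAGING_DIR"] = tempfile.mkdtemp(prefix="pw-out-")

    # Mirror the template's ShellCommandActivity command:
    # (cd ${INPUT1_STAGING_DIR} && chmod +x run && ./run)
    subprocess.check_call("chmod +x run && ./run", shell=True,
                          cwd=pipeline_dir, env=env)
    print("Output files:", os.listdir(env["OUTPUT1_STAGING_DIR"]))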
/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015 Simple Finance Technology Corp. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | # Informational files 2 | include README.rst 3 | include welder.jpg 4 | include LICENSE 5 | include NOTICE 6 | 7 | # Include docs and tests. It's unclear whether convention dictates 8 | # including built docs. However, Sphinx doesn't include built docs, so 9 | # we are following their lead. 10 | graft docs 11 | prune docs/build 12 | graft tests 13 | 14 | # Exclude any compiled Python files (most likely grafted by the tests/ directory). 15 | global-exclude *.pyc 16 | 17 | # Setup-related things 18 | include pavement.py 19 | include requirements-dev.txt 20 | include requirements.txt 21 | include setup.py 22 | include tox.ini 23 | -------------------------------------------------------------------------------- /pipewelder/util.py: -------------------------------------------------------------------------------- 1 | import os 2 | import contextlib 3 | import json 4 | 5 | 6 | @contextlib.contextmanager 7 | def cd(new_path): 8 | """ 9 | Change to a different directory within a limited context. 10 | """ 11 | saved_path = os.getcwd() 12 | os.chdir(new_path) 13 | try: 14 | yield 15 | finally: 16 | # Restore the original directory even if the body raises. 17 | os.chdir(saved_path) 18 | 19 | 20 | def load_json(filename): 21 | with open(filename) as f: 22 | try: 23 | data = json.load(f) 24 | except ValueError as e: 25 | raise ValueError("Unable to parse '{0}' as json; {1}" 26 | .format(filename, e)) 27 | return data 28 | -------------------------------------------------------------------------------- /pipewelder/metadata.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Project metadata 3 | 4 | Information describing the project. 5 | """ 6 | 7 | # The package name, which is also the "UNIX name" for the project.
8 | package = 'pipewelder' 9 | project = "Pipewelder" 10 | project_no_spaces = project.replace(' ', '') 11 | version = '0.1.4' 12 | description = 'Scheduled task execution on top of AWS Data Pipeline' 13 | authors = ['Jeff Klukas'] 14 | authors_string = ', '.join(authors) 15 | emails = ['klukas@simple.com'] 16 | license = 'Apache V2.0' 17 | copyright = '2015 Simple Finance Technology Corporation' 18 | url = 'http://github.com/jklukas/pipewelder' 19 | -------------------------------------------------------------------------------- /tests/test_data/echoer/values.json: -------------------------------------------------------------------------------- 1 | { 2 | 3 | "values": { 4 | "myName" : "echoer", 5 | "myDescription" : "an example pipeline that simply prints tasks files to STDOUT", 6 | "myEnv" : "this will get replaced by pipewelder.json", 7 | "myS3InputDir": "s3://pipewelder-example/#{myEnv}/echoer/inputs", 8 | "myS3OutputDir": "s3://pipewelder-example/#{myEnv}/echoer/outputs", 9 | "myS3LogDir": "s3://pipewelder-example/#{myEnv}/echoer/logs", 10 | "myStartDateTime": "2015-01-01T00:00:02", 11 | "mySchedulePeriod": "15 minutes", 12 | "myTerminateAfter": "10 minutes", 13 | "myTags": [ 14 | "pipewelder-environment:dev" 15 | ] 16 | } 17 | 18 | } 19 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | # Tox (http://tox.testrun.org/) is a tool for running tests in 2 | # multiple virtualenvs. This configuration file will run the test 3 | # suite on all supported python versions. To use it, "pip install tox" 4 | # and then run "tox" from this directory. 5 | # 6 | # To run tox faster, check out Detox 7 | # (https://pypi.python.org/pypi/detox), which runs your tox runs in 8 | # parallel. To use it, "pip install detox" and then run "detox" from 9 | # this directory. 
10 | 11 | [tox] 12 | envlist = py26,py27,py33,py34,docs 13 | 14 | [testenv] 15 | deps = 16 | --no-deps 17 | --requirement 18 | {toxinidir}/requirements-dev.txt 19 | commands = paver test_all 20 | 21 | [testenv:docs] 22 | basepython = python 23 | commands = paver doc_html 24 | -------------------------------------------------------------------------------- /tests/test_core.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import pytest 4 | import os 5 | 6 | from pipewelder import core 7 | from datetime import datetime, timedelta 8 | 9 | import logging 10 | logging.basicConfig(level=logging.INFO) 11 | 12 | HERE = os.path.abspath(os.path.dirname(__file__)) 13 | DATA_DIR = os.path.join(HERE, 'test_data') 14 | 15 | 16 | def data_path(path): 17 | return os.path.join(DATA_DIR, path) 18 | 19 | 20 | def test_adjusted_to_future(): 21 | now = datetime.utcnow() 22 | timestamp = "{0}-01-01T00:00:00".format(now.year) 23 | adjusted = core.adjusted_to_future(timestamp, "1 days") 24 | target_dt = datetime(now.year, now.month, now.day) + timedelta(days=1) 25 | assert adjusted == target_dt.strftime(core.PIPELINE_DATETIME_FORMAT) 26 | 27 | 28 | @pytest.fixture 29 | def pipeline_description(): 30 | return { 31 | u'description': u'my description', 32 | u'fields': [ 33 | {u'key': u'@pipelineState', u'stringValue': u'PENDING'}, 34 | {u'key': u'@creationTime', u'stringValue': u'2015-02-11T21:17:10'}, 35 | {u'key': u'@sphere', u'stringValue': u'PIPELINE'}, 36 | {u'key': u'uniqueId', u'stringValue': u'pipeweldertest1'}, 37 | {u'key': u'@accountId', u'stringValue': u'543715240000'}, 38 | {u'key': u'description', u'stringValue': u'my description'}, 39 | {u'key': u'name', u'stringValue': u'Pipewelder test'}, 40 | {u'key': u'pipelineCreator', u'stringValue': u'AIDAIWZQRURDOOOOO'}, 41 | {u'key': u'@id', u'stringValue': u'df-07437251YGRXOY19OOOO'}, 42 | {u'key': u'@userId', u'stringValue': u'AIDAIWZQRURDXI4UKOOOO'}], 43 | u'name': u'Pipewelder test', 44 | u'pipelineId': u'df-07437251YGRXOY19OOOO', 45 | u'tags': [], 46 | } 47 | 48 | 49 | def test_pipeline_state(pipeline_description): 50 | state = core.fetch_field_value(pipeline_description, '@pipelineState') 51 | assert state == 'PENDING' 52 | -------------------------------------------------------------------------------- /tests/test_cli.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from pytest import raises 3 | 4 | # The parametrize function is generated, so this doesn't work: 5 | # 6 | # from pytest.mark import parametrize 7 | # 8 | import pytest 9 | parametrize = pytest.mark.parametrize # NOPEP8 10 | 11 | import os 12 | 13 | from pipewelder.cli import pipewelder_configs, main, metadata 14 | 15 | import logging 16 | logging.basicConfig(level=logging.INFO) 17 | 18 | HERE = os.path.abspath(os.path.dirname(__file__)) 19 | DATA_DIR = os.path.join(HERE, 'test_data') 20 | 21 | 22 | def data_path(path): 23 | return os.path.join(DATA_DIR, path) 24 | 25 | 26 | def test_pipewelder_configs(): 27 | configs = pipewelder_configs(data_path('pipewelder.json')) 28 | assert configs["dev"] == { 29 | "name": "dev", 30 | "dirs": ["echoer"], 31 | "region": "us-west-2", 32 | "template": "pipeline_definition.json", 33 | "values": { 34 | "myEnv": "dev" 35 | } 36 | } 37 | 38 | 39 | class TestMain(object): 40 | @parametrize('helparg', ['-h', '--help']) 41 | def test_help(self, helparg, capsys): 42 | with raises(SystemExit) as exc_info: 43 | main(['progname',
helparg]) 44 | out, err = capsys.readouterr() 45 | # Should have printed some sort of usage message. We don't 46 | # need to explicitly test the content of the message. 47 | assert 'usage' in out 48 | # Should have used the program name from the argument 49 | # vector. 50 | assert 'progname' in out 51 | # Should exit with zero return code. 52 | assert exc_info.value.code == 0 53 | 54 | @parametrize('versionarg', ['-V', '--version']) 55 | def test_version(self, versionarg, capsys): 56 | with raises(SystemExit) as exc_info: 57 | main(['progname', versionarg]) 58 | out, err = capsys.readouterr() 59 | # Should print out version. 60 | expected = '{0} {1}\n'.format(metadata.project, metadata.version) 61 | assert (out == expected or err == expected) 62 | # Should exit with zero return code. 63 | assert exc_info.value.code == 0 64 | -------------------------------------------------------------------------------- /tests/test_data/pipeline_definition.json: -------------------------------------------------------------------------------- 1 | { 2 | "objects" : [ 3 | { 4 | "id" : "Default", 5 | "scheduleType" : "cron", 6 | "failureAndRerunMode" : "CASCADE", 7 | "schedule" : { "ref" : "PipewelderSchedule" }, 8 | "pipelineLogUri" : "#{myS3LogDir}", 9 | "role" : "DataPipelineDefaultRole", 10 | "resourceRole" : "DataPipelineDefaultResourceRole" 11 | }, 12 | { 13 | "id" : "PipewelderShellCommandActivity", 14 | "command" : "(cd ${INPUT1_STAGING_DIR} && chmod +x run && ./run) > ${OUTPUT1_STAGING_DIR}/stdout.txt", 15 | "runsOn" : { "ref" : "PipewelderEC2Resource" }, 16 | "input" : { "ref" : "PipewelderS3InputLocation" }, 17 | "output" : { "ref" : "PipewelderS3OutputLocation" }, 18 | "type" : "ShellCommandActivity", 19 | "stage" : "true" 20 | }, 21 | { 22 | "id" : "PipewelderSchedule", 23 | "startDateTime" : "#{myStartDateTime}", 24 | "type" : "Schedule", 25 | "period" : "#{mySchedulePeriod}" 26 | }, 27 | { 28 | "id" : "PipewelderEC2Resource", 29 | "terminateAfter" : "#{myTerminateAfter}", 30 | "instanceType" : "t1.micro", 31 | "type" : "Ec2Resource" 32 | }, 33 | { 34 | "id" : "PipewelderS3InputLocation", 35 | "directoryPath" : "#{myS3InputDir}", 36 | "type" : "S3DataNode" 37 | }, 38 | { 39 | "id" : "PipewelderS3OutputLocation", 40 | "directoryPath" : "#{myS3OutputDir}/#{format(@scheduledStartTime, 'YYYY-MM-dd_HHmmss')}", 41 | "type" : "S3DataNode" 42 | } 43 | ], 44 | 45 | "parameters" : [ 46 | { 47 | "id": "myName", 48 | "description": "A unique name for this pipeline, passed to CreatePipeline", 49 | "type": "String" 50 | }, 51 | { 52 | "id": "myDescription", 53 | "description": "A description of this pipeline, passed to CreatePipeline", 54 | "type": "String" 55 | }, 56 | { 57 | "id": "myTags", 58 | "description": "A list of tag:value pairs, passed to CreatePipeline", 59 | "default": [], 60 | "type": "String", 61 | "isArray": "True" 62 | }, 63 | { 64 | "id": "myS3InputDir", 65 | "description": "S3 directory where the run executable lives, destination for Pipewelder 'upload' commands", 66 | "type": "AWS::S3::ObjectKey" 67 | }, 68 | { 69 | "id": "myS3OutputDir", 70 | "description": "S3 directory where output files are collected", 71 | "type": "AWS::S3::ObjectKey" 72 | }, 73 | { 74 | "id": "myS3LogDir", 75 | "description": "S3 log folder", 76 | "type": "AWS::S3::ObjectKey" 77 | }, 78 | { 79 | "id": "myStartDateTime", 80 | "description": "Instant for the first run; Pipewelder will add multiples of mySchedulePeriod to ensure this instant is in the future", 81 | "type": "String" 82 | }, 83 | { 84 | "id": 
"mySchedulePeriod", 85 | "description": "How often to run, such as '1 hours'", 86 | "type": "String" 87 | }, 88 | { 89 | "id": "myTerminateAfter", 90 | "default": "#{format(minusMinutes(#{mySchedulePeriod}, 10))}", 91 | "description": "duration after which the run should be terminated", 92 | "type": "String" 93 | } 94 | ] 95 | } 96 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | Pipewelder 2 | ========== 3 | 4 | .. figure:: welder.jpg 5 | :alt: A worker welding a pipe 6 | 7 | Pipewelder is a framework that provides a command-line tool and Python 8 | API to manage `AWS Data 9 | Pipeline `__ jobs from flat files. 10 | Simple uses it as a cron-like job scheduler. 11 | 12 | Source 13 | https://github.com/SimpleFinance/pipewelder 14 | 15 | Documentation 16 | http://pipewelder.readthedocs.org 17 | 18 | PyPI 19 | https://pypi.python.org/pypi/pipewelder 20 | 21 | Overview 22 | -------- 23 | 24 | Pipewelder aims to ease the task of scheduling jobs by defining very 25 | simple pipelines which are little more than an execution schedule, 26 | offloading most of the execution logic to files in S3. Pipewelder uses 27 | Data Pipeline's concept of `data 28 | staging `__ 29 | to pull input files from S3 at the beginning of execution and to upload 30 | output files back to S3 at the end of execution. 31 | 32 | If you follow Pipewelder's directory structure, all of your pipeline 33 | logic can live in version-controlled flat files. The included 34 | command-line interface gives you simple commands to validate your 35 | pipeline definitions, upload task definitions to S3, and activate your 36 | pipelines. 37 | 38 | Installation 39 | ------------ 40 | 41 | Pipewelder is available from `PyPI `__ via 42 | ``pip`` and is compatible with Python 2.6, 2.7, 3.3, and 3.4: 43 | 44 | :: 45 | 46 | pip install pipewelder 47 | 48 | The easiest way to get started is to clone the project from GitHub, copy 49 | the example project from Pipewelder's tests, and then modify to suit: 50 | 51 | .. code:: bash 52 | 53 | git clone https://github.com/SimpleFinance/pipewelder.git 54 | cp -r pipewelder/tests/test_data my-pipewelder-project 55 | 56 | If you're setting up Pipewelder and need help, feel free to email the 57 | author. 58 | 59 | Development 60 | ----------- 61 | 62 | To do development on Pipewelder, clone the repository and run ``make`` 63 | to install dependencies and run tests. 64 | 65 | Directory Structure 66 | ------------------- 67 | 68 | To use Pipewelder, you provide a template pipeline definition along with 69 | one or more directories that correspond to particular pipeline 70 | instances. The directory structure looks like this (see 71 | `test\_data `__ for a working example): 72 | 73 | :: 74 | 75 | pipeline_definition.json 76 | pipewelder.json <- optional configuration file 77 | my_first_pipeline/ 78 | run 79 | values.json 80 | tasks/ 81 | task1.sh 82 | task2.sh 83 | my_second_pipeline/ 84 | ... 85 | 86 | The ``values.json`` file in each pipeline directory specifies parameter 87 | values that are used modify the template definition including the S3 88 | paths for inputs, outputs, and logs. Some of these values are used 89 | directly by Pipewelder as well. 90 | 91 | A 92 | ```ShellCommandActivity`` `__ 93 | in the template definition simply looks for an executable file named 94 | ``run`` and executes it. ``run`` is the entry point for whatever work 95 | you want your pipeline to do. 
96 | 97 | Often, your ``run`` executable will be a wrapper script to execute a 98 | variety of similar tasks. When that's the case, use the ``tasks`` 99 | subdirectory to hold these definitions. These tasks could be text files, 100 | shell scripts, SQL code, or whatever else your ``run`` file expects. 101 | Pipewelder gives the ``tasks`` folder special treatment in that the CLI will 102 | make sure to remove existing task definitions when uploading files. 103 | 104 | Using the Command-Line Interface 105 | -------------------------------- 106 | 107 | The Pipewelder CLI should always be invoked from the top-level directory 108 | of your definitions (the directory where ``pipeline_definition.json`` 109 | lives). If your directory structure matches Pipewelder's expectations, 110 | it should work without further configuration. 111 | 112 | As you make changes to your template definition or ``values.json`` 113 | files, it can be useful to check whether AWS considers your definitions 114 | valid: 115 | 116 | :: 117 | 118 | $ pipewelder validate 119 | 120 | Once you've defined your pipelines, you'll need to upload the files to 121 | S3: 122 | 123 | :: 124 | 125 | $ pipewelder upload 126 | 127 | Finally, activate your pipelines: 128 | 129 | :: 130 | 131 | $ pipewelder activate 132 | 133 | Any time you change the ``values.json`` or ``pipeline_definition.json``, 134 | you'll need to run the ``activate`` subcommand again. Because active 135 | pipelines can't be modified, the ``activate`` command will delete the 136 | existing pipeline and create a new one in its place. The run history for 137 | the previous pipeline will be discarded. 138 | 139 | Acknowledgments 140 | --------------- 141 | 142 | Pipewelder's package structure is based on 143 | `python-project-template <https://github.com/seanfisk/python-project-template>`__. 144 | -------------------------------------------------------------------------------- /pipewelder/cli.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | The Pipewelder command-line interface. 5 | """ 6 | 7 | from __future__ import print_function 8 | 9 | import argparse 10 | import os 11 | import sys 12 | import boto.datapipeline 13 | 14 | from glob import glob 15 | 16 | from pipewelder import metadata, util, Pipewelder 17 | 18 | import logging 19 | logging.basicConfig(level="INFO") 20 | 21 | 22 | CONFIG_DEFAULTS = { 23 | "dirs": ["*"], 24 | "region": "", 25 | "template": "pipeline_definition.json", 26 | "values": {}, 27 | } 28 | 29 | 30 | def main(argv): 31 | """Program entry point.
32 | :param argv: command-line arguments 33 | :type argv: :class:`list` 34 | """ 35 | author_strings = [] 36 | for name, email in zip(metadata.authors, metadata.emails): 37 | author_strings.append('Author: {0} <{1}>'.format(name, email)) 38 | 39 | epilog = ''' 40 | Pipewelder {version} 41 | {authors} 42 | URL: <{url}> 43 | '''.format( 44 | project=metadata.project, 45 | version=metadata.version, 46 | authors='\n'.join(author_strings), 47 | url=metadata.url) 48 | 49 | parser = argparse.ArgumentParser( 50 | prog=argv[0], 51 | formatter_class=argparse.RawDescriptionHelpFormatter, 52 | description=metadata.description, 53 | epilog=epilog) 54 | parser.add_argument( 55 | '-V', '--version', 56 | action='version', 57 | version='{0} {1}'.format(metadata.project, metadata.version)) 58 | parser.add_argument( 59 | 'action', 60 | help="""Action to take: 61 | 'validate' pipeline definitions with AWS; 62 | 'put-definition' of pipelines to AWS; 63 | 'upload' pipeline files to myInputS3Dir; 64 | 'activate' defined pipelines (also puts definitions if needed); 65 | 'delete' pipelines from AWS 66 | """) 67 | parser.add_argument( 68 | '--group', 69 | default=None, 70 | help="Group within pipewelder.json to act on; defaults to all") 71 | 72 | args = parser.parse_args(args=argv[1:]) 73 | args.action = args.action.replace('-', '_') 74 | 75 | defaults = {} 76 | 77 | if 'AWS_ACCESS_KEY_ID' not in os.environ: 78 | parser.error("Must set AWS_ACCESS_KEY_ID") 79 | if 'AWS_SECRET_ACCESS_KEY' not in os.environ: 80 | parser.error("Must set AWS_SECRET_ACCESS_KEY") 81 | if 'AWS_DEFAULT_REGION' in os.environ: 82 | defaults['region'] = os.environ['AWS_DEFAULT_REGION'] 83 | 84 | config_path = (os.path.exists('pipewelder.json') and 85 | 'pipewelder.json' or None) 86 | configs = pipewelder_configs(config_path, defaults) 87 | print("Reading configuration from {0}".format(config_path)) 88 | 89 | for name, config in configs.items(): 90 | if args.group and args.group != name: 91 | continue 92 | if name == 'defaults': 93 | continue 94 | print("Acting on configuration '{0}'".format(name)) 95 | conn = boto.datapipeline.connect_to_region(config['region']) 96 | pw = build_pipewelder(conn, config) 97 | if not execute_pipewelder_action(pw, args.action): 98 | return 1 99 | 100 | return 0 101 | 102 | 103 | def entry_point(): 104 | """ 105 | Zero-argument entry point for use with setuptools/distribute. 106 | """ 107 | raise SystemExit(main(sys.argv)) 108 | 109 | 110 | def build_pipewelder(conn, config): 111 | """ 112 | Return a Pipewelder object defined by *config*. 113 | """ 114 | try: 115 | pw = Pipewelder(conn, config['template']) 116 | except IOError as e: 117 | print(e) 118 | raise SystemExit(1) # callers expect a Pipewelder, not an error code 119 | for d in config['dirs']: 120 | p = pw.add_pipeline(d) 121 | for k, v in config["values"].items(): 122 | p.values[k] = v 123 | return pw 124 | 125 | 126 | def execute_pipewelder_action(pw, action): 127 | return_value = call_method(pw, action) 128 | if not return_value: 129 | print("Failed '{0}' action" 130 | .format(action)) 131 | return return_value 132 | 133 | 134 | def pipewelder_configs(filename=None, defaults=None): 135 | """ 136 | Parse json from *filename* for Pipewelder object configurations. 137 | 138 | Returns a dict which maps config names to dicts of options.
139 | """ 140 | if filename is None: 141 | data = {"pipewelder": {}} 142 | dirname = os.path.abspath('.') 143 | else: 144 | dirname = os.path.dirname(os.path.abspath(filename)) 145 | data = util.load_json(filename) 146 | defaults = defaults or {} 147 | data_defaults = data.get('defaults', {}) 148 | defaults = dict(list(CONFIG_DEFAULTS.items()) + 149 | list(data_defaults.items()) + 150 | list(defaults.items())) 151 | outputs = {} 152 | for name in data: 153 | if name == 'defaults': 154 | continue 155 | this_config = dict(list(defaults.items()) + 156 | list(data[name].items())) 157 | dirs = [] 158 | with util.cd(dirname): 159 | for entry in this_config['dirs']: 160 | for item in glob(entry): 161 | if os.path.exists(os.path.join(item, 'values.json')): 162 | dirs.append(item) 163 | outputs[name] = { 164 | "name": name, 165 | "dirs": dirs, 166 | "region": this_config['region'], 167 | "template": this_config['template'], 168 | "values": this_config['values'], 169 | } 170 | return outputs 171 | 172 | 173 | def call_method(obj, name): 174 | """ 175 | Call the method *name* on *obj*. 176 | """ 177 | return getattr(obj, name)() 178 | 179 | 180 | if __name__ == '__main__': 181 | entry_point() 182 | -------------------------------------------------------------------------------- /pipewelder/connection.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # The code in this file is modified from: 4 | # https://github.com/boto/boto/blob/2.36.0/boto/datapipeline/layer1.py 5 | # 6 | # The original code carries the following license: 7 | # # Copyright (c) 2013 Amazon.com, Inc. or its affiliates. All Rights Reserved 8 | # # 9 | # # Permission is hereby granted, free of charge, to any person obtaining a 10 | # # copy of this software and associated documentation files (the 11 | # # "Software"), to deal in the Software without restriction, including 12 | # # without limitation the rights to use, copy, modify, merge, publish, dis- 13 | # # tribute, sublicense, and/or sell copies of the Software, and to permit 14 | # # persons to whom the Software is furnished to do so, subject to the fol- 15 | # # lowing conditions: 16 | # # 17 | # # The above copyright notice and this permission notice shall be included 18 | # # in all copies or substantial portions of the Software. 19 | # # 20 | # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 21 | # # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL- 22 | # # ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT 23 | # # SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 24 | # # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 25 | # # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 26 | # # IN THE SOFTWARE. 27 | 28 | """ 29 | A patch to the boto DataPipelineConnection object. 30 | 31 | As of boto 2.36.0, putting and validating pipeline parameters/values 32 | was not supported. 33 | """ 34 | 35 | import json 36 | 37 | from boto.datapipeline.layer1 import DataPipelineConnection 38 | 39 | 40 | def put_pipeline_definition(self, 41 | pipeline_objects, 42 | pipeline_id, 43 | parameter_objects=None, 44 | parameter_values=None): 45 | """ 46 | Adds tasks, schedules, and preconditions that control the 47 | behavior of the pipeline. You can use PutPipelineDefinition to 48 | populate a new pipeline or to update an existing pipeline that 49 | has not yet been activated. 
50 | """ 51 | params = { 52 | 'pipelineId': pipeline_id, 53 | 'pipelineObjects': pipeline_objects, 54 | } 55 | if parameter_objects is not None: 56 | params['parameterObjects'] = parameter_objects 57 | if parameter_values is not None: 58 | params['parameterValues'] = parameter_values 59 | return self.make_request(action='PutPipelineDefinition', 60 | body=json.dumps(params)) 61 | 62 | 63 | def validate_pipeline_definition(self, 64 | pipeline_objects, 65 | pipeline_id, 66 | parameter_objects=None, 67 | parameter_values=None): 68 | """ 69 | Tests the pipeline definition with a set of validation checks 70 | to ensure that it is well formed and can run without error. 71 | """ 72 | params = { 73 | 'pipelineId': pipeline_id, 74 | 'pipelineObjects': pipeline_objects, 75 | } 76 | if parameter_objects is not None: 77 | params['parameterObjects'] = parameter_objects 78 | if parameter_values is not None: 79 | params['parameterValues'] = parameter_values 80 | return self.make_request(action='ValidatePipelineDefinition', 81 | body=json.dumps(params)) 82 | 83 | 84 | def create_pipeline(self, name, unique_id, description=None, tags=None): 85 | """ 86 | Creates a new empty pipeline. When this action succeeds, you 87 | can then use the PutPipelineDefinition action to populate the 88 | pipeline. 89 | :type name: string 90 | :param name: The name of the new pipeline. You can use the same name 91 | for multiple pipelines associated with your AWS account, because 92 | AWS Data Pipeline assigns each new pipeline a unique pipeline 93 | identifier. 94 | :type unique_id: string 95 | :param unique_id: A unique identifier that you specify. This identifier 96 | is not the same as the pipeline identifier assigned by AWS Data 97 | Pipeline. You are responsible for defining the format and ensuring 98 | the uniqueness of this identifier. You use this parameter to ensure 99 | idempotency during repeated calls to CreatePipeline. For example, 100 | if the first call to CreatePipeline does not return a clear 101 | success, you can pass in the same unique identifier and pipeline 102 | name combination on a subsequent call to CreatePipeline. 103 | CreatePipeline ensures that if a pipeline already exists with the 104 | same name and unique identifier, a new pipeline will not be 105 | created. Instead, you'll receive the pipeline identifier from the 106 | previous attempt. The uniqueness of the name and unique identifier 107 | combination is scoped to the AWS account or IAM user credentials. 108 | :type description: string 109 | :param description: The description of the new pipeline. 
110 | """ 111 | params = { 112 | 'name': name, 113 | 'uniqueId': unique_id, 114 | } 115 | if description is not None: 116 | params['description'] = description 117 | if tags is not None: 118 | params['tags'] = tags 119 | return self.make_request(action='CreatePipeline', 120 | body=json.dumps(params)) 121 | 122 | 123 | DataPipelineConnection.put_pipeline_definition = ( 124 | put_pipeline_definition) 125 | DataPipelineConnection.validate_pipeline_definition = ( 126 | validate_pipeline_definition) 127 | DataPipelineConnection.create_pipeline = ( 128 | create_pipeline) 129 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | REM Command file for Sphinx documentation 4 | 5 | if "%SPHINXBUILD%" == "" ( 6 | set SPHINXBUILD=sphinx-build 7 | ) 8 | set BUILDDIR=build 9 | set SPHINXOPTS=-W 10 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% source 11 | set I18NSPHINXOPTS=%SPHINXOPTS% source 12 | if NOT "%PAPER%" == "" ( 13 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% 14 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% 15 | ) 16 | 17 | if "%1" == "" goto help 18 | 19 | if "%1" == "help" ( 20 | :help 21 | echo.Please use `make ^` where ^ is one of 22 | echo. html to make standalone HTML files 23 | echo. dirhtml to make HTML files named index.html in directories 24 | echo. singlehtml to make a single large HTML file 25 | echo. pickle to make pickle files 26 | echo. json to make JSON files 27 | echo. htmlhelp to make HTML files and a HTML help project 28 | echo. qthelp to make HTML files and a qthelp project 29 | echo. devhelp to make HTML files and a Devhelp project 30 | echo. epub to make an epub 31 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter 32 | echo. text to make text files 33 | echo. man to make manual pages 34 | echo. texinfo to make Texinfo files 35 | echo. gettext to make PO message catalogs 36 | echo. changes to make an overview over all changed/added/deprecated items 37 | echo. linkcheck to check all external links for integrity 38 | echo. doctest to run all doctests embedded in the documentation if enabled 39 | goto end 40 | ) 41 | 42 | if "%1" == "clean" ( 43 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i 44 | del /q /s %BUILDDIR%\* 45 | goto end 46 | ) 47 | 48 | if "%1" == "html" ( 49 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html 50 | if errorlevel 1 exit /b 1 51 | echo. 52 | echo.Build finished. The HTML pages are in %BUILDDIR%/html. 53 | goto end 54 | ) 55 | 56 | if "%1" == "dirhtml" ( 57 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml 58 | if errorlevel 1 exit /b 1 59 | echo. 60 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. 61 | goto end 62 | ) 63 | 64 | if "%1" == "singlehtml" ( 65 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml 66 | if errorlevel 1 exit /b 1 67 | echo. 68 | echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. 69 | goto end 70 | ) 71 | 72 | if "%1" == "pickle" ( 73 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle 74 | if errorlevel 1 exit /b 1 75 | echo. 76 | echo.Build finished; now you can process the pickle files. 77 | goto end 78 | ) 79 | 80 | if "%1" == "json" ( 81 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json 82 | if errorlevel 1 exit /b 1 83 | echo. 84 | echo.Build finished; now you can process the JSON files. 
85 | goto end 86 | ) 87 | 88 | if "%1" == "htmlhelp" ( 89 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp 90 | if errorlevel 1 exit /b 1 91 | echo. 92 | echo.Build finished; now you can run HTML Help Workshop with the ^ 93 | .hhp project file in %BUILDDIR%/htmlhelp. 94 | goto end 95 | ) 96 | 97 | if "%1" == "qthelp" ( 98 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp 99 | if errorlevel 1 exit /b 1 100 | echo. 101 | echo.Build finished; now you can run "qcollectiongenerator" with the ^ 102 | .qhcp project file in %BUILDDIR%/qthelp, like this: 103 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\Pipewelder.qhcp 104 | echo.To view the help file: 105 | echo.^> assistant -collectionFile %BUILDDIR%\qthelp\Pipewelder.qhc 106 | goto end 107 | ) 108 | 109 | if "%1" == "devhelp" ( 110 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp 111 | if errorlevel 1 exit /b 1 112 | echo. 113 | echo.Build finished. 114 | goto end 115 | ) 116 | 117 | if "%1" == "epub" ( 118 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub 119 | if errorlevel 1 exit /b 1 120 | echo. 121 | echo.Build finished. The epub file is in %BUILDDIR%/epub. 122 | goto end 123 | ) 124 | 125 | if "%1" == "latex" ( 126 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 127 | if errorlevel 1 exit /b 1 128 | echo. 129 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. 130 | goto end 131 | ) 132 | 133 | if "%1" == "text" ( 134 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text 135 | if errorlevel 1 exit /b 1 136 | echo. 137 | echo.Build finished. The text files are in %BUILDDIR%/text. 138 | goto end 139 | ) 140 | 141 | if "%1" == "man" ( 142 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man 143 | if errorlevel 1 exit /b 1 144 | echo. 145 | echo.Build finished. The manual pages are in %BUILDDIR%/man. 146 | goto end 147 | ) 148 | 149 | if "%1" == "texinfo" ( 150 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo 151 | if errorlevel 1 exit /b 1 152 | echo. 153 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. 154 | goto end 155 | ) 156 | 157 | if "%1" == "gettext" ( 158 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale 159 | if errorlevel 1 exit /b 1 160 | echo. 161 | echo.Build finished. The message catalogs are in %BUILDDIR%/locale. 162 | goto end 163 | ) 164 | 165 | if "%1" == "changes" ( 166 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes 167 | if errorlevel 1 exit /b 1 168 | echo. 169 | echo.The overview file is in %BUILDDIR%/changes. 170 | goto end 171 | ) 172 | 173 | if "%1" == "linkcheck" ( 174 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck 175 | if errorlevel 1 exit /b 1 176 | echo. 177 | echo.Link check complete; look for any errors in the above output ^ 178 | or in %BUILDDIR%/linkcheck/output.txt. 179 | goto end 180 | ) 181 | 182 | if "%1" == "doctest" ( 183 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest 184 | if errorlevel 1 exit /b 1 185 | echo. 186 | echo.Testing of doctests in the sources finished, look at the ^ 187 | results in %BUILDDIR%/doctest/output.txt. 188 | goto end 189 | ) 190 | 191 | :end 192 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 
5 | SPHINXOPTS = -W 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = build 9 | 10 | # Internal variables. 11 | PAPEROPT_a4 = -D latex_paper_size=a4 12 | PAPEROPT_letter = -D latex_paper_size=letter 13 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source 14 | # the i18n builder cannot share the environment and doctrees with the others 15 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source 16 | 17 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext 18 | 19 | help: 20 | @echo "Please use \`make ' where is one of" 21 | @echo " html to make standalone HTML files" 22 | @echo " dirhtml to make HTML files named index.html in directories" 23 | @echo " singlehtml to make a single large HTML file" 24 | @echo " pickle to make pickle files" 25 | @echo " json to make JSON files" 26 | @echo " htmlhelp to make HTML files and a HTML help project" 27 | @echo " qthelp to make HTML files and a qthelp project" 28 | @echo " devhelp to make HTML files and a Devhelp project" 29 | @echo " epub to make an epub" 30 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 31 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 32 | @echo " text to make text files" 33 | @echo " man to make manual pages" 34 | @echo " texinfo to make Texinfo files" 35 | @echo " info to make Texinfo files and run them through makeinfo" 36 | @echo " gettext to make PO message catalogs" 37 | @echo " changes to make an overview of all changed/added/deprecated items" 38 | @echo " linkcheck to check all external links for integrity" 39 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 40 | 41 | clean: 42 | -rm -rf $(BUILDDIR)/* 43 | 44 | html: 45 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 46 | @echo 47 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 48 | 49 | dirhtml: 50 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 51 | @echo 52 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 53 | 54 | singlehtml: 55 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 56 | @echo 57 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 58 | 59 | pickle: 60 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 61 | @echo 62 | @echo "Build finished; now you can process the pickle files." 63 | 64 | json: 65 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 66 | @echo 67 | @echo "Build finished; now you can process the JSON files." 68 | 69 | htmlhelp: 70 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 71 | @echo 72 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 73 | ".hhp project file in $(BUILDDIR)/htmlhelp." 74 | 75 | qthelp: 76 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 77 | @echo 78 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 79 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 80 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/Pipewelder.qhcp" 81 | @echo "To view the help file:" 82 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/Pipewelder.qhc" 83 | 84 | devhelp: 85 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 86 | @echo 87 | @echo "Build finished." 
88 | @echo "To view the help file:" 89 | @echo "# mkdir -p $HOME/.local/share/devhelp/Pipewelder" 90 | @echo "# ln -s $(BUILDDIR)/devhelp $HOME/.local/share/devhelp/Pipewelder" 91 | @echo "# devhelp" 92 | 93 | epub: 94 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 95 | @echo 96 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 97 | 98 | latex: 99 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 100 | @echo 101 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 102 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 103 | "(use \`make latexpdf' here to do that automatically)." 104 | 105 | latexpdf: 106 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 107 | @echo "Running LaTeX files through pdflatex..." 108 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 109 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 110 | 111 | text: 112 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 113 | @echo 114 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 115 | 116 | man: 117 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 118 | @echo 119 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 120 | 121 | texinfo: 122 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 123 | @echo 124 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 125 | @echo "Run \`make' in that directory to run these through makeinfo" \ 126 | "(use \`make info' here to do that automatically)." 127 | 128 | info: 129 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 130 | @echo "Running Texinfo files through makeinfo..." 131 | make -C $(BUILDDIR)/texinfo info 132 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 133 | 134 | gettext: 135 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 136 | @echo 137 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 138 | 139 | changes: 140 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 141 | @echo 142 | @echo "The overview file is in $(BUILDDIR)/changes." 143 | 144 | linkcheck: 145 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 146 | @echo 147 | @echo "Link check complete; look for any errors in the above output " \ 148 | "or in $(BUILDDIR)/linkcheck/output.txt." 149 | 150 | doctest: 151 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 152 | @echo "Testing of doctests in the sources finished, look at the " \ 153 | "results in $(BUILDDIR)/doctest/output.txt." 154 | -------------------------------------------------------------------------------- /pipewelder/translator.py: -------------------------------------------------------------------------------- 1 | # Copyright 2014 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). You 4 | # may not use this file except in compliance with the License. A copy of 5 | # the License is located at 6 | # 7 | # http://aws.amazon.com/apache2.0/ 8 | # 9 | # or in the "license" file accompanying this file. This file is 10 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF 11 | # ANY KIND, either express or implied. See the License for the specific 12 | # language governing permissions and limitations under the License. 
13 | import json 14 | 15 | 16 | class PipelineDefinitionError(Exception): 17 | def __init__(self, msg, definition): 18 | full_msg = ( 19 | "Error in pipeline definition: %s\n" % msg) 20 | super(PipelineDefinitionError, self).__init__(full_msg) 21 | self.msg = msg 22 | self.definition = definition 23 | 24 | 25 | def api_to_definition(definition): 26 | # When we're translating from api_response -> definition 27 | # we have to be careful *not* to mutate the existing 28 | # response as other code might need the original 29 | # api_response. 30 | if 'pipelineObjects' in definition: 31 | definition['objects'] = _api_to_objects_definition( 32 | definition.pop('pipelineObjects')) 33 | if 'parameterObjects' in definition: 34 | definition['parameters'] = _api_to_parameters_definition( 35 | definition.pop('parameterObjects')) 36 | if 'parameterValues' in definition: 37 | definition['values'] = _api_to_values_definition( 38 | definition.pop('parameterValues')) 39 | return definition 40 | 41 | 42 | def definition_to_api_objects(definition): 43 | if 'objects' not in definition: 44 | raise PipelineDefinitionError('Missing "objects" key', definition) 45 | api_elements = [] 46 | # To convert to the structure expected by the service, 47 | # we convert the existing structure to a list of dictionaries. 48 | # Each dictionary has a 'fields', 'id', and 'name' key. 49 | for element in definition['objects']: 50 | try: 51 | element_id = element.pop('id') 52 | except KeyError: 53 | raise PipelineDefinitionError('Missing "id" key of element: %s' % 54 | json.dumps(element), definition) 55 | api_object = {'id': element_id} 56 | # If a name is provided, then we use that for the name, 57 | # otherwise the id is used for the name. 58 | name = element.pop('name', element_id) 59 | api_object['name'] = name 60 | # Now we need the field list. Each element in the field list is a dict 61 | # with a 'key', 'stringValue'|'refValue' 62 | fields = [] 63 | for key, value in sorted(element.items()): 64 | fields.extend(_parse_each_field(key, value)) 65 | api_object['fields'] = fields 66 | api_elements.append(api_object) 67 | return api_elements 68 | 69 | 70 | def definition_to_api_parameters(definition): 71 | if 'parameters' not in definition: 72 | return None 73 | parameter_objects = [] 74 | for element in definition['parameters']: 75 | try: 76 | parameter_id = element.pop('id') 77 | except KeyError: 78 | raise PipelineDefinitionError('Missing "id" key of parameter: %s' % 79 | json.dumps(element), definition) 80 | parameter_object = {'id': parameter_id} 81 | # Now we need the attribute list.
Each element in the attribute list 82 | # is a dict with a 'key', 'stringValue' 83 | attributes = [] 84 | for key, value in sorted(element.items()): 85 | attributes.extend(_parse_each_field(key, value)) 86 | parameter_object['attributes'] = attributes 87 | parameter_objects.append(parameter_object) 88 | return parameter_objects 89 | 90 | 91 | def definition_to_parameter_values(definition): 92 | if 'values' not in definition: 93 | return None 94 | parameter_values = [] 95 | for key in definition['values']: 96 | parameter_values.extend( 97 | _convert_single_parameter_value(key, definition['values'][key])) 98 | 99 | return parameter_values 100 | 101 | 102 | def _parse_each_field(key, value): 103 | values = [] 104 | if isinstance(value, list): 105 | for item in value: 106 | values.append(_convert_single_field(key, item)) 107 | else: 108 | values.append(_convert_single_field(key, value)) 109 | return values 110 | 111 | 112 | def _convert_single_field(key, value): 113 | field = {'key': key} 114 | if isinstance(value, dict) and list(value.keys()) == ['ref']: 115 | field['refValue'] = value['ref'] 116 | else: 117 | field['stringValue'] = value 118 | return field 119 | 120 | 121 | def _convert_single_parameter_value(key, values): 122 | parameter_values = [] 123 | if isinstance(values, list): 124 | for each_value in values: 125 | parameter_value = {'id': key, 'stringValue': each_value} 126 | parameter_values.append(parameter_value) 127 | else: 128 | parameter_value = {'id': key, 'stringValue': values} 129 | parameter_values.append(parameter_value) 130 | return parameter_values 131 | 132 | 133 | def _api_to_objects_definition(api_response): 134 | pipeline_objects = [] 135 | for element in api_response: 136 | current = { 137 | 'id': element['id'], 138 | 'name': element['name'] 139 | } 140 | for field in element['fields']: 141 | key = field['key'] 142 | if 'stringValue' in field: 143 | value = field['stringValue'] 144 | else: 145 | value = {'ref': field['refValue']} 146 | _add_value(key, value, current) 147 | pipeline_objects.append(current) 148 | return pipeline_objects 149 | 150 | 151 | def _api_to_parameters_definition(api_response): 152 | parameter_objects = [] 153 | for element in api_response: 154 | current = { 155 | 'id': element['id'] 156 | } 157 | for attribute in element['attributes']: 158 | _add_value(attribute['key'], attribute['stringValue'], current) 159 | parameter_objects.append(current) 160 | return parameter_objects 161 | 162 | 163 | def _api_to_values_definition(api_response): 164 | pipeline_values = {} 165 | for element in api_response: 166 | _add_value(element['id'], element['stringValue'], pipeline_values) 167 | return pipeline_values 168 | 169 | 170 | def _add_value(key, value, current_map): 171 | if key not in current_map: 172 | current_map[key] = value 173 | elif isinstance(current_map[key], list): 174 | # Dupe keys result in values aggregating 175 | # into a list. 
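        # (e.g. repeated "myTags" entries combine into
        # {"myTags": [first_value, second_value]}).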
176 | current_map[key].append(value) 177 | else: 178 | converted_list = [current_map[key], value] 179 | current_map[key] = converted_list 180 | -------------------------------------------------------------------------------- /pavement.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import print_function 4 | 5 | import os 6 | import sys 7 | import time 8 | import subprocess 9 | 10 | from paver.easy import options, task, needs, consume_args 11 | from paver.setuputils import install_distutils_tasks 12 | 13 | # Import parameters from the setup file. 14 | sys.path.insert(0, os.path.abspath('.')) # NOPEP8 15 | from setup import ( 16 | setup_dict, get_project_files, print_success_message, 17 | print_failure_message, _lint, _test, _test_all, 18 | CODE_DIRECTORY, DOCS_DIRECTORY, TESTS_DIRECTORY, PYTEST_FLAGS) 19 | 20 | options(setup=setup_dict) 21 | 22 | install_distutils_tasks() 23 | 24 | # Miscellaneous helper functions 25 | 26 | 27 | def print_passed(): 28 | # generated on http://patorjk.com/software/taag/#p=display&f=Small&t=PASSED 29 | print_success_message(r''' ___ _ ___ ___ ___ ___ 30 | | _ \/_\ / __/ __| __| \ 31 | | _/ _ \\__ \__ \ _|| |) | 32 | |_|/_/ \_\___/___/___|___/ 33 | ''') 34 | 35 | 36 | def print_failed(): 37 | # generated on http://patorjk.com/software/taag/#p=display&f=Small&t=FAILED 38 | print_failure_message(r''' ___ _ ___ _ ___ ___ 39 | | __/_\ |_ _| | | __| \ 40 | | _/ _ \ | || |__| _|| |) | 41 | |_/_/ \_\___|____|___|___/ 42 | ''') 43 | 44 | 45 | class cwd(object): 46 | """Class used for temporarily changing directories. Can be thought of 47 | as a `pushd /my/dir' then a `popd' at the end. 48 | """ 49 | def __init__(self, newcwd): 50 | """:param newcwd: directory to make the cwd 51 | :type newcwd: :class:`str` 52 | """ 53 | self.newcwd = newcwd 54 | 55 | def __enter__(self): 56 | self.oldcwd = os.getcwd() 57 | os.chdir(self.newcwd) 58 | return os.getcwd() 59 | 60 | def __exit__(self, type_, value, traceback): 61 | # This acts like a `finally' clause: it will always be executed. 62 | os.chdir(self.oldcwd) 63 | 64 | 65 | # Task-related functions 66 | 67 | def _doc_make(*make_args): 68 | """Run make in sphinx' docs directory. 69 | 70 | :return: exit code 71 | """ 72 | if sys.platform == 'win32': 73 | # Windows 74 | make_cmd = ['make.bat'] 75 | else: 76 | # Linux, Mac OS X, and others 77 | make_cmd = ['make'] 78 | make_cmd.extend(make_args) 79 | 80 | # Account for a stupid Python "bug" on Windows: 81 | # 82 | with cwd(DOCS_DIRECTORY): 83 | retcode = subprocess.call(make_cmd) 84 | return retcode 85 | 86 | 87 | # Tasks 88 | 89 | @task 90 | @needs('doc_html', 'setuptools.command.sdist') 91 | def sdist(): 92 | """Build the HTML docs and the tarball.""" 93 | pass 94 | 95 | 96 | @task 97 | def test(): 98 | """Run the unit tests.""" 99 | raise SystemExit(_test()) 100 | 101 | 102 | @task 103 | def lint(): 104 | # This refuses to format properly when running `paver help' unless 105 | # this ugliness is used. 106 | ('Perform PEP8 style check, run PyFlakes, and run McCabe complexity ' 107 | 'metrics on the code.') 108 | raise SystemExit(_lint()) 109 | 110 | 111 | @task 112 | def test_all(): 113 | """Perform a style check and run all unit tests.""" 114 | retcode = _test_all() 115 | if retcode == 0: 116 | print_passed() 117 | else: 118 | print_failed() 119 | raise SystemExit(retcode) 120 | 121 | 122 | @task 123 | @consume_args 124 | def run(args): 125 | """Run the package's main script.
126 |     # The main script expects to get the called executable's name as
127 |     # argv[0]. However, paver doesn't provide that in args. Even if it did (or
128 |     # we dove into sys.argv), it wouldn't be useful because it would be paver's
129 |     # executable. So we just pass the package name in as the executable name,
130 |     # since it's close enough. This should never be seen by an end user
131 |     # installing through Setuptools anyway.
132 |     from pipewelder.cli import main
133 |     raise SystemExit(main([CODE_DIRECTORY] + args))
134 | 
135 | 
136 | @task
137 | def commit():
138 |     """Commit only if all the tests pass."""
139 |     if _test_all() == 0:
140 |         subprocess.check_call(['git', 'commit'])
141 |     else:
142 |         print_failure_message('\nTests failed, not committing.')
143 | 
144 | 
145 | @task
146 | def coverage():
147 |     """Run tests and show test coverage report."""
148 |     try:
149 |         import pytest_cov  # NOQA
150 |     except ImportError:
151 |         print_failure_message(
152 |             'Install the pytest coverage plugin to use this task, '
153 |             "i.e., `pip install pytest-cov'.")
154 |         raise SystemExit(1)
155 |     import pytest
156 |     pytest.main(PYTEST_FLAGS + [
157 |         '--cov', CODE_DIRECTORY,
158 |         '--cov-report', 'term-missing',
159 |         TESTS_DIRECTORY])
160 | 
161 | 
162 | @task  # NOQA
163 | def doc_watch():
164 |     """Watch for changes in the docs and rebuild HTML docs when changed."""
165 |     try:
166 |         from watchdog.events import FileSystemEventHandler
167 |         from watchdog.observers import Observer
168 |     except ImportError:
169 |         print_failure_message('Install the watchdog package to use this task, '
170 |                               "i.e., `pip install watchdog'.")
171 |         raise SystemExit(1)
172 | 
173 |     class RebuildDocsEventHandler(FileSystemEventHandler):
174 |         def __init__(self, base_paths):
175 |             self.base_paths = base_paths
176 | 
177 |         def dispatch(self, event):
178 |             """Dispatches events to the appropriate methods.
179 |             :param event: The event object representing the file system event.
180 |             :type event: :class:`watchdog.events.FileSystemEvent`
181 |             """
182 |             for base_path in self.base_paths:
183 |                 if event.src_path.endswith(base_path):
184 |                     super(RebuildDocsEventHandler, self).dispatch(event)
185 |                     # We found one that matches. We're done.
186 |                     return
187 | 
188 |         def on_modified(self, event):
189 |             print_failure_message('Modification detected. Rebuilding docs.')
190 |             # # Strip off the path prefix.
191 |             # import os
192 |             # if event.src_path[len(os.getcwd()) + 1:].startswith(
193 |             #         CODE_DIRECTORY):
194 |             # # sphinx-build doesn't always pick up changes on code files,
195 |             # # even though they are used to generate the documentation. As
196 |             # # a workaround, just clean before building.
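            # Rebuild the HTML docs; the clean-first workaround above is
            # left disabled.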
197 |             doc_html()
198 |             print_success_message('Docs have been rebuilt.')
199 | 
200 |     print_success_message(
201 |         'Watching for changes in project files, press Ctrl-C to cancel...')
202 |     handler = RebuildDocsEventHandler(get_project_files())
203 |     observer = Observer()
204 |     observer.schedule(handler, path='.', recursive=True)
205 |     observer.start()
206 |     try:
207 |         while True:
208 |             time.sleep(1)
209 |     except KeyboardInterrupt:
210 |         observer.stop()
211 |     observer.join()
212 | 
213 | 
214 | @task
215 | @needs('doc_html')
216 | def doc_open():
217 |     """Build the HTML docs and open them in a web browser."""
218 |     doc_index = os.path.join(DOCS_DIRECTORY, 'build', 'html', 'index.html')
219 |     if sys.platform == 'darwin':
220 |         # Mac OS X
221 |         subprocess.check_call(['open', doc_index])
222 |     elif sys.platform == 'win32':
223 |         # Windows
224 |         subprocess.check_call(['start', doc_index], shell=True)
225 |     elif sys.platform.startswith('linux'):
226 |         # All freedesktop-compatible desktops
227 |         subprocess.check_call(['xdg-open', doc_index])
228 |     else:
229 |         print_failure_message(
230 |             "Unsupported platform. Please open `{0}' manually.".format(
231 |                 doc_index))
232 | 
233 | 
234 | @task
235 | def get_tasks():
236 |     """Get all paver-defined tasks."""
237 |     from paver.tasks import environment
238 |     for t in environment.get_tasks():
239 |         print(t.shortname)
240 | 
241 | 
242 | @task
243 | def doc_html():
244 |     """Build the HTML docs."""
245 |     retcode = _doc_make('html')
246 | 
247 |     if retcode:
248 |         raise SystemExit(retcode)
249 | 
250 | 
251 | @task
252 | def doc_clean():
253 |     """Clean (delete) the built docs."""
254 |     retcode = _doc_make('clean')
255 | 
256 |     if retcode:
257 |         raise SystemExit(retcode)
258 | 
--------------------------------------------------------------------------------
/docs/source/conf.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # flake8: noqa
3 | 
4 | # This file is based upon the file generated by sphinx-quickstart. However,
5 | # where sphinx-quickstart hardcodes values in this file that you input, this
6 | # file has been changed to pull from your module's metadata module.
7 | #
8 | # This file is execfile()d with the current directory set to its containing
9 | # dir.
10 | #
11 | # Note that not all possible configuration values are present in this
12 | # autogenerated file.
13 | #
14 | # All configuration values have a default; values that are commented out
15 | # serve to show the default.
16 | 
17 | import os
18 | import sys
19 | 
20 | # If extensions (or modules to document with autodoc) are in another directory,
21 | # add these directories to sys.path here. If the directory is relative to the
22 | # documentation root, use os.path.abspath to make it absolute, like shown here.
23 | sys.path.insert(0, os.path.abspath('../..'))
24 | 
25 | # Import project metadata
26 | from pipewelder import metadata
27 | 
28 | # -- General configuration ----------------------------------------------------
29 | 
30 | # If your documentation needs a minimal Sphinx version, state it here.
31 | #needs_sphinx = '1.0'
32 | 
33 | # Add any Sphinx extension module names here, as strings. They can be
34 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
35 | extensions = ['sphinx.ext.autodoc', 'sphinx.ext.intersphinx', 36 | 'sphinx.ext.todo', 'sphinx.ext.coverage', 'sphinx.ext.viewcode'] 37 | 38 | # show todos 39 | todo_include_todos = True 40 | 41 | # Add any paths that contain templates here, relative to this directory. 42 | templates_path = ['_templates'] 43 | 44 | # The suffix of source filenames. 45 | source_suffix = '.rst' 46 | 47 | # The encoding of source files. 48 | #source_encoding = 'utf-8-sig' 49 | 50 | # The master toctree document. 51 | master_doc = 'index' 52 | 53 | # General information about the project. 54 | project = metadata.project 55 | copyright = metadata.copyright 56 | 57 | # The version info for the project you're documenting, acts as replacement for 58 | # |version| and |release|, also used in various other places throughout the 59 | # built documents. 60 | # 61 | # The short X.Y version. 62 | version = metadata.version 63 | # The full version, including alpha/beta/rc tags. 64 | release = metadata.version 65 | 66 | # The language for content autogenerated by Sphinx. Refer to documentation 67 | # for a list of supported languages. 68 | #language = None 69 | 70 | # There are two options for replacing |today|: either, you set today to some 71 | # non-false value, then it is used: 72 | #today = '' 73 | # Else, today_fmt is used as the format for a strftime call. 74 | #today_fmt = '%B %d, %Y' 75 | 76 | # List of patterns, relative to source directory, that match files and 77 | # directories to ignore when looking for source files. 78 | exclude_patterns = [] 79 | 80 | # The reST default role (used for this markup: `text`) to use for all 81 | # documents. 82 | #default_role = None 83 | 84 | # If true, '()' will be appended to :func: etc. cross-reference text. 85 | #add_function_parentheses = True 86 | 87 | # If true, the current module name will be prepended to all description 88 | # unit titles (such as .. function::). 89 | #add_module_names = True 90 | 91 | # If true, sectionauthor and moduleauthor directives will be shown in the 92 | # output. They are ignored by default. 93 | #show_authors = False 94 | 95 | # The name of the Pygments (syntax highlighting) style to use. 96 | pygments_style = 'sphinx' 97 | 98 | # A list of ignored prefixes for module index sorting. 99 | #modindex_common_prefix = [] 100 | 101 | 102 | # -- Options for HTML output -------------------------------------------------- 103 | 104 | # The theme to use for HTML and HTML Help pages. See the documentation for 105 | # a list of builtin themes. 106 | html_theme = 'default' 107 | 108 | # Theme options are theme-specific and customize the look and feel of a theme 109 | # further. For a list of options available for each theme, see the 110 | # documentation. 111 | #html_theme_options = {} 112 | 113 | # Add any paths that contain custom themes here, relative to this directory. 114 | #html_theme_path = [] 115 | 116 | # The name for this set of Sphinx documents. If None, it defaults to 117 | # " v documentation". 118 | #html_title = None 119 | 120 | # A shorter title for the navigation bar. Default is the same as html_title. 121 | #html_short_title = None 122 | 123 | # The name of an image file (relative to this directory) to place at the top 124 | # of the sidebar. 125 | #html_logo = None 126 | 127 | # The name of an image file (within the static path) to use as favicon of the 128 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 129 | # pixels large. 
130 | #html_favicon = None 131 | 132 | # Add any paths that contain custom static files (such as style sheets) here, 133 | # relative to this directory. They are copied after the builtin static files, 134 | # so a file named "default.css" will overwrite the builtin "default.css". 135 | html_static_path = ['_static'] 136 | 137 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 138 | # using the given strftime format. 139 | #html_last_updated_fmt = '%b %d, %Y' 140 | 141 | # If true, SmartyPants will be used to convert quotes and dashes to 142 | # typographically correct entities. 143 | #html_use_smartypants = True 144 | 145 | # Custom sidebar templates, maps document names to template names. 146 | #html_sidebars = {} 147 | 148 | # Additional templates that should be rendered to pages, maps page names to 149 | # template names. 150 | #html_additional_pages = {} 151 | 152 | # If false, no module index is generated. 153 | #html_domain_indices = True 154 | 155 | # If false, no index is generated. 156 | #html_use_index = True 157 | 158 | # If true, the index is split into individual pages for each letter. 159 | #html_split_index = False 160 | 161 | # If true, links to the reST sources are added to the pages. 162 | #html_show_sourcelink = True 163 | 164 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 165 | #html_show_sphinx = True 166 | 167 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 168 | #html_show_copyright = True 169 | 170 | # If true, an OpenSearch description file will be output, and all pages will 171 | # contain a tag referring to it. The value of this option must be the 172 | # base URL from which the finished HTML is served. 173 | #html_use_opensearch = '' 174 | 175 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 176 | #html_file_suffix = None 177 | 178 | # Output file base name for HTML help builder. 179 | htmlhelp_basename = metadata.project_no_spaces + 'doc' 180 | 181 | 182 | # -- Options for LaTeX output ------------------------------------------------- 183 | 184 | latex_elements = { 185 | # The paper size ('letterpaper' or 'a4paper'). 186 | #'papersize': 'letterpaper', 187 | 188 | # The font size ('10pt', '11pt' or '12pt'). 189 | #'pointsize': '10pt', 190 | 191 | # Additional stuff for the LaTeX preamble. 192 | #'preamble': '', 193 | } 194 | 195 | # Grouping the document tree into LaTeX files. List of tuples 196 | # (source start file, target name, title, author, 197 | # documentclass [howto/manual]). 198 | latex_documents = [ 199 | ('index', metadata.project_no_spaces + '.tex', 200 | metadata.project + ' Documentation', metadata.authors_string, 201 | 'manual'), 202 | ] 203 | 204 | # The name of an image file (relative to this directory) to place at the top of 205 | # the title page. 206 | #latex_logo = None 207 | 208 | # For "manual" documents, if this is true, then toplevel headings are parts, 209 | # not chapters. 210 | #latex_use_parts = False 211 | 212 | # If true, show page references after internal links. 213 | #latex_show_pagerefs = False 214 | 215 | # If true, show URL addresses after external links. 216 | #latex_show_urls = False 217 | 218 | # Documents to append as an appendix to all manuals. 219 | #latex_appendices = [] 220 | 221 | # If false, no module index is generated. 222 | #latex_domain_indices = True 223 | 224 | 225 | # -- Options for manual page output ------------------------------------------- 226 | 227 | # One entry per manual page. 
List of tuples
228 | # (source start file, name, description, authors, manual section).
229 | man_pages = [
230 |     ('index', metadata.package, metadata.project + ' Documentation',
231 |      metadata.authors_string, 1)
232 | ]
233 | 
234 | # If true, show URL addresses after external links.
235 | #man_show_urls = False
236 | 
237 | 
238 | # -- Options for Texinfo output -----------------------------------------------
239 | 
240 | # Grouping the document tree into Texinfo files. List of tuples
241 | # (source start file, target name, title, author,
242 | #  dir menu entry, description, category)
243 | texinfo_documents = [
244 |     ('index', metadata.project_no_spaces,
245 |      metadata.project + ' Documentation', metadata.authors_string,
246 |      metadata.project_no_spaces, metadata.description, 'Miscellaneous'),
247 | ]
248 | 
249 | # Documents to append as an appendix to all manuals.
250 | #texinfo_appendices = []
251 | 
252 | # If false, no module index is generated.
253 | #texinfo_domain_indices = True
254 | 
255 | # How to display URL addresses: 'footnote', 'no', or 'inline'.
256 | #texinfo_show_urls = 'footnote'
257 | 
258 | 
259 | # Example configuration for intersphinx: refer to the Python standard library.
260 | intersphinx_mapping = {
261 |     'python': ('http://docs.python.org/', None),
262 |     'boto': ('https://boto.readthedocs.org/en/latest/', None),
263 | }
264 | 
265 | # Extra local configuration. This is useful for placing the class description
266 | # in the class docstring and the __init__ parameter documentation in the
267 | # __init__ docstring. See
268 | # for more
269 | # information.
270 | autoclass_content = 'both'
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from __future__ import print_function
3 | 
4 | import os
5 | import sys
6 | import imp
7 | import subprocess
8 | 
9 | from setuptools import setup, find_packages
10 | from setuptools.command.test import test as TestCommand
11 | from distutils import spawn
12 | 
13 | # Python 2.6 subprocess.check_output compatibility. Thanks Greg Hewgill!
14 | if 'check_output' not in dir(subprocess):
15 |     def check_output(cmd_args, *args, **kwargs):
16 |         proc = subprocess.Popen(
17 |             cmd_args, *args,
18 |             stdout=subprocess.PIPE, stderr=subprocess.PIPE, **kwargs)
19 |         out, err = proc.communicate()
20 |         if proc.returncode != 0:
21 |             raise subprocess.CalledProcessError(proc.returncode, cmd_args)
22 |         return out
23 |     subprocess.check_output = check_output
24 | 
25 | try:
26 |     import colorama
27 |     colorama.init()  # Initialize colorama on Windows
28 | except ImportError:
29 |     # Don't require colorama just for running paver tasks. This allows us to
30 |     # run `paver install' without requiring the user to first have colorama
31 |     # installed.
32 |     pass
33 | 
34 | # Add the current directory to the module search path.
35 | sys.path.append('.')
36 | 
37 | # Constants
38 | CODE_DIRECTORY = 'pipewelder'
39 | DOCS_DIRECTORY = 'docs'
40 | TESTS_DIRECTORY = 'tests'
41 | PYTEST_FLAGS = ['--doctest-modules']
42 | 
43 | # Import metadata. Normally this would just be:
44 | #
45 | #     from pipewelder import metadata
46 | #
47 | # However, when we do this, we also import `pipewelder/__init__.py'. If this
48 | # imports names from some other modules and these modules have third-party
49 | # dependencies that need installing (which happens after this file is run), the
50 | # script will crash. What we do instead is to load the metadata module by path
51 | # instead, effectively side-stepping the dependency problem. Please make sure
52 | # metadata has no dependencies, otherwise they will need to be added to
53 | # the setup_requires keyword.
54 | metadata = imp.load_source(
55 |     'metadata', os.path.join(CODE_DIRECTORY, 'metadata.py'))
56 | 
57 | 
58 | # Miscellaneous helper functions
59 | 
60 | def get_project_files():
61 |     """Retrieve a list of project files, ignoring hidden files.
62 | 
63 |     :return: sorted list of project files
64 |     :rtype: :class:`list`
65 |     """
66 |     if is_git_project() and has_git():
67 |         return get_git_project_files()
68 | 
69 |     project_files = []
70 |     for top, subdirs, files in os.walk('.'):
71 |         # Prune hidden directories in place so os.walk skips them; removing
72 |         # items from a list while iterating over it would skip entries.
73 |         subdirs[:] = [d for d in subdirs if not d.startswith('.')]
74 | 
75 |         for f in files:
76 |             if f.startswith('.'):
77 |                 continue
78 |             project_files.append(os.path.join(top, f))
79 | 
80 |     return project_files
81 | 
82 | 
83 | def is_git_project():
84 |     return os.path.isdir('.git')
85 | 
86 | 
87 | def has_git():
88 |     return bool(spawn.find_executable("git"))
89 | 
90 | 
91 | def get_git_project_files():
92 |     """Retrieve a list of all non-ignored files, including untracked files,
93 |     excluding deleted files.
94 | 
95 |     :return: sorted list of git project files
96 |     :rtype: :class:`list`
97 |     """
98 |     cached_and_untracked_files = git_ls_files(
99 |         '--cached',  # All files cached in the index
100 |         '--others',  # Untracked files
101 |         # Exclude untracked files that would be excluded by .gitignore, etc.
102 |         '--exclude-standard')
103 |     uncommitted_deleted_files = git_ls_files('--deleted')
104 | 
105 |     # Since sorting of files in a set is arbitrary, return a sorted list to
106 |     # provide a well-defined order to tools like flake8, etc.
107 |     return sorted(cached_and_untracked_files - uncommitted_deleted_files)
108 | 
109 | 
110 | def git_ls_files(*cmd_args):
111 |     """Run ``git ls-files`` in the top-level project directory. Arguments go
112 |     directly to execution call.
113 | 
114 |     :return: set of file names
115 |     :rtype: :class:`set`
116 |     """
117 |     cmd = ['git', 'ls-files']
118 |     cmd.extend(cmd_args)
119 |     return set(subprocess.check_output(cmd).splitlines())
120 | 
121 | 
122 | def print_success_message(message):
123 |     """Print a message indicating success in green color to STDOUT.
124 | 
125 |     :param message: the message to print
126 |     :type message: :class:`str`
127 |     """
128 |     try:
129 |         import colorama
130 |         print(colorama.Fore.GREEN + message + colorama.Fore.RESET)
131 |     except ImportError:
132 |         print(message)
133 | 
134 | 
135 | def print_failure_message(message):
136 |     """Print a message indicating failure in red color to STDERR.
137 | 
138 |     :param message: the message to print
139 |     :type message: :class:`str`
140 |     """
141 |     try:
142 |         import colorama
143 |         print(colorama.Fore.RED + message + colorama.Fore.RESET,
144 |               file=sys.stderr)
145 |     except ImportError:
146 |         print(message, file=sys.stderr)
147 | 
148 | 
149 | def read(filename):
150 |     """Return the contents of a file.
151 | 
152 |     :param filename: file path
153 |     :type filename: :class:`str`
154 |     :return: the file's content
155 |     :rtype: :class:`str`
156 |     """
157 |     with open(os.path.join(os.path.dirname(__file__), filename)) as f:
158 |         return f.read()
159 | 
160 | 
161 | def _lint():
162 |     """Run lint and return an exit code."""
163 |     # Flake8 doesn't have an easy way to run checks using a Python function, so
164 |     # just fork off another process to do it.
165 | 166 | # Python 3 compat: 167 | # - The result of subprocess call outputs are byte strings, meaning we need 168 | # to pass a byte string to endswith. 169 | project_python_files = [filename for filename in get_project_files() 170 | if filename.endswith(b'.py')] 171 | retcode = subprocess.call( 172 | ['flake8', '--max-complexity=10'] + project_python_files) 173 | if retcode == 0: 174 | print_success_message('No style errors') 175 | return retcode 176 | 177 | 178 | def _test(): 179 | """Run the unit tests. 180 | 181 | :return: exit code 182 | """ 183 | # Make sure to import pytest in this function. For the reason, see here: 184 | # # NOPEP8 185 | import pytest 186 | # Run the doctests 187 | import doctest 188 | import pipewelder 189 | doctest.testmod(pipewelder.core) 190 | # This runs the unit tests. 191 | # It also runs doctest, but only on the modules in TESTS_DIRECTORY. 192 | return pytest.main(PYTEST_FLAGS + [TESTS_DIRECTORY]) 193 | 194 | 195 | def _test_all(): 196 | """Run lint and tests. 197 | 198 | :return: exit code 199 | """ 200 | return _lint() + _test() 201 | 202 | 203 | # The following code is to allow tests to be run with `python setup.py test'. 204 | # The main reason to make this possible is to allow tests to be run as part of 205 | # Setuptools' automatic run of 2to3 on the source code. The recommended way to 206 | # run tests is still `paver test_all'. 207 | # See 208 | # Code based on # NOPEP8 209 | class TestAllCommand(TestCommand): 210 | def finalize_options(self): 211 | TestCommand.finalize_options(self) 212 | # These are fake, and just set to appease distutils and setuptools. 213 | self.test_suite = True 214 | self.test_args = [] 215 | 216 | def run_tests(self): 217 | raise SystemExit(_test_all()) 218 | 219 | 220 | # define install_requires for specific Python versions 221 | python_version_specific_requires = [] 222 | 223 | # as of Python >= 2.7 and >= 3.2, the argparse module is maintained within 224 | # the Python standard library, otherwise we install it as a separate package 225 | if sys.version_info < (2, 7) or (3, 0) <= sys.version_info < (3, 3): 226 | python_version_specific_requires.append('argparse') 227 | 228 | 229 | # See here for more options: 230 | # 231 | setup_dict = dict( 232 | name=metadata.package, 233 | version=metadata.version, 234 | author=metadata.authors[0], 235 | author_email=metadata.emails[0], 236 | maintainer=metadata.authors[0], 237 | maintainer_email=metadata.emails[0], 238 | url=metadata.url, 239 | description=metadata.description, 240 | long_description=read('README.rst'), 241 | # Find a list of classifiers here: 242 | # 243 | classifiers=[ 244 | 'Development Status :: 4 - Beta', 245 | 'Environment :: Console', 246 | 'Intended Audience :: Developers', 247 | 'License :: OSI Approved :: Apache Software License', 248 | 'Natural Language :: English', 249 | 'Operating System :: OS Independent', 250 | 'Programming Language :: Python :: 2.6', 251 | 'Programming Language :: Python :: 2.7', 252 | 'Programming Language :: Python :: 3.3', 253 | 'Programming Language :: Python :: 3.4', 254 | 'Topic :: Software Development :: Libraries :: Python Modules', 255 | ], 256 | packages=find_packages(exclude=(TESTS_DIRECTORY,)), 257 | install_requires=[ 258 | 'boto', 259 | 'six' 260 | ] + python_version_specific_requires, 261 | # Allow tests to be run with `python setup.py test'. 
262 |     tests_require=[
263 |         'pytest',
264 |         'mock',
265 |         'flake8',
266 |     ],
267 |     cmdclass={'test': TestAllCommand},
268 |     zip_safe=False,  # don't use eggs
269 |     entry_points={
270 |         'console_scripts': [
271 |             'pipewelder = pipewelder.cli:entry_point'
272 |         ],
273 |     }
274 | )
275 | 
276 | 
277 | def main():
278 |     setup(**setup_dict)
279 | 
280 | 
281 | if __name__ == '__main__':
282 |     main()
283 | 
--------------------------------------------------------------------------------
/pipewelder/core.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | The core Pipewelder API.
4 | """
5 | 
6 | from __future__ import print_function
7 | 
8 | import re
9 | import os
10 | import logging
11 | import hashlib
12 | from copy import deepcopy
13 | from datetime import datetime, timedelta
14 | 
15 | from pipewelder import translator
16 | from boto import connect_s3
17 | from boto.s3.key import Key as S3Key
18 | 
19 | from pipewelder import util
20 | 
21 | import six
22 | if six.PY2:
23 |     from urlparse import urlparse
24 | else:
25 |     from urllib.parse import urlparse
26 | 
27 | PIPELINE_DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%S"
28 | PIPELINE_FREQUENCY_RE = re.compile(r'(?P<number>\d+) (?P<unit>\w+s)')
29 | PIPELINE_PARAM_RE = re.compile(r'\#\{(my[a-zA-Z0-9]+)\}')
30 | PIPEWELDER_STUB_PARAMS = {
31 |     'name': "Pipewelder validation stub",
32 |     'unique_id': 'stub',
33 |     "description": """
34 | This pipeline should always be in 'PENDING' status.
35 | It is used by Pipewelder to validate pipeline definitions.
36 | """.strip()
37 | }
38 | 
39 | 
40 | class Pipewelder(object):
41 |     """
42 |     A collection of Pipelines sharing a definition template.
43 |     """
44 |     def __init__(self, conn, template_path, s3_conn=None):
45 |         """
46 |         *conn* is a :class:`boto.datapipeline.layer1.DataPipelineConnection`
47 |         instance used to manipulate added pipelines,
48 |         *s3_conn* is a :class:`boto.s3.connection.S3Connection`
49 |         used to upload pipeline tasks to S3,
50 |         and *template_path* is the path to a local file containing the
51 |         template pipeline definition.
52 |         """
53 |         self.conn = conn
54 |         self.s3_conn = s3_conn
55 |         if self.s3_conn is None:
56 |             self.s3_conn = connect_s3()
57 |         template_path = os.path.normpath(template_path)
58 |         self.template = definition_from_file(template_path)
59 |         self.pipelines = {}
60 | 
61 |     def add_pipeline(self, dirpath):
62 |         """
63 |         Load a new :class:`Pipeline` object based on the files contained in
64 |         *dirpath*.
65 |         """
66 |         pipeline = Pipeline(self.conn, self.s3_conn, self.template, dirpath)
67 |         self.pipelines[pipeline.name] = pipeline
68 |         return pipeline
69 | 
70 |     def are_pipelines_valid(self):
71 |         """
72 |         Returns ``True`` if all pipeline definitions validate with AWS.
73 |         """
74 |         return all([p.is_valid() for p in self.pipelines.values()])
75 | 
76 |     def validate(self):
77 |         """
78 |         Synonym for :meth:`are_pipelines_valid`.
79 |         """
80 |         return self.are_pipelines_valid()
81 | 
82 |     def upload(self):
83 |         """
84 |         Upload files to S3 corresponding to each pipeline and its tasks.
85 | 
86 |         Returns ``True`` if successful.
87 |         """
88 |         return all([p.upload() for p in self.pipelines.values()])
89 | 
90 |     def delete(self):
91 |         """
92 |         Delete all pipeline definitions.
93 | 
94 |         Returns ``True`` if successful.
95 |         """
96 |         return all([p.delete() for p in self.pipelines.values()])
97 | 
98 |     def put_definition(self):
99 |         """
100 |         Put definitions for all pipelines.
101 | 
102 |         Returns ``True`` if successful.
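        Each pipeline is first created in AWS if it does not already
        exist.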
103 | """ 104 | return all([p.put_definition() for p in self.pipelines.values()]) 105 | 106 | def activate(self): 107 | """ 108 | Activate all pipeline definitions, 109 | deleting existing pipeline if needed. 110 | 111 | Returns ``True`` if successful. 112 | """ 113 | if not self.are_pipelines_valid(): 114 | logging.error("Not activating pipelines due to validation errors.") 115 | return False 116 | return all([p.activate() for p in self.pipelines.values()]) 117 | 118 | 119 | class Pipeline(object): 120 | """ 121 | A class defining a single pipeline definition and associated tasks. 122 | """ 123 | def __init__(self, conn, s3_conn, template, dirpath): 124 | """ 125 | Create a Pipeline based on definition dict *template*. 126 | 127 | *dirpath* is a directory containing a 'values.json' file, 128 | a 'run' executable, and a 'tasks' directory. 129 | *conn* is a DataPipelineConnection and *s3_conn* is an S3Connection. 130 | """ 131 | self.conn = conn 132 | self.s3_conn = s3_conn 133 | self.dirpath = os.path.normpath(dirpath) 134 | self.definition = template.copy() 135 | values_path = os.path.join(dirpath, 'values.json') 136 | decoded = util.load_json(values_path) 137 | self.values = decoded.get('values', {}) 138 | if 'myName' not in self.values: 139 | self.values['myName'] = os.path.basename(dirpath) 140 | # adjust the start timestamp to the future 141 | timestamp = self.values['myStartDateTime'] 142 | period = self.values['mySchedulePeriod'] 143 | adjusted_timestamp = adjusted_to_future(timestamp, period) 144 | self.values['myStartDateTime'] = adjusted_timestamp 145 | 146 | @property 147 | def name(self): 148 | return self._get_value('myName') 149 | 150 | @property 151 | def description(self): 152 | try: 153 | return self._get_value('myDescription') 154 | except ValueError: 155 | return None 156 | 157 | @property 158 | def tags(self): 159 | if 'myTags' not in self.values: 160 | return {} 161 | return dict(tag_expression.split(':') 162 | for tag_expression in self.values['myTags']) 163 | 164 | @property 165 | def unique_id(self): 166 | return hashlib.md5(self.name + str(self.tags)).hexdigest() 167 | 168 | def api_objects(self): 169 | """ 170 | Return a dict containing the pipeline objects in AWS API format. 171 | """ 172 | d = deepcopy(self.definition) 173 | return translator.definition_to_api_objects(d) 174 | 175 | def api_parameters(self): 176 | """ 177 | Return a dict containing the pipeline parameters in AWS API format. 178 | """ 179 | d = deepcopy(self.definition) 180 | return translator.definition_to_api_parameters(d) 181 | 182 | def api_values(self): 183 | """ 184 | Return a dict containing the pipeline param values in AWS API format. 185 | """ 186 | d = {'values': self.values} 187 | return translator.definition_to_parameter_values(d) 188 | 189 | def api_tags(self): 190 | """ 191 | Return a list containing the pipeline tags in AWS API format. 192 | """ 193 | tag_list = [{'key': k, 'value': v} 194 | for k, v in self.tags.items()] 195 | return tag_list 196 | 197 | def create(self): 198 | """ 199 | Create a pipeline in AWS if it does not already exist. 200 | 201 | Returns the pipeline id. 202 | """ 203 | response = self.conn.create_pipeline(self.name, self.unique_id, 204 | self.description, self.api_tags()) 205 | return response['pipelineId'] 206 | 207 | def is_valid(self): 208 | """ 209 | Returns ``True`` if the pipeline definition validates to AWS. 
210 | """ 211 | response = self.conn.create_pipeline(**PIPEWELDER_STUB_PARAMS) 212 | pipeline_id = response["pipelineId"] 213 | response = self.conn.validate_pipeline_definition( 214 | self.api_objects(), pipeline_id, 215 | self.api_parameters(), self.api_values()) 216 | self._log_validation_messages(response) 217 | if response['errored']: 218 | return False 219 | else: 220 | logging.info("Pipeline '{0}' is valid".format(self.name)) 221 | return True 222 | 223 | def upload(self): 224 | """ 225 | Uploads the contents of `dirpath` to S3. 226 | 227 | The destination path in S3 is determined by 'myS3InputDirectory' 228 | in the 'values.json' file for this pipeline. 229 | Existing contents of the 'tasks' subdirectory are deleted. 230 | 231 | Returns ``True`` if successful. 232 | """ 233 | s3_dir = self._get_value('myS3InputDir') 234 | bucket_path, input_dir = bucket_and_path(s3_dir) 235 | bucket = self.s3_conn.get_bucket(bucket_path) 236 | 237 | remote_task_path = os.path.join(input_dir, 'tasks') 238 | existing_task_keys = bucket.list(prefix=remote_task_path) 239 | existing_tasks = [key.name for key in existing_task_keys] 240 | bucket.delete_keys(existing_tasks) 241 | logging.info("Deleted from bucket '{0}': {1}" 242 | .format(bucket_path, existing_tasks)) 243 | 244 | with util.cd(self.dirpath): 245 | for root, dirs, files in os.walk('.'): 246 | for f in files: 247 | filepath = os.path.join(root, f) 248 | k = S3Key(bucket) 249 | k.key = os.path.normpath(os.path.join(input_dir, filepath)) 250 | k.set_contents_from_filename(filepath) 251 | logging.info('Copied {0} to {1}' 252 | .format(os.path.abspath(filepath), 253 | os.path.normpath( 254 | os.path.join(s3_dir, filepath)))) 255 | return True 256 | 257 | def delete(self): 258 | """ 259 | Delete this pipeline definition from AWS. 260 | 261 | Returns ``True`` if successful. 262 | """ 263 | pipeline_id = self.create() 264 | logging.info("Deleting pipeline with id {0}".format(pipeline_id)) 265 | self.conn.delete_pipeline(pipeline_id) 266 | return True 267 | 268 | def put_definition(self): 269 | """ 270 | Put this pipeline definition to AWS. 271 | 272 | Returns ``True`` if successful. 273 | """ 274 | pipeline_id = self.create() 275 | logging.info("Putting pipeline definition for {0}".format(pipeline_id)) 276 | self.conn.put_pipeline_definition(self.api_objects(), 277 | pipeline_id, 278 | self.api_parameters(), 279 | self.api_values()) 280 | return True 281 | 282 | def activate(self): 283 | """ 284 | Activate this pipeline definition in AWS. 285 | 286 | Deletes the existing pipeline if it has previously been activated. 287 | 288 | Returns ``True`` if successful. 
289 | """ 290 | pipeline_id = self.create() 291 | existing_definition = definition_from_id(self.conn, pipeline_id) 292 | state = state_from_id(self.conn, pipeline_id) 293 | if existing_definition == self.definition: 294 | return True 295 | elif state == 'PENDING': 296 | self.put_definition() 297 | else: 298 | self.delete() 299 | return self.activate() 300 | logging.info("Activating pipeline with id {0}".format(pipeline_id)) 301 | self.conn.activate_pipeline(pipeline_id) 302 | return True 303 | 304 | def _log_validation_messages(self, response): 305 | for container in response['validationWarnings']: 306 | logging.warning("Warnings in validation response for %s", 307 | container['id']) 308 | for message in container['warnings']: 309 | logging.warning(message) 310 | for container in response['validationErrors']: 311 | logging.error("Errors in validation response for %s", 312 | container['id']) 313 | for message in container['errors']: 314 | logging.error(message) 315 | 316 | def _get_value(self, key): 317 | if key in self.values: 318 | return self._parsed_via_parameters(self.values[key]) 319 | params = self.definition['parameters'] 320 | default = fetch_default(params, key) 321 | if default is None: 322 | raise ValueError("No value or default found for '{0}'" 323 | .format(key)) 324 | return self._parsed_via_parameters(default) 325 | 326 | def _parsed_via_parameters(self, expression): 327 | placeholders = re.findall(PIPELINE_PARAM_RE, expression) 328 | if not placeholders: 329 | return expression 330 | key = placeholders[0] 331 | value = self._get_value(key) 332 | placeholder = '#{' + key + '}' 333 | expression = expression.replace(placeholder, value) 334 | return self._parsed_via_parameters(expression) 335 | 336 | def _parsed_object(self, name): 337 | return parsed_object(self.conn, self.create(), name) 338 | 339 | def _parsed_location(self, name): 340 | obj = self._parsed_object(name) 341 | fetch_field_value(obj, 'directoryPath') 342 | 343 | 344 | def bucket_and_path(s3_uri): 345 | """ 346 | Return a bucket name and key path from *s3_uri*. 347 | 348 | >>> bucket_and_path('s3://pipewelder-bucket/pipewelder-test/inputs') 349 | ('pipewelder-bucket', 'pipewelder-test/inputs') 350 | """ 351 | uri = urlparse(s3_uri) 352 | return (uri.netloc, uri.path[1:]) 353 | 354 | 355 | def parse_period(period): 356 | """ 357 | Return a timedelta object parsed from string *period*. 358 | 359 | >>> parse_period("15 minutes") 360 | datetime.timedelta(0, 900) 361 | >>> parse_period("3 hours") 362 | datetime.timedelta(0, 10800) 363 | >>> parse_period("1 days") 364 | datetime.timedelta(1) 365 | """ 366 | parts = PIPELINE_FREQUENCY_RE.match(period) 367 | if not parts: 368 | raise ValueError("'{0}' cannot be parsed as a period".format(period)) 369 | parts = parts.groupdict() 370 | kwargs = {parts['unit']: int(parts['number'])} 371 | return timedelta(**kwargs) 372 | 373 | 374 | def adjusted_to_future(timestamp, period): 375 | """ 376 | Return *timestamp* string, adjusted to the future if necessary. 377 | 378 | If *timestamp* is in the future, it will be returned unchanged. 379 | If it's in the past, *period* will be repeatedly added until the 380 | result is in the future. 381 | 382 | All times are assumed to be in UTC. 
383 | 384 | >>> adjusted_to_future('2199-01-01T00:00:00', '1 days') 385 | '2199-01-01T00:00:00' 386 | """ 387 | dt = datetime.strptime(timestamp, PIPELINE_DATETIME_FORMAT) 388 | delta = parse_period(period) 389 | now = datetime.utcnow() 390 | while dt < now: 391 | dt += delta 392 | return dt.strftime(PIPELINE_DATETIME_FORMAT) 393 | 394 | 395 | def fetch_field_value(aws_response, field_name): 396 | """ 397 | Return a value nested within the 'fields' entry of dict *aws_response*. 398 | 399 | The returned value is the second item from a dict with 'key' *field_name*. 400 | 401 | >>> r = {'fields': [{'key': 'someKey', 'stringValue': 'someValue'}]} 402 | >>> fetch_field_value(r, 'someKey') 403 | 'someValue' 404 | """ 405 | for container in aws_response['fields']: 406 | if container['key'] == field_name: 407 | for (k, v) in container.items(): 408 | if k != 'key': 409 | return v 410 | raise ValueError("Did not find a field called {0} in response {1}" 411 | .format(field_name, aws_response)) 412 | 413 | 414 | def fetch_default(params, key): 415 | """ 416 | Return the default associated with *key* from parameter list *params*. 417 | 418 | If no default, returns None. 419 | >>> p = [{'type': 'String', 'id': 'myParam', 'default': 'foo'}] 420 | >>> fetch_default(p, 'myParam') 421 | 'foo' 422 | >>> p = [{'type': 'String', 'id': 'myParam'}] 423 | >>> fetch_default(p, 'myParam') 424 | """ 425 | for container in params: 426 | if container['id'] == key: 427 | if 'default' in container: 428 | return container['default'] 429 | return None 430 | 431 | 432 | def state_from_id(conn, pipeline_id): 433 | """ 434 | Return the *@pipelineState* string for object matching *pipeline_id*. 435 | 436 | *conn* is a DataPipelineConnection object. 437 | """ 438 | response = conn.describe_pipelines([pipeline_id]) 439 | description = response['pipelineDescriptionList'][0] 440 | return fetch_field_value(description, '@pipelineState') 441 | 442 | 443 | def definition_from_file(filename): 444 | """ 445 | Return a dict containing the contents of pipeline definition *filename*. 446 | """ 447 | return util.load_json(filename) 448 | 449 | 450 | def definition_from_id(conn, pipeline_id): 451 | """ 452 | Return a dict containing the definition of *pipeline_id*. 453 | 454 | *conn* is a DataPipelineConnection object. 455 | """ 456 | response = conn.get_pipeline_definition(pipeline_id) 457 | return translator.api_to_definition(response) 458 | 459 | 460 | def parsed_objects(conn, pipeline_id, object_ids): 461 | """ 462 | Return a list of object dicts as evaluated by Data Pipeline. 463 | """ 464 | response = conn.describe_objects(object_ids, pipeline_id, 465 | evaluate_expressions=True) 466 | return response['pipelineObjects'] 467 | 468 | 469 | def parsed_object(conn, pipeline_id, object_id): 470 | """ 471 | Return an object dict as evaluated by Data Pipeline. 472 | """ 473 | return parsed_objects(conn, pipeline_id, [object_id])[0] 474 | --------------------------------------------------------------------------------
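A minimal end-to-end usage sketch (not part of the repository; the
connection setup and file paths here are illustrative assumptions based
on the API in pipewelder/core.py):

    from boto.datapipeline import layer1
    from pipewelder import core

    # Credentials and region come from the usual boto configuration.
    conn = layer1.DataPipelineConnection()
    # Hypothetical template path holding the shared pipeline definition.
    pw = core.Pipewelder(conn, "pipeline_definition.json")
    # The directory must contain values.json, a run executable, and tasks/.
    pw.add_pipeline("my_pipeline_dir")
    if pw.validate():   # validate every added pipeline with AWS
        pw.upload()     # copy each pipeline directory to S3
        pw.activate()   # put definitions and activate the pipelines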