├── .editorconfig
├── .github
│   └── ISSUE_TEMPLATE.md
├── .gitignore
├── .travis.yml
├── CONTRIBUTING.rst
├── HISTORY.rst
├── LICENSE
├── MANIFEST.in
├── Makefile
├── README.rst
├── airflow_plugins
│   ├── __init__.py
│   ├── hooks
│   │   ├── __init__.py
│   │   └── ftp_hook.py
│   ├── operators
│   │   ├── __init__.py
│   │   ├── base.py
│   │   ├── cookiecutter.py
│   │   ├── csv.py
│   │   ├── db.py
│   │   ├── defer.py
│   │   ├── files.py
│   │   ├── git.py
│   │   ├── mixins.py
│   │   ├── run_evaluation.py
│   │   ├── sensors
│   │   │   ├── __init__.py
│   │   │   ├── file_sensor.py
│   │   │   └── task_sensor.py
│   │   ├── slack
│   │   │   ├── __init__.py
│   │   │   ├── hooks.py
│   │   │   ├── notifications.py
│   │   │   ├── operators.py
│   │   │   └── sensors.py
│   │   └── zip.py
│   ├── utils.py
│   ├── variables
│   │   ├── __init__.py
│   │   └── value_resolver.py
│   └── xcom.py
├── docs
│   ├── .gitignore
│   ├── Makefile
│   ├── conf.py
│   ├── contributing.rst
│   ├── history.rst
│   ├── index.rst
│   ├── installation.rst
│   ├── make.bat
│   ├── plugins
│   │   ├── index.rst
│   │   ├── operators
│   │   │   ├── base.rst
│   │   │   ├── csv.rst
│   │   │   ├── db.rst
│   │   │   ├── files.rst
│   │   │   ├── git.rst
│   │   │   ├── slack.rst
│   │   │   └── zip.rst
│   │   ├── sensors.rst
│   │   └── utils.rst
│   └── readme.rst
├── requirements_dev.txt
├── setup.cfg
├── setup.py
├── tests
│   ├── __init__.py
│   ├── conftest.py
│   ├── operators
│   │   ├── __init__.py
│   │   ├── sensors
│   │   │   ├── __init__.py
│   │   │   └── test_file_sensor.py
│   │   ├── test_db.py
│   │   ├── test_db_columns.csv
│   │   ├── test_run_evaluation.py
│   │   └── test_zip_operator.py
│   └── variables
│       ├── __init__.py
│       └── test_value_resolver.py
├── tox.ini
└── travis_pypi_setup.py
/.editorconfig: -------------------------------------------------------------------------------- 1 | # http://editorconfig.org 2 | 3 | root = true 4 | 5 | [*] 6 | indent_style = space 7 | indent_size = 4 8 | trim_trailing_whitespace = true 9 | insert_final_newline = true 10 | charset = utf-8 11 | end_of_line = lf 12 | 13 | [*.bat] 14 | indent_style = tab 15 | end_of_line = crlf 16 | 17 | [LICENSE] 18 | insert_final_newline = false 19 | 20 | [Makefile] 21 | indent_style = tab 22 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | * Airflow Plugins version: 2 | * Python version: 3 | * Operating System: 4 | 5 | ### Description 6 | 7 | Describe what you were trying to get done. 8 | Tell us what happened, what went wrong, and what you expected to happen. 9 | 10 | ### What I Did 11 | 12 | ``` 13 | Paste the command(s) you ran and the output. 14 | If there was a crash, please include the traceback here. 15 | ``` 16 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | 55 | # Sphinx documentation 56 | docs/_build/ 57 | 58 | # PyBuilder 59 | target/ 60 | 61 | # pyenv python configuration file 62 | .python-version 63 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # Config file for automatic testing at travis-ci.org 2 | # This file will be regenerated if you run travis_pypi_setup.py 3 | 4 | language: python 5 | python: 6 | - 3.5 7 | 8 | # command to install dependencies, e.g. pip install -r requirements.txt --use-mirrors 9 | install: 10 | - pip install -U tox-travis airflow psycopg2>=2.6.2 11 | - airflow initdb 12 | 13 | # command to run tests, e.g. python setup.py test 14 | script: tox 15 | -------------------------------------------------------------------------------- /CONTRIBUTING.rst: -------------------------------------------------------------------------------- 1 | .. highlight:: shell 2 | 3 | ============ 4 | Contributing 5 | ============ 6 | 7 | Contributions are welcome, and they are greatly appreciated! Every 8 | little bit helps, and credit will always be given. 9 | 10 | You can contribute in many ways: 11 | 12 | Types of Contributions 13 | ---------------------- 14 | 15 | Report Bugs 16 | ~~~~~~~~~~~ 17 | 18 | Report bugs at https://github.com/storiesbi/airflow-plugins/issues. 19 | 20 | If you are reporting a bug, please include: 21 | 22 | * Your operating system name and version. 23 | * Any details about your local setup that might be helpful in troubleshooting. 24 | * Detailed steps to reproduce the bug. 25 | 26 | Fix Bugs 27 | ~~~~~~~~ 28 | 29 | Look through the GitHub issues for bugs. Anything tagged with "bug" 30 | and "help wanted" is open to whoever wants to implement it. 31 | 32 | Implement Features 33 | ~~~~~~~~~~~~~~~~~~ 34 | 35 | Look through the GitHub issues for features. Anything tagged with "enhancement" 36 | and "help wanted" is open to whoever wants to implement it. 37 | 38 | Write Documentation 39 | ~~~~~~~~~~~~~~~~~~~ 40 | 41 | Airflow Plugins could always use more documentation, whether as part of the 42 | official Airflow Plugins docs, in docstrings, or even on the web in blog posts, 43 | articles, and such. 44 | 45 | Submit Feedback 46 | ~~~~~~~~~~~~~~~ 47 | 48 | The best way to send feedback is to file an issue at https://github.com/storiesbi/airflow-plugins/issues. 49 | 50 | If you are proposing a feature: 51 | 52 | * Explain in detail how it would work. 53 | * Keep the scope as narrow as possible, to make it easier to implement. 54 | * Remember that this is a volunteer-driven project, and that contributions 55 | are welcome :) 56 | 57 | Get Started! 58 | ------------ 59 | 60 | Ready to contribute? Here's how to set up `airflow_plugins` for local development. 61 | 62 | 1. Fork the `airflow_plugins` repo on GitHub. 63 | 2. Clone your fork locally:: 64 | 65 | $ git clone git@github.com:your_name_here/airflow-plugins.git 66 | 67 | 3. Install your local copy into a virtualenv. 
Assuming you have virtualenvwrapper installed, this is how you set up your fork for local development:: 68 | 69 | $ mkvirtualenv airflow-plugins 70 | $ cd airflow-plugins/ 71 | $ python setup.py develop 72 | 73 | 4. Create a branch for local development:: 74 | 75 | $ git checkout -b name-of-your-bugfix-or-feature 76 | 77 | Now you can make your changes locally. 78 | 79 | 5. When you're done making changes, check that your changes pass flake8 and the tests, including testing other Python versions with tox:: 80 | 81 | $ flake8 airflow-plugins tests 82 | $ python setup.py test or py.test 83 | $ tox 84 | 85 | To get flake8 and tox, just pip install them into your virtualenv. 86 | 87 | 6. Commit your changes and push your branch to GitHub:: 88 | 89 | $ git add . 90 | $ git commit -m "Your detailed description of your changes." 91 | $ git push origin name-of-your-bugfix-or-feature 92 | 93 | 7. Submit a pull request through the GitHub website. 94 | 95 | Pull Request Guidelines 96 | ----------------------- 97 | 98 | Before you submit a pull request, check that it meets these guidelines: 99 | 100 | 1. The pull request should include tests. 101 | 2. If the pull request adds functionality, the docs should be updated. Put 102 | your new functionality into a function with a docstring, and add the 103 | feature to the list in README.rst. 104 | 3. The pull request should work for Python 2.6, 2.7, 3.3, 3.4 and 3.5, and for PyPy. Check 105 | https://travis-ci.org/storiesbi/airflow_plugins/pull_requests 106 | and make sure that the tests pass for all supported Python versions. 107 | 108 | Tips 109 | ---- 110 | 111 | To run a subset of tests:: 112 | 113 | $ py.test tests.test_airflow_plugins 114 | 115 | -------------------------------------------------------------------------------- /HISTORY.rst: -------------------------------------------------------------------------------- 1 | ======= 2 | History 3 | ======= 4 | 5 | 0.1.3 (2018-01-18) 6 | ------------------ 7 | 8 | * First release on PyPI. 9 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | MIT License 3 | 4 | Copyright (c) 2018, Michael Kuty 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | 8 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 9 | 10 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
11 | 12 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include CONTRIBUTING.rst 2 | include HISTORY.rst 3 | include LICENSE 4 | include README.rst 5 | 6 | recursive-include tests * 7 | recursive-exclude * __pycache__ 8 | recursive-exclude * *.py[co] 9 | 10 | recursive-include docs *.rst conf.py Makefile make.bat *.jpg *.png *.gif 11 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: clean clean-test clean-pyc clean-build docs help 2 | .DEFAULT_GOAL := help 3 | define BROWSER_PYSCRIPT 4 | import os, webbrowser, sys 5 | try: 6 | from urllib import pathname2url 7 | except: 8 | from urllib.request import pathname2url 9 | 10 | webbrowser.open("file://" + pathname2url(os.path.abspath(sys.argv[1]))) 11 | endef 12 | export BROWSER_PYSCRIPT 13 | 14 | define PRINT_HELP_PYSCRIPT 15 | import re, sys 16 | 17 | for line in sys.stdin: 18 | match = re.match(r'^([a-zA-Z_-]+):.*?## (.*)$$', line) 19 | if match: 20 | target, help = match.groups() 21 | print("%-20s %s" % (target, help)) 22 | endef 23 | export PRINT_HELP_PYSCRIPT 24 | BROWSER := python -c "$$BROWSER_PYSCRIPT" 25 | 26 | help: 27 | @python -c "$$PRINT_HELP_PYSCRIPT" < $(MAKEFILE_LIST) 28 | 29 | clean: clean-build clean-pyc clean-test ## remove all build, test, coverage and Python artifacts 30 | 31 | 32 | clean-build: ## remove build artifacts 33 | rm -fr build/ 34 | rm -fr dist/ 35 | rm -fr .eggs/ 36 | find . -name '*.egg-info' -exec rm -fr {} + 37 | find . -name '*.egg' -exec rm -f {} + 38 | 39 | clean-pyc: ## remove Python file artifacts 40 | find . -name '*.pyc' -exec rm -f {} + 41 | find . -name '*.pyo' -exec rm -f {} + 42 | find . -name '*~' -exec rm -f {} + 43 | find . -name '__pycache__' -exec rm -fr {} + 44 | 45 | clean-test: ## remove test and coverage artifacts 46 | rm -fr .tox/ 47 | rm -f .coverage 48 | rm -fr htmlcov/ 49 | 50 | lint: ## check style with flake8 51 | flake8 airflow_plugins tests 52 | 53 | test: ## run tests quickly with the default Python 54 | py.test 55 | 56 | 57 | test-all: ## run tests on every Python version with tox 58 | tox 59 | 60 | coverage: ## check code coverage quickly with the default Python 61 | coverage run --source airflow_plugins -m pytest 62 | coverage report -m 63 | coverage html 64 | $(BROWSER) htmlcov/index.html 65 | 66 | docs: ## generate Sphinx HTML documentation, including API docs 67 | rm -f docs/airflow_plugins.rst 68 | rm -f docs/modules.rst 69 | sphinx-apidoc -o docs/ airflow_plugins 70 | $(MAKE) -C docs clean 71 | $(MAKE) -C docs html 72 | $(BROWSER) docs/_build/html/index.html 73 | 74 | servedocs: docs ## compile the docs watching for changes 75 | watchmedo shell-command -p '*.rst' -c '$(MAKE) -C docs html' -R -D . 
76 | 77 | release: clean ## package and upload a release 78 | python setup.py sdist upload 79 | python setup.py bdist_wheel upload 80 | 81 | dist: clean ## builds source and wheel package 82 | python setup.py sdist 83 | python setup.py bdist_wheel 84 | ls -l dist 85 | 86 | install: clean ## install the package to the active Python's site-packages 87 | python setup.py install 88 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | =============== 2 | Airflow Plugins 3 | =============== 4 | 5 | .. image:: https://readthedocs.org/projects/airflow-plugins/badge/?version=latest 6 | :target: https://airflow-plugins.readthedocs.io/en/latest/?badge=latest 7 | :alt: Documentation Status 8 | 9 | 10 | Airflow plugins. 11 | 12 | * Free software: MIT license 13 | * Documentation: https://airflow-plugins.readthedocs.io. 14 | 15 | 16 | Features 17 | -------- 18 | 19 | * Database operations 20 | * Slack operations 21 | * ZIP operations 22 | * Git operations 23 | * File operations 24 | * File sensors 25 | * Cookiecutter operations 26 | * Airflow variables utils 27 | 28 | -------------------------------------------------------------------------------- /airflow_plugins/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | import logging 3 | from datetime import timedelta 4 | 5 | __version__ = "0.1.3" 6 | 7 | try: 8 | from airflow.configuration import get 9 | from raven.contrib.celery import register_signal, register_logger_signal 10 | from raven.base import Client 11 | except ImportError: 12 | pass 13 | else: 14 | try: 15 | dsn = get("core", "sentry_dsn") 16 | except Exception as e: 17 | pass 18 | else: 19 | client = Client(dsn=dsn) 20 | 21 | # hook into the Celery error handler 22 | register_signal(client) 23 | 24 | register_logger_signal(client, loglevel=logging.ERROR) 25 | 26 | 27 | DEFAULT_RETRIES = 1 28 | DEFAULT_RETRY_DELAY = timedelta(0, 60) 29 | -------------------------------------------------------------------------------- /airflow_plugins/hooks/__init__.py: -------------------------------------------------------------------------------- 1 | from .ftp_hook import FTPHook 2 | 3 | __all__ = ['FTPHook'] 4 | -------------------------------------------------------------------------------- /airflow_plugins/hooks/ftp_hook.py: -------------------------------------------------------------------------------- 1 | from airflow.contrib.hooks.ftp_hook import FTPHook as FTPHookBase 2 | 3 | 4 | class FTPHook(FTPHookBase): 5 | 6 | def get_conn(self): 7 | super(FTPHook, self).get_conn() 8 | 9 | params = self.get_connection(self.ftp_conn_id) 10 | pasv = params.extra_dejson.get("passive", True) 11 | self.conn.set_pasv(pasv) 12 | 13 | return self.conn 14 | -------------------------------------------------------------------------------- /airflow_plugins/operators/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import BashOperator, ExecutableOperator, FileOperator 2 | from .csv import CSVSQL, CSVLook, CSVStats, CSVtoDB, DBtoCSV, SplitCSVtoDB 3 | from .db import ( 4 | ChangeDatabaseName, 5 | CreateDatabase, 6 | CreateTableWithColumns, 7 | DropDatabase, 8 | PostgresOperator 9 | ) 10 | from .defer import DeferOperator 11 | from .files import ( 12 | DeleteFile, 13 | DownloadFile, 14 | DynamicDeleteFile, 15 | DynamicDownloadFile, 16 | DynamicUploadFile, 17 | UploadFile 18 | ) 19 | from 
.run_evaluation import RunEvaluationOperator 20 | from .sensors import FileSensor, FTPDirSensor, TaskRuntimeSensor 21 | from .slack import Message, SlackMessageSensor 22 | from .zip import UnzipOperator, ZipOperator 23 | 24 | OPERATORS = [ 25 | BashOperator, ChangeDatabaseName, CreateDatabase, 26 | CreateTableWithColumns, CSVLook, CSVSQL, CSVStats, CSVtoDB, DBtoCSV, 27 | DeferOperator, DeleteFile, DownloadFile, 28 | DropDatabase, DynamicDeleteFile, DynamicDownloadFile, DynamicUploadFile, 29 | ExecutableOperator, FileOperator, FileSensor, FTPDirSensor, 30 | Message, PostgresOperator, 31 | RunEvaluationOperator, 32 | SlackMessageSensor, SplitCSVtoDB, 33 | TaskRuntimeSensor, UnzipOperator, UploadFile, ZipOperator, 34 | ] 35 | -------------------------------------------------------------------------------- /airflow_plugins/operators/base.py: -------------------------------------------------------------------------------- 1 | import re 2 | from urllib.parse import urlparse 3 | 4 | from airflow.models import BaseOperator 5 | from airflow.operators.bash_operator import BashOperator as BashOperatorBase 6 | from airflow.operators.postgres_operator import \ 7 | PostgresOperator as PostgresOperatorBase 8 | from airflow.utils.decorators import apply_defaults 9 | 10 | from airflow_plugins import utils 11 | 12 | 13 | class ExecutableOperator(BaseOperator): 14 | """ 15 | Simple wrapper around command line executable programs with helper 16 | functions to add options, flags and arguments. 17 | """ 18 | bash_command = "" 19 | 20 | def add_flag(self, flag_name): 21 | """Add boolean flag option used as enabled or disabled state""" 22 | self.bash_command += " {0}".format(flag_name) 23 | 24 | def add_option(self, option_name, value): 25 | """Add option to command""" 26 | if value == "" or value is None: 27 | return 28 | 29 | if isinstance(value, str) and '--' in value: 30 | options = " {0} {1}".format(option_name, value) 31 | else: 32 | options = ' {0} "{1}"'.format(option_name, value) 33 | options = re.sub(r'\s+', ' ', options) 34 | self.bash_command += options 35 | 36 | 37 | class BashOperator(BashOperatorBase): 38 | 39 | """Bash Operator 40 | """ 41 | 42 | bash_command = None 43 | 44 | @apply_defaults 45 | def __init__(self, bash_command=None, *args, **kwargs): 46 | super(BashOperator, self).__init__( 47 | bash_command=bash_command or self.bash_command, *args, **kwargs) 48 | 49 | 50 | class PostgresOperator(PostgresOperatorBase): 51 | 52 | """Run SQL on PostgreSQL-based systems.
53 | """ 54 | 55 | sql = None 56 | 57 | @apply_defaults 58 | def __init__(self, sql=None, *args, **kwargs): 59 | super(PostgresOperator, self).__init__( 60 | sql=sql or self.sql, *args, **kwargs) 61 | 62 | 63 | class FileOperator(BaseOperator): 64 | 65 | @staticmethod 66 | def _split_path(path): 67 | parsed = urlparse(path) 68 | scheme = parsed.scheme 69 | netloc = parsed.netloc if scheme else None 70 | path = parsed.path if scheme else path 71 | return (scheme, netloc, path) 72 | 73 | def _get_ftp_path(self, path): 74 | return self._split_path(path)[-1] 75 | 76 | def _get_s3_path(self, path): 77 | bucket, key = self._split_path(path)[1:] 78 | bucket = bucket or 'storiesbi-datapipeline' 79 | return (bucket, key) 80 | 81 | def pre_execute(self, context): 82 | params = context['params'] 83 | for param in ['local_path', 'remote_path']: 84 | setattr(self, param, params.get(param)) 85 | 86 | conn_id = None 87 | if hasattr(self, 'conn_id'): 88 | conn_id = self.conn_id 89 | 90 | if not conn_id: 91 | conn_params = ['conn_id', 'remote_connection'] 92 | for conn_param in conn_params: 93 | conn_id = params.get(conn_param) 94 | if conn_id: 95 | break 96 | 97 | if not conn_id: 98 | path_attrs = ['path', 'remote_path'] 99 | for path_attr in path_attrs: 100 | if hasattr(self, path_attr): 101 | path = getattr(self, path_attr) 102 | if path: 103 | engine, target = self._split_path(path)[:2] 104 | if engine == 'ftp': 105 | conn_id = target 106 | elif engine == 's3': 107 | conn_id = 's3.stories.bi' 108 | break 109 | 110 | conn = utils.get_connection(conn_id) 111 | self.conn_id = conn_id 112 | self.conn = conn 113 | -------------------------------------------------------------------------------- /airflow_plugins/operators/cookiecutter.py: -------------------------------------------------------------------------------- 1 | from airflow_plugins.operators import BashOperator 2 | 3 | 4 | class CookiecutterOperator(BashOperator): 5 | 6 | """Run cookiecutter as operator.""" 7 | 8 | bash_command = """ 9 | cookiecutter {{ params.source_path }} -o {{ params.output_path }} \ 10 | {%- for key, value in params.options.items() %} 11 | {{ key }}={{ value }} \ 12 | {%- endfor %} 13 | """ 14 | -------------------------------------------------------------------------------- /airflow_plugins/operators/csv.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | 4 | from airflow_plugins.operators import BashOperator 5 | 6 | 7 | class CSVLook(BashOperator): 8 | 9 | """Get stats of the CSV file""" 10 | 11 | bash_command = """ 12 | csvlook {{ params.extra }} {{ params.path }} 13 | """ 14 | 15 | 16 | class CSVSQL(BashOperator): 17 | 18 | """Use csvsql tool for migration CSV to SQL. 19 | For more parameters check csvsql.""" 20 | 21 | bash_command = """ 22 | csvsql {{ params.extra }} {{ params.path }} 23 | """ 24 | 25 | 26 | class CSVtoDB(BashOperator): 27 | 28 | """Use csvsql tool for migration csv to SQL database. 
29 | For more parameters check csvsql.""" 30 | 31 | bash_command = """ 32 | csvsql {{ params.extra }} \ 33 | {%- if params.db %} --db="{{ params.db }}/{{ params.company|lower }} \ 34 | {%- if params.company %}_{%- endif %}{{ params.database_name }}" \ 35 | --no-inference -y 200 --insert --tables {{ params.get("table_name", "import") }} \ 36 | {%- endif %} {{ params.local_path }} 37 | """ # noqa 38 | 39 | 40 | class DBtoCSV(BashOperator): 41 | 42 | bash_command = """ 43 | sql2csv {{ params.extra }} --query "{{ params.query }}" \ 44 | {%- if params.db %} --db="{{ params.db }}/{{ params.company|lower }} \ 45 | {%- if params.company %}_{%- endif %}{{ params.database_name }}" \ 46 | {%- endif %} > {{ params.output_path_temp }} 47 | """ # noqa 48 | 49 | 50 | class CSVStats(BashOperator): 51 | 52 | """Get stats of the CSV file 53 | Use csvstat. 54 | """ 55 | 56 | bash_command = """ 57 | csvstat {{ params.extra }} {{ params.path }} 58 | """ 59 | 60 | 61 | class SplitCSVtoDB(CSVtoDB): 62 | 63 | """Split CSV and upload to DB. 64 | """ 65 | 66 | @staticmethod 67 | def _split_file(filepath, n): 68 | if n <= 1: 69 | return 70 | 71 | files = [open('{}.{}'.format(filepath, i), mode='w') for i in range(n)] 72 | 73 | with open(filepath, mode='r') as f: 74 | line = f.readline() 75 | for file in files: 76 | # header line 77 | file.write(line) 78 | 79 | i = 0 80 | line = f.readline() 81 | while line: 82 | files[i].write(line) 83 | line = f.readline() 84 | i = (i + 1) % n 85 | 86 | for file in files: 87 | file.close() 88 | 89 | @staticmethod 90 | def _determine_splits(filepath): 91 | size = os.stat(filepath).st_size 92 | # splits as hundreds of megabytes 93 | splits = size // (100 * 1000 * 1000) + 1 94 | logging.info('File size: {} bytes ==> {} splits'.format( 95 | size, splits if splits > 1 else 'no')) 96 | return splits 97 | 98 | def pre_execute(self, context): 99 | filepath = context['params']['local_path'] 100 | self._splits = self._determine_splits(filepath) 101 | try: 102 | self._split_file(filepath, self._splits) 103 | except Exception as e: 104 | self._splits = 0 105 | logging.warning('Splitting the input file failed: {}'.format(e)) 106 | logging.info('Trying to load the whole file.') 107 | if self._splits > 1: 108 | self.bash_command = 'for i in $(seq 0 {}); do {}.$i; done'.format( 109 | self._splits - 1, self.bash_command.strip()) 110 | 111 | def post_execute(self, context): 112 | filepath = context['params']['local_path'] 113 | if self._splits > 1: 114 | for i in range(self._splits): 115 | file = '{}.{}'.format(filepath, i) 116 | try: 117 | os.remove(file) 118 | except Exception as e: 119 | # it's ok, these are just helper files 120 | logging.warning('Unable to delete file' 121 | '{}: {}'.format(file, e)) 122 | -------------------------------------------------------------------------------- /airflow_plugins/operators/db.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import re 3 | 4 | import psycopg2 5 | import psycopg2.extensions 6 | from airflow.hooks.postgres_hook import PostgresHook as PostgresHookBase 7 | from airflow.operators.postgres_operator import \ 8 | PostgresOperator as PostgresOperatorBase 9 | from airflow.utils.decorators import apply_defaults 10 | from airflow_plugins.operators.base import \ 11 | PostgresOperator as PostgresOperatorStatic 12 | 13 | 14 | class PostgresHook(PostgresHookBase): 15 | 16 | """Tuned PostgreSQL hook which support 17 | running SQL like create database. 18 | Supports silent fail. 
19 | """ 20 | 21 | def __init__(self, database=None, fail_silently=False, *args, **kwargs): 22 | super(PostgresHook, self).__init__(*args, **kwargs) 23 | self.fail_silently = fail_silently 24 | self.schema = database 25 | 26 | def get_conn(self): 27 | conn = self.get_connection(self.postgres_conn_id) 28 | conn_args = dict( 29 | host=conn.host, 30 | user=conn.login, 31 | password=conn.password, 32 | dbname=self.schema or conn.schema, 33 | port=conn.port) 34 | # check for ssl parameters in conn.extra 35 | for arg_name, arg_val in conn.extra_dejson.items(): 36 | if arg_name in ['sslmode', 'sslcert', 'sslkey', 37 | 'sslrootcert', 'sslcrl']: 38 | conn_args[arg_name] = arg_val 39 | psycopg2_conn = psycopg2.connect(**conn_args) 40 | if psycopg2_conn.server_version < 70400: 41 | self.supports_autocommit = True 42 | return psycopg2_conn 43 | 44 | def run(self, sql, autocommit=False, parameters=None): 45 | """ 46 | Runs a command or a list of commands. Pass a list of sql 47 | statements to the sql parameter to get them to execute 48 | sequentially 49 | 50 | :param sql: the sql statement to be executed (str) or a list of 51 | sql statements to execute 52 | :type sql: str or list 53 | :param autocommit: What to set the connection's autocommit setting to 54 | before executing the query. 55 | :type autocommit: bool 56 | :param parameters: The parameters to render the SQL query with. 57 | :type parameters: mapping or iterable 58 | """ 59 | conn = self.get_conn() 60 | if isinstance(sql, str): 61 | sql = [sql] 62 | 63 | self.set_autocommit(conn, autocommit) 64 | 65 | cur = conn.cursor() 66 | for s in sql: 67 | logging.info(s) 68 | if parameters is not None: 69 | cur.execute(s, parameters) 70 | else: 71 | if self.fail_silently: 72 | try: 73 | cur.execute(s) 74 | conn.commit() 75 | except Exception as e: 76 | conn.rollback() 77 | logging.exception(e) 78 | else: 79 | cur.execute(s) 80 | conn.commit() 81 | 82 | cur.close() 83 | conn.close() 84 | 85 | 86 | class PostgresOperator(PostgresOperatorBase): 87 | 88 | """PostgreSQL operator which uses PostgresHook""" 89 | 90 | @apply_defaults 91 | def __init__(self, database=None, fail_silently=True, *args, **kwargs): 92 | super(PostgresOperator, self).__init__(*args, **kwargs) 93 | self.fail_silently = fail_silently 94 | self.schema = database 95 | 96 | def pre_execute(self, context): 97 | self.hook = PostgresHook(postgres_conn_id=self.postgres_conn_id, 98 | database=self.schema, 99 | fail_silently=self.fail_silently) 100 | 101 | def execute(self, context): 102 | logging.info('Executing: ' + str(self.sql)) 103 | self.hook.run(self.sql, self.autocommit, parameters=self.parameters) 104 | 105 | 106 | class CreateDatabase(PostgresOperatorStatic): 107 | 108 | """Operator which creates database in PostgreSQL.""" 109 | 110 | _sql = [ 111 | "CREATE DATABASE {{ params.database_name }};", # keep create db at top 112 | "GRANT ALL PRIVILEGES ON DATABASE {{ params.database_name }} " 113 | "TO {{ params.user }};", # set user in pre_execute if not in params 114 | ] 115 | 116 | def pre_execute(self, context): 117 | params = context['params'] 118 | company = params.get('company') 119 | if company is not None: 120 | db_name = params['database_name'] 121 | self.params['database_name'] = company.lower() + '_' + db_name 122 | 123 | hook = PostgresHook(postgres_conn_id=self.postgres_conn_id) 124 | conn = hook.get_connection(self.postgres_conn_id) 125 | if conn is not None: 126 | user = params.get('user', conn.login) 127 | if user == conn.login: 128 | self.params['user'] = user 129 | else: 
130 | for item in reversed([ 131 | "CREATE USER {{ params.user }} " 132 | "WITH PASSWORD '{{ params.password }}';", 133 | "ALTER ROLE {{ params.user }} " 134 | "SET client_encoding TO 'utf8';", 135 | "ALTER ROLE {{ params.user }} " 136 | "SET default_transaction_isolation TO 'read committed';", 137 | "ALTER ROLE {{ params.user }} SET timezone TO 'UTC';", 138 | ]): 139 | self._sql.insert(1, item) 140 | 141 | self.sql = self._sql 142 | context['ti'].render_templates() 143 | 144 | def execute(self, context): 145 | sqls = (self.sql[0:1], self.sql[1:]) 146 | logging.info('Executing: ' + str(self.sql)) 147 | 148 | self.hook = PostgresHook(postgres_conn_id=self.postgres_conn_id, 149 | fail_silently=True) # fails if db exists 150 | self.hook.run(sqls[0], self.autocommit, parameters=self.parameters) 151 | self.hook = PostgresHook(postgres_conn_id=self.postgres_conn_id, 152 | fail_silently=False) # should not fail 153 | if len(sqls[1]) > 0: 154 | self.hook.run(sqls[1], self.autocommit, parameters=self.parameters) 155 | 156 | 157 | class DropDatabase(PostgresOperatorStatic): 158 | 159 | """Drop database operator.""" 160 | 161 | sql = [ 162 | "DROP DATABASE {{ params.company|lower }}_{{ params.database_name }};", 163 | ] 164 | 165 | def execute(self, context): 166 | logging.info('Executing: ' + str(self.sql)) 167 | 168 | self.hook = PostgresHook(postgres_conn_id=self.postgres_conn_id, 169 | fail_silently=True) 170 | self.hook.run(self.sql, self.autocommit, parameters=self.parameters) 171 | 172 | 173 | class ChangeDatabaseName(PostgresOperatorStatic): 174 | 175 | """Rename database in operator.""" 176 | 177 | sql = [ 178 | "ALTER DATABASE {{ params.company|lower }}_{{ params.database_name }} " 179 | "RENAME TO {{ params.company|lower }}_{{ params.database_name }}" 180 | "_{{ ts_nodash[:15] }};", 181 | ] 182 | 183 | def execute(self, context): 184 | logging.info('Executing: ' + str(self.sql)) 185 | 186 | self.hook = PostgresHook(postgres_conn_id=self.postgres_conn_id, 187 | fail_silently=False) 188 | self.hook.run(self.sql, autocommit=True, 189 | parameters=self.parameters) 190 | 191 | 192 | class CreateTableWithColumns(PostgresOperator): 193 | 194 | """Create database with columns.""" 195 | 196 | _sql = [ 197 | "DROP TABLE IF EXISTS {{ params.table_name }};", 198 | "CREATE TABLE {{ params.table_name }} ({{ params.table_columns }});" 199 | ] 200 | 201 | @apply_defaults 202 | def __init__(self, *args, **kwargs): 203 | super(CreateTableWithColumns, self).__init__(sql=self._sql, 204 | *args, **kwargs) 205 | 206 | @classmethod 207 | def _parse_extra_args(cls, args): 208 | parsed = [] 209 | if not args: 210 | return parsed 211 | if not isinstance(args, list): 212 | args = args.strip().split() 213 | 214 | def add_stripped(*values): 215 | for val in values: 216 | if not val: 217 | continue 218 | else: 219 | while (len(val) > 1 220 | and val[0] == val[-1] 221 | and val[0] in ['"', "'"]): 222 | val = val[1:-1] 223 | if val: 224 | parsed.append(val) 225 | 226 | for arg in args: 227 | if re.match(r'-[-\w]+=', arg): 228 | argsplit = arg.split('=') 229 | opt = argsplit[0] 230 | val = '='.join(argsplit[1:]) 231 | add_stripped(opt, val) 232 | else: 233 | add_stripped(arg) 234 | 235 | return parsed 236 | 237 | @classmethod 238 | def _get_table_columns(cls, csv_file_path, extra=None): 239 | from io import StringIO 240 | from csvkit.utilities.csvcut import CSVCut 241 | output = StringIO() 242 | extra_args = cls._parse_extra_args(extra) if extra else [] 243 | args = ['-n', *extra_args, csv_file_path] 244 | print(args) 245 
| csvcut = CSVCut(args=args) 246 | csvcut.output_file = output 247 | csvcut.run() 248 | csv_columns = [ 249 | col.split(': ')[1] 250 | for col in output.getvalue().splitlines() 251 | ] 252 | table_columns = [ 253 | '"{}"'.format(col) 254 | if (col != col.lower() or ' ' in col) else col 255 | for col in csv_columns 256 | ] 257 | return table_columns 258 | 259 | def pre_execute(self, context): 260 | if context['params'].get('table_columns') is None: 261 | columns = self._get_table_columns(context['params']['csv_file'], 262 | context['params'].get('extra')) 263 | self.params['table_columns'] = ', '.join([ 264 | '{} TEXT'.format(col) for col in columns 265 | ]) 266 | self.sql = self._sql 267 | context['ti'].render_templates() 268 | super(CreateTableWithColumns, self).pre_execute(context) 269 | -------------------------------------------------------------------------------- /airflow_plugins/operators/defer.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import time 3 | 4 | from airflow.models import BaseOperator 5 | from airflow.utils.decorators import apply_defaults 6 | from airflow.utils.state import State 7 | 8 | 9 | class DeferOperator(BaseOperator): 10 | 11 | """Check pipeline.""" 12 | 13 | @apply_defaults 14 | def __init__(self, check_delay=5.0, *args, **kwargs): 15 | super(DeferOperator, self).__init__(*args, **kwargs) 16 | self.check_delay = check_delay 17 | self.deferred_task_ids = [] 18 | 19 | def _tasks_finished(self, dag_run): 20 | for ti in dag_run.get_task_instances(): 21 | if ti.task_id in self.deferred_task_ids: 22 | continue 23 | 24 | if ti.state in State.unfinished(): 25 | logging.info("Deferred tasks are not yet executable. Found " 26 | "unfinished task `{}`.".format(ti.task_id)) 27 | return False 28 | 29 | return True 30 | 31 | def pre_execute(self, context): 32 | self.deferred_task_ids.append(context['task'].task_id) 33 | for task in context['task'].get_flat_relatives(): 34 | self.deferred_task_ids.append(task.task_id) 35 | 36 | def execute(self, context): 37 | while not self._tasks_finished(context['dag_run']): 38 | logging.debug("Next check in {} s.".format(self.check_delay)) 39 | time.sleep(self.check_delay) 40 | 41 | logging.info("Start executing deferred tasks: `{}`." 
42 | .format(", ".join(self.deferred_task_ids))) 43 | -------------------------------------------------------------------------------- /airflow_plugins/operators/files.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from airflow.exceptions import AirflowException 4 | from airflow.hooks.S3_hook import S3Hook 5 | from airflow_plugins.hooks import FTPHook 6 | from airflow_plugins.operators import FileOperator 7 | 8 | 9 | class DynamicTargetFile(FileOperator): 10 | 11 | """Dynamic target file operator""" 12 | 13 | def pre_execute(self, context): 14 | params = context['params'] 15 | for target in ['local_path', 'remote_path']: 16 | value = context['ti'].xcom_pull(task_ids=None, key=target) 17 | if value is not None: 18 | # update context params for base pre_execute 19 | params[target] = value 20 | super(DynamicTargetFile, self).pre_execute(context) 21 | 22 | 23 | class DownloadFile(FileOperator): 24 | """Download file operator.""" 25 | 26 | def execute(self, context): 27 | logging.info( 28 | "Downloading %s to %s" % (self.remote_path, self.local_path)) 29 | 30 | if self.conn and self.conn.conn_type == "ftp": 31 | hook = FTPHook(self.conn_id) 32 | path = self._get_ftp_path(self.remote_path) 33 | hook.retrieve_file(path, self.local_path) 34 | 35 | elif self.conn and self.conn.conn_type == "s3": 36 | hook = S3Hook(self.conn_id) 37 | bucket, key = self._get_s3_path(self.remote_path) 38 | fileobj = hook.get_bucket(bucket).get_key(key) 39 | fileobj.get_contents_to_filename(self.local_path) 40 | 41 | else: 42 | raise AirflowException('Connection: {}'.format(self.conn_id)) 43 | 44 | 45 | class DynamicDownloadFile(DownloadFile, DynamicTargetFile): 46 | """Dynamic download file operator.""" 47 | pass 48 | 49 | 50 | class UploadFile(FileOperator): 51 | """Upload file operator.""" 52 | 53 | def execute(self, context): 54 | logging.info( 55 | "Uploading %s to %s" % (self.local_path, self.remote_path)) 56 | 57 | if self.conn and self.conn.conn_type == "ftp": 58 | hook = FTPHook(self.conn_id) 59 | path = self._get_ftp_path(self.remote_path) 60 | hook.store_file(path, self.local_path) 61 | 62 | elif self.conn and self.conn.conn_type == "s3": 63 | hook = S3Hook(self.conn_id) 64 | bucket, key = self._get_s3_path(self.remote_path) 65 | hook.load_file(self.local_path, key, bucket, replace=True) 66 | 67 | else: 68 | raise AirflowException('Connection: {}'.format(self.conn_id)) 69 | 70 | 71 | class DynamicUploadFile(UploadFile, DynamicTargetFile): 72 | """Dynamic upload file operator.""" 73 | 74 | 75 | class DeleteFile(FileOperator): 76 | """Delete file operator.""" 77 | 78 | def execute(self, context): 79 | logging.info("Deleting %s" % self.remote_path) 80 | 81 | if self.conn and self.conn.conn_type == "ftp": 82 | hook = FTPHook(self.conn_id) 83 | path = self._get_ftp_path(self.remote_path) 84 | hook.delete_file(path) 85 | 86 | elif self.conn and self.conn.conn_type == "s3": 87 | raise NotImplementedError( 88 | 'Storage engine: {}'.format(self.conn.conn_type)) 89 | 90 | else: 91 | raise AirflowException('Connection: {}'.format(self.conn_id)) 92 | 93 | 94 | class DynamicDeleteFile(DeleteFile, DynamicTargetFile): 95 | """Dynamic delete file operator.""" 96 | pass 97 | -------------------------------------------------------------------------------- /airflow_plugins/operators/git.py: -------------------------------------------------------------------------------- 1 | from airflow_plugins.operators import BashOperator 2 | 3 | 4 | class 
GitOperator(BashOperator): 5 | """Base Git operator.""" 6 | 7 | template_fields = ('bash_command', 'env', 'options') 8 | 9 | bash_command = """ 10 | git {{ params.action }} {{ params.options }} 11 | """ 12 | 13 | options = "" 14 | 15 | def __init__(self, *args, **kwargs): 16 | 17 | if 'params' in kwargs: 18 | kwargs['params'].update({'action': self.action, 19 | 'options': self.options}) 20 | 21 | super(BashOperator, self).__init__(*args, **kwargs) 22 | 23 | 24 | class GitClone(GitOperator): 25 | """Git clone operator.""" 26 | 27 | action = "clone" 28 | 29 | options = """ 30 | {%- if params.source_path %}{{ params.source_path }} \{%- endif %} 31 | {%- if params.output_path %} {{ params.output_path }}{%- endif %} 32 | """ 33 | 34 | 35 | class GitCommit(GitOperator): 36 | """Git commit operator.""" 37 | 38 | action = "commit" 39 | 40 | bash_command = """ 41 | cd {{ params.source_path }}; git {{ params.action }} {{ params.options }} 42 | """ 43 | 44 | options = """-m '{{ params.message }}'""" 45 | 46 | 47 | class GitPush(GitOperator): 48 | """Git push operator.""" 49 | 50 | action = "push" 51 | 52 | bash_command = """ 53 | cd {{ params.source_path }}; git {{ params.action }} {{ params.options }} 54 | """ 55 | -------------------------------------------------------------------------------- /airflow_plugins/operators/mixins.py: -------------------------------------------------------------------------------- 1 | from airflow import AirflowException 2 | from airflow.utils.state import State 3 | 4 | 5 | class ShutdownOnDependencyMissOperator: 6 | dependant_tasks = [] 7 | 8 | def set_dependant_tasks(self, dependant_tasks=None): 9 | if not dependant_tasks: 10 | dependant_tasks = [] 11 | 12 | self.dependant_tasks = dependant_tasks 13 | 14 | def pre_execute(self, context): 15 | failed = [ 16 | State.SHUTDOWN, 17 | State.FAILED, 18 | State.SKIPPED, 19 | State.UPSTREAM_FAILED, 20 | ] 21 | 22 | for task in self.dependant_tasks: 23 | ti = context['dag_run'].get_task_instance(task) 24 | if not ti or ti.state in failed: 25 | raise AirflowException("Task `{}` failed, skipping...".format( 26 | task 27 | )) 28 | -------------------------------------------------------------------------------- /airflow_plugins/operators/run_evaluation.py: -------------------------------------------------------------------------------- 1 | from airflow.models import BaseOperator 2 | from airflow.utils.state import State 3 | 4 | 5 | class RunEvaluationOperator(BaseOperator): 6 | """Check the pipeline status.""" 7 | 8 | def execute(self, context): 9 | if not context['dag_run']: 10 | return 11 | 12 | dag_run = context['dag_run'] 13 | tis = dag_run.get_task_instances() 14 | failed_tis = [] 15 | for ti in tis: 16 | if ti.state == State.FAILED: 17 | failed_tis.append("{}.{}".format(ti.dag_id, ti.task_id)) 18 | 19 | if len(failed_tis) == 0: 20 | return 21 | 22 | tasks_ids = ",".join(failed_tis) 23 | raise RuntimeError( 24 | "Failed tasks instances detected - `{}`.".format(tasks_ids) 25 | ) 26 | -------------------------------------------------------------------------------- /airflow_plugins/operators/sensors/__init__.py: -------------------------------------------------------------------------------- 1 | from .file_sensor import FileSensor, FTPDirSensor 2 | from .task_sensor import TaskRuntimeSensor 3 | 4 | __all__ = ['FileSensor', 'FTPDirSensor', 'TaskRuntimeSensor'] 5 | -------------------------------------------------------------------------------- /airflow_plugins/operators/sensors/file_sensor.py: 
-------------------------------------------------------------------------------- 1 | import logging 2 | import os.path 3 | import time 4 | from datetime import datetime, timedelta 5 | 6 | from airflow.exceptions import ( 7 | AirflowException, 8 | AirflowSensorTimeout, 9 | AirflowSkipException 10 | ) 11 | from airflow.hooks.S3_hook import S3Hook 12 | from airflow.operators.sensors import BaseSensorOperator 13 | from airflow.utils.decorators import apply_defaults 14 | from pytz import timezone 15 | 16 | from airflow_plugins.hooks import FTPHook 17 | from airflow_plugins.operators import FileOperator 18 | from airflow_plugins.operators.slack.notifications import send_notification 19 | 20 | 21 | class FileSensor(BaseSensorOperator, FileOperator): 22 | 23 | """Check file presence on hook""" 24 | 25 | @apply_defaults 26 | def __init__( 27 | self, path, 28 | modified=None, 29 | notify_after=8*60*60, 30 | notify_delta=1*60*60, 31 | conn_id=None, 32 | *args, **kwargs): 33 | super(FileSensor, self).__init__(*args, **kwargs) 34 | 35 | self.path = path 36 | self.modified = modified 37 | self._init_notification_variables(notify_after=notify_after, 38 | notify_delta=notify_delta) 39 | self.conn_id = conn_id 40 | 41 | def _init_notification_variables(self, **kwargs): 42 | self.last_notification = None 43 | for key, val in kwargs.items(): 44 | if isinstance(val, int): 45 | val = timedelta(seconds=val) 46 | setattr(self, key, val) 47 | 48 | def _send_notification(self, context, success=False): 49 | if self.notify_after is None: 50 | return 51 | 52 | ti = context['ti'] 53 | title = ti.task_id.upper() 54 | text_lines = [ 55 | 'Path: {}'.format(self.path), 56 | 'DAG: {}'.format(ti.dag_id), 57 | ] 58 | color = 'warning' 59 | 60 | if success: 61 | color = 'good' 62 | title += ' finally' * (self.last_notification is not None) 63 | title += ' succeeded :white_check_mark:' 64 | text = '\n'.join(text_lines) 65 | logging.info('Sending notification about exit.') 66 | send_notification(ti.get_dagrun(), text, title, color) 67 | return 68 | 69 | runtime = datetime.now() - ti.start_date 70 | if runtime >= self.notify_after: 71 | if (self.last_notification is None or 72 | runtime >= self.last_notification + self.notify_delta): 73 | title += ' is still waiting :redsiren:' 74 | runtime_str = str(runtime).split('.')[0] 75 | text = 'Still not finished after {}'.format(runtime_str) 76 | text = '\n'.join([text, *text_lines]) 77 | logging.info('Sending notification about runtime.') 78 | send_notification(ti.get_dagrun(), text, title, color) 79 | self.last_notification = runtime 80 | 81 | def pre_execute(self, context): 82 | FileOperator.pre_execute(self, context) 83 | 84 | if self.modified is None: 85 | self.modified = context['ti'].start_date 86 | 87 | def floor_datetime(dt, precision): 88 | dt_items = ('microsecond', 'second', 'minute', 'hour', 'day') 89 | to_replace = dt_items[:dt_items.index(precision)] 90 | replaced = {item: 0 for item in to_replace} 91 | return dt.replace(**replaced) 92 | 93 | dt = context['ti'].start_date 94 | dt_day = floor_datetime(dt, 'day') 95 | 96 | if isinstance(self.modified, str): 97 | # H / D / W / M / A <==> start of: 98 | # hour / day / week / month / anytime 99 | modkey = self.modified[0].upper() 100 | for precision in ['hour', 'day']: 101 | if modkey == precision[0].upper(): 102 | self.modified = floor_datetime(dt, precision) 103 | break 104 | else: 105 | if modkey == 'W': # week start (Monday) 106 | self.modified = dt_day - timedelta(days=dt_day.weekday()) 107 | elif modkey == 'M': # month 
start (1st day) 108 | self.modified = dt_day - timedelta(days=(dt_day.day - 1)) 109 | elif modkey == 'A': # anytime -- exists 110 | self.modified = None 111 | else: 112 | raise AirflowException('Unable to devise modified time: {}' 113 | ' (supported: H / D / W / M / A)' 114 | .format(self.modified)) 115 | 116 | elif isinstance(self.modified, int): 117 | modkey = self.modified 118 | if modkey < 0: 119 | # subtract given number of days 120 | daydiff = abs(modkey) 121 | self.modified = dt_day - timedelta(days=daydiff) 122 | else: # modkey >= 0 123 | # 1-7 <==> last Monday-Sunday (goes at most 6 days back) 124 | if not (1 <= self.modified <= 7): 125 | raise AirflowException('Unable to devise modified time: {}' 126 | ' (a weekday number expected, 1-7)' 127 | .format(self.modified)) 128 | 129 | weekday = dt.weekday() + 1 # datetime weekdays 0-6 130 | daydiff = (weekday - modkey) % 7 131 | self.modified = dt_day - timedelta(days=daydiff) 132 | 133 | def execute(self, context): 134 | started_at = datetime.now() 135 | while True: 136 | poke_result = self.poke(context) 137 | if poke_result: 138 | break 139 | if (datetime.now() - started_at).total_seconds() > self.timeout: 140 | timeout_msg = 'Snap. Time is OUT.' 141 | if self.soft_fail: 142 | raise AirflowSkipException(timeout_msg) 143 | else: 144 | raise AirflowSensorTimeout(timeout_msg) 145 | else: 146 | self._send_notification(context, success=False) 147 | time.sleep(self.poke_interval) 148 | if self.last_notification is not None: 149 | # notify about success in case of previous warnings 150 | self._send_notification(context, success=True) 151 | logging.info('Success criteria met. Exiting.') 152 | return poke_result 153 | 154 | def poke(self, context): 155 | logging.info( 156 | 'Poking for file: {} in {}'.format(self.path, self.conn_id)) 157 | 158 | if not self.conn: 159 | raise AirflowException( 160 | "Connection not found: `{}`".format(self.conn_id)) 161 | 162 | if self.conn.conn_type not in ["ftp", "s3"]: 163 | raise NotImplementedError( 164 | "Unsupported engine: `{}`".format(self.conn.conn_type)) 165 | 166 | if self.conn.conn_type == "ftp": 167 | hook = FTPHook(self.conn_id) 168 | try: 169 | path = self._get_ftp_path(self.path) 170 | last_modified = hook.get_mod_time(path) 171 | except Exception as e: 172 | msg = ('Error getting file modification time: {} ' 173 | '(The file most likely does not exist)' 174 | .format(e)) 175 | if self.modified: 176 | # looking for a new version of the file 177 | raise AirflowException(msg) 178 | else: 179 | # waiting for the file to appear 180 | logging.warning(msg) 181 | return False 182 | 183 | elif self.conn.conn_type == "s3": 184 | hook = S3Hook(self.conn_id) 185 | bucket, key = self._get_s3_path(self.path) 186 | fileobj = hook.get_bucket(bucket).get_key(key) 187 | 188 | if not fileobj: 189 | msg = 'The file does not exist' 190 | if self.modified: 191 | # looking for a new version of the file 192 | raise AirflowException(msg) 193 | else: 194 | # waiting for the file to appear 195 | logging.info(msg) 196 | return False 197 | 198 | def get_last_modified(fileobj): 199 | timestamp = fileobj.last_modified 200 | tformat = '%a, %d %b %Y %H:%M:%S %Z' 201 | dt = datetime.strptime(timestamp, tformat) 202 | t = time.strptime(timestamp, tformat) 203 | 204 | try: 205 | tz = timezone(t.tm_zone) 206 | except AttributeError: # tm_zone not set on t 207 | return dt 208 | else: 209 | dt_local = dt.replace(tzinfo=tz).astimezone() 210 | return dt_local.replace(tzinfo=None) 211 | 212 | last_modified = 
get_last_modified(fileobj) 213 | 214 | if not self.modified: 215 | logging.info('File found, last modified: {}' 216 | .format(last_modified.isoformat())) 217 | return last_modified 218 | 219 | logging.info( 220 | "File last modified: {} (checking for {})".format( 221 | last_modified.isoformat(), 222 | self.modified.isoformat())) 223 | 224 | if last_modified > self.modified: 225 | return last_modified 226 | else: 227 | return False 228 | 229 | 230 | class FTPDirSensor(FileSensor): 231 | 232 | def pre_execute(self, context): 233 | super(FTPDirSensor, self).pre_execute(context) 234 | self.dirpath = self.path 235 | 236 | def poke(self, context): 237 | hook = FTPHook(self.conn_id) 238 | dirpath = self._get_ftp_path(self.dirpath) 239 | files = hook.list_directory(dirpath) 240 | if len(files) == 0: 241 | logging.info('Directory {} is empty'.format(self.dirpath)) 242 | return False 243 | else: 244 | filepaths = [os.path.join(self.dirpath, f) for f in files] 245 | filemodts = { 246 | f: hook.get_mod_time(self._get_ftp_path(f)) 247 | for f in filepaths 248 | } 249 | self.path = sorted(filepaths, key=lambda f: filemodts[f])[-1] 250 | return super(FTPDirSensor, self).poke(context) 251 | 252 | def post_execute(self, context): 253 | super(FTPDirSensor, self).post_execute(context) 254 | context['ti'].xcom_push(key='remote_path', value=self.path) 255 | -------------------------------------------------------------------------------- /airflow_plugins/operators/sensors/task_sensor.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import pickle 3 | from datetime import datetime, timedelta 4 | 5 | from airflow.models import DagModel, TaskInstance 6 | from airflow.operators.sensors import BaseSensorOperator 7 | from airflow.settings import Session 8 | from airflow.utils.decorators import apply_defaults 9 | from airflow.utils.state import State 10 | 11 | from airflow_plugins.operators.slack.notifications import send_notification 12 | 13 | 14 | class TaskRuntimeSensor(BaseSensorOperator): 15 | 16 | """ 17 | Checks whether particular tasks are still running 18 | after a period of time and notify about them if so 19 | 20 | :param notify_after: Start sending notifications after given number 21 | of seconds (of runtime) 22 | :type notify_after: int (or timedelta) 23 | :param notify_delta: Time interval between successive notifications 24 | in seconds, defaults to one hour (60*60 seconds) 25 | :type notify_delta: int (or timedelta) 26 | :param start_wait: Wait at start for at least given number of seconds 27 | for tasks to be registered (set if this op runs continuously) 28 | :type start_wait: int (or timedelta) 29 | :param dag_ids: List of dag_ids determining target task instances, 30 | can be set as a mask (e.g. "kiwi_master" for all kiwi master dags) 31 | :type dag_ids: list 32 | :param task_ids: List of task_ids determining target task instances 33 | :type task_ids: list 34 | :param operator_ids: List of operators determining target task instances 35 | :type operator_ids: list 36 | :param include_subdags: Whether to include subdags of target dags (dag_ids) 37 | (i.e. 
"kiwi_master" to also match "kiwi_master.storyteller" tasks), 38 | default True (always True if dag_ids not set) 39 | :type include_subdags: bool 40 | :param check_execution_time: Whether to check task instance execution time, 41 | or wall clock time (time elapsed from midnight), default True 42 | :type check_execution_time: bool 43 | """ 44 | 45 | @apply_defaults 46 | def __init__( 47 | self, 48 | notify_after, 49 | notify_delta=60*60, 50 | start_wait=0, 51 | dag_ids=None, 52 | task_ids=None, 53 | operator_ids=None, 54 | include_subdags=True, 55 | check_execution_time=True, 56 | *args, **kwargs): 57 | 58 | super(TaskRuntimeSensor, self).__init__(*args, **kwargs) 59 | 60 | if dag_ids is None and task_ids is None and operator_ids is None: 61 | raise ValueError( 62 | 'Provide at least one of `dag_ids`, `task_ids`, `operator_ids`' 63 | ' to determine the task instances to check.') 64 | 65 | self.include_subdags = include_subdags 66 | self.check_execution_time = check_execution_time 67 | self._init_target_variables(dag_ids=dag_ids, 68 | task_ids=task_ids, 69 | operator_ids=operator_ids) 70 | self._init_notification_variables(notify_after=notify_after, 71 | notify_delta=notify_delta, 72 | start_wait=start_wait) 73 | 74 | def _init_target_variables(self, **kwargs): 75 | for key, val in kwargs.items(): 76 | if val is not None: 77 | val = [_id.strip() for _id in val.split(',') if _id.strip()] 78 | setattr(self, key, val) 79 | 80 | def _init_notification_variables(self, **kwargs): 81 | self.last_notifications = {} # ti-specific last notification time 82 | for key, val in kwargs.items(): 83 | if isinstance(val, int): 84 | val = timedelta(seconds=val) 85 | setattr(self, key, val) 86 | 87 | def _get_target_dags(self): 88 | session = Session() 89 | active_dags = session.query(DagModel.dag_id).filter( 90 | DagModel.is_paused.is_(False)).all() 91 | if self.dag_ids is None: 92 | target_dags = active_dags # subdags always included 93 | else: 94 | target_dags = [ 95 | dag_id for dag_id in active_dags 96 | if True in [ 97 | dag_id.startswith(dag_id_mask) 98 | for dag_id_mask in self.dag_ids 99 | if (self.include_subdags 100 | or dag_id.count('.') == dag_id_mask.count('.')) 101 | ] 102 | ] 103 | return target_dags 104 | 105 | @staticmethod 106 | def _get_task_instance(key): 107 | TI = TaskInstance 108 | session = Session() 109 | # filter via key should uniquely indentify the instance 110 | tis = session.query(TI).filter(TI.dag_id == key[0], 111 | TI.task_id == key[1], 112 | TI.execution_date == key[2]) 113 | return tis.first() # returns None if no such TI found 114 | 115 | def _send_notification(self, ti, ti_key, finished=False): 116 | title = ti.task_id.upper() 117 | text = 'DAG: {}'.format(ti.dag_id) 118 | color = 'warning' 119 | 120 | if finished: 121 | title += ' finally finished (as {})'.format(ti.state) 122 | if ti.state == State.SUCCESS: 123 | title += ' :white_check_mark:' 124 | color = 'good' 125 | else: 126 | runtime = self.last_notifications[ti_key] 127 | title += ' is still running :redsiren:' 128 | text = 'Still not finished after {}'.format( 129 | str(runtime).split('.')[0]) + '\n' + text 130 | 131 | send_notification(ti.get_dagrun(), text, title, color) 132 | 133 | def pre_execute(self, context): 134 | self.save_path = '/tmp/{}.pkl'.format( 135 | '.'.join([self.__class__.__name__, 136 | context['ti'].dag_id, 137 | context['ti'].task_id])) 138 | 139 | try: 140 | with open(self.save_path, mode='rb') as f: 141 | save = pickle.load(f) 142 | if datetime.now() - save['timestamp'] < 
timedelta(hours=6): 143 | self.last_notifications = save['data'] 144 | except FileNotFoundError as e: 145 | logging.warning('Unable to load previous state: {}'.format(e)) 146 | 147 | for target_name, target in [ 148 | ('DAG', self.dag_ids), 149 | ('Task', self.task_ids), 150 | ('Operator', self.operator_ids), 151 | ]: 152 | logging.info('Poking for {}s: {}'.format( 153 | target_name, 154 | '--all--' if target is None else ', '.join(target) 155 | )) 156 | 157 | logging.info('Start notifying after {}, then periodically after {}' 158 | .format(self.notify_after, self.notify_delta)) 159 | 160 | def post_execute(self, context): 161 | save = { 162 | 'timestamp': datetime.now(), 163 | 'data': self.last_notifications, 164 | } 165 | try: 166 | with open(self.save_path, mode='wb') as f: 167 | pickle.dump(save, f) 168 | except Exception as e: 169 | logging.warning('Unable to save current state: {}'.format(e)) 170 | 171 | def poke(self, context): 172 | TI = TaskInstance 173 | session = Session() 174 | tis = session.query(TI) 175 | 176 | tis = tis.filter(TI.state.in_(State.unfinished())) 177 | tis = tis.filter(TI.dag_id.in_(self._get_target_dags())) 178 | if self.task_ids: 179 | tis = tis.filter(TI.task_id.in_(self.task_ids)) 180 | if self.operator_ids: 181 | # tis = tis.filter(TI.operator.in_(self.operator_ids)) 182 | pass # operator attribute might be None 183 | 184 | tis = tis.all() 185 | tis = [ti for ti in tis if ti.key != context['ti'].key] # exclude self 186 | tis = [ti for ti in tis if (ti.operator is None 187 | or self.operator_ids is None 188 | or ti.operator in self.operator_ids)] 189 | 190 | if len(tis) == 0 and len(self.last_notifications) == 0: 191 | return datetime.now() >= context['ti'].start_date + self.start_wait 192 | 193 | now = datetime.now() 194 | start_midnight = datetime.combine( 195 | context['ti'].start_date, datetime.min.time()) 196 | 197 | ti_keys = [(ti.dag_id, ti.task_id, ti.execution_date) for ti in tis] 198 | for ti, ti_key in zip(tis, ti_keys): 199 | if self.check_execution_time: 200 | start_date = ti.start_date 201 | else: 202 | start_date = start_midnight 203 | 204 | runtime = now - start_date 205 | if runtime >= self.notify_after: 206 | last_notification = self.last_notifications.get(ti_key) 207 | if (last_notification is None or 208 | runtime >= last_notification + self.notify_delta): 209 | self.last_notifications[ti_key] = runtime 210 | self._send_notification(ti, ti_key, finished=False) 211 | 212 | # tis previously notified about but not found anymore -- finished 213 | ti_keys_to_delete = set(self.last_notifications) - set(ti_keys) 214 | for ti_key in ti_keys_to_delete: 215 | ti = self._get_task_instance(ti_key) 216 | if ti is not None: # could be deleted from db (deleted via UI) 217 | self._send_notification(ti, ti_key, finished=True) 218 | del self.last_notifications[ti_key] 219 | 220 | # return len(self.last_notifications) == 0 221 | return True # schedule regularly, always exit 222 | -------------------------------------------------------------------------------- /airflow_plugins/operators/slack/__init__.py: -------------------------------------------------------------------------------- 1 | from .operators import Message 2 | from .sensors import SlackMessageSensor 3 | 4 | __all__ = ['Message', 'SlackMessageSensor'] 5 | -------------------------------------------------------------------------------- /airflow_plugins/operators/slack/hooks.py: -------------------------------------------------------------------------------- 1 | 2 | import json 3 | import logging 
4 | 5 | import requests 6 | from airflow.exceptions import AirflowException 7 | from airflow.hooks.base_hook import BaseHook 8 | from airflow_plugins import utils 9 | from slackclient import SlackClient 10 | 11 | 12 | class SlackHook(BaseHook): 13 | 14 | """Slack hook""" 15 | 16 | def __init__(self, 17 | token=None, 18 | method='chat.postMessage', 19 | api_params=None, 20 | channel=None, 21 | username=None, 22 | text=None, 23 | attachments=None, 24 | *args, **kwargs): 25 | 26 | self.token = token or utils.get_variable("SLACK_API_TOKEN") 27 | self.method = method 28 | self.api_params = api_params 29 | self.channel = channel or "airflow_stg" 30 | self.username = username or "Airflow (STG)" 31 | self.text = text 32 | self.attachments = attachments 33 | 34 | super(SlackHook, self).__init__(None, *args, **kwargs) 35 | 36 | @property 37 | def client(self): 38 | 39 | if not hasattr(self, "_client"): 40 | 41 | self._client = SlackClient(self.token) 42 | 43 | return self._client 44 | 45 | def run(self, **kwargs): 46 | """ 47 | SlackAPIOperator calls will not fail even if the call is not 48 | unsuccessful. It should not prevent a DAG from completing in success. 49 | """ 50 | if not self.api_params: 51 | self.construct_api_call_params(**kwargs) 52 | 53 | rc = self.client.api_call(self.method, **self.api_params) 54 | 55 | if not rc['ok']: 56 | logging.error("Slack API call failed ({})".format(rc['error'])) 57 | raise AirflowException( 58 | "Slack API call failed: ({})".format(rc['error'])) 59 | 60 | return rc 61 | 62 | def construct_api_call_params(self, **kwargs): 63 | self.api_params = { 64 | 'channel': self.channel 65 | } 66 | 67 | if self.username: 68 | self.api_params['username'] = self.username 69 | self.api_params['icon_url'] = \ 70 | 'https://raw.githubusercontent.com/airbnb/airflow' \ 71 | '/master/airflow/www/static/pin_100.png' 72 | 73 | if self.text: 74 | self.api_params['text'] = self.text 75 | 76 | if self.attachments: 77 | self.api_params['attachments'] = json.dumps(self.attachments) 78 | 79 | self.api_params.update(**kwargs) 80 | 81 | def get_file_content(self, url): 82 | """Returns file content 83 | """ 84 | 85 | r = requests.get(url, headers={ 86 | 'Authorization': 'Bearer %s' % self.token 87 | }) 88 | 89 | if r.status_code == 200: 90 | return r.text 91 | 92 | def get_channel_id(self, name): 93 | """Returns channel id by name 94 | """ 95 | 96 | rc = self.client.api_call("channels.list") 97 | 98 | for d in rc['channels']: 99 | 100 | if d['name'] == name: 101 | return d['id'] 102 | -------------------------------------------------------------------------------- /airflow_plugins/operators/slack/notifications.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from urllib.parse import urlencode 3 | 4 | from airflow_plugins import utils 5 | from slackclient import SlackClient 6 | 7 | 8 | def _compose_title_url(dag_run, env): 9 | url = utils.get_variable("airflow_url", "") 10 | if not url: 11 | url = "https://airflow-{}.stories.bi/admin/airflow/graph?".format(env) 12 | 13 | return url + urlencode({ 14 | 'execution_date': dag_run.execution_date, 15 | 'dag_id': dag_run.dag_id 16 | }) 17 | 18 | 19 | def send_notification_from_context(context, text, title, color="danger"): 20 | ti = context['ti'] 21 | send_notification(ti.get_dagrun(), text, title, color) 22 | 23 | 24 | def send_notification(dag_run, text, title, color="danger"): 25 | env = utils.get_variable("airflow_environment", "stg") 26 | username = "Airflow ({})".format(env.upper()) 
27 | channel = "airflow_{}".format(env) 28 | 29 | token = utils.get_variable("SLACK_API_TOKEN") 30 | client = SlackClient(token) 31 | 32 | result = client.api_call( 33 | method="chat.postMessage", 34 | channel=channel, 35 | username=username, 36 | icon_url="https://raw.githubusercontent.com/airbnb/airflow/master/" 37 | "airflow/www/static/pin_100.png", 38 | attachments=[ 39 | { 40 | "color": color, 41 | "title": title, 42 | "title_link": _compose_title_url(dag_run, env), 43 | "text": text, 44 | "mrkdwn_in": ["text"] 45 | } 46 | ], 47 | link_names=True 48 | ) 49 | 50 | if not result['ok']: 51 | logging.error("Slack API call failed ({})".format(result['error'])) 52 | 53 | 54 | def make_failure_callback(args): 55 | recipients = args.get("notification_recipients", []) 56 | recipients = ' '.join("@%s" % i for i in recipients) 57 | 58 | def callback(context): 59 | dag_run = context['dag_run'] 60 | title = "{} failed :crying:".format(dag_run.dag_id) 61 | text = "DAG failed on '{}' task.".format(context['ti'].task_id) 62 | if recipients: 63 | text += "Hey, {} !".format(recipients) 64 | 65 | return send_notification(dag_run, text, title) 66 | 67 | return callback 68 | 69 | 70 | def success_callback(context): 71 | dag_run = context['dag_run'] 72 | title = "{} succeeded :notbad:".format(dag_run.dag_id) 73 | 74 | return send_notification(dag_run, "", title, color="good") 75 | -------------------------------------------------------------------------------- /airflow_plugins/operators/slack/operators.py: -------------------------------------------------------------------------------- 1 | from airflow.operators.slack_operator import SlackAPIPostOperator 2 | 3 | from airflow_plugins import utils 4 | 5 | 6 | class Message(SlackAPIPostOperator): 7 | 8 | """Slack message operator""" 9 | 10 | def __init__(self, channel=None, username=None, *args, **kwargs): 11 | 12 | super(Message, self).__init__(channel=channel, username=username, 13 | *args, **kwargs) 14 | 15 | # self.channel = self.params['company'] + "data-processing" 16 | self.channel = channel or "airflow_stg" 17 | 18 | self.token = utils.get_variable("SLACK_API_TOKEN") 19 | 20 | self.username = username or "Airflow (STG)" 21 | -------------------------------------------------------------------------------- /airflow_plugins/operators/slack/sensors.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | 3 | from airflow.operators.sensors import BaseSensorOperator 4 | from airflow.utils.decorators import apply_defaults 5 | 6 | from airflow_plugins.operators.slack.hooks import SlackHook 7 | 8 | 9 | class SlackMessageSensor(BaseSensorOperator): 10 | 11 | """ 12 | Executes a HTTP get statement and returns False on failure: 13 | 404 not found or response_check function returned False 14 | 15 | :param http_conn_id: The connection to run the sensor against 16 | :type http_conn_id: string 17 | :param endpoint: The relative part of the full url 18 | :type endpoint: string 19 | :param params: The parameters to be added to the GET url 20 | :type params: a dictionary of string key/value pairs 21 | :param headers: The HTTP headers to be added to the GET request 22 | :type headers: a dictionary of string key/value pairs 23 | :param response_check: A check against the 'requests' response object. 24 | Returns True for 'pass' and False otherwise. 25 | :type response_check: A lambda or defined function. 
26 | :param extra_options: Extra options for the 'requests' library, see the 27 | 'requests' documentation (options to modify timeout, ssl, etc.) 28 | :type extra_options: A dictionary of options, where key is string and value 29 | depends on the option that's being modified. 30 | """ 31 | 32 | msg_thanks = """ 33 | Thank you {author} ! and have a nice day. 34 | """ 35 | 36 | @apply_defaults 37 | def __init__(self, 38 | channel, 39 | username=None, 40 | text_contains=None, 41 | callback=None, 42 | params=None, 43 | headers=None, 44 | extra_options=None, *args, **kwargs): 45 | 46 | super(SlackMessageSensor, self).__init__(*args, **kwargs) 47 | 48 | self.channel = channel 49 | self.username = username 50 | self.text_contains = text_contains 51 | self.params = params or {} 52 | self.headers = headers or {} 53 | self.extra_options = extra_options or {} 54 | 55 | self.slack = SlackHook( 56 | method='channels.history', 57 | channel=self.channel) 58 | 59 | def poke(self, context): 60 | 61 | self.slack.channel = self.slack.get_channel_id(self.slack.channel) 62 | 63 | try: 64 | response = self.slack.run() 65 | except Exception as e: 66 | raise e 67 | 68 | author = None 69 | 70 | since = self.dag.start_date 71 | 72 | if not since: 73 | since = datetime.now() 74 | 75 | since = since.timestamp() 76 | 77 | for msg in response['messages']: 78 | 79 | if msg['ts'] < since: 80 | continue 81 | 82 | if self.params['company'] in msg['text']: 83 | 84 | if 'file' in msg: 85 | 86 | f = self.slack.get_file_content( 87 | msg['file']['url_private_download']) 88 | 89 | author = msg['username'].split("|")[-1][0:-1] 90 | 91 | SlackHook( 92 | channel=self.channel, 93 | text="I got your file %s ..." % f[:100]).run() 94 | 95 | SlackHook( 96 | channel=self.channel, 97 | text=self.msg_thanks.format(author=author)).run() 98 | 99 | return True 100 | 101 | SlackHook( 102 | channel=self.channel, 103 | text="Hey @everyone, I'm still waiting !").run() 104 | 105 | return False 106 | -------------------------------------------------------------------------------- /airflow_plugins/operators/zip.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import logging 3 | import os 4 | from zipfile import ZipFile 5 | 6 | from airflow.models import BaseOperator 7 | from airflow.utils import apply_defaults 8 | 9 | __author__ = 'rssanders3' 10 | 11 | """ 12 | Documentation References: 13 | - https://docs.python.org/2/library/zipfile.html 14 | - https://pymotw.com/2/zipfile/ 15 | """ 16 | 17 | 18 | class ZipOperator(BaseOperator): 19 | """ 20 | An operator which takes in a path to a file 21 | and zips the contents to a location you define. 
22 | 23 | :param path_to_file_to_zip: Full path to the file you want to Zip 24 | :type path_to_file_to_zip: string 25 | :param path_to_save_zip: Full path to where you want to save the Zip file 26 | :type path_to_save_zip: string 27 | """ 28 | 29 | template_fields = ('path_to_file_to_zip', 'path_to_save_zip') 30 | template_ext = [] 31 | # ZipOperator's Main Color: white # todo: find better color 32 | ui_color = '#ffffff' 33 | 34 | @apply_defaults 35 | def __init__( 36 | self, 37 | path_to_file_to_zip, 38 | path_to_save_zip, 39 | *args, **kwargs): 40 | super(ZipOperator, self).__init__(*args, **kwargs) 41 | self.path_to_file_to_zip = path_to_file_to_zip 42 | self.path_to_save_zip = path_to_save_zip 43 | 44 | def execute(self, context): 45 | logging.info("Executing ZipOperator.execute(context)") 46 | 47 | logging.info("Path to the File to Zip provided " 48 | "by the User (path_to_file_to_zip): " + 49 | str(self.path_to_file_to_zip)) 50 | logging.info( 51 | "Path to save the Zip File provided by the " 52 | "User (path_to_save_zip) : " + str(self.path_to_save_zip)) 53 | 54 | dir_path_to_file_to_zip = os.path.dirname( 55 | os.path.abspath(self.path_to_file_to_zip)) 56 | logging.info("Absolute path to the File to Zip: " + 57 | str(dir_path_to_file_to_zip)) 58 | 59 | zip_file_name = os.path.basename(self.path_to_save_zip) 60 | logging.info("Zip File Name: " + str(zip_file_name)) 61 | 62 | file_to_zip_name = os.path.basename(self.path_to_file_to_zip) 63 | logging.info("Name of the File or Folder to be Zipped: " + 64 | str(file_to_zip_name)) 65 | 66 | os.chdir(dir_path_to_file_to_zip) 67 | logging.info("Current Working Directory: " + str(os.getcwd())) 68 | 69 | with ZipFile(zip_file_name, 'w') as zip_file: 70 | logging.info("Created zip file object '" + str(zip_file) + 71 | "' with name '" + str(zip_file_name) + "'") 72 | is_file = os.path.isfile(self.path_to_file_to_zip) 73 | logging.info( 74 | "Is the File to Zip a File " 75 | "(else its a folder): " + str(is_file)) 76 | if is_file: 77 | logging.info( 78 | "Writing '" + str(file_to_zip_name) + "to zip file") 79 | zip_file.write(file_to_zip_name) 80 | else: # is folder 81 | for dirname, subdirs, files in os.walk(file_to_zip_name): 82 | logging.info("Writing '" + str(dirname) + "to zip file") 83 | zip_file.write(dirname) 84 | for filename in files: 85 | file_name_to_write = os.path.join(dirname, filename) 86 | logging.info( 87 | "Writing '" + str(file_name_to_write) + 88 | "to zip file") 89 | zip_file.write(file_name_to_write) 90 | 91 | # todo: print out contents and results of zip file creation 92 | # (compression ratio, size, etc) 93 | 94 | logging.info("Closing Zip File Object") 95 | zip_file.close() 96 | 97 | logging.info("Moving '" + str(zip_file_name) + 98 | "' to '" + str(self.path_to_save_zip) + "'") 99 | os.rename(zip_file_name, self.path_to_save_zip) 100 | 101 | logging.info("Finished executing ZipOperator.execute(context)") 102 | 103 | 104 | class UnzipOperator(BaseOperator): 105 | """ 106 | An operator which takes in a path to a zip 107 | file and unzips the contents to a location you define. 
108 | 109 | :param path_to_zip_file: Full path to the zip file you want to Unzip 110 | :type path_to_zip_file: string 111 | :param path_to_unzip_contents: Full path to 112 | where you want to save the contents of the Zip file you're Unzipping 113 | :type path_to_unzip_contents: string 114 | """ 115 | 116 | template_fields = ('path_to_zip_file', 'path_to_unzip_contents') 117 | template_ext = [] 118 | # UnzipOperator's Main Color: white # todo: find better color 119 | ui_color = '#ffffff' 120 | 121 | @apply_defaults 122 | def __init__( 123 | self, 124 | path_to_zip_file=None, 125 | path_to_zip_folder=None, 126 | path_to_zip_folder_pattern='*.zip', 127 | path_to_unzip_contents=None, 128 | *args, **kwargs): 129 | super(UnzipOperator, self).__init__(*args, **kwargs) 130 | self.path_to_zip_folder = path_to_zip_folder 131 | self.path_to_zip_folder_pattern = path_to_zip_folder_pattern 132 | self.path_to_zip_file = path_to_zip_file 133 | self.path_to_unzip_contents = path_to_unzip_contents 134 | 135 | def execute(self, context): 136 | logging.info("Executing UnzipOperator.execute(context)") 137 | 138 | logging.info("path_to_zip_file: " + str(self.path_to_zip_file)) 139 | logging.info("path_to_unzip_contents: " + 140 | str(self.path_to_unzip_contents)) 141 | 142 | # populate path_to_zip_file if zip directory is specified 143 | if self.path_to_zip_folder: 144 | directory_files = glob.glob(os.path.join( 145 | self.path_to_zip_folder, 146 | self.path_to_zip_folder_pattern)) 147 | self.path_to_zip_file = max(directory_files, key=os.path.getctime) 148 | 149 | # No check is done if the zip file is valid so that the operator fails 150 | # when expected so that airflow can properly mark the task as failed 151 | # and schedule retries as needed 152 | 153 | with ZipFile(self.path_to_zip_file, 'r') as zip_file: 154 | logging.info("Created zip file object '" + str(zip_file) + 155 | "' from path '" + str(self.path_to_zip_file) + "'") 156 | 157 | logging.info("Extracting all the contents to '" + 158 | str(self.path_to_unzip_contents) + "'") 159 | zip_file.extractall(self.path_to_unzip_contents) 160 | logging.info("Closing Zip File Object") 161 | zip_file.close() 162 | 163 | logging.info("Finished executing UnzipOperator.execute(context)") 164 | -------------------------------------------------------------------------------- /airflow_plugins/utils.py: -------------------------------------------------------------------------------- 1 | from copy import deepcopy 2 | from datetime import datetime 3 | 4 | from airflow.models import Variable 5 | from pytz import timezone 6 | 7 | 8 | def get_variable(key, default_var=None): 9 | """Returns variable from Variable or config defaults""" 10 | 11 | return Variable.get(key, default_var=default_var) 12 | 13 | 14 | def create_variable(key, value): 15 | """Create variable""" 16 | 17 | return Variable.set(key, value) 18 | 19 | 20 | def update_params(params, *args): 21 | d = deepcopy(params) 22 | for arg in args: 23 | d.update(deepcopy(arg)) 24 | 25 | return d 26 | 27 | 28 | def get_connection(conn_id): 29 | """Returns a connection by id 30 | """ 31 | 32 | from airflow import settings, models 33 | 34 | session = settings.Session() 35 | 36 | return session.query( 37 | models.Connection).filter_by( 38 | conn_id=conn_id).first() 39 | 40 | 41 | def delete_connection(conn_id): 42 | """Delete a connection by id. 
Return is deleted""" 43 | 44 | from airflow import settings, models 45 | 46 | session = settings.Session() 47 | 48 | connection = session.query( 49 | models.Connection).filter_by( 50 | conn_id=conn_id) 51 | 52 | deleted_rows = connection.delete() 53 | session.commit() 54 | 55 | return deleted_rows 56 | 57 | 58 | def get_connection_str(conn_id, db_name=""): 59 | """Returns standard connection string 60 | """ 61 | con = get_connection(conn_id) 62 | 63 | if con: 64 | 65 | return "{type}://{user}:{password}@{host}:{port}/{db_name}".format(**{ 66 | 'type': con.conn_type, 67 | 'user': con.login, 68 | 'password': con.password, 69 | 'host': con.host, 70 | 'port': con.port, 71 | 'db_name': db_name, 72 | }).rstrip("/") 73 | 74 | 75 | def get_or_create_conn(name, **kwargs): 76 | """Returns a connection by id 77 | """ 78 | 79 | from airflow import settings, models 80 | 81 | session = settings.Session() 82 | 83 | con = get_connection(name) 84 | 85 | if not con: 86 | con = models.Connection(name, **kwargs) 87 | session.add(con) 88 | session.commit() 89 | 90 | return con 91 | 92 | 93 | def get_or_update_conn(name, **kwargs): 94 | """Returns a connection by id 95 | """ 96 | 97 | from airflow import settings, models 98 | 99 | session = settings.Session() 100 | 101 | con = get_connection(name) 102 | 103 | if not con: 104 | con = models.Connection(name, **kwargs) 105 | session.add(con) 106 | session.commit() 107 | else: 108 | 109 | for key, value in kwargs.items(): 110 | 111 | if key == "extra": 112 | con.set_extra(value) 113 | else: 114 | setattr(con, key, value) 115 | 116 | session.commit() 117 | 118 | return con 119 | 120 | 121 | def get_utc_offset(tz='Europe/Prague'): # as hours 122 | tz = timezone(tz) 123 | utc = timezone('UTC') 124 | now = datetime.utcnow() 125 | utcnow = utc.localize(now) 126 | dt = utcnow.astimezone(tz).replace(tzinfo=None) 127 | offset = (dt - now).total_seconds() / (60 * 60) 128 | return offset # float 129 | -------------------------------------------------------------------------------- /airflow_plugins/variables/__init__.py: -------------------------------------------------------------------------------- 1 | from .value_resolver import ValueResolver 2 | 3 | __all__ = ['ValueResolver'] 4 | -------------------------------------------------------------------------------- /airflow_plugins/variables/value_resolver.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | import six 4 | from airflow.models import Variable 5 | from airflow.settings import Session 6 | 7 | VARIABLE_NAME_JOIN_CHAR = '_' 8 | 9 | 10 | class ValueResolver(object): 11 | session = Session() 12 | 13 | chars_list = list(range(0, 32)) + list(range(127, 160)) 14 | control_chars = ''.join(map(six.unichr, chars_list)) 15 | control_char_re = re.compile('[%s]' % re.escape(control_chars)) 16 | 17 | @classmethod 18 | def get_value(cls, key, company=None, dag=None, default_value=None): 19 | """ 20 | Load value from Variables with fallback into defined default_value. 21 | In case default value is not set, Error could be raised if variable 22 | doesn't exist. 
23 | """ 24 | 25 | variable_names = cls._resolve_names(key, company, dag) 26 | rows = cls.session.query(Variable).filter( 27 | Variable.key.in_(variable_names)).all() 28 | 29 | variables = {} 30 | for variable in rows: 31 | variables[variable.key] = variable 32 | 33 | for variable_name in variable_names: 34 | if variable_name in variables: 35 | variable = variables[variable_name] 36 | break 37 | else: 38 | variable = None 39 | value = default_value 40 | 41 | if variable: 42 | value = variable.val 43 | if value is not None: 44 | value = value.strip() 45 | default_type = type(default_value) 46 | if default_type is bool: 47 | return True if value.lower() in ['1', 'true'] else False 48 | for typecls in [int, float]: 49 | if default_type is typecls: 50 | try: 51 | cls_value = typecls(value) 52 | except ValueError: 53 | # in case of possibly multi-type variable 54 | pass 55 | else: 56 | return cls_value 57 | break 58 | 59 | if (key.endswith(VARIABLE_NAME_JOIN_CHAR + 'date') 60 | and isinstance(value, six.string_types)): 61 | from datetime import datetime 62 | value = value.split('.')[0].replace('-', '').replace(':', '') 63 | value_format = '%Y%m%d' + ('T%H%M%S' if len(value) > 8 else '') 64 | value = datetime.strptime(value, value_format) 65 | 66 | if value is None: 67 | if variable: 68 | error_msg = "Variable `{}` exists, but is mis-configured."\ 69 | .format(variable.key) 70 | else: 71 | error_msg = "Variable `{}` doesn't exist.".format(key) 72 | 73 | raise RuntimeError(error_msg) 74 | 75 | return cls.strip_value(value) 76 | 77 | @staticmethod 78 | def _compose_variable_name(*args): 79 | return VARIABLE_NAME_JOIN_CHAR.join(args) 80 | 81 | @classmethod 82 | def _resolve_names(cls, key, company=None, dag=None): 83 | key, company, dag = [ 84 | x.lower() if x else None for x in [key, company, dag] 85 | ] 86 | 87 | possible_variables = [key] 88 | if company: 89 | possible_variables.append( 90 | cls._compose_variable_name(company, key) 91 | ) 92 | 93 | if dag: 94 | possible_variables.append(cls._compose_variable_name(dag, key)) 95 | 96 | if company and dag: 97 | possible_variables.append( 98 | cls._compose_variable_name(company, dag, key) 99 | ) 100 | 101 | possible_variables.reverse() 102 | 103 | return possible_variables 104 | 105 | @classmethod 106 | def strip_value(cls, value): 107 | if not isinstance(value, six.string_types): 108 | return value 109 | else: 110 | return cls.control_char_re.sub('', str(value)) 111 | -------------------------------------------------------------------------------- /airflow_plugins/xcom.py: -------------------------------------------------------------------------------- 1 | class Xcom(object): 2 | """XCOM DAO used for xcom pushes and pulls""" 3 | def __init__(self, tasks_ids, dag_id=None): 4 | self.tasks_ids = tasks_ids 5 | self.dag_id = dag_id 6 | -------------------------------------------------------------------------------- /docs/.gitignore: -------------------------------------------------------------------------------- 1 | /airflow_plugins.rst 2 | /airflow_plugins.*.rst 3 | /modules.rst 4 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 
5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # User-friendly check for sphinx-build 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) 13 | endif 14 | 15 | # Internal variables. 16 | PAPEROPT_a4 = -D latex_paper_size=a4 17 | PAPEROPT_letter = -D latex_paper_size=letter 18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 19 | # the i18n builder cannot share the environment and doctrees with the others 20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 21 | 22 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext 23 | 24 | help: 25 | @echo "Please use \`make ' where is one of" 26 | @echo " html to make standalone HTML files" 27 | @echo " dirhtml to make HTML files named index.html in directories" 28 | @echo " singlehtml to make a single large HTML file" 29 | @echo " pickle to make pickle files" 30 | @echo " json to make JSON files" 31 | @echo " htmlhelp to make HTML files and a HTML help project" 32 | @echo " qthelp to make HTML files and a qthelp project" 33 | @echo " devhelp to make HTML files and a Devhelp project" 34 | @echo " epub to make an epub" 35 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 36 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 37 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 38 | @echo " text to make text files" 39 | @echo " man to make manual pages" 40 | @echo " texinfo to make Texinfo files" 41 | @echo " info to make Texinfo files and run them through makeinfo" 42 | @echo " gettext to make PO message catalogs" 43 | @echo " changes to make an overview of all changed/added/deprecated items" 44 | @echo " xml to make Docutils-native XML files" 45 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 46 | @echo " linkcheck to check all external links for integrity" 47 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 48 | 49 | clean: 50 | rm -rf $(BUILDDIR)/* 51 | 52 | html: 53 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 54 | @echo 55 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 56 | 57 | dirhtml: 58 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 59 | @echo 60 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 61 | 62 | singlehtml: 63 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 64 | @echo 65 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 66 | 67 | pickle: 68 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 69 | @echo 70 | @echo "Build finished; now you can process the pickle files." 71 | 72 | json: 73 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 74 | @echo 75 | @echo "Build finished; now you can process the JSON files." 76 | 77 | htmlhelp: 78 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 79 | @echo 80 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 81 | ".hhp project file in $(BUILDDIR)/htmlhelp." 
82 | 83 | qthelp: 84 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 85 | @echo 86 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 87 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 88 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/airflow_plugins.qhcp" 89 | @echo "To view the help file:" 90 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/airflow_plugins.qhc" 91 | 92 | devhelp: 93 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 94 | @echo 95 | @echo "Build finished." 96 | @echo "To view the help file:" 97 | @echo "# mkdir -p $$HOME/.local/share/devhelp/airflow_plugins" 98 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/airflow_plugins" 99 | @echo "# devhelp" 100 | 101 | epub: 102 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 103 | @echo 104 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 105 | 106 | latex: 107 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 108 | @echo 109 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 110 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 111 | "(use \`make latexpdf' here to do that automatically)." 112 | 113 | latexpdf: 114 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 115 | @echo "Running LaTeX files through pdflatex..." 116 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 117 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 118 | 119 | latexpdfja: 120 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 121 | @echo "Running LaTeX files through platex and dvipdfmx..." 122 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 123 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 124 | 125 | text: 126 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 127 | @echo 128 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 129 | 130 | man: 131 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 132 | @echo 133 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 134 | 135 | texinfo: 136 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 137 | @echo 138 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 139 | @echo "Run \`make' in that directory to run these through makeinfo" \ 140 | "(use \`make info' here to do that automatically)." 141 | 142 | info: 143 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 144 | @echo "Running Texinfo files through makeinfo..." 145 | make -C $(BUILDDIR)/texinfo info 146 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 147 | 148 | gettext: 149 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 150 | @echo 151 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 152 | 153 | changes: 154 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 155 | @echo 156 | @echo "The overview file is in $(BUILDDIR)/changes." 157 | 158 | linkcheck: 159 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 160 | @echo 161 | @echo "Link check complete; look for any errors in the above output " \ 162 | "or in $(BUILDDIR)/linkcheck/output.txt." 163 | 164 | doctest: 165 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 166 | @echo "Testing of doctests in the sources finished, look at the " \ 167 | "results in $(BUILDDIR)/doctest/output.txt." 
168 | 169 | xml: 170 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 171 | @echo 172 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 173 | 174 | pseudoxml: 175 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 176 | @echo 177 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 178 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # airflow_plugins documentation build configuration file, created by 5 | # sphinx-quickstart on Tue Jul 9 22:26:36 2013. 6 | # 7 | # This file is execfile()d with the current directory set to its 8 | # containing dir. 9 | # 10 | # Note that not all possible configuration values are present in this 11 | # autogenerated file. 12 | # 13 | # All configuration values have a default; values that are commented out 14 | # serve to show the default. 15 | 16 | import sys 17 | import os 18 | 19 | # If extensions (or modules to document with autodoc) are in another 20 | # directory, add these directories to sys.path here. If the directory is 21 | # relative to the documentation root, use os.path.abspath to make it 22 | # absolute, like shown here. 23 | #sys.path.insert(0, os.path.abspath('.')) 24 | 25 | # Get the project root dir, which is the parent dir of this 26 | cwd = os.getcwd() 27 | project_root = os.path.dirname(cwd) 28 | 29 | # Insert the project root dir as the first element in the PYTHONPATH. 30 | # This lets us ensure that the source package is imported, and that its 31 | # version is used. 32 | sys.path.insert(0, project_root) 33 | 34 | import airflow_plugins 35 | 36 | # -- General configuration --------------------------------------------- 37 | 38 | # If your documentation needs a minimal Sphinx version, state it here. 39 | #needs_sphinx = '1.0' 40 | 41 | # Add any Sphinx extension module names here, as strings. They can be 42 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 43 | extensions = ['sphinx.ext.autodoc', 'sphinx.ext.viewcode'] 44 | 45 | # Add any paths that contain templates here, relative to this directory. 46 | templates_path = ['_templates'] 47 | 48 | # The suffix of source filenames. 49 | source_suffix = '.rst' 50 | 51 | # The encoding of source files. 52 | #source_encoding = 'utf-8-sig' 53 | 54 | # The master toctree document. 55 | master_doc = 'index' 56 | 57 | # General information about the project. 58 | project = u'Airflow Plugins' 59 | copyright = u"2018, Stories" 60 | 61 | # The version info for the project you're documenting, acts as replacement 62 | # for |version| and |release|, also used in various other places throughout 63 | # the built documents. 64 | # 65 | # The short X.Y version. 66 | version = airflow_plugins.__version__ 67 | # The full version, including alpha/beta/rc tags. 68 | release = airflow_plugins.__version__ 69 | 70 | # The language for content autogenerated by Sphinx. Refer to documentation 71 | # for a list of supported languages. 72 | #language = None 73 | 74 | # There are two options for replacing |today|: either, you set today to 75 | # some non-false value, then it is used: 76 | #today = '' 77 | # Else, today_fmt is used as the format for a strftime call. 78 | #today_fmt = '%B %d, %Y' 79 | 80 | # List of patterns, relative to source directory, that match files and 81 | # directories to ignore when looking for source files. 
82 | exclude_patterns = ['_build'] 83 | 84 | # The reST default role (used for this markup: `text`) to use for all 85 | # documents. 86 | #default_role = None 87 | 88 | # If true, '()' will be appended to :func: etc. cross-reference text. 89 | #add_function_parentheses = True 90 | 91 | # If true, the current module name will be prepended to all description 92 | # unit titles (such as .. function::). 93 | #add_module_names = True 94 | 95 | # If true, sectionauthor and moduleauthor directives will be shown in the 96 | # output. They are ignored by default. 97 | #show_authors = False 98 | 99 | # The name of the Pygments (syntax highlighting) style to use. 100 | pygments_style = 'sphinx' 101 | 102 | # A list of ignored prefixes for module index sorting. 103 | #modindex_common_prefix = [] 104 | 105 | # If true, keep warnings as "system message" paragraphs in the built 106 | # documents. 107 | #keep_warnings = False 108 | 109 | 110 | # -- Options for HTML output ------------------------------------------- 111 | 112 | # The theme to use for HTML and HTML Help pages. See the documentation for 113 | # a list of builtin themes. 114 | html_theme = 'default' 115 | 116 | # Theme options are theme-specific and customize the look and feel of a 117 | # theme further. For a list of options available for each theme, see the 118 | # documentation. 119 | #html_theme_options = {} 120 | 121 | # Add any paths that contain custom themes here, relative to this directory. 122 | #html_theme_path = [] 123 | 124 | # The name for this set of Sphinx documents. If None, it defaults to 125 | # " v documentation". 126 | #html_title = None 127 | 128 | # A shorter title for the navigation bar. Default is the same as 129 | # html_title. 130 | #html_short_title = None 131 | 132 | # The name of an image file (relative to this directory) to place at the 133 | # top of the sidebar. 134 | #html_logo = None 135 | 136 | # The name of an image file (within the static path) to use as favicon 137 | # of the docs. This file should be a Windows icon file (.ico) being 138 | # 16x16 or 32x32 pixels large. 139 | #html_favicon = None 140 | 141 | # Add any paths that contain custom static files (such as style sheets) 142 | # here, relative to this directory. They are copied after the builtin 143 | # static files, so a file named "default.css" will overwrite the builtin 144 | # "default.css". 145 | html_static_path = ['_static'] 146 | 147 | # If not '', a 'Last updated on:' timestamp is inserted at every page 148 | # bottom, using the given strftime format. 149 | #html_last_updated_fmt = '%b %d, %Y' 150 | 151 | # If true, SmartyPants will be used to convert quotes and dashes to 152 | # typographically correct entities. 153 | #html_use_smartypants = True 154 | 155 | # Custom sidebar templates, maps document names to template names. 156 | #html_sidebars = {} 157 | 158 | # Additional templates that should be rendered to pages, maps page names 159 | # to template names. 160 | #html_additional_pages = {} 161 | 162 | # If false, no module index is generated. 163 | #html_domain_indices = True 164 | 165 | # If false, no index is generated. 166 | #html_use_index = True 167 | 168 | # If true, the index is split into individual pages for each letter. 169 | #html_split_index = False 170 | 171 | # If true, links to the reST sources are added to the pages. 172 | #html_show_sourcelink = True 173 | 174 | # If true, "Created using Sphinx" is shown in the HTML footer. 175 | # Default is True. 
176 | #html_show_sphinx = True 177 | 178 | # If true, "(C) Copyright ..." is shown in the HTML footer. 179 | # Default is True. 180 | #html_show_copyright = True 181 | 182 | # If true, an OpenSearch description file will be output, and all pages 183 | # will contain a tag referring to it. The value of this option 184 | # must be the base URL from which the finished HTML is served. 185 | #html_use_opensearch = '' 186 | 187 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 188 | #html_file_suffix = None 189 | 190 | # Output file base name for HTML help builder. 191 | htmlhelp_basename = 'airflow_pluginsdoc' 192 | 193 | 194 | # -- Options for LaTeX output ------------------------------------------ 195 | 196 | latex_elements = { 197 | # The paper size ('letterpaper' or 'a4paper'). 198 | #'papersize': 'letterpaper', 199 | 200 | # The font size ('10pt', '11pt' or '12pt'). 201 | #'pointsize': '10pt', 202 | 203 | # Additional stuff for the LaTeX preamble. 204 | #'preamble': '', 205 | } 206 | 207 | # Grouping the document tree into LaTeX files. List of tuples 208 | # (source start file, target name, title, author, documentclass 209 | # [howto/manual]). 210 | latex_documents = [ 211 | ('index', 'airflow_plugins.tex', 212 | u'Airflow Plugins Documentation', 213 | u'Michael Kuty', 'manual'), 214 | ] 215 | 216 | # The name of an image file (relative to this directory) to place at 217 | # the top of the title page. 218 | #latex_logo = None 219 | 220 | # For "manual" documents, if this is true, then toplevel headings 221 | # are parts, not chapters. 222 | #latex_use_parts = False 223 | 224 | # If true, show page references after internal links. 225 | #latex_show_pagerefs = False 226 | 227 | # If true, show URL addresses after external links. 228 | #latex_show_urls = False 229 | 230 | # Documents to append as an appendix to all manuals. 231 | #latex_appendices = [] 232 | 233 | # If false, no module index is generated. 234 | #latex_domain_indices = True 235 | 236 | 237 | # -- Options for manual page output ------------------------------------ 238 | 239 | # One entry per manual page. List of tuples 240 | # (source start file, name, description, authors, manual section). 241 | man_pages = [ 242 | ('index', 'airflow_plugins', 243 | u'Airflow Plugins Documentation', 244 | [u'Michael Kuty'], 1) 245 | ] 246 | 247 | # If true, show URL addresses after external links. 248 | #man_show_urls = False 249 | 250 | 251 | # -- Options for Texinfo output ---------------------------------------- 252 | 253 | # Grouping the document tree into Texinfo files. List of tuples 254 | # (source start file, target name, title, author, 255 | # dir menu entry, description, category) 256 | texinfo_documents = [ 257 | ('index', 'airflow_plugins', 258 | u'Airflow Plugins Documentation', 259 | u'Michael Kuty', 260 | 'airflow_plugins', 261 | 'One line description of project.', 262 | 'Miscellaneous'), 263 | ] 264 | 265 | # Documents to append as an appendix to all manuals. 266 | #texinfo_appendices = [] 267 | 268 | # If false, no module index is generated. 269 | #texinfo_domain_indices = True 270 | 271 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 272 | #texinfo_show_urls = 'footnote' 273 | 274 | # If true, do not generate a @detailmenu in the "Top" node's menu. 275 | #texinfo_no_detailmenu = False 276 | -------------------------------------------------------------------------------- /docs/contributing.rst: -------------------------------------------------------------------------------- 1 | .. 
include:: ../CONTRIBUTING.rst 2 | -------------------------------------------------------------------------------- /docs/history.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../HISTORY.rst 2 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | Airflow Plugins 2 | =============== 3 | 4 | Contents: 5 | 6 | .. toctree:: 7 | :maxdepth: 2 8 | 9 | readme 10 | installation 11 | plugins/index 12 | contributing 13 | history 14 | 15 | Indices and tables 16 | ================== 17 | 18 | * :ref:`genindex` 19 | * :ref:`modindex` 20 | * :ref:`search` 21 | -------------------------------------------------------------------------------- /docs/installation.rst: -------------------------------------------------------------------------------- 1 | .. highlight:: shell 2 | 3 | ============ 4 | Installation 5 | ============ 6 | 7 | 8 | Stable release 9 | -------------- 10 | 11 | To install Airflow Plugins, run this command in your terminal: 12 | 13 | .. code-block:: console 14 | 15 | $ pip install airflow-plugins 16 | 17 | This is the preferred method to install Airflow Plugins, as it will always install the most recent stable release. 18 | 19 | If you don't have `pip`_ installed, this `Python installation guide`_ can guide 20 | you through the process. 21 | 22 | .. _pip: https://pip.pypa.io 23 | .. _Python installation guide: http://docs.python-guide.org/en/latest/starting/installation/ 24 | 25 | 26 | From sources 27 | ------------ 28 | 29 | The sources for Airflow Plugins can be downloaded from the `Github repo`_. 30 | 31 | You can either clone the public repository: 32 | 33 | .. code-block:: console 34 | 35 | $ git clone git://github.com/storiesbi/airflow-plugins 36 | 37 | Or download the `tarball`_: 38 | 39 | .. code-block:: console 40 | 41 | $ curl -OL https://github.com/storiesbi/airflow-plugins/tarball/master 42 | 43 | Once you have a copy of the source, you can install it with: 44 | 45 | .. code-block:: console 46 | 47 | $ python setup.py install 48 | 49 | 50 | .. _Github repo: https://github.com/storiesbi/airflow-plugins 51 | .. _tarball: https://github.com/storiesbi/airflow-plugins/tarball/master 52 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | REM Command file for Sphinx documentation 4 | 5 | if "%SPHINXBUILD%" == "" ( 6 | set SPHINXBUILD=sphinx-build 7 | ) 8 | set BUILDDIR=_build 9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% . 10 | set I18NSPHINXOPTS=%SPHINXOPTS% . 11 | if NOT "%PAPER%" == "" ( 12 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% 13 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% 14 | ) 15 | 16 | if "%1" == "" goto help 17 | 18 | if "%1" == "help" ( 19 | :help 20 | echo.Please use `make ^` where ^ is one of 21 | echo. html to make standalone HTML files 22 | echo. dirhtml to make HTML files named index.html in directories 23 | echo. singlehtml to make a single large HTML file 24 | echo. pickle to make pickle files 25 | echo. json to make JSON files 26 | echo. htmlhelp to make HTML files and a HTML help project 27 | echo. qthelp to make HTML files and a qthelp project 28 | echo. devhelp to make HTML files and a Devhelp project 29 | echo. epub to make an epub 30 | echo. 
latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter 31 | echo. text to make text files 32 | echo. man to make manual pages 33 | echo. texinfo to make Texinfo files 34 | echo. gettext to make PO message catalogs 35 | echo. changes to make an overview over all changed/added/deprecated items 36 | echo. xml to make Docutils-native XML files 37 | echo. pseudoxml to make pseudoxml-XML files for display purposes 38 | echo. linkcheck to check all external links for integrity 39 | echo. doctest to run all doctests embedded in the documentation if enabled 40 | goto end 41 | ) 42 | 43 | if "%1" == "clean" ( 44 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i 45 | del /q /s %BUILDDIR%\* 46 | goto end 47 | ) 48 | 49 | 50 | %SPHINXBUILD% 2> nul 51 | if errorlevel 9009 ( 52 | echo. 53 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 54 | echo.installed, then set the SPHINXBUILD environment variable to point 55 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 56 | echo.may add the Sphinx directory to PATH. 57 | echo. 58 | echo.If you don't have Sphinx installed, grab it from 59 | echo.http://sphinx-doc.org/ 60 | exit /b 1 61 | ) 62 | 63 | if "%1" == "html" ( 64 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html 65 | if errorlevel 1 exit /b 1 66 | echo. 67 | echo.Build finished. The HTML pages are in %BUILDDIR%/html. 68 | goto end 69 | ) 70 | 71 | if "%1" == "dirhtml" ( 72 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml 73 | if errorlevel 1 exit /b 1 74 | echo. 75 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. 76 | goto end 77 | ) 78 | 79 | if "%1" == "singlehtml" ( 80 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml 81 | if errorlevel 1 exit /b 1 82 | echo. 83 | echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. 84 | goto end 85 | ) 86 | 87 | if "%1" == "pickle" ( 88 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle 89 | if errorlevel 1 exit /b 1 90 | echo. 91 | echo.Build finished; now you can process the pickle files. 92 | goto end 93 | ) 94 | 95 | if "%1" == "json" ( 96 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json 97 | if errorlevel 1 exit /b 1 98 | echo. 99 | echo.Build finished; now you can process the JSON files. 100 | goto end 101 | ) 102 | 103 | if "%1" == "htmlhelp" ( 104 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp 105 | if errorlevel 1 exit /b 1 106 | echo. 107 | echo.Build finished; now you can run HTML Help Workshop with the ^ 108 | .hhp project file in %BUILDDIR%/htmlhelp. 109 | goto end 110 | ) 111 | 112 | if "%1" == "qthelp" ( 113 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp 114 | if errorlevel 1 exit /b 1 115 | echo. 116 | echo.Build finished; now you can run "qcollectiongenerator" with the ^ 117 | .qhcp project file in %BUILDDIR%/qthelp, like this: 118 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\airflow_plugins.qhcp 119 | echo.To view the help file: 120 | echo.^> assistant -collectionFile %BUILDDIR%\qthelp\airflow_plugins.ghc 121 | goto end 122 | ) 123 | 124 | if "%1" == "devhelp" ( 125 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp 126 | if errorlevel 1 exit /b 1 127 | echo. 128 | echo.Build finished. 129 | goto end 130 | ) 131 | 132 | if "%1" == "epub" ( 133 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub 134 | if errorlevel 1 exit /b 1 135 | echo. 136 | echo.Build finished. The epub file is in %BUILDDIR%/epub. 
137 | goto end 138 | ) 139 | 140 | if "%1" == "latex" ( 141 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 142 | if errorlevel 1 exit /b 1 143 | echo. 144 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. 145 | goto end 146 | ) 147 | 148 | if "%1" == "latexpdf" ( 149 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 150 | cd %BUILDDIR%/latex 151 | make all-pdf 152 | cd %BUILDDIR%/.. 153 | echo. 154 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 155 | goto end 156 | ) 157 | 158 | if "%1" == "latexpdfja" ( 159 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 160 | cd %BUILDDIR%/latex 161 | make all-pdf-ja 162 | cd %BUILDDIR%/.. 163 | echo. 164 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 165 | goto end 166 | ) 167 | 168 | if "%1" == "text" ( 169 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text 170 | if errorlevel 1 exit /b 1 171 | echo. 172 | echo.Build finished. The text files are in %BUILDDIR%/text. 173 | goto end 174 | ) 175 | 176 | if "%1" == "man" ( 177 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man 178 | if errorlevel 1 exit /b 1 179 | echo. 180 | echo.Build finished. The manual pages are in %BUILDDIR%/man. 181 | goto end 182 | ) 183 | 184 | if "%1" == "texinfo" ( 185 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo 186 | if errorlevel 1 exit /b 1 187 | echo. 188 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. 189 | goto end 190 | ) 191 | 192 | if "%1" == "gettext" ( 193 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale 194 | if errorlevel 1 exit /b 1 195 | echo. 196 | echo.Build finished. The message catalogs are in %BUILDDIR%/locale. 197 | goto end 198 | ) 199 | 200 | if "%1" == "changes" ( 201 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes 202 | if errorlevel 1 exit /b 1 203 | echo. 204 | echo.The overview file is in %BUILDDIR%/changes. 205 | goto end 206 | ) 207 | 208 | if "%1" == "linkcheck" ( 209 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck 210 | if errorlevel 1 exit /b 1 211 | echo. 212 | echo.Link check complete; look for any errors in the above output ^ 213 | or in %BUILDDIR%/linkcheck/output.txt. 214 | goto end 215 | ) 216 | 217 | if "%1" == "doctest" ( 218 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest 219 | if errorlevel 1 exit /b 1 220 | echo. 221 | echo.Testing of doctests in the sources finished, look at the ^ 222 | results in %BUILDDIR%/doctest/output.txt. 223 | goto end 224 | ) 225 | 226 | if "%1" == "xml" ( 227 | %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml 228 | if errorlevel 1 exit /b 1 229 | echo. 230 | echo.Build finished. The XML files are in %BUILDDIR%/xml. 231 | goto end 232 | ) 233 | 234 | if "%1" == "pseudoxml" ( 235 | %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml 236 | if errorlevel 1 exit /b 1 237 | echo. 238 | echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml. 239 | goto end 240 | ) 241 | 242 | :end 243 | -------------------------------------------------------------------------------- /docs/plugins/index.rst: -------------------------------------------------------------------------------- 1 | 2 | =============== 3 | Airflow Plugins 4 | =============== 5 | 6 | .. 
toctree:: 7 | :maxdepth: 3 8 | 9 | operators/base 10 | operators/db 11 | operators/files 12 | operators/csv 13 | operators/zip 14 | operators/git 15 | operators/slack 16 | sensors 17 | utils 18 | -------------------------------------------------------------------------------- /docs/plugins/operators/base.rst: -------------------------------------------------------------------------------- 1 | 2 | ============== 3 | Base operators 4 | ============== 5 | 6 | .. automodule:: airflow_plugins.operators.base 7 | :members: 8 | -------------------------------------------------------------------------------- /docs/plugins/operators/csv.rst: -------------------------------------------------------------------------------- 1 | 2 | === 3 | CSV 4 | === 5 | 6 | .. automodule:: airflow_plugins.operators.csv 7 | :members: 8 | -------------------------------------------------------------------------------- /docs/plugins/operators/db.rst: -------------------------------------------------------------------------------- 1 | 2 | ======== 3 | Database 4 | ======== 5 | 6 | .. automodule:: airflow_plugins.operators.db 7 | :members: 8 | -------------------------------------------------------------------------------- /docs/plugins/operators/files.rst: -------------------------------------------------------------------------------- 1 | 2 | ===== 3 | Files 4 | ===== 5 | 6 | .. automodule:: airflow_plugins.operators.files 7 | :members: 8 | -------------------------------------------------------------------------------- /docs/plugins/operators/git.rst: -------------------------------------------------------------------------------- 1 | === 2 | Git 3 | === 4 | 5 | .. automodule:: airflow_plugins.operators.git 6 | :members: 7 | -------------------------------------------------------------------------------- /docs/plugins/operators/slack.rst: -------------------------------------------------------------------------------- 1 | 2 | ===== 3 | Slack 4 | ===== 5 | 6 | .. automodule:: airflow_plugins.operators.slack.hooks 7 | :members: 8 | 9 | .. automodule:: airflow_plugins.operators.slack.operators 10 | :members: 11 | 12 | .. automodule:: airflow_plugins.operators.slack.sensors 13 | :members: 14 | -------------------------------------------------------------------------------- /docs/plugins/operators/zip.rst: -------------------------------------------------------------------------------- 1 | 2 | === 3 | ZIP 4 | === 5 | 6 | .. automodule:: airflow_plugins.operators.zip 7 | :members: 8 | -------------------------------------------------------------------------------- /docs/plugins/sensors.rst: -------------------------------------------------------------------------------- 1 | 2 | ============ 3 | File Sensors 4 | ============ 5 | 6 | .. automodule:: airflow_plugins.operators.sensors.file_sensor 7 | :members: 8 | 9 | .. automodule:: airflow_plugins.operators.sensors.task_sensor 10 | :members: 11 | -------------------------------------------------------------------------------- /docs/plugins/utils.rst: -------------------------------------------------------------------------------- 1 | 2 | ===== 3 | Utils 4 | ===== 5 | 6 | .. automodule:: airflow_plugins.variables.value_resolver 7 | :members: 8 | 9 | .. automodule:: airflow_plugins.utils 10 | :members: 11 | 12 | .. automodule:: airflow_plugins.xcom 13 | :members: 14 | -------------------------------------------------------------------------------- /docs/readme.rst: -------------------------------------------------------------------------------- 1 | ..
include:: ../README.rst 2 | -------------------------------------------------------------------------------- /requirements_dev.txt: -------------------------------------------------------------------------------- 1 | pip==8.1.2 2 | bumpversion==0.5.3 3 | airflow>=1.7 4 | wheel==0.29.0 5 | watchdog==0.8.3 6 | flake8==2.6.0 7 | tox==2.3.1 8 | coverage==4.1 9 | Sphinx==1.4.8 10 | cryptography==1.7 11 | PyYAML==3.11 12 | pytest-runner==2.11.1 13 | pytest==3.0.7 14 | pytest-cov==2.4.0 15 | mock==2.0.0 16 | moto==0.4.30 17 | testfixtures==4.13.5 18 | psycopg2>=2.6.2 19 | python-slugify>=1.1.4 20 | psycopg2>=2.6.2 21 | boto==2.45.0 22 | csvkit==1.0.2 23 | slackclient==1.0.4 24 | six==1.11.0 25 | paramiko==2.3.1 26 | pytz==2017.2 27 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bumpversion] 2 | current_version = 0.1.3 3 | commit = True 4 | tag = True 5 | 6 | [bumpversion:file:setup.py] 7 | search = version='{current_version}' 8 | replace = version='{new_version}' 9 | 10 | [bumpversion:file:airflow_plugins/__init__.py] 11 | search = __version__ = '{current_version}' 12 | replace = __version__ = '{new_version}' 13 | 14 | [bdist_wheel] 15 | universal = 1 16 | 17 | [flake8] 18 | exclude = docs 19 | 20 | [aliases] 21 | test = pytest 22 | # Define setup.py command aliases here 23 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """The setup script.""" 5 | 6 | from setuptools import setup, find_packages 7 | 8 | with open('README.rst') as readme_file: 9 | readme = readme_file.read() 10 | 11 | with open('HISTORY.rst') as history_file: 12 | history = history_file.read() 13 | 14 | requirements = [ 15 | "python-slugify>=1.1.4", 16 | "psycopg2>=2.6.2", 17 | "boto==2.45.0", 18 | "csvkit==1.0.2", 19 | "slackclient==1.0.4", 20 | "six==1.11.0", 21 | "paramiko==2.3.1", 22 | "pytz==2017.2" 23 | ] 24 | 25 | setup_requirements = [ 26 | ] 27 | 28 | test_requirements = [ 29 | "pytest", 30 | "psycopg2>=2.6.2", 31 | "coverage==4.1", 32 | "pytest==3.0.7", 33 | "pytest-cov==2.4.0", 34 | "mock==2.0.0", 35 | "moto==0.4.30", 36 | "testfixtures==4.13.5", 37 | ] 38 | 39 | setup( 40 | name='airflow-plugins', 41 | version='0.1.3', 42 | description="Airflow plugins.", 43 | long_description=readme + '\n\n' + history, 44 | author="Michael Kuty", 45 | author_email='michael.kuty@stories.bi', 46 | url='https://github.com/storiesbi/airflow-plugins', 47 | packages=find_packages(include=['airflow_plugins.*', 'airflow_plugins']), 48 | include_package_data=True, 49 | install_requires=requirements, 50 | license="MIT license", 51 | zip_safe=False, 52 | keywords='airflow_plugins', 53 | classifiers=[ 54 | 'Development Status :: 2 - Pre-Alpha', 55 | 'Intended Audience :: Developers', 56 | 'License :: OSI Approved :: MIT License', 57 | 'Natural Language :: English', 58 | "Programming Language :: Python :: 2", 59 | 'Programming Language :: Python :: 2.6', 60 | 'Programming Language :: Python :: 2.7', 61 | 'Programming Language :: Python :: 3', 62 | 'Programming Language :: Python :: 3.3', 63 | 'Programming Language :: Python :: 3.4', 64 | 'Programming Language :: Python :: 3.5', 65 | ], 66 | test_suite='tests', 67 | tests_require=test_requirements, 68 | setup_requires=setup_requirements, 69 | ) 70 | 
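The operators packaged above are ordinary Airflow operators, so they are wired into a DAG in the usual way. The following is a minimal, hypothetical sketch, not a file from this repository: the DAG id, schedule, file paths and message text are invented, and it assumes the package is installed and that a SLACK_API_TOKEN Variable is configured, as the Slack operators expect.

from datetime import datetime

from airflow import DAG

from airflow_plugins.operators.slack import Message
from airflow_plugins.operators.zip import ZipOperator

dag = DAG(dag_id="example_zip_and_notify",
          start_date=datetime(2018, 1, 1),
          schedule_interval="@daily")

# Compress a (hypothetical) report file into an archive.
zip_report = ZipOperator(
    task_id="zip_report",
    path_to_file_to_zip="/tmp/report.csv",
    path_to_save_zip="/tmp/report.zip",
    dag=dag)

# Post a note to Slack; Message falls back to the "airflow_stg" channel
# and reads the token from the SLACK_API_TOKEN Variable.
notify = Message(
    task_id="notify_slack",
    text="Daily report was zipped.",
    dag=dag)

notify.set_upstream(zip_report)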
-------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from airflow.models import Connection, Variable 3 | from airflow.settings import Session 4 | 5 | 6 | def add_connection(session, connection_dict): 7 | for key, values in connection_dict.items(): 8 | con = Connection(key, **values) 9 | session.add(con) 10 | session.commit() 11 | 12 | 13 | def delete_connection(session, keys): 14 | query = session.query(Connection).filter(Connection.conn_id.in_(keys)) 15 | query.delete(synchronize_session="fetch") 16 | session.commit() 17 | 18 | 19 | @pytest.fixture() 20 | def connection_fixture(request): 21 | connection = request.param.get('connection', {}) 22 | connection_to_delete = request.param.get('connection_to_delete', []) 23 | 24 | session = Session() 25 | add_connection(session, connection) 26 | yield # Teardown 27 | delete_connection(session, connection_to_delete) 28 | 29 | 30 | def add_variables(session, variables_dict): 31 | for key, value in variables_dict.items(): 32 | Variable.set(key, value) 33 | 34 | 35 | def delete_variables(session, keys): 36 | query = session.query(Variable).filter(Variable.key.in_(keys)) 37 | query.delete(synchronize_session="fetch") 38 | session.commit() 39 | 40 | 41 | @pytest.fixture() 42 | def variables_fixture(request): 43 | variables = request.param.get('variables', {}) 44 | variable_to_delete = request.param.get('variable_to_delete', []) 45 | 46 | session = Session() 47 | add_variables(session, variables) 48 | yield # Teardown 49 | delete_variables(session, variable_to_delete) 50 | -------------------------------------------------------------------------------- /tests/operators/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/storiesbi/airflow-plugins/2762a2f1725487dd9605ce11c8e37772adbd5767/tests/operators/__init__.py -------------------------------------------------------------------------------- /tests/operators/sensors/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/storiesbi/airflow-plugins/2762a2f1725487dd9605ce11c8e37772adbd5767/tests/operators/sensors/__init__.py -------------------------------------------------------------------------------- /tests/operators/sensors/test_file_sensor.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime, timedelta 2 | from time import sleep 3 | 4 | import boto 5 | import pytest 6 | from airflow.exceptions import AirflowException 7 | from boto.s3.key import Key 8 | from mock import Mock 9 | from moto import mock_s3 10 | 11 | from airflow_plugins.operators import FileSensor 12 | from airflow_plugins.utils import get_or_update_conn 13 | 14 | ctx = {'params': {}, 'ti': Mock(start_date=datetime.now())} 15 | 16 | 17 | def test_files_sensor_fails_on_not_existing_connection(): 18 | file_sensor = FileSensor( 19 | task_id="check_new_file", 20 | path="foo", 21 | conn_id="baz", 22 | ) 23 | 24 | file_sensor.pre_execute(ctx) 25 | with pytest.raises(AirflowException) as e: 26 | file_sensor.poke(ctx) 27 | 28 | assert "Connection not found: `baz`" in str(e) 29 
| 30 | 31 | def test_files_sensor_fail_on_unsupported_connection(): 32 | get_or_update_conn("baz_oracle", conn_type="oracle") 33 | 34 | file_sensor = FileSensor( 35 | task_id="check_new_file", 36 | path="foo", 37 | conn_id="baz_oracle", 38 | ) 39 | 40 | file_sensor.pre_execute(ctx) 41 | with pytest.raises(NotImplementedError) as e: 42 | file_sensor.poke(ctx) 43 | 44 | assert "Unsupported engine: `oracle`" in str(e) 45 | 46 | 47 | @mock_s3 48 | def test_files_on_s3(): 49 | conn = boto.connect_s3() 50 | bucket = conn.create_bucket('storiesbi-datapipeline') 51 | get_or_update_conn("s3.stories.bi", conn_type="s3") 52 | 53 | file_sensor = FileSensor( 54 | task_id="check_new_file", 55 | path="foo", 56 | conn_id="s3.stories.bi", 57 | modified="anytime" 58 | ) 59 | 60 | file_sensor.pre_execute(ctx) 61 | 62 | assert not file_sensor.poke(ctx) 63 | 64 | k = Key(bucket) 65 | k.key = "foo" 66 | k.set_contents_from_string("bar") 67 | 68 | assert file_sensor.poke(ctx) 69 | 70 | 71 | @mock_s3 72 | def test_files_on_s3_modified_after(): 73 | conn = boto.connect_s3() 74 | bucket = conn.create_bucket('storiesbi-datapipeline') 75 | 76 | k = Key(bucket) 77 | k.key = "foo" 78 | k.set_contents_from_string("bar") 79 | 80 | get_or_update_conn("s3.stories.bi", conn_type="s3") 81 | 82 | file_sensor = FileSensor( 83 | task_id="check_new_file", 84 | path="foo", 85 | conn_id="s3.stories.bi", 86 | modified=datetime.now() 87 | ) 88 | 89 | file_sensor.pre_execute(ctx) 90 | 91 | assert not file_sensor.poke(ctx) 92 | 93 | # Hacky hacky! 94 | sleep(1) 95 | key = bucket.get_key("foo") 96 | key.set_contents_from_string("baz") 97 | 98 | assert file_sensor.poke(ctx) 99 | 100 | 101 | @mock_s3 102 | def test_files_on_s3_from_custom_bucket_defined_in_path(): 103 | conn = boto.connect_s3() 104 | bucket = conn.create_bucket('testing') 105 | k = Key(bucket) 106 | k.key = "foo" 107 | k.set_contents_from_string("baz") 108 | 109 | get_or_update_conn("s3.stories.bi", conn_type="s3") 110 | yesterday = datetime.now() - timedelta(1) 111 | 112 | file_sensor = FileSensor( 113 | task_id="check_new_file", 114 | path="s3://testing/foo", 115 | conn_id="s3.stories.bi", 116 | modified=yesterday 117 | ) 118 | 119 | file_sensor.pre_execute(ctx) 120 | 121 | assert file_sensor.poke(ctx) 122 | -------------------------------------------------------------------------------- /tests/operators/test_db.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pytest 4 | 5 | from airflow_plugins.operators import CreateTableWithColumns 6 | 7 | 8 | @pytest.mark.parametrize( 9 | ['file_name', 'known_columns'], 10 | [('test_db_columns.csv', [ 11 | "date", "bookings_count", "sectors_count", "passengers_count", 12 | "seats_count", "bags_count", "booked_at", "partner", "market", 13 | "airlines", "currency", "nationality", "device_type", "trip_type", 14 | "src_dst", "src", "dst", "transfers", "booking_channel", '"AT_sales"', 15 | "insurance_costs", "has_kiwi_interlining", "extras_sales", "refunds", 16 | '"AT_costs"', "turnover", "margin", '"intra space"' 17 | ])] 18 | ) 19 | def test_create_table_with_columns(file_name, known_columns): 20 | file = os.path.join(os.path.dirname(os.path.realpath(__file__)), file_name) 21 | get_columns = CreateTableWithColumns._get_table_columns 22 | columns = get_columns(file) 23 | assert len(known_columns) == len(columns) 24 | for i in range(len(known_columns)): 25 | assert known_columns[i] == columns[i] 26 | 
-------------------------------------------------------------------------------- /tests/operators/test_db_columns.csv: -------------------------------------------------------------------------------- 1 | "date","bookings_count","sectors_count","passengers_count","seats_count","bags_count","booked_at","partner","market","airlines","currency","nationality","device_type","trip_type","src_dst","src","dst","transfers","booking_channel","AT_sales","insurance_costs","has_kiwi_interlining","extras_sales","refunds","AT_costs","turnover","margin","intra space" 2 | "2016-11-14","1","1","1","1","0","false","kayak","NZ","JQ","NZD","NZ","pc","oneway","CNS - MEL","CNS","MEL","direct","undefined","155.03","0.00","false","0.00","0.00","0.00","155.03","1.00","value" 3 | -------------------------------------------------------------------------------- /tests/operators/test_run_evaluation.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | 3 | import pytest 4 | from airflow.models import TaskInstance 5 | from airflow.operators.dummy_operator import DummyOperator 6 | from airflow.utils.state import State 7 | from airflow_plugins.operators import RunEvaluationOperator 8 | from mock import Mock 9 | 10 | DEFAULT_DATE = datetime(2017, 3, 28) 11 | 12 | 13 | def test_execution_fails_on_failed_tasks(): 14 | ti = TaskInstance(DummyOperator(task_id='test'), DEFAULT_DATE) 15 | ti.state = State.FAILED 16 | 17 | dag_run_mock = Mock() 18 | dag_run_mock.get_task_instances.return_value = [ti] 19 | 20 | op = RunEvaluationOperator(task_id='evaluation') 21 | 22 | with pytest.raises(RuntimeError) as e: 23 | op.execute({'dag_run': dag_run_mock}) 24 | 25 | assert "Failed tasks instances detected" in str(e) 26 | 27 | 28 | def test_execution_without_failed_tasks(): 29 | dag_run_mock = Mock() 30 | dag_run_mock.get_task_instances.return_value = [ 31 | TaskInstance(DummyOperator(task_id='test'), DEFAULT_DATE) 32 | ] 33 | 34 | op = RunEvaluationOperator(task_id='evaluation') 35 | op.execute({'dag_run': dag_run_mock}) 36 | -------------------------------------------------------------------------------- /tests/operators/test_zip_operator.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | 3 | from airflow_plugins.operators import UnzipOperator 4 | 5 | 6 | def test_unzip_operator(): 7 | 8 | folder = os.path.dirname(os.path.realpath(__file__)) 9 | 10 | op = UnzipOperator(task_id='dag_task', 11 | path_to_zip_folder=folder, 12 | path_to_zip_folder_pattern='*.py') 13 | 14 | try: 15 | op.execute({}) 16 | except: 17 | # swallow is not zip file 18 | pass 19 | 20 | # check that the file is populated 21 | assert op.path_to_zip_file is not None 22 | -------------------------------------------------------------------------------- /tests/variables/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/storiesbi/airflow-plugins/2762a2f1725487dd9605ce11c8e37772adbd5767/tests/variables/__init__.py -------------------------------------------------------------------------------- /tests/variables/test_value_resolver.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from airflow.models import Variable 3 | 4 | from airflow_plugins.variables import ValueResolver 5 | 6 | 7 | @pytest.mark.parametrize('variables_fixture', [ 8 | ( 9 | { 10 | 'variables': { 11 | 'verbosity': "", 12 | }, 13 | 'variable_to_delete': 
["verbosity"] 14 | } 15 | ) 16 | ], indirect=True) 17 | def test_mis_configured_variable(variables_fixture): 18 | Variable.set("verbosity", "") 19 | 20 | with pytest.raises(RuntimeError) as err: 21 | ValueResolver.get_value("verbosity") 22 | 23 | assert str(err.value) == "Variable `verbosity` exists, but " \ 24 | "is mis-configured." 25 | 26 | 27 | def test_not_existing_variable(): 28 | with pytest.raises(RuntimeError) as err: 29 | ValueResolver.get_value("not-existing") 30 | 31 | assert str(err.value) == "Variable `not-existing` doesn't exist." 32 | 33 | 34 | def test_default_value(): 35 | value = ValueResolver.get_value("verbosity", default_value="INFO") 36 | assert value == "INFO" 37 | 38 | 39 | @pytest.mark.parametrize('variables_fixture', [ 40 | ( 41 | { 42 | 'variables': { 43 | 'verbosity': "INFO", 44 | 'stories_verbosity': "DEBUG", 45 | 'test_master_verbosity': "WARNING", 46 | }, 47 | 'variable_to_delete': [ 48 | "verbosity", "stories_verbosity", "test_master_verbosity", 49 | "stories_test_master_verbosity" 50 | ] 51 | } 52 | ) 53 | ], indirect=True) 54 | def test_resolve(variables_fixture): 55 | # dag 56 | value = ValueResolver.get_value("verbosity", dag="test_master") 57 | assert value == "WARNING" 58 | 59 | # dag value has higher priority than company value 60 | value = ValueResolver.get_value("verbosity", "stories", "test_master") 61 | assert value == "WARNING" 62 | 63 | # company value 64 | value = ValueResolver.get_value("verbosity", "stories") 65 | assert value == "DEBUG" 66 | 67 | # company dag value 68 | Variable.set("stories_test_master_verbosity", "ERROR") 69 | value = ValueResolver.get_value("verbosity", "stories", "test_master") 70 | assert value == "ERROR" 71 | 72 | # global value 73 | value = ValueResolver.get_value("verbosity") 74 | assert value == "INFO" 75 | 76 | 77 | @pytest.mark.parametrize('variables_fixture', [ 78 | ( 79 | { 80 | 'variables': { 81 | 'verbosity': "INFO\n", 82 | 'vm_size': "t2.medium ", 83 | }, 84 | 'variable_to_delete': [ 85 | "verbosity", "vm_size", 86 | ] 87 | } 88 | ) 89 | ], indirect=True) 90 | def test_resolve_without_special_chars(variables_fixture): 91 | value = ValueResolver.get_value("verbosity") 92 | assert value == "INFO" 93 | 94 | value = ValueResolver.get_value("vm_size") 95 | assert value == "t2.medium" 96 | 97 | 98 | @pytest.mark.parametrize('variables_fixture', [ 99 | ( 100 | { 101 | 'variables': { 102 | 'boolvalue': "true", 103 | }, 104 | 'variable_to_delete': [ 105 | "boolvalue", 106 | ] 107 | } 108 | ) 109 | ], indirect=True) 110 | def test_boolean_values(variables_fixture): 111 | assert ValueResolver.get_value("boolvalue", default_value=False) is True 112 | 113 | 114 | @pytest.mark.parametrize('variables_fixture', [ 115 | ( 116 | { 117 | 'variables': { 118 | 'intvalue': "-10 ", # space intentional 119 | 'floatvalue': "2.2", 120 | }, 121 | 'variable_to_delete': [ 122 | 'intvalue', 123 | 'floatvalue', 124 | ] 125 | } 126 | ) 127 | ], indirect=True) 128 | def test_numerical_values(variables_fixture): 129 | def resolver(key): 130 | def resolve(default): 131 | return ValueResolver.get_value(key, default_value=default) 132 | return resolve 133 | 134 | resolve = resolver("intvalue") 135 | assert resolve(default=1) == -10 136 | assert resolve(default=1.0) == -10.0 137 | assert resolve(default=None) == "-10" # strips spaces 138 | 139 | resolve = resolver("floatvalue") 140 | assert resolve(default=1) == "2.2" # float strings cannot be casted to int 141 | assert resolve(default=1.0) == 2.2 142 | assert resolve(default=None) == "2.2" 143 
| 144 | 145 | @pytest.mark.parametrize(['variables_fixture', 'date_variable'], [ 146 | ( 147 | { 148 | 'variables': {var_name: date_str}, 149 | 'variable_to_delete': [var_name], 150 | }, 151 | { 152 | 'variable': {'key': var_name, 'value': date_str}, 153 | 'should_resolve': should_resolve, 154 | } 155 | ) for var_name, date_str, should_resolve in [ 156 | ('date', '2016-06-24', False), # needs separator char before 'date' 157 | ('start_date', '2016-06-24', True), 158 | ('iso_date', '2016-06-24T12:24:48', True), 159 | ] 160 | ], indirect=['variables_fixture']) 161 | def test_date_values(variables_fixture, date_variable): 162 | value = date_variable['variable']['value'] 163 | resolved = ValueResolver.get_value(date_variable['variable']['key']) 164 | if date_variable['should_resolve']: 165 | if 'T' in value: 166 | assert value == resolved.isoformat().split('.')[0] 167 | else: 168 | assert value == resolved.isoformat().split('T')[0] 169 | else: 170 | assert value == resolved 171 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py33, py34, py35, flake8 3 | 4 | [travis] 5 | python = 6 | 3.5: py35 7 | 8 | [testenv:flake8] 9 | basepython=python 10 | deps=flake8 11 | commands=flake8 airflow_plugins 12 | 13 | [testenv] 14 | setenv = 15 | PYTHONPATH = {toxinidir} 16 | deps = 17 | -r{toxinidir}/requirements_dev.txt 18 | commands = 19 | py.test --cov=airflow_plugins --basetemp={envtmpdir} 20 | -------------------------------------------------------------------------------- /travis_pypi_setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """Update encrypted deploy password in Travis config file.""" 4 | 5 | 6 | from __future__ import print_function 7 | import base64 8 | import json 9 | import os 10 | from getpass import getpass 11 | import yaml 12 | from cryptography.hazmat.primitives.serialization import load_pem_public_key 13 | from cryptography.hazmat.backends import default_backend 14 | from cryptography.hazmat.primitives.asymmetric.padding import PKCS1v15 15 | 16 | 17 | try: 18 | from urllib import urlopen 19 | except ImportError: 20 | from urllib.request import urlopen 21 | 22 | 23 | GITHUB_REPO = 'michaelkuty/airflow_plugins' 24 | TRAVIS_CONFIG_FILE = os.path.join( 25 | os.path.dirname(os.path.abspath(__file__)), '.travis.yml') 26 | 27 | 28 | def load_key(pubkey): 29 | """Load public RSA key. 30 | 31 | Work around keys with incorrect header/footer format. 32 | 33 | Read more about RSA encryption with cryptography: 34 | https://cryptography.io/latest/hazmat/primitives/asymmetric/rsa/ 35 | """ 36 | try: 37 | return load_pem_public_key(pubkey.encode(), default_backend()) 38 | except ValueError: 39 | # workaround for https://github.com/travis-ci/travis-api/issues/196 40 | pubkey = pubkey.replace('BEGIN RSA', 'BEGIN').replace('END RSA', 'END') 41 | return load_pem_public_key(pubkey.encode(), default_backend()) 42 | 43 | 44 | def encrypt(pubkey, password): 45 | """Encrypt password using given RSA public key and encode it with base64. 46 | 47 | The encrypted password can only be decrypted by someone with the 48 | private key (in this case, only Travis). 
49 | """ 50 | key = load_key(pubkey) 51 | encrypted_password = key.encrypt(password, PKCS1v15()) 52 | return base64.b64encode(encrypted_password) 53 | 54 | 55 | def fetch_public_key(repo): 56 | """Download RSA public key Travis will use for this repo. 57 | 58 | Travis API docs: http://docs.travis-ci.com/api/#repository-keys 59 | """ 60 | keyurl = 'https://api.travis-ci.org/repos/{0}/key'.format(repo) 61 | data = json.loads(urlopen(keyurl).read().decode()) 62 | if 'key' not in data: 63 | errmsg = "Could not find public key for repo: {}.\n".format(repo) 64 | errmsg += "Have you already added your GitHub repo to Travis?" 65 | raise ValueError(errmsg) 66 | return data['key'] 67 | 68 | 69 | def prepend_line(filepath, line): 70 | """Rewrite a file adding a line to its beginning.""" 71 | with open(filepath) as f: 72 | lines = f.readlines() 73 | 74 | lines.insert(0, line) 75 | 76 | with open(filepath, 'w') as f: 77 | f.writelines(lines) 78 | 79 | 80 | def load_yaml_config(filepath): 81 | """Load yaml config file at the given path.""" 82 | with open(filepath) as f: 83 | return yaml.load(f) 84 | 85 | 86 | def save_yaml_config(filepath, config): 87 | """Save yaml config file at the given path.""" 88 | with open(filepath, 'w') as f: 89 | yaml.dump(config, f, default_flow_style=False) 90 | 91 | 92 | def update_travis_deploy_password(encrypted_password): 93 | """Put `encrypted_password` into the deploy section of .travis.yml.""" 94 | config = load_yaml_config(TRAVIS_CONFIG_FILE) 95 | 96 | config['deploy']['password'] = dict(secure=encrypted_password) 97 | 98 | save_yaml_config(TRAVIS_CONFIG_FILE, config) 99 | 100 | line = ('# This file was autogenerated and will overwrite' 101 | ' each time you run travis_pypi_setup.py\n') 102 | prepend_line(TRAVIS_CONFIG_FILE, line) 103 | 104 | 105 | def main(args): 106 | """Add a PyPI password to .travis.yml so that Travis can deploy to PyPI. 107 | 108 | Fetch the Travis public key for the repo, and encrypt the PyPI password 109 | with it before adding, so that only Travis can decrypt and use the PyPI 110 | password. 111 | """ 112 | public_key = fetch_public_key(args.repo) 113 | password = args.password or getpass('PyPI password: ') 114 | update_travis_deploy_password(encrypt(public_key, password.encode())) 115 | print("Wrote encrypted password to .travis.yml -- you're ready to deploy") 116 | 117 | 118 | if '__main__' == __name__: 119 | import argparse 120 | parser = argparse.ArgumentParser(description=__doc__) 121 | parser.add_argument('--repo', default=GITHUB_REPO, 122 | help='GitHub repo (default: %s)' % GITHUB_REPO) 123 | parser.add_argument('--password', 124 | help='PyPI password (will prompt if not provided)') 125 | 126 | args = parser.parse_args() 127 | main(args) 128 | --------------------------------------------------------------------------------