├── .gitignore ├── LICENSE ├── README.md ├── dags ├── book_data.py ├── daily_processing.py └── diamond.py └── plugins ├── __init__.py ├── blueprints ├── __init__.py └── trigger_dag.py └── operators ├── __init__.py ├── book_data.py ├── decide.py └── predict.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | local_settings.py 55 | 56 | # Flask stuff: 57 | instance/ 58 | .webassets-cache 59 | 60 | # Scrapy stuff: 61 | .scrapy 62 | 63 | # Sphinx documentation 64 | docs/_build/ 65 | 66 | # PyBuilder 67 | target/ 68 | 69 | # IPython Notebook 70 | .ipynb_checkpoints 71 | 72 | # pyenv 73 | .python-version 74 | 75 | # celery beat schedule file 76 | celerybeat-schedule 77 | 78 | # dotenv 79 | .env 80 | 81 | # virtualenv 82 | venv/ 83 | ENV/ 84 | 85 | # Spyder project settings 86 | .spyderproject 87 | 88 | # Rope project settings 89 | .ropeproject 90 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | CC0 
1.0 Universal 2 | 3 | Statement of Purpose 4 | 5 | The laws of most jurisdictions throughout the world automatically confer 6 | exclusive Copyright and Related Rights (defined below) upon the creator and 7 | subsequent owner(s) (each and all, an "owner") of an original work of 8 | authorship and/or a database (each, a "Work"). 9 | 10 | Certain owners wish to permanently relinquish those rights to a Work for the 11 | purpose of contributing to a commons of creative, cultural and scientific 12 | works ("Commons") that the public can reliably and without fear of later 13 | claims of infringement build upon, modify, incorporate in other works, reuse 14 | and redistribute as freely as possible in any form whatsoever and for any 15 | purposes, including without limitation commercial purposes. These owners may 16 | contribute to the Commons to promote the ideal of a free culture and the 17 | further production of creative, cultural and scientific works, or to gain 18 | reputation or greater distribution for their Work in part through the use and 19 | efforts of others. 20 | 21 | For these and/or other purposes and motivations, and without any expectation 22 | of additional consideration or compensation, the person associating CC0 with a 23 | Work (the "Affirmer"), to the extent that he or she is an owner of Copyright 24 | and Related Rights in the Work, voluntarily elects to apply CC0 to the Work 25 | and publicly distribute the Work under its terms, with knowledge of his or her 26 | Copyright and Related Rights in the Work and the meaning and intended legal 27 | effect of CC0 on those rights. 28 | 29 | 1. Copyright and Related Rights. A Work made available under CC0 may be 30 | protected by copyright and related or neighboring rights ("Copyright and 31 | Related Rights"). Copyright and Related Rights include, but are not limited 32 | to, the following: 33 | 34 | i. 
the right to reproduce, adapt, distribute, perform, display, communicate, 35 | and translate a Work; 36 | 37 | ii. moral rights retained by the original author(s) and/or performer(s); 38 | 39 | iii. publicity and privacy rights pertaining to a person's image or likeness 40 | depicted in a Work; 41 | 42 | iv. rights protecting against unfair competition in regards to a Work, 43 | subject to the limitations in paragraph 4(a), below; 44 | 45 | v. rights protecting the extraction, dissemination, use and reuse of data in 46 | a Work; 47 | 48 | vi. database rights (such as those arising under Directive 96/9/EC of the 49 | European Parliament and of the Council of 11 March 1996 on the legal 50 | protection of databases, and under any national implementation thereof, 51 | including any amended or successor version of such directive); and 52 | 53 | vii. other similar, equivalent or corresponding rights throughout the world 54 | based on applicable law or treaty, and any national implementations thereof. 55 | 56 | 2. Waiver. To the greatest extent permitted by, but not in contravention of, 57 | applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and 58 | unconditionally waives, abandons, and surrenders all of Affirmer's Copyright 59 | and Related Rights and associated claims and causes of action, whether now 60 | known or unknown (including existing as well as future claims and causes of 61 | action), in the Work (i) in all territories worldwide, (ii) for the maximum 62 | duration provided by applicable law or treaty (including future time 63 | extensions), (iii) in any current or future medium and for any number of 64 | copies, and (iv) for any purpose whatsoever, including without limitation 65 | commercial, advertising or promotional purposes (the "Waiver"). 
Affirmer makes 66 | the Waiver for the benefit of each member of the public at large and to the 67 | detriment of Affirmer's heirs and successors, fully intending that such Waiver 68 | shall not be subject to revocation, rescission, cancellation, termination, or 69 | any other legal or equitable action to disrupt the quiet enjoyment of the Work 70 | by the public as contemplated by Affirmer's express Statement of Purpose. 71 | 72 | 3. Public License Fallback. Should any part of the Waiver for any reason be 73 | judged legally invalid or ineffective under applicable law, then the Waiver 74 | shall be preserved to the maximum extent permitted taking into account 75 | Affirmer's express Statement of Purpose. In addition, to the extent the Waiver 76 | is so judged Affirmer hereby grants to each affected person a royalty-free, 77 | non transferable, non sublicensable, non exclusive, irrevocable and 78 | unconditional license to exercise Affirmer's Copyright and Related Rights in 79 | the Work (i) in all territories worldwide, (ii) for the maximum duration 80 | provided by applicable law or treaty (including future time extensions), (iii) 81 | in any current or future medium and for any number of copies, and (iv) for any 82 | purpose whatsoever, including without limitation commercial, advertising or 83 | promotional purposes (the "License"). The License shall be deemed effective as 84 | of the date CC0 was applied by Affirmer to the Work. Should any part of the 85 | License for any reason be judged legally invalid or ineffective under 86 | applicable law, such partial invalidity or ineffectiveness shall not 87 | invalidate the remainder of the License, and in such case Affirmer hereby 88 | affirms that he or she will not (i) exercise any of his or her remaining 89 | Copyright and Related Rights in the Work or (ii) assert any associated claims 90 | and causes of action with respect to the Work, in either case contrary to 91 | Affirmer's express Statement of Purpose. 
92 | 93 | 4. Limitations and Disclaimers. 94 | 95 | a. No trademark or patent rights held by Affirmer are waived, abandoned, 96 | surrendered, licensed or otherwise affected by this document. 97 | 98 | b. Affirmer offers the Work as-is and makes no representations or warranties 99 | of any kind concerning the Work, express, implied, statutory or otherwise, 100 | including without limitation warranties of title, merchantability, fitness 101 | for a particular purpose, non infringement, or the absence of latent or 102 | other defects, accuracy, or the present or absence of errors, whether or not 103 | discoverable, all to the greatest extent permissible under applicable law. 104 | 105 | c. Affirmer disclaims responsibility for clearing rights of other persons 106 | that may apply to the Work or any use thereof, including without limitation 107 | any person's Copyright and Related Rights in the Work. Further, Affirmer 108 | disclaims responsibility for obtaining any necessary consents, permissions 109 | or other rights required for any use of the Work. 110 | 111 | d. Affirmer understands and acknowledges that Creative Commons is not a 112 | party to this document and has no duty or obligation with respect to this 113 | CC0 or use of the Work. 114 | 115 | For more information, please see 116 | 117 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Example Plugin for airflow, presented @ Europython 2016 2 | for documentation, see 3 | https://github.com/blue-yonder/documents/blob/master/presentations/EuroPython%202016/get_in_control_of_your_workflows_with_airflow/EuropythonAirflow.ipynb 4 | 5 | This plugin contains some examples for workflows, operators and blueprints. 6 | 7 | To use this examples from this repository, first follow the installation 8 | instructions from airflow. 
When you have set up airflow, you can enter this 9 | plugin in your airflow.cfg file: 10 | - enter the path to the plugin directory in the corresponding airflow.cfg 11 | section 12 | - enter the path to the dag directory in the corresponding airflow.cfg 13 | section 14 | 15 | That's all! Now start your airflow webserver and scheduler and view your 16 | workflows. 17 | -------------------------------------------------------------------------------- /dags/book_data.py: -------------------------------------------------------------------------------- 1 | """ 2 | Workflow definition to book data 3 | """ 4 | 5 | from __future__ import division, absolute_import, print_function 6 | 7 | from datetime import datetime, timedelta 8 | 9 | from airflow import DAG 10 | from airflow.operators import ( 11 | BookData 12 | ) 13 | 14 | dag_id = "book_data" 15 | schedule_interval = None 16 | 17 | default_args = { 18 | 'owner': 'europython', 19 | 'depends_on_past': False, 20 | 'email': ['airflow@europython'], 21 | 'email_on_failure': False, 22 | 'email_on_retry': False, 23 | 'retries': 0, 24 | 'retry_delay': timedelta(seconds=30) 25 | } 26 | 27 | dag = DAG( 28 | dag_id, 29 | start_date=datetime(2016, 12, 7), 30 | schedule_interval=schedule_interval, 31 | default_args=default_args) 32 | 33 | book = BookData(dag=dag) 34 | -------------------------------------------------------------------------------- /dags/daily_processing.py: -------------------------------------------------------------------------------- 1 | """ 2 | Workflow definition for daily processing 3 | """ 4 | 5 | from __future__ import division, absolute_import, print_function 6 | 7 | from datetime import datetime, timedelta 8 | 9 | from airflow import DAG 10 | from airflow.operators import ( 11 | BookData, 12 | Predict, 13 | Decide 14 | ) 15 | 16 | dag_id = "daily_processing" 17 | schedule_interval = '0 22 * * *' 18 | 19 | default_args = { 20 | 'owner': 'europython', 21 | 'depends_on_past': False, 22 | 'email': 
['airflow@europython'], 23 | 'email_on_failure': False, 24 | 'email_on_retry': False, 25 | 'retries': 2, 26 | 'retry_delay': timedelta(minutes=5) 27 | } 28 | 29 | dag = DAG( 30 | dag_id, 31 | start_date=datetime(2016, 7, 15, 22, 0), 32 | schedule_interval=schedule_interval, 33 | default_args=default_args) 34 | 35 | book = BookData(dag=dag) 36 | 37 | predict = Predict(dag=dag) 38 | predict.set_upstream(book) 39 | 40 | decide = Decide(dag=dag) 41 | decide.set_upstream(predict) 42 | -------------------------------------------------------------------------------- /dags/diamond.py: -------------------------------------------------------------------------------- 1 | """ 2 | Workflow definition for daily processing 3 | """ 4 | 5 | from __future__ import division, absolute_import, print_function 6 | 7 | from datetime import datetime, timedelta 8 | 9 | from airflow import DAG 10 | from airflow.operators import ( 11 | BookData, 12 | Predict, 13 | Decide 14 | ) 15 | 16 | dag_id = "diamond" 17 | schedule_interval = None 18 | 19 | default_args = { 20 | 'owner': 'europython', 21 | 'depends_on_past': False, 22 | 'email': ['airflow@europython'], 23 | 'email_on_failure': False, 24 | 'email_on_retry': False, 25 | 'retries': 2, 26 | 'retry_delay': timedelta(minutes=5) 27 | } 28 | 29 | dag = DAG( 30 | dag_id, 31 | start_date=datetime(2016, 12, 7), 32 | schedule_interval=schedule_interval, 33 | default_args=default_args) 34 | 35 | book = BookData(dag=dag) 36 | 37 | predict_ger = Predict(dag=dag, country='GER') 38 | predict_ger.set_upstream(book) 39 | 40 | predict_uk = Predict(dag=dag, country='UK') 41 | predict_uk.set_upstream(book) 42 | 43 | decide = Decide(dag=dag) 44 | decide.set_upstream(predict_ger) 45 | decide.set_upstream(predict_uk) 46 | -------------------------------------------------------------------------------- /plugins/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, absolute_import, 
print_function 2 | 3 | from airflow.plugins_manager import AirflowPlugin 4 | 5 | from plugins import blueprints 6 | from plugins import operators 7 | 8 | 9 | # Defining the plugin class 10 | class EuropythonPlugin(AirflowPlugin): 11 | name = "europython_plugin" 12 | operators = [ 13 | operators.BookData, 14 | operators.Predict, 15 | operators.Decide 16 | ] 17 | flask_blueprints = [blueprints.TriggerBlueprint] 18 | -------------------------------------------------------------------------------- /plugins/blueprints/__init__.py: -------------------------------------------------------------------------------- 1 | from plugins.blueprints.trigger_dag import TriggerBlueprint 2 | -------------------------------------------------------------------------------- /plugins/blueprints/trigger_dag.py: -------------------------------------------------------------------------------- 1 | import httplib 2 | from datetime import datetime 3 | from flask import Blueprint, request, jsonify, Response 4 | from sqlalchemy import and_ 5 | from sqlalchemy.orm.exc import NoResultFound 6 | 7 | from airflow import settings 8 | from airflow.models import DagRun, DagModel 9 | from airflow.www.app import csrf 10 | from airflow.utils.state import State 11 | 12 | 13 | TriggerBlueprint = Blueprint('trigger', __name__, url_prefix='/trigger') 14 | """ 15 | Represents a blueprint to trigger DAGs. 16 | """ 17 | 18 | 19 | def check_dag_exists(session, dag_id): 20 | """ 21 | if returns an error response, if it doesn't exist 22 | """ 23 | dag_exists = session.query(DagModel).filter(DagModel.dag_id == dag_id).count() 24 | if not dag_exists: 25 | return Response('Dag {} does not exist'.format(dag_id), httplib.BAD_REQUEST) 26 | 27 | return None 28 | 29 | 30 | @TriggerBlueprint.route('/', methods=['POST']) 31 | @csrf.exempt 32 | def trigger_dag(dag_id): 33 | """ 34 | .. http:post:: /trigger// 35 | 36 | Triggers a defined DAG. 
The data must be send in json format with 37 | a key "run_id" and the value a string of your choice. Passing the data 38 | is optional. If no data is passed the run_id will be automatically 39 | be generated with a timestamp and looks like 40 | "external_trigger_2016-01-19T02:01:49.703365". 41 | 42 | **Example request**: 43 | 44 | .. sourcecode:: http 45 | 46 | POST /trigger/make_fit 47 | Host: localhost:7357 48 | Content-Type: application/json 49 | 50 | { 51 | "run_id": "my_special_run" 52 | } 53 | 54 | **Example response**: 55 | 56 | .. sourcecode:: http 57 | 58 | HTTP/1.1 200 OK 59 | Vary: Accept 60 | Content-Type: application/json 61 | 62 | { 63 | "dag_id": "daily_processing", 64 | "run_id": "my_special_run" 65 | } 66 | """ 67 | session = settings.Session() 68 | 69 | error_response = check_dag_exists(session, dag_id) 70 | if error_response: 71 | return error_response 72 | 73 | execution_date = datetime.now() 74 | 75 | run_id = None 76 | json_params = request.get_json() 77 | if json_params and 'run_id' in json_params: 78 | run_id = json_params['run_id'] 79 | if not run_id: 80 | run_id = 'external_trigger_' + execution_date.isoformat() 81 | 82 | trigger = DagRun( 83 | dag_id=dag_id, 84 | run_id=run_id, 85 | state=State.RUNNING, 86 | execution_date=execution_date, 87 | external_trigger=True) 88 | session.add(trigger) 89 | session.commit() 90 | 91 | return jsonify(dag_id=dag_id, run_id=run_id) 92 | 93 | 94 | @TriggerBlueprint.route('/', methods=['GET']) 95 | @csrf.exempt 96 | def get_dag_runs(dag_id): 97 | """ 98 | .. http:get:: /trigger/ 99 | 100 | Get the run_ids for a dag_id, ordered by execution date 101 | 102 | **Example request**: 103 | 104 | .. sourcecode:: http 105 | 106 | GET /trigger/make_fit 107 | Host: localhost:7357 108 | 109 | **Example response**: 110 | 111 | .. 
sourcecode:: http 112 | 113 | HTTP/1.1 200 OK 114 | Content-Type: application/json 115 | 116 | { 117 | "dag_id": "daily_processing", 118 | "run_ids": ["my_special_run", "normal_run_17"] 119 | } 120 | """ 121 | session = settings.Session() 122 | 123 | error_response = check_dag_exists(session, dag_id) 124 | if error_response: 125 | return error_response 126 | 127 | dag_runs = session.query(DagRun).filter(DagRun.dag_id == dag_id).order_by(DagRun.execution_date).all() 128 | run_ids = [dag_run.run_id for dag_run in dag_runs] 129 | 130 | return jsonify(dag_id=dag_id, run_ids=run_ids) 131 | 132 | 133 | @TriggerBlueprint.route('//', methods=['GET']) 134 | @csrf.exempt 135 | def dag_run_status(dag_id, run_id): 136 | """ 137 | .. http:get:: /trigger// 138 | 139 | Gets the status of a dag run. 140 | Possible states are: running, success, failed 141 | 142 | **Example request**: 143 | 144 | .. sourcecode:: http 145 | 146 | GET /trigger/make_fit/my_special_run 147 | Host: localhost:7357 148 | 149 | **Example response**: 150 | 151 | .. 
sourcecode:: http 152 | 153 | HTTP/1.1 200 OK 154 | Content-Type: application/json 155 | 156 | { 157 | "dag_id": "daily_processing", 158 | "run_id": "my_special_run", 159 | "state": "running", 160 | "execution_date": "2016-06-27T15:32:57" 161 | } 162 | """ 163 | session = settings.Session() 164 | 165 | error_response = check_dag_exists(session, dag_id) 166 | if error_response: 167 | return error_response 168 | 169 | try: 170 | dag_run = session.query(DagRun).filter(and_(DagRun.dag_id == dag_id, DagRun.run_id == run_id)).one() 171 | except NoResultFound: 172 | return Response('RunId {} does not exist for Dag {}'.format(run_id, dag_id), httplib.BAD_REQUEST) 173 | 174 | time_format = "%Y-%m-%dT%H:%M:%S" 175 | return jsonify( 176 | dag_id=dag_id, 177 | run_id=run_id, 178 | state=dag_run.state, 179 | execution_date=dag_run.execution_date.strftime(time_format) 180 | ) 181 | -------------------------------------------------------------------------------- /plugins/operators/__init__.py: -------------------------------------------------------------------------------- 1 | from plugins.operators.book_data import BookData 2 | from plugins.operators.predict import Predict 3 | from plugins.operators.decide import Decide 4 | 5 | __all__ = [ 6 | 'BookData', 7 | 'Predict', 8 | 'Decide' 9 | ] 10 | -------------------------------------------------------------------------------- /plugins/operators/book_data.py: -------------------------------------------------------------------------------- 1 | import time 2 | import random 3 | 4 | from airflow import models 5 | from airflow import utils as airflow_utils 6 | 7 | 8 | class BookData(models.BaseOperator): 9 | @airflow_utils.apply_defaults 10 | def __init__(self, **kwargs): 11 | super(BookData, self).__init__( 12 | task_id='book_data', 13 | **kwargs) 14 | 15 | def execute(self, context): 16 | waiting_time = 2 + random.random() * 2 17 | time.sleep(waiting_time) 18 | 
# ---------------------------------------------------------------------------
# plugins/operators/decide.py
# ---------------------------------------------------------------------------
import time
import logging

from airflow import models
from airflow import utils as airflow_utils
from airflow import hooks


class Decide(models.BaseOperator):
    """Operator that starts a decision job on a remote HTTP service
    (connection id ``DECISION_SERVER``) and polls until the job reports
    state FINISHED."""

    @airflow_utils.apply_defaults
    def __init__(self, timeout=None, **kwargs):
        """
        :param timeout: optional maximum number of seconds to wait for the
            remote job.  ``None`` (the default) preserves the original
            behaviour of polling forever.
        """
        super(Decide, self).__init__(
            task_id='decide',
            **kwargs)
        self.http_conn_id = 'DECISION_SERVER'
        self.endpoint_job_start = 'decide/'
        self.endpoint_job_status = 'job_status/'
        self.timeout = timeout

    def execute(self, context):
        # Kick off the remote job and remember its id for polling.
        http = hooks.HttpHook(method='POST', http_conn_id=self.http_conn_id)
        response = http.run(endpoint=self.endpoint_job_start)
        job_id = response.json()['job_id']
        logging.info('started decision job with job id {}'.format(job_id))
        self.wait_for_job(job_id)

    def wait_for_job(self, job_id):
        """Poll the status endpoint once per second until the job is
        FINISHED.

        :param job_id: id returned by the job-start endpoint
        :raises RuntimeError: if ``self.timeout`` seconds elapse before
            the job finishes (only when a timeout was configured)
        """
        job_status = None
        waited = 0
        http = hooks.HttpHook(method='GET', http_conn_id=self.http_conn_id)
        while job_status != 'FINISHED':
            if self.timeout is not None and waited >= self.timeout:
                raise RuntimeError(
                    'decision job {} did not finish within {} seconds'.format(
                        job_id, self.timeout))
            time.sleep(1)
            waited += 1
            response = http.run(endpoint=self.endpoint_job_status + str(job_id))
            job_status = response.json()['status']
            logging.info('status of decision job {} is {}'.format(job_id, job_status))


# ---------------------------------------------------------------------------
# plugins/operators/predict.py
# ---------------------------------------------------------------------------
import time
import random

from airflow import models
from airflow import utils as airflow_utils


class Predict(models.BaseOperator):
    """Example prediction operator.

    The task id is ``predict`` or, when a country is given,
    ``predict_<COUNTRY>`` so several instances can coexist in one DAG
    (see dags/diamond.py).  ``execute`` only simulates the prediction by
    sleeping for a random 3-6 seconds.
    """

    @airflow_utils.apply_defaults
    def __init__(self, country=None, **kwargs):
        """
        :param country: optional country code appended to the task id
        """
        task_id = 'predict'
        if country:
            task_id = task_id + '_' + country
        super(Predict, self).__init__(
            task_id=task_id,
            **kwargs)

    def execute(self, context):
        # Simulate work: sleep between 3 and 6 seconds.
        waiting_time = 3 + random.random() * 3
        time.sleep(waiting_time)