├── .gitignore
├── LICENSE
├── README.md
├── dags
├── book_data.py
├── daily_processing.py
└── diamond.py
└── plugins
├── __init__.py
├── blueprints
├── __init__.py
└── trigger_dag.py
└── operators
├── __init__.py
├── book_data.py
├── decide.py
└── predict.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 |
27 | # PyInstaller
28 | # Usually these files are written by a python script from a template
29 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
30 | *.manifest
31 | *.spec
32 |
33 | # Installer logs
34 | pip-log.txt
35 | pip-delete-this-directory.txt
36 |
37 | # Unit test / coverage reports
38 | htmlcov/
39 | .tox/
40 | .coverage
41 | .coverage.*
42 | .cache
43 | nosetests.xml
44 | coverage.xml
45 | *,cover
46 | .hypothesis/
47 |
48 | # Translations
49 | *.mo
50 | *.pot
51 |
52 | # Django stuff:
53 | *.log
54 | local_settings.py
55 |
56 | # Flask stuff:
57 | instance/
58 | .webassets-cache
59 |
60 | # Scrapy stuff:
61 | .scrapy
62 |
63 | # Sphinx documentation
64 | docs/_build/
65 |
66 | # PyBuilder
67 | target/
68 |
69 | # IPython Notebook
70 | .ipynb_checkpoints
71 |
72 | # pyenv
73 | .python-version
74 |
75 | # celery beat schedule file
76 | celerybeat-schedule
77 |
78 | # dotenv
79 | .env
80 |
81 | # virtualenv
82 | venv/
83 | ENV/
84 |
85 | # Spyder project settings
86 | .spyderproject
87 |
88 | # Rope project settings
89 | .ropeproject
90 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | CC0 1.0 Universal
2 |
3 | Statement of Purpose
4 |
5 | The laws of most jurisdictions throughout the world automatically confer
6 | exclusive Copyright and Related Rights (defined below) upon the creator and
7 | subsequent owner(s) (each and all, an "owner") of an original work of
8 | authorship and/or a database (each, a "Work").
9 |
10 | Certain owners wish to permanently relinquish those rights to a Work for the
11 | purpose of contributing to a commons of creative, cultural and scientific
12 | works ("Commons") that the public can reliably and without fear of later
13 | claims of infringement build upon, modify, incorporate in other works, reuse
14 | and redistribute as freely as possible in any form whatsoever and for any
15 | purposes, including without limitation commercial purposes. These owners may
16 | contribute to the Commons to promote the ideal of a free culture and the
17 | further production of creative, cultural and scientific works, or to gain
18 | reputation or greater distribution for their Work in part through the use and
19 | efforts of others.
20 |
21 | For these and/or other purposes and motivations, and without any expectation
22 | of additional consideration or compensation, the person associating CC0 with a
23 | Work (the "Affirmer"), to the extent that he or she is an owner of Copyright
24 | and Related Rights in the Work, voluntarily elects to apply CC0 to the Work
25 | and publicly distribute the Work under its terms, with knowledge of his or her
26 | Copyright and Related Rights in the Work and the meaning and intended legal
27 | effect of CC0 on those rights.
28 |
29 | 1. Copyright and Related Rights. A Work made available under CC0 may be
30 | protected by copyright and related or neighboring rights ("Copyright and
31 | Related Rights"). Copyright and Related Rights include, but are not limited
32 | to, the following:
33 |
34 | i. the right to reproduce, adapt, distribute, perform, display, communicate,
35 | and translate a Work;
36 |
37 | ii. moral rights retained by the original author(s) and/or performer(s);
38 |
39 | iii. publicity and privacy rights pertaining to a person's image or likeness
40 | depicted in a Work;
41 |
42 | iv. rights protecting against unfair competition in regards to a Work,
43 | subject to the limitations in paragraph 4(a), below;
44 |
45 | v. rights protecting the extraction, dissemination, use and reuse of data in
46 | a Work;
47 |
48 | vi. database rights (such as those arising under Directive 96/9/EC of the
49 | European Parliament and of the Council of 11 March 1996 on the legal
50 | protection of databases, and under any national implementation thereof,
51 | including any amended or successor version of such directive); and
52 |
53 | vii. other similar, equivalent or corresponding rights throughout the world
54 | based on applicable law or treaty, and any national implementations thereof.
55 |
56 | 2. Waiver. To the greatest extent permitted by, but not in contravention of,
57 | applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and
58 | unconditionally waives, abandons, and surrenders all of Affirmer's Copyright
59 | and Related Rights and associated claims and causes of action, whether now
60 | known or unknown (including existing as well as future claims and causes of
61 | action), in the Work (i) in all territories worldwide, (ii) for the maximum
62 | duration provided by applicable law or treaty (including future time
63 | extensions), (iii) in any current or future medium and for any number of
64 | copies, and (iv) for any purpose whatsoever, including without limitation
65 | commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes
66 | the Waiver for the benefit of each member of the public at large and to the
67 | detriment of Affirmer's heirs and successors, fully intending that such Waiver
68 | shall not be subject to revocation, rescission, cancellation, termination, or
69 | any other legal or equitable action to disrupt the quiet enjoyment of the Work
70 | by the public as contemplated by Affirmer's express Statement of Purpose.
71 |
72 | 3. Public License Fallback. Should any part of the Waiver for any reason be
73 | judged legally invalid or ineffective under applicable law, then the Waiver
74 | shall be preserved to the maximum extent permitted taking into account
75 | Affirmer's express Statement of Purpose. In addition, to the extent the Waiver
76 | is so judged Affirmer hereby grants to each affected person a royalty-free,
77 | non transferable, non sublicensable, non exclusive, irrevocable and
78 | unconditional license to exercise Affirmer's Copyright and Related Rights in
79 | the Work (i) in all territories worldwide, (ii) for the maximum duration
80 | provided by applicable law or treaty (including future time extensions), (iii)
81 | in any current or future medium and for any number of copies, and (iv) for any
82 | purpose whatsoever, including without limitation commercial, advertising or
83 | promotional purposes (the "License"). The License shall be deemed effective as
84 | of the date CC0 was applied by Affirmer to the Work. Should any part of the
85 | License for any reason be judged legally invalid or ineffective under
86 | applicable law, such partial invalidity or ineffectiveness shall not
87 | invalidate the remainder of the License, and in such case Affirmer hereby
88 | affirms that he or she will not (i) exercise any of his or her remaining
89 | Copyright and Related Rights in the Work or (ii) assert any associated claims
90 | and causes of action with respect to the Work, in either case contrary to
91 | Affirmer's express Statement of Purpose.
92 |
93 | 4. Limitations and Disclaimers.
94 |
95 | a. No trademark or patent rights held by Affirmer are waived, abandoned,
96 | surrendered, licensed or otherwise affected by this document.
97 |
98 | b. Affirmer offers the Work as-is and makes no representations or warranties
99 | of any kind concerning the Work, express, implied, statutory or otherwise,
100 | including without limitation warranties of title, merchantability, fitness
101 | for a particular purpose, non infringement, or the absence of latent or
102 | other defects, accuracy, or the presence or absence of errors, whether or not
103 | discoverable, all to the greatest extent permissible under applicable law.
104 |
105 | c. Affirmer disclaims responsibility for clearing rights of other persons
106 | that may apply to the Work or any use thereof, including without limitation
107 | any person's Copyright and Related Rights in the Work. Further, Affirmer
108 | disclaims responsibility for obtaining any necessary consents, permissions
109 | or other rights required for any use of the Work.
110 |
111 | d. Affirmer understands and acknowledges that Creative Commons is not a
112 | party to this document and has no duty or obligation with respect to this
113 | CC0 or use of the Work.
114 |
115 | For more information, please see
116 | <https://creativecommons.org/publicdomain/zero/1.0/>
117 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | Example plugin for Airflow, presented at EuroPython 2016.
2 | For documentation, see
3 | https://github.com/blue-yonder/documents/blob/master/presentations/EuroPython%202016/get_in_control_of_your_workflows_with_airflow/EuropythonAirflow.ipynb
4 |
5 | This plugin contains some examples for workflows, operators and blueprints.
6 |
7 | To use the examples from this repository, first follow the installation
8 | instructions from airflow. When you have set up airflow, you can enter this
9 | plugin in your airflow.cfg file:
10 | - enter the path to the plugin directory in the corresponding airflow.cfg
11 | section
12 | - enter the path to the dag directory in the corresponding airflow.cfg
13 | section
14 |
15 | That's all! Now start your airflow webserver and scheduler and view your
16 | workflows.
17 |
--------------------------------------------------------------------------------
/dags/book_data.py:
--------------------------------------------------------------------------------
1 | """
2 | Workflow definition to book data
3 | """
4 |
5 | from __future__ import division, absolute_import, print_function
6 |
7 | from datetime import datetime, timedelta
8 |
9 | from airflow import DAG
10 | from airflow.operators import (
11 | BookData
12 | )
13 |
14 | dag_id = "book_data"
15 | schedule_interval = None
16 |
17 | default_args = {
18 | 'owner': 'europython',
19 | 'depends_on_past': False,
20 | 'email': ['airflow@europython'],
21 | 'email_on_failure': False,
22 | 'email_on_retry': False,
23 | 'retries': 0,
24 | 'retry_delay': timedelta(seconds=30)
25 | }
26 |
27 | dag = DAG(
28 | dag_id,
29 | start_date=datetime(2016, 12, 7),
30 | schedule_interval=schedule_interval,
31 | default_args=default_args)
32 |
33 | book = BookData(dag=dag)
34 |
--------------------------------------------------------------------------------
/dags/daily_processing.py:
--------------------------------------------------------------------------------
1 | """
2 | Workflow definition for daily processing
3 | """
4 |
5 | from __future__ import division, absolute_import, print_function
6 |
7 | from datetime import datetime, timedelta
8 |
9 | from airflow import DAG
10 | from airflow.operators import (
11 | BookData,
12 | Predict,
13 | Decide
14 | )
15 |
16 | dag_id = "daily_processing"
17 | schedule_interval = '0 22 * * *'
18 |
19 | default_args = {
20 | 'owner': 'europython',
21 | 'depends_on_past': False,
22 | 'email': ['airflow@europython'],
23 | 'email_on_failure': False,
24 | 'email_on_retry': False,
25 | 'retries': 2,
26 | 'retry_delay': timedelta(minutes=5)
27 | }
28 |
29 | dag = DAG(
30 | dag_id,
31 | start_date=datetime(2016, 7, 15, 22, 0),
32 | schedule_interval=schedule_interval,
33 | default_args=default_args)
34 |
35 | book = BookData(dag=dag)
36 |
37 | predict = Predict(dag=dag)
38 | predict.set_upstream(book)
39 |
40 | decide = Decide(dag=dag)
41 | decide.set_upstream(predict)
42 |
--------------------------------------------------------------------------------
/dags/diamond.py:
--------------------------------------------------------------------------------
1 | """
2 | Workflow definition for daily processing
3 | """
4 |
5 | from __future__ import division, absolute_import, print_function
6 |
7 | from datetime import datetime, timedelta
8 |
9 | from airflow import DAG
10 | from airflow.operators import (
11 | BookData,
12 | Predict,
13 | Decide
14 | )
15 |
16 | dag_id = "diamond"
17 | schedule_interval = None
18 |
19 | default_args = {
20 | 'owner': 'europython',
21 | 'depends_on_past': False,
22 | 'email': ['airflow@europython'],
23 | 'email_on_failure': False,
24 | 'email_on_retry': False,
25 | 'retries': 2,
26 | 'retry_delay': timedelta(minutes=5)
27 | }
28 |
29 | dag = DAG(
30 | dag_id,
31 | start_date=datetime(2016, 12, 7),
32 | schedule_interval=schedule_interval,
33 | default_args=default_args)
34 |
35 | book = BookData(dag=dag)
36 |
37 | predict_ger = Predict(dag=dag, country='GER')
38 | predict_ger.set_upstream(book)
39 |
40 | predict_uk = Predict(dag=dag, country='UK')
41 | predict_uk.set_upstream(book)
42 |
43 | decide = Decide(dag=dag)
44 | decide.set_upstream(predict_ger)
45 | decide.set_upstream(predict_uk)
46 |
--------------------------------------------------------------------------------
/plugins/__init__.py:
--------------------------------------------------------------------------------
1 | from __future__ import division, absolute_import, print_function
2 |
3 | from airflow.plugins_manager import AirflowPlugin
4 |
5 | from plugins import blueprints
6 | from plugins import operators
7 |
8 |
class EuropythonPlugin(AirflowPlugin):
    """Airflow plugin bundling the EuroPython example operators and blueprint."""

    name = "europython_plugin"
    # Operators become importable from airflow.operators once loaded.
    operators = [operators.BookData, operators.Predict, operators.Decide]
    # Registers the /trigger REST endpoints with the webserver.
    flask_blueprints = [blueprints.TriggerBlueprint]
18 |
--------------------------------------------------------------------------------
/plugins/blueprints/__init__.py:
--------------------------------------------------------------------------------
1 | from plugins.blueprints.trigger_dag import TriggerBlueprint
2 |
--------------------------------------------------------------------------------
/plugins/blueprints/trigger_dag.py:
--------------------------------------------------------------------------------
1 | import httplib
2 | from datetime import datetime
3 | from flask import Blueprint, request, jsonify, Response
4 | from sqlalchemy import and_
5 | from sqlalchemy.orm.exc import NoResultFound
6 |
7 | from airflow import settings
8 | from airflow.models import DagRun, DagModel
9 | from airflow.www.app import csrf
10 | from airflow.utils.state import State
11 |
12 |
# Flask blueprint mounted at /trigger; registered via the plugin's
# flask_blueprints list.
TriggerBlueprint = Blueprint('trigger', __name__, url_prefix='/trigger')
"""
Represents a blueprint to trigger DAGs.
"""
17 |
18 |
def check_dag_exists(session, dag_id):
    """
    Check that ``dag_id`` refers to a known DAG.

    Returns a BAD_REQUEST :class:`Response` if the DAG does not exist,
    otherwise ``None``.
    """
    dag_exists = session.query(DagModel).filter(DagModel.dag_id == dag_id).count()
    if not dag_exists:
        return Response('Dag {} does not exist'.format(dag_id), httplib.BAD_REQUEST)

    return None
28 |
29 |
@TriggerBlueprint.route('/<dag_id>', methods=['POST'])
@csrf.exempt
def trigger_dag(dag_id):
    """
    .. http:post:: /trigger/<dag_id>

    Triggers a defined DAG. The data must be sent in json format with
    a key "run_id" and the value a string of your choice. Passing the data
    is optional. If no data is passed the run_id will automatically
    be generated with a timestamp and looks like
    "external_trigger_2016-01-19T02:01:49.703365".

    **Example request**:

    .. sourcecode:: http

        POST /trigger/make_fit
        Host: localhost:7357
        Content-Type: application/json

        {
            "run_id": "my_special_run"
        }

    **Example response**:

    .. sourcecode:: http

        HTTP/1.1 200 OK
        Vary: Accept
        Content-Type: application/json

        {
            "dag_id": "daily_processing",
            "run_id": "my_special_run"
        }
    """
    session = settings.Session()

    # Reject unknown DAG ids with a 400 response.
    error_response = check_dag_exists(session, dag_id)
    if error_response:
        return error_response

    execution_date = datetime.now()

    # Prefer a caller-supplied run_id; otherwise derive one from the
    # execution timestamp.
    run_id = None
    json_params = request.get_json()
    if json_params and 'run_id' in json_params:
        run_id = json_params['run_id']
    if not run_id:
        run_id = 'external_trigger_' + execution_date.isoformat()

    trigger = DagRun(
        dag_id=dag_id,
        run_id=run_id,
        state=State.RUNNING,
        execution_date=execution_date,
        external_trigger=True)
    session.add(trigger)
    session.commit()

    return jsonify(dag_id=dag_id, run_id=run_id)
92 |
93 |
@TriggerBlueprint.route('/<dag_id>', methods=['GET'])
@csrf.exempt
def get_dag_runs(dag_id):
    """
    .. http:get:: /trigger/<dag_id>

    Get the run_ids for a dag_id, ordered by execution date.

    **Example request**:

    .. sourcecode:: http

        GET /trigger/make_fit
        Host: localhost:7357

    **Example response**:

    .. sourcecode:: http

        HTTP/1.1 200 OK
        Content-Type: application/json

        {
            "dag_id": "daily_processing",
            "run_ids": ["my_special_run", "normal_run_17"]
        }
    """
    session = settings.Session()

    # Reject unknown DAG ids with a 400 response.
    error_response = check_dag_exists(session, dag_id)
    if error_response:
        return error_response

    # All runs for this DAG, oldest execution date first.
    dag_runs = session.query(DagRun).filter(DagRun.dag_id == dag_id).order_by(DagRun.execution_date).all()
    run_ids = [dag_run.run_id for dag_run in dag_runs]

    return jsonify(dag_id=dag_id, run_ids=run_ids)
131 |
132 |
@TriggerBlueprint.route('/<dag_id>/<run_id>', methods=['GET'])
@csrf.exempt
def dag_run_status(dag_id, run_id):
    """
    .. http:get:: /trigger/<dag_id>/<run_id>

    Gets the status of a dag run.
    Possible states are: running, success, failed

    **Example request**:

    .. sourcecode:: http

        GET /trigger/make_fit/my_special_run
        Host: localhost:7357

    **Example response**:

    .. sourcecode:: http

        HTTP/1.1 200 OK
        Content-Type: application/json

        {
            "dag_id": "daily_processing",
            "run_id": "my_special_run",
            "state": "running",
            "execution_date": "2016-06-27T15:32:57"
        }
    """
    session = settings.Session()

    # Reject unknown DAG ids with a 400 response.
    error_response = check_dag_exists(session, dag_id)
    if error_response:
        return error_response

    # A (dag_id, run_id) pair identifies at most one run.
    try:
        dag_run = session.query(DagRun).filter(and_(DagRun.dag_id == dag_id, DagRun.run_id == run_id)).one()
    except NoResultFound:
        return Response('RunId {} does not exist for Dag {}'.format(run_id, dag_id), httplib.BAD_REQUEST)

    time_format = "%Y-%m-%dT%H:%M:%S"
    return jsonify(
        dag_id=dag_id,
        run_id=run_id,
        state=dag_run.state,
        execution_date=dag_run.execution_date.strftime(time_format)
    )
181 |
--------------------------------------------------------------------------------
/plugins/operators/__init__.py:
--------------------------------------------------------------------------------
1 | from plugins.operators.book_data import BookData
2 | from plugins.operators.predict import Predict
3 | from plugins.operators.decide import Decide
4 |
5 | __all__ = [
6 | 'BookData',
7 | 'Predict',
8 | 'Decide'
9 | ]
10 |
--------------------------------------------------------------------------------
/plugins/operators/book_data.py:
--------------------------------------------------------------------------------
1 | import time
2 | import random
3 |
4 | from airflow import models
5 | from airflow import utils as airflow_utils
6 |
7 |
class BookData(models.BaseOperator):
    """Example operator that simulates booking data by sleeping briefly."""

    @airflow_utils.apply_defaults
    def __init__(self, **kwargs):
        super(BookData, self).__init__(task_id='book_data', **kwargs)

    def execute(self, context):
        # Simulate work: sleep a random duration between 2 and 4 seconds.
        time.sleep(2 + random.random() * 2)
18 |
--------------------------------------------------------------------------------
/plugins/operators/decide.py:
--------------------------------------------------------------------------------
1 | import time
2 | import logging
3 |
4 | from airflow import models
5 | from airflow import utils as airflow_utils
6 | from airflow import hooks
7 |
8 |
class Decide(models.BaseOperator):
    """
    Operator that starts a decision job on an external HTTP service and
    blocks until the service reports the job as FINISHED.
    """

    @airflow_utils.apply_defaults
    def __init__(self, **kwargs):
        super(Decide, self).__init__(
            task_id='decide',
            **kwargs)
        # Airflow connection id and endpoints of the decision service.
        self.http_conn_id = 'DECISION_SERVER'
        self.endpoint_job_start = 'decide/'
        self.endpoint_job_status = 'job_status/'

    def execute(self, context):
        """Start a decision job and wait for it to finish."""
        http = hooks.HttpHook(method='POST', http_conn_id=self.http_conn_id)
        response = http.run(endpoint=self.endpoint_job_start)
        job_id = response.json()['job_id']
        # Lazy %-style args: the message is only formatted if emitted.
        logging.info('started decision job with job id %s', job_id)
        self.wait_for_job(job_id)

    def wait_for_job(self, job_id):
        """Poll the status endpoint once per second until FINISHED."""
        job_status = None
        http = hooks.HttpHook(method='GET', http_conn_id=self.http_conn_id)
        # NOTE(review): loops forever if the job never reaches FINISHED
        # (e.g. a FAILED state) -- consider handling failure states or a
        # timeout. Behavior kept as-is here.
        while job_status != 'FINISHED':
            time.sleep(1)
            response = http.run(endpoint=self.endpoint_job_status + str(job_id))
            job_status = response.json()['status']
            logging.info('status of decision job %s is %s', job_id, job_status)
34 |
--------------------------------------------------------------------------------
/plugins/operators/predict.py:
--------------------------------------------------------------------------------
1 | import time
2 | import random
3 |
4 | from airflow import models
5 | from airflow import utils as airflow_utils
6 |
7 |
class Predict(models.BaseOperator):
    """
    Example prediction operator.

    An optional ``country`` suffixes the task id (e.g. ``predict_GER``)
    so several instances can live in one DAG.
    """

    @airflow_utils.apply_defaults
    def __init__(self, country=None, **kwargs):
        suffix = '_' + country if country else ''
        super(Predict, self).__init__(task_id='predict' + suffix, **kwargs)

    def execute(self, context):
        # Simulate work: sleep a random duration between 3 and 6 seconds.
        time.sleep(3 + random.random() * 3)
21 |
--------------------------------------------------------------------------------