├── src
│   ├── unittest
│   │   └── python
│   │       ├── dags
│   │       │   ├── __init__.py
│   │       │   ├── helloworld_dag_tests.py
│   │       │   └── helloworld_xcoms_tests.py
│   │       ├── plugins
│   │       │   ├── __init__.py
│   │       │   ├── multiplyby5_operator_tests.py
│   │       │   └── helloworld_sensor_tests.py
│   │       ├── resources
│   │       │   ├── variables.json
│   │       │   └── connections.sh
│   │       └── dag_integrity_tests.py
│   ├── integrationtest
│   │   └── python
│   │       ├── constants.py
│   │       ├── db_util.py
│   │       ├── dags
│   │       │   ├── hello_world_tests.py
│   │       │   └── presto_to_mysql_tests.py
│   │       └── airflow_api.py
│   └── main
│       └── python
│           ├── plugins
│           │   ├── multiplyby5_operator.py
│           │   ├── helloworld_sensor.py
│           │   ├── templates
│           │   │   └── rest_api_plugin
│           │   │       └── index.html
│           │   └── rest_api_plugin.py
│           └── dags
│               ├── presto_to_mysql.py
│               ├── hello_world.py
│               └── helloworld_xcoms.py
├── how_minikube_work.png
├── start_airflow.sh
├── .gitignore
├── requirements.txt
├── script
│   └── entrypoint.sh
├── k8s
│   ├── mysql
│   │   └── mysql.kube.yaml
│   └── presto
│       └── presto.kube.yaml
├── Dockerfile
├── README.md
└── airflow.kube.yaml
/src/unittest/python/dags/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/src/unittest/python/plugins/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/src/unittest/python/resources/variables.json:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/src/unittest/python/resources/connections.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
--------------------------------------------------------------------------------
/src/integrationtest/python/constants.py:
--------------------------------------------------------------------------------
1 | PRESTO_DB_PORT=32211
2 | MYSQL_DB_PORT=31320
3 | AIRFLOW_PORT=31317
--------------------------------------------------------------------------------
/how_minikube_work.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/outlandishideas/airflow-testing/master/how_minikube_work.png
--------------------------------------------------------------------------------
/start_airflow.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | minikube start --cpus 4 --memory 8192
3 | kubectl apply -f airflow.kube.yaml
4 | minikube mount src/main/python/:/data
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea
2 | venv/
3 | logs/
4 | airflow.db
5 | airflow.cfg
6 | *.pyc
7 | *.cfg
8 | target/
9 | .minikube/
10 | .pytest_cache/
11 | *.DS_Store
12 | .pybuilder/
13 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | apache-airflow==1.10.3
2 | jinja2==2.10.1
3 | werkzeug==0.15.3
4 | pyhive==0.6.1
5 | mysqlclient==1.4.2
6 | mysql-connector==2.2.9
7 | presto-python-client==0.7.0
8 |
--------------------------------------------------------------------------------
/script/entrypoint.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | set -e
3 |
4 | echo Command is : "$1"
5 | echo Minikube IP is : "$2"
6 |
7 | echo "$2" > /tmp/minikube_ip.txt
8 |
9 | case "$1" in
10 | install_dependencies)
11 | pyb "$1"
12 | ;;
13 | run_unit_tests)
14 | pyb "$1"
15 | ;;
16 | run_integration_tests)
17 | pyb "$1"
18 | ;;
19 | *)
20 | # The command is something like bash, not a pyb subcommand. Just run it in the right environment.
21 | exec "$@"
22 | ;;
23 | esac
--------------------------------------------------------------------------------
/src/unittest/python/plugins/multiplyby5_operator_tests.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | from datetime import datetime
3 | from airflow import DAG
4 | from airflow.models import TaskInstance
5 | from airflow.operators import MultiplyBy5Operator
6 |
7 |
8 | class TestMultiplyBy5Operator(unittest.TestCase):
9 |
10 | def test_execute(self):
11 | dag = DAG(dag_id='anydag', start_date=datetime.now())
12 | task = MultiplyBy5Operator(my_operator_param=10, dag=dag, task_id='anytask')
13 | ti = TaskInstance(task=task, execution_date=datetime.now())
14 | result = task.execute(ti.get_template_context())
15 | self.assertEqual(result, 50)
16 |
--------------------------------------------------------------------------------
/src/main/python/plugins/multiplyby5_operator.py:
--------------------------------------------------------------------------------
1 | import logging
2 |
3 | from airflow.models import BaseOperator
4 | from airflow.plugins_manager import AirflowPlugin
5 | from airflow.utils.decorators import apply_defaults
6 |
7 | log = logging.getLogger(__name__)
8 |
9 |
10 | class MultiplyBy5Operator(BaseOperator):
11 | @apply_defaults
12 | def __init__(self, my_operator_param, *args, **kwargs):
13 | self.operator_param = my_operator_param
14 | super(MultiplyBy5Operator, self).__init__(*args, **kwargs)
15 |
16 | def execute(self, context):
17 | log.info('operator_param: %s', self.operator_param)
18 | return (self.operator_param * 5)
19 |
20 |
21 | class MultiplyBy5Plugin(AirflowPlugin):
22 | name = "multiplyby5_plugin"
23 | operators = [MultiplyBy5Operator]
24 |
--------------------------------------------------------------------------------
/src/integrationtest/python/db_util.py:
--------------------------------------------------------------------------------
1 |
2 | class DBUtil:
3 |
4 |     def create_table(self, db_conn, create_table_sql):
5 |         cursor = db_conn.cursor()
6 |         cursor.execute(create_table_sql)
7 |         cursor.close()
8 |
9 |     def insert_into_table(self, db_conn, insert_query):
10 |         cursor = db_conn.cursor()
11 |         cursor.execute(insert_query)
12 |         db_conn.commit()
13 |         cursor.close()
14 |
15 |     def drop_table(self, db_conn, drop_table_query):
16 |         cursor = db_conn.cursor()
17 |         cursor.execute(drop_table_query)
18 |         cursor.close()
19 |
20 |     def get_row_count(self, db_conn, select_query):
21 |         cursor = db_conn.cursor()
22 |         cursor.execute(select_query)
23 |         final_result = [list(i) for i in cursor]
24 |         return final_result
--------------------------------------------------------------------------------
/src/main/python/dags/presto_to_mysql.py:
--------------------------------------------------------------------------------
1 | from airflow.operators.presto_to_mysql import PrestoToMySqlTransfer
2 | from datetime import datetime
3 | from airflow import DAG
4 |
5 | default_args = {
6 | 'email': ['hello@world.com']
7 | }
8 |
9 | dag = DAG('presto_to_mysql', description='Presto to Mysql Transfer', default_args=default_args,
10 | schedule_interval='0 12 * * *',
11 | start_date=datetime(2017, 3, 20), catchup=False)
12 |
13 | PrestoToMySqlTransfer(
14 | presto_conn_id='presto-conn',
15 | mysql_conn_id='mysql-conn',
16 | task_id='presto_to_mysql_transfer',
17 | sql="""
18 | SELECT name, count(*) as count
19 | FROM blackhole.default.region
20 | GROUP BY name
21 | """,
22 | mysql_table='mysql_region',
23 | mysql_preoperator='TRUNCATE TABLE mysql_region;',
24 | dag=dag)
25 |
--------------------------------------------------------------------------------
/src/integrationtest/python/dags/hello_world_tests.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import unittest
3 |
4 | sys.path.append('../')
5 | from airflow_api import AirflowAPI
6 |
7 |
8 | class TestHelloWorldDag(unittest.TestCase):
9 | """Integration test for Hello world DAG"""
10 |
11 | def setUp(self):
12 | self.airflow_api = AirflowAPI()
13 |
14 | def test_hello_world(self):
15 | """helloword dag should run successfully"""
16 | execution_date = "2019-01-11T12:00:00+00:00"
17 | dag_id = "hello_world"
18 | self.airflow_api.trigger_dag(dag_id, execution_date)
19 | is_running = True
20 | while is_running:
21 | is_running = self.airflow_api.is_dag_running(dag_id, execution_date)
22 | self.assertEqual(is_running, False)
23 | self.assertEqual(self.airflow_api.get_dag_status(dag_id, execution_date), "success")
24 |
25 |
26 | if __name__ == '__main__':
27 | unittest.main()
28 |
--------------------------------------------------------------------------------
/src/main/python/dags/hello_world.py:
--------------------------------------------------------------------------------
1 | from datetime import datetime
2 | from airflow import DAG
3 | from airflow.operators.dummy_operator import DummyOperator
4 | from airflow.operators.python_operator import PythonOperator
5 | from airflow.operators import MultiplyBy5Operator
6 |
7 | default_args = {
8 | 'email': ['hello@world.com']
9 | }
10 |
11 |
12 | def print_hello():
13 |     return 'Hello World'
14 |
15 |
16 | dag = DAG('hello_world', description='Hello world example', default_args=default_args, schedule_interval='0 12 * * *',
17 | start_date=datetime(2017, 3, 20), catchup=False)
18 |
19 | dummy_operator = DummyOperator(task_id='dummy_task', retries=3, dag=dag)
20 |
21 | hello_operator = PythonOperator(task_id='hello_task', python_callable=print_hello, dag=dag)
22 |
23 | multiplyby5_operator = MultiplyBy5Operator(my_operator_param=10,
24 | task_id='multiplyby5_task', dag=dag)
25 |
26 | dummy_operator >> hello_operator
27 |
28 | dummy_operator >> multiplyby5_operator
29 |
--------------------------------------------------------------------------------
/k8s/mysql/mysql.kube.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: Service
3 | metadata:
4 | name: mysql
5 | spec:
6 | type: NodePort
7 | selector:
8 | app: prod-mysql
9 | tier: db
10 | ports:
11 | - name: mysql
12 | protocol: TCP
13 | port: 3306
14 | targetPort: mysql
15 | nodePort: 31320
16 | ---
17 | apiVersion: extensions/v1beta1
18 | kind: Deployment
19 | metadata:
20 | name: mysql
21 | spec:
22 | replicas: 1
23 | template:
24 | metadata:
25 | labels:
26 | app: prod-mysql
27 | tier: db
28 | spec:
29 | containers:
30 | - name: mysql
31 | image: mysql:5.7.25
32 | resources:
33 | requests:
34 | memory: "256Mi"
35 | cpu: "200m"
36 | limits:
37 | memory: "512Mi"
38 | cpu: "400m"
39 | ports:
40 | - name: mysql
41 | containerPort: 3306
42 | env:
43 | - name: MYSQL_ROOT_PASSWORD
44 | value: "mysql"
45 | - name: MYSQL_DATABASE
46 | value: "mysql"
47 | - name: MYSQL_USER
48 | value: "mysql"
49 | - name: MYSQL_PASSWORD
50 | value: "mysql"
--------------------------------------------------------------------------------
/src/unittest/python/dag_integrity_tests.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | from airflow.models import DagBag
3 |
4 |
5 | class TestDagIntegrity(unittest.TestCase):
6 | LOAD_SECOND_THRESHOLD = 2
7 |
8 | def setUp(self):
9 | self.dagbag = DagBag()
10 |
11 | def test_import_dags(self):
12 | self.assertFalse(
13 | len(self.dagbag.import_errors),
14 | 'DAG import failures. Errors: {}'.format(
15 | self.dagbag.import_errors
16 | )
17 | )
18 |
19 | def test_import_time(self):
20 | stats = self.dagbag.dagbag_stats
21 | slow_dags = list(filter(lambda d: d.duration > self.LOAD_SECOND_THRESHOLD, stats))
22 | res = ', '.join(map(lambda d: d.file[1:], slow_dags))
23 |
24 |         self.assertEqual(0, len(slow_dags),
25 | 'The following files take more than {threshold}s to load: {res}'.format(
26 | threshold=self.LOAD_SECOND_THRESHOLD, res=res)
27 | )
28 |
29 | def test_alert_email_present(self):
30 | for dag_id, dag in self.dagbag.dags.items():
31 | emails = dag.default_args.get('email', [])
32 | msg = 'Alert email not set for DAG {id}'.format(id=dag_id)
33 | self.assertIn('hello@world.com', emails, msg)
34 |
--------------------------------------------------------------------------------
/src/main/python/plugins/helloworld_sensor.py:
--------------------------------------------------------------------------------
1 | from datetime import datetime, timedelta
2 | import time
3 | import logging
4 | from airflow.operators.sensors import BaseSensorOperator
5 | from airflow.plugins_manager import AirflowPlugin
6 | from airflow.utils.decorators import apply_defaults
7 |
8 | log = logging.getLogger(__name__)
9 |
10 |
11 | class HelloworldSensor(BaseSensorOperator):
12 |
13 | @apply_defaults
14 | def __init__(self, *args, **kwargs):
15 | super(HelloworldSensor, self).__init__(*args, **kwargs)
16 |
17 | def poke(self, context):
18 | current_minute = self.params.get('sensor_start_time').minute
19 | if current_minute % 3 != 0:
20 | log.info("Sensor minute (%s) is not divisible by 3, sensor will retry.", current_minute)
21 | self.params['sensor_start_time'] = self.params.get('sensor_start_time') + timedelta(minutes=1)
22 | return False
23 |
24 | log.info("Sensor minute (%s) is divisible by 3, sensor finished.", current_minute)
25 | return True
26 |
27 | def execute(self, context):
28 | while not self.poke(context):
29 | time.sleep(self.poke_interval)
30 | return self.params.get('sensor_start_time').minute
31 |
32 |
33 | class HelloworldSensorPlugin(AirflowPlugin):
34 | name = "helloworld_sensor_plugin"
35 | operators = [HelloworldSensor]
36 |
37 |
38 | def get_now():
39 | return datetime.now()
40 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM python:3.6-stretch
2 | USER root
3 | ENV AIRFLOW__CORE__LOAD_EXAMPLES False
4 | ENV AIRFLOW__CORE__DAGS_FOLDER /opt/src/main/python/dags
5 | ENV AIRFLOW__CORE__PLUGINS_FOLDER /opt/src/main/python/plugins
6 | ENV AIRFLOW__REST_API_PLUGIN__LOG_LOADING True
7 | ENV AIRFLOW__REST_API_PLUGIN__FILTER_LOADING_MESSAGES_IN_CLI_RESPONSE True
8 | ENV AIRFLOW__REST_API_PLUGIN__REST_API_PLUGIN_HTTP_TOKEN_HEADER_NAME rest_api_plugin_http_token
9 | ENV AIRFLOW__REST_API_PLUGIN__REST_API_PLUGIN_EXPECTED_HTTP_TOKEN None
10 |
11 | ENV AIRFLOW_HOME /usr/local/airflow
12 |
13 | WORKDIR /opt
14 |
15 | RUN apt-get update -qq \
16 | && pip install -U pip \
17 | && pip install pybuilder \
18 | && rm -rf /var/lib/apt/lists/* /var/cache/apk/*
19 |
20 | COPY build.py .
21 | RUN pyb install_dependencies
22 |
23 | COPY requirements.txt .
24 | RUN pip install -r requirements.txt
25 |
26 | COPY src/unittest/python/resources/variables.json /usr/local/airflow/variables.json
27 | COPY src/unittest/python/resources/connections.sh /usr/local/airflow/connections.sh
28 |
29 | RUN airflow initdb && \
30 | airflow variables -i /usr/local/airflow/variables.json && \
31 | sh /usr/local/airflow/connections.sh
32 |
33 | RUN rm -f /opt/build.py
34 | RUN rm -f /usr/local/airflow/variables.json
35 | RUN rm -f /usr/local/airflow/connections.sh
36 |
37 | COPY script/entrypoint.sh /mnt/entrypoint.sh
38 | RUN chmod +x /mnt/entrypoint.sh
39 |
40 | ENV PYTHONPATH /opt/src/
41 |
42 | ENTRYPOINT ["/mnt/entrypoint.sh"]
43 |
44 | CMD ["install_dependencies",""]
45 |
--------------------------------------------------------------------------------
/src/main/python/dags/helloworld_xcoms.py:
--------------------------------------------------------------------------------
1 | import datetime
2 | from airflow import DAG
3 | from airflow.operators import PythonOperator
4 | from airflow.operators import BashOperator
5 |
6 | yesterday = datetime.datetime.combine(
7 | datetime.datetime.today() - datetime.timedelta(1),
8 | datetime.datetime.min.time())
9 |
10 | default_dag_args = {
11 | 'start_date': yesterday,
12 | 'email_on_failure': False,
13 | 'email_on_retry': False,
14 | 'email': ['hello@world.com'],
15 | 'retries': 0
16 | }
17 |
18 |
19 | def push_to_xcoms(*args, **kwargs):
20 | value = "dummyValue"
21 | kwargs['ti'].xcom_push(key="dummyKey", value=value)
22 |
23 |
24 | def pull_from_xcoms(**kwargs):
25 | ti = kwargs['ti']
26 | pulled_value = ti.xcom_pull(key='dummyKey', task_ids='push_to_xcoms')
27 | print("value=" + str(pulled_value))
28 |
29 |
30 | dag = DAG('hello_world_xcoms', description='Hello world XComs example', default_args=default_dag_args, schedule_interval=None)
31 |
32 | push_to_xcoms_task = PythonOperator(
33 | task_id='push_to_xcoms',
34 | provide_context=True,
35 | python_callable=push_to_xcoms,
36 | dag=dag
37 | )
38 |
39 | pull_from_xcoms_task = PythonOperator(
40 | task_id='pull_from_xcoms',
41 | provide_context=True,
42 | python_callable=pull_from_xcoms,
43 | dag=dag
44 | )
45 |
46 | templated_xcoms_value_task = BashOperator(
47 | task_id='templated_xcoms_value',
48 | bash_command='echo ' + str("{{ ti.xcom_pull(key='dummyKey')}}"),
49 | dag=dag
50 | )
51 |
52 | push_to_xcoms_task >> pull_from_xcoms_task >> templated_xcoms_value_task
53 |
--------------------------------------------------------------------------------
/src/unittest/python/plugins/helloworld_sensor_tests.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | from datetime import datetime
3 | from airflow import DAG
4 | from airflow.models import TaskInstance
5 | from airflow.operators import HelloworldSensor
6 |
7 |
8 | class TestHelloworldSensor(unittest.TestCase):
9 | def test_poke_should_return_false_when_value_of_minute_is_not_divisible_by_3(self):
10 | dag = DAG(dag_id='anydag', start_date=datetime.now())
11 | sensor_task = HelloworldSensor(
12 | task_id='any_sensor_task',
13 | poke_interval=2,
14 | params={'sensor_start_time': datetime(2018, 8, 8, 10, 50)},
15 | dag=dag
16 | )
17 | sti = TaskInstance(task=sensor_task, execution_date=datetime.now())
18 | result = sensor_task.poke(sti.get_template_context())
19 | self.assertFalse(result)
20 |
21 | def test_poke_should_return_true_when_value_of_minute_is_divisible_by_3(self):
22 | dag = DAG(dag_id='anydag', start_date=datetime.now())
23 | sensor_task = HelloworldSensor(
24 | task_id='any_sensor_task',
25 | poke_interval=2,
26 | params={'sensor_start_time': datetime(2018, 8, 8, 10, 9)},
27 | dag=dag
28 | )
29 | sti = TaskInstance(task=sensor_task, execution_date=datetime.now())
30 | result = sensor_task.poke(sti.get_template_context())
31 | self.assertTrue(result)
32 |
33 | def test_execute_should_return_true(self):
34 | dag = DAG(dag_id='anydag', start_date=datetime.now())
35 | sensor_task = HelloworldSensor(
36 | task_id='any_sensor_task',
37 | poke_interval=2,
38 | params={'sensor_start_time': datetime(2018, 8, 8, 10, 10)},
39 | dag=dag
40 | )
41 | sti = TaskInstance(task=sensor_task, execution_date=datetime.now())
42 | sensor_time = sensor_task.execute(sti.get_template_context())
43 | self.assertEqual(sensor_time, 12)
44 |
--------------------------------------------------------------------------------
/src/unittest/python/dags/helloworld_dag_tests.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | from airflow.models import DagBag
3 |
4 |
5 | class TestHelloWorldDAG(unittest.TestCase):
6 | """Check HelloWorldDAG expectation"""
7 |
8 | def setUp(self):
9 | self.dagbag = DagBag()
10 |
11 | def test_task_count(self):
12 | """Check task count of hello_world dag"""
13 | dag_id = 'hello_world'
14 | dag = self.dagbag.get_dag(dag_id)
15 | self.assertEqual(len(dag.tasks), 3)
16 |
17 | def test_contain_tasks(self):
18 | """Check task contains in hello_world dag"""
19 | dag_id = 'hello_world'
20 | dag = self.dagbag.get_dag(dag_id)
21 | tasks = dag.tasks
22 | task_ids = list(map(lambda task: task.task_id, tasks))
23 | self.assertListEqual(sorted(task_ids), sorted(['dummy_task', 'multiplyby5_task', 'hello_task']))
24 |
25 | def test_dependencies_of_dummy_task(self):
26 | """Check the task dependencies of dummy_task in hello_world dag"""
27 | dag_id = 'hello_world'
28 | dag = self.dagbag.get_dag(dag_id)
29 | dummy_task = dag.get_task('dummy_task')
30 |
31 | upstream_task_ids = list(map(lambda task: task.task_id, dummy_task.upstream_list))
32 | self.assertListEqual(upstream_task_ids, [])
33 | downstream_task_ids = list(map(lambda task: task.task_id, dummy_task.downstream_list))
34 | self.assertListEqual(sorted(downstream_task_ids), sorted(['hello_task', 'multiplyby5_task']))
35 |
36 | def test_dependencies_of_hello_task(self):
37 | """Check the task dependencies of hello_task in hello_world dag"""
38 | dag_id = 'hello_world'
39 | dag = self.dagbag.get_dag(dag_id)
40 | hello_task = dag.get_task('hello_task')
41 |
42 | upstream_task_ids = list(map(lambda task: task.task_id, hello_task.upstream_list))
43 | self.assertListEqual(upstream_task_ids, ['dummy_task'])
44 | downstream_task_ids = list(map(lambda task: task.task_id, hello_task.downstream_list))
45 | self.assertListEqual(downstream_task_ids, [])
46 |
--------------------------------------------------------------------------------
/src/unittest/python/dags/helloworld_xcoms_tests.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | from datetime import datetime
3 | from airflow.models import DagBag, TaskInstance
4 |
5 |
6 | class TestXComExamplesDag(unittest.TestCase):
7 |
8 | def setUp(self):
9 | self.dagbag = DagBag()
10 |
11 | def test_xcoms(self):
12 | dag_id = 'hello_world_xcoms'
13 | dag = self.dagbag.get_dag(dag_id)
14 | push_to_xcoms_task = dag.get_task('push_to_xcoms')
15 | pull_from_xcoms_task = dag.get_task('pull_from_xcoms')
16 |
17 | execution_date = datetime.now()
18 |
19 | push_to_xcoms_ti = TaskInstance(task=push_to_xcoms_task, execution_date=execution_date)
20 | context = push_to_xcoms_ti.get_template_context()
21 | push_to_xcoms_task.execute(context)
22 |
23 | pull_from_xcoms_ti = TaskInstance(task=pull_from_xcoms_task, execution_date=execution_date)
24 |
25 | result = pull_from_xcoms_ti.xcom_pull(key="dummyKey")
26 | self.assertEqual(result, 'dummyValue')
27 |
28 | def test_xcom_in_templated_field(self):
29 | dag_id = 'hello_world_xcoms'
30 | dag = self.dagbag.get_dag(dag_id)
31 | push_to_xcoms_task = dag.get_task('push_to_xcoms')
32 |
33 | execution_date = datetime.now()
34 |
35 | push_to_xcoms_ti = TaskInstance(task=push_to_xcoms_task, execution_date=execution_date)
36 | context = push_to_xcoms_ti.get_template_context()
37 | push_to_xcoms_task.execute(context)
38 |
39 | templated_xcoms_value_task = dag.get_task('templated_xcoms_value')
40 | templated_xcoms_value_ti = TaskInstance(task=templated_xcoms_value_task, execution_date=execution_date)
41 | context = templated_xcoms_value_ti.get_template_context()
42 |
43 | bash_operator_templated_field = 'bash_command'
44 |
45 | rendered_template = templated_xcoms_value_task.render_template
46 |
47 | bash_command_value = getattr(templated_xcoms_value_task, bash_operator_templated_field)
48 |
49 | bash_command_rendered_value = rendered_template(bash_operator_templated_field, bash_command_value, context)
50 |
51 | self.assertEqual(bash_command_rendered_value, 'echo dummyValue')
52 |
53 |
54 | if __name__ == '__main__':
55 |     unittest.main()
56 |
--------------------------------------------------------------------------------
/k8s/presto/presto.kube.yaml:
--------------------------------------------------------------------------------
1 | # Actual Source: https://github.com/dharmeshkakadia/presto-kubernetes
2 |
3 | apiVersion: v1
4 | kind: Service
5 | metadata:
6 | name: presto
7 | spec:
8 | selector:
9 | presto: coordinator
10 | type: NodePort
11 | ports:
12 | - name: coordinator
13 | port: 8080
14 | protocol: TCP
15 | targetPort: coordinator
16 | nodePort: 32211
17 | ---
18 | kind: Deployment
19 | apiVersion: apps/v1beta1
20 | metadata:
21 | name: coordinator
22 | labels:
23 | presto: coordinator
24 | spec:
25 | replicas: 1
26 | template:
27 | metadata:
28 | labels:
29 | presto: coordinator
30 | spec:
31 | containers:
32 | - env:
33 | - name: HTTP_SERVER_PORT
34 | value: "8080"
35 | - name: PRESTO_JVM_HEAP_SIZE
36 | value: "8"
37 | - name: PRESTO_MAX_MEMORY
38 | value: "10"
39 | - name: PRESTO_MAX_MEMORY_PER_NODE
40 | value: "1"
41 | image: johandry/presto
42 | livenessProbe:
43 | exec:
44 | command:
45 | - /etc/init.d/presto status | grep -q 'Running as'
46 | failureThreshold: 3
47 | periodSeconds: 300
48 | timeoutSeconds: 10
49 | name: presto-coordinator
50 | ports:
51 | - name: coordinator
52 | containerPort: 8080
53 | restartPolicy: Always
54 | ---
55 | apiVersion: apps/v1beta1
56 | kind: Deployment
57 | metadata:
58 | labels:
59 | presto: presto-worker
60 | name: presto-worker
61 | spec:
62 | replicas: 1
63 | template:
64 | metadata:
65 | labels:
66 | presto: presto-worker
67 | spec:
68 | containers:
69 | - env:
70 | - name: HTTP_SERVER_PORT
71 | value: "8080"
72 | - name: PRESTO_JVM_HEAP_SIZE
73 | value: "8"
74 | - name: PRESTO_MAX_MEMORY
75 | value: "10"
76 | - name: PRESTO_MAX_MEMORY_PER_NODE
77 | value: "1"
78 | - name : COORDINATOR
79 | value: "presto"
80 | image: johandry/presto
81 | livenessProbe:
82 | exec:
83 | command:
84 | - /etc/init.d/presto status | grep -q 'Running as'
85 | failureThreshold: 3
86 | periodSeconds: 300
87 | timeoutSeconds: 10
88 | name: presto-worker
89 | ports:
90 | - containerPort: 8080
91 | restartPolicy: Always
--------------------------------------------------------------------------------
/src/integrationtest/python/airflow_api.py:
--------------------------------------------------------------------------------
1 | import requests
2 | import json
3 | from constants import AIRFLOW_PORT,MYSQL_DB_PORT,PRESTO_DB_PORT
4 |
5 |
6 | class AirflowAPI:
7 | def __init__(self):
8 | self.minikube_ip = self.get_minikube_ip()
9 |
10 | def get_minikube_ip(self):
11 | f = open("/tmp/minikube_ip.txt", "r")
12 | minikube_ip = f.readlines()[0].replace('\n', '')
13 | if not minikube_ip:
14 | raise Exception("Minikube is not running. Please, start minikube first.")
15 | f.close()
16 | return minikube_ip
17 |
18 | def get_airflow_url(self):
19 | return "http://%s:%s" % (self.minikube_ip, AIRFLOW_PORT)
20 |
21 | def unpause_dag(self, dag_id):
22 | return requests.get(
23 | "%s/admin/rest_api/api?api=unpause&dag_id=%s" % (self.get_airflow_url(), dag_id))
24 |
25 | def pause_dag(self, dag_id):
26 | return requests.get(
27 | "%s/admin/rest_api/api?api=pause&dag_id=%s" % (self.get_airflow_url(), dag_id))
28 |
29 | def trigger_dag(self, dag_id, execution_date):
30 |         self.clear_dag(dag_id, execution_date)
31 | self.unpause_dag(dag_id)
32 | triggered_response = requests.get(
33 | "%s/admin/rest_api/api?api=trigger_dag&dag_id=%s&exec_date=%s" % (
34 | self.get_airflow_url(), dag_id, execution_date))
35 | if triggered_response.status_code != 200:
36 | raise Exception("Please, wait for airflow web server to start.")
37 |
38 | def dag_state(self, dag_id, execution_date):
39 | return requests.get(
40 | "%s/admin/rest_api/api?api=dag_state&dag_id=%s&execution_date=%s" % (
41 | self.get_airflow_url(), dag_id, execution_date))
42 |
43 | def clear_dag(self, dag_id, execution_date):
44 | return requests.get(
45 | "%s/admin/rest_api/api?api=clear&dag_id=%s&execution_date=%s" % (
46 | self.get_airflow_url(), dag_id, execution_date))
47 |
48 | def is_dag_running(self, dag_id, execution_date):
49 | response = self.dag_state(dag_id, execution_date)
50 | json_response = json.loads(response.text)
51 | print(json_response)
52 | if "running" in json_response['output']['stdout']:
53 | return True
54 | else:
55 | self.pause_dag(dag_id)
56 | return False
57 |
58 | def get_dag_status(self, dag_id, execution_date):
59 | response = self.dag_state(dag_id, execution_date)
60 | json_response = json.loads(response.text)
61 | if "running" in json_response['output']['stdout']:
62 | return "running"
63 | elif "success" in json_response['output']['stdout']:
64 | return "success"
65 | elif "failed" in json_response['output']['stdout']:
66 | return "failed"
67 | else:
68 | return "Not Defined"
69 |
70 | def add_presto_connection(self, name, catalog, schema):
71 | conn_uri = "presto://" + self.minikube_ip + ":"+str(PRESTO_DB_PORT)+"/" + catalog + "/" + schema
72 | return requests.get(
73 | "%s/admin/rest_api/api?api=connections&add=on&conn_id=%s&conn_uri=%s" % (
74 | self.get_airflow_url(), name, conn_uri))
75 |
76 | def add_mysql_connection(self, name, user, password, database):
77 | conn_uri = "mysql://" + user + ":" + password + "@" + self.minikube_ip + ":"+str(MYSQL_DB_PORT)+"/" + database
78 | return requests.get(
79 | "%s/admin/rest_api/api?api=connections&add=on&conn_id=%s&conn_uri=%s" % (
80 | self.get_airflow_url(), name, conn_uri))
81 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Airflow Testing
2 | This project contains different categories of tests with examples.
3 |
4 | ## Five Categories of Tests
5 | 1. DAG Validation Tests: To test the validity of the DAG, e.g. checking for typos and making sure the graph is acyclic.
6 | 2. DAG/Pipeline Definition Tests: To test the total number of tasks in the DAG, the upstream and downstream dependencies of each task, etc.
7 | 3. Unit Tests: To test the logic of custom operators, custom sensors, etc.
8 | 4. Integration Tests: To test the communication between tasks. For example, task 1 passes some information to task 2 using XComs.
9 | 5. End to End Pipeline Tests: To test and verify the integration between all tasks. You can also assert on the data after the E2E pipeline completes successfully.
10 |
11 | Clone this repo to run these tests on your local machine.
12 |
13 | ## Unit Tests
14 |
15 | Unit tests cover all tests that fall under the first four categories.
16 |
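For example, a DAG validation test (category 1) only needs to load the `DagBag` and assert that every DAG file imports cleanly; `src/unittest/python/dag_integrity_tests.py` in this repo does this (along with load-time and alert-email checks). A minimal sketch along those lines:

```python
import unittest
from airflow.models import DagBag


class TestDagValidation(unittest.TestCase):

    def test_no_import_errors(self):
        # DagBag() parses every file in the configured DAGs folder;
        # syntax errors, bad imports and cycles all surface as import errors.
        dagbag = DagBag()
        self.assertEqual(len(dagbag.import_errors), 0,
                         'DAG import failures: {}'.format(dagbag.import_errors))
```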
17 | #### How to run?
18 | 1. Build the Airflow image. Go to the project root directory and run
19 |
20 | ```docker build . -t airflow-test```
21 |
22 | 2. Run the unit tests from Docker. Use your repository location for **{SourceDir}** (e.g. if you cloned the repo to `/User/username/airflow-testing/`, then
23 | SourceDir is `/User/username`).
24 |
25 | ```docker run -ti -v {SourceDir}/airflow-testing:/opt --entrypoint /mnt/entrypoint.sh airflow-test run_unit_tests```
26 |
27 | ## End-to-End Tests
28 |
29 | End-to-end tests cover all tests of category five. To run these tests,
30 | you need to set up an Airflow environment in minikube, along with
31 | all the components required by your DAGs.
32 |
33 | #### Minikube setup
34 |
35 | Prerequisites:
36 |
37 | git clone https://github.com/chandulal/airflow-testing.git
38 | brew cask install virtualbox (run if you don't have virtual box installed)
39 |
40 |
41 | Install minikube
42 |
43 | brew cask install minikube
44 | brew install kubernetes-cli
45 | minikube start --cpus 4 --memory 8192
46 |
47 |
48 | #### Mount DAGs, Plugins, etc.
49 |
50 | Mount all your DAGs, plugins, etc. in minikube:
51 |
52 | minikube mount {project dir}/src/main/python/:/data
53 |
54 |
55 | #### Deploy Airflow in minikube
56 |
57 | Open a new terminal, go to the project root directory, and run:
58 |
59 | kubectl apply -f airflow.kube.yaml
60 |
61 |
62 | Wait 3-4 minutes for all Airflow components to start.
63 |
64 | This will set up the following components:
65 | * Postgres (stores the Airflow metadata)
66 | * Redis (broker for the Celery executor)
67 | * Airflow Scheduler
68 | * Celery Workers
69 | * Airflow Web Server
70 | * Flower
71 |
72 | #### Access Airflow
73 | Get the minikube IP by running the ```minikube ip``` command.
74 |
75 | Use the minikube IP to access:
76 |
77 | **Airflow UI:** {minikube-ip}:31317
78 |
79 | **Flower:** {minikube-ip}:32081
80 |
81 | #### How does Airflow work in minikube?
82 |
83 | 
84 |
85 | #### How to run these tests?
86 |
87 | 1. Install all components required to run your DAGs in minikube. To run the integration tests
88 | available in this repo, you need MySQL and Presto on minikube.
89 |
90 | kubectl apply -f {SourceDir}/k8s/mysql/mysql.kube.yaml
91 | kubectl apply -f {SourceDir}/k8s/presto/presto.kube.yaml
92 |
93 |
94 | 2. Run the integration tests from Docker. Use the absolute path to this repository on your machine for **{SourceDir}**
95 |
96 | ```docker run -ti -v {SourceDir}/airflow-testing:/opt --entrypoint /mnt/entrypoint.sh airflow-test run_integration_tests {minikube-ip} ```
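Under the hood, the end-to-end tests drive Airflow through the REST API plugin exposed on the web NodePort; `src/integrationtest/python/airflow_api.py` wraps those HTTP calls. A trimmed sketch of what `hello_world_tests.py` does (assuming the minikube IP has already been written to `/tmp/minikube_ip.txt` by the entrypoint):

```python
from airflow_api import AirflowAPI  # src/integrationtest/python/airflow_api.py

airflow_api = AirflowAPI()  # resolves the Airflow URL from the minikube IP
dag_id = "hello_world"
execution_date = "2019-01-11T12:00:00+00:00"

# clear + unpause + trigger the DAG via the REST API plugin
airflow_api.trigger_dag(dag_id, execution_date)

# poll until the DagRun leaves the "running" state
while airflow_api.is_dag_running(dag_id, execution_date):
    pass

assert airflow_api.get_dag_status(dag_id, execution_date) == "success"
```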
--------------------------------------------------------------------------------
/src/integrationtest/python/dags/presto_to_mysql_tests.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import unittest
3 | import mysql.connector
4 | import prestodb
5 |
6 | sys.path.append('../')
7 | from airflow_api import AirflowAPI
8 | from db_util import DBUtil
9 | from constants import PRESTO_DB_PORT,MYSQL_DB_PORT
10 |
11 |
12 | class TestPrestoToMySqlDag(unittest.TestCase):
13 | """Integration test for presto to mysql transfer"""
14 |
15 | mysql_conn = None
16 | prest_conn = None
17 |
18 |
19 | def setUp(self):
20 | presto_catlog="blackhole"
21 | presto_schema= "default"
22 | mysql_database="mysql"
23 | mysql_user="mysql"
24 | mysql_password="mysql"
25 |
26 | self.airflow_api = AirflowAPI()
27 | self.minikube_ip = str(self.airflow_api.get_minikube_ip())
28 | self.db_util = DBUtil()
29 |         self.airflow_api.add_presto_connection("presto-conn", presto_catlog,
30 |                                                presto_schema)
31 |         self.airflow_api.add_mysql_connection("mysql-conn", mysql_user,
32 |                                               mysql_password, mysql_database)
33 | self.mysql_conn = mysql.connector.connect(user=mysql_user,
34 | password=mysql_password,
35 | host=self.minikube_ip,
36 | port=MYSQL_DB_PORT,
37 | database=mysql_database,
38 | use_pure=False)
39 |
40 | self.prest_conn = prestodb.dbapi.connect(
41 | host=self.minikube_ip,
42 | port=PRESTO_DB_PORT,
43 | user='admin',
44 | catalog=presto_catlog,
45 | schema=presto_schema,
46 | )
47 |
48 | create_mysql_table_sql = """
49 | CREATE TABLE IF NOT EXISTS mysql_region (
50 | name VARCHAR(50),count int(10)
51 | );
52 | """
53 |
54 | self.db_util.create_table(self.mysql_conn,create_mysql_table_sql)
55 |
56 | create_presto_table_sql = """
57 | CREATE TABLE region (
58 | name varchar
59 | )
60 | WITH (
61 | split_count = 1,
62 | pages_per_split = 1,
63 | rows_per_page = 1,
64 | page_processing_delay = '5s'
65 | )"""
66 |
67 |
68 | self.db_util.create_table(self.prest_conn,create_presto_table_sql)
69 |
70 | insert_query_1 = "insert into region values('INDIA')"
71 | self.db_util.insert_into_table(self.prest_conn,insert_query_1)
72 |
73 | def test_presto_to_mysql_transfer(self):
74 | """should transfer data from presto to mysql"""
75 |
76 | execution_date = "2019-05-12T14:00:00+00:00"
77 | dag_id = "presto_to_mysql"
78 | self.airflow_api.trigger_dag(dag_id, execution_date)
79 | is_running = True
80 | while is_running:
81 | is_running = self.airflow_api.is_dag_running(dag_id, execution_date)
82 | self.assertEqual(is_running, False)
83 | self.assertEqual(self.airflow_api.get_dag_status(dag_id,
84 | execution_date), "success")
85 |
86 | mysql_select_query = "SELECT name FROM mysql_region"
87 | row_count=self.db_util.get_row_count(self.mysql_conn,mysql_select_query)
88 | self.assertEqual(1, len(row_count))
89 |
90 | def tearDown(self):
91 | drop_mysql_table="drop table mysql_region"
92 | drop_presto_table = "drop table region"
93 | self.db_util.drop_table(self.mysql_conn,drop_mysql_table)
94 | self.db_util.drop_table(self.prest_conn,drop_presto_table)
95 | self.mysql_conn.close()
96 | self.prest_conn.close()
97 |
98 |
99 | if __name__ == '__main__':
100 | unittest.main()
101 |
--------------------------------------------------------------------------------
/src/main/python/plugins/templates/rest_api_plugin/index.html:
--------------------------------------------------------------------------------
1 | {% extends "airflow/master.html" %}
2 |
3 | {% block title %}Airflow - REST API Plugin{% endblock %}
4 |
5 | {% block head_css %}
6 | {{ super() }}
7 |
8 | {% endblock %}
9 |
10 | {% block body %}
11 |
12 |
13 |
19 |
20 |
21 |
44 |
45 | Airflow REST API
46 |
47 |
48 | Documentation
49 |
53 |
54 |
55 | Versions
56 |
57 | - Airflow Version: {{airflow_version}}
58 | - Rest API Plugin Version: {{rest_api_plugin_version}}
59 |
60 |
61 |
62 | DAGs:
63 |
64 |
65 | | DAG ID | Is Active |
66 |
67 | {% for dag in dags %}
68 |
69 | | {{dag.dag_id}} | {{dag.is_active}} |
70 |
71 | {% endfor %}
72 |
73 |
74 |
75 | API Directory
76 | Click on one of the links below to jump to the API form
77 |
82 |
83 |
84 | APIs
85 | {% for api_metadata in apis_metadata %}
86 |
87 |
88 | {{api_metadata.description}}
89 | {{api_metadata.http_method|default('GET', true)}} {{airflow_webserver_base_url}}{{rest_api_endpoint}}?api={{api_metadata.name}}{% if api_metadata.http_method != 'POST' %}{% for argument in api_metadata.arguments %}&{{argument.name}}{% if argument.form_input_type != 'checkbox' %}=value{% endif %}{% endfor %}{% endif %}
90 |
91 | {% if api_metadata.form_enctype %}
92 | enctype={{api_metadata.form_enctype}}
93 | {% endif %}
94 |
95 | {% if api_metadata.post_body_description %}
96 | {{api_metadata.post_body_description}}
97 | {% endif %}
98 |
99 | Available in Airflow Version: {{api_metadata.airflow_version}}
100 |
148 |
149 |
150 |
151 | {% endfor %}
152 |
153 |
154 |
155 | {% endblock %}
156 |
157 | {% block tail %}
158 | {{ super() }}
159 |
160 | {% endblock %}
161 |
--------------------------------------------------------------------------------
/airflow.kube.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: Service
3 | metadata:
4 | name: postgres
5 | spec:
6 | type: ClusterIP
7 | selector:
8 | app: airflow
9 | tier: db
10 | ports:
11 | - name: postgres
12 | protocol: TCP
13 | port: 5432
14 | targetPort: postgres
15 | ---
16 | apiVersion: v1
17 | kind: Service
18 | metadata:
19 | name: redis
20 | spec:
21 | type: ClusterIP
22 | selector:
23 | app: airflow
24 | tier: backend
25 | ports:
26 | - port: 6379
27 | name: redis
28 | protocol: TCP
29 | targetPort: redis
30 | ---
31 | apiVersion: v1
32 | kind: Service
33 | metadata:
34 | name: web
35 | spec:
36 | type: NodePort
37 | selector:
38 | app: airflow
39 | tier: web
40 | ports:
41 | - name: web
42 | protocol: TCP
43 | port: 8080
44 | targetPort: web
45 | nodePort: 31317
46 | ---
47 | apiVersion: v1
48 | kind: Service
49 | metadata:
50 | name: flower
51 | spec:
52 | type: NodePort
53 | selector:
54 | app: airflow
55 | tier: flower
56 | ports:
57 | - name: flower
58 | protocol: TCP
59 | port: 5555
60 | targetPort: flower
61 | nodePort: 32081
62 | ---
63 | apiVersion: extensions/v1beta1
64 | kind: Deployment
65 | metadata:
66 | name: postgres
67 | spec:
68 | replicas: 1
69 | template:
70 | metadata:
71 | labels:
72 | app: airflow
73 | tier: db
74 | spec:
75 | containers:
76 | - name: postgres
77 | image: postgres:9.6
78 | resources:
79 | requests:
80 | memory: "64Mi"
81 | cpu: "200m"
82 | limits:
83 | memory: "128Mi"
84 | cpu: "400m"
85 | ports:
86 | - name: postgres
87 | containerPort: 5432
88 | env:
89 | - name: POSTGRES_USER
90 | value: "airflow"
91 | - name: POSTGRES_PASSWORD
92 | value: "airflow"
93 | - name: POSTGRES_DB
94 | value: "airflow"
95 | ---
96 | apiVersion: extensions/v1beta1
97 | kind: Deployment
98 | metadata:
99 | name: redis
100 | spec:
101 | replicas: 1
102 | template:
103 | metadata:
104 | labels:
105 | app: airflow
106 | tier: backend
107 | spec:
108 | restartPolicy: Always
109 | containers:
110 | - name: redis
111 | image: redis:3.2.7
112 | resources:
113 | requests:
114 | memory: "32Mi"
115 | cpu: "200m"
116 | limits:
117 | memory: "64Mi"
118 | cpu: "400m"
119 | ports:
120 | - name: redis
121 | containerPort: 6379
122 | ---
123 | apiVersion: extensions/v1beta1
124 | kind: Deployment
125 | metadata:
126 | name: web
127 | spec:
128 | replicas: 1
129 | strategy:
130 | type: Recreate
131 | template:
132 | metadata:
133 | labels:
134 | app: airflow
135 | tier: web
136 | spec:
137 | securityContext:
138 | runAsUser: 0
139 | fsGroup: 0
140 | volumes:
141 | - name: airflowdagdir
142 | hostPath:
143 | path: "/data/dags/"
144 | - name: airflowpluginsdir
145 | hostPath:
146 | path: "/data/plugins/"
147 | restartPolicy: Always
148 | containers:
149 | - args:
150 | - webserver
151 | env:
152 | - name: EXECUTOR
153 | value: Celery
154 | - name: FERNET_KEY
155 | value: 46BKJoQYlPPOexq0OhDZnIlNepKFf87WFwLbfzqDDho=
156 | - name: LOAD_EX
157 | value: "n"
158 | - name: POSTGRES_USER
159 | value: "airflow"
160 | - name: POSTGRES_PASSWORD
161 | value: "airflow"
162 | - name: POSTGRES_DB
163 | value: "airflow"
164 | - name: POSTGRES_HOST
165 | value: "postgres"
166 | - name: POSTGRES_PORT
167 | value: "5432"
168 | - name: REDIS_HOST
169 | value: "redis"
170 | - name: REDIS_PORT
171 | value: "6379"
172 | - name: AIRFLOW_HOME
173 | value: "/usr/local/airflow"
174 | - name: AIRFLOW__CORE__MAX_ACTIVE_RUNS_PER_DAG
175 | value: "1"
176 | image: puckel/docker-airflow:1.10.1
177 | name: webserver
178 | ports:
179 | - name: web
180 | containerPort: 8080
181 | volumeMounts:
182 | - name: airflowdagdir
183 | mountPath: "/usr/local/airflow/dags"
184 | - name: airflowpluginsdir
185 | mountPath: "/usr/local/airflow/plugins"
186 | ---
187 | apiVersion: extensions/v1beta1
188 | kind: Deployment
189 | metadata:
190 | name: flower
191 | spec:
192 | replicas: 1
193 | strategy:
194 | type: Recreate
195 | template:
196 | metadata:
197 | labels:
198 | app: airflow
199 | tier: flower
200 | spec:
201 | securityContext:
202 | runAsUser: 0
203 | fsGroup: 0
204 | volumes:
205 | - name: airflowdagdir
206 | hostPath:
207 | path: "/data/dags/"
208 | - name: airflowpluginsdir
209 | hostPath:
210 | path: "/data/plugins/"
211 | restartPolicy: Always
212 | containers:
213 | - args:
214 | - flower
215 | env:
216 | - name: FLOWER_PORT
217 | value: "5555"
218 | - name: EXECUTOR
219 | value: Celery
220 | - name: FERNET_KEY
221 | value: 46BKJoQYlPPOexq0OhDZnIlNepKFf87WFwLbfzqDDho=
222 | - name: LOAD_EX
223 | value: "n"
224 | - name: POSTGRES_USER
225 | value: "airflow"
226 | - name: POSTGRES_PASSWORD
227 | value: "airflow"
228 | - name: POSTGRES_DB
229 | value: "airflow"
230 | - name: POSTGRES_HOST
231 | value: "postgres"
232 | - name: POSTGRES_PORT
233 | value: "5432"
234 | - name: REDIS_HOST
235 | value: "redis"
236 | - name: REDIS_PORT
237 | value: "6379"
238 | - name: AIRFLOW_HOME
239 | value: "/usr/local/airflow"
240 | image: puckel/docker-airflow:1.10.1
241 | resources:
242 | requests:
243 | memory: "64Mi"
244 | cpu: "200m"
245 | limits:
246 | memory: "128Mi"
247 | cpu: "400m"
248 | name: flower
249 | ports:
250 | - name: flower
251 | containerPort: 5555
252 | volumeMounts:
253 | - name: airflowdagdir
254 | mountPath: "/usr/local/airflow/dags"
255 | - name: airflowpluginsdir
256 | mountPath: "/usr/local/airflow/plugins"
257 | ---
258 | apiVersion: extensions/v1beta1
259 | kind: Deployment
260 | metadata:
261 | name: scheduler
262 | spec:
263 | replicas: 1
264 | template:
265 | metadata:
266 | labels:
267 | app: airflow
268 | tier: scheduler
269 | spec:
270 | restartPolicy: Always
271 | securityContext:
272 | runAsUser: 0
273 | fsGroup: 0
274 | volumes:
275 | - name: airflowdagdir
276 | hostPath:
277 | path: "/data/dags/"
278 | - name: airflowpluginsdir
279 | hostPath:
280 | path: "/data/plugins/"
281 | containers:
282 | - args:
283 | - scheduler
284 | env:
285 | - name: EXECUTOR
286 | value: Celery
287 | - name: FERNET_KEY
288 | value: 46BKJoQYlPPOexq0OhDZnIlNepKFf87WFwLbfzqDDho=
289 | - name: LOAD_EX
290 | value: "n"
291 | - name: POSTGRES_USER
292 | value: "airflow"
293 | - name: POSTGRES_PASSWORD
294 | value: "airflow"
295 | - name: POSTGRES_DB
296 | value: "airflow"
297 | - name: POSTGRES_HOST
298 | value: "postgres"
299 | - name: POSTGRES_PORT
300 | value: "5432"
301 | - name: REDIS_HOST
302 | value: "redis"
303 | - name: REDIS_PORT
304 | value: "6379"
305 | - name: AIRFLOW_HOME
306 | value: "/usr/local/airflow"
307 | - name: AIRFLOW__CORE__MAX_ACTIVE_RUNS_PER_DAG
308 | value: "1"
309 | image: puckel/docker-airflow:1.10.1
310 | resources:
311 | requests:
312 | memory: "64Mi"
313 | cpu: "200m"
314 | limits:
315 | memory: "128Mi"
316 | cpu: "400m"
317 | name: scheduler
318 | volumeMounts:
319 | - name: airflowdagdir
320 | mountPath: "/usr/local/airflow/dags"
321 | - name: airflowpluginsdir
322 | mountPath: "/usr/local/airflow/plugins"
323 | ---
324 | apiVersion: extensions/v1beta1
325 | kind: Deployment
326 | metadata:
327 | name: worker
328 | spec:
329 | replicas: 1
330 | template:
331 | metadata:
332 | labels:
333 | app: airflow
334 | tier: worker
335 | spec:
336 | restartPolicy: Always
337 | volumes:
338 | - name: airflowdagdir
339 | hostPath:
340 | path: "/data/dags/"
341 | - name: airflowpluginsdir
342 | hostPath:
343 | path: "/data/plugins/"
344 | containers:
345 | - args:
346 | - worker
347 | env:
348 | - name: EXECUTOR
349 | value: Celery
350 | - name: FERNET_KEY
351 | value: 46BKJoQYlPPOexq0OhDZnIlNepKFf87WFwLbfzqDDho=
352 | - name: LOAD_EX
353 | value: "n"
354 | - name: POSTGRES_USER
355 | value: "airflow"
356 | - name: POSTGRES_PASSWORD
357 | value: "airflow"
358 | - name: POSTGRES_DB
359 | value: "airflow"
360 | - name: POSTGRES_HOST
361 | value: "postgres"
362 | - name: POSTGRES_PORT
363 | value: "5432"
364 | - name: REDIS_HOST
365 | value: "redis"
366 | - name: REDIS_PORT
367 | value: "6379"
368 | - name: AIRFLOW_HOME
369 | value: "/usr/local/airflow"
370 | - name: AIRFLOW__CORE__EXECUTOR
371 | value: CeleryExecutor
372 | - name: AIRFLOW__CORE__SQL_ALCHEMY_CONN
373 | value: "postgresql+psycopg2://airflow:airflow@postgres:5432/airflow"
374 | - name: AIRFLOW__CELERY__RESULT_BACKEND
375 | value: "db+postgresql://airflow:airflow@postgres:5432/airflow"
376 | - name: AIRFLOW__CELERY__BROKER_URL
377 | value: "redis://redis:6379/1"
378 | - name: AIRFLOW__CORE__MAX_ACTIVE_RUNS_PER_DAG
379 | value: "1"
380 | - name: AIRFLOW__CELERY__WORKER_CONCURRENCY
381 | value: "1"
382 | image: puckel/docker-airflow:1.10.1
383 | resources:
384 | requests:
385 | memory: "1Gi"
386 | cpu: "200m"
387 | limits:
388 | memory: "1.5Gi"
389 | cpu: "400m"
390 | name: worker
391 | volumeMounts:
392 | - name: airflowdagdir
393 | mountPath: "/usr/local/airflow/dags"
394 | - name: airflowpluginsdir
395 | mountPath: "/usr/local/airflow/plugins"
--------------------------------------------------------------------------------
/src/main/python/plugins/rest_api_plugin.py:
--------------------------------------------------------------------------------
1 | __author__ = 'robertsanders'
2 | __version__ = "1.0.4"
3 |
4 | from airflow.models import DagBag, DagModel
5 | from airflow.plugins_manager import AirflowPlugin
6 | from airflow import configuration
7 | from airflow.www.app import csrf
8 |
9 | from flask import Blueprint, request, jsonify
10 | from flask_admin import BaseView, expose
11 |
12 | from datetime import datetime
13 | import airflow
14 | import logging
15 | import subprocess
16 | import os
17 | import socket
18 |
19 | """
20 | CLIs this REST API exposes are Defined here: http://airflow.incubator.apache.org/cli.html
21 | """
22 |
23 | # todo: dynamically decide which api objects to display based off which version of airflow is installed - http://stackoverflow.com/questions/1714027/version-number-comparison
24 |
25 | # Location of the REST Endpoint
26 | # Note: Changing this will only affect where the messages are posted to on the web interface and will not change where the endpoint actually resides
27 | rest_api_endpoint = "/admin/rest_api/api"
28 |
29 | # Getting Versions and Global variables
30 | hostname = socket.gethostname()
31 | airflow_version = airflow.__version__
32 | rest_api_plugin_version = __version__
33 |
34 | # Getting configurations from airflow.cfg file
35 | airflow_webserver_base_url = configuration.get('webserver', 'BASE_URL')
36 | airflow_base_log_folder = configuration.get('core', 'BASE_LOG_FOLDER')
37 | airflow_dags_folder = configuration.get('core', 'DAGS_FOLDER')
38 | log_loading = configuration.getboolean("rest_api_plugin", "LOG_LOADING") if configuration.has_option("rest_api_plugin", "LOG_LOADING") else False
39 | filter_loading_messages_in_cli_response = configuration.getboolean("rest_api_plugin", "FILTER_LOADING_MESSAGES_IN_CLI_RESPONSE") if configuration.has_option("rest_api_plugin", "FILTER_LOADING_MESSAGES_IN_CLI_RESPONSE") else True
40 | airflow_rest_api_plugin_http_token_header_name = configuration.get("rest_api_plugin", "REST_API_PLUGIN_HTTP_TOKEN_HEADER_NAME") if configuration.has_option("rest_api_plugin", "REST_API_PLUGIN_HTTP_TOKEN_HEADER_NAME") else "rest_api_plugin_http_token"
41 | airflow_expected_http_token = configuration.get("rest_api_plugin", "REST_API_PLUGIN_EXPECTED_HTTP_TOKEN") if configuration.has_option("rest_api_plugin", "REST_API_PLUGIN_EXPECTED_HTTP_TOKEN") else None
42 |
43 | # Using UTF-8 Encoding so that response messages don't have any characters in them that can't be handled
44 | os.environ['PYTHONIOENCODING'] = 'utf-8'
45 |
46 | if log_loading:
47 | logging.info("Initializing Airflow REST API Plugin with configs:")
48 | logging.info("\trest_api_endpoint: " + str(rest_api_endpoint))
49 | logging.info("\thostname: " + str(hostname))
50 | logging.info("\tairflow_version: " + str(airflow_version))
51 | logging.info("\trest_api_plugin_version: " + str(rest_api_plugin_version))
52 | logging.info("\tairflow_webserver_base_url: " + str(airflow_webserver_base_url))
53 | logging.info("\tairflow_base_log_folder: " + str(airflow_base_log_folder))
54 | logging.info("\tairflow_dags_folder: " + str(airflow_dags_folder))
55 | logging.info("\tairflow_rest_api_plugin_http_token_header_name: " + str(airflow_rest_api_plugin_http_token_header_name))
56 | logging.info("\tairflow_expected_http_token: OMITTED_FOR_SECURITY")
57 | logging.info("\tfilter_loading_messages_in_cli_response: " + str(filter_loading_messages_in_cli_response))
58 |
59 | """
60 | Metadata that defines a single API:
61 | {
62 | "name": "{string}", # Name of the API (cli command to be executed)
63 | "description": "{string}", # Description of the API
64 | "airflow_version": "{string}", # Version the API was available in to allow people to better determine if the API is available. (to be displayed on the Admin page)
65 | "http_method": "{string}", # HTTP method to use when calling the function. (Default: GET) (Optional)
66 | "background_mode": {boolean}, # Whether to run the process in the background if its a CLI API (Optional)
67 | "arguments": [ # List of arguments that can be provided to the API
68 | {
69 | "name": "{string}", # Name of the argument
70 | "description": "{string}", # Description of the argument
71 | "form_input_type": "{string}", # Type of input to use on the Admin page for the argument
72 | "required": {boolean}, # Whether the argument is required upon submission
73 | "cli_end_position": {int} # In the case with a CLI command that the arguments value should be appended on to the end (for example: airflow trigger_dag some_dag_id), this is the position that the argument should be provided in the CLI command. (Optional)
74 | }
75 | ],
76 | "fixed_arguments": [ # List of arguments that will always be used by the API endpoint and can't be changed
77 | {
78 | "name": "{string}", # Name of the argument
79 | "description": "{string}", # Description of the argument
80 | "fixed_value": "{string}" # Fixed value that will always be used
81 | }
82 | ],
83 | "post_arguments": [ # List of arguments that can be provided in the POST body to the API
84 | {
85 | "name": "{string}", # Name of the argument
86 | "description": "{string}", # Description of the argument
87 | "form_input_type": "{string}", # Type of input to use on the Admin page for the argument
88 | "required": {boolean}, # Whether the argument is required upon submission
89 | }
90 | ]
91 | },
92 | """
93 |
94 | # Metadata about the APIs and how to call them. Representing them like this allows us to dynamically generate the APIs
95 | # in the admin page and dynamically execute them. This also allows us to easily add new ones.
96 | # API Object definition is described in the comment block above.
97 | apis_metadata = [
98 | {
99 | "name": "version",
100 | "description": "Displays the version of Airflow you're using",
101 | "airflow_version": "1.0.0 or greater",
102 | "http_method": "GET",
103 | "arguments": []
104 | },
105 | {
106 | "name": "rest_api_plugin_version",
107 | "description": "Displays the version of this REST API Plugin you're using",
108 | "airflow_version": "None - Custom API",
109 | "http_method": "GET",
110 | "arguments": []
111 | },
112 | {
113 | "name": "render",
114 | "description": "Render a task instance's template(s)",
115 | "airflow_version": "1.7.0 or greater",
116 | "http_method": "GET",
117 | "arguments": [
118 | {"name": "dag_id", "description": "The id of the dag", "form_input_type": "text", "required": True, "cli_end_position": 1},
119 | {"name": "task_id", "description": "The id of the task", "form_input_type": "text", "required": True, "cli_end_position": 2},
120 | {"name": "execution_date", "description": "The execution date of the DAG (Example: 2017-01-02T03:04:05)", "form_input_type": "text", "required": True, "cli_end_position": 3},
121 | {"name": "subdir", "description": "File location or directory from which to look for the dag", "form_input_type": "text", "required": False}
122 | ]
123 | },
124 | {
125 | "name": "variables",
126 | "description": "CRUD operations on variables",
127 | "airflow_version": "1.7.1 or greater",
128 | "http_method": "GET",
129 | "arguments": [
130 | {"name": "set", "description": "Set a variable. Expected input in the form: KEY VALUE.", "form_input_type": "text", "required": False},
131 | {"name": "get", "description": "Get value of a variable", "form_input_type": "text", "required": False},
132 | {"name": "json", "description": "Deserialize JSON variable", "form_input_type": "checkbox", "required": False},
133 | {"name": "default", "description": "Default value returned if variable does not exist", "form_input_type": "text", "required": False},
134 | {"name": "import", "description": "Import variables from JSON file", "form_input_type": "text", "required": False},
135 | {"name": "export", "description": "Export variables to JSON file", "form_input_type": "text", "required": False},
136 | {"name": "delete", "description": "Delete a variable", "form_input_type": "text", "required": False}
137 | ]
138 | },
139 | {
140 | "name": "connections",
141 | "description": "List/Add/Delete connections",
142 | "airflow_version": "1.8.0 or greater",
143 | "http_method": "GET",
144 | "arguments": [
145 | {"name": "list", "description": "List all connections", "form_input_type": "checkbox", "required": False},
146 | {"name": "add", "description": "Add a connection", "form_input_type": "checkbox", "required": False},
147 | {"name": "delete", "description": "Delete a connection", "form_input_type": "checkbox", "required": False},
148 | {"name": "conn_id", "description": "Connection id, required to add/delete a connection", "form_input_type": "text", "required": False},
149 | {"name": "conn_uri", "description": "Connection URI, required to add a connection", "form_input_type": "text", "required": False},
150 | {"name": "conn_extra", "description": "Connection 'Extra' field, optional when adding a connection", "form_input_type": "text", "required": False}
151 | ]
152 | },
153 | {
154 | "name": "pause",
155 | "description": "Pauses a DAG",
156 | "airflow_version": "1.7.0 or greater",
157 | "http_method": "GET",
158 | "arguments": [
159 | {"name": "dag_id", "description": "The id of the dag", "form_input_type": "text", "required": True, "cli_end_position": 1},
160 | {"name": "subdir", "description": "File location or directory from which to look for the dag", "form_input_type": "text", "required": False}
161 | ]
162 | },
163 | {
164 | "name": "unpause",
165 | "description": "Unpauses a DAG",
166 | "airflow_version": "1.7.0 or greater",
167 | "http_method": "GET",
168 | "arguments": [
169 | {"name": "dag_id", "description": "The id of the dag", "form_input_type": "text", "required": True, "cli_end_position": 1},
170 | {"name": "subdir", "description": "File location or directory from which to look for the dag", "form_input_type": "text", "required": False}
171 | ]
172 | },
173 | {
174 | "name": "task_failed_deps",
175 | "description": "Returns the unmet dependencies for a task instance from the perspective of the scheduler. In other words, why a task instance doesn't get scheduled and then queued by the scheduler, and then run by an executor).",
176 | "airflow_version": "1.8.0 or greater",
177 | "http_method": "GET",
178 | "arguments": [
179 | {"name": "dag_id", "description": "The id of the dag", "form_input_type": "text", "required": True, "cli_end_position": 1},
180 | {"name": "task_id", "description": "The id of the task", "form_input_type": "text", "required": True, "cli_end_position": 2},
181 | {"name": "execution_date", "description": "The execution date of the DAG (Example: 2017-01-02T03:04:05)", "form_input_type": "text", "required": True, "cli_end_position": 3},
182 | {"name": "subdir", "description": "File location or directory from which to look for the dag", "form_input_type": "text", "required": False}
183 | ]
184 | },
185 | { # todo: should print out the run id
186 | "name": "trigger_dag",
187 | "description": "Trigger a DAG run",
188 | "airflow_version": "1.6.0 or greater",
189 | "http_method": "GET",
190 | "arguments": [
191 | {"name": "dag_id", "description": "The id of the dag", "form_input_type": "text", "required": True, "cli_end_position": 1},
192 | {"name": "subdir", "description": "File location or directory from which to look for the dag", "form_input_type": "text", "required": False},
193 | {"name": "run_id", "description": "Helps to identify this run", "form_input_type": "text", "required": False},
194 | {"name": "conf", "description": "JSON string that gets pickled into the DagRun's conf attribute", "form_input_type": "text", "required": False},
195 | {"name": "exec_date", "description": "The execution date of the DAG", "form_input_type": "text", "required": False}
196 | ]
197 | },
198 | {
199 | "name": "test",
200 | "description": "Test a task instance. This will run a task without checking for dependencies or recording it's state in the database.",
201 | "airflow_version": "0.1 or greater",
202 | "http_method": "GET",
203 | "arguments": [
204 | {"name": "dag_id", "description": "The id of the dag", "form_input_type": "text", "required": True, "cli_end_position": 1},
205 | {"name": "task_id", "description": "The id of the task", "form_input_type": "text", "required": True, "cli_end_position": 2},
206 | {"name": "execution_date", "description": "The execution date of the DAG (Example: 2017-01-02T03:04:05)", "form_input_type": "text", "required": True, "cli_end_position": 3},
207 | {"name": "subdir", "description": "File location or directory from which to look for the dag", "form_input_type": "text", "required": False},
208 | {"name": "dry_run", "description": "Perform a dry run", "form_input_type": "checkbox", "required": False},
209 | {"name": "task_params", "description": "Sends a JSON params dict to the task", "form_input_type": "text", "required": False}
210 | ]
211 | },
212 | {
213 | "name": "dag_state",
214 | "description": "Get the status of a dag run",
215 | "airflow_version": "1.8.0 or greater",
216 | "http_method": "GET",
217 | "arguments": [
218 | {"name": "dag_id", "description": "The id of the dag", "form_input_type": "text", "required": True, "cli_end_position": 1},
219 | {"name": "execution_date", "description": "The execution date of the DAG (Example: 2017-01-02T03:04:05)", "form_input_type": "text", "required": True, "cli_end_position": 2},
220 | {"name": "subdir", "description": "File location or directory from which to look for the dag", "form_input_type": "text", "required": False}
221 | ]
222 | },
223 | {
224 | "name": "run",
225 | "description": "Run a single task instance",
226 | "airflow_version": "1.0.0 or greater",
227 | "http_method": "GET",
228 | "arguments": [
229 | {"name": "dag_id", "description": "The id of the dag", "form_input_type": "text", "required": True, "cli_end_position": 1},
230 | {"name": "task_id", "description": "The id of the task", "form_input_type": "text", "required": True, "cli_end_position": 2},
231 | {"name": "execution_date", "description": "The execution date of the DAG (Example: 2017-01-02T03:04:05)", "form_input_type": "text", "required": True, "cli_end_position": 3},
232 | {"name": "subdir", "description": "File location or directory from which to look for the dag", "form_input_type": "text", "required": False},
233 | {"name": "mark_success", "description": "Mark jobs as succeeded without running them", "form_input_type": "checkbox", "required": False},
234 | {"name": "force", "description": "Ignore previous task instance state, rerun regardless if task already succeede", "form_input_type": "checkbox", "required": False},
235 | {"name": "pool", "description": "Resource pool to use", "form_input_type": "text", "required": False},
236 | {"name": "cfg_path", "description": "Path to config file to use instead of airflow.cfg", "form_input_type": "text", "required": False},
237 | {"name": "local", "description": "Run the task using the LocalExecutor", "form_input_type": "checkbox", "required": False},
238 | {"name": "ignore_all_dependencies", "description": "Ignores all non-critical dependencies, including ignore_ti_state and ignore_task_depsstore_true", "form_input_type": "checkbox", "required": False},
239 | {"name": "ignore_dependencies", "description": "Ignore task-specific dependencies, e.g. upstream, depends_on_past, and retry delay dependencies", "form_input_type": "checkbox", "required": False},
240 | {"name": "ignore_depends_on_past", "description": "Ignore depends_on_past dependencies (but respect upstream dependencies)", "form_input_type": "checkbox", "required": False},
241 | {"name": "ship_dag", "description": "Pickles (serializes) the DAG and ships it to the worker", "form_input_type": "checkbox", "required": False},
242 | {"name": "pickle", "description": "Serialized pickle object of the entire dag (used internally)", "form_input_type": "text", "required": False},
243 | ]
244 | },
245 | {
246 | "name": "list_tasks",
247 | "description": "List the tasks within a DAG",
248 | "airflow_version": "0.1 or greater",
249 | "http_method": "GET",
250 | "arguments": [
251 | {"name": "dag_id", "description": "The id of the dag", "form_input_type": "text", "required": True, "cli_end_position": 1},
252 | {"name": "tree", "description": "Tree view", "form_input_type": "checkbox", "required": False},
253 | {"name": "subdir", "description": "File location or directory from which to look for the dag", "form_input_type": "text", "required": False}
254 | ]
255 | },
256 | {
257 | "name": "backfill",
258 | "description": "Run subsections of a DAG for a specified date range",
259 | "airflow_version": "0.1 or greater",
260 | "http_method": "GET",
261 | "arguments": [
262 | {"name": "dag_id", "description": "The id of the dag", "form_input_type": "text", "required": True, "cli_end_position": 1},
263 | {"name": "task_regex", "description": "The regex to filter specific task_ids to backfill (optional)", "form_input_type": "text", "required": False},
264 | {"name": "start_date", "description": "Override start_date YYYY-MM-DD. Either this or the end_date needs to be provided.", "form_input_type": "text", "required": False},
265 | {"name": "end_date", "description": "Override end_date YYYY-MM-DD. Either this or the start_date needs to be provided.", "form_input_type": "text", "required": False},
266 | {"name": "mark_success", "description": "Mark jobs as succeeded without running them", "form_input_type": "checkbox", "required": False},
267 | {"name": "local", "description": "Run the task using the LocalExecutor", "form_input_type": "checkbox", "required": False},
268 | {"name": "donot_pickle", "description": "Do not attempt to pickle the DAG object to send over to the workers, just tell the workers to run their version of the code.", "form_input_type": "checkbox", "required": False},
269 | {"name": "include_adhoc", "description": "Include dags with the adhoc argument.", "form_input_type": "checkbox", "required": False},
270 | {"name": "ignore_dependencies", "description": "Ignore task-specific dependencies, e.g. upstream, depends_on_past, and retry delay dependencies", "form_input_type": "checkbox", "required": False},
271 | {"name": "ignore_first_depends_on_past", "description": "Ignores depends_on_past dependencies for the first set of tasks only (subsequent executions in the backfill DO respect depends_on_past).", "form_input_type": "checkbox", "required": False},
272 | {"name": "subdir", "description": "File location or directory from which to look for the dag", "form_input_type": "text", "required": False},
273 | {"name": "pool", "description": "Resource pool to use", "form_input_type": "text", "required": False},
274 | {"name": "dry_run", "description": "Perform a dry run", "form_input_type": "checkbox", "required": False}
275 | ]
276 | },
277 | {
278 | "name": "list_dags",
279 | "description": "List all the DAGs",
280 | "airflow_version": "0.1 or greater",
281 | "http_method": "GET",
282 | "arguments": [
283 | {"name": "subdir", "description": "File location or directory from which to look for the dag", "form_input_type": "text", "required": False},
284 | {"name": "report", "description": "Show DagBag loading report", "form_input_type": "checkbox", "required": False}
285 | ]
286 | },
287 | {
288 | "name": "kerberos",
289 | "description": "Start a kerberos ticket renewer",
290 | "airflow_version": "1.6.0 or greater",
291 | "http_method": "GET",
292 | "background_mode": True,
293 | "arguments": [
294 | {"name": "principal", "description": "kerberos principal", "form_input_type": "text", "required": True, "cli_end_position": 1},
295 | {"name": "keytab", "description": "keytab", "form_input_type": "text", "required": False},
296 | {"name": "pid", "description": "PID file location", "form_input_type": "text", "required": False},
297 | {"name": "daemon", "description": "Daemonize instead of running in the foreground", "form_input_type": "checkbox", "required": False},
298 | {"name": "stdout", "description": "Redirect stdout to this file", "form_input_type": "text", "required": False},
299 | {"name": "stderr", "description": "Redirect stderr to this file", "form_input_type": "text", "required": False},
300 | {"name": "log-file", "description": "Location of the log file", "form_input_type": "text", "required": False}
301 | ]
302 | },
303 | {
304 | "name": "worker",
305 | "description": "Start a Celery worker node",
306 | "airflow_version": "0.1 or greater",
307 | "http_method": "GET",
308 | "background_mode": True,
309 | "arguments": [
310 | {"name": "do_pickle", "description": "Attempt to pickle the DAG object to send over to the workers, instead of letting workers run their version of the code.", "form_input_type": "checkbox", "required": False},
311 | {"name": "queues", "description": "Comma delimited list of queues to serve", "form_input_type": "text", "required": False},
312 | {"name": "concurrency", "description": "The number of worker processes", "form_input_type": "text", "required": False},
313 | {"name": "pid", "description": "PID file location", "form_input_type": "checkbox", "required": False},
314 | {"name": "daemon", "description": "Daemonize instead of running in the foreground", "form_input_type": "checkbox", "required": False},
315 | {"name": "stdout", "description": "Redirect stdout to this file", "form_input_type": "text", "required": False},
316 | {"name": "stderr", "description": "Redirect stderr to this file", "form_input_type": "text", "required": False},
317 | {"name": "log-file", "description": "Location of the log file", "form_input_type": "text", "required": False}
318 | ]
319 | },
320 | {
321 | "name": "flower",
322 | "description": "Start a Celery worker node",
323 | "airflow_version": "1.0.0 or greater",
324 | "http_method": "GET",
325 | "background_mode": True,
326 | "arguments": [
327 | {"name": "hostname", "description": "Set the hostname on which to run the server", "form_input_type": "text", "required": False},
328 | {"name": "port", "description": "The port on which to run the server", "form_input_type": "text", "required": False},
329 | {"name": "flower_conf", "description": "Configuration file for flower", "form_input_type": "text", "required": False},
330 | {"name": "broker_api", "description": "Broker api", "form_input_type": "text", "required": False},
331 | {"name": "pid", "description": "PID file location", "form_input_type": "text", "required": False},
332 | {"name": "daemon", "description": "Daemonize instead of running in the foreground", "form_input_type": "checkbox", "required": False},
333 | {"name": "stdout", "description": "Redirect stdout to this file", "form_input_type": "text", "required": False},
334 | {"name": "stderr", "description": "Redirect stderr to this file", "form_input_type": "text", "required": False},
335 | {"name": "log-file", "description": "Location of the log file", "form_input_type": "text", "required": False},
336 | ]
337 | },
338 | {
339 | "name": "scheduler",
340 | "description": "Start a scheduler instance",
341 | "airflow_version": "1.0.0 or greater",
342 | "http_method": "GET",
343 | "background_mode": True,
344 | "arguments": [
345 | {"name": "dag_id", "description": "The id of the dag", "form_input_type": "text", "required": False},
346 | {"name": "subdir", "description": "File location or directory from which to look for the dag", "form_input_type": "text", "required": False},
347 | {"name": "run-duration", "description": "Set number of seconds to execute before exiting", "form_input_type": "text", "required": False},
348 | {"name": "num_runs", "description": "Set the number of runs to execute before exiting", "form_input_type": "text", "required": False},
349 | {"name": "do_pickle", "description": "Attempt to pickle the DAG object to send over to the workers, instead of letting workers run their version of the code.", "form_input_type": "text", "required": False},
350 | {"name": "pid", "description": "PID file location", "form_input_type": "checkbox", "required": False},
351 | {"name": "daemon", "description": "Daemonize instead of running in the foreground", "form_input_type": "checkbox", "required": False},
352 | {"name": "stdout", "description": "Redirect stdout to this file", "form_input_type": "text", "required": False},
353 | {"name": "stderr", "description": "Redirect stderr to this file", "form_input_type": "text", "required": False},
354 | {"name": "log-file", "description": "Location of the log file", "form_input_type": "text", "required": False}
355 | ]
356 | },
357 | {
358 | "name": "task_state",
359 | "description": "Get the status of a task instance",
360 | "airflow_version": "1.0.0 or greater",
361 | "http_method": "GET",
362 | "arguments": [
363 | {"name": "dag_id", "description": "The id of the dag", "form_input_type": "text", "required": True, "cli_end_position": 1},
364 | {"name": "task_id", "description": "The id of the task", "form_input_type": "text", "required": True, "cli_end_position": 2},
365 | {"name": "execution_date", "description": "The execution date of the DAG (Example: 2017-01-02T03:04:05)", "form_input_type": "text", "required": True, "cli_end_position": 3},
366 | {"name": "subdir", "description": "File location or directory from which to look for the dag", "form_input_type": "text", "required": False}
367 | ]
368 | },
369 | {
370 | "name": "pool",
371 | "description": "CRUD operations on pools",
372 | "airflow_version": "1.8.0 or greater",
373 | "http_method": "GET",
374 | "arguments": [
375 | {"name": "set", "description": "Set pool slot count and description, respectively. Expected input in the form: NAME SLOT_COUNT POOL_DESCRIPTION.", "form_input_type": "text", "required": False},
376 | {"name": "get", "description": "Get pool info", "form_input_type": "text", "required": False},
377 | {"name": "delete", "description": "Delete a pool", "form_input_type": "text", "required": False}
378 | ]
379 | },
380 | {
381 | "name": "serve_logs",
382 | "description": "Serve logs generate by worker",
383 | "airflow_version": "0.1 or greater",
384 | "http_method": "GET",
385 | "background_mode": True,
386 | "arguments": []
387 | },
388 | {
389 | "name": "clear",
390 | "description": "Clear a set of task instance, as if they never ran",
391 | "airflow_version": "0.1 or greater",
392 | "http_method": "GET",
393 | "arguments": [
394 | {"name": "dag_id", "description": "The id of the dag", "form_input_type": "text", "required": True, "cli_end_position": 1},
395 | {"name": "task_regex", "description": "The regex to filter specific task_ids to backfill (optional)", "form_input_type": "text", "required": False},
396 | {"name": "start_date", "description": "Override start_date YYYY-MM-DD", "form_input_type": "text", "required": False},
397 | {"name": "end_date", "description": "Override end_date YYYY-MM-DD", "form_input_type": "text", "required": False},
398 | {"name": "subdir", "description": "File location or directory from which to look for the dag", "form_input_type": "text", "required": False},
399 | {"name": "upstream", "description": "Include upstream tasks", "form_input_type": "checkbox", "required": False},
400 | {"name": "downstream", "description": "Include downstream tasks", "form_input_type": "checkbox", "required": False},
401 | {"name": "only_failed", "description": "Only failed jobs", "form_input_type": "checkbox", "required": False},
402 | {"name": "only_running", "description": "Only running jobs", "form_input_type": "checkbox", "required": False},
403 | {"name": "exclude_subdags", "description": "Exclude subdags", "form_input_type": "checkbox", "required": False}
404 | ],
405 | "fixed_arguments": [
406 | {"name": "no_confirm", "description": "Do not request confirmation", "fixed_value": ""}
407 | ],
408 | },
409 | {
410 | "name": "deploy_dag",
411 | "description": "Deploy a new DAG File to the DAGs directory",
412 | "airflow_version": "None - Custom API",
413 | "http_method": "POST",
414 | "post_body_description": "dag_file - POST Body Element - REQUIRED",
415 | "form_enctype": "multipart/form-data",
416 | "arguments": [],
417 | "post_arguments": [
418 | {"name": "dag_file", "description": "Python file to upload and deploy", "form_input_type": "file", "required": True},
419 | {"name": "force", "description": "Whether to forcefully upload the file if the file already exists or not", "form_input_type": "checkbox", "required": False},
420 | {"name": "pause", "description": "The DAG will be forced to be paused when created and override the 'dags_are_paused_at_creation' config.", "form_input_type": "checkbox", "required": False},
421 | {"name": "unpause", "description": "The DAG will be forced to be unpaused when created and override the 'dags_are_paused_at_creation' config.", "form_input_type": "checkbox", "required": False}
422 | ]
423 | },
424 | {
425 | "name": "refresh_dag",
426 | "description": "Refresh a DAG in the Web Server",
427 | "airflow_version": "None - Custom API",
428 | "http_method": "GET",
429 | "arguments": [
430 | {"name": "dag_id", "description": "The id of the dag", "form_input_type": "text", "required": True}
431 | ]
432 | }
433 | ]
434 |
435 |
436 | # Decorator used to secure the REST endpoint with an optional HTTP token check
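# Example (illustrative; the header name below is an assumption based on the plugin's usual configuration,
# and the actual value comes from the configuration read earlier in this file):
#   curl -H "rest_api_plugin_http_token: <token>" "http://<host>:<port>/admin/rest_api/api?api=version"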
437 | def http_token_secure(func):
438 | def secure_check(arg):
439 | logging.info("Rest_API_Plugin.http_token_secure() called")
440 |         # If the airflow_expected_http_token configuration value is set, token authentication is enabled.
441 | if airflow_expected_http_token:
442 | logging.info("Performing Token Authentication")
443 | if request.headers.get(airflow_rest_api_plugin_http_token_header_name, None) != airflow_expected_http_token:
444 | warning_message = "Token Authentication Failed"
445 |                 logging.warning(warning_message)
446 | base_response = REST_API_Response_Util.get_base_response(include_arguments=False)
447 | return REST_API_Response_Util.get_403_error_response(base_response=base_response, output=warning_message)
448 | return func(arg)
449 |
450 | return secure_check
451 |
452 |
453 | # Utility for creating the REST Responses
454 | class REST_API_Response_Util():
455 |
456 | # Gets the Base Response object with all required response fields included. To be used at the beginning of the REST Call.
457 | @staticmethod
458 | def get_base_response(status="OK", http_response_code=200, call_time=datetime.now(), include_arguments=True):
459 | base_response = {"status": status, "http_response_code": http_response_code, "call_time": call_time}
460 | if include_arguments:
461 | base_response["arguments"] = request.args
462 | base_response["post_arguments"] = request.form
463 | return base_response
464 |
465 | # Finalize the Base Response with additional data
466 | @staticmethod
467 | def _get_final_response(base_response, output=None, airflow_cmd=None, http_response_code=None, warning=None):
468 | final_response = base_response
469 | final_response["response_time"] = datetime.now()
470 | if output:
471 | final_response["output"] = output
472 | if airflow_cmd:
473 | final_response["airflow_cmd"] = airflow_cmd
474 | if http_response_code:
475 | final_response["http_response_code"] = http_response_code
476 | if warning:
477 | final_response["warning"] = warning
478 | return jsonify(final_response)
479 |
480 | # Set the Base Response as a 200 HTTP Response object
481 | @staticmethod
482 | def get_200_response(base_response, output=None, airflow_cmd=None, warning=None):
483 | logging.info("Returning a 200 Response Code with response '" + str(output) + "'")
484 | return REST_API_Response_Util._get_final_response(base_response=base_response, output=output, airflow_cmd=airflow_cmd, warning=warning)
485 |
486 | # Set the Base Response and an Error
487 | @staticmethod
488 | def _get_error_response(base_response, error_code, output=None):
489 | base_response["status"] = "ERROR"
490 | return REST_API_Response_Util._get_final_response(base_response=base_response, output=output, http_response_code=error_code), error_code
491 |
492 | # Set the Base Response as a 400 HTTP Response object
493 | @staticmethod
494 | def get_400_error_response(base_response, output=None):
495 | logging.warning("Returning a 400 Response Code with response '" + str(output) + "'")
496 | return REST_API_Response_Util._get_error_response(base_response, 400, output)
497 |
498 | # Set the Base Response as a 403 HTTP Response object
499 | @staticmethod
500 | def get_403_error_response(base_response, output=None):
501 | logging.warning("Returning a 403 Response Code with response '" + str(output) + "'")
502 | return REST_API_Response_Util._get_error_response(base_response, 403, output)
503 |
504 | # Set the Base Response as a 500 HTTP Response object
505 | @staticmethod
506 | def get_500_error_response(base_response, output=None):
507 | logging.warning("Returning a 500 Response Code with response '" + str(output) + "'")
508 | return REST_API_Response_Util._get_error_response(base_response, 500, output)
509 |
510 |
511 | # REST_API View which extends the flask_admin BaseView
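# With flask-admin's default routing, this view is typically mounted at /admin/rest_api/ (index page)
# and /admin/rest_api/api (REST endpoint); the exact URL prefix depends on the deployment.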
512 | class REST_API(BaseView):
513 |
514 |     # Checks whether a string is None or empty so we can determine if an argument passed to the REST API was provided
515 | @staticmethod
516 | def is_arg_not_provided(arg):
517 | return arg is None or arg == ""
518 |
519 | # Get the DagBag which has a list of all the current Dags
520 | @staticmethod
521 | def get_dagbag():
522 | return DagBag()
523 |
524 |     # '/' endpoint that serves the Admin page, which lets you view the available APIs and trigger them
525 | @expose('/')
526 | def index(self):
527 | logging.info("REST_API.index() called")
528 |
529 | # get the information that we want to display on the page regarding the dags that are available
530 | dagbag = self.get_dagbag()
531 | dags = []
532 | for dag_id in dagbag.dags:
533 | orm_dag = DagModel.get_current(dag_id)
534 | dags.append({
535 | "dag_id": dag_id,
536 | "is_active": (not orm_dag.is_paused) if orm_dag is not None else False
537 | })
538 |
539 | return self.render("rest_api_plugin/index.html",
540 | dags=dags,
541 | airflow_webserver_base_url=airflow_webserver_base_url,
542 | rest_api_endpoint=rest_api_endpoint,
543 | apis_metadata=apis_metadata,
544 | airflow_version=airflow_version,
545 | rest_api_plugin_version=rest_api_plugin_version
546 | )
547 |
548 | # '/api' REST Endpoint where API requests should all come in
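    # Example requests (illustrative; base URL assumed to be /admin/rest_api as noted above):
    #   GET  /admin/rest_api/api?api=trigger_dag&dag_id=hello_world
    #   POST /admin/rest_api/api?api=deploy_dag   (multipart/form-data body containing a 'dag_file' part)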
549 | @csrf.exempt # Exempt the CSRF token
550 | @expose('/api', methods=["GET", "POST"])
551 |     @http_token_secure  # Require a valid token on each request when token authentication is enabled
552 | def api(self):
553 | base_response = REST_API_Response_Util.get_base_response()
554 |
555 | # Get the api that you want to execute
556 | api = request.args.get('api')
557 | if api is not None:
558 | api = api.strip().lower()
559 | logging.info("REST_API.api() called (api: " + str(api) + ")")
560 |
561 | # Validate that the API is provided
562 | if self.is_arg_not_provided(api):
563 | logging.warning("api argument not provided")
564 | return REST_API_Response_Util.get_400_error_response(base_response, "API should be provided")
565 |
566 |         # Get the entry from the apis_metadata list that corresponds to the requested API
567 | api_metadata = None
568 | for test_api_metadata in apis_metadata:
569 | if test_api_metadata["name"] == api:
570 | api_metadata = test_api_metadata
571 | if api_metadata is None:
572 | logging.info("api '" + str(api) + "' was not found in the apis list in the REST API Plugin")
573 | return REST_API_Response_Util.get_400_error_response(base_response, "API '" + str(api) + "' was not found")
574 |
575 | # check if all the required arguments are provided
576 | missing_required_arguments = []
577 | dag_id = None
578 | for argument in api_metadata["arguments"]:
579 | argument_name = argument["name"]
580 | argument_value = request.args.get(argument_name)
581 | if argument["required"]:
582 | if self.is_arg_not_provided(argument_value):
583 | missing_required_arguments.append(argument_name)
584 | if argument_name == "dag_id" and argument_value is not None:
585 | dag_id = argument_value.strip()
586 | if len(missing_required_arguments) > 0:
587 | logging.warning("Missing required arguments: " + str(missing_required_arguments))
588 | return REST_API_Response_Util.get_400_error_response(base_response, "The argument(s) " + str(missing_required_arguments) + " are required")
589 |
590 |         # Check to make sure that the DAG being referred to already exists
591 | dag_bag = self.get_dagbag()
592 | if dag_id is not None and dag_id not in dag_bag.dags:
593 | logging.info("DAG_ID '" + str(dag_id) + "' was not found in the DagBag list '" + str(dag_bag.dags) + "'")
594 | return REST_API_Response_Util.get_400_error_response(base_response, "The DAG ID '" + str(dag_id) + "' does not exist")
595 |
596 |         # Decide which function to use based on the API that was requested. Some APIs are custom and need to be routed manually.
597 | if api == "version":
598 | final_response = self.version(base_response)
599 | elif api == "rest_api_plugin_version":
600 | final_response = self.rest_api_plugin_version(base_response)
601 | elif api == "deploy_dag":
602 | final_response = self.deploy_dag(base_response)
603 | elif api == "refresh_dag":
604 | final_response = self.refresh_dag(base_response)
605 | else:
606 | final_response = self.execute_cli(base_response, api_metadata)
607 |
608 | return final_response
609 |
610 | # General execution of a CLI command
611 |     # A command is assembled, executed on the host as a command-line call, and the results are returned
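    # Example (illustrative): a request to api=pause with dag_id=hello_world and subdir=/path/to/dags
    # is assembled into: airflow pause --subdir /path/to/dags hello_world
    # (dag_id has cli_end_position=1, so it is appended after the named arguments)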
612 | def execute_cli(self, base_response, api_metadata):
613 | logging.info("Executing cli function")
614 |
615 | # getting the largest cli_end_position in the api_metadata object so that the cli function can be assembled
616 | largest_end_argument_value = 0
617 | for argument in api_metadata.get("arguments", []):
618 | if argument.get("cli_end_position") is not None and argument["cli_end_position"] > largest_end_argument_value:
619 | largest_end_argument_value = argument["cli_end_position"]
620 |
621 |         # start assembling the airflow command
622 | airflow_cmd_split = ["airflow", api_metadata["name"]]
623 |
624 | # appending arguments to the airflow_cmd_split array and setting arguments aside in the end_arguments array to be appended onto the end of airflow_cmd_split
625 | end_arguments = [0] * largest_end_argument_value
626 | for argument in api_metadata["arguments"]:
627 | argument_name = argument["name"]
628 | argument_value = request.args.get(argument_name)
629 | logging.info("argument_name: " + str(argument_name) + ", argument_value: " + str(argument_value))
630 | if argument_value is not None:
631 | # if the argument should be appended onto the end, find the position and add it to the end_arguments array
632 | if "cli_end_position" in argument:
633 | logging.info("argument['cli_end_position']: " + str(argument['cli_end_position']))
634 | end_arguments[argument["cli_end_position"]-1] = argument_value
635 | else:
636 | airflow_cmd_split.extend(["--" + argument_name])
637 | if argument["form_input_type"] is not "checkbox":
638 | # Relacing airflow_cmd_split.extend(argument_value.split(" ") with command below to fix issue where configuration
639 | # values contain space with them.
640 | airflow_cmd_split.append(argument_value)
641 | else:
642 | logging.warning("argument_value is null")
643 |
644 | # appending fixed arguments that should always be provided to the APIs
645 | for fixed_argument in api_metadata.get("fixed_arguments", []):
646 | fixed_argument_name = fixed_argument["name"]
647 | fixed_argument_value = fixed_argument.get("fixed_value")
648 | logging.info("fixed_argument_name: " + str(fixed_argument_name) + ", fixed_argument_value: " + str(fixed_argument_value))
649 | if fixed_argument_value is not None:
650 | airflow_cmd_split.extend(["--" + fixed_argument_name])
651 | if fixed_argument_value:
652 | airflow_cmd_split.extend(fixed_argument_value.split(" "))
653 |
654 | # appending the end_arguments to the very end
655 | airflow_cmd_split.extend(end_arguments)
656 |
657 | run_api_in_background_mode = "background_mode" in api_metadata and api_metadata["background_mode"]
658 |
659 |         # handling the case where the process should be run in the background
660 | if run_api_in_background_mode:
661 |             # if no log-file argument was provided, redirect the output to a default log file in the airflow log folder
662 | if request.args.get("log-file") is None:
663 | airflow_cmd_split.append(">> " + str(airflow_base_log_folder) + "/" + api_metadata["name"] + ".log")
664 | # appending a '&' character to run the process in the background
665 | airflow_cmd_split.append("&")
666 |
667 | # joining all the individual arguments and components into a single string
668 | airflow_cmd = " ".join(airflow_cmd_split)
669 |
670 | logging.info("airflow_cmd array: " + str(airflow_cmd_split))
671 | logging.info("airflow_cmd: " + str(airflow_cmd))
672 |
673 |         # execute the airflow command differently if it's meant to be run in the background
674 | if run_api_in_background_mode:
675 | output = self.execute_cli_command_background_mode(airflow_cmd)
676 | else:
677 | output = self.execute_cli_command(airflow_cmd_split)
678 |
679 | # if desired, filter out the loading messages to reduce the noise in the output
680 | if filter_loading_messages_in_cli_response:
681 | logging.info("Filtering Loading Messages from the CLI Response")
682 | output = self.filter_loading_messages(output)
683 |
684 | return REST_API_Response_Util.get_200_response(base_response=base_response, output=output, airflow_cmd=airflow_cmd)
685 |
686 | # Custom function for the version API
687 | def version(self, base_response):
688 | logging.info("Executing custom 'version' function")
689 | return REST_API_Response_Util.get_200_response(base_response, airflow_version)
690 |
691 | # Custom function for the rest_api_plugin_version API
692 | def rest_api_plugin_version(self, base_response):
693 | logging.info("Executing custom 'rest_api_plugin_version' function")
694 | return REST_API_Response_Util.get_200_response(base_response, rest_api_plugin_version)
695 |
696 | # Custom Function for the deploy_dag API
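    # Example (illustrative): upload a DAG file with curl, overwriting an existing file of the same name:
    #   curl -X POST -F "dag_file=@/path/to/my_dag.py" -F "force=on" \
    #        "http://<host>:<port>/admin/rest_api/api?api=deploy_dag"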
697 | def deploy_dag(self, base_response):
698 | logging.info("Executing custom 'deploy_dag' function")
699 |
700 | if 'dag_file' not in request.files or request.files['dag_file'].filename == '': # check if the post request has the file part
701 | logging.warning("The dag_file argument wasn't provided")
702 | return REST_API_Response_Util.get_400_error_response(base_response, "dag_file should be provided")
703 | dag_file = request.files['dag_file']
704 |
705 |         force = request.form.get('force') is not None
706 |         logging.info("deploy_dag force upload: " + str(force))
707 |
708 |         pause = request.form.get('pause') is not None
709 |         logging.info("deploy_dag in pause state: " + str(pause))
710 |
711 |         unpause = request.form.get('unpause') is not None
712 |         logging.info("deploy_dag in unpause state: " + str(unpause))
713 |
714 | # make sure that the dag_file is a python script
715 | if dag_file and dag_file.filename.endswith(".py"):
716 | save_file_path = os.path.join(airflow_dags_folder, dag_file.filename)
717 |
718 | # Check if the file already exists.
719 | if os.path.isfile(save_file_path) and not force:
720 | logging.warning("File to upload already exists")
721 | return REST_API_Response_Util.get_400_error_response(base_response, "The file '" + save_file_path + "' already exists on host '" + hostname + "'.")
722 |
723 | logging.info("Saving file to '" + save_file_path + "'")
724 | dag_file.save(save_file_path)
725 |
726 | else:
727 | logging.warning("deploy_dag file is not a python file. It does not end with a .py.")
728 | return REST_API_Response_Util.get_400_error_response(base_response, "dag_file is not a *.py file")
729 |
730 | warning = None
731 | # if both the pause and unpause options are provided then skip the pausing and unpausing phase
732 | if not (pause and unpause):
733 | if pause or unpause:
734 | try:
735 |                     # import the DAG file that was uploaded so that we can get the dag_id needed to pause or unpause it
736 |                     import imp
737 |                     dag_module = imp.load_source('module.name', save_file_path)  # separate name so the uploaded file object isn't shadowed
738 |                     dag_id = dag_module.dag.dag_id
739 |
740 | # run the pause or unpause cli command
741 | airflow_cmd_split = []
742 | if pause:
743 | airflow_cmd_split = ["airflow", "pause", dag_id]
744 | if unpause:
745 | airflow_cmd_split = ["airflow", "unpause", dag_id]
746 | cli_output = self.execute_cli_command(airflow_cmd_split)
747 | except Exception as e:
748 | warning = "Failed to set the state (pause, unpause) of the DAG: " + str(e)
749 | logging.warning(warning)
750 | else:
751 | warning = "Both options pause and unpause were given. Skipping setting the state (pause, unpause) of the DAG."
752 | logging.warning(warning)
753 |
754 |         return REST_API_Response_Util.get_200_response(base_response=base_response, output="DAG File [{}] has been uploaded".format(dag_file.filename), warning=warning)
755 |
756 | # Custom Function for the refresh_dag API
757 | # This will call the direct function corresponding to the web endpoint '/admin/airflow/refresh' that already exists in Airflow
758 | def refresh_dag(self, base_response):
759 | logging.info("Executing custom 'refresh_dag' function")
760 | dag_id = request.args.get('dag_id')
761 | logging.info("dag_id to refresh: '" + str(dag_id) + "'")
762 | if self.is_arg_not_provided(dag_id):
763 | return REST_API_Response_Util.get_400_error_response(base_response, "dag_id should be provided")
764 | elif " " in dag_id:
765 | return REST_API_Response_Util.get_400_error_response(base_response, "dag_id contains spaces and is therefore an illegal argument")
766 |
767 | try:
768 | from airflow.www.views import Airflow
769 | # NOTE: The request argument 'dag_id' is required for the refresh() function to get the dag_id
770 | refresh_result = Airflow().refresh()
771 | logging.info("Refresh Result: " + str(refresh_result))
772 | except Exception as e:
773 | error_message = "An error occurred while trying to Refresh the DAG '" + str(dag_id) + "': " + str(e)
774 | logging.error(error_message)
775 | return REST_API_Response_Util.get_500_error_response(base_response, error_message)
776 |
777 | return REST_API_Response_Util.get_200_response(base_response=base_response, output="DAG [{}] is now fresh as a daisy".format(dag_id))
778 |
779 | # Executes the airflow command passed into it in the background so the function isn't tied to the webserver process
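    # Note: the trailing '&' appended in execute_cli() backgrounds the shell command, so os.system() returns
    # almost immediately and only the shell's exit code (not the command's own output) can be reported here.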
780 | @staticmethod
781 | def execute_cli_command_background_mode(airflow_cmd):
782 | logging.info("Executing CLI Command in the Background")
783 | exit_code = os.system(airflow_cmd)
784 | output = REST_API.get_empty_process_output()
785 | output["stdout"] = "exit_code: " + str(exit_code)
786 | return output
787 |
788 |     # Executes the airflow command passed to it and returns the response
789 | @staticmethod
790 | def execute_cli_command(airflow_cmd_split):
791 | logging.info("Executing CLI Command")
792 | process = subprocess.Popen(airflow_cmd_split, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
793 | process.wait()
794 | return REST_API.collect_process_output(process)
795 |
796 |     # gets an empty object that has all the fields a CLI call's output would have in it
797 | @staticmethod
798 | def get_empty_process_output():
799 | return {
800 | "stderr": "",
801 | "stdin": "",
802 | "stdout": ""
803 | }
804 |
805 | # Get the output of the CLI process and package it in a dict
806 | @staticmethod
807 | def collect_process_output(process):
808 | output = REST_API.get_empty_process_output()
809 | if process.stderr is not None:
810 | output["stderr"] = ""
811 | for line in process.stderr.readlines():
812 | output["stderr"] += str(line)
813 | if process.stdin is not None:
814 | output["stdin"] = ""
815 | for line in process.stdin.readlines():
816 | output["stdin"] += str(line)
817 | if process.stdout is not None:
818 | output["stdout"] = ""
819 | for line in process.stdout.readlines():
820 | output["stdout"] += str(line)
821 | logging.info("RestAPI Output: " + str(output))
822 | return output
823 |
824 | # Filtering out logging statements from the standard output
825 | # Content like:
826 | #
827 | # [2017-04-19 10:04:34,927] {__init__.py:36} INFO - Using executor CeleryExecutor
828 | # [2017-04-19 10:04:35,926] {models.py:154} INFO - Filling up the DagBag from /Users/...
829 | @staticmethod
830 | def filter_loading_messages(output):
831 | stdout = output["stdout"]
832 | new_stdout_array = stdout.split("\n")
833 | content_to_remove_greatest_index = 0
834 | for index, content in enumerate(new_stdout_array):
835 | if content.startswith("["):
836 | content_to_remove_greatest_index = index
837 | content_to_remove_greatest_index += 1
838 | if len(new_stdout_array) > content_to_remove_greatest_index:
839 | new_stdout_array = new_stdout_array[content_to_remove_greatest_index:]
840 | output["stdout"] = "\n".join(new_stdout_array)
841 | return output
842 |
843 | # Creating View to be used by Plugin
844 | rest_api_view = REST_API(category="Admin", name="REST API Plugin")
845 |
846 | # Creating Blueprint
847 | rest_api_bp = Blueprint(
848 | "rest_api_bp",
849 | __name__,
850 | template_folder='templates',
851 | static_folder='static',
852 | static_url_path='/static/'
853 | )
854 |
855 |
856 | # Creating the REST_API_Plugin which extends AirflowPlugin so it is imported into Airflow
857 | class REST_API_Plugin(AirflowPlugin):
858 | name = "rest_api"
859 | operators = []
860 | flask_blueprints = [rest_api_bp]
861 | hooks = []
862 | executors = []
863 | admin_views = [rest_api_view]
864 | menu_links = []
865 |
--------------------------------------------------------------------------------