├── src ├── unittest │ ├── python │ │ ├── dags │ │ │ ├── __init__.py │ │ │ ├── helloworld_dag_tests.py │ │ │ └── helloworld_xcoms_tests.py │ │ ├── plugins │ │ │ ├── __init__.py │ │ │ ├── multiplyby5_operator_tests.py │ │ │ └── helloworld_sensor_tests.py │ │ ├── resources │ │ │ ├── variables.json │ │ │ └── connections.sh │ │ ├── .DS_Store │ │ └── dag_integrity_tests.py │ └── .DS_Store ├── .DS_Store ├── integrationtest │ └── python │ │ ├── constants.py │ │ ├── db_util.py │ │ ├── dags │ │ ├── hello_world_tests.py │ │ └── presto_to_mysql_tests.py │ │ └── airflow_api.py └── main │ ├── .DS_Store │ └── python │ ├── .DS_Store │ ├── plugins │ ├── multiplyby5_operator.py │ ├── helloworld_sensor.py │ ├── templates │ │ └── rest_api_plugin │ │ │ └── index.html │ └── rest_api_plugin.py │ └── dags │ ├── presto_to_mysql.py │ ├── hello_world.py │ └── helloworld_xcoms.py ├── .DS_Store ├── how_minikube_work.png ├── start_airflow.sh ├── .gitignore ├── requirements.txt ├── script └── entrypoint.sh ├── k8s ├── mysql │ └── mysql.kube.yaml └── presto │ └── presto.kube.yaml ├── Dockerfile ├── README.md └── airflow.kube.yaml /src/unittest/python/dags/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/unittest/python/plugins/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/unittest/python/resources/variables.json: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/unittest/python/resources/connections.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | -------------------------------------------------------------------------------- /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/outlandishideas/airflow-testing/master/.DS_Store -------------------------------------------------------------------------------- /src/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/outlandishideas/airflow-testing/master/src/.DS_Store -------------------------------------------------------------------------------- /src/integrationtest/python/constants.py: -------------------------------------------------------------------------------- 1 | PRESTO_DB_PORT=32211 2 | MYSQL_DB_PORT=31320 3 | AIRFLOW_PORT=31317 -------------------------------------------------------------------------------- /src/main/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/outlandishideas/airflow-testing/master/src/main/.DS_Store -------------------------------------------------------------------------------- /how_minikube_work.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/outlandishideas/airflow-testing/master/how_minikube_work.png -------------------------------------------------------------------------------- /src/unittest/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/outlandishideas/airflow-testing/master/src/unittest/.DS_Store 
-------------------------------------------------------------------------------- /src/main/python/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/outlandishideas/airflow-testing/master/src/main/python/.DS_Store -------------------------------------------------------------------------------- /src/unittest/python/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/outlandishideas/airflow-testing/master/src/unittest/python/.DS_Store -------------------------------------------------------------------------------- /start_airflow.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | minikube start --cpus 4 --memory 8192 3 | kubectl apply -f airflow.kube.yaml 4 | minikube mount src/main/python/:/data -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | venv/ 3 | logs/ 4 | airflow.db 5 | airflow.cfg 6 | *.pyc 7 | *.cfg 8 | target/ 9 | .minikube/ 10 | .pytest_cache/ 11 | *.DS_Store 12 | .pybuilder/ 13 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | apache-airflow==1.10.3 2 | jinja2==2.10.1 3 | werkzeug==0.15.3 4 | pyhive==0.6.1 5 | mysqlclient==1.4.2 6 | mysql-connector==2.2.9 7 | presto-python-client==0.7.0 8 | -------------------------------------------------------------------------------- /script/entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | echo Command is : "$1" 5 | echo Minikube IP is : "$2" 6 | 7 | echo "$2" > /tmp/minikube_ip.txt 8 | 9 | case "$1" in 10 | install_dependencies) 11 | pyb "$1" 12 | ;; 13 | run_unit_tests) 14 | pyb "$1" 15 | ;; 16 | run_integration_tests) 17 | pyb "$1" 18 | ;; 19 | *) 20 | # The command is something like bash, not an pyb subcommand. Just run it in the right environment. 
21 | exec "$@" 22 | ;; 23 | esac -------------------------------------------------------------------------------- /src/unittest/python/plugins/multiplyby5_operator_tests.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from datetime import datetime 3 | from airflow import DAG 4 | from airflow.models import TaskInstance 5 | from airflow.operators import MultiplyBy5Operator 6 | 7 | 8 | class TestMultiplyBy5Operator(unittest.TestCase): 9 | 10 | def test_execute(self): 11 | dag = DAG(dag_id='anydag', start_date=datetime.now()) 12 | task = MultiplyBy5Operator(my_operator_param=10, dag=dag, task_id='anytask') 13 | ti = TaskInstance(task=task, execution_date=datetime.now()) 14 | result = task.execute(ti.get_template_context()) 15 | self.assertEqual(result, 50) 16 | -------------------------------------------------------------------------------- /src/main/python/plugins/multiplyby5_operator.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from airflow.models import BaseOperator 4 | from airflow.plugins_manager import AirflowPlugin 5 | from airflow.utils.decorators import apply_defaults 6 | 7 | log = logging.getLogger(__name__) 8 | 9 | 10 | class MultiplyBy5Operator(BaseOperator): 11 | @apply_defaults 12 | def __init__(self, my_operator_param, *args, **kwargs): 13 | self.operator_param = my_operator_param 14 | super(MultiplyBy5Operator, self).__init__(*args, **kwargs) 15 | 16 | def execute(self, context): 17 | log.info('operator_param: %s', self.operator_param) 18 | return (self.operator_param * 5) 19 | 20 | 21 | class MultiplyBy5Plugin(AirflowPlugin): 22 | name = "multiplyby5_plugin" 23 | operators = [MultiplyBy5Operator] 24 | -------------------------------------------------------------------------------- /src/integrationtest/python/db_util.py: -------------------------------------------------------------------------------- 1 | 2 | class DBUtil: 3 | 4 | def create_table(self, db_conn,create_table_sql): 5 | cursor = db_conn.cursor() 6 | cursor.execute((create_table_sql)) 7 | cursor.close() 8 | 9 | def insert_into_table(self, db_conn,insert_query): 10 | cursor = db_conn.cursor() 11 | cursor.execute(insert_query) 12 | db_conn.commit() 13 | cursor.close() 14 | 15 | def drop_table(self, db_conn,drop_table_query): 16 | cursor = db_conn.cursor() 17 | cursor.execute((drop_table_query)) 18 | cursor.close() 19 | 20 | def get_row_count(self,db_conn,select_query): 21 | cursor = db_conn.cursor() 22 | cursor.execute((select_query)) 23 | final_result = [list(i) for i in cursor] 24 | return final_result -------------------------------------------------------------------------------- /src/main/python/dags/presto_to_mysql.py: -------------------------------------------------------------------------------- 1 | from airflow.operators.presto_to_mysql import PrestoToMySqlTransfer 2 | from datetime import datetime 3 | from airflow import DAG 4 | 5 | default_args = { 6 | 'email': ['hello@world.com'] 7 | } 8 | 9 | dag = DAG('presto_to_mysql', description='Presto to Mysql Transfer', default_args=default_args, 10 | schedule_interval='0 12 * * *', 11 | start_date=datetime(2017, 3, 20), catchup=False) 12 | 13 | PrestoToMySqlTransfer( 14 | presto_conn_id='presto-conn', 15 | mysql_conn_id='mysql-conn', 16 | task_id='presto_to_mysql_transfer', 17 | sql=""" 18 | SELECT name, count(*) as count 19 | FROM blackhole.default.region 20 | GROUP BY name 21 | """, 22 | mysql_table='mysql_region', 23 | 
mysql_preoperator='TRUNCATE TABLE mysql_region;', 24 | dag=dag) 25 | -------------------------------------------------------------------------------- /src/integrationtest/python/dags/hello_world_tests.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import unittest 3 | 4 | sys.path.append('../') 5 | from airflow_api import AirflowAPI 6 | 7 | 8 | class TestHelloWorldDag(unittest.TestCase): 9 | """Integration test for Hello world DAG""" 10 | 11 | def setUp(self): 12 | self.airflow_api = AirflowAPI() 13 | 14 | def test_hello_world(self): 15 | """helloword dag should run successfully""" 16 | execution_date = "2019-01-11T12:00:00+00:00" 17 | dag_id = "hello_world" 18 | self.airflow_api.trigger_dag(dag_id, execution_date) 19 | is_running = True 20 | while is_running: 21 | is_running = self.airflow_api.is_dag_running(dag_id, execution_date) 22 | self.assertEqual(is_running, False) 23 | self.assertEqual(self.airflow_api.get_dag_status(dag_id, execution_date), "success") 24 | 25 | 26 | if __name__ == '__main__': 27 | unittest.main() 28 | -------------------------------------------------------------------------------- /src/main/python/dags/hello_world.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from airflow import DAG 3 | from airflow.operators.dummy_operator import DummyOperator 4 | from airflow.operators.python_operator import PythonOperator 5 | from airflow.operators import MultiplyBy5Operator 6 | 7 | default_args = { 8 | 'email': ['hello@world.com'] 9 | } 10 | 11 | 12 | def print_hello(): 13 | return 'Hello Wolrd' 14 | 15 | 16 | dag = DAG('hello_world', description='Hello world example', default_args=default_args, schedule_interval='0 12 * * *', 17 | start_date=datetime(2017, 3, 20), catchup=False) 18 | 19 | dummy_operator = DummyOperator(task_id='dummy_task', retries=3, dag=dag) 20 | 21 | hello_operator = PythonOperator(task_id='hello_task', python_callable=print_hello, dag=dag) 22 | 23 | multiplyby5_operator = MultiplyBy5Operator(my_operator_param=10, 24 | task_id='multiplyby5_task', dag=dag) 25 | 26 | dummy_operator >> hello_operator 27 | 28 | dummy_operator >> multiplyby5_operator 29 | -------------------------------------------------------------------------------- /k8s/mysql/mysql.kube.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: mysql 5 | spec: 6 | type: NodePort 7 | selector: 8 | app: prod-mysql 9 | tier: db 10 | ports: 11 | - name: mysql 12 | protocol: TCP 13 | port: 3306 14 | targetPort: mysql 15 | nodePort: 31320 16 | --- 17 | apiVersion: extensions/v1beta1 18 | kind: Deployment 19 | metadata: 20 | name: mysql 21 | spec: 22 | replicas: 1 23 | template: 24 | metadata: 25 | labels: 26 | app: prod-mysql 27 | tier: db 28 | spec: 29 | containers: 30 | - name: mysql 31 | image: mysql:5.7.25 32 | resources: 33 | requests: 34 | memory: "256Mi" 35 | cpu: "200m" 36 | limits: 37 | memory: "512Mi" 38 | cpu: "400m" 39 | ports: 40 | - name: mysql 41 | containerPort: 3306 42 | env: 43 | - name: MYSQL_ROOT_PASSWORD 44 | value: "mysql" 45 | - name: MYSQL_DATABASE 46 | value: "mysql" 47 | - name: MYSQL_USER 48 | value: "mysql" 49 | - name: MYSQL_PASSWORD 50 | value: "mysql" -------------------------------------------------------------------------------- /src/unittest/python/dag_integrity_tests.py: 
-------------------------------------------------------------------------------- 1 | import unittest 2 | from airflow.models import DagBag 3 | 4 | 5 | class TestDagIntegrity(unittest.TestCase): 6 | LOAD_SECOND_THRESHOLD = 2 7 | 8 | def setUp(self): 9 | self.dagbag = DagBag() 10 | 11 | def test_import_dags(self): 12 | self.assertFalse( 13 | len(self.dagbag.import_errors), 14 | 'DAG import failures. Errors: {}'.format( 15 | self.dagbag.import_errors 16 | ) 17 | ) 18 | 19 | def test_import_time(self): 20 | stats = self.dagbag.dagbag_stats 21 | slow_dags = list(filter(lambda d: d.duration > self.LOAD_SECOND_THRESHOLD, stats)) 22 | res = ', '.join(map(lambda d: d.file[1:], slow_dags)) 23 | 24 | self.assertEquals(0, len(slow_dags), 25 | 'The following files take more than {threshold}s to load: {res}'.format( 26 | threshold=self.LOAD_SECOND_THRESHOLD, res=res) 27 | ) 28 | 29 | def test_alert_email_present(self): 30 | for dag_id, dag in self.dagbag.dags.items(): 31 | emails = dag.default_args.get('email', []) 32 | msg = 'Alert email not set for DAG {id}'.format(id=dag_id) 33 | self.assertIn('hello@world.com', emails, msg) 34 | -------------------------------------------------------------------------------- /src/main/python/plugins/helloworld_sensor.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime, timedelta 2 | import time 3 | import logging 4 | from airflow.operators.sensors import BaseSensorOperator 5 | from airflow.plugins_manager import AirflowPlugin 6 | from airflow.utils.decorators import apply_defaults 7 | 8 | log = logging.getLogger(__name__) 9 | 10 | 11 | class HelloworldSensor(BaseSensorOperator): 12 | 13 | @apply_defaults 14 | def __init__(self, *args, **kwargs): 15 | super(HelloworldSensor, self).__init__(*args, **kwargs) 16 | 17 | def poke(self, context): 18 | current_minute = self.params.get('sensor_start_time').minute 19 | if current_minute % 3 != 0: 20 | log.info("Sensor minute (%s) is not divisible by 3, sensor will retry.", current_minute) 21 | self.params['sensor_start_time'] = self.params.get('sensor_start_time') + timedelta(minutes=1) 22 | return False 23 | 24 | log.info("Sensor minute (%s) is divisible by 3, sensor finished.", current_minute) 25 | return True 26 | 27 | def execute(self, context): 28 | while not self.poke(context): 29 | time.sleep(self.poke_interval) 30 | return self.params.get('sensor_start_time').minute 31 | 32 | 33 | class HelloworldSensorPlugin(AirflowPlugin): 34 | name = "helloworld_sensor_plugin" 35 | operators = [HelloworldSensor] 36 | 37 | 38 | def get_now(): 39 | return datetime.now() 40 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.6-stretch 2 | USER root 3 | ENV AIRFLOW__CORE__LOAD_EXAMPLES False 4 | ENV AIRFLOW__CORE__DAGS_FOLDER /opt/src/main/python/dags 5 | ENV AIRFLOW__CORE__PLUGINS_FOLDER /opt/src/main/python/plugins 6 | ENV AIRFLOW__REST_API_PLUGIN__LOG_LOADING True 7 | ENV AIRFLOW__REST_API_PLUGIN__FILTER_LOADING_MESSAGES_IN_CLI_RESPONSE True 8 | ENV AIRFLOW__REST_API_PLUGIN__REST_API_PLUGIN_HTTP_TOKEN_HEADER_NAME rest_api_plugin_http_token 9 | ENV AIRFLOW__REST_API_PLUGIN__REST_API_PLUGIN_EXPECTED_HTTP_TOKEN None 10 | 11 | ENV AIRFLOW_HOME /usr/local/airflow 12 | 13 | WORKDIR /opt 14 | 15 | RUN apt-get update -qq \ 16 | && pip install -U pip \ 17 | && pip install pybuilder \ 18 | && rm -rf /var/lib/apt/lists/* /var/cache/apk/* 
19 | 20 | COPY build.py . 21 | RUN pyb install_dependencies 22 | 23 | COPY requirements.txt . 24 | RUN pip install -r requirements.txt 25 | 26 | COPY src/unittest/python/resources/variables.json /usr/local/airflow/variables.json 27 | COPY src/unittest/python/resources/connections.sh /usr/local/airflow/connections.sh 28 | 29 | RUN airflow initdb && \ 30 | airflow variables -i /usr/local/airflow/variable.json && \ 31 | sh /usr/local/airflow/connections.sh 32 | 33 | RUN rm -f /opt/build.py 34 | RUN rm -f /usr/local/airflow/variables.json 35 | RUN rm -f /usr/local/airflow/connections.sh 36 | 37 | COPY script/entrypoint.sh /mnt/entrypoint.sh 38 | RUN chmod +x /mnt/entrypoint.sh 39 | 40 | ENV PYTHONPATH /opt/src/ 41 | 42 | ENTRYPOINT ["/mnt/entrypoint.sh"] 43 | 44 | CMD ["install_dependencies",""] 45 | -------------------------------------------------------------------------------- /src/main/python/dags/helloworld_xcoms.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | from airflow import DAG 3 | from airflow.operators import PythonOperator 4 | from airflow.operators import BashOperator 5 | 6 | yesterday = datetime.datetime.combine( 7 | datetime.datetime.today() - datetime.timedelta(1), 8 | datetime.datetime.min.time()) 9 | 10 | default_dag_args = { 11 | 'start_date': yesterday, 12 | 'email_on_failure': False, 13 | 'email_on_retry': False, 14 | 'email': ['hello@world.com'], 15 | 'retries': 0 16 | } 17 | 18 | 19 | def push_to_xcoms(*args, **kwargs): 20 | value = "dummyValue" 21 | kwargs['ti'].xcom_push(key="dummyKey", value=value) 22 | 23 | 24 | def pull_from_xcoms(**kwargs): 25 | ti = kwargs['ti'] 26 | pulled_value = ti.xcom_pull(key='dummyKey', task_ids='push_to_xcoms') 27 | print("value=" + str(pulled_value)) 28 | 29 | 30 | dag = DAG('hello_world_xcoms', description='Hello world XComs example', default_args=default_dag_args, schedule_interval=None) 31 | 32 | push_to_xcoms_task = PythonOperator( 33 | task_id='push_to_xcoms', 34 | provide_context=True, 35 | python_callable=push_to_xcoms, 36 | dag=dag 37 | ) 38 | 39 | pull_from_xcoms_task = PythonOperator( 40 | task_id='pull_from_xcoms', 41 | provide_context=True, 42 | python_callable=pull_from_xcoms, 43 | dag=dag 44 | ) 45 | 46 | templated_xcoms_value_task = BashOperator( 47 | task_id='templated_xcoms_value', 48 | bash_command='echo ' + str("{{ ti.xcom_pull(key='dummyKey')}}"), 49 | dag=dag 50 | ) 51 | 52 | push_to_xcoms_task >> pull_from_xcoms_task >> templated_xcoms_value_task 53 | -------------------------------------------------------------------------------- /src/unittest/python/plugins/helloworld_sensor_tests.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from datetime import datetime 3 | from airflow import DAG 4 | from airflow.models import TaskInstance 5 | from airflow.operators import HelloworldSensor 6 | 7 | 8 | class TestHelloworldSensor(unittest.TestCase): 9 | def test_poke_should_return_false_when_value_of_minute_is_not_divisible_by_3(self): 10 | dag = DAG(dag_id='anydag', start_date=datetime.now()) 11 | sensor_task = HelloworldSensor( 12 | task_id='any_sensor_task', 13 | poke_interval=2, 14 | params={'sensor_start_time': datetime(2018, 8, 8, 10, 50)}, 15 | dag=dag 16 | ) 17 | sti = TaskInstance(task=sensor_task, execution_date=datetime.now()) 18 | result = sensor_task.poke(sti.get_template_context()) 19 | self.assertFalse(result) 20 | 21 | def 
test_poke_should_return_true_when_value_of_minute_is_divisible_by_3(self): 22 | dag = DAG(dag_id='anydag', start_date=datetime.now()) 23 | sensor_task = HelloworldSensor( 24 | task_id='any_sensor_task', 25 | poke_interval=2, 26 | params={'sensor_start_time': datetime(2018, 8, 8, 10, 9)}, 27 | dag=dag 28 | ) 29 | sti = TaskInstance(task=sensor_task, execution_date=datetime.now()) 30 | result = sensor_task.poke(sti.get_template_context()) 31 | self.assertTrue(result) 32 | 33 | def test_execute_should_return_true(self): 34 | dag = DAG(dag_id='anydag', start_date=datetime.now()) 35 | sensor_task = HelloworldSensor( 36 | task_id='any_sensor_task', 37 | poke_interval=2, 38 | params={'sensor_start_time': datetime(2018, 8, 8, 10, 10)}, 39 | dag=dag 40 | ) 41 | sti = TaskInstance(task=sensor_task, execution_date=datetime.now()) 42 | sensor_time = sensor_task.execute(sti.get_template_context()) 43 | self.assertEqual(sensor_time, 12) 44 | -------------------------------------------------------------------------------- /src/unittest/python/dags/helloworld_dag_tests.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from airflow.models import DagBag 3 | 4 | 5 | class TestHelloWorldDAG(unittest.TestCase): 6 | """Check HelloWorldDAG expectation""" 7 | 8 | def setUp(self): 9 | self.dagbag = DagBag() 10 | 11 | def test_task_count(self): 12 | """Check task count of hello_world dag""" 13 | dag_id = 'hello_world' 14 | dag = self.dagbag.get_dag(dag_id) 15 | self.assertEqual(len(dag.tasks), 3) 16 | 17 | def test_contain_tasks(self): 18 | """Check task contains in hello_world dag""" 19 | dag_id = 'hello_world' 20 | dag = self.dagbag.get_dag(dag_id) 21 | tasks = dag.tasks 22 | task_ids = list(map(lambda task: task.task_id, tasks)) 23 | self.assertListEqual(sorted(task_ids), sorted(['dummy_task', 'multiplyby5_task', 'hello_task'])) 24 | 25 | def test_dependencies_of_dummy_task(self): 26 | """Check the task dependencies of dummy_task in hello_world dag""" 27 | dag_id = 'hello_world' 28 | dag = self.dagbag.get_dag(dag_id) 29 | dummy_task = dag.get_task('dummy_task') 30 | 31 | upstream_task_ids = list(map(lambda task: task.task_id, dummy_task.upstream_list)) 32 | self.assertListEqual(upstream_task_ids, []) 33 | downstream_task_ids = list(map(lambda task: task.task_id, dummy_task.downstream_list)) 34 | self.assertListEqual(sorted(downstream_task_ids), sorted(['hello_task', 'multiplyby5_task'])) 35 | 36 | def test_dependencies_of_hello_task(self): 37 | """Check the task dependencies of hello_task in hello_world dag""" 38 | dag_id = 'hello_world' 39 | dag = self.dagbag.get_dag(dag_id) 40 | hello_task = dag.get_task('hello_task') 41 | 42 | upstream_task_ids = list(map(lambda task: task.task_id, hello_task.upstream_list)) 43 | self.assertListEqual(upstream_task_ids, ['dummy_task']) 44 | downstream_task_ids = list(map(lambda task: task.task_id, hello_task.downstream_list)) 45 | self.assertListEqual(downstream_task_ids, []) 46 | -------------------------------------------------------------------------------- /src/unittest/python/dags/helloworld_xcoms_tests.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from datetime import datetime 3 | from airflow.models import DagBag, TaskInstance 4 | 5 | 6 | class TestXComExamplesDag(unittest.TestCase): 7 | 8 | def setUp(self): 9 | self.dagbag = DagBag() 10 | 11 | def test_xcoms(self): 12 | dag_id = 'hello_world_xcoms' 13 | dag = self.dagbag.get_dag(dag_id) 14 | 
push_to_xcoms_task = dag.get_task('push_to_xcoms') 15 | pull_from_xcoms_task = dag.get_task('pull_from_xcoms') 16 | 17 | execution_date = datetime.now() 18 | 19 | push_to_xcoms_ti = TaskInstance(task=push_to_xcoms_task, execution_date=execution_date) 20 | context = push_to_xcoms_ti.get_template_context() 21 | push_to_xcoms_task.execute(context) 22 | 23 | pull_from_xcoms_ti = TaskInstance(task=pull_from_xcoms_task, execution_date=execution_date) 24 | 25 | result = pull_from_xcoms_ti.xcom_pull(key="dummyKey") 26 | self.assertEqual(result, 'dummyValue') 27 | 28 | def test_xcom_in_templated_field(self): 29 | dag_id = 'hello_world_xcoms' 30 | dag = self.dagbag.get_dag(dag_id) 31 | push_to_xcoms_task = dag.get_task('push_to_xcoms') 32 | 33 | execution_date = datetime.now() 34 | 35 | push_to_xcoms_ti = TaskInstance(task=push_to_xcoms_task, execution_date=execution_date) 36 | context = push_to_xcoms_ti.get_template_context() 37 | push_to_xcoms_task.execute(context) 38 | 39 | templated_xcoms_value_task = dag.get_task('templated_xcoms_value') 40 | templated_xcoms_value_ti = TaskInstance(task=templated_xcoms_value_task, execution_date=execution_date) 41 | context = templated_xcoms_value_ti.get_template_context() 42 | 43 | bash_operator_templated_field = 'bash_command' 44 | 45 | rendered_template = templated_xcoms_value_task.render_template 46 | 47 | bash_command_value = getattr(templated_xcoms_value_task, bash_operator_templated_field) 48 | 49 | bash_command_rendered_value = rendered_template(bash_operator_templated_field, bash_command_value, context) 50 | 51 | self.assertEqual(bash_command_rendered_value, 'echo dummyValue') 52 | 53 | 54 | suite = unittest.TestLoader().loadTestsFromTestCase(TestXComExamplesDag) 55 | unittest.TextTestRunner(verbosity=2).run(suite) 56 | -------------------------------------------------------------------------------- /k8s/presto/presto.kube.yaml: -------------------------------------------------------------------------------- 1 | # Actual Source: https://github.com/dharmeshkakadia/presto-kubernetes 2 | 3 | apiVersion: v1 4 | kind: Service 5 | metadata: 6 | name: presto 7 | spec: 8 | selector: 9 | presto: coordinator 10 | type: NodePort 11 | ports: 12 | - name: coordinator 13 | port: 8080 14 | protocol: TCP 15 | targetPort: coordinator 16 | nodePort: 32211 17 | --- 18 | kind: Deployment 19 | apiVersion: apps/v1beta1 20 | metadata: 21 | name: coordinator 22 | labels: 23 | presto: coordinator 24 | spec: 25 | replicas: 1 26 | template: 27 | metadata: 28 | labels: 29 | presto: coordinator 30 | spec: 31 | containers: 32 | - env: 33 | - name: HTTP_SERVER_PORT 34 | value: "8080" 35 | - name: PRESTO_JVM_HEAP_SIZE 36 | value: "8" 37 | - name: PRESTO_MAX_MEMORY 38 | value: "10" 39 | - name: PRESTO_MAX_MEMORY_PER_NODE 40 | value: "1" 41 | image: johandry/presto 42 | livenessProbe: 43 | exec: 44 | command: 45 | - /etc/init.d/presto status | grep -q 'Running as' 46 | failureThreshold: 3 47 | periodSeconds: 300 48 | timeoutSeconds: 10 49 | name: presto-coordinator 50 | ports: 51 | - name: coordinator 52 | containerPort: 8080 53 | restartPolicy: Always 54 | --- 55 | apiVersion: apps/v1beta1 56 | kind: Deployment 57 | metadata: 58 | labels: 59 | presto: presto-worker 60 | name: presto-worker 61 | spec: 62 | replicas: 1 63 | template: 64 | metadata: 65 | labels: 66 | presto: presto-worker 67 | spec: 68 | containers: 69 | - env: 70 | - name: HTTP_SERVER_PORT 71 | value: "8080" 72 | - name: PRESTO_JVM_HEAP_SIZE 73 | value: "8" 74 | - name: PRESTO_MAX_MEMORY 75 | value: "10" 76 | - name: 
PRESTO_MAX_MEMORY_PER_NODE 77 | value: "1" 78 | - name : COORDINATOR 79 | value: "presto" 80 | image: johandry/presto 81 | livenessProbe: 82 | exec: 83 | command: 84 | - /etc/init.d/presto status | grep -q 'Running as' 85 | failureThreshold: 3 86 | periodSeconds: 300 87 | timeoutSeconds: 10 88 | name: presto-worker 89 | ports: 90 | - containerPort: 8080 91 | restartPolicy: Always -------------------------------------------------------------------------------- /src/integrationtest/python/airflow_api.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import json 3 | from constants import AIRFLOW_PORT,MYSQL_DB_PORT,PRESTO_DB_PORT 4 | 5 | 6 | class AirflowAPI: 7 | def __init__(self): 8 | self.minikube_ip = self.get_minikube_ip() 9 | 10 | def get_minikube_ip(self): 11 | f = open("/tmp/minikube_ip.txt", "r") 12 | minikube_ip = f.readlines()[0].replace('\n', '') 13 | if not minikube_ip: 14 | raise Exception("Minikube is not running. Please, start minikube first.") 15 | f.close() 16 | return minikube_ip 17 | 18 | def get_airflow_url(self): 19 | return "http://%s:%s" % (self.minikube_ip, AIRFLOW_PORT) 20 | 21 | def unpause_dag(self, dag_id): 22 | return requests.get( 23 | "%s/admin/rest_api/api?api=unpause&dag_id=%s" % (self.get_airflow_url(), dag_id)) 24 | 25 | def pause_dag(self, dag_id): 26 | return requests.get( 27 | "%s/admin/rest_api/api?api=pause&dag_id=%s" % (self.get_airflow_url(), dag_id)) 28 | 29 | def trigger_dag(self, dag_id, execution_date): 30 | self.clear_dag(dag_id, execution_date); 31 | self.unpause_dag(dag_id) 32 | triggered_response = requests.get( 33 | "%s/admin/rest_api/api?api=trigger_dag&dag_id=%s&exec_date=%s" % ( 34 | self.get_airflow_url(), dag_id, execution_date)) 35 | if triggered_response.status_code != 200: 36 | raise Exception("Please, wait for airflow web server to start.") 37 | 38 | def dag_state(self, dag_id, execution_date): 39 | return requests.get( 40 | "%s/admin/rest_api/api?api=dag_state&dag_id=%s&execution_date=%s" % ( 41 | self.get_airflow_url(), dag_id, execution_date)) 42 | 43 | def clear_dag(self, dag_id, execution_date): 44 | return requests.get( 45 | "%s/admin/rest_api/api?api=clear&dag_id=%s&execution_date=%s" % ( 46 | self.get_airflow_url(), dag_id, execution_date)) 47 | 48 | def is_dag_running(self, dag_id, execution_date): 49 | response = self.dag_state(dag_id, execution_date) 50 | json_response = json.loads(response.text) 51 | print(json_response) 52 | if "running" in json_response['output']['stdout']: 53 | return True 54 | else: 55 | self.pause_dag(dag_id) 56 | return False 57 | 58 | def get_dag_status(self, dag_id, execution_date): 59 | response = self.dag_state(dag_id, execution_date) 60 | json_response = json.loads(response.text) 61 | if "running" in json_response['output']['stdout']: 62 | return "running" 63 | elif "success" in json_response['output']['stdout']: 64 | return "success" 65 | elif "failed" in json_response['output']['stdout']: 66 | return "failed" 67 | else: 68 | return "Not Defined" 69 | 70 | def add_presto_connection(self, name, catalog, schema): 71 | conn_uri = "presto://" + self.minikube_ip + ":"+str(PRESTO_DB_PORT)+"/" + catalog + "/" + schema 72 | return requests.get( 73 | "%s/admin/rest_api/api?api=connections&add=on&conn_id=%s&conn_uri=%s" % ( 74 | self.get_airflow_url(), name, conn_uri)) 75 | 76 | def add_mysql_connection(self, name, user, password, database): 77 | conn_uri = "mysql://" + user + ":" + password + "@" + self.minikube_ip + ":"+str(MYSQL_DB_PORT)+"/" 
+ database 78 | return requests.get( 79 | "%s/admin/rest_api/api?api=connections&add=on&conn_id=%s&conn_uri=%s" % ( 80 | self.get_airflow_url(), name, conn_uri)) 81 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Airflow Testing 2 | This project contains different categories of tests with examples. 3 | 4 | ## Five Categories of Tests 5 | 1. DAG Validation Tests: To test the validity of the DAG, checking for typos and cyclicity. 6 | 2. DAG/Pipeline Definition Tests: To test the total number of tasks in the DAG, upstream and downstream dependencies of each task, etc. 7 | 3. Unit Tests: To test the logic of custom Operators, custom Sensors, etc. 8 | 4. Integration Tests: To test the communication between tasks. For example, task 1 passes some information to task 2 using XComs. 9 | 5. End to End Pipeline Tests: To test and verify the integration between each task. You can also assert the data on successful completion of the E2E pipeline. 10 | 11 | Clone this repo to run these tests on your local machine. 12 | 13 | ## Unit Tests 14 | 15 | Unit tests cover all tests that fall under the first four categories. 16 | 17 | #### How to run? 18 | 1. Build the Airflow image. Go to the project root directory and run
19 | 20 | ```docker build . -t airflow-test``` 21 | 22 | 2. Run the unit tests from Docker. Use your repository location for **{SourceDir}** (e.g., if you cloned the repo at `/User/username/airflow-testing/`, then 23 | **{SourceDir}** is `/User/username`.) 24 | 25 | ```docker run -ti -v {SourceDir}/airflow-testing:/opt --entrypoint /mnt/entrypoint.sh airflow-test run_unit_tests``` 26 | 27 | ## End-to-End Tests 28 | 29 | End-to-End tests cover all tests of category five. To run these tests, 30 | we need to set up an Airflow environment in minikube. We also need to set up 31 | all the components required by your DAGs. 32 | 33 | #### Minikube setup 34 | 35 | Prerequisites: 36 |
37 |     git clone https://github.com/chandulal/airflow-testing.git
38 |     brew cask install virtualbox (run only if you don't have VirtualBox installed)
39 |     
40 | 41 | Install minikube 42 |
43 |     brew cask install minikube
44 |     brew install kubernetes-cli
45 |     minikube start --cpus 4 --memory 8192
46 |     
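Before continuing, it can be worth a quick check that the cluster actually came up (assuming `minikube` and `kubectl` are now on your PATH):

```
minikube status
kubectl get nodes
```

Both should show the minikube node up and ready before you move on.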
47 | 48 | #### Mount DAGs, Plugins, etc. 49 | 50 | Mount all your DAGs, plugins, etc. in minikube: 51 |
 
52 |     minikube mount {project dir}/src/main/python/:/data
53 |     
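The Airflow deployment in `airflow.kube.yaml` reads DAGs and plugins from `/data/dags/` and `/data/plugins/` inside the minikube VM, so keep the mount command running in its own terminal. As a quick sanity check (a sketch, assuming a standard minikube install), you can list the mounted folders from inside the VM:

```
minikube ssh "ls /data/dags /data/plugins"
```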
54 | 55 | #### Deploy Airflow in minikube 56 | 57 | Open a new terminal, go to the project root directory, and run: 58 |
 
59 |     kubectl apply -f airflow.kube.yaml
60 |     
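`kubectl apply` returns as soon as the objects are created; the pods themselves need a few minutes to become ready. If you would rather block until the web server answers than guess, a small loop like this works (a minimal sketch, assuming `curl` is installed and using the web NodePort 31317 defined in `airflow.kube.yaml`):

```
until curl -s -o /dev/null "http://$(minikube ip):31317"; do
    echo "Waiting for the Airflow web server..."
    sleep 10
done
echo "Airflow UI is reachable at http://$(minikube ip):31317"
```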
61 | 62 | Wait 3-4 minutes for all Airflow components to start. 63 | 64 | This will set up the following components:
65 | * Postgres (to store the Airflow metadata) 66 | * Redis (broker for the Celery executor) 67 | * Airflow Scheduler 68 | * Celery Workers 69 | * Airflow Web Server 70 | * Flower 71 | 72 | #### Access Airflow 73 | Get the minikube IP by running the ```minikube ip``` command. 74 | 75 | Use the minikube IP to access: 76 | 77 | **Airflow UI:** {minikube-ip}:31317 78 | 79 | **Flower:** {minikube-ip}:32081 80 | 81 | #### How Airflow works in minikube 82 | 83 | ![minkube_airflow_architecture](https://github.com/chandulal/airflow-testing/blob/master/how_minikube_work.png) 84 | 85 | #### How to run these tests? 86 | 87 | 1. Install all components required to run your DAGs in minikube. To run the integration tests 88 | available in this repo, we need MySQL and Presto on minikube. 89 |
90 |     kubectl apply -f {SourceDir}/k8s/mysql/mysql.kube.yaml
91 |     kubectl apply -f {SourceDir}/k8s/presto/presto.kube.yaml
92 |     
93 | 94 | 2. Run the integration tests from the docker. Use absolute path of this repository in your machine for **{SourceDir}** 95 | 96 | ```docker run -ti -v {SourceDir}/airflow-testing:/opt --entrypoint /mnt/entrypoint.sh airflow-test run_integration_tests {minikube-ip} ``` -------------------------------------------------------------------------------- /src/integrationtest/python/dags/presto_to_mysql_tests.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import unittest 3 | import mysql.connector 4 | import prestodb 5 | 6 | sys.path.append('../') 7 | from airflow_api import AirflowAPI 8 | from db_util import DBUtil 9 | from constants import PRESTO_DB_PORT,MYSQL_DB_PORT 10 | 11 | 12 | class TestPrestoToMySqlDag(unittest.TestCase): 13 | """Integration test for presto to mysql transfer""" 14 | 15 | mysql_conn = None 16 | prest_conn = None 17 | 18 | 19 | def setUp(self): 20 | presto_catlog="blackhole" 21 | presto_schema= "default" 22 | mysql_database="mysql" 23 | mysql_user="mysql" 24 | mysql_password="mysql" 25 | 26 | self.airflow_api = AirflowAPI() 27 | self.minikube_ip = str(self.airflow_api.get_minikube_ip()) 28 | self.db_util = DBUtil() 29 | self.airflow_api.add_presto_connection("presto-conn",presto_catlog 30 | ,presto_schema) 31 | self.airflow_api.add_mysql_connection("mysql-conn", mysql_database, 32 | mysql_user, mysql_password) 33 | self.mysql_conn = mysql.connector.connect(user=mysql_user, 34 | password=mysql_password, 35 | host=self.minikube_ip, 36 | port=MYSQL_DB_PORT, 37 | database=mysql_database, 38 | use_pure=False) 39 | 40 | self.prest_conn = prestodb.dbapi.connect( 41 | host=self.minikube_ip, 42 | port=PRESTO_DB_PORT, 43 | user='admin', 44 | catalog=presto_catlog, 45 | schema=presto_schema, 46 | ) 47 | 48 | create_mysql_table_sql = """ 49 | CREATE TABLE IF NOT EXISTS mysql_region ( 50 | name VARCHAR(50),count int(10) 51 | ); 52 | """ 53 | 54 | self.db_util.create_table(self.mysql_conn,create_mysql_table_sql) 55 | 56 | create_presto_table_sql = """ 57 | CREATE TABLE region ( 58 | name varchar 59 | ) 60 | WITH ( 61 | split_count = 1, 62 | pages_per_split = 1, 63 | rows_per_page = 1, 64 | page_processing_delay = '5s' 65 | )""" 66 | 67 | 68 | self.db_util.create_table(self.prest_conn,create_presto_table_sql) 69 | 70 | insert_query_1 = "insert into region values('INDIA')" 71 | self.db_util.insert_into_table(self.prest_conn,insert_query_1) 72 | 73 | def test_presto_to_mysql_transfer(self): 74 | """should transfer data from presto to mysql""" 75 | 76 | execution_date = "2019-05-12T14:00:00+00:00" 77 | dag_id = "presto_to_mysql" 78 | self.airflow_api.trigger_dag(dag_id, execution_date) 79 | is_running = True 80 | while is_running: 81 | is_running = self.airflow_api.is_dag_running(dag_id, execution_date) 82 | self.assertEqual(is_running, False) 83 | self.assertEqual(self.airflow_api.get_dag_status(dag_id, 84 | execution_date), "success") 85 | 86 | mysql_select_query = "SELECT name FROM mysql_region" 87 | row_count=self.db_util.get_row_count(self.mysql_conn,mysql_select_query) 88 | self.assertEqual(1, len(row_count)) 89 | 90 | def tearDown(self): 91 | drop_mysql_table="drop table mysql_region" 92 | drop_presto_table = "drop table region" 93 | self.db_util.drop_table(self.mysql_conn,drop_mysql_table) 94 | self.db_util.drop_table(self.prest_conn,drop_presto_table) 95 | self.mysql_conn.close() 96 | self.prest_conn.close() 97 | 98 | 99 | if __name__ == '__main__': 100 | unittest.main() 101 | 
-------------------------------------------------------------------------------- /src/main/python/plugins/templates/rest_api_plugin/index.html: -------------------------------------------------------------------------------- 1 | {% extends "airflow/master.html" %} 2 | 3 | {% block title %}Airflow - REST API Plugin{% endblock %} 4 | 5 | {% block head_css %} 6 | {{ super() }} 7 | 8 | {% endblock %} 9 | 10 | {% block body %} 11 | 12 | 13 | 19 | 20 | 21 | 44 | 45 |

Airflow REST API

46 | 47 | 48 |

Documentation

49 | 53 | 54 | 55 |

Versions

56 | 60 | 61 | 62 |

DAGs:

63 | 64 | 65 | 66 | 67 | {% for dag in dags %} 68 | 69 | 70 | 71 | {% endfor %} 72 |
DAG IDIs Active
{{dag.dag_id}}{{dag.is_active}}
73 | 74 | 75 |

API Directory

76 |

Click on one of the links below to jump to the API form

77 | 82 | 83 | 84 |

APIs

85 | {% for api_metadata in apis_metadata %} 86 |
87 |

{{api_metadata.name}}

88 |
{{api_metadata.description}}
89 |
{{api_metadata.http_method|default('GET', true)}} {{airflow_webserver_base_url}}{{rest_api_endpoint}}?api={{api_metadata.name}}{% if api_metadata.http_method != 'POST' %}{% for argument in api_metadata.arguments %}&{{argument.name}}{% if argument.form_input_type != 'checkbox' %}=value{% endif %}{% endfor %}{% endif %}
90 | 91 | {% if api_metadata.form_enctype %} 92 |
enctype={{api_metadata.form_enctype}}
93 | {% endif %} 94 | 95 | {% if api_metadata.post_body_description %} 96 |
{{api_metadata.post_body_description}}
97 | {% endif %} 98 | 99 |
Available in Airflow Version: {{api_metadata.airflow_version}}
100 |
101 |
107 | 108 | 109 | 110 | 111 | {% if api_metadata.arguments|length > 0 or api_metadata.post_arguments|length > 0 %} 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | {% for argument in api_metadata.arguments %} 121 | 122 | 123 | 124 | 125 | 126 | 127 | {% endfor %} 128 | 129 | {% for argument in api_metadata.post_arguments %} 130 | 131 | 132 | 133 | 134 | 135 | 136 | {% endfor %} 137 | 138 | {% else %} 139 | No Arguments 140 | {% endif %} 141 | 142 | 143 | 144 | 145 |
Argument NameInputRequiredDescription
{{argument.name}}:{{argument.required}}{{argument.description}}
{{argument.name}}:{{argument.required}}{{argument.description}}
146 |
147 |
148 |
149 |
150 | 151 | {% endfor %} 152 | 153 |
154 | 155 | {% endblock %} 156 | 157 | {% block tail %} 158 | {{ super() }} 159 | 160 | {% endblock %} 161 | -------------------------------------------------------------------------------- /airflow.kube.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: postgres 5 | spec: 6 | type: ClusterIP 7 | selector: 8 | app: airflow 9 | tier: db 10 | ports: 11 | - name: postgres 12 | protocol: TCP 13 | port: 5432 14 | targetPort: postgres 15 | --- 16 | apiVersion: v1 17 | kind: Service 18 | metadata: 19 | name: redis 20 | spec: 21 | type: ClusterIP 22 | selector: 23 | app: airflow 24 | tier: backend 25 | ports: 26 | - port: 6379 27 | name: redis 28 | protocol: TCP 29 | targetPort: redis 30 | --- 31 | apiVersion: v1 32 | kind: Service 33 | metadata: 34 | name: web 35 | spec: 36 | type: NodePort 37 | selector: 38 | app: airflow 39 | tier: web 40 | ports: 41 | - name: web 42 | protocol: TCP 43 | port: 8080 44 | targetPort: web 45 | nodePort: 31317 46 | --- 47 | apiVersion: v1 48 | kind: Service 49 | metadata: 50 | name: flower 51 | spec: 52 | type: NodePort 53 | selector: 54 | app: airflow 55 | tier: flower 56 | ports: 57 | - name: flower 58 | protocol: TCP 59 | port: 5555 60 | targetPort: flower 61 | nodePort: 32081 62 | --- 63 | apiVersion: extensions/v1beta1 64 | kind: Deployment 65 | metadata: 66 | name: postgres 67 | spec: 68 | replicas: 1 69 | template: 70 | metadata: 71 | labels: 72 | app: airflow 73 | tier: db 74 | spec: 75 | containers: 76 | - name: postgres 77 | image: postgres:9.6 78 | resources: 79 | requests: 80 | memory: "64Mi" 81 | cpu: "200m" 82 | limits: 83 | memory: "128Mi" 84 | cpu: "400m" 85 | ports: 86 | - name: postgres 87 | containerPort: 5432 88 | env: 89 | - name: POSTGRES_USER 90 | value: "airflow" 91 | - name: POSTGRES_PASSWORD 92 | value: "airflow" 93 | - name: POSTGRES_DB 94 | value: "airflow" 95 | --- 96 | apiVersion: extensions/v1beta1 97 | kind: Deployment 98 | metadata: 99 | name: redis 100 | spec: 101 | replicas: 1 102 | template: 103 | metadata: 104 | labels: 105 | app: airflow 106 | tier: backend 107 | spec: 108 | restartPolicy: Always 109 | containers: 110 | - name: redis 111 | image: redis:3.2.7 112 | resources: 113 | requests: 114 | memory: "32Mi" 115 | cpu: "200m" 116 | limits: 117 | memory: "64Mi" 118 | cpu: "400m" 119 | ports: 120 | - name: redis 121 | containerPort: 6379 122 | --- 123 | apiVersion: extensions/v1beta1 124 | kind: Deployment 125 | metadata: 126 | name: web 127 | spec: 128 | replicas: 1 129 | strategy: 130 | type: Recreate 131 | template: 132 | metadata: 133 | labels: 134 | app: airflow 135 | tier: web 136 | spec: 137 | securityContext: 138 | runAsUser: 0 139 | fsGroup: 0 140 | volumes: 141 | - name: airflowdagdir 142 | hostPath: 143 | path: "/data/dags/" 144 | - name: airflowpluginsdir 145 | hostPath: 146 | path: "/data/plugins/" 147 | restartPolicy: Always 148 | containers: 149 | - args: 150 | - webserver 151 | env: 152 | - name: EXECUTOR 153 | value: Celery 154 | - name: FERNET_KEY 155 | value: 46BKJoQYlPPOexq0OhDZnIlNepKFf87WFwLbfzqDDho= 156 | - name: LOAD_EX 157 | value: "n" 158 | - name: POSTGRES_USER 159 | value: "airflow" 160 | - name: POSTGRES_PASSWORD 161 | value: "airflow" 162 | - name: POSTGRES_DB 163 | value: "airflow" 164 | - name: POSTGRES_HOST 165 | value: "postgres" 166 | - name: POSTGRES_PORT 167 | value: "5432" 168 | - name: REDIS_HOST 169 | value: "redis" 170 | - name: REDIS_PORT 171 | value: "6379" 172 | - name: AIRFLOW_HOME 173 | 
value: "/usr/local/airflow" 174 | - name: AIRFLOW__CORE__MAX_ACTIVE_RUNS_PER_DAG 175 | value: "1" 176 | image: puckel/docker-airflow:1.10.1 177 | name: webserver 178 | ports: 179 | - name: web 180 | containerPort: 8080 181 | volumeMounts: 182 | - name: airflowdagdir 183 | mountPath: "/usr/local/airflow/dags" 184 | - name: airflowpluginsdir 185 | mountPath: "/usr/local/airflow/plugins" 186 | --- 187 | apiVersion: extensions/v1beta1 188 | kind: Deployment 189 | metadata: 190 | name: flower 191 | spec: 192 | replicas: 1 193 | strategy: 194 | type: Recreate 195 | template: 196 | metadata: 197 | labels: 198 | app: airflow 199 | tier: flower 200 | spec: 201 | securityContext: 202 | runAsUser: 0 203 | fsGroup: 0 204 | volumes: 205 | - name: airflowdagdir 206 | hostPath: 207 | path: "/data/dags/" 208 | - name: airflowpluginsdir 209 | hostPath: 210 | path: "/data/plugins/" 211 | restartPolicy: Always 212 | containers: 213 | - args: 214 | - flower 215 | env: 216 | - name: FLOWER_PORT 217 | value: "5555" 218 | - name: EXECUTOR 219 | value: Celery 220 | - name: FERNET_KEY 221 | value: 46BKJoQYlPPOexq0OhDZnIlNepKFf87WFwLbfzqDDho= 222 | - name: LOAD_EX 223 | value: "n" 224 | - name: POSTGRES_USER 225 | value: "airflow" 226 | - name: POSTGRES_PASSWORD 227 | value: "airflow" 228 | - name: POSTGRES_DB 229 | value: "airflow" 230 | - name: POSTGRES_HOST 231 | value: "postgres" 232 | - name: POSTGRES_PORT 233 | value: "5432" 234 | - name: REDIS_HOST 235 | value: "redis" 236 | - name: REDIS_PORT 237 | value: "6379" 238 | - name: AIRFLOW_HOME 239 | value: "/usr/local/airflow" 240 | image: puckel/docker-airflow:1.10.1 241 | resources: 242 | requests: 243 | memory: "64Mi" 244 | cpu: "200m" 245 | limits: 246 | memory: "128Mi" 247 | cpu: "400m" 248 | name: flower 249 | ports: 250 | - name: flower 251 | containerPort: 5555 252 | volumeMounts: 253 | - name: airflowdagdir 254 | mountPath: "/usr/local/airflow/dags" 255 | - name: airflowpluginsdir 256 | mountPath: "/usr/local/airflow/plugins" 257 | --- 258 | apiVersion: extensions/v1beta1 259 | kind: Deployment 260 | metadata: 261 | name: scheduler 262 | spec: 263 | replicas: 1 264 | template: 265 | metadata: 266 | labels: 267 | app: airflow 268 | tier: scheduler 269 | spec: 270 | restartPolicy: Always 271 | securityContext: 272 | runAsUser: 0 273 | fsGroup: 0 274 | volumes: 275 | - name: airflowdagdir 276 | hostPath: 277 | path: "/data/dags/" 278 | - name: airflowpluginsdir 279 | hostPath: 280 | path: "/data/plugins/" 281 | containers: 282 | - args: 283 | - scheduler 284 | env: 285 | - name: EXECUTOR 286 | value: Celery 287 | - name: FERNET_KEY 288 | value: 46BKJoQYlPPOexq0OhDZnIlNepKFf87WFwLbfzqDDho= 289 | - name: LOAD_EX 290 | value: "n" 291 | - name: POSTGRES_USER 292 | value: "airflow" 293 | - name: POSTGRES_PASSWORD 294 | value: "airflow" 295 | - name: POSTGRES_DB 296 | value: "airflow" 297 | - name: POSTGRES_HOST 298 | value: "postgres" 299 | - name: POSTGRES_PORT 300 | value: "5432" 301 | - name: REDIS_HOST 302 | value: "redis" 303 | - name: REDIS_PORT 304 | value: "6379" 305 | - name: AIRFLOW_HOME 306 | value: "/usr/local/airflow" 307 | - name: AIRFLOW__CORE__MAX_ACTIVE_RUNS_PER_DAG 308 | value: "1" 309 | image: puckel/docker-airflow:1.10.1 310 | resources: 311 | requests: 312 | memory: "64Mi" 313 | cpu: "200m" 314 | limits: 315 | memory: "128Mi" 316 | cpu: "400m" 317 | name: scheduler 318 | volumeMounts: 319 | - name: airflowdagdir 320 | mountPath: "/usr/local/airflow/dags" 321 | - name: airflowpluginsdir 322 | mountPath: "/usr/local/airflow/plugins" 323 | --- 
324 | apiVersion: extensions/v1beta1 325 | kind: Deployment 326 | metadata: 327 | name: worker 328 | spec: 329 | replicas: 1 330 | template: 331 | metadata: 332 | labels: 333 | app: airflow 334 | tier: worker 335 | spec: 336 | restartPolicy: Always 337 | volumes: 338 | - name: airflowdagdir 339 | hostPath: 340 | path: "/data/dags/" 341 | - name: airflowpluginsdir 342 | hostPath: 343 | path: "/data/plugins/" 344 | containers: 345 | - args: 346 | - worker 347 | env: 348 | - name: EXECUTOR 349 | value: Celery 350 | - name: FERNET_KEY 351 | value: 46BKJoQYlPPOexq0OhDZnIlNepKFf87WFwLbfzqDDho= 352 | - name: LOAD_EX 353 | value: "n" 354 | - name: POSTGRES_USER 355 | value: "airflow" 356 | - name: POSTGRES_PASSWORD 357 | value: "airflow" 358 | - name: POSTGRES_DB 359 | value: "airflow" 360 | - name: POSTGRES_HOST 361 | value: "postgres" 362 | - name: POSTGRES_PORT 363 | value: "5432" 364 | - name: REDIS_HOST 365 | value: "redis" 366 | - name: REDIS_PORT 367 | value: "6379" 368 | - name: AIRFLOW_HOME 369 | value: "/usr/local/airflow" 370 | - name: AIRFLOW__CORE__EXECUTOR 371 | value: CeleryExecutor 372 | - name: AIRFLOW__CORE__SQL_ALCHEMY_CONN 373 | value: "postgresql+psycopg2://airflow:airflow@postgres:5432/airflow" 374 | - name: AIRFLOW__CELERY__RESULT_BACKEND 375 | value: "db+postgresql://airflow:airflow@postgres:5432/airflow" 376 | - name: AIRFLOW__CELERY__BROKER_URL 377 | value: "redis://redis:6379/1" 378 | - name: AIRFLOW__CORE__MAX_ACTIVE_RUNS_PER_DAG 379 | value: "1" 380 | - name: AIRFLOW__CELERY__WORKER_CONCURRENCY 381 | value: "1" 382 | image: puckel/docker-airflow:1.10.1 383 | resources: 384 | requests: 385 | memory: "1Gi" 386 | cpu: "200m" 387 | limits: 388 | memory: "1.5Gi" 389 | cpu: "400m" 390 | name: worker 391 | volumeMounts: 392 | - name: airflowdagdir 393 | mountPath: "/usr/local/airflow/dags" 394 | - name: airflowpluginsdir 395 | mountPath: "/usr/local/airflow/plugins" -------------------------------------------------------------------------------- /src/main/python/plugins/rest_api_plugin.py: -------------------------------------------------------------------------------- 1 | __author__ = 'robertsanders' 2 | __version__ = "1.0.4" 3 | 4 | from airflow.models import DagBag, DagModel 5 | from airflow.plugins_manager import AirflowPlugin 6 | from airflow import configuration 7 | from airflow.www.app import csrf 8 | 9 | from flask import Blueprint, request, jsonify 10 | from flask_admin import BaseView, expose 11 | 12 | from datetime import datetime 13 | import airflow 14 | import logging 15 | import subprocess 16 | import os 17 | import socket 18 | 19 | """ 20 | CLIs this REST API exposes are Defined here: http://airflow.incubator.apache.org/cli.html 21 | """ 22 | 23 | # todo: dynamically decide which api objects to display based off which version of airflow is installed - http://stackoverflow.com/questions/1714027/version-number-comparison 24 | 25 | # Location of the REST Endpoint 26 | # Note: Changing this will only effect where the messages are posted to on the web interface and will not change where the endpoint actually resides 27 | rest_api_endpoint = "/admin/rest_api/api" 28 | 29 | # Getting Versions and Global variables 30 | hostname = socket.gethostname() 31 | airflow_version = airflow.__version__ 32 | rest_api_plugin_version = __version__ 33 | 34 | # Getting configurations from airflow.cfg file 35 | airflow_webserver_base_url = configuration.get('webserver', 'BASE_URL') 36 | airflow_base_log_folder = configuration.get('core', 'BASE_LOG_FOLDER') 37 | airflow_dags_folder = 
configuration.get('core', 'DAGS_FOLDER') 38 | log_loading = configuration.getboolean("rest_api_plugin", "LOG_LOADING") if configuration.has_option("rest_api_plugin", "LOG_LOADING") else False 39 | filter_loading_messages_in_cli_response = configuration.getboolean("rest_api_plugin", "FILTER_LOADING_MESSAGES_IN_CLI_RESPONSE") if configuration.has_option("rest_api_plugin", "FILTER_LOADING_MESSAGES_IN_CLI_RESPONSE") else True 40 | airflow_rest_api_plugin_http_token_header_name = configuration.get("rest_api_plugin", "REST_API_PLUGIN_HTTP_TOKEN_HEADER_NAME") if configuration.has_option("rest_api_plugin", "REST_API_PLUGIN_HTTP_TOKEN_HEADER_NAME") else "rest_api_plugin_http_token" 41 | airflow_expected_http_token = configuration.get("rest_api_plugin", "REST_API_PLUGIN_EXPECTED_HTTP_TOKEN") if configuration.has_option("rest_api_plugin", "REST_API_PLUGIN_EXPECTED_HTTP_TOKEN") else None 42 | 43 | # Using UTF-8 Encoding so that response messages don't have any characters in them that can't be handled 44 | os.environ['PYTHONIOENCODING'] = 'utf-8' 45 | 46 | if log_loading: 47 | logging.info("Initializing Airflow REST API Plugin with configs:") 48 | logging.info("\trest_api_endpoint: " + str(rest_api_endpoint)) 49 | logging.info("\thostname: " + str(hostname)) 50 | logging.info("\tairflow_version: " + str(airflow_version)) 51 | logging.info("\trest_api_plugin_version: " + str(rest_api_plugin_version)) 52 | logging.info("\tairflow_webserver_base_url: " + str(airflow_webserver_base_url)) 53 | logging.info("\tairflow_base_log_folder: " + str(airflow_base_log_folder)) 54 | logging.info("\tairflow_dags_folder: " + str(airflow_dags_folder)) 55 | logging.info("\tairflow_rest_api_plugin_http_token_header_name: " + str(airflow_rest_api_plugin_http_token_header_name)) 56 | logging.info("\tairflow_expected_http_token: OMITTED_FOR_SECURITY") 57 | logging.info("\tfilter_loading_messages_in_cli_response: " + str(filter_loading_messages_in_cli_response)) 58 | 59 | """ 60 | Metadata that defines a single API: 61 | { 62 | "name": "{string}", # Name of the API (cli command to be executed) 63 | "description": "{string}", # Description of the API 64 | "airflow_version": "{string}", # Version the API was available in to allow people to better determine if the API is available. (to be displayed on the Admin page) 65 | "http_method": "{string}", # HTTP method to use when calling the function. (Default: GET) (Optional) 66 | "background_mode": {boolean}, # Whether to run the process in the background if its a CLI API (Optional) 67 | "arguments": [ # List of arguments that can be provided to the API 68 | { 69 | "name": "{string}", # Name of the argument 70 | "description": "{string}", # Description of the argument 71 | "form_input_type": "{string}", # Type of input to use on the Admin page for the argument 72 | "required": {boolean}, # Whether the argument is required upon submission 73 | "cli_end_position": {int} # In the case with a CLI command that the arguments value should be appended on to the end (for example: airflow trigger_dag some_dag_id), this is the position that the argument should be provided in the CLI command. 
(Optional) 74 | } 75 | ], 76 | "fixed_arguments": [ # List of arguments that will always be used by the API endpoint and can't be changed 77 | { 78 | "name": "{string}", # Name of the argument 79 | "description": "{string}", # Description of the argument 80 | "fixed_value": "{string}" # Fixed value that will always be used 81 | } 82 | ], 83 | "post_arguments": [ # List of arguments that can be provided in the POST body to the API 84 | { 85 | "name": "{string}", # Name of the argument 86 | "description": "{string}", # Description of the argument 87 | "form_input_type": "{string}", # Type of input to use on the Admin page for the argument 88 | "required": {boolean}, # Whether the argument is required upon submission 89 | } 90 | ] 91 | }, 92 | """ 93 | 94 | # Metadata about the APIs and how to call them. Representing them like this allows us to dynamically generate the APIs 95 | # in the admin page and dynamically execute them. This also allows us to easily add new ones. 96 | # API Object definition is described in the comment block above. 97 | apis_metadata = [ 98 | { 99 | "name": "version", 100 | "description": "Displays the version of Airflow you're using", 101 | "airflow_version": "1.0.0 or greater", 102 | "http_method": "GET", 103 | "arguments": [] 104 | }, 105 | { 106 | "name": "rest_api_plugin_version", 107 | "description": "Displays the version of this REST API Plugin you're using", 108 | "airflow_version": "None - Custom API", 109 | "http_method": "GET", 110 | "arguments": [] 111 | }, 112 | { 113 | "name": "render", 114 | "description": "Render a task instance's template(s)", 115 | "airflow_version": "1.7.0 or greater", 116 | "http_method": "GET", 117 | "arguments": [ 118 | {"name": "dag_id", "description": "The id of the dag", "form_input_type": "text", "required": True, "cli_end_position": 1}, 119 | {"name": "task_id", "description": "The id of the task", "form_input_type": "text", "required": True, "cli_end_position": 2}, 120 | {"name": "execution_date", "description": "The execution date of the DAG (Example: 2017-01-02T03:04:05)", "form_input_type": "text", "required": True, "cli_end_position": 3}, 121 | {"name": "subdir", "description": "File location or directory from which to look for the dag", "form_input_type": "text", "required": False} 122 | ] 123 | }, 124 | { 125 | "name": "variables", 126 | "description": "CRUD operations on variables", 127 | "airflow_version": "1.7.1 or greater", 128 | "http_method": "GET", 129 | "arguments": [ 130 | {"name": "set", "description": "Set a variable. 
Expected input in the form: KEY VALUE.", "form_input_type": "text", "required": False}, 131 | {"name": "get", "description": "Get value of a variable", "form_input_type": "text", "required": False}, 132 | {"name": "json", "description": "Deserialize JSON variable", "form_input_type": "checkbox", "required": False}, 133 | {"name": "default", "description": "Default value returned if variable does not exist", "form_input_type": "text", "required": False}, 134 | {"name": "import", "description": "Import variables from JSON file", "form_input_type": "text", "required": False}, 135 | {"name": "export", "description": "Export variables to JSON file", "form_input_type": "text", "required": False}, 136 | {"name": "delete", "description": "Delete a variable", "form_input_type": "text", "required": False} 137 | ] 138 | }, 139 | { 140 | "name": "connections", 141 | "description": "List/Add/Delete connections", 142 | "airflow_version": "1.8.0 or greater", 143 | "http_method": "GET", 144 | "arguments": [ 145 | {"name": "list", "description": "List all connections", "form_input_type": "checkbox", "required": False}, 146 | {"name": "add", "description": "Add a connection", "form_input_type": "checkbox", "required": False}, 147 | {"name": "delete", "description": "Delete a connection", "form_input_type": "checkbox", "required": False}, 148 | {"name": "conn_id", "description": "Connection id, required to add/delete a connection", "form_input_type": "text", "required": False}, 149 | {"name": "conn_uri", "description": "Connection URI, required to add a connection", "form_input_type": "text", "required": False}, 150 | {"name": "conn_extra", "description": "Connection 'Extra' field, optional when adding a connection", "form_input_type": "text", "required": False} 151 | ] 152 | }, 153 | { 154 | "name": "pause", 155 | "description": "Pauses a DAG", 156 | "airflow_version": "1.7.0 or greater", 157 | "http_method": "GET", 158 | "arguments": [ 159 | {"name": "dag_id", "description": "The id of the dag", "form_input_type": "text", "required": True, "cli_end_position": 1}, 160 | {"name": "subdir", "description": "File location or directory from which to look for the dag", "form_input_type": "text", "required": False} 161 | ] 162 | }, 163 | { 164 | "name": "unpause", 165 | "description": "Unpauses a DAG", 166 | "airflow_version": "1.7.0 or greater", 167 | "http_method": "GET", 168 | "arguments": [ 169 | {"name": "dag_id", "description": "The id of the dag", "form_input_type": "text", "required": True, "cli_end_position": 1}, 170 | {"name": "subdir", "description": "File location or directory from which to look for the dag", "form_input_type": "text", "required": False} 171 | ] 172 | }, 173 | { 174 | "name": "task_failed_deps", 175 | "description": "Returns the unmet dependencies for a task instance from the perspective of the scheduler. 
In other words, why a task instance doesn't get scheduled and then queued by the scheduler, and then run by an executor.", 176 | "airflow_version": "1.8.0 or greater", 177 | "http_method": "GET", 178 | "arguments": [ 179 | {"name": "dag_id", "description": "The id of the dag", "form_input_type": "text", "required": True, "cli_end_position": 1}, 180 | {"name": "task_id", "description": "The id of the task", "form_input_type": "text", "required": True, "cli_end_position": 2}, 181 | {"name": "execution_date", "description": "The execution date of the DAG (Example: 2017-01-02T03:04:05)", "form_input_type": "text", "required": True, "cli_end_position": 3}, 182 | {"name": "subdir", "description": "File location or directory from which to look for the dag", "form_input_type": "text", "required": False} 183 | ] 184 | }, 185 | { # todo: should print out the run id 186 | "name": "trigger_dag", 187 | "description": "Trigger a DAG run", 188 | "airflow_version": "1.6.0 or greater", 189 | "http_method": "GET", 190 | "arguments": [ 191 | {"name": "dag_id", "description": "The id of the dag", "form_input_type": "text", "required": True, "cli_end_position": 1}, 192 | {"name": "subdir", "description": "File location or directory from which to look for the dag", "form_input_type": "text", "required": False}, 193 | {"name": "run_id", "description": "Helps to identify this run", "form_input_type": "text", "required": False}, 194 | {"name": "conf", "description": "JSON string that gets pickled into the DagRun's conf attribute", "form_input_type": "text", "required": False}, 195 | {"name": "exec_date", "description": "The execution date of the DAG", "form_input_type": "text", "required": False} 196 | ] 197 | }, 198 | { 199 | "name": "test", 200 | "description": "Test a task instance.
This will run a task without checking for dependencies or recording its state in the database.", 201 | "airflow_version": "0.1 or greater", 202 | "http_method": "GET", 203 | "arguments": [ 204 | {"name": "dag_id", "description": "The id of the dag", "form_input_type": "text", "required": True, "cli_end_position": 1}, 205 | {"name": "task_id", "description": "The id of the task", "form_input_type": "text", "required": True, "cli_end_position": 2}, 206 | {"name": "execution_date", "description": "The execution date of the DAG (Example: 2017-01-02T03:04:05)", "form_input_type": "text", "required": True, "cli_end_position": 3}, 207 | {"name": "subdir", "description": "File location or directory from which to look for the dag", "form_input_type": "text", "required": False}, 208 | {"name": "dry_run", "description": "Perform a dry run", "form_input_type": "checkbox", "required": False}, 209 | {"name": "task_params", "description": "Sends a JSON params dict to the task", "form_input_type": "text", "required": False} 210 | ] 211 | }, 212 | { 213 | "name": "dag_state", 214 | "description": "Get the status of a dag run", 215 | "airflow_version": "1.8.0 or greater", 216 | "http_method": "GET", 217 | "arguments": [ 218 | {"name": "dag_id", "description": "The id of the dag", "form_input_type": "text", "required": True, "cli_end_position": 1}, 219 | {"name": "execution_date", "description": "The execution date of the DAG (Example: 2017-01-02T03:04:05)", "form_input_type": "text", "required": True, "cli_end_position": 2}, 220 | {"name": "subdir", "description": "File location or directory from which to look for the dag", "form_input_type": "text", "required": False} 221 | ] 222 | }, 223 | { 224 | "name": "run", 225 | "description": "Run a single task instance", 226 | "airflow_version": "1.0.0 or greater", 227 | "http_method": "GET", 228 | "arguments": [ 229 | {"name": "dag_id", "description": "The id of the dag", "form_input_type": "text", "required": True, "cli_end_position": 1}, 230 | {"name": "task_id", "description": "The id of the task", "form_input_type": "text", "required": True, "cli_end_position": 2}, 231 | {"name": "execution_date", "description": "The execution date of the DAG (Example: 2017-01-02T03:04:05)", "form_input_type": "text", "required": True, "cli_end_position": 3}, 232 | {"name": "subdir", "description": "File location or directory from which to look for the dag", "form_input_type": "text", "required": False}, 233 | {"name": "mark_success", "description": "Mark jobs as succeeded without running them", "form_input_type": "checkbox", "required": False}, 234 | {"name": "force", "description": "Ignore previous task instance state, rerun regardless if task already succeeded", "form_input_type": "checkbox", "required": False}, 235 | {"name": "pool", "description": "Resource pool to use", "form_input_type": "text", "required": False}, 236 | {"name": "cfg_path", "description": "Path to config file to use instead of airflow.cfg", "form_input_type": "text", "required": False}, 237 | {"name": "local", "description": "Run the task using the LocalExecutor", "form_input_type": "checkbox", "required": False}, 238 | {"name": "ignore_all_dependencies", "description": "Ignores all non-critical dependencies, including ignore_ti_state and ignore_task_deps", "form_input_type": "checkbox", "required": False}, 239 | {"name": "ignore_dependencies", "description": "Ignore task-specific dependencies, e.g.
upstream, depends_on_past, and retry delay dependencies", "form_input_type": "checkbox", "required": False}, 240 | {"name": "ignore_depends_on_past", "description": "Ignore depends_on_past dependencies (but respect upstream dependencies)", "form_input_type": "checkbox", "required": False}, 241 | {"name": "ship_dag", "description": "Pickles (serializes) the DAG and ships it to the worker", "form_input_type": "checkbox", "required": False}, 242 | {"name": "pickle", "description": "Serialized pickle object of the entire dag (used internally)", "form_input_type": "text", "required": False}, 243 | ] 244 | }, 245 | { 246 | "name": "list_tasks", 247 | "description": "List the tasks within a DAG", 248 | "airflow_version": "0.1 or greater", 249 | "http_method": "GET", 250 | "arguments": [ 251 | {"name": "dag_id", "description": "The id of the dag", "form_input_type": "text", "required": True, "cli_end_position": 1}, 252 | {"name": "tree", "description": "Tree view", "form_input_type": "checkbox", "required": False}, 253 | {"name": "subdir", "description": "File location or directory from which to look for the dag", "form_input_type": "text", "required": False} 254 | ] 255 | }, 256 | { 257 | "name": "backfill", 258 | "description": "Run subsections of a DAG for a specified date range", 259 | "airflow_version": "0.1 or greater", 260 | "http_method": "GET", 261 | "arguments": [ 262 | {"name": "dag_id", "description": "The id of the dag", "form_input_type": "text", "required": True, "cli_end_position": 1}, 263 | {"name": "task_regex", "description": "The regex to filter specific task_ids to backfill (optional)", "form_input_type": "text", "required": False}, 264 | {"name": "start_date", "description": "Override start_date YYYY-MM-DD. Either this or the end_date needs to be provided.", "form_input_type": "text", "required": False}, 265 | {"name": "end_date", "description": "Override end_date YYYY-MM-DD. Either this or the start_date needs to be provided.", "form_input_type": "text", "required": False}, 266 | {"name": "mark_success", "description": "Mark jobs as succeeded without running them", "form_input_type": "checkbox", "required": False}, 267 | {"name": "local", "description": "Run the task using the LocalExecutor", "form_input_type": "checkbox", "required": False}, 268 | {"name": "donot_pickle", "description": "Do not attempt to pickle the DAG object to send over to the workers, just tell the workers to run their version of the code.", "form_input_type": "checkbox", "required": False}, 269 | {"name": "include_adhoc", "description": "Include dags with the adhoc argument.", "form_input_type": "checkbox", "required": False}, 270 | {"name": "ignore_dependencies", "description": "Ignore task-specific dependencies, e.g. 
upstream, depends_on_past, and retry delay dependencies", "form_input_type": "checkbox", "required": False}, 271 | {"name": "ignore_first_depends_on_past", "description": "Ignores depends_on_past dependencies for the first set of tasks only (subsequent executions in the backfill DO respect depends_on_past).", "form_input_type": "checkbox", "required": False}, 272 | {"name": "subdir", "description": "File location or directory from which to look for the dag", "form_input_type": "text", "required": False}, 273 | {"name": "pool", "description": "Resource pool to use", "form_input_type": "text", "required": False}, 274 | {"name": "dry_run", "description": "Perform a dry run", "form_input_type": "checkbox", "required": False} 275 | ] 276 | }, 277 | { 278 | "name": "list_dags", 279 | "description": "List all the DAGs", 280 | "airflow_version": "0.1 or greater", 281 | "http_method": "GET", 282 | "arguments": [ 283 | {"name": "subdir", "description": "File location or directory from which to look for the dag", "form_input_type": "text", "required": False}, 284 | {"name": "report", "description": "Show DagBag loading report", "form_input_type": "checkbox", "required": False} 285 | ] 286 | }, 287 | { 288 | "name": "kerberos", 289 | "description": "Start a kerberos ticket renewer", 290 | "airflow_version": "1.6.0 or greater", 291 | "http_method": "GET", 292 | "background_mode": True, 293 | "arguments": [ 294 | {"name": "principal", "description": "kerberos principal", "form_input_type": "text", "required": True, "cli_end_position": 1}, 295 | {"name": "keytab", "description": "keytab", "form_input_type": "text", "required": False}, 296 | {"name": "pid", "description": "PID file location", "form_input_type": "text", "required": False}, 297 | {"name": "daemon", "description": "Daemonize instead of running in the foreground", "form_input_type": "checkbox", "required": False}, 298 | {"name": "stdout", "description": "Redirect stdout to this file", "form_input_type": "text", "required": False}, 299 | {"name": "stderr", "description": "Redirect stderr to this file", "form_input_type": "text", "required": False}, 300 | {"name": "log-file", "description": "Location of the log file", "form_input_type": "text", "required": False} 301 | ] 302 | }, 303 | { 304 | "name": "worker", 305 | "description": "Start a Celery worker node", 306 | "airflow_version": "0.1 or greater", 307 | "http_method": "GET", 308 | "background_mode": True, 309 | "arguments": [ 310 | {"name": "do_pickle", "description": "Attempt to pickle the DAG object to send over to the workers, instead of letting workers run their version of the code.", "form_input_type": "checkbox", "required": False}, 311 | {"name": "queues", "description": "Comma delimited list of queues to serve", "form_input_type": "text", "required": False}, 312 | {"name": "concurrency", "description": "The number of worker processes", "form_input_type": "text", "required": False}, 313 | {"name": "pid", "description": "PID file location", "form_input_type": "checkbox", "required": False}, 314 | {"name": "daemon", "description": "Daemonize instead of running in the foreground", "form_input_type": "checkbox", "required": False}, 315 | {"name": "stdout", "description": "Redirect stdout to this file", "form_input_type": "text", "required": False}, 316 | {"name": "stderr", "description": "Redirect stderr to this file", "form_input_type": "text", "required": False}, 317 | {"name": "log-file", "description": "Location of the log file", "form_input_type": "text", "required": False} 318 | 
] 319 | }, 320 | { 321 | "name": "flower", 322 | "description": "Start a Celery Flower (the Celery monitoring web UI)", 323 | "airflow_version": "1.0.0 or greater", 324 | "http_method": "GET", 325 | "background_mode": True, 326 | "arguments": [ 327 | {"name": "hostname", "description": "Set the hostname on which to run the server", "form_input_type": "text", "required": False}, 328 | {"name": "port", "description": "The port on which to run the server", "form_input_type": "text", "required": False}, 329 | {"name": "flower_conf", "description": "Configuration file for flower", "form_input_type": "text", "required": False}, 330 | {"name": "broker_api", "description": "Broker api", "form_input_type": "text", "required": False}, 331 | {"name": "pid", "description": "PID file location", "form_input_type": "text", "required": False}, 332 | {"name": "daemon", "description": "Daemonize instead of running in the foreground", "form_input_type": "checkbox", "required": False}, 333 | {"name": "stdout", "description": "Redirect stdout to this file", "form_input_type": "text", "required": False}, 334 | {"name": "stderr", "description": "Redirect stderr to this file", "form_input_type": "text", "required": False}, 335 | {"name": "log-file", "description": "Location of the log file", "form_input_type": "text", "required": False}, 336 | ] 337 | }, 338 | { 339 | "name": "scheduler", 340 | "description": "Start a scheduler instance", 341 | "airflow_version": "1.0.0 or greater", 342 | "http_method": "GET", 343 | "background_mode": True, 344 | "arguments": [ 345 | {"name": "dag_id", "description": "The id of the dag", "form_input_type": "text", "required": False}, 346 | {"name": "subdir", "description": "File location or directory from which to look for the dag", "form_input_type": "text", "required": False}, 347 | {"name": "run-duration", "description": "Set number of seconds to execute before exiting", "form_input_type": "text", "required": False}, 348 | {"name": "num_runs", "description": "Set the number of runs to execute before exiting", "form_input_type": "text", "required": False}, 349 | {"name": "do_pickle", "description": "Attempt to pickle the DAG object to send over to the workers, instead of letting workers run their version of the code.", "form_input_type": "text", "required": False}, 350 | {"name": "pid", "description": "PID file location", "form_input_type": "checkbox", "required": False}, 351 | {"name": "daemon", "description": "Daemonize instead of running in the foreground", "form_input_type": "checkbox", "required": False}, 352 | {"name": "stdout", "description": "Redirect stdout to this file", "form_input_type": "text", "required": False}, 353 | {"name": "stderr", "description": "Redirect stderr to this file", "form_input_type": "text", "required": False}, 354 | {"name": "log-file", "description": "Location of the log file", "form_input_type": "text", "required": False} 355 | ] 356 | }, 357 | { 358 | "name": "task_state", 359 | "description": "Get the status of a task instance", 360 | "airflow_version": "1.0.0 or greater", 361 | "http_method": "GET", 362 | "arguments": [ 363 | {"name": "dag_id", "description": "The id of the dag", "form_input_type": "text", "required": True, "cli_end_position": 1}, 364 | {"name": "task_id", "description": "The id of the task", "form_input_type": "text", "required": True, "cli_end_position": 2}, 365 | {"name": "execution_date", "description": "The execution date of the DAG (Example: 2017-01-02T03:04:05)", "form_input_type": "text", "required": True, "cli_end_position": 3}, 366 |
{"name": "subdir", "description": "File location or directory from which to look for the dag", "form_input_type": "text", "required": False} 367 | ] 368 | }, 369 | { 370 | "name": "pool", 371 | "description": "CRUD operations on pools", 372 | "airflow_version": "1.8.0 or greater", 373 | "http_method": "GET", 374 | "arguments": [ 375 | {"name": "set", "description": "Set pool slot count and description, respectively. Expected input in the form: NAME SLOT_COUNT POOL_DESCRIPTION.", "form_input_type": "text", "required": False}, 376 | {"name": "get", "description": "Get pool info", "form_input_type": "text", "required": False}, 377 | {"name": "delete", "description": "Delete a pool", "form_input_type": "text", "required": False} 378 | ] 379 | }, 380 | { 381 | "name": "serve_logs", 382 | "description": "Serve logs generate by worker", 383 | "airflow_version": "0.1 or greater", 384 | "http_method": "GET", 385 | "background_mode": True, 386 | "arguments": [] 387 | }, 388 | { 389 | "name": "clear", 390 | "description": "Clear a set of task instance, as if they never ran", 391 | "airflow_version": "0.1 or greater", 392 | "http_method": "GET", 393 | "arguments": [ 394 | {"name": "dag_id", "description": "The id of the dag", "form_input_type": "text", "required": True, "cli_end_position": 1}, 395 | {"name": "task_regex", "description": "The regex to filter specific task_ids to backfill (optional)", "form_input_type": "text", "required": False}, 396 | {"name": "start_date", "description": "Override start_date YYYY-MM-DD", "form_input_type": "text", "required": False}, 397 | {"name": "end_date", "description": "Override end_date YYYY-MM-DD", "form_input_type": "text", "required": False}, 398 | {"name": "subdir", "description": "File location or directory from which to look for the dag", "form_input_type": "text", "required": False}, 399 | {"name": "upstream", "description": "Include upstream tasks", "form_input_type": "checkbox", "required": False}, 400 | {"name": "downstream", "description": "Include downstream tasks", "form_input_type": "checkbox", "required": False}, 401 | {"name": "only_failed", "description": "Only failed jobs", "form_input_type": "checkbox", "required": False}, 402 | {"name": "only_running", "description": "Only running jobs", "form_input_type": "checkbox", "required": False}, 403 | {"name": "exclude_subdags", "description": "Exclude subdags", "form_input_type": "checkbox", "required": False} 404 | ], 405 | "fixed_arguments": [ 406 | {"name": "no_confirm", "description": "Do not request confirmation", "fixed_value": ""} 407 | ], 408 | }, 409 | { 410 | "name": "deploy_dag", 411 | "description": "Deploy a new DAG File to the DAGs directory", 412 | "airflow_version": "None - Custom API", 413 | "http_method": "POST", 414 | "post_body_description": "dag_file - POST Body Element - REQUIRED", 415 | "form_enctype": "multipart/form-data", 416 | "arguments": [], 417 | "post_arguments": [ 418 | {"name": "dag_file", "description": "Python file to upload and deploy", "form_input_type": "file", "required": True}, 419 | {"name": "force", "description": "Whether to forcefully upload the file if the file already exists or not", "form_input_type": "checkbox", "required": False}, 420 | {"name": "pause", "description": "The DAG will be forced to be paused when created and override the 'dags_are_paused_at_creation' config.", "form_input_type": "checkbox", "required": False}, 421 | {"name": "unpause", "description": "The DAG will be forced to be unpaused when created and override the 
'dags_are_paused_at_creation' config.", "form_input_type": "checkbox", "required": False} 422 | ] 423 | }, 424 | { 425 | "name": "refresh_dag", 426 | "description": "Refresh a DAG in the Web Server", 427 | "airflow_version": "None - Custom API", 428 | "http_method": "GET", 429 | "arguments": [ 430 | {"name": "dag_id", "description": "The id of the dag", "form_input_type": "text", "required": True} 431 | ] 432 | } 433 | ] 434 | 435 | 436 | # Function used to secure the REST ENDPOINT 437 | def http_token_secure(func): 438 | def secure_check(arg): 439 | logging.info("Rest_API_Plugin.http_token_secure() called") 440 | # If the airflow_expected_http_token configuration value is set, token authentication is enabled. 441 | if airflow_expected_http_token: 442 | logging.info("Performing Token Authentication") 443 | if request.headers.get(airflow_rest_api_plugin_http_token_header_name, None) != airflow_expected_http_token: 444 | warning_message = "Token Authentication Failed" 445 | logging.warning(warning_message) 446 | base_response = REST_API_Response_Util.get_base_response(include_arguments=False) 447 | return REST_API_Response_Util.get_403_error_response(base_response=base_response, output=warning_message) 448 | return func(arg) 449 | 450 | return secure_check 451 | 452 | 453 | # Utility for creating the REST Responses 454 | class REST_API_Response_Util(): 455 | 456 | # Gets the Base Response object with all required response fields included. To be used at the beginning of the REST Call. 457 | @staticmethod 458 | def get_base_response(status="OK", http_response_code=200, call_time=None, include_arguments=True): 459 | base_response = {"status": status, "http_response_code": http_response_code, "call_time": call_time or datetime.now()} 460 | if include_arguments: 461 | base_response["arguments"] = request.args 462 | base_response["post_arguments"] = request.form 463 | return base_response 464 | 465 | # Finalize the Base Response with additional data 466 | @staticmethod 467 | def _get_final_response(base_response, output=None, airflow_cmd=None, http_response_code=None, warning=None): 468 | final_response = base_response 469 | final_response["response_time"] = datetime.now() 470 | if output: 471 | final_response["output"] = output 472 | if airflow_cmd: 473 | final_response["airflow_cmd"] = airflow_cmd 474 | if http_response_code: 475 | final_response["http_response_code"] = http_response_code 476 | if warning: 477 | final_response["warning"] = warning 478 | return jsonify(final_response) 479 | 480 | # Set the Base Response as a 200 HTTP Response object 481 | @staticmethod 482 | def get_200_response(base_response, output=None, airflow_cmd=None, warning=None): 483 | logging.info("Returning a 200 Response Code with response '" + str(output) + "'") 484 | return REST_API_Response_Util._get_final_response(base_response=base_response, output=output, airflow_cmd=airflow_cmd, warning=warning) 485 | 486 | # Set the Base Response and an Error 487 | @staticmethod 488 | def _get_error_response(base_response, error_code, output=None): 489 | base_response["status"] = "ERROR" 490 | return REST_API_Response_Util._get_final_response(base_response=base_response, output=output, http_response_code=error_code), error_code 491 | 492 | # Set the Base Response as a 400 HTTP Response object 493 | @staticmethod 494 | def get_400_error_response(base_response, output=None): 495 | logging.warning("Returning a 400 Response Code with response '" + str(output) + "'") 496 | return
REST_API_Response_Util._get_error_response(base_response, 400, output) 497 | 498 | # Set the Base Response as a 403 HTTP Response object 499 | @staticmethod 500 | def get_403_error_response(base_response, output=None): 501 | logging.warning("Returning a 403 Response Code with response '" + str(output) + "'") 502 | return REST_API_Response_Util._get_error_response(base_response, 403, output) 503 | 504 | # Set the Base Response as a 500 HTTP Response object 505 | @staticmethod 506 | def get_500_error_response(base_response, output=None): 507 | logging.warning("Returning a 500 Response Code with response '" + str(output) + "'") 508 | return REST_API_Response_Util._get_error_response(base_response, 500, output) 509 | 510 | 511 | # REST_API View which extends the flask_admin BaseView 512 | class REST_API(BaseView): 513 | 514 | # Checks a string object to see if it is none or empty so we can determine if an argument (passed to the rest api) is provided 515 | @staticmethod 516 | def is_arg_not_provided(arg): 517 | return arg is None or arg == "" 518 | 519 | # Get the DagBag which has a list of all the current Dags 520 | @staticmethod 521 | def get_dagbag(): 522 | return DagBag() 523 | 524 | # '/' Endpoint where the Admin page is which allows you to view the APIs available and trigger them 525 | @expose('/') 526 | def index(self): 527 | logging.info("REST_API.index() called") 528 | 529 | # get the information that we want to display on the page regarding the dags that are available 530 | dagbag = self.get_dagbag() 531 | dags = [] 532 | for dag_id in dagbag.dags: 533 | orm_dag = DagModel.get_current(dag_id) 534 | dags.append({ 535 | "dag_id": dag_id, 536 | "is_active": (not orm_dag.is_paused) if orm_dag is not None else False 537 | }) 538 | 539 | return self.render("rest_api_plugin/index.html", 540 | dags=dags, 541 | airflow_webserver_base_url=airflow_webserver_base_url, 542 | rest_api_endpoint=rest_api_endpoint, 543 | apis_metadata=apis_metadata, 544 | airflow_version=airflow_version, 545 | rest_api_plugin_version=rest_api_plugin_version 546 | ) 547 | 548 | # '/api' REST Endpoint where API requests should all come in 549 | @csrf.exempt # Exempt this endpoint from CSRF protection 550 | @expose('/api', methods=["GET", "POST"]) 551 | @http_token_secure # On each request, validate the HTTP token if token authentication is enabled 552 | def api(self): 553 | base_response = REST_API_Response_Util.get_base_response() 554 | 555 | # Get the api that you want to execute 556 | api = request.args.get('api') 557 | if api is not None: 558 | api = api.strip().lower() 559 | logging.info("REST_API.api() called (api: " + str(api) + ")") 560 | 561 | # Validate that the API is provided 562 | if self.is_arg_not_provided(api): 563 | logging.warning("api argument not provided") 564 | return REST_API_Response_Util.get_400_error_response(base_response, "API should be provided") 565 | 566 | # Get the api_metadata entry from the apis_metadata list that corresponds to the api we want to run.
567 | api_metadata = None 568 | for test_api_metadata in apis_metadata: 569 | if test_api_metadata["name"] == api: 570 | api_metadata = test_api_metadata 571 | if api_metadata is None: 572 | logging.info("api '" + str(api) + "' was not found in the apis list in the REST API Plugin") 573 | return REST_API_Response_Util.get_400_error_response(base_response, "API '" + str(api) + "' was not found") 574 | 575 | # check if all the required arguments are provided 576 | missing_required_arguments = [] 577 | dag_id = None 578 | for argument in api_metadata["arguments"]: 579 | argument_name = argument["name"] 580 | argument_value = request.args.get(argument_name) 581 | if argument["required"]: 582 | if self.is_arg_not_provided(argument_value): 583 | missing_required_arguments.append(argument_name) 584 | if argument_name == "dag_id" and argument_value is not None: 585 | dag_id = argument_value.strip() 586 | if len(missing_required_arguments) > 0: 587 | logging.warning("Missing required arguments: " + str(missing_required_arguments)) 588 | return REST_API_Response_Util.get_400_error_response(base_response, "The argument(s) " + str(missing_required_arguments) + " are required") 589 | 590 | # Check to make sure that the DAG you're referring to, already exists. 591 | dag_bag = self.get_dagbag() 592 | if dag_id is not None and dag_id not in dag_bag.dags: 593 | logging.info("DAG_ID '" + str(dag_id) + "' was not found in the DagBag list '" + str(dag_bag.dags) + "'") 594 | return REST_API_Response_Util.get_400_error_response(base_response, "The DAG ID '" + str(dag_id) + "' does not exist") 595 | 596 | # Deciding which function to use based off the API object that was requested. Some functions are custom and need to be manually routed to. 597 | if api == "version": 598 | final_response = self.version(base_response) 599 | elif api == "rest_api_plugin_version": 600 | final_response = self.rest_api_plugin_version(base_response) 601 | elif api == "deploy_dag": 602 | final_response = self.deploy_dag(base_response) 603 | elif api == "refresh_dag": 604 | final_response = self.refresh_dag(base_response) 605 | else: 606 | final_response = self.execute_cli(base_response, api_metadata) 607 | 608 | return final_response 609 | 610 | # General execution of a CLI command 611 | # A command will be assembled and then passed to the OS as a commandline function and the results will be returned 612 | def execute_cli(self, base_response, api_metadata): 613 | logging.info("Executing cli function") 614 | 615 | # getting the largest cli_end_position in the api_metadata object so that the cli function can be assembled 616 | largest_end_argument_value = 0 617 | for argument in api_metadata.get("arguments", []): 618 | if argument.get("cli_end_position") is not None and argument["cli_end_position"] > largest_end_argument_value: 619 | largest_end_argument_value = argument["cli_end_position"] 620 | 621 | # starting to create the airflow_cmd function 622 | airflow_cmd_split = ["airflow", api_metadata["name"]] 623 | 624 | # appending arguments to the airflow_cmd_split array and setting arguments aside in the end_arguments array to be appended onto the end of airflow_cmd_split 625 | end_arguments = [0] * largest_end_argument_value 626 | for argument in api_metadata["arguments"]: 627 | argument_name = argument["name"] 628 | argument_value = request.args.get(argument_name) 629 | logging.info("argument_name: " + str(argument_name) + ", argument_value: " + str(argument_value)) 630 | if argument_value is not None: 631 | # if the argument should 
be appended onto the end, find the position and add it to the end_arguments array 632 | if "cli_end_position" in argument: 633 | logging.info("argument['cli_end_position']: " + str(argument['cli_end_position'])) 634 | end_arguments[argument["cli_end_position"]-1] = argument_value 635 | else: 636 | airflow_cmd_split.extend(["--" + argument_name]) 637 | if argument["form_input_type"] != "checkbox": 638 | # Replacing airflow_cmd_split.extend(argument_value.split(" ")) with the append below to fix an issue where configuration 639 | # values contain spaces in them. 640 | airflow_cmd_split.append(argument_value) 641 | else: 642 | logging.warning("argument_value is null") 643 | 644 | # appending fixed arguments that should always be provided to the APIs 645 | for fixed_argument in api_metadata.get("fixed_arguments", []): 646 | fixed_argument_name = fixed_argument["name"] 647 | fixed_argument_value = fixed_argument.get("fixed_value") 648 | logging.info("fixed_argument_name: " + str(fixed_argument_name) + ", fixed_argument_value: " + str(fixed_argument_value)) 649 | if fixed_argument_value is not None: 650 | airflow_cmd_split.extend(["--" + fixed_argument_name]) 651 | if fixed_argument_value: 652 | airflow_cmd_split.extend(fixed_argument_value.split(" ")) 653 | 654 | # appending the end_arguments to the very end 655 | airflow_cmd_split.extend(end_arguments) 656 | 657 | run_api_in_background_mode = "background_mode" in api_metadata and api_metadata["background_mode"] 658 | 659 | # handling the case where the process should be run in the background 660 | if run_api_in_background_mode: 661 | # if a log file is provided, then that should be used to dump the output of the call 662 | if request.args.get("log-file") is None: 663 | airflow_cmd_split.append(">> " + str(airflow_base_log_folder) + "/" + api_metadata["name"] + ".log") 664 | # appending a '&' character to run the process in the background 665 | airflow_cmd_split.append("&") 666 | 667 | # joining all the individual arguments and components into a single string 668 | airflow_cmd = " ".join(airflow_cmd_split) 669 | 670 | logging.info("airflow_cmd array: " + str(airflow_cmd_split)) 671 | logging.info("airflow_cmd: " + str(airflow_cmd)) 672 | 673 | # execute the airflow command differently if it's meant to be run in the background 674 | if run_api_in_background_mode: 675 | output = self.execute_cli_command_background_mode(airflow_cmd) 676 | else: 677 | output = self.execute_cli_command(airflow_cmd_split) 678 | 679 | # if desired, filter out the loading messages to reduce the noise in the output 680 | if filter_loading_messages_in_cli_response: 681 | logging.info("Filtering Loading Messages from the CLI Response") 682 | output = self.filter_loading_messages(output) 683 | 684 | return REST_API_Response_Util.get_200_response(base_response=base_response, output=output, airflow_cmd=airflow_cmd) 685 | 686 | # Custom function for the version API 687 | def version(self, base_response): 688 | logging.info("Executing custom 'version' function") 689 | return REST_API_Response_Util.get_200_response(base_response, airflow_version) 690 | 691 | # Custom function for the rest_api_plugin_version API 692 | def rest_api_plugin_version(self, base_response): 693 | logging.info("Executing custom 'rest_api_plugin_version' function") 694 | return REST_API_Response_Util.get_200_response(base_response, rest_api_plugin_version) 695 | 696 | # Custom Function for the deploy_dag API 697 | def deploy_dag(self, base_response): 698 | logging.info("Executing custom 'deploy_dag'
function") 699 | 700 | if 'dag_file' not in request.files or request.files['dag_file'].filename == '': # check if the post request has the file part 701 | logging.warning("The dag_file argument wasn't provided") 702 | return REST_API_Response_Util.get_400_error_response(base_response, "dag_file should be provided") 703 | dag_file = request.files['dag_file'] 704 | 705 | force = True if request.form.get('force') is not None else False 706 | logging.info("deploy_dag force upload: " + str(force)) 707 | 708 | pause = True if request.form.get('pause') is not None else False 709 | logging.info("deploy_dag in pause state: " + str(pause)) 710 | 711 | unpause = True if request.form.get('unpause') is not None else False 712 | logging.info("deploy_dag in unpause state: " + str(unpause)) 713 | 714 | # make sure that the dag_file is a python script 715 | if dag_file and dag_file.filename.endswith(".py"): 716 | save_file_path = os.path.join(airflow_dags_folder, dag_file.filename) 717 | 718 | # Check if the file already exists. 719 | if os.path.isfile(save_file_path) and not force: 720 | logging.warning("File to upload already exists") 721 | return REST_API_Response_Util.get_400_error_response(base_response, "The file '" + save_file_path + "' already exists on host '" + hostname + "'.") 722 | 723 | logging.info("Saving file to '" + save_file_path + "'") 724 | dag_file.save(save_file_path) 725 | 726 | else: 727 | logging.warning("deploy_dag file is not a python file. It does not end with a .py.") 728 | return REST_API_Response_Util.get_400_error_response(base_response, "dag_file is not a *.py file") 729 | 730 | warning = None 731 | # if both the pause and unpause options are provided then skip the pausing and unpausing phase 732 | if not (pause and unpause): 733 | if pause or unpause: 734 | try: 735 | # import the DAG file that was uploaded so that we can get the DAG_ID to execute the command to pause or unpause it 736 | import imp 737 | dag_file = imp.load_source('module.name', save_file_path) 738 | dag_id = dag_file.dag.dag_id 739 | 740 | # run the pause or unpause cli command 741 | airflow_cmd_split = [] 742 | if pause: 743 | airflow_cmd_split = ["airflow", "pause", dag_id] 744 | if unpause: 745 | airflow_cmd_split = ["airflow", "unpause", dag_id] 746 | cli_output = self.execute_cli_command(airflow_cmd_split) 747 | except Exception as e: 748 | warning = "Failed to set the state (pause, unpause) of the DAG: " + str(e) 749 | logging.warning(warning) 750 | else: 751 | warning = "Both options pause and unpause were given. Skipping setting the state (pause, unpause) of the DAG." 
752 | logging.warning(warning) 753 | 754 | return REST_API_Response_Util.get_200_response(base_response=base_response, output="DAG File [{}] has been uploaded".format(dag_file), warning=warning) 755 | 756 | # Custom Function for the refresh_dag API 757 | # This will call the direct function corresponding to the web endpoint '/admin/airflow/refresh' that already exists in Airflow 758 | def refresh_dag(self, base_response): 759 | logging.info("Executing custom 'refresh_dag' function") 760 | dag_id = request.args.get('dag_id') 761 | logging.info("dag_id to refresh: '" + str(dag_id) + "'") 762 | if self.is_arg_not_provided(dag_id): 763 | return REST_API_Response_Util.get_400_error_response(base_response, "dag_id should be provided") 764 | elif " " in dag_id: 765 | return REST_API_Response_Util.get_400_error_response(base_response, "dag_id contains spaces and is therefore an illegal argument") 766 | 767 | try: 768 | from airflow.www.views import Airflow 769 | # NOTE: The request argument 'dag_id' is required for the refresh() function to get the dag_id 770 | refresh_result = Airflow().refresh() 771 | logging.info("Refresh Result: " + str(refresh_result)) 772 | except Exception as e: 773 | error_message = "An error occurred while trying to Refresh the DAG '" + str(dag_id) + "': " + str(e) 774 | logging.error(error_message) 775 | return REST_API_Response_Util.get_500_error_response(base_response, error_message) 776 | 777 | return REST_API_Response_Util.get_200_response(base_response=base_response, output="DAG [{}] is now fresh as a daisy".format(dag_id)) 778 | 779 | # Executes the airflow command passed into it in the background so the function isn't tied to the webserver process 780 | @staticmethod 781 | def execute_cli_command_background_mode(airflow_cmd): 782 | logging.info("Executing CLI Command in the Background") 783 | exit_code = os.system(airflow_cmd) 784 | output = REST_API.get_empty_process_output() 785 | output["stdout"] = "exit_code: " + str(exit_code) 786 | return output 787 | 788 | # General execution of the airflow command passed to it and returns the response 789 | @staticmethod 790 | def execute_cli_command(airflow_cmd_split): 791 | logging.info("Executing CLI Command") 792 | process = subprocess.Popen(airflow_cmd_split, stdout=subprocess.PIPE, stderr=subprocess.PIPE) 793 | process.wait() 794 | return REST_API.collect_process_output(process) 795 | 796 | # gets and empty object that has all the fields a CLI function would have in it. 
797 | @staticmethod 798 | def get_empty_process_output(): 799 | return { 800 | "stderr": "", 801 | "stdin": "", 802 | "stdout": "" 803 | } 804 | 805 | # Get the output of the CLI process and package it in a dict 806 | @staticmethod 807 | def collect_process_output(process): 808 | output = REST_API.get_empty_process_output() 809 | if process.stderr is not None: 810 | output["stderr"] = "" 811 | for line in process.stderr.readlines(): 812 | output["stderr"] += str(line) 813 | if process.stdin is not None: 814 | output["stdin"] = "" 815 | for line in process.stdin.readlines(): 816 | output["stdin"] += str(line) 817 | if process.stdout is not None: 818 | output["stdout"] = "" 819 | for line in process.stdout.readlines(): 820 | output["stdout"] += str(line) 821 | logging.info("RestAPI Output: " + str(output)) 822 | return output 823 | 824 | # Filtering out logging statements from the standard output 825 | # Content like: 826 | # 827 | # [2017-04-19 10:04:34,927] {__init__.py:36} INFO - Using executor CeleryExecutor 828 | # [2017-04-19 10:04:35,926] {models.py:154} INFO - Filling up the DagBag from /Users/... 829 | @staticmethod 830 | def filter_loading_messages(output): 831 | stdout = output["stdout"] 832 | new_stdout_array = stdout.split("\n") 833 | content_to_remove_greatest_index = 0 834 | for index, content in enumerate(new_stdout_array): 835 | if content.startswith("["): 836 | content_to_remove_greatest_index = index 837 | content_to_remove_greatest_index += 1 838 | if len(new_stdout_array) > content_to_remove_greatest_index: 839 | new_stdout_array = new_stdout_array[content_to_remove_greatest_index:] 840 | output["stdout"] = "\n".join(new_stdout_array) 841 | return output 842 | 843 | # Creating View to be used by Plugin 844 | rest_api_view = REST_API(category="Admin", name="REST API Plugin") 845 | 846 | # Creating Blueprint 847 | rest_api_bp = Blueprint( 848 | "rest_api_bp", 849 | __name__, 850 | template_folder='templates', 851 | static_folder='static', 852 | static_url_path='/static/' 853 | ) 854 | 855 | 856 | # Creating the REST_API_Plugin which extends the AirflowPlugin so its imported into Airflow 857 | class REST_API_Plugin(AirflowPlugin): 858 | name = "rest_api" 859 | operators = [] 860 | flask_blueprints = [rest_api_bp] 861 | hooks = [] 862 | executors = [] 863 | admin_views = [rest_api_view] 864 | menu_links = [] 865 | --------------------------------------------------------------------------------
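Usage note (not part of the repository): the plugin above registers a flask-admin view, so with flask-admin's default URL naming the admin page lands at /admin/rest_api/ and every API described in apis_metadata is dispatched through /admin/rest_api/api. The sketch below shows how a client might call it with the requests library. The webserver address, DAG id, token header name, and token value are assumptions for illustration only; the real header name and expected token are read from airflow.cfg near the top of rest_api_plugin.py, and the token check is enforced only when a token is configured there.

# Hypothetical client for the REST API plugin above -- a sketch under the assumptions stated in the note.
import requests

AIRFLOW_BASE_URL = "http://localhost:8080"                    # assumed webserver address
API_ENDPOINT = AIRFLOW_BASE_URL + "/admin/rest_api/api"
HEADERS = {"rest_api_plugin_http_token": "my-secret-token"}   # header name and token are assumptions

# Simple GET-style API: ?api=<name> plus the argument names listed in apis_metadata
print(requests.get(API_ENDPOINT, params={"api": "version"}, headers=HEADERS).json())

# execute_cli-backed API, e.g. trigger_dag; the JSON response echoes the assembled airflow_cmd
resp = requests.get(
    API_ENDPOINT,
    params={"api": "trigger_dag", "dag_id": "hello_world", "run_id": "manual_test_1"},  # dag_id is an assumption
    headers=HEADERS,
)
print(resp.json().get("airflow_cmd"), resp.json().get("output"))

# deploy_dag is a POST with multipart/form-data, matching its form_enctype in apis_metadata
with open("my_new_dag.py", "rb") as dag_file:                 # hypothetical DAG file
    resp = requests.post(
        API_ENDPOINT,
        params={"api": "deploy_dag"},
        files={"dag_file": dag_file},
        data={"force": "on"},                                 # any non-empty value ticks a checkbox-style argument
        headers=HEADERS,
    )
print(resp.json())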