├── .gitignore
├── README.md
├── postgres_operator_example
│   └── dag_postgres_operator.py
├── wrapper_add_test_tasks_to_dag
│   ├── bash_dag.py
│   ├── test_dag_wrapper.py
│   └── test_tasks.py
└── xcom_diff_dag_and_multiply
    └── xcom_dag.py


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
.idea/
dags/__pycache__/


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Apache Airflow Code Samples

[![ko-fi](https://ko-fi.com/img/githubbutton_sm.svg)](https://ko-fi.com/Z8Z237B0Q)

Short guide: How to use PostgresOperator in Apache Airflow?

Code samples in postgres_operator_example/


Apache Airflow: add validation tasks to a production DAG for end-to-end tests

Code samples in wrapper_add_test_tasks_to_dag/


Airflow XCom pull and push under the hood: multiple values, different DAGs, and more

Code samples in xcom_diff_dag_and_multiply/


--------------------------------------------------------------------------------
/postgres_operator_example/dag_postgres_operator.py:
--------------------------------------------------------------------------------
import uuid
from datetime import datetime

from airflow import DAG
from airflow.operators.postgres_operator import PostgresOperator
from airflow.utils.trigger_rule import TriggerRule


dag_params = {
    'dag_id': 'PostgresOperator_dag',
    'start_date': datetime(2019, 10, 7),
    'schedule_interval': None
}


with DAG(**dag_params) as dag:

    # create the target table; without postgres_conn_id the operator uses
    # the default "postgres_default" connection
    create_table = PostgresOperator(
        task_id='create_table',
        sql='''CREATE TABLE new_table(
            custom_id integer NOT NULL,
            timestamp TIMESTAMP NOT NULL,
            user_id VARCHAR (50) NOT NULL
        );''',
    )

    # insert one row of generated values; TriggerRule.ALL_DONE lets it run
    # even if create_table failed (for example because the table already exists)
    insert_row = PostgresOperator(
        task_id='insert_row',
        sql='INSERT INTO new_table VALUES(%s, %s, %s)',
        trigger_rule=TriggerRule.ALL_DONE,
        parameters=(uuid.uuid4().int % 123456789,
                    datetime.now(),
                    uuid.uuid4().hex[:10])
    )

    create_table >> insert_row
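
Note: PostgresOperator resolves its database through an Airflow connection; with no
postgres_conn_id argument it falls back to "postgres_default". Below is a minimal sketch
of registering such a connection from Python; the host, schema, and credentials are
placeholders for a local setup, not values taken from this repository, and the same
connection can also be created in the UI under Admin -> Connections.

from airflow import settings
from airflow.models import Connection

# hypothetical local Postgres credentials -- adjust for your environment
conn = Connection(conn_id='postgres_default', conn_type='postgres',
                  host='localhost', schema='airflow',
                  login='airflow', password='airflow', port=5432)

session = settings.Session()
# only register the connection if it does not exist yet
if not session.query(Connection).filter(Connection.conn_id == conn.conn_id).first():
    session.add(conn)
    session.commit()
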
--------------------------------------------------------------------------------
/wrapper_add_test_tasks_to_dag/bash_dag.py:
--------------------------------------------------------------------------------
from datetime import datetime

from airflow import DAG
from airflow.operators.bash_operator import BashOperator
from airflow.operators.dummy_operator import DummyOperator

file_path = "/usr/local/airflow/target.txt"

with DAG(dag_id="Create_file_DAG", schedule_interval=None,
         start_date=datetime(2019, 10, 29)) as dag:
    # production DAG: three placeholder tasks, then a task that creates the target file
    create_file_task = BashOperator(task_id="create_file",
                                    bash_command=f"touch {file_path}")
    dummy_one = DummyOperator(task_id="dummy_one")
    dummy_two = DummyOperator(task_id="dummy_two")
    dummy_three = DummyOperator(task_id="dummy_three")

    dummy_one >> dummy_two >> dummy_three >> create_file_task


--------------------------------------------------------------------------------
/wrapper_add_test_tasks_to_dag/test_dag_wrapper.py:
--------------------------------------------------------------------------------
from copy import copy


def create_test_dag(production_dag, tasks_to_add):

    test_dag = copy(production_dag)
    # rename the copy so it does not clash with the production DAG
    test_dag._dag_id = test_dag._dag_id + "_test"

    # re-point every task to the copied DAG and remember the leaf tasks
    # (tasks with no downstream dependencies)
    last_tasks_list = []
    for task_id in test_dag.task_dict:
        test_dag.task_dict[task_id]._dag = test_dag
        if not test_dag.task_dict[task_id].downstream_task_ids:
            last_tasks_list.append(task_id)

    for task in tasks_to_add:
        test_dag.log.info('Adding task: %s', task.task_id)
        # DAG.add_tasks() also exists if you prefer to pass the whole list at once
        if task.task_id not in test_dag.task_dict:
            # only add the task if it is not already there, to avoid duplicate-task errors
            test_dag.add_task(task)
        # make the validation task downstream of the production DAG's leaf tasks
        task_in_dag = test_dag.get_task(task.task_id)
        [test_dag.get_task(task_id) for task_id in last_tasks_list] >> task_in_dag

    return test_dag


--------------------------------------------------------------------------------
/wrapper_add_test_tasks_to_dag/test_tasks.py:
--------------------------------------------------------------------------------
# apache airflow DAG
from os import path

from airflow.operators.python_operator import PythonOperator

from bash_dag import dag
from test_dag_wrapper import create_test_dag

file_path = "/usr/local/airflow/target.txt"

# validation task: check that the file created by the production DAG exists
validation_task_file_name = PythonOperator(task_id="validate_file_exists",
                                           python_callable=lambda: path.exists(file_path))

# create the test DAG using the wrapper
test_dag = create_test_dag(dag, [validation_task_file_name])

# expose the new DAG in globals() so the Airflow scheduler can pick it up
globals()[test_dag._dag_id] = test_dag


--------------------------------------------------------------------------------
/xcom_diff_dag_and_multiply/xcom_dag.py:
--------------------------------------------------------------------------------
from datetime import datetime

from airflow import DAG
from airflow.models import XCom
from airflow.operators.python_operator import PythonOperator
from airflow.utils.timezone import make_aware

# the first DAG writes a value to XCom
dag_1 = DAG('write_to_xcom', schedule_interval='*/15 * * * *', start_date=datetime(2019, 12, 1))


def push_xcom_call(**kwargs):
    kwargs['task_instance'].xcom_push(key='test_dag', value=str({'key1': 'value1'}))


xcom_push_task = PythonOperator(
    task_id='xcom_push_task',
    dag=dag_1,
    python_callable=push_xcom_call,
    provide_context=True
)


# the second DAG reads the XCom values written by the first DAG
dag_2 = DAG('read_from_xcom', schedule_interval='*/15 * * * *', start_date=datetime(2019, 12, 3))


def pull_xcom_call(**kwargs):
    # using the XCom model directly (without the context object) returns all values
    # written before the given execution_date
    get_many_xcom_values_with_xcom_class = XCom.get_many(
        execution_date=make_aware(datetime(2019, 12, 3, 0, 51, 00, 00)),
        dag_ids=["write_to_xcom"], include_prior_dates=True)
    print('XCom.get_many')
    print(get_many_xcom_values_with_xcom_class)

    # pulling through the task instance, across DAGs, with include_prior_dates
    get_xcom_with_ti = kwargs['ti'].xcom_pull(dag_id="write_to_xcom", include_prior_dates=True)
    print('ti.xcom_pull with include_prior_dates')
    print(get_xcom_with_ti)


xcom_pull_task = PythonOperator(
    task_id='xcom_pull_task',
    dag=dag_2,
    python_callable=pull_xcom_call,
    provide_context=True
)

--------------------------------------------------------------------------------
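
Note: xcom_pull defaults to the 'return_value' key, while push_xcom_call pushes under the
explicit key 'test_dag'. The sketch below pulls that exact value across DAGs by naming the
key and the pushing task; it assumes the dag_2 and PythonOperator names from
xcom_diff_dag_and_multiply/xcom_dag.py and could be appended to that file.

def pull_specific_xcom(**kwargs):
    # pull one value: from DAG 'write_to_xcom', task 'xcom_push_task', key 'test_dag'
    value = kwargs['ti'].xcom_pull(
        dag_id='write_to_xcom',
        task_ids='xcom_push_task',
        key='test_dag',
        include_prior_dates=True,  # also look at earlier execution dates
    )
    print(value)


xcom_pull_specific_task = PythonOperator(
    task_id='xcom_pull_specific_task',
    dag=dag_2,
    python_callable=pull_specific_xcom,
    provide_context=True
)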