├── .astro └── config.yaml ├── .dockerignore ├── .gitignore ├── Dockerfile ├── README.md ├── dags ├── dag_file_1.py ├── dag_file_2.py ├── dynamic-dags-connections.py ├── dynamic-dags-loop.py └── dynamic-dags-variable.py ├── include ├── dag-config │ ├── dag1-config.json │ └── dag2-config.json ├── dag-template.py └── generate-dag-files.py ├── packages.txt └── requirements.txt /.astro/config.yaml: -------------------------------------------------------------------------------- 1 | project: 2 | name: dynamically-generate-dags 3 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | .astro 2 | .git 3 | .env 4 | airflow_settings.yaml 5 | logs/ -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .git 2 | .env 3 | airflow_settings.yaml 4 | **_pycache_ 5 | *_pycache_ -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM quay.io/astronomer/astro-runtime:8.6.0 2 | 3 | ENV AIRFLOW_VAR_DAG_NUMBER=5 -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # dynamic-dags-tutorial 2 | This repo contains an Astronomer project with multiple examples showing how to dynamically generate DAGs in Airflow. A guide discussing these concepts in depth will be published shortly. 3 | 4 | ## DAG Overview 5 | This repo contains DAGs and supporting Python scripts that dynamically generate DAGs using multiple methods. They are described here, organized by folder. 6 | 7 | #### dags 8 | 9 | - `dynamic-dags-connections.py` generates DAGs based on Airflow connections. 10 | - `dynamic-dags-loop.py` generates DAGs based on a simple range() loop. 11 | - `dynamic-dags-variable.py` generates DAGs based on an Airflow variables. 12 | - `dag_file_1.py` and `dag_file_2.py` are actual DAG files that were dynamically generated using scripts in the `include/` directory, described below. 13 | 14 | #### include 15 | 16 | - `dag-config/` contains two Json configuration files with parameters used to dynamically generate Python files for `dag_file_1.py` and `dag_file_2.py`. 17 | - `dag-template.py` contains the starting DAG template from which other DAG files are dynamically generated. 18 | - `generate-dag-files.py` contains a script to dynamically generate a DAG file for each config file in `dag-config/` by making a copy of `dag-template.py` and replacing key parameters from the config file. 19 | 20 | ## Getting Started 21 | The easiest way to run these example DAGs is to use the Astronomer CLI to get an Airflow instance up and running locally: 22 | 23 | 1. [Install the Astronomer CLI](https://www.astronomer.io/docs/cloud/stable/develop/cli-quickstart) 24 | 2. Clone this repo somewhere locally and navigate to it in your terminal 25 | 3. Initialize an Astronomer project by running `astro dev init` 26 | 4. Start Airflow locally by running `astro dev start` 27 | 5. Navigate to localhost:8080 in your browser and you should see the tutorial DAGs there 28 | -------------------------------------------------------------------------------- /dags/dag_file_1.py: -------------------------------------------------------------------------------- 1 | from airflow.decorators import dag 2 | from airflow.operators.bash import BashOperator 3 | from pendulum import datetime 4 | 5 | 6 | @dag( 7 | dag_id='dag_file_1', 8 | start_date=datetime(2023, 7, 1), 9 | schedule='@daily', 10 | catchup=False, 11 | ) 12 | def dag_from_config(): 13 | BashOperator( 14 | task_id="say_hello", 15 | bash_command='echo $ENVVAR', 16 | env={"ENVVAR": 'Hello! :)'}, 17 | ) 18 | 19 | 20 | dag_from_config() 21 | -------------------------------------------------------------------------------- /dags/dag_file_2.py: -------------------------------------------------------------------------------- 1 | from airflow.decorators import dag 2 | from airflow.operators.bash import BashOperator 3 | from pendulum import datetime 4 | 5 | 6 | @dag( 7 | dag_id='dag_file_2', 8 | start_date=datetime(2023, 7, 1), 9 | schedule='@hourly', 10 | catchup=False, 11 | ) 12 | def dag_from_config(): 13 | BashOperator( 14 | task_id="say_hello", 15 | bash_command='echo $ENVVAR and Goodbye!', 16 | env={"ENVVAR": 'Hola! :)'}, 17 | ) 18 | 19 | 20 | dag_from_config() 21 | -------------------------------------------------------------------------------- /dags/dynamic-dags-connections.py: -------------------------------------------------------------------------------- 1 | from airflow import settings 2 | from airflow.decorators import dag, task 3 | from airflow.models import Connection 4 | from pendulum import datetime 5 | 6 | 7 | def create_dag(dag_id, schedule, dag_number, default_args): 8 | @dag(dag_id=dag_id, schedule=schedule, default_args=default_args, catchup=False) 9 | def hello_world_dag(): 10 | @task() 11 | def hello_world(): 12 | print("Hello World") 13 | print("This is DAG: {}".format(str(dag_number))) 14 | 15 | hello_world() 16 | 17 | generated_dag = hello_world_dag() 18 | 19 | return generated_dag 20 | 21 | 22 | session = settings.Session() 23 | 24 | # adjust the filter criteria to filter which of your connections to use 25 | # to generated your DAGs 26 | conns = ( 27 | session.query(Connection.conn_id) 28 | .filter(Connection.conn_id.ilike("%MY_DATABASE_CONN%")) 29 | .all() 30 | ) 31 | 32 | for conn in conns: 33 | dag_id = "connection_hello_world_{}".format(conn[0]) 34 | 35 | default_args = {"owner": "airflow", "start_date": datetime(2013, 7, 1)} 36 | 37 | schedule = "@daily" 38 | dag_number = conn 39 | 40 | globals()[dag_id] = create_dag(dag_id, schedule, dag_number, default_args) 41 | -------------------------------------------------------------------------------- /dags/dynamic-dags-loop.py: -------------------------------------------------------------------------------- 1 | from airflow.decorators import dag, task 2 | from pendulum import datetime 3 | 4 | 5 | def create_dag(dag_id, schedule, dag_number, default_args): 6 | @dag(dag_id=dag_id, schedule=schedule, default_args=default_args, catchup=False) 7 | def hello_world_dag(): 8 | @task() 9 | def hello_world(*args): 10 | print("Hello World") 11 | print("This is DAG: {}".format(str(dag_number))) 12 | 13 | hello_world() 14 | 15 | generated_dag = hello_world_dag() 16 | 17 | return generated_dag 18 | 19 | 20 | # build a dag for each number in range(1, 4) 21 | for n in range(1, 4): 22 | dag_id = "loop_hello_world_{}".format(str(n)) 23 | 24 | default_args = {"owner": "airflow", "start_date": datetime(2023, 7, 1)} 25 | 26 | schedule = "@daily" 27 | 28 | dag_number = n 29 | 30 | globals()[dag_id] = create_dag(dag_id, schedule, dag_number, default_args) 31 | -------------------------------------------------------------------------------- /dags/dynamic-dags-variable.py: -------------------------------------------------------------------------------- 1 | from airflow.decorators import dag, task 2 | from airflow.models import Variable 3 | from pendulum import datetime 4 | 5 | 6 | def create_dag(dag_id, schedule, dag_number, default_args): 7 | @dag(dag_id=dag_id, schedule=schedule, default_args=default_args, catchup=False) 8 | def hello_world_dag(): 9 | @task() 10 | def hello_world(*args): 11 | print("Hello World") 12 | print("This is DAG: {}".format(str(dag_number))) 13 | 14 | hello_world() 15 | 16 | generated_dag = hello_world_dag() 17 | 18 | return generated_dag 19 | 20 | 21 | number_of_dags = Variable.get("dag_number", default_var=3) 22 | number_of_dags = int(number_of_dags) 23 | 24 | for n in range(1, number_of_dags): 25 | dag_id = "variable_hello_world_{}".format(str(n)) 26 | 27 | default_args = {"owner": "airflow", "start_date": datetime(2023, 7, 1)} 28 | 29 | schedule = "@daily" 30 | dag_number = n 31 | 32 | globals()[dag_id] = create_dag(dag_id, schedule, dag_number, default_args) 33 | -------------------------------------------------------------------------------- /include/dag-config/dag1-config.json: -------------------------------------------------------------------------------- 1 | { 2 | "dag_id": "dag_file_1", 3 | "schedule": "'@daily'", 4 | "bash_command": "'echo $ENVVAR'", 5 | "env_var": "'Hello! :)'" 6 | } -------------------------------------------------------------------------------- /include/dag-config/dag2-config.json: -------------------------------------------------------------------------------- 1 | { 2 | "dag_id": "dag_file_2", 3 | "schedule": "'@hourly'", 4 | "bash_command": "'echo $ENVVAR and Goodbye!'", 5 | "env_var": "'Hola! :)'" 6 | } -------------------------------------------------------------------------------- /include/dag-template.py: -------------------------------------------------------------------------------- 1 | from airflow.decorators import dag 2 | from airflow.operators.bash import BashOperator 3 | from pendulum import datetime 4 | 5 | 6 | @dag( 7 | dag_id=dag_id_to_replace, 8 | start_date=datetime(2023, 7, 1), 9 | schedule=schedule_to_replace, 10 | catchup=False, 11 | ) 12 | def dag_from_config(): 13 | BashOperator( 14 | task_id="say_hello", 15 | bash_command=bash_command_to_replace, 16 | env={"ENVVAR": env_var_to_replace}, 17 | ) 18 | 19 | 20 | dag_from_config() 21 | -------------------------------------------------------------------------------- /include/generate-dag-files.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import shutil 4 | import fileinput 5 | 6 | 7 | config_filepath = "include/dag-config/" 8 | dag_template_filename = "include/dag-template.py" 9 | 10 | for filename in os.listdir(config_filepath): 11 | f = open(config_filepath + filename) 12 | config = json.load(f) 13 | 14 | new_filename = "dags/" + config["dag_id"] + ".py" 15 | shutil.copyfile(dag_template_filename, new_filename) 16 | 17 | for line in fileinput.input(new_filename, inplace=True): 18 | line = line.replace("dag_id_to_replace", "'" + config["dag_id"] + "'") 19 | line = line.replace("schedule_to_replace", config["schedule"]) 20 | line = line.replace("bash_command_to_replace", config["bash_command"]) 21 | line = line.replace("env_var_to_replace", config["env_var"]) 22 | print(line, end="") 23 | -------------------------------------------------------------------------------- /packages.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astronomer/dynamic-dags-tutorial/0fb3f5a4c2b9dcdcebfcc6541763fdbec3fdc51f/packages.txt -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astronomer/dynamic-dags-tutorial/0fb3f5a4c2b9dcdcebfcc6541763fdbec3fdc51f/requirements.txt --------------------------------------------------------------------------------