├── .gitignore
├── dags
│   ├── dag_with_stuff_in_global_scope.py
│   └── hello_airflow.py
├── docker-compose.yml
├── requirements.txt
├── setup.py
├── src
│   └── testing_examples
│       ├── __init__.py
│       ├── hooks
│       │   └── __init__.py
│       └── operators
│           ├── __init__.py
│           └── postgres_to_local_operator.py
└── tests
    ├── conftest.py
    ├── dags
    │   └── test_dag_integrity.py
    ├── test_bash_operator.py
    ├── test_fixture_example.py
    ├── test_fixture_scope_example.py
    ├── test_full_context.py
    ├── test_python_operator.py
    ├── test_simple_http_operator.py
    ├── test_tmpdir.py
    └── testing_examples
        └── operators
            ├── postgres-init.sql
            └── test_postgres_to_local_operator.py

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
*.egg-info
*.pyc
.idea/
.python-version
airflow.cfg
airflow.db
logs/
unittests.cfg

--------------------------------------------------------------------------------
/dags/dag_with_stuff_in_global_scope.py:
--------------------------------------------------------------------------------
# This also demonstrates the DebugExecutor
# https://airflow.apache.org/docs/stable/executor/debug.html
# Set AIRFLOW__CORE__EXECUTOR=DebugExecutor

import airflow.utils.dates
from airflow.models import DAG
from airflow.operators.bash_operator import BashOperator
from airflow.operators.python_operator import PythonOperator

dag = DAG(dag_id="dag_with_stuff_in_global_scope", start_date=airflow.utils.dates.days_ago(3), schedule_interval="@daily")

print("global!")  # runs on every scheduler parse of this file -- the "stuff in global scope"

hello = BashOperator(task_id="hello", bash_command="echo 'hello'", dag=dag)
airflow_task = PythonOperator(task_id="airflow", python_callable=lambda: print("airflow"), dag=dag)

hello >> airflow_task

if __name__ == "__main__":
    dag.clear(reset_dag_runs=True)
    dag.run()

--------------------------------------------------------------------------------
/dags/hello_airflow.py:
--------------------------------------------------------------------------------
import airflow.utils.dates
from airflow.models import DAG
from airflow.operators.bash_operator import BashOperator
from airflow.operators.python_operator import PythonOperator

dag = DAG(dag_id="hello_airflow", start_date=airflow.utils.dates.days_ago(3), schedule_interval="@daily")


def do_magic(**context):
    print(context)


hello = BashOperator(task_id="hello", bash_command="echo 'hello'", dag=dag)
airflow_task = PythonOperator(task_id="airflow", python_callable=do_magic, provide_context=True, dag=dag)

hello >> airflow_task
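
(As its header comment notes, dag_with_stuff_in_global_scope.py can be run standalone under the DebugExecutor: something like AIRFLOW__CORE__EXECUTOR=DebugExecutor python dags/dag_with_stuff_in_global_scope.py should hit the __main__ block, which clears and runs the DAG in-process.)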
--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
version: '3.7'

# ====================================== AIRFLOW ENVIRONMENT VARIABLES =======================================
x-environment: &airflow_environment
  - AIRFLOW__CORE__EXECUTOR=LocalExecutor
  - AIRFLOW__CORE__FERNET_KEY=hCRoPUYBO27QiEg1MRu5hSjLG7yNd8y8XKlm-8kRlkQ=
  - AIRFLOW__CORE__LOAD_DEFAULT_CONNECTIONS=False
  - AIRFLOW__CORE__LOAD_EXAMPLES=False
  - AIRFLOW__CORE__SQL_ALCHEMY_CONN=postgresql://airflow:airflow@postgres:5432/airflow
  - AIRFLOW__CORE__STORE_DAG_CODE=True
  - AIRFLOW__CORE__STORE_SERIALIZED_DAGS=True
  - AIRFLOW__SCHEDULER__DAG_DIR_LIST_INTERVAL=30
  - AIRFLOW__WEBSERVER__EXPOSE_CONFIG=True
  - AIRFLOW__WEBSERVER__RBAC=True
# ===================================== /AIRFLOW ENVIRONMENT VARIABLES =======================================

services:
  postgres:
    image: postgres:13-alpine
    environment:
      - POSTGRES_USER=airflow
      - POSTGRES_PASSWORD=airflow
      - POSTGRES_DB=airflow
    ports:
      - "5432:5432"

  initdb_adduser:
    image: apache/airflow:1.10.11-python3.7
    depends_on:
      - postgres
    environment: *airflow_environment
    entrypoint: /bin/bash
    # The webserver initializes the permissions, so sleep briefly to give it (approximately) enough time to finish.
    # It's no disaster if the webserver isn't done by then, but create_user will keep spitting out errors until the permissions exist.
    command: -c 'airflow initdb && sleep 5 && airflow create_user --role Admin --username airflow --password airflow -e airflow@airflow.com -f airflow -l airflow'

  webserver:
    image: apache/airflow:1.10.11-python3.7
    restart: always
    depends_on:
      - postgres
    volumes:
      - logs:/opt/airflow/logs
    ports:
      - "8080:8080"
    environment: *airflow_environment
    command: webserver

  scheduler:
    image: apache/airflow:1.10.11-python3.7
    restart: always
    depends_on:
      - postgres
    volumes:
      - ./dags:/opt/airflow/dags
      - ./src:/opt/airflow/testing_examples/src
      - ./setup.py:/opt/airflow/testing_examples/setup.py
      - logs:/opt/airflow/logs
    environment: *airflow_environment
    entrypoint: ["/bin/sh"]
    command: ["-c", "pip install --user -e /opt/airflow/testing_examples && airflow scheduler"]

volumes:
  logs:

--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
apache-airflow[postgres]~=1.10.11
ipdb~=0.13.3
pytest-helpers-namespace~=2019.1.8
pytest-mock~=3.2.0
pytest~=5.4.3
pytest-docker-tools~=0.2.2

--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
import setuptools

setuptools.setup(
    name="testing_examples",
    version="0.1",
    description="Package containing custom Airflow components for demonstrating testing during Airflow Summit.",
    packages=setuptools.find_packages(where="src"),
    package_dir={"": "src"},
    install_requires=["apache-airflow~=1.10.11"],
    python_requires="==3.7.*",
)

--------------------------------------------------------------------------------
/src/testing_examples/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/godatadriven/airflow-testing-examples/441952639b18dbd4560246a5a31cb90630097af3/src/testing_examples/__init__.py

--------------------------------------------------------------------------------
/src/testing_examples/hooks/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/godatadriven/airflow-testing-examples/441952639b18dbd4560246a5a31cb90630097af3/src/testing_examples/hooks/__init__.py

--------------------------------------------------------------------------------
/src/testing_examples/operators/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/godatadriven/airflow-testing-examples/441952639b18dbd4560246a5a31cb90630097af3/src/testing_examples/operators/__init__.py

--------------------------------------------------------------------------------
/src/testing_examples/operators/postgres_to_local_operator.py:
--------------------------------------------------------------------------------
"""Dump JSON data from Postgres to local storage."""

import json
from typing import Optional

from airflow.hooks.postgres_hook import PostgresHook
from airflow.models import BaseOperator
from airflow.utils.decorators import apply_defaults
from psycopg2.extras import RealDictCursor


class PostgresToLocalOperator(BaseOperator):
    """
    Airflow operator for storing a JSON-formatted
    Postgres query result on local disk.
    """

    ui_color = "#705B74"
    ui_fgcolor = "#8FA48B"

    @apply_defaults
    def __init__(
        self,
        pg_query: str,
        local_path: str,
        postgres_conn_id: Optional[str] = None,
        **kwargs
    ):
        super().__init__(**kwargs)
        self._pg_query = pg_query
        self._local_path = local_path
        self._postgres_conn_id = postgres_conn_id

    def execute(self, context):
        postgres_hook = PostgresHook(postgres_conn_id=self._postgres_conn_id)
        conn = postgres_hook.get_conn()
        # RealDictCursor returns rows as dicts, which serialize directly to JSON
        cursor = conn.cursor(cursor_factory=RealDictCursor)
        cursor.execute(self._pg_query)

        with open(self._local_path, "w") as f:
            json.dump(cursor.fetchall(), f, indent=4)
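
For orientation, a sketch (not part of the repository) of how PostgresToLocalOperator might be wired into a DAG; the dag_id, the "my_postgres" connection id, the query, and the output path are made up for illustration:

import airflow.utils.dates
from airflow.models import DAG

from testing_examples.operators.postgres_to_local_operator import PostgresToLocalOperator

dag = DAG(dag_id="pg_dump_example", start_date=airflow.utils.dates.days_ago(1), schedule_interval="@daily")

# "my_postgres" must exist as an Airflow connection for this to run
dump_dummy = PostgresToLocalOperator(
    task_id="dump_dummy",
    postgres_conn_id="my_postgres",
    pg_query="SELECT * FROM dummy",
    local_path="/tmp/dummy.json",
    dag=dag,
)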
--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
import datetime

import pytest
from airflow.models import DAG

pytest_plugins = ["helpers_namespace"]


@pytest.fixture
def test_dag():
    """Airflow DAG for testing."""
    return DAG(
        "test_dag", start_date=datetime.datetime(2020, 1, 1), schedule_interval=datetime.timedelta(days=1)
    )


@pytest.helpers.register
def run_task(task, dag):
    """Run an Airflow task."""
    dag.clear()
    task.run(start_date=dag.start_date, end_date=dag.start_date)

--------------------------------------------------------------------------------
/tests/dags/test_dag_integrity.py:
--------------------------------------------------------------------------------
"""Test the validity of all DAGs."""
import glob
from os import path

import pytest
from airflow import models as airflow_models

DAG_PATHS = glob.glob(path.join(path.dirname(__file__), "..", "..", "dags", "*.py"))


@pytest.mark.parametrize("dag_path", DAG_PATHS)
def test_dag_integrity(dag_path):
    """Import DAG files and check for a valid DAG instance."""
    dag_name = path.basename(dag_path)
    module = _import_file(dag_name, dag_path)

    # Validate that there is at least 1 DAG object in the file
    dag_objects = [var for var in vars(module).values() if isinstance(var, airflow_models.DAG)]
    assert dag_objects

    # For every DAG object, test for cycles
    for dag in dag_objects:
        dag.test_cycle()


def _import_file(module_name, module_path):
    import importlib.util

    spec = importlib.util.spec_from_file_location(module_name, str(module_path))
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module
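
The same machinery supports other repository-wide checks. As a sketch (not in this repo), a test reusing DAG_PATHS and _import_file from this module to assert that dag_ids are unique across all DAG files:

def test_dag_ids_unique():
    """No two DAG files should declare the same dag_id."""
    dag_ids = []
    for dag_path in DAG_PATHS:
        module = _import_file(path.basename(dag_path), dag_path)
        dag_ids += [var.dag_id for var in vars(module).values() if isinstance(var, airflow_models.DAG)]
    assert len(dag_ids) == len(set(dag_ids))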
--------------------------------------------------------------------------------
/tests/test_bash_operator.py:
--------------------------------------------------------------------------------
from airflow.operators.bash_operator import BashOperator


def test_bash_operator():
    test = BashOperator(task_id="test", bash_command="echo testme", xcom_push=True)
    result = test.execute(context={})
    assert result == "testme"

--------------------------------------------------------------------------------
/tests/test_fixture_example.py:
--------------------------------------------------------------------------------
import pytest


@pytest.fixture
def a():
    return 1


@pytest.fixture
def b():
    return 2


@pytest.fixture
def c():
    return 3


def test_sum_ab(a, b):
    assert sum([a, b]) == 3


def test_sum_ac(a, c):
    assert sum([a, c]) == 4

--------------------------------------------------------------------------------
/tests/test_fixture_scope_example.py:
--------------------------------------------------------------------------------
import pytest


@pytest.fixture(scope="module")
def a():
    return [1]


@pytest.fixture
def b():
    return [2]


class TestBla:
    def test_something(self, a):
        a.append(1)
        assert sum(a) == 2

    @pytest.mark.xfail(
        reason="This is expected to fail when run via the class TestBla, "
        "because the value of a is changed in test_something()."
    )
    def test_something_ab(self, a, b):
        assert sum(a + b) == 3
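
For contrast with the module-scoped fixture above, a sketch (not in this repo) of the same two tests with pytest's default function scope, where every test receives a fresh list and both assertions pass:

import pytest


@pytest.fixture  # default scope="function": a new list for every test
def a():
    return [1]


@pytest.fixture
def b():
    return [2]


def test_something(a):
    a.append(1)
    assert sum(a) == 2


def test_something_ab(a, b):
    # Passes: the append in test_something is not visible here
    assert sum(a + b) == 3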
--------------------------------------------------------------------------------
/tests/test_full_context.py:
--------------------------------------------------------------------------------
import pytest
from airflow.operators.python_operator import PythonOperator


def test_full_context(test_dag, tmpdir):
    def do_magic(**context):
        with open(tmpdir / "test.txt", "w") as f:
            f.write(context["ds"])

    task = PythonOperator(task_id="test", python_callable=do_magic, provide_context=True, dag=test_dag)
    pytest.helpers.run_task(task=task, dag=test_dag)

    with open(tmpdir / "test.txt", "r") as f:
        assert f.readlines()[0] == test_dag.start_date.strftime("%Y-%m-%d")

--------------------------------------------------------------------------------
/tests/test_python_operator.py:
--------------------------------------------------------------------------------
import datetime

from airflow.operators.python_operator import PythonOperator


def test_python_operator():
    test = PythonOperator(task_id="test", python_callable=lambda: "testme")
    result = test.execute(context={})
    assert result == "testme"


def next_week(**context):
    return context["execution_date"] + datetime.timedelta(days=7)


def test_python_operator_with_context():
    test = PythonOperator(task_id="test", python_callable=next_week, provide_context=True)
    testdate = datetime.datetime(2020, 1, 1)
    result = test.execute(context={"execution_date": testdate})
    assert result == testdate + datetime.timedelta(days=7)

--------------------------------------------------------------------------------
/tests/test_simple_http_operator.py:
--------------------------------------------------------------------------------
from datetime import datetime

import pytest
from airflow.hooks.base_hook import BaseHook
from airflow.hooks.http_hook import HttpHook
from airflow.models import Connection
from airflow.operators.http_operator import SimpleHttpOperator


def test_simple_http_operator(test_dag, mocker):
    # Only the connection lookup is mocked, so this test makes a real request
    # to api.sunrise-sunset.org
    mocker.patch.object(
        BaseHook, "get_connection", return_value=Connection(schema="https", host="api.sunrise-sunset.org")
    )

    def _check_light(sunset_sunrise_response):
        results = sunset_sunrise_response.json()["results"]
        sunrise = datetime.strptime(results["sunrise"][:-6], "%Y-%m-%dT%H:%M:%S")
        sunset = datetime.strptime(results["sunset"][:-6], "%Y-%m-%dT%H:%M:%S")

        if sunrise < datetime.utcnow() < sunset:
            print("It is light!")
        else:
            print("It is dark!")

        return True

    is_it_light = SimpleHttpOperator(
        task_id="is_it_light",
        http_conn_id="my_http_conn",
        endpoint="json",
        method="GET",
        data={"lat": "52.370216", "lng": "4.895168", "formatted": "0"},
        response_check=_check_light,
        dag=test_dag,
    )

    pytest.helpers.run_task(task=is_it_light, dag=test_dag)


def test_simple_http_operator_no_external_call(test_dag, mocker):
    mocker.patch.object(
        BaseHook, "get_connection", return_value=Connection(schema="https", host="api.sunrise-sunset.org")
    )
    # Mocking HttpHook.run keeps this test offline
    mock_run = mocker.patch.object(HttpHook, "run")

    is_it_light = SimpleHttpOperator(
        task_id="is_it_light",
        http_conn_id="my_http_conn",
        endpoint="json",
        method="GET",
        data={"lat": "52.370216", "lng": "4.895168", "date": "{{ ds }}", "formatted": "0"},
        dag=test_dag,
    )

    pytest.helpers.run_task(task=is_it_light, dag=test_dag)
    mock_run.assert_called_once()
    assert mock_run.call_args_list[0][0][1] == {
        "lat": "52.370216",
        "lng": "4.895168",
        "date": test_dag.start_date.strftime("%Y-%m-%d"),
        "formatted": "0",
    }

--------------------------------------------------------------------------------
/tests/test_tmpdir.py:
--------------------------------------------------------------------------------
import pytest
from airflow.operators.bash_operator import BashOperator


def test_writing_to_disk(tmpdir):
    tmpfile = tmpdir.join("hello.txt")

    task = BashOperator(task_id="test", bash_command=f"echo 'hello' > {tmpfile}")
    task.execute(context={})

    assert len(tmpdir.listdir()) == 1
    assert tmpfile.read().replace("\n", "") == "hello"


def test_bash_operator_tmpdir(test_dag, tmpdir):
    tmpfile = tmpdir.join("hello.txt")

    task = BashOperator(task_id="test", bash_command=f"echo 'hello' > {tmpfile}", dag=test_dag)
    pytest.helpers.run_task(task=task, dag=test_dag)

    assert len(tmpdir.listdir()) == 1
    assert tmpfile.read().replace("\n", "") == "hello"

--------------------------------------------------------------------------------
/tests/testing_examples/operators/postgres-init.sql:
--------------------------------------------------------------------------------
SET search_path TO public;
CREATE TABLE dummy (
    id integer,
    name character varying(255)
);
INSERT INTO dummy (id, name) VALUES (1, 'dummy1');
INSERT INTO dummy (id, name) VALUES (2, 'dummy2');
INSERT INTO dummy (id, name) VALUES (3, 'dummy3');
--------------------------------------------------------------------------------
/tests/testing_examples/operators/test_postgres_to_local_operator.py:
--------------------------------------------------------------------------------
import json
from collections import namedtuple
from os import path
from pathlib import Path

import pytest
from airflow.hooks.postgres_hook import PostgresHook
from airflow.models import Connection
from pytest_docker_tools import container, fetch

from testing_examples.operators.postgres_to_local_operator import PostgresToLocalOperator


@pytest.fixture(scope="module")
def postgres_credentials():
    """Namedtuple containing postgres credentials, to define them only once."""
    PostgresCredentials = namedtuple("PostgresCredentials", ["username", "password"])
    return PostgresCredentials("testuser", "testpass")


postgres_image = fetch(repository="postgres:11.1-alpine")

postgres = container(
    image="{postgres_image.id}",
    environment={
        "POSTGRES_USER": "{postgres_credentials.username}",
        "POSTGRES_PASSWORD": "{postgres_credentials.password}",
    },
    ports={"5432/tcp": None},  # None maps the container port to a random available host port
    volumes={
        # Seed the database with the dummy table at container startup
        path.join(path.dirname(__file__), "postgres-init.sql"): {
            "bind": "/docker-entrypoint-initdb.d/postgres-init.sql"
        }
    },
)


def test_postgres_to_local_operator(test_dag, mocker, tmpdir, postgres, postgres_credentials):
    mocker.patch.object(
        PostgresHook,
        "get_connection",
        return_value=Connection(
            host="localhost",
            conn_type="postgres",
            login=postgres_credentials.username,
            password=postgres_credentials.password,
            port=postgres.ports["5432/tcp"][0],  # the host port Docker assigned
        ),
    )

    output_path = str(tmpdir / "pg_dump")
    task = PostgresToLocalOperator(
        task_id="test",
        postgres_conn_id="postgres",
        pg_query="SELECT * FROM dummy",
        local_path=output_path,
        dag=test_dag,
    )
    pytest.helpers.run_task(task=task, dag=test_dag)

    # Assert that the output file exists
    output_file = Path(output_path)
    assert output_file.is_file()

    # Assert file contents; should match the rows in postgres-init.sql
    expected = [{"id": 1, "name": "dummy1"}, {"id": 2, "name": "dummy2"}, {"id": 3, "name": "dummy3"}]
    with open(output_file, "r") as f:
        assert json.load(f) == expected
--------------------------------------------------------------------------------
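
(With Docker available, which the pytest-docker-tools based test above requires, the full suite should run from the repository root with something like: pip install -r requirements.txt && pip install -e . && pytest tests)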