├── .github
│   └── workflows
│       ├── pythonapp.yml
│       └── pythonpublish.yml
├── .gitignore
├── LICENCE
├── README.md
├── aircal
│   ├── __init__.py
│   ├── dao
│   │   ├── airflow.py
│   │   └── gcal.py
│   ├── events.py
│   └── export.py
├── example.py
├── setup.py
└── tests
    ├── event_test.py
    └── export_test.py

--------------------------------------------------------------------------------
/.github/workflows/pythonapp.yml:
--------------------------------------------------------------------------------
# This workflow will install Python dependencies, run tests and lint with a single version of Python
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions

name: Python application

on:
  push:
    branches: [ master ]
  pull_request:
    branches: [ master ]

jobs:
  build:

    runs-on: ubuntu-latest

    steps:
    - uses: actions/checkout@v2
    - name: Set up Python 3.8
      uses: actions/setup-python@v1
      with:
        python-version: 3.8
    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        python -m pip install .
    - name: Lint with flake8
      run: |
        pip install flake8
        # stop the build if there are Python syntax errors or undefined names
        flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
        # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
        flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
    - name: Test with pytest
      run: |
        pip install pytest
        pytest

--------------------------------------------------------------------------------
/.github/workflows/pythonpublish.yml:
--------------------------------------------------------------------------------
# This workflow will upload a Python Package using Twine when a release is created
# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries

name: Upload Python Package

on:
  release:
    types: [created]

jobs:
  deploy:

    runs-on: ubuntu-latest

    steps:
    - uses: actions/checkout@v2
    - name: Set up Python
      uses: actions/setup-python@v1
      with:
        python-version: '3.7'
    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install setuptools wheel twine
    - name: Build and publish
      env:
        TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
        TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
      run: |
        python setup.py sdist bdist_wheel
        twine upload dist/*

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
*.pyc
/dist/
/*.egg-info
/*.egg
.vscode
notebooks

--------------------------------------------------------------------------------
/LICENCE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2020 Domen Pogacnik

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
Update: The library is not actively maintained. In case of issues, you might be on your own. However, you're free to open a PR :)

# Aircal

This library helps you visualize Airflow's DAG run schedule. It does that by exporting future DAG runs as events to Google Calendar.

Having DAG runs as events in the calendar can help you:
- visualize the utilization of your Airflow workers so you can spread your jobs more evenly
- determine when a certain DAG should have finished, so you can monitor the service.

![DAG run visualization](https://user-images.githubusercontent.com/6691247/80535228-c28cc700-89a0-11ea-8cdc-0050a3c91298.png)

The library will also observe changes to your DAGs and synchronize them with the calendar:
- add runs for freshly added DAGs
- change the start and/or end time when an existing DAG's schedule changes (or its execution time changes significantly)
- delete run events when a DAG is removed (or paused)

Tip: run the sync script regularly, perhaps with, you know, Airflow :)

The library only supports DAG schedules that use the standard cron syntax. The rest are ignored (with a warning).

**Warning: this is beta-stage software. Expect occasional bugs and rough edges (PRs welcome).**

## Installation & setup

```
pip install aircal
```

Alternatively, you can clone the repo and install it from there:

```
pip install -e .
```

Google API credentials are required to create events in the calendar. You can obtain them [here](https://console.developers.google.com/apis/credentials). Store `credentials.json` in a directory accessible to your code.

**The library modifies and deletes calendar events. I highly recommend creating a new calendar to be used by this software:** "add calendar" -> "create new calendar" in Google Calendar settings.

## Usage

See `example.py` for an example of a pipeline that can be run at regular intervals.
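A condensed sketch of that pipeline (the connection string and calendar ID below are placeholders you need to replace with your own values):

```python
import logging
from pathlib import Path
from sqlalchemy import create_engine
from aircal.dao.airflow import AirflowDb
from aircal.dao.gcal import GCalClient
from aircal.events import DagRunEventsExtractor
from aircal.export import GCalExporter

# read DAG metadata from the Airflow DB and expand future runs into events
airflow_db = AirflowDb(create_engine('postgresql://user:pass@localhost/airflow'))
df_events = DagRunEventsExtractor(airflow_db).get_future_dag_runs(n_horizon_days=10)

# diff against the calendar and apply the resulting inserts/updates/deletes
gcal = GCalClient(calendar_id='your-calendar-id', creds_dir=Path('.'),
                  logger=logging.getLogger('aircal'))
exporter = GCalExporter(gcal)
exporter.sync(exporter.mark_for_sync(df_events))
```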
--------------------------------------------------------------------------------
/aircal/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/domenp/aircal/612e690a6f01463215f5de80c4fd3f4ffe07e3f4/aircal/__init__.py

--------------------------------------------------------------------------------
/aircal/dao/airflow.py:
--------------------------------------------------------------------------------
import pandas as pd


class AirflowDb(object):

    def __init__(self, engine):
        self.engine = engine

    def load_dag_metadata(self):
        # active (non-paused) DAGs and their cron schedule intervals
        return pd.read_sql('SELECT dag_id, schedule_interval FROM dag WHERE is_paused = \'0\'', self.engine)

    def load_dag_run_metadata(self):
        # historical DAG runs, used to estimate future execution times
        df_dr = pd.read_sql('SELECT dag_id, start_date, end_date FROM dag_run', self.engine)
        return df_dr
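Note: `AirflowDb` is a thin wrapper over the Airflow metadata database. A quick sanity check of what it returns (the connection string is a placeholder for your own Airflow DB):

```python
from sqlalchemy import create_engine
from aircal.dao.airflow import AirflowDb

db = AirflowDb(create_engine('postgresql://user:pass@localhost/airflow'))
print(db.load_dag_metadata().head())      # columns: dag_id, schedule_interval
print(db.load_dag_run_metadata().head())  # columns: dag_id, start_date, end_date
```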
--------------------------------------------------------------------------------
/aircal/dao/gcal.py:
--------------------------------------------------------------------------------
import time
import pickle
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
from googleapiclient.errors import HttpError
from aircal.export import INSERT_ACTION, UPDATE_ACTION, DELETE_ACTION


SCOPES = ['https://www.googleapis.com/auth/calendar']
TITLE_PREFIX = 'DAG:'


class GCalClient(object):

    def __init__(self, calendar_id, creds_dir, logger, max_results=2000):
        creds = self._auth(creds_dir)
        self.calendar_id = calendar_id
        self.service = build('calendar', 'v3', credentials=creds)
        self.max_results = max_results
        self.logger = logger

    def _auth(self, creds_dir):
        creds = None
        token_path = creds_dir / 'token.pickle'
        creds_path = creds_dir / 'credentials.json'

        # The file token.pickle stores the user's access and refresh tokens, and is
        # created automatically when the authorization flow completes for the first
        # time.
        if token_path.exists():
            with open(token_path, 'rb') as token:
                creds = pickle.load(token)

        # If there are no (valid) credentials available, let the user log in.
        if not creds or not creds.valid:
            if creds and creds.expired and creds.refresh_token:
                creds.refresh(Request())
            else:
                flow = InstalledAppFlow.from_client_secrets_file(creds_path, SCOPES)
                creds = flow.run_local_server(port=0)
            with open(token_path, 'wb') as token:
                pickle.dump(creds, token)

        return creds

    def create_event(self, dag_id, start_date, end_date):
        event = {
            'summary': '%s %s' % (TITLE_PREFIX, dag_id),
            'start': {
                'dateTime': start_date.strftime('%Y-%m-%dT%H:%M:00'),
                'timeZone': 'Etc/UTC',
            },
            'end': {
                'dateTime': end_date.strftime('%Y-%m-%dT%H:%M:00'),
                'timeZone': 'Etc/UTC',
            }
        }
        event = self.service.events().insert(calendarId=self.calendar_id, body=event).execute()
        return event['status']

    def delete_event(self, event_id):
        self.service.events().delete(calendarId=self.calendar_id, eventId=event_id).execute()
        return 'deleted'

    def update_event(self, event_id, dag_id, start_date, end_date):
        event = {
            'summary': '%s %s' % (TITLE_PREFIX, dag_id),
            'start': {
                'dateTime': start_date.strftime('%Y-%m-%dT%H:%M:00'),
                'timeZone': 'Etc/UTC',
            },
            'end': {
                'dateTime': end_date.strftime('%Y-%m-%dT%H:%M:00'),
                'timeZone': 'Etc/UTC',
            }
        }
        event = self.service.events().update(calendarId=self.calendar_id, eventId=event_id, body=event).execute()
        return event['status']

    def do_sync(self, v):
        # retry up to 3 times with exponential backoff (10s, 100s, 1000s)
        for i in range(3):
            try:
                if v.action == INSERT_ACTION:
                    self.create_event(v.dag_id, v.start_date, v.end_date)
                elif v.action == DELETE_ACTION:
                    self.delete_event(v.event_id)
                elif v.action == UPDATE_ACTION:
                    self.update_event(v.event_id, v.dag_id, v.start_date, v.end_date)
                else:
                    raise Exception('action not supported')
            except HttpError as ex:
                self.logger.error('HTTP error occurred, retrying: %s' % ex)
                time.sleep(10**(i+1))
            else:
                return 0
        return 1

    def get_events(self, base_date):
        events_result = self.service.events().list(
            calendarId=self.calendar_id, maxResults=self.max_results,
            timeMin=base_date, singleEvents=True, orderBy='startTime'
        ).execute()
        events = events_result.get('items', [])
        if len(events) == self.max_results:
            raise Exception((
                '# of retrieved events equals max_results. Some events might be ignored. '
                'Consider increasing the max_results parameter or decreasing n_horizon_days.'))
        elig_events = [v for v in events if v.get('summary', '').startswith(TITLE_PREFIX)]
        for event in elig_events:
            event['summary'] = event['summary'].replace(TITLE_PREFIX, '').strip()
        return elig_events
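Note: the first `GCalClient` instantiation triggers the browser-based OAuth flow and caches the token next to `credentials.json`. A read-only way to inspect what aircal sees in the calendar (the calendar ID is a placeholder; `get_events` expects an RFC3339 timestamp, mirroring `GCalExporter._get_gcal_events`):

```python
import logging
from datetime import datetime
from pathlib import Path
from aircal.dao.gcal import GCalClient

client = GCalClient(calendar_id='your-calendar-id', creds_dir=Path('.'),
                    logger=logging.getLogger('aircal'))
# list upcoming aircal-managed events (the 'DAG:' prefix is stripped from summaries)
for event in client.get_events(datetime.utcnow().isoformat() + 'Z'):
    print(event['summary'], event['start']['dateTime'])
```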
--------------------------------------------------------------------------------
/aircal/events.py:
--------------------------------------------------------------------------------
import pandas as pd
from datetime import datetime, timedelta
from croniter import croniter, CroniterBadCronError


class DagRunEventsExtractor(object):

    def __init__(self, airflow_db, base_date=None, n_last_runs=5):
        self.airflow_db = airflow_db
        # a datetime.now() default argument would be evaluated only once, at
        # import time, so resolve the default here instead
        self.base_date = base_date if base_date is not None else datetime.now()
        self.n_last_runs = n_last_runs

    def _estimate_dag_exec_time(self):
        """
        Estimate the execution time of future DAG runs.

        Takes the average over the most recent runs (5 by default, see n_last_runs).
        """
        df_dag_run = self.airflow_db.load_dag_run_metadata()
        df_dag_run.start_date = pd.to_datetime(df_dag_run.start_date)
        df_dag_run.end_date = pd.to_datetime(df_dag_run.end_date)
        df_dag_run['exec_time'] = df_dag_run.end_date - df_dag_run.start_date

        df_et = df_dag_run.groupby('dag_id').\
            apply(lambda x: x.nlargest(self.n_last_runs, 'start_date')).reset_index(drop=True)
        df_met = df_et.groupby('dag_id')['exec_time'].agg(['sum', 'size'])
        df_met['mean_exec_time'] = df_met['sum'] / df_met['size']
        return df_met.drop(columns=['sum', 'size'])

    def _next_dag_runs(self, pattern, end_date):
        """
        Return the future DAG run dates from the base date up to the end date.

        If the cron pattern is not recognized, an empty list is returned.
        """
        if not pattern:
            return []

        pattern = pattern.strip('"')
        try:
            c = croniter(pattern, self.base_date)
        except CroniterBadCronError:
            return []

        dates = []
        while True:
            next_date = c.get_next(datetime)
            if next_date > end_date:
                break
            dates.append(next_date)
        return dates

    def get_future_dag_runs(self, n_horizon_days=30):
        """Return a data frame containing all upcoming (relative to the base date) DAG runs.

        Parameters
        ----------
        n_horizon_days : int

        Returns
        -------
        pandas.DataFrame
            Data frame containing the columns: dag_id, start_date, end_date.
        """
        df_dag = self.airflow_db.load_dag_metadata()
        df_exec_time = self._estimate_dag_exec_time()
        df = df_dag.merge(df_exec_time, on='dag_id', how='left')

        # generate events from the base date to the end of the horizon
        date_end = self.base_date + timedelta(n_horizon_days)
        df['next_runs'] = df.apply(lambda v: self._next_dag_runs(v.schedule_interval, date_end), axis=1)

        # skip entries with no next runs
        # TODO: warn if no runs are scheduled
        df_ed = df[df.next_runs.str.len() != 0]

        # one row per future run
        df_events = df_ed.explode('next_runs')

        # if there's no execution time estimate, default to one minute
        df_events.mean_exec_time = df_events.mean_exec_time.fillna(timedelta(minutes=1))

        df_events['start_date'] = df_events.next_runs
        df_events['end_date'] = df_events.next_runs + df_events.mean_exec_time

        return df_events.drop(columns=['next_runs'])
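Note: the schedule expansion in `_next_dag_runs` is plain croniter iteration. A standalone illustration for a daily 10:05 schedule over a three-day horizon (dates are illustrative):

```python
from datetime import datetime, timedelta
from croniter import croniter

base_date = datetime(2020, 4, 20)
end_date = base_date + timedelta(days=3)

# iterate the cron pattern starting from the base date
c = croniter('5 10 * * *', base_date)
dates = []
while True:
    next_date = c.get_next(datetime)
    if next_date > end_date:
        break
    dates.append(next_date)

# [2020-04-20 10:05, 2020-04-21 10:05, 2020-04-22 10:05]
print(dates)
```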
--------------------------------------------------------------------------------
/aircal/export.py:
--------------------------------------------------------------------------------
import pandas as pd
from datetime import datetime


INSERT_ACTION = 'insert'
UPDATE_ACTION = 'update'
DELETE_ACTION = 'delete'


class GCalExporter(object):

    def __init__(self, gcal):
        self.gcal = gcal

    def _get_gcal_events(self):
        now = datetime.utcnow().isoformat() + 'Z'
        events = self.gcal.get_events(now)
        df_gcal = pd.DataFrame(data={
            'dag_id': [v['summary'] for v in events],
            'start_date': [v['start']['dateTime'] for v in events],
            'end_date': [v['end']['dateTime'] for v in events],
            'event_id': [v['id'] for v in events],
            'source': 'gcal'
        })

        if df_gcal.empty:
            return df_gcal

        # normalize to naive UTC timestamps so they compare cleanly with the
        # generated events
        df_gcal.start_date = pd.to_datetime(df_gcal.start_date).dt.tz_convert('UTC').dt.tz_localize(None)
        df_gcal.end_date = pd.to_datetime(df_gcal.end_date).dt.tz_convert('UTC').dt.tz_localize(None)

        return df_gcal

    def _determine_overlap(self, df_elig_events, df_gcal):
        # outer join on (dag_id, start_date): a row missing on the calendar side
        # becomes an insert, a row missing on the events side becomes a delete
        df_new = df_elig_events.copy()
        df_new = df_new.set_index([df_new.dag_id, df_new.start_date])
        df_new = df_new[['end_date']].rename(columns={'end_date': 'end_date_events'})

        df_cur = df_gcal.copy()
        df_cur = df_cur.set_index([df_cur.dag_id, df_cur.start_date])
        df_cur = df_cur[['end_date', 'event_id']].rename(columns={'end_date': 'end_date_gcal'})

        return pd.concat([df_new, df_cur], axis=1).reset_index()

    def mark_for_sync(self, df_events, exec_time_diff_tol_seconds=360):

        df_elig_events = df_events.copy()

        df_gcal = self._get_gcal_events()
        df_ov = self._determine_overlap(df_elig_events, df_gcal)

        df_to_insert = df_ov[df_ov.end_date_gcal.isna()].copy()
        df_to_insert['action'] = INSERT_ACTION

        df_to_delete = df_ov[df_ov.end_date_events.isna()].copy()
        df_to_delete['action'] = DELETE_ACTION

        dfs = [df_to_insert, df_to_delete]

        if df_ov[~df_ov.end_date_events.isna()].shape[0] and df_ov[~df_ov.end_date_gcal.isna()].shape[0]:
            # update events whose estimated end time drifted beyond the tolerance;
            # abs() and total_seconds() keep the comparison symmetric and robust
            # to differences larger than a day
            df_to_update = df_ov[
                (df_ov.end_date_events - df_ov.end_date_gcal).abs().dt.total_seconds() > exec_time_diff_tol_seconds].copy()
            df_to_update['action'] = UPDATE_ACTION
            dfs.append(df_to_update)

        df_to_sync = pd.concat(dfs)

        if df_to_sync.empty:
            return df_to_sync

        df_to_sync['end_date'] = df_to_sync.apply(
            lambda x: x.end_date_events if pd.notna(x.end_date_events) else x.end_date_gcal, axis=1)

        return df_to_sync.drop(columns=['end_date_events', 'end_date_gcal'])

    def sync(self, df_to_sync):
        if df_to_sync.empty:
            return df_to_sync

        df_to_sync['error'] = df_to_sync.apply(self.gcal.do_sync, axis=1)
        return df_to_sync
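Note: the insert/update/delete decisions hinge on the outer join built in `_determine_overlap`. A toy frame showing the two one-sided cases (names and dates chosen purely for illustration):

```python
import pandas as pd
from datetime import datetime

idx_new = pd.MultiIndex.from_tuples([('foo', datetime(2020, 4, 20, 8, 0))])
idx_cur = pd.MultiIndex.from_tuples([('bar', datetime(2020, 4, 20, 8, 0))])
df_new = pd.DataFrame({'end_date_events': [datetime(2020, 4, 20, 9, 0)]}, index=idx_new)
df_cur = pd.DataFrame({'end_date_gcal': [datetime(2020, 4, 20, 9, 30)]}, index=idx_cur)

# concat on the (dag_id, start_date) index acts as an outer join
df_ov = pd.concat([df_new, df_cur], axis=1).reset_index()
# 'foo' has no end_date_gcal -> insert; 'bar' has no end_date_events -> delete
print(df_ov)
```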
--------------------------------------------------------------------------------
/example.py:
--------------------------------------------------------------------------------
import os
import sys
import argparse
import logging
from pathlib import Path
from datetime import timedelta
from sqlalchemy import create_engine
from aircal.events import DagRunEventsExtractor
from aircal.dao.airflow import AirflowDb
from aircal.dao.gcal import GCalClient
from aircal.export import GCalExporter


logger = logging.getLogger('aircal')
logger.setLevel(logging.INFO)
cli_handler = logging.StreamHandler()
cli_format = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
cli_handler.setFormatter(cli_format)
logger.addHandler(cli_handler)


def do_continue(df_events):
    if df_events.shape[0] <= 500:
        return
    logger.info('# events to manage is high: %d' % df_events.shape[0])
    logger.info('You might want to consider filtering them to reduce clutter.')
    yn = input('Are you sure you want to export all of them (y/n): ')
    if yn != 'y':
        logger.info('Too many events, exiting.')
        sys.exit(1)


def main(args):
    logger.info('Extracting DAG run events.')
    airflow_db = AirflowDb(create_engine(args.sqlalchemy_conn_string))
    extractor = DagRunEventsExtractor(airflow_db, n_last_runs=args.n_last_runs)

    # extract all future DAG runs as calendar events
    df_events = extractor.get_future_dag_runs(n_horizon_days=args.n_horizon_days)
    # filter out the ones that are of no interest to you;
    # in this case we only keep the ones running longer than x minutes
    df_events = df_events[df_events.mean_exec_time > timedelta(minutes=args.min_dag_exec_time)]

    do_continue(df_events)

    logger.info('Syncing to GCal.')
    gcal_client = GCalClient(calendar_id=args.calendar_id, creds_dir=Path(args.creds_path), logger=logger)
    exporter = GCalExporter(gcal_client)
    # identify the DAG runs that need a sync (insert, update, delete)
    df_to_sync = exporter.mark_for_sync(df_events)

    logger.info('# DAG runs that need a sync: %d' % df_to_sync.shape[0])
    df_updated = exporter.sync(df_to_sync)

    if not df_updated.empty:
        # save the data frame for inspection
        df_updated.to_csv('event_ops.csv', index=False)
        logger.info('%d events synced.' % df_updated.shape[0])


if __name__ == '__main__':

    parser = argparse.ArgumentParser()
    parser.add_argument('--sqlalchemy-conn-string', required=True,
                        help='SQL connection string used to read the dag and dag_run tables in the Airflow DB')
    parser.add_argument('--calendar-id', required=True,
                        help='calendar where the DAG run events will be created')
    parser.add_argument('--creds-path', default=os.getcwd(),
                        help='location of credentials.json; also used to store the token received from the Google API')
    parser.add_argument('--n-horizon-days', type=int, default=10,
                        help='how many days in advance the events should be created')
    parser.add_argument('--n-last-runs', type=int, default=5,
                        help='number of recent DAG runs used to estimate execution time')
    parser.add_argument('--min-dag-exec-time', type=int, default=0,
                        help='min execution time (in minutes) of a DAG for it to be exported to the calendar')
    args = parser.parse_args()

    main(args)
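Note: per the README's tip, the sync script can itself be scheduled with Airflow. A minimal sketch using the classic (1.10-era) BashOperator; the DAG id, schedule, paths, and environment variables are assumptions to adapt to your deployment:

```python
from datetime import datetime
from airflow import DAG
from airflow.operators.bash_operator import BashOperator

dag = DAG('aircal_sync', schedule_interval='0 6 * * *',
          start_date=datetime(2020, 1, 1), catchup=False)

# assumes aircal is installed and example.py is available on the worker
sync = BashOperator(
    task_id='sync_calendar',
    bash_command=('python /opt/aircal/example.py '
                  '--sqlalchemy-conn-string "$AIRFLOW_DB_URI" '
                  '--calendar-id "$AIRCAL_CALENDAR_ID"'),
    dag=dag)
```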
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
import pathlib
from setuptools import setup, find_packages

BASE_DIR = pathlib.Path(__file__).parent
README = (BASE_DIR / 'README.md').read_text()


setup(
    name='aircal',
    version='0.1.2',
    description='Export and visualize Airflow DAG runs as events in Google calendar.',
    long_description=README,
    long_description_content_type='text/markdown',
    url='http://github.com/domenp/aircal',
    author='Domen Pogacnik',
    license='MIT',
    packages=find_packages(),
    install_requires=[
        'numpy',
        'pandas',
        'croniter',
        'google-api-python-client',
        'google-auth-httplib2',
        'google-auth-oauthlib',
        'sqlalchemy'
    ],
    tests_require=['pytest'],
    classifiers=[
        'Programming Language :: Python :: 3',
        'Development Status :: 4 - Beta',
    ],
    zip_safe=False)

--------------------------------------------------------------------------------
/tests/event_test.py:
--------------------------------------------------------------------------------
import pytest
import pandas as pd
from datetime import timedelta
from aircal.events import DagRunEventsExtractor


# 10 daily runs + 240 hourly runs over the 10-day horizon
DEF_NUM_EVENTS = 250


class AirflowDbDefaultMock:

    def load_dag_metadata(self):
        return pd.DataFrame(data={
            'dag_id': ['test_dag', 'foobar'],
            'schedule_interval': ['5 10 * * *', '5 * * * *']
        })

    def load_dag_run_metadata(self):
        return pd.DataFrame(data={
            'dag_id': ['test_dag', 'foobar'],
            'start_date': ['2020-04-05 14:44:03.5', '2020-04-05 18:44:03.5'],
            'end_date': ['2020-04-05 15:44:03.5', '2020-04-05 20:44:03.5']
        })


def get_events():
    airflow_db = AirflowDbDefaultMock()
    extractor = DagRunEventsExtractor(airflow_db)
    return extractor.get_future_dag_runs(n_horizon_days=10)


def test_all_events_present():
    df_events = get_events()
    assert df_events.shape[0] == DEF_NUM_EVENTS


def test_all_essential_columns_not_na():
    df_events = get_events()
    # check that no essential column contains missing values
    assert df_events[
        ['dag_id', 'schedule_interval', 'mean_exec_time', 'start_date', 'end_date']
    ].notna().all().all()


def test_mean_exec_time_est():
    df_events = get_events()
    assert all(df_events[df_events.dag_id == 'test_dag'].mean_exec_time == timedelta(hours=1))

--------------------------------------------------------------------------------
/tests/export_test.py:
--------------------------------------------------------------------------------
import pytest
import pandas as pd
from datetime import datetime
from aircal.export import GCalExporter, INSERT_ACTION, DELETE_ACTION, UPDATE_ACTION


# 'bar' and 'baz' are new events; 'foo' already matches the calendar
NUM_EVENTS_TO_SYNC = 2


class GCalClientMock:

    def create_event(self, dag_id, start_date, end_date):
        return 'confirmed'

    def delete_event(self, event_id):
        return 'deleted'

    def update_event(self, event_id, dag_id, start_date, end_date):
        return 'confirmed'

    def do_sync(self, v):
        if v.action == INSERT_ACTION:
            self.create_event(v.dag_id, v.start_date, v.end_date)
        elif v.action == DELETE_ACTION:
            self.delete_event(v.event_id)
        elif v.action == UPDATE_ACTION:
            self.update_event(v.event_id, v.dag_id, v.start_date, v.end_date)
        else:
            raise Exception('action not supported')
        return 0

    def get_events(self, base_date):
        elig_events = [{
            'id': '1',
            'summary': 'foo',
            'start': {'dateTime': '2020-04-20T08:20:00Z'},
            'end': {'dateTime': '2020-04-20T08:22:00Z'}
        }]
        return elig_events


def test_sync():
    df_events = pd.DataFrame(data={
        'dag_id': ['foo', 'bar', 'baz'],
        'start_date': [datetime(2020, 4, 20, 8, 20), datetime(2020, 4, 20, 8, 21), datetime(2020, 4, 20, 8, 22)],
        'end_date': [datetime(2020, 4, 20, 8, 22), datetime(2020, 4, 20, 10, 20), datetime(2020, 4, 20, 16, 0)]
    })
    gcal_client = GCalClientMock()
    exporter = GCalExporter(gcal_client)
    df_to_sync = exporter.mark_for_sync(df_events)
    assert df_to_sync.shape[0] == NUM_EVENTS_TO_SYNC

    df_updated = exporter.sync(df_to_sync)
    assert df_updated.shape[0] == NUM_EVENTS_TO_SYNC

--------------------------------------------------------------------------------