├── tests ├── __init__.py ├── utils │ ├── __init__.py │ ├── test_fn_utils.py │ └── test_hook_utils.py ├── airflow_metrics │ ├── __init__.py │ ├── test_patching.py │ ├── test_patch_bq.py │ ├── test_patch_gcs_2_bq.py │ └── test_patch_requests.py └── utility.py ├── airflow_metrics ├── utils │ ├── __init__.py │ ├── event_utils.py │ ├── fn_utils.py │ └── hook_utils.py ├── __init__.py └── airflow_metrics │ ├── patch_stats.py │ ├── patch_tasks.py │ ├── __init__.py │ ├── patch_thread.py │ ├── patch_bq.py │ ├── patch_gcs_2_bq.py │ ├── datadog_logger.py │ └── patch_requests.py ├── requirements.txt ├── requirements-dev.txt ├── .travis.yml ├── setup.py ├── .gitignore ├── README.md ├── LICENSE └── .pylintrc /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /airflow_metrics/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/airflow_metrics/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | datadog>=0.29.3 2 | -------------------------------------------------------------------------------- /tests/utility.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import MagicMock 2 | 3 | 4 | def mockfn(func): 5 | mock = MagicMock(side_effect=func) 6 | mock.__name__ = func.__name__ 7 | return mock 8 | -------------------------------------------------------------------------------- /airflow_metrics/__init__.py: -------------------------------------------------------------------------------- 1 | from airflow.plugins_manager import AirflowPlugin 2 | 3 | from airflow_metrics.airflow_metrics import patch 4 | 5 | 6 | patch() 7 | 8 | 9 | class AirflowMetricsPlugin(AirflowPlugin): 10 | name = 'airflow_metrics' 11 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | Flask==1.0.3 2 | apache-airflow>=1.10.2, <2.0.0dev 3 | freezegun==0.3.12 4 | google-api-python-client>=1.6.0, <2.0.0dev 5 | Jinja2>=2.10.1 6 | keyring==19.0.2 7 | pandas-gbq 8 | pylint==2.3.1 9 | pytest==4.6.3 10 | twine==1.13.0 11 | werkzeug==0.14.1 12 | -------------------------------------------------------------------------------- /tests/airflow_metrics/test_patching.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | 4 | class TestPatching(TestCase): 5 | def test_patching(self): 6 | ''' 7 | airflow-metrics automatically patches the package upon loading, so this empty test is just 8 | here to make a note that if the tests run and pass, the patching is working properly 9 | ''' 10 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3.6 3 | 4 | env: 5 
| global: 6 |     - SLUGIFY_USES_TEXT_UNIDECODE=yes 7 | 8 | install: 9 |   - "pip install -r requirements.txt" 10 |   - "pip install -r requirements-dev.txt" 11 | 12 | jobs: 13 |   include: 14 |     - stage: test 15 |       before_script: 16 |         - "airflow initdb" 17 |         - "airflow connections --add --conn_id datadog_default --conn_type HTTP --conn_extr '{\"api_key\": \"\"}'" 18 |       script: 19 |         - "pytest -v" 20 |     - stage: test 21 |       script: 22 |         - "pylint --disable=missing-docstring airflow_metrics" 23 |         - "pylint --disable=missing-docstring --disable=too-few-public-methods --disable=no-self-use --disable=invalid-name tests" 24 | -------------------------------------------------------------------------------- /airflow_metrics/airflow_metrics/patch_stats.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from airflow import settings 4 | from airflow.models import TaskInstance 5 | from datadog import ThreadStats 6 | 7 | from airflow_metrics.airflow_metrics.datadog_logger import DatadogStatsLogger 8 | from airflow_metrics.utils.fn_utils import once 9 | from airflow_metrics.utils.hook_utils import HookManager 10 | 11 | 12 | @once 13 | def patch_stats(): 14 | org_logger = settings.Stats 15 | 16 | if len(sys.argv) > 1 and sys.argv[1] == 'run': 17 | def undo_patch(*args, **kwargs): 18 | logger.stop() 19 | settings.Stats = org_logger 20 | 21 | ti_run_raw_task_manager = HookManager(TaskInstance, '_run_raw_task') 22 | ti_run_raw_task_manager.register_post_hook(undo_patch) 23 | ti_run_raw_task_manager.wrap_method() 24 | 25 | def join(_, self, *args, **kwargs): 26 | self._flush_thread.join() # pylint: disable=protected-access 27 | 28 | threadstats_stop_manager = HookManager(ThreadStats, 'stop') 29 | threadstats_stop_manager.register_post_hook(join) 30 | threadstats_stop_manager.wrap_method() 31 | 32 | logger = DatadogStatsLogger() 33 | logger.start() 34 | settings.Stats = logger 35 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import find_packages, setup 2 | 3 | 4 | setup( 5 | name='airflow-metrics', 6 | version='0.1.5', 7 | author='zylphrex', 8 | author_email='zylphrex@gmail.com', 9 | maintainer='zylphrex', 10 | maintainer_email='zylphrex@gmail.com', 11 | url='https://github.com/getsentry/airflow-metrics', 12 | description='Airflow plugin for automatically sending metrics from Airflow to Datadog', 13 | long_description=open('README.md').read(), 14 | long_description_content_type='text/markdown', 15 | classifiers=[ 16 | 'Development Status :: 3 - Alpha', 17 | 'Environment :: Plugins', 18 | 'License :: OSI Approved :: Apache Software License', 19 | ], 20 | platforms=[ 21 | 'MacOS', 22 | 'Unix', 23 | 'Windows', 24 | ], 25 | keywords=[ 26 | 'airflow', 27 | 'datadog', 28 | 'metrics', 29 | 'plugin', 30 | ], 31 | 32 | packages=find_packages(exclude=['tests', '*.tests', '*.tests.*', 'tests.*']), 33 | entry_points={ 34 | 'airflow.plugins': [ 35 | 'airflow_metrics = airflow_metrics:AirflowMetricsPlugin', 36 | ], 37 | }, 38 | install_requires=open('requirements.txt').read().split('\n'), 39 | ) 40 | -------------------------------------------------------------------------------- /airflow_metrics/airflow_metrics/patch_tasks.py: -------------------------------------------------------------------------------- 1 | from airflow.models import DagRun 2 | from airflow.models import TaskInstance 3 | from airflow.settings import Stats 4 | 5
from airflow_metrics.utils.event_utils import EventManager 6 | from airflow_metrics.utils.fn_utils import once 7 | 8 | 9 | def dag_duration(target=None, **kwargs): 10 | if target.start_date and target.end_date: 11 | duration = (target.end_date - target.start_date).total_seconds() 12 | tags = { 13 | 'dag': target.dag_id, 14 | } 15 | Stats.timing('dag.duration', duration * 1000, tags=tags) 16 | 17 | 18 | def task_duration(target=None, **kwargs): 19 | if target.duration: 20 | tags = { 21 | 'dag': target.dag_id, 22 | 'task': target.task_id, 23 | 'state': target.state, 24 | 'operator': target.operator, 25 | } 26 | Stats.timing('task.duration', target.duration * 1000, tags=tags) 27 | 28 | 29 | @once 30 | def patch_tasks(): 31 | dag_run_after_update_manager = EventManager(DagRun, 'after_update') 32 | dag_run_after_update_manager.register_callback('end_date', dag_duration) 33 | 34 | task_instance_after_update_manager = EventManager(TaskInstance, 'after_update') 35 | task_instance_after_update_manager.register_callback('duration', task_duration) 36 | -------------------------------------------------------------------------------- /airflow_metrics/utils/event_utils.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | 3 | from airflow.utils.log.logging_mixin import LoggingMixin 4 | from sqlalchemy import event 5 | from sqlalchemy import inspect 6 | 7 | 8 | class EventManager(LoggingMixin): 9 | def __init__(self, cls, event_name): 10 | super().__init__() 11 | self.cls = cls 12 | self.event_name = event_name 13 | self.callbacks = defaultdict(list) 14 | 15 | def listener(mapper, connection, target): 16 | del mapper 17 | del connection 18 | 19 | state = inspect(target) 20 | for attr, callbacks in self.callbacks.items(): 21 | history = state.get_history(attr, True) 22 | if not history.has_changes(): 23 | continue 24 | 25 | for callback in callbacks: 26 | old = None 27 | if history.deleted and history.deleted[0]: 28 | old = history.deleted[0] # not too clear on why this is a list 29 | 30 | new = None 31 | if history.added and history.added[0]: 32 | new = history.added[0] # not too clear on why this is a list 33 | 34 | callback(target=target, new=new, old=old) 35 | event.listens_for(self.cls, self.event_name)(listener) 36 | 37 | 38 | def register_callback(self, state, callback): 39 | self.log.info('registering a callback') 40 | self.callbacks[state].append(callback) 41 | -------------------------------------------------------------------------------- /airflow_metrics/airflow_metrics/__init__.py: -------------------------------------------------------------------------------- 1 | from airflow import configuration as conf 2 | 3 | from airflow_metrics.utils.fn_utils import once 4 | from airflow_metrics.utils.fn_utils import enabled 5 | from airflow_metrics.utils.fn_utils import swallow_error 6 | 7 | 8 | @once 9 | @swallow_error 10 | def patch(): 11 | if not enabled(): 12 | return 13 | 14 | from airflow_metrics.airflow_metrics.patch_stats import patch_stats 15 | patch_stats() 16 | 17 | if enabled('tasks'): 18 | monkey_patch_tasks() 19 | 20 | if enabled('thread'): 21 | monkey_patch_thread() 22 | 23 | if enabled('bq'): 24 | monkey_patch_bq() 25 | 26 | if enabled('gcs_to_bq'): 27 | monkey_patch_gcs_2_bq() 28 | 29 | if enabled('requests'): 30 | monkey_patch_requests() 31 | 32 | 33 | @swallow_error 34 | def monkey_patch_tasks(): 35 | from airflow_metrics.airflow_metrics.patch_tasks import patch_tasks 36 | patch_tasks() 37 | 38 | 39 | 
@swallow_error 40 | def monkey_patch_thread(): 41 | from airflow_metrics.airflow_metrics.patch_thread import patch_thread 42 | patch_thread() 43 | 44 | 45 | @swallow_error 46 | def monkey_patch_bq(): 47 | from airflow_metrics.airflow_metrics.patch_bq import patch_bq 48 | patch_bq() 49 | 50 | 51 | @swallow_error 52 | def monkey_patch_gcs_2_bq(): 53 | from airflow_metrics.airflow_metrics.patch_gcs_2_bq import patch_gcs_2_bq 54 | patch_gcs_2_bq() 55 | 56 | 57 | @swallow_error 58 | def monkey_patch_requests(): 59 | from airflow_metrics.airflow_metrics.patch_requests import patch_requests 60 | patch_requests() 61 | -------------------------------------------------------------------------------- /airflow_metrics/airflow_metrics/patch_thread.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from datetime import datetime, timedelta 4 | from threading import Thread 5 | from time import sleep 6 | 7 | import sqlalchemy 8 | 9 | from airflow.models import TaskInstance 10 | from airflow.settings import Stats 11 | from airflow.utils.db import provide_session 12 | from pytz import utc 13 | 14 | from airflow_metrics.utils.fn_utils import once 15 | from airflow_metrics.utils.fn_utils import capture_exception 16 | 17 | @provide_session 18 | def task_states(_since, session=None): 19 | states = ( 20 | session.query(TaskInstance.state, sqlalchemy.func.count()) 21 | .group_by(TaskInstance.state) 22 | ) 23 | 24 | for state, count in states: 25 | if state is None: 26 | continue 27 | 28 | tags = { 29 | 'state': state 30 | } 31 | Stats.gauge('task.state', count, tags=tags) 32 | 33 | 34 | @provide_session 35 | def bq_task_states(since, session=None): 36 | states = ( 37 | session.query(TaskInstance.state, sqlalchemy.func.count()) 38 | .filter(TaskInstance.operator == 'BigQueryOperator') 39 | .filter(TaskInstance.end_date > since) 40 | .group_by(TaskInstance.state) 41 | ) 42 | 43 | for state, count in states: 44 | if state is None: 45 | continue 46 | 47 | tags = { 48 | 'state': state 49 | } 50 | Stats.incr('task.state.bq', count, tags=tags) 51 | 52 | 53 | def forever(funcs, sleep_time): 54 | passed = timedelta(seconds=sleep_time) 55 | 56 | def wrapped(): 57 | while True: 58 | for func in funcs: 59 | since = datetime.utcnow() - passed 60 | func(utc.localize(since)) 61 | sleep(sleep_time) 62 | return wrapped 63 | 64 | 65 | @once 66 | def patch_thread(): 67 | try: 68 | if len(sys.argv) > 1 and sys.argv[1] == 'scheduler': 69 | funcs = [ 70 | task_states, 71 | bq_task_states, 72 | ] 73 | thread = Thread(target=forever(funcs, 10)) 74 | thread.daemon = True 75 | thread.start() 76 | except Exception as ex: # pylint: disable=broad-except 77 | capture_exception(ex) 78 | -------------------------------------------------------------------------------- /airflow_metrics/airflow_metrics/patch_bq.py: -------------------------------------------------------------------------------- 1 | from airflow.contrib.operators.bigquery_operator import BigQueryOperator 2 | from airflow.settings import Stats 3 | 4 | from airflow_metrics.utils.fn_utils import once 5 | from airflow_metrics.utils.hook_utils import HookManager 6 | 7 | 8 | @HookManager.success_only 9 | def get_bq_job(ctx, self, *args, **kwargs): 10 | bq_cursor = self.bq_cursor 11 | service = bq_cursor.service 12 | jobs = service.jobs() 13 | job = jobs.get(projectId=bq_cursor.project_id, 14 | jobId=bq_cursor.running_job_id).execute() 15 | ctx['job'] = job 16 | 17 | 18 | @HookManager.success_only 19 | def bq_upserted(ctx, self, 
*args, **kwargs): 20 | query_stats = ctx['job']['statistics']['query']['queryPlan'] 21 | 22 | all_queries = set() 23 | upstream_queries = set() 24 | 25 | for stat in query_stats: 26 | all_queries.add(stat['id']) 27 | upstream_queries.update(set(stat.get('inputStages', []))) 28 | 29 | final_queries = all_queries - upstream_queries 30 | written = 0 31 | 32 | for stat in query_stats: 33 | if stat['id'] not in final_queries: 34 | continue 35 | 36 | written += int(stat['recordsWritten']) 37 | 38 | tags = { 39 | 'dag': self.dag_id, 40 | 'task': self.task_id, 41 | 'operator': self.__class__.__name__, 42 | } 43 | Stats.gauge('task.upserted.bq', written, tags=tags) 44 | 45 | 46 | @HookManager.success_only 47 | def bq_duration(ctx, self, *args, **kwargs): 48 | stats = ctx['job']['statistics'] 49 | creation = int(stats['creationTime']) 50 | start = int(stats['startTime']) 51 | end = int(stats['endTime']) 52 | 53 | tags = { 54 | 'dag': self.dag_id, 55 | 'task': self.task_id, 56 | 'operator': self.__class__.__name__, 57 | } 58 | Stats.timing('task.delay.bq', start - creation, tags=tags) 59 | Stats.timing('task.duration.bq', end - start, tags=tags) 60 | 61 | 62 | @once 63 | def patch_bq(): 64 | bq_operator_execute_manager = HookManager(BigQueryOperator, 'execute') 65 | bq_operator_execute_manager.register_post_hook(get_bq_job) 66 | bq_operator_execute_manager.register_post_hook(bq_upserted) 67 | bq_operator_execute_manager.register_post_hook(bq_duration) 68 | bq_operator_execute_manager.wrap_method() 69 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | 53 | # Translations 54 | *.mo 55 | *.pot 56 | 57 | # Django stuff: 58 | *.log 59 | local_settings.py 60 | db.sqlite3 61 | 62 | # Flask stuff: 63 | instance/ 64 | .webassets-cache 65 | 66 | # Scrapy stuff: 67 | .scrapy 68 | 69 | # Sphinx documentation 70 | docs/_build/ 71 | 72 | # PyBuilder 73 | target/ 74 | 75 | # Jupyter Notebook 76 | .ipynb_checkpoints 77 | 78 | # IPython 79 | profile_default/ 80 | ipython_config.py 81 | 82 | # pyenv 83 | .python-version 84 | 85 | # pipenv 86 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 87 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 88 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 89 | # install all needed dependencies. 
90 | #Pipfile.lock 91 | 92 | # celery beat schedule file 93 | celerybeat-schedule 94 | 95 | # SageMath parsed files 96 | *.sage.py 97 | 98 | # Environments 99 | .env 100 | .venv 101 | env/ 102 | venv/ 103 | ENV/ 104 | env.bak/ 105 | venv.bak/ 106 | 107 | # Spyder project settings 108 | .spyderproject 109 | .spyproject 110 | 111 | # Rope project settings 112 | .ropeproject 113 | 114 | # mkdocs documentation 115 | /site 116 | 117 | # mypy 118 | .mypy_cache/ 119 | .dmypy.json 120 | dmypy.json 121 | 122 | # Pyre type checker 123 | .pyre/ 124 | -------------------------------------------------------------------------------- /airflow_metrics/utils/fn_utils.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from functools import wraps 4 | 5 | from airflow import configuration as conf 6 | from airflow.exceptions import AirflowConfigException 7 | from airflow.models import BaseOperator 8 | from airflow.utils.log.logging_mixin import LoggingMixin 9 | 10 | 11 | LOG = LoggingMixin().log 12 | 13 | 14 | def capture_exception(ex): 15 | try: 16 | from sentry_sdk import capture_exception as capture # pylint: disable=import-error 17 | capture(ex) 18 | except (ModuleNotFoundError, ImportError): 19 | LOG.warning(str(ex)) 20 | 21 | 22 | def enabled(metric='', default=True): 23 | if metric: 24 | metric = '{}_'.format(metric) 25 | metric = 'airflow_metrics_{}enabled'.format(metric) 26 | try: 27 | return conf.getboolean('airflow_metrics', metric) 28 | except AirflowConfigException: 29 | return default 30 | 31 | 32 | def once(func): 33 | context = { 34 | 'ran': False, 35 | } 36 | 37 | @wraps(func) 38 | def wrapped(*args, **kwargs): 39 | if context['ran']: # turn the second call and onwards into noop 40 | return None 41 | context['ran'] = True 42 | 43 | return func(*args, **kwargs) 44 | 45 | return wrapped 46 | 47 | 48 | def swallow_error(func): 49 | @wraps(func) 50 | def wrapped(*args, **kwargs): 51 | try: 52 | return func(*args, **kwargs) 53 | except Exception as ex: # pylint: disable=broad-except 54 | capture_exception(ex) 55 | return None 56 | return wrapped 57 | 58 | 59 | def get_local_vars(frame_number=0): 60 | try: 61 | frame = sys._getframe(frame_number + 1) # pylint: disable=protected-access 62 | local_vars = frame.f_locals 63 | return local_vars 64 | finally: 65 | try: 66 | del frame 67 | del local_vars 68 | except Exception as ex: # pylint: disable=broad-except 69 | capture_exception(ex) 70 | 71 | def get_calling_operator(max_frames=25): 72 | for i in range(max_frames): 73 | try: 74 | local_vars = get_local_vars(i) 75 | except ValueError: 76 | return None 77 | 78 | self = local_vars.get('self', None) 79 | 80 | if self is None: 81 | continue 82 | 83 | if isinstance(self, BaseOperator): 84 | return self 85 | return None 86 | -------------------------------------------------------------------------------- /airflow_metrics/utils/hook_utils.py: -------------------------------------------------------------------------------- 1 | from functools import wraps 2 | 3 | from airflow.utils.log.logging_mixin import LoggingMixin 4 | 5 | from airflow_metrics.utils.fn_utils import swallow_error 6 | 7 | 8 | class HookManager(LoggingMixin): 9 | def __init__(self, cls, method_name): 10 | super().__init__() 11 | self.cls = cls 12 | self.method_name = method_name 13 | 14 | self.pre_hooks = [] 15 | self.post_hooks = [] 16 | 17 | def wrap_method(self): 18 | method = getattr(self.cls, self.method_name) 19 | 20 | @wraps(method) 21 | def wrapped_method(*args, **kwargs): 22 | 
hook_context = {} 23 | 24 | self.run_pre_hooks(hook_context, *args, **kwargs) 25 | 26 | try: 27 | if 'return' in hook_context: 28 | # in the event that the method fails, ensure 'return' is not a key 29 | del hook_context['return'] 30 | hook_context['return'] = method(*args, **kwargs) 31 | except Exception as ex: 32 | hook_context['success'] = False 33 | raise ex 34 | else: 35 | hook_context['success'] = True 36 | finally: 37 | self.run_post_hooks(hook_context, *args, **kwargs) 38 | 39 | return hook_context['return'] 40 | 41 | setattr(self.cls, self.method_name, wrapped_method) 42 | 43 | @swallow_error 44 | def run_pre_hooks(self, hook_context, *args, **kwargs): 45 | for pre_hook in self.pre_hooks: 46 | pre_hook(hook_context, *args, **kwargs) 47 | 48 | @swallow_error 49 | def run_post_hooks(self, hook_context, *args, **kwargs): 50 | for post_hook in self.post_hooks: 51 | post_hook(hook_context, *args, **kwargs) 52 | 53 | def register_pre_hook(self, pre_hook): 54 | self.log.info('registering a pre-hook: {}'.format(pre_hook.__name__)) 55 | self.pre_hooks.append(pre_hook) 56 | 57 | def register_post_hook(self, post_hook): 58 | self.log.info('registering a post-hook: {}'.format(post_hook.__name__)) 59 | self.post_hooks.append(post_hook) 60 | 61 | @classmethod 62 | def success_only(cls, func): 63 | @wraps(func) 64 | def wrapped(ctx, *args, **kwargs): 65 | if 'success' in ctx and not ctx['success']: 66 | return None 67 | return func(ctx, *args, **kwargs) 68 | return wrapped 69 | -------------------------------------------------------------------------------- /airflow_metrics/airflow_metrics/patch_gcs_2_bq.py: -------------------------------------------------------------------------------- 1 | from functools import wraps 2 | 3 | from airflow.contrib.hooks.bigquery_hook import BigQueryConnection 4 | from airflow.contrib.operators.gcs_to_bq import GoogleCloudStorageToBigQueryOperator 5 | from airflow.settings import Stats 6 | 7 | from airflow_metrics.utils.fn_utils import get_calling_operator 8 | from airflow_metrics.utils.fn_utils import once 9 | from airflow_metrics.utils.hook_utils import HookManager 10 | 11 | 12 | @HookManager.success_only 13 | def attach_cursor(ctx, *args, **kwargs): 14 | operator = get_calling_operator() 15 | if isinstance(operator, GoogleCloudStorageToBigQueryOperator): 16 | operator.__big_query_cursor__ = ctx['return'] 17 | 18 | 19 | def has_cursor(func): 20 | @wraps(func) 21 | def wrapped(ctx, self, *args, **kwargs): 22 | if not hasattr(self, '__big_query_cursor__'): 23 | return None 24 | return func(ctx, self, *args, **kwargs) 25 | return wrapped 26 | 27 | 28 | @HookManager.success_only 29 | @has_cursor 30 | def get_bq_job(ctx, self, *args, **kwargs): 31 | bq_cursor = self.__big_query_cursor__ 32 | service = bq_cursor.service 33 | jobs = service.jobs() 34 | job = jobs.get(projectId=bq_cursor.project_id, 35 | jobId=bq_cursor.running_job_id).execute() 36 | ctx['job'] = job 37 | 38 | 39 | @HookManager.success_only 40 | @has_cursor 41 | def bq_upserted(ctx, self, *args, **kwargs): 42 | rows = ctx['job']['statistics']['load']['outputRows'] 43 | tags = { 44 | 'dag': self.dag_id, 45 | 'task': self.task_id, 46 | 'operator': self.__class__.__name__, 47 | } 48 | Stats.gauge('task.upserted.gcs_to_bq', rows, tags=tags) 49 | 50 | 51 | @HookManager.success_only 52 | @has_cursor 53 | def bq_duration(ctx, self, *args, **kwargs): 54 | stats = ctx['job']['statistics'] 55 | creation = int(stats['creationTime']) 56 | start = int(stats['startTime']) 57 | end = int(stats['endTime']) 58 | 59 | 
tags = { 60 | 'dag': self.dag_id, 61 | 'task': self.task_id, 62 | 'operator': self.__class__.__name__, 63 | } 64 | Stats.timing('task.delay.gcs_to_bq', start - creation, tags=tags) 65 | Stats.timing('task.duration.gcs_to_bq', end - start, tags=tags) 66 | 67 | 68 | @once 69 | def patch_gcs_2_bq(): 70 | bq_connection_cursor_manager = HookManager(BigQueryConnection, 'cursor') 71 | bq_connection_cursor_manager.register_post_hook(attach_cursor) 72 | bq_connection_cursor_manager.wrap_method() 73 | 74 | gcs_to_bq_operator_execute_manager = \ 75 | HookManager(GoogleCloudStorageToBigQueryOperator, 'execute') 76 | gcs_to_bq_operator_execute_manager.register_post_hook(get_bq_job) 77 | gcs_to_bq_operator_execute_manager.register_post_hook(bq_upserted) 78 | gcs_to_bq_operator_execute_manager.register_post_hook(bq_duration) 79 | gcs_to_bq_operator_execute_manager.wrap_method() 80 | -------------------------------------------------------------------------------- /airflow_metrics/airflow_metrics/datadog_logger.py: -------------------------------------------------------------------------------- 1 | from atexit import register, unregister 2 | from datetime import timedelta 3 | 4 | from airflow.exceptions import AirflowException 5 | from airflow.hooks.base_hook import BaseHook 6 | from airflow.utils.log.logging_mixin import LoggingMixin 7 | from datadog import initialize, ThreadStats 8 | 9 | 10 | class DatadogStatsLogger(LoggingMixin): 11 | def __init__(self, datadog_conn_id='datadog_default'): 12 | super().__init__() 13 | conn = BaseHook.get_connection(datadog_conn_id) 14 | self.api_key = conn.extra_dejson.get('api_key', None) 15 | self.app_key = conn.extra_dejson.get('app_key', None) 16 | self.source_type_name = conn.extra_dejson.get('source_type_name', None) 17 | 18 | # If the host is populated, it will use that hostname instead 19 | # for all metric submissions 20 | self.host = conn.host 21 | 22 | if self.api_key is None: 23 | raise AirflowException('api_key must be specified in the ' 24 | 'Datadog connection details') 25 | 26 | self.log.info('Setting up api keys for Datadog') 27 | self.stats = None 28 | initialize(api_key=self.api_key, app_key=self.app_key) 29 | 30 | def incr(self, stat, count=1, rate=1, tags=None): 31 | self.log.info('datadog incr: {} {} {} {}'.format(stat, count, rate, tags)) 32 | self.stats.increment(stat, value=count, sample_rate=rate, 33 | tags=self._format_tags(tags)) 34 | 35 | def decr(self, stat, count=1, rate=1, tags=None): 36 | self.log.info('datadog decr: {} {} {} {}'.format(stat, count, rate, tags)) 37 | self.stats.decrement(stat, value=count, sample_rate=rate, 38 | tags=self._format_tags(tags)) 39 | 40 | def gauge(self, stat, value, rate=1, delta=False, tags=None): 41 | self.log.info('datadog gauge: {} {} {} {} {}'.format(stat, value, rate, delta, tags)) 42 | if delta: 43 | self.log.warning('Deltas are unsupported in Datadog') 44 | self.stats.gauge(stat, value, sample_rate=rate, 45 | tags=self._format_tags(tags)) 46 | 47 | def timing(self, stat, delta, rate=1, tags=None): 48 | self.log.info('datadog timing: {} {} {}'.format(stat, delta, tags)) 49 | if isinstance(delta, timedelta): 50 | delta = delta.total_seconds() * 1000.
51 | self.stats.timing(stat, delta, sample_rate=rate, 52 | tags=self._format_tags(tags)) 53 | 54 | @classmethod 55 | def _format_tags(cls, tags): 56 | if not tags: 57 | return None 58 | return ['{}:{}'.format(k, v) for k, v in tags.items()] 59 | 60 | def start(self): 61 | self.stats = ThreadStats(namespace='airflow') 62 | self.stats.start() 63 | register(self.stop) 64 | 65 | def stop(self): 66 | unregister(self.stop) 67 | self.stats.stop() 68 | -------------------------------------------------------------------------------- /airflow_metrics/airflow_metrics/patch_requests.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from functools import wraps 3 | from urllib.parse import urlparse 4 | 5 | from airflow.settings import Stats 6 | from airflow.utils.log.logging_mixin import LoggingMixin 7 | from requests import PreparedRequest 8 | from requests import Session 9 | 10 | from airflow_metrics.utils.fn_utils import get_calling_operator 11 | from airflow_metrics.utils.fn_utils import once 12 | from airflow_metrics.utils.hook_utils import HookManager 13 | 14 | 15 | LOG = LoggingMixin().log 16 | 17 | BLACKLIST = { 18 | 'api.datadoghq.com', 19 | } 20 | 21 | 22 | def attach_request_meta(ctx, *args, **kwargs): 23 | if len(args) >= 2 and isinstance(args[1], PreparedRequest): 24 | request = args[1] 25 | url = request.url 26 | else: 27 | LOG.info('No url found for request') 28 | return 29 | ctx['url'] = url 30 | 31 | domain = urlparse(url).netloc 32 | if domain in BLACKLIST: 33 | LOG.info('Found blacklisted domain: {}'.format(domain)) 34 | return 35 | ctx['domain'] = domain 36 | 37 | operator = get_calling_operator() 38 | if not operator: 39 | LOG.warning('Request not made by an operator: {}'.format(domain)) 40 | return 41 | ctx['operator'] = operator 42 | 43 | 44 | def whitelisted(func): 45 | @wraps(func) 46 | def wrapped(ctx, *args, **kwargs): 47 | if ctx.get('url') and ctx.get('domain') and ctx.get('operator'): 48 | return func(ctx, *args, **kwargs) 49 | return None 50 | return wrapped 51 | 52 | 53 | @whitelisted 54 | def start_time(ctx, *args, **kwargs): 55 | ctx['start_time'] = datetime.now() 56 | 57 | 58 | @whitelisted 59 | def stop_time(ctx, *args, **kwargs): 60 | start = ctx['start_time'] 61 | stop = datetime.now() 62 | duration = (stop - start).total_seconds() * 1000 63 | 64 | tags = { 65 | 'dag': ctx['operator'].dag_id, 66 | 'task': ctx['operator'].task_id, 67 | 'operator': ctx['operator'].__class__.__name__, 68 | 'domain': ctx['domain'], 69 | } 70 | Stats.timing('request.duration', duration, tags=tags) 71 | 72 | 73 | @HookManager.success_only 74 | @whitelisted 75 | def http_status(ctx, *args, **kwargs): 76 | response = ctx['return'] 77 | status = response.status_code 78 | 79 | tags = { 80 | 'dag': ctx['operator'].dag_id, 81 | 'task': ctx['operator'].task_id, 82 | 'operator': ctx['operator'].__class__.__name__, 83 | 'domain': ctx['domain'], 84 | 'status': status 85 | } 86 | if status < 400: 87 | Stats.incr('request.status.success', tags=tags) 88 | else: 89 | Stats.incr('request.status.failure', tags=tags) 90 | 91 | 92 | @once 93 | def patch_requests(): 94 | session_request_manager = HookManager(Session, 'send') 95 | session_request_manager.register_pre_hook(attach_request_meta) 96 | session_request_manager.register_pre_hook(start_time) 97 | session_request_manager.register_post_hook(stop_time) 98 | session_request_manager.register_post_hook(http_status) 99 | session_request_manager.wrap_method() 100 | 
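All of the patch modules above (`patch_stats.py`, `patch_bq.py`, `patch_gcs_2_bq.py`, `patch_requests.py`) share one mechanism: `HookManager` wraps an existing method, threads a shared `ctx` dict from pre-hooks to post-hooks, and exposes the call's outcome via `ctx['return']` and `ctx['success']`. The following minimal sketch is not a file in this repo; the `Greeter` class and its printed timing "metric" are hypothetical stand-ins used only to illustrate the pattern.

```python
# Illustrative sketch of the HookManager pattern (hypothetical Greeter class).
from datetime import datetime

from airflow_metrics.utils.hook_utils import HookManager


class Greeter():
    def greet(self, name):
        return 'hello {}'.format(name)


def start_timer(ctx, *args, **kwargs):
    # pre-hooks receive the shared context dict plus the wrapped call's arguments
    ctx['start_time'] = datetime.now()


@HookManager.success_only  # skipped when the wrapped method raises
def report_duration(ctx, *args, **kwargs):
    # post-hooks additionally see ctx['success'] and ctx['return']
    duration = (datetime.now() - ctx['start_time']).total_seconds() * 1000
    print('greet returned {!r} after {}ms'.format(ctx['return'], duration))


greet_manager = HookManager(Greeter, 'greet')
greet_manager.register_pre_hook(start_timer)
greet_manager.register_post_hook(report_duration)
greet_manager.wrap_method()

assert Greeter().greet('airflow') == 'hello airflow'  # hooks run transparently
```

This is the same flow `patch_requests.py` uses to time `Session.send` without modifying `requests` itself; since `run_pre_hooks` and `run_post_hooks` are wrapped in `swallow_error`, a broken hook cannot break the wrapped method.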
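Likewise, the opt-out flags that `airflow_metrics/__init__.py` checks before each monkey patch follow a single naming convention implemented by `fn_utils.enabled()`; a brief sketch of the mapping (illustrative, not part of the package):

```python
# enabled(metric) reads the [airflow_metrics] section of airflow.cfg, looking up
# 'airflow_metrics_{metric}_enabled' and falling back to True when it is unset.
from airflow_metrics.utils.fn_utils import enabled

enabled()             # airflow_metrics_enabled: master switch for the plugin
enabled('bq')         # airflow_metrics_bq_enabled
enabled('gcs_to_bq')  # airflow_metrics_gcs_to_bq_enabled
```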
-------------------------------------------------------------------------------- /tests/utils/test_fn_utils.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | from airflow.models import BaseOperator 4 | 5 | from airflow_metrics.utils.fn_utils import once 6 | from airflow_metrics.utils.fn_utils import get_calling_operator 7 | from airflow_metrics.utils.fn_utils import get_local_vars 8 | from airflow_metrics.utils.fn_utils import swallow_error 9 | from tests.utility import mockfn 10 | 11 | 12 | class TestOnce(TestCase): 13 | def test_once(self): 14 | @mockfn 15 | def fn_mock(): 16 | pass 17 | func = once(fn_mock) 18 | assert fn_mock.call_count == 0 19 | func() 20 | assert fn_mock.call_count == 1 21 | func() 22 | assert fn_mock.call_count == 1 23 | 24 | 25 | class TestSwallowError(TestCase): 26 | def test_swallow_error(self): 27 | @swallow_error 28 | def error(): 29 | raise Exception() 30 | 31 | error() 32 | 33 | 34 | class TestGetLocalVars(TestCase): 35 | def test_get_local_vars(self): 36 | # the mockfn decorator adds additional frames onto the 37 | # call stack, thus the values are larger than expected 38 | 39 | @mockfn 40 | def inner(): 41 | local_vars = get_local_vars(6) 42 | assert local_vars['a'] == 1 43 | assert local_vars['b'] == 2 44 | local_vars = get_local_vars(3) 45 | assert local_vars['c'] == 3 46 | assert local_vars['d'] == 4 47 | 48 | @mockfn 49 | def middle(c): 50 | d = 4 51 | local_vars = get_local_vars(3) 52 | assert local_vars['a'] == 1 53 | assert local_vars['b'] == 2 54 | inner() 55 | del c 56 | del d 57 | 58 | @mockfn 59 | def outer(a): 60 | b = 2 61 | local_vars = get_local_vars(0) 62 | assert local_vars['a'] == 1 63 | assert local_vars['b'] == 2 64 | middle(3) 65 | del a 66 | del b 67 | 68 | assert not outer.called 69 | assert not middle.called 70 | assert not inner.called 71 | outer(1) 72 | assert outer.called 73 | assert middle.called 74 | assert inner.called 75 | 76 | 77 | class TestGetCallingOperator(TestCase): 78 | def test_called_by_operator(self): 79 | @mockfn 80 | def test_fn(self): 81 | assert get_calling_operator() is self 82 | 83 | class MyOperator(BaseOperator): 84 | def execute(self, context): 85 | assert get_calling_operator() is self 86 | test_fn(self) 87 | 88 | operator = MyOperator(task_id='im-a-test') 89 | assert not test_fn.called 90 | operator.execute(None) 91 | assert test_fn.called 92 | 93 | def test_called_by_out_of_range_operator(self): 94 | @mockfn 95 | def test_fn(): 96 | assert get_calling_operator(2) is None 97 | 98 | class MyOperator(BaseOperator): 99 | def execute(self, context): 100 | assert get_calling_operator(2) is self 101 | test_fn() 102 | 103 | operator = MyOperator(task_id='im-a-test') 104 | assert not test_fn.called 105 | operator.execute(None) 106 | assert test_fn.called 107 | 108 | def test_not_called_by_operator(self): 109 | assert get_calling_operator() is None 110 | 111 | @mockfn 112 | def test_fn(): 113 | assert get_calling_operator() is None 114 | 115 | assert not test_fn.called 116 | test_fn() 117 | assert test_fn.called 118 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Build Status](https://travis-ci.com/getsentry/airflow-metrics.svg?token=TJpWxpbKGxDuV8CPPRzL&branch=master)](https://travis-ci.com/getsentry/airflow-metrics) 2 | 3 | # airflow-metrics 4 | 5 | `airflow-metrics` is an Airflow plugin for automatically
sending metrics from Airflow to Datadog. 6 | 7 | **Tested For**: `apache-airflow>=1.10.2, <=1.10.3` 8 | 9 | ## Installation 10 | 11 | ```shell 12 | pip install airflow-metrics 13 | ``` 14 | 15 | ### Optional 16 | 17 | If you want the metrics from `BigQueryOperator` and `GoogleCloudStorageToBigQueryOperator`, then make sure the necessary dependencies are installed. 18 | 19 | ```shell 20 | pip install apache-airflow[gcp_api] 21 | ``` 22 | 23 | ## Setup 24 | 25 | `airflow-metrics` will report all metrics to Datadog, so create an `airflow` connection with your Datadog API key. 26 | 27 | ```shell 28 | airflow connections --add --conn_id datadog_default --conn_type HTTP --conn_extr '{"api_key": ""}' 29 | ``` 30 | 31 | **Note**: If you skip this step, your `airflow` installation should still work but no metrics will be reported. 32 | 33 | ## Usage 34 | 35 | That's it! `airflow-metrics` will now begin sending metrics from Airflow to Datadog automatically. 36 | 37 | ### Metrics 38 | 39 | `airflow-metrics` will automatically begin reporting the following metrics: 40 | 41 | * `airflow.task.state` The total number of tasks in a state, where the state is stored as a tag. 42 | * `airflow.task.state.bq` The current number of big query tasks in a state, where the state is stored as a tag. 43 | * `airflow.dag.duration` The duration of a DAG in ms. 44 | * `airflow.task.duration` The duration of a task in ms. 45 | * `airflow.request.duration` The duration of an HTTP request in ms. 46 | * `airflow.request.status.success` The current number of HTTP requests with successful status codes (<400). 47 | * `airflow.request.status.failure` The current number of HTTP requests with unsuccessful status codes (>=400). 48 | * `airflow.task.upserted.bq` The number of rows upserted by a BigQueryOperator. 49 | * `airflow.task.delay.bq` The time taken for the big query job from a BigQueryOperator to start in ms. 50 | * `airflow.task.duration.bq` The time taken for the big query job from a BigQueryOperator to finish in ms. 51 | * `airflow.task.upserted.gcs_to_bq` The number of rows upserted by a GoogleCloudStorageToBigQueryOperator. 52 | * `airflow.task.delay.gcs_to_bq` The time taken for the big query job from a GoogleCloudStorageToBigQueryOperator to start in ms. 53 | * `airflow.task.duration.gcs_to_bq` The time taken for the big query job from a GoogleCloudStorageToBigQueryOperator to finish in ms. 54 | 55 | ## Configuration 56 | 57 | By default, `airflow-metrics` will begin extracting metrics from Airflow as you run your DAGs and send them to Datadog. You can opt out of it entirely, or opt out of a subset of the metrics, by setting these configurations in your `airflow.cfg`. 58 | 59 | ``` 60 | [airflow_metrics] 61 | 62 | airflow_metrics_enabled = True 63 | airflow_metrics_tasks_enabled = True 64 | airflow_metrics_bq_enabled = True 65 | airflow_metrics_gcs_to_bq_enabled = True 66 | airflow_metrics_requests_enabled = True 67 | airflow_metrics_thread_enabled = True 68 | ``` 69 | 70 | ## Limitations 71 | 72 | `airflow-metrics` starts a thread to report some metrics, and is not supported when using SQLite as your database. 73 | 74 | ## Contributing 75 | 76 | Pull requests are welcome. For major changes, please open an issue first to discuss what you would like to change. 77 | 78 | ### Getting Started 79 | 80 | Set up your virtual environment for Python 3 however you like. 81 | 82 | ```shell 83 | pip install -e .
84 | airflow initdb 85 | airflow connections --add --conn_id datadog_default --conn_type HTTP --conn_extr '{"api_key": ""}' 86 | ``` 87 | 88 | **Note**: The last step is necessary, otherwise the plugin will not initialize correctly and will not collect metrics. But you are free to add a dummy key for development purposes. 89 | 90 | ### Running Tests 91 | 92 | ```shell 93 | pip install -r requirements-dev.txt 94 | pytest 95 | ``` 96 | -------------------------------------------------------------------------------- /tests/airflow_metrics/test_patch_bq.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | from unittest.mock import Mock 3 | from unittest.mock import patch 4 | 5 | from airflow_metrics.airflow_metrics.patch_bq import bq_duration 6 | from airflow_metrics.airflow_metrics.patch_bq import bq_upserted 7 | from airflow_metrics.airflow_metrics.patch_bq import get_bq_job 8 | 9 | 10 | class TestGetBqJob(TestCase): 11 | def setUp(self): 12 | self.self = Mock() 13 | self.ctx = {'success': True} 14 | 15 | def test_job_created(self): 16 | assert 'job' not in self.ctx 17 | get_bq_job(self.ctx, self.self) 18 | assert self.ctx['job'] 19 | 20 | 21 | class TestBqUpserted(TestCase): 22 | def setUp(self): 23 | self.self = Mock() 24 | self.self.dag_id = 'dag-id' 25 | self.self.task_id = 'task-id' 26 | self.self.__class__.__name__ = 'MockOperator' 27 | self.ctx = { 28 | 'job': { 29 | 'statistics': { 30 | 'query': { 31 | 'queryPlan': [ 32 | { 33 | 'id': '0', 34 | 'inputStages': [], 35 | 'recordsWritten': 0, 36 | }, 37 | { 38 | 'id': '1', 39 | 'inputStages': ['0'], 40 | 'recordsWritten': 1, 41 | }, 42 | { 43 | 'id': '2', 44 | 'inputStages': ['0'], 45 | 'recordsWritten': 2, 46 | }, 47 | { 48 | 'id': '3', 49 | 'inputStages': ['1'], 50 | 'recordsWritten': 3, 51 | }, 52 | { 53 | 'id': '4', 54 | 'inputStages': ['3'], 55 | 'recordsWritten': 4, 56 | }, 57 | { 58 | 'id': '5', 59 | 'inputStages': ['2', '3'], 60 | 'recordsWritten': 5, 61 | }, 62 | ], 63 | }, 64 | }, 65 | }, 66 | } 67 | 68 | def test_gauge(self): 69 | with patch('airflow_metrics.airflow_metrics.patch_bq.Stats') as Stats: 70 | bq_upserted(self.ctx, self.self) 71 | assert Stats.gauge.call_args == ( 72 | ('task.upserted.bq', 9), 73 | {'tags': {'dag': 'dag-id', 'task': 'task-id', 'operator': 'MockOperator'}}, 74 | ) 75 | 76 | 77 | class TestBqDuration(TestCase): 78 | def setUp(self): 79 | self.self = Mock() 80 | self.self.dag_id = 'dag-id' 81 | self.self.task_id = 'task-id' 82 | self.self.__class__.__name__ = 'MockOperator' 83 | self.ctx = { 84 | 'job': { 85 | 'statistics': { 86 | 'creationTime': 0, 87 | 'startTime': 1, 88 | 'endTime': 3, 89 | }, 90 | }, 91 | } 92 | 93 | def test_timing(self): 94 | with patch('airflow_metrics.airflow_metrics.patch_bq.Stats') as Stats: 95 | bq_duration(self.ctx, self.self) 96 | assert Stats.timing.call_args_list == [ 97 | ( 98 | ('task.delay.bq', 1), 99 | {'tags': {'dag': 'dag-id', 'task': 'task-id', 'operator': 'MockOperator'}}, 100 | ), 101 | ( 102 | ('task.duration.bq', 2), 103 | {'tags': {'dag': 'dag-id', 'task': 'task-id', 'operator': 'MockOperator'}}, 104 | ), 105 | ] 106 | -------------------------------------------------------------------------------- /tests/airflow_metrics/test_patch_gcs_2_bq.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | from unittest.mock import Mock 3 | from unittest.mock import patch 4 | 5 | from airflow.contrib.operators.gcs_to_bq import 
GoogleCloudStorageToBigQueryOperator 6 | 7 | from airflow_metrics.airflow_metrics.patch_gcs_2_bq import attach_cursor 8 | from airflow_metrics.airflow_metrics.patch_gcs_2_bq import bq_duration 9 | from airflow_metrics.airflow_metrics.patch_gcs_2_bq import bq_upserted 10 | from airflow_metrics.airflow_metrics.patch_gcs_2_bq import get_bq_job 11 | from airflow_metrics.airflow_metrics.patch_gcs_2_bq import has_cursor 12 | from airflow_metrics.utils.hook_utils import HookManager 13 | from tests.utility import mockfn 14 | 15 | 16 | class TestAttachCursor(TestCase): 17 | def test_successfully_attach(self): 18 | class TestClass(): 19 | def test_method(self): 20 | return mock 21 | 22 | class TestOperator(GoogleCloudStorageToBigQueryOperator): 23 | def execute(self, context): 24 | test_object = TestClass() 25 | test_object.test_method() 26 | 27 | test_method_manager = HookManager(TestClass, 'test_method') 28 | test_method_manager.register_post_hook(attach_cursor) 29 | test_method_manager.wrap_method() 30 | 31 | mock = Mock() 32 | operator = TestOperator(task_id='task-id', bucket=None, source_objects=None, 33 | destination_project_dataset_table=None) 34 | operator.execute(None) 35 | assert operator.__big_query_cursor__ == mock # pylint: disable=no-member 36 | 37 | 38 | class TestHasCursor(TestCase): 39 | def test_does_have_cursor(self): 40 | class TestClass(): 41 | pass 42 | ctx = {} 43 | this = TestClass() 44 | this.__big_query_cursor__ = Mock() # pylint: disable=attribute-defined-outside-init 45 | 46 | @mockfn 47 | def fn_mock(*args, **kwargs): 48 | pass 49 | fn = has_cursor(fn_mock) 50 | 51 | assert not fn_mock.called 52 | fn(ctx, this) 53 | assert fn_mock.called 54 | 55 | def test_doesnt_have_cursor(self): 56 | class TestClass(): 57 | pass 58 | ctx = {} 59 | this = TestClass() 60 | 61 | @mockfn 62 | def fn_mock(*args, **kwargs): 63 | pass 64 | fn = has_cursor(fn_mock) 65 | 66 | assert not fn_mock.called 67 | fn(ctx, this) 68 | assert not fn_mock.called 69 | 70 | 71 | class TestGetBqJob(TestCase): 72 | def setUp(self): 73 | self.self = Mock() 74 | self.self.__big_query_cursor__ = Mock() 75 | self.ctx = { 76 | 'success': True, 77 | } 78 | 79 | def test_job_created(self): 80 | assert 'job' not in self.ctx 81 | get_bq_job(self.ctx, self.self) 82 | assert self.ctx['job'] 83 | 84 | 85 | class TestBqUpserted(TestCase): 86 | def setUp(self): 87 | self.self = Mock() 88 | self.self.__big_query_cursor__ = Mock() 89 | self.self.dag_id = 'dag-id' 90 | self.self.task_id = 'task-id' 91 | self.self.__class__.__name__ = 'MockOperator' 92 | self.ctx = { 93 | 'success': True, 94 | 'job': { 95 | 'statistics': { 96 | 'load': { 97 | 'outputRows': 9, 98 | }, 99 | }, 100 | }, 101 | } 102 | 103 | def test_timing(self): 104 | with patch('airflow_metrics.airflow_metrics.patch_gcs_2_bq.Stats') as Stats: 105 | bq_upserted(self.ctx, self.self) 106 | assert Stats.gauge.call_args == ( 107 | ('task.upserted.gcs_to_bq', 9), 108 | {'tags': {'dag': 'dag-id', 'task': 'task-id', 'operator': 'MockOperator'}}, 109 | ) 110 | 111 | 112 | class TestBqDuration(TestCase): 113 | def setUp(self): 114 | self.self = Mock() 115 | self.self.__big_query_cursor__ = Mock() 116 | self.self.dag_id = 'dag-id' 117 | self.self.task_id = 'task-id' 118 | self.self.__class__.__name__ = 'MockOperator' 119 | self.ctx = { 120 | 'success': True, 121 | 'job': { 122 | 'statistics': { 123 | 'creationTime': 0, 124 | 'startTime': 1, 125 | 'endTime': 3, 126 | }, 127 | }, 128 | } 129 | 130 | def test_timing(self): 131 | with 
patch('airflow_metrics.airflow_metrics.patch_gcs_2_bq.Stats') as Stats: 132 | bq_duration(self.ctx, self.self) 133 | assert Stats.timing.call_args_list == [ 134 | ( 135 | ('task.delay.gcs_to_bq', 1), 136 | {'tags': {'dag': 'dag-id', 'task': 'task-id', 'operator': 'MockOperator'}}, 137 | ), 138 | ( 139 | ('task.duration.gcs_to_bq', 2), 140 | {'tags': {'dag': 'dag-id', 'task': 'task-id', 'operator': 'MockOperator'}}, 141 | ), 142 | ] 143 | -------------------------------------------------------------------------------- /tests/airflow_metrics/test_patch_requests.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from unittest import TestCase 3 | from unittest.mock import Mock 4 | from unittest.mock import patch 5 | from urllib.parse import urlparse 6 | 7 | from airflow.models import BaseOperator 8 | from freezegun import freeze_time 9 | from requests import PreparedRequest 10 | 11 | from airflow_metrics.airflow_metrics.patch_requests import attach_request_meta 12 | from airflow_metrics.airflow_metrics.patch_requests import http_status 13 | from airflow_metrics.airflow_metrics.patch_requests import start_time 14 | from airflow_metrics.airflow_metrics.patch_requests import stop_time 15 | 16 | 17 | URL = 'https://httpbin.org/get' 18 | DOMAIN = urlparse(URL).netloc 19 | 20 | def whitelisted_context(): 21 | 22 | operator = Mock() 23 | operator.dag_id = 'dag-id' 24 | operator.task_id = 'task-id' 25 | operator.__class__.__name__ = 'MockOperator' 26 | 27 | return { 28 | 'url': URL, 29 | 'domain': DOMAIN, 30 | 'operator': operator, 31 | } 32 | 33 | 34 | class TestAttachRequestMeta(TestCase): 35 | def setUp(self): 36 | self.request = PreparedRequest() 37 | self.request.prepare(url=URL) 38 | 39 | def test_no_url(self): 40 | ctx = {} 41 | args = [] 42 | kwargs = {} 43 | attach_request_meta(ctx, *args, **kwargs) 44 | assert 'url' not in ctx 45 | 46 | def test_not_prepared_request(self): 47 | ctx = {} 48 | args = [1, 2] 49 | kwargs = {} 50 | attach_request_meta(ctx, *args, **kwargs) 51 | assert 'url' not in ctx 52 | 53 | def test_blacklisted(self): 54 | with patch('airflow_metrics.airflow_metrics.patch_requests.BLACKLIST', {DOMAIN}): 55 | ctx = {} 56 | args = [1, self.request] 57 | kwargs = {} 58 | attach_request_meta(ctx, *args, **kwargs) 59 | assert 'domain' not in ctx 60 | 61 | def test_not_called_by_operator(self): 62 | ctx = {} 63 | args = [1, self.request] 64 | kwargs = {} 65 | attach_request_meta(ctx, *args, **kwargs) 66 | assert 'operator' not in ctx 67 | 68 | def test_correct(self): 69 | this = self 70 | 71 | class MockOperator(BaseOperator): 72 | def execute(self, context): 73 | ctx = {} 74 | args = [1, this.request] 75 | kwargs = {} 76 | attach_request_meta(ctx, *args, **kwargs) 77 | assert ctx['url'] == URL 78 | assert ctx['domain'] == DOMAIN 79 | assert ctx['operator'] == operator 80 | 81 | operator = MockOperator(task_id='task-id') 82 | operator.execute(None) 83 | 84 | 85 | class TestStartTime(TestCase): 86 | @freeze_time('2019-01-01') 87 | def test_inserts_start_time(self): 88 | ctx = whitelisted_context() 89 | start_time(ctx) 90 | assert ctx['start_time'] == datetime(2019, 1, 1) 91 | 92 | 93 | class TestStopTime(TestCase): 94 | @freeze_time('2019-01-01 00:05:03') 95 | def test_timing(self): 96 | with patch('airflow_metrics.airflow_metrics.patch_requests.Stats') as Stats: 97 | ctx = whitelisted_context() 98 | ctx['start_time'] = datetime(2019, 1, 1) 99 | stop_time(ctx) 100 | assert Stats.timing.call_args == ( 101 | 
('request.duration', 303000.0), 102 | { 103 | 'tags': { 104 | 'dag': 'dag-id', 105 | 'task': 'task-id', 106 | 'operator': 'MockOperator', 107 | 'domain': DOMAIN, 108 | } 109 | }, 110 | ) 111 | 112 | 113 | class TestHttpStatus(TestCase): 114 | def test_succcessful_http(self): 115 | with patch('airflow_metrics.airflow_metrics.patch_requests.Stats') as Stats: 116 | ctx = whitelisted_context() 117 | response = Mock() 118 | response.status_code = 200 119 | ctx['return'] = response 120 | http_status(ctx) 121 | assert Stats.incr.call_args == ( 122 | ('request.status.success',), 123 | { 124 | 'tags': { 125 | 'dag': 'dag-id', 126 | 'task': 'task-id', 127 | 'operator': 'MockOperator', 128 | 'domain': DOMAIN, 129 | 'status': 200, 130 | } 131 | }, 132 | ) 133 | 134 | def test_failed_http(self): 135 | with patch('airflow_metrics.airflow_metrics.patch_requests.Stats') as Stats: 136 | ctx = whitelisted_context() 137 | response = Mock() 138 | response.status_code = 400 139 | ctx['return'] = response 140 | http_status(ctx) 141 | assert Stats.incr.call_args == ( 142 | ('request.status.failure',), 143 | { 144 | 'tags': { 145 | 'dag': 'dag-id', 146 | 'task': 'task-id', 147 | 'operator': 'MockOperator', 148 | 'domain': DOMAIN, 149 | 'status': 400, 150 | } 151 | }, 152 | ) 153 | -------------------------------------------------------------------------------- /tests/utils/test_hook_utils.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | from pytest import raises 4 | 5 | from airflow_metrics.utils.hook_utils import HookManager 6 | from tests.utility import mockfn 7 | 8 | 9 | class TestHookManager(TestCase): 10 | def test_patch_method(self): 11 | class TestClass(): 12 | def test_method(self): 13 | pass 14 | 15 | test_method_manager = HookManager(TestClass, 'test_method') 16 | test_method_manager.wrap_method() 17 | 18 | fake_method_manager = HookManager(TestClass, 'fake_method') 19 | with raises(AttributeError): 20 | fake_method_manager.wrap_method() 21 | 22 | def test_swallows_errors(self): 23 | class TestClass(): 24 | def test_method(self): 25 | return True 26 | 27 | def pre_hook(*args, **kwargs): 28 | raise Exception() 29 | 30 | def post_hook(*args, **kwargs): 31 | raise Exception() 32 | 33 | test_method_manager = HookManager(TestClass, 'test_method') 34 | test_method_manager.register_pre_hook(pre_hook) 35 | test_method_manager.register_post_hook(post_hook) 36 | test_method_manager.wrap_method() 37 | 38 | test_obj = TestClass() 39 | assert test_obj.test_method() 40 | 41 | def test_call_order(self): 42 | call_order = [] 43 | 44 | class TestClass(): 45 | def test_method(self): 46 | call_order.append('test method') 47 | 48 | def pre_hook_1(*args, **kwargs): 49 | call_order.append('pre-hook 1') 50 | 51 | def pre_hook_2(*args, **kwargs): 52 | call_order.append('pre-hook 2') 53 | 54 | def post_hook_1(*args, **kwargs): 55 | call_order.append('post-hook 1') 56 | 57 | def post_hook_2(*args, **kwargs): 58 | call_order.append('post-hook 2') 59 | 60 | test_method_manager = HookManager(TestClass, 'test_method') 61 | test_method_manager.register_pre_hook(pre_hook_1) 62 | test_method_manager.register_pre_hook(pre_hook_2) 63 | test_method_manager.register_post_hook(post_hook_1) 64 | test_method_manager.register_post_hook(post_hook_2) 65 | test_method_manager.wrap_method() 66 | 67 | test_obj = TestClass() 68 | assert not call_order 69 | 70 | test_obj.test_method() 71 | assert call_order == [ 72 | 'pre-hook 1', 73 | 'pre-hook 2', 74 | 'test method', 75 | 
'post-hook 1', 76 | 'post-hook 2' 77 | ] 78 | 79 | def test_carries_context(self): 80 | class TestClass(): 81 | def test_method(self): 82 | pass 83 | 84 | @mockfn 85 | def pre_hook_1(ctx, *args, **kwargs): 86 | ctx['pre-hook 1'] = True 87 | 88 | @mockfn 89 | def pre_hook_2(ctx, *args, **kwargs): 90 | assert ctx['pre-hook 1'] 91 | ctx['pre-hook 2'] = True 92 | 93 | @mockfn 94 | def post_hook_1(ctx, *args, **kwargs): 95 | assert ctx['pre-hook 1'] 96 | assert ctx['pre-hook 2'] 97 | ctx['post-hook 1'] = True 98 | 99 | @mockfn 100 | def post_hook_2(ctx, *args, **kwargs): 101 | assert ctx['pre-hook 1'] 102 | assert ctx['pre-hook 2'] 103 | assert ctx['post-hook 1'] 104 | 105 | test_method_manager = HookManager(TestClass, 'test_method') 106 | test_method_manager.register_pre_hook(pre_hook_1) 107 | test_method_manager.register_pre_hook(pre_hook_2) 108 | test_method_manager.register_post_hook(post_hook_1) 109 | test_method_manager.register_post_hook(post_hook_2) 110 | test_method_manager.wrap_method() 111 | 112 | test_obj = TestClass() 113 | assert not pre_hook_1.called 114 | assert not pre_hook_2.called 115 | assert not post_hook_1.called 116 | assert not post_hook_2.called 117 | test_obj.test_method() 118 | assert pre_hook_1.called 119 | assert pre_hook_2.called 120 | assert post_hook_1.called 121 | assert post_hook_2.called 122 | 123 | def test_success_status_true(self): 124 | class TestClass(): 125 | def test_method(self): 126 | pass 127 | 128 | @mockfn 129 | def pre_hook(ctx, *args, **kwargs): 130 | assert 'success' not in ctx 131 | 132 | @mockfn 133 | def post_hook(ctx, *args, **kwargs): 134 | assert ctx['success'] 135 | 136 | test_method_manager = HookManager(TestClass, 'test_method') 137 | test_method_manager.register_pre_hook(pre_hook) 138 | test_method_manager.register_post_hook(post_hook) 139 | test_method_manager.wrap_method() 140 | 141 | test_obj = TestClass() 142 | assert not pre_hook.called 143 | assert not post_hook.called 144 | test_obj.test_method() 145 | assert pre_hook.called 146 | assert post_hook.called 147 | 148 | def test_success_status_false(self): 149 | class TestClass(): 150 | def test_method(self): 151 | raise Exception() 152 | 153 | @mockfn 154 | def pre_hook(ctx, *args, **kwargs): 155 | assert not ctx['success'] 156 | 157 | @mockfn 158 | def post_hook(ctx, *args, **kwargs): 159 | assert not ctx['success'] 160 | 161 | test_method_manager = HookManager(TestClass, 'test_method') 162 | test_method_manager.register_pre_hook(pre_hook) 163 | test_method_manager.register_post_hook(post_hook) 164 | test_method_manager.wrap_method() 165 | 166 | test_obj = TestClass() 167 | with raises(Exception): 168 | test_obj.test_method() 169 | 170 | def test_pass_on_return_value(self): 171 | class TestClass(): 172 | def test_method(self): 173 | return 'return value' 174 | 175 | @mockfn 176 | def post_hook(ctx, *args, **kwargs): 177 | assert ctx['return'] == 'return value' 178 | 179 | test_method_manager = HookManager(TestClass, 'test_method') 180 | test_method_manager.register_post_hook(post_hook) 181 | test_method_manager.wrap_method() 182 | 183 | test_obj = TestClass() 184 | assert not post_hook.called 185 | test_obj.test_method() 186 | assert post_hook.called 187 | 188 | def test_modify_return_value(self): 189 | class TestClass(): 190 | def test_method(self): 191 | return 'return value' 192 | 193 | def post_hook(ctx, *args, **kwargs): 194 | ctx['return'] = 'modified value' 195 | 196 | test_method_manager = HookManager(TestClass, 'test_method') 197 | 
test_method_manager.register_post_hook(post_hook) 198 | test_method_manager.wrap_method() 199 | 200 | test_obj = TestClass() 201 | assert test_obj.test_method() == 'modified value' 202 | 203 | def test_success_only_true(self): 204 | class TestClass(): 205 | def test_method(self): 206 | pass 207 | 208 | @mockfn 209 | def post_hook_mock(*args, **kwargs): 210 | pass 211 | post_hook = HookManager.success_only(post_hook_mock) 212 | 213 | test_method_manager = HookManager(TestClass, 'test_method') 214 | test_method_manager.register_post_hook(post_hook) 215 | test_method_manager.wrap_method() 216 | 217 | test_obj = TestClass() 218 | assert not post_hook_mock.called 219 | test_obj.test_method() 220 | assert post_hook_mock.called 221 | 222 | def test_success_only_false(self): 223 | class TestClass(): 224 | def test_method(self): 225 | raise Exception() 226 | 227 | @mockfn 228 | def post_hook_mock(*args, **kwargs): 229 | pass 230 | post_hook = HookManager.success_only(post_hook_mock) 231 | 232 | test_method_manager = HookManager(TestClass, 'test_method') 233 | test_method_manager.register_post_hook(post_hook) 234 | test_method_manager.wrap_method() 235 | 236 | test_obj = TestClass() 237 | assert not post_hook_mock.called 238 | with raises(Exception): 239 | test_obj.test_method() 240 | assert not post_hook_mock.called 241 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 
39 |
40 |       "Derivative Works" shall mean any work, whether in Source or Object
41 |       form, that is based on (or derived from) the Work and for which the
42 |       editorial revisions, annotations, elaborations, or other modifications
43 |       represent, as a whole, an original work of authorship. For the purposes
44 |       of this License, Derivative Works shall not include works that remain
45 |       separable from, or merely link (or bind by name) to the interfaces of,
46 |       the Work and Derivative Works thereof.
47 |
48 |       "Contribution" shall mean any work of authorship, including
49 |       the original version of the Work and any modifications or additions
50 |       to that Work or Derivative Works thereof, that is intentionally
51 |       submitted to Licensor for inclusion in the Work by the copyright owner
52 |       or by an individual or Legal Entity authorized to submit on behalf of
53 |       the copyright owner. For the purposes of this definition, "submitted"
54 |       means any form of electronic, verbal, or written communication sent
55 |       to the Licensor or its representatives, including but not limited to
56 |       communication on electronic mailing lists, source code control systems,
57 |       and issue tracking systems that are managed by, or on behalf of, the
58 |       Licensor for the purpose of discussing and improving the Work, but
59 |       excluding communication that is conspicuously marked or otherwise
60 |       designated in writing by the copyright owner as "Not a Contribution."
61 |
62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
63 |       on behalf of whom a Contribution has been received by Licensor and
64 |       subsequently incorporated within the Work.
65 |
66 |    2. Grant of Copyright License. Subject to the terms and conditions of
67 |       this License, each Contributor hereby grants to You a perpetual,
68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 |       copyright license to reproduce, prepare Derivative Works of,
70 |       publicly display, publicly perform, sublicense, and distribute the
71 |       Work and such Derivative Works in Source or Object form.
72 |
73 |    3. Grant of Patent License. Subject to the terms and conditions of
74 |       this License, each Contributor hereby grants to You a perpetual,
75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 |       (except as stated in this section) patent license to make, have made,
77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
78 |       where such license applies only to those patent claims licensable
79 |       by such Contributor that are necessarily infringed by their
80 |       Contribution(s) alone or by combination of their Contribution(s)
81 |       with the Work to which such Contribution(s) was submitted. If You
82 |       institute patent litigation against any entity (including a
83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
84 |       or a Contribution incorporated within the Work constitutes direct
85 |       or contributory patent infringement, then any patent licenses
86 |       granted to You under this License for that Work shall terminate
87 |       as of the date such litigation is filed.
88 |
89 |    4. Redistribution. You may reproduce and distribute copies of the
90 |       Work or Derivative Works thereof in any medium, with or without
91 |       modifications, and in Source or Object form, provided that You
92 |       meet the following conditions:
93 |
94 |       (a) You must give any other recipients of the Work or
95 |           Derivative Works a copy of this License; and
96 |
97 |       (b) You must cause any modified files to carry prominent notices
98 |           stating that You changed the files; and
99 |
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 |
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 |
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 |
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 |
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 |
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 |
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 |
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 |
176 |    END OF TERMS AND CONDITIONS
177 |
178 |    APPENDIX: How to apply the Apache License to your work.
179 |
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!) The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 |
189 |    Copyright 2019 Functional Software, Inc.
190 |
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 |
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 |
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 |
--------------------------------------------------------------------------------
/.pylintrc:
--------------------------------------------------------------------------------
1 | [MASTER]
2 |
3 | # A comma-separated list of package or module names from where C extensions may
4 | # be loaded. Extensions are loaded into the active Python interpreter and may
5 | # run arbitrary code.
6 | extension-pkg-whitelist=
7 |
8 | # Add files or directories to the blacklist. They should be base names, not
9 | # paths.
10 | ignore=CVS
11 |
12 | # Add files or directories matching the regex patterns to the blacklist. The
13 | # regex matches against base names, not paths.
14 | ignore-patterns=
15 |
16 | # Python code to execute, usually for sys.path manipulation such as
17 | # pygtk.require().
18 | #init-hook=
19 |
20 | # Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the
21 | # number of processors available to use.
22 | jobs=1
23 |
24 | # Control the amount of potential inferred values when inferring a single
25 | # object. This can help the performance when dealing with large functions or
26 | # complex, nested conditions.
27 | limit-inference-results=100
28 |
29 | # List of plugins (as comma separated values of python module names) to load,
30 | # usually to register additional checkers.
31 | load-plugins=
32 |
33 | # Pickle collected data for later comparisons.
34 | persistent=yes
35 |
36 | # Specify a configuration file.
37 | #rcfile=
38 |
39 | # When enabled, pylint would attempt to guess common misconfiguration and emit
40 | # user-friendly hints instead of false-positive error messages.
41 | suggestion-mode=yes
42 |
43 | # Allow loading of arbitrary C extensions. Extensions are imported into the
44 | # active Python interpreter and may run arbitrary code.
45 | unsafe-load-any-extension=no
46 |
47 |
48 | [MESSAGES CONTROL]
49 |
50 | # Only show warnings with the listed confidence levels. Leave empty to show
51 | # all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED.
52 | confidence=
53 |
54 | # Disable the message, report, category or checker with the given id(s). You
55 | # can either give multiple identifiers separated by comma (,) or put this
56 | # option multiple times (only on the command line, not in the configuration
57 | # file where it should appear only once). You can also use "--disable=all" to
58 | # disable everything first and then reenable specific checks. For example, if
59 | # you want to run only the similarities checker, you can use "--disable=all
60 | # --enable=similarities". If you want to run only the classes checker, but have
61 | # no Warning level messages displayed, use "--disable=all --enable=classes
62 | # --disable=W".
63 | disable=print-statement,
64 |         parameter-unpacking,
65 |         unpacking-in-except,
66 |         old-raise-syntax,
67 |         backtick,
68 |         long-suffix,
69 |         old-ne-operator,
70 |         old-octal-literal,
71 |         import-star-module-level,
72 |         non-ascii-bytes-literal,
73 |         raw-checker-failed,
74 |         bad-inline-option,
75 |         locally-disabled,
76 |         file-ignored,
77 |         suppressed-message,
78 |         useless-suppression,
79 |         deprecated-pragma,
80 |         use-symbolic-message-instead,
81 |         apply-builtin,
82 |         basestring-builtin,
83 |         buffer-builtin,
84 |         cmp-builtin,
85 |         coerce-builtin,
86 |         execfile-builtin,
87 |         file-builtin,
88 |         long-builtin,
89 |         raw_input-builtin,
90 |         reduce-builtin,
91 |         standarderror-builtin,
92 |         unicode-builtin,
93 |         xrange-builtin,
94 |         coerce-method,
95 |         delslice-method,
96 |         getslice-method,
97 |         setslice-method,
98 |         no-absolute-import,
99 |         old-division,
100 |         dict-iter-method,
101 |         dict-view-method,
102 |         next-method-called,
103 |         metaclass-assignment,
104 |         indexing-exception,
105 |         raising-string,
106 |         reload-builtin,
107 |         oct-method,
108 |         hex-method,
109 |         nonzero-method,
110 |         cmp-method,
111 |         input-builtin,
112 |         round-builtin,
113 |         intern-builtin,
114 |         unichr-builtin,
115 |         map-builtin-not-iterating,
116 |         zip-builtin-not-iterating,
117 |         range-builtin-not-iterating,
118 |         filter-builtin-not-iterating,
119 |         using-cmp-argument,
120 |         eq-without-hash,
121 |         div-method,
122 |         idiv-method,
123 |         rdiv-method,
124 |         exception-message-attribute,
125 |         invalid-str-codec,
126 |         sys-max-int,
127 |         bad-python3-import,
128 |         deprecated-string-function,
129 |         deprecated-str-translate-call,
130 |         deprecated-itertools-function,
131 |         deprecated-types-field,
132 |         next-method-defined,
133 |         dict-items-not-iterating,
134 |         dict-keys-not-iterating,
135 |         dict-values-not-iterating,
136 |         deprecated-operator-function,
137 |         deprecated-urllib-function,
138 |         xreadlines-attribute,
139 |         deprecated-sys-function,
140 |         exception-escape,
141 |         comprehension-escape
142 |
143 | # Enable the message, report, category or checker with the given id(s). You can
144 | # either give multiple identifiers separated by comma (,) or put this option
145 | # multiple times (only on the command line, not in the configuration file where
146 | # it should appear only once). See also the "--disable" option for examples.
147 | enable=c-extension-no-member
148 |
149 |
150 | [REPORTS]
151 |
152 | # Python expression which should return a score less than 10 (10 is the
153 | # highest score). You have access to the variables 'error', 'warning',
154 | # 'refactor', 'convention', and 'statement', which respectively contain the
155 | # number of messages in each category and the total number of statements
156 | # analyzed. This is used by the global evaluation report (RP0004).
157 | evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)
158 |
159 | # Template used to display messages. This is a python new-style format string
160 | # used to format the message information. See doc for all details.
161 | #msg-template=
162 |
163 | # Set the output format. Available formats are text, parseable, colorized, json
164 | # and msvs (visual studio). You can also give a reporter class, e.g.
165 | # mypackage.mymodule.MyReporterClass.
166 | output-format=text
167 |
168 | # Tells whether to display a full report or only the messages.
169 | reports=no
170 |
171 | # Activate the evaluation score.
172 | score=yes
173 |
174 |
175 | [REFACTORING]
176 |
177 | # Maximum number of nested blocks for function / method body
178 | max-nested-blocks=5
179 |
180 | # Complete name of functions that never return. When checking for
181 | # inconsistent-return-statements if a never returning function is called then
182 | # it will be considered as an explicit return statement and no message will be
183 | # printed.
184 | never-returning-functions=sys.exit
185 |
186 |
187 | [LOGGING]
188 |
189 | # Format style used to check logging format string. `old` means using %
190 | # formatting, while `new` is for `{}` formatting.
191 | logging-format-style=old
192 |
193 | # Logging modules to check that the string format arguments are in logging
194 | # function parameter format.
195 | logging-modules=logging
196 |
197 |
198 | [SPELLING]
199 |
200 | # Limits count of emitted suggestions for spelling mistakes.
201 | max-spelling-suggestions=4
202 |
203 | # Spelling dictionary name. Available dictionaries: none. To make it work,
204 | # install the python-enchant package.
205 | spelling-dict=
206 |
207 | # List of comma separated words that should not be checked.
208 | spelling-ignore-words=
209 |
210 | # A path to a file that contains the private dictionary; one word per line.
211 | spelling-private-dict-file=
212 |
213 | # Tells whether to store unknown words to the private dictionary indicated in
214 | # the --spelling-private-dict-file option instead of raising a message.
215 | spelling-store-unknown-words=no
216 |
217 |
218 | [MISCELLANEOUS]
219 |
220 | # List of note tags to take in consideration, separated by a comma.
221 | notes=FIXME,
222 |       XXX,
223 |       TODO
224 |
225 |
226 | [TYPECHECK]
227 |
228 | # List of decorators that produce context managers, such as
229 | # contextlib.contextmanager. Add to this list to register other decorators that
230 | # produce valid context managers.
231 | contextmanager-decorators=contextlib.contextmanager
232 |
233 | # List of members which are set dynamically and missed by pylint inference
234 | # system, and so shouldn't trigger E1101 when accessed. Python regular
235 | # expressions are accepted.
236 | generated-members=
237 |
238 | # Tells whether missing members accessed in mixin class should be ignored. A
239 | # mixin class is detected if its name ends with "mixin" (case insensitive).
240 | ignore-mixin-members=yes
241 |
242 | # Tells whether to warn about missing members when the owner of the attribute
243 | # is inferred to be None.
244 | ignore-none=yes
245 |
246 | # This flag controls whether pylint should warn about no-member and similar
247 | # checks whenever an opaque object is returned when inferring. The inference
248 | # can return multiple potential results while evaluating a Python object, but
249 | # some branches might not be evaluated, which results in partial inference. In
250 | # that case, it might be useful to still emit no-member and other checks for
251 | # the rest of the inferred objects.
252 | ignore-on-opaque-inference=yes
253 |
254 | # List of class names for which member attributes should not be checked (useful
255 | # for classes with dynamically set attributes). This supports the use of
256 | # qualified names.
257 | ignored-classes=optparse.Values,thread._local,_thread._local
258 |
259 | # List of module names for which member attributes should not be checked
260 | # (useful for modules/projects where namespaces are manipulated during runtime
261 | # and thus existing member attributes cannot be deduced by static analysis). It
262 | # supports qualified module names, as well as Unix pattern matching.
263 | ignored-modules=
264 |
265 | # Show a hint with possible names when a member name was not found. Hints are
266 | # ranked by edit distance.
267 | missing-member-hint=yes
268 |
269 | # The minimum edit distance a name should have in order to be considered a
270 | # similar match for a missing member name.
271 | missing-member-hint-distance=1
272 |
273 | # The total number of similar names that should be taken in consideration when
274 | # showing a hint for a missing member.
275 | missing-member-max-choices=1
276 |
277 |
278 | [VARIABLES]
279 |
280 | # List of additional names supposed to be defined in builtins. Remember that
281 | # you should avoid defining new builtins when possible.
282 | additional-builtins=
283 |
284 | # Tells whether unused global variables should be treated as a violation.
285 | allow-global-unused-variables=yes
286 |
287 | # List of strings which can identify a callback function by name. A callback
288 | # name must start or end with one of those strings.
289 | callbacks=cb_,
290 |           _cb
291 |
292 | # A regular expression matching the name of dummy variables (i.e. expected to
293 | # not be used).
294 | dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_
295 |
296 | # Argument names that match this expression will be ignored. Defaults to names
297 | # with a leading underscore.
298 | ignored-argument-names=_.*|^ignored_|^unused_|^args$|^kwargs$
299 |
300 | # Tells whether we should check for unused import in __init__ files.
301 | init-import=no
302 |
303 | # List of qualified module names which can have objects that can redefine
304 | # builtins.
305 | redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io
306 |
307 |
308 | [FORMAT]
309 |
310 | # Expected format of line ending, e.g. empty (any line ending), LF or CRLF.
311 | expected-line-ending-format=
312 |
313 | # Regexp for a line that is allowed to be longer than the limit.
314 | ignore-long-lines=^\s*(# )?<?https?://\S+>?$
315 |
316 | # Number of spaces of indent required inside a hanging or continued line.
317 | indent-after-paren=4
318 |
319 | # String used as indentation unit. This is usually "    " (4 spaces) or "\t" (1
320 | # tab).
321 | indent-string='    '
322 |
323 | # Maximum number of characters on a single line.
324 | max-line-length=100
325 |
326 | # Maximum number of lines in a module.
327 | max-module-lines=1000
328 |
329 | # List of optional constructs for which whitespace checking is disabled. `dict-
330 | # separator` is used to allow tabulation in dicts, etc.: {1 : 1,\n222: 2}.
331 | # `trailing-comma` allows a space between comma and closing bracket: (a, ).
332 | # `empty-line` allows space-only lines.
333 | no-space-check=trailing-comma,
334 |                dict-separator
335 |
336 | # Allow the body of a class to be on the same line as the declaration if body
337 | # contains single statement.
338 | single-line-class-stmt=no
339 |
340 | # Allow the body of an if to be on the same line as the test if there is no
341 | # else.
342 | single-line-if-stmt=no
343 |
344 |
345 | [SIMILARITIES]
346 |
347 | # Ignore comments when computing similarities.
348 | ignore-comments=yes
349 |
350 | # Ignore docstrings when computing similarities.
351 | ignore-docstrings=yes
352 |
353 | # Ignore imports when computing similarities.
354 | ignore-imports=no
355 |
356 | # Minimum number of lines for a similarity.
357 | min-similarity-lines=10
358 |
359 |
360 | [BASIC]
361 |
362 | # Naming style matching correct argument names.
363 | argument-naming-style=snake_case
364 |
365 | # Regular expression matching correct argument names. Overrides argument-
366 | # naming-style.
367 | #argument-rgx=
368 |
369 | # Naming style matching correct attribute names.
370 | attr-naming-style=snake_case
371 |
372 | # Regular expression matching correct attribute names. Overrides attr-naming-
373 | # style.
374 | #attr-rgx=
375 |
376 | # Bad variable names which should always be refused, separated by a comma.
377 | bad-names=foo,
378 |           bar,
379 |           baz,
380 |           toto,
381 |           tutu,
382 |           tata
383 |
384 | # Naming style matching correct class attribute names.
385 | class-attribute-naming-style=any
386 |
387 | # Regular expression matching correct class attribute names. Overrides class-
388 | # attribute-naming-style.
389 | #class-attribute-rgx=
390 |
391 | # Naming style matching correct class names.
392 | class-naming-style=PascalCase
393 |
394 | # Regular expression matching correct class names. Overrides class-naming-
395 | # style.
396 | #class-rgx=
397 |
398 | # Naming style matching correct constant names.
399 | const-naming-style=UPPER_CASE
400 |
401 | # Regular expression matching correct constant names. Overrides const-naming-
402 | # style.
403 | #const-rgx=
404 |
405 | # Minimum line length for functions/classes that require docstrings, shorter
406 | # ones are exempt.
407 | docstring-min-length=-1
408 |
409 | # Naming style matching correct function names.
410 | function-naming-style=snake_case
411 |
412 | # Regular expression matching correct function names. Overrides function-
413 | # naming-style.
414 | #function-rgx=
415 |
416 | # Good variable names which should always be accepted, separated by a comma.
417 | good-names=i,
418 |            j,
419 |            k,
420 |            ex,
421 |            Run,
422 |            _
423 |
424 | # Include a hint for the correct naming format with invalid-name.
425 | include-naming-hint=no
426 |
427 | # Naming style matching correct inline iteration names.
428 | inlinevar-naming-style=any
429 |
430 | # Regular expression matching correct inline iteration names. Overrides
431 | # inlinevar-naming-style.
432 | #inlinevar-rgx=
433 |
434 | # Naming style matching correct method names.
435 | method-naming-style=snake_case
436 |
437 | # Regular expression matching correct method names. Overrides method-naming-
438 | # style.
439 | #method-rgx=
440 |
441 | # Naming style matching correct module names.
442 | module-naming-style=snake_case
443 |
444 | # Regular expression matching correct module names. Overrides module-naming-
445 | # style.
446 | #module-rgx=
447 |
448 | # Colon-delimited sets of names that determine each other's naming style when
449 | # the name regexes allow several styles.
450 | name-group=
451 |
452 | # Regular expression which should only match function or class names that do
453 | # not require a docstring.
454 | no-docstring-rgx=^_
455 |
456 | # List of decorators that produce properties, such as abc.abstractproperty. Add
457 | # to this list to register other decorators that produce valid properties.
458 | # These decorators are taken in consideration only for invalid-name.
459 | property-classes=abc.abstractproperty
460 |
461 | # Naming style matching correct variable names.
462 | variable-naming-style=snake_case
463 |
464 | # Regular expression matching correct variable names. Overrides variable-
465 | # naming-style.
466 | #variable-rgx=
467 |
468 |
469 | [STRING]
470 |
471 | # This flag controls whether the implicit-str-concat-in-sequence should
472 | # generate a warning on implicit string concatenation in sequences defined over
473 | # several lines.
474 | check-str-concat-over-line-jumps=no
475 |
476 |
477 | [IMPORTS]
478 |
479 | # Allow wildcard imports from modules that define __all__.
480 | allow-wildcard-with-all=no
481 |
482 | # Analyse import fallback blocks. This can be used to support both Python 2 and
483 | # 3 compatible code, which means that the block might have code that exists
484 | # only in one or another interpreter, leading to false positives when analysed.
485 | analyse-fallback-blocks=no
486 |
487 | # Deprecated modules which should not be used, separated by a comma.
488 | deprecated-modules=optparse,tkinter.tix
489 |
490 | # Create a graph of external dependencies in the given file (report RP0402 must
491 | # not be disabled).
492 | ext-import-graph=
493 |
494 | # Create a graph of every (i.e. internal and external) dependencies in the
495 | # given file (report RP0402 must not be disabled).
496 | import-graph=
497 |
498 | # Create a graph of internal dependencies in the given file (report RP0402 must
499 | # not be disabled).
500 | int-import-graph=
501 |
502 | # Force import order to recognize a module as part of the standard
503 | # compatibility libraries.
504 | known-standard-library=
505 |
506 | # Force import order to recognize a module as part of a third party library.
507 | known-third-party=enchant
508 |
509 |
510 | [CLASSES]
511 |
512 | # List of method names used to declare (i.e. assign) instance attributes.
513 | defining-attr-methods=__init__,
514 |                       __new__,
515 |                       setUp
516 |
517 | # List of member names, which should be excluded from the protected access
518 | # warning.
519 | exclude-protected=_asdict,
520 |                   _fields,
521 |                   _replace,
522 |                   _source,
523 |                   _make
524 |
525 | # List of valid names for the first argument in a class method.
526 | valid-classmethod-first-arg=cls
527 |
528 | # List of valid names for the first argument in a metaclass class method.
529 | valid-metaclass-classmethod-first-arg=cls
530 |
531 |
532 | [DESIGN]
533 |
534 | # Maximum number of arguments for function / method.
535 | max-args=7
536 |
537 | # Maximum number of attributes for a class (see R0902).
538 | max-attributes=7
539 |
540 | # Maximum number of boolean expressions in an if statement.
541 | max-bool-expr=5
542 |
543 | # Maximum number of branches for function / method body.
544 | max-branches=12
545 |
546 | # Maximum number of locals for function / method body.
547 | max-locals=15
548 |
549 | # Maximum number of parents for a class (see R0901).
550 | max-parents=7
551 |
552 | # Maximum number of public methods for a class (see R0904).
553 | max-public-methods=20
554 |
555 | # Maximum number of return / yield for function / method body.
556 | max-returns=6
557 |
558 | # Maximum number of statements in function / method body.
559 | max-statements=50
560 |
561 | # Minimum number of public methods for a class (see R0903).
562 | min-public-methods=2
563 |
564 |
565 | [EXCEPTIONS]
566 |
567 | # Exceptions that will emit a warning when being caught. Defaults to
568 | # "BaseException, Exception".
569 | overgeneral-exceptions=BaseException,
570 |                        Exception
571 |
--------------------------------------------------------------------------------
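For reference, the tests in tests/utils/test_hook_utils.py above pin down the HookManager contract: pre-hooks run before the wrapped method and share a single ctx dict, the outcome of the call is recorded in ctx['success'] and ctx['return'], post-hooks run even when the method raises and may replace the return value, and HookManager.success_only() wraps a post-hook so it is skipped unless the call succeeded. The sketch below is a minimal implementation consistent with those tests. It is illustrative only: the package's real implementation lives in airflow_metrics/utils/hook_utils.py (not included in this excerpt), and every detail beyond the tested public API (register_pre_hook, register_post_hook, wrap_method, success_only, and the ctx keys 'success' and 'return') is an assumption.

    # Illustrative sketch only; not the code from airflow_metrics/utils/hook_utils.py.
    from functools import wraps


    class HookManager():
        def __init__(self, cls, method_name):
            self.cls = cls
            self.method_name = method_name
            self.pre_hooks = []
            self.post_hooks = []

        def register_pre_hook(self, pre_hook):
            self.pre_hooks.append(pre_hook)

        def register_post_hook(self, post_hook):
            self.post_hooks.append(post_hook)

        def wrap_method(self):
            original = getattr(self.cls, self.method_name)

            @wraps(original)
            def wrapped(*args, **kwargs):
                ctx = {}  # shared state; pre-hooks see no 'success' key yet
                for pre_hook in self.pre_hooks:
                    pre_hook(ctx, *args, **kwargs)
                try:
                    ctx['return'] = original(*args, **kwargs)
                    ctx['success'] = True
                except Exception:
                    ctx['success'] = False  # record the failure, then re-raise
                    raise
                finally:
                    # post-hooks run on success and on failure alike
                    for post_hook in self.post_hooks:
                        post_hook(ctx, *args, **kwargs)
                return ctx['return']  # a post-hook may have replaced this value

            setattr(self.cls, self.method_name, wrapped)

        @staticmethod
        def success_only(hook):
            def wrapper(ctx, *args, **kwargs):
                if ctx.get('success'):  # skip the inner hook when the call failed
                    hook(ctx, *args, **kwargs)
            return wrapper

Note how wrap_method() in this sketch rebinds the attribute on the class itself; that is why, in the tests, a plain test_obj.test_method() call is enough for the registered hooks to fire.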