├── tests ├── __init__.py ├── hooks │ ├── __init__.py │ └── test_hightouch_hook.py └── operators │ ├── __init__.py │ └── test_hightouch_operator.py ├── airflow_provider_hightouch ├── hooks │ ├── __init__.py │ └── hightouch.py ├── operators │ ├── __init__.py │ └── hightouch.py ├── sensors │ ├── __init__.py │ └── hightouch.py ├── triggers │ ├── __init__.py │ └── hightouch.py ├── example_dags │ ├── __init__.py │ └── example_hightouch_trigger_sync.py ├── version.py ├── __init__.py ├── consts.py ├── types.py └── utils.py ├── .flake8 ├── Makefile ├── pyproject.toml ├── .pre-commit-config.yaml ├── setup.cfg ├── CHANGELOG.md ├── DEVELOP.md ├── .gitignore ├── .circleci └── config.yml ├── README.md └── LICENSE /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/hooks/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/operators/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /airflow_provider_hightouch/hooks/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /airflow_provider_hightouch/operators/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /airflow_provider_hightouch/sensors/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /airflow_provider_hightouch/triggers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /airflow_provider_hightouch/example_dags/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 100 3 | extend-ignore = E203 4 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: build install 2 | 3 | build: 4 | python3 -m build 5 | 6 | 7 | install: build 8 | pip3 install -e . 9 | 10 | clean: 11 | rm -rf airflow_provider_hightouch.egg-info 12 | rm -rf build 13 | rm -rf dist 14 | -------------------------------------------------------------------------------- /airflow_provider_hightouch/version.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | __version__ = "4.0.0" 5 | 6 | 7 | def validate_version(): 8 | version = __version__ 9 | tag = os.getenv("CIRCLE_TAG") 10 | if tag != version: 11 | info = "Git tag: {0} does not match the version : {1}".format(tag, version) 12 | sys.exit(info) 13 | 14 | 15 | if __name__ == "__main__": 16 | validate_version() 17 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = [ 3 | "setuptools>=42", 4 | "wheel" 5 | ] 6 | build-backend = "setuptools.build_meta" 7 | 8 | [tool.isort] 9 | multi_line_output = 3 10 | include_trailing_comma = true 11 | force_grid_wrap = 0 12 | use_parentheses = true 13 | ensure_newline_before_comments = true 14 | line_length = 88 15 | 16 | [tool.black] 17 | line-length = 88 18 | target-version = ['py37'] 19 | -------------------------------------------------------------------------------- /airflow_provider_hightouch/__init__.py: -------------------------------------------------------------------------------- 1 | from .version import __version__ 2 | 3 | 4 | def get_provider_info(): 5 | return { 6 | "package-name": "airflow-provider-hightouch", 7 | "name": "Hightouch Provider", 8 | "description": "Hightouch API hooks for Airflow ", 9 | "versions": __version__, 10 | "extra-links": ["airflow_provider_hightouch.operators.hightouch.HightouchLink"], 11 | } 12 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v2.3.0 4 | hooks: 5 | - id: check-yaml 6 | - id: end-of-file-fixer 7 | - id: trailing-whitespace 8 | - repo: https://github.com/psf/black 9 | rev: 20.8b1 10 | hooks: 11 | - id: black 12 | - repo: https://github.com/timothycrosley/isort 13 | rev: 5.6.4 14 | hooks: 15 | - id: isort 16 | name: Run isort to sort imports 17 | files: \.py$ 18 | -------------------------------------------------------------------------------- /airflow_provider_hightouch/consts.py: -------------------------------------------------------------------------------- 1 | SUCCESS = "success" 2 | WARNING = "warning" 3 | CANCELLED = "cancelled" 4 | FAILED = "failed" 5 | PENDING = "pending" 6 | WARNING = "warning" 7 | SUCCESS = "success" 8 | QUERYING = "querying" 9 | PROCESSING = "processing" 10 | ABORTED = "aborted" 11 | QUEUED = "queued" 12 | INTERRUPTED = "interrupted" 13 | REPORTING = "reporting" 14 | 15 | TERMINAL_STATUSES = [CANCELLED, FAILED, SUCCESS, WARNING, INTERRUPTED] 16 | PENDING_STATUSES = [ 17 | QUEUED, 18 | QUERYING, 19 | PROCESSING, 20 | REPORTING, 21 | PENDING, 22 | ] 23 | 24 | # The new Hightouch API is confusingly called v1, where the old one is called v2. 25 | HIGHTOUCH_API_BASE_V3 = "api/v1/" 26 | HIGHTOUCH_API_BASE_V2 = "api/v2/rest/" 27 | HIGHTOUCH_API_BASE_V1 = "api/v2/rest/" 28 | 29 | DEFAULT_POLL_INTERVAL = 3 30 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = airflow-provider-hightouch 3 | version = attr: airflow_provider_hightouch.__version__ 4 | description = Hightouch Provider for Airflow 5 | long_description = file: README.md 6 | long_description_content_type = text/markdown 7 | license = Apache License 2.0 8 | classifiers = 9 | Development Status :: 4 - Beta 10 | License :: OSI Approved :: Apache Software License 11 | Programming Language :: Python :: 3 12 | Operating System :: OS Independent 13 | author = Hightouch 14 | author_email = pedram@hightouch.io 15 | url = https://github.com/hightouchio/airflow-provider-hightouch 16 | project_urls = 17 | Bug Tracker = https://github.com/hightouchio/airflow-provider-hightouch/issues 18 | cmdclass= 19 | verify=version:validate_version 20 | 21 | [options] 22 | packages = find: 23 | python_requires = >=3.6 24 | install_requires = 25 | requests 26 | apache-airflow >= 1.10 27 | tests_requires = 28 | pytest >= 6.2.3 29 | requests_mock >= 1.15 30 | 31 | [options.entry_points] 32 | apache_airflow_provider= 33 | provider_info=airflow_provider_hightouch:get_provider_info 34 | -------------------------------------------------------------------------------- /airflow_provider_hightouch/types.py: -------------------------------------------------------------------------------- 1 | from collections import namedtuple 2 | from typing import Any, Dict, NamedTuple 3 | 4 | 5 | class HightouchOutput( 6 | NamedTuple( 7 | "_HightouchOutput", 8 | [ 9 | ("sync_details", Dict[str, Any]), 10 | ("sync_run_details", Dict[str, Any]), 11 | ], 12 | ) 13 | ): 14 | """ 15 | Contains recorded information about the state of a Hightouch sync after a sync completes. 16 | Attributes: 17 | sync_details (Dict[str, Any]): 18 | https://hightouch.io/docs/api-reference/#operation/GetSync 19 | sync_run_details (Dict[str, Any]): 20 | https://hightouch.io/docs/api-reference/#operation/ListSyncRuns 21 | destination_details (Dict[str, Any]): 22 | https://hightouch.io/docs/api-reference/#operation/GetDestination 23 | """ 24 | 25 | 26 | SyncRunParsedOutput = namedtuple( 27 | "_SyncRunParsedOutput", 28 | [ 29 | "id", 30 | "created_at", 31 | "started_at", 32 | "finished_at", 33 | "elapsed_seconds", 34 | "planned_add", 35 | "planned_change", 36 | "planned_remove", 37 | "successful_add", 38 | "successful_change", 39 | "successful_remove", 40 | "failed_add", 41 | "failed_change", 42 | "failed_remove", 43 | "query_size", 44 | "status", 45 | "completion_ratio", 46 | "error", 47 | ], 48 | ) 49 | -------------------------------------------------------------------------------- /airflow_provider_hightouch/example_dags/example_hightouch_trigger_sync.py: -------------------------------------------------------------------------------- 1 | from datetime import timedelta 2 | 3 | from airflow import DAG 4 | from airflow.operators.latest_only import LatestOnlyOperator 5 | from airflow.utils.dates import days_ago 6 | 7 | from airflow_provider_hightouch.operators.hightouch import HightouchTriggerSyncOperator 8 | from airflow_provider_hightouch.sensors.hightouch import HightouchSyncRunSensor 9 | 10 | args = {"owner": "airflow"} 11 | 12 | with DAG( 13 | dag_id="example_hightouch_operator", 14 | default_args=args, 15 | schedule_interval="@daily", 16 | start_date=days_ago(1), 17 | dagrun_timeout=timedelta(minutes=5), 18 | ) as dag: 19 | 20 | latest_only = LatestOnlyOperator(task_id="latest_only", dag=dag) 21 | 22 | # This task runs async, and doesn't poll for status or fail on error 23 | run_async = HightouchTriggerSyncOperator(task_id="run_async", sync_id=4) 24 | 25 | # This tasks polls the API until the Hightouch Sync completes or errors. 26 | # Warnings are considered errors, but this can be turned off with the 27 | # specified flag 28 | run_sync = HightouchTriggerSyncOperator( 29 | task_id="run_sync", sync_id=5, synchronous=True, error_on_warning=True 30 | ) 31 | 32 | sync_sensor = HightouchSyncRunSensor( 33 | task_id="sync_sensor", 34 | sync_run_id="123456", 35 | sync_id="123") 36 | 37 | latest_only >> sync_sensor 38 | sync_sensor >> run_sync 39 | sync_sensor >> run_async -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ## 4.0.0 2 | 3 | - Introduces HightouchSyncRunSensor, which monitors the success or failure of a sync run 4 | - Adds a return value to HightouchTriggerSyncOperator 5 | 6 | ## 3.0.2 7 | 8 | ### Fixes: 9 | 10 | - Fixes issue with repeated warnings when using multiple HTTP connections 11 | 12 | ## 3.0.1 13 | 14 | ### Fixes: 15 | 16 | - Correctly log parsed Sync run output 17 | 18 | ## 3.0.0 19 | 20 | This is a new API with some breaking changes. Please read the changes carefully 21 | before upgrading! 22 | 23 | ### NEW: 24 | 25 | - Uses the new Hightouch API Endpoint. This endpoint is now idempotent and more 26 | reliable and flexible. 27 | 28 | - Can trigger sync via ID or via Slug 29 | 30 | - Logs information about the number of rows added, changed, and remove along 31 | with other sync run details 32 | 33 | ### BREAKING: 34 | 35 | - Syncs are now synchronous by default, use `synchronous=False` for async 36 | operations. 37 | 38 | ## 2.1.2 39 | 40 | - #9 Fixes a bug with a missing f in logging unhandled states, and a bug 41 | in an assertion test 42 | 43 | ## 2.1.1 44 | 45 | - Adds support for the queued status from the API 46 | 47 | ## 2.1.0 48 | 49 | - Fixes a bug where starting a sync when a sync is already in progress does not 50 | return the correct sync status 51 | 52 | ## 2.0.0 53 | 54 | - Update Airflow operator to use v2 API 55 | 56 | ## 1.0.0 57 | 58 | - Adds tests and improves provider functionality 59 | 60 | ## 0.1.0 61 | 62 | - Initial release of the provider 63 | -------------------------------------------------------------------------------- /DEVELOP.md: -------------------------------------------------------------------------------- 1 | ## Developing 2 | 3 | ### Prerequisities 4 | 5 | You will need Airflow installed. One way to do this is to use 6 | [pyenv](https://github.com/pyenv/pyenv) and 7 | [pyenv-virtualenv](https://github.com/pyenv/pyenv-virtualenv) to create a virtual environment. 8 | 9 | 10 | 11 | ``` 12 | mkdir -p ~/airflow 13 | cd ~/airflow 14 | pyenv virtualenv airflow 15 | pyenv local airflow 16 | pip install apache-airflow 17 | ``` 18 | 19 | Once installed, you can install this pacakge locally. 20 | 21 | ``` 22 | cd ~/projects 23 | git clone git@github.com:hightouchio/airflow-provider-hightouch.git 24 | cd airflow-provider-hightouch 25 | 26 | # Activate airflow venv 27 | pyenv local airflow 28 | pip install -e . 29 | ``` 30 | 31 | Next, spin up Airflow. Make sure to set AIRFLOW_HOME to the directory you used 32 | for your virtual environment, then copy the example dag over and start Airflow 33 | test server 34 | 35 | ``` 36 | cd ~/airflow 37 | export AIRFLOW_HOME=~/airflow 38 | 39 | # Don't load example dags by default 40 | export AIRFLOW__CORE__LOAD_EXAMPLES=false 41 | 42 | # Copy the example dag over 43 | mkdir -p ~/airflow/dags 44 | cp ~/projects/airflow-provider-hightouch/airflow_provider_hightouch/example_dags/example_hightouch_trigger_sync.py ~/airflow/dags 45 | 46 | # https://github.com/apache/airflow/issues/12808 47 | export OBJC_DISABLE_INITIALIZE_FORK_SAFETY=YES 48 | airflow standalone 49 | ``` 50 | 51 | Once complete, you'll see the admin password in the console. Use that to login at http://localhost:8080 52 | and continue the setup using the instructions in README.md to set an API key, connection etc. 53 | 54 | ## Releasing 55 | 56 | Update the version in `airflow_provider_hightouch/version.py` 57 | Add details in CHANGELOG.md 58 | Once the changes have been merged to main, tag the release and the deploy will complete through CircleCI -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | .vscode 3 | __pycache__/ 4 | *.py[cod] 5 | *$py.class 6 | 7 | # C extensions 8 | *.so 9 | 10 | # Distribution / packaging 11 | .Python 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | pip-wheel-metadata/ 25 | share/python-wheels/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | MANIFEST 30 | 31 | # PyInstaller 32 | # Usually these files are written by a python script from a template 33 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .nox/ 45 | .coverage 46 | .coverage.* 47 | .cache 48 | nosetests.xml 49 | coverage.xml 50 | *.cover 51 | *.py,cover 52 | .hypothesis/ 53 | .pytest_cache/ 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | local_settings.py 62 | db.sqlite3 63 | db.sqlite3-journal 64 | 65 | # Flask stuff: 66 | instance/ 67 | .webassets-cache 68 | 69 | # Scrapy stuff: 70 | .scrapy 71 | 72 | # Sphinx documentation 73 | docs/_build/ 74 | 75 | # PyBuilder 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | .python-version 87 | 88 | # pipenv 89 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 90 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 91 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 92 | # install all needed dependencies. 93 | #Pipfile.lock 94 | 95 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 96 | __pypackages__/ 97 | 98 | # Celery stuff 99 | celerybeat-schedule 100 | celerybeat.pid 101 | 102 | # SageMath parsed files 103 | *.sage.py 104 | 105 | # Environments 106 | .env 107 | .venv 108 | env/ 109 | venv/ 110 | ENV/ 111 | env.bak/ 112 | venv.bak/ 113 | 114 | # Spyder project settings 115 | .spyderproject 116 | .spyproject 117 | 118 | # Rope project settings 119 | .ropeproject 120 | 121 | # mkdocs documentation 122 | /site 123 | 124 | # mypy 125 | .mypy_cache/ 126 | .dmypy.json 127 | dmypy.json 128 | 129 | # Pyre type checker 130 | .pyre/ 131 | -------------------------------------------------------------------------------- /airflow_provider_hightouch/utils.py: -------------------------------------------------------------------------------- 1 | from typing import Type 2 | 3 | from dateutil import parser 4 | 5 | from .types import SyncRunParsedOutput 6 | 7 | 8 | def parse_sync_run_details(sync_run_details) -> Type[SyncRunParsedOutput]: 9 | x = SyncRunParsedOutput 10 | 11 | x.created_at = None 12 | x.started_at = None 13 | x.finished_at = None 14 | x.id = sync_run_details.get("id") 15 | 16 | if sync_run_details.get("createdAt"): 17 | x.created_at = parser.parse(sync_run_details["createdAt"]) 18 | if sync_run_details.get("startedAt"): 19 | x.started_at = parser.parse(sync_run_details["startedAt"]) 20 | if sync_run_details.get("finishedAt"): 21 | x.finished_at = parser.parse(sync_run_details["finishedAt"]) 22 | 23 | if x.finished_at and x.started_at: 24 | x.elapsed_seconds = (x.finished_at - x.started_at).seconds 25 | 26 | x.planned_add = sync_run_details["plannedRows"].get("addedCount") 27 | x.planned_change = sync_run_details["plannedRows"].get("changedCount") 28 | x.planned_remove = sync_run_details["plannedRows"].get("removedCount") 29 | 30 | x.successful_add = sync_run_details["successfulRows"].get("addedCount") 31 | x.successful_change = sync_run_details["successfulRows"].get("changedCount") 32 | x.successful_remove = sync_run_details["successfulRows"].get("removedCount") 33 | 34 | x.failed_add = sync_run_details["failedRows"].get("addedCount") 35 | x.failed_change = sync_run_details["failedRows"].get("changedCount") 36 | x.failed_remove = sync_run_details["failedRows"].get("removedCount") 37 | 38 | x.query_size = sync_run_details.get("querySize") 39 | x.status = sync_run_details.get("status") 40 | x.completion_ratio = float(sync_run_details.get("completionRatio", 0)) 41 | x.error = sync_run_details.get("error") 42 | 43 | return x 44 | 45 | 46 | def generate_metadata_from_parsed_run(parsed_output: SyncRunParsedOutput): 47 | return { 48 | "elapsed_seconds": parsed_output.elapsed_seconds or 0, 49 | "planned_add": parsed_output.planned_add, 50 | "planned_change": parsed_output.planned_change, 51 | "planned_remove": parsed_output.planned_remove, 52 | "successful_add": parsed_output.successful_add, 53 | "successful_change": parsed_output.successful_change, 54 | "successful_remove": parsed_output.successful_remove, 55 | "failed_add": parsed_output.failed_add, 56 | "failed_change": parsed_output.failed_change, 57 | "failed_remove": parsed_output.failed_remove, 58 | "query_size": parsed_output.query_size, 59 | } 60 | -------------------------------------------------------------------------------- /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | version: 2.1 2 | jobs: 3 | build_wheels: 4 | docker: 5 | - image: cimg/python:3.9.2 6 | steps: 7 | - checkout 8 | - run: 9 | command: | 10 | python3 -m pip install --upgrade build 11 | make build 12 | - persist_to_workspace: 13 | root: ~/project 14 | paths: 15 | - dist 16 | - build 17 | 18 | test_pypi_publish: 19 | docker: 20 | - image: cimg/python:3.9.2 21 | steps: 22 | - checkout # checkout source code to working directory 23 | - attach_workspace: 24 | at: ~/project/ 25 | - run: 26 | name: init .pypirc 27 | command: | 28 | echo -e "[testpypi]" >> ~/.pypirc 29 | echo -e "username = __token__" >> ~/.pypirc 30 | echo -e "password = $PYPY_TEST_TOKEN" >> ~/.pypirc 31 | - run: 32 | name: deploy to pypi-test 33 | command: | # create whl, install twine and publish to Test PyPI 34 | python3 -m pip install --upgrade build 35 | make build 36 | pip3 install twine 37 | twine upload --repository testpypi dist/* 38 | 39 | pypi_publish: 40 | docker: 41 | - image: cimg/python:3.9.2 42 | steps: 43 | - checkout # checkout source code to working directory 44 | - attach_workspace: 45 | at: ~/project/ 46 | - run: 47 | name: init .pypirc 48 | command: | 49 | echo -e "[pypi]" >> ~/.pypirc 50 | echo -e "username = __token__" >> ~/.pypirc 51 | echo -e "password = $PYPY_TOKEN" >> ~/.pypirc 52 | python3 airflow_provider_hightouch/version.py 53 | - run: 54 | command: | # create whl, install twine and publish to PyPI 55 | python3 -m pip install --upgrade build twine 56 | make build 57 | pip3 install twine 58 | twine upload dist/* 59 | workflows: 60 | build_test_publish: 61 | jobs: 62 | - build_wheels 63 | - test_pypi_publish: 64 | requires: 65 | - build_wheels 66 | filters: 67 | branches: 68 | only: 69 | - testpypi 70 | 71 | build_tag_publish: 72 | jobs: 73 | - build_wheels: 74 | filters: 75 | branches: 76 | ignore: /.*/ 77 | tags: 78 | only: /[0-9].+/ 79 | 80 | - pypi_publish: 81 | requires: 82 | - build_wheels 83 | filters: 84 | branches: 85 | ignore: /.*/ 86 | tags: 87 | only: /[0-9].+/ 88 | -------------------------------------------------------------------------------- /tests/hooks/test_hightouch_hook.py: -------------------------------------------------------------------------------- 1 | """ 2 | Unittest module to test Hightouch Hook. 3 | 4 | Requires the unittest and requests-mock Python libraries. 5 | 6 | Run test: 7 | 8 | python3 -m unittest tests.hooks.test_hightouch_hook.TestHightouchHook 9 | 10 | """ 11 | 12 | import unittest 13 | from unittest import mock 14 | 15 | import pytest 16 | import requests_mock 17 | from airflow import AirflowException 18 | 19 | from airflow_provider_hightouch.hooks.hightouch import HightouchHook 20 | 21 | 22 | def sync_details_payload(): 23 | return { 24 | "id": "1", 25 | "slug": "testsync", 26 | "workspaceId": "1", 27 | "createdAt": "2022-02-16T21:37:58.510Z", 28 | "updatedAt": "2022-02-16T21:37:58.510Z", 29 | "destinationId": "1", 30 | "modelId": "1", 31 | "configuration": { 32 | "mode": "upsert", 33 | "object": "contacts", 34 | "mappings": [ 35 | {"to": "email", "from": "email", "type": "standard"}, 36 | {"to": "phone", "from": "phone", "type": "standard"}, 37 | ], 38 | "objectId": "0-1", 39 | "externalIdMapping": { 40 | "to": "firstname", 41 | "from": "test_id", 42 | "type": "standard", 43 | }, 44 | "associationMappings": [], 45 | }, 46 | "schedule": { 47 | "type": "interval", 48 | "schedule": {"interval": {"unit": "day", "quantity": 1}}, 49 | }, 50 | "disabled": False, 51 | "status": "success", 52 | "lastRunAt": "2022-02-16T21:37:58.510Z", 53 | "referencedColumns": ["email", "name"], 54 | "primaryKey": "id", 55 | } 56 | 57 | 58 | @mock.patch.dict( 59 | "os.environ", 60 | AIRFLOW_CONN_HIGHTOUCH_DEFAULT='{ "conn_type": "https", "host": "test.hightouch.io", "schema": "https"}', 61 | ) 62 | class TestHightouchHook(unittest.TestCase): 63 | @requests_mock.mock() 64 | def test_hightouch_get_sync_status(self, requests_mock): 65 | 66 | requests_mock.get( 67 | "https://test.hightouch.io/api/v1/syncs/1", 68 | json=sync_details_payload(), 69 | ) 70 | hook = HightouchHook(api_version="v3") 71 | response = hook.get_sync_details(1) 72 | assert response["status"] == "success" 73 | 74 | @requests_mock.mock() 75 | def test_hightouch_submit_sync_with_id(self, requests_mock): 76 | requests_mock.post( 77 | "https://test.hightouch.io/api/v1/syncs/trigger", json={"id": "123"} 78 | ) 79 | hook = HightouchHook() 80 | response = hook.start_sync(sync_id=100) 81 | assert response == "123" 82 | 83 | @requests_mock.mock() 84 | def test_hightouch_submit_sync_with_slug(self, requests_mock): 85 | requests_mock.post( 86 | "https://test.hightouch.io/api/v1/syncs/trigger", json={"id": "123"} 87 | ) 88 | hook = HightouchHook() 89 | response = hook.start_sync(sync_slug="boo") 90 | assert response == "123" 91 | -------------------------------------------------------------------------------- /airflow_provider_hightouch/sensors/hightouch.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from airflow.models.baseoperator import BaseOperatorLink 4 | from airflow.sensors.base import BaseSensorOperator 5 | 6 | from airflow.exceptions import AirflowException 7 | from airflow.utils.decorators import apply_defaults 8 | 9 | from airflow_provider_hightouch.hooks.hightouch import HightouchHook 10 | from airflow_provider_hightouch.utils import parse_sync_run_details 11 | 12 | from airflow_provider_hightouch.consts import * 13 | 14 | 15 | class HightouchLink(BaseOperatorLink): 16 | name = "Hightouch" 17 | 18 | def get_link(self, operator, dttm): 19 | return "https://app.hightouch.io" 20 | 21 | 22 | class HightouchSyncRunSensor(BaseSensorOperator): 23 | """ 24 | This operator monitors a specific sync run in Hightouch via the 25 | Hightouch API. 26 | 27 | .. seealso:: 28 | For more information on how to use this operator, take a look at the guide: 29 | :ref:`https://hightouch.io/docs/integrations/airflow/` 30 | 31 | :param sync_run_id: ID of the sync run to monitor 32 | :param sync_id: ID of the sync that the sync run belongs to 33 | :type sync_id: int 34 | :param connection_id: Name of the connection to use, defaults to hightouch_default 35 | :type connection_id: str 36 | :param api_version: Hightouch API version. Only v3 is supported. 37 | :type api_version: str 38 | :param error_on_warning: Should sync warnings be treated as errors or ignored? 39 | :type error_on_warning: bool 40 | """ 41 | 42 | operator_extra_links = (HightouchLink(),) 43 | 44 | @apply_defaults 45 | def __init__( 46 | self, 47 | sync_run_id: str, 48 | sync_id: str, 49 | connection_id: str = "hightouch_default", 50 | api_version: str = "v3", 51 | error_on_warning: bool = False, 52 | **kwargs, 53 | ): 54 | super().__init__(**kwargs) 55 | self.hightouch_conn_id = connection_id 56 | self.api_version = api_version 57 | self.sync_run_id = sync_run_id 58 | self.sync_id = sync_id 59 | self.error_on_warning = error_on_warning 60 | 61 | def poke(self, context) -> bool: 62 | hook = HightouchHook( 63 | hightouch_conn_id=self.hightouch_conn_id, 64 | api_version=self.api_version, 65 | ) 66 | 67 | sync_run_details = hook.get_sync_run_details( 68 | self.sync_id, 69 | self.sync_run_id 70 | )[0] 71 | 72 | run = parse_sync_run_details( 73 | sync_run_details 74 | ) 75 | 76 | if run.status in TERMINAL_STATUSES: 77 | self.log.info(f"Sync request status: {run.status}.") 78 | if run.error: 79 | self.log.info("Sync Request Error: %s", run.error) 80 | 81 | if run.status == SUCCESS: 82 | return True 83 | if run.status == WARNING and not self.error_on_warning: 84 | return True 85 | raise AirflowException( 86 | f"Sync {self.sync_id} for request: {self.sync_request_id} failed with status: " 87 | f"{run.status} and error: {run.error}" 88 | ) 89 | 90 | return False 91 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Apache Airflow Provider for Hightouch 2 | 3 | Provides an Airflow Operator and Hook for [Hightouch](https://hightouch.io). 4 | This allows the user to initiate a run for a sync from Airflow. 5 | 6 | ## Installation 7 | 8 | Pre-requisites: An environment running `apache-airflow` >= 1.10, including >= 2. 9 | 10 | ``` 11 | pip install airflow-provider-hightouch 12 | ``` 13 | 14 | ## Configuration 15 | 16 | In the Airflow Connections UI, create a new connection for Hightouch. 17 | 18 | - `Conn ID`: `hightouch_default` 19 | - `Conn Type`: `HTTP` 20 | - `Host`: `https://api.hightouch.com` 21 | - `Password`: enter the API key for your workspace. You can generate an API 22 | key from your [Workspace Settings](https://app.hightouch.io/settings) 23 | 24 | The Operator uses the `hightouch_default` connection id by default, but 25 | if needed, you can create additional Airflow Connections and reference them 26 | in the operator 27 | 28 | ## Modules 29 | 30 | ### [HightouchTriggerSyncOperator](./airflow_provider_hightouch/operators/hightouch.py) 31 | 32 | Starts a Hightouch Sync Run. Requires the `sync_id` or the `sync_slug` for the sync you wish to 33 | run. 34 | 35 | Returns the `sync_run_id` of the sync it triggers. 36 | 37 | The run is synchronous by default, and the task will be marked complete once the 38 | sync is successfully completed. 39 | 40 | However, you can request a asynchronous request instead by passing `synchronous=False` 41 | to the operator. 42 | 43 | If the API key is not authorized or if the request is invalid the task will fail. 44 | If a run is already in progress, a new run will be triggered following the 45 | completion of the existing run. 46 | 47 | ### [HightouchSyncRunSensor](./airflow_provider_hightouch/operators/hightouch.py) 48 | 49 | Monitors a Hightouch Sync Run. Requires the `sync_id` and the `sync_run_id` of the sync you wish to monitor. 50 | To obtain the `sync_run_id` of a sync triggered in Airflow, we recommend using XComs to pass the return value 51 | of `HightouchTriggerSyncOperator`. 52 | 53 | ## Examples 54 | 55 | Creating a run is as simple as importing the operator and providing it with 56 | a sync_id. An [example dag](./airflow_provider_hightouch/example_dags/example_hightouch_trigger_sync.py) 57 | is available as well. 58 | 59 | ``` 60 | from airflow_provider_hightouch.operators.hightouch import HightouchTriggerSyncOperator 61 | 62 | with DAG(....) as dag: 63 | ... 64 | my_task = HightouchTriggerSyncOperator(task_id="run_my_sync", sync_id="123") 65 | my_other_task = HightouchTriggerSyncOperator(task_id="run_my_sync", sync_slug="my-sync-slug") 66 | ``` 67 | 68 | ## Issues 69 | 70 | Please submit [issues](https://github.com/hightouchio/airflow-provider-hightouch/issues) and 71 | [pull requests](https://github.com/hightouchio/airflow-provider-hightouch/pulls) in our official repo: 72 | [https://github.com/hightouchio/airflow-provider-hightouch](https://github.com/hightouchio/airflow-provider-hightouch) 73 | 74 | We are happy to hear from you, for any feedback please email the authors at [pedram@hightouch.io](mailto:pedram@hightouch.io). 75 | 76 | ## Acknowledgements 77 | 78 | Special thanks to [Fivetran](https://github.com/fivetran/airflow-provider-fivetran) 79 | for their provider and [Marcos Marx](https://github.com/marcosmarxm/)'s Airbyte 80 | contribution in the core Airflow repo for doing this before we had to 81 | so we could generously learn from their hard work. 82 | -------------------------------------------------------------------------------- /tests/operators/test_hightouch_operator.py: -------------------------------------------------------------------------------- 1 | """ 2 | Unittest module to test Hightouch Operator. 3 | 4 | Requires the unittest and requests-mock Python libraries. 5 | 6 | Run test: 7 | 8 | python3 -m unittest tests.operators.test_hightouch_operator.TestHightouchOperator 9 | 10 | """ 11 | 12 | import unittest 13 | from unittest import mock 14 | 15 | import requests_mock 16 | 17 | from airflow_provider_hightouch.operators.hightouch import HightouchTriggerSyncOperator 18 | 19 | 20 | @mock.patch.dict( 21 | "os.environ", 22 | AIRFLOW_CONN_HIGHTOUCH_DEFAULT="http://https%3A%2F%2Ftest.hightouch.io%2F", 23 | ) 24 | class TestHightouchOperator(unittest.TestCase): 25 | @requests_mock.mock() 26 | def test_hightouch_operator(self, requests_mock): 27 | requests_mock.get( 28 | "https://test.hightouch.io/api/v1/syncs/1/runs", 29 | json={ 30 | "data": [ 31 | { 32 | "id": "42", 33 | "startedAt": "2022-02-08T16:11:04.712Z", 34 | "createdAt": "2022-02-08T16:11:04.712Z", 35 | "finishedAt": "2022-02-08T16:11:11.698Z", 36 | "querySize": 773, 37 | "status": "success", 38 | "completionRatio": 0.54, 39 | "plannedRows": { 40 | "addedCount": 773, 41 | "changedCount": 0, 42 | "removedCount": 0, 43 | }, 44 | "successfulRows": { 45 | "addedCount": 773, 46 | "changedCount": 0, 47 | "removedCount": 0, 48 | }, 49 | "failedRows": { 50 | "addedCount": 0, 51 | "changedCount": 0, 52 | "removedCount": 0, 53 | }, 54 | "error": None, 55 | }, 56 | { 57 | "id": "43", 58 | "startedAt": "2022-02-08T16:11:04.712Z", 59 | "createdAt": "2022-02-08T17:44:05.198Z", 60 | "finishedAt": "2022-02-08T17:44:25.366Z", 61 | "querySize": 773, 62 | "status": "success", 63 | "completionRatio": 0.54, 64 | "plannedRows": { 65 | "addedCount": 0, 66 | "changedCount": 765, 67 | "removedCount": 0, 68 | }, 69 | "successfulRows": { 70 | "addedCount": 0, 71 | "changedCount": 765, 72 | "removedCount": 0, 73 | }, 74 | "failedRows": { 75 | "addedCount": 0, 76 | "changedCount": 0, 77 | "removedCount": 0, 78 | }, 79 | "error": None, 80 | }, 81 | ] 82 | }, 83 | ) 84 | requests_mock.get( 85 | "https://test.hightouch.io/api/v1/syncs/1", 86 | json={ 87 | "id": "1", 88 | "slug": "testsync", 89 | "workspaceId": "1", 90 | "createdAt": "2022-02-16T21:37:58.510Z", 91 | "updatedAt": "2022-02-16T21:37:58.510Z", 92 | "destinationId": "1", 93 | "modelId": "1", 94 | "configuration": { 95 | "mode": "upsert", 96 | "object": "contacts", 97 | "mappings": [ 98 | {"to": "email", "from": "email", "type": "standard"}, 99 | {"to": "phone", "from": "phone", "type": "standard"}, 100 | ], 101 | "objectId": "0-1", 102 | "externalIdMapping": { 103 | "to": "firstname", 104 | "from": "test_id", 105 | "type": "standard", 106 | }, 107 | "associationMappings": [], 108 | }, 109 | "schedule": { 110 | "type": "interval", 111 | "schedule": {"interval": {"unit": "day", "quantity": 1}}, 112 | }, 113 | "disabled": False, 114 | "status": "success", 115 | "lastRunAt": "2022-02-16T21:37:58.510Z", 116 | "referencedColumns": ["email", "name"], 117 | "primaryKey": "id", 118 | }, 119 | ) 120 | requests_mock.post( 121 | "https://test.hightouch.io/api/v1/syncs/trigger", 122 | json={"id": "123"}, 123 | ) 124 | operator = HightouchTriggerSyncOperator(task_id="run", sync_id=1) 125 | 126 | operator.execute(context={}) 127 | -------------------------------------------------------------------------------- /airflow_provider_hightouch/operators/hightouch.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from airflow.exceptions import AirflowException 4 | from airflow.models import BaseOperator, BaseOperatorLink 5 | from airflow.utils.decorators import apply_defaults 6 | from airflow.utils.context import Context 7 | 8 | from airflow_provider_hightouch.hooks.hightouch import HightouchHook 9 | from airflow_provider_hightouch.utils import parse_sync_run_details 10 | from airflow_provider_hightouch.triggers.hightouch import HightouchTrigger 11 | 12 | 13 | class HightouchLink(BaseOperatorLink): 14 | name = "Hightouch" 15 | 16 | def get_link(self, operator, dttm): 17 | return "https://app.hightouch.io" 18 | 19 | 20 | class HightouchTriggerSyncOperator(BaseOperator): 21 | """ 22 | This operator triggers a run for a specified Sync in Hightouch via the 23 | Hightouch API. 24 | 25 | :param sync_id: ID of the sync to trigger 26 | :type sync_id: int 27 | :param sync_slug: Slug of the sync to trigger 28 | :param workspace_id: Workspace Id to pass to Airflow logs to build URL 29 | :type workspace_id: str 30 | :param connection_id: Name of the connection to use, defaults to hightouch_default 31 | :type connection_id: str 32 | :param api_version: Hightouch API version. Only v3 is supported. 33 | :type api_version: str 34 | :param synchronous: Whether to wait for the sync to complete before completing the task 35 | :type synchronous: bool 36 | :param deferrable: Whether to defer the execution of the operator 37 | :type deferrable: bool 38 | :param error_on_warning: Should sync warnings be treated as errors or ignored? 39 | :type error_on_warning: bool 40 | :param wait_seconds: Time to wait in between subsequent polls to the API. 41 | :type wait_seconds: float 42 | :param timeout: Maximum time to wait for a sync to complete before aborting 43 | :type timeout: int 44 | """ 45 | 46 | operator_extra_links = (HightouchLink(),) 47 | 48 | @apply_defaults 49 | def __init__( 50 | self, 51 | sync_id: Optional[str] = None, 52 | sync_slug: Optional[str] = None, 53 | workspace_id: Optional[str] = "{Workspace Slug}", 54 | connection_id: str = "hightouch_default", 55 | api_version: str = "v3", 56 | synchronous: bool = True, 57 | deferrable: bool = False, 58 | error_on_warning: bool = False, 59 | wait_seconds: float = 3, 60 | timeout: int = 3600, 61 | **kwargs, 62 | ): 63 | super().__init__(**kwargs) 64 | self.hightouch_conn_id = connection_id 65 | self.api_version = api_version 66 | self.sync_id = sync_id 67 | self.sync_slug = sync_slug 68 | self.workspace_id = workspace_id 69 | self.error_on_warning = error_on_warning 70 | self.synchronous = synchronous 71 | self.deferrable = deferrable 72 | self.wait_seconds = wait_seconds 73 | self.timeout = timeout 74 | 75 | def execute(self, context: Context) -> str: 76 | """Start a Hightouch Sync Run""" 77 | hook = HightouchHook( 78 | hightouch_conn_id=self.hightouch_conn_id, 79 | api_version=self.api_version, 80 | ) 81 | 82 | if not self.sync_id and not self.sync_slug: 83 | raise AirflowException( 84 | "One of sync_id or sync_slug must be provided to trigger a sync" 85 | ) 86 | 87 | if self.synchronous: 88 | self.log.info("Start synchronous request to run a sync.") 89 | hightouch_output = hook.sync_and_poll( 90 | self.sync_id, 91 | self.sync_slug, 92 | fail_on_warning=self.error_on_warning, 93 | poll_interval=self.wait_seconds, 94 | poll_timeout=self.timeout, 95 | ) 96 | try: 97 | parsed_result = parse_sync_run_details( 98 | hightouch_output.sync_run_details 99 | ) 100 | self.log.info("Sync completed successfully") 101 | self.log.info(dict(parsed_result)) 102 | return parsed_result.id 103 | except Exception: 104 | self.log.warning("Sync ran successfully but failed to parse output.") 105 | self.log.warning(hightouch_output) 106 | 107 | else: 108 | 109 | self.log.info("Start async request to run a sync.") 110 | request_id = hook.start_sync(self.sync_id, self.sync_slug) 111 | 112 | if self.deferrable: 113 | 114 | if not self.sync_id: 115 | self.sync_id = hook.get_sync_from_slug(sync_slug=self.sync_slug) 116 | 117 | self.sync_run_url = ( 118 | f"https://app.hightouch.com/{self.workspace_id}/" 119 | f"syncs/{self.sync_id}/runs/{request_id}" 120 | ) 121 | 122 | self.log.info( 123 | f"Started sync {self.sync_run_url} Deferring execution..." 124 | ) 125 | 126 | self.defer( 127 | trigger=HightouchTrigger( 128 | sync_run_url=self.sync_run_url, 129 | sync_id=self.sync_id, 130 | sync_request_id=request_id, 131 | sync_slug=self.sync_slug, 132 | connection_id=self.hightouch_conn_id, 133 | timeout=self.timeout, 134 | end_from_trigger=True, 135 | poll_interval=self.wait_seconds, 136 | error_on_warning=self.error_on_warning, 137 | ), 138 | method_name=None, 139 | ) 140 | else: 141 | sync = self.sync_id or self.sync_slug 142 | self.log.info( 143 | f"Successfully created request {request_id} to start sync: {sync}" 144 | ) 145 | return request_id 146 | -------------------------------------------------------------------------------- /airflow_provider_hightouch/triggers/hightouch.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from typing import Any, AsyncIterator, Dict, Optional, Tuple 3 | 4 | from airflow.triggers.base import ( 5 | BaseTrigger, 6 | TriggerEvent, 7 | TaskSuccessEvent, 8 | TaskFailedEvent, 9 | ) 10 | from airflow_provider_hightouch.hooks.hightouch import HightouchAsyncHook 11 | from airflow_provider_hightouch.consts import ( 12 | PENDING_STATUSES, 13 | SUCCESS, 14 | WARNING, 15 | ) 16 | 17 | 18 | class HightouchTrigger(BaseTrigger): 19 | """ 20 | Trigger to monitor a Hightouch sync run asynchronously. 21 | 22 | This trigger checks the status of a Hightouch sync run at regular intervals 23 | until it completes, fails, or times out. It uses the Hightouch API to retrieve 24 | the sync status and yields events based on the sync's progress. 25 | 26 | Args: 27 | sync_run_url (str): Constructed sync_run_url. 28 | sync_id (str): The ID of the Hightouch sync. 29 | sync_request_id (str): The request ID of the sync run to monitor. 30 | sync_slug (str): The slug of the Hightouch sync. 31 | connection_id (str): The Airflow connection ID for Hightouch API access. 32 | timeout (float): The maximum time (in seconds) to wait before timing out. 33 | poll_interval (float): The time (in seconds) to wait between status checks. 34 | """ 35 | 36 | def __init__( 37 | self, 38 | sync_run_url: Optional[str], 39 | sync_id: Optional[str], 40 | sync_request_id: str, 41 | sync_slug: Optional[str], 42 | connection_id: str, 43 | timeout: float, 44 | end_from_trigger: bool = True, 45 | poll_interval: float = 4.0, 46 | error_on_warning: bool = False, 47 | ) -> None: 48 | """ 49 | Initializes the HightouchTrigger with the provided parameters. 50 | 51 | Args: 52 | sync_run_url (str): Constructed sync_run_url. 53 | sync_id (str): The ID of the Hightouch sync. 54 | sync_request_id (str): The request ID of the sync run to monitor. 55 | sync_slug (str): The slug of the Hightouch sync. 56 | connection_id (str): The Airflow connection ID for Hightouch API access. 57 | timeout (float): The maximum time (in seconds) to wait before timing out. 58 | end_from_trigger (bool): Allows for task to complete from the trigger. Default is true. 59 | poll_interval (float): The time (in seconds) to wait between status checks. 60 | error_on_warning (bool): Whether or not to error when the sync status is Warning 61 | """ 62 | super().__init__() 63 | self.sync_run_url = sync_run_url 64 | self.sync_id = sync_id 65 | self.sync_request_id = sync_request_id 66 | self.sync_slug = sync_slug 67 | self.connection_id = connection_id 68 | self.timeout = timeout 69 | self.end_from_trigger = end_from_trigger 70 | self.poll_interval = poll_interval 71 | self.error_on_warning = error_on_warning 72 | self.hook = HightouchAsyncHook(hightouch_conn_id=self.connection_id) 73 | 74 | def serialize(self) -> Tuple[str, Dict[str, Any]]: 75 | """ 76 | Serialize the trigger state for storage. 77 | 78 | Returns: 79 | Tuple[str, Dict[str, Any]]: A tuple containing the fully qualified class name 80 | and a dictionary of the trigger's parameters for state restoration. 81 | """ 82 | return ( 83 | f"{self.__module__}.{self.__class__.__name__}", 84 | { 85 | "sync_run_url": self.sync_run_url, 86 | "sync_id": self.sync_id, 87 | "sync_request_id": self.sync_request_id, 88 | "sync_slug": self.sync_slug, 89 | "connection_id": self.connection_id, 90 | "timeout": self.timeout, 91 | "end_from_trigger": self.end_from_trigger, 92 | "poll_interval": self.poll_interval, 93 | "error_on_warning": self.error_on_warning, 94 | }, 95 | ) 96 | 97 | async def run(self) -> AsyncIterator[TriggerEvent]: 98 | """ 99 | Periodically checks the sync status until completion or timeout. 100 | 101 | This method uses the Hightouch API to check the status of a sync run at 102 | regular intervals defined by the poll_interval. It yields TriggerEvents 103 | based on the current status of the sync. 104 | 105 | Yields: 106 | AsyncIterator[TriggerEvent]: Events indicating the status of the sync run, 107 | which can be "success", "failed", "timeout", or the current status during polling. 108 | """ 109 | start_time = asyncio.get_event_loop().time() 110 | 111 | while True: 112 | 113 | # Check for timeout 114 | if asyncio.get_event_loop().time() - start_time > self.timeout: 115 | self.log.error( 116 | f"{self.sync_run_url} exceeded DAG timeout of {self.timeout} seconds." 117 | ) 118 | yield TaskFailedEvent() 119 | return 120 | 121 | try: 122 | # Fetch the current sync status 123 | response = await self.hook.get_sync_run_details( 124 | self.sync_id, self.sync_request_id 125 | ) 126 | 127 | status = response[0].get("status") 128 | 129 | # Handle different sync statuses 130 | if ( 131 | status == SUCCESS 132 | or (status == WARNING and not self.error_on_warning) 133 | ): 134 | self.log.info(f"{self.sync_run_url} finished with status {status}!") 135 | yield TaskSuccessEvent( 136 | xcoms={ 137 | "sync_id": self.sync_id, 138 | "sync_run_id": self.sync_request_id 139 | } 140 | ) 141 | return 142 | 143 | elif status in PENDING_STATUSES: 144 | self.log.info( 145 | f"Sync is {status}... Sleeping for {self.poll_interval} seconds." 146 | ) 147 | await asyncio.sleep(self.poll_interval) 148 | 149 | # Will capture terminal statuses along with new statuses 150 | else: 151 | self.log.error( 152 | f"{self.sync_run_url} finished with status {status}!\n" 153 | f"Sync Error: {response[0]['error']}" 154 | ) 155 | yield TaskFailedEvent() 156 | return 157 | 158 | except Exception as e: 159 | self.log.error("Error while checking sync status: %s", str(e)) 160 | yield TaskFailedEvent() 161 | return 162 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /airflow_provider_hightouch/hooks/hightouch.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import aiohttp 3 | import asyncio 4 | import time 5 | from typing import Any, Dict, List, Optional 6 | from urllib.parse import urljoin 7 | 8 | from airflow.exceptions import AirflowException 9 | 10 | from airflow_provider_hightouch.consts import ( 11 | DEFAULT_POLL_INTERVAL, 12 | HIGHTOUCH_API_BASE_V3, 13 | PENDING_STATUSES, 14 | SUCCESS, 15 | TERMINAL_STATUSES, 16 | WARNING, 17 | ) 18 | from airflow_provider_hightouch.types import HightouchOutput 19 | 20 | from airflow.providers.http.hooks.http import HttpAsyncHook 21 | 22 | try: 23 | from airflow.providers.http.hooks.http import HttpHook 24 | except ImportError: 25 | from airflow.hooks.http_hook import HttpHook 26 | 27 | from airflow_provider_hightouch import __version__, utils 28 | 29 | 30 | class HightouchHook(HttpHook): 31 | """ 32 | Hook for Hightouch API 33 | 34 | Args: 35 | hightouch_conn_id (str): The name of the Airflow connection 36 | with connection information for the Hightouch API 37 | api_version: (optional(str)). Hightouch API version. 38 | """ 39 | 40 | def __init__( 41 | self, 42 | hightouch_conn_id: str = "hightouch_default", 43 | api_version: str = "v3", 44 | request_max_retries: int = 3, 45 | request_retry_delay: float = 0.5, 46 | ): 47 | self.hightouch_conn_id = hightouch_conn_id 48 | self.api_version = api_version 49 | self._request_max_retries = request_max_retries 50 | self.token = self.get_connection(self.hightouch_conn_id).password 51 | self._request_retry_delay = request_retry_delay 52 | if self.api_version not in ("v1", "v3"): 53 | raise AirflowException( 54 | "This version of the Hightouch Operator only supports the v1/v3 API." 55 | ) 56 | super().__init__(http_conn_id=hightouch_conn_id) 57 | 58 | @property 59 | def api_base_url(self) -> str: 60 | """Returns the correct API BASE URL depending on the API version.""" 61 | return HIGHTOUCH_API_BASE_V3 62 | 63 | def make_request( 64 | self, 65 | method: str, 66 | endpoint: str, 67 | data: Optional[Dict[str, Any]] = None, 68 | ): 69 | """Creates and sends a request to the desired Hightouch API endpoint 70 | Args: 71 | method (str): The http method use for this request (e.g. "GET", "POST"). 72 | endpoint (str): The Hightouch API endpoint to send this request to. 73 | params (Optional(dict): Query parameters to pass to the API endpoint 74 | body (Optional(dict): Body parameters to pass to the API endpoint 75 | Returns: 76 | Dict[str, Any]: Parsed json data from the response to this request 77 | """ 78 | 79 | user_agent = "AirflowHightouchOperator/" + __version__ 80 | headers = { 81 | "accept": "application/json", 82 | "Authorization": f"Bearer {self.token}", 83 | "User-Agent": user_agent, 84 | } 85 | 86 | num_retries = 0 87 | while True: 88 | try: 89 | self.method = method 90 | response = self.run( 91 | endpoint=urljoin(self.api_base_url, endpoint), 92 | data=data, 93 | headers=headers, 94 | ) 95 | resp_dict = response.json() 96 | return resp_dict["data"] if "data" in resp_dict else resp_dict 97 | except AirflowException as e: 98 | self.log.error("Request to Hightouch API failed: %s", e) 99 | if num_retries == self._request_max_retries: 100 | break 101 | num_retries += 1 102 | time.sleep(self._request_retry_delay) 103 | 104 | raise AirflowException("Exceeded max number of retries.") 105 | 106 | def get_sync_run_details( 107 | self, sync_id: str, sync_request_id: str 108 | ) -> List[Dict[str, Any]]: 109 | """Get details about a given sync run from the Hightouch API. 110 | Args: 111 | sync_id (str): The Hightouch Sync ID. 112 | sync_request_id (str): The Hightouch Sync Request ID. 113 | Returns: 114 | Dict[str, Any]: Parsed json data from the response 115 | """ 116 | params = {"runId": sync_request_id} 117 | return self.make_request( 118 | method="GET", endpoint=f"syncs/{sync_id}/runs", data=params 119 | ) 120 | 121 | def get_sync_details(self, sync_id: str) -> Dict[str, Any]: 122 | """Get details about a given sync from the Hightouch API. 123 | Args: 124 | sync_id (str): The Hightouch Sync ID. 125 | Returns: 126 | Dict[str, Any]: Parsed json data from the response 127 | """ 128 | return self.make_request(method="GET", endpoint=f"syncs/{sync_id}") 129 | 130 | def get_sync_from_slug(self, sync_slug: str) -> str: 131 | """Get details about a given sync from the Hightouch API. 132 | Args: 133 | sync_id (str): The Hightouch Sync ID. 134 | Returns: 135 | Dict[str, Any]: Parsed json data from the response 136 | """ 137 | return self.make_request( 138 | method="GET", endpoint="syncs", data={"slug": sync_slug} 139 | )[0]["id"] 140 | 141 | def start_sync( 142 | self, sync_id: Optional[str] = None, sync_slug: Optional[str] = None 143 | ) -> str: 144 | """Trigger a sync and initiate a sync run 145 | Args: 146 | sync_id (str): The Hightouch Sync ID. 147 | Returns: 148 | str: The sync request ID created by the Hightouch API. 149 | """ 150 | if sync_id: 151 | return self.make_request( 152 | method="POST", endpoint="syncs/trigger", data={"syncId": sync_id} 153 | )["id"] 154 | if sync_slug: 155 | return self.make_request( 156 | method="POST", endpoint="syncs/trigger", data={"syncSlug": sync_slug} 157 | )["id"] 158 | raise AirflowException( 159 | "One of sync_id or sync_slug must be provided to trigger a sync." 160 | ) 161 | 162 | def poll_sync( 163 | self, 164 | sync_id: str, 165 | sync_request_id: str, 166 | fail_on_warning: bool = False, 167 | poll_interval: float = DEFAULT_POLL_INTERVAL, 168 | poll_timeout: Optional[float] = None, 169 | ) -> HightouchOutput: 170 | """Poll for the completion of a sync 171 | Args: 172 | sync_id (str): The Hightouch Sync ID 173 | sync_request_id (str): The Hightouch Sync Request ID to poll against. 174 | fail_on_warning (bool): Whether a warning is considered a failure for this sync. 175 | poll_interval (float): The time in seconds that will be waited between succcessive polls 176 | poll_timeout (float): The maximum time that will be waited before this operation 177 | times out. 178 | Returns: 179 | Dict[str, Any]: Parsed json output from the API 180 | """ 181 | poll_start = datetime.datetime.now() 182 | while True: 183 | sync_run_details = self.get_sync_run_details(sync_id, sync_request_id)[0] 184 | 185 | self.log.debug(sync_run_details) 186 | run = utils.parse_sync_run_details(sync_run_details) 187 | self.log.info( 188 | f"Polling Hightouch Sync {sync_id}. Current status: {run.status}. " 189 | f"{100 * run.completion_ratio}% completed." 190 | ) 191 | 192 | if run.status in TERMINAL_STATUSES: 193 | self.log.info(f"Sync request status: {run.status}. Polling complete") 194 | if run.error: 195 | self.log.info("Sync Request Error: %s", run.error) 196 | 197 | if run.status == SUCCESS: 198 | break 199 | if run.status == WARNING and not fail_on_warning: 200 | break 201 | raise AirflowException( 202 | f"Sync {sync_id} for request: {sync_request_id} failed with status: " 203 | f"{run.status} and error: {run.error}" 204 | ) 205 | if run.status not in PENDING_STATUSES: 206 | self.log.warning( 207 | "Unexpected status: %s returned for sync %s and request %s. Will try " 208 | "again, but if you see this error, please let someone at Hightouch know.", 209 | run.status, 210 | sync_id, 211 | sync_request_id, 212 | ) 213 | if ( 214 | poll_timeout 215 | and datetime.datetime.now() 216 | > poll_start + datetime.timedelta(seconds=poll_timeout) 217 | ): 218 | raise AirflowException( 219 | f"Sync {sync_id} for request: {sync_request_id}' time out after " 220 | f"{datetime.datetime.now() - poll_start}. Last status was {run.status}." 221 | ) 222 | 223 | time.sleep(poll_interval) 224 | sync_details = self.get_sync_details(sync_id) 225 | 226 | return HightouchOutput(sync_details, sync_run_details) 227 | 228 | def sync_and_poll( 229 | self, 230 | sync_id: Optional[str] = None, 231 | sync_slug: Optional[str] = None, 232 | fail_on_warning: bool = False, 233 | poll_interval: float = DEFAULT_POLL_INTERVAL, 234 | poll_timeout: Optional[float] = None, 235 | ) -> HightouchOutput: 236 | """ 237 | Initialize a sync run for the given sync id, and polls until it completes 238 | Args: 239 | sync_id (str): The Hightouch Sync ID 240 | sync_request_id (str): The Hightouch Sync Request ID to poll against. 241 | fail_on_warning (bool): Whether a warning is considered a failure for this sync. 242 | poll_interval (float): The time in seconds that will be waited between succcessive polls 243 | poll_timeout (float): The maximum time that will be waited before this operation 244 | times out. 245 | Returns: 246 | :py:class:`~HightouchOutput`: 247 | Object containing details about the Hightouch sync run 248 | """ 249 | sync_request_id = self.start_sync(sync_id, sync_slug) 250 | 251 | if not sync_id: 252 | assert sync_slug 253 | sync_id = sync_id or self.get_sync_from_slug(sync_slug=sync_slug) 254 | 255 | assert sync_id or sync_slug 256 | ht_output = self.poll_sync( 257 | sync_id, 258 | sync_request_id, 259 | fail_on_warning=fail_on_warning, 260 | poll_interval=poll_interval, 261 | poll_timeout=poll_timeout, 262 | ) 263 | 264 | return ht_output 265 | 266 | 267 | class HightouchAsyncHook(HttpAsyncHook): 268 | """ 269 | Extending the Hightouch hook for asynchronous functionality 270 | 271 | Args: 272 | hightouch_conn_id (str): The name of the Airflow connection 273 | with connection information for the Hightouch API 274 | api_version: (optional(str)). Hightouch API version. 275 | """ 276 | 277 | def __init__( 278 | self, 279 | hightouch_conn_id: str = "hightouch_default", 280 | api_version: str = "v3", 281 | request_max_retries: int = 3, 282 | request_retry_delay: float = 0.5, 283 | ): 284 | self.hightouch_conn_id = hightouch_conn_id 285 | self.api_version = api_version 286 | self._request_max_retries = request_max_retries 287 | self.token = self.get_connection(self.hightouch_conn_id).password 288 | self._request_retry_delay = request_retry_delay 289 | if self.api_version not in ("v1", "v3"): 290 | raise AirflowException( 291 | "This version of the Hightouch Operator only supports the v1/v3 API." 292 | ) 293 | super().__init__(http_conn_id=hightouch_conn_id) 294 | 295 | @property 296 | def api_base_url(self) -> str: 297 | """Returns the correct API BASE URL depending on the API version.""" 298 | return HIGHTOUCH_API_BASE_V3 299 | 300 | async def get_sync_run_details( 301 | self, sync_id: str, sync_request_id: str 302 | ) -> List[Dict[str, Any]]: 303 | """Get details about a given sync run from the Hightouch API. 304 | Args: 305 | sync_id (str): The Hightouch Sync ID. 306 | sync_request_id (str): The Hightouch Sync Request ID. 307 | Returns: 308 | Dict[str, Any]: Parsed json data from the response 309 | """ 310 | params = {"runId": sync_request_id} 311 | return await self.make_request( 312 | method="GET", endpoint=f"syncs/{sync_id}/runs", data=params 313 | ) 314 | 315 | async def make_request( 316 | self, 317 | method: str, 318 | endpoint: str, 319 | data: Optional[Dict[str, Any]] = None, 320 | ): 321 | """Creates and sends a request to the desired Hightouch API endpoint 322 | Args: 323 | method (str): The http method use for this request (e.g. "GET", "POST"). 324 | endpoint (str): The Hightouch API endpoint to send this request to. 325 | params (Optional(dict): Query parameters to pass to the API endpoint 326 | body (Optional(dict): Body parameters to pass to the API endpoint 327 | Returns: 328 | Dict[str, Any]: Parsed json data from the response to this request 329 | """ 330 | 331 | user_agent = "AirflowHightouchOperator/" + __version__ 332 | headers = { 333 | "accept": "application/json", 334 | "Authorization": f"Bearer {self.token}", 335 | "User-Agent": user_agent, 336 | } 337 | 338 | num_retries = 0 339 | while True: 340 | try: 341 | self.method = method 342 | async with aiohttp.ClientSession() as session: 343 | response = await self.run( 344 | session=session, 345 | endpoint=urljoin(self.api_base_url, endpoint), 346 | data=data, 347 | headers=headers, 348 | ) 349 | resp_dict = await response.json() 350 | return resp_dict["data"] if "data" in resp_dict else resp_dict 351 | except AirflowException as e: 352 | self.log.error("Request to Hightouch API failed: %s", e) 353 | if num_retries == self._request_max_retries: 354 | break 355 | num_retries += 1 356 | await asyncio.sleep(self._request_retry_delay) 357 | 358 | raise AirflowException("Exceeded max number of retries.") 359 | --------------------------------------------------------------------------------