├── .circleci └── config.yml ├── .github └── pull_request_template.md ├── .gitignore ├── CHANGELOG.md ├── LICENSE ├── MANIFEST.in ├── README.md ├── bin ├── run-a-test.sh └── run-all-tests.sh ├── config.sample.json ├── setup.cfg ├── setup.py ├── tap_hubspot ├── __init__.py ├── schemas │ ├── campaigns.json │ ├── companies.json │ ├── contact_lists.json │ ├── contacts.json │ ├── contacts_by_company.json │ ├── deal_pipelines.json │ ├── deals.json │ ├── email_events.json │ ├── engagements.json │ ├── forms.json │ ├── owners.json │ ├── shared │ │ ├── associations_schema.json │ │ └── custom_objects.json │ ├── subscription_changes.json │ ├── tickets.json │ ├── versions.json │ └── workflows.json └── tests │ ├── __init__.py │ ├── test_bookmarks.py │ ├── test_deals.py │ ├── test_get_streams_to_sync.py │ ├── test_offsets.py │ ├── unittests │ ├── test_custom_objects.py │ ├── test_get_start.py │ ├── test_request_timeout.py │ └── test_tickets.py │ └── utils.py └── tests ├── base.py ├── base_hubspot.py ├── client.py ├── client_tester.py ├── test_hubspot_all_fields.py ├── test_hubspot_automatic_fields.py ├── test_hubspot_bookmarks.py ├── test_hubspot_bookmarks_static.py ├── test_hubspot_child_stream_only.py ├── test_hubspot_discovery.py ├── test_hubspot_interrupted_sync.py ├── test_hubspot_interrupted_sync_offset.py ├── test_hubspot_newfw_all_fields.py ├── test_hubspot_pagination.py ├── test_hubspot_start_date.py └── unittests ├── test_deals.py └── test_deselect_unselected_fields.py /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | version: 2.1 2 | orbs: 3 | slack: circleci/slack@3.4.2 4 | 5 | executors: 6 | tap_tester: 7 | docker: 8 | - image: 218546966473.dkr.ecr.us-east-1.amazonaws.com/circle-ci:stitch-tap-tester 9 | jobs: 10 | ensure_env: 11 | executor: tap_tester 12 | steps: 13 | - checkout 14 | - run: 15 | name: 'Setup virtual env' 16 | command: | 17 | aws s3 cp s3://com-stitchdata-dev-deployment-assets/environments/tap-tester/tap_tester_sandbox /usr/local/share/virtualenvs/dev_env.sh 18 | python3 -m venv /usr/local/share/virtualenvs/tap-hubspot/ 19 | source /usr/local/share/virtualenvs/tap-hubspot/bin/activate 20 | pip install -U 'pip<19.2' 'setuptools<51.0.0' 21 | pip install .[dev] 22 | - persist_to_workspace: 23 | root: /usr/local/share/virtualenvs/ 24 | paths: 25 | - tap-hubspot 26 | - dev_env.sh 27 | build: 28 | executor: tap_tester 29 | steps: 30 | - run: echo "Tests have passed." 
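      # NOTE (annotation, not part of the original config): `build` above is a
      # no-op gate; the `workflows` section at the bottom of this file runs it
      # only after the full integration-test chain succeeds. Each job below
      # re-attaches the virtualenv workspace persisted by `ensure_env`, so the
      # `pip install .[dev]` step runs only once per pipeline.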
31 | run_pylint_and_unittests: 32 | executor: tap_tester 33 | steps: 34 | - checkout 35 | - attach_workspace: 36 | at: /usr/local/share/virtualenvs 37 | - run: 38 | name: 'pylint' 39 | command: | 40 | source /usr/local/share/virtualenvs/dev_env.sh 41 | source /usr/local/share/virtualenvs/tap-hubspot/bin/activate 42 | # BUG https://jira.talendforge.org/browse/TDL-15447 43 | echo "Disabling the following $PYLINT_DISABLE_LIST" 44 | pylint tap_hubspot -d "$PYLINT_DISABLE_LIST" 45 | - run: 46 | name: 'Unit Tests' 47 | command: | 48 | source /usr/local/share/virtualenvs/tap-hubspot/bin/activate 49 | nosetests tap_hubspot/tests 50 | pip install coverage 51 | nosetests --with-coverage --cover-erase --cover-package=tap_hubspot --cover-html-dir=htmlcov tap_hubspot/tests/unittests 52 | coverage html 53 | - store_test_results: 54 | path: test_output/report.xml 55 | - store_artifacts: 56 | path: htmlcov 57 | - run: 58 | name: 'JSON Validator' 59 | command: | 60 | source /usr/local/share/virtualenvs/tap-tester/bin/activate 61 | stitch-validate-json tap_hubspot/schemas/*.json 62 | integration_test: 63 | parameters: 64 | test_command: 65 | type: string 66 | executor: tap_tester 67 | steps: 68 | - checkout 69 | - attach_workspace: 70 | at: /usr/local/share/virtualenvs 71 | - run: 72 | name: 'Integration Tests' 73 | command: | 74 | source /usr/local/share/virtualenvs/dev_env.sh 75 | mkdir /tmp/${CIRCLE_PROJECT_REPONAME} 76 | export STITCH_CONFIG_DIR=/tmp/${CIRCLE_PROJECT_REPONAME} 77 | source /usr/local/share/virtualenvs/tap-tester/bin/activate 78 | << parameters.test_command >> 79 | - slack/notify-on-failure: 80 | only_for_branches: master 81 | - store_artifacts: 82 | path: /tmp/tap-hubspot 83 | 84 | workflows: 85 | version: 2 86 | commit: &commit_jobs 87 | jobs: 88 | - ensure_env: 89 | context: 90 | - circleci-user 91 | - tier-1-tap-user 92 | - run_pylint_and_unittests: 93 | context: 94 | - circleci-user 95 | - tier-1-tap-user 96 | requires: 97 | - ensure_env 98 | - integration_test: 99 | name: "Discovery Test" 100 | context: 101 | - circleci-user 102 | - tier-1-tap-user 103 | test_command: |- 104 | run-test --tap=${CIRCLE_PROJECT_REPONAME} tests/test_hubspot_discovery.py 105 | requires: 106 | - ensure_env 107 | - integration_test: 108 | name: "Automatic Fields Test" 109 | context: 110 | - circleci-user 111 | - tier-1-tap-user 112 | test_command: |- 113 | run-test --tap=${CIRCLE_PROJECT_REPONAME} tests/test_hubspot_automatic_fields.py 114 | requires: 115 | - ensure_env 116 | - integration_test: 117 | name: "Child Streams Test" 118 | context: 119 | - circleci-user 120 | - tier-1-tap-user 121 | test_command: |- 122 | run-test --tap=${CIRCLE_PROJECT_REPONAME} tests/test_hubspot_child_stream_only.py 123 | requires: 124 | - ensure_env 125 | - integration_test: 126 | name: "Start Date Test" 127 | context: 128 | - circleci-user 129 | - tier-1-tap-user 130 | test_command: |- 131 | run-test --tap=${CIRCLE_PROJECT_REPONAME} tests/test_hubspot_start_date.py 132 | requires: 133 | - ensure_env 134 | - integration_test: 135 | name: "All Fields Test" 136 | context: 137 | - circleci-user 138 | - tier-1-tap-user 139 | test_command: |- 140 | run-test --tap=${CIRCLE_PROJECT_REPONAME} tests/test_hubspot_all_fields.py 141 | requires: 142 | - "Child Streams Test" 143 | - "Start Date Test" 144 | - "Automatic Fields Test" 145 | - "Discovery Test" 146 | - integration_test: 147 | name: "Bookmarks Test CRUD" 148 | context: 149 | - circleci-user 150 | - tier-1-tap-user 151 | test_command: |- 152 | run-test 
--tap=${CIRCLE_PROJECT_REPONAME} tests/test_hubspot_bookmarks.py 153 | requires: 154 | - "All Fields Test" 155 | - integration_test: 156 | name: "Bookmarks Test Static Data" 157 | context: 158 | - circleci-user 159 | - tier-1-tap-user 160 | test_command: |- 161 | run-test --tap=${CIRCLE_PROJECT_REPONAME} tests/test_hubspot_bookmarks_static.py 162 | requires: 163 | - "All Fields Test" 164 | - integration_test: 165 | name: "Interrupted State Test" 166 | context: 167 | - circleci-user 168 | - tier-1-tap-user 169 | test_command: |- 170 | run-test --tap=${CIRCLE_PROJECT_REPONAME} tests/test_hubspot_interrupted_sync.py 171 | requires: 172 | - "Bookmarks Test Static Data" 173 | - "Bookmarks Test CRUD" 174 | - integration_test: 175 | name: "Interrupted State with Offset Test" 176 | context: 177 | - circleci-user 178 | - tier-1-tap-user 179 | test_command: |- 180 | run-test --tap=${CIRCLE_PROJECT_REPONAME} tests/test_hubspot_interrupted_sync_offset.py 181 | requires: 182 | - "Bookmarks Test Static Data" 183 | - "Bookmarks Test CRUD" 184 | - integration_test: 185 | name: "Pagination Test" 186 | context: 187 | - circleci-user 188 | - tier-1-tap-user 189 | test_command: |- 190 | run-test --tap=${CIRCLE_PROJECT_REPONAME} tests/test_hubspot_pagination.py 191 | requires: 192 | - "Interrupted State with Offset Test" 193 | - "Interrupted State Test" 194 | - build: 195 | context: 196 | - circleci-user 197 | - tier-1-tap-user 198 | requires: 199 | - "Pagination Test" 200 | build_daily: 201 | <<: *commit_jobs 202 | triggers: 203 | - schedule: 204 | cron: "0 1 * * *" 205 | filters: 206 | branches: 207 | only: 208 | - master 209 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | # Description of change 2 | (write a short description or paste a link to JIRA) 3 | 4 | # Manual QA steps 5 | - 6 | 7 | # Risks 8 | - 9 | 10 | # Rollback steps 11 | - revert this branch 12 | 13 | #### AI generated code 14 | https://internal.qlik.dev/general/ways-of-working/code-reviews/#guidelines-for-ai-generated-code 15 | - [ ] this PR has been written with the help of GitHub Copilot or another generative AI tool 16 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | local_settings.py 55 | 56 | # Flask stuff: 57 | instance/ 58 | .webassets-cache 59 | 60 | # Scrapy stuff: 61 | .scrapy 62 | 63 | # Sphinx documentation 64 | docs/_build/ 65 | 66 | # PyBuilder 67 | target/ 68 | 69 | # IPython Notebook 70 | .ipynb_checkpoints 71 | 72 | # pyenv 73 | .python-version 74 | 75 | # celery beat schedule file 76 | celerybeat-schedule 77 | 78 | # dotenv 79 | .env 80 | 81 | # virtualenv 82 | venv/ 83 | ENV/ 84 | 85 | # Spyder project settings 86 | .spyderproject 87 | 88 | # Rope project settings 89 | .ropeproject 90 | 91 | # Mac 92 | ._* 93 | .DS_Store 94 | 95 | # Custom stuff 96 | env.sh 97 | config.json 98 | .autoenv.zsh 99 | *~ 100 | env-vars* 101 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## 3.2.0 4 | * Add support for `select_fields_by_default` config property [#266](https://github.com/singer-io/tap-hubspot/pull/266) 5 | * Fix dependabot issue 6 | 7 | ## 3.1.1 8 | * Replace legacy properties for Contacts and Deals [#265](https://github.com/singer-io/tap-hubspot/pull/265) 9 | 10 | ## 3.1.0 11 | * Renames custom objects that share a name with standard objects [#263](https://github.com/singer-io/tap-hubspot/pull/263) 12 | 13 | ## 3.0.0 14 | * Upgrade Owners API endpoint [#256](https://github.com/singer-io/tap-hubspot/pull/256) 15 | 16 | ## 2.13.2 17 | * Fix out-of-index error [#253](https://github.com/singer-io/tap-hubspot/pull/253) 18 | 19 | ## 2.13.1 20 | * Optimise contacts_by_company implementation [#250](https://github.com/singer-io/tap-hubspot/pull/250) 21 | 22 | ## 2.13.0 23 | * HubSpot Custom CRM Objects Support [#242](https://github.com/singer-io/tap-hubspot/pull/242) 24 | 25 | ## 2.12.2 26 | * Use engagements_page_size advanced option [#234](https://github.com/singer-io/tap-hubspot/pull/234) 27 | 28 | ## 2.12.1 29 | * Use sync start time for writing bookmarks [#226](https://github.com/singer-io/tap-hubspot/pull/226) 30 | 31 | ## 2.12.0 32 | * Include properties (default + custom) in tickets stream [#220](https://github.com/singer-io/tap-hubspot/pull/220) 33 | 34 | ## 2.11.0 35 | * Implement new stream - `tickets` [#218](https://github.com/singer-io/tap-hubspot/pull/218) 36 | * Update integration tests for the tickets stream implementation [#219](https://github.com/singer-io/tap-hubspot/pull/219) 37 | 38 | ## 2.10.0 39 | * Updated replication method to INCREMENTAL and replication key to property_hs_lastmodifieddate for the deals and companies streams [#195](https://github.com/singer-io/tap-hubspot/pull/195) 40 | * Fixed Pylint errors [#204](https://github.com/singer-io/tap-hubspot/pull/204) 41 | 42 | ## 2.9.6 43 | * Implement Request Timeout [#177](https://github.com/singer-io/tap-hubspot/pull/177) 44 | * Add version timestamp in contacts [#191](https://github.com/singer-io/tap-hubspot/pull/191) 45 | 46 | ## 2.9.5 47 | * Fixes a bug in sending the fields to the v3 Deals endpoint [#145](https://github.com/singer-io/tap-hubspot/pull/145) 48 | 49 | ## 2.9.4 50 | * Reverts 142 [#144](https://github.com/singer-io/tap-hubspot/pull/144) 51 | 52 | ## 2.9.3
53 | * Add support for property_versions [#142](https://github.com/singer-io/tap-hubspot/pull/142) 54 | 55 | ## 2.9.2 56 | * Change `POST` to V3 Deals to use one non-standard field instead of all fields we want [#139](https://github.com/singer-io/tap-hubspot/pull/139) 57 | * See the pull request for a more detailed explanation 58 | 59 | ## 2.9.1 60 | * Add retry logic to V3 calls [#136](https://github.com/singer-io/tap-hubspot/pull/136) 61 | 62 | ## 2.9.0 63 | * Add fields to Deals stream - `hs_date_entered_*` and `hs_date_exited_*` [#133](https://github.com/singer-io/tap-hubspot/pull/133) 64 | 65 | ## 2.8.1 66 | * Reverts `v2.8.0` back to `v2.7.0` 67 | 68 | ## 2.8.0 69 | * Add fields to Deals stream - `hs_date_entered_*` and `hs_date_exited_*` [#124](https://github.com/singer-io/tap-hubspot/pull/124) 70 | 71 | ## 2.7.0 72 | * Fields nested under `properties` are copied to top level and prepended with `property_` [#107](https://github.com/singer-io/tap-hubspot/pull/107) 73 | 74 | ## 2.6.5 75 | * For `deals` stream, use `includeAllProperties` flag instead of appending all properties to the request URL [#112](https://github.com/singer-io/tap-hubspot/pull/112) 76 | 77 | ## 2.6.4 78 | * When making `deals` requests, only attach `properties` if selected [#102](https://github.com/singer-io/tap-hubspot/pull/102) 79 | 80 | ## 2.6.3 81 | * Use the metadata library better 82 | 83 | ## 2.6.2 84 | * Revert the revert. Go back to v2.6.0. 85 | 86 | ## 2.6.1 87 | * Revert v2.6.0 to v2.5.2 88 | 89 | ## 2.6.0 90 | * Replaced `annotated_schema` with Singer `metadata` 91 | * Added integration tests to CircleCI 92 | 93 | ## 2.5.2 94 | * Companies and Engagements have a new pattern to catch records that are updated during a long-running sync. Rather than using a lookback window, the bookmark value will be limited to the `min(current_sync_start, max_bk_seen)` [#98](https://github.com/singer-io/tap-hubspot/pull/98) 95 | 96 | ## 2.4.0 97 | * The owners stream can optionally fetch "inactive owners" [#92](https://github.com/singer-io/tap-hubspot/pull/92) 98 | 99 | ## 2.3.0 100 | * Engagements will now track how long the stream takes to sync, and look back on the next run by that amount to cover potentially missed updates due to asynchronous updates during the previous sync [#91](https://github.com/singer-io/tap-hubspot/pull/91) 101 | 102 | ## 2.2.8 103 | * When resuming an interrupted sync, will now attempt all streams before exiting [#90](https://github.com/singer-io/tap-hubspot/pull/90) 104 | 105 | ## 2.2.7 106 | * Add `delivered`, `forward`, `print`, `reply`, `spamreport` to `campaigns.counters` 107 | 108 | ## 2.2.6 109 | * Change a loop over `dict.items()` to `dict.values()` because the keys returned were not being used [#82](https://github.com/singer-io/tap-hubspot/pull/82) 110 | 111 | ## 2.2.5 112 | * Update version of `requests` to `2.20.0` in response to CVE-2018-18074 113 | 114 | ## 2.2.4 115 | * Ensure that deal associations are being retrieved if `associations` are selected in the catalog [#79](https://github.com/singer-io/tap-hubspot/pull/79) 116 | 117 | ## 2.2.3 118 | * Scrub the access token from error messages HubSpot returns when there are insufficient permissions [#75](https://github.com/singer-io/tap-hubspot/pull/75) 119 | 120 | ## 2.2.2 121 | * Fix a bug with the 'engagements' stream which requires the 'engagement' field to have automatic inclusion [#74](https://github.com/singer-io/tap-hubspot/pull/74) 122 | 123 | ## 2.2.1 124 | * Fix a bug with the 'inclusion' metadata for replication_key fields [#72](https://github.com/singer-io/tap-hubspot/pull/72)
125 | 126 | ## 2.2.0 127 | * Adds property selection to the tap [#67](https://github.com/singer-io/tap-hubspot/pull/67) 128 | * Removed the keywords stream as it is deprecated [#68](https://github.com/singer-io/tap-hubspot/pull/68) 129 | * Schema updates [#69](https://github.com/singer-io/tap-hubspot/pull/69) [#70](https://github.com/singer-io/tap-hubspot/pull/70) 130 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE 2 | include tap_hubspot/schemas/*.json 3 | include tap_hubspot/schemas/shared/*.json 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # tap-hubspot 2 | 3 | This is a [Singer](https://singer.io) tap that produces JSON-formatted data following the [Singer spec](https://github.com/singer-io/getting-started/blob/master/SPEC.md). 4 | 5 | This tap: 6 | - Pulls raw data from HubSpot's [REST API](http://developers.hubspot.com/docs/overview) 7 | - Extracts the following resources from HubSpot 8 | - [Campaigns](http://developers.hubspot.com/docs/methods/email/get_campaign_data) 9 | - [Companies](http://developers.hubspot.com/docs/methods/companies/get_company) 10 | - [Contacts](https://developers.hubspot.com/docs/methods/contacts/get_contacts) 11 | - [Contact Lists](http://developers.hubspot.com/docs/methods/lists/get_lists) 12 | - [Deals](http://developers.hubspot.com/docs/methods/deals/get_deals_modified) 13 | - [Deal Pipelines](https://developers.hubspot.com/docs/methods/deal-pipelines/get-all-deal-pipelines) 14 | - [Email Events](http://developers.hubspot.com/docs/methods/email/get_events) 15 | - [Engagements](https://developers.hubspot.com/docs/methods/engagements/get-all-engagements) 16 | - [Forms](http://developers.hubspot.com/docs/methods/forms/v2/get_forms) 17 | - [Keywords](http://developers.hubspot.com/docs/methods/keywords/get_keywords) 18 | - [Owners](https://developers.hubspot.com/docs/api/crm/owners) 19 | - [Subscription Changes](http://developers.hubspot.com/docs/methods/email/get_subscriptions_timeline) 20 | - [Workflows](http://developers.hubspot.com/docs/methods/workflows/v3/get_workflows) 21 | - [Tickets](https://developers.hubspot.com/docs/api/crm/tickets) 22 | - Outputs the schema for each resource 23 | - Incrementally pulls data based on the input state 24 | 25 | ## Configuration 26 | 27 | This tap requires a `config.json` that specifies details regarding [OAuth 2.0](https://developers.hubspot.com/docs/methods/oauth2/oauth2-overview) authentication, a cutoff date for syncing historical data, an optional `request_timeout` parameter that controls how long each request waits for a response, and an optional flag that controls collection of anonymous usage metrics. See [config.sample.json](config.sample.json) for an example. You may specify an API key instead of OAuth parameters for development purposes, as detailed below. 28 | 29 | To run `tap-hubspot` with the configuration file, use this command: 30 | 31 | ```bash 32 | › tap-hubspot -c my-config.json 33 | ``` 34 | 35 | 36 | ## API Key Authentication (for development) 37 | 38 | As an alternative to OAuth 2.0 authentication during development, you may specify an API key (`HAPIKEY`) to authenticate with the HubSpot API.
This should be used only for low-volume development work, as the [HubSpot API Usage Guidelines](https://developers.hubspot.com/apps/api_guidelines) specify that integrations should use OAuth for authentication. 39 | 40 | To use an API key, include a `hapikey` configuration variable in your `config.json` and set it to the value of your HubSpot API key. Any OAuth authentication parameters in your `config.json` **will be ignored** if this key is present! 41 | 42 | --- 43 | 44 | Copyright © 2017 Stitch 45 | -------------------------------------------------------------------------------- /bin/run-a-test.sh: -------------------------------------------------------------------------------- 1 | set -exu 2 | TEST_FILE=$1 3 | TEST_CLASS=$2 4 | TEST_NAME=$3 5 | nosetests tap_hubspot/tests/$TEST_FILE:$TEST_CLASS.$TEST_NAME 6 | -------------------------------------------------------------------------------- /bin/run-all-tests.sh: -------------------------------------------------------------------------------- 1 | nosetests tap_hubspot/tests/ 2 | -------------------------------------------------------------------------------- /config.sample.json: -------------------------------------------------------------------------------- 1 | { 2 | "redirect_uri": "https://api.hubspot.com/", 3 | "client_id": 123456789000, 4 | "client_secret": "my_secret", 5 | "refresh_token": "my_token", 6 | "start_date": "2017-01-01T00:00:00Z", 7 | "request_timeout": 300, 8 | "disable_collection": false 9 | } 10 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description-file = README.md 3 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from setuptools import setup 4 | 5 | setup(name='tap-hubspot', 6 | version='3.2.0', 7 | description='Singer.io tap for extracting data from the HubSpot API', 8 | author='Stitch', 9 | url='http://singer.io', 10 | classifiers=['Programming Language :: Python :: 3 :: Only'], 11 | py_modules=['tap_hubspot'], 12 | install_requires=[ 13 | 'attrs==16.3.0', 14 | 'singer-python==5.13.0', 15 | 'requests==2.32.3', 16 | 'backoff==1.8.0', 17 | 'requests_mock==1.3.0', 18 | ], 19 | extras_require= { 20 | 'dev': [ 21 | 'pylint==2.5.3', 22 | 'nose==1.3.7', 23 | ] 24 | }, 25 | entry_points=''' 26 | [console_scripts] 27 | tap-hubspot=tap_hubspot:main 28 | ''', 29 | packages=['tap_hubspot'], 30 | package_data = { 31 | 'tap_hubspot/schemas': [ 32 | "schemas/*.json", 33 | "schemas/shared/*.json" 34 | ] 35 | }, 36 | include_package_data=True, 37 | ) 38 | -------------------------------------------------------------------------------- /tap_hubspot/schemas/campaigns.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "object", 3 | "properties": { 4 | "appId": { 5 | "type": ["null", "integer"] 6 | }, 7 | "appName": { 8 | "type": ["null", "string"] 9 | }, 10 | "contentId": { 11 | "type": ["null", "integer"] 12 | }, 13 | "counters": { 14 | "type": ["null", "object"], 15 | "properties": { 16 | "delievered": { 17 | "type": ["null", "integer"] 18 | }, 19 | "open": { 20 | "type": ["null", "integer"] 21 | }, 22 | "processed": { 23 | "type": ["null", "integer"] 24 | }, 25 | "sent": { 26 | "type": ["null", "integer"] 27 | }, 28 | "deferred": { 29 | "type": ["null", "integer"] 30 | 
}, 31 | "unsubscribed": { 32 | "type": ["null", "integer"] 33 | }, 34 | "statuschange": { 35 | "type": ["null", "integer"] 36 | }, 37 | "bounce": { 38 | "type": ["null", "integer"] 39 | }, 40 | "mta_dropped": { 41 | "type": ["null", "integer"] 42 | }, 43 | "dropped": { 44 | "type": ["null", "integer"] 45 | }, 46 | "suppressed": { 47 | "type": ["null", "integer"] 48 | }, 49 | "click": { 50 | "type": ["null", "integer"] 51 | }, 52 | "delivered": { 53 | "type": ["null", "integer"] 54 | }, 55 | "forward": { 56 | "type": ["null", "integer"] 57 | }, 58 | "print": { 59 | "type": ["null", "integer"] 60 | }, 61 | "reply": { 62 | "type": ["null", "integer"] 63 | }, 64 | "spamreport": { 65 | "type": ["null", "integer"] 66 | } 67 | } 68 | }, 69 | "id": { 70 | "type": ["null", "integer"] 71 | }, 72 | "name": { 73 | "type": ["null", "string"] 74 | }, 75 | "numIncluded": { 76 | "type": ["null", "integer"] 77 | }, 78 | "numQueued": { 79 | "type": ["null", "integer"] 80 | }, 81 | "subType": { 82 | "type": ["null", "string"] 83 | }, 84 | "subject": { 85 | "type": ["null", "string"] 86 | }, 87 | "type": { 88 | "type": ["null", "string"] 89 | } 90 | } 91 | } 92 | -------------------------------------------------------------------------------- /tap_hubspot/schemas/companies.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "object", 3 | "properties": { 4 | "portalId": { 5 | "type": ["null", "integer"] 6 | }, 7 | "companyId": { 8 | "type": ["null", "integer"] 9 | } 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /tap_hubspot/schemas/contact_lists.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "object", 3 | "properties": { 4 | "parentId": { 5 | "type": ["null", "integer"] 6 | }, 7 | "metaData": { 8 | "type": "object", 9 | "properties": { 10 | "processing": { 11 | "type": ["null", "string"] 12 | }, 13 | "size": { 14 | "type": ["null", "integer"] 15 | }, 16 | "error": { 17 | "type": ["null", "string"] 18 | }, 19 | "lastProcessingStateChangeAt": { 20 | "type": ["null", "string"], 21 | "format": "date-time" 22 | }, 23 | "lastSizeChangeAt": { 24 | "type": ["null", "string"], 25 | "format": "date-time" 26 | } 27 | } 28 | }, 29 | "dynamic": { 30 | "type": ["null", "boolean"] 31 | }, 32 | "name": { 33 | "type": ["null", "string"] 34 | }, 35 | "filters": { 36 | "type": "array", 37 | "items": { 38 | "type": "array", 39 | "items": { 40 | "type": "object", 41 | "properties": { 42 | "filterFamily": { 43 | "type": ["null", "string"] 44 | }, 45 | "withinTimeMode": { 46 | "type": ["null", "string"] 47 | }, 48 | "checkPastVersions": { 49 | "type": ["null", "boolean"] 50 | }, 51 | "type": { 52 | "type": ["null", "string"] 53 | }, 54 | "property": { 55 | "type": ["null", "string"] 56 | }, 57 | "value": { 58 | "type": ["null", "string"] 59 | }, 60 | "operator": { 61 | "type": ["null", "string"] 62 | } 63 | } 64 | } 65 | } 66 | }, 67 | "portalId": { 68 | "type": ["null", "integer"] 69 | }, 70 | "createdAt": { 71 | "type": ["null", "string"], 72 | "format": "date-time" 73 | }, 74 | "listId": { 75 | "type": ["null", "integer"] 76 | }, 77 | "updatedAt": { 78 | "type": ["null", "string"], 79 | "format": "date-time" 80 | }, 81 | "internalListId": { 82 | "type": ["null", "integer"] 83 | }, 84 | "readOnly": { 85 | "type": ["null", "boolean"] 86 | }, 87 | "deleteable": { 88 | "type": ["null", "boolean"] 89 | }, 90 | "listType": { 91 | "type": ["null", "string"] 92 | }, 93 | 
"archived": { 94 | "type": ["null", "boolean"] 95 | } 96 | } 97 | } 98 | -------------------------------------------------------------------------------- /tap_hubspot/schemas/contacts.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "object", 3 | "properties": { 4 | "vid": { 5 | "type": ["null", "integer"] 6 | }, 7 | "versionTimestamp": { 8 | "type": ["null", "string"], 9 | "format": "date-time" 10 | }, 11 | "canonical-vid": { 12 | "type": ["null", "integer"] 13 | }, 14 | "merged-vids": { 15 | "type": ["null", "array"], 16 | "items": { 17 | "type": ["null", "integer"] 18 | } 19 | }, 20 | "portal-id": { 21 | "type": ["null", "integer"] 22 | }, 23 | "is-contact": { 24 | "type": ["null", "boolean"] 25 | }, 26 | "profile-token": { 27 | "type": ["null", "string"] 28 | }, 29 | "profile-url": { 30 | "type": ["null", "string"] 31 | }, 32 | "associated-company" : { 33 | "type": ["null", "object"], 34 | "properties" : {} 35 | }, 36 | "identity-profiles": { 37 | "type": ["null", "array"], 38 | "items": { 39 | "type": ["null", "object"], 40 | "properties": { 41 | "deleted-changed-timestamp": { 42 | "type": ["null", "string"], 43 | "format": "date-time" 44 | }, 45 | "saved-at-timestamp": { 46 | "type": ["null", "string"], 47 | "format": "date-time" 48 | }, 49 | "vid": { 50 | "type": ["null", "integer"] 51 | }, 52 | "identities": { 53 | "type": ["null", "array"], 54 | "items": { 55 | "type": ["null", "object"], 56 | "properties": { 57 | "timestamp": { 58 | "type": ["null", "string"], 59 | "format": "date-time" 60 | }, 61 | "type": { 62 | "type": ["null", "string"] 63 | }, 64 | "value": { 65 | "type": ["null", "string"] 66 | } 67 | } 68 | } 69 | } 70 | } 71 | } 72 | }, 73 | "list-memberships": { 74 | "type": ["null", "array"], 75 | "items": { 76 | "type": ["null", "object"], 77 | "properties": { 78 | "internal-list-id": { 79 | "type": ["null", "integer"] 80 | }, 81 | "is-member": { 82 | "type": ["null", "boolean"] 83 | }, 84 | "static-list-id": { 85 | "type": ["null", "integer"] 86 | }, 87 | "timestamp": { 88 | "type": ["null", "string"], 89 | "format": "date-time" 90 | }, 91 | "vid": { 92 | "type": ["null", "integer"] 93 | } 94 | } 95 | } 96 | }, 97 | "form-submissions": { 98 | "type": ["null", "array"], 99 | "items": { 100 | "type": ["null", "object"], 101 | "properties": { 102 | "conversion-id": { 103 | "type": ["null", "string"] 104 | }, 105 | "timestamp": { 106 | "type": ["null", "string"], 107 | "format": "date-time" 108 | }, 109 | "form-id": { 110 | "type": ["null", "string"] 111 | }, 112 | "portal-id": { 113 | "type": ["null", "integer"] 114 | }, 115 | "page-url": { 116 | "type": ["null", "string"] 117 | }, 118 | "title": { 119 | "type": ["null", "string"] 120 | } 121 | } 122 | } 123 | }, 124 | "merge-audits": { 125 | "type": ["null", "array"], 126 | "items": { 127 | "type": ["null", "object"], 128 | "properties": { 129 | "canonical-vid": { 130 | "type": ["null", "integer"] 131 | }, 132 | "vid-to-merge": { 133 | "type": ["null", "integer"] 134 | }, 135 | "timestamp": { 136 | "type": ["null", "string"], 137 | "format": "date-time" 138 | }, 139 | "user-id": { 140 | "type": ["null", "integer"] 141 | }, 142 | "num-properties-moved": { 143 | "type": ["null", "integer"] 144 | }, 145 | "merged_from_email": { 146 | "type": ["null", "object"], 147 | "properties": { 148 | "value": { 149 | "type": ["null", "string"] 150 | }, 151 | "source-type": { 152 | "type": ["null", "string"] 153 | }, 154 | "source-id": { 155 | "type": ["null", "string"] 156 | }, 
157 | "source-label": { 158 | "type": ["null", "string"] 159 | }, 160 | "source-vids": { 161 | "type": ["null", "array"], 162 | "items": { 163 | "type": ["null", "integer"] 164 | } 165 | }, 166 | "timestamp": { 167 | "type": ["null", "integer"] 168 | }, 169 | "selected": { 170 | "type": ["null", "boolean"] 171 | } 172 | } 173 | }, 174 | "merged_to_email": { 175 | "type": ["null", "object"], 176 | "properties": { 177 | "value": { 178 | "type": ["null", "string"] 179 | }, 180 | "source-type": { 181 | "type": ["null", "string"] 182 | }, 183 | "source-id": { 184 | "type": ["null", "string"] 185 | }, 186 | "source-label": { 187 | "type": ["null", "string"] 188 | }, 189 | "timestamp": { 190 | "type": ["null", "integer"] 191 | }, 192 | "selected": { 193 | "type": ["null", "boolean"] 194 | } 195 | } 196 | } 197 | } 198 | } 199 | } 200 | } 201 | } 202 | -------------------------------------------------------------------------------- /tap_hubspot/schemas/contacts_by_company.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "object", 3 | "properties": { 4 | "contact-id": { 5 | "type": ["integer"] 6 | }, 7 | "company-id": { 8 | "type": ["integer"] 9 | } 10 | }, 11 | "additionalProperties": false 12 | } 13 | -------------------------------------------------------------------------------- /tap_hubspot/schemas/deal_pipelines.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "object", 3 | "properties": { 4 | "pipelineId": { 5 | "type": ["null", "string"] 6 | }, 7 | "stages": { 8 | "type": ["null", "array"], 9 | "items": { 10 | "type": "object", 11 | "properties": { 12 | "stageId": { 13 | "type": ["null", "string"] 14 | }, 15 | "label": { 16 | "type": ["null", "string"] 17 | }, 18 | "probability": { 19 | "type": ["null", "number"] 20 | }, 21 | "active": { 22 | "type": ["null", "boolean"] 23 | }, 24 | "displayOrder": { 25 | "type": ["null", "integer"] 26 | }, 27 | "closedWon": { 28 | "type": ["null", "boolean"] 29 | } 30 | } 31 | } 32 | }, 33 | "label": { 34 | "type": ["null", "string"] 35 | }, 36 | "active": { 37 | "type": ["null", "boolean"] 38 | }, 39 | "displayOrder": { 40 | "type": ["null", "integer"] 41 | }, 42 | "staticDefault": { 43 | "type": ["null", "boolean"] 44 | } 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /tap_hubspot/schemas/deals.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "object", 3 | "properties": { 4 | "portalId": { 5 | "type": ["null", "integer"] 6 | }, 7 | "dealId": { 8 | "type": ["null", "integer"] 9 | }, 10 | "isDeleted": { 11 | "type": ["null", "boolean"] 12 | }, 13 | "associations": { 14 | "type": ["null", "object"], 15 | "properties": { 16 | "associatedVids": { 17 | "type": ["null", "array"], 18 | "items": { 19 | "type": ["null", "integer"] 20 | } 21 | }, 22 | "associatedCompanyIds": { 23 | "type": ["null", "array"], 24 | "items": { 25 | "type": ["null", "integer"] 26 | } 27 | }, 28 | "associatedDealIds": { 29 | "type": ["null", "array"], 30 | "items": { 31 | "type": ["null", "integer"] 32 | } 33 | } 34 | } 35 | } 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /tap_hubspot/schemas/email_events.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "object", 3 | "properties": { 4 | "appId": { 5 | "type": ["null", "integer"] 6 | }, 7 | "appName": { 8 | 
"type": ["null", "string"] 9 | }, 10 | "browser": { 11 | "type": ["null", "object"], 12 | "properties": { 13 | "family": { 14 | "type": ["null", "string"] 15 | }, 16 | "name": { 17 | "type": ["null", "string"] 18 | }, 19 | "producer": { 20 | "type": ["null", "string"] 21 | }, 22 | "producerUrl": { 23 | "type": ["null", "string"] 24 | }, 25 | "type": { 26 | "type": ["null", "string"] 27 | }, 28 | "url": { 29 | "type": ["null", "string"] 30 | } 31 | } 32 | }, 33 | "created": { 34 | "type": ["null", "string"], 35 | "format": "date-time" 36 | }, 37 | "deviceType": { 38 | "type": ["null", "string"] 39 | }, 40 | "duration": { 41 | "type": ["null", "integer"] 42 | }, 43 | "emailCampaignId": { 44 | "type": ["null", "integer"] 45 | }, 46 | "emailCampaignGroupId": { 47 | "type": ["null", "integer"] 48 | }, 49 | "filteredEvent": { 50 | "type": ["null", "boolean"] 51 | }, 52 | "from": { 53 | "type": ["null", "string"] 54 | }, 55 | "hmid": { 56 | "type": ["null", "string"] 57 | }, 58 | "id": { 59 | "type": ["null", "string"] 60 | }, 61 | "ipAddress": { 62 | "type": ["null", "string"] 63 | }, 64 | "linkId": { 65 | "type": ["null", "integer"] 66 | }, 67 | "location": { 68 | "type": ["null", "object"], 69 | "properties": { 70 | "city": { 71 | "type": ["null", "string"] 72 | }, 73 | "country": { 74 | "type": ["null", "string"] 75 | }, 76 | "state": { 77 | "type": ["null", "string"] 78 | } 79 | } 80 | }, 81 | "portalId": { 82 | "type": ["null", "integer"] 83 | }, 84 | "recipient": { 85 | "type": ["null", "string"] 86 | }, 87 | "response": { 88 | "type": ["null", "string"] 89 | }, 90 | "sentBy": { 91 | "type": ["null", "object"], 92 | "properties": { 93 | "created": { 94 | "type": ["null", "string"], 95 | "format": "date-time" 96 | }, 97 | "id": { 98 | "type": ["null", "string"] 99 | } 100 | } 101 | }, 102 | "smtpId": { 103 | "type": ["null", "string"] 104 | }, 105 | "subject": { 106 | "type": ["null", "string"] 107 | }, 108 | "type": { 109 | "type": ["null", "string"] 110 | }, 111 | "url": { 112 | "type": ["null", "string"] 113 | }, 114 | "userAgent": { 115 | "type": ["null", "string"] 116 | } 117 | } 118 | } 119 | -------------------------------------------------------------------------------- /tap_hubspot/schemas/engagements.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "object", 3 | "properties": { 4 | "engagement_id": { 5 | "type": "integer" 6 | }, 7 | "lastUpdated": { 8 | "type": ["null", "string"], 9 | "format": "date-time" 10 | }, 11 | "engagement": { 12 | "type": "object", 13 | "properties": { 14 | "id": { 15 | "type": "integer" 16 | }, 17 | "portalId": { 18 | "type": "integer" 19 | }, 20 | "active": { 21 | "type": "boolean" 22 | }, 23 | "createdAt": { 24 | "type": ["null", "string"], 25 | "format": "date-time" 26 | }, 27 | "lastUpdated": { 28 | "type": ["null", "string"], 29 | "format": "date-time" 30 | }, 31 | "ownerId": { 32 | "type": "integer" 33 | }, 34 | "type": { 35 | "type": "string" 36 | }, 37 | "timestamp": { 38 | "type": ["null", "string"], 39 | "format": "date-time" 40 | } 41 | } 42 | }, 43 | "associations": { 44 | "type": ["null", "object"], 45 | "properties": { 46 | "contactIds": { 47 | "type": ["null", "array"], 48 | "items": { 49 | "type": "integer" 50 | } 51 | }, 52 | "companyIds": { 53 | "type": ["null", "array"], 54 | "items": { 55 | "type": "integer" 56 | } 57 | }, 58 | "dealIds": { 59 | "type": ["null", "array"], 60 | "items": { 61 | "type": "integer" 62 | } 63 | } 64 | } 65 | }, 66 | "attachments": { 67 | "type": 
["null", "array"], 68 | "items": { 69 | "type": "object", 70 | "properties": { 71 | "id": { 72 | "type": "integer" 73 | } 74 | } 75 | } 76 | }, 77 | "metadata": { 78 | "type": ["null", "object"], 79 | "properties": { 80 | "body": { 81 | "type": ["null", "string"] 82 | }, 83 | "from": { 84 | "type": ["null", "object"], 85 | "properties": { 86 | "email": { 87 | "type": "string" 88 | }, 89 | "firstName": { 90 | "type": "string" 91 | }, 92 | "lastName": { 93 | "type": "string" 94 | } 95 | } 96 | }, 97 | "to": { 98 | "type": ["null", "array"], 99 | "items": { 100 | "type": "object", 101 | "properties": { 102 | "email": { 103 | "type": "string" 104 | } 105 | } 106 | } 107 | }, 108 | "cc": { 109 | "type": ["null", "array"], 110 | "items": { 111 | "type": "object", 112 | "properties": { 113 | "email": { 114 | "type": "string" 115 | } 116 | } 117 | } 118 | }, 119 | "bcc": { 120 | "type": ["null", "array"], 121 | "items": { 122 | "type": "object", 123 | "properties": { 124 | "email": { 125 | "type": "string" 126 | } 127 | } 128 | } 129 | }, 130 | "subject": { 131 | "type": ["null", "string"] 132 | }, 133 | "html": { 134 | "type": ["null", "string"] 135 | }, 136 | "text": { 137 | "type": ["null", "string"] 138 | }, 139 | "status": { 140 | "type": ["null", "string"] 141 | }, 142 | "forObjectType": { 143 | "type": ["null", "string"] 144 | }, 145 | "startTime": { 146 | "type": ["null", "integer"] 147 | }, 148 | "endTime": { 149 | "type": ["null", "integer"] 150 | }, 151 | "title": { 152 | "type": ["null", "string"] 153 | }, 154 | "toNumber": { 155 | "type": ["null", "string"] 156 | }, 157 | "fromNumber": { 158 | "type": ["null", "string"] 159 | }, 160 | "externalId": { 161 | "type": ["null", "string"] 162 | }, 163 | "durationMilliseconds": { 164 | "type": ["null", "integer"] 165 | }, 166 | "externalAccountId": { 167 | "type": ["null", "string"] 168 | }, 169 | "recordingUrl": { 170 | "type": ["null", "string"], 171 | "format": "uri" 172 | }, 173 | "disposition": { 174 | "type": ["null", "string"] 175 | } 176 | } 177 | } 178 | } 179 | } 180 | -------------------------------------------------------------------------------- /tap_hubspot/schemas/forms.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "object", 3 | "properties": { 4 | "deletedAt": { 5 | "type": ["null", "integer"] 6 | }, 7 | "portalId": { 8 | "type": ["null", "integer"] 9 | }, 10 | "guid": { 11 | "type": ["null", "string"] 12 | }, 13 | "name": { 14 | "type": ["null", "string"] 15 | }, 16 | "action": { 17 | "type": ["null", "string"] 18 | }, 19 | "method": { 20 | "type": ["null", "string"] 21 | }, 22 | "cssClass": { 23 | "type": ["null", "string"] 24 | }, 25 | "redirect": { 26 | "type": ["null", "string"] 27 | }, 28 | "submitText": { 29 | "type": ["null", "string"] 30 | }, 31 | "followUpId": { 32 | "type": ["null", "string"] 33 | }, 34 | "notifyRecipients": { 35 | "type": ["null", "string"] 36 | }, 37 | "leadNurturingCampaignId": { 38 | "type": ["null", "string"] 39 | }, 40 | "formFieldGroups": { 41 | "type": "array", 42 | "items": { 43 | "type": "object", 44 | "properties": { 45 | "fields": { 46 | "type": "array", 47 | "items": { 48 | "type": "object", 49 | "properties": { 50 | "name": { 51 | "type": ["null", "string"] 52 | }, 53 | "label": { 54 | "type": ["null", "string"] 55 | }, 56 | "type": { 57 | "type": ["null", "string"] 58 | }, 59 | "fieldType": { 60 | "type": ["null", "string"] 61 | }, 62 | "description": { 63 | "type": ["null", "string"] 64 | }, 65 | "groupName": { 66 | "type": 
["null", "string"] 67 | }, 68 | "displayOrder": { 69 | "type": ["null", "integer"] 70 | }, 71 | "required": { 72 | "type": ["null", "boolean"] 73 | }, 74 | "validation": { 75 | "type": "object", 76 | "properties": { 77 | "name": { 78 | "type": ["null", "string"] 79 | }, 80 | "message": { 81 | "type": ["null", "string"] 82 | }, 83 | "data": { 84 | "type": ["null", "string"] 85 | }, 86 | "useDefaultBlockList": { 87 | "type": ["null", "boolean"] 88 | }, 89 | "blockedEmailAddresses": { 90 | "type": "array", 91 | "items": { 92 | "type": ["null", "string"] 93 | } 94 | } 95 | } 96 | }, 97 | "enabled": { 98 | "type": ["null", "boolean"] 99 | }, 100 | "hidden": { 101 | "type": ["null", "boolean"] 102 | }, 103 | "defaultValue": { 104 | "type": ["null", "string"] 105 | }, 106 | "isSmartField": { 107 | "type": ["null", "boolean"] 108 | }, 109 | "unselectedLabel": { 110 | "type": ["null", "string"] 111 | }, 112 | "placeholder": { 113 | "type": ["null", "string"] 114 | }, 115 | "labelHidden": { 116 | "type": ["null", "boolean"] 117 | }, 118 | "options": { 119 | "type": "array", 120 | "items": { 121 | "type": "object", 122 | "properties": { 123 | "description": { 124 | "type": ["null", "string"] 125 | }, 126 | "displayOrder": { 127 | "type": ["null", "integer"] 128 | }, 129 | "doubleData": { 130 | "type": ["null", "number"] 131 | }, 132 | "hidden" : { 133 | "type": ["null", "boolean"] 134 | }, 135 | "label": { 136 | "type": ["null", "string"] 137 | }, 138 | "readOnly": { 139 | "type": ["null", "boolean"] 140 | }, 141 | "value": { 142 | "type": ["null", "string"] 143 | } 144 | } 145 | } 146 | }, 147 | "selectedOptions": { 148 | "type": "array", 149 | "items": { 150 | "type" : ["null", "string"] 151 | } 152 | } 153 | } 154 | } 155 | }, 156 | "default": { 157 | "type": ["null", "boolean"] 158 | }, 159 | "isSmartGroup": { 160 | "type": ["null", "boolean"] 161 | }, 162 | "richText": { 163 | "type": "object", 164 | "properties": { 165 | "content": { 166 | "type": ["null", "string"] 167 | } 168 | } 169 | } 170 | } 171 | } 172 | }, 173 | "createdAt": { 174 | "type": ["null", "string"], 175 | "format": "date-time" 176 | }, 177 | "updatedAt": { 178 | "type": ["null", "string"], 179 | "format": "date-time" 180 | }, 181 | "performableHtml": { 182 | "type": ["null", "string"] 183 | }, 184 | "migratedFrom": { 185 | "type": ["null", "string"] 186 | }, 187 | "ignoreCurrentValues": { 188 | "type": ["null", "boolean"] 189 | }, 190 | "deletable": { 191 | "type": ["null", "boolean"] 192 | }, 193 | "inlineMessage": { 194 | "type": ["null", "string"] 195 | }, 196 | "tmsId": { 197 | "type": ["null", "string"] 198 | }, 199 | "captchaEnabled": { 200 | "type": ["null", "boolean"] 201 | }, 202 | "campaignGuid": { 203 | "type": ["null", "string"] 204 | }, 205 | "cloneable": { 206 | "type": ["null", "boolean"] 207 | }, 208 | "editable": { 209 | "type": ["null", "boolean"] 210 | }, 211 | "formType": { 212 | "type": ["null", "string"] 213 | }, 214 | "metaData": { 215 | "type": "array", 216 | "items": { 217 | "type": "object", 218 | "properties": { 219 | "name": { 220 | "type": ["null", "string"] 221 | }, 222 | "value": { 223 | "type": ["null", "string"] 224 | } 225 | } 226 | } 227 | } 228 | } 229 | } 230 | -------------------------------------------------------------------------------- /tap_hubspot/schemas/owners.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "object", 3 | "properties": { 4 | "id": { 5 | "type": ["null", "string"] 6 | }, 7 | "email": { 8 | "type": ["null", 
"string"] 9 | }, 10 | "firstName": { 11 | "type": ["null", "string"] 12 | }, 13 | "lastName": { 14 | "type": ["null", "string"] 15 | }, 16 | "userId": { 17 | "type": ["null", "integer"] 18 | }, 19 | "createdAt": { 20 | "type": ["null", "string"], 21 | "format": "date-time" 22 | }, 23 | "updatedAt": { 24 | "type": ["null", "string"], 25 | "format": "date-time" 26 | }, 27 | "archived": { 28 | "type": ["null", "boolean"] 29 | }, 30 | "teams": { 31 | "type": ["null", "array"], 32 | "items": { 33 | "type": "object", 34 | "properties": { 35 | "id": { 36 | "type": ["null", "string"] 37 | }, 38 | "name": { 39 | "type": ["null", "string"] 40 | }, 41 | "primary": { 42 | "type": ["null", "boolean"] 43 | } 44 | } 45 | } 46 | } 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /tap_hubspot/schemas/shared/associations_schema.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": [ 3 | "null", 4 | "object" 5 | ], 6 | "properties": { 7 | "results": { 8 | "type": [ 9 | "null", 10 | "array" 11 | ], 12 | "items": { 13 | "type": [ 14 | "null", 15 | "object" 16 | ], 17 | "properties": { 18 | "id": { 19 | "type": [ 20 | "null", 21 | "string" 22 | ] 23 | }, 24 | "type": { 25 | "type": [ 26 | "null", 27 | "string" 28 | ] 29 | } 30 | } 31 | } 32 | } 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /tap_hubspot/schemas/shared/custom_objects.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "object", 3 | "properties": { 4 | "id": { 5 | "type": [ 6 | "null", 7 | "string" 8 | ] 9 | }, 10 | "createdAt": { 11 | "type": ["null", "string"], 12 | "format": "date-time" 13 | }, 14 | "updatedAt": { 15 | "type": ["null", "string"], 16 | "format": "date-time" 17 | }, 18 | "archived": { 19 | "type": [ 20 | "null", 21 | "boolean" 22 | ] 23 | }, 24 | "associations": { 25 | "type": [ 26 | "null", 27 | "object" 28 | ], 29 | "properties": { 30 | "emails": {"$ref": "associations_schema.json"}, 31 | "meetings": {"$ref": "associations_schema.json"}, 32 | "notes": {"$ref": "associations_schema.json"}, 33 | "tasks": {"$ref": "associations_schema.json"}, 34 | "calls": {"$ref": "associations_schema.json"}, 35 | "conversation_session": {"$ref": "associations_schema.json"}, 36 | "companies": {"$ref": "associations_schema.json"}, 37 | "contacts": {"$ref": "associations_schema.json"}, 38 | "deals": {"$ref": "associations_schema.json"}, 39 | "products": {"$ref": "associations_schema.json"}, 40 | "tickets": {"$ref": "associations_schema.json"} 41 | } 42 | } 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /tap_hubspot/schemas/subscription_changes.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "object", 3 | "properties": { 4 | "timestamp": { 5 | "type": ["null", "string"], 6 | "format": "date-time" 7 | }, 8 | "portalId": { 9 | "type": ["null", "integer"] 10 | }, 11 | "recipient": { 12 | "type": ["null", "string"] 13 | }, 14 | "changes": { 15 | "type": ["null", "array"], 16 | "items": { 17 | "type": ["null", "object"], 18 | "properties": { 19 | "change": { 20 | "type": ["null", "string"] 21 | }, 22 | "timestamp": { 23 | "type": ["null", "string"], 24 | "format": "date-time" 25 | }, 26 | "source": { 27 | "type": ["null", "string"] 28 | }, 29 | "portalId": { 30 | "type": ["null", "integer"] 31 | }, 32 | "subscriptionId": { 33 | "type": 
["null", "integer"] 34 | }, 35 | "changeType": { 36 | "type": ["null", "string"] 37 | }, 38 | "causedByEvent": { 39 | "type": ["null", "object"], 40 | "properties": { 41 | "id": { 42 | "type": ["null", "string"] 43 | }, 44 | "created": { 45 | "type": ["null", "string"], 46 | "format": "date-time" 47 | } 48 | } 49 | } 50 | } 51 | } 52 | } 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /tap_hubspot/schemas/tickets.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "object", 3 | "properties": { 4 | "id": { 5 | "type": "string" 6 | }, 7 | "createdAt": { 8 | "type": [ 9 | "null", 10 | "string" 11 | ], 12 | "format": "date-time" 13 | }, 14 | "updatedAt": { 15 | "type": [ 16 | "null", 17 | "string" 18 | ], 19 | "format": "date-time" 20 | }, 21 | "archived": { 22 | "type": [ 23 | "null", 24 | "boolean" 25 | ] 26 | }, 27 | "associations": { 28 | "type": [ 29 | "null", 30 | "object" 31 | ], 32 | "properties": { 33 | "companies": { 34 | "type": [ 35 | "null", 36 | "object" 37 | ], 38 | "properties": { 39 | "results": { 40 | "type": [ 41 | "null", 42 | "array" 43 | ], 44 | "items": { 45 | "type": [ 46 | "null", 47 | "object" 48 | ], 49 | "properties": { 50 | "id": { 51 | "type": [ 52 | "null", 53 | "string" 54 | ] 55 | }, 56 | "type": { 57 | "type": [ 58 | "null", 59 | "string" 60 | ] 61 | } 62 | } 63 | } 64 | } 65 | } 66 | }, 67 | "deals": { 68 | "type": [ 69 | "null", 70 | "object" 71 | ], 72 | "properties": { 73 | "results": { 74 | "type": [ 75 | "null", 76 | "array" 77 | ], 78 | "items": { 79 | "type": [ 80 | "null", 81 | "object" 82 | ], 83 | "properties": { 84 | "id": { 85 | "type": [ 86 | "null", 87 | "string" 88 | ] 89 | }, 90 | "type": { 91 | "type": [ 92 | "null", 93 | "string" 94 | ] 95 | } 96 | } 97 | } 98 | } 99 | } 100 | }, 101 | "contacts": { 102 | "type": [ 103 | "null", 104 | "object" 105 | ], 106 | "properties": { 107 | "results": { 108 | "type": [ 109 | "null", 110 | "array" 111 | ], 112 | "items": { 113 | "type": [ 114 | "null", 115 | "object" 116 | ], 117 | "properties": { 118 | "id": { 119 | "type": [ 120 | "null", 121 | "string" 122 | ] 123 | }, 124 | "type": { 125 | "type": [ 126 | "null", 127 | "string" 128 | ] 129 | } 130 | } 131 | } 132 | } 133 | } 134 | } 135 | } 136 | } 137 | } 138 | } 139 | -------------------------------------------------------------------------------- /tap_hubspot/schemas/versions.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "array", 3 | "items": { 4 | "type": ["null", "object"], 5 | "properties": { 6 | "name": { 7 | "type": ["null", "string"] 8 | }, 9 | "value": { 10 | "type": ["null", "string"] 11 | }, 12 | "timestamp": { 13 | "type": ["null", "string"], 14 | "format": "date-time" 15 | }, 16 | "source": { 17 | "type": ["null", "string"] 18 | }, 19 | "sourceId": { 20 | "type": ["null", "string"] 21 | }, 22 | "sourceVid": { 23 | "type": ["null", "array"], 24 | "items": { 25 | "type": ["null", "string"] 26 | } 27 | } 28 | } 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /tap_hubspot/schemas/workflows.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "object", 3 | "properties": { 4 | "name": { 5 | "type": ["null", "string"] 6 | }, 7 | "id": { 8 | "type": ["null", "integer"] 9 | }, 10 | "type": { 11 | "type": ["null", "string"] 12 | }, 13 | "enabled": { 14 | "type": ["null", "boolean"] 
15 | }, 16 | "insertedAt": { 17 | "type": ["null", "string"], 18 | "format": "date-time" 19 | }, 20 | "updatedAt": { 21 | "type": ["null", "string"], 22 | "format": "date-time" 23 | }, 24 | "personaTagIds": { 25 | "type": "array", 26 | "items": { 27 | "type": "integer" 28 | } 29 | }, 30 | "contactListIds": { 31 | "type": "object", 32 | "properties": { 33 | "enrolled": { 34 | "type": ["null", "integer"] 35 | }, 36 | "active": { 37 | "type": ["null", "integer"] 38 | }, 39 | "steps": { 40 | "type": ["null", "array"], 41 | "items": { 42 | "type": ["null", "string"] 43 | } 44 | } 45 | } 46 | } 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /tap_hubspot/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/singer-io/tap-hubspot/155e7535949a83be34dbc08f40809ae1eefffd4f/tap_hubspot/tests/__init__.py -------------------------------------------------------------------------------- /tap_hubspot/tests/test_bookmarks.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import singer.messages 3 | import tap_hubspot 4 | from tap_hubspot.tests import utils 5 | 6 | LOGGER = singer.get_logger() 7 | 8 | class Bookmarks(unittest.TestCase): 9 | def setUp(self): 10 | utils.verify_environment_vars() 11 | utils.seed_tap_hubspot_config() 12 | utils.write_to_singer() 13 | 14 | #NB> test account must have > 2 contacts for this to work 15 | def sync_contacts(self): 16 | STATE = utils.get_clear_state() 17 | catalog = {'stream_alias': 'hubspot_contacts'} 18 | 19 | tap_hubspot.default_contact_params['count'] = 1 20 | 21 | STATE = tap_hubspot.sync_contacts(STATE, catalog) 22 | #offset has been cleared 23 | self.assertEqual(utils.caught_state['bookmarks']['contacts']['offset'], {}) 24 | 25 | #some bookmark has been recorded in the state 26 | self.assertNotEqual(utils.caught_state['bookmarks']['contacts']['lastmodifieddate'], None) 27 | 28 | #should sync some contacts 29 | # LOGGER.info('A caught record: {}'.format(utils.caught_records['contacts'][0])) 30 | self.assertGreater(len(utils.caught_records['contacts']), 1) 31 | self.assertEqual(set(utils.caught_records.keys()), {'contacts'}) 32 | self.assertEqual(utils.caught_pks, {'contacts': ['vid']}) 33 | 34 | utils.caught_records = [] 35 | STATE = tap_hubspot.sync_contacts(STATE, catalog) 36 | 37 | #no new records thanks to bookmark 38 | self.assertEqual(len(utils.caught_records), 0) 39 | 40 | def sync_companies(self): 41 | STATE = utils.get_clear_state() 42 | 43 | catalog = {'stream_alias': 'hubspot_companies'} 44 | STATE = tap_hubspot.sync_companies(STATE, catalog) 45 | 46 | #offset has been cleared 47 | self.assertEqual(utils.caught_state['bookmarks']['companies']['offset'], {}) 48 | 49 | #some bookmark has been recorded in the state 50 | self.assertNotEqual(utils.caught_state['bookmarks']['companies']['hs_lastmodifieddate'], None) 51 | 52 | #should sync some contacts && some hubspot_contacts_by_company 53 | self.assertGreater(len(utils.caught_records), 0) 54 | self.assertEqual(set(utils.caught_records.keys()), {'companies', 'hubspot_contacts_by_company'}) 55 | 56 | self.assertEqual(utils.caught_pks, {'companies': ['companyId'], 'hubspot_contacts_by_company': ['company-id', 'contact-id']}) 57 | 58 | utils.caught_records = [] 59 | STATE = tap_hubspot.sync_companies(STATE, catalog) 60 | 61 | #no new records thanks to bookmark 62 | self.assertEqual(len(utils.caught_records), 0) 63 | 
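# --- Annotation (illustrative sketch, not part of the original test file) ---
# The assertions above walk the Singer state dict in the shape this tap writes
# it: 'offset' is cleared once a stream finishes paging, and the replication
# key ('lastmodifieddate' for contacts) holds the bookmark. A minimal sketch of
# that structure, with hypothetical values rather than captured output:
EXAMPLE_CONTACTS_STATE = {
    "currently_syncing": "contacts",
    "bookmarks": {
        "contacts": {
            "offset": {},                                # cleared once paging completes
            "lastmodifieddate": "2017-01-01T00:00:00Z",  # hypothetical bookmark value
        }
    },
}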
-------------------------------------------------------------------------------- /tap_hubspot/tests/test_deals.py: -------------------------------------------------------------------------------- 1 | from tap_hubspot import sync_deals 2 | from unittest.mock import patch, ANY 3 | 4 | 5 | @patch('builtins.min') 6 | @patch('tap_hubspot.Context.get_catalog_from_id', return_value={"metadata": ""}) 7 | @patch('singer.metadata.to_map', return_value={}) 8 | @patch('singer.utils.strptime_with_tz') 9 | @patch('singer.utils.strftime') 10 | @patch('tap_hubspot.load_schema') 11 | @patch('tap_hubspot.gen_request', return_value=[]) 12 | def test_associations_are_not_validated(mocked_gen_request, mocked_catalog_from_id, mocked_metadata_map, mocked_utils_strptime, mocked_utils_strftime, mocked_load_schema, mocked_min): 13 | # pylint: disable=unused-argument 14 | sync_deals({}, mocked_catalog_from_id) 15 | 16 | expected_param = {'includeAssociations': False, 'properties': [], 'limit': 100} 17 | 18 | mocked_gen_request.assert_called_once_with(ANY, ANY, ANY, expected_param, ANY, ANY, ANY, ANY, v3_fields=None) 19 | 20 | 21 | @patch('builtins.min') 22 | @patch('tap_hubspot.Context.get_catalog_from_id', return_value={"metadata": ""}) 23 | @patch('singer.metadata.to_map', return_value={"associations": {"selected": True}}) 24 | @patch('singer.utils.strptime_with_tz') 25 | @patch('singer.utils.strftime') 26 | @patch('tap_hubspot.load_schema') 27 | @patch('tap_hubspot.gen_request', return_value=[]) 28 | def test_associations_are_validated(mocked_gen_request, mocked_catalog_from_id, mocked_metadata_map, mocked_utils_strptime, mocked_utils_strftime, mocked_load_schema, mocked_min): 29 | # pylint: disable=unused-argument 30 | sync_deals({}, mocked_catalog_from_id) 31 | 32 | expected_param = {'includeAssociations': True, 'properties': [], 'limit': 100} 33 | 34 | mocked_gen_request.assert_called_once_with(ANY, ANY, ANY, expected_param, ANY, ANY, ANY, ANY, v3_fields=None) 35 | -------------------------------------------------------------------------------- /tap_hubspot/tests/test_get_streams_to_sync.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from tap_hubspot import get_streams_to_sync, parse_source_from_url, Stream 3 | 4 | 5 | class TestGetStreamsToSync(unittest.TestCase): 6 | 7 | def setUp(self): 8 | self.streams = [ 9 | Stream('a', 'a', [], None, None), 10 | Stream('b', 'b', [], None, None), 11 | Stream('c', 'c', [], None, None), 12 | ] 13 | 14 | def test_get_streams_to_sync_with_no_this_stream(self): 15 | state = {'this_stream': None} 16 | self.assertEqual(self.streams, get_streams_to_sync(self.streams, state)) 17 | 18 | def test_get_streams_to_sync_with_first_stream(self): 19 | state = {'currently_syncing': 'a'} 20 | 21 | result = get_streams_to_sync(self.streams, state) 22 | 23 | parsed_result = [s.tap_stream_id for s in result] 24 | self.assertEqual(parsed_result, ['a', 'b', 'c']) 25 | 26 | def test_get_streams_to_sync_with_middle_stream(self): 27 | state = {'currently_syncing': 'b'} 28 | 29 | result = get_streams_to_sync(self.streams, state) 30 | 31 | parsed_result = [s.tap_stream_id for s in result] 32 | self.assertEqual(parsed_result, ['b', 'c', 'a']) 33 | 34 | def test_get_streams_to_sync_with_last_stream(self): 35 | state = {'currently_syncing': 'c'} 36 | 37 | result = get_streams_to_sync(self.streams, state) 38 | 39 | parsed_result = [s.tap_stream_id for s in result] 40 | self.assertEqual(parsed_result, ['c', 'a', 'b']) 41 | 42 | def 
test_parse_source_from_url_succeeds(self): 43 | url = "https://api.hubapi.com/companies/v2/companies/recent/modified" 44 | self.assertEqual('companies', parse_source_from_url(url)) 45 | -------------------------------------------------------------------------------- /tap_hubspot/tests/test_offsets.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import singer 3 | import tap_hubspot 4 | import singer.bookmarks 5 | from tap_hubspot.tests import utils 6 | 7 | LOGGER = singer.get_logger() 8 | 9 | def set_offset_with_exception(state, tap_stream_id, offset_key, offset_value): 10 | LOGGER.info("set_offset_with_exception: %s", utils.caught_state) 11 | utils.caught_state = singer.bookmarks.set_offset(state, tap_stream_id, offset_key, offset_value) 12 | raise Exception("simulated") 13 | 14 | class Offsets(unittest.TestCase): 15 | def setUp(self): 16 | utils.verify_environment_vars() 17 | utils.seed_tap_hubspot_config() 18 | utils.write_to_singer() 19 | singer.set_offset = set_offset_with_exception 20 | 21 | #NB> test accounts must have > 1 companies for this to work 22 | def sync_companies(self): 23 | simulated_exception = None 24 | STATE = utils.get_clear_state() 25 | catalog = {'stream_alias': 'hubspot_companies'} 26 | 27 | #change count = 1 28 | tap_hubspot.default_company_params['limit'] = 1 29 | 30 | try: 31 | STATE = tap_hubspot.sync_companies(STATE, catalog) 32 | except Exception as ex: 33 | simulated_exception = ex 34 | # logging.exception('strange') 35 | 36 | self.assertIsNot(simulated_exception, None) 37 | 38 | 39 | self.assertEqual(set(utils.caught_records.keys()), {'companies', 'hubspot_contacts_by_company'}) 40 | 41 | #should only emit 1 company record because of the limit 42 | self.assertEqual(len(utils.caught_records['companies']), 1) 43 | self.assertGreater(len(utils.caught_records['hubspot_contacts_by_company']), 0) 44 | 45 | #offset should be set in state 46 | LOGGER.info("utils.caught_state: %s", utils.caught_state) 47 | self.assertNotEqual(utils.caught_state['bookmarks']['companies']['offset'], {}) 48 | 49 | #no bookmark though 50 | self.assertEqual(utils.caught_state['bookmarks']['companies']['hs_lastmodifieddate'], None) 51 | 52 | #change count back to 250 53 | tap_hubspot.default_company_params['limit'] = 250 54 | 55 | #call do_sync and verify: 56 | # 1)sync_companies is called first 57 | # 2)previous retrieved record is not retrieved again 58 | -------------------------------------------------------------------------------- /tap_hubspot/tests/unittests/test_custom_objects.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from unittest.mock import patch 3 | from tap_hubspot import generate_custom_streams, Stream, sync_custom_object_records, Context 4 | 5 | MOCK_CATALOG = { 6 | "streams": [ 7 | { 8 | "stream": "cars", 9 | "tap_stream_id": "cars", 10 | "table_name": "cars", 11 | "schema": { 12 | "type": "object", 13 | "properties": { 14 | "id": {"type": ["null", "string"]}, 15 | "updatedAt": {"type": ["null", "string"], "format": "date-time"}, 16 | "property_model": {"type": ["null", "string"]}, 17 | }, 18 | }, 19 | "metadata": [ 20 | { 21 | "breadcrumb": [], 22 | "metadata": { 23 | "table-key-properties": ["id"], 24 | "forced-replication-method": "INCREMENTAL", 25 | "valid-replication-keys": ["updatedAt"], 26 | "selected": True, 27 | }, 28 | }, 29 | { 30 | "breadcrumb": ["properties", "id"], 31 | "metadata": {"inclusion": "automatic"}, 32 | }, 33 | { 34 | 
"breadcrumb": ["properties", "updatedAt"], 35 | "metadata": {"inclusion": "automatic"}, 36 | }, 37 | { 38 | "breadcrumb": ["properties", "property_model"], 39 | "metadata": {"inclusion": "available", "selected": True}, 40 | }, 41 | ], 42 | } 43 | ] 44 | } 45 | 46 | 47 | class TestGenerateCustomStreams(unittest.TestCase): 48 | @patch("tap_hubspot.sync_custom_object_records") 49 | @patch("tap_hubspot.get_url", return_value="fake_custom_objects_schema_url") 50 | @patch("tap_hubspot.load_shared_schema_refs", return_value="fake_refs") 51 | @patch("tap_hubspot.gen_request_custom_objects") 52 | @patch("tap_hubspot.utils.load_json") 53 | @patch("tap_hubspot.parse_custom_schema") 54 | @patch("tap_hubspot.singer.resolve_schema_references") 55 | @patch("builtins.open", create=True) 56 | @patch("tap_hubspot.LOGGER.warning") 57 | def test_generate_custom_streams( 58 | self, 59 | mock_warning, 60 | mock_open, 61 | mock_resolve_schema, 62 | mock_parse_custom_schema, 63 | mock_load_json, 64 | mock_gen_request_custom_objects, 65 | mock_load_shared_schema_refs, 66 | mock_get_url, 67 | mock_sync_custom_records 68 | ): 69 | """ 70 | test the flow of definition generate_custom_streams 71 | """ 72 | 73 | # Set up mocks and fake data 74 | mode = "DISCOVER" 75 | fake_custom_object = { 76 | "name": "fake_object", 77 | "properties": {"prop1": "type1", "prop2": "type2"}, 78 | } 79 | fake_custom_objects_schema_url = "fake_custom_objects_schema_url" 80 | fake_final_schema = { 81 | "type": "object", 82 | "properties": {"property_fake_object": "fake_value"}, 83 | } 84 | expected_value = [ 85 | {'custom_object_name':'fake_object', 86 | 'stream': Stream(tap_stream_id='fake_object', sync=mock_sync_custom_records, key_properties=['id'], replication_key='updatedAt', replication_method='INCREMENTAL'), 87 | 'schema': {'type': 'object', 'properties': {'property_fake_object': 'fake_value'}}}] 88 | 89 | # Set up mock return values 90 | mock_gen_request_custom_objects.return_value = [fake_custom_object] 91 | mock_load_json.return_value = { 92 | "type": "object", 93 | "properties": {"properties": {}}, 94 | } 95 | mock_parse_custom_schema.return_value = {"prop1": "type1", "prop2": "type2"} 96 | mock_resolve_schema.return_value = fake_final_schema 97 | mock_get_url.return_value = fake_custom_objects_schema_url 98 | 99 | # Call the function 100 | actual_value = generate_custom_streams(mode) 101 | # Verify the expected calls 102 | mock_gen_request_custom_objects.assert_called_once_with( 103 | "custom_objects_schema", 104 | fake_custom_objects_schema_url, 105 | {}, 106 | "results", 107 | "paging", 108 | ) 109 | mock_load_shared_schema_refs.assert_called_once() 110 | mock_get_url.assert_called_once_with("custom_objects_schema") 111 | mock_parse_custom_schema.assert_called_once_with( 112 | "fake_object", {"prop1": "type1", "prop2": "type2"}, is_custom_object=True 113 | ) 114 | mock_resolve_schema.assert_called_once_with( 115 | { 116 | "type": "object", 117 | "properties": { 118 | "properties": { 119 | "type": "object", 120 | "properties": {"prop1": "type1", "prop2": "type2"}, 121 | }, 122 | "property_prop1": "type1", 123 | "property_prop2": "type2", 124 | }, 125 | }, 126 | "fake_refs", 127 | ) 128 | mock_warning.assert_not_called() # No warning should be issued in this case 129 | self.assertEqual(actual_value, expected_value) 130 | 131 | @patch("tap_hubspot.gen_request_custom_objects") 132 | @patch("tap_hubspot.get_start", return_value="2023-07-07T00:00:00Z") 133 | @patch("tap_hubspot.get_selected_property_fields", return_value="model") 
134 | def test_sync_custom_objects( 135 | self, mock_property, mock_start_date, mock_custom_objects 136 | ): 137 | """ 138 | Test the synchronization of custom objects. 139 | """ 140 | 141 | # Set up mocks and fake data 142 | STATE = {"currently_syncing": "cars"} 143 | ctx = Context(MOCK_CATALOG) 144 | stream_id = "cars" 145 | mock_custom_objects.return_value = [ 146 | { 147 | "id": "11111", 148 | "properties": {"model": "Frontier"}, 149 | "updatedAt": "2023-11-09T13:14:22.956Z", 150 | } 151 | ] 152 | expected_output = { 153 | "currently_syncing": "cars", 154 | "bookmarks": {"cars": {"updatedAt": "2023-11-09T13:14:22.956000Z"}}, 155 | } 156 | 157 | # Call the function 158 | actual_output = sync_custom_object_records(STATE, ctx, stream_id) 159 | # Verify the expected calls 160 | self.assertEqual(expected_output, actual_output) 161 | -------------------------------------------------------------------------------- /tap_hubspot/tests/unittests/test_get_start.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import tap_hubspot 3 | from tap_hubspot import get_start 4 | from tap_hubspot import singer 5 | 6 | def get_state(key, value): 7 | """ 8 | Returns a mock state 9 | """ 10 | return { 11 | "bookmarks": { 12 | "stream_id_1": { 13 | "offset": {}, 14 | key: value 15 | } 16 | } 17 | } 18 | 19 | class TestGetStart(unittest.TestCase): 20 | """ 21 | Verify return value of `get_start` function. 22 | """ 23 | def test_get_start_without_state(self): 24 | """ 25 | This test verifies that the `get_start` function returns start_date from CONFIG 26 | if an empty state is passed. 27 | """ 28 | mock_state = {} 29 | expected_value = tap_hubspot.CONFIG["start_date"] 30 | returned_value = get_start(mock_state, "stream_id_1", "current_bookmark", "old_bookmark") 31 | 32 | # Verify that returned value is start_date 33 | self.assertEqual(returned_value, expected_value) 34 | 35 | def test_get_start_with_old_bookmark(self): 36 | """ 37 | This test verifies that the `get_start` function returns old_bookmark from the state 38 | if current_bookmark is not available in the state. 39 | """ 40 | mock_state = get_state("old_bookmark", "OLD_BOOKMARK_VALUE") 41 | expected_value = "OLD_BOOKMARK_VALUE" 42 | 43 | returned_value = get_start(mock_state, "stream_id_1", "current_bookmark", "old_bookmark") 44 | 45 | # Verify that returned value is old_bookmark_value 46 | self.assertEqual(returned_value, expected_value) 47 | 48 | def test_get_start_with_current_bookmark_and_no_old_bookmark(self): 49 | """ 50 | This test verifies that the `get_start` function returns current_bookmark from the state 51 | if current_bookmark is available in the state and old_bookmark is not given. 52 | """ 53 | mock_state = get_state("current_bookmark", "CURR_BOOKMARK_VALUE") 54 | expected_value = "CURR_BOOKMARK_VALUE" 55 | 56 | returned_value = get_start(mock_state, "stream_id_1", "current_bookmark") 57 | 58 | # Verify that returned value is current bookmark 59 | self.assertEqual(returned_value, expected_value) 60 | 61 | def test_get_start_with_empty_start__no_old_bookmark(self): 62 | """ 63 | This test verifies that the `get_start` function returns start_date from CONFIG 64 | if an empty state is passed and old_bookmark is not given.
65 | """ 66 | mock_state = {} 67 | expected_value = tap_hubspot.CONFIG["start_date"] 68 | 69 | returned_value = get_start(mock_state, "stream_id_1", "current_bookmark") 70 | 71 | # Verify that returned value is start_date 72 | self.assertEqual(returned_value, expected_value) 73 | 74 | def test_get_start_with_both_bookmark(self): 75 | """ 76 | This test verifies that the `get_start` function returns current_bookmark from the state 77 | if both old and current bookmark is available in the state. 78 | """ 79 | 80 | mock_state = { 81 | "bookmarks": { 82 | "stream_id_1": { 83 | "offset": {}, 84 | "old_bookmark": "OLD_BOOKMARK_VALUE", 85 | "current_bookmark": "CURR_BOOKMARK_VALUE" 86 | } 87 | } 88 | } 89 | expected_value = "CURR_BOOKMARK_VALUE" 90 | 91 | returned_value = get_start(mock_state, "stream_id_1", "current_bookmark", "old_bookmark") 92 | 93 | # Verify that returned value is current bookmark 94 | self.assertEqual(returned_value, expected_value) 95 | -------------------------------------------------------------------------------- /tap_hubspot/tests/unittests/test_request_timeout.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import requests 3 | from unittest import mock 4 | import tap_hubspot 5 | class TestRequestTimeoutValue(unittest.TestCase): 6 | 7 | def test_integer_request_timeout_in_config(self): 8 | """ 9 | Verify that if request_timeout is provided in config(integer value) then it should be use 10 | """ 11 | tap_hubspot.CONFIG.update({"request_timeout": 100}) # integer timeout in config 12 | 13 | request_timeout = tap_hubspot.get_request_timeout() 14 | 15 | self.assertEqual(request_timeout, 100.0) # Verify timeout value 16 | 17 | def test_float_request_timeout_in_config(self): 18 | """ 19 | Verify that if request_timeout is provided in config(float value) then it should be use 20 | """ 21 | tap_hubspot.CONFIG.update({"request_timeout": 100.5}) # float timeout in config 22 | 23 | request_timeout = tap_hubspot.get_request_timeout() 24 | 25 | self.assertEqual(request_timeout, 100.5) # Verify timeout value 26 | 27 | def test_string_request_timeout_in_config(self): 28 | """ 29 | Verify that if request_timeout is provided in config(string value) then it should be use 30 | """ 31 | tap_hubspot.CONFIG.update({"request_timeout": "100"}) # string format timeout in config 32 | 33 | request_timeout = tap_hubspot.get_request_timeout() 34 | 35 | self.assertEqual(request_timeout, 100.0) # Verify timeout value 36 | 37 | def test_empty_string_request_timeout_in_config(self): 38 | """ 39 | Verify that if request_timeout is provided in config with empty string then default value is used 40 | """ 41 | tap_hubspot.CONFIG.update({"request_timeout": ""}) # empty string in config 42 | 43 | request_timeout = tap_hubspot.get_request_timeout() 44 | 45 | self.assertEqual(request_timeout, 300) # Verify timeout value 46 | 47 | def test_zero_request_timeout_in_config(self): 48 | """ 49 | Verify that if request_timeout is provided in config with zero value then default value is used 50 | """ 51 | tap_hubspot.CONFIG.update({"request_timeout": 0}) # zero value in config 52 | 53 | request_timeout = tap_hubspot.get_request_timeout() 54 | 55 | self.assertEqual(request_timeout, 300) # Verify timeout value 56 | 57 | def test_zero_string_request_timeout_in_config(self): 58 | """ 59 | Verify that if request_timeout is provided in config with zero in string format then default value is used 60 | """ 61 | tap_hubspot.CONFIG.update({"request_timeout": '0'}) # 
zero value as a string in config 62 | 63 | request_timeout = tap_hubspot.get_request_timeout() 64 | 65 | self.assertEqual(request_timeout, 300) # Verify timeout value 66 | 67 | def test_no_request_timeout_in_config(self): 68 | """ 69 | Verify that if request_timeout is not provided in the config then the default value is used 70 | """ 71 | tap_hubspot.CONFIG = {} 72 | request_timeout = tap_hubspot.get_request_timeout() 73 | 74 | self.assertEqual(request_timeout, 300) # Verify timeout value 75 | 76 | 77 | @mock.patch("time.sleep") 78 | class TestRequestTimeoutBackoff(unittest.TestCase): 79 | 80 | @mock.patch('requests.Session.send', side_effect = requests.exceptions.Timeout) 81 | @mock.patch("requests.Request.prepare") 82 | @mock.patch('tap_hubspot.get_params_and_headers', return_value = ({}, {})) 83 | def test_request_timeout_backoff(self, mocked_get, mocked_prepare, mocked_send, mocked_sleep): 84 | """ 85 | Verify the request function backs off only 5 times on a Timeout exception. 86 | """ 87 | try: 88 | tap_hubspot.request('dummy_url', {}) 89 | except Exception: 90 | pass 91 | 92 | # Verify that Session.send is called 5 times 93 | self.assertEqual(mocked_send.call_count, 5) 94 | 95 | @mock.patch('tap_hubspot.get_params_and_headers', return_value = ({}, {})) 96 | @mock.patch('requests.post', side_effect = requests.exceptions.Timeout) 97 | def test_request_timeout_backoff_for_post_search_endpoint(self, mocked_post, mocked_get, mocked_sleep): 98 | """ 99 | Verify the post_search_endpoint function backs off only 5 times on a Timeout exception. 100 | """ 101 | try: 102 | tap_hubspot.post_search_endpoint('dummy_url', {}) 103 | except Exception: 104 | pass 105 | 106 | # Verify that requests.post is called 5 times 107 | self.assertEqual(mocked_post.call_count, 5) 108 | 109 | @mock.patch('requests.post', side_effect = requests.exceptions.Timeout) 110 | def test_request_timeout_backoff_for_acquire_access_token_from_refresh_token(self, mocked_post, mocked_sleep): 111 | """ 112 | Verify the request function backs off only 5 times, instead of 25, on a Timeout exception thrown from the `acquire_access_token_from_refresh_token` method. 113 | Here, get_params_and_headers is called from the request method, and acquire_access_token_from_refresh_token is called from get_params_and_headers.
114 | """ 115 | try: 116 | tap_hubspot.post_search_endpoint('dummy_url', {}) 117 | except Exception: 118 | pass 119 | 120 | # Verify that requests.post is called 5 times 121 | self.assertEqual(mocked_post.call_count, 5) 122 | -------------------------------------------------------------------------------- /tap_hubspot/tests/unittests/test_tickets.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from unittest.mock import patch 3 | 4 | from tap_hubspot import sync_tickets 5 | 6 | mock_response_data = { 7 | "results": [{ 8 | "updatedAt": "2022-08-18T12:57:17.587Z", 9 | "createdAt": "2019-08-06T02:43:01.930Z", 10 | "name": "hs_file_upload", 11 | "label": "File upload", 12 | "type": "string", 13 | "fieldType": "file", 14 | "description": "Files attached to a support form by a contact.", 15 | "groupName": "ticketinformation", 16 | "options": [], 17 | "displayOrder": -1, 18 | "calculated": False, 19 | "externalOptions": False, 20 | "hasUniqueValue": False, 21 | "hidden": False, 22 | "hubspotDefined": True, 23 | "modificationMetadata": { 24 | "archivable": True, 25 | "readOnlyDefinition": True, 26 | "readOnlyValue": False 27 | }, 28 | "formField": True 29 | }] 30 | } 31 | 32 | 33 | class MockResponse: 34 | 35 | def __init__(self, json_data): 36 | self.json_data = json_data 37 | 38 | def json(self): 39 | return self.json_data 40 | 41 | 42 | class MockContext: 43 | def get_catalog_from_id(self, stream_name): 44 | return { 45 | "stream": "tickets", 46 | "tap_stream_id": "tickets", 47 | "schema": { 48 | "type": "object", 49 | "properties": { 50 | "id": { 51 | "type": "string" 52 | }, 53 | "updatedAt": { 54 | "type": [ 55 | "null", 56 | "string" 57 | ], 58 | "format": "date-time" 59 | }, 60 | "properties": { 61 | "type": "object", 62 | "properties": { 63 | "hs_all_team_ids": { 64 | "type": [ 65 | "null", 66 | "string" 67 | ] 68 | } 69 | } 70 | }, 71 | "property_hs_all_team_ids": { 72 | "type": [ 73 | "null", 74 | "string" 75 | ] 76 | } 77 | } 78 | }, 79 | "metadata": [{ 80 | "breadcrumb": [], 81 | "metadata": { 82 | "table-key-properties": ["id"], 83 | "forced-replication-method": "INCREMENTAL", 84 | "valid-replication-keys": [ 85 | "updatedAt" 86 | ], 87 | "selected": True 88 | } 89 | }, 90 | { 91 | "breadcrumb": ["properties", "id"], 92 | "metadata": { 93 | "inclusion": "automatic" 94 | } 95 | }, 96 | 97 | { 98 | "breadcrumb": ["properties", "updatedAt"], 99 | "metadata": { 100 | "inclusion": "automatic" 101 | } 102 | }, 103 | { 104 | "breadcrumb": ["properties", "properties"], 105 | "metadata": { 106 | "inclusion": "available" 107 | } 108 | }, 109 | 110 | { 111 | "breadcrumb": ["properties", "property_hs_all_team_ids"], 112 | "metadata": { 113 | "inclusion": "available", 114 | "selected": True 115 | } 116 | } 117 | ] 118 | } 119 | 120 | 121 | class TestTickets(unittest.TestCase): 122 | 123 | @patch('tap_hubspot.request', return_value=MockResponse(mock_response_data)) 124 | @patch('tap_hubspot.get_start', return_value='2023-01-01T00:00:00Z') 125 | @patch('tap_hubspot.get_v3_records') 126 | def test_ticket_params_are_validated(self, mocked_gen_request, mocked_get_start, 127 | mock_request_response): 128 | """ 129 | # Validating the parameters passed while making the API request to list the tickets 130 | """ 131 | mock_context = MockContext() 132 | expected_param = {'limit': 100, 133 | 'associations': 'contact,company,deals', 134 | 'properties': 'hs_all_team_ids', 135 | 'archived': False 136 | } 137 | expected_return_value = 
{'currently_syncing': 'tickets', 'bookmarks': { 138 | 'tickets': {'updatedAt': '2023-01-01T00:00:00.000000Z'}}} 139 | 140 | return_value = sync_tickets({'currently_syncing': 'tickets'}, mock_context) 141 | self.assertEqual( 142 | expected_return_value, 143 | return_value 144 | ) 145 | mocked_gen_request.assert_called_once_with('tickets', 146 | 'https://api.hubapi.com/crm/v4/objects/tickets', 147 | expected_param, 'results', 'paging') 148 | -------------------------------------------------------------------------------- /tap_hubspot/tests/utils.py: -------------------------------------------------------------------------------- 1 | import singer 2 | import singer.bookmarks 3 | import os 4 | import tap_hubspot 5 | 6 | LOGGER = singer.get_logger() 7 | 8 | caught_records = {} 9 | caught_bookmarks = [] 10 | caught_state = {} 11 | caught_schema = {} 12 | caught_pks = {} 13 | 14 | 15 | def verify_environment_vars(): 16 | missing_envs = [x for x in [os.getenv('TAP_HUBSPOT_REDIRECT_URI'), 17 | os.getenv('TAP_HUBSPOT_CLIENT_ID'), 18 | os.getenv('TAP_HUBSPOT_CLIENT_SECRET'), 19 | os.getenv('TAP_HUBSPOT_REFRESH_TOKEN')] if x is None] 20 | if len(missing_envs) != 0: 21 | #pylint: disable=line-too-long 22 | raise Exception("set TAP_HUBSPOT_REDIRECT_URI, TAP_HUBSPOT_CLIENT_ID, TAP_HUBSPOT_CLIENT_SECRET, TAP_HUBSPOT_REFRESH_TOKEN") 23 | 24 | def seed_tap_hubspot_config(): 25 | tap_hubspot.CONFIG = { 26 | "access_token": None, 27 | "token_expires": None, 28 | 29 | "redirect_uri": os.environ['TAP_HUBSPOT_REDIRECT_URI'], 30 | "client_id": os.environ['TAP_HUBSPOT_CLIENT_ID'], 31 | "client_secret": os.environ['TAP_HUBSPOT_CLIENT_SECRET'], 32 | "refresh_token": os.environ['TAP_HUBSPOT_REFRESH_TOKEN'], 33 | "start_date": "2001-01-01T00:00:00Z" 34 | } 35 | 36 | def get_clear_state(): 37 | return { 38 | "bookmarks": { 39 | "contacts": { 40 | "offset": {}, 41 | "lastmodifieddate": None 42 | }, 43 | "companies": { 44 | "offset": {}, 45 | "hs_lastmodifieddate": None 46 | } 47 | 48 | }, 49 | "currently_syncing": None 50 | } 51 | 52 | 53 | #pylint: disable=line-too-long 54 | def our_write_bookmark(state, table_name, bookmark_key, bookmark_value): 55 | caught_bookmarks.append([bookmark_key, bookmark_value]) 56 | state = singer.bookmarks.write_bookmark(state, table_name, bookmark_key, bookmark_value) 57 | return state 58 | 59 | def our_write_schema(table_name, schema, pks): 60 | caught_pks[table_name] = pks 61 | caught_schema[table_name] = schema 62 | 63 | def our_write_state(state): 64 | # pylint: disable=global-statement 65 | LOGGER.info("our_write_state: %s", state) 66 | global caught_state 67 | caught_state = state 68 | return state 69 | 70 | def our_write_record(table_name, record): 71 | if caught_records.get(table_name) is None: 72 | caught_records[table_name] = [] 73 | 74 | caught_records[table_name].append(record) 75 | 76 | def write_to_singer(): 77 | singer.write_bookmark = our_write_bookmark 78 | singer.write_state = our_write_state 79 | singer.write_record = our_write_record 80 | singer.write_schema = our_write_schema 81 | -------------------------------------------------------------------------------- /tests/base_hubspot.py: -------------------------------------------------------------------------------- 1 | import os 2 | import unittest 3 | from datetime import datetime as dt 4 | from datetime import timedelta 5 | 6 | import tap_tester.menagerie as menagerie 7 | import tap_tester.connections as connections 8 | import tap_tester.runner as runner 9 | from tap_tester.base_suite_tests.base_case import BaseCase 
10 | from tap_tester import LOGGER 11 | 12 | 13 | class HubspotBaseCase(BaseCase): 14 | 15 | # set the default start date which can be overridden in the tests. 16 | start_date = BaseCase.timedelta_formatted(dt.utcnow(), delta=timedelta(days=-1)) 17 | 18 | EXTRA_FIELDS = { 19 | "contacts": { "versionTimestamp" } 20 | } 21 | 22 | def setUp(self): 23 | missing_envs = [x for x in [ 24 | 'TAP_HUBSPOT_REDIRECT_URI', 25 | 'TAP_HUBSPOT_CLIENT_ID', 26 | 'TAP_HUBSPOT_CLIENT_SECRET', 27 | 'TAP_HUBSPOT_REFRESH_TOKEN' 28 | ] if os.getenv(x) is None] 29 | if missing_envs: 30 | raise Exception("Missing environment variables: {}".format(missing_envs)) 31 | 32 | @staticmethod 33 | def get_type(): 34 | return "platform.hubspot" 35 | 36 | @staticmethod 37 | def tap_name(): 38 | return "tap-hubspot" 39 | 40 | def get_properties(self): 41 | return {'start_date': self.start_date} 42 | 43 | def get_credentials(self): 44 | return {'refresh_token': os.getenv('TAP_HUBSPOT_REFRESH_TOKEN'), 45 | 'client_secret': os.getenv('TAP_HUBSPOT_CLIENT_SECRET'), 46 | 'redirect_uri': os.getenv('TAP_HUBSPOT_REDIRECT_URI'), 47 | 'client_id': os.getenv('TAP_HUBSPOT_CLIENT_ID')} 48 | 49 | @classmethod 50 | def expected_metadata(cls): # DOCS_BUG https://stitchdata.atlassian.net/browse/DOC-1523) 51 | """The expected streams and metadata about the streams""" 52 | 53 | return { 54 | "campaigns": { 55 | BaseCase.PRIMARY_KEYS: {"id"}, 56 | BaseCase.REPLICATION_METHOD: BaseCase.FULL_TABLE, 57 | BaseCase.OBEYS_START_DATE: False 58 | }, 59 | "companies": { 60 | BaseCase.PRIMARY_KEYS: {"companyId"}, 61 | BaseCase.REPLICATION_METHOD: BaseCase.INCREMENTAL, 62 | BaseCase.REPLICATION_KEYS: {"property_hs_lastmodifieddate"}, 63 | BaseCase.API_LIMIT: 250, 64 | BaseCase.OBEYS_START_DATE: True 65 | }, 66 | "contact_lists": { 67 | BaseCase.PRIMARY_KEYS: {"listId"}, 68 | BaseCase.REPLICATION_METHOD: BaseCase.INCREMENTAL, 69 | BaseCase.REPLICATION_KEYS: {"updatedAt"}, 70 | BaseCase.API_LIMIT: 250, 71 | BaseCase.OBEYS_START_DATE: True 72 | }, 73 | "contacts": { 74 | BaseCase.PRIMARY_KEYS: {"vid"}, 75 | BaseCase.REPLICATION_METHOD: BaseCase.INCREMENTAL, 76 | BaseCase.REPLICATION_KEYS: {"versionTimestamp"}, 77 | BaseCase.API_LIMIT: 100, 78 | BaseCase.OBEYS_START_DATE: True 79 | }, 80 | "contacts_by_company": { 81 | BaseCase.PRIMARY_KEYS: {"company-id", "contact-id"}, 82 | BaseCase.REPLICATION_METHOD: BaseCase.INCREMENTAL, 83 | BaseCase.API_LIMIT: 100, 84 | BaseCase.OBEYS_START_DATE: True, 85 | BaseCase.PARENT_STREAM: 'companies' 86 | }, 87 | "deal_pipelines": { 88 | BaseCase.PRIMARY_KEYS: {"pipelineId"}, 89 | BaseCase.REPLICATION_METHOD: BaseCase.FULL_TABLE, 90 | BaseCase.OBEYS_START_DATE: False, 91 | }, 92 | "deals": { 93 | BaseCase.PRIMARY_KEYS: {"dealId"}, 94 | BaseCase.REPLICATION_METHOD: BaseCase.INCREMENTAL, 95 | BaseCase.REPLICATION_KEYS: {"property_hs_lastmodifieddate"}, 96 | BaseCase.OBEYS_START_DATE: True 97 | }, 98 | "email_events": { 99 | BaseCase.PRIMARY_KEYS: {"id"}, 100 | BaseCase.REPLICATION_METHOD: BaseCase.INCREMENTAL, 101 | BaseCase.REPLICATION_KEYS: {"startTimestamp"}, 102 | BaseCase.API_LIMIT: 1000, 103 | BaseCase.OBEYS_START_DATE: True 104 | }, 105 | "engagements": { 106 | BaseCase.PRIMARY_KEYS: {"engagement_id"}, 107 | BaseCase.REPLICATION_METHOD: BaseCase.INCREMENTAL, 108 | BaseCase.REPLICATION_KEYS: {"lastUpdated"}, 109 | BaseCase.API_LIMIT: 250, 110 | BaseCase.OBEYS_START_DATE: True 111 | }, 112 | "forms": { 113 | BaseCase.PRIMARY_KEYS: {"guid"}, 114 | BaseCase.REPLICATION_METHOD: BaseCase.INCREMENTAL, 115 | 
BaseCase.REPLICATION_KEYS: {"updatedAt"}, 116 | BaseCase.OBEYS_START_DATE: True 117 | }, 118 | "owners": { 119 | BaseCase.PRIMARY_KEYS: {"id"}, 120 | BaseCase.REPLICATION_METHOD: BaseCase.INCREMENTAL, 121 | BaseCase.REPLICATION_KEYS: {"updatedAt"}, 122 | BaseCase.OBEYS_START_DATE: True # TODO is this a BUG? 123 | }, 124 | "subscription_changes": { 125 | BaseCase.PRIMARY_KEYS: {"timestamp", "portalId", "recipient"}, 126 | BaseCase.REPLICATION_METHOD: BaseCase.INCREMENTAL, 127 | BaseCase.REPLICATION_KEYS: {"startTimestamp"}, 128 | BaseCase.API_LIMIT: 1000, 129 | BaseCase.OBEYS_START_DATE: True 130 | }, 131 | "workflows": { 132 | BaseCase.PRIMARY_KEYS: {"id"}, 133 | BaseCase.REPLICATION_METHOD: BaseCase.INCREMENTAL, 134 | BaseCase.REPLICATION_KEYS: {"updatedAt"}, 135 | BaseCase.OBEYS_START_DATE: True 136 | }, 137 | "tickets": { 138 | BaseCase.PRIMARY_KEYS: {"id"}, 139 | BaseCase.REPLICATION_METHOD: BaseCase.INCREMENTAL, 140 | BaseCase.REPLICATION_KEYS: {"updatedAt"}, 141 | BaseCase.API_LIMIT: 100, 142 | BaseCase.OBEYS_START_DATE: True 143 | }, 144 | # below are the custom_objects stream 145 | "cars": { 146 | BaseCase.PRIMARY_KEYS: {"id"}, 147 | BaseCase.REPLICATION_METHOD: BaseCase.INCREMENTAL, 148 | BaseCase.REPLICATION_KEYS: {"updatedAt"}, 149 | BaseCase.API_LIMIT: 100, 150 | BaseCase.EXPECTED_PAGE_SIZE: 100, 151 | BaseCase.OBEYS_START_DATE: True 152 | }, 153 | "co_firsts": { 154 | BaseCase.PRIMARY_KEYS: {"id"}, 155 | BaseCase.REPLICATION_METHOD: BaseCase.INCREMENTAL, 156 | BaseCase.REPLICATION_KEYS: {"updatedAt"}, 157 | BaseCase.API_LIMIT: 100, 158 | BaseCase.EXPECTED_PAGE_SIZE: 100, 159 | BaseCase.OBEYS_START_DATE: True 160 | }, 161 | "custom_object_campaigns": { 162 | BaseCase.PRIMARY_KEYS: {"id"}, 163 | BaseCase.REPLICATION_METHOD: BaseCase.INCREMENTAL, 164 | BaseCase.REPLICATION_KEYS: {"updatedAt"}, 165 | BaseCase.API_LIMIT: 100, 166 | BaseCase.EXPECTED_PAGE_SIZE: 100, 167 | BaseCase.OBEYS_START_DATE: True 168 | }, 169 | "custom_object_contacts": { 170 | BaseCase.PRIMARY_KEYS: {"id"}, 171 | BaseCase.REPLICATION_METHOD: BaseCase.INCREMENTAL, 172 | BaseCase.REPLICATION_KEYS: {"updatedAt"}, 173 | BaseCase.API_LIMIT: 100, 174 | BaseCase.EXPECTED_PAGE_SIZE: 100, 175 | BaseCase.OBEYS_START_DATE: True 176 | } 177 | 178 | } 179 | -------------------------------------------------------------------------------- /tests/client_tester.py: -------------------------------------------------------------------------------- 1 | import json 2 | import time 3 | from client import TestClient 4 | from base import HubspotBaseTest 5 | 6 | class TestHubspotTestClient(HubspotBaseTest): 7 | """ 8 | Test the basic functionality of our Test Client. This is a tool for sanity checks, nothing more. 9 | 10 | To check an individual crud method, uncomment the corresponding test case below, and execute this file 11 | as if it is a normal tap-tester test via bin/run-test. 
12 | """ 13 | def __init__(self, *args, **kwargs): 14 | super().__init__(*args, **kwargs) 15 | self.test_client = TestClient(self.get_properties()['start_date']) 16 | 17 | ########################################################################## 18 | ### TESTING CREATES 19 | ########################################################################## 20 | 21 | # def test_contacts_create(self): 22 | # # Testing contacts Post 23 | # old_records = self.test_client.get_contacts() 24 | # our_record = self.test_client.create_contacts() 25 | # new_records = self.test_client.get_contacts() 26 | # assert len(old_records) < len(new_records), \ 27 | # f"Before contacts post found {len(old_records)} records. After post found {len(new_records)} records" 28 | 29 | # def test_contacts_create_stability(self): 30 | # old_records = self.test_client.get_contacts() 31 | # our_record = self.test_client.create_contacts() 32 | # responses = [] 33 | # for i in range(10): 34 | # new_records = self.test_client.get_contacts() 35 | # responses.append(new_records) 36 | # time.sleep(1) 37 | # all_versions = [record['versionTimestamp'] for response in responses 38 | # for record in response if record['vid'] == our_record[0]['vid']] 39 | # from pprint import pprint as pp 40 | # pp(all_versions) 41 | 42 | # def test_companies_create(self): 43 | # # Testing companies Post 44 | 45 | # old_records = self.test_client.get_companies('2021-08-25T00:00:00.000000Z') 46 | # our_record = self.test_client.create_companies() 47 | # now = time.time() 48 | # time.sleep(6) 49 | 50 | # new_records = self.test_client.get_companies('2021-08-25T00:00:00.000000Z') 51 | # time_for_get = time.time()-now 52 | # print(time_for_get) 53 | 54 | # assert len(old_records) < len(new_records), \ 55 | # f"Before companies post found {len(old_records)} records. After post found {len(new_records)} records" 56 | 57 | # def test_contact_lists_create(self): 58 | # # Testing contact_lists POST 59 | 60 | # old_records = self.test_client.get_contact_lists() 61 | # our_record = self.test_client.create_contact_lists() 62 | # new_records = self.test_client.get_contact_lists() 63 | 64 | # assert len(old_records) < len(new_records), \ 65 | # f"Before post found {len(old_records)} records. After post found {len(new_records)} records" 66 | 67 | 68 | # def test_contacts_by_company_create(self): 69 | # # Testing contacts_by_company PUT 70 | 71 | 72 | # old_contact_records = self.test_client.get_contacts() 73 | # old_company_records = self.test_client.get_companies('2021-08-25T00:00:00.000000Z') 74 | # old_records = self.test_client.get_contacts_by_company([old_company_records[0]["companyId"]]) 75 | # our_record = self.test_client.create_contacts_by_company() 76 | # new_records = self.test_client.get_contacts_by_company([old_company_records[0]["companyId"]]) 77 | # assert len(old_records) < len(new_records), \ 78 | # f"Before post found {len(old_records)} records. After post found {len(new_records)} records" 79 | 80 | 81 | # def test_deal_pipelines_create(self): 82 | # # Testing deal_pipelines POST 83 | 84 | # old_records = self.test_client.get_deal_pipelines() 85 | # our_record = self.test_client.create_deal_pipelines() 86 | # new_records = self.test_client.get_deal_pipelines() 87 | # assert len(old_records) < len(new_records), \ 88 | # f"Before post found {len(old_records)} records. 
After post found {len(new_records)} records" 89 | 90 | # def test_deal_pipelines_deletes(self): 91 | # # Testing deal_pipelines DELETE 92 | # import ipdb; ipdb.set_trace() 93 | # 1+1 94 | # our_record = self.test_client.create_deal_pipelines() 95 | # old_records = self.test_client.get_deal_pipelines() 96 | # delete_records = self.test_client.delete_deal_pipelines(1) 97 | # new_records = self.test_client.get_deal_pipelines() 98 | # assert len(old_records) > len(new_records), \ 99 | # f"Before post found {len(old_records)} records. After post found {len(new_records)} records" 100 | 101 | # def test_deals_create(self): 102 | # # Testing deals POST 103 | 104 | # old_records = self.test_client.get_deals() 105 | # our_record = self.test_client.create_deals() 106 | # new_records = self.test_client.get_deals() 107 | # assert len(old_records) < len(new_records), \ 108 | # f"Before post found {len(old_records)} records. After post found {len(new_records)} records" 109 | 110 | 111 | # def test_subscription_changes_and_email_events_create(self): 112 | # # Testing subscription_changes and email_events POST 113 | 114 | # old_emails = self.test_client.get_email_events() 115 | # old_subs = self.test_client.get_subscription_changes() 116 | # our_record = self.test_client.create_subscription_changes() 117 | # time.sleep(10) 118 | # new_subs = self.test_client.get_subscription_changes() 119 | # new_emails = self.test_client.get_email_events() 120 | 121 | # assert len(old_subs) < len(new_subs), \ 122 | # f"Before post found {len(old_subs)} subs. After post found {len(new_subs)} subs" 123 | # assert len(old_emails) < len(new_emails), \ 124 | # f"Before post found {len(old_emails)} emails. After post found {len(new_emails)} emails" 125 | # print(f"Before {len(old_subs)} subs. After found {len(new_subs)} subs") 126 | # print(f"Before {len(old_emails)} emails. After found {len(new_emails)} emails") 127 | 128 | # def test_engagements_create(self): 129 | # # Testing create_engagements POST 130 | 131 | # old_records = self.test_client.get_engagements() 132 | # our_record = self.test_client.create_engagements() 133 | # new_records = self.test_client.get_engagements() 134 | # assert len(old_records) < len(new_records), \ 135 | # f"Before post found {len(old_records)} records. After post found {len(new_records)} records" 136 | 137 | 138 | # def test_forms_create(self): 139 | # # Testing create_forms POST 140 | # old_records = self.test_client.get_forms() 141 | # our_record = self.test_client.create_forms() 142 | # new_records = self.test_client.get_forms() 143 | # assert len(old_records) < len(new_records), \ 144 | # f"Before post found {len(old_records)} records. After post found {len(new_records)} records" 145 | 146 | 147 | # def test_workflows_create(self): 148 | # # Testing create_workflows POST 149 | 150 | # old_records = self.test_client.get_workflows() 151 | # our_record = self.test_client.create_workflows() 152 | # new_records = self.test_client.get_workflows() 153 | # assert len(old_records) < len(new_records), \ 154 | # f"Before post found {len(old_records)} records. 
After post found {len(new_records)} records" 155 | 156 | 157 | ########################################################################## 158 | ### TESTING UPDATES 159 | ########################################################################## 160 | 161 | 162 | # def test_workflows_update(self): # TODO This failed to change the record 163 | # # Testing update_workflows POST 164 | 165 | # # grab a contact's email to use as the update 166 | # contacts = self.test_client.get_contacts() 167 | # for contact in contacts: 168 | # if contact['properties'].get('email'): 169 | # contact_email = contact['properties']['email']['value'] 170 | # break 171 | 172 | # # old 173 | # workflow = self.test_client.create('workflows') 174 | # workflow_id = workflow[0]['id'] 175 | # old_record = self.test_client._get_workflows_by_pk(workflow_id=workflow_id) 176 | 177 | 178 | # # do the update 179 | # our_record = self.test_client.update_workflows(workflow_id=workflow_id, contact_email=contact_email) 180 | 181 | # # new 182 | # new_record = self.test_client._get_workflows_by_pk(workflow_id=workflow_id) 183 | 184 | # self.assertNotEqual(old_record, new_record) 185 | 186 | # def test_contacts_update(self): 187 | # new_record = self.test_client.create_contacts() 188 | # record_vid = new_record[0]['vid'] 189 | # old_email = new_record[0]['properties']['email']['value'] 190 | 191 | # updated_record = self.test_client.update_contacts(record_vid) 192 | 193 | # self.assertNotEqual(updated_record[0]['properties']['email']['value'], old_email) 194 | 195 | # def test_campaigns_update(self): TODO 196 | # """No endpoint found.""" 197 | # self.fail("test_campaigns_update not implemented") 198 | 199 | # def test_companies_update(self): 200 | # initial_record = self.test_client.create_companies() 201 | # time.sleep(6) 202 | # record_id = initial_record[0]['companyId'] 203 | # initial_value = initial_record[0]['properties']['description']['value'] 204 | 205 | # updated_record = self.test_client.update_companies(record_id) 206 | # updated_value = updated_record['properties']['description']['value'] 207 | 208 | # self.assertNotEqual(initial_value, updated_value) 209 | 210 | # def test_contact_lists_update(self): 211 | # initial_record = self.test_client.create_contact_lists() 212 | 213 | # record_id = initial_record[0]['listId'] 214 | # initial_value = initial_record[0]['name'] 215 | 216 | # updated_record = self.test_client.update_contact_lists(record_id) 217 | # updated_value = updated_record['name'] 218 | 219 | # self.assertNotEqual(initial_value, updated_value) 220 | 221 | # def test_deal_pipelines_update(self): 222 | # initial_record = self.test_client.get_deal_pipelines() 223 | 224 | # record_id = initial_record[0]['pipelineId'] 225 | # initial_value = initial_record[0]['label'] 226 | 227 | # updated_record = self.test_client.update_deal_pipelines(record_id) 228 | # updated_value = updated_record['label'] 229 | 230 | # self.assertNotEqual(initial_value, updated_value) 231 | 232 | # def test_deals_update(self): 233 | # initial_record = self.test_client.get_deals() 234 | 235 | # record_id = initial_record[0]['dealId'] 236 | # initial_value = initial_record[0]['properties']['dealname']['value'] 237 | 238 | # updated_record = self.test_client.update_deals(record_id) 239 | # updated_value = updated_record['properties']['dealname']['value'] 240 | 241 | # self.assertNotEqual(initial_value, updated_value) 242 | 243 | # def test_forms_update(self): 244 | # initial_record = self.test_client.get_forms() 245 | 246 | # record_id = 
initial_record[0]['guid'] 247 | # initial_value = initial_record[0]['name'] 248 | 249 | # updated_record = self.test_client.update_forms(record_id) 250 | # updated_value = updated_record['name'] 251 | 252 | # self.assertNotEqual(initial_value, updated_value) 253 | 254 | # def test_owners_update(self): TODO 255 | # """No endpoint found.""" 256 | # self.fail("test_owners_update not implemented") 257 | 258 | # def test_engagements_update(self): 259 | # initial_record = self.test_client.get_engagements() 260 | 261 | # record_id = initial_record[0]['engagement_id'] 262 | # initial_value = initial_record[0]['metadata'] 263 | 264 | # updated_record = self.test_client.update_engagements(record_id) 265 | # updated_value = updated_record['metadata'] 266 | 267 | # self.assertNotEqual(initial_value, updated_value) 268 | 269 | ########################################################################## 270 | ### TODO updates 271 | ########################################################################## 272 | # def test_contacts_by_company_update(self): 273 | # pass 274 | 275 | # def test_email_events_update(self): 276 | # pass 277 | 278 | 279 | # def test_subscription_changes_update(self): 280 | # pass 281 | -------------------------------------------------------------------------------- /tests/test_hubspot_all_fields.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | 3 | import tap_tester.connections as connections 4 | import tap_tester.menagerie as menagerie 5 | import tap_tester.runner as runner 6 | from tap_tester import LOGGER 7 | 8 | from base import HubspotBaseTest 9 | from client import TestClient 10 | 11 | def get_matching_actual_record_by_pk(expected_primary_key_dict, actual_records): 12 | ret_records = [] 13 | can_save = True 14 | for record in actual_records: 15 | for key, value in expected_primary_key_dict.items(): 16 | actual_value = record[key] 17 | if actual_value != value: 18 | can_save = False 19 | break 20 | if can_save: 21 | ret_records.append(record) 22 | can_save = True 23 | return ret_records 24 | 25 | FIELDS_ADDED_BY_TAP = { 26 | # In the 'contacts' stream, 'versionTimestamp' is not available in the response of the second call. 27 | # In the 1st call, the tap retrieves records for all contacts and, from those records, collects vids (contact ids). 28 | # These records contain the versionTimestamp field. 29 | # In the 2nd call, the vids collected from the 1st call are used to retrieve the whole contact record. 30 | # Here, the records collected for detailed contact information do not contain the versionTimestamp field. 31 | # So, we add the versionTimestamp field (fetched from the 1st-call records) explicitly to the records of the 2nd call.
32 | "contacts": { "versionTimestamp" } 33 | } 34 | 35 | KNOWN_EXTRA_FIELDS = { 36 | 'deals': { 37 | # BUG_TDL-14993 | https://jira.talendforge.org/browse/TDL-14993 38 | # Has an value of object with key 'value' and value 'Null' 39 | 'property_hs_date_entered_1258834', 40 | 'property_hs_time_in_example_stage1660743867503491_315775040', 41 | }, 42 | } 43 | 44 | KNOWN_MISSING_FIELDS = { 45 | 'contacts':{ # BUG https://jira.talendforge.org/browse/TDL-16016 46 | 'property_hs_latest_source', 47 | 'property_hs_latest_source_data_1', 48 | 'property_hs_latest_source_data_2', 49 | 'property_hs_latest_source_timestamp', 50 | 'property_hs_timezone', 51 | 'property_hs_v2_cumulative_time_in_lead', 52 | 'property_hs_v2_cumulative_time_in_opportunity', 53 | 'property_hs_v2_cumulative_time_in_subscriber', 54 | 'property_hs_v2_date_entered_customer', 55 | 'property_hs_v2_date_entered_lead', 56 | 'property_hs_v2_date_entered_opportunity', 57 | 'property_hs_v2_date_entered_subscriber', 58 | 'property_hs_v2_date_exited_lead', 59 | 'property_hs_v2_date_exited_opportunity', 60 | 'property_hs_v2_date_exited_subscriber', 61 | 'property_hs_v2_latest_time_in_lead', 62 | 'property_hs_v2_latest_time_in_opportunity', 63 | 'property_hs_v2_latest_time_in_subscriber', 64 | }, 65 | 'contact_lists': { # BUG https://jira.talendforge.org/browse/TDL-14996 66 | 'authorId', 67 | 'teamIds', 68 | 'internal', 69 | 'ilsFilterBranch', 70 | 'limitExempt', 71 | }, 72 | 'email_events': { # BUG https://jira.talendforge.org/browse/TDL-14997 73 | 'portalSubscriptionStatus', 74 | 'attempt', 75 | 'source', 76 | 'subscriptions', 77 | 'sourceId', 78 | 'replyTo', 79 | 'suppressedMessage', 80 | 'bcc', 81 | 'suppressedReason', 82 | 'cc', 83 | }, 84 | 'engagements': { # BUG https://jira.talendforge.org/browse/TDL-14997 85 | 'scheduledTasks', 86 | }, 87 | 'workflows': { # BUG https://jira.talendforge.org/browse/TDL-14998 88 | 'migrationStatus', 89 | 'updateSource', 90 | 'description', 91 | 'originalAuthorUserId', 92 | 'lastUpdatedByUserId', 93 | 'creationSource', 94 | 'portalId', 95 | 'contactCounts', 96 | }, 97 | 'owners': { # BUG https://jira.talendforge.org/browse/TDL-15000 98 | 'activeSalesforceId', 99 | 100 | # Field is returned by API but not listed in official Hubspot documentation 101 | 'userIdIncludingInactive', 102 | 'type' 103 | }, 104 | 'forms': { # BUG https://jira.talendforge.org/browse/TDL-15001 105 | 'alwaysCreateNewCompany', 106 | 'themeColor', 107 | 'publishAt', 108 | 'editVersion', 109 | 'embedVersion', 110 | 'enrichable', 111 | 'themeName', 112 | 'style', 113 | 'thankYouMessageJson', 114 | 'createMarketableContact', 115 | 'kickbackEmailWorkflowId', 116 | 'businessUnitId', 117 | 'portableKey', 118 | 'parentId', 119 | 'kickbackEmailsJson', 120 | 'unpublishAt', 121 | 'internalUpdatedAt', 122 | 'multivariateTest', 123 | 'publishedAt', 124 | 'customUid', 125 | 'isPublished', 126 | 'paymentSessionTemplateIds', 127 | 'selectedExternalOptions', 128 | }, 129 | 'companies': { # BUG https://jira.talendforge.org/browse/TDL-15003 130 | 'mergeAudits', 131 | 'stateChanges', 132 | 'isDeleted', 133 | 'additionalDomains', 134 | 'property_hs_analytics_latest_source', 135 | 'property_hs_analytics_latest_source_data_2', 136 | 'property_hs_analytics_latest_source_data_1', 137 | 'property_hs_analytics_latest_source_timestamp', 138 | }, 139 | 'campaigns': { # BUG https://jira.talendforge.org/browse/TDL-15003 140 | 'lastProcessingStateChangeAt', 141 | 'lastProcessingFinishedAt', 142 | 'processingState', 143 | 'lastProcessingStartedAt', 144 | }, 145 | 
'deals': { # BUG https://jira.talendforge.org/browse/TDL-14999 146 | 'imports', 147 | 'property_hs_num_associated_deal_splits', 148 | 'property_hs_is_active_shared_deal', #https://jira.talendforge.org/browse/TDL-24758 149 | 'property_hs_is_deal_split', 150 | 151 | 'stateChanges', 152 | 'property_hs_num_associated_active_deal_registrations', 153 | 'property_hs_num_associated_deal_registrations', 154 | 'property_hs_analytics_latest_source', 155 | 'property_hs_analytics_latest_source_timestamp_contact', 156 | 'property_hs_analytics_latest_source_data_1_contact', 157 | 'property_hs_analytics_latest_source_timestamp', 158 | 'property_hs_analytics_latest_source_data_1', 159 | 'property_hs_analytics_latest_source_contact', 160 | 'property_hs_analytics_latest_source_company', 161 | 'property_hs_analytics_latest_source_data_1_company', 162 | 'property_hs_analytics_latest_source_data_2_company', 163 | 'property_hs_analytics_latest_source_data_2', 164 | 'property_hs_analytics_latest_source_data_2_contact', 165 | 'property_hs_deal_score', 166 | 167 | 'property_hs_v2_date_entered_appointmentscheduled', 168 | 'property_hs_v2_date_exited_appointmentscheduled', 169 | 'property_hs_v2_latest_time_in_appointmentscheduled', 170 | 'property_hs_v2_cumulative_time_in_appointmentscheduled', 171 | 'property_hs_v2_date_entered_qualifiedtobuy', 172 | 'property_deal_currency_code' 173 | }, 174 | 'subscription_changes': { 175 | 'normalizedEmailId' 176 | } 177 | } 178 | 179 | 180 | class TestHubspotAllFields(HubspotBaseTest): 181 | """Test that with all fields selected for a stream we replicate data as expected""" 182 | @staticmethod 183 | def name(): 184 | return "tt_hubspot_all_fields_dynamic" 185 | 186 | def streams_under_test(self): 187 | """expected streams minus the streams not under test""" 188 | return self.expected_streams().difference({ 189 | 'owners', 190 | 'subscription_changes', # BUG_TDL-14938 https://jira.talendforge.org/browse/TDL-14938 191 | }) 192 | 193 | def setUp(self): 194 | self.maxDiff = None # see all output in failure 195 | 196 | test_client = TestClient(start_date=self.get_properties()['start_date']) 197 | self.expected_records = dict() 198 | streams = self.streams_under_test() 199 | stream_to_run_last = 'contacts_by_company' 200 | if stream_to_run_last in streams: 201 | streams.remove(stream_to_run_last) 202 | streams = list(streams) 203 | streams.append(stream_to_run_last) 204 | 205 | for stream in streams: 206 | # Get all records 207 | if stream == 'contacts_by_company': 208 | company_ids = [company['companyId'] for company in self.expected_records['companies']] 209 | self.expected_records[stream] = test_client.read(stream, parent_ids=company_ids) 210 | else: 211 | self.expected_records[stream] = test_client.read(stream) 212 | 213 | for stream, records in self.expected_records.items(): 214 | LOGGER.info("The test client found %s %s records.", len(records), stream) 215 | 216 | 217 | self.convert_datatype(self.expected_records) 218 | 219 | def convert_datatype(self, expected_records): 220 | for stream, records in expected_records.items(): 221 | for record in records: 222 | 223 | # convert timestamps to string formatted datetime 224 | timestamp_keys = {'timestamp'} 225 | for key in timestamp_keys: 226 | timestamp = record.get(key) 227 | if timestamp: 228 | unformatted = datetime.datetime.fromtimestamp(timestamp/1000) 229 | formatted = datetime.datetime.strftime(unformatted, self.BASIC_DATE_FORMAT) 230 | record[key] = 
formatted 231 | 232 | return expected_records 233 | 234 | def test_run(self): 235 | conn_id = connections.ensure_connection(self) 236 | 237 | found_catalogs = self.run_and_verify_check_mode(conn_id) 238 | 239 | # Select only the expected streams tables 240 | expected_streams = self.streams_under_test() 241 | catalog_entries = [ce for ce in found_catalogs if ce['tap_stream_id'] in expected_streams] 242 | for catalog_entry in catalog_entries: 243 | stream_schema = menagerie.get_annotated_schema(conn_id, catalog_entry['stream_id']) 244 | connections.select_catalog_and_fields_via_metadata( 245 | conn_id, 246 | catalog_entry, 247 | stream_schema 248 | ) 249 | 250 | # Run sync 251 | first_record_count_by_stream = self.run_and_verify_sync(conn_id) 252 | synced_records = runner.get_records_from_target_output() 253 | 254 | # Test by Stream 255 | for stream in expected_streams: 256 | with self.subTest(stream=stream): 257 | 258 | # gather expected values 259 | replication_method = self.expected_replication_method()[stream] 260 | primary_keys = sorted(self.expected_primary_keys()[stream]) 261 | 262 | # gather replicated records 263 | actual_records = [message['data'] 264 | for message in synced_records[stream]['messages'] 265 | if message['action'] == 'upsert'] 266 | 267 | for expected_record in self.expected_records[stream]: 268 | 269 | primary_key_dict = {primary_key: expected_record[primary_key] for primary_key in primary_keys} 270 | primary_key_values = list(primary_key_dict.values()) 271 | 272 | with self.subTest(expected_record=primary_key_dict): 273 | # grab the replicated record that corresponds to expected_record by checking primary keys 274 | matching_actual_records_by_pk = get_matching_actual_record_by_pk(primary_key_dict, actual_records) 275 | if not matching_actual_records_by_pk: 276 | LOGGER.warn("Expected %s record was not replicated: %s", 277 | stream, primary_key_dict) 278 | continue # skip this expected record if it isn't replicated 279 | actual_record = matching_actual_records_by_pk[0] 280 | 281 | expected_keys = set(expected_record.keys()).union(FIELDS_ADDED_BY_TAP.get(stream, {})) 282 | actual_keys = set(actual_record.keys()) 283 | 284 | # NB: KNOWN_MISSING_FIELDS is a dictionary of streams to aggregated missing fields. 285 | # We will check each expected_record to see which of the known keys is present in expectations 286 | # and then will add them to the known_missing_keys set. 287 | known_missing_keys = set() 288 | for missing_key in KNOWN_MISSING_FIELDS.get(stream, set()): 289 | if missing_key in expected_record.keys(): 290 | known_missing_keys.add(missing_key) 291 | del expected_record[missing_key] 292 | 293 | # NB : KNOWN_EXTRA_FIELDS is a dictionary of streams to fields that should not 294 | # be replicated but are. See the variable declaration at top of file for linked BUGs. 295 | known_extra_keys = set() 296 | for extra_key in KNOWN_EXTRA_FIELDS.get(stream, set()): 297 | known_extra_keys.add(extra_key) 298 | 299 | # Verify the fields in our expected record match the fields in the corresponding replicated record 300 | expected_keys_adjusted = expected_keys.union(known_extra_keys) 301 | actual_keys_adjusted = actual_keys.union(known_missing_keys) 302 | 303 | # NB: The following workaround is for dynamic fields on the `deals` stream that we just can't track. 304 | # At the time of implementation there is no customer feedback indicating that these dynamic fields 305 | # would prove useful to an end user.
The ones that we replicated with the test client are specific 306 | # to our test data. We have determined that the filtering of these fields is an expected behavior. 307 | 308 | # deals workaround for 'property_hs_date_entered_' fields 309 | 310 | bad_key_prefixes = {'property_hs_date_entered_', 'property_hs_date_exited_', 311 | 'property_hs_time_in', 'property_hs_'} 312 | bad_keys = set() 313 | for key in expected_keys_adjusted: 314 | for prefix in bad_key_prefixes: 315 | if key.startswith(prefix) and key not in actual_keys_adjusted: 316 | bad_keys.add(key) 317 | for key in actual_keys_adjusted: 318 | for prefix in bad_key_prefixes: 319 | if key.startswith(prefix) and key not in expected_keys_adjusted: 320 | bad_keys.add(key) 321 | for key in bad_keys: 322 | if key in expected_keys_adjusted: 323 | expected_keys_adjusted.remove(key) 324 | elif key in actual_keys_adjusted: 325 | actual_keys_adjusted.remove(key) 326 | 327 | self.assertSetEqual(expected_keys_adjusted, actual_keys_adjusted) 328 | 329 | # Future Testing | TDL-16145 330 | # self.assertDictEqual(expected_record, actual_record) 331 | 332 | # Log a warning if the tap replicated more records than expected 333 | expected_primary_key_values = {tuple([record[primary_key] 334 | for primary_key in primary_keys]) 335 | for record in self.expected_records[stream]} 336 | actual_records_primary_key_values = {tuple([record[primary_key] 337 | for primary_key in primary_keys]) 338 | for record in actual_records} 339 | if actual_records_primary_key_values - expected_primary_key_values: 340 | LOGGER.warn("Unexpected %s records replicated: %s", 341 | stream, 342 | actual_records_primary_key_values - expected_primary_key_values) 343 | 344 | 345 | class TestHubspotAllFieldsStatic(TestHubspotAllFields): 346 | @staticmethod 347 | def name(): 348 | return "tt_hubspot_all_fields_static" 349 | 350 | def streams_under_test(self): 351 | """expected streams minus the streams not under test""" 352 | return { 353 | 'owners', 354 | # 'subscription_changes', # BUG_TDL-14938 https://jira.talendforge.org/browse/TDL-14938 355 | } 356 | 357 | def get_properties(self): 358 | return {'start_date' : '2021-05-02T00:00:00Z'} 359 | -------------------------------------------------------------------------------- /tests/test_hubspot_automatic_fields.py: -------------------------------------------------------------------------------- 1 | import tap_tester.connections as connections 2 | import tap_tester.menagerie as menagerie 3 | import tap_tester.runner as runner 4 | import re 5 | 6 | from base import HubspotBaseTest 7 | 8 | STATIC_DATA_STREAMS = {'owners'} 9 | 10 | class TestHubspotAutomaticFields(HubspotBaseTest): 11 | @staticmethod 12 | def name(): 13 | return "tt_hubspot_automatic" 14 | 15 | def streams_to_test(self): 16 | """streams to test""" 17 | return self.expected_streams() - STATIC_DATA_STREAMS 18 | 19 | def test_run(self): 20 | """ 21 | Verify we can deselect all fields except when inclusion=automatic, which is handled by base.py methods. 22 | Verify that only the automatic fields are sent to the target.
23 | """ 24 | conn_id = connections.ensure_connection(self) 25 | found_catalogs = self.run_and_verify_check_mode(conn_id) 26 | 27 | # Select only the expected streams tables 28 | expected_streams = self.streams_to_test() 29 | catalog_entries = [ce for ce in found_catalogs if ce['tap_stream_id'] in expected_streams] 30 | self.select_all_streams_and_fields(conn_id, catalog_entries, select_all_fields=False) 31 | 32 | # Include the following step in this test if/when hubspot conforms to the standards of metadata 33 | # See bugs BUG_TDL-9939 and BUG_TDL-14938 34 | 35 | # # Verify our selection resulted in no fields selected except for those with inclusion of 'automatic' 36 | # catalogs_selection = menagerie.get_catalogs(conn_id) 37 | # for cat in catalogs_selection: 38 | # with self.subTest(cat=cat): 39 | # catalog_entry = menagerie.get_annotated_schema(conn_id, cat['stream_id']) 40 | 41 | # # Verify the expected stream tables are selected 42 | # selected = catalog_entry.get('annotated-schema').get('selected') 43 | # print("Validating selection on {}: {}".format(cat['stream_name'], selected)) 44 | # if cat['stream_name'] not in expected_streams: 45 | # self.assertFalse(selected, msg="Stream selected, but not testable.") 46 | # continue # Skip remaining assertions if we aren't selecting this stream 47 | # self.assertTrue(selected, msg="Stream not selected.") 48 | 49 | # # Verify only automatic fields are selected 50 | # expected_automatic_fields = self.expected_automatic_fields().get(cat['tap_stream_id']) 51 | # selected_fields = self.get_selected_fields_from_metadata(catalog_entry['metadata']) 52 | 53 | # # remove replication keys 54 | # self.assertEqual(expected_automatic_fields, selected_fields) 55 | 56 | # Run a sync job using orchestrator 57 | sync_record_count = self.run_and_verify_sync(conn_id) 58 | synced_records = runner.get_records_from_target_output() 59 | 60 | # Assert the records for each stream 61 | for stream in expected_streams: 62 | with self.subTest(stream=stream): 63 | 64 | # Verify that data is present 65 | record_count = sync_record_count.get(stream, 0) 66 | self.assertGreater(record_count, 0) 67 | 68 | data = synced_records.get(stream) 69 | record_messages_keys = [set(row['data'].keys()) for row in data['messages']] 70 | expected_keys = self.expected_automatic_fields().get(stream) 71 | 72 | # BUG_TDL-9939 https://jira.talendforge.org/browse/TDL-9939 Replication keys are not included as an automatic field for these streams 73 | if stream in {'subscription_changes', 'email_events'}: 74 | # replication keys not in the expected_keys 75 | remove_keys = self.expected_metadata()[stream].get(self.REPLICATION_KEYS) 76 | expected_keys = expected_keys.difference(remove_keys) 77 | elif stream in {'engagements'}: 78 | # engagements has a nested object 'engagement' with the automatic fields 79 | expected_keys = expected_keys.union({'engagement'}) 80 | # Verify that only the automatic fields are sent to the target 81 | for actual_keys in record_messages_keys: 82 | self.assertSetEqual(actual_keys, expected_keys, 83 | msg=f"Expected automatic fields: {expected_keys} and nothing else." 
84 |                                         )
85 | 
86 | 
87 |                 # BUG_TDL-14938 https://jira.talendforge.org/browse/TDL-14938
88 |                 # The subscription_changes stream does not have a valid pk to ensure no dupes are sent
89 |                 if stream != 'subscription_changes':
90 | 
91 |                     # make sure there are no duplicate records by using the pks
92 |                     pk = self.expected_primary_keys()[stream]
93 |                     pks_values = [tuple([message['data'][p] for p in pk]) for message in data['messages']]
94 |                     self.assertEqual(len(pks_values), len(set(pks_values)))
95 | 
96 | 
97 | class TestHubspotAutomaticFieldsStaticData(TestHubspotAutomaticFields):
98 |     def streams_to_test(self):
99 |         """streams to test"""
100 |         return STATIC_DATA_STREAMS
101 | 
102 |     @staticmethod
103 |     def name():
104 |         return "tt_hubspot_automatic_static"
105 | 
106 |     def get_properties(self):
107 |         return {
108 |             'start_date' : '2021-08-19T00:00:00Z',
109 |         }
110 | 
--------------------------------------------------------------------------------
/tests/test_hubspot_bookmarks.py:
--------------------------------------------------------------------------------
1 | from datetime import datetime, timedelta
2 | import time
3 | 
4 | 
5 | import tap_tester.connections as connections
6 | import tap_tester.menagerie as menagerie
7 | import tap_tester.runner as runner
8 | 
9 | from base import HubspotBaseTest
10 | from client import TestClient
11 | from tap_tester import LOGGER
12 | 
13 | 
14 | STREAMS_WITHOUT_UPDATES = {'email_events', 'contacts_by_company', 'workflows'}
15 | STREAMS_WITHOUT_CREATES = {'campaigns', 'owners'}
16 | 
17 | class TestHubspotBookmarks(HubspotBaseTest):
18 |     """Ensure tap replicates new and updated records based on the replication method of a given stream.
19 | 
20 |     Create records for each stream. Run check mode, perform table and field selection, and run a sync.
21 |     Create 1 record for each stream and update 1 record for each stream prior to running a 2nd sync.
22 |     - Verify for each incremental stream you can do a sync which records bookmarks, and that the format matches expectations.
23 |     - Verify that a bookmark doesn't exist for full table streams.
24 |     - Verify the bookmark is the max value sent to the target for a given replication key.
25 |     - Verify 2nd sync respects the bookmark.
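    As a rough illustration of what these checks read (stream and key names taken from
    elsewhere in this suite; the value is invented):

        # hypothetical Singer state after sync 1
        {"bookmarks": {"tickets": {"updatedAt": "2021-10-13T08:32:08.383000Z"}}}

    Sync 2 should then replicate only records whose replication-key value is greater than
    or equal to that bookmark.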
26 | """ 27 | @staticmethod 28 | def name(): 29 | return "tt_hubspot_bookmarks" 30 | 31 | def streams_to_test(self): 32 | """expected streams minus the streams not under test""" 33 | expected_streams = self.expected_streams().difference(STREAMS_WITHOUT_CREATES) 34 | 35 | return expected_streams.difference({ 36 | 'subscription_changes', # BUG_TDL-14938 https://jira.talendforge.org/browse/TDL-14938 37 | }) 38 | 39 | def get_properties(self): 40 | return { 41 | 'start_date' : datetime.strftime(datetime.today()-timedelta(days=5), self.START_DATE_FORMAT), 42 | } 43 | 44 | def setUp(self): 45 | self.maxDiff = None # see all output in failure 46 | 47 | self.test_client = TestClient(self.get_properties()['start_date']) 48 | 49 | def create_test_data(self, expected_streams): 50 | """ 51 | Creating more records(5) instead of 3 to get the update time to build the histogram - tdl-20939 52 | Excluding workflows as it results in assertion failures with expected_pk and sync_pk at line#261 53 | """ 54 | 55 | self.expected_records = {stream: [] 56 | for stream in expected_streams} 57 | for stream in expected_streams - {'contacts_by_company'}: 58 | if stream == 'contacts': 59 | self.times=10 60 | elif stream == 'contact_lists': 61 | self.times=2 62 | else: 63 | self.times =3 64 | 65 | if stream == 'email_events': 66 | email_records = self.test_client.create(stream, self.times) 67 | self.expected_records['email_events'] += email_records 68 | else: 69 | # create records, one will be updated between syncs 70 | # create one static list and the rest dynamic list 71 | for _ in range(self.times): 72 | record = self.test_client.create(stream) 73 | self.expected_records[stream] += record 74 | if stream == 'contact_lists': 75 | static_list = self.test_client.create('static_contact_lists') 76 | self.expected_records[stream] += static_list 77 | 78 | if 'contacts_by_company' in expected_streams: # do last 79 | company_ids = [record['companyId'] for record in self.expected_records['companies']] 80 | contact_records = self.expected_records['contacts'] 81 | for i in range(self.times): 82 | record = self.test_client.create_contacts_by_company( 83 | company_ids=company_ids, contact_records=contact_records 84 | ) 85 | self.expected_records['contacts_by_company'] += record 86 | 87 | def test_run(self): 88 | expected_streams = self.streams_to_test() 89 | 90 | # generate 3 records for every stream that has a create endpoint 91 | create_streams = expected_streams - STREAMS_WITHOUT_CREATES 92 | self.create_test_data(create_streams) 93 | 94 | conn_id = connections.ensure_connection(self) 95 | 96 | found_catalogs = self.run_and_verify_check_mode(conn_id) 97 | 98 | # Select only the expected streams tables 99 | catalog_entries = [ce for ce in found_catalogs if ce['tap_stream_id'] in expected_streams] 100 | for catalog_entry in catalog_entries: 101 | stream_schema = menagerie.get_annotated_schema(conn_id, catalog_entry['stream_id']) 102 | connections.select_catalog_and_fields_via_metadata( 103 | conn_id, 104 | catalog_entry, 105 | stream_schema 106 | ) 107 | 108 | # Run sync 1 109 | first_record_count_by_stream = self.run_and_verify_sync(conn_id) 110 | synced_records = runner.get_records_from_target_output() 111 | state_1 = menagerie.get_state(conn_id) 112 | 113 | # Create 1 record for each stream between syncs 114 | for stream in expected_streams - {'contacts_by_company'}: 115 | record = self.test_client.create(stream) 116 | self.expected_records[stream] += record 117 | 118 | if 'contacts_by_company' in expected_streams: 119 | 
119 |             company_ids = [record['companyId'] for record in self.expected_records['companies'][:-1]]
120 |             contact_records = self.expected_records['contacts'][-1:]
121 |             record = self.test_client.create_contacts_by_company(
122 |                 company_ids=company_ids, contact_records=contact_records
123 |             )
124 |             self.expected_records['contacts_by_company'] += record
125 | 
126 |         # Update 1 record from the test setup for each stream that has an update endpoint
127 |         for stream in expected_streams - STREAMS_WITHOUT_UPDATES:
128 |             primary_key = list(self.expected_primary_keys()[stream])[0]
129 |             record_id = self.expected_records[stream][0][primary_key]
130 |             record = self.test_client.update(stream, record_id)
131 |             self.expected_records[stream].append(record)
132 | 
133 |         # run second sync
134 |         second_record_count_by_stream = self.run_and_verify_sync(conn_id)
135 |         synced_records_2 = runner.get_records_from_target_output()
136 |         state_2 = menagerie.get_state(conn_id)
137 | 
138 |         # Test by Stream
139 |         for stream in expected_streams:
140 | 
141 |             with self.subTest(stream=stream):
142 | 
143 |                 # gather expected values
144 |                 replication_method = self.expected_replication_method()[stream]
145 |                 primary_keys = self.expected_primary_keys()[stream]
146 | 
147 |                 # setting expected records for sync 1 based on the unsorted list of records
148 |                 # which does not include the records created between syncs 1 and 2
149 |                 expected_records_1 = self.expected_records[stream][:3]
150 | 
151 |                 # gather replicated records
152 |                 actual_record_count_2 = second_record_count_by_stream[stream]
153 |                 actual_records_2 = [message['data']
154 |                                     for message in synced_records_2[stream]['messages']
155 |                                     if message['action'] == 'upsert']
156 |                 actual_record_count_1 = first_record_count_by_stream[stream]
157 |                 actual_records_1 = [message['data']
158 |                                     for message in synced_records[stream]['messages']
159 |                                     if message['action'] == 'upsert']
160 | 
161 |                 if self.is_child(stream):  # we will set expectations for child streams based on the parent
162 | 
163 |                     parent_stream = self.expected_metadata()[stream][self.PARENT_STREAM]
164 |                     parent_replication_method = self.expected_replication_method()[parent_stream]
165 | 
166 |                     if parent_replication_method == self.INCREMENTAL:
167 | 
168 |                         expected_record_count = 1 if stream not in STREAMS_WITHOUT_UPDATES else 2
169 |                         expected_records_2 = self.expected_records[stream][-expected_record_count:]
170 | 
171 |                         # verify the record count matches our expectations for child streams with incremental parents
172 |                         self.assertGreater(actual_record_count_1, actual_record_count_2)
173 | 
174 |                     elif parent_replication_method == self.FULL:
175 | 
176 |                         # verify the record count matches our expectations for child streams with full table parents
177 |                         expected_records_2 = self.expected_records[stream]
178 |                         self.assertEqual(actual_record_count_1 + 1, actual_record_count_2)
179 | 
180 |                     else:
181 |                         raise AssertionError(f"Replication method is {replication_method} for stream: {stream}")
182 | 
183 | 
184 |                 elif replication_method == self.INCREMENTAL:
185 | 
186 |                     # NB: FOR INCREMENTAL STREAMS the tap does not replicate the replication-key for any records.
187 |                     #     It does functionally replicate as a standard incremental sync would but does not order
188 |                     #     records by replication-key value (since it does not exist on the record). To get around
189 |                     #     this we are putting the replication-keys on our expected records via test_client. We will
190 |                     #     verify the records we expect (via primary-key) are replicated prior to checking the
191 |                     #     replication-key values.
192 | 
193 |                     # get saved states
194 |                     stream_replication_key = list(self.expected_replication_keys()[stream])[0]
195 |                     bookmark_1 = state_1['bookmarks'][stream][stream_replication_key]
196 |                     bookmark_2 = state_2['bookmarks'][stream][stream_replication_key]
197 | 
198 |                     # setting expected records knowing they are ordered by replication-key value
199 |                     expected_record_count = 1 if stream not in STREAMS_WITHOUT_UPDATES else 2
200 |                     expected_records_2 = self.expected_records[stream][-expected_record_count:]
201 | 
202 |                     # The given streams do not contain a proper replication-key value in the response.
203 |                     if stream not in {"companies", "deals", "contacts_by_company", "email_events"}:
204 |                         # verify first sync bookmark value is max bookmark value
205 |                         max_bk_value = actual_records_1[0].get(stream_replication_key)
206 |                         for record in actual_records_1:
207 |                             replication_key_value = record.get(stream_replication_key)
208 |                             if max_bk_value < replication_key_value:
209 |                                 max_bk_value = replication_key_value
210 | 
211 |                         # For a few streams, test records updated before the sync may have a replication value
212 |                         # greater than the bookmark value, probably due to delayed record-update pickup by HubSpot
213 |                         self.assertLessEqual(bookmark_1, max_bk_value,
214 |                                              msg="First sync bookmark value cannot be greater than max replication-key value")
215 | 
216 |                         # verify second sync bookmark value is max bookmark value
217 |                         max_bk_value = actual_records_2[0].get(stream_replication_key)
218 |                         for record in actual_records_2:
219 |                             replication_key_value = record.get(stream_replication_key)
220 |                             if max_bk_value < replication_key_value:
221 |                                 max_bk_value = replication_key_value
222 | 
223 |                         # For a few streams, test records updated before the sync may have a replication value
224 |                         # greater than the bookmark value, probably due to delayed record-update pickup by HubSpot
225 |                         self.assertLessEqual(bookmark_2, max_bk_value,
226 |                                              msg="Second sync bookmark value cannot be greater than max replication-key value")
227 | 
228 |                     # verify only the new and updated records are captured by checking record counts
229 |                     self.assertGreater(actual_record_count_1, actual_record_count_2)
230 | 
231 |                     # verify the state was updated with incremented bookmark
232 |                     if stream != 'email_events':  # BUG TDL-15706
233 |                         self.assertGreater(bookmark_2, bookmark_1)
234 | 
235 |                 elif replication_method == self.FULL:
236 |                     expected_records_2 = self.expected_records[stream]
237 |                     self.assertEqual(actual_record_count_1 + 1, actual_record_count_2)
238 | 
239 |                 else:
240 |                     raise AssertionError(f"Replication method is {replication_method} for stream: {stream}")
241 | 
242 |                 # verify by primary key that all expected records are replicated in sync 1
243 |                 sync_1_pks = [tuple([record[pk] for pk in primary_keys]) for record in actual_records_1]
244 |                 expected_sync_1_pks = [tuple([record[pk] for pk in primary_keys])
245 |                                        for record in expected_records_1]
246 |                 for expected_pk in expected_sync_1_pks:
247 |                     self.assertIn(expected_pk, sync_1_pks)
248 | 
249 |                 # verify by primary key that all expected records are replicated in sync 2
250 |                 sync_2_pks = sorted([tuple([record[pk] for pk in primary_keys]) for record in actual_records_2])
251 |                 expected_sync_2_pks = sorted([tuple([record[pk] for pk in primary_keys])
252 |                                               for record in expected_records_2])
253 |                 for expected_pk in expected_sync_2_pks:
254 |                     self.assertIn(expected_pk, sync_2_pks)
255 | 
256 |                 # verify that at least 1 record from the first sync is replicated in the 2nd sync
257 |                 #     to prove that the bookmarking is inclusive
258 |                 if stream in {'companies',      # BUG | https://jira.talendforge.org/browse/TDL-15503
259 |                               'email_events'}:  # BUG | https://jira.talendforge.org/browse/TDL-15706
260 |                     continue  # skipping failures
261 |                 self.assertTrue(any([expected_pk in sync_2_pks for expected_pk in expected_sync_1_pks]))
262 | 
263 |     def tearDown(self):
264 |         """Print histogram of Create time difference - TDL-20939"""
265 |         self.test_client.print_histogram_data()
266 | 
--------------------------------------------------------------------------------
/tests/test_hubspot_bookmarks_static.py:
--------------------------------------------------------------------------------
1 | from datetime import datetime, timedelta
2 | from time import sleep
3 | import copy
4 | 
5 | import tap_tester.connections as connections
6 | import tap_tester.menagerie as menagerie
7 | import tap_tester.runner as runner
8 | 
9 | from base import HubspotBaseTest
10 | from client import TestClient
11 | 
12 | 
13 | STREAMS_WITHOUT_CREATES = {'campaigns', 'owners'}
14 | 
15 | 
16 | class TestHubspotBookmarks(HubspotBaseTest):
17 |     """Test basic bookmarking and replication for streams that do not have CRUD capability."""
18 |     @staticmethod
19 |     def name():
20 |         return "tt_hubspot_bookmarks_static"
21 | 
22 |     def streams_to_test(self):
23 |         """expected streams minus the streams not under test"""
24 |         return STREAMS_WITHOUT_CREATES
25 | 
26 |     def get_properties(self):
27 |         # 'start_date' : '2021-08-19T00:00:00Z'
28 |         return {'start_date' : '2017-11-22T00:00:00Z'}
29 | 
30 |     def setUp(self):
31 |         self.maxDiff = None  # see all output in failure
32 | 
33 | 
34 |     def test_run(self):
35 |         expected_streams = self.streams_to_test()
36 | 
37 |         conn_id = connections.ensure_connection(self)
38 | 
39 |         found_catalogs = self.run_and_verify_check_mode(conn_id)
40 | 
41 |         # Select only the expected streams tables
42 |         catalog_entries = [ce for ce in found_catalogs if ce['tap_stream_id'] in expected_streams]
43 |         for catalog_entry in catalog_entries:
44 |             stream_schema = menagerie.get_annotated_schema(conn_id, catalog_entry['stream_id'])
45 |             connections.select_catalog_and_fields_via_metadata(
46 |                 conn_id,
47 |                 catalog_entry,
48 |                 stream_schema
49 |             )
50 | 
51 |         # Run sync 1
52 |         first_record_count_by_stream = self.run_and_verify_sync(conn_id)
53 |         synced_records = runner.get_records_from_target_output()
54 |         state_1 = menagerie.get_state(conn_id)
55 | 
56 |         # Update state to simulate a bookmark
57 |         new_state = copy.deepcopy(state_1)
58 |         for stream in state_1['bookmarks'].keys():
59 |             if self.expected_replication_method()[stream] == self.INCREMENTAL:
60 |                 calculated_bookmark_value = self.timedelta_formatted(
61 |                     state_1['bookmarks'][stream]['updatedAt'], days=-1, str_format=self.BASIC_DATE_FORMAT
62 |                 )
63 |                 new_state['bookmarks'][stream]['updatedAt'] = calculated_bookmark_value
64 |         menagerie.set_state(conn_id, new_state)
65 | 
66 |         # run second sync
67 |         second_record_count_by_stream = self.run_and_verify_sync(conn_id)
68 |         synced_records_2 = runner.get_records_from_target_output()
69 |         state_2 = menagerie.get_state(conn_id)
70 | 
71 |         # Test by Stream
72 |         for stream in expected_streams:
73 | 
74 |             with self.subTest(stream=stream):
75 | 
76 |                 # gather expected values
77 |                 replication_method = self.expected_replication_method()[stream]
78 |                 primary_keys = self.expected_primary_keys()[stream]
79 | 
80 |                 # gather replicated records
81 |                 actual_record_count_2 = second_record_count_by_stream[stream]
82 |                 actual_records_2 = [message['data']
83 |                                     for message in synced_records_2[stream]['messages']
84 |                                     if message['action'] == 'upsert']
85 |                 actual_record_count_1 = first_record_count_by_stream[stream]
86 |                 actual_records_1 = [message['data']
87 |                                     for message in synced_records[stream]['messages']
88 |                                     if message['action'] == 'upsert']
89 | 
90 |                 # NB: There are no replication-key values on records, so we cannot confirm that the
91 |                 #     replicated records respect the bookmark via direct comparison. All we can do is verify
92 |                 #     the syncs correspond to the replication methods logically by strategically setting the simulated state.
93 | 
94 |                 if replication_method == self.INCREMENTAL:
95 | 
96 |                     # get saved states
97 |                     stream_replication_key = list(self.expected_replication_keys()[stream])[0]
98 |                     bookmark_1 = state_1['bookmarks'][stream][stream_replication_key]
99 |                     bookmark_2 = state_2['bookmarks'][stream][stream_replication_key]
100 | 
101 |                     # verify the uninterrupted sync and the simulated sync end with the same bookmark values
102 |                     self.assertEqual(bookmark_1, bookmark_2)
103 | 
104 |                     # trim records down to just the primary key values
105 |                     sync_1_pks = [tuple([record[pk] for pk in primary_keys]) for record in actual_records_1]
106 |                     sync_2_pks = [tuple([record[pk] for pk in primary_keys]) for record in actual_records_2]
107 |                     # ensure no dupe records present
108 |                     self.assertCountEqual(set(sync_1_pks), sync_1_pks)
109 |                     self.assertCountEqual(set(sync_2_pks), sync_2_pks)
110 | 
111 |                     # verify sync 2 replicated only records that were also replicated in sync 1, as the
112 |                     # simulated state does not correspond to a specific record's replication-key value
113 |                     self.assertTrue(set(sync_2_pks).issubset(set(sync_1_pks)))
114 | 
115 |                     # verify there are more records in sync 1 than in sync 2 (proper setup required for this)
116 |                     self.assertGreater(actual_record_count_1, actual_record_count_2)
117 | 
118 |                 elif replication_method == self.FULL:
119 | 
120 |                     # verify the same number of records were replicated in each sync
121 |                     self.assertEqual(actual_record_count_1, actual_record_count_2)
122 | 
123 |                     # verify the exact same records were replicated in each sync
124 |                     self.assertEqual(actual_records_1, actual_records_2)
125 | 
126 |                 else:
127 |                     raise AssertionError(f"Replication method is {replication_method} for stream: {stream}")
128 | 
--------------------------------------------------------------------------------
/tests/test_hubspot_child_stream_only.py:
--------------------------------------------------------------------------------
1 | """Test tap field selection of child streams without its parent."""
2 | import re
3 | from datetime import datetime as dt
4 | from datetime import timedelta
5 | 
6 | from tap_tester import connections
7 | from tap_tester import menagerie
8 | from tap_tester import runner
9 | 
10 | from base import HubspotBaseTest
11 | from client import TestClient
12 | 
13 | 
14 | class FieldSelectionChildTest(HubspotBaseTest):
15 |     """Test tap field selection of child streams without its parent."""
16 | 
17 |     @staticmethod
18 |     def name():
19 |         return "tt_hubspot_child_streams"
20 | 
21 |     def get_properties(self):
22 |         return {
23 |             'start_date' : dt.strftime(dt.today()-timedelta(days=2), self.START_DATE_FORMAT)
24 |         }
25 | 
26 |     def setUp(self):
27 |         test_client = TestClient(start_date=self.get_properties()['start_date'])
28 | 
29 |         contact = test_client.create('contacts')
30 |         company = test_client.create('companies')[0]
31 |         contact_by_company = test_client.create_contacts_by_company(
32 |             company_ids=[company['companyId']],
33 |             contact_records=contact
34 |         )
35 | 
36 |     def test_run(self):
37 |         """
38 |         Verify that when a child stream is selected without its parent that
39 |         • a critical error in the tap occurs
40 |         • the error indicates which parent stream needs to be selected
41 |         • when the parent is selected the tap doesn't raise a critical error
42 |         """
43 |         streams_to_test = {"contacts_by_company"}
44 | 
45 |         conn_id = self.create_connection_and_run_check()
46 | 
47 |         found_catalogs = self.run_and_verify_check_mode(conn_id)
48 | 
49 |         # Select only the expected streams tables
50 |         catalog_entries = [ce for ce in found_catalogs if ce['tap_stream_id'] in streams_to_test]
51 | 
52 |         for catalog_entry in catalog_entries:
53 |             stream_schema = menagerie.get_annotated_schema(conn_id, catalog_entry['stream_id'])
54 |             connections.select_catalog_and_fields_via_metadata(
55 |                 conn_id,
56 |                 catalog_entry,
57 |                 stream_schema
58 |             )
59 | 
60 |         # Run a sync job using orchestrator
61 |         sync_job_name = runner.run_sync_mode(self, conn_id)
62 | 
63 |         # Verify tap and target exit codes
64 |         exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
65 | 
66 |         # Verify that the tap error message shows you need to select the parent stream
67 |         self.assertRaises(AssertionError, menagerie.verify_sync_exit_status, self, exit_status, sync_job_name)
68 |         self.assertEqual(exit_status['tap_error_message'],
69 |                          ('Unable to extract contacts_by_company data. '
70 |                           'To receive contacts_by_company data, you also need to select companies.'))
71 | 
72 |         # Verify there is no discovery or target error
73 |         self.assertEqual(exit_status['target_exit_status'], 0)
74 |         self.assertEqual(exit_status['discovery_exit_status'], 0)
75 | 
76 |         # Select only child and required parent and make sure there is no critical error
77 |         streams_to_test = {"contacts_by_company", "companies"}
78 |         catalog_entries = [ce for ce in found_catalogs if ce['tap_stream_id'] in streams_to_test]
79 |         for catalog_entry in catalog_entries:
80 |             stream_schema = menagerie.get_annotated_schema(conn_id, catalog_entry['stream_id'])
81 |             connections.select_catalog_and_fields_via_metadata(
82 |                 conn_id,
83 |                 catalog_entry,
84 |                 stream_schema
85 |             )
86 | 
87 |         # Run a sync job
88 |         self.run_and_verify_sync(conn_id)
--------------------------------------------------------------------------------
/tests/test_hubspot_discovery.py:
--------------------------------------------------------------------------------
1 | """Test tap discovery mode and metadata/annotated-schema."""
2 | import re
3 | 
4 | from tap_tester import menagerie
5 | 
6 | from base import HubspotBaseTest
7 | 
8 | 
9 | class DiscoveryTest(HubspotBaseTest):
10 |     """Test tap discovery mode and metadata/annotated-schema conforms to standards."""
11 | 
12 |     @staticmethod
13 |     def name():
14 |         return "tt_hubspot_discovery"
15 | 
16 |     def test_run(self):
17 |         """
18 |         Verify that discover creates the appropriate catalog, schema, metadata, etc.
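        For orientation, a sketch of the top-level metadata entry the assertions below inspect
        (the key names are standard Singer metadata; the values are illustrative):

            {"breadcrumb": [],
             "metadata": {"table-key-properties": ["companyId"],
                          "valid-replication-keys": ["property_hs_lastmodifieddate"],
                          "forced-replication-method": "INCREMENTAL"}}

        The specific checks follow.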
19 | 
20 |         • Verify number of actual streams discovered matches expected
21 |         • Verify the stream names discovered were what we expect
22 |         • Verify stream names follow naming convention
23 |           streams should only have lowercase alphas and underscores
24 |         • verify there is only 1 top level breadcrumb
25 |         • verify replication key(s)
26 |         • verify primary key(s)
27 |         • verify that if there is a replication key we are doing INCREMENTAL otherwise FULL
28 |         • verify the actual replication matches our expected replication method
29 |         • verify that primary, replication and foreign keys
30 |           are given the inclusion of automatic (metadata and annotated schema).
31 |         • verify that all other fields have inclusion of available (metadata and schema)
32 |         """
33 |         streams_to_test = self.expected_streams()
34 | 
35 |         conn_id = self.create_connection_and_run_check()
36 | 
37 |         found_catalogs = self.run_and_verify_check_mode(conn_id)
38 | 
39 |         # Verify stream names follow naming convention
40 |         # streams should only have lowercase alphas and underscores
41 |         found_catalog_names = {c['tap_stream_id'] for c in found_catalogs}
42 |         self.assertTrue(all([re.fullmatch(r"[a-z_]+", name) for name in found_catalog_names]),
43 |                         msg="One or more streams don't follow standard naming")
44 | 
45 |         for stream in streams_to_test:
46 |             with self.subTest(stream=stream):
47 |                 catalog = next(iter([catalog for catalog in found_catalogs
48 |                                      if catalog["stream_name"] == stream]))
49 |                 assert catalog  # based on previous tests this should always be found
50 |                 schema_and_metadata = menagerie.get_annotated_schema(conn_id, catalog['stream_id'])
51 |                 metadata = schema_and_metadata["metadata"]
52 | 
53 |                 # verify there is only 1 top level breadcrumb
54 |                 stream_properties = [item for item in metadata if item.get("breadcrumb") == []]
55 |                 self.assertTrue(len(stream_properties) == 1,
56 |                                 msg=f"There is NOT only one top level breadcrumb for {stream}" + \
57 |                                     f"\nstream_properties | {stream_properties}")
58 | 
59 |                 # verify replication key(s)
60 |                 actual_rep_keys = set(stream_properties[0].get(
61 |                     "metadata", {self.REPLICATION_KEYS: None}).get(
62 |                         self.REPLICATION_KEYS, []))
63 |                 self.assertEqual(
64 |                     set(stream_properties[0].get(
65 |                         "metadata", {self.REPLICATION_KEYS: []}).get(self.REPLICATION_KEYS, [])),
66 |                     self.expected_replication_keys()[stream],
67 |                     msg=f"expected replication key {self.expected_replication_keys()[stream]} but actual is {actual_rep_keys}"
68 |                 )
69 | 
70 | 
71 |                 # verify primary key(s)
72 |                 actual_primary_keys = set(stream_properties[0].get("metadata", {self.PRIMARY_KEYS: []}).get(self.PRIMARY_KEYS, []))
73 |                 self.assertSetEqual(self.expected_primary_keys()[stream], actual_primary_keys,
74 |                                     msg=f"expected primary key {self.expected_primary_keys()[stream]} but actual is {actual_primary_keys}"
75 | 
76 | 
77 |                 )
78 |                 actual_replication_method = stream_properties[0]['metadata'].get('forced-replication-method')
79 |                 # BUG https://jira.talendforge.org/browse/TDL-9939 all streams are set to full-table in the metadata
80 |                 # verify the actual replication matches our expected replication method
81 |                 if stream == "contacts":
82 |                     self.assertEqual(
83 |                         self.expected_replication_method().get(stream, None),
84 |                         actual_replication_method,
85 |                         msg="The actual replication method {} doesn't match the expected {}".format(
86 |                             actual_replication_method,
87 |                             self.expected_replication_method().get(stream, None)))
88 | 
89 |                 # verify
that if there is a replication key we are doing INCREMENTAL otherwise FULL 90 | actual_replication_method = stream_properties[0].get( 91 | "metadata", {self.REPLICATION_METHOD: None}).get(self.REPLICATION_METHOD) 92 | if stream_properties[0].get( 93 | "metadata", {self.REPLICATION_KEYS: []}).get(self.REPLICATION_KEYS, []): 94 | 95 | if stream in ["contacts", "companies", "deals"]: 96 | self.assertTrue(actual_replication_method == self.INCREMENTAL, 97 | msg="Expected INCREMENTAL replication " 98 | "since there is a replication key") 99 | else: 100 | # BUG_TDL-9939 https://jira.talendforge.org/browse/TDL-9939 all streams are set to full table 101 | pass # BUG TDL-9939 REMOVE ME WHEN BUG IS ADDRESSED 102 | 103 | else: 104 | self.assertTrue(actual_replication_method == self.FULL, 105 | msg="Expected FULL replication " 106 | "since there is no replication key") 107 | 108 | expected_primary_keys = self.expected_primary_keys()[stream] 109 | expected_replication_keys = self.expected_replication_keys()[stream] 110 | expected_automatic_fields = expected_primary_keys | expected_replication_keys 111 | 112 | # verify that primary, replication and foreign keys are given the inclusion of automatic in metadata. 113 | # BUG_2 https://jira.talendforge.org/browse/TDL-9772 'inclusion' is not present for replication keys 114 | actual_automatic_fields = {item.get("breadcrumb", ["properties", None])[1] 115 | for item in metadata 116 | if item.get("metadata").get("inclusion") == "automatic"} 117 | if stream in ["contacts", "companies", "deals"]: 118 | self.assertEqual(expected_automatic_fields, 119 | actual_automatic_fields, 120 | msg=f"expected {expected_automatic_fields} automatic fields but got {actual_automatic_fields}" 121 | ) 122 | 123 | # verify that all other fields have inclusion of available 124 | # This assumes there are no unsupported fields for SaaS sources 125 | self.assertTrue( 126 | all({item.get("metadata").get("inclusion") == "available" 127 | for item in metadata 128 | if item.get("breadcrumb", []) != [] 129 | and item.get("breadcrumb", ["properties", None])[1] 130 | not in actual_automatic_fields}), 131 | msg="Not all non key properties are set to available in metadata") 132 | -------------------------------------------------------------------------------- /tests/test_hubspot_interrupted_sync.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime, timedelta 2 | from time import sleep 3 | import copy 4 | 5 | import tap_tester.connections as connections 6 | import tap_tester.menagerie as menagerie 7 | import tap_tester.runner as runner 8 | 9 | from base import HubspotBaseTest 10 | from client import TestClient 11 | 12 | 13 | class TestHubspotInterruptedSync1(HubspotBaseTest): 14 | """Testing interrupted syncs for streams that implement unique bookmarking logic.""" 15 | @staticmethod 16 | def name(): 17 | return "tt_hubspot_sync_interrupt_1" 18 | 19 | def streams_to_test(self): 20 | """expected streams minus the streams not under test""" 21 | return {'companies', 'engagements', 'tickets'} 22 | 23 | def simulated_interruption(self, reference_state): 24 | 25 | new_state = copy.deepcopy(reference_state) 26 | 27 | companies_bookmark = self.timedelta_formatted( 28 | reference_state['bookmarks']['companies']['property_hs_lastmodifieddate'], 29 | days=-1, str_format=self.BASIC_DATE_FORMAT 30 | ) 31 | new_state['bookmarks']['companies']['property_hs_lastmodifieddate'] = None 32 | new_state['bookmarks']['companies']['current_sync_start'] = 
companies_bookmark
33 | 
34 |         engagements_bookmark = self.timedelta_formatted(
35 |             reference_state['bookmarks']['engagements']['lastUpdated'],
36 |             days=-1, str_format=self.BASIC_DATE_FORMAT
37 |         )
38 |         new_state['bookmarks']['engagements']['lastUpdated'] = None
39 |         new_state['bookmarks']['engagements']['current_sync_start'] = engagements_bookmark
40 | 
41 |         tickets_bookmark = self.timedelta_formatted(
42 |             reference_state['bookmarks']['tickets']['updatedAt'],
43 |             days=-1, str_format=self.BASIC_DATE_FORMAT)
44 |         new_state['bookmarks']['tickets']['updatedAt'] = tickets_bookmark
45 | 
46 |         return new_state
47 | 
48 |     def get_properties(self):
49 |         # 'start_date' : '2021-08-19T00:00:00Z'
50 |         # return {'start_date' : '2017-11-22T00:00:00Z'}
51 |         return {
52 |             'start_date' : datetime.strftime(
53 |                 datetime.today()-timedelta(days=5), self.START_DATE_FORMAT
54 |             ),
55 |         }
56 | 
57 |     def setUp(self):
58 |         self.maxDiff = None  # see all output in failure
59 | 
60 |     def test_run(self):
61 | 
62 |         expected_streams = self.streams_to_test()
63 | 
64 |         conn_id = connections.ensure_connection(self)
65 | 
66 |         found_catalogs = self.run_and_verify_check_mode(conn_id)
67 | 
68 |         # Select only the expected streams tables
69 |         catalog_entries = [ce for ce in found_catalogs if ce['tap_stream_id'] in expected_streams]
70 |         for catalog_entry in catalog_entries:
71 |             stream_schema = menagerie.get_annotated_schema(conn_id, catalog_entry['stream_id'])
72 |             connections.select_catalog_and_fields_via_metadata(
73 |                 conn_id,
74 |                 catalog_entry,
75 |                 stream_schema
76 |             )
77 | 
78 |         # Run sync 1
79 |         first_record_count_by_stream = self.run_and_verify_sync(conn_id)
80 |         synced_records = runner.get_records_from_target_output()
81 |         state_1 = menagerie.get_state(conn_id)
82 | 
83 |         # Update state to simulate a bookmark
84 |         new_state = self.simulated_interruption(state_1)
85 |         menagerie.set_state(conn_id, new_state)
86 | 
87 |         # run second sync
88 |         second_record_count_by_stream = self.run_and_verify_sync(conn_id)
89 |         synced_records_2 = runner.get_records_from_target_output()
90 |         state_2 = menagerie.get_state(conn_id)
91 | 
92 |         # Test by Stream
93 |         for stream in expected_streams:
94 | 
95 |             with self.subTest(stream=stream):
96 | 
97 |                 # gather expected values
98 |                 replication_method = self.expected_replication_method()[stream]
99 |                 primary_keys = self.expected_primary_keys()[stream]
100 | 
101 |                 # gather replicated records
102 |                 actual_record_count_2 = second_record_count_by_stream[stream]
103 |                 actual_records_2 = [message['data']
104 |                                     for message in synced_records_2[stream]['messages']
105 |                                     if message['action'] == 'upsert']
106 |                 actual_record_count_1 = first_record_count_by_stream[stream]
107 |                 actual_records_1 = [message['data']
108 |                                     for message in synced_records[stream]['messages']
109 |                                     if message['action'] == 'upsert']
110 | 
111 |                 # NB: There are no replication-key values on records, so we cannot confirm that the
112 |                 #     replicated records respect the bookmark via direct comparison. All we can do is verify
113 |                 #     the syncs correspond to the replication methods logically by strategically setting the simulated state.
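                # For orientation (values illustrative), simulated_interruption() above rewound the
                # state that sync 2 started from, e.g.:
                #   {'bookmarks': {'companies':   {'property_hs_lastmodifieddate': None,
                #                                  'current_sync_start': '<bookmark_1 minus 1 day>'},
                #                  'engagements': {'lastUpdated': None,
                #                                  'current_sync_start': '<bookmark_1 minus 1 day>'},
                #                  'tickets':     {'updatedAt': '<bookmark_1 minus 1 day>'}}}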
114 | 
115 |                 if replication_method == self.INCREMENTAL:
116 | 
117 |                     # get saved states
118 |                     stream_replication_key = list(self.expected_replication_keys()[stream])[0]
119 |                     bookmark_1 = state_1['bookmarks'][stream][stream_replication_key]
120 |                     bookmark_2 = state_2['bookmarks'][stream][stream_replication_key]
121 | 
122 |                     # BUG_TDL-15782 [tap-hubspot] Failure to recover from interrupted sync (engagements, companies)
123 |                     if stream in {'companies', 'engagements'}:
124 |                         continue  # skip failing assertions
125 | 
126 |                     # verify the uninterrupted sync and the simulated sync end with the same bookmark values
127 |                     self.assertEqual(bookmark_1, bookmark_2)
128 | 
129 |                     # trim records down to just the primary key values
130 |                     sync_1_pks = [tuple([record[pk] for pk in primary_keys]) for record in actual_records_1]
131 |                     sync_2_pks = [tuple([record[pk] for pk in primary_keys]) for record in actual_records_2]
132 |                     # ensure no dupe records present
133 |                     self.assertCountEqual(set(sync_1_pks), sync_1_pks)
134 |                     self.assertCountEqual(set(sync_2_pks), sync_2_pks)
135 | 
136 |                     # verify sync 2 replicated only records that were also replicated in sync 1, as the
137 |                     # simulated state does not correspond to a specific record's replication-key value
138 |                     self.assertTrue(set(sync_2_pks).issubset(set(sync_1_pks)))
139 | 
140 |                 else:
141 |                     raise AssertionError(f"Replication method is {replication_method} for stream: {stream}")
142 | 
143 | 
--------------------------------------------------------------------------------
/tests/test_hubspot_interrupted_sync_offset.py:
--------------------------------------------------------------------------------
1 | from datetime import datetime, timedelta
2 | from time import sleep
3 | import copy
4 | 
5 | import tap_tester.connections as connections
6 | import tap_tester.menagerie as menagerie
7 | import tap_tester.runner as runner
8 | 
9 | from base import HubspotBaseTest
10 | from client import TestClient
11 | 
12 | 
13 | class TestHubspotInterruptedSyncOffsetContactLists(HubspotBaseTest):
14 |     """Testing interrupted syncs for streams that implement unique bookmarking logic."""
15 |     synced_records = None
16 | 
17 |     @staticmethod
18 |     def name():
19 |         return "tt_hubspot_interrupt_contact_lists"
20 | 
21 |     def streams_to_test(self):
22 |         """expected streams minus the streams not under test"""
23 |         untested = {
24 |             # Streams tested elsewhere
25 |             'engagements',  # covered in TestHubspotInterruptedSync1
26 |             # Feature Request | TDL-16095: [tap-hubspot] All incremental
27 |             # streams should implement the interruptible sync feature
28 |             'forms',  # TDL-16095
29 |             'owners',  # TDL-16095
30 |             'workflows',  # TDL-16095
31 |             # Streams that do not apply
32 |             'deal_pipelines',  # interruptible does not apply, child of deals
33 |             'campaigns',  # unable to manually find a partial state with our test data
34 |             'email_events',  # unable to manually find a partial state with our test data
35 |             'subscription_changes',  # BUG_TDL-14938
36 |             'tickets'  # covered in TestHubspotInterruptedSync1
37 |         }
38 | 
39 |         return self.expected_streams() - untested
40 | 
41 |     def stream_to_interrupt(self):
42 |         return 'contact_lists'
43 | 
44 |     def state_to_inject(self, new_state):
45 |         new_state['bookmarks']['contact_lists'] = {'offset': {'offset': 250}}
46 |         return new_state
47 | 
48 |     def get_properties(self):
49 |         return {
50 |             'start_date' : datetime.strftime(
51 |                 datetime.today()-timedelta(days=3), self.START_DATE_FORMAT
52 |             ),
53 |         }
54 | 
55 |     def setUp(self):
56 |         self.maxDiff = None  # see all output in failure
57 | 
58 |     def test_run(self):
59 | 
60 |         # BUG TDL-16094 [tap-hubspot] `contacts` streams fails to recover from sync interruption
61 |         if self.stream_to_interrupt() == 'contacts':
62 |             self.skipTest("Skipping contacts TEST! See BUG[TDL-16094]")
63 | 
64 | 
65 |         expected_streams = self.streams_to_test()
66 | 
67 |         conn_id = connections.ensure_connection(self)
68 | 
69 |         found_catalogs = self.run_and_verify_check_mode(conn_id)
70 | 
71 |         # Select only the expected streams tables
72 |         catalog_entries = [ce for ce in found_catalogs if ce['tap_stream_id'] in expected_streams]
73 |         for catalog_entry in catalog_entries:
74 |             stream_schema = menagerie.get_annotated_schema(conn_id, catalog_entry['stream_id'])
75 |             connections.select_catalog_and_fields_via_metadata(
76 |                 conn_id,
77 |                 catalog_entry,
78 |                 stream_schema
79 |             )
80 | 
81 |         # Run sync 1
82 |         first_record_count_by_stream = self.run_and_verify_sync(conn_id)
83 |         self.synced_records = runner.get_records_from_target_output()
84 |         state_1 = menagerie.get_state(conn_id)
85 | 
86 |         # Update state to simulate a bookmark
87 |         stream = self.stream_to_interrupt()
88 |         new_state = copy.deepcopy(state_1)
89 |         new_state = self.state_to_inject(new_state)
90 |         new_state['currently_syncing'] = stream
91 | 
92 |         menagerie.set_state(conn_id, new_state)
93 | 
94 |         # run second sync
95 |         second_record_count_by_stream = self.run_and_verify_sync(conn_id)
96 |         synced_records_2 = runner.get_records_from_target_output()
97 |         state_2 = menagerie.get_state(conn_id)
98 | 
99 |         # Verify the post-interrupted sync bookmark is greater than or equal to the interrupted sync bookmark,
100 |         # since newly created test records may get updated while the stream is syncing
101 |         replication_keys = self.expected_replication_keys()
102 |         for stream in state_1.get('bookmarks'):
103 | 
104 |             if self.stream_to_interrupt() == 'companies' and stream == 'companies':
105 |                 replication_key = list(replication_keys[stream])[0]
106 |                 self.assertLessEqual(new_state.get('bookmarks')[stream].get('current_sync_start'),
107 |                                      state_2["bookmarks"][stream].get(replication_key),
108 |                                      msg="First sync bookmark should not be greater than the second bookmark.")
109 |             elif stream == 'contacts_by_company':
110 |                 self.assertEqual(state_1["bookmarks"][stream], {"offset": {}})
111 |                 self.assertEqual(state_2["bookmarks"][stream], {"offset": {}})
112 | 
113 |             else:
114 |                 replication_key = list(replication_keys[stream])[0]
115 |                 self.assertLessEqual(state_1["bookmarks"][stream].get(replication_key),
116 |                                      state_2["bookmarks"][stream].get(replication_key),
117 |                                      msg="First sync bookmark should not be greater than the second bookmark.")
118 | 
119 | 
120 | class TestHubspotInterruptedSyncOffsetContacts(TestHubspotInterruptedSyncOffsetContactLists):
121 |     """Testing interrupted syncs for streams that implement unique bookmarking logic."""
122 |     @staticmethod
123 |     def name():
124 |         return "tt_hubspot_interrupt_contacts"
125 | 
126 |     def get_properties(self):
127 |         return {
128 |             'start_date' : datetime.strftime(
129 |                 datetime.today()-timedelta(days=3), self.START_DATE_FORMAT
130 |             ),
131 |         }
132 | 
133 | 
134 |     def stream_to_interrupt(self):
135 |         return 'contacts'
136 | 
137 |     def state_to_inject(self, new_state):
138 |         new_state['bookmarks']['contacts'] = {'offset': {'vidOffset': 3502}}
139 |         return new_state
140 | 
141 | class TestHubspotInterruptedSyncOffsetDeals(TestHubspotInterruptedSyncOffsetContactLists):
142 |     """Testing interrupted syncs for streams that implement unique bookmarking logic."""
143 |     @staticmethod
144 |     def name():
145 | 
return "tt_hubspot_interrupt_deals" 146 | 147 | def get_properties(self): 148 | return { 149 | 'start_date' : datetime.strftime( 150 | datetime.today()-timedelta(days=3), self.START_DATE_FORMAT 151 | ), 152 | } 153 | 154 | def stream_to_interrupt(self): 155 | return 'deals' 156 | 157 | def state_to_inject(self, new_state): 158 | new_state['bookmarks']['deals'] = {'property_hs_lastmodifieddate': '2021-10-13T08:32:08.383000Z', 159 | 'offset': {'offset': 3442973342}} 160 | return new_state 161 | 162 | 163 | class TestHubspotInterruptedSyncOffsetCompanies(TestHubspotInterruptedSyncOffsetContactLists): 164 | """Testing interrupted syncs for streams that implement unique bookmarking logic.""" 165 | @staticmethod 166 | def name(): 167 | return "tt_hubspot_interrupt_companies" 168 | 169 | def get_properties(self): 170 | return { 171 | 'start_date' : datetime.strftime( 172 | datetime.today()-timedelta(days=5), self.START_DATE_FORMAT 173 | ), 174 | } 175 | 176 | def stream_to_interrupt(self): 177 | return 'companies' 178 | 179 | def state_to_inject(self, new_state): 180 | companies_records = self.synced_records['companies']['messages'] 181 | contacts_by_company_records = self.synced_records['contacts_by_company']['messages'] 182 | 183 | company_record_index = int(len(companies_records)/2) 184 | contact_record_index = int(3*len(contacts_by_company_records)/4) 185 | 186 | last_modified_value = companies_records[-1]['data'][list(self.expected_replication_keys()['companies'])[0]]['value'] 187 | current_sync_start = companies_records[company_record_index]['data'][list(self.expected_replication_keys()['companies'])[0]]['value'] 188 | offset_1 = companies_records[company_record_index]['data']['companyId'] 189 | offset_2 = contacts_by_company_records[contact_record_index]['data']['company-id'] 190 | 191 | new_state['bookmarks']['companies'] = {'property_hs_lastmodifieddate': last_modified_value, 192 | 'current_sync_start': current_sync_start, 193 | 'offset': {'offset': offset_1}} 194 | new_state['bookmarks']['contacts_by_company'] = {'offset': {'offset': offset_2}} 195 | 196 | return new_state 197 | -------------------------------------------------------------------------------- /tests/test_hubspot_newfw_all_fields.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from tap_tester.base_suite_tests.all_fields_test import AllFieldsTest 3 | from tap_tester.logger import LOGGER 4 | from base_hubspot import HubspotBaseCase 5 | from client import TestClient 6 | 7 | class HubspotAllFieldsTest(AllFieldsTest, HubspotBaseCase): 8 | """Hubspot all fields test implementation """ 9 | EXTRA_FIELDS = HubspotBaseCase.EXTRA_FIELDS 10 | 11 | @staticmethod 12 | def name(): 13 | return "tt_hubspot_all_fields" 14 | 15 | def streams_to_test(self): 16 | """expected streams minus the streams not under test""" 17 | return self.expected_stream_names().difference({ 18 | 'owners', 19 | 'subscription_changes', # BUG_TDL-14938 https://jira.talendforge.org/browse/TDL-14938 20 | }) 21 | 22 | def setUp(self): 23 | self.maxDiff = None # see all output in failure 24 | 25 | test_client = TestClient(start_date=self.get_properties()['start_date']) 26 | 27 | self.expected_records = dict() 28 | streams = self.streams_to_test() 29 | stream_to_run_last = 'contacts_by_company' 30 | if stream_to_run_last in streams: 31 | streams.remove(stream_to_run_last) 32 | streams = list(streams) 33 | streams.append(stream_to_run_last) 34 | 35 | for stream in streams: 36 | # Get all records 37 | if stream 
== 'contacts_by_company':
38 |                 company_ids = [company['companyId'] for company in self.expected_records['companies']]
39 |                 self.expected_records[stream] = test_client.read(stream, parent_ids=company_ids)
40 |             else:
41 |                 self.expected_records[stream] = test_client.read(stream)
42 | 
43 |         for stream, records in self.expected_records.items():
44 |             LOGGER.info("The test client found %s %s records.", len(records), stream)
45 | 
46 |         super().setUp()
47 |         self.convert_datatype(self.expected_records)
48 | 
49 |     def convert_datatype(self, expected_records):
50 |         # Convert the timestamp data type; get keys with data and with no data
51 |         for stream, records in expected_records.items():
52 |             expected_keys = set()
53 |             for record in records:
54 | 
55 |                 expected_keys = expected_keys.union(record.keys())
56 |                 # convert timestamps to string formatted datetime
57 |                 timestamp_keys = {'timestamp'}
58 |                 for key in timestamp_keys:
59 |                     timestamp = record.get(key)
60 |                     if timestamp:
61 |                         record[key] = self.datetime_from_timestamp(timestamp/1000, str_format=self.BASIC_DATE_FORMAT)
62 | 
63 |             self.KEYS_WITH_NO_DATA[stream] = self.selected_fields.get(stream).difference(expected_keys)
64 | 
65 |         return expected_records
66 | 
67 |     def remove_bad_keys(self, stream):
68 |         # NB: The following workaround is for dynamic fields on the `deals` stream that we just can't track.
69 |         #     At the time of implementation there is no customer feedback indicating that these dynamic fields
70 |         #     would prove useful to an end user. The ones that we replicated with the test client are specific
71 |         #     to our test data. We have determined that the filtering of these fields is an expected behavior.
72 |         # deals workaround for 'property_hs_date_entered_' fields
73 |         # BUG_TDL-14993 | https://jira.talendforge.org/browse/TDL-14993
74 |         #   Has a value of an object with key 'value' and value 'Null'
75 |         if stream == 'deals':
76 |             bad_key_prefixes = {'property_hs_date_entered_', 'property_hs_date_exited_', 'property_hs_time_in'}
77 |             bad_keys = set()
78 |             for key in self.expected_all_keys:
79 |                 for bad_prefix in bad_key_prefixes:
80 |                     if key.startswith(bad_prefix) and key not in self.fields_replicated:
81 |                         bad_keys.add(key)
82 |             for key in self.fields_replicated:
83 |                 for bad_prefix in bad_key_prefixes:
84 |                     if key.startswith(bad_prefix) and key not in self.expected_all_keys:
85 |                         bad_keys.add(key)
86 | 
87 |             for key in bad_keys:
88 |                 if key in self.expected_all_keys:
89 |                     self.expected_all_keys.remove(key)
90 |                 elif key in self.fields_replicated:
91 |                     self.fields_replicated.remove(key)
92 | 
93 |     ##########################################################################
94 |     # Tests To Skip
95 |     ##########################################################################
96 | 
97 |     @unittest.skip("Skip till all cards of missing fields are fixed.
TDL-16145 ") 98 | def test_values_of_all_fields(self): 99 | for stream in self.test_streams: 100 | with self.subTest(stream=stream): 101 | 102 | # gather expectations 103 | expected_all_keys = self.selected_fields.get(stream, set()) - set(self.MISSING_FIELDS.get(stream, {})) 104 | 105 | # gather results 106 | fields_replicated = self.actual_fields.get(stream, set()) 107 | 108 | # verify that all fields are sent to the target 109 | # test the combination of all records 110 | self.assertSetEqual(fields_replicated, expected_all_keys, 111 | logging=f"verify all fields are replicated for stream {stream}") 112 | 113 | -------------------------------------------------------------------------------- /tests/test_hubspot_pagination.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from datetime import timedelta 3 | import time 4 | from tap_tester.logger import LOGGER 5 | 6 | from client import TestClient 7 | from tap_tester.base_suite_tests.pagination_test import PaginationTest 8 | from base_hubspot import HubspotBaseCase 9 | 10 | 11 | class HubspotPaginationTest(PaginationTest, HubspotBaseCase): 12 | 13 | @staticmethod 14 | def name(): 15 | return "tt_hubspot_pagination" 16 | 17 | def streams_to_test(self): 18 | """ 19 | # All streams with limits are under test 20 | # """ 21 | streams_with_page_limits = { 22 | stream 23 | for stream, limit in self.expected_page_size().items() 24 | if limit 25 | } 26 | streams_to_test = streams_with_page_limits.difference({ 27 | # updates for contacts_by_company do not get processed quickly or consistently 28 | # via Hubspot API, unable to guarantee page limit is exceeded 29 | 'contacts_by_company', 30 | 'email_events', 31 | 'subscription_changes', # BUG_TDL-14938 https://jira.talendforge.org/browse/TDL-14938 32 | }) 33 | return streams_to_test 34 | 35 | def get_properties(self): 36 | return { 37 | 'start_date' : datetime.strftime(datetime.today()-timedelta(days=5), self.START_DATE_FORMAT) 38 | } 39 | 40 | def setUp(self): 41 | self.maxDiff = None # see all output in failure 42 | 43 | # initialize the test client 44 | setup_start = time.perf_counter() 45 | test_client = TestClient(self.get_properties()['start_date']) 46 | 47 | # gather expectations 48 | existing_records = dict() 49 | limits = self.expected_page_size() 50 | streams = self.streams_to_test() 51 | 52 | # order the creation of test data for streams based on the streams under test 53 | # this is necessary for child streams and streams that share underlying data in hubspot 54 | if 'subscription_changes' in streams and 'email_events' in streams: 55 | streams.remove('email_events') # we get this for free with subscription_changes 56 | stream_to_run_last = 'contacts_by_company' # child stream depends on companyIds, must go last 57 | if stream_to_run_last in streams: 58 | streams.remove(stream_to_run_last) 59 | streams = list(streams) 60 | streams.append(stream_to_run_last) 61 | 62 | # generate test data if necessary, one stream at a time 63 | for stream in streams: 64 | # Get all records 65 | if stream == 'contacts_by_company': 66 | company_ids = [company['companyId'] for company in existing_records['companies']] 67 | existing_records[stream] = test_client.read(stream, parent_ids=company_ids, pagination=True) 68 | else: 69 | existing_records[stream] = test_client.read(stream, pagination=True) 70 | 71 | # check if we exceed the pagination limit 72 | LOGGER.info(f"Pagination limit set to - {limits[stream]} and total number of existing 
record - {len(existing_records[stream])}")
73 |             under_target = limits[stream] + 1 - len(existing_records[stream])
74 |             LOGGER.info(f'under_target = {under_target} for {stream}')
75 | 
76 |             # if we do not exceed the limit generate more data so that we do
77 |             if under_target > 0:
78 |                 LOGGER.info(f"need to make {under_target} records for {stream} stream")
79 |                 if stream in {'subscription_changes', 'email_events'}:
80 |                     test_client.create(stream, subscriptions=existing_records[stream], times=under_target)
81 |                 elif stream == 'contacts_by_company':
82 |                     test_client.create(stream, company_ids, times=under_target)
83 |                 else:
84 |                     for i in range(under_target):
85 |                         # create records to exceed limit
86 |                         test_client.create(stream)
87 | 
88 |         setup_end = time.perf_counter()
89 |         LOGGER.info(f"Test Client took about {str(setup_end-setup_start).split('.')[0]} seconds")
90 |         super().setUp()
--------------------------------------------------------------------------------
/tests/test_hubspot_start_date.py:
--------------------------------------------------------------------------------
1 | import datetime
2 | 
3 | import tap_tester.connections as connections
4 | import tap_tester.menagerie as menagerie
5 | import tap_tester.runner as runner
6 | from tap_tester import LOGGER
7 | 
8 | from base import HubspotBaseTest
9 | from client import TestClient
10 | 
11 | 
12 | STATIC_DATA_STREAMS = {'owners', 'campaigns'}
13 | 
14 | class TestHubspotStartDate(HubspotBaseTest):
15 | 
16 |     @staticmethod
17 |     def name():
18 |         return "tt_hubspot_start_date"
19 | 
20 |     def setUp(self):
21 |         """
22 |         Create 1 record for every stream under test, because we must guarantee that
23 |         over time there will always be more records in the sync 1 time bin
24 |         (of start_date_1 -> now) than there are in the sync 2 time bin (of start_date_2 -> now).
25 |         """
26 | 
27 |         LOGGER.info("running streams with creates")
28 |         streams_under_test = self.expected_streams() - {'email_events'}  # we get this for free with subscription_changes
29 |         self.my_start_date = self.get_properties()['start_date']
30 |         self.test_client = TestClient(self.my_start_date)
31 |         for stream in streams_under_test:
32 |             if stream == 'contacts_by_company':
33 |                 companies_records = self.test_client.read('companies', since=self.my_start_date)
34 |                 company_ids = [company['companyId'] for company in companies_records]
35 |                 self.test_client.create(stream, company_ids)
36 |             else:
37 |                 self.test_client.create(stream)
38 | 
39 |     def expected_streams(self):
40 |         """
41 |         If any streams cannot have data generated programmatically,
42 |         hardcode start_dates for these streams and run the test twice.
43 |         Streams tested in TestHubspotStartDateStatic should be removed.
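        A sketch of the two windows this produces, using the day offsets from get_properties()
        below (dates are relative to the run):

            start_date_1 = utc_today - 20 days   # sync 1 bin: start_date_1 -> now
            start_date_2 = utc_today - 3 days    # sync 2 bin: start_date_2 -> now
            # streams that obey start_date should replicate strictly more records in sync 1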
44 | """ 45 | return self.expected_check_streams().difference({ 46 | 'owners', # static test data, covered in separate test 47 | 'campaigns', # static test data, covered in separate test 48 | }) 49 | 50 | 51 | def get_properties(self, original=True): 52 | utc_today = datetime.datetime.strftime( 53 | datetime.datetime.utcnow(), self.START_DATE_FORMAT 54 | ) 55 | 56 | if original: 57 | return { 58 | 'start_date' : self.timedelta_formatted(utc_today, days=-20) 59 | } 60 | else: 61 | return { 62 | 'start_date': self.timedelta_formatted(utc_today, days=-3) 63 | } 64 | 65 | def test_run(self): 66 | 67 | # SYNC 1 68 | conn_id = connections.ensure_connection(self) 69 | found_catalogs = self.run_and_verify_check_mode(conn_id) 70 | 71 | # Select only the expected streams tables 72 | expected_streams = self.expected_streams() 73 | catalog_entries = [ce for ce in found_catalogs if ce['tap_stream_id'] in expected_streams] 74 | self.select_all_streams_and_fields(conn_id, catalog_entries) 75 | first_record_count_by_stream = self.run_and_verify_sync(conn_id) 76 | first_sync_records = runner.get_records_from_target_output() 77 | 78 | # SYNC 2 79 | conn_id = connections.ensure_connection(self, original_properties=False) 80 | found_catalogs = self.run_and_verify_check_mode(conn_id) 81 | catalog_entries = [ce for ce in found_catalogs if ce['tap_stream_id'] in expected_streams] 82 | self.select_all_streams_and_fields(conn_id, catalog_entries) 83 | second_record_count_by_stream = self.run_and_verify_sync(conn_id) 84 | second_sync_records = runner.get_records_from_target_output() 85 | 86 | # Test by stream 87 | for stream in self.expected_streams(): 88 | with self.subTest(stream=stream): 89 | 90 | # gather expectations 91 | start_date_1 = self.get_properties()['start_date'] 92 | start_date_2 = self.get_properties(original=False)['start_date'] 93 | primary_keys = self.expected_primary_keys()[stream] 94 | replication_key = list(self.expected_replication_keys()[stream]) 95 | 96 | # gather results 97 | first_sync_count = first_record_count_by_stream.get(stream, 0) 98 | second_sync_count = second_record_count_by_stream.get(stream, 0) 99 | first_sync_messages = first_sync_records.get(stream, {'messages': []}).get('messages') 100 | second_sync_messages = second_sync_records.get(stream, {'messages': []}).get('messages') 101 | first_sync_primary_keys = set(tuple([record['data'][pk] for pk in primary_keys]) 102 | for record in first_sync_messages) 103 | second_sync_primary_keys = set(tuple([record['data'][pk] for pk in primary_keys]) 104 | for record in second_sync_messages) 105 | 106 | if self.expected_metadata()[stream][self.OBEYS_START_DATE]: 107 | 108 | # Verify sync 2 overlaps with sync 1 109 | self.assertFalse(first_sync_primary_keys.isdisjoint(second_sync_primary_keys), 110 | msg='There should be a shared set of data from start date 2 through sync execution time.') 111 | 112 | # Verify the second sync has less data 113 | self.assertGreater(first_sync_count, second_sync_count) 114 | 115 | # for incrmental streams we can compare records agains the start date 116 | if replication_key and stream not in {'contacts', 'subscription_changes', 'email_events'}: # BUG_TDL-9939 117 | 118 | # BUG_TDL-9939 replication key is not listed correctly 119 | if stream in {"campaigns", "companies", "contacts_by_company", "deal_pipelines", "deals"}: 120 | # For deals stream, the replication key is already prefixed with 'property_'. 
121 |                             replication_key = [replication_key[0]] if stream in ["deals", "companies"] else [f'property_{replication_key[0]}']
122 |                             first_sync_replication_key_values = [record['data'][replication_key[0]]['value']
123 |                                                                  for record in first_sync_messages]
124 |                             second_sync_replication_key_values = [record['data'][replication_key[0]]['value']
125 |                                                                   for record in second_sync_messages]
126 |                         else:
127 |                             first_sync_replication_key_values = [record['data'][replication_key[0]] for record in first_sync_messages]
128 |                             second_sync_replication_key_values = [record['data'][replication_key[0]] for record in second_sync_messages]
129 |                         formatted_start_date_1 = start_date_1.replace('Z', '.000000Z')
130 |                         formatted_start_date_2 = start_date_2.replace('Z', '.000000Z')
131 | 
132 |                         # Verify the replication key values are greater than or equal to the start date
133 |                         # for sync 1
134 |                         for value in first_sync_replication_key_values:
135 |                             self.assertGreaterEqual(value, formatted_start_date_1)
136 |                         # and for sync 2
137 |                         for value in second_sync_replication_key_values:
138 |                             self.assertGreaterEqual(value, formatted_start_date_2)
139 |                 else:
140 | 
141 |                     # If start date is not obeyed then verify the syncs are equal
142 |                     self.assertEqual(first_sync_count, second_sync_count)
143 |                     self.assertEqual(first_sync_primary_keys, second_sync_primary_keys)
144 | 
145 |                 # Verify records are replicated for both syncs
146 |                 self.assertGreater(first_sync_count, 0,
147 |                                    msg='start date usage is not confirmed when no records are replicated')
148 |                 self.assertGreater(second_sync_count, 0,
149 |                                    msg='start date usage is not confirmed when no records are replicated')
150 | 
151 | class TestHubspotStartDateStatic(TestHubspotStartDate):
152 |     @staticmethod
153 |     def name():
154 |         return "tt_hubspot_start_date_static"
155 | 
156 |     def expected_streams(self):
157 |         """expected streams minus the streams not under test"""
158 |         return {
159 |             'owners',
160 |             'campaigns',
161 |         }
162 | 
163 |     def get_properties(self, original=True):
164 |         if original:
165 |             return {'start_date' : '2017-11-22T00:00:00Z'}
166 | 
167 |         else:
168 |             return {
169 |                 'start_date' : '2023-02-25T00:00:00Z'
170 |             }
171 | 
172 |     def setUp(self):
173 |         LOGGER.info("running streams with no creates")
--------------------------------------------------------------------------------
/tests/unittests/test_deals.py:
--------------------------------------------------------------------------------
1 | """
2 | Unit tests for the functions needed to run `sync_deals`
3 | """
4 | import os
5 | import unittest
6 | from tap_hubspot import acquire_access_token_from_refresh_token
7 | from tap_hubspot import CONFIG
8 | from tap_hubspot import gen_request
9 | from tap_hubspot import get_url
10 | from tap_hubspot import merge_responses
11 | from tap_hubspot import process_v3_deals_records
12 | 
13 | 
14 | class TestDeals(unittest.TestCase):
15 |     """
16 |     This class gets an access token for the tests to use and then tests
17 |     assumptions we have about the tap
18 |     """
19 |     def setUp(self):
20 |         """
21 |         This function reads in the variables needed to get an access token
22 |         """
23 |         CONFIG['redirect_uri'] = os.environ['HUBSPOT_REDIRECT_URI']
24 |         CONFIG['refresh_token'] = os.environ['HUBSPOT_REFRESH_TOKEN']
25 |         CONFIG['client_id'] = os.environ['HUBSPOT_CLIENT_ID']
26 |         CONFIG['client_secret'] = os.environ['HUBSPOT_CLIENT_SECRET']
27 | 
28 |         acquire_access_token_from_refresh_token()
29 | 
30 | 
31 |     def test_can_fetch_hs_date_entered_props(self):
32 |         """
33 |         This test is written on the assumption that `sync_deals()` calls
--------------------------------------------------------------------------------
/tests/unittests/test_deals.py:
--------------------------------------------------------------------------------
"""
Unit tests for the functions needed to run `sync_deals`
"""
import os
import unittest
from tap_hubspot import acquire_access_token_from_refresh_token
from tap_hubspot import CONFIG
from tap_hubspot import gen_request
from tap_hubspot import get_url
from tap_hubspot import merge_responses
from tap_hubspot import process_v3_deals_records


class TestDeals(unittest.TestCase):
    """
    This class gets an access token for the tests to use and then tests
    assumptions we have about the tap
    """
    def setUp(self):
        """
        This function reads in the variables needed to get an access token
        """
        CONFIG['redirect_uri'] = os.environ['HUBSPOT_REDIRECT_URI']
        CONFIG['refresh_token'] = os.environ['HUBSPOT_REFRESH_TOKEN']
        CONFIG['client_id'] = os.environ['HUBSPOT_CLIENT_ID']
        CONFIG['client_secret'] = os.environ['HUBSPOT_CLIENT_SECRET']

        acquire_access_token_from_refresh_token()

    def test_can_fetch_hs_date_entered_props(self):
        """
        This test is written on the assumption that `sync_deals()` calls
        `gen_request()` to get records
        """
        state = {}
        url = get_url('deals_all')
        params = {'count': 250,
                  'includeAssociations': False,
                  'properties': []}
        v3_fields = ['hs_v2_date_entered_appointmentscheduled']

        records = list(
            gen_request(state, 'deals', url, params, 'deals', "hasMore", ["offset"], ["offset"], v3_fields=v3_fields)
        )

        for record in records:
            # The test account has a deal stage called "appointment scheduled"
            value = record.get('properties', {}).get('hs_v2_date_entered_appointmentscheduled')
            error_msg = ('Could not find "hs_v2_date_entered_appointmentscheduled" '
                         'in {}').format(record)
            self.assertIsNotNone(value, msg=error_msg)

    def test_process_v3_deals_records(self):
        self.maxDiff = None
        data = [
            {'properties': {'field1': 'value1',
                            'field2': 'value2',
                            'hs_v2_date_entered_field3': 'value3',
                            'hs_v2_date_exited_field4': 'value4'}},
        ]

        expected = [
            {'properties': {'hs_v2_date_entered_field3': {'value': 'value3'},
                            'hs_v2_date_exited_field4': {'value': 'value4'}}},
        ]

        actual = process_v3_deals_records(data)

        self.assertDictEqual(expected[0]['properties'], actual[0]['properties'])

    def test_merge_responses(self):
        v1_resp = [
            {'dealId': '1',
             'properties': {'field1': 'value1'}},
            {'dealId': '2',
             'properties': {'field3': 'value3'}},
        ]

        v3_resp = [
            {'id': '1',
             'properties': {'field2': 'value2'}},
            {'id': '2',
             'properties': {'field4': 'value4'}},
        ]

        expected = [
            {'dealId': '1',
             'properties': {'field1': 'value1',
                            'field2': 'value2'}},
            {'dealId': '2',
             'properties': {'field3': 'value3',
                            'field4': 'value4'}},
        ]

        merge_responses(v1_resp, v3_resp)

        for expected_record in expected:
            for actual_record in v1_resp:
                if actual_record['dealId'] == expected_record['dealId']:
                    self.assertDictEqual(expected_record, actual_record)
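
As test_merge_responses demonstrates, `merge_responses` mutates the v1 records in place, folding each v3 record's `properties` into the v1 record whose `dealId` matches the v3 `id`. A hypothetical re-implementation of that contract, inferred only from the test above (the tap's actual code may differ):

# Sketch: fold v3 properties into the matching v1 record, keyed on id.
def merge_responses_sketch(v1_records, v3_records):
    v3_by_id = {rec['id']: rec for rec in v3_records}
    for record in v1_records:
        match = v3_by_id.get(record['dealId'])
        if match:
            # Which side wins on key collisions is not implied by the test
            # data, which never overlaps; update() is one plausible choice.
            record['properties'].update(match['properties'])
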
--------------------------------------------------------------------------------
/tests/unittests/test_deselect_unselected_fields.py:
--------------------------------------------------------------------------------
import unittest
from unittest.mock import patch, MagicMock
from tap_hubspot import deselect_unselected_fields, do_sync, main_impl, CONFIG


class TestMainImpl(unittest.TestCase):

    @patch('tap_hubspot.utils.parse_args')
    @patch('tap_hubspot.do_discover')
    @patch('tap_hubspot.do_sync')
    def test_main_impl_default_behavior(self, mock_do_sync, mock_do_discover, mock_parse_args):
        """Test the default behavior of the main_impl function when select_fields_by_default is not set."""
        mock_args = MagicMock()
        mock_args.config = {}
        mock_args.state = None
        mock_args.discover = False
        mock_args.properties = None
        mock_parse_args.return_value = mock_args

        main_impl()

        self.assertTrue(CONFIG['select_fields_by_default'])
        mock_do_discover.assert_not_called()
        mock_do_sync.assert_not_called()

    @patch('tap_hubspot.utils.parse_args')
    @patch('tap_hubspot.do_discover')
    @patch('tap_hubspot.do_sync')
    def test_main_impl_select_fields_by_default_true(self, mock_do_sync, mock_do_discover, mock_parse_args):
        """Test the behavior of the main_impl function when select_fields_by_default is set to true."""
        mock_args = MagicMock()
        mock_args.config = {'select_fields_by_default': 'true'}
        mock_args.state = None
        mock_args.discover = False
        mock_args.properties = None
        mock_parse_args.return_value = mock_args

        main_impl()

        self.assertTrue(CONFIG['select_fields_by_default'])
        mock_do_discover.assert_not_called()
        mock_do_sync.assert_not_called()

    @patch('tap_hubspot.utils.parse_args')
    @patch('tap_hubspot.do_discover')
    @patch('tap_hubspot.do_sync')
    def test_main_impl_select_fields_by_default_false(self, mock_do_sync, mock_do_discover, mock_parse_args):
        """Test the behavior of the main_impl function when select_fields_by_default is set to false."""
        mock_args = MagicMock()
        mock_args.config = {'select_fields_by_default': 'false'}
        mock_args.state = None
        mock_args.discover = False
        mock_args.properties = None
        mock_parse_args.return_value = mock_args

        main_impl()

        self.assertFalse(CONFIG['select_fields_by_default'])
        mock_do_discover.assert_not_called()
        mock_do_sync.assert_not_called()

    @patch('tap_hubspot.utils.parse_args')
    @patch('tap_hubspot.do_discover')
    @patch('tap_hubspot.do_sync')
    def test_main_impl_invalid_select_fields_by_default(self, mock_do_sync, mock_do_discover, mock_parse_args):
        """Test the behavior of the main_impl function when select_fields_by_default is set to an invalid value."""
        mock_args = MagicMock()
        mock_args.config = {'select_fields_by_default': 'invalid'}
        mock_args.state = None
        mock_args.discover = False
        mock_args.properties = None
        mock_parse_args.return_value = mock_args

        with self.assertRaises(ValueError):
            main_impl()

        mock_do_discover.assert_not_called()
        mock_do_sync.assert_not_called()
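
Taken together, the four tests above pin down how main_impl coerces the select_fields_by_default config value: an absent key behaves as true, the strings 'true' and 'false' map to booleans, and anything else raises ValueError. A hypothetical parser with that contract, offered only as a reading aid (the tap's real helper is not shown in this file and may differ):

def parse_select_fields_by_default(raw):
    # Absent key: the tests expect selection-by-default to be on.
    if raw is None:
        return True
    if str(raw).lower() == 'true':
        return True
    if str(raw).lower() == 'false':
        return False
    raise ValueError(
        f"select_fields_by_default must be 'true' or 'false', got {raw!r}")
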
class TestDoSync(unittest.TestCase):

    @patch('tap_hubspot.deselect_unselected_fields')
    @patch('tap_hubspot.generate_custom_streams')
    @patch('tap_hubspot.clean_state')
    @patch('tap_hubspot.Context')
    @patch('tap_hubspot.validate_dependencies')
    @patch('tap_hubspot.get_streams_to_sync')
    @patch('tap_hubspot.get_selected_streams')
    @patch('tap_hubspot.singer')
    def test_do_sync_select_fields_by_default_none(self, mock_singer, mock_get_selected_streams, mock_get_streams_to_sync, mock_validate_dependencies, mock_Context, mock_clean_state, mock_generate_custom_streams, mock_deselect_unselected_fields):
        """Test do_sync when select_fields_by_default is not specified: it should not call deselect_unselected_fields."""
        # Mock the necessary functions and objects
        mock_singer.get_currently_syncing.return_value = None
        mock_get_streams_to_sync.return_value = []
        mock_get_selected_streams.return_value = []
        mock_generate_custom_streams.return_value = []

        # Mock the catalog and state
        CONFIG.update({'select_fields_by_default': None})
        catalog = {'streams': []}
        state = {}

        # Call the function
        do_sync(state, catalog)

        # Assertions
        mock_deselect_unselected_fields.assert_not_called()

    @patch('tap_hubspot.deselect_unselected_fields')
    @patch('tap_hubspot.generate_custom_streams')
    @patch('tap_hubspot.clean_state')
    @patch('tap_hubspot.Context')
    @patch('tap_hubspot.validate_dependencies')
    @patch('tap_hubspot.get_streams_to_sync')
    @patch('tap_hubspot.get_selected_streams')
    @patch('tap_hubspot.singer')
    def test_do_sync_select_fields_by_default_true(self, mock_singer, mock_get_selected_streams, mock_get_streams_to_sync, mock_validate_dependencies, mock_Context, mock_clean_state, mock_generate_custom_streams, mock_deselect_unselected_fields):
        """Test do_sync when select_fields_by_default is 'true': it should not call deselect_unselected_fields."""
        # Mock the necessary functions and objects
        mock_singer.get_currently_syncing.return_value = None
        mock_get_streams_to_sync.return_value = []
        mock_get_selected_streams.return_value = []
        mock_generate_custom_streams.return_value = []

        # Mock the catalog and state
        CONFIG.update({'select_fields_by_default': 'true'})
        catalog = {'streams': []}
        state = {}

        # Call the function
        do_sync(state, catalog)

        # Assertions
        mock_deselect_unselected_fields.assert_not_called()

    @patch('tap_hubspot.deselect_unselected_fields')
    @patch('tap_hubspot.generate_custom_streams')
    @patch('tap_hubspot.clean_state')
    @patch('tap_hubspot.Context')
    @patch('tap_hubspot.validate_dependencies')
    @patch('tap_hubspot.get_streams_to_sync')
    @patch('tap_hubspot.get_selected_streams')
    @patch('tap_hubspot.singer')
    def test_do_sync_select_fields_by_default_false(self, mock_singer, mock_get_selected_streams, mock_get_streams_to_sync, mock_validate_dependencies, mock_Context, mock_clean_state, mock_generate_custom_streams, mock_deselect_unselected_fields):
        """Test do_sync when select_fields_by_default is 'false': it should call deselect_unselected_fields."""
        # Mock the necessary functions and objects
        mock_singer.get_currently_syncing.return_value = None
        mock_get_streams_to_sync.return_value = []
        mock_get_selected_streams.return_value = []
        mock_generate_custom_streams.return_value = []

        # Mock the catalog and state
        CONFIG.update({'select_fields_by_default': 'false'})
        catalog = {'streams': []}
        state = {}

        # Call the function
        do_sync(state, catalog)

        # Assertions
        mock_deselect_unselected_fields.assert_called_once_with(catalog)
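
Because CONFIG is a module-level dict, the CONFIG.update(...) calls above leave state behind for whichever test runs next. A sketch of one standard-library alternative that restores CONFIG automatically (the suite does not currently do this):

# patch.dict saves and restores the target dict around the block, so a
# test's 'select_fields_by_default' setting cannot leak into later tests.
from unittest.mock import patch

with patch.dict('tap_hubspot.CONFIG', {'select_fields_by_default': 'false'}):
    do_sync(state, catalog)  # CONFIG reverts when the block exits
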
class TestDeselectUnselectedFields(unittest.TestCase):

    def test_deselect_unselected_fields(self):
        catalog = {
            'streams': [
                {
                    'stream_id': 'test_stream_1',
                    'metadata': [
                        {'breadcrumb': [], 'metadata': {'selected': True}},
                        {'breadcrumb': ['properties', 'field1'], 'metadata': {}},
                        {'breadcrumb': ['properties', 'field2'], 'metadata': {'selected': True}},
                        {'breadcrumb': ['properties', 'field3'], 'metadata': {'selected': False}}
                    ]
                },
                {
                    'stream_id': 'test_stream_2',
                    'metadata': [
                        {'breadcrumb': [], 'metadata': {'selected': False}},
                        {'breadcrumb': ['properties', 'field1'], 'metadata': {}},
                        {'breadcrumb': ['properties', 'field2'], 'metadata': {}}
                    ]
                }
            ]
        }

        expected_catalog = {
            'streams': [
                {
                    'stream_id': 'test_stream_1',
                    'metadata': [
                        {'breadcrumb': [], 'metadata': {'selected': True}},
                        {'breadcrumb': ['properties', 'field1'], 'metadata': {'selected': False}},
                        {'breadcrumb': ['properties', 'field2'], 'metadata': {'selected': True}},
                        {'breadcrumb': ['properties', 'field3'], 'metadata': {'selected': False}}
                    ]
                },
                {
                    'stream_id': 'test_stream_2',
                    'metadata': [
                        {'breadcrumb': [], 'metadata': {'selected': False}},
                        {'breadcrumb': ['properties', 'field1'], 'metadata': {}},
                        {'breadcrumb': ['properties', 'field2'], 'metadata': {}}
                    ]
                }
            ]
        }

        deselect_unselected_fields(catalog)
        self.assertEqual(catalog, expected_catalog)
--------------------------------------------------------------------------------
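
For reference, the transformation the last test pins down is small enough to restate; a hypothetical re-implementation consistent with the expected catalog above (the tap's actual deselect_unselected_fields may differ in its details):

def deselect_unselected_fields_sketch(catalog):
    # For each stream whose empty-breadcrumb (stream-level) metadata is
    # selected, stamp 'selected': False onto any field-level entry that
    # omits the key; unselected streams are left untouched.
    for stream in catalog['streams']:
        stream_selected = any(
            entry['metadata'].get('selected')
            for entry in stream['metadata'] if entry['breadcrumb'] == []
        )
        if not stream_selected:
            continue
        for entry in stream['metadata']:
            if entry['breadcrumb'] and 'selected' not in entry['metadata']:
                entry['metadata']['selected'] = False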