├── src ├── google_ads_report │ ├── __init__.py │ ├── utils │ │ ├── __init__.py │ │ ├── pubsub.py │ │ └── gcs.py │ ├── requirements_dev.txt │ ├── requirements.txt │ ├── bq_schema.json │ ├── README.md │ ├── main_test.py │ └── main.py ├── youtube_channel │ ├── __init__.py │ ├── utils │ │ ├── __init__.py │ │ ├── pubsub.py │ │ └── gcs.py │ ├── requirements_dev.txt │ ├── requirements.txt │ ├── bq_schema.json │ ├── main_test.py │ ├── README.md │ └── main.py ├── google_ads_accounts │ ├── __init__.py │ ├── utils │ │ ├── __init__.py │ │ └── pubsub.py │ ├── requirements_dev.txt │ ├── requirements.txt │ ├── main_test.py │ ├── README.md │ └── main.py ├── google_ads_excluder │ ├── __init__.py │ ├── utils │ │ ├── __init__.py │ │ └── gcs.py │ ├── requirements_dev.txt │ ├── requirements.txt │ ├── bq_schema.json │ ├── README.md │ └── main.py └── reporting │ ├── README.md │ └── exclusions_report.sql ├── terraform ├── backend.tf ├── outputs.tf ├── variables.tf └── main.tf ├── docs ├── images │ ├── cloud-shell.png │ ├── oauth-configuration.png │ ├── ape-architecture-diagram.png │ ├── ape-datastudio-report-example.png │ ├── ape-account-service-architecture-diagram.png │ ├── ape-report-service-architecture-diagram.png │ ├── ape-youtube-service-architecture-diagram.png │ └── ape-excluder-service-architecture-diagram.png ├── reporting.md ├── deployment.md └── architecture.md ├── .gitignore ├── contributing.md ├── README.md └── LICENSE /src/google_ads_report/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/youtube_channel/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/google_ads_accounts/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/google_ads_excluder/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/youtube_channel/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/google_ads_accounts/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/google_ads_excluder/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/google_ads_report/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/google_ads_report/requirements_dev.txt: -------------------------------------------------------------------------------- 1 | -r requirements.txt 2 | 3 | functions-framework==3.1.0 4 | -------------------------------------------------------------------------------- /src/youtube_channel/requirements_dev.txt: -------------------------------------------------------------------------------- 1 | -r requirements.txt 2 | 3 | functions-framework==3.1.0 4 | 
-------------------------------------------------------------------------------- /src/google_ads_accounts/requirements_dev.txt: -------------------------------------------------------------------------------- 1 | -r requirements.txt 2 | 3 | functions-framework==3.1.0 4 | -------------------------------------------------------------------------------- /src/google_ads_excluder/requirements_dev.txt: -------------------------------------------------------------------------------- 1 | -r requirements.txt 2 | 3 | functions-framework==3.1.0 4 | -------------------------------------------------------------------------------- /terraform/backend.tf: -------------------------------------------------------------------------------- 1 | terraform { 2 | backend "gcs" { 3 | prefix = "terraform/state" 4 | } 5 | } 6 | -------------------------------------------------------------------------------- /docs/images/cloud-shell.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/ads-placement-excluder/main/docs/images/cloud-shell.png -------------------------------------------------------------------------------- /terraform/outputs.tf: -------------------------------------------------------------------------------- 1 | output "service_account_email" { 2 | value = google_service_account.service_account.email 3 | } 4 | -------------------------------------------------------------------------------- /docs/images/oauth-configuration.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/ads-placement-excluder/main/docs/images/oauth-configuration.png -------------------------------------------------------------------------------- /docs/images/ape-architecture-diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/ads-placement-excluder/main/docs/images/ape-architecture-diagram.png -------------------------------------------------------------------------------- /docs/images/ape-datastudio-report-example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/ads-placement-excluder/main/docs/images/ape-datastudio-report-example.png -------------------------------------------------------------------------------- /src/google_ads_report/requirements.txt: -------------------------------------------------------------------------------- 1 | google-ads==18.0.0 2 | google-cloud-pubsub==2.13.4 3 | google-cloud-storage==2.5.0 4 | jsonschema==4.9.1 5 | pandas==1.4.3 6 | -------------------------------------------------------------------------------- /docs/images/ape-account-service-architecture-diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/ads-placement-excluder/main/docs/images/ape-account-service-architecture-diagram.png -------------------------------------------------------------------------------- /docs/images/ape-report-service-architecture-diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/ads-placement-excluder/main/docs/images/ape-report-service-architecture-diagram.png -------------------------------------------------------------------------------- /docs/images/ape-youtube-service-architecture-diagram.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/ads-placement-excluder/main/docs/images/ape-youtube-service-architecture-diagram.png -------------------------------------------------------------------------------- /docs/images/ape-excluder-service-architecture-diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/ads-placement-excluder/main/docs/images/ape-excluder-service-architecture-diagram.png -------------------------------------------------------------------------------- /src/google_ads_accounts/requirements.txt: -------------------------------------------------------------------------------- 1 | Flask==2.1.3 2 | google-auth-httplib2==0.1.0 3 | google-auth-oauthlib==0.5.2 4 | google-api-python-client==2.55.0 5 | google-cloud-pubsub==2.13.4 6 | jsonschema==4.9.1 7 | pydata-google-auth==1.4.0 8 | -------------------------------------------------------------------------------- /src/google_ads_excluder/requirements.txt: -------------------------------------------------------------------------------- 1 | google-ads==18.0.0 2 | google-auth-httplib2==0.1.0 3 | google-auth-oauthlib==0.5.2 4 | google-api-python-client==2.55.0 5 | google-cloud-bigquery==3.3.0 6 | google-cloud-pubsub==2.13.4 7 | google-cloud-storage==2.2.1 8 | jsonschema==4.9.1 9 | pandas==1.4.3 10 | -------------------------------------------------------------------------------- /src/youtube_channel/requirements.txt: -------------------------------------------------------------------------------- 1 | google-api-python-client==2.55.0 2 | google-auth==1.35.0 3 | google-auth-httplib2==0.1.0 4 | google-auth-oauthlib==0.5.2 5 | google-cloud-bigquery==3.3.2 6 | google-cloud-pubsub==2.13.6 7 | google-cloud-storage==2.2.1 8 | google-cloud-translate==2.0.1 9 | jsonschema==4.9.1 10 | numpy==1.23.1 11 | pandas==1.4.3 12 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Temp deployment 2 | .temp/ 3 | .zip 4 | 5 | # Virtual envs 6 | env/ 7 | venv/ 8 | venvs/ 9 | 10 | # IDEs 11 | .idea/ 12 | 13 | # Terraform 14 | **/*.tfvars 15 | **/.terraform/* 16 | *.tfstate 17 | *.tfstate.* 18 | .terraform.lock.hcl 19 | out/ 20 | 21 | # Python 22 | __pycache__ 23 | 24 | # Google Cloud credentials 25 | creds.json 26 | 27 | # OS 28 | .DS_STORE 29 | -------------------------------------------------------------------------------- /src/google_ads_excluder/bq_schema.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "name": "channel_id", 4 | "type": "STRING", 5 | "mode": "REQUIRED", 6 | "description": "The YouTube Channel ID" 7 | }, 8 | { 9 | "name": "customer_id", 10 | "type": "STRING", 11 | "mode": "REQUIRED", 12 | "description": "The Google Ads Customer ID where this placement originated" 13 | }, 14 | { 15 | "name": "datetime_updated", 16 | "type": "TIMESTAMP", 17 | "mode": "REQUIRED", 18 | "description": "The datetime the exclusion was made" 19 | } 20 | ] 21 | -------------------------------------------------------------------------------- /contributing.md: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | 3 | We'd love to accept your patches and contributions to this project. There are 4 | just a few small guidelines you need to follow. 
5 | 6 | ## Contributor License Agreement 7 | 8 | Contributions to this project must be accompanied by a Contributor License 9 | Agreement (CLA). You (or your employer) retain the copyright to your 10 | contribution; this simply gives us permission to use and redistribute your 11 | contributions as part of the project. Head over to 12 | <https://cla.developers.google.com/> to see your current agreements on file or 13 | to sign a new one. 14 | 15 | You generally only need to submit a CLA once, so if you've already submitted one 16 | (even if it was for a different project), you probably don't need to do it 17 | again. 18 | 19 | ## Code Reviews 20 | 21 | All submissions, including submissions by project members, require review. We 22 | use GitHub pull requests for this purpose. Consult 23 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more 24 | information on using pull requests. 25 | 26 | ## Community Guidelines 27 | 28 | This project follows 29 | [Google's Open Source Community Guidelines](https://opensource.google/conduct/). 30 | -------------------------------------------------------------------------------- /terraform/variables.tf: -------------------------------------------------------------------------------- 1 | variable "project_id" { 2 | type = string 3 | description = "The project ID to deploy the resources to" 4 | } 5 | 6 | variable "region" { 7 | type = string 8 | description = "The region to deploy the resources to, e.g. europe-west2" 9 | default = "europe-west2" 10 | } 11 | 12 | variable "oauth_refresh_token" { 13 | type = string 14 | description = "The OAuth refresh token" 15 | } 16 | 17 | variable "google_cloud_client_id" { 18 | type = string 19 | description = "The client ID from Google Cloud" 20 | } 21 | 22 | variable "google_cloud_client_secret" { 23 | type = string 24 | description = "The client secret from Google Cloud" 25 | } 26 | 27 | variable "google_ads_developer_token" { 28 | type = string 29 | description = "The Google Ads developer token" 30 | } 31 | 32 | variable "google_ads_login_customer_id" { 33 | type = string 34 | description = "The Google Ads MCC customer ID with no dashes" 35 | } 36 | 37 | variable "config_sheet_id" { 38 | type = string 39 | description = "The Google Sheet ID containing the config" 40 | } 41 | 42 | variable "bq_dataset" { 43 | type = string 44 | description = "The name of the BQ dataset" 45 | default = "ads_placement_excluder" 46 | } 47 | -------------------------------------------------------------------------------- /src/reporting/README.md: -------------------------------------------------------------------------------- 1 | # Ads Placement Excluder Reporting 2 | 3 | This code is used to build a DataStudio dashboard to provide visibility into the 4 | Ads Placement Excluder solution. 5 | 6 | ## Disclaimers 7 | __This is not an officially supported Google product.__ 8 | 9 | Copyright 2022 Google LLC. This solution, including any related sample code or 10 | data, is made available on an “as is,” “as available,” and “with all faults” 11 | basis, solely for illustrative purposes, and without warranty or representation 12 | of any kind. This solution is experimental, unsupported and provided solely for 13 | your convenience. Your use of it is subject to your agreements with Google, as 14 | applicable, and may constitute a beta feature as defined under those agreements. 
15 | To the extent that you make any data available to Google in connection with your 16 | use of the solution, you represent and warrant that you have all necessary and 17 | appropriate rights, consents and permissions to permit Google to use and process 18 | that data. By using any portion of this solution, you acknowledge, assume and 19 | accept all risks, known and unknown, associated with its usage, including with 20 | respect to your deployment of any portion of this solution in your systems, or 21 | usage in connection with your business, if at all. 22 | -------------------------------------------------------------------------------- /src/youtube_channel/bq_schema.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "name": "channel_id", 4 | "type": "STRING", 5 | "mode": "REQUIRED", 6 | "description": "The YouTube Channel ID" 7 | }, 8 | { 9 | "name": "view_count", 10 | "type": "INT64", 11 | "mode": "NULLABLE", 12 | "description": "The number of views the channel has" 13 | }, 14 | { 15 | "name": "video_count", 16 | "type": "INT64", 17 | "mode": "NULLABLE", 18 | "description": "The number of videos the channel has uploaded to it" 19 | }, 20 | { 21 | "name": "subscriber_count", 22 | "type": "INT64", 23 | "mode": "NULLABLE", 24 | "description": "The number of subscribers the channel has" 25 | }, 26 | { 27 | "name": "title", 28 | "type": "STRING", 29 | "mode": "NULLABLE", 30 | "description": "The title of the YouTube channel" 31 | }, 32 | { 33 | "name": "title_language", 34 | "type": "STRING", 35 | "mode": "NULLABLE", 36 | "description": "The predicted language of the title" 37 | }, 38 | { 39 | "name": "title_language_confidence", 40 | "type": "FLOAT64", 41 | "mode": "NULLABLE", 42 | "description": "The confidence of the prediction" 43 | }, 44 | { 45 | "name": "country", 46 | "type": "STRING", 47 | "mode": "NULLABLE", 48 | "description": "The country the channel is from" 49 | }, 50 | { 51 | "name": "datetime_updated", 52 | "type": "TIMESTAMP", 53 | "mode": "REQUIRED", 54 | "description": "The datetime the data was pulled from YouTube" 55 | } 56 | ] 57 | -------------------------------------------------------------------------------- /src/youtube_channel/utils/pubsub.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Utilities for sending messages to Pub/sub.""" 15 | import json 16 | from typing import Any, Dict 17 | from google.cloud import pubsub_v1 18 | 19 | 20 | def send_dict_to_pubsub(message_dict: Dict[str, Any], 21 | topic: str, 22 | gcp_project: str) -> None: 23 | """Push the dictionary to pubsub. 
24 | 25 | Args: 26 | message_dict: the message as a dictionary to push to pubsub 27 | topic: the name of the topic to publish the message to 28 | gcp_project: the Google Cloud Project with the pub/sub topic in 29 | """ 30 | 31 | publisher = pubsub_v1.PublisherClient() 32 | # The `topic_path` method creates a fully qualified identifier 33 | # in the form `projects/{project_id}/topics/{topic_id}` 34 | topic_path = publisher.topic_path(gcp_project, topic) 35 | message_str = json.dumps(message_dict) 36 | # Data must be a bytestring 37 | data = message_str.encode('utf-8') 38 | publisher.publish(topic_path, data) 39 | -------------------------------------------------------------------------------- /src/google_ads_report/utils/pubsub.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Utilities for sending messages to Pub/sub.""" 15 | import json 16 | from typing import Any, Dict 17 | from google.cloud import pubsub_v1 18 | 19 | 20 | def send_dict_to_pubsub(message_dict: Dict[str, Any], 21 | topic: str, 22 | gcp_project: str) -> None: 23 | """Push the dictionary to pubsub. 24 | 25 | Args: 26 | message_dict: the message as a dictionary to push to pubsub 27 | topic: the name of the topic to publish the message to 28 | gcp_project: the Google Cloud Project with the pub/sub topic in 29 | """ 30 | 31 | publisher = pubsub_v1.PublisherClient() 32 | # The `topic_path` method creates a fully qualified identifier 33 | # in the form `projects/{project_id}/topics/{topic_id}` 34 | topic_path = publisher.topic_path(gcp_project, topic) 35 | message_str = json.dumps(message_dict) 36 | # Data must be a bytestring 37 | data = message_str.encode('utf-8') 38 | publisher.publish(topic_path, data) 39 | -------------------------------------------------------------------------------- /src/reporting/exclusions_report.sql: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | -- Remove duplicate rows from YouTube, pulling only the last updated data 16 | WITH 17 | YouTube AS ( 18 | SELECT * 19 | FROM `${BQ_DATASET}.YouTubeChannel` 20 | WHERE true 21 | QUALIFY ROW_NUMBER() OVER (PARTITION BY channel_id ORDER BY datetime_updated DESC) = 1 22 | ) 23 | SELECT DISTINCT 24 | Excluded.datetime_updated AS excluded_datetime, 25 | Excluded.channel_id, 26 | Ads.placement_target_url, 27 | Excluded.customer_id, 28 | YouTube.view_count, 29 | YouTube.video_count, 30 | YouTube.subscriber_count, 31 | YouTube.title, 32 | YouTube.title_language, 33 | YouTube.title_language_confidence, 34 | YouTube.country, 35 | Ads.impressions, 36 | Ads.cost_micros, 37 | Ads.conversions, 38 | Ads.video_view_rate, 39 | Ads.video_views, 40 | Ads.clicks, 41 | Ads.average_cpm, 42 | Ads.ctr, 43 | Ads.all_conversions_from_interactions_rate, 44 | FROM 45 | `${BQ_DATASET}.GoogleAdsExclusion` AS Excluded 46 | LEFT JOIN 47 | YouTube USING (channel_id) 48 | LEFT JOIN 49 | `${BQ_DATASET}.GoogleAdsReport` AS Ads 50 | USING (channel_id, customer_id) 51 | -------------------------------------------------------------------------------- /src/google_ads_accounts/main_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Unit tests for main.py""" 15 | import unittest 16 | from unittest.mock import MagicMock, patch 17 | import main 18 | 19 | 20 | class MainTestCase(unittest.TestCase): 21 | 22 | @patch('main.run') 23 | def test_main(self, mock_run): 24 | mock_request = MagicMock() 25 | mock_request.get_json.return_value = {} 26 | response = main.main(mock_request) 27 | self.assertEqual(response.status_code, 400) 28 | mock_run.assert_not_called() 29 | mock_request.get_json.return_value = { 30 | 'sheet_id': '12345', 31 | } 32 | response = main.main(mock_request) 33 | self.assertEqual(response.status_code, 200) 34 | mock_run.assert_called_once() 35 | 36 | def test_gads_filters_to_sql_string(self): 37 | config_filters = [['impressions', '>', '1']] 38 | gaql = main.gads_filters_to_gaql_string(config_filters) 39 | self.assertEqual(gaql, 'metrics.impressions > 1') 40 | 41 | config_filters = [['impressions', '>', '1'], ['clicks', '<', '50']] 42 | gaql = main.gads_filters_to_gaql_string(config_filters) 43 | self.assertEqual(gaql, 44 | 'metrics.impressions > 1 AND metrics.clicks < 50') 45 | 46 | 47 | if __name__ == '__main__': 48 | unittest.main() 49 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Ads Placement Excluder 2 | 3 | It is manual and challenging to detect YouTube channel placements which might be 4 | spam (low performance with high cost), and exclude them from future advertising. 5 | Google Ads does not currently provide enough granularity to identify all spam 6 | channels. 
7 | 8 | Ads Placement Excluder allows an advertiser to define their own interpretation 9 | of a spam channel, and it leverages the Google Ads & YouTube APIs to 10 | automatically identify these placements and exclude them from future advertising. 11 | 12 | ## Architecture 13 | See [architecture.md](./docs/architecture.md). 14 | 15 | ## Reporting 16 | The solution provides a DataStudio dashboard for monitoring its behaviour. See 17 | [reporting.md](./docs/reporting.md) for more information. 18 | 19 | ## Get Started 20 | See [deployment.md](./docs/deployment.md) for information on how to deploy the 21 | solution and get started. 22 | 23 | ## Disclaimers 24 | __This is not an officially supported Google product.__ 25 | 26 | Copyright 2022 Google LLC. This solution, including any related sample code or 27 | data, is made available on an “as is,” “as available,” and “with all faults” 28 | basis, solely for illustrative purposes, and without warranty or representation 29 | of any kind. This solution is experimental, unsupported and provided solely for 30 | your convenience. Your use of it is subject to your agreements with Google, as 31 | applicable, and may constitute a beta feature as defined under those agreements. 32 | To the extent that you make any data available to Google in connection with your 33 | use of the solution, you represent and warrant that you have all necessary and 34 | appropriate rights, consents and permissions to permit Google to use and process 35 | that data. By using any portion of this solution, you acknowledge, assume and 36 | accept all risks, known and unknown, associated with its usage, including with 37 | respect to your deployment of any portion of this solution in your systems, or 38 | usage in connection with your business, if at all. 
39 | -------------------------------------------------------------------------------- /src/google_ads_report/bq_schema.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "name": "datetime_updated", 4 | "type": "TIMESTAMP", 5 | "mode": "REQUIRED", 6 | "description": "The datetime the data was pulled from YouTube" 7 | }, 8 | { 9 | "name": "customer_id", 10 | "type": "STRING", 11 | "mode": "REQUIRED", 12 | "description": "The customer ID in Google Ads" 13 | }, 14 | { 15 | "name": "channel_id", 16 | "type": "STRING", 17 | "mode": "REQUIRED", 18 | "description": "The YouTube Channel ID" 19 | }, 20 | { 21 | "name": "placement_target_url", 22 | "type": "STRING", 23 | "mode": "NULLABLE", 24 | "description": "The URL of the placement" 25 | }, 26 | { 27 | "name": "impressions", 28 | "type": "INT64", 29 | "mode": "NULLABLE", 30 | "description": "The number of impressions on the placement" 31 | }, 32 | { 33 | "name": "cost_micros", 34 | "type": "INT64", 35 | "mode": "NULLABLE", 36 | "description": "The cost in micros" 37 | }, 38 | { 39 | "name": "conversions", 40 | "type": "FLOAT64", 41 | "mode": "NULLABLE", 42 | "description": "The number of conversions" 43 | }, 44 | { 45 | "name": "video_view_rate", 46 | "type": "FLOAT64", 47 | "mode": "NULLABLE", 48 | "description": "The video view rate" 49 | }, 50 | { 51 | "name": "video_views", 52 | "type": "INT64", 53 | "mode": "NULLABLE", 54 | "description": "The number of video views" 55 | }, 56 | { 57 | "name": "clicks", 58 | "type": "INT64", 59 | "mode": "NULLABLE", 60 | "description": "The number of clicks" 61 | }, 62 | { 63 | "name": "average_cpm", 64 | "type": "FLOAT64", 65 | "mode": "NULLABLE", 66 | "description": "The average CPM" 67 | }, 68 | { 69 | "name": "ctr", 70 | "type": "FLOAT64", 71 | "mode": "NULLABLE", 72 | "description": "The number of click through rate" 73 | }, 74 | { 75 | "name": "all_conversions_from_interactions_rate", 76 | "type": "FLOAT64", 77 | "mode": "NULLABLE", 78 | "description": "The conversion rate" 79 | } 80 | ] 81 | -------------------------------------------------------------------------------- /src/google_ads_accounts/utils/pubsub.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Utilities for sending messages to Pub/sub.""" 15 | import json 16 | from typing import Any, Dict, List 17 | from google.cloud import pubsub_v1 18 | 19 | 20 | def send_dict_to_pubsub(message_dict: Dict[str, Any], 21 | topic: str, 22 | gcp_project: str) -> None: 23 | """Push the dictionary to pubsub. 
24 | 25 | Args: 26 | message_dict: the message as a dictionary to push to pubsub 27 | topic: the name of the topic to publish the message to 28 | gcp_project: the Google Cloud Project with the pub/sub topic in 29 | """ 30 | 31 | publisher = pubsub_v1.PublisherClient() 32 | # The `topic_path` method creates a fully qualified identifier 33 | # in the form `projects/{project_id}/topics/{topic_id}` 34 | topic_path = publisher.topic_path(gcp_project, topic) 35 | message_str = json.dumps(message_dict) 36 | # Data must be a bytestring 37 | data = message_str.encode('utf-8') 38 | publisher.publish(topic_path, data) 39 | 40 | 41 | def send_dicts_to_pubsub(messages: List[Dict[str, Any]], 42 | topic: str, 43 | gcp_project: str) -> None: 44 | """Push each message in the list to pubsub. 45 | 46 | Args: 47 | messages: a list of messages as dicts to push to pubsub 48 | topic: the name of the topic to publish the message to 49 | gcp_project: the Google Cloud Project with the pub/sub topic in 50 | """ 51 | for message in messages: 52 | send_dict_to_pubsub( 53 | message_dict=message, topic=topic, gcp_project=gcp_project) 54 | -------------------------------------------------------------------------------- /docs/reporting.md: -------------------------------------------------------------------------------- 1 | # Ads Placement Excluder Reporting 2 | 3 | There is a DataStudio dashboard that can be used to monitor the behaviour of the 4 | solution, and identify which channels are being excluded. 5 | 6 | ![Google Ads Account Architecture Diagram]( 7 | ./images/ape-datastudio-report-example.png) 8 | 9 | ## Get Started 10 | 11 | 1. Make a copy of the template from [here]( 12 | https://datastudio.google.com/reporting/4a616bed-85e9-4794-a748-721051c10755) 13 | to your Drive folder 14 | 2. While copying choose `ViewExclusions` as a new data source. `ViewExclusions` 15 | view will be created automatically by Terraform after the first deployment. 16 | a. If `ViewExclusions` does not appear in available data sources you need to 17 | Create Data Source -> Big Query -> Your Project and find `ViewExclusions` 18 | table there b. You can also add a custom data source to each chart in a chart 19 | setup tab afterwards 20 | 3. Sometimes `customer_id` is auto-defined as a date leading to the chart 21 | configuration error. You can change the field type manually to number via 22 | Resource -> Manage Data Sources -> Edit 23 | 4. You can adjust charts and filters according to your needs 24 | 25 | ## Disclaimers 26 | 27 | __This is not an officially supported Google product.__ 28 | 29 | Copyright 2022 Google LLC. This solution, including any related sample code or 30 | data, is made available on an “as is,” “as available,” and “with all faults” 31 | basis, solely for illustrative purposes, and without warranty or representation 32 | of any kind. This solution is experimental, unsupported and provided solely for 33 | your convenience. Your use of it is subject to your agreements with Google, as 34 | applicable, and may constitute a beta feature as defined under those agreements. 35 | To the extent that you make any data available to Google in connection with your 36 | use of the solution, you represent and warrant that you have all necessary and 37 | appropriate rights, consents and permissions to permit Google to use and process 38 | that data. 
By using any portion of this solution, you acknowledge, assume and 39 | accept all risks, known and unknown, associated with its usage, including with 40 | respect to your deployment of any portion of this solution in your systems, or 41 | usage in connection with your business, if at all. 42 | -------------------------------------------------------------------------------- /src/google_ads_report/utils/gcs.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Utilities for working with Google Cloud Storage.""" 15 | from google.cloud.storage.client import Client 16 | from google.cloud.storage.blob import Blob 17 | import pandas as pd 18 | 19 | 20 | def upload_blob_from_df(df: pd.DataFrame, bucket: str, blob_name: str) -> Blob: 21 | """Upload a Pandas DataFrame to a Google Clous Storage bucket. 22 | 23 | Args: 24 | df: the Pandas dataframe to upload 25 | bucket (str): Google Cloud Storage bucket. 26 | blob_name (str): Google Cloud Storage blob name. 27 | """ 28 | return upload_blob_from_string( 29 | blob_string=df.to_csv(index=False), 30 | blob_name=blob_name, 31 | bucket=bucket) 32 | 33 | 34 | def upload_blob_from_string( 35 | bucket: str, blob_string: str, blob_name: str, content_type='text/csv' 36 | ) -> Blob: 37 | """Uploads a file to Google Cloud Storage. 38 | 39 | Args: 40 | bucket (str): Google Cloud Storage bucket. 41 | blob_string (str): The content of the blob. 42 | blob_name (str): Google Cloud Storage blob name. 43 | content_type (optional str): the content type of the string, e.g. 44 | text/csv. 45 | 46 | Returns: 47 | Blob: Newly created Google Cloud Storage file blob. 48 | """ 49 | blob = create_blob(bucket, blob_name) 50 | blob.upload_from_string(blob_string, content_type=content_type) 51 | return blob 52 | 53 | 54 | def create_blob(bucket_name: str, blob_name: str) -> Blob: 55 | """Creates a blob on Google Cloud Storage. 56 | 57 | Args: 58 | bucket_name (str): Google Cloud Storage bucket. 59 | blob_name (str): Google Cloud Storage blob name. 60 | 61 | Returns: 62 | Blob: Google Cloud Storage file blob. 63 | """ 64 | client = Client() 65 | bucket = client.bucket(bucket_name) 66 | blob = bucket.blob(blob_name) 67 | return blob 68 | -------------------------------------------------------------------------------- /src/youtube_channel/utils/gcs.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Utilities for working with Google Cloud Storage.""" 15 | from google.cloud.storage.client import Client 16 | from google.cloud.storage.blob import Blob 17 | import pandas as pd 18 | 19 | 20 | def upload_blob_from_df(df: pd.DataFrame, bucket: str, blob_name: str) -> Blob: 21 | """Upload a Pandas DataFrame to a Google Cloud Storage bucket. 22 | 23 | Args: 24 | df: the Pandas dataframe to upload 25 | bucket (str): Google Cloud Storage bucket. 26 | blob_name (str): Google Cloud Storage blob name. 27 | """ 28 | return upload_blob_from_string( 29 | blob_string=df.to_csv(index=False), 30 | blob_name=blob_name, 31 | bucket=bucket) 32 | 33 | 34 | def upload_blob_from_string( 35 | bucket: str, blob_string: str, blob_name: str, content_type='text/csv' 36 | ) -> Blob: 37 | """Uploads a file to Google Cloud Storage. 38 | 39 | Args: 40 | bucket (str): Google Cloud Storage bucket. 41 | blob_string (str): The content of the blob. 42 | blob_name (str): Google Cloud Storage blob name. 43 | content_type (optional str): the content type of the string, e.g. 44 | text/csv. 45 | 46 | Returns: 47 | Blob: Newly created Google Cloud Storage file blob. 48 | """ 49 | blob = create_blob(bucket, blob_name) 50 | blob.upload_from_string(blob_string, content_type=content_type) 51 | return blob 52 | 53 | 54 | def create_blob(bucket_name: str, blob_name: str) -> Blob: 55 | """Creates a blob on Google Cloud Storage. 56 | 57 | Args: 58 | bucket_name (str): Google Cloud Storage bucket. 59 | blob_name (str): Google Cloud Storage blob name. 60 | 61 | Returns: 62 | Blob: Google Cloud Storage file blob. 63 | """ 64 | client = Client() 65 | bucket = client.bucket(bucket_name) 66 | blob = bucket.blob(blob_name) 67 | return blob 68 | -------------------------------------------------------------------------------- /src/google_ads_excluder/utils/gcs.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Utilities for working with Google Cloud Storage.""" 15 | from google.cloud.storage.client import Client 16 | from google.cloud.storage.blob import Blob 17 | import pandas as pd 18 | 19 | 20 | def upload_blob_from_df(df: pd.DataFrame, bucket: str, blob_name: str) -> Blob: 21 | """Upload a Pandas DataFrame to a Google Clous Storage bucket. 22 | 23 | Args: 24 | df: the Pandas dataframe to upload 25 | bucket (str): Google Cloud Storage bucket. 26 | blob_name (str): Google Cloud Storage blob name. 
27 | """ 28 | return upload_blob_from_string( 29 | blob_string=df.to_csv(index=False), 30 | blob_name=blob_name, 31 | bucket=bucket) 32 | 33 | 34 | def upload_blob_from_string( 35 | bucket: str, blob_string: str, blob_name: str, content_type='text/csv' 36 | ) -> Blob: 37 | """Uploads a file to Google Cloud Storage. 38 | 39 | Args: 40 | bucket (str): Google Cloud Storage bucket. 41 | blob_string (str): The content of the blob. 42 | blob_name (str): Google Cloud Storage blob name. 43 | content_type (optional str): the content type of the string, e.g. 44 | text/csv. 45 | 46 | Returns: 47 | Blob: Newly created Google Cloud Storage file blob. 48 | """ 49 | blob = create_blob(bucket, blob_name) 50 | blob.upload_from_string(blob_string, content_type=content_type) 51 | return blob 52 | 53 | 54 | def create_blob(bucket_name: str, blob_name: str) -> Blob: 55 | """Creates a blob on Google Cloud Storage. 56 | 57 | Args: 58 | bucket_name (str): Google Cloud Storage bucket. 59 | blob_name (str): Google Cloud Storage blob name. 60 | 61 | Returns: 62 | Blob: Google Cloud Storage file blob. 63 | """ 64 | client = Client() 65 | bucket = client.bucket(bucket_name) 66 | blob = bucket.blob(blob_name) 67 | return blob 68 | -------------------------------------------------------------------------------- /src/google_ads_report/README.md: -------------------------------------------------------------------------------- 1 | # Google Ads Reporting Service 2 | 3 | This service is responsible for running a report from Google Ads based on the 4 | [group_placement_view]( 5 | https://developers.google.com/google-ads/api/fields/v11/group_placement_view), 6 | with the configured filters, and outputting that as a CSV to a Cloud Storage 7 | bucket, with a BigQuery table in front of it. The data pulled from the report is 8 | filtered to only have YouTube channels. 9 | 10 | ## Local Deployment 11 | To run the code ensure the following environment variables are set: 12 | 13 | ``` 14 | export GOOGLE_ADS_USE_PROTO_PLUS=false 15 | export GOOGLE_ADS_REFRESH_TOKEN= 16 | export GOOGLE_ADS_CLIENT_ID= 17 | export GOOGLE_ADS_CLIENT_SECRET= 18 | export GOOGLE_ADS_DEVELOPER_TOKEN= 19 | export GOOGLE_ADS_LOGIN_CUSTOMER_ID= 20 | export GOOGLE_CLOUD_PROJECT= 21 | export APE_YOUTUBE_PUBSUB_TOPIC= 22 | export APE_GCS_DATA_BUCKET= 23 | ``` 24 | 25 | Next install the dev requirements: 26 | 27 | ``` 28 | pip install -r requirements_dev.txt 29 | ``` 30 | 31 | Then start the server by running: 32 | 33 | ``` 34 | functions-framework --target=main --signature-type=event --port=8080 35 | ``` 36 | 37 | You can then make a post request by running the following: 38 | 39 | ``` 40 | curl localhost:8080 \ 41 | -X POST \ 42 | -H "Content-Type: application/json" \ 43 | -d "{ \"data\": { \"data\": \"$(echo '{ "customer_id": "1234567890", "lookback_days": 90, "gads_filters": "metrics.impressions > 0", "sheet_id": "abcdefghijklmnop-mk"}' | base64)\" }}" 44 | ``` 45 | 46 | ### Mac users 47 | 48 | You may need to set this environment variable for the Google Ads report stream 49 | to work, [see Github for more info](https://github.com/rails/rails/issues/38560). 50 | 51 | ``` 52 | export OBJC_DISABLE_INITIALIZE_FORK_SAFETY=YES 53 | ``` 54 | 55 | ## Disclaimers 56 | __This is not an officially supported Google product.__ 57 | 58 | Copyright 2022 Google LLC. 
This solution, including any related sample code or 59 | data, is made available on an “as is,” “as available,” and “with all faults” 60 | basis, solely for illustrative purposes, and without warranty or representation 61 | of any kind. This solution is experimental, unsupported and provided solely for 62 | your convenience. Your use of it is subject to your agreements with Google, as 63 | applicable, and may constitute a beta feature as defined under those agreements. 64 | To the extent that you make any data available to Google in connection with your 65 | use of the solution, you represent and warrant that you have all necessary and 66 | appropriate rights, consents and permissions to permit Google to use and process 67 | that data. By using any portion of this solution, you acknowledge, assume and 68 | accept all risks, known and unknown, associated with its usage, including with 69 | respect to your deployment of any portion of this solution in your systems, or 70 | usage in connection with your business, if at all. 71 | -------------------------------------------------------------------------------- /src/google_ads_accounts/README.md: -------------------------------------------------------------------------------- 1 | # Google Ads Account Function 2 | 3 | This service is responsible for deciding which Google Ads accounts the Ads 4 | Placement Excluder solution should run for, and kicking off the downstream 5 | pipeline. Each account is pushed as a separate message into the topic to enable 6 | concurrency. 7 | 8 | ## Local Deployment 9 | To run the code ensure the following environment variables are set: 10 | 11 | ``` 12 | export GOOGLE_CLOUD_PROJECT=ads-placement-excluder 13 | export APE_ADS_REPORT_PUBSUB_TOPIC=ads-report-topic 14 | ``` 15 | 16 | The code uses [Google Application Default credentials]( 17 | https://google-auth.readthedocs.io/en/master/reference/google.auth.html) for 18 | auth. 19 | 20 | First create OAuth desktop credentials in Google Cloud, and download the client 21 | ID and client secret as a JSON file. 22 | 23 | Then run the following command, updating the path to point to the JSON file 24 | downloaded in the previous step: 25 | ``` 26 | gcloud auth application-default login \ 27 | --scopes='https://www.googleapis.com/auth/spreadsheets.readonly,https://www.googleapis.com/auth/cloud-platform' \ 28 | --client-id-file=/path/to/client-id-file.json 29 | ``` 30 | [Optionally] [see this article]( 31 | https://medium.com/google-cloud/google-oauth-credential-going-deeper-the-hard-way-f403cf3edf9d) 32 | for a detailed explanation, why this is needed. 33 | 34 | Next install the dev requirements: 35 | 36 | ``` 37 | pip install -r requirements_dev.txt 38 | ``` 39 | 40 | Then start the server by running: 41 | 42 | ``` 43 | functions-framework --target=main --port=8080 44 | ``` 45 | 46 | You can then make a post request by running the following: 47 | 48 | ``` 49 | curl localhost:8080 \ 50 | -X POST \ 51 | -H "Content-Type: application/json" \ 52 | -d '{"sheet_id": "12g3IoIP4Lk_UU3xtJsIiCSDxjNAn30vT4lOzSZPS-mk"}' 53 | ``` 54 | 55 | ## Disclaimers 56 | __This is not an officially supported Google product.__ 57 | 58 | Copyright 2022 Google LLC. This solution, including any related sample code or 59 | data, is made available on an “as is,” “as available,” and “with all faults” 60 | basis, solely for illustrative purposes, and without warranty or representation 61 | of any kind. This solution is experimental, unsupported and provided solely for 62 | your convenience. 
Your use of it is subject to your agreements with Google, as 63 | applicable, and may constitute a beta feature as defined under those agreements. 64 | To the extent that you make any data available to Google in connection with your 65 | use of the solution, you represent and warrant that you have all necessary and 66 | appropriate rights, consents and permissions to permit Google to use and process 67 | that data. By using any portion of this solution, you acknowledge, assume and 68 | accept all risks, known and unknown, associated with its usage, including with 69 | respect to your deployment of any portion of this solution in your systems, or 70 | usage in connection with your business, if at all. 71 | -------------------------------------------------------------------------------- /src/youtube_channel/main_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Unit tests for main.py""" 15 | import unittest 16 | import numpy as np 17 | import pandas as pd 18 | import main 19 | 20 | 21 | class MainTestCase(unittest.TestCase): 22 | 23 | def test_sanitise_youtube_dataframe(self): 24 | columns = [ 25 | 'title', 26 | 'view_count', 27 | 'video_count', 28 | 'subscriber_count', 29 | 'title_language_confidence', 30 | ] 31 | raw_data = [ 32 | ['String with a new line \n', '10', '1', '3', '0.56'], 33 | ['String, with, commas in,it', '10', '1', '3', '0.56'], 34 | ['String with "double quotes" in it', '10', '1', '3', '0.56'], 35 | ["String with 'single quotes' in it", '10', '1', '3', '0.56'], 36 | [' String with white space ', '10', '1', '3', '0.56'], 37 | ['String with $\r\t\n;:,', '10', '1', '3', '0.56'], 38 | ['Строка написана на русском языке', '10', '1', '3', '0.56'], 39 | ['用中文寫的字符串', '10', '1', '3', '0.56'], 40 | ] 41 | expected_data = [ 42 | ['String with a new line', 10, 1, 3, 0.56], 43 | ['String with commas init', 10, 1, 3, 0.56], 44 | ['String with double quotes in it', 10, 1, 3, 0.56], 45 | ['String with single quotes in it', 10, 1, 3, 0.56], 46 | ['String with white space', 10, 1, 3, 0.56], 47 | ['String with', 10, 1, 3, 0.56], 48 | ['Строка написана на русском языке', 10, 1, 3, 0.56], 49 | ['用中文寫的字符串', 10, 1, 3, 0.56], 50 | ] 51 | raw_df = pd.DataFrame(data=raw_data, columns=columns) 52 | expected_df = pd.DataFrame(data=expected_data, columns=columns) 53 | response_df = main.sanitise_youtube_dataframe(raw_df) 54 | pd.testing.assert_frame_equal(expected_df, response_df) 55 | 56 | def test_split_list_to_chunks(self): 57 | lst = np.arange(150) 58 | max_chunk_size = 50 59 | chunks = main.split_list_to_chunks(lst, max_chunk_size) 60 | self.assertEqual(len(chunks), 3) 61 | self.assertEqual(len(chunks[0]), 50) 62 | self.assertEqual(len(chunks[1]), 50) 63 | self.assertEqual(len(chunks[2]), 50) 64 | 65 | lst = np.arange(151) 66 | max_chunk_size = 50 67 | chunks = main.split_list_to_chunks(lst, max_chunk_size) 68 | 
self.assertEqual(len(chunks), 4) 69 | self.assertTrue(len(chunks[0]) < 50) 70 | self.assertTrue(len(chunks[1]) < 50) 71 | self.assertTrue(len(chunks[2]) < 50) 72 | self.assertTrue(len(chunks[3]) < 50) 73 | -------------------------------------------------------------------------------- /src/google_ads_excluder/README.md: -------------------------------------------------------------------------------- 1 | # Google Ads Exclusion service 2 | 3 | The Google Ads Excluder service is responsible for applying the filters in the 4 | config Google Sheet to the data, to determine which channels should be excluded 5 | in Google Ads. Channels identified for exclusion are then uploaded to the shared 6 | placement list in Google Ads, and the output written to BigQuery for reporting. 7 | 8 | ## Local Deployment 9 | To run the code ensure the following environment variables are set: 10 | 11 | ``` 12 | export GOOGLE_CLOUD_PROJECT= 13 | export APE_BIGQUERY_DATASET= 14 | export APE_EXCLUSION_VALIDATE_ONLY= 15 | export APE_GCS_DATA_BUCKET= 16 | export GOOGLE_ADS_USE_PROTO_PLUS=false 17 | export GOOGLE_ADS_REFRESH_TOKEN= 18 | export GOOGLE_ADS_CLIENT_ID= 19 | export GOOGLE_ADS_CLIENT_SECRET= 20 | export GOOGLE_ADS_DEVELOPER_TOKEN= 21 | export GOOGLE_ADS_LOGIN_CUSTOMER_ID= 22 | ``` 23 | 24 | The code uses [Google Application Default credentials]( 25 | https://google-auth.readthedocs.io/en/master/reference/google.auth.html) for 26 | auth. 27 | 28 | First create OAuth desktop credentials in Google Cloud, and download the client 29 | ID and client secret as a JSON file. 30 | 31 | Then run the following command, updating the path to point to the JSON file 32 | downloaded in the previous step: 33 | ``` 34 | gcloud auth application-default login \ 35 | --scopes='https://www.googleapis.com/auth/spreadsheets.readonly,https://www.googleapis.com/auth/cloud-platform' \ 36 | --client-id-file=/path/to/client-id-file.json 37 | ``` 38 | [Optionally] [see this article]( 39 | https://medium.com/google-cloud/google-oauth-credential-going-deeper-the-hard-way-f403cf3edf9d) 40 | for a detailed explanation, why this is needed. 41 | 42 | Next install the dev requirements: 43 | 44 | ``` 45 | pip install -r requirements_dev.txt 46 | ``` 47 | 48 | Start the function: 49 | 50 | ``` 51 | functions-framework --target=main --signature-type=event --port=8080 52 | ``` 53 | 54 | You can then make a post request by running the following: 55 | 56 | ``` 57 | curl localhost:8080 \ 58 | -X POST \ 59 | -H "Content-Type: application/json" \ 60 | -d "{ \"data\": { \"data\": \"$(echo '{ "customer_id": "1234567890", "sheet_id": "abcdefghijklmnop-mk" }' | base64)\" }}" 61 | ``` 62 | 63 | ## Disclaimers 64 | __This is not an officially supported Google product.__ 65 | 66 | Copyright 2022 Google LLC. This solution, including any related sample code or 67 | data, is made available on an “as is,” “as available,” and “with all faults” 68 | basis, solely for illustrative purposes, and without warranty or representation 69 | of any kind. This solution is experimental, unsupported and provided solely for 70 | your convenience. Your use of it is subject to your agreements with Google, as 71 | applicable, and may constitute a beta feature as defined under those agreements. 72 | To the extent that you make any data available to Google in connection with your 73 | use of the solution, you represent and warrant that you have all necessary and 74 | appropriate rights, consents and permissions to permit Google to use and process 75 | that data. 
By using any portion of this solution, you acknowledge, assume and 76 | accept all risks, known and unknown, associated with its usage, including with 77 | respect to your deployment of any portion of this solution in your systems, or 78 | usage in connection with your business, if at all. 79 | -------------------------------------------------------------------------------- /src/google_ads_report/main_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Unit tests for main.py""" 15 | import base64 16 | from datetime import datetime 17 | import json 18 | from typing import Any, Dict 19 | import unittest 20 | from unittest.mock import patch 21 | import jsonschema 22 | import main 23 | 24 | 25 | class MainTestCase(unittest.TestCase): 26 | 27 | def _create_event(self, data: Dict[str, Any]) -> Dict[str, Any]: 28 | """A helper function for creating mock event data. 29 | 30 | Args: 31 | data: a dictionary containing the event data. 32 | """ 33 | return { 34 | 'data': base64.b64encode(json.dumps(data).encode('utf-8')) 35 | } 36 | 37 | @patch('main.start_job') 38 | def test_main(self, mock_start_job): 39 | event = self._create_event({'abc': '123'}) 40 | with self.assertRaises(jsonschema.exceptions.ValidationError): 41 | main.main(event, {}) 42 | mock_start_job.assert_not_called() 43 | 44 | event = self._create_event({'customer_id': '123'}) 45 | with self.assertRaises(jsonschema.exceptions.ValidationError): 46 | main.main(event, {}) 47 | mock_start_job.assert_not_called() 48 | 49 | event = self._create_event({'lookback_days': 90}) 50 | with self.assertRaises(jsonschema.exceptions.ValidationError): 51 | main.main(event, {}) 52 | mock_start_job.assert_not_called() 53 | 54 | event = self._create_event({ 55 | 'sheet_id': 'abcdefghijklmnop-mk', 56 | 'customer_id': '123', 57 | 'lookback_days': 90, 58 | 'gads_filters': 'metrics.clicks > 10', 59 | }) 60 | main.main(event, {}) 61 | mock_start_job.assert_called_once() 62 | 63 | def test_get_query_dates(self): 64 | today_str = '2022-07-01' 65 | today = datetime.strptime(today_str, '%Y-%m-%d') 66 | date_from, date_to = main.get_query_dates(90, today) 67 | self.assertEqual(date_to, today_str) 68 | self.assertEqual(date_from, '2022-04-02') 69 | 70 | @patch('main.get_query_dates') 71 | def test_get_report_query(self, mock_get_query_dates): 72 | mock_get_query_dates.return_value = ('2022-01-01', '2022-01-31') 73 | lookback_days = 90 74 | gads_filters = None 75 | query = main.get_report_query(lookback_days, gads_filters) 76 | query = query.strip() 77 | # check it doesn't end in AND - this would be an invalid query 78 | self.assertNotEqual('AND', query[-3:]) 79 | 80 | gads_filters = 'metrics.clicks > 10' 81 | query = main.get_report_query(lookback_days, gads_filters) 82 | self.assertIn(gads_filters, query) 83 | 84 | 85 | if __name__ == '__main__': 86 | unittest.main() 87 | 
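Note: the two date-handling tests above exercise `get_query_dates` and `get_report_query` from the report service's `main.py`, which is not included in this listing. Purely as an illustrative sketch inferred from the test assertions (the function name and signature come from `main_test.py`; the real implementation may differ), `get_query_dates` could look like this:

```python
# Illustrative sketch inferred from main_test.py, not the actual main.py.
from datetime import datetime, timedelta
from typing import Optional, Tuple


def get_query_dates(lookback_days: int,
                    today: Optional[datetime] = None) -> Tuple[str, str]:
    """Return (date_from, date_to) covering the lookback window as YYYY-MM-DD."""
    today = today or datetime.today()
    date_from = today - timedelta(days=lookback_days)
    # e.g. lookback_days=90, today=2022-07-01 -> ('2022-04-02', '2022-07-01')
    return date_from.strftime('%Y-%m-%d'), today.strftime('%Y-%m-%d')
```

The `get_report_query` assertions only require that the generated GAQL never ends in a dangling `AND` and that any supplied `gads_filters` string is embedded in the query.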
-------------------------------------------------------------------------------- /src/youtube_channel/README.md: -------------------------------------------------------------------------------- 1 | # YouTube Channel Reporting Service 2 | 3 | This service is responsible for pulling metrics about the YouTube channels from 4 | the Google Ads Report. For example the number of views the channel has had, and 5 | the number of subscribers. It then uses the Google Translate API (if enabled in 6 | the config), to determine the language the YouTube channel title is in. 7 | 8 | ## Google Translate API 9 | This service leverages [Google's Translation API]( 10 | https://cloud.google.com/translate/docs/basic/detecting-language), for detecting 11 | the language of the YouTube channel's title. 12 | 13 | This is enabled/disabled in the configuration Google Sheet. See the 14 | [basic pricing](https://cloud.google.com/translate/pricing) for cost 15 | information. 16 | 17 | ## Local Deployment 18 | To run the code ensure the following environment variables are set: 19 | 20 | ``` 21 | export GOOGLE_CLOUD_PROJECT= 22 | export APE_BIGQUERY_DATASET= 23 | export APE_GCS_DATA_BUCKET= 24 | ``` 25 | 26 | The code uses [Google Application Default credentials]( 27 | https://google-auth.readthedocs.io/en/master/reference/google.auth.html) for 28 | auth. 29 | 30 | First create OAuth desktop credentials in Google Cloud, and download the client 31 | ID and client secret as a JSON file. 32 | 33 | Then run the following command, updating the path to point to the JSON file 34 | downloaded in the previous step: 35 | ``` 36 | gcloud auth application-default login \ 37 | --scopes='https://www.googleapis.com/auth/spreadsheets.readonly,https://www.googleapis.com/auth/cloud-platform,https://www.googleapis.com/auth/youtube' \ 38 | --client-id-file=/path/to/client-id-file.json 39 | ``` 40 | [Optionally] [see this article]( 41 | https://medium.com/google-cloud/google-oauth-credential-going-deeper-the-hard-way-f403cf3edf9d) 42 | for a detailed explanation, why this is needed. 43 | 44 | Next install the dev requirements: 45 | 46 | ``` 47 | pip install -r requirements_dev.txt 48 | ``` 49 | 50 | Start the function: 51 | 52 | ``` 53 | functions-framework --target=main --signature-type=event --port=8080 54 | ``` 55 | 56 | You can then make a post request by running the following: 57 | 58 | ``` 59 | curl localhost:8080 \ 60 | -X POST \ 61 | -H "Content-Type: application/json" \ 62 | -d "{ \"data\": { \"data\": \"$(echo '{ "customer_id": "1234567890", "sheet_id": "abcdefghijklmnop-mk" }' | base64)\" }}" 63 | ``` 64 | 65 | ## Disclaimers 66 | __This is not an officially supported Google product.__ 67 | 68 | Copyright 2022 Google LLC. This solution, including any related sample code or 69 | data, is made available on an “as is,” “as available,” and “with all faults” 70 | basis, solely for illustrative purposes, and without warranty or representation 71 | of any kind. This solution is experimental, unsupported and provided solely for 72 | your convenience. Your use of it is subject to your agreements with Google, as 73 | applicable, and may constitute a beta feature as defined under those agreements. 74 | To the extent that you make any data available to Google in connection with your 75 | use of the solution, you represent and warrant that you have all necessary and 76 | appropriate rights, consents and permissions to permit Google to use and process 77 | that data. 
By using any portion of this solution, you acknowledge, assume and 78 | accept all risks, known and unknown, associated with its usage, including with 79 | respect to your deployment of any portion of this solution in your systems, or 80 | usage in connection with your business, if at all. 81 | -------------------------------------------------------------------------------- /src/google_ads_accounts/main.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Fetch the Google Ads configs and push them to pub/sub.""" 15 | import logging 16 | import os 17 | import sys 18 | from typing import Any, List, Dict 19 | import flask 20 | import google.auth 21 | from googleapiclient.discovery import build 22 | import jsonschema 23 | from utils import pubsub 24 | 25 | 26 | logging.basicConfig(stream=sys.stdout) 27 | logger = logging.getLogger(__name__) 28 | logger.setLevel(logging.INFO) 29 | 30 | # The Google Cloud project containing the pub/sub topic 31 | GOOGLE_CLOUD_PROJECT = os.environ.get('GOOGLE_CLOUD_PROJECT') 32 | # The name of the pub/sub topic 33 | APE_ADS_REPORT_PUBSUB_TOPIC = os.environ.get('APE_ADS_REPORT_PUBSUB_TOPIC') 34 | # The access scopes used in this function 35 | SCOPES = ['https://www.googleapis.com/auth/spreadsheets.readonly'] 36 | 37 | # The schema of the JSON in the request 38 | request_schema = { 39 | 'type': 'object', 40 | 'properties': { 41 | 'sheet_id': {'type': 'string'}, 42 | }, 43 | 'required': ['sheet_id', ] 44 | } 45 | 46 | 47 | def main(request: flask.Request) -> flask.Response: 48 | """The entry point: extract the data from the payload and starts the job. 49 | 50 | The request payload must match the request_schema object above. 51 | 52 | Args: 53 | request (flask.Request): HTTP request object. 54 | Returns: 55 | The flask response. 56 | """ 57 | logger.info('Google Ads Account Service triggered.') 58 | request_json = request.get_json() 59 | logger.info('JSON payload: %s', request_json) 60 | response = {} 61 | try: 62 | jsonschema.validate(instance=request_json, schema=request_schema) 63 | except jsonschema.exceptions.ValidationError as err: 64 | logger.error('Invalid request payload: %s', err) 65 | response['status'] = 'Failed' 66 | response['message'] = err.message 67 | return flask.Response(flask.json.dumps(response), 68 | status=400, 69 | mimetype='application/json') 70 | 71 | run(request_json['sheet_id']) 72 | 73 | response['status'] = 'Success' 74 | response['message'] = 'Downloaded data successfully' 75 | return flask.Response(flask.json.dumps(response), 76 | status=200, 77 | mimetype='application/json') 78 | 79 | 80 | def run(sheet_id: str) -> None: 81 | """Orchestration for the function. 82 | 83 | Args: 84 | sheet_id: the ID of the Google Sheet containing the config. 
85 | """ 86 | logger.info('Running Google Ads account script') 87 | account_configs = get_config_from_sheet(sheet_id) 88 | send_messages_to_pubsub(account_configs) 89 | logger.info('Done.') 90 | 91 | 92 | def get_config_from_sheet(sheet_id: str) -> List[Dict[str, Any]]: 93 | """Get the Ads account config from the Google Sheet, and return the results. 94 | 95 | Args: 96 | sheet_id: the ID of the Google Sheet containing the config. 97 | 98 | Returns: 99 | Returns a row for each account a report needs to be run for. 100 | 101 | [ 102 | { 103 | 'sheet_id': 'abcdefghijklmnop-mk', 104 | 'customer_id': '1234567890' 105 | 'lookback_days': 90, 106 | 'gads_filters': 'metrics.clicks > 10', 107 | }, 108 | ... 109 | ] 110 | """ 111 | logger.info('Getting config from sheet: %s', sheet_id) 112 | credentials, project_id = google.auth.default(scopes=SCOPES) 113 | sheets_service = build('sheets', 'v4', credentials=credentials) 114 | sheet = sheets_service.spreadsheets() 115 | 116 | customer_ids = sheet.values().get( 117 | spreadsheetId=sheet_id, 118 | range='google_ads_customer_ids').execute().get('values', []) 119 | gads_filters = sheet.values().get( 120 | spreadsheetId=sheet_id, 121 | range='google_ads_filters').execute().get('values', []) 122 | lookback_days = sheet.values().get( 123 | spreadsheetId=sheet_id, 124 | range='google_ads_lookback_days').execute().get('values', 125 | [['30']])[0][0] 126 | 127 | gads_filters_str = gads_filters_to_gaql_string(gads_filters) 128 | 129 | logger.info('Returned %i customer_ids', len(customer_ids)) 130 | account_configs = [] 131 | for customer_id, is_enabled in customer_ids: 132 | if is_enabled == 'Enabled': 133 | account_configs.append({ 134 | 'sheet_id': sheet_id, 135 | 'customer_id': customer_id, 136 | 'lookback_days': int(lookback_days), 137 | 'gads_filters': gads_filters_str, 138 | }) 139 | else: 140 | logger.info('Ignoring disabled row: %s', customer_id) 141 | 142 | logger.info('Account configs:') 143 | logger.info(account_configs) 144 | return account_configs 145 | 146 | 147 | def gads_filters_to_gaql_string(config_filters: List[List[str]]) -> str: 148 | """Turn the Google Ads filters into a GAQL compatible string. 149 | 150 | The config sheet has the filters in a list of lists, these need to be 151 | combined, so they can be used in a WHERE clause in the GAQL that is passed 152 | to Google Ads. See: 153 | https://developers.google.com/google-ads/api/docs/query/overview 154 | 155 | Each row is "AND" together. 156 | 157 | Args: 158 | config_filters: the filters from the Google Sheet 159 | 160 | Returns: 161 | A string that can be used in the WHERE statement of the Google Ads Query 162 | Language. 163 | """ 164 | conditions = [] 165 | for row in config_filters: 166 | conditions.append(f'metrics.{row[0]} {row[1]} {row[2]}') 167 | return ' AND '.join(conditions) 168 | 169 | 170 | def send_messages_to_pubsub(messages: List[Dict[str, Any]]) -> None: 171 | """Push each of the messages to the pubsub topic. 
172 | 173 | Args: 174 | messages: the list of messages to push to pubsub 175 | """ 176 | logger.info('Sending messages to pubsub') 177 | logger.info('Messages: %s', messages) 178 | pubsub.send_dicts_to_pubsub( 179 | messages=messages, 180 | topic=APE_ADS_REPORT_PUBSUB_TOPIC, 181 | gcp_project=GOOGLE_CLOUD_PROJECT) 182 | logger.info('All messages published') 183 | -------------------------------------------------------------------------------- /docs/deployment.md: -------------------------------------------------------------------------------- 1 | # Ads Placement Excluder Deployment 2 | 3 | This doc provides information on how to deploy the Ads Placement Excluder 4 | solution. 5 | 6 | The deployment uses [Terraform](https://www.terraform.io/) to automate the 7 | deployment, and to keep all the Infrastructure as Code (IaC). The files can be 8 | found in the `/terraform/` folder in this repo. 9 | 10 | ## Roles 11 | The project creates a service account with the following roles: 12 | 13 | - `roles/cloudfunctions.invoker` 14 | - `roles/bigquery.jobUser` 15 | - `roles/bigquery.dataViewer` 16 | - `roles/pubsub.publisher` 17 | - `roles/storage.objectAdmin` 18 | 19 | As a user deploying the project, you will require these roles and the following: 20 | 21 | - `roles/storage.admin` 22 | - `roles/iam.securityAdmin` 23 | 24 | Read more about [Google Cloud roles here]( 25 | https://cloud.google.com/iam/docs/understanding-roles). 26 | 27 | ## OAuth 28 | The project uses OAauth2.0 scopes and service account roles to manage 29 | permissions. These are the scopes that are required when generating a refresh 30 | token. 31 | 32 | ``` 33 | https://www.googleapis.com/auth/spreadsheets.readonly 34 | https://www.googleapis.com/auth/cloud-platform 35 | https://www.googleapis.com/auth/youtube 36 | https://www.googleapis.com/auth/adwords 37 | ``` 38 | 39 | ## Pre-requisites 40 | 41 | - A new Google Cloud Project 42 | - Appropriate permissions to be able to deploy the project (see [roles](#roles)) 43 | - Create a copy of [the template Google Sheet]( 44 | https://docs.google.com/spreadsheets/d/1IAo8yvrY4BMuOaWnZ2O8wfJ6L36sOjCOnD7cigMxKwI/copy) 45 | and make a note of the Google Sheet ID (found in the URL after the `/d/`) 46 | - Access to the appropriate Google Ads accounts 47 | - A Google Ads [Developer Token]( 48 | https://developers.google.com/google-ads/api/docs/first-call/dev-token) 49 | 50 | 51 | ## Deployment 52 | 53 | ### Manual Steps 54 | These changes need to be done once manually, as they are not controlled by Terraform: 55 | 56 | 1. Open the Google Cloud Project in the UI. 57 | 2. Go to [Cloud Storage](https://console.cloud.google.com/storage/browser) and 58 | create a new bucket, which will be used to keep track of the Terraform state, 59 | e.g. `my-awesome-project-terraform`. Make a note of the name of the bucket. 60 | 3. Open the [OAuth Consent Screen]( 61 | https://console.cloud.google.com/apis/credentials/consent) and create a new 62 | internal app. 63 | 4. Open the [API Credentials Screen]( 64 | https://console.cloud.google.com/apis/credentials) -> Create credentials -> 65 | OAuth Client ID -> Web app -> Set 66 | `https://developers.google.com/oauthplayground` as an authorised redirect 67 | URI. Make a note of the `client_id` and the `client_secret`. 68 | 5. 
Open the [OAuth playground](https://developers.google.com/oauthplayground/), 69 | and generate a refresh token for the [above scopes](#oauth), using the 70 | `client_id` and `client_secret` generated in the previous step: 71 | ![oauth-configuration](./images/oauth-configuration.png) 72 | 6. Open Cloud Shell: 73 | ![cloud-shell](./images/cloud-shell.png) 74 | 7. Enable the APIs in the project by running the following: 75 | 76 | ``` 77 | gcloud services enable \ 78 | serviceusage.googleapis.com \ 79 | cloudresourcemanager.googleapis.com \ 80 | iam.googleapis.com \ 81 | cloudresourcemanager.googleapis.com \ 82 | serviceusage.googleapis.com \ 83 | bigquery.googleapis.com \ 84 | googleads.googleapis.com \ 85 | youtube.googleapis.com \ 86 | cloudfunctions.googleapis.com \ 87 | cloudbuild.googleapis.com \ 88 | sheets.googleapis.com \ 89 | cloudscheduler.googleapis.com \ 90 | translate.googleapis.com 91 | ``` 92 | 93 | _Side note_: If you're interested in the reason why the APIs aren't controlled 94 | through Terraform, [read this guide]( 95 | https://medium.com/rockedscience/how-to-fully-automate-the-deployment-of-google-cloud-platform-projects-with-terraform-16c33f1fb31f). 96 | 97 | ### Terraform 98 | 99 | 1. Whilst still in Cloud Shell, `git clone` the project, and `cd` into the 100 | directory. 101 | 2. Run the following commands to initialise Terraform: 102 | ``` 103 | cd terraform 104 | terraform init 105 | ``` 106 | When prompted, enter the name of the bucket created in step 2 of the manual 107 | steps. 108 | 109 | 3. Create a file named `terraform.tfvars` and add the following variables: 110 | ``` 111 | project_id = "" 112 | oauth_refresh_token = "" 113 | google_cloud_client_id = "" 114 | google_cloud_client_secret = "" 115 | google_ads_developer_token = "" 116 | google_ads_login_customer_id = "" 117 | config_sheet_id = "" 118 | ``` 119 | Note that the `google_ads_login_customer_id` is the MCC customer ID in Google 120 | Ads. 121 | 122 | 4. Run `terraform plan` and review the proposed changes. 123 | 5. Run `terraform apply` to create the infrastructure. 124 | 6. The email of the created service account will be output; give it view-only 125 | access to the Google Sheet containing the config. 126 | 127 | By default, the code will be triggered every hour by Cloud Scheduler. To test 128 | everything is working, configure the Google Sheet ([see below](#google-sheet)) 129 | and force run the Cloud Scheduler job in the UI. 130 | 131 | ## Google Sheet 132 | 133 | Open your copy of the Google Sheet. This is what you'll be using to configure 134 | the Ads Placement Excluder solution. 135 | 136 | There are notes in the Sheet that contain instructions for how to set this up. 137 | 138 | One area to highlight is that the [basic Translation API]( 139 | https://cloud.google.com/translate/docs/basic/detecting-language) used in the 140 | YouTube service has a cost element to it ([see pricing]( 141 | https://cloud.google.com/translate/pricing)). If you want to include language 142 | filters on the YouTube channel title, ensure that this is enabled. 143 | 144 | If this is disabled, and then enabled at a later date, it does not backfill the 145 | gaps in data. If you wish to backfill this data, manually clear the files in the 146 | Cloud Storage bucket containing the data (essentially deleting the YouTube data), 147 | then re-run Cloud Scheduler. 148 | 149 | ## Disclaimers 150 | __This is not an officially supported Google product.__ 151 | 152 | Copyright 2022 Google LLC.
This solution, including any related sample code or 153 | data, is made available on an “as is,” “as available,” and “with all faults” 154 | basis, solely for illustrative purposes, and without warranty or representation 155 | of any kind. This solution is experimental, unsupported and provided solely for 156 | your convenience. Your use of it is subject to your agreements with Google, as 157 | applicable, and may constitute a beta feature as defined under those agreements. 158 | To the extent that you make any data available to Google in connection with your 159 | use of the solution, you represent and warrant that you have all necessary and 160 | appropriate rights, consents and permissions to permit Google to use and process 161 | that data. By using any portion of this solution, you acknowledge, assume and 162 | accept all risks, known and unknown, associated with its usage, including with 163 | respect to your deployment of any portion of this solution in your systems, or 164 | usage in connection with your business, if at all. 165 | -------------------------------------------------------------------------------- /docs/architecture.md: -------------------------------------------------------------------------------- 1 | # Ads Placement Excluder Architecture 2 | 3 | ## Google Cloud Architecture 4 | 5 | The solution is split into four microservices: 6 | 7 | - Google Ads Account Service 8 | - Google Ads Reporting Service 9 | - YouTube Channel Service 10 | - Google Ads Exclusion Service 11 | 12 | The source code for each of the Cloud Functions can be found under the `/src/` 13 | directory of this repo. Each function has its own README file, which contains 14 | instructions for local deployment. 15 | 16 | ### Google Ads Account Service 17 | 18 | This service is responsible for deciding which Google Ads accounts the Ads 19 | Placement Excluder solution should run for, and kicking off the downstream 20 | pipeline. Each account is pushed as a separate message into the topic to enable 21 | concurrency. 22 | 23 | ![Google Ads Account Architecture Diagram]( 24 | ./images/ape-account-service-architecture-diagram.png) 25 | 26 | 1. Cloud Scheduler triggers the Account Cloud Function. 27 | 2. The function pulls from the Google Sheet the Google Ads customer IDs to run 28 | the code for, and the filters to apply to the Google Ads report. 29 | 3. Each customer ID is pushed in a separate message to Pub/Sub. 30 | 31 | ### Google Ads Reporting Service 32 | 33 | This service is responsible for running a report from Google Ads based on the 34 | [group_placement_view]( 35 | https://developers.google.com/google-ads/api/fields/v11/group_placement_view), 36 | with the configured filters, and outputting that as a CSV to a Cloud Storage 37 | bucket, with a BigQuery table in front of it. The data pulled from the report is 38 | filtered to only have YouTube channels. 39 | 40 | ![Google Ads Report Architecture Diagram]( 41 | ./images/ape-report-service-architecture-diagram.png) 42 | 43 | 1. Pub/Sub triggers the Cloud Function. 44 | 2. The report is downloaded from Google Ads. 45 | 3. The output is written as a CSV to Cloud Storage. 46 | 4. A message is passed to the next Pub/Sub topic. 47 | 48 | ### YouTube Channel Reporting Service 49 | 50 | This service is responsible for pulling metrics about the YouTube channels from 51 | the Google Ads Report. For example the number of views the channel has had, and 52 | the number of subscribers. 
It then uses the Google Translate API (if enabled in 53 | the config) to determine the language of the YouTube channel title. 54 | 55 | ![YouTube Channel Architecture Diagram]( 56 | ./images/ape-youtube-service-architecture-diagram.png) 57 | 58 | 1. Pub/Sub triggers the Cloud Function. 59 | 2. The function reads the new channels that were pulled from Google Ads. It does 60 | not refresh the data for existing channels. 61 | 3. The config is used to determine if the Translate API should be used. 62 | 4. The function pulls the YouTube data for each of the channels in step 2. 63 | 5. If the Translate API filter is enabled, for each channel it will use the API 64 | to detect the language and the confidence level of the prediction. 65 | 6. The output is written as a CSV to Cloud Storage. 66 | 7. A message is passed to the next Pub/Sub topic. 67 | 68 | ### Google Ads Exclusion Service 69 | 70 | The Google Ads Excluder service is responsible for applying the filters in the 71 | config Google Sheet to the data to determine which channels should be excluded 72 | in Google Ads. Channels identified for exclusion are then uploaded to the shared 73 | placement list in Google Ads, and the output written to BigQuery for reporting. 74 | 75 | ![Google Ads Exclusion Architecture Diagram]( 76 | ./images/ape-excluder-service-architecture-diagram.png) 77 | 78 | 1. Pub/Sub triggers the Cloud Function. 79 | 2. The function reads the filters from the config Sheet. 80 | 3. It applies the filters to BigQuery to identify channels that need to be 81 | excluded. 82 | 4. These are then uploaded to Google Ads. 83 | 5. The exclusions are also written to BigQuery for reporting purposes. 84 | 85 | ### Entire Solution Architecture 86 | 87 | Combining the individual services, you can see the overall architecture diagram 88 | below: 89 | 90 | ![Overall Architecture Diagram](./images/ape-architecture-diagram.png) 91 | 92 | 1. Cloud Scheduler triggers the Account Cloud Function. 93 | 2. The function pulls the Google Ads customer IDs to run the code for, and the 94 | filters to apply to the Google Ads report. 95 | 3. Each customer ID is pushed in a separate message to Pub/Sub. 96 | 4. Pub/Sub triggers the Cloud Function. 97 | 5. The report is downloaded from Google Ads. 98 | 6. The output is written as a CSV to Cloud Storage. 99 | 7. A message is passed to the next Pub/Sub topic. 100 | 8. Pub/Sub triggers the Cloud Function. 101 | 9. The function reads the channels that were pulled from Google Ads. 102 | 10. The config is used to determine if the Translate API should be used. 103 | 11. The function pulls the YouTube data for each of the channels in step 9. 104 | 12. If the Translate API filter is enabled, for each channel it will use the API 105 | to detect the language and the confidence level of the prediction. 106 | 13. The output is written as a CSV to Cloud Storage. 107 | 14. A message is passed to the next Pub/Sub topic. 108 | 15. Pub/Sub triggers the Cloud Function. 109 | 16. The function reads the filters from the config Sheet. 110 | 17. It applies the filters to BigQuery to identify channels that need to be 111 | excluded. 112 | 18. These are then uploaded to Google Ads. 113 | 19. The exclusions are also written to BigQuery for reporting purposes.
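Each hand-off in the flow above is a small JSON message published to Pub/Sub and delivered base64-encoded in the event payload. As a rough summary (all values below are placeholders), the payload shapes follow the `message_schema` definitions and publisher code in the services under `/src/`; the names in parentheses are the environment variables that hold the topic used by the publishing service:

```python
# Illustrative Pub/Sub payloads only; all values are placeholders.

# 1. Account service -> report service (APE_ADS_REPORT_PUBSUB_TOPIC), as
#    validated by message_schema in src/google_ads_report/main.py.
report_message = {
    'sheet_id': 'abcdefghijklmnop-mk',
    'customer_id': '1234567890',
    'lookback_days': 90,
    'gads_filters': 'metrics.clicks > 10',
}

# 2. Report service -> YouTube channel service (APE_YOUTUBE_PUBSUB_TOPIC), as
#    built by send_messages_to_pubsub in src/google_ads_report/main.py.
youtube_message = {
    'customer_id': '1234567890',
    'sheet_id': 'abcdefghijklmnop-mk',
}

# 3. YouTube channel service -> excluder service, as validated by
#    message_schema in src/google_ads_excluder/main.py.
excluder_message = {
    'sheet_id': 'abcdefghijklmnop-mk',
    'customer_id': '1234567890',
}
```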
114 | 115 | ## BigQuery External Tables Using Cloud Storage 116 | 117 | The solution uses [BigQuery External tables with a Google Cloud Storage 118 | backend](https://cloud.google.com/bigquery/docs/external-data-cloud-storage), 119 | instead of writing to BigQuery directly due to concurrency. BigQuery has much 120 | stricter limits in place about concurrent writes ([docs]( 121 | https://cloud.google.com/bigquery/quotas)), so if the solution is configured 122 | with several Google Ads accounts, it can run into difficulty when writing 123 | directly to BigQuery. 124 | 125 | Leveraging Cloud Storage removes this limitation, and the 126 | BigQuery External Table provides a way of querying the data using SQL. 127 | 128 | ## Google Ads Exclusions 129 | 130 | There are several places that exclusions can be applied in Google Ads. This 131 | solution applies exclusions to [shared placement exclusion lists]( 132 | https://support.google.com/google-ads/answer/9162992?hl=en-GB). 133 | 134 | The list is configured in the configuration Google Sheet, where you enter the 135 | customer ID of your MCC account, and the ID of the exclusion list. To find the 136 | ID of the list, open the list in the UI and look at the value set in the query 137 | string parameter `sharedSetId`. 138 | 139 | ## Disclaimers 140 | 141 | __This is not an officially supported Google product.__ 142 | 143 | Copyright 2022 Google LLC. This solution, including any related sample code or 144 | data, is made available on an “as is,” “as available,” and “with all faults” 145 | basis, solely for illustrative purposes, and without warranty or representation 146 | of any kind. This solution is experimental, unsupported and provided solely for 147 | your convenience. Your use of it is subject to your agreements with Google, as 148 | applicable, and may constitute a beta feature as defined under those agreements. 149 | To the extent that you make any data available to Google in connection with your 150 | use of the solution, you represent and warrant that you have all necessary and 151 | appropriate rights, consents and permissions to permit Google to use and process 152 | that data. By using any portion of this solution, you acknowledge, assume and 153 | accept all risks, known and unknown, associated with its usage, including with 154 | respect to your deployment of any portion of this solution in your systems, or 155 | usage in connection with your business, if at all. 156 | -------------------------------------------------------------------------------- /src/google_ads_report/main.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | """Output the placement report from Google Ads to BigQuery.""" 15 | import base64 16 | import json 17 | from datetime import datetime, timedelta 18 | import logging 19 | import os 20 | import sys 21 | from typing import Any, Dict, Optional, Tuple 22 | from google.ads.googleads.client import GoogleAdsClient 23 | import jsonschema 24 | import pandas as pd 25 | from utils import gcs 26 | from utils import pubsub 27 | 28 | 29 | logging.basicConfig(stream=sys.stdout) 30 | logger = logging.getLogger(__name__) 31 | logger.setLevel(logging.INFO) 32 | 33 | # The Google Cloud project containing the GCS bucket 34 | GOOGLE_CLOUD_PROJECT = os.environ.get('GOOGLE_CLOUD_PROJECT') 35 | # The bucket to write the data to 36 | APE_GCS_DATA_BUCKET = os.environ.get('APE_GCS_DATA_BUCKET') 37 | # The pub/sub topic to send the success message to 38 | APE_YOUTUBE_PUBSUB_TOPIC = os.environ.get('APE_YOUTUBE_PUBSUB_TOPIC') 39 | 40 | # The schema of the JSON in the event payload 41 | message_schema = { 42 | 'type': 'object', 43 | 'properties': { 44 | 'sheet_id': {'type': 'string'}, 45 | 'customer_id': {'type': 'string'}, 46 | 'lookback_days': {'type': 'number'}, 47 | 'gads_filters': {'type': 'string'}, 48 | }, 49 | 'required': ['sheet_id', 'customer_id', 'lookback_days', 'gads_filters', ] 50 | } 51 | 52 | 53 | def main(event: Dict[str, Any], context: Dict[str, Any]) -> None: 54 | """The entry point: extract the data from the payload and starts the job. 55 | 56 | The pub/sub message must match the message_schema object above. 57 | 58 | Args: 59 | event: A dictionary representing the event data payload. 60 | context: An object containing metadata about the event. 61 | """ 62 | del context 63 | logger.info('Google Ads Reporting Service triggered.') 64 | logger.info('Message: %s', event) 65 | message = base64.b64decode(event['data']).decode('utf-8') 66 | logger.info('Decoded message: %s', message) 67 | message_json = json.loads(message) 68 | logger.info('JSON message: %s', message_json) 69 | 70 | # Will raise jsonschema.exceptions.ValidationError if the schema is invalid 71 | jsonschema.validate(instance=message_json, schema=message_schema) 72 | 73 | start_job( 74 | message_json.get('sheet_id'), 75 | message_json.get('customer_id'), 76 | message_json.get('lookback_days'), 77 | message_json.get('gads_filters'), 78 | ) 79 | 80 | logger.info('Done') 81 | 82 | 83 | def start_job( 84 | sheet_id: str, 85 | customer_id: str, 86 | lookback_days: int, 87 | gads_filters: str, 88 | ) -> None: 89 | """Start the job to run the report from Google Ads & output it. 90 | 91 | Args: 92 | sheet_id: the ID of the Google Sheet containing the config. 93 | customer_id: the customer ID to fetch the Google Ads data for. 94 | lookback_days: the number of days from today to look back when fetching 95 | the report. 96 | gads_filters: the filters to apply to the Google Ads report query 97 | """ 98 | logger.info('Starting job to fetch data for %s', customer_id) 99 | report_df = get_report_df(customer_id, lookback_days, gads_filters) 100 | write_results_to_gcs(report_df, customer_id) 101 | send_messages_to_pubsub(customer_id, sheet_id) 102 | logger.info('Job complete') 103 | 104 | 105 | def get_report_df( 106 | customer_id: str, 107 | lookback_days: int, 108 | gads_filters: str) -> pd.DataFrame: 109 | """Run the placement report in Google Ads & return a Dataframe of the data. 110 | 111 | Args: 112 | customer_id: the customer ID to fetch the Google Ads data for. 
113 | lookback_days: the number of days from today to look back when fetching 114 | the report. 115 | gads_filters: the filters to apply to the Google Ads report query 116 | 117 | Returns: 118 | A Pandas DataFrame containing the report results. 119 | """ 120 | logger.info('Getting report stream for %s', customer_id) 121 | now = datetime.now() 122 | client = GoogleAdsClient.load_from_env(version='v11') 123 | ga_service = client.get_service("GoogleAdsService") 124 | 125 | query = get_report_query(lookback_days, gads_filters) 126 | search_request = client.get_type("SearchGoogleAdsStreamRequest") 127 | search_request.customer_id = customer_id 128 | search_request.query = query 129 | stream = ga_service.search_stream(search_request) 130 | 131 | # The client and iterator needs to be in the same function, as per 132 | # https://github.com/googleads/google-ads-python/issues/384#issuecomment-791639397 133 | # So this can't be refactored out 134 | logger.info('Processing response stream') 135 | data = [] 136 | for batch in stream: 137 | for row in batch.results: 138 | data.append([ 139 | now, 140 | row.customer.id, 141 | row.group_placement_view.placement, 142 | row.group_placement_view.target_url, 143 | row.metrics.impressions, 144 | row.metrics.cost_micros, 145 | row.metrics.conversions, 146 | row.metrics.video_view_rate, 147 | row.metrics.video_views, 148 | row.metrics.clicks, 149 | row.metrics.average_cpm, 150 | row.metrics.ctr, 151 | row.metrics.all_conversions_from_interactions_rate, 152 | ]) 153 | return pd.DataFrame(data, columns=[ 154 | 'datetime_updated', 155 | 'customer_id', 156 | 'channel_id', 157 | 'placement_target_url', 158 | 'impressions', 159 | 'cost_micros', 160 | 'conversions', 161 | 'video_view_rate', 162 | 'video_views', 163 | 'clicks', 164 | 'average_cpm', 165 | 'ctr', 166 | 'all_conversions_from_interactions_rate', 167 | ]) 168 | 169 | 170 | def get_report_query(lookback_days: int, 171 | gads_filters: Optional[str] = None) -> str: 172 | """Build and return the Google Ads report query. 173 | 174 | Args: 175 | lookback_days: the number of days from today to look back when fetching 176 | the report. 177 | gads_filters: the filters to apply to the Google Ads report query 178 | 179 | Return: 180 | The Google Ads query. 181 | """ 182 | logger.info('Getting report query') 183 | date_from, date_to = get_query_dates(lookback_days) 184 | where_query = '' 185 | if gads_filters is not None: 186 | where_query = f'AND {gads_filters}' 187 | query = f""" 188 | SELECT 189 | customer.id, 190 | group_placement_view.placement, 191 | group_placement_view.target_url, 192 | metrics.impressions, 193 | metrics.cost_micros, 194 | metrics.conversions, 195 | metrics.video_views, 196 | metrics.video_view_rate, 197 | metrics.clicks, 198 | metrics.average_cpm, 199 | metrics.ctr, 200 | metrics.all_conversions_from_interactions_rate 201 | FROM 202 | group_placement_view 203 | WHERE group_placement_view.placement_type = "YOUTUBE_CHANNEL" 204 | AND campaign.advertising_channel_type = "VIDEO" 205 | AND segments.date BETWEEN "{date_from}" AND "{date_to}" 206 | {where_query} 207 | """ 208 | logger.info(query) 209 | return query 210 | 211 | 212 | def get_query_dates(lookback_days: int, 213 | today: datetime = None) -> Tuple[str, str]: 214 | """Return a tuple of string dates in %Y-%m-%d format for the GAds report. 215 | 216 | Google Ads queries require a string date in the above format. This function 217 | will lookback X days from today, and return this date as a string. 
218 | 219 | Args: 220 | lookback_days: the number of days from today to look back when fetching 221 | the report. 222 | today: the date representing today. If no date is provided 223 | datetime.today() is used. 224 | 225 | Return: 226 | The string date 227 | """ 228 | logger.info('Getting query dates') 229 | dt_format = '%Y-%m-%d' 230 | if today is None: 231 | today = datetime.today() 232 | date_from = today - timedelta(days=lookback_days) 233 | return ( 234 | date_from.strftime(dt_format), 235 | today.strftime(dt_format), 236 | ) 237 | 238 | 239 | def write_results_to_gcs(report_df: pd.DataFrame, customer_id: str) -> None: 240 | """Write the report dataframe to GCS as a CSV file 241 | 242 | Args: 243 | report_df: the dataframe based on the Google Ads report. 244 | customer_id: the customer ID to fetch the Google Ads data for. 245 | """ 246 | logger.info('Writing results to GCS: %s', APE_GCS_DATA_BUCKET) 247 | number_of_rows = len(report_df.index) 248 | logger.info('There are %s rows', number_of_rows) 249 | if number_of_rows > 0: 250 | blob_name = f'google_ads_report/{customer_id}.csv' 251 | logger.info('Blob name: %s', blob_name) 252 | gcs.upload_blob_from_df( 253 | df=report_df, 254 | blob_name=blob_name, 255 | bucket=APE_GCS_DATA_BUCKET) 256 | logger.info('Blob uploaded to GCS') 257 | else: 258 | logger.info('There is nothing to write to GCS') 259 | 260 | 261 | def send_messages_to_pubsub(customer_id: str, sheet_id: str) -> None: 262 | """Push the customer ID to pub/sub when the job completes. 263 | 264 | Args: 265 | customer_id: the customer ID to fetch the Google Ads data for. 266 | sheet_id: the ID of the Google Sheet containing the config. 267 | """ 268 | message_dict = { 269 | 'customer_id': customer_id, 270 | 'sheet_id': sheet_id, 271 | } 272 | logger.info('Sending message to pub/sub:', message_dict) 273 | pubsub.send_dict_to_pubsub( 274 | message_dict=message_dict, 275 | topic=APE_YOUTUBE_PUBSUB_TOPIC, 276 | gcp_project=GOOGLE_CLOUD_PROJECT) 277 | logger.info('Message published') 278 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 
30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. 
If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 
203 | -------------------------------------------------------------------------------- /src/google_ads_excluder/main.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Filter the data for spam placements and exclude them in Google Ads.""" 15 | import base64 16 | import json 17 | import logging 18 | import os 19 | import sys 20 | from datetime import datetime 21 | from typing import Any, Dict, List, Union 22 | import uuid 23 | import google.auth 24 | import google.auth.credentials 25 | from googleapiclient.discovery import build 26 | from google.ads.googleads.client import GoogleAdsClient 27 | from google.cloud import bigquery 28 | import jsonschema 29 | import pandas as pd 30 | from utils import gcs 31 | 32 | 33 | logging.basicConfig(stream=sys.stdout) 34 | logger = logging.getLogger(__name__) 35 | logger.setLevel(logging.INFO) 36 | 37 | # The Google Cloud project 38 | GOOGLE_CLOUD_PROJECT = os.environ.get('GOOGLE_CLOUD_PROJECT') 39 | # The bucket to write the data to 40 | APE_GCS_DATA_BUCKET = os.environ.get('APE_GCS_DATA_BUCKET') 41 | # The name of the BigQuery Dataset 42 | BQ_DATASET = os.environ.get('APE_BIGQUERY_DATASET') 43 | # Set False to apply the exclusions in Google Ads. If True, the call will be 44 | # made to the API and validated, but the exclusion won't be applied and you 45 | # won't see it in the UI. You probably want this to be True in a dev environment 46 | # and False in prod. 47 | VALIDATE_ONLY = os.environ.get( 48 | 'APE_EXCLUSION_VALIDATE_ONLY', 'False').lower() in ('true', '1', 't') 49 | 50 | # The access scopes used in this function 51 | SCOPES = [ 52 | 'https://www.googleapis.com/auth/spreadsheets.readonly', 53 | 'https://www.googleapis.com/auth/cloud-platform', 54 | ] 55 | 56 | # The schema of the JSON in the event payload 57 | message_schema = { 58 | 'type': 'object', 59 | 'properties': { 60 | 'sheet_id': {'type': 'string'}, 61 | 'customer_id': {'type': 'string'}, 62 | }, 63 | 'required': ['sheet_id', 'customer_id', ] 64 | } 65 | 66 | 67 | def main(event: Dict[str, Any], context: Dict[str, Any]) -> None: 68 | """The entry point: extract the data from the payload and starts the job. 69 | 70 | The pub/sub message must match the message_schema object above. 71 | 72 | Args: 73 | event: A dictionary representing the event data payload. 74 | context: An object containing metadata about the event. 75 | 76 | Raises: 77 | jsonschema.exceptions.ValidationError if the message from pub/sub is not 78 | what is expected. 
79 | """ 80 | del context 81 | logger.info('Google Ads Exclusion service triggered.') 82 | logger.info('Message: %s', event) 83 | message = base64.b64decode(event['data']).decode('utf-8') 84 | logger.info('Decoded message: %s', message) 85 | message_json = json.loads(message) 86 | logger.info('JSON message: %s', message_json) 87 | 88 | # Will raise jsonschema.exceptions.ValidationError if the schema is invalid 89 | jsonschema.validate(instance=message_json, schema=message_schema) 90 | 91 | run(message_json.get('customer_id'), message_json.get('sheet_id')) 92 | 93 | logger.info('Done') 94 | 95 | 96 | def run(customer_id: str, sheet_id: str) -> None: 97 | """Start the job to run the report from Google Ads & output it. 98 | 99 | Args: 100 | customer_id: the Google Ads customer ID to process. 101 | sheet_id: the ID of the Google Sheet containing the config. 102 | """ 103 | logger.info('Starting job to fetch data for %s', customer_id) 104 | credentials = get_auth_credentials() 105 | filters = get_config_filters(sheet_id, credentials) 106 | 107 | placements = get_spam_placements(customer_id, filters, credentials) 108 | if placements is not None: 109 | exclude_placements_in_gads(placements, sheet_id, credentials) 110 | write_results_to_gcs(customer_id, placements) 111 | logger.info('Job complete') 112 | 113 | 114 | def get_auth_credentials() -> google.auth.credentials.Credentials: 115 | """Return credentials for Google APIs.""" 116 | credentials, project_id = google.auth.default(scopes=SCOPES) 117 | return credentials 118 | 119 | 120 | def get_config_filters(sheet_id: str, 121 | credentials: google.auth.credentials.Credentials) -> str: 122 | """Get the filters for identifying a spam placement from the config. 123 | 124 | Args: 125 | sheet_id: the ID of the Google Sheet containing the config. 126 | credentials: Google Auth credentials 127 | 128 | Returns: 129 | SQL WHERE conditions for that can be run on BigQuery, e.g. 130 | view_count > 1000000 AND subscriber_count > 10000 131 | """ 132 | logger.info('Getting config from sheet %s', sheet_id) 133 | 134 | result = get_range_values_from_sheet( 135 | sheet_id, 'yt_exclusion_filters', credentials) 136 | 137 | logger.info('Returned %i rows', len(result)) 138 | filters = youtube_filters_to_sql_string(result) 139 | if len(filters) == 0: 140 | raise google.api_core.exceptions.BadRequest("Filters are not set") 141 | 142 | return filters 143 | 144 | 145 | def get_range_values_from_sheet( 146 | sheet_id: str, 147 | sheet_range: str, 148 | credentials: google.auth.credentials.Credentials 149 | ) -> List[List[str]]: 150 | """Get the values from a named range in the Google Sheet. 151 | 152 | Args: 153 | sheet_id: the Google Sheet ID to fetch data from. 154 | sheet_range: the range in the Google Sheet to get the values from 155 | credentials: Google Auth credentials 156 | 157 | Returns: 158 | Each row in the response represents a row in the Sheet. 159 | """ 160 | logger.info(f'Getting range "{sheet_range}" from sheet: {sheet_id}') 161 | sheets_service = build('sheets', 'v4', credentials=credentials) 162 | sheet = sheets_service.spreadsheets() 163 | return sheet.values().get( 164 | spreadsheetId=sheet_id, 165 | range=sheet_range).execute().get('values', []) 166 | 167 | 168 | def youtube_filters_to_sql_string(config_filters: List[List[str]]) -> str: 169 | """Turn the YouTube filters into a SQL compatible string. 170 | 171 | The config sheet has the filters in a list of lists, these need to be 172 | combined, so they can be used in a WHERE clause in the SQL. 
173 | 174 | Each row is "AND" together. 175 | 176 | Args: 177 | config_filters: the filters from the Google Sheet 178 | 179 | Returns: 180 | A string that can be used in the WHERE statement of SQL Language. 181 | """ 182 | conditions = [] 183 | for row in config_filters: 184 | if len(row) == 3: 185 | conditions.append(f'{row[0]} {row[1]} {row[2]}') 186 | 187 | return ' AND '.join(conditions) 188 | 189 | 190 | def get_spam_placements(customer_id: str, 191 | filters: str, 192 | credentials: google.auth.credentials.Credentials 193 | ) -> Union[List[str], None]: 194 | """Run a query to find spam placements in BigQuery and return as a list. 195 | 196 | Args: 197 | customer_id: the Google Ads customer ID to process. 198 | filters: a string containing WHERE conditions to add to the query based 199 | on the config Google Sheet. 200 | credentials: Google Auth credentials 201 | 202 | Returns: 203 | A list of placement IDs which should be excluded. 204 | """ 205 | 206 | logger.info('Getting spam placements from BigQuery') 207 | logger.info('Connecting to: %s BigQuery', GOOGLE_CLOUD_PROJECT) 208 | client = bigquery.Client( 209 | project=GOOGLE_CLOUD_PROJECT, credentials=credentials) 210 | 211 | query = f""" 212 | SELECT DISTINCT 213 | Yt.channel_id 214 | FROM 215 | `{BQ_DATASET}.GoogleAdsReport` AS Ads 216 | LEFT JOIN 217 | {BQ_DATASET}.YouTubeChannel AS Yt 218 | USING(channel_id) 219 | LEFT JOIN 220 | `{BQ_DATASET}.GoogleAdsExclusion` AS Excluded 221 | USING(channel_id) 222 | WHERE 223 | Ads.customer_id = "{customer_id}" 224 | AND Excluded.channel_id IS NULL 225 | AND ( 226 | Excluded.customer_id = "{customer_id}" 227 | OR Excluded.customer_id IS NULL 228 | ) 229 | AND {filters} 230 | """ 231 | logger.info('Running query: %s', query) 232 | 233 | rows = client.query(query).result() 234 | 235 | if rows.total_rows == 0: 236 | logger.info('There is nothing to update') 237 | return None 238 | channel_ids = [] 239 | for row in rows: 240 | channel_ids.append(row.channel_id) 241 | logger.info('Received %s channel_ids', len(channel_ids)) 242 | return channel_ids 243 | 244 | 245 | def exclude_placements_in_gads( 246 | placements: List[str], 247 | sheet_id: str, 248 | credentials: google.auth.credentials.Credentials = None 249 | ) -> None: 250 | """Exclude the placements in the Google Ads account. 251 | 252 | Args: 253 | placements: a list of YouTube channel IDs which should be excluded. 254 | sheet_id: the ID of the Google Sheet containing the config. 255 | credentials: Google Auth credentials 256 | """ 257 | logger.info('Excluding placements in Google Ads.') 258 | 259 | if credentials is None: 260 | logger.info('No auth credentials provided. 
Fetching them.') 261 | credentials = get_auth_credentials() 262 | 263 | shared_set_id = get_range_values_from_sheet( 264 | sheet_id=sheet_id, 265 | sheet_range='placement_exclusion_list_id', 266 | credentials=credentials)[0][0] 267 | customer_id = get_range_values_from_sheet( 268 | sheet_id=sheet_id, 269 | sheet_range='placement_exclusion_customer_id', 270 | credentials=credentials)[0][0] 271 | 272 | client = GoogleAdsClient.load_from_env(version='v11') 273 | service = client.get_service('SharedCriterionService') 274 | 275 | shared_set = f'customers/{customer_id}/sharedSets/{shared_set_id}' 276 | 277 | operations = [] 278 | logger.info('Processing the %i placements', len(placements)) 279 | for placement in placements: 280 | operation = client.get_type('SharedCriterionOperation') 281 | criterion = operation.create 282 | criterion.shared_set = shared_set 283 | criterion.youtube_channel.channel_id = placement 284 | operations.append(operation) 285 | 286 | placements_len = len(placements) 287 | logger.info('There are %i operations to upload', placements_len) 288 | logger.info('Validate_only mode: %s', VALIDATE_ONLY) 289 | if placements_len > 0: 290 | response = service.mutate_shared_criteria( 291 | request={ 292 | 'validate_only': VALIDATE_ONLY, 293 | 'customer_id': customer_id, 294 | 'operations': operations 295 | } 296 | ) 297 | logger.info('Response from the upload:') 298 | logger.info(response) 299 | 300 | logger.info('Done.') 301 | 302 | 303 | def write_results_to_gcs(customer_id: str, 304 | placements: List[str], 305 | ) -> None: 306 | """Write the exclusions to GCS as a CSV file. 307 | 308 | Historical data is preserved so all file writes have a UUID appended to it. 309 | 310 | Args: 311 | customer_id: the Google Ads customer ID to process. 312 | placements: alist of placement IDs which should be excluded. 
313 | """ 314 | exclusions_df = pd.DataFrame(placements, columns=[ 315 | 'channel_id', 316 | ]) 317 | exclusions_df['customer_id'] = int(customer_id) 318 | exclusions_df['datetime_updated'] = datetime.now() 319 | 320 | logger.info('Writing results to GCS: %s', APE_GCS_DATA_BUCKET) 321 | number_of_rows = len(exclusions_df.index) 322 | logger.info('There are %s rows', number_of_rows) 323 | if number_of_rows > 0: 324 | uuid_str = str(uuid.uuid4()) 325 | blob_name = f'google_ads_exclusion/{customer_id}-{uuid_str}.csv' 326 | logger.info('Blob name: %s', blob_name) 327 | gcs.upload_blob_from_df( 328 | df=exclusions_df, 329 | blob_name=blob_name, 330 | bucket=APE_GCS_DATA_BUCKET) 331 | logger.info('Blob uploaded to GCS') 332 | else: 333 | logger.info('There is nothing to write to GCS') 334 | -------------------------------------------------------------------------------- /terraform/main.tf: -------------------------------------------------------------------------------- 1 | provider "google" { 2 | project = var.project_id 3 | region = var.region 4 | } 5 | 6 | # SERVICE ACCOUNT -------------------------------------------------------------- 7 | resource "google_service_account" "service_account" { 8 | account_id = "ads-placement-excluder-runner" 9 | display_name = "Service Account for running Ads Placement Excluder" 10 | } 11 | resource "google_project_iam_member" "cloud_functions_invoker_role" { 12 | project = var.project_id 13 | role = "roles/cloudfunctions.invoker" 14 | member = "serviceAccount:${google_service_account.service_account.email}" 15 | } 16 | resource "google_project_iam_member" "bigquery_job_user_role" { 17 | project = var.project_id 18 | role = "roles/bigquery.jobUser" 19 | member = "serviceAccount:${google_service_account.service_account.email}" 20 | } 21 | resource "google_project_iam_member" "bigquery_data_viewer_role" { 22 | project = var.project_id 23 | role = "roles/bigquery.dataViewer" 24 | member = "serviceAccount:${google_service_account.service_account.email}" 25 | } 26 | resource "google_project_iam_member" "pubsub_publisher_role" { 27 | project = var.project_id 28 | role = "roles/pubsub.publisher" 29 | member = "serviceAccount:${google_service_account.service_account.email}" 30 | } 31 | resource "google_project_iam_member" "storage_object_admin_role" { 32 | project = var.project_id 33 | role = "roles/storage.objectAdmin" 34 | member = "serviceAccount:${google_service_account.service_account.email}" 35 | } 36 | 37 | # CLOUD STORAGE ---------------------------------------------------------------- 38 | resource "google_storage_bucket" "ape_data_bucket" { 39 | name = "${var.project_id}-ape-data" 40 | location = var.region 41 | force_destroy = true 42 | uniform_bucket_level_access = true 43 | } 44 | # This bucket is used to store the cloud functions for deployment. 
45 | # The project ID is used to make sure the name is globally unique 46 | resource "google_storage_bucket" "function_bucket" { 47 | name = "${var.project_id}-functions" 48 | location = var.region 49 | force_destroy = true 50 | uniform_bucket_level_access = true 51 | 52 | lifecycle_rule { 53 | condition { 54 | age = 1 55 | } 56 | action { 57 | type = "Delete" 58 | } 59 | } 60 | } 61 | 62 | # CLOUD FUNCTIONS -------------------------------------------------------------- 63 | data "archive_file" "google_ads_accounts_zip" { 64 | type = "zip" 65 | output_path = ".temp/google_ads_accounts_source.zip" 66 | source_dir = "../src/google_ads_accounts" 67 | } 68 | data "archive_file" "google_ads_report_zip" { 69 | type = "zip" 70 | output_path = ".temp/google_ads_report_source.zip" 71 | source_dir = "../src/google_ads_report" 72 | } 73 | data "archive_file" "youtube_channel_zip" { 74 | type = "zip" 75 | output_path = ".temp/youtube_channel_source.zip" 76 | source_dir = "../src/youtube_channel/" 77 | } 78 | data "archive_file" "google_ads_excluder_zip" { 79 | type = "zip" 80 | output_path = ".temp/google_ads_excluder_source.zip" 81 | source_dir = "../src/google_ads_excluder/" 82 | } 83 | 84 | resource "google_storage_bucket_object" "google_ads_accounts" { 85 | name = "google_ads_accounts_${data.archive_file.google_ads_accounts_zip.output_md5}.zip" 86 | bucket = google_storage_bucket.function_bucket.name 87 | source = data.archive_file.google_ads_accounts_zip.output_path 88 | depends_on = [data.archive_file.google_ads_accounts_zip] 89 | } 90 | resource "google_storage_bucket_object" "google_ads_report" { 91 | name = "google_ads_report_${data.archive_file.google_ads_report_zip.output_md5}.zip" 92 | bucket = google_storage_bucket.function_bucket.name 93 | source = data.archive_file.google_ads_report_zip.output_path 94 | depends_on = [data.archive_file.google_ads_report_zip] 95 | } 96 | resource "google_storage_bucket_object" "youtube_channel" { 97 | name = "youtube_channel_${data.archive_file.youtube_channel_zip.output_md5}.zip" 98 | bucket = google_storage_bucket.function_bucket.name 99 | source = data.archive_file.youtube_channel_zip.output_path 100 | depends_on = [data.archive_file.youtube_channel_zip] 101 | } 102 | resource "google_storage_bucket_object" "google_ads_excluder" { 103 | name = "google_ads_excluder_${data.archive_file.google_ads_excluder_zip.output_md5}.zip" 104 | bucket = google_storage_bucket.function_bucket.name 105 | source = data.archive_file.google_ads_excluder_zip.output_path 106 | depends_on = [data.archive_file.google_ads_excluder_zip] 107 | } 108 | 109 | resource "google_cloudfunctions_function" "google_ads_accounts_function" { 110 | region = var.region 111 | name = "ape-google_ads_accounts" 112 | description = "Identify which reports to run the Google Ads report for." 
113 | runtime = "python310" 114 | source_archive_bucket = google_storage_bucket.function_bucket.name 115 | source_archive_object = google_storage_bucket_object.google_ads_accounts.name 116 | service_account_email = google_service_account.service_account.email 117 | timeout = 540 118 | available_memory_mb = 1024 119 | entry_point = "main" 120 | trigger_http = true 121 | 122 | environment_variables = { 123 | GOOGLE_CLOUD_PROJECT = var.project_id 124 | APE_ADS_REPORT_PUBSUB_TOPIC = google_pubsub_topic.google_ads_report_pubsub_topic.name 125 | } 126 | } 127 | resource "google_cloudfunctions_function" "google_ads_report_function" { 128 | region = var.region 129 | name = "ape-google_ads_report" 130 | description = "Move the placement report from Google Ads to BigQuery." 131 | runtime = "python310" 132 | source_archive_bucket = google_storage_bucket.function_bucket.name 133 | source_archive_object = google_storage_bucket_object.google_ads_report.name 134 | service_account_email = google_service_account.service_account.email 135 | timeout = 540 136 | available_memory_mb = 1024 137 | entry_point = "main" 138 | 139 | event_trigger { 140 | event_type = "providers/cloud.pubsub/eventTypes/topic.publish" 141 | resource = google_pubsub_topic.google_ads_report_pubsub_topic.name 142 | } 143 | 144 | environment_variables = { 145 | GOOGLE_ADS_USE_PROTO_PLUS = false 146 | GOOGLE_ADS_REFRESH_TOKEN = var.oauth_refresh_token 147 | GOOGLE_ADS_CLIENT_ID = var.google_cloud_client_id 148 | GOOGLE_ADS_CLIENT_SECRET = var.google_cloud_client_secret 149 | GOOGLE_ADS_DEVELOPER_TOKEN = var.google_ads_developer_token 150 | GOOGLE_ADS_LOGIN_CUSTOMER_ID = var.google_ads_login_customer_id 151 | GOOGLE_CLOUD_PROJECT = var.project_id 152 | APE_GCS_DATA_BUCKET = google_storage_bucket.ape_data_bucket.name 153 | APE_YOUTUBE_PUBSUB_TOPIC = google_pubsub_topic.youtube_pubsub_topic.name 154 | } 155 | } 156 | resource "google_cloudfunctions_function" "youtube_channel_function" { 157 | region = var.region 158 | name = "ape-youtube_channels" 159 | description = "Pull the channel data from the YouTube API." 
160 | runtime = "python310" 161 | source_archive_bucket = google_storage_bucket.function_bucket.name 162 | source_archive_object = google_storage_bucket_object.youtube_channel.name 163 | service_account_email = google_service_account.service_account.email 164 | timeout = 540 165 | available_memory_mb = 1024 166 | entry_point = "main" 167 | 168 | event_trigger { 169 | event_type = "providers/cloud.pubsub/eventTypes/topic.publish" 170 | resource = google_pubsub_topic.youtube_pubsub_topic.name 171 | } 172 | 173 | environment_variables = { 174 | GOOGLE_CLOUD_PROJECT = var.project_id 175 | APE_ADS_EXCLUDER_PUBSUB_TOPIC = google_pubsub_topic.google_ads_excluder_pubsub_topic.name 176 | APE_BIGQUERY_DATASET = google_bigquery_dataset.dataset.dataset_id 177 | APE_GCS_DATA_BUCKET = google_storage_bucket.ape_data_bucket.name 178 | } 179 | } 180 | resource "google_cloudfunctions_function" "google_ads_excluder_function" { 181 | region = var.region 182 | name = "ape-google_ads_excluder" 183 | description = "Exclude the channels in Google Ads" 184 | runtime = "python310" 185 | source_archive_bucket = google_storage_bucket.function_bucket.name 186 | source_archive_object = google_storage_bucket_object.google_ads_excluder.name 187 | service_account_email = google_service_account.service_account.email 188 | timeout = 540 189 | available_memory_mb = 1024 190 | entry_point = "main" 191 | 192 | event_trigger { 193 | event_type = "providers/cloud.pubsub/eventTypes/topic.publish" 194 | resource = google_pubsub_topic.google_ads_excluder_pubsub_topic.name 195 | } 196 | 197 | environment_variables = { 198 | GOOGLE_CLOUD_PROJECT = var.project_id 199 | GOOGLE_ADS_USE_PROTO_PLUS = false 200 | GOOGLE_ADS_REFRESH_TOKEN = var.oauth_refresh_token 201 | GOOGLE_ADS_CLIENT_ID = var.google_cloud_client_id 202 | GOOGLE_ADS_CLIENT_SECRET = var.google_cloud_client_secret 203 | GOOGLE_ADS_DEVELOPER_TOKEN = var.google_ads_developer_token 204 | GOOGLE_ADS_LOGIN_CUSTOMER_ID = var.google_ads_login_customer_id 205 | APE_BIGQUERY_DATASET = google_bigquery_dataset.dataset.dataset_id 206 | APE_GCS_DATA_BUCKET = google_storage_bucket.ape_data_bucket.name 207 | } 208 | } 209 | 210 | # BIGQUERY --------------------------------------------------------------------- 211 | resource "google_bigquery_dataset" "dataset" { 212 | dataset_id = var.bq_dataset 213 | location = var.region 214 | description = "Ads Placement Excluder BQ Dataset" 215 | delete_contents_on_destroy = true 216 | } 217 | resource "google_bigquery_table" "google_ads_report_table" { 218 | dataset_id = google_bigquery_dataset.dataset.dataset_id 219 | table_id = "GoogleAdsReport" 220 | deletion_protection = false 221 | 222 | external_data_configuration { 223 | autodetect = false 224 | source_format = "CSV" 225 | source_uris = [ 226 | "gs://${google_storage_bucket.ape_data_bucket.name}/google_ads_report/*.csv" 227 | ] 228 | schema = file("../src/google_ads_report/bq_schema.json") 229 | csv_options { 230 | quote = "" 231 | skip_leading_rows = "1" 232 | } 233 | } 234 | } 235 | resource "google_bigquery_table" "youtube_channel_table" { 236 | dataset_id = google_bigquery_dataset.dataset.dataset_id 237 | table_id = "YouTubeChannel" 238 | deletion_protection = false 239 | 240 | external_data_configuration { 241 | autodetect = false 242 | source_format = "CSV" 243 | source_uris = [ 244 | "gs://${google_storage_bucket.ape_data_bucket.name}/youtube_channel/*.csv" 245 | ] 246 | schema = file("../src/youtube_channel/bq_schema.json") 247 | csv_options { 248 | quote = "" 249 | skip_leading_rows = 
"1" 250 | } 251 | } 252 | } 253 | resource "google_bigquery_table" "google_ads_exclusions_table" { 254 | dataset_id = google_bigquery_dataset.dataset.dataset_id 255 | table_id = "GoogleAdsExclusion" 256 | deletion_protection = false 257 | 258 | external_data_configuration { 259 | autodetect = false 260 | source_format = "CSV" 261 | source_uris = [ 262 | "gs://${google_storage_bucket.ape_data_bucket.name}/google_ads_exclusion/*.csv" 263 | ] 264 | schema = file("../src/google_ads_excluder/bq_schema.json") 265 | csv_options { 266 | quote = "" 267 | skip_leading_rows = "1" 268 | } 269 | } 270 | } 271 | resource "google_bigquery_table" "exclusions_report" { 272 | dataset_id = google_bigquery_dataset.dataset.dataset_id 273 | table_id = "ViewExclusions" 274 | deletion_protection = false 275 | depends_on = [ 276 | google_bigquery_dataset.dataset, 277 | google_bigquery_table.google_ads_report_table, 278 | google_bigquery_table.youtube_channel_table, 279 | google_bigquery_table.google_ads_exclusions_table 280 | ] 281 | view { 282 | query = templatefile( 283 | "../src/reporting/exclusions_report.sql", 284 | { 285 | BQ_DATASET = google_bigquery_dataset.dataset.dataset_id 286 | } 287 | ) 288 | use_legacy_sql = false 289 | } 290 | } 291 | 292 | # PUB/SUB ---------------------------------------------------------------------- 293 | resource "google_pubsub_topic" "google_ads_report_pubsub_topic" { 294 | name = "ape-google-ads-report-topic" 295 | message_retention_duration = "604800s" 296 | } 297 | resource "google_pubsub_topic" "youtube_pubsub_topic" { 298 | name = "ape-youtube-channel-topic" 299 | message_retention_duration = "604800s" 300 | } 301 | resource "google_pubsub_topic" "google_ads_excluder_pubsub_topic" { 302 | name = "ape-google-ads-excluder-topic" 303 | message_retention_duration = "604800s" 304 | } 305 | 306 | # CLOUD_SCHEDULER -------------------------------------------------------------- 307 | locals { 308 | scheduler_body = < None: 69 | """The entry point: extract the data from the payload and starts the job. 70 | 71 | The pub/sub message must match the message_schema object above. 72 | 73 | Args: 74 | event: A dictionary representing the event data payload. 75 | context: An object containing metadata about the event. 76 | 77 | Raises: 78 | jsonschema.exceptions.ValidationError if the message from pub/sub is not 79 | what is expected. 80 | """ 81 | del context 82 | logger.info('YouTube channel service triggered.') 83 | logger.info('Message: %s', event) 84 | message = base64.b64decode(event['data']).decode('utf-8') 85 | logger.info('Decoded message: %s', message) 86 | message_json = json.loads(message) 87 | logger.info('JSON message: %s', message_json) 88 | 89 | # Will raise jsonschema.exceptions.ValidationError if the schema is invalid 90 | jsonschema.validate(instance=message_json, schema=message_schema) 91 | 92 | run(message_json.get('customer_id'), message_json.get('sheet_id')) 93 | 94 | logger.info('Done') 95 | 96 | 97 | def run(customer_id: str, sheet_id: str) -> None: 98 | """Orchestration to pull YouTube data and output it to BigQuery. 99 | 100 | Args: 101 | customer_id: the Google Ads customer ID to process. 102 | sheet_id: the ID of the Google Sheet containing the config. 
103 | """ 104 | credentials = get_auth_credentials() 105 | channel_ids = get_placements_query(customer_id, credentials) 106 | if len(channel_ids) > 0: 107 | get_youtube_dataframe(channel_ids, sheet_id, customer_id, credentials) 108 | else: 109 | logger.info('No channel IDs to process') 110 | send_messages_to_pubsub(customer_id, sheet_id) 111 | logger.info('Done') 112 | 113 | 114 | def get_auth_credentials() -> google.auth.credentials.Credentials: 115 | """Return credentials for Google APIs.""" 116 | credentials, _ = google.auth.default() 117 | return credentials 118 | 119 | 120 | def get_placements_query( 121 | customer_id: str, 122 | credentials: google.auth.credentials.Credentials 123 | ) -> List[str]: 124 | """Get the placements from the Google Ads report in BigQuery. 125 | 126 | Args: 127 | customer_id: the Google Ads customer ID to process. 128 | credentials: Google Auth credentials 129 | 130 | Returns: 131 | A list of placement IDs that need to be pulled from YouTube 132 | """ 133 | logger.info('Getting Placements from Google Ads') 134 | logger.info('Connecting to BigQuery in project: %s', GOOGLE_CLOUD_PROJECT) 135 | client = bigquery.Client( 136 | project=GOOGLE_CLOUD_PROJECT, credentials=credentials) 137 | 138 | query = f""" 139 | SELECT DISTINCT 140 | Ads.channel_id 141 | FROM 142 | `{BQ_DATASET}.GoogleAdsReport` AS Ads 143 | LEFT JOIN 144 | `{BQ_DATASET}.YouTubeChannel` AS YouTube 145 | USING(channel_id) 146 | WHERE 147 | Ads.customer_id = "{customer_id}" 148 | AND YouTube.channel_id IS NULL 149 | """ 150 | logger.info('Running query: %s', query) 151 | rows = client.query(query).result() 152 | channel_ids = [] 153 | for row in rows: 154 | channel_ids.append(row.channel_id) 155 | logger.info('Received %s channel_ids', len(channel_ids)) 156 | return channel_ids 157 | 158 | 159 | def get_youtube_dataframe( 160 | channel_ids: List[str], 161 | sheet_id: str, 162 | customer_id: str, 163 | credentials: google.auth.credentials.Credentials 164 | ) -> None: 165 | """Pull information on each of the provided channels from the YouTube API. 166 | 167 | The YouTube API only allows pulling up to 50 channels in each request, so 168 | multiple requests have to be made to pull all the data. See the docs for 169 | more details: 170 | https://developers.google.com/youtube/v3/docs/channels/list 171 | 172 | Args: 173 | channel_ids: the channel IDs to pull the info on from YouTube 174 | sheet_id: the ID of the Google Sheet containing the config. 175 | customer_id: the Google Ads customer ID to process. 176 | credentials: Google Auth credentials 177 | """ 178 | logger.info('Getting YouTube data for channel IDs') 179 | # Maximum number of channels per YouTube request.
See: 180 | # https://developers.google.com/youtube/v3/docs/channels/list 181 | chunk_size = 50 182 | chunks = split_list_to_chunks(channel_ids, chunk_size) 183 | number_of_chunks = len(chunks) 184 | 185 | logger.info('Connecting to the YouTube API') 186 | youtube = build('youtube', 'v3', credentials=credentials) 187 | is_translated = get_translate_filter(sheet_id, credentials) 188 | 189 | for i, chunk in enumerate(chunks): 190 | logger.info('Processing chunk %i of %i', i + 1, number_of_chunks) 191 | chunk_list = list(chunk) 192 | request = youtube.channels().list( 193 | part='id, statistics, snippet, brandingSettings', 194 | id=chunk_list, 195 | maxResults=chunk_size) 196 | response = request.execute() 197 | channels = process_youtube_response(response, chunk_list, is_translated) 198 | youtube_df = pd.DataFrame(channels, columns=[ 199 | 'channel_id', 200 | 'view_count', 201 | 'video_count', 202 | 'subscriber_count', 203 | 'title', 204 | 'title_language', 205 | 'title_language_confidence', 206 | 'country', 207 | ]) 208 | youtube_df['datetime_updated'] = datetime.now() 209 | youtube_df = sanitise_youtube_dataframe(youtube_df) 210 | write_results_to_gcs(youtube_df, customer_id) 211 | logger.info('YouTube channel info complete') 212 | 213 | 214 | def sanitise_youtube_dataframe(youtube_df: pd.DataFrame) -> pd.DataFrame: 215 | """Takes the dataframe from YouTube and sanitises it to write as a CSV. 216 | 217 | Args: 218 | youtube_df: the dataframe containing the YouTube data 219 | 220 | Returns: 221 | The YouTube dataframe but sanitised to be safe to write to a CSV. 222 | """ 223 | youtube_df = youtube_df.astype({ 224 | 'view_count': 'int', 225 | 'video_count': 'int', 226 | 'subscriber_count': 'int', 227 | 'title_language_confidence': 'float', 228 | }) 229 | # remove problematic characters from the title field as they break BigQuery 230 | # even when escaped in the CSV 231 | youtube_df['title'] = youtube_df['title'].str.replace( 232 | APE_CSV_PROBLEM_CHARACTERS_REGEX, '', regex=True) 233 | youtube_df['title'] = youtube_df['title'].str.strip() 234 | return youtube_df 235 | 236 | 237 | def split_list_to_chunks( 238 | lst: List[Any], max_size_of_chunk: int) -> List[np.ndarray]: 239 | """Split the list into chunks no larger than the specified maximum size. 240 | 241 | Args: 242 | lst: The list to be split into chunks 243 | max_size_of_chunk: the maximum number of elements that should be in a 244 | chunk. 245 | 246 | Returns: 247 | A list containing numpy array chunks of the original list. 248 | """ 249 | logger.info('Splitting list into chunks') 250 | num_of_chunks = math.ceil(len(lst) / max_size_of_chunk) 251 | chunks = np.array_split(lst, num_of_chunks) 252 | logger.info('Split list into %i chunks', num_of_chunks) 253 | return chunks 254 | 255 | 256 | def process_youtube_response( 257 | response: Dict[str, Any], 258 | channel_ids: List[str], 259 | is_translated: bool, 260 | ) -> List[List[Any]]: 261 | """Process the YouTube response to extract the required information.
262 | 263 | Args: 264 | response: The YouTube channels list response 265 | https://developers.google.com/youtube/v3/docs/channels/list#response 266 | channel_ids: A list of the channel IDs passed in the request 267 | is_translated: A flag showing whether the language of the YouTube channel title should be detected 268 | 269 | Returns: 270 | A list of lists where each inner list holds the data for one channel 271 | """ 272 | logger.info('Processing YouTube response') 273 | data = [] 274 | if response.get('pageInfo').get('totalResults') == 0: 275 | logger.warning('The YouTube response has no results: %s', response) 276 | logger.warning(channel_ids) 277 | return data 278 | 279 | for channel in response['items']: 280 | title = channel.get('snippet').get('title', '') 281 | if is_translated: 282 | title_language, confidence = detect_language(title) 283 | else: 284 | title_language = '' 285 | confidence = 0 286 | data.append([ 287 | channel.get('id'), 288 | channel.get('statistics').get('viewCount', None), 289 | channel.get('statistics').get('videoCount', None), 290 | channel.get('statistics').get('subscriberCount', None), 291 | title, 292 | title_language, 293 | confidence, 294 | channel.get('snippet').get('country', ''), 295 | ]) 296 | return data 297 | 298 | 299 | def get_translate_filter( 300 | sheet_id: str, 301 | credentials: google.auth.credentials.Credentials 302 | ) -> bool: 303 | """Get the filter for YouTube channel title translation. 304 | 305 | Args: 306 | sheet_id: the ID of the Google Sheet containing the config. 307 | credentials: Google Auth credentials 308 | 309 | Returns: 310 | True if the filter is enabled, False otherwise 311 | """ 312 | logger.info('Getting config from sheet %s', sheet_id) 313 | 314 | sheets_service = build('sheets', 'v4', credentials=credentials) 315 | sheet = sheets_service.spreadsheets() 316 | 317 | result = sheet.values().get( 318 | spreadsheetId=sheet_id, 319 | range='yt_translation_filter').execute().get('values', [['Disabled']])[0][0] 320 | 321 | is_enabled = result == 'Enabled' 322 | logger.info('Translation filter enabled is %s', is_enabled) 323 | 324 | return is_enabled 325 | 326 | 327 | def detect_language(text: str) -> Tuple[str, float]: 328 | """Detects the text's language. 329 | 330 | Args: 331 | text: the text to detect the language of 332 | 333 | Returns: 334 | A tuple containing the language and the confidence. 335 | """ 336 | logger.debug('Detecting language for %s', text) 337 | translate_client = translate.Client() 338 | result = translate_client.detect_language(text) 339 | return result['language'], result['confidence'] 340 | 341 | 342 | def write_results_to_gcs(youtube_df: pd.DataFrame, customer_id: str) -> None: 343 | """Write the YouTube dataframe to GCS as a CSV file. 344 | 345 | Historical data is preserved, so each file written has a UUID appended to its name. 346 | 347 | Args: 348 | youtube_df: the dataframe based on the YouTube data. 349 | customer_id: the customer ID to fetch the Google Ads data for.
350 | """ 351 | logger.info('Writing results to GCS: %s', APE_GCS_DATA_BUCKET) 352 | number_of_rows = len(youtube_df.index) 353 | logger.info('There are %s rows', number_of_rows) 354 | if number_of_rows > 0: 355 | uuid_str = str(uuid.uuid4()) 356 | blob_name = f'youtube_channel/{customer_id}-{uuid_str}.csv' 357 | logger.info('Blob name: %s', blob_name) 358 | gcs.upload_blob_from_df( 359 | df=youtube_df, 360 | blob_name=blob_name, 361 | bucket=APE_GCS_DATA_BUCKET) 362 | logger.info('Blob uploaded to GCS') 363 | else: 364 | logger.info('There is nothing to write to GCS') 365 | 366 | 367 | def send_messages_to_pubsub(customer_id: str, sheet_id: str) -> None: 368 | """Push the customer ID to pub/sub when the job completes. 369 | 370 | Args: 371 | customer_id: the customer ID to fetch the Google Ads data for. 372 | sheet_id: the ID of the Google Sheet containing the config. 373 | """ 374 | message_dict = { 375 | 'customer_id': customer_id, 376 | 'sheet_id': sheet_id, 377 | } 378 | logger.info('Sending message to pub/sub: %s', message_dict) 379 | pubsub.send_dict_to_pubsub( 380 | message_dict=message_dict, 381 | topic=APE_ADS_EXCLUDER_PUBSUB_TOPIC, 382 | gcp_project=GOOGLE_CLOUD_PROJECT) 383 | logger.info('Message published') 384 | --------------------------------------------------------------------------------
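The youtube_channel service above is a Pub/Sub-triggered Cloud Function: its entry point base64-decodes the message payload, validates it against the message schema, and expects a customer_id and a sheet_id. The snippet below is a minimal local-invocation sketch of that contract, not code from the repository: the customer_id and sheet_id values are placeholders, it assumes it is run from src/youtube_channel with the dev requirements installed, and it assumes main() can be imported and called directly with a Pub/Sub-style event dict.

# Hypothetical local-invocation sketch (not part of the repository).
# Assumptions: run from src/youtube_channel, placeholder IDs below, and the
# GOOGLE_CLOUD_PROJECT / APE_* environment variables set so the function can
# reach BigQuery, the YouTube API and Pub/Sub.
import base64
import json

import main as youtube_channel_main

message = {
    'customer_id': '1234567890',         # placeholder Google Ads customer ID
    'sheet_id': 'your-config-sheet-id',  # placeholder config spreadsheet ID
}
event = {
    # Pub/Sub delivers the payload base64-encoded under the 'data' key.
    'data': base64.b64encode(json.dumps(message).encode('utf-8')),
}
youtube_channel_main.main(event=event, context=None)

Publishing an equivalent JSON message to the ape-youtube-channel-topic Pub/Sub topic defined in terraform/main.tf would trigger the deployed function in the same way.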