├── src
├── google_ads_report
│ ├── __init__.py
│ ├── utils
│ │ ├── __init__.py
│ │ ├── pubsub.py
│ │ └── gcs.py
│ ├── requirements_dev.txt
│ ├── requirements.txt
│ ├── bq_schema.json
│ ├── README.md
│ ├── main_test.py
│ └── main.py
├── youtube_channel
│ ├── __init__.py
│ ├── utils
│ │ ├── __init__.py
│ │ ├── pubsub.py
│ │ └── gcs.py
│ ├── requirements_dev.txt
│ ├── requirements.txt
│ ├── bq_schema.json
│ ├── main_test.py
│ ├── README.md
│ └── main.py
├── google_ads_accounts
│ ├── __init__.py
│ ├── utils
│ │ ├── __init__.py
│ │ └── pubsub.py
│ ├── requirements_dev.txt
│ ├── requirements.txt
│ ├── main_test.py
│ ├── README.md
│ └── main.py
├── google_ads_excluder
│ ├── __init__.py
│ ├── utils
│ │ ├── __init__.py
│ │ └── gcs.py
│ ├── requirements_dev.txt
│ ├── requirements.txt
│ ├── bq_schema.json
│ ├── README.md
│ └── main.py
└── reporting
│ ├── README.md
│ └── exclusions_report.sql
├── terraform
├── backend.tf
├── outputs.tf
├── variables.tf
└── main.tf
├── docs
├── images
│ ├── cloud-shell.png
│ ├── oauth-configuration.png
│ ├── ape-architecture-diagram.png
│ ├── ape-datastudio-report-example.png
│ ├── ape-account-service-architecture-diagram.png
│ ├── ape-report-service-architecture-diagram.png
│ ├── ape-youtube-service-architecture-diagram.png
│ └── ape-excluder-service-architecture-diagram.png
├── reporting.md
├── deployment.md
└── architecture.md
├── .gitignore
├── contributing.md
├── README.md
└── LICENSE
/src/google_ads_report/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/src/youtube_channel/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/src/google_ads_accounts/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/src/google_ads_excluder/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/src/youtube_channel/utils/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/src/google_ads_accounts/utils/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/src/google_ads_excluder/utils/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/src/google_ads_report/utils/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/src/google_ads_report/requirements_dev.txt:
--------------------------------------------------------------------------------
1 | -r requirements.txt
2 |
3 | functions-framework==3.1.0
4 |
--------------------------------------------------------------------------------
/src/youtube_channel/requirements_dev.txt:
--------------------------------------------------------------------------------
1 | -r requirements.txt
2 |
3 | functions-framework==3.1.0
4 |
--------------------------------------------------------------------------------
/src/google_ads_accounts/requirements_dev.txt:
--------------------------------------------------------------------------------
1 | -r requirements.txt
2 |
3 | functions-framework==3.1.0
4 |
--------------------------------------------------------------------------------
/src/google_ads_excluder/requirements_dev.txt:
--------------------------------------------------------------------------------
1 | -r requirements.txt
2 |
3 | functions-framework==3.1.0
4 |
--------------------------------------------------------------------------------
/terraform/backend.tf:
--------------------------------------------------------------------------------
terraform {
  # Remote state in Google Cloud Storage. Only the object prefix is fixed
  # here; the bucket name is not set, so it must be supplied at init time
  # (e.g. `terraform init -backend-config="bucket=..."`).
  backend "gcs" {
    prefix = "terraform/state"
  }
}
6 |
--------------------------------------------------------------------------------
/docs/images/cloud-shell.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/ads-placement-excluder/main/docs/images/cloud-shell.png
--------------------------------------------------------------------------------
/terraform/outputs.tf:
--------------------------------------------------------------------------------
# Expose the email of the solution's service account (resource defined
# elsewhere in this configuration) so operators can grant it the access it
# needs after deployment.
output "service_account_email" {
  value = google_service_account.service_account.email
}
4 |
--------------------------------------------------------------------------------
/docs/images/oauth-configuration.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/ads-placement-excluder/main/docs/images/oauth-configuration.png
--------------------------------------------------------------------------------
/docs/images/ape-architecture-diagram.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/ads-placement-excluder/main/docs/images/ape-architecture-diagram.png
--------------------------------------------------------------------------------
/docs/images/ape-datastudio-report-example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/ads-placement-excluder/main/docs/images/ape-datastudio-report-example.png
--------------------------------------------------------------------------------
/src/google_ads_report/requirements.txt:
--------------------------------------------------------------------------------
1 | google-ads==18.0.0
2 | google-cloud-pubsub==2.13.4
3 | google-cloud-storage==2.5.0
4 | jsonschema==4.9.1
5 | pandas==1.4.3
6 |
--------------------------------------------------------------------------------
/docs/images/ape-account-service-architecture-diagram.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/ads-placement-excluder/main/docs/images/ape-account-service-architecture-diagram.png
--------------------------------------------------------------------------------
/docs/images/ape-report-service-architecture-diagram.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/ads-placement-excluder/main/docs/images/ape-report-service-architecture-diagram.png
--------------------------------------------------------------------------------
/docs/images/ape-youtube-service-architecture-diagram.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/ads-placement-excluder/main/docs/images/ape-youtube-service-architecture-diagram.png
--------------------------------------------------------------------------------
/docs/images/ape-excluder-service-architecture-diagram.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/ads-placement-excluder/main/docs/images/ape-excluder-service-architecture-diagram.png
--------------------------------------------------------------------------------
/src/google_ads_accounts/requirements.txt:
--------------------------------------------------------------------------------
1 | Flask==2.1.3
2 | google-auth-httplib2==0.1.0
3 | google-auth-oauthlib==0.5.2
4 | google-api-python-client==2.55.0
5 | google-cloud-pubsub==2.13.4
6 | jsonschema==4.9.1
7 | pydata-google-auth==1.4.0
8 |
--------------------------------------------------------------------------------
/src/google_ads_excluder/requirements.txt:
--------------------------------------------------------------------------------
1 | google-ads==18.0.0
2 | google-auth-httplib2==0.1.0
3 | google-auth-oauthlib==0.5.2
4 | google-api-python-client==2.55.0
5 | google-cloud-bigquery==3.3.0
6 | google-cloud-pubsub==2.13.4
7 | google-cloud-storage==2.2.1
8 | jsonschema==4.9.1
9 | pandas==1.4.3
10 |
--------------------------------------------------------------------------------
/src/youtube_channel/requirements.txt:
--------------------------------------------------------------------------------
1 | google-api-python-client==2.55.0
2 | google-auth==1.35.0
3 | google-auth-httplib2==0.1.0
4 | google-auth-oauthlib==0.5.2
5 | google-cloud-bigquery==3.3.2
6 | google-cloud-pubsub==2.13.6
7 | google-cloud-storage==2.2.1
8 | google-cloud-translate==2.0.1
9 | jsonschema==4.9.1
10 | numpy==1.23.1
11 | pandas==1.4.3
12 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Temp deployment
2 | .temp/
3 | *.zip
4 |
5 | # Virtual envs
6 | env/
7 | venv/
8 | venvs/
9 |
10 | # IDEs
11 | .idea/
12 |
13 | # Terraform
14 | **/*.tfvars
15 | **/.terraform/*
16 | *.tfstate
17 | *.tfstate.*
18 | .terraform.lock.hcl
19 | out/
20 |
21 | # Python
22 | __pycache__
23 |
24 | # Google Cloud credentials
25 | creds.json
26 |
27 | # OS
28 | .DS_STORE
29 |
--------------------------------------------------------------------------------
/src/google_ads_excluder/bq_schema.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "name": "channel_id",
4 | "type": "STRING",
5 | "mode": "REQUIRED",
6 | "description": "The YouTube Channel ID"
7 | },
8 | {
9 | "name": "customer_id",
10 | "type": "STRING",
11 | "mode": "REQUIRED",
12 | "description": "The Google Ads Customer ID where this placement originated"
13 | },
14 | {
15 | "name": "datetime_updated",
16 | "type": "TIMESTAMP",
17 | "mode": "REQUIRED",
18 | "description": "The datetime the exclusion was made"
19 | }
20 | ]
21 |
--------------------------------------------------------------------------------
/contributing.md:
--------------------------------------------------------------------------------
1 | # How to Contribute
2 |
3 | We'd love to accept your patches and contributions to this project. There are
4 | just a few small guidelines you need to follow.
5 |
6 | ## Contributor License Agreement
7 |
8 | Contributions to this project must be accompanied by a Contributor License
9 | Agreement (CLA). You (or your employer) retain the copyright to your
10 | contribution; this simply gives us permission to use and redistribute your
11 | contributions as part of the project. Head over to
12 | <https://cla.developers.google.com/> to see your current agreements on file
13 | or to sign a new one.
14 |
15 | You generally only need to submit a CLA once, so if you've already submitted one
16 | (even if it was for a different project), you probably don't need to do it
17 | again.
18 |
19 | ## Code Reviews
20 |
21 | All submissions, including submissions by project members, require review. We
22 | use GitHub pull requests for this purpose. Consult
23 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
24 | information on using pull requests.
25 |
26 | ## Community Guidelines
27 |
28 | This project follows
29 | [Google's Open Source Community Guidelines](https://opensource.google/conduct/).
30 |
--------------------------------------------------------------------------------
/terraform/variables.tf:
--------------------------------------------------------------------------------
variable "project_id" {
  type        = string
  description = "The project ID to deploy the resources to"
}

variable "region" {
  type        = string
  description = "The region to deploy the resources to, e.g. europe-west2"
  default     = "europe-west2"
}

variable "oauth_refresh_token" {
  type        = string
  description = "The OAuth refresh token"
  # Secret: redact from plan/apply output and logs (requires Terraform >= 0.14).
  sensitive   = true
}

variable "google_cloud_client_id" {
  type        = string
  description = "The client ID from Google Cloud"
}

variable "google_cloud_client_secret" {
  type        = string
  description = "The client secret from Google Cloud"
  # Secret: redact from plan/apply output and logs.
  sensitive   = true
}

variable "google_ads_developer_token" {
  type        = string
  description = "The Google Ads developer token"
  # Secret: redact from plan/apply output and logs.
  sensitive   = true
}

variable "google_ads_login_customer_id" {
  type        = string
  description = "The Google Ads MCC customer ID with no dashes"
}

variable "config_sheet_id" {
  type        = string
  description = "The Google Sheet ID containing the config"
}

variable "bq_dataset" {
  type        = string
  description = "The name of the BQ dataset"
  default     = "ads_placement_excluder"
}
--------------------------------------------------------------------------------
/src/reporting/README.md:
--------------------------------------------------------------------------------
1 | # Ads Placement Excluder Reporting
2 |
3 | This code is used to build a DataStudio dashboard to provide visibility into the
4 | Ads Placement Excluder solution.
5 |
6 | ## Disclaimers
7 | __This is not an officially supported Google product.__
8 |
9 | Copyright 2022 Google LLC. This solution, including any related sample code or
10 | data, is made available on an “as is,” “as available,” and “with all faults”
11 | basis, solely for illustrative purposes, and without warranty or representation
12 | of any kind. This solution is experimental, unsupported and provided solely for
13 | your convenience. Your use of it is subject to your agreements with Google, as
14 | applicable, and may constitute a beta feature as defined under those agreements.
15 | To the extent that you make any data available to Google in connection with your
16 | use of the solution, you represent and warrant that you have all necessary and
17 | appropriate rights, consents and permissions to permit Google to use and process
18 | that data. By using any portion of this solution, you acknowledge, assume and
19 | accept all risks, known and unknown, associated with its usage, including with
20 | respect to your deployment of any portion of this solution in your systems, or
21 | usage in connection with your business, if at all.
22 |
--------------------------------------------------------------------------------
/src/youtube_channel/bq_schema.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "name": "channel_id",
4 | "type": "STRING",
5 | "mode": "REQUIRED",
6 | "description": "The YouTube Channel ID"
7 | },
8 | {
9 | "name": "view_count",
10 | "type": "INT64",
11 | "mode": "NULLABLE",
12 | "description": "The number of views the channel has"
13 | },
14 | {
15 | "name": "video_count",
16 | "type": "INT64",
17 | "mode": "NULLABLE",
18 | "description": "The number of videos the channel has uploaded to it"
19 | },
20 | {
21 | "name": "subscriber_count",
22 | "type": "INT64",
23 | "mode": "NULLABLE",
24 | "description": "The number of subscribers the channel has"
25 | },
26 | {
27 | "name": "title",
28 | "type": "STRING",
29 | "mode": "NULLABLE",
30 | "description": "The title of the YouTube channel"
31 | },
32 | {
33 | "name": "title_language",
34 | "type": "STRING",
35 | "mode": "NULLABLE",
36 | "description": "The predicted language of the title"
37 | },
38 | {
39 | "name": "title_language_confidence",
40 | "type": "FLOAT64",
41 | "mode": "NULLABLE",
42 | "description": "The confidence of the prediction"
43 | },
44 | {
45 | "name": "country",
46 | "type": "STRING",
47 | "mode": "NULLABLE",
48 | "description": "The country the channel is from"
49 | },
50 | {
51 | "name": "datetime_updated",
52 | "type": "TIMESTAMP",
53 | "mode": "REQUIRED",
54 | "description": "The datetime the data was pulled from YouTube"
55 | }
56 | ]
57 |
--------------------------------------------------------------------------------
/src/youtube_channel/utils/pubsub.py:
--------------------------------------------------------------------------------
1 | # Copyright 2022 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # https://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Utilities for sending messages to Pub/sub."""
15 | import json
16 | from typing import Any, Dict
17 | from google.cloud import pubsub_v1
18 |
19 |
def send_dict_to_pubsub(message_dict: Dict[str, Any],
                        topic: str,
                        gcp_project: str) -> None:
    """Push the dictionary to pubsub.

    Args:
        message_dict: the message as a dictionary to push to pubsub
        topic: the name of the topic to publish the message to
        gcp_project: the Google Cloud Project with the pub/sub topic in

    Raises:
        Exception: if the publish does not complete successfully.
    """
    publisher = pubsub_v1.PublisherClient()
    # The `topic_path` method creates a fully qualified identifier
    # in the form `projects/{project_id}/topics/{topic_id}`
    topic_path = publisher.topic_path(gcp_project, topic)
    message_str = json.dumps(message_dict)
    # Data must be a bytestring
    data = message_str.encode('utf-8')
    # publish() only queues the message and returns a future. Block on
    # .result() so failures surface here and the message is not lost if the
    # caller (e.g. a Cloud Function) terminates before the background
    # publish completes.
    publisher.publish(topic_path, data).result()
39 |
--------------------------------------------------------------------------------
/src/google_ads_report/utils/pubsub.py:
--------------------------------------------------------------------------------
1 | # Copyright 2022 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # https://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Utilities for sending messages to Pub/sub."""
15 | import json
16 | from typing import Any, Dict
17 | from google.cloud import pubsub_v1
18 |
19 |
def send_dict_to_pubsub(message_dict: Dict[str, Any],
                        topic: str,
                        gcp_project: str) -> None:
    """Push the dictionary to pubsub.

    Args:
        message_dict: the message as a dictionary to push to pubsub
        topic: the name of the topic to publish the message to
        gcp_project: the Google Cloud Project with the pub/sub topic in

    Raises:
        Exception: if the publish does not complete successfully.
    """
    publisher = pubsub_v1.PublisherClient()
    # The `topic_path` method creates a fully qualified identifier
    # in the form `projects/{project_id}/topics/{topic_id}`
    topic_path = publisher.topic_path(gcp_project, topic)
    message_str = json.dumps(message_dict)
    # Data must be a bytestring
    data = message_str.encode('utf-8')
    # publish() only queues the message and returns a future. Block on
    # .result() so failures surface here and the message is not lost if the
    # caller (e.g. a Cloud Function) terminates before the background
    # publish completes.
    publisher.publish(topic_path, data).result()
39 |
--------------------------------------------------------------------------------
/src/reporting/exclusions_report.sql:
--------------------------------------------------------------------------------
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

-- Exclusions report feeding the DataStudio dashboard: every excluded
-- channel joined with its YouTube channel statistics and the Google Ads
-- performance metrics recorded for the same channel/customer pair.

-- Remove duplicate rows from YouTube, pulling only the last updated data
WITH
  YouTube AS (
    SELECT *
    FROM `${BQ_DATASET}.YouTubeChannel`
    -- `WHERE true` is a no-op filter; BigQuery requires a WHERE/GROUP BY/
    -- HAVING clause to accompany QUALIFY.
    WHERE true
    -- Keep only the most recently updated row per channel_id.
    QUALIFY ROW_NUMBER() OVER (PARTITION BY channel_id ORDER BY datetime_updated DESC) = 1
  )
SELECT DISTINCT
  Excluded.datetime_updated AS excluded_datetime,
  Excluded.channel_id,
  Ads.placement_target_url,
  Excluded.customer_id,
  YouTube.view_count,
  YouTube.video_count,
  YouTube.subscriber_count,
  YouTube.title,
  YouTube.title_language,
  YouTube.title_language_confidence,
  YouTube.country,
  Ads.impressions,
  Ads.cost_micros,
  Ads.conversions,
  Ads.video_view_rate,
  Ads.video_views,
  Ads.clicks,
  Ads.average_cpm,
  Ads.ctr,
  -- Trailing comma before FROM is valid GoogleSQL.
  Ads.all_conversions_from_interactions_rate,
FROM
  `${BQ_DATASET}.GoogleAdsExclusion` AS Excluded
-- LEFT JOINs: keep every exclusion row even when the YouTube stats or the
-- originating Ads report row are missing.
LEFT JOIN
  YouTube USING (channel_id)
LEFT JOIN
  `${BQ_DATASET}.GoogleAdsReport` AS Ads
  USING (channel_id, customer_id)
--------------------------------------------------------------------------------
/src/google_ads_accounts/main_test.py:
--------------------------------------------------------------------------------
1 | # Copyright 2022 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # https://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Unit tests for main.py"""
15 | import unittest
16 | from unittest.mock import MagicMock, patch
17 | import main
18 |
19 |
class MainTestCase(unittest.TestCase):
    """Unit tests for the HTTP entry point and GAQL helper in main.py."""

    @patch('main.run')
    def test_main(self, mock_run):
        mock_request = MagicMock()

        # A request missing the required sheet_id is rejected with a 400
        # and must not trigger the pipeline.
        mock_request.get_json.return_value = {}
        response = main.main(mock_request)
        self.assertEqual(400, response.status_code)
        mock_run.assert_not_called()

        # A well-formed request returns 200 and runs the pipeline once.
        mock_request.get_json.return_value = {'sheet_id': '12345'}
        response = main.main(mock_request)
        self.assertEqual(200, response.status_code)
        mock_run.assert_called_once()

    def test_gads_filters_to_sql_string(self):
        # A single filter maps to one metrics.* predicate.
        single_filter = [['impressions', '>', '1']]
        self.assertEqual(
            'metrics.impressions > 1',
            main.gads_filters_to_gaql_string(single_filter))

        # Multiple filters are joined with AND.
        two_filters = [['impressions', '>', '1'], ['clicks', '<', '50']]
        self.assertEqual(
            'metrics.impressions > 1 AND metrics.clicks < 50',
            main.gads_filters_to_gaql_string(two_filters))


if __name__ == '__main__':
    unittest.main()
49 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Ads Placement Excluder
2 |
3 | It is manual and challenging to detect YouTube channel placements which might be
4 | spam (low performance with high cost), and exclude them from future advertising.
5 | Google Ads does not currently provide enough granularity to identify all spam
6 | channels.
7 |
8 | Ads Placement Excluder allows an advertiser to define what their interpretation
9 | of a spam channel is, and it will leverage the Google Ads & YouTube APIs to
10 | automate identifying these placements, and exclude them from future advertising.
11 |
12 | ## Architecture
13 | See [architecture.md](./docs/architecture.md).
14 |
15 | ## Reporting
16 | The solution provides a DataStudio dashboard to monitor the solution. See
17 | [reporting.md](./docs/reporting.md) for more information.
18 |
19 | ## Get Started
20 | See [deployment.md](./docs/deployment.md) for information on how to deploy the
21 | solution and get started.
22 |
23 | ## Disclaimers
24 | __This is not an officially supported Google product.__
25 |
26 | Copyright 2022 Google LLC. This solution, including any related sample code or
27 | data, is made available on an “as is,” “as available,” and “with all faults”
28 | basis, solely for illustrative purposes, and without warranty or representation
29 | of any kind. This solution is experimental, unsupported and provided solely for
30 | your convenience. Your use of it is subject to your agreements with Google, as
31 | applicable, and may constitute a beta feature as defined under those agreements.
32 | To the extent that you make any data available to Google in connection with your
33 | use of the solution, you represent and warrant that you have all necessary and
34 | appropriate rights, consents and permissions to permit Google to use and process
35 | that data. By using any portion of this solution, you acknowledge, assume and
36 | accept all risks, known and unknown, associated with its usage, including with
37 | respect to your deployment of any portion of this solution in your systems, or
38 | usage in connection with your business, if at all.
39 |
--------------------------------------------------------------------------------
/src/google_ads_report/bq_schema.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "name": "datetime_updated",
4 | "type": "TIMESTAMP",
5 | "mode": "REQUIRED",
6 | "description": "The datetime the data was pulled from Google Ads"
7 | },
8 | {
9 | "name": "customer_id",
10 | "type": "STRING",
11 | "mode": "REQUIRED",
12 | "description": "The customer ID in Google Ads"
13 | },
14 | {
15 | "name": "channel_id",
16 | "type": "STRING",
17 | "mode": "REQUIRED",
18 | "description": "The YouTube Channel ID"
19 | },
20 | {
21 | "name": "placement_target_url",
22 | "type": "STRING",
23 | "mode": "NULLABLE",
24 | "description": "The URL of the placement"
25 | },
26 | {
27 | "name": "impressions",
28 | "type": "INT64",
29 | "mode": "NULLABLE",
30 | "description": "The number of impressions on the placement"
31 | },
32 | {
33 | "name": "cost_micros",
34 | "type": "INT64",
35 | "mode": "NULLABLE",
36 | "description": "The cost in micros"
37 | },
38 | {
39 | "name": "conversions",
40 | "type": "FLOAT64",
41 | "mode": "NULLABLE",
42 | "description": "The number of conversions"
43 | },
44 | {
45 | "name": "video_view_rate",
46 | "type": "FLOAT64",
47 | "mode": "NULLABLE",
48 | "description": "The video view rate"
49 | },
50 | {
51 | "name": "video_views",
52 | "type": "INT64",
53 | "mode": "NULLABLE",
54 | "description": "The number of video views"
55 | },
56 | {
57 | "name": "clicks",
58 | "type": "INT64",
59 | "mode": "NULLABLE",
60 | "description": "The number of clicks"
61 | },
62 | {
63 | "name": "average_cpm",
64 | "type": "FLOAT64",
65 | "mode": "NULLABLE",
66 | "description": "The average CPM"
67 | },
68 | {
69 | "name": "ctr",
70 | "type": "FLOAT64",
71 | "mode": "NULLABLE",
72 | "description": "The click through rate"
73 | },
74 | {
75 | "name": "all_conversions_from_interactions_rate",
76 | "type": "FLOAT64",
77 | "mode": "NULLABLE",
78 | "description": "The conversion rate"
79 | }
80 | ]
81 |
--------------------------------------------------------------------------------
/src/google_ads_accounts/utils/pubsub.py:
--------------------------------------------------------------------------------
1 | # Copyright 2022 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # https://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Utilities for sending messages to Pub/sub."""
15 | import json
16 | from typing import Any, Dict, List
17 | from google.cloud import pubsub_v1
18 |
19 |
def send_dict_to_pubsub(message_dict: Dict[str, Any],
                        topic: str,
                        gcp_project: str) -> None:
    """Push the dictionary to pubsub.

    Args:
        message_dict: the message as a dictionary to push to pubsub
        topic: the name of the topic to publish the message to
        gcp_project: the Google Cloud Project with the pub/sub topic in

    Raises:
        Exception: if the publish does not complete successfully.
    """
    publisher = pubsub_v1.PublisherClient()
    # The `topic_path` method creates a fully qualified identifier
    # in the form `projects/{project_id}/topics/{topic_id}`
    topic_path = publisher.topic_path(gcp_project, topic)
    message_str = json.dumps(message_dict)
    # Data must be a bytestring
    data = message_str.encode('utf-8')
    # publish() only queues the message and returns a future. Block on
    # .result() so failures surface here and the message is not lost if the
    # caller (e.g. a Cloud Function) terminates before the background
    # publish completes.
    publisher.publish(topic_path, data).result()


def send_dicts_to_pubsub(messages: List[Dict[str, Any]],
                         topic: str,
                         gcp_project: str) -> None:
    """Push each message in the list to pubsub.

    Args:
        messages: a list of messages as dicts to push to pubsub
        topic: the name of the topic to publish the message to
        gcp_project: the Google Cloud Project with the pub/sub topic in
    """
    # NOTE(review): this instantiates a new PublisherClient per message via
    # send_dict_to_pubsub — acceptable for small batches; consider reusing a
    # single client if message volume grows.
    for message in messages:
        send_dict_to_pubsub(
            message_dict=message, topic=topic, gcp_project=gcp_project)
54 |
--------------------------------------------------------------------------------
/docs/reporting.md:
--------------------------------------------------------------------------------
1 | # Ads Placement Excluder Reporting
2 |
3 | There is a DataStudio dashboard that can be used to monitor the behaviour of the
4 | solution, and identify which channels are being excluded.
5 |
6 | 
8 |
9 | ## Get Started
10 |
11 | 1. Make a copy of the template from [here](
12 | https://datastudio.google.com/reporting/4a616bed-85e9-4794-a748-721051c10755)
13 | to your Drive folder
14 | 2. While copying choose `ViewExclusions` as a new data source. `ViewExclusions`
15 | view will be created automatically by Terraform after the first deployment.
16 | a. If `ViewExclusions` does not appear in available data sources you need to
17 | Create Data Source -> Big Query -> Your Project and find `ViewExclusions`
18 | table there. b. You can also add a custom data source to each chart in a chart
19 | setup tab afterwards
20 | 3. Sometimes `customer_id` is auto-defined as a date leading to the chart
21 | configuration error. You can change the field type manually to number via
22 | Resource -> Manage Data Sources -> Edit
23 | 4. You can adjust charts and filters according to your needs
24 |
25 | ## Disclaimers
26 |
27 | __This is not an officially supported Google product.__
28 |
29 | Copyright 2022 Google LLC. This solution, including any related sample code or
30 | data, is made available on an “as is,” “as available,” and “with all faults”
31 | basis, solely for illustrative purposes, and without warranty or representation
32 | of any kind. This solution is experimental, unsupported and provided solely for
33 | your convenience. Your use of it is subject to your agreements with Google, as
34 | applicable, and may constitute a beta feature as defined under those agreements.
35 | To the extent that you make any data available to Google in connection with your
36 | use of the solution, you represent and warrant that you have all necessary and
37 | appropriate rights, consents and permissions to permit Google to use and process
38 | that data. By using any portion of this solution, you acknowledge, assume and
39 | accept all risks, known and unknown, associated with its usage, including with
40 | respect to your deployment of any portion of this solution in your systems, or
41 | usage in connection with your business, if at all.
42 |
--------------------------------------------------------------------------------
/src/google_ads_report/utils/gcs.py:
--------------------------------------------------------------------------------
1 | # Copyright 2022 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # https://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Utilities for working with Google Cloud Storage."""
15 | from google.cloud.storage.client import Client
16 | from google.cloud.storage.blob import Blob
17 | import pandas as pd
18 |
19 |
def upload_blob_from_df(df: pd.DataFrame, bucket: str, blob_name: str) -> Blob:
    """Upload a Pandas DataFrame to a Google Cloud Storage bucket.

    The DataFrame is serialised to CSV (without the index column) before
    being uploaded.

    Args:
        df: the Pandas dataframe to upload.
        bucket (str): Google Cloud Storage bucket.
        blob_name (str): Google Cloud Storage blob name.

    Returns:
        Blob: Newly created Google Cloud Storage file blob.
    """
    return upload_blob_from_string(
        blob_string=df.to_csv(index=False),
        blob_name=blob_name,
        bucket=bucket)
32 |
33 |
def upload_blob_from_string(
    bucket: str, blob_string: str, blob_name: str, content_type='text/csv'
) -> Blob:
    """Write a string to a new blob in Google Cloud Storage.

    Args:
        bucket (str): name of the destination Cloud Storage bucket.
        blob_string (str): the data to write into the blob.
        blob_name (str): name of the blob to create.
        content_type (optional str): MIME type of the data, e.g.
            text/csv.

    Returns:
        Blob: the Cloud Storage blob the string was written to.
    """
    target_blob = create_blob(bucket, blob_name)
    target_blob.upload_from_string(blob_string, content_type=content_type)
    return target_blob
53 |
def create_blob(bucket_name: str, blob_name: str) -> Blob:
    """Build a reference to a blob in a Cloud Storage bucket.

    Args:
        bucket_name (str): name of the Cloud Storage bucket.
        blob_name (str): name of the blob within the bucket.

    Returns:
        Blob: the Cloud Storage blob handle.
    """
    storage_client = Client()
    target_bucket = storage_client.bucket(bucket_name)
    return target_bucket.blob(blob_name)
68 |
--------------------------------------------------------------------------------
/src/youtube_channel/utils/gcs.py:
--------------------------------------------------------------------------------
1 | # Copyright 2022 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # https://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Utilities for working with Google Cloud Storage."""
15 | from google.cloud.storage.client import Client
16 | from google.cloud.storage.blob import Blob
17 | import pandas as pd
18 |
19 |
def upload_blob_from_df(df: pd.DataFrame, bucket: str, blob_name: str) -> Blob:
    """Serialise a DataFrame to CSV and store it in Cloud Storage.

    The CSV is produced without the index column.

    Args:
        df: the Pandas dataframe to upload.
        bucket (str): Google Cloud Storage bucket.
        blob_name (str): Google Cloud Storage blob name.

    Returns:
        Blob: the blob the CSV content was written to.
    """
    csv_payload = df.to_csv(index=False)
    return upload_blob_from_string(
        bucket=bucket, blob_string=csv_payload, blob_name=blob_name)
32 |
33 |
def upload_blob_from_string(
    bucket: str, blob_string: str, blob_name: str, content_type='text/csv'
) -> Blob:
    """Store a string as a blob in Google Cloud Storage.

    Args:
        bucket (str): name of the target Cloud Storage bucket.
        blob_string (str): the payload to store.
        blob_name (str): the name of the blob to write to.
        content_type (optional str): MIME type of the payload, e.g.
            text/csv.

    Returns:
        Blob: the newly written Cloud Storage blob.
    """
    destination = create_blob(bucket, blob_name)
    destination.upload_from_string(blob_string, content_type=content_type)
    return destination
53 |
def create_blob(bucket_name: str, blob_name: str) -> Blob:
    """Create a handle for a Google Cloud Storage blob.

    Args:
        bucket_name (str): Google Cloud Storage bucket.
        blob_name (str): Google Cloud Storage blob name.

    Returns:
        Blob: Google Cloud Storage file blob.
    """
    return Client().bucket(bucket_name).blob(blob_name)
68 |
--------------------------------------------------------------------------------
/src/google_ads_excluder/utils/gcs.py:
--------------------------------------------------------------------------------
1 | # Copyright 2022 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # https://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Utilities for working with Google Cloud Storage."""
15 | from google.cloud.storage.client import Client
16 | from google.cloud.storage.blob import Blob
17 | import pandas as pd
18 |
19 |
def upload_blob_from_df(df: pd.DataFrame, bucket: str, blob_name: str) -> Blob:
    """Upload a Pandas DataFrame to a Google Cloud Storage bucket.

    The DataFrame is serialised to CSV (without the index column) before
    being uploaded.

    Args:
        df: the Pandas dataframe to upload.
        bucket (str): Google Cloud Storage bucket.
        blob_name (str): Google Cloud Storage blob name.

    Returns:
        Blob: Newly created Google Cloud Storage file blob.
    """
    return upload_blob_from_string(
        blob_string=df.to_csv(index=False),
        blob_name=blob_name,
        bucket=bucket)
32 |
33 |
def upload_blob_from_string(
    bucket: str, blob_string: str, blob_name: str, content_type='text/csv'
) -> Blob:
    """Upload string content as a Google Cloud Storage blob.

    Args:
        bucket (str): the bucket to upload into.
        blob_string (str): the content to upload.
        blob_name (str): the name of the blob to create.
        content_type (optional str): MIME type of the content, defaulting
            to text/csv.

    Returns:
        Blob: the blob that now holds the content.
    """
    new_blob = create_blob(bucket, blob_name)
    new_blob.upload_from_string(blob_string, content_type=content_type)
    return new_blob
53 |
def create_blob(bucket_name: str, blob_name: str) -> Blob:
    """Return a Blob object for the given bucket and blob name.

    Args:
        bucket_name (str): Google Cloud Storage bucket.
        blob_name (str): Google Cloud Storage blob name.

    Returns:
        Blob: Google Cloud Storage file blob.
    """
    gcs_client = Client()
    return gcs_client.bucket(bucket_name).blob(blob_name)
68 |
--------------------------------------------------------------------------------
/src/google_ads_report/README.md:
--------------------------------------------------------------------------------
1 | # Google Ads Reporting Service
2 |
3 | This service is responsible for running a report from Google Ads based on the
4 | [group_placement_view](
5 | https://developers.google.com/google-ads/api/fields/v11/group_placement_view),
6 | with the configured filters, and outputting that as a CSV to a Cloud Storage
7 | bucket, with a BigQuery table in front of it. The data pulled from the report is
8 | filtered to only have YouTube channels.
9 |
10 | ## Local Deployment
11 | To run the code ensure the following environment variables are set:
12 |
13 | ```
14 | export GOOGLE_ADS_USE_PROTO_PLUS=false
15 | export GOOGLE_ADS_REFRESH_TOKEN=
16 | export GOOGLE_ADS_CLIENT_ID=
17 | export GOOGLE_ADS_CLIENT_SECRET=
18 | export GOOGLE_ADS_DEVELOPER_TOKEN=
19 | export GOOGLE_ADS_LOGIN_CUSTOMER_ID=
20 | export GOOGLE_CLOUD_PROJECT=
21 | export APE_YOUTUBE_PUBSUB_TOPIC=
22 | export APE_GCS_DATA_BUCKET=
23 | ```
24 |
25 | Next install the dev requirements:
26 |
27 | ```
28 | pip install -r requirements_dev.txt
29 | ```
30 |
31 | Then start the server by running:
32 |
33 | ```
34 | functions-framework --target=main --signature-type=event --port=8080
35 | ```
36 |
37 | You can then make a post request by running the following:
38 |
39 | ```
40 | curl localhost:8080 \
41 | -X POST \
42 | -H "Content-Type: application/json" \
43 | -d "{ \"data\": { \"data\": \"$(echo '{ "customer_id": "1234567890", "lookback_days": 90, "gads_filters": "metrics.impressions > 0", "sheet_id": "abcdefghijklmnop-mk"}' | base64)\" }}"
44 | ```
45 |
46 | ### Mac users
47 |
48 | You may need to set this environment variable for the Google Ads report stream
49 | to work, [see Github for more info](https://github.com/rails/rails/issues/38560).
50 |
51 | ```
52 | export OBJC_DISABLE_INITIALIZE_FORK_SAFETY=YES
53 | ```
54 |
55 | ## Disclaimers
56 | __This is not an officially supported Google product.__
57 |
58 | Copyright 2022 Google LLC. This solution, including any related sample code or
59 | data, is made available on an “as is,” “as available,” and “with all faults”
60 | basis, solely for illustrative purposes, and without warranty or representation
61 | of any kind. This solution is experimental, unsupported and provided solely for
62 | your convenience. Your use of it is subject to your agreements with Google, as
63 | applicable, and may constitute a beta feature as defined under those agreements.
64 | To the extent that you make any data available to Google in connection with your
65 | use of the solution, you represent and warrant that you have all necessary and
66 | appropriate rights, consents and permissions to permit Google to use and process
67 | that data. By using any portion of this solution, you acknowledge, assume and
68 | accept all risks, known and unknown, associated with its usage, including with
69 | respect to your deployment of any portion of this solution in your systems, or
70 | usage in connection with your business, if at all.
71 |
--------------------------------------------------------------------------------
/src/google_ads_accounts/README.md:
--------------------------------------------------------------------------------
1 | # Google Ads Account Function
2 |
3 | This service is responsible for deciding which Google Ads accounts the Ads
4 | Placement Excluder solution should run for, and kicking off the downstream
5 | pipeline. Each account is pushed as a separate message into the topic to enable
6 | concurrency.
7 |
8 | ## Local Deployment
9 | To run the code ensure the following environment variables are set:
10 |
11 | ```
12 | export GOOGLE_CLOUD_PROJECT=ads-placement-excluder
13 | export APE_ADS_REPORT_PUBSUB_TOPIC=ads-report-topic
14 | ```
15 |
16 | The code uses [Google Application Default credentials](
17 | https://google-auth.readthedocs.io/en/master/reference/google.auth.html) for
18 | auth.
19 |
20 | First create OAuth desktop credentials in Google Cloud, and download the client
21 | ID and client secret as a JSON file.
22 |
23 | Then run the following command, updating the path to point to the JSON file
24 | downloaded in the previous step:
25 | ```
26 | gcloud auth application-default login \
27 | --scopes='https://www.googleapis.com/auth/spreadsheets.readonly,https://www.googleapis.com/auth/cloud-platform' \
28 | --client-id-file=/path/to/client-id-file.json
29 | ```
[Optionally] [see this article](
https://medium.com/google-cloud/google-oauth-credential-going-deeper-the-hard-way-f403cf3edf9d)
for a detailed explanation of why this is needed.
33 |
34 | Next install the dev requirements:
35 |
36 | ```
37 | pip install -r requirements_dev.txt
38 | ```
39 |
40 | Then start the server by running:
41 |
42 | ```
43 | functions-framework --target=main --port=8080
44 | ```
45 |
46 | You can then make a post request by running the following:
47 |
48 | ```
49 | curl localhost:8080 \
50 | -X POST \
51 | -H "Content-Type: application/json" \
52 | -d '{"sheet_id": "12g3IoIP4Lk_UU3xtJsIiCSDxjNAn30vT4lOzSZPS-mk"}'
53 | ```
54 |
55 | ## Disclaimers
56 | __This is not an officially supported Google product.__
57 |
58 | Copyright 2022 Google LLC. This solution, including any related sample code or
59 | data, is made available on an “as is,” “as available,” and “with all faults”
60 | basis, solely for illustrative purposes, and without warranty or representation
61 | of any kind. This solution is experimental, unsupported and provided solely for
62 | your convenience. Your use of it is subject to your agreements with Google, as
63 | applicable, and may constitute a beta feature as defined under those agreements.
64 | To the extent that you make any data available to Google in connection with your
65 | use of the solution, you represent and warrant that you have all necessary and
66 | appropriate rights, consents and permissions to permit Google to use and process
67 | that data. By using any portion of this solution, you acknowledge, assume and
68 | accept all risks, known and unknown, associated with its usage, including with
69 | respect to your deployment of any portion of this solution in your systems, or
70 | usage in connection with your business, if at all.
71 |
--------------------------------------------------------------------------------
/src/youtube_channel/main_test.py:
--------------------------------------------------------------------------------
1 | # Copyright 2022 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # https://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Unit tests for main.py"""
15 | import unittest
16 | import numpy as np
17 | import pandas as pd
18 | import main
19 |
20 |
class MainTestCase(unittest.TestCase):
    """Unit tests for the YouTube channel service helpers in main.py."""

    def test_sanitise_youtube_dataframe(self):
        # Column layout expected by main.sanitise_youtube_dataframe.
        columns = [
            'title',
            'view_count',
            'video_count',
            'subscriber_count',
            'title_language_confidence',
        ]
        # Raw rows: channel titles containing characters that should be
        # removed, with the numeric columns arriving as strings.
        raw_data = [
            ['String with a new line \n', '10', '1', '3', '0.56'],
            ['String, with, commas in,it', '10', '1', '3', '0.56'],
            ['String with "double quotes" in it', '10', '1', '3', '0.56'],
            ["String with 'single quotes' in it", '10', '1', '3', '0.56'],
            [' String with white space ', '10', '1', '3', '0.56'],
            ['String with $\r\t\n;:,', '10', '1', '3', '0.56'],
            ['Строка написана на русском языке', '10', '1', '3', '0.56'],
            ['用中文寫的字符串', '10', '1', '3', '0.56'],
        ]
        # Expected rows: punctuation and surrounding whitespace stripped
        # from the titles, numeric columns converted to int/float, and
        # non-Latin (Russian/Chinese) titles preserved unchanged.
        expected_data = [
            ['String with a new line', 10, 1, 3, 0.56],
            ['String with commas init', 10, 1, 3, 0.56],
            ['String with double quotes in it', 10, 1, 3, 0.56],
            ['String with single quotes in it', 10, 1, 3, 0.56],
            ['String with white space', 10, 1, 3, 0.56],
            ['String with', 10, 1, 3, 0.56],
            ['Строка написана на русском языке', 10, 1, 3, 0.56],
            ['用中文寫的字符串', 10, 1, 3, 0.56],
        ]
        raw_df = pd.DataFrame(data=raw_data, columns=columns)
        expected_df = pd.DataFrame(data=expected_data, columns=columns)
        response_df = main.sanitise_youtube_dataframe(raw_df)
        pd.testing.assert_frame_equal(expected_df, response_df)

    def test_split_list_to_chunks(self):
        # 150 items with a max chunk size of 50 -> exactly 3 full chunks.
        lst = np.arange(150)
        max_chunk_size = 50
        chunks = main.split_list_to_chunks(lst, max_chunk_size)
        self.assertEqual(len(chunks), 3)
        self.assertEqual(len(chunks[0]), 50)
        self.assertEqual(len(chunks[1]), 50)
        self.assertEqual(len(chunks[2]), 50)

        # 151 items cannot fit in 3 chunks of 50, so a 4th chunk is added
        # and every chunk ends up strictly below the maximum size.
        lst = np.arange(151)
        max_chunk_size = 50
        chunks = main.split_list_to_chunks(lst, max_chunk_size)
        self.assertEqual(len(chunks), 4)
        self.assertTrue(len(chunks[0]) < 50)
        self.assertTrue(len(chunks[1]) < 50)
        self.assertTrue(len(chunks[2]) < 50)
        self.assertTrue(len(chunks[3]) < 50)
73 |
--------------------------------------------------------------------------------
/src/google_ads_excluder/README.md:
--------------------------------------------------------------------------------
1 | # Google Ads Exclusion service
2 |
3 | The Google Ads Excluder service is responsible for applying the filters in the
4 | config Google Sheet to the data, to determine which channels should be excluded
5 | in Google Ads. Channels identified for exclusion are then uploaded to the shared
6 | placement list in Google Ads, and the output written to BigQuery for reporting.
7 |
8 | ## Local Deployment
9 | To run the code ensure the following environment variables are set:
10 |
11 | ```
12 | export GOOGLE_CLOUD_PROJECT=
13 | export APE_BIGQUERY_DATASET=
14 | export APE_EXCLUSION_VALIDATE_ONLY=
15 | export APE_GCS_DATA_BUCKET=
16 | export GOOGLE_ADS_USE_PROTO_PLUS=false
17 | export GOOGLE_ADS_REFRESH_TOKEN=
18 | export GOOGLE_ADS_CLIENT_ID=
19 | export GOOGLE_ADS_CLIENT_SECRET=
20 | export GOOGLE_ADS_DEVELOPER_TOKEN=
21 | export GOOGLE_ADS_LOGIN_CUSTOMER_ID=
22 | ```
23 |
24 | The code uses [Google Application Default credentials](
25 | https://google-auth.readthedocs.io/en/master/reference/google.auth.html) for
26 | auth.
27 |
28 | First create OAuth desktop credentials in Google Cloud, and download the client
29 | ID and client secret as a JSON file.
30 |
31 | Then run the following command, updating the path to point to the JSON file
32 | downloaded in the previous step:
33 | ```
34 | gcloud auth application-default login \
35 | --scopes='https://www.googleapis.com/auth/spreadsheets.readonly,https://www.googleapis.com/auth/cloud-platform' \
36 | --client-id-file=/path/to/client-id-file.json
37 | ```
[Optionally] [see this article](
https://medium.com/google-cloud/google-oauth-credential-going-deeper-the-hard-way-f403cf3edf9d)
for a detailed explanation of why this is needed.
41 |
42 | Next install the dev requirements:
43 |
44 | ```
45 | pip install -r requirements_dev.txt
46 | ```
47 |
48 | Start the function:
49 |
50 | ```
51 | functions-framework --target=main --signature-type=event --port=8080
52 | ```
53 |
54 | You can then make a post request by running the following:
55 |
56 | ```
57 | curl localhost:8080 \
58 | -X POST \
59 | -H "Content-Type: application/json" \
60 | -d "{ \"data\": { \"data\": \"$(echo '{ "customer_id": "1234567890", "sheet_id": "abcdefghijklmnop-mk" }' | base64)\" }}"
61 | ```
62 |
63 | ## Disclaimers
64 | __This is not an officially supported Google product.__
65 |
66 | Copyright 2022 Google LLC. This solution, including any related sample code or
67 | data, is made available on an “as is,” “as available,” and “with all faults”
68 | basis, solely for illustrative purposes, and without warranty or representation
69 | of any kind. This solution is experimental, unsupported and provided solely for
70 | your convenience. Your use of it is subject to your agreements with Google, as
71 | applicable, and may constitute a beta feature as defined under those agreements.
72 | To the extent that you make any data available to Google in connection with your
73 | use of the solution, you represent and warrant that you have all necessary and
74 | appropriate rights, consents and permissions to permit Google to use and process
75 | that data. By using any portion of this solution, you acknowledge, assume and
76 | accept all risks, known and unknown, associated with its usage, including with
77 | respect to your deployment of any portion of this solution in your systems, or
78 | usage in connection with your business, if at all.
79 |
--------------------------------------------------------------------------------
/src/google_ads_report/main_test.py:
--------------------------------------------------------------------------------
1 | # Copyright 2022 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # https://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Unit tests for main.py"""
15 | import base64
16 | from datetime import datetime
17 | import json
18 | from typing import Any, Dict
19 | import unittest
20 | from unittest.mock import patch
21 | import jsonschema
22 | import main
23 |
24 |
class MainTestCase(unittest.TestCase):
    """Unit tests for the Google Ads report service entry point."""

    def _create_event(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """A helper function for creating mock event data.

        The function under test expects base64-encoded JSON under the
        'data' key, so this encodes the dict accordingly.

        Args:
            data: a dictionary containing the event data.
        """
        return {
            'data': base64.b64encode(json.dumps(data).encode('utf-8'))
        }

    @patch('main.start_job')
    def test_main(self, mock_start_job):
        # A payload with none of the required keys fails validation.
        event = self._create_event({'abc': '123'})
        with self.assertRaises(jsonschema.exceptions.ValidationError):
            main.main(event, {})
        mock_start_job.assert_not_called()

        # customer_id alone is not enough to pass validation.
        event = self._create_event({'customer_id': '123'})
        with self.assertRaises(jsonschema.exceptions.ValidationError):
            main.main(event, {})
        mock_start_job.assert_not_called()

        # lookback_days alone is not enough to pass validation.
        event = self._create_event({'lookback_days': 90})
        with self.assertRaises(jsonschema.exceptions.ValidationError):
            main.main(event, {})
        mock_start_job.assert_not_called()

        # A fully populated payload validates and kicks off the job.
        event = self._create_event({
            'sheet_id': 'abcdefghijklmnop-mk',
            'customer_id': '123',
            'lookback_days': 90,
            'gads_filters': 'metrics.clicks > 10',
        })
        main.main(event, {})
        mock_start_job.assert_called_once()

    def test_get_query_dates(self):
        # Pin "today" so the expected dates are deterministic.
        today_str = '2022-07-01'
        today = datetime.strptime(today_str, '%Y-%m-%d')
        date_from, date_to = main.get_query_dates(90, today)
        self.assertEqual(date_to, today_str)
        # 90 days before 2022-07-01 is 2022-04-02.
        self.assertEqual(date_from, '2022-04-02')

    @patch('main.get_query_dates')
    def test_get_report_query(self, mock_get_query_dates):
        mock_get_query_dates.return_value = ('2022-01-01', '2022-01-31')
        lookback_days = 90
        gads_filters = None
        query = main.get_report_query(lookback_days, gads_filters)
        query = query.strip()
        # check it doesn't end in AND - this would be an invalid query
        self.assertNotEqual('AND', query[-3:])

        # Custom filters must appear verbatim in the generated GAQL.
        gads_filters = 'metrics.clicks > 10'
        query = main.get_report_query(lookback_days, gads_filters)
        self.assertIn(gads_filters, query)


if __name__ == '__main__':
    unittest.main()
87 |
--------------------------------------------------------------------------------
/src/youtube_channel/README.md:
--------------------------------------------------------------------------------
1 | # YouTube Channel Reporting Service
2 |
3 | This service is responsible for pulling metrics about the YouTube channels from
4 | the Google Ads Report. For example the number of views the channel has had, and
5 | the number of subscribers. It then uses the Google Translate API (if enabled in
6 | the config), to determine the language the YouTube channel title is in.
7 |
8 | ## Google Translate API
9 | This service leverages [Google's Translation API](
10 | https://cloud.google.com/translate/docs/basic/detecting-language), for detecting
11 | the language of the YouTube channel's title.
12 |
13 | This is enabled/disabled in the configuration Google Sheet. See the
14 | [basic pricing](https://cloud.google.com/translate/pricing) for cost
15 | information.
16 |
17 | ## Local Deployment
18 | To run the code ensure the following environment variables are set:
19 |
20 | ```
21 | export GOOGLE_CLOUD_PROJECT=
22 | export APE_BIGQUERY_DATASET=
23 | export APE_GCS_DATA_BUCKET=
24 | ```
25 |
26 | The code uses [Google Application Default credentials](
27 | https://google-auth.readthedocs.io/en/master/reference/google.auth.html) for
28 | auth.
29 |
30 | First create OAuth desktop credentials in Google Cloud, and download the client
31 | ID and client secret as a JSON file.
32 |
33 | Then run the following command, updating the path to point to the JSON file
34 | downloaded in the previous step:
35 | ```
36 | gcloud auth application-default login \
37 | --scopes='https://www.googleapis.com/auth/spreadsheets.readonly,https://www.googleapis.com/auth/cloud-platform,https://www.googleapis.com/auth/youtube' \
38 | --client-id-file=/path/to/client-id-file.json
39 | ```
[Optionally] [see this article](
https://medium.com/google-cloud/google-oauth-credential-going-deeper-the-hard-way-f403cf3edf9d)
for a detailed explanation of why this is needed.
43 |
44 | Next install the dev requirements:
45 |
46 | ```
47 | pip install -r requirements_dev.txt
48 | ```
49 |
50 | Start the function:
51 |
52 | ```
53 | functions-framework --target=main --signature-type=event --port=8080
54 | ```
55 |
56 | You can then make a post request by running the following:
57 |
58 | ```
59 | curl localhost:8080 \
60 | -X POST \
61 | -H "Content-Type: application/json" \
62 | -d "{ \"data\": { \"data\": \"$(echo '{ "customer_id": "1234567890", "sheet_id": "abcdefghijklmnop-mk" }' | base64)\" }}"
63 | ```
64 |
65 | ## Disclaimers
66 | __This is not an officially supported Google product.__
67 |
68 | Copyright 2022 Google LLC. This solution, including any related sample code or
69 | data, is made available on an “as is,” “as available,” and “with all faults”
70 | basis, solely for illustrative purposes, and without warranty or representation
71 | of any kind. This solution is experimental, unsupported and provided solely for
72 | your convenience. Your use of it is subject to your agreements with Google, as
73 | applicable, and may constitute a beta feature as defined under those agreements.
74 | To the extent that you make any data available to Google in connection with your
75 | use of the solution, you represent and warrant that you have all necessary and
76 | appropriate rights, consents and permissions to permit Google to use and process
77 | that data. By using any portion of this solution, you acknowledge, assume and
78 | accept all risks, known and unknown, associated with its usage, including with
79 | respect to your deployment of any portion of this solution in your systems, or
80 | usage in connection with your business, if at all.
81 |
--------------------------------------------------------------------------------
/src/google_ads_accounts/main.py:
--------------------------------------------------------------------------------
1 | # Copyright 2022 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # https://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Fetch the Google Ads configs and push them to pub/sub."""
15 | import logging
16 | import os
17 | import sys
18 | from typing import Any, List, Dict
19 | import flask
20 | import google.auth
21 | from googleapiclient.discovery import build
22 | import jsonschema
23 | from utils import pubsub
24 |
25 |
# Send all log output to stdout.
logging.basicConfig(stream=sys.stdout)
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# The Google Cloud project containing the pub/sub topic
GOOGLE_CLOUD_PROJECT = os.environ.get('GOOGLE_CLOUD_PROJECT')
# The name of the pub/sub topic
APE_ADS_REPORT_PUBSUB_TOPIC = os.environ.get('APE_ADS_REPORT_PUBSUB_TOPIC')
# The access scopes used in this function
SCOPES = ['https://www.googleapis.com/auth/spreadsheets.readonly']

# The schema of the JSON in the request; only sheet_id is required.
request_schema = {
    'type': 'object',
    'properties': {
        'sheet_id': {'type': 'string'},
    },
    'required': ['sheet_id', ]
}
45 |
46 |
def main(request: flask.Request) -> flask.Response:
    """Handle the HTTP trigger: validate the payload and start the job.

    The request payload must match the request_schema object above.

    Args:
        request (flask.Request): HTTP request object.
    Returns:
        The flask response.
    """
    logger.info('Google Ads Account Service triggered.')
    request_json = request.get_json()
    logger.info('JSON payload: %s', request_json)
    try:
        jsonschema.validate(instance=request_json, schema=request_schema)
    except jsonschema.exceptions.ValidationError as err:
        # Reject malformed payloads with a 400 before doing any work.
        logger.error('Invalid request payload: %s', err)
        error_body = {
            'status': 'Failed',
            'message': err.message,
        }
        return flask.Response(flask.json.dumps(error_body),
                              status=400,
                              mimetype='application/json')

    run(request_json['sheet_id'])

    success_body = {
        'status': 'Success',
        'message': 'Downloaded data successfully',
    }
    return flask.Response(flask.json.dumps(success_body),
                          status=200,
                          mimetype='application/json')
78 |
79 |
def run(sheet_id: str) -> None:
    """Run the account pipeline: read the config, then publish messages.

    Args:
        sheet_id: the ID of the Google Sheet containing the config.
    """
    logger.info('Running Google Ads account script')
    send_messages_to_pubsub(get_config_from_sheet(sheet_id))
    logger.info('Done.')
90 |
91 |
def get_config_from_sheet(sheet_id: str) -> List[Dict[str, Any]]:
    """Get the Ads account config from the Google Sheet, and return the results.

    Reads the named ranges google_ads_customer_ids, google_ads_filters and
    google_ads_lookback_days from the sheet, and builds one config dict per
    customer ID whose row is marked 'Enabled'.

    Args:
        sheet_id: the ID of the Google Sheet containing the config.

    Returns:
        Returns a row for each account a report needs to be run for.

        [
            {
                'sheet_id': 'abcdefghijklmnop-mk',
                'customer_id': '1234567890',
                'lookback_days': 90,
                'gads_filters': 'metrics.clicks > 10',
            },
            ...
        ]
    """
    logger.info('Getting config from sheet: %s', sheet_id)
    # Only the credentials are needed here; the default project ID that
    # google.auth.default() also returns is unused.
    credentials, _ = google.auth.default(scopes=SCOPES)
    sheets_service = build('sheets', 'v4', credentials=credentials)
    sheet = sheets_service.spreadsheets()

    customer_ids = sheet.values().get(
        spreadsheetId=sheet_id,
        range='google_ads_customer_ids').execute().get('values', [])
    gads_filters = sheet.values().get(
        spreadsheetId=sheet_id,
        range='google_ads_filters').execute().get('values', [])
    # Fall back to a 30-day lookback when the named range is empty.
    lookback_days = sheet.values().get(
        spreadsheetId=sheet_id,
        range='google_ads_lookback_days').execute().get('values',
                                                        [['30']])[0][0]

    gads_filters_str = gads_filters_to_gaql_string(gads_filters)

    logger.info('Returned %i customer_ids', len(customer_ids))
    account_configs = []
    for customer_id, is_enabled in customer_ids:
        if is_enabled == 'Enabled':
            account_configs.append({
                'sheet_id': sheet_id,
                'customer_id': customer_id,
                'lookback_days': int(lookback_days),
                'gads_filters': gads_filters_str,
            })
        else:
            logger.info('Ignoring disabled row: %s', customer_id)

    logger.info('Account configs:')
    logger.info(account_configs)
    return account_configs
145 |
146 |
def gads_filters_to_gaql_string(config_filters: List[List[str]]) -> str:
    """Turn the Google Ads filters into a GAQL compatible string.

    The config sheet stores the filters as a list of [field, operator, value]
    rows; these are combined into a single condition string suitable for use
    in a WHERE clause of the GAQL that is passed to Google Ads. See:
    https://developers.google.com/google-ads/api/docs/query/overview

    The rows are combined with "AND".

    Args:
        config_filters: the filters from the Google Sheet

    Returns:
        A string that can be used in the WHERE statement of the Google Ads
        Query Language.
    """
    return ' AND '.join(
        f'metrics.{row[0]} {row[1]} {row[2]}' for row in config_filters)
168 |
169 |
def send_messages_to_pubsub(messages: List[Dict[str, Any]]) -> None:
    """Publish one Pub/Sub message per account config dict.

    Args:
        messages: the list of messages to push to pubsub
    """
    logger.info('Sending messages to pubsub')
    logger.info('Messages: %s', messages)
    publish_kwargs = {
        'messages': messages,
        'topic': APE_ADS_REPORT_PUBSUB_TOPIC,
        'gcp_project': GOOGLE_CLOUD_PROJECT,
    }
    pubsub.send_dicts_to_pubsub(**publish_kwargs)
    logger.info('All messages published')
183 |
--------------------------------------------------------------------------------
/docs/deployment.md:
--------------------------------------------------------------------------------
1 | # Ads Placement Excluder Deployment
2 |
3 | This doc provides information on how to deploy the Ads Placement Excluder
4 | solution.
5 |
6 | The deployment uses [Terraform](https://www.terraform.io/) to automate the
7 | deployment, and to keep all the Infrastructure as Code (IaC). The files can be
8 | found in the `/terraform/` folder in this repo.
9 |
10 | ## Roles
11 | The project creates a service account with the following roles:
12 |
13 | - `roles/cloudfunctions.invoker`
14 | - `roles/bigquery.jobUser`
15 | - `roles/bigquery.dataViewer`
16 | - `roles/pubsub.publisher`
17 | - `roles/storage.objectAdmin`
18 |
19 | As a user deploying the project, you will require these roles and the following:
20 |
21 | - `roles/storage.admin`
22 | - `roles/iam.securityAdmin`
23 |
24 | Read more about [Google Cloud roles here](
25 | https://cloud.google.com/iam/docs/understanding-roles).
26 |
27 | ## OAuth
28 | The project uses OAuth 2.0 scopes and service account roles to manage
29 | permissions. These are the scopes that are required when generating a refresh
30 | token.
31 |
32 | ```
33 | https://www.googleapis.com/auth/spreadsheets.readonly
34 | https://www.googleapis.com/auth/cloud-platform
35 | https://www.googleapis.com/auth/youtube
36 | https://www.googleapis.com/auth/adwords
37 | ```
38 |
39 | ## Pre-requisites
40 |
41 | - A new Google Cloud Project
42 | - Appropriate permissions to be able to deploy the project (see [roles](#roles))
43 | - Create a copy of [the template Google Sheet](
44 | https://docs.google.com/spreadsheets/d/1IAo8yvrY4BMuOaWnZ2O8wfJ6L36sOjCOnD7cigMxKwI/copy)
45 | and make a note of the Google Sheet ID (found in the URL after the `/d/`)
46 | - Access to the appropriate Google Ads accounts
47 | - A Google Ads [Developer Token](
48 | https://developers.google.com/google-ads/api/docs/first-call/dev-token)
49 |
50 |
51 | ## Deployment
52 |
53 | ### Manual Steps
54 | These changes need to be done once manually, as they are not controlled by Terraform:
55 |
56 | 1. Open the Google Cloud Project in the UI.
57 | 2. Go to [Cloud Storage](https://console.cloud.google.com/storage/browser) and
58 | create a new bucket, which will be used to keep track of the Terraform state,
59 | e.g. `my-awesome-project-terraform`. Make a note of the name of the bucket.
60 | 3. Open the [OAuth Consent Screen](
61 | https://console.cloud.google.com/apis/credentials/consent) and create a new
62 | internal app.
63 | 4. Open the [API Credentials Screen](
64 | https://console.cloud.google.com/apis/credentials) -> Create credentials ->
65 | OAuth Client ID -> Web app -> Set
66 | `https://developers.google.com/oauthplayground` as an authorised redirect
67 | URI. Make a note of the `client_id` and the `client_secret`.
68 | 5. Open the [OAuth playground](https://developers.google.com/oauthplayground/),
69 | and generate a refresh token for the [above scopes](#oauth), using the
70 | `client_id` and `client_secret` generated in the previous step:
71 | 
72 | 6. Open Cloud Shell:
73 | 
74 | 7. Enable the APIs in the project by running the following:
75 |
76 | ```
77 | gcloud services enable \
78 | serviceusage.googleapis.com \
79 | cloudresourcemanager.googleapis.com \
80 |     iam.googleapis.com \
83 | bigquery.googleapis.com \
84 | googleads.googleapis.com \
85 | youtube.googleapis.com \
86 | cloudfunctions.googleapis.com \
87 | cloudbuild.googleapis.com \
88 | sheets.googleapis.com \
89 | cloudscheduler.googleapis.com \
90 | translate.googleapis.com
91 | ```
92 |
93 | _Side note_: If you're interested in the reason why the APIs aren't controlled
94 | through Terraform, [read this guide](
95 | https://medium.com/rockedscience/how-to-fully-automate-the-deployment-of-google-cloud-platform-projects-with-terraform-16c33f1fb31f).
96 |
97 | ### Terraform
98 |
99 | 1. Whilst still in Cloud shell, `git clone` the project, and `cd` into the
100 | directory.
101 | 2. Run the following commands to initialise Terraform:
102 | ```
103 | cd terraform
104 | terraform init
105 | ```
106 | When prompted, enter the name of the bucket created in step 2 in manual
107 | steps.
108 |
109 | 3. Create a file named `terraform.tfvars` and add the following variables:
110 | ```
111 | project_id = ""
112 | oauth_refresh_token = ""
113 | google_cloud_client_id = ""
114 | google_cloud_client_secret = ""
115 | google_ads_developer_token = ""
116 | google_ads_login_customer_id = ""
117 | config_sheet_id = ""
118 | ```
119 | Note that the `google_ads_login_customer_id` is the MCC customer ID in Google
120 | Ads.
121 |
122 | 4. Run `terraform plan` and review the proposed changes.
123 | 5. Run `terraform apply` to create the infrastructure.
124 | 6. The email of the service account created will be output, give view only
125 | access to the Google sheet containing the config.
126 |
127 | By default, the code will be triggered every hour by Cloud Scheduler. To test
128 | everything is working, configure the Google Sheet ([see below](#google-sheet))
129 | and force run the Cloud Scheduler job in the UI.
130 |
131 | ## Google Sheet
132 |
133 | Open your copy of the Google Sheet. This is what you'll be using to configure
134 | the Ads Placement Excluder solution.
135 |
136 | There are notes in the Sheet that contain instructions for how to set this up.
137 |
138 | One area to highlight is the [basic Translation API](
139 | https://cloud.google.com/translate/docs/basic/detecting-language) used in the
140 | YouTube service, has a cost element to it ([see pricing](
141 | https://cloud.google.com/translate/pricing)). If you want to include language
142 | filters on the YouTube channel title, ensure that this is enabled.
143 |
144 | If this is disabled, and then enabled at a later date, it does not backfill the
145 | gaps in data. If you wish to backfill this data, manually clear the files in the
146 | Cloud Storage bucket containing the data, essentially deleting the YouTube data,
147 | then re-run Cloud Scheduler.
148 |
149 | ## Disclaimers
150 | __This is not an officially supported Google product.__
151 |
152 | Copyright 2022 Google LLC. This solution, including any related sample code or
153 | data, is made available on an “as is,” “as available,” and “with all faults”
154 | basis, solely for illustrative purposes, and without warranty or representation
155 | of any kind. This solution is experimental, unsupported and provided solely for
156 | your convenience. Your use of it is subject to your agreements with Google, as
157 | applicable, and may constitute a beta feature as defined under those agreements.
158 | To the extent that you make any data available to Google in connection with your
159 | use of the solution, you represent and warrant that you have all necessary and
160 | appropriate rights, consents and permissions to permit Google to use and process
161 | that data. By using any portion of this solution, you acknowledge, assume and
162 | accept all risks, known and unknown, associated with its usage, including with
163 | respect to your deployment of any portion of this solution in your systems, or
164 | usage in connection with your business, if at all.
165 |
--------------------------------------------------------------------------------
/docs/architecture.md:
--------------------------------------------------------------------------------
1 | # Ads Placement Excluder Architecture
2 |
3 | ## Google Cloud Architecture
4 |
5 | The solution is split into four microservices:
6 |
7 | - Google Ads Account Service
8 | - Google Ads Reporting Service
9 | - YouTube Channel Service
10 | - Google Ads Exclusion Service
11 |
12 | The source code for each of the Cloud Functions can be found under the `/src/`
13 | directory of this repo. Each function has its own README file, which contains
14 | instructions for local deployment.
15 |
16 | ### Google Ads Account Service
17 |
18 | This service is responsible for deciding which Google Ads accounts the Ads
19 | Placement Excluder solution should run for, and kicking off the downstream
20 | pipeline. Each account is pushed as a separate message into the topic to enable
21 | concurrency.
22 |
23 | 
25 |
26 | 1. Cloud Scheduler triggers the Account Cloud Function.
27 | 2. The function pulls from the Google Sheet the Google Ads customer IDs to run
28 | the code for, and the filters to apply to the Google Ads report.
29 | 3. Each customer ID is pushed in a separate message to Pub/Sub.
30 |
31 | ### Google Ads Reporting Service
32 |
33 | This service is responsible for running a report from Google Ads based on the
34 | [group_placement_view](
35 | https://developers.google.com/google-ads/api/fields/v11/group_placement_view),
36 | with the configured filters, and outputting that as a CSV to a Cloud Storage
37 | bucket, with a BigQuery table in front of it. The data pulled from the report is
38 | filtered to only have YouTube channels.
39 |
40 | 
42 |
43 | 1. Pub/Sub triggers the Cloud Function.
44 | 2. The report is downloaded from Google Ads.
45 | 3. The output is written as a CSV to Cloud Storage.
46 | 4. A message is passed to the next Pub/Sub topic.
47 |
48 | ### YouTube Channel Reporting Service
49 |
50 | This service is responsible for pulling metrics about the YouTube channels from
51 | the Google Ads Report. For example the number of views the channel has had, and
52 | the number of subscribers. It then uses the Google Translate API (if enabled in
53 | the config), to determine the language the YouTube channel title is in.
54 |
55 | 
57 |
58 | 1. Pub/Sub triggers the Cloud Function.
59 | 2. The function reads the new channels that were pulled from Google Ads. It does
60 | not refresh the data for existing channels.
61 | 3. The config is used to determine if the Translate API should be used.
62 | 4. The function pulls the YouTube data for each of the channels in step 2.
63 | 5. If the Translate API filter is enabled, for each channel it will use the API
64 | to detect the language and the confidence level of the prediction.
65 | 6. The output is written as a CSV to Cloud Storage.
66 | 7. A message is passed to the next Pub/Sub topic.
67 |
68 | ### Google Ads Exclusion service
69 |
70 | The Google Ads Excluder service is responsible for applying the filters in the
71 | config Google Sheet to the data, to determine which channels should be excluded
72 | in Google Ads. Channels identified for exclusion are then uploaded to the shared
73 | placement list in Google Ads, and the output written to BigQuery for reporting.
74 |
75 | 
77 |
78 | 1. Pub/Sub triggers the Cloud Function.
79 | 2. The function reads the filters from the config Sheet.
80 | 3. It applies the filters to BigQuery to identify channels that need to be
81 | excluded.
82 | 4. These are then uploaded to Google Ads.
83 | 5. The exclusions are also written to BigQuery for reporting purposes.
84 |
85 | ### Entire Solution Architecture
86 |
87 | Combining the individual services, you can see the combined architecture diagram
88 | below:
89 |
90 | 
91 |
92 | 1. Cloud Scheduler triggers the Account Cloud Function.
93 | 2. The function pulls the Google Ads customer IDs to run the code for, and the
94 | filters to apply to the Google Ads report.
95 | 3. Each customer ID is pushed in a separate message to Pub/Sub.
96 | 4. Pub/Sub triggers the Cloud Function.
97 | 5. The report is downloaded from Google Ads.
98 | 6. The output is written as a CSV to Cloud Storage.
99 | 7. A message is passed to the next Pub/Sub topic.
100 | 8. Pub/Sub triggers the Cloud Function.
101 | 9. The function reads the channels that were pulled from Google Ads.
102 | 10. The config is used to determine if the Translate API should be used.
103 | 11. The function pulls the YouTube data for each of the channels in step 2.
104 | 12. If the Translate API filter is enabled, for each channel it will use the API
105 | to detect the language and the confidence level of the prediction.
106 | 13. The output is written as a CSV to Cloud Storage.
107 | 14. A message is passed to the next Pub/Sub topic.
108 | 15. Pub/Sub triggers the Cloud Function.
109 | 16. The function reads the filters from the config Sheet.
110 | 17. It applies the filters to BigQuery to identify channels that need to be
111 | excluded.
112 | 18. These are then uploaded to Google Ads.
113 | 19. The exclusions are also written to BigQuery for reporting purposes.
114 |
115 | ## BigQuery External Tables Using Cloud Storage
116 |
117 | The solution uses [BigQuery External tables with a Google Cloud Storage
118 | backend](https://cloud.google.com/bigquery/docs/external-data-cloud-storage),
119 | instead of writing to BigQuery directly due to concurrency. BigQuery has much
120 | stricter limits in place about concurrent writes ([docs](
121 | https://cloud.google.com/bigquery/quotas)), so if the solution is configured
122 | with several Google Ads accounts, it can run into difficulty when writing
123 | directly to BigQuery.
124 |
125 | Leveraging Cloud Storage removes this limitation, and the
126 | BigQuery External Table provides a way of querying the data using SQL.
127 |
128 | ## Google Ads Exclusions
129 |
130 | There are several places that exclusions can be applied in Google Ads. This
131 | solution applies exclusions to [shared placement exclusion lists](
132 | https://support.google.com/google-ads/answer/9162992?hl=en-GB).
133 |
134 | The list is configured in the configuration Google Sheet, where you enter the
135 | customer ID of your MCC account, and the ID of the exclusion list. To find the
136 | ID of the list, open the list in the UI and look at the value set in the query
137 | string parameter `sharedSetId`.
138 |
139 | ## Disclaimers
140 |
141 | __This is not an officially supported Google product.__
142 |
143 | Copyright 2022 Google LLC. This solution, including any related sample code or
144 | data, is made available on an “as is,” “as available,” and “with all faults”
145 | basis, solely for illustrative purposes, and without warranty or representation
146 | of any kind. This solution is experimental, unsupported and provided solely for
147 | your convenience. Your use of it is subject to your agreements with Google, as
148 | applicable, and may constitute a beta feature as defined under those agreements.
149 | To the extent that you make any data available to Google in connection with your
150 | use of the solution, you represent and warrant that you have all necessary and
151 | appropriate rights, consents and permissions to permit Google to use and process
152 | that data. By using any portion of this solution, you acknowledge, assume and
153 | accept all risks, known and unknown, associated with its usage, including with
154 | respect to your deployment of any portion of this solution in your systems, or
155 | usage in connection with your business, if at all.
156 |
--------------------------------------------------------------------------------
/src/google_ads_report/main.py:
--------------------------------------------------------------------------------
1 | # Copyright 2022 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # https://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Output the placement report from Google Ads to BigQuery."""
15 | import base64
16 | import json
17 | from datetime import datetime, timedelta
18 | import logging
19 | import os
20 | import sys
21 | from typing import Any, Dict, Optional, Tuple
22 | from google.ads.googleads.client import GoogleAdsClient
23 | import jsonschema
24 | import pandas as pd
25 | from utils import gcs
26 | from utils import pubsub
27 |
28 |
# Log to stdout so the Cloud Functions runtime captures the output.
logging.basicConfig(stream=sys.stdout)
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# The Google Cloud project containing the GCS bucket
GOOGLE_CLOUD_PROJECT = os.environ.get('GOOGLE_CLOUD_PROJECT')
# The bucket to write the data to
APE_GCS_DATA_BUCKET = os.environ.get('APE_GCS_DATA_BUCKET')
# The pub/sub topic to send the success message to
APE_YOUTUBE_PUBSUB_TOPIC = os.environ.get('APE_YOUTUBE_PUBSUB_TOPIC')
# NOTE(review): each value above is None when its environment variable is
# unset — deployment is expected to provide all three.

# The schema of the JSON in the event payload.
# Enforced in main() via jsonschema.validate().
message_schema = {
    'type': 'object',
    'properties': {
        'sheet_id': {'type': 'string'},
        'customer_id': {'type': 'string'},
        'lookback_days': {'type': 'number'},
        'gads_filters': {'type': 'string'},
    },
    'required': ['sheet_id', 'customer_id', 'lookback_days', 'gads_filters', ]
}
51 |
52 |
def main(event: Dict[str, Any], context: Dict[str, Any]) -> None:
    """The entry point: decode the Pub/Sub payload and start the report job.

    The pub/sub message must match the message_schema object above.

    Args:
        event: A dictionary representing the event data payload.
        context: An object containing metadata about the event.
    """
    del context  # Runtime metadata; not used by this function.
    logger.info('Google Ads Reporting Service triggered.')
    logger.info('Message: %s', event)
    decoded = base64.b64decode(event['data']).decode('utf-8')
    logger.info('Decoded message: %s', decoded)
    payload = json.loads(decoded)
    logger.info('JSON message: %s', payload)

    # Will raise jsonschema.exceptions.ValidationError if the schema is invalid
    jsonschema.validate(instance=payload, schema=message_schema)

    start_job(
        payload.get('sheet_id'),
        payload.get('customer_id'),
        payload.get('lookback_days'),
        payload.get('gads_filters'),
    )

    logger.info('Done')
81 |
82 |
def start_job(
    sheet_id: str,
    customer_id: str,
    lookback_days: int,
    gads_filters: str,
) -> None:
    """Start the job to run the report from Google Ads & output it.

    Orchestrates three steps: download the placement report, write it to
    Cloud Storage, then notify the next service via Pub/Sub.

    Args:
        sheet_id: the ID of the Google Sheet containing the config.
        customer_id: the customer ID to fetch the Google Ads data for.
        lookback_days: the number of days from today to look back when
            fetching the report.
        gads_filters: the filters to apply to the Google Ads report query
    """
    logger.info('Starting job to fetch data for %s', customer_id)
    placements = get_report_df(customer_id, lookback_days, gads_filters)
    write_results_to_gcs(placements, customer_id)
    send_messages_to_pubsub(customer_id, sheet_id)
    logger.info('Job complete')
103 |
104 |
def get_report_df(
        customer_id: str,
        lookback_days: int,
        gads_filters: str) -> pd.DataFrame:
    """Run the placement report in Google Ads & return a Dataframe of the data.

    Args:
        customer_id: the customer ID to fetch the Google Ads data for.
        lookback_days: the number of days from today to look back when
            fetching the report.
        gads_filters: the filters to apply to the Google Ads report query

    Returns:
        A Pandas DataFrame containing the report results.
    """
    logger.info('Getting report stream for %s', customer_id)
    fetch_time = datetime.now()
    client = GoogleAdsClient.load_from_env(version='v11')
    ga_service = client.get_service("GoogleAdsService")

    search_request = client.get_type("SearchGoogleAdsStreamRequest")
    search_request.customer_id = customer_id
    search_request.query = get_report_query(lookback_days, gads_filters)
    stream = ga_service.search_stream(search_request)

    # The client and iterator needs to be in the same function, as per
    # https://github.com/googleads/google-ads-python/issues/384#issuecomment-791639397
    # So this can't be refactored out
    logger.info('Processing response stream')
    rows = [
        [
            fetch_time,
            result.customer.id,
            result.group_placement_view.placement,
            result.group_placement_view.target_url,
            result.metrics.impressions,
            result.metrics.cost_micros,
            result.metrics.conversions,
            result.metrics.video_view_rate,
            result.metrics.video_views,
            result.metrics.clicks,
            result.metrics.average_cpm,
            result.metrics.ctr,
            result.metrics.all_conversions_from_interactions_rate,
        ]
        for batch in stream
        for result in batch.results
    ]
    return pd.DataFrame(rows, columns=[
        'datetime_updated',
        'customer_id',
        'channel_id',
        'placement_target_url',
        'impressions',
        'cost_micros',
        'conversions',
        'video_view_rate',
        'video_views',
        'clicks',
        'average_cpm',
        'ctr',
        'all_conversions_from_interactions_rate',
    ])
168 |
169 |
def get_report_query(lookback_days: int,
                     gads_filters: Optional[str] = None) -> str:
    """Build and return the Google Ads report query.

    Args:
        lookback_days: the number of days from today to look back when fetching
            the report.
        gads_filters: the filters to apply to the Google Ads report query. May
            be None or an empty string, in which case no extra filters are
            added.

    Returns:
        The Google Ads query.
    """
    logger.info('Getting report query')
    date_from, date_to = get_query_dates(lookback_days)
    where_query = ''
    # Use truthiness rather than an "is None" check: an empty filter string
    # (the account service sends '' when no filters are configured) would
    # otherwise produce a dangling "AND" and an invalid GAQL query.
    if gads_filters:
        where_query = f'AND {gads_filters}'
    query = f"""
        SELECT
          customer.id,
          group_placement_view.placement,
          group_placement_view.target_url,
          metrics.impressions,
          metrics.cost_micros,
          metrics.conversions,
          metrics.video_views,
          metrics.video_view_rate,
          metrics.clicks,
          metrics.average_cpm,
          metrics.ctr,
          metrics.all_conversions_from_interactions_rate
        FROM
          group_placement_view
        WHERE group_placement_view.placement_type = "YOUTUBE_CHANNEL"
        AND campaign.advertising_channel_type = "VIDEO"
        AND segments.date BETWEEN "{date_from}" AND "{date_to}"
        {where_query}
    """
    logger.info(query)
    return query
210 |
211 |
def get_query_dates(lookback_days: int,
                    today: Optional[datetime] = None) -> Tuple[str, str]:
    """Return a tuple of string dates in %Y-%m-%d format for the GAds report.

    Google Ads queries require a string date in the above format. This function
    will lookback X days from today, and return this date as a string.

    Args:
        lookback_days: the number of days from today to look back when fetching
            the report.
        today: the date representing today. If no date is provided
            datetime.today() is used.

    Returns:
        A (date_from, date_to) tuple of string dates.
    """
    logger.info('Getting query dates')
    dt_format = '%Y-%m-%d'
    if today is None:
        today = datetime.today()
    date_from = today - timedelta(days=lookback_days)
    return (
        date_from.strftime(dt_format),
        today.strftime(dt_format),
    )
237 |
238 |
def write_results_to_gcs(report_df: pd.DataFrame, customer_id: str) -> None:
    """Write the report dataframe to GCS as a CSV file

    Args:
        report_df: the dataframe based on the Google Ads report.
        customer_id: the customer ID to fetch the Google Ads data for.
    """
    logger.info('Writing results to GCS: %s', APE_GCS_DATA_BUCKET)
    number_of_rows = len(report_df.index)
    logger.info('There are %s rows', number_of_rows)
    if number_of_rows == 0:
        # Nothing to upload: skip the GCS round-trip entirely.
        logger.info('There is nothing to write to GCS')
        return
    blob_name = f'google_ads_report/{customer_id}.csv'
    logger.info('Blob name: %s', blob_name)
    gcs.upload_blob_from_df(df=report_df,
                            blob_name=blob_name,
                            bucket=APE_GCS_DATA_BUCKET)
    logger.info('Blob uploaded to GCS')
259 |
260 |
def send_messages_to_pubsub(customer_id: str, sheet_id: str) -> None:
    """Push the customer ID to pub/sub when the job completes.

    Args:
        customer_id: the customer ID to fetch the Google Ads data for.
        sheet_id: the ID of the Google Sheet containing the config.
    """
    message_dict = {
        'customer_id': customer_id,
        'sheet_id': sheet_id,
    }
    # Fixed: the original call had no %s placeholder, so logging raised (and
    # swallowed) "not all arguments converted" instead of logging the dict.
    logger.info('Sending message to pub/sub: %s', message_dict)
    pubsub.send_dict_to_pubsub(
        message_dict=message_dict,
        topic=APE_YOUTUBE_PUBSUB_TOPIC,
        gcp_project=GOOGLE_CLOUD_PROJECT)
    logger.info('Message published')
278 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 |
2 | Apache License
3 | Version 2.0, January 2004
4 | http://www.apache.org/licenses/
5 |
6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
7 |
8 | 1. Definitions.
9 |
10 | "License" shall mean the terms and conditions for use, reproduction,
11 | and distribution as defined by Sections 1 through 9 of this document.
12 |
13 | "Licensor" shall mean the copyright owner or entity authorized by
14 | the copyright owner that is granting the License.
15 |
16 | "Legal Entity" shall mean the union of the acting entity and all
17 | other entities that control, are controlled by, or are under common
18 | control with that entity. For the purposes of this definition,
19 | "control" means (i) the power, direct or indirect, to cause the
20 | direction or management of such entity, whether by contract or
21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
22 | outstanding shares, or (iii) beneficial ownership of such entity.
23 |
24 | "You" (or "Your") shall mean an individual or Legal Entity
25 | exercising permissions granted by this License.
26 |
27 | "Source" form shall mean the preferred form for making modifications,
28 | including but not limited to software source code, documentation
29 | source, and configuration files.
30 |
31 | "Object" form shall mean any form resulting from mechanical
32 | transformation or translation of a Source form, including but
33 | not limited to compiled object code, generated documentation,
34 | and conversions to other media types.
35 |
36 | "Work" shall mean the work of authorship, whether in Source or
37 | Object form, made available under the License, as indicated by a
38 | copyright notice that is included in or attached to the work
39 | (an example is provided in the Appendix below).
40 |
41 | "Derivative Works" shall mean any work, whether in Source or Object
42 | form, that is based on (or derived from) the Work and for which the
43 | editorial revisions, annotations, elaborations, or other modifications
44 | represent, as a whole, an original work of authorship. For the purposes
45 | of this License, Derivative Works shall not include works that remain
46 | separable from, or merely link (or bind by name) to the interfaces of,
47 | the Work and Derivative Works thereof.
48 |
49 | "Contribution" shall mean any work of authorship, including
50 | the original version of the Work and any modifications or additions
51 | to that Work or Derivative Works thereof, that is intentionally
52 | submitted to Licensor for inclusion in the Work by the copyright owner
53 | or by an individual or Legal Entity authorized to submit on behalf of
54 | the copyright owner. For the purposes of this definition, "submitted"
55 | means any form of electronic, verbal, or written communication sent
56 | to the Licensor or its representatives, including but not limited to
57 | communication on electronic mailing lists, source code control systems,
58 | and issue tracking systems that are managed by, or on behalf of, the
59 | Licensor for the purpose of discussing and improving the Work, but
60 | excluding communication that is conspicuously marked or otherwise
61 | designated in writing by the copyright owner as "Not a Contribution."
62 |
63 | "Contributor" shall mean Licensor and any individual or Legal Entity
64 | on behalf of whom a Contribution has been received by Licensor and
65 | subsequently incorporated within the Work.
66 |
67 | 2. Grant of Copyright License. Subject to the terms and conditions of
68 | this License, each Contributor hereby grants to You a perpetual,
69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
70 | copyright license to reproduce, prepare Derivative Works of,
71 | publicly display, publicly perform, sublicense, and distribute the
72 | Work and such Derivative Works in Source or Object form.
73 |
74 | 3. Grant of Patent License. Subject to the terms and conditions of
75 | this License, each Contributor hereby grants to You a perpetual,
76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
77 | (except as stated in this section) patent license to make, have made,
78 | use, offer to sell, sell, import, and otherwise transfer the Work,
79 | where such license applies only to those patent claims licensable
80 | by such Contributor that are necessarily infringed by their
81 | Contribution(s) alone or by combination of their Contribution(s)
82 | with the Work to which such Contribution(s) was submitted. If You
83 | institute patent litigation against any entity (including a
84 | cross-claim or counterclaim in a lawsuit) alleging that the Work
85 | or a Contribution incorporated within the Work constitutes direct
86 | or contributory patent infringement, then any patent licenses
87 | granted to You under this License for that Work shall terminate
88 | as of the date such litigation is filed.
89 |
90 | 4. Redistribution. You may reproduce and distribute copies of the
91 | Work or Derivative Works thereof in any medium, with or without
92 | modifications, and in Source or Object form, provided that You
93 | meet the following conditions:
94 |
95 | (a) You must give any other recipients of the Work or
96 | Derivative Works a copy of this License; and
97 |
98 | (b) You must cause any modified files to carry prominent notices
99 | stating that You changed the files; and
100 |
101 | (c) You must retain, in the Source form of any Derivative Works
102 | that You distribute, all copyright, patent, trademark, and
103 | attribution notices from the Source form of the Work,
104 | excluding those notices that do not pertain to any part of
105 | the Derivative Works; and
106 |
107 | (d) If the Work includes a "NOTICE" text file as part of its
108 | distribution, then any Derivative Works that You distribute must
109 | include a readable copy of the attribution notices contained
110 | within such NOTICE file, excluding those notices that do not
111 | pertain to any part of the Derivative Works, in at least one
112 | of the following places: within a NOTICE text file distributed
113 | as part of the Derivative Works; within the Source form or
114 | documentation, if provided along with the Derivative Works; or,
115 | within a display generated by the Derivative Works, if and
116 | wherever such third-party notices normally appear. The contents
117 | of the NOTICE file are for informational purposes only and
118 | do not modify the License. You may add Your own attribution
119 | notices within Derivative Works that You distribute, alongside
120 | or as an addendum to the NOTICE text from the Work, provided
121 | that such additional attribution notices cannot be construed
122 | as modifying the License.
123 |
124 | You may add Your own copyright statement to Your modifications and
125 | may provide additional or different license terms and conditions
126 | for use, reproduction, or distribution of Your modifications, or
127 | for any such Derivative Works as a whole, provided Your use,
128 | reproduction, and distribution of the Work otherwise complies with
129 | the conditions stated in this License.
130 |
131 | 5. Submission of Contributions. Unless You explicitly state otherwise,
132 | any Contribution intentionally submitted for inclusion in the Work
133 | by You to the Licensor shall be under the terms and conditions of
134 | this License, without any additional terms or conditions.
135 | Notwithstanding the above, nothing herein shall supersede or modify
136 | the terms of any separate license agreement you may have executed
137 | with Licensor regarding such Contributions.
138 |
139 | 6. Trademarks. This License does not grant permission to use the trade
140 | names, trademarks, service marks, or product names of the Licensor,
141 | except as required for reasonable and customary use in describing the
142 | origin of the Work and reproducing the content of the NOTICE file.
143 |
144 | 7. Disclaimer of Warranty. Unless required by applicable law or
145 | agreed to in writing, Licensor provides the Work (and each
146 | Contributor provides its Contributions) on an "AS IS" BASIS,
147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148 | implied, including, without limitation, any warranties or conditions
149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150 | PARTICULAR PURPOSE. You are solely responsible for determining the
151 | appropriateness of using or redistributing the Work and assume any
152 | risks associated with Your exercise of permissions under this License.
153 |
154 | 8. Limitation of Liability. In no event and under no legal theory,
155 | whether in tort (including negligence), contract, or otherwise,
156 | unless required by applicable law (such as deliberate and grossly
157 | negligent acts) or agreed to in writing, shall any Contributor be
158 | liable to You for damages, including any direct, indirect, special,
159 | incidental, or consequential damages of any character arising as a
160 | result of this License or out of the use or inability to use the
161 | Work (including but not limited to damages for loss of goodwill,
162 | work stoppage, computer failure or malfunction, or any and all
163 | other commercial damages or losses), even if such Contributor
164 | has been advised of the possibility of such damages.
165 |
166 | 9. Accepting Warranty or Additional Liability. While redistributing
167 | the Work or Derivative Works thereof, You may choose to offer,
168 | and charge a fee for, acceptance of support, warranty, indemnity,
169 | or other liability obligations and/or rights consistent with this
170 | License. However, in accepting such obligations, You may act only
171 | on Your own behalf and on Your sole responsibility, not on behalf
172 | of any other Contributor, and only if You agree to indemnify,
173 | defend, and hold each Contributor harmless for any liability
174 | incurred by, or claims asserted against, such Contributor by reason
175 | of your accepting any such warranty or additional liability.
176 |
177 | END OF TERMS AND CONDITIONS
178 |
179 | APPENDIX: How to apply the Apache License to your work.
180 |
181 | To apply the Apache License to your work, attach the following
182 | boilerplate notice, with the fields enclosed by brackets "[]"
183 | replaced with your own identifying information. (Don't include
184 | the brackets!) The text should be enclosed in the appropriate
185 | comment syntax for the file format. We also recommend that a
186 | file or class name and description of purpose be included on the
187 | same "printed page" as the copyright notice for easier
188 | identification within third-party archives.
189 |
190 | Copyright [yyyy] [name of copyright owner]
191 |
192 | Licensed under the Apache License, Version 2.0 (the "License");
193 | you may not use this file except in compliance with the License.
194 | You may obtain a copy of the License at
195 |
196 | http://www.apache.org/licenses/LICENSE-2.0
197 |
198 | Unless required by applicable law or agreed to in writing, software
199 | distributed under the License is distributed on an "AS IS" BASIS,
200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201 | See the License for the specific language governing permissions and
202 | limitations under the License.
203 |
--------------------------------------------------------------------------------
/src/google_ads_excluder/main.py:
--------------------------------------------------------------------------------
1 | # Copyright 2022 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # https://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Filter the data for spam placements and exclude them in Google Ads."""
15 | import base64
16 | import json
17 | import logging
18 | import os
19 | import sys
20 | from datetime import datetime
21 | from typing import Any, Dict, List, Union
22 | import uuid
23 | import google.auth
24 | import google.auth.credentials
25 | from googleapiclient.discovery import build
26 | from google.ads.googleads.client import GoogleAdsClient
27 | from google.cloud import bigquery
28 | import jsonschema
29 | import pandas as pd
30 | from utils import gcs
31 |
32 |
# Log to stdout so Cloud Functions forwards the output to Cloud Logging.
logging.basicConfig(stream=sys.stdout)
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# The Google Cloud project
GOOGLE_CLOUD_PROJECT = os.environ.get('GOOGLE_CLOUD_PROJECT')
# The bucket to write the data to
APE_GCS_DATA_BUCKET = os.environ.get('APE_GCS_DATA_BUCKET')
# The name of the BigQuery Dataset
BQ_DATASET = os.environ.get('APE_BIGQUERY_DATASET')
# Set False to apply the exclusions in Google Ads. If True, the call will be
# made to the API and validated, but the exclusion won't be applied and you
# won't see it in the UI. You probably want this to be True in a dev environment
# and False in prod.
# Accepts 'true'/'1'/'t' (any case) as truthy; everything else is False.
VALIDATE_ONLY = os.environ.get(
    'APE_EXCLUSION_VALIDATE_ONLY', 'False').lower() in ('true', '1', 't')

# The access scopes used in this function
SCOPES = [
    'https://www.googleapis.com/auth/spreadsheets.readonly',
    'https://www.googleapis.com/auth/cloud-platform',
]

# The schema of the JSON in the event payload
message_schema = {
    'type': 'object',
    'properties': {
        'sheet_id': {'type': 'string'},
        'customer_id': {'type': 'string'},
    },
    'required': ['sheet_id', 'customer_id', ]
}
65 |
66 |
def main(event: Dict[str, Any], context: Dict[str, Any]) -> None:
    """Entry point: decode the pub/sub payload and start the exclusion job.

    The pub/sub message must conform to the module-level message_schema.

    Args:
        event: A dictionary representing the event data payload.
        context: An object containing metadata about the event (unused).

    Raises:
        jsonschema.exceptions.ValidationError: if the message from pub/sub
            does not match the expected schema.
    """
    del context  # Event metadata is not needed by this service.
    logger.info('Google Ads Exclusion service triggered.')
    logger.info('Message: %s', event)
    decoded_payload = base64.b64decode(event['data']).decode('utf-8')
    logger.info('Decoded message: %s', decoded_payload)
    payload = json.loads(decoded_payload)
    logger.info('JSON message: %s', payload)

    # Raises jsonschema.exceptions.ValidationError on a schema mismatch.
    jsonschema.validate(instance=payload, schema=message_schema)

    run(payload.get('customer_id'), payload.get('sheet_id'))

    logger.info('Done')
94 |
95 |
def run(customer_id: str, sheet_id: str) -> None:
    """Fetch the spam placements for one account and exclude them.

    Args:
        customer_id: the Google Ads customer ID to process.
        sheet_id: the ID of the Google Sheet containing the config.
    """
    logger.info('Starting job to fetch data for %s', customer_id)
    credentials = get_auth_credentials()
    spam_filters = get_config_filters(sheet_id, credentials)

    channel_ids = get_spam_placements(customer_id, spam_filters, credentials)
    if channel_ids is not None:
        # Apply the exclusions first, then persist the record of them to GCS.
        exclude_placements_in_gads(channel_ids, sheet_id, credentials)
        write_results_to_gcs(customer_id, channel_ids)
    logger.info('Job complete')
112 |
113 |
def get_auth_credentials() -> google.auth.credentials.Credentials:
    """Return application default credentials for Google APIs.

    Returns:
        Credentials carrying the module-level SCOPES (Sheets read-only and
        Cloud Platform).
    """
    # The project ID returned alongside the credentials is not needed here;
    # discard it instead of binding an unused local.
    credentials, _ = google.auth.default(scopes=SCOPES)
    return credentials
118 |
119 |
def get_config_filters(sheet_id: str,
                       credentials: google.auth.credentials.Credentials) -> str:
    """Get the filters for identifying a spam placement from the config.

    Args:
        sheet_id: the ID of the Google Sheet containing the config.
        credentials: Google Auth credentials

    Returns:
        SQL WHERE conditions that can be run on BigQuery, e.g.
        view_count > 1000000 AND subscriber_count > 10000

    Raises:
        ValueError: if the sheet contains no usable filter rows.
    """
    logger.info('Getting config from sheet %s', sheet_id)

    result = get_range_values_from_sheet(
        sheet_id, 'yt_exclusion_filters', credentials)

    logger.info('Returned %i rows', len(result))
    filters = youtube_filters_to_sql_string(result)
    if not filters:
        # BUGFIX: this previously raised google.api_core.exceptions.BadRequest,
        # but google.api_core is never imported in this module, so the raise
        # itself would have failed with an AttributeError. Raise a built-in
        # exception instead.
        raise ValueError('Filters are not set')

    return filters
143 |
144 |
def get_range_values_from_sheet(
    sheet_id: str,
    sheet_range: str,
    credentials: google.auth.credentials.Credentials
) -> List[List[str]]:
    """Get the values from a named range in the Google Sheet.

    Args:
        sheet_id: the Google Sheet ID to fetch data from.
        sheet_range: the range in the Google Sheet to get the values from.
        credentials: Google Auth credentials

    Returns:
        Each row in the response represents a row in the Sheet. An empty
        range yields an empty list.
    """
    # Lazy %-style args (consistent with the rest of this module) instead of
    # an eagerly-formatted f-string in the logging call.
    logger.info('Getting range "%s" from sheet: %s', sheet_range, sheet_id)
    sheets_service = build('sheets', 'v4', credentials=credentials)
    sheet = sheets_service.spreadsheets()
    return sheet.values().get(
        spreadsheetId=sheet_id,
        range=sheet_range).execute().get('values', [])
166 |
167 |
def youtube_filters_to_sql_string(config_filters: List[List[str]]) -> str:
    """Turn the YouTube filters into a SQL compatible string.

    The config sheet stores each filter as a [field, operator, value] row;
    every complete (3-element) row becomes one condition and the conditions
    are joined with AND for use in a SQL WHERE clause. Incomplete rows are
    skipped.

    Args:
        config_filters: the filters from the Google Sheet

    Returns:
        A string that can be used in the WHERE statement of SQL Language.
    """
    return ' AND '.join(
        f'{field} {operator} {value}'
        for field, operator, value in
        (row for row in config_filters if len(row) == 3)
    )
188 |
189 |
def get_spam_placements(customer_id: str,
                        filters: str,
                        credentials: google.auth.credentials.Credentials
                        ) -> Union[List[str], None]:
    """Run a query to find spam placements in BigQuery and return as a list.

    Args:
        customer_id: the Google Ads customer ID to process.
        filters: a string containing WHERE conditions to add to the query based
            on the config Google Sheet.
        credentials: Google Auth credentials

    Returns:
        A list of placement (YouTube channel) IDs which should be excluded,
        or None when the query returns no rows.
    """
    logger.info('Getting spam placements from BigQuery')
    logger.info('Connecting to: %s BigQuery', GOOGLE_CLOUD_PROJECT)
    client = bigquery.Client(
        project=GOOGLE_CLOUD_PROJECT, credentials=credentials)

    # customer_id is bound as a query parameter rather than interpolated into
    # the SQL, guarding against injection. `filters` still has to be inlined:
    # it is assembled from the config sheet's column/operator/value triples
    # (trusted operator config, not end-user input).
    # BUGFIX: the YouTubeChannel table reference was previously missing the
    # backticks used by the other two table references.
    query = f"""
        SELECT DISTINCT
            Yt.channel_id
        FROM
            `{BQ_DATASET}.GoogleAdsReport` AS Ads
        LEFT JOIN
            `{BQ_DATASET}.YouTubeChannel` AS Yt
            USING(channel_id)
        LEFT JOIN
            `{BQ_DATASET}.GoogleAdsExclusion` AS Excluded
            USING(channel_id)
        WHERE
            Ads.customer_id = @customer_id
            AND Excluded.channel_id IS NULL
            AND (
                Excluded.customer_id = @customer_id
                OR Excluded.customer_id IS NULL
            )
        AND {filters}
    """
    # NOTE(review): with a LEFT JOIN USING(channel_id), a NULL
    # Excluded.channel_id implies all Excluded columns are NULL, so the
    # Excluded.customer_id = @customer_id branch looks unreachable — confirm
    # the intended per-customer exclusion semantics.
    job_config = bigquery.QueryJobConfig(query_parameters=[
        bigquery.ScalarQueryParameter('customer_id', 'STRING', customer_id),
    ])
    logger.info('Running query: %s', query)

    rows = client.query(query, job_config=job_config).result()

    if rows.total_rows == 0:
        logger.info('There is nothing to update')
        return None
    channel_ids = [row.channel_id for row in rows]
    logger.info('Received %s channel_ids', len(channel_ids))
    return channel_ids
243 |
244 |
def exclude_placements_in_gads(
    placements: List[str],
    sheet_id: str,
    credentials: google.auth.credentials.Credentials = None
) -> None:
    """Exclude the placements in the Google Ads account.

    The shared set (exclusion list) ID and the customer ID that owns it are
    read from named ranges in the config sheet. One SharedCriterionOperation
    is built per channel and all operations are uploaded in a single mutate
    call. When the module-level VALIDATE_ONLY flag is True the request is
    validated by the API but not applied.

    Args:
        placements: a list of YouTube channel IDs which should be excluded.
        sheet_id: the ID of the Google Sheet containing the config.
        credentials: Google Auth credentials; fetched via application default
            credentials when not provided.
    """
    logger.info('Excluding placements in Google Ads.')

    if credentials is None:
        logger.info('No auth credentials provided. Fetching them.')
        credentials = get_auth_credentials()

    # Both config values live in single-cell named ranges, hence the [0][0].
    shared_set_id = get_range_values_from_sheet(
        sheet_id=sheet_id,
        sheet_range='placement_exclusion_list_id',
        credentials=credentials)[0][0]
    customer_id = get_range_values_from_sheet(
        sheet_id=sheet_id,
        sheet_range='placement_exclusion_customer_id',
        credentials=credentials)[0][0]

    # The Ads client reads its configuration from GOOGLE_ADS_* env variables
    # (set in the function's deployment config).
    client = GoogleAdsClient.load_from_env(version='v11')
    service = client.get_service('SharedCriterionService')

    # Resource name of the shared exclusion list the criteria are added to.
    shared_set = f'customers/{customer_id}/sharedSets/{shared_set_id}'

    operations = []
    logger.info('Processing the %i placements', len(placements))
    for placement in placements:
        operation = client.get_type('SharedCriterionOperation')
        criterion = operation.create
        criterion.shared_set = shared_set
        criterion.youtube_channel.channel_id = placement
        operations.append(operation)

    placements_len = len(placements)
    logger.info('There are %i operations to upload', placements_len)
    logger.info('Validate_only mode: %s', VALIDATE_ONLY)
    if placements_len > 0:
        # All operations go up in a single batched mutate request.
        response = service.mutate_shared_criteria(
            request={
                'validate_only': VALIDATE_ONLY,
                'customer_id': customer_id,
                'operations': operations
            }
        )
        logger.info('Response from the upload:')
        logger.info(response)

    logger.info('Done.')
301 |
302 |
def write_results_to_gcs(customer_id: str,
                         placements: List[str],
                         ) -> None:
    """Write the exclusions to GCS as a CSV file.

    Historical data is preserved so all file writes have a UUID appended to it.

    Args:
        customer_id: the Google Ads customer ID to process.
        placements: a list of placement IDs which should be excluded.
    """
    exclusions_df = pd.DataFrame(placements, columns=[
        'channel_id',
    ])
    exclusions_df['customer_id'] = int(customer_id)
    # NOTE(review): datetime.now() is naive local time — confirm downstream
    # BigQuery consumers expect that rather than UTC.
    exclusions_df['datetime_updated'] = datetime.now()

    logger.info('Writing results to GCS: %s', APE_GCS_DATA_BUCKET)
    number_of_rows = len(exclusions_df.index)
    logger.info('There are %s rows', number_of_rows)
    if number_of_rows > 0:
        # The UUID suffix keeps each run's output as a distinct object so
        # previous runs are never overwritten.
        uuid_str = str(uuid.uuid4())
        blob_name = f'google_ads_exclusion/{customer_id}-{uuid_str}.csv'
        logger.info('Blob name: %s', blob_name)
        gcs.upload_blob_from_df(
            df=exclusions_df,
            blob_name=blob_name,
            bucket=APE_GCS_DATA_BUCKET)
        logger.info('Blob uploaded to GCS')
    else:
        logger.info('There is nothing to write to GCS')
334 |
--------------------------------------------------------------------------------
/terraform/main.tf:
--------------------------------------------------------------------------------
# Default provider configuration; all resources below are created in this
# project/region unless overridden.
provider "google" {
  project = var.project_id
  region  = var.region
}

# SERVICE ACCOUNT --------------------------------------------------------------
# A single runtime service account shared by all of the Cloud Functions.
resource "google_service_account" "service_account" {
  account_id   = "ads-placement-excluder-runner"
  display_name = "Service Account for running Ads Placement Excluder"
}
# Lets the account invoke the HTTP-triggered entry-point function.
resource "google_project_iam_member" "cloud_functions_invoker_role" {
  project = var.project_id
  role    = "roles/cloudfunctions.invoker"
  member  = "serviceAccount:${google_service_account.service_account.email}"
}
# Needed to run the BigQuery queries issued by the services.
resource "google_project_iam_member" "bigquery_job_user_role" {
  project = var.project_id
  role    = "roles/bigquery.jobUser"
  member  = "serviceAccount:${google_service_account.service_account.email}"
}
# Read access to the dataset's (external) tables.
resource "google_project_iam_member" "bigquery_data_viewer_role" {
  project = var.project_id
  role    = "roles/bigquery.dataViewer"
  member  = "serviceAccount:${google_service_account.service_account.email}"
}
# The services chain together by publishing pub/sub messages to each other.
resource "google_project_iam_member" "pubsub_publisher_role" {
  project = var.project_id
  role    = "roles/pubsub.publisher"
  member  = "serviceAccount:${google_service_account.service_account.email}"
}
# Read/write of the CSV outputs in the data bucket.
resource "google_project_iam_member" "storage_object_admin_role" {
  project = var.project_id
  role    = "roles/storage.objectAdmin"
  member  = "serviceAccount:${google_service_account.service_account.email}"
}

# CLOUD STORAGE ----------------------------------------------------------------
# Holds the CSV output of each service; the BigQuery external tables read
# directly from this bucket.
resource "google_storage_bucket" "ape_data_bucket" {
  name                        = "${var.project_id}-ape-data"
  location                    = var.region
  force_destroy               = true
  uniform_bucket_level_access = true
}
# This bucket is used to store the cloud functions for deployment.
# The project ID is used to make sure the name is globally unique
resource "google_storage_bucket" "function_bucket" {
  name                        = "${var.project_id}-functions"
  location                    = var.region
  force_destroy               = true
  uniform_bucket_level_access = true

  # Deployment artifacts are disposable: delete objects after one day.
  lifecycle_rule {
    condition {
      age = 1
    }
    action {
      type = "Delete"
    }
  }
}
61 |
# CLOUD FUNCTIONS --------------------------------------------------------------
# Each service's source directory is zipped locally so it can be uploaded to
# the deployment bucket.
data "archive_file" "google_ads_accounts_zip" {
  type        = "zip"
  output_path = ".temp/google_ads_accounts_source.zip"
  source_dir  = "../src/google_ads_accounts"
}
data "archive_file" "google_ads_report_zip" {
  type        = "zip"
  output_path = ".temp/google_ads_report_source.zip"
  source_dir  = "../src/google_ads_report"
}
data "archive_file" "youtube_channel_zip" {
  type        = "zip"
  output_path = ".temp/youtube_channel_source.zip"
  source_dir  = "../src/youtube_channel/"
}
data "archive_file" "google_ads_excluder_zip" {
  type        = "zip"
  output_path = ".temp/google_ads_excluder_source.zip"
  source_dir  = "../src/google_ads_excluder/"
}

# The object names embed the archive's md5, so any source change produces a
# new object name and forces the corresponding function to be redeployed.
resource "google_storage_bucket_object" "google_ads_accounts" {
  name       = "google_ads_accounts_${data.archive_file.google_ads_accounts_zip.output_md5}.zip"
  bucket     = google_storage_bucket.function_bucket.name
  source     = data.archive_file.google_ads_accounts_zip.output_path
  depends_on = [data.archive_file.google_ads_accounts_zip]
}
resource "google_storage_bucket_object" "google_ads_report" {
  name       = "google_ads_report_${data.archive_file.google_ads_report_zip.output_md5}.zip"
  bucket     = google_storage_bucket.function_bucket.name
  source     = data.archive_file.google_ads_report_zip.output_path
  depends_on = [data.archive_file.google_ads_report_zip]
}
resource "google_storage_bucket_object" "youtube_channel" {
  name       = "youtube_channel_${data.archive_file.youtube_channel_zip.output_md5}.zip"
  bucket     = google_storage_bucket.function_bucket.name
  source     = data.archive_file.youtube_channel_zip.output_path
  depends_on = [data.archive_file.youtube_channel_zip]
}
resource "google_storage_bucket_object" "google_ads_excluder" {
  name       = "google_ads_excluder_${data.archive_file.google_ads_excluder_zip.output_md5}.zip"
  bucket     = google_storage_bucket.function_bucket.name
  source     = data.archive_file.google_ads_excluder_zip.output_path
  depends_on = [data.archive_file.google_ads_excluder_zip]
}

# Pipeline entry point: HTTP-triggered, fans out one pub/sub message per
# Google Ads account to the report topic.
resource "google_cloudfunctions_function" "google_ads_accounts_function" {
  region                = var.region
  name                  = "ape-google_ads_accounts"
  description           = "Identify which reports to run the Google Ads report for."
  runtime               = "python310"
  source_archive_bucket = google_storage_bucket.function_bucket.name
  source_archive_object = google_storage_bucket_object.google_ads_accounts.name
  service_account_email = google_service_account.service_account.email
  timeout               = 540
  available_memory_mb   = 1024
  entry_point           = "main"
  trigger_http          = true

  environment_variables = {
    GOOGLE_CLOUD_PROJECT        = var.project_id
    APE_ADS_REPORT_PUBSUB_TOPIC = google_pubsub_topic.google_ads_report_pubsub_topic.name
  }
}
# Stage 2: triggered per account from the report topic; writes the Google Ads
# placement report to GCS and hands off to the YouTube topic.
resource "google_cloudfunctions_function" "google_ads_report_function" {
  region                = var.region
  name                  = "ape-google_ads_report"
  description           = "Move the placement report from Google Ads to BigQuery."
  runtime               = "python310"
  source_archive_bucket = google_storage_bucket.function_bucket.name
  source_archive_object = google_storage_bucket_object.google_ads_report.name
  service_account_email = google_service_account.service_account.email
  timeout               = 540
  available_memory_mb   = 1024
  entry_point           = "main"

  event_trigger {
    event_type = "providers/cloud.pubsub/eventTypes/topic.publish"
    resource   = google_pubsub_topic.google_ads_report_pubsub_topic.name
  }

  environment_variables = {
    # NOTE(review): non-string values here (e.g. the bool below) are coerced
    # to strings by Terraform before deployment.
    GOOGLE_ADS_USE_PROTO_PLUS    = false
    GOOGLE_ADS_REFRESH_TOKEN     = var.oauth_refresh_token
    GOOGLE_ADS_CLIENT_ID         = var.google_cloud_client_id
    GOOGLE_ADS_CLIENT_SECRET     = var.google_cloud_client_secret
    GOOGLE_ADS_DEVELOPER_TOKEN   = var.google_ads_developer_token
    GOOGLE_ADS_LOGIN_CUSTOMER_ID = var.google_ads_login_customer_id
    GOOGLE_CLOUD_PROJECT         = var.project_id
    APE_GCS_DATA_BUCKET          = google_storage_bucket.ape_data_bucket.name
    APE_YOUTUBE_PUBSUB_TOPIC     = google_pubsub_topic.youtube_pubsub_topic.name
  }
}
# Stage 3: enriches placements with YouTube channel data, then publishes to
# the excluder topic.
resource "google_cloudfunctions_function" "youtube_channel_function" {
  region                = var.region
  name                  = "ape-youtube_channels"
  description           = "Pull the channel data from the YouTube API."
  runtime               = "python310"
  source_archive_bucket = google_storage_bucket.function_bucket.name
  source_archive_object = google_storage_bucket_object.youtube_channel.name
  service_account_email = google_service_account.service_account.email
  timeout               = 540
  available_memory_mb   = 1024
  entry_point           = "main"

  event_trigger {
    event_type = "providers/cloud.pubsub/eventTypes/topic.publish"
    resource   = google_pubsub_topic.youtube_pubsub_topic.name
  }

  environment_variables = {
    GOOGLE_CLOUD_PROJECT          = var.project_id
    APE_ADS_EXCLUDER_PUBSUB_TOPIC = google_pubsub_topic.google_ads_excluder_pubsub_topic.name
    APE_BIGQUERY_DATASET          = google_bigquery_dataset.dataset.dataset_id
    APE_GCS_DATA_BUCKET           = google_storage_bucket.ape_data_bucket.name
  }
}
# Final stage: applies the exclusions in Google Ads and records them in GCS.
resource "google_cloudfunctions_function" "google_ads_excluder_function" {
  region                = var.region
  name                  = "ape-google_ads_excluder"
  description           = "Exclude the channels in Google Ads"
  runtime               = "python310"
  source_archive_bucket = google_storage_bucket.function_bucket.name
  source_archive_object = google_storage_bucket_object.google_ads_excluder.name
  service_account_email = google_service_account.service_account.email
  timeout               = 540
  available_memory_mb   = 1024
  entry_point           = "main"

  event_trigger {
    event_type = "providers/cloud.pubsub/eventTypes/topic.publish"
    resource   = google_pubsub_topic.google_ads_excluder_pubsub_topic.name
  }

  environment_variables = {
    GOOGLE_CLOUD_PROJECT         = var.project_id
    GOOGLE_ADS_USE_PROTO_PLUS    = false
    GOOGLE_ADS_REFRESH_TOKEN     = var.oauth_refresh_token
    GOOGLE_ADS_CLIENT_ID         = var.google_cloud_client_id
    GOOGLE_ADS_CLIENT_SECRET     = var.google_cloud_client_secret
    GOOGLE_ADS_DEVELOPER_TOKEN   = var.google_ads_developer_token
    GOOGLE_ADS_LOGIN_CUSTOMER_ID = var.google_ads_login_customer_id
    APE_BIGQUERY_DATASET         = google_bigquery_dataset.dataset.dataset_id
    APE_GCS_DATA_BUCKET          = google_storage_bucket.ape_data_bucket.name
  }
}
209 |
# BIGQUERY ---------------------------------------------------------------------
resource "google_bigquery_dataset" "dataset" {
  dataset_id                 = var.bq_dataset
  location                   = var.region
  description                = "Ads Placement Excluder BQ Dataset"
  delete_contents_on_destroy = true
}
# The three tables below are external tables over the CSVs the functions write
# to the data bucket, so no load jobs are needed. skip_leading_rows drops the
# CSV header row; quote = "" disables quote handling.
resource "google_bigquery_table" "google_ads_report_table" {
  dataset_id          = google_bigquery_dataset.dataset.dataset_id
  table_id            = "GoogleAdsReport"
  deletion_protection = false

  external_data_configuration {
    autodetect    = false
    source_format = "CSV"
    source_uris = [
      "gs://${google_storage_bucket.ape_data_bucket.name}/google_ads_report/*.csv"
    ]
    schema = file("../src/google_ads_report/bq_schema.json")
    csv_options {
      quote             = ""
      skip_leading_rows = "1"
    }
  }
}
resource "google_bigquery_table" "youtube_channel_table" {
  dataset_id          = google_bigquery_dataset.dataset.dataset_id
  table_id            = "YouTubeChannel"
  deletion_protection = false

  external_data_configuration {
    autodetect    = false
    source_format = "CSV"
    source_uris = [
      "gs://${google_storage_bucket.ape_data_bucket.name}/youtube_channel/*.csv"
    ]
    schema = file("../src/youtube_channel/bq_schema.json")
    csv_options {
      quote             = ""
      skip_leading_rows = "1"
    }
  }
}
resource "google_bigquery_table" "google_ads_exclusions_table" {
  dataset_id          = google_bigquery_dataset.dataset.dataset_id
  table_id            = "GoogleAdsExclusion"
  deletion_protection = false

  external_data_configuration {
    autodetect    = false
    source_format = "CSV"
    source_uris = [
      "gs://${google_storage_bucket.ape_data_bucket.name}/google_ads_exclusion/*.csv"
    ]
    schema = file("../src/google_ads_excluder/bq_schema.json")
    csv_options {
      quote             = ""
      skip_leading_rows = "1"
    }
  }
}
# Reporting view over the three tables; the SQL template is parameterised
# with the dataset ID.
resource "google_bigquery_table" "exclusions_report" {
  dataset_id          = google_bigquery_dataset.dataset.dataset_id
  table_id            = "ViewExclusions"
  deletion_protection = false
  depends_on = [
    google_bigquery_dataset.dataset,
    google_bigquery_table.google_ads_report_table,
    google_bigquery_table.youtube_channel_table,
    google_bigquery_table.google_ads_exclusions_table
  ]
  view {
    query = templatefile(
      "../src/reporting/exclusions_report.sql",
      {
        BQ_DATASET = google_bigquery_dataset.dataset.dataset_id
      }
    )
    use_legacy_sql = false
  }
}

# PUB/SUB ----------------------------------------------------------------------
# Topics chaining the services together; messages are retained for 7 days.
resource "google_pubsub_topic" "google_ads_report_pubsub_topic" {
  name                       = "ape-google-ads-report-topic"
  message_retention_duration = "604800s"
}
resource "google_pubsub_topic" "youtube_pubsub_topic" {
  name                       = "ape-youtube-channel-topic"
  message_retention_duration = "604800s"
}
resource "google_pubsub_topic" "google_ads_excluder_pubsub_topic" {
  name                       = "ape-google-ads-excluder-topic"
  message_retention_duration = "604800s"
}
305 |
306 | # CLOUD_SCHEDULER --------------------------------------------------------------
307 | locals {
308 | scheduler_body = < None:
69 | """The entry point: extract the data from the payload and starts the job.
70 |
71 | The pub/sub message must match the message_schema object above.
72 |
73 | Args:
74 | event: A dictionary representing the event data payload.
75 | context: An object containing metadata about the event.
76 |
77 | Raises:
78 | jsonschema.exceptions.ValidationError if the message from pub/sub is not
79 | what is expected.
80 | """
81 | del context
82 | logger.info('YouTube channel service triggered.')
83 | logger.info('Message: %s', event)
84 | message = base64.b64decode(event['data']).decode('utf-8')
85 | logger.info('Decoded message: %s', message)
86 | message_json = json.loads(message)
87 | logger.info('JSON message: %s', message_json)
88 |
89 | # Will raise jsonschema.exceptions.ValidationError if the schema is invalid
90 | jsonschema.validate(instance=message_json, schema=message_schema)
91 |
92 | run(message_json.get('customer_id'), message_json.get('sheet_id'))
93 |
94 | logger.info('Done')
95 |
96 |
def run(customer_id: str, sheet_id: str) -> None:
  """Orchestrate pulling YouTube data and outputting it to BigQuery.

  Args:
    customer_id: the Google Ads customer ID to process.
    sheet_id: the ID of the Google Sheet containing the config.
  """
  credentials = get_auth_credentials()
  channel_ids = get_placements_query(customer_id, credentials)
  if not channel_ids:
    logger.info('No channel IDs to process')
  else:
    get_youtube_dataframe(channel_ids, sheet_id, customer_id, credentials)
  # The downstream excluder service is triggered even when there were no new
  # channels, so the rest of the pipeline always runs.
  send_messages_to_pubsub(customer_id, sheet_id)
  logger.info('Done')
112 |
113 |
def get_auth_credentials() -> google.auth.credentials.Credentials:
  """Return the application default credentials for Google APIs.

  Returns:
    The Google Auth default credentials for the current environment.
  """
  # The project ID returned alongside the credentials is not needed here.
  credentials, _ = google.auth.default()
  return credentials
118 |
119 |
def get_placements_query(
    customer_id: str,
    credentials: google.auth.credentials.Credentials
) -> List[str]:
  """Get the placements from the Google Ads report in BigQuery.

  Returns the channel IDs that appear in the Google Ads report but do not yet
  have a row in the YouTubeChannel table.

  Args:
    customer_id: the Google Ads customer ID to process.
    credentials: Google Auth credentials

  Returns:
    A list of placement IDs that need to be pulled from YouTube
  """
  logger.info('Getting Placements from Google Ads')
  logger.info('Connecting to: %s BigQuery', GOOGLE_CLOUD_PROJECT)
  client = bigquery.Client(
      project=GOOGLE_CLOUD_PROJECT, credentials=credentials)

  # The customer ID arrives via a pub/sub message, so pass it as a query
  # parameter instead of interpolating it into the SQL string.
  query = f"""
      SELECT DISTINCT
          Ads.channel_id
      FROM
          `{BQ_DATASET}.GoogleAdsReport` AS Ads
      LEFT JOIN
          `{BQ_DATASET}.YouTubeChannel` AS YouTube
      USING(channel_id)
      WHERE
          Ads.customer_id = @customer_id
          AND YouTube.channel_id IS NULL
  """
  job_config = bigquery.QueryJobConfig(query_parameters=[
      bigquery.ScalarQueryParameter('customer_id', 'STRING', customer_id),
  ])
  logger.info('Running query: %s', query)
  rows = client.query(query, job_config=job_config).result()
  channel_ids = [row.channel_id for row in rows]
  logger.info('Received %s channel_ids', len(channel_ids))
  return channel_ids
157 |
158 |
def get_youtube_dataframe(
    channel_ids: List[str],
    sheet_id: str,
    customer_id: str,
    credentials: google.auth.credentials.Credentials
) -> None:
  """Pull information on each of the provided channels from the YouTube API.

  The YouTube API only allows pulling up to 50 channels in each request, so
  multiple requests have to be made to pull all the data. Each chunk of
  results is written to GCS as its own CSV file before the next chunk is
  requested. See the docs for more details:
  https://developers.google.com/youtube/v3/docs/channels/list

  Args:
    channel_ids: the channel IDs to pull the info on from YouTube
    sheet_id: the ID of the Google Sheet containing the config.
    customer_id: the Google Ads customer ID to process.
    credentials: Google Auth credentials
  """
  logger.info('Getting YouTube data for channel IDs')
  # Maximum number of channels per YouTube request. See:
  # https://developers.google.com/youtube/v3/docs/channels/list
  chunk_size = 50
  chunks = split_list_to_chunks(channel_ids, chunk_size)
  number_of_chunks = len(chunks)

  logger.info('Connecting to the youtube API')
  youtube = build('youtube', 'v3', credentials=credentials)
  # Read the translation toggle once from the config sheet so the same
  # setting applies to every chunk.
  is_translated = get_translate_filter(sheet_id, credentials)

  for i, chunk in enumerate(chunks):
    logger.info(f'Processing chunk {i + 1} of {number_of_chunks}')
    chunk_list = list(chunk)
    request = youtube.channels().list(
        part='id, statistics, snippet, brandingSettings',
        id=chunk_list,
        maxResults=chunk_size)
    response = request.execute()
    channels = process_youtube_response(response, chunk_list, is_translated)
    # NOTE(review): this column order must match the per-row field order
    # built in process_youtube_response — verify the two stay in sync.
    youtube_df = pd.DataFrame(channels, columns=[
        'channel_id',
        'view_count',
        'video_count',
        'subscriber_count',
        'title',
        'title_language',
        'title_language_confidence',
        'country',
    ])
    youtube_df['datetime_updated'] = datetime.now()
    youtube_df = sanitise_youtube_dataframe(youtube_df)
    # Each chunk becomes its own blob (UUID-named), so chunks never
    # overwrite one another.
    write_results_to_gcs(youtube_df, customer_id)
  logger.info('YouTube channel info complete')
212 |
213 |
def sanitise_youtube_dataframe(youtube_df: pd.DataFrame) -> pd.DataFrame:
  """Clean the YouTube dataframe so it is safe to write out as a CSV.

  Args:
    youtube_df: the dataframe containing the YouTube data

  Returns:
    The YouTube dataframe, sanitised so it is safe to write to a CSV.
  """
  numeric_columns = {
      'view_count': 'int',
      'video_count': 'int',
      'subscriber_count': 'int',
      'title_language_confidence': 'float',
  }
  youtube_df = youtube_df.astype(numeric_columns)
  # Strip the characters from the title field that break BigQuery ingestion
  # even when escaped in the CSV, then trim surrounding whitespace.
  cleaned_titles = youtube_df['title'].str.replace(
      APE_CSV_PROBLEM_CHARACTERS_REGEX, '', regex=True)
  youtube_df['title'] = cleaned_titles.str.strip()
  return youtube_df
235 |
236 |
def split_list_to_chunks(
    lst: List[Any], max_size_of_chunk: int) -> List[np.ndarray]:
  """Split the list into X chunks with the maximum size as specified.

  Args:
    lst: The list to be split into chunks
    max_size_of_chunk: the maximum number of elements that should be in a
      chunk.

  Returns:
    A list containing numpy array chunks of the original list. An empty input
    list yields an empty list of chunks.
  """
  logger.info('Splitting list into chunks')
  if not lst:
    # np.array_split raises ValueError for 0 sections, so an empty input has
    # to be handled explicitly.
    logger.info('List is empty, nothing to split')
    return []
  num_of_chunks = math.ceil(len(lst) / max_size_of_chunk)
  chunks = np.array_split(lst, num_of_chunks)
  logger.info('Split list into %i chunks', num_of_chunks)
  return chunks
254 |
255 |
def process_youtube_response(
    response: Dict[str, Any],
    channel_ids: List[str],
    is_translated: bool,
) -> List[List[Any]]:
  """Process the YouTube response to extract the required information.

  The field order of each row must match the column order used when building
  the dataframe: channel_id, view_count, video_count, subscriber_count,
  title, title_language, title_language_confidence, country.

  Args:
    response: The YouTube channels list response
      https://developers.google.com/youtube/v3/docs/channels/list#response
    channel_ids: A list of the channel IDs passed in the request
    is_translated: A flag showing whether the YouTube channel title should be
      run through language detection or not

  Returns:
    A list of rows where each row holds the data for one channel
  """
  logger.info('Processing youtube response')
  data = []
  if response.get('pageInfo').get('totalResults') == 0:
    logger.warning('The YouTube response has no results: %s', response)
    logger.warning(channel_ids)
    return data

  for channel in response['items']:
    title = channel.get('snippet').get('title', '')
    if is_translated:
      title_language, confidence = detect_language(title)
    else:
      title_language = ''
      confidence = 0
    statistics = channel.get('statistics')
    data.append([
        channel.get('id'),
        statistics.get('viewCount', None),
        # Bug fix: videoCount and subscriberCount were previously appended in
        # the opposite order, so each landed in the other's dataframe column.
        statistics.get('videoCount', None),
        statistics.get('subscriberCount', None),
        title,
        title_language,
        confidence,
        channel.get('snippet').get('country', ''),
    ])
  return data
297 |
298 |
def get_translate_filter(
    sheet_id: str,
    credentials: google.auth.credentials.Credentials
) -> bool:
  """Get the filter for YouTube channel title translation.

  Args:
    sheet_id: the ID of the Google Sheet containing the config.
    credentials: Google Auth credentials

  Returns:
    True if filter is enabled, False otherwise
  """
  logger.info('Getting config from sheet %s', sheet_id)

  sheets_service = build('sheets', 'v4', credentials=credentials)
  sheet = sheets_service.spreadsheets()

  # Default to 'Disabled' when the named range is missing or empty.
  values = sheet.values().get(
      spreadsheetId=sheet_id,
      range='yt_translation_filter').execute().get('values', [['Disabled']])
  result = values[0][0]

  # Directly use the comparison result rather than `True if ... else False`.
  is_enabled = result == 'Enabled'
  logger.info('Translation filter enabled is %s', is_enabled)

  return is_enabled
325 |
326 |
def detect_language(text: str) -> Tuple[str, float]:
  """Detect which language the given text is written in.

  Args:
    text: the text to base the translation off of

  Returns:
    A tuple containing the language and the confidence.
  """
  logger.debug('Detecting language for %s', text)
  client = translate.Client()
  detection = client.detect_language(text)
  return detection['language'], detection['confidence']
340 |
341 |
def write_results_to_gcs(youtube_df: pd.DataFrame, customer_id: str) -> None:
  """Write the YouTube dataframe to GCS as a CSV file.

  Historical data is preserved so all file writes have a UUID appended to it.

  Args:
    youtube_df: the dataframe based on the YouTube data.
    customer_id: the customer ID to fetch the Google Ads data for.
  """
  logger.info('Writing results to GCS: %s', APE_GCS_DATA_BUCKET)
  number_of_rows = len(youtube_df.index)
  logger.info('There are %s rows', number_of_rows)
  if number_of_rows == 0:
    logger.info('There is nothing to write to GCS')
    return
  blob_name = f'youtube_channel/{customer_id}-{uuid.uuid4()}.csv'
  logger.info('Blob name: %s', blob_name)
  gcs.upload_blob_from_df(
      df=youtube_df,
      blob_name=blob_name,
      bucket=APE_GCS_DATA_BUCKET)
  logger.info('Blob uploaded to GCS')
365 |
366 |
def send_messages_to_pubsub(customer_id: str, sheet_id: str) -> None:
  """Push the customer ID to pub/sub when the job completes.

  Args:
    customer_id: the customer ID to fetch the Google Ads data for.
    sheet_id: the ID of the Google Sheet containing the config.
  """
  message_dict = {
      'customer_id': customer_id,
      'sheet_id': sheet_id,
  }
  # Bug fix: the original call passed message_dict as a lazy %-arg with no
  # placeholder in the format string, which makes the logging module report
  # an internal formatting error instead of logging the message.
  logger.info('Sending message to pub/sub: %s', message_dict)
  pubsub.send_dict_to_pubsub(
      message_dict=message_dict,
      topic=APE_ADS_EXCLUDER_PUBSUB_TOPIC,
      gcp_project=GOOGLE_CLOUD_PROJECT)
  logger.info('Message published')
384 |
--------------------------------------------------------------------------------