├── src
├── google_ads_report
│ ├── __init__.py
│ ├── utils
│ │ ├── __init__.py
│ │ ├── pubsub.py
│ │ └── gcs.py
│ ├── requirements_dev.txt
│ ├── requirements.txt
│ ├── bq_schema.json
│ ├── README.md
│ ├── main_test.py
│ └── main.py
├── youtube_channel
│ ├── __init__.py
│ ├── utils
│ │ ├── __init__.py
│ │ ├── pubsub.py
│ │ └── gcs.py
│ ├── requirements_dev.txt
│ ├── requirements.txt
│ ├── bq_schema.json
│ ├── main_test.py
│ ├── README.md
│ └── main.py
├── google_ads_accounts
│ ├── __init__.py
│ ├── utils
│ │ ├── __init__.py
│ │ └── pubsub.py
│ ├── requirements_dev.txt
│ ├── requirements.txt
│ ├── main_test.py
│ ├── README.md
│ └── main.py
├── google_ads_excluder
│ ├── __init__.py
│ ├── utils
│ │ ├── __init__.py
│ │ └── gcs.py
│ ├── requirements_dev.txt
│ ├── requirements.txt
│ ├── bq_schema.json
│ ├── README.md
│ └── main.py
└── reporting
│ ├── README.md
│ └── exclusions_report.sql
├── terraform
├── backend.tf
├── outputs.tf
├── variables.tf
└── main.tf
├── docs
├── images
│ ├── cloud-shell.png
│ ├── oauth-configuration.png
│ ├── ape-architecture-diagram.png
│ ├── ape-datastudio-report-example.png
│ ├── ape-account-service-architecture-diagram.png
│ ├── ape-report-service-architecture-diagram.png
│ ├── ape-youtube-service-architecture-diagram.png
│ └── ape-excluder-service-architecture-diagram.png
├── reporting.md
├── deployment.md
└── architecture.md
├── .gitignore
├── contributing.md
├── README.md
└── LICENSE
/src/google_ads_report/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/src/youtube_channel/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/src/google_ads_accounts/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/src/google_ads_excluder/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/src/youtube_channel/utils/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/src/google_ads_accounts/utils/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/src/google_ads_excluder/utils/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/src/google_ads_report/utils/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/src/google_ads_report/requirements_dev.txt:
--------------------------------------------------------------------------------
1 | -r requirements.txt
2 |
3 | functions-framework==3.1.0
4 |
--------------------------------------------------------------------------------
/src/youtube_channel/requirements_dev.txt:
--------------------------------------------------------------------------------
1 | -r requirements.txt
2 |
3 | functions-framework==3.1.0
4 |
--------------------------------------------------------------------------------
/src/google_ads_accounts/requirements_dev.txt:
--------------------------------------------------------------------------------
1 | -r requirements.txt
2 |
3 | functions-framework==3.1.0
4 |
--------------------------------------------------------------------------------
/src/google_ads_excluder/requirements_dev.txt:
--------------------------------------------------------------------------------
1 | -r requirements.txt
2 |
3 | functions-framework==3.1.0
4 |
--------------------------------------------------------------------------------
/terraform/backend.tf:
--------------------------------------------------------------------------------
terraform {
  # Remote state in Google Cloud Storage. Only the object prefix is fixed
  # here; the bucket name is not set, so it must be supplied at init time
  # (e.g. `terraform init -backend-config="bucket=..."`).
  backend "gcs" {
    prefix = "terraform/state"
  }
}
6 |
--------------------------------------------------------------------------------
/docs/images/cloud-shell.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/ads-placement-excluder/main/docs/images/cloud-shell.png
--------------------------------------------------------------------------------
/terraform/outputs.tf:
--------------------------------------------------------------------------------
# Expose the email of the solution's service account (resource defined
# elsewhere in this configuration) so operators can grant it the access it
# needs after deployment.
output "service_account_email" {
  value = google_service_account.service_account.email
}
4 |
--------------------------------------------------------------------------------
/docs/images/oauth-configuration.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/ads-placement-excluder/main/docs/images/oauth-configuration.png
--------------------------------------------------------------------------------
/docs/images/ape-architecture-diagram.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/ads-placement-excluder/main/docs/images/ape-architecture-diagram.png
--------------------------------------------------------------------------------
/docs/images/ape-datastudio-report-example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/ads-placement-excluder/main/docs/images/ape-datastudio-report-example.png
--------------------------------------------------------------------------------
/src/google_ads_report/requirements.txt:
--------------------------------------------------------------------------------
1 | google-ads==18.0.0
2 | google-cloud-pubsub==2.13.4
3 | google-cloud-storage==2.5.0
4 | jsonschema==4.9.1
5 | pandas==1.4.3
6 |
--------------------------------------------------------------------------------
/docs/images/ape-account-service-architecture-diagram.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/ads-placement-excluder/main/docs/images/ape-account-service-architecture-diagram.png
--------------------------------------------------------------------------------
/docs/images/ape-report-service-architecture-diagram.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/ads-placement-excluder/main/docs/images/ape-report-service-architecture-diagram.png
--------------------------------------------------------------------------------
/docs/images/ape-youtube-service-architecture-diagram.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/ads-placement-excluder/main/docs/images/ape-youtube-service-architecture-diagram.png
--------------------------------------------------------------------------------
/docs/images/ape-excluder-service-architecture-diagram.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/ads-placement-excluder/main/docs/images/ape-excluder-service-architecture-diagram.png
--------------------------------------------------------------------------------
/src/google_ads_accounts/requirements.txt:
--------------------------------------------------------------------------------
1 | Flask==2.1.3
2 | google-auth-httplib2==0.1.0
3 | google-auth-oauthlib==0.5.2
4 | google-api-python-client==2.55.0
5 | google-cloud-pubsub==2.13.4
6 | jsonschema==4.9.1
7 | pydata-google-auth==1.4.0
8 |
--------------------------------------------------------------------------------
/src/google_ads_excluder/requirements.txt:
--------------------------------------------------------------------------------
1 | google-ads==18.0.0
2 | google-auth-httplib2==0.1.0
3 | google-auth-oauthlib==0.5.2
4 | google-api-python-client==2.55.0
5 | google-cloud-bigquery==3.3.0
6 | google-cloud-pubsub==2.13.4
7 | google-cloud-storage==2.2.1
8 | jsonschema==4.9.1
9 | pandas==1.4.3
10 |
--------------------------------------------------------------------------------
/src/youtube_channel/requirements.txt:
--------------------------------------------------------------------------------
1 | google-api-python-client==2.55.0
2 | google-auth==1.35.0
3 | google-auth-httplib2==0.1.0
4 | google-auth-oauthlib==0.5.2
5 | google-cloud-bigquery==3.3.2
6 | google-cloud-pubsub==2.13.6
7 | google-cloud-storage==2.2.1
8 | google-cloud-translate==2.0.1
9 | jsonschema==4.9.1
10 | numpy==1.23.1
11 | pandas==1.4.3
12 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Temp deployment
2 | .temp/
3 | *.zip
4 |
5 | # Virtual envs
6 | env/
7 | venv/
8 | venvs/
9 |
10 | # IDEs
11 | .idea/
12 |
13 | # Terraform
14 | **/*.tfvars
15 | **/.terraform/*
16 | *.tfstate
17 | *.tfstate.*
18 | .terraform.lock.hcl
19 | out/
20 |
21 | # Python
22 | __pycache__
23 |
24 | # Google Cloud credentials
25 | creds.json
26 |
27 | # OS
28 | .DS_STORE
29 |
--------------------------------------------------------------------------------
/src/google_ads_excluder/bq_schema.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "name": "channel_id",
4 | "type": "STRING",
5 | "mode": "REQUIRED",
6 | "description": "The YouTube Channel ID"
7 | },
8 | {
9 | "name": "customer_id",
10 | "type": "STRING",
11 | "mode": "REQUIRED",
12 | "description": "The Google Ads Customer ID where this placement originated"
13 | },
14 | {
15 | "name": "datetime_updated",
16 | "type": "TIMESTAMP",
17 | "mode": "REQUIRED",
18 | "description": "The datetime the exclusion was made"
19 | }
20 | ]
21 |
--------------------------------------------------------------------------------
/contributing.md:
--------------------------------------------------------------------------------
1 | # How to Contribute
2 |
3 | We'd love to accept your patches and contributions to this project. There are
4 | just a few small guidelines you need to follow.
5 |
6 | ## Contributor License Agreement
7 |
8 | Contributions to this project must be accompanied by a Contributor License
9 | Agreement (CLA). You (or your employer) retain the copyright to your
10 | contribution; this simply gives us permission to use and redistribute your
11 | contributions as part of the project. Head over to
12 | <https://cla.developers.google.com/> to see your current agreements on file
13 | or to sign a new one.
14 |
15 | You generally only need to submit a CLA once, so if you've already submitted one
16 | (even if it was for a different project), you probably don't need to do it
17 | again.
18 |
19 | ## Code Reviews
20 |
21 | All submissions, including submissions by project members, require review. We
22 | use GitHub pull requests for this purpose. Consult
23 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
24 | information on using pull requests.
25 |
26 | ## Community Guidelines
27 |
28 | This project follows
29 | [Google's Open Source Community Guidelines](https://opensource.google/conduct/).
30 |
--------------------------------------------------------------------------------
/terraform/variables.tf:
--------------------------------------------------------------------------------
variable "project_id" {
  type        = string
  description = "The project ID to deploy the resources to"
}

variable "region" {
  type        = string
  description = "The region to deploy the resources to, e.g. europe-west2"
  default     = "europe-west2"
}

variable "oauth_refresh_token" {
  type        = string
  description = "The OAuth refresh token"
  # Secret: redact from plan/apply output and logs (requires Terraform >= 0.14).
  sensitive   = true
}

variable "google_cloud_client_id" {
  type        = string
  description = "The client ID from Google Cloud"
}

variable "google_cloud_client_secret" {
  type        = string
  description = "The client secret from Google Cloud"
  # Secret: redact from plan/apply output and logs.
  sensitive   = true
}

variable "google_ads_developer_token" {
  type        = string
  description = "The Google Ads developer token"
  # Secret: redact from plan/apply output and logs.
  sensitive   = true
}

variable "google_ads_login_customer_id" {
  type        = string
  description = "The Google Ads MCC customer ID with no dashes"
}

variable "config_sheet_id" {
  type        = string
  description = "The Google Sheet ID containing the config"
}

variable "bq_dataset" {
  type        = string
  description = "The name of the BQ dataset"
  default     = "ads_placement_excluder"
}
--------------------------------------------------------------------------------
/src/reporting/README.md:
--------------------------------------------------------------------------------
1 | # Ads Placement Excluder Reporting
2 |
3 | This code is used to build a DataStudio dashboard to provide visibility into the
4 | Ads Placement Excluder solution.
5 |
6 | ## Disclaimers
7 | __This is not an officially supported Google product.__
8 |
9 | Copyright 2022 Google LLC. This solution, including any related sample code or
10 | data, is made available on an “as is,” “as available,” and “with all faults”
11 | basis, solely for illustrative purposes, and without warranty or representation
12 | of any kind. This solution is experimental, unsupported and provided solely for
13 | your convenience. Your use of it is subject to your agreements with Google, as
14 | applicable, and may constitute a beta feature as defined under those agreements.
15 | To the extent that you make any data available to Google in connection with your
16 | use of the solution, you represent and warrant that you have all necessary and
17 | appropriate rights, consents and permissions to permit Google to use and process
18 | that data. By using any portion of this solution, you acknowledge, assume and
19 | accept all risks, known and unknown, associated with its usage, including with
20 | respect to your deployment of any portion of this solution in your systems, or
21 | usage in connection with your business, if at all.
22 |
--------------------------------------------------------------------------------
/src/youtube_channel/bq_schema.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "name": "channel_id",
4 | "type": "STRING",
5 | "mode": "REQUIRED",
6 | "description": "The YouTube Channel ID"
7 | },
8 | {
9 | "name": "view_count",
10 | "type": "INT64",
11 | "mode": "NULLABLE",
12 | "description": "The number of views the channel has"
13 | },
14 | {
15 | "name": "video_count",
16 | "type": "INT64",
17 | "mode": "NULLABLE",
18 | "description": "The number of videos the channel has uploaded to it"
19 | },
20 | {
21 | "name": "subscriber_count",
22 | "type": "INT64",
23 | "mode": "NULLABLE",
24 | "description": "The number of subscribers the channel has"
25 | },
26 | {
27 | "name": "title",
28 | "type": "STRING",
29 | "mode": "NULLABLE",
30 | "description": "The title of the YouTube channel"
31 | },
32 | {
33 | "name": "title_language",
34 | "type": "STRING",
35 | "mode": "NULLABLE",
36 | "description": "The predicted language of the title"
37 | },
38 | {
39 | "name": "title_language_confidence",
40 | "type": "FLOAT64",
41 | "mode": "NULLABLE",
42 | "description": "The confidence of the prediction"
43 | },
44 | {
45 | "name": "country",
46 | "type": "STRING",
47 | "mode": "NULLABLE",
48 | "description": "The country the channel is from"
49 | },
50 | {
51 | "name": "datetime_updated",
52 | "type": "TIMESTAMP",
53 | "mode": "REQUIRED",
54 | "description": "The datetime the data was pulled from YouTube"
55 | }
56 | ]
57 |
--------------------------------------------------------------------------------
/src/youtube_channel/utils/pubsub.py:
--------------------------------------------------------------------------------
1 | # Copyright 2022 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # https://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Utilities for sending messages to Pub/sub."""
15 | import json
16 | from typing import Any, Dict
17 | from google.cloud import pubsub_v1
18 |
19 |
def send_dict_to_pubsub(message_dict: Dict[str, Any],
                        topic: str,
                        gcp_project: str) -> None:
    """Push the dictionary to pubsub.

    Args:
        message_dict: the message as a dictionary to push to pubsub
        topic: the name of the topic to publish the message to
        gcp_project: the Google Cloud Project with the pub/sub topic in

    Raises:
        Exception: if the publish does not complete successfully.
    """
    publisher = pubsub_v1.PublisherClient()
    # The `topic_path` method creates a fully qualified identifier
    # in the form `projects/{project_id}/topics/{topic_id}`
    topic_path = publisher.topic_path(gcp_project, topic)
    message_str = json.dumps(message_dict)
    # Data must be a bytestring
    data = message_str.encode('utf-8')
    # publish() only queues the message and returns a future. Block on
    # .result() so failures surface here and the message is not lost if the
    # caller (e.g. a Cloud Function) terminates before the background
    # publish completes.
    publisher.publish(topic_path, data).result()
39 |
--------------------------------------------------------------------------------
/src/google_ads_report/utils/pubsub.py:
--------------------------------------------------------------------------------
1 | # Copyright 2022 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # https://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Utilities for sending messages to Pub/sub."""
15 | import json
16 | from typing import Any, Dict
17 | from google.cloud import pubsub_v1
18 |
19 |
def send_dict_to_pubsub(message_dict: Dict[str, Any],
                        topic: str,
                        gcp_project: str) -> None:
    """Push the dictionary to pubsub.

    Args:
        message_dict: the message as a dictionary to push to pubsub
        topic: the name of the topic to publish the message to
        gcp_project: the Google Cloud Project with the pub/sub topic in

    Raises:
        Exception: if the publish does not complete successfully.
    """
    publisher = pubsub_v1.PublisherClient()
    # The `topic_path` method creates a fully qualified identifier
    # in the form `projects/{project_id}/topics/{topic_id}`
    topic_path = publisher.topic_path(gcp_project, topic)
    message_str = json.dumps(message_dict)
    # Data must be a bytestring
    data = message_str.encode('utf-8')
    # publish() only queues the message and returns a future. Block on
    # .result() so failures surface here and the message is not lost if the
    # caller (e.g. a Cloud Function) terminates before the background
    # publish completes.
    publisher.publish(topic_path, data).result()
39 |
--------------------------------------------------------------------------------
/src/reporting/exclusions_report.sql:
--------------------------------------------------------------------------------
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

-- Exclusions report feeding the DataStudio dashboard: every excluded
-- channel joined with its YouTube channel statistics and the Google Ads
-- performance metrics recorded for the same channel/customer pair.

-- Remove duplicate rows from YouTube, pulling only the last updated data
WITH
  YouTube AS (
    SELECT *
    FROM `${BQ_DATASET}.YouTubeChannel`
    -- `WHERE true` is a no-op filter; BigQuery requires a WHERE/GROUP BY/
    -- HAVING clause to accompany QUALIFY.
    WHERE true
    -- Keep only the most recently updated row per channel_id.
    QUALIFY ROW_NUMBER() OVER (PARTITION BY channel_id ORDER BY datetime_updated DESC) = 1
  )
SELECT DISTINCT
  Excluded.datetime_updated AS excluded_datetime,
  Excluded.channel_id,
  Ads.placement_target_url,
  Excluded.customer_id,
  YouTube.view_count,
  YouTube.video_count,
  YouTube.subscriber_count,
  YouTube.title,
  YouTube.title_language,
  YouTube.title_language_confidence,
  YouTube.country,
  Ads.impressions,
  Ads.cost_micros,
  Ads.conversions,
  Ads.video_view_rate,
  Ads.video_views,
  Ads.clicks,
  Ads.average_cpm,
  Ads.ctr,
  -- Trailing comma before FROM is valid GoogleSQL.
  Ads.all_conversions_from_interactions_rate,
FROM
  `${BQ_DATASET}.GoogleAdsExclusion` AS Excluded
-- LEFT JOINs: keep every exclusion row even when the YouTube stats or the
-- originating Ads report row are missing.
LEFT JOIN
  YouTube USING (channel_id)
LEFT JOIN
  `${BQ_DATASET}.GoogleAdsReport` AS Ads
  USING (channel_id, customer_id)
--------------------------------------------------------------------------------
/src/google_ads_accounts/main_test.py:
--------------------------------------------------------------------------------
1 | # Copyright 2022 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # https://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Unit tests for main.py"""
15 | import unittest
16 | from unittest.mock import MagicMock, patch
17 | import main
18 |
19 |
class MainTestCase(unittest.TestCase):
    """Unit tests for the HTTP entry point and GAQL helper in main.py."""

    @patch('main.run')
    def test_main(self, mock_run):
        mock_request = MagicMock()

        # A request missing the required sheet_id is rejected with a 400
        # and must not trigger the pipeline.
        mock_request.get_json.return_value = {}
        response = main.main(mock_request)
        self.assertEqual(400, response.status_code)
        mock_run.assert_not_called()

        # A well-formed request returns 200 and runs the pipeline once.
        mock_request.get_json.return_value = {'sheet_id': '12345'}
        response = main.main(mock_request)
        self.assertEqual(200, response.status_code)
        mock_run.assert_called_once()

    def test_gads_filters_to_sql_string(self):
        # A single filter maps to one metrics.* predicate.
        single_filter = [['impressions', '>', '1']]
        self.assertEqual(
            'metrics.impressions > 1',
            main.gads_filters_to_gaql_string(single_filter))

        # Multiple filters are joined with AND.
        two_filters = [['impressions', '>', '1'], ['clicks', '<', '50']]
        self.assertEqual(
            'metrics.impressions > 1 AND metrics.clicks < 50',
            main.gads_filters_to_gaql_string(two_filters))


if __name__ == '__main__':
    unittest.main()
49 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Ads Placement Excluder
2 |
3 | It is manual and challenging to detect YouTube channel placements which might be
4 | spam (low performance with high cost), and exclude them from future advertising.
5 | Google Ads does not currently provide enough granularity to identify all spam
6 | channels.
7 |
8 | Ads Placement Excluder allows an advertiser to define what their interpretation
9 | of a spam channel is, and it will leverage the Google Ads & YouTube APIs to
10 | automate identifying these placements, and exclude them from future advertising.
11 |
12 | ## Architecture
13 | See [architecture.md](./docs/architecture.md).
14 |
15 | ## Reporting
16 | The solution provides a DataStudio dashboard to monitor the solution. See
17 | [reporting.md](./docs/reporting.md) for more information.
18 |
19 | ## Get Started
20 | See [deployment.md](./docs/deployment.md) for information on how to deploy the
21 | solution and get started.
22 |
23 | ## Disclaimers
24 | __This is not an officially supported Google product.__
25 |
26 | Copyright 2022 Google LLC. This solution, including any related sample code or
27 | data, is made available on an “as is,” “as available,” and “with all faults”
28 | basis, solely for illustrative purposes, and without warranty or representation
29 | of any kind. This solution is experimental, unsupported and provided solely for
30 | your convenience. Your use of it is subject to your agreements with Google, as
31 | applicable, and may constitute a beta feature as defined under those agreements.
32 | To the extent that you make any data available to Google in connection with your
33 | use of the solution, you represent and warrant that you have all necessary and
34 | appropriate rights, consents and permissions to permit Google to use and process
35 | that data. By using any portion of this solution, you acknowledge, assume and
36 | accept all risks, known and unknown, associated with its usage, including with
37 | respect to your deployment of any portion of this solution in your systems, or
38 | usage in connection with your business, if at all.
39 |
--------------------------------------------------------------------------------
/src/google_ads_report/bq_schema.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "name": "datetime_updated",
4 | "type": "TIMESTAMP",
5 | "mode": "REQUIRED",
6 | "description": "The datetime the data was pulled from Google Ads"
7 | },
8 | {
9 | "name": "customer_id",
10 | "type": "STRING",
11 | "mode": "REQUIRED",
12 | "description": "The customer ID in Google Ads"
13 | },
14 | {
15 | "name": "channel_id",
16 | "type": "STRING",
17 | "mode": "REQUIRED",
18 | "description": "The YouTube Channel ID"
19 | },
20 | {
21 | "name": "placement_target_url",
22 | "type": "STRING",
23 | "mode": "NULLABLE",
24 | "description": "The URL of the placement"
25 | },
26 | {
27 | "name": "impressions",
28 | "type": "INT64",
29 | "mode": "NULLABLE",
30 | "description": "The number of impressions on the placement"
31 | },
32 | {
33 | "name": "cost_micros",
34 | "type": "INT64",
35 | "mode": "NULLABLE",
36 | "description": "The cost in micros"
37 | },
38 | {
39 | "name": "conversions",
40 | "type": "FLOAT64",
41 | "mode": "NULLABLE",
42 | "description": "The number of conversions"
43 | },
44 | {
45 | "name": "video_view_rate",
46 | "type": "FLOAT64",
47 | "mode": "NULLABLE",
48 | "description": "The video view rate"
49 | },
50 | {
51 | "name": "video_views",
52 | "type": "INT64",
53 | "mode": "NULLABLE",
54 | "description": "The number of video views"
55 | },
56 | {
57 | "name": "clicks",
58 | "type": "INT64",
59 | "mode": "NULLABLE",
60 | "description": "The number of clicks"
61 | },
62 | {
63 | "name": "average_cpm",
64 | "type": "FLOAT64",
65 | "mode": "NULLABLE",
66 | "description": "The average CPM"
67 | },
68 | {
69 | "name": "ctr",
70 | "type": "FLOAT64",
71 | "mode": "NULLABLE",
72 | "description": "The click through rate"
73 | },
74 | {
75 | "name": "all_conversions_from_interactions_rate",
76 | "type": "FLOAT64",
77 | "mode": "NULLABLE",
78 | "description": "The conversion rate"
79 | }
80 | ]
81 |
--------------------------------------------------------------------------------
/src/google_ads_accounts/utils/pubsub.py:
--------------------------------------------------------------------------------
1 | # Copyright 2022 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # https://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Utilities for sending messages to Pub/sub."""
15 | import json
16 | from typing import Any, Dict, List
17 | from google.cloud import pubsub_v1
18 |
19 |
def send_dict_to_pubsub(message_dict: Dict[str, Any],
                        topic: str,
                        gcp_project: str) -> None:
    """Push the dictionary to pubsub.

    Args:
        message_dict: the message as a dictionary to push to pubsub
        topic: the name of the topic to publish the message to
        gcp_project: the Google Cloud Project with the pub/sub topic in

    Raises:
        Exception: if the publish does not complete successfully.
    """
    publisher = pubsub_v1.PublisherClient()
    # The `topic_path` method creates a fully qualified identifier
    # in the form `projects/{project_id}/topics/{topic_id}`
    topic_path = publisher.topic_path(gcp_project, topic)
    message_str = json.dumps(message_dict)
    # Data must be a bytestring
    data = message_str.encode('utf-8')
    # publish() only queues the message and returns a future. Block on
    # .result() so failures surface here and the message is not lost if the
    # caller (e.g. a Cloud Function) terminates before the background
    # publish completes.
    publisher.publish(topic_path, data).result()


def send_dicts_to_pubsub(messages: List[Dict[str, Any]],
                         topic: str,
                         gcp_project: str) -> None:
    """Push each message in the list to pubsub.

    Args:
        messages: a list of messages as dicts to push to pubsub
        topic: the name of the topic to publish the message to
        gcp_project: the Google Cloud Project with the pub/sub topic in
    """
    # NOTE(review): this instantiates a new PublisherClient per message via
    # send_dict_to_pubsub — acceptable for small batches; consider reusing a
    # single client if message volume grows.
    for message in messages:
        send_dict_to_pubsub(
            message_dict=message, topic=topic, gcp_project=gcp_project)
54 |
--------------------------------------------------------------------------------
/docs/reporting.md:
--------------------------------------------------------------------------------
1 | # Ads Placement Excluder Reporting
2 |
3 | There is a DataStudio dashboard that can be used to monitor the behaviour of the
4 | solution, and identify which channels are being excluded.
5 |
6 | 
8 |
9 | ## Get Started
10 |
11 | 1. Make a copy of the template from [here](
12 | https://datastudio.google.com/reporting/4a616bed-85e9-4794-a748-721051c10755)
13 | to your Drive folder
14 | 2. While copying choose `ViewExclusions` as a new data source. `ViewExclusions`
15 | view will be created automatically by Terraform after the first deployment.
16 | a. If `ViewExclusions` does not appear in available data sources you need to
17 | Create Data Source -> Big Query -> Your Project and find `ViewExclusions`
18 | table there. b. You can also add a custom data source to each chart in a chart
19 | setup tab afterwards
20 | 3. Sometimes `customer_id` is auto-defined as a date leading to the chart
21 | configuration error. You can change the field type manually to number via
22 | Resource -> Manage Data Sources -> Edit
23 | 4. You can adjust charts and filters according to your needs
24 |
25 | ## Disclaimers
26 |
27 | __This is not an officially supported Google product.__
28 |
29 | Copyright 2022 Google LLC. This solution, including any related sample code or
30 | data, is made available on an “as is,” “as available,” and “with all faults”
31 | basis, solely for illustrative purposes, and without warranty or representation
32 | of any kind. This solution is experimental, unsupported and provided solely for
33 | your convenience. Your use of it is subject to your agreements with Google, as
34 | applicable, and may constitute a beta feature as defined under those agreements.
35 | To the extent that you make any data available to Google in connection with your
36 | use of the solution, you represent and warrant that you have all necessary and
37 | appropriate rights, consents and permissions to permit Google to use and process
38 | that data. By using any portion of this solution, you acknowledge, assume and
39 | accept all risks, known and unknown, associated with its usage, including with
40 | respect to your deployment of any portion of this solution in your systems, or
41 | usage in connection with your business, if at all.
42 |
--------------------------------------------------------------------------------
/src/google_ads_report/utils/gcs.py:
--------------------------------------------------------------------------------
1 | # Copyright 2022 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # https://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Utilities for working with Google Cloud Storage."""
15 | from google.cloud.storage.client import Client
16 | from google.cloud.storage.blob import Blob
17 | import pandas as pd
18 |
19 |
def upload_blob_from_df(df: pd.DataFrame, bucket: str, blob_name: str) -> Blob:
    """Upload a Pandas DataFrame to a Google Cloud Storage bucket.

    The DataFrame is serialised to CSV (without the index column) before
    being uploaded.

    Args:
        df: the Pandas dataframe to upload.
        bucket (str): Google Cloud Storage bucket.
        blob_name (str): Google Cloud Storage blob name.

    Returns:
        Blob: Newly created Google Cloud Storage file blob.
    """
    return upload_blob_from_string(
        blob_string=df.to_csv(index=False),
        blob_name=blob_name,
        bucket=bucket)
32 |
33 |
def upload_blob_from_string(
    bucket: str, blob_string: str, blob_name: str, content_type='text/csv'
) -> Blob:
    """Write a string to a new blob in Google Cloud Storage.

    Args:
        bucket (str): name of the destination Cloud Storage bucket.
        blob_string (str): the data to write into the blob.
        blob_name (str): name of the blob to create.
        content_type (optional str): MIME type of the data, e.g.
            text/csv.

    Returns:
        Blob: the Cloud Storage blob the string was written to.
    """
    target_blob = create_blob(bucket, blob_name)
    target_blob.upload_from_string(blob_string, content_type=content_type)
    return target_blob
53 |
def create_blob(bucket_name: str, blob_name: str) -> Blob:
    """Build a reference to a blob in a Cloud Storage bucket.

    Args:
        bucket_name (str): name of the Cloud Storage bucket.
        blob_name (str): name of the blob within the bucket.

    Returns:
        Blob: the Cloud Storage blob handle.
    """
    storage_client = Client()
    target_bucket = storage_client.bucket(bucket_name)
    return target_bucket.blob(blob_name)
68 |
--------------------------------------------------------------------------------
/src/youtube_channel/utils/gcs.py:
--------------------------------------------------------------------------------
1 | # Copyright 2022 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # https://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Utilities for working with Google Cloud Storage."""
15 | from google.cloud.storage.client import Client
16 | from google.cloud.storage.blob import Blob
17 | import pandas as pd
18 |
19 |
def upload_blob_from_df(df: pd.DataFrame, bucket: str, blob_name: str) -> Blob:
    """Serialise a DataFrame to CSV and store it in Cloud Storage.

    The CSV is produced without the index column.

    Args:
        df: the Pandas dataframe to upload.
        bucket (str): Google Cloud Storage bucket.
        blob_name (str): Google Cloud Storage blob name.

    Returns:
        Blob: the blob the CSV content was written to.
    """
    csv_payload = df.to_csv(index=False)
    return upload_blob_from_string(
        bucket=bucket, blob_string=csv_payload, blob_name=blob_name)
32 |
33 |
def upload_blob_from_string(
    bucket: str, blob_string: str, blob_name: str, content_type='text/csv'
) -> Blob:
    """Store a string as a blob in Google Cloud Storage.

    Args:
        bucket (str): name of the target Cloud Storage bucket.
        blob_string (str): the payload to store.
        blob_name (str): the name of the blob to write to.
        content_type (optional str): MIME type of the payload, e.g.
            text/csv.

    Returns:
        Blob: the newly written Cloud Storage blob.
    """
    destination = create_blob(bucket, blob_name)
    destination.upload_from_string(blob_string, content_type=content_type)
    return destination
53 |
def create_blob(bucket_name: str, blob_name: str) -> Blob:
    """Create a handle for a Google Cloud Storage blob.

    Args:
        bucket_name (str): Google Cloud Storage bucket.
        blob_name (str): Google Cloud Storage blob name.

    Returns:
        Blob: Google Cloud Storage file blob.
    """
    return Client().bucket(bucket_name).blob(blob_name)
68 |
--------------------------------------------------------------------------------
/src/google_ads_excluder/utils/gcs.py:
--------------------------------------------------------------------------------
1 | # Copyright 2022 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # https://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Utilities for working with Google Cloud Storage."""
15 | from google.cloud.storage.client import Client
16 | from google.cloud.storage.blob import Blob
17 | import pandas as pd
18 |
19 |
def upload_blob_from_df(df: pd.DataFrame, bucket: str, blob_name: str) -> Blob:
    """Upload a Pandas DataFrame to a Google Cloud Storage bucket.

    The DataFrame is serialised to CSV (without the index column) before
    being uploaded.

    Args:
        df: the Pandas dataframe to upload.
        bucket (str): Google Cloud Storage bucket.
        blob_name (str): Google Cloud Storage blob name.

    Returns:
        Blob: Newly created Google Cloud Storage file blob.
    """
    return upload_blob_from_string(
        blob_string=df.to_csv(index=False),
        blob_name=blob_name,
        bucket=bucket)
32 |
33 |
def upload_blob_from_string(
    bucket: str, blob_string: str, blob_name: str, content_type='text/csv'
) -> Blob:
    """Upload string content as a Google Cloud Storage blob.

    Args:
        bucket (str): the bucket to upload into.
        blob_string (str): the content to upload.
        blob_name (str): the name of the blob to create.
        content_type (optional str): MIME type of the content, defaulting
            to text/csv.

    Returns:
        Blob: the blob that now holds the content.
    """
    new_blob = create_blob(bucket, blob_name)
    new_blob.upload_from_string(blob_string, content_type=content_type)
    return new_blob
53 |
def create_blob(bucket_name: str, blob_name: str) -> Blob:
    """Return a Blob object for the given bucket and blob name.

    Args:
        bucket_name (str): Google Cloud Storage bucket.
        blob_name (str): Google Cloud Storage blob name.

    Returns:
        Blob: Google Cloud Storage file blob.
    """
    gcs_client = Client()
    return gcs_client.bucket(bucket_name).blob(blob_name)
68 |
--------------------------------------------------------------------------------
/src/google_ads_report/README.md:
--------------------------------------------------------------------------------
1 | # Google Ads Reporting Service
2 |
3 | This service is responsible for running a report from Google Ads based on the
4 | [group_placement_view](
5 | https://developers.google.com/google-ads/api/fields/v11/group_placement_view),
6 | with the configured filters, and outputting that as a CSV to a Cloud Storage
7 | bucket, with a BigQuery table in front of it. The data pulled from the report is
8 | filtered to only have YouTube channels.
9 |
10 | ## Local Deployment
11 | To run the code ensure the following environment variables are set:
12 |
13 | ```
14 | export GOOGLE_ADS_USE_PROTO_PLUS=false
15 | export GOOGLE_ADS_REFRESH_TOKEN=
16 | export GOOGLE_ADS_CLIENT_ID=
17 | export GOOGLE_ADS_CLIENT_SECRET=
18 | export GOOGLE_ADS_DEVELOPER_TOKEN=
19 | export GOOGLE_ADS_LOGIN_CUSTOMER_ID=
20 | export GOOGLE_CLOUD_PROJECT=
21 | export APE_YOUTUBE_PUBSUB_TOPIC=
22 | export APE_GCS_DATA_BUCKET=
23 | ```
24 |
25 | Next install the dev requirements:
26 |
27 | ```
28 | pip install -r requirements_dev.txt
29 | ```
30 |
31 | Then start the server by running:
32 |
33 | ```
34 | functions-framework --target=main --signature-type=event --port=8080
35 | ```
36 |
37 | You can then make a post request by running the following:
38 |
39 | ```
40 | curl localhost:8080 \
41 | -X POST \
42 | -H "Content-Type: application/json" \
43 | -d "{ \"data\": { \"data\": \"$(echo '{ "customer_id": "1234567890", "lookback_days": 90, "gads_filters": "metrics.impressions > 0", "sheet_id": "abcdefghijklmnop-mk"}' | base64)\" }}"
44 | ```
45 |
46 | ### Mac users
47 |
48 | You may need to set this environment variable for the Google Ads report stream
49 | to work, [see Github for more info](https://github.com/rails/rails/issues/38560).
50 |
51 | ```
52 | export OBJC_DISABLE_INITIALIZE_FORK_SAFETY=YES
53 | ```
54 |
55 | ## Disclaimers
56 | __This is not an officially supported Google product.__
57 |
58 | Copyright 2022 Google LLC. This solution, including any related sample code or
59 | data, is made available on an “as is,” “as available,” and “with all faults”
60 | basis, solely for illustrative purposes, and without warranty or representation
61 | of any kind. This solution is experimental, unsupported and provided solely for
62 | your convenience. Your use of it is subject to your agreements with Google, as
63 | applicable, and may constitute a beta feature as defined under those agreements.
64 | To the extent that you make any data available to Google in connection with your
65 | use of the solution, you represent and warrant that you have all necessary and
66 | appropriate rights, consents and permissions to permit Google to use and process
67 | that data. By using any portion of this solution, you acknowledge, assume and
68 | accept all risks, known and unknown, associated with its usage, including with
69 | respect to your deployment of any portion of this solution in your systems, or
70 | usage in connection with your business, if at all.
71 |
--------------------------------------------------------------------------------
/src/google_ads_accounts/README.md:
--------------------------------------------------------------------------------
1 | # Google Ads Account Function
2 |
3 | This service is responsible for deciding which Google Ads accounts the Ads
4 | Placement Excluder solution should run for, and kicking off the downstream
5 | pipeline. Each account is pushed as a separate message into the topic to enable
6 | concurrency.
7 |
8 | ## Local Deployment
9 | To run the code ensure the following environment variables are set:
10 |
11 | ```
12 | export GOOGLE_CLOUD_PROJECT=ads-placement-excluder
13 | export APE_ADS_REPORT_PUBSUB_TOPIC=ads-report-topic
14 | ```
15 |
16 | The code uses [Google Application Default credentials](
17 | https://google-auth.readthedocs.io/en/master/reference/google.auth.html) for
18 | auth.
19 |
20 | First create OAuth desktop credentials in Google Cloud, and download the client
21 | ID and client secret as a JSON file.
22 |
23 | Then run the following command, updating the path to point to the JSON file
24 | downloaded in the previous step:
25 | ```
26 | gcloud auth application-default login \
27 | --scopes='https://www.googleapis.com/auth/spreadsheets.readonly,https://www.googleapis.com/auth/cloud-platform' \
28 | --client-id-file=/path/to/client-id-file.json
29 | ```
[Optionally] [see this article](
https://medium.com/google-cloud/google-oauth-credential-going-deeper-the-hard-way-f403cf3edf9d)
for a detailed explanation of why this is needed.
33 |
34 | Next install the dev requirements:
35 |
36 | ```
37 | pip install -r requirements_dev.txt
38 | ```
39 |
40 | Then start the server by running:
41 |
42 | ```
43 | functions-framework --target=main --port=8080
44 | ```
45 |
46 | You can then make a post request by running the following:
47 |
48 | ```
49 | curl localhost:8080 \
50 | -X POST \
51 | -H "Content-Type: application/json" \
52 | -d '{"sheet_id": "12g3IoIP4Lk_UU3xtJsIiCSDxjNAn30vT4lOzSZPS-mk"}'
53 | ```
54 |
55 | ## Disclaimers
56 | __This is not an officially supported Google product.__
57 |
58 | Copyright 2022 Google LLC. This solution, including any related sample code or
59 | data, is made available on an “as is,” “as available,” and “with all faults”
60 | basis, solely for illustrative purposes, and without warranty or representation
61 | of any kind. This solution is experimental, unsupported and provided solely for
62 | your convenience. Your use of it is subject to your agreements with Google, as
63 | applicable, and may constitute a beta feature as defined under those agreements.
64 | To the extent that you make any data available to Google in connection with your
65 | use of the solution, you represent and warrant that you have all necessary and
66 | appropriate rights, consents and permissions to permit Google to use and process
67 | that data. By using any portion of this solution, you acknowledge, assume and
68 | accept all risks, known and unknown, associated with its usage, including with
69 | respect to your deployment of any portion of this solution in your systems, or
70 | usage in connection with your business, if at all.
71 |
--------------------------------------------------------------------------------
/src/youtube_channel/main_test.py:
--------------------------------------------------------------------------------
1 | # Copyright 2022 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # https://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Unit tests for main.py"""
15 | import unittest
16 | import numpy as np
17 | import pandas as pd
18 | import main
19 |
20 |
class MainTestCase(unittest.TestCase):
    """Unit tests for the YouTube channel service helpers in main.py."""

    def test_sanitise_youtube_dataframe(self):
        # Column layout expected by main.sanitise_youtube_dataframe.
        columns = [
            'title',
            'view_count',
            'video_count',
            'subscriber_count',
            'title_language_confidence',
        ]
        # Raw rows: channel titles containing characters that should be
        # removed, with the numeric columns arriving as strings.
        raw_data = [
            ['String with a new line \n', '10', '1', '3', '0.56'],
            ['String, with, commas in,it', '10', '1', '3', '0.56'],
            ['String with "double quotes" in it', '10', '1', '3', '0.56'],
            ["String with 'single quotes' in it", '10', '1', '3', '0.56'],
            [' String with white space ', '10', '1', '3', '0.56'],
            ['String with $\r\t\n;:,', '10', '1', '3', '0.56'],
            ['Строка написана на русском языке', '10', '1', '3', '0.56'],
            ['用中文寫的字符串', '10', '1', '3', '0.56'],
        ]
        # Expected rows: punctuation and surrounding whitespace stripped
        # from the titles, numeric columns converted to int/float, and
        # non-Latin (Russian/Chinese) titles preserved unchanged.
        expected_data = [
            ['String with a new line', 10, 1, 3, 0.56],
            ['String with commas init', 10, 1, 3, 0.56],
            ['String with double quotes in it', 10, 1, 3, 0.56],
            ['String with single quotes in it', 10, 1, 3, 0.56],
            ['String with white space', 10, 1, 3, 0.56],
            ['String with', 10, 1, 3, 0.56],
            ['Строка написана на русском языке', 10, 1, 3, 0.56],
            ['用中文寫的字符串', 10, 1, 3, 0.56],
        ]
        raw_df = pd.DataFrame(data=raw_data, columns=columns)
        expected_df = pd.DataFrame(data=expected_data, columns=columns)
        response_df = main.sanitise_youtube_dataframe(raw_df)
        pd.testing.assert_frame_equal(expected_df, response_df)

    def test_split_list_to_chunks(self):
        # 150 items with a max chunk size of 50 -> exactly 3 full chunks.
        lst = np.arange(150)
        max_chunk_size = 50
        chunks = main.split_list_to_chunks(lst, max_chunk_size)
        self.assertEqual(len(chunks), 3)
        self.assertEqual(len(chunks[0]), 50)
        self.assertEqual(len(chunks[1]), 50)
        self.assertEqual(len(chunks[2]), 50)

        # 151 items cannot fit in 3 chunks of 50, so a 4th chunk is added
        # and every chunk ends up strictly below the maximum size.
        lst = np.arange(151)
        max_chunk_size = 50
        chunks = main.split_list_to_chunks(lst, max_chunk_size)
        self.assertEqual(len(chunks), 4)
        self.assertTrue(len(chunks[0]) < 50)
        self.assertTrue(len(chunks[1]) < 50)
        self.assertTrue(len(chunks[2]) < 50)
        self.assertTrue(len(chunks[3]) < 50)
73 |
--------------------------------------------------------------------------------
/src/google_ads_excluder/README.md:
--------------------------------------------------------------------------------
1 | # Google Ads Exclusion service
2 |
3 | The Google Ads Excluder service is responsible for applying the filters in the
4 | config Google Sheet to the data, to determine which channels should be excluded
5 | in Google Ads. Channels identified for exclusion are then uploaded to the shared
6 | placement list in Google Ads, and the output written to BigQuery for reporting.
7 |
8 | ## Local Deployment
9 | To run the code ensure the following environment variables are set:
10 |
11 | ```
12 | export GOOGLE_CLOUD_PROJECT=
13 | export APE_BIGQUERY_DATASET=
14 | export APE_EXCLUSION_VALIDATE_ONLY=
15 | export APE_GCS_DATA_BUCKET=
16 | export GOOGLE_ADS_USE_PROTO_PLUS=false
17 | export GOOGLE_ADS_REFRESH_TOKEN=
18 | export GOOGLE_ADS_CLIENT_ID=
19 | export GOOGLE_ADS_CLIENT_SECRET=
20 | export GOOGLE_ADS_DEVELOPER_TOKEN=
21 | export GOOGLE_ADS_LOGIN_CUSTOMER_ID=
22 | ```
23 |
24 | The code uses [Google Application Default credentials](
25 | https://google-auth.readthedocs.io/en/master/reference/google.auth.html) for
26 | auth.
27 |
28 | First create OAuth desktop credentials in Google Cloud, and download the client
29 | ID and client secret as a JSON file.
30 |
31 | Then run the following command, updating the path to point to the JSON file
32 | downloaded in the previous step:
33 | ```
34 | gcloud auth application-default login \
35 | --scopes='https://www.googleapis.com/auth/spreadsheets.readonly,https://www.googleapis.com/auth/cloud-platform' \
36 | --client-id-file=/path/to/client-id-file.json
37 | ```
[Optionally] [see this article](
https://medium.com/google-cloud/google-oauth-credential-going-deeper-the-hard-way-f403cf3edf9d)
for a detailed explanation of why this is needed.
41 |
42 | Next install the dev requirements:
43 |
44 | ```
45 | pip install -r requirements_dev.txt
46 | ```
47 |
48 | Start the function:
49 |
50 | ```
51 | functions-framework --target=main --signature-type=event --port=8080
52 | ```
53 |
54 | You can then make a post request by running the following:
55 |
56 | ```
57 | curl localhost:8080 \
58 | -X POST \
59 | -H "Content-Type: application/json" \
60 | -d "{ \"data\": { \"data\": \"$(echo '{ "customer_id": "1234567890", "sheet_id": "abcdefghijklmnop-mk" }' | base64)\" }}"
61 | ```
62 |
63 | ## Disclaimers
64 | __This is not an officially supported Google product.__
65 |
66 | Copyright 2022 Google LLC. This solution, including any related sample code or
67 | data, is made available on an “as is,” “as available,” and “with all faults”
68 | basis, solely for illustrative purposes, and without warranty or representation
69 | of any kind. This solution is experimental, unsupported and provided solely for
70 | your convenience. Your use of it is subject to your agreements with Google, as
71 | applicable, and may constitute a beta feature as defined under those agreements.
72 | To the extent that you make any data available to Google in connection with your
73 | use of the solution, you represent and warrant that you have all necessary and
74 | appropriate rights, consents and permissions to permit Google to use and process
75 | that data. By using any portion of this solution, you acknowledge, assume and
76 | accept all risks, known and unknown, associated with its usage, including with
77 | respect to your deployment of any portion of this solution in your systems, or
78 | usage in connection with your business, if at all.
79 |
--------------------------------------------------------------------------------
/src/google_ads_report/main_test.py:
--------------------------------------------------------------------------------
1 | # Copyright 2022 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # https://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Unit tests for main.py"""
15 | import base64
16 | from datetime import datetime
17 | import json
18 | from typing import Any, Dict
19 | import unittest
20 | from unittest.mock import patch
21 | import jsonschema
22 | import main
23 |
24 |
class MainTestCase(unittest.TestCase):
    """Unit tests for the Google Ads report service entry point."""

    def _create_event(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """A helper function for creating mock event data.

        The function under test expects base64-encoded JSON under the
        'data' key, so this encodes the dict accordingly.

        Args:
            data: a dictionary containing the event data.
        """
        return {
            'data': base64.b64encode(json.dumps(data).encode('utf-8'))
        }

    @patch('main.start_job')
    def test_main(self, mock_start_job):
        # A payload with none of the required keys fails validation.
        event = self._create_event({'abc': '123'})
        with self.assertRaises(jsonschema.exceptions.ValidationError):
            main.main(event, {})
        mock_start_job.assert_not_called()

        # customer_id alone is not enough to pass validation.
        event = self._create_event({'customer_id': '123'})
        with self.assertRaises(jsonschema.exceptions.ValidationError):
            main.main(event, {})
        mock_start_job.assert_not_called()

        # lookback_days alone is not enough to pass validation.
        event = self._create_event({'lookback_days': 90})
        with self.assertRaises(jsonschema.exceptions.ValidationError):
            main.main(event, {})
        mock_start_job.assert_not_called()

        # A fully populated payload validates and kicks off the job.
        event = self._create_event({
            'sheet_id': 'abcdefghijklmnop-mk',
            'customer_id': '123',
            'lookback_days': 90,
            'gads_filters': 'metrics.clicks > 10',
        })
        main.main(event, {})
        mock_start_job.assert_called_once()

    def test_get_query_dates(self):
        # Pin "today" so the expected dates are deterministic.
        today_str = '2022-07-01'
        today = datetime.strptime(today_str, '%Y-%m-%d')
        date_from, date_to = main.get_query_dates(90, today)
        self.assertEqual(date_to, today_str)
        # 90 days before 2022-07-01 is 2022-04-02.
        self.assertEqual(date_from, '2022-04-02')

    @patch('main.get_query_dates')
    def test_get_report_query(self, mock_get_query_dates):
        mock_get_query_dates.return_value = ('2022-01-01', '2022-01-31')
        lookback_days = 90
        gads_filters = None
        query = main.get_report_query(lookback_days, gads_filters)
        query = query.strip()
        # check it doesn't end in AND - this would be an invalid query
        self.assertNotEqual('AND', query[-3:])

        # Custom filters must appear verbatim in the generated GAQL.
        gads_filters = 'metrics.clicks > 10'
        query = main.get_report_query(lookback_days, gads_filters)
        self.assertIn(gads_filters, query)


if __name__ == '__main__':
    unittest.main()
87 |
--------------------------------------------------------------------------------
/src/youtube_channel/README.md:
--------------------------------------------------------------------------------
1 | # YouTube Channel Reporting Service
2 |
3 | This service is responsible for pulling metrics about the YouTube channels from
4 | the Google Ads Report. For example the number of views the channel has had, and
5 | the number of subscribers. It then uses the Google Translate API (if enabled in
6 | the config), to determine the language the YouTube channel title is in.
7 |
8 | ## Google Translate API
9 | This service leverages [Google's Translation API](
10 | https://cloud.google.com/translate/docs/basic/detecting-language), for detecting
11 | the language of the YouTube channel's title.
12 |
13 | This is enabled/disabled in the configuration Google Sheet. See the
14 | [basic pricing](https://cloud.google.com/translate/pricing) for cost
15 | information.
16 |
17 | ## Local Deployment
18 | To run the code ensure the following environment variables are set:
19 |
20 | ```
21 | export GOOGLE_CLOUD_PROJECT=
22 | export APE_BIGQUERY_DATASET=
23 | export APE_GCS_DATA_BUCKET=
24 | ```
25 |
26 | The code uses [Google Application Default credentials](
27 | https://google-auth.readthedocs.io/en/master/reference/google.auth.html) for
28 | auth.
29 |
30 | First create OAuth desktop credentials in Google Cloud, and download the client
31 | ID and client secret as a JSON file.
32 |
33 | Then run the following command, updating the path to point to the JSON file
34 | downloaded in the previous step:
35 | ```
36 | gcloud auth application-default login \
37 | --scopes='https://www.googleapis.com/auth/spreadsheets.readonly,https://www.googleapis.com/auth/cloud-platform,https://www.googleapis.com/auth/youtube' \
38 | --client-id-file=/path/to/client-id-file.json
39 | ```
[Optionally] [see this article](
https://medium.com/google-cloud/google-oauth-credential-going-deeper-the-hard-way-f403cf3edf9d)
for a detailed explanation of why this is needed.
43 |
44 | Next install the dev requirements:
45 |
46 | ```
47 | pip install -r requirements_dev.txt
48 | ```
49 |
50 | Start the function:
51 |
52 | ```
53 | functions-framework --target=main --signature-type=event --port=8080
54 | ```
55 |
56 | You can then make a post request by running the following:
57 |
58 | ```
59 | curl localhost:8080 \
60 | -X POST \
61 | -H "Content-Type: application/json" \
62 | -d "{ \"data\": { \"data\": \"$(echo '{ "customer_id": "1234567890", "sheet_id": "abcdefghijklmnop-mk" }' | base64)\" }}"
63 | ```
64 |
65 | ## Disclaimers
66 | __This is not an officially supported Google product.__
67 |
68 | Copyright 2022 Google LLC. This solution, including any related sample code or
69 | data, is made available on an “as is,” “as available,” and “with all faults”
70 | basis, solely for illustrative purposes, and without warranty or representation
71 | of any kind. This solution is experimental, unsupported and provided solely for
72 | your convenience. Your use of it is subject to your agreements with Google, as
73 | applicable, and may constitute a beta feature as defined under those agreements.
74 | To the extent that you make any data available to Google in connection with your
75 | use of the solution, you represent and warrant that you have all necessary and
76 | appropriate rights, consents and permissions to permit Google to use and process
77 | that data. By using any portion of this solution, you acknowledge, assume and
78 | accept all risks, known and unknown, associated with its usage, including with
79 | respect to your deployment of any portion of this solution in your systems, or
80 | usage in connection with your business, if at all.
81 |
--------------------------------------------------------------------------------
/src/google_ads_accounts/main.py:
--------------------------------------------------------------------------------
1 | # Copyright 2022 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # https://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Fetch the Google Ads configs and push them to pub/sub."""
15 | import logging
16 | import os
17 | import sys
18 | from typing import Any, List, Dict
19 | import flask
20 | import google.auth
21 | from googleapiclient.discovery import build
22 | import jsonschema
23 | from utils import pubsub
24 |
25 |
# Send all log output to stdout.
logging.basicConfig(stream=sys.stdout)
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# The Google Cloud project containing the pub/sub topic
GOOGLE_CLOUD_PROJECT = os.environ.get('GOOGLE_CLOUD_PROJECT')
# The name of the pub/sub topic
APE_ADS_REPORT_PUBSUB_TOPIC = os.environ.get('APE_ADS_REPORT_PUBSUB_TOPIC')
# The access scopes used in this function
SCOPES = ['https://www.googleapis.com/auth/spreadsheets.readonly']

# The schema of the JSON in the request; only sheet_id is required.
request_schema = {
    'type': 'object',
    'properties': {
        'sheet_id': {'type': 'string'},
    },
    'required': ['sheet_id', ]
}
45 |
46 |
def main(request: flask.Request) -> flask.Response:
    """Handle the HTTP trigger: validate the payload and start the job.

    The request payload must match the request_schema object above.

    Args:
        request (flask.Request): HTTP request object.
    Returns:
        The flask response.
    """
    logger.info('Google Ads Account Service triggered.')
    request_json = request.get_json()
    logger.info('JSON payload: %s', request_json)
    try:
        jsonschema.validate(instance=request_json, schema=request_schema)
    except jsonschema.exceptions.ValidationError as err:
        # Reject malformed payloads with a 400 before doing any work.
        logger.error('Invalid request payload: %s', err)
        error_body = {
            'status': 'Failed',
            'message': err.message,
        }
        return flask.Response(flask.json.dumps(error_body),
                              status=400,
                              mimetype='application/json')

    run(request_json['sheet_id'])

    success_body = {
        'status': 'Success',
        'message': 'Downloaded data successfully',
    }
    return flask.Response(flask.json.dumps(success_body),
                          status=200,
                          mimetype='application/json')
78 |
79 |
def run(sheet_id: str) -> None:
    """Run the account pipeline: read the config, then publish messages.

    Args:
        sheet_id: the ID of the Google Sheet containing the config.
    """
    logger.info('Running Google Ads account script')
    send_messages_to_pubsub(get_config_from_sheet(sheet_id))
    logger.info('Done.')
90 |
91 |
def get_config_from_sheet(sheet_id: str) -> List[Dict[str, Any]]:
    """Get the Ads account config from the Google Sheet, and return the results.

    Reads the named ranges google_ads_customer_ids, google_ads_filters and
    google_ads_lookback_days from the sheet, and builds one config dict per
    customer ID whose row is marked 'Enabled'.

    Args:
        sheet_id: the ID of the Google Sheet containing the config.

    Returns:
        Returns a row for each account a report needs to be run for.

        [
            {
                'sheet_id': 'abcdefghijklmnop-mk',
                'customer_id': '1234567890',
                'lookback_days': 90,
                'gads_filters': 'metrics.clicks > 10',
            },
            ...
        ]
    """
    logger.info('Getting config from sheet: %s', sheet_id)
    # Only the credentials are needed here; the default project ID that
    # google.auth.default() also returns is unused.
    credentials, _ = google.auth.default(scopes=SCOPES)
    sheets_service = build('sheets', 'v4', credentials=credentials)
    sheet = sheets_service.spreadsheets()

    customer_ids = sheet.values().get(
        spreadsheetId=sheet_id,
        range='google_ads_customer_ids').execute().get('values', [])
    gads_filters = sheet.values().get(
        spreadsheetId=sheet_id,
        range='google_ads_filters').execute().get('values', [])
    # Fall back to a 30-day lookback when the named range is empty.
    lookback_days = sheet.values().get(
        spreadsheetId=sheet_id,
        range='google_ads_lookback_days').execute().get('values',
                                                        [['30']])[0][0]

    gads_filters_str = gads_filters_to_gaql_string(gads_filters)

    logger.info('Returned %i customer_ids', len(customer_ids))
    account_configs = []
    for customer_id, is_enabled in customer_ids:
        if is_enabled == 'Enabled':
            account_configs.append({
                'sheet_id': sheet_id,
                'customer_id': customer_id,
                'lookback_days': int(lookback_days),
                'gads_filters': gads_filters_str,
            })
        else:
            logger.info('Ignoring disabled row: %s', customer_id)

    logger.info('Account configs:')
    logger.info(account_configs)
    return account_configs
145 |
146 |
def gads_filters_to_gaql_string(config_filters: List[List[str]]) -> str:
    """Turn the Google Ads filters into a GAQL compatible string.

    The config sheet stores the filters as a list of [field, operator, value]
    rows; these are combined into a single condition string suitable for use
    in a WHERE clause of the GAQL that is passed to Google Ads. See:
    https://developers.google.com/google-ads/api/docs/query/overview

    The rows are combined with "AND".

    Args:
        config_filters: the filters from the Google Sheet

    Returns:
        A string that can be used in the WHERE statement of the Google Ads
        Query Language.
    """
    return ' AND '.join(
        f'metrics.{row[0]} {row[1]} {row[2]}' for row in config_filters)
168 |
169 |
def send_messages_to_pubsub(messages: List[Dict[str, Any]]) -> None:
    """Publish one Pub/Sub message per account config dict.

    Args:
        messages: the list of messages to push to pubsub
    """
    logger.info('Sending messages to pubsub')
    logger.info('Messages: %s', messages)
    publish_kwargs = {
        'messages': messages,
        'topic': APE_ADS_REPORT_PUBSUB_TOPIC,
        'gcp_project': GOOGLE_CLOUD_PROJECT,
    }
    pubsub.send_dicts_to_pubsub(**publish_kwargs)
    logger.info('All messages published')
183 |
--------------------------------------------------------------------------------
/docs/deployment.md:
--------------------------------------------------------------------------------
1 | # Ads Placement Excluder Deployment
2 |
3 | This doc provides information on how to deploy the Ads Placement Excluder
4 | solution.
5 |
6 | The deployment uses [Terraform](https://www.terraform.io/) to automate the
7 | deployment, and to keep all the Infrastructure as Code (IaC). The files can be
8 | found in the `/terraform/` folder in this repo.
9 |
10 | ## Roles
11 | The project creates a service account with the following roles:
12 |
13 | - `roles/cloudfunctions.invoker`
14 | - `roles/bigquery.jobUser`
15 | - `roles/bigquery.dataViewer`
16 | - `roles/pubsub.publisher`
17 | - `roles/storage.objectAdmin`
18 |
19 | As a user deploying the project, you will require these roles and the following:
20 |
21 | - `roles/storage.admin`
22 | - `roles/iam.securityAdmin`
23 |
24 | Read more about [Google Cloud roles here](
25 | https://cloud.google.com/iam/docs/understanding-roles).
26 |
27 | ## OAuth
28 | The project uses OAuth 2.0 scopes and service account roles to manage
29 | permissions. These are the scopes that are required when generating a refresh
30 | token.
31 |
32 | ```
33 | https://www.googleapis.com/auth/spreadsheets.readonly
34 | https://www.googleapis.com/auth/cloud-platform
35 | https://www.googleapis.com/auth/youtube
36 | https://www.googleapis.com/auth/adwords
37 | ```
38 |
39 | ## Pre-requisites
40 |
41 | - A new Google Cloud Project
42 | - Appropriate permissions to be able to deploy the project (see [roles](#roles))
43 | - Create a copy of [the template Google Sheet](
44 | https://docs.google.com/spreadsheets/d/1IAo8yvrY4BMuOaWnZ2O8wfJ6L36sOjCOnD7cigMxKwI/copy)
45 | and make a note of the Google Sheet ID (found in the URL after the `/d/`)
46 | - Access to the appropriate Google Ads accounts
47 | - A Google Ads [Developer Token](
48 | https://developers.google.com/google-ads/api/docs/first-call/dev-token)
49 |
50 |
51 | ## Deployment
52 |
53 | ### Manual Steps
54 | These changes need to be done once manually, as they are not controlled by Terraform:
55 |
56 | 1. Open the Google Cloud Project in the UI.
57 | 2. Go to [Cloud Storage](https://console.cloud.google.com/storage/browser) and
58 | create a new bucket, which will be used to keep track of the Terraform state,
59 | e.g. `my-awesome-project-terraform`. Make a note of the name of the bucket.
60 | 3. Open the [OAuth Consent Screen](
61 | https://console.cloud.google.com/apis/credentials/consent) and create a new
62 | internal app.
63 | 4. Open the [API Credentials Screen](
64 | https://console.cloud.google.com/apis/credentials) -> Create credentials ->
65 | OAuth Client ID -> Web app -> Set
66 | `https://developers.google.com/oauthplayground` as an authorised redirect
67 | URI. Make a note of the `client_id` and the `client_secret`.
68 | 5. Open the [OAuth playground](https://developers.google.com/oauthplayground/),
69 | and generate a refresh token for the [above scopes](#oauth), using the
70 | `client_id` and `client_secret` generated in the previous step:
71 | 
72 | 6. Open Cloud Shell:
73 | 
74 | 7. Enable the APIs in the project by running the following:
75 |
76 | ```
77 | gcloud services enable \
78 | serviceusage.googleapis.com \
79 | cloudresourcemanager.googleapis.com \
80 |     iam.googleapis.com \
83 | bigquery.googleapis.com \
84 | googleads.googleapis.com \
85 | youtube.googleapis.com \
86 | cloudfunctions.googleapis.com \
87 | cloudbuild.googleapis.com \
88 | sheets.googleapis.com \
89 | cloudscheduler.googleapis.com \
90 | translate.googleapis.com
91 | ```
92 |
93 | _Side note_: If you're interested in the reason why the APIs aren't controlled
94 | through Terraform, [read this guide](
95 | https://medium.com/rockedscience/how-to-fully-automate-the-deployment-of-google-cloud-platform-projects-with-terraform-16c33f1fb31f).
96 |
97 | ### Terraform
98 |
99 | 1. Whilst still in Cloud shell, `git clone` the project, and `cd` into the
100 | directory.
101 | 2. Run the following commands to initialise Terraform:
102 | ```
103 | cd terraform
104 | terraform init
105 | ```
106 | When prompted, enter the name of the bucket created in step 2 in manual
107 | steps.
108 |
109 | 3. Create a file named `terraform.tfvars` and add the following variables:
110 | ```
111 | project_id = ""
112 | oauth_refresh_token = ""
113 | google_cloud_client_id = ""
114 | google_cloud_client_secret = ""
115 | google_ads_developer_token = ""
116 | google_ads_login_customer_id = ""
117 | config_sheet_id = ""
118 | ```
119 | Note that the `google_ads_login_customer_id` is the MCC customer ID in Google
120 | Ads.
121 |
122 | 4. Run `terraform plan` and review the proposed changes.
123 | 5. Run `terraform apply` to create the infrastructure.
124 | 6. The email of the service account created will be output, give view only
125 | access to the Google sheet containing the config.
126 |
127 | By default, the code will be triggered every hour by Cloud Scheduler. To test
128 | everything is working, configure the Google Sheet ([see below](#google-sheet))
129 | and force run the Cloud Scheduler job in the UI.
130 |
131 | ## Google Sheet
132 |
133 | Open your copy of the Google Sheet. This is what you'll be using to configure
134 | the Ads Placement Excluder solution.
135 |
136 | There are notes in the Sheet that contain instructions for how to set this up.
137 |
138 | One area to highlight is the [basic Translation API](
139 | https://cloud.google.com/translate/docs/basic/detecting-language) used in the
140 | YouTube service, has a cost element to it ([see pricing](
141 | https://cloud.google.com/translate/pricing)). If you want to include language
142 | filters on the YouTube channel title, ensure that this is enabled.
143 |
144 | If this is disabled, and then enabled at a later date, it does not backfill the
145 | gaps in data. If you wish to backfill this data, manually clear the files in the
146 | Cloud Storage bucket containing the data, essentially deleting the YouTube data,
147 | then re-run Cloud Scheduler.
148 |
149 | ## Disclaimers
150 | __This is not an officially supported Google product.__
151 |
152 | Copyright 2022 Google LLC. This solution, including any related sample code or
153 | data, is made available on an “as is,” “as available,” and “with all faults”
154 | basis, solely for illustrative purposes, and without warranty or representation
155 | of any kind. This solution is experimental, unsupported and provided solely for
156 | your convenience. Your use of it is subject to your agreements with Google, as
157 | applicable, and may constitute a beta feature as defined under those agreements.
158 | To the extent that you make any data available to Google in connection with your
159 | use of the solution, you represent and warrant that you have all necessary and
160 | appropriate rights, consents and permissions to permit Google to use and process
161 | that data. By using any portion of this solution, you acknowledge, assume and
162 | accept all risks, known and unknown, associated with its usage, including with
163 | respect to your deployment of any portion of this solution in your systems, or
164 | usage in connection with your business, if at all.
165 |
--------------------------------------------------------------------------------
/docs/architecture.md:
--------------------------------------------------------------------------------
1 | # Ads Placement Excluder Architecture
2 |
3 | ## Google Cloud Architecture
4 |
5 | The solution is split into four microservices:
6 |
7 | - Google Ads Account Service
8 | - Google Ads Reporting Service
9 | - YouTube Channel Service
10 | - Google Ads Exclusion Service
11 |
12 | The source code for each of the Cloud Functions can be found under the `/src/`
13 | directory of this repo. Each function has its own README file, which contains
14 | instructions for local deployment.
15 |
16 | ### Google Ads Account Service
17 |
18 | This service is responsible for deciding which Google Ads accounts the Ads
19 | Placement Excluder solution should run for, and kicking off the downstream
20 | pipeline. Each account is pushed as a separate message into the topic to enable
21 | concurrency.
22 |
23 | 
25 |
26 | 1. Cloud Scheduler triggers the Account Cloud Function.
27 | 2. The function pulls from the Google Sheet the Google Ads customer IDs to run
28 | the code for, and the filters to apply to the Google Ads report.
29 | 3. Each customer ID is pushed in a separate message to Pub/Sub.
30 |
31 | ### Google Ads Reporting Service
32 |
33 | This service is responsible for running a report from Google Ads based on the
34 | [group_placement_view](
35 | https://developers.google.com/google-ads/api/fields/v11/group_placement_view),
36 | with the configured filters, and outputting that as a CSV to a Cloud Storage
37 | bucket, with a BigQuery table in front of it. The data pulled from the report is
38 | filtered to only have YouTube channels.
39 |
40 | 
42 |
43 | 1. Pub/Sub triggers the Cloud Function.
44 | 2. The report is downloaded from Google Ads.
45 | 3. The output is written as a CSV to Cloud Storage.
46 | 4. A message is passed to the next Pub/Sub topic.
47 |
48 | ### YouTube Channel Reporting Service
49 |
50 | This service is responsible for pulling metrics about the YouTube channels from
51 | the Google Ads Report. For example the number of views the channel has had, and
52 | the number of subscribers. It then uses the Google Translate API (if enabled in
53 | the config), to determine the language the YouTube channel title is in.
54 |
55 | 
57 |
58 | 1. Pub/Sub triggers the Cloud Function.
59 | 2. The function reads the new channels that were pulled from Google Ads. It does
60 | not refresh the data for existing channels.
61 | 3. The config is used to determine if the Translate API should be used.
62 | 4. The function pulls the YouTube data for each of the channels in step 2.
63 | 5. If the Translate API filter is enabled, for each channel it will use the API
64 | to detect the language and the confidence level of the prediction.
65 | 6. The output is written as a CSV to Cloud Storage.
66 | 7. A message is passed to the next Pub/Sub topic.
67 |
68 | ### Google Ads Exclusion service
69 |
70 | The Google Ads Excluder service is responsible for applying the filters in the
71 | config Google Sheet to the data, to determine which channels should be excluded
72 | in Google Ads. Channels identified for exclusion are then uploaded to the shared
73 | placement list in Google Ads, and the output written to BigQuery for reporting.
74 |
75 | 
77 |
78 | 1. Pub/Sub triggers the Cloud Function.
79 | 2. The function reads the filters from the config Sheet.
80 | 3. It applies the filters to BigQuery to identify channels that need to be
81 | excluded.
82 | 4. These are then uploaded to Google Ads.
83 | 5. The exclusions are also written to BigQuery for reporting purposes.
84 |
85 | ### Entire Solution Architecture
86 |
87 | Combining the individual services, you can see the combined architecture diagram
88 | below:
89 |
90 | 
91 |
92 | 1. Cloud Scheduler triggers the Account Cloud Function.
93 | 2. The function pulls the Google Ads customer IDs to run the code for, and the
94 | filters to apply to the Google Ads report.
95 | 3. Each customer ID is pushed in a separate message to Pub/Sub.
96 | 4. Pub/Sub triggers the Cloud Function.
97 | 5. The report is downloaded from Google Ads.
98 | 6. The output is written as a CSV to Cloud Storage.
99 | 7. A message is passed to the next Pub/Sub topic.
100 | 8. Pub/Sub triggers the Cloud Function.
101 | 9. The function reads the channels that were pulled from Google Ads.
102 | 10. The config is used to determine if the Translate API should be used.
103 | 11. The function pulls the YouTube data for each of the channels in step 2.
104 | 12. If the Translate API filter is enabled, for each channel it will use the API
105 | to detect the language and the confidence level of the prediction.
106 | 13. The output is written as a CSV to Cloud Storage.
107 | 14. A message is passed to the next Pub/Sub topic.
108 | 15. Pub/Sub triggers the Cloud Function.
109 | 16. The function reads the filters from the config Sheet.
110 | 17. It applies the filters to BigQuery to identify channels that need to be
111 | excluded.
112 | 18. These are then uploaded to Google Ads.
113 | 19. The exclusions are also written to BigQuery for reporting purposes.
114 |
115 | ## BigQuery External Tables Using Cloud Storage
116 |
117 | The solution uses [BigQuery External tables with a Google Cloud Storage
118 | backend](https://cloud.google.com/bigquery/docs/external-data-cloud-storage),
119 | instead of writing to BigQuery directly due to concurrency. BigQuery has much
120 | stricter limits in place about concurrent writes ([docs](
121 | https://cloud.google.com/bigquery/quotas)), so if the solution is configured
122 | with several Google Ads accounts, it can run into difficulty when writing
123 | directly to BigQuery.
124 |
125 | Leveraging Cloud Storage removes this limitation, and the
126 | BigQuery External Table provides a way of querying the data using SQL.
127 |
128 | ## Google Ads Exclusions
129 |
130 | There are several places that exclusions can be applied in Google Ads. This
131 | solution applies exclusions to [shared placement exclusion lists](
132 | https://support.google.com/google-ads/answer/9162992?hl=en-GB).
133 |
134 | The list is configured in the configuration Google Sheet, where you enter the
135 | customer ID of your MCC account, and the ID of the exclusion list. To find the
136 | ID of the list, open the list in the UI and look at the value set in the query
137 | string parameter `sharedSetId`.
138 |
139 | ## Disclaimers
140 |
141 | __This is not an officially supported Google product.__
142 |
143 | Copyright 2022 Google LLC. This solution, including any related sample code or
144 | data, is made available on an “as is,” “as available,” and “with all faults”
145 | basis, solely for illustrative purposes, and without warranty or representation
146 | of any kind. This solution is experimental, unsupported and provided solely for
147 | your convenience. Your use of it is subject to your agreements with Google, as
148 | applicable, and may constitute a beta feature as defined under those agreements.
149 | To the extent that you make any data available to Google in connection with your
150 | use of the solution, you represent and warrant that you have all necessary and
151 | appropriate rights, consents and permissions to permit Google to use and process
152 | that data. By using any portion of this solution, you acknowledge, assume and
153 | accept all risks, known and unknown, associated with its usage, including with
154 | respect to your deployment of any portion of this solution in your systems, or
155 | usage in connection with your business, if at all.
156 |
--------------------------------------------------------------------------------
/src/google_ads_report/main.py:
--------------------------------------------------------------------------------
1 | # Copyright 2022 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # https://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Output the placement report from Google Ads to BigQuery."""
15 | import base64
16 | import json
17 | from datetime import datetime, timedelta
18 | import logging
19 | import os
20 | import sys
21 | from typing import Any, Dict, Optional, Tuple
22 | from google.ads.googleads.client import GoogleAdsClient
23 | import jsonschema
24 | import pandas as pd
25 | from utils import gcs
26 | from utils import pubsub
27 |
28 |
# Log to stdout so the Cloud Functions runtime captures the output.
logging.basicConfig(stream=sys.stdout)
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# The Google Cloud project containing the GCS bucket
GOOGLE_CLOUD_PROJECT = os.environ.get('GOOGLE_CLOUD_PROJECT')
# The bucket to write the data to
APE_GCS_DATA_BUCKET = os.environ.get('APE_GCS_DATA_BUCKET')
# The pub/sub topic to send the success message to
APE_YOUTUBE_PUBSUB_TOPIC = os.environ.get('APE_YOUTUBE_PUBSUB_TOPIC')
# NOTE(review): each value above is None when its environment variable is
# unset — deployment is expected to provide all three.

# The schema of the JSON in the event payload.
# Enforced in main() via jsonschema.validate().
message_schema = {
    'type': 'object',
    'properties': {
        'sheet_id': {'type': 'string'},
        'customer_id': {'type': 'string'},
        'lookback_days': {'type': 'number'},
        'gads_filters': {'type': 'string'},
    },
    'required': ['sheet_id', 'customer_id', 'lookback_days', 'gads_filters', ]
}
51 |
52 |
def main(event: Dict[str, Any], context: Dict[str, Any]) -> None:
    """The entry point: decode the Pub/Sub payload and start the report job.

    The pub/sub message must match the message_schema object above.

    Args:
        event: A dictionary representing the event data payload.
        context: An object containing metadata about the event.
    """
    del context  # Runtime metadata; not used by this function.
    logger.info('Google Ads Reporting Service triggered.')
    logger.info('Message: %s', event)
    decoded = base64.b64decode(event['data']).decode('utf-8')
    logger.info('Decoded message: %s', decoded)
    payload = json.loads(decoded)
    logger.info('JSON message: %s', payload)

    # Will raise jsonschema.exceptions.ValidationError if the schema is invalid
    jsonschema.validate(instance=payload, schema=message_schema)

    start_job(
        payload.get('sheet_id'),
        payload.get('customer_id'),
        payload.get('lookback_days'),
        payload.get('gads_filters'),
    )

    logger.info('Done')
81 |
82 |
def start_job(
    sheet_id: str,
    customer_id: str,
    lookback_days: int,
    gads_filters: str,
) -> None:
    """Start the job to run the report from Google Ads & output it.

    Orchestrates three steps: download the placement report, write it to
    Cloud Storage, then notify the next service via Pub/Sub.

    Args:
        sheet_id: the ID of the Google Sheet containing the config.
        customer_id: the customer ID to fetch the Google Ads data for.
        lookback_days: the number of days from today to look back when
            fetching the report.
        gads_filters: the filters to apply to the Google Ads report query
    """
    logger.info('Starting job to fetch data for %s', customer_id)
    placements = get_report_df(customer_id, lookback_days, gads_filters)
    write_results_to_gcs(placements, customer_id)
    send_messages_to_pubsub(customer_id, sheet_id)
    logger.info('Job complete')
103 |
104 |
def get_report_df(
        customer_id: str,
        lookback_days: int,
        gads_filters: str) -> pd.DataFrame:
    """Run the placement report in Google Ads & return a Dataframe of the data.

    Args:
        customer_id: the customer ID to fetch the Google Ads data for.
        lookback_days: the number of days from today to look back when
            fetching the report.
        gads_filters: the filters to apply to the Google Ads report query

    Returns:
        A Pandas DataFrame containing the report results.
    """
    logger.info('Getting report stream for %s', customer_id)
    fetch_time = datetime.now()
    client = GoogleAdsClient.load_from_env(version='v11')
    ga_service = client.get_service("GoogleAdsService")

    search_request = client.get_type("SearchGoogleAdsStreamRequest")
    search_request.customer_id = customer_id
    search_request.query = get_report_query(lookback_days, gads_filters)
    stream = ga_service.search_stream(search_request)

    # The client and iterator needs to be in the same function, as per
    # https://github.com/googleads/google-ads-python/issues/384#issuecomment-791639397
    # So this can't be refactored out
    logger.info('Processing response stream')
    rows = [
        [
            fetch_time,
            result.customer.id,
            result.group_placement_view.placement,
            result.group_placement_view.target_url,
            result.metrics.impressions,
            result.metrics.cost_micros,
            result.metrics.conversions,
            result.metrics.video_view_rate,
            result.metrics.video_views,
            result.metrics.clicks,
            result.metrics.average_cpm,
            result.metrics.ctr,
            result.metrics.all_conversions_from_interactions_rate,
        ]
        for batch in stream
        for result in batch.results
    ]
    return pd.DataFrame(rows, columns=[
        'datetime_updated',
        'customer_id',
        'channel_id',
        'placement_target_url',
        'impressions',
        'cost_micros',
        'conversions',
        'video_view_rate',
        'video_views',
        'clicks',
        'average_cpm',
        'ctr',
        'all_conversions_from_interactions_rate',
    ])
168 |
169 |
def get_report_query(lookback_days: int,
                     gads_filters: Optional[str] = None) -> str:
    """Build and return the Google Ads report query.

    Args:
        lookback_days: the number of days from today to look back when fetching
            the report.
        gads_filters: the filters to apply to the Google Ads report query. May
            be None or an empty string, in which case no extra filters are
            added.

    Returns:
        The Google Ads query.
    """
    logger.info('Getting report query')
    date_from, date_to = get_query_dates(lookback_days)
    where_query = ''
    # Use truthiness rather than an "is None" check: an empty filter string
    # (the account service sends '' when no filters are configured) would
    # otherwise produce a dangling "AND" and an invalid GAQL query.
    if gads_filters:
        where_query = f'AND {gads_filters}'
    query = f"""
        SELECT
          customer.id,
          group_placement_view.placement,
          group_placement_view.target_url,
          metrics.impressions,
          metrics.cost_micros,
          metrics.conversions,
          metrics.video_views,
          metrics.video_view_rate,
          metrics.clicks,
          metrics.average_cpm,
          metrics.ctr,
          metrics.all_conversions_from_interactions_rate
        FROM
          group_placement_view
        WHERE group_placement_view.placement_type = "YOUTUBE_CHANNEL"
        AND campaign.advertising_channel_type = "VIDEO"
        AND segments.date BETWEEN "{date_from}" AND "{date_to}"
        {where_query}
    """
    logger.info(query)
    return query
210 |
211 |
def get_query_dates(lookback_days: int,
                    today: Optional[datetime] = None) -> Tuple[str, str]:
    """Return a tuple of string dates in %Y-%m-%d format for the GAds report.

    Google Ads queries require a string date in the above format. This function
    will lookback X days from today, and return this date as a string.

    Args:
        lookback_days: the number of days from today to look back when fetching
            the report.
        today: the date representing today. If no date is provided
            datetime.today() is used.

    Returns:
        A (date_from, date_to) tuple of string dates.
    """
    logger.info('Getting query dates')
    dt_format = '%Y-%m-%d'
    if today is None:
        today = datetime.today()
    date_from = today - timedelta(days=lookback_days)
    return (
        date_from.strftime(dt_format),
        today.strftime(dt_format),
    )
237 |
238 |
def write_results_to_gcs(report_df: pd.DataFrame, customer_id: str) -> None:
    """Write the report dataframe to GCS as a CSV file

    Args:
        report_df: the dataframe based on the Google Ads report.
        customer_id: the customer ID to fetch the Google Ads data for.
    """
    logger.info('Writing results to GCS: %s', APE_GCS_DATA_BUCKET)
    number_of_rows = len(report_df.index)
    logger.info('There are %s rows', number_of_rows)
    if number_of_rows == 0:
        # Nothing to upload: skip the GCS round-trip entirely.
        logger.info('There is nothing to write to GCS')
        return
    blob_name = f'google_ads_report/{customer_id}.csv'
    logger.info('Blob name: %s', blob_name)
    gcs.upload_blob_from_df(df=report_df,
                            blob_name=blob_name,
                            bucket=APE_GCS_DATA_BUCKET)
    logger.info('Blob uploaded to GCS')
259 |
260 |
def send_messages_to_pubsub(customer_id: str, sheet_id: str) -> None:
    """Push the customer ID to pub/sub when the job completes.

    Args:
        customer_id: the customer ID to fetch the Google Ads data for.
        sheet_id: the ID of the Google Sheet containing the config.
    """
    message_dict = {
        'customer_id': customer_id,
        'sheet_id': sheet_id,
    }
    # Fixed: the original call had no %s placeholder, so logging raised (and
    # swallowed) "not all arguments converted" instead of logging the dict.
    logger.info('Sending message to pub/sub: %s', message_dict)
    pubsub.send_dict_to_pubsub(
        message_dict=message_dict,
        topic=APE_YOUTUBE_PUBSUB_TOPIC,
        gcp_project=GOOGLE_CLOUD_PROJECT)
    logger.info('Message published')
278 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 |
2 | Apache License
3 | Version 2.0, January 2004
4 | http://www.apache.org/licenses/
5 |
6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
7 |
8 | 1. Definitions.
9 |
10 | "License" shall mean the terms and conditions for use, reproduction,
11 | and distribution as defined by Sections 1 through 9 of this document.
12 |
13 | "Licensor" shall mean the copyright owner or entity authorized by
14 | the copyright owner that is granting the License.
15 |
16 | "Legal Entity" shall mean the union of the acting entity and all
17 | other entities that control, are controlled by, or are under common
18 | control with that entity. For the purposes of this definition,
19 | "control" means (i) the power, direct or indirect, to cause the
20 | direction or management of such entity, whether by contract or
21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
22 | outstanding shares, or (iii) beneficial ownership of such entity.
23 |
24 | "You" (or "Your") shall mean an individual or Legal Entity
25 | exercising permissions granted by this License.
26 |
27 | "Source" form shall mean the preferred form for making modifications,
28 | including but not limited to software source code, documentation
29 | source, and configuration files.
30 |
31 | "Object" form shall mean any form resulting from mechanical
32 | transformation or translation of a Source form, including but
33 | not limited to compiled object code, generated documentation,
34 | and conversions to other media types.
35 |
36 | "Work" shall mean the work of authorship, whether in Source or
37 | Object form, made available under the License, as indicated by a
38 | copyright notice that is included in or attached to the work
39 | (an example is provided in the Appendix below).
40 |
41 | "Derivative Works" shall mean any work, whether in Source or Object
42 | form, that is based on (or derived from) the Work and for which the
43 | editorial revisions, annotations, elaborations, or other modifications
44 | represent, as a whole, an original work of authorship. For the purposes
45 | of this License, Derivative Works shall not include works that remain
46 | separable from, or merely link (or bind by name) to the interfaces of,
47 | the Work and Derivative Works thereof.
48 |
49 | "Contribution" shall mean any work of authorship, including
50 | the original version of the Work and any modifications or additions
51 | to that Work or Derivative Works thereof, that is intentionally
52 | submitted to Licensor for inclusion in the Work by the copyright owner
53 | or by an individual or Legal Entity authorized to submit on behalf of
54 | the copyright owner. For the purposes of this definition, "submitted"
55 | means any form of electronic, verbal, or written communication sent
56 | to the Licensor or its representatives, including but not limited to
57 | communication on electronic mailing lists, source code control systems,
58 | and issue tracking systems that are managed by, or on behalf of, the
59 | Licensor for the purpose of discussing and improving the Work, but
60 | excluding communication that is conspicuously marked or otherwise
61 | designated in writing by the copyright owner as "Not a Contribution."
62 |
63 | "Contributor" shall mean Licensor and any individual or Legal Entity
64 | on behalf of whom a Contribution has been received by Licensor and
65 | subsequently incorporated within the Work.
66 |
67 | 2. Grant of Copyright License. Subject to the terms and conditions of
68 | this License, each Contributor hereby grants to You a perpetual,
69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
70 | copyright license to reproduce, prepare Derivative Works of,
71 | publicly display, publicly perform, sublicense, and distribute the
72 | Work and such Derivative Works in Source or Object form.
73 |
74 | 3. Grant of Patent License. Subject to the terms and conditions of
75 | this License, each Contributor hereby grants to You a perpetual,
76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
77 | (except as stated in this section) patent license to make, have made,
78 | use, offer to sell, sell, import, and otherwise transfer the Work,
79 | where such license applies only to those patent claims licensable
80 | by such Contributor that are necessarily infringed by their
81 | Contribution(s) alone or by combination of their Contribution(s)
82 | with the Work to which such Contribution(s) was submitted. If You
83 | institute patent litigation against any entity (including a
84 | cross-claim or counterclaim in a lawsuit) alleging that the Work
85 | or a Contribution incorporated within the Work constitutes direct
86 | or contributory patent infringement, then any patent licenses
87 | granted to You under this License for that Work shall terminate
88 | as of the date such litigation is filed.
89 |
90 | 4. Redistribution. You may reproduce and distribute copies of the
91 | Work or Derivative Works thereof in any medium, with or without
92 | modifications, and in Source or Object form, provided that You
93 | meet the following conditions:
94 |
95 | (a) You must give any other recipients of the Work or
96 | Derivative Works a copy of this License; and
97 |
98 | (b) You must cause any modified files to carry prominent notices
99 | stating that You changed the files; and
100 |
101 | (c) You must retain, in the Source form of any Derivative Works
102 | that You distribute, all copyright, patent, trademark, and
103 | attribution notices from the Source form of the Work,
104 | excluding those notices that do not pertain to any part of
105 | the Derivative Works; and
106 |
107 | (d) If the Work includes a "NOTICE" text file as part of its
108 | distribution, then any Derivative Works that You distribute must
109 | include a readable copy of the attribution notices contained
110 | within such NOTICE file, excluding those notices that do not
111 | pertain to any part of the Derivative Works, in at least one
112 | of the following places: within a NOTICE text file distributed
113 | as part of the Derivative Works; within the Source form or
114 | documentation, if provided along with the Derivative Works; or,
115 | within a display generated by the Derivative Works, if and
116 | wherever such third-party notices normally appear. The contents
117 | of the NOTICE file are for informational purposes only and
118 | do not modify the License. You may add Your own attribution
119 | notices within Derivative Works that You distribute, alongside
120 | or as an addendum to the NOTICE text from the Work, provided
121 | that such additional attribution notices cannot be construed
122 | as modifying the License.
123 |
124 | You may add Your own copyright statement to Your modifications and
125 | may provide additional or different license terms and conditions
126 | for use, reproduction, or distribution of Your modifications, or
127 | for any such Derivative Works as a whole, provided Your use,
128 | reproduction, and distribution of the Work otherwise complies with
129 | the conditions stated in this License.
130 |
131 | 5. Submission of Contributions. Unless You explicitly state otherwise,
132 | any Contribution intentionally submitted for inclusion in the Work
133 | by You to the Licensor shall be under the terms and conditions of
134 | this License, without any additional terms or conditions.
135 | Notwithstanding the above, nothing herein shall supersede or modify
136 | the terms of any separate license agreement you may have executed
137 | with Licensor regarding such Contributions.
138 |
139 | 6. Trademarks. This License does not grant permission to use the trade
140 | names, trademarks, service marks, or product names of the Licensor,
141 | except as required for reasonable and customary use in describing the
142 | origin of the Work and reproducing the content of the NOTICE file.
143 |
144 | 7. Disclaimer of Warranty. Unless required by applicable law or
145 | agreed to in writing, Licensor provides the Work (and each
146 | Contributor provides its Contributions) on an "AS IS" BASIS,
147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148 | implied, including, without limitation, any warranties or conditions
149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150 | PARTICULAR PURPOSE. You are solely responsible for determining the
151 | appropriateness of using or redistributing the Work and assume any
152 | risks associated with Your exercise of permissions under this License.
153 |
154 | 8. Limitation of Liability. In no event and under no legal theory,
155 | whether in tort (including negligence), contract, or otherwise,
156 | unless required by applicable law (such as deliberate and grossly
157 | negligent acts) or agreed to in writing, shall any Contributor be
158 | liable to You for damages, including any direct, indirect, special,
159 | incidental, or consequential damages of any character arising as a
160 | result of this License or out of the use or inability to use the
161 | Work (including but not limited to damages for loss of goodwill,
162 | work stoppage, computer failure or malfunction, or any and all
163 | other commercial damages or losses), even if such Contributor
164 | has been advised of the possibility of such damages.
165 |
166 | 9. Accepting Warranty or Additional Liability. While redistributing
167 | the Work or Derivative Works thereof, You may choose to offer,
168 | and charge a fee for, acceptance of support, warranty, indemnity,
169 | or other liability obligations and/or rights consistent with this
170 | License. However, in accepting such obligations, You may act only
171 | on Your own behalf and on Your sole responsibility, not on behalf
172 | of any other Contributor, and only if You agree to indemnify,
173 | defend, and hold each Contributor harmless for any liability
174 | incurred by, or claims asserted against, such Contributor by reason
175 | of your accepting any such warranty or additional liability.
176 |
177 | END OF TERMS AND CONDITIONS
178 |
179 | APPENDIX: How to apply the Apache License to your work.
180 |
181 | To apply the Apache License to your work, attach the following
182 | boilerplate notice, with the fields enclosed by brackets "[]"
183 | replaced with your own identifying information. (Don't include
184 | the brackets!) The text should be enclosed in the appropriate
185 | comment syntax for the file format. We also recommend that a
186 | file or class name and description of purpose be included on the
187 | same "printed page" as the copyright notice for easier
188 | identification within third-party archives.
189 |
190 | Copyright [yyyy] [name of copyright owner]
191 |
192 | Licensed under the Apache License, Version 2.0 (the "License");
193 | you may not use this file except in compliance with the License.
194 | You may obtain a copy of the License at
195 |
196 | http://www.apache.org/licenses/LICENSE-2.0
197 |
198 | Unless required by applicable law or agreed to in writing, software
199 | distributed under the License is distributed on an "AS IS" BASIS,
200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201 | See the License for the specific language governing permissions and
202 | limitations under the License.
203 |
--------------------------------------------------------------------------------
/src/google_ads_excluder/main.py:
--------------------------------------------------------------------------------
1 | # Copyright 2022 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # https://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Filter the data for spam placements and exclude them in Google Ads."""
15 | import base64
16 | import json
17 | import logging
18 | import os
19 | import sys
20 | from datetime import datetime
21 | from typing import Any, Dict, List, Union
22 | import uuid
23 | import google.auth
24 | import google.auth.credentials
25 | from googleapiclient.discovery import build
26 | from google.ads.googleads.client import GoogleAdsClient
27 | from google.cloud import bigquery
28 | import jsonschema
29 | import pandas as pd
30 | from utils import gcs
31 |
32 |
# Log to stdout so Cloud Functions forwards the output to Cloud Logging.
logging.basicConfig(stream=sys.stdout)
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# The Google Cloud project
GOOGLE_CLOUD_PROJECT = os.environ.get('GOOGLE_CLOUD_PROJECT')
# The bucket to write the data to
APE_GCS_DATA_BUCKET = os.environ.get('APE_GCS_DATA_BUCKET')
# The name of the BigQuery Dataset
BQ_DATASET = os.environ.get('APE_BIGQUERY_DATASET')
# Set False to apply the exclusions in Google Ads. If True, the call will be
# made to the API and validated, but the exclusion won't be applied and you
# won't see it in the UI. You probably want this to be True in a dev environment
# and False in prod.
# Accepts 'true'/'1'/'t' (any case) as truthy; everything else is False.
VALIDATE_ONLY = os.environ.get(
    'APE_EXCLUSION_VALIDATE_ONLY', 'False').lower() in ('true', '1', 't')

# The access scopes used in this function
SCOPES = [
    'https://www.googleapis.com/auth/spreadsheets.readonly',
    'https://www.googleapis.com/auth/cloud-platform',
]

# The schema of the JSON in the event payload
message_schema = {
    'type': 'object',
    'properties': {
        'sheet_id': {'type': 'string'},
        'customer_id': {'type': 'string'},
    },
    'required': ['sheet_id', 'customer_id', ]
}
65 |
66 |
def main(event: Dict[str, Any], context: Dict[str, Any]) -> None:
    """Entry point: decode the pub/sub payload and start the exclusion job.

    The pub/sub message must conform to the module-level message_schema.

    Args:
        event: A dictionary representing the event data payload.
        context: An object containing metadata about the event (unused).

    Raises:
        jsonschema.exceptions.ValidationError: if the message from pub/sub
            does not match the expected schema.
    """
    del context  # Event metadata is not needed by this service.
    logger.info('Google Ads Exclusion service triggered.')
    logger.info('Message: %s', event)
    decoded_payload = base64.b64decode(event['data']).decode('utf-8')
    logger.info('Decoded message: %s', decoded_payload)
    payload = json.loads(decoded_payload)
    logger.info('JSON message: %s', payload)

    # Raises jsonschema.exceptions.ValidationError on a schema mismatch.
    jsonschema.validate(instance=payload, schema=message_schema)

    run(payload.get('customer_id'), payload.get('sheet_id'))

    logger.info('Done')
94 |
95 |
def run(customer_id: str, sheet_id: str) -> None:
    """Fetch the spam placements for one account and exclude them.

    Args:
        customer_id: the Google Ads customer ID to process.
        sheet_id: the ID of the Google Sheet containing the config.
    """
    logger.info('Starting job to fetch data for %s', customer_id)
    credentials = get_auth_credentials()
    spam_filters = get_config_filters(sheet_id, credentials)

    channel_ids = get_spam_placements(customer_id, spam_filters, credentials)
    if channel_ids is not None:
        # Apply the exclusions first, then persist the record of them to GCS.
        exclude_placements_in_gads(channel_ids, sheet_id, credentials)
        write_results_to_gcs(customer_id, channel_ids)
    logger.info('Job complete')
112 |
113 |
def get_auth_credentials() -> google.auth.credentials.Credentials:
    """Return application default credentials for Google APIs.

    Returns:
        Credentials carrying the module-level SCOPES (Sheets read-only and
        Cloud Platform).
    """
    # The project ID returned alongside the credentials is not needed here;
    # discard it instead of binding an unused local.
    credentials, _ = google.auth.default(scopes=SCOPES)
    return credentials
118 |
119 |
def get_config_filters(sheet_id: str,
                       credentials: google.auth.credentials.Credentials) -> str:
    """Get the filters for identifying a spam placement from the config.

    Args:
        sheet_id: the ID of the Google Sheet containing the config.
        credentials: Google Auth credentials

    Returns:
        SQL WHERE conditions that can be run on BigQuery, e.g.
        view_count > 1000000 AND subscriber_count > 10000

    Raises:
        ValueError: if the sheet contains no usable filter rows.
    """
    logger.info('Getting config from sheet %s', sheet_id)

    result = get_range_values_from_sheet(
        sheet_id, 'yt_exclusion_filters', credentials)

    logger.info('Returned %i rows', len(result))
    filters = youtube_filters_to_sql_string(result)
    if not filters:
        # BUGFIX: this previously raised google.api_core.exceptions.BadRequest,
        # but google.api_core is never imported in this module, so the raise
        # itself would have failed with an AttributeError. Raise a built-in
        # exception instead.
        raise ValueError('Filters are not set')

    return filters
143 |
144 |
def get_range_values_from_sheet(
    sheet_id: str,
    sheet_range: str,
    credentials: google.auth.credentials.Credentials
) -> List[List[str]]:
    """Get the values from a named range in the Google Sheet.

    Args:
        sheet_id: the Google Sheet ID to fetch data from.
        sheet_range: the range in the Google Sheet to get the values from.
        credentials: Google Auth credentials

    Returns:
        Each row in the response represents a row in the Sheet. An empty
        range yields an empty list.
    """
    # Lazy %-style args (consistent with the rest of this module) instead of
    # an eagerly-formatted f-string in the logging call.
    logger.info('Getting range "%s" from sheet: %s', sheet_range, sheet_id)
    sheets_service = build('sheets', 'v4', credentials=credentials)
    sheet = sheets_service.spreadsheets()
    return sheet.values().get(
        spreadsheetId=sheet_id,
        range=sheet_range).execute().get('values', [])
166 |
167 |
def youtube_filters_to_sql_string(config_filters: List[List[str]]) -> str:
    """Turn the YouTube filters into a SQL compatible string.

    The config sheet stores each filter as a [field, operator, value] row;
    every complete (3-element) row becomes one condition and the conditions
    are joined with AND for use in a SQL WHERE clause. Incomplete rows are
    skipped.

    Args:
        config_filters: the filters from the Google Sheet

    Returns:
        A string that can be used in the WHERE statement of SQL Language.
    """
    return ' AND '.join(
        f'{field} {operator} {value}'
        for field, operator, value in
        (row for row in config_filters if len(row) == 3)
    )
188 |
189 |
def get_spam_placements(customer_id: str,
                        filters: str,
                        credentials: google.auth.credentials.Credentials
                        ) -> Union[List[str], None]:
    """Run a query to find spam placements in BigQuery and return as a list.

    Args:
        customer_id: the Google Ads customer ID to process.
        filters: a string containing WHERE conditions to add to the query based
            on the config Google Sheet.
        credentials: Google Auth credentials

    Returns:
        A list of placement (YouTube channel) IDs which should be excluded,
        or None when the query returns no rows.
    """
    logger.info('Getting spam placements from BigQuery')
    logger.info('Connecting to: %s BigQuery', GOOGLE_CLOUD_PROJECT)
    client = bigquery.Client(
        project=GOOGLE_CLOUD_PROJECT, credentials=credentials)

    # customer_id is bound as a query parameter rather than interpolated into
    # the SQL, guarding against injection. `filters` still has to be inlined:
    # it is assembled from the config sheet's column/operator/value triples
    # (trusted operator config, not end-user input).
    # BUGFIX: the YouTubeChannel table reference was previously missing the
    # backticks used by the other two table references.
    query = f"""
        SELECT DISTINCT
            Yt.channel_id
        FROM
            `{BQ_DATASET}.GoogleAdsReport` AS Ads
        LEFT JOIN
            `{BQ_DATASET}.YouTubeChannel` AS Yt
            USING(channel_id)
        LEFT JOIN
            `{BQ_DATASET}.GoogleAdsExclusion` AS Excluded
            USING(channel_id)
        WHERE
            Ads.customer_id = @customer_id
            AND Excluded.channel_id IS NULL
            AND (
                Excluded.customer_id = @customer_id
                OR Excluded.customer_id IS NULL
            )
        AND {filters}
    """
    # NOTE(review): with a LEFT JOIN USING(channel_id), a NULL
    # Excluded.channel_id implies all Excluded columns are NULL, so the
    # Excluded.customer_id = @customer_id branch looks unreachable — confirm
    # the intended per-customer exclusion semantics.
    job_config = bigquery.QueryJobConfig(query_parameters=[
        bigquery.ScalarQueryParameter('customer_id', 'STRING', customer_id),
    ])
    logger.info('Running query: %s', query)

    rows = client.query(query, job_config=job_config).result()

    if rows.total_rows == 0:
        logger.info('There is nothing to update')
        return None
    channel_ids = [row.channel_id for row in rows]
    logger.info('Received %s channel_ids', len(channel_ids))
    return channel_ids
243 |
244 |
def exclude_placements_in_gads(
    placements: List[str],
    sheet_id: str,
    credentials: google.auth.credentials.Credentials = None
) -> None:
    """Exclude the placements in the Google Ads account.

    The shared set (exclusion list) ID and the customer ID that owns it are
    read from named ranges in the config sheet. One SharedCriterionOperation
    is built per channel and all operations are uploaded in a single mutate
    call. When the module-level VALIDATE_ONLY flag is True the request is
    validated by the API but not applied.

    Args:
        placements: a list of YouTube channel IDs which should be excluded.
        sheet_id: the ID of the Google Sheet containing the config.
        credentials: Google Auth credentials; fetched via application default
            credentials when not provided.
    """
    logger.info('Excluding placements in Google Ads.')

    if credentials is None:
        logger.info('No auth credentials provided. Fetching them.')
        credentials = get_auth_credentials()

    # Both config values live in single-cell named ranges, hence the [0][0].
    shared_set_id = get_range_values_from_sheet(
        sheet_id=sheet_id,
        sheet_range='placement_exclusion_list_id',
        credentials=credentials)[0][0]
    customer_id = get_range_values_from_sheet(
        sheet_id=sheet_id,
        sheet_range='placement_exclusion_customer_id',
        credentials=credentials)[0][0]

    # The Ads client reads its configuration from GOOGLE_ADS_* env variables
    # (set in the function's deployment config).
    client = GoogleAdsClient.load_from_env(version='v11')
    service = client.get_service('SharedCriterionService')

    # Resource name of the shared exclusion list the criteria are added to.
    shared_set = f'customers/{customer_id}/sharedSets/{shared_set_id}'

    operations = []
    logger.info('Processing the %i placements', len(placements))
    for placement in placements:
        operation = client.get_type('SharedCriterionOperation')
        criterion = operation.create
        criterion.shared_set = shared_set
        criterion.youtube_channel.channel_id = placement
        operations.append(operation)

    placements_len = len(placements)
    logger.info('There are %i operations to upload', placements_len)
    logger.info('Validate_only mode: %s', VALIDATE_ONLY)
    if placements_len > 0:
        # All operations go up in a single batched mutate request.
        response = service.mutate_shared_criteria(
            request={
                'validate_only': VALIDATE_ONLY,
                'customer_id': customer_id,
                'operations': operations
            }
        )
        logger.info('Response from the upload:')
        logger.info(response)

    logger.info('Done.')
301 |
302 |
def write_results_to_gcs(customer_id: str,
                         placements: List[str],
                         ) -> None:
    """Write the exclusions to GCS as a CSV file.

    Historical data is preserved so all file writes have a UUID appended to it.

    Args:
        customer_id: the Google Ads customer ID to process.
        placements: a list of placement IDs which should be excluded.
    """
    exclusions_df = pd.DataFrame(placements, columns=[
        'channel_id',
    ])
    exclusions_df['customer_id'] = int(customer_id)
    # NOTE(review): datetime.now() is naive local time — confirm downstream
    # BigQuery consumers expect that rather than UTC.
    exclusions_df['datetime_updated'] = datetime.now()

    logger.info('Writing results to GCS: %s', APE_GCS_DATA_BUCKET)
    number_of_rows = len(exclusions_df.index)
    logger.info('There are %s rows', number_of_rows)
    if number_of_rows > 0:
        # The UUID suffix keeps each run's output as a distinct object so
        # previous runs are never overwritten.
        uuid_str = str(uuid.uuid4())
        blob_name = f'google_ads_exclusion/{customer_id}-{uuid_str}.csv'
        logger.info('Blob name: %s', blob_name)
        gcs.upload_blob_from_df(
            df=exclusions_df,
            blob_name=blob_name,
            bucket=APE_GCS_DATA_BUCKET)
        logger.info('Blob uploaded to GCS')
    else:
        logger.info('There is nothing to write to GCS')
334 |
--------------------------------------------------------------------------------
/terraform/main.tf:
--------------------------------------------------------------------------------
# Default provider configuration; all resources below are created in this
# project/region unless overridden.
provider "google" {
  project = var.project_id
  region  = var.region
}

# SERVICE ACCOUNT --------------------------------------------------------------
# A single runtime service account shared by all of the Cloud Functions.
resource "google_service_account" "service_account" {
  account_id   = "ads-placement-excluder-runner"
  display_name = "Service Account for running Ads Placement Excluder"
}
# Lets the account invoke the HTTP-triggered entry-point function.
resource "google_project_iam_member" "cloud_functions_invoker_role" {
  project = var.project_id
  role    = "roles/cloudfunctions.invoker"
  member  = "serviceAccount:${google_service_account.service_account.email}"
}
# Needed to run the BigQuery queries issued by the services.
resource "google_project_iam_member" "bigquery_job_user_role" {
  project = var.project_id
  role    = "roles/bigquery.jobUser"
  member  = "serviceAccount:${google_service_account.service_account.email}"
}
# Read access to the dataset's (external) tables.
resource "google_project_iam_member" "bigquery_data_viewer_role" {
  project = var.project_id
  role    = "roles/bigquery.dataViewer"
  member  = "serviceAccount:${google_service_account.service_account.email}"
}
# The services chain together by publishing pub/sub messages to each other.
resource "google_project_iam_member" "pubsub_publisher_role" {
  project = var.project_id
  role    = "roles/pubsub.publisher"
  member  = "serviceAccount:${google_service_account.service_account.email}"
}
# Read/write of the CSV outputs in the data bucket.
resource "google_project_iam_member" "storage_object_admin_role" {
  project = var.project_id
  role    = "roles/storage.objectAdmin"
  member  = "serviceAccount:${google_service_account.service_account.email}"
}

# CLOUD STORAGE ----------------------------------------------------------------
# Holds the CSV output of each service; the BigQuery external tables read
# directly from this bucket.
resource "google_storage_bucket" "ape_data_bucket" {
  name                        = "${var.project_id}-ape-data"
  location                    = var.region
  force_destroy               = true
  uniform_bucket_level_access = true
}
# This bucket is used to store the cloud functions for deployment.
# The project ID is used to make sure the name is globally unique
resource "google_storage_bucket" "function_bucket" {
  name                        = "${var.project_id}-functions"
  location                    = var.region
  force_destroy               = true
  uniform_bucket_level_access = true

  # Deployment artifacts are disposable: delete objects after one day.
  lifecycle_rule {
    condition {
      age = 1
    }
    action {
      type = "Delete"
    }
  }
}
61 |
# CLOUD FUNCTIONS --------------------------------------------------------------
# Each service's source directory is zipped locally so it can be uploaded to
# the deployment bucket.
data "archive_file" "google_ads_accounts_zip" {
  type        = "zip"
  output_path = ".temp/google_ads_accounts_source.zip"
  source_dir  = "../src/google_ads_accounts"
}
data "archive_file" "google_ads_report_zip" {
  type        = "zip"
  output_path = ".temp/google_ads_report_source.zip"
  source_dir  = "../src/google_ads_report"
}
data "archive_file" "youtube_channel_zip" {
  type        = "zip"
  output_path = ".temp/youtube_channel_source.zip"
  source_dir  = "../src/youtube_channel/"
}
data "archive_file" "google_ads_excluder_zip" {
  type        = "zip"
  output_path = ".temp/google_ads_excluder_source.zip"
  source_dir  = "../src/google_ads_excluder/"
}

# The object names embed the archive's md5, so any source change produces a
# new object name and forces the corresponding function to be redeployed.
resource "google_storage_bucket_object" "google_ads_accounts" {
  name       = "google_ads_accounts_${data.archive_file.google_ads_accounts_zip.output_md5}.zip"
  bucket     = google_storage_bucket.function_bucket.name
  source     = data.archive_file.google_ads_accounts_zip.output_path
  depends_on = [data.archive_file.google_ads_accounts_zip]
}
resource "google_storage_bucket_object" "google_ads_report" {
  name       = "google_ads_report_${data.archive_file.google_ads_report_zip.output_md5}.zip"
  bucket     = google_storage_bucket.function_bucket.name
  source     = data.archive_file.google_ads_report_zip.output_path
  depends_on = [data.archive_file.google_ads_report_zip]
}
resource "google_storage_bucket_object" "youtube_channel" {
  name       = "youtube_channel_${data.archive_file.youtube_channel_zip.output_md5}.zip"
  bucket     = google_storage_bucket.function_bucket.name
  source     = data.archive_file.youtube_channel_zip.output_path
  depends_on = [data.archive_file.youtube_channel_zip]
}
resource "google_storage_bucket_object" "google_ads_excluder" {
  name       = "google_ads_excluder_${data.archive_file.google_ads_excluder_zip.output_md5}.zip"
  bucket     = google_storage_bucket.function_bucket.name
  source     = data.archive_file.google_ads_excluder_zip.output_path
  depends_on = [data.archive_file.google_ads_excluder_zip]
}

# Pipeline entry point: HTTP-triggered, fans out one pub/sub message per
# Google Ads account to the report topic.
resource "google_cloudfunctions_function" "google_ads_accounts_function" {
  region                = var.region
  name                  = "ape-google_ads_accounts"
  description           = "Identify which reports to run the Google Ads report for."
  runtime               = "python310"
  source_archive_bucket = google_storage_bucket.function_bucket.name
  source_archive_object = google_storage_bucket_object.google_ads_accounts.name
  service_account_email = google_service_account.service_account.email
  timeout               = 540
  available_memory_mb   = 1024
  entry_point           = "main"
  trigger_http          = true

  environment_variables = {
    GOOGLE_CLOUD_PROJECT        = var.project_id
    APE_ADS_REPORT_PUBSUB_TOPIC = google_pubsub_topic.google_ads_report_pubsub_topic.name
  }
}
# Stage 2: triggered per account from the report topic; writes the Google Ads
# placement report to GCS and hands off to the YouTube topic.
resource "google_cloudfunctions_function" "google_ads_report_function" {
  region                = var.region
  name                  = "ape-google_ads_report"
  description           = "Move the placement report from Google Ads to BigQuery."
  runtime               = "python310"
  source_archive_bucket = google_storage_bucket.function_bucket.name
  source_archive_object = google_storage_bucket_object.google_ads_report.name
  service_account_email = google_service_account.service_account.email
  timeout               = 540
  available_memory_mb   = 1024
  entry_point           = "main"

  event_trigger {
    event_type = "providers/cloud.pubsub/eventTypes/topic.publish"
    resource   = google_pubsub_topic.google_ads_report_pubsub_topic.name
  }

  environment_variables = {
    # NOTE(review): non-string values here (e.g. the bool below) are coerced
    # to strings by Terraform before deployment.
    GOOGLE_ADS_USE_PROTO_PLUS    = false
    GOOGLE_ADS_REFRESH_TOKEN     = var.oauth_refresh_token
    GOOGLE_ADS_CLIENT_ID         = var.google_cloud_client_id
    GOOGLE_ADS_CLIENT_SECRET     = var.google_cloud_client_secret
    GOOGLE_ADS_DEVELOPER_TOKEN   = var.google_ads_developer_token
    GOOGLE_ADS_LOGIN_CUSTOMER_ID = var.google_ads_login_customer_id
    GOOGLE_CLOUD_PROJECT         = var.project_id
    APE_GCS_DATA_BUCKET          = google_storage_bucket.ape_data_bucket.name
    APE_YOUTUBE_PUBSUB_TOPIC     = google_pubsub_topic.youtube_pubsub_topic.name
  }
}
# Stage 3: enriches placements with YouTube channel data, then publishes to
# the excluder topic.
resource "google_cloudfunctions_function" "youtube_channel_function" {
  region                = var.region
  name                  = "ape-youtube_channels"
  description           = "Pull the channel data from the YouTube API."
  runtime               = "python310"
  source_archive_bucket = google_storage_bucket.function_bucket.name
  source_archive_object = google_storage_bucket_object.youtube_channel.name
  service_account_email = google_service_account.service_account.email
  timeout               = 540
  available_memory_mb   = 1024
  entry_point           = "main"

  event_trigger {
    event_type = "providers/cloud.pubsub/eventTypes/topic.publish"
    resource   = google_pubsub_topic.youtube_pubsub_topic.name
  }

  environment_variables = {
    GOOGLE_CLOUD_PROJECT          = var.project_id
    APE_ADS_EXCLUDER_PUBSUB_TOPIC = google_pubsub_topic.google_ads_excluder_pubsub_topic.name
    APE_BIGQUERY_DATASET          = google_bigquery_dataset.dataset.dataset_id
    APE_GCS_DATA_BUCKET           = google_storage_bucket.ape_data_bucket.name
  }
}
# Final stage: applies the exclusions in Google Ads and records them in GCS.
resource "google_cloudfunctions_function" "google_ads_excluder_function" {
  region                = var.region
  name                  = "ape-google_ads_excluder"
  description           = "Exclude the channels in Google Ads"
  runtime               = "python310"
  source_archive_bucket = google_storage_bucket.function_bucket.name
  source_archive_object = google_storage_bucket_object.google_ads_excluder.name
  service_account_email = google_service_account.service_account.email
  timeout               = 540
  available_memory_mb   = 1024
  entry_point           = "main"

  event_trigger {
    event_type = "providers/cloud.pubsub/eventTypes/topic.publish"
    resource   = google_pubsub_topic.google_ads_excluder_pubsub_topic.name
  }

  environment_variables = {
    GOOGLE_CLOUD_PROJECT         = var.project_id
    GOOGLE_ADS_USE_PROTO_PLUS    = false
    GOOGLE_ADS_REFRESH_TOKEN     = var.oauth_refresh_token
    GOOGLE_ADS_CLIENT_ID         = var.google_cloud_client_id
    GOOGLE_ADS_CLIENT_SECRET     = var.google_cloud_client_secret
    GOOGLE_ADS_DEVELOPER_TOKEN   = var.google_ads_developer_token
    GOOGLE_ADS_LOGIN_CUSTOMER_ID = var.google_ads_login_customer_id
    APE_BIGQUERY_DATASET         = google_bigquery_dataset.dataset.dataset_id
    APE_GCS_DATA_BUCKET          = google_storage_bucket.ape_data_bucket.name
  }
}
209 |
# BIGQUERY ---------------------------------------------------------------------
resource "google_bigquery_dataset" "dataset" {
  dataset_id                 = var.bq_dataset
  location                   = var.region
  description                = "Ads Placement Excluder BQ Dataset"
  delete_contents_on_destroy = true
}
# The three tables below are external tables over the CSVs the functions write
# to the data bucket, so no load jobs are needed. skip_leading_rows drops the
# CSV header row; quote = "" disables quote handling.
resource "google_bigquery_table" "google_ads_report_table" {
  dataset_id          = google_bigquery_dataset.dataset.dataset_id
  table_id            = "GoogleAdsReport"
  deletion_protection = false

  external_data_configuration {
    autodetect    = false
    source_format = "CSV"
    source_uris = [
      "gs://${google_storage_bucket.ape_data_bucket.name}/google_ads_report/*.csv"
    ]
    schema = file("../src/google_ads_report/bq_schema.json")
    csv_options {
      quote             = ""
      skip_leading_rows = "1"
    }
  }
}
resource "google_bigquery_table" "youtube_channel_table" {
  dataset_id          = google_bigquery_dataset.dataset.dataset_id
  table_id            = "YouTubeChannel"
  deletion_protection = false

  external_data_configuration {
    autodetect    = false
    source_format = "CSV"
    source_uris = [
      "gs://${google_storage_bucket.ape_data_bucket.name}/youtube_channel/*.csv"
    ]
    schema = file("../src/youtube_channel/bq_schema.json")
    csv_options {
      quote             = ""
      skip_leading_rows = "1"
    }
  }
}
resource "google_bigquery_table" "google_ads_exclusions_table" {
  dataset_id          = google_bigquery_dataset.dataset.dataset_id
  table_id            = "GoogleAdsExclusion"
  deletion_protection = false

  external_data_configuration {
    autodetect    = false
    source_format = "CSV"
    source_uris = [
      "gs://${google_storage_bucket.ape_data_bucket.name}/google_ads_exclusion/*.csv"
    ]
    schema = file("../src/google_ads_excluder/bq_schema.json")
    csv_options {
      quote             = ""
      skip_leading_rows = "1"
    }
  }
}
# Reporting view over the three tables; the SQL template is parameterised
# with the dataset ID.
resource "google_bigquery_table" "exclusions_report" {
  dataset_id          = google_bigquery_dataset.dataset.dataset_id
  table_id            = "ViewExclusions"
  deletion_protection = false
  depends_on = [
    google_bigquery_dataset.dataset,
    google_bigquery_table.google_ads_report_table,
    google_bigquery_table.youtube_channel_table,
    google_bigquery_table.google_ads_exclusions_table
  ]
  view {
    query = templatefile(
      "../src/reporting/exclusions_report.sql",
      {
        BQ_DATASET = google_bigquery_dataset.dataset.dataset_id
      }
    )
    use_legacy_sql = false
  }
}

# PUB/SUB ----------------------------------------------------------------------
# Topics chaining the services together; messages are retained for 7 days.
resource "google_pubsub_topic" "google_ads_report_pubsub_topic" {
  name                       = "ape-google-ads-report-topic"
  message_retention_duration = "604800s"
}
resource "google_pubsub_topic" "youtube_pubsub_topic" {
  name                       = "ape-youtube-channel-topic"
  message_retention_duration = "604800s"
}
resource "google_pubsub_topic" "google_ads_excluder_pubsub_topic" {
  name                       = "ape-google-ads-excluder-topic"
  message_retention_duration = "604800s"
}
305 |
306 | # CLOUD_SCHEDULER --------------------------------------------------------------
307 | locals {
308 | scheduler_body = < None:
69 | """The entry point: extract the data from the payload and starts the job.
70 |
71 | The pub/sub message must match the message_schema object above.
72 |
73 | Args:
74 | event: A dictionary representing the event data payload.
75 | context: An object containing metadata about the event.
76 |
77 | Raises:
78 | jsonschema.exceptions.ValidationError if the message from pub/sub is not
79 | what is expected.
80 | """
81 | del context
82 | logger.info('YouTube channel service triggered.')
83 | logger.info('Message: %s', event)
84 | message = base64.b64decode(event['data']).decode('utf-8')
85 | logger.info('Decoded message: %s', message)
86 | message_json = json.loads(message)
87 | logger.info('JSON message: %s', message_json)
88 |
89 | # Will raise jsonschema.exceptions.ValidationError if the schema is invalid
90 | jsonschema.validate(instance=message_json, schema=message_schema)
91 |
92 | run(message_json.get('customer_id'), message_json.get('sheet_id'))
93 |
94 | logger.info('Done')
95 |
96 |
def run(customer_id: str, sheet_id: str) -> None:
  """Orchestrate pulling YouTube data and outputting it to BigQuery.

  Args:
    customer_id: the Google Ads customer ID to process.
    sheet_id: the ID of the Google Sheet containing the config.
  """
  credentials = get_auth_credentials()
  channel_ids = get_placements_query(customer_id, credentials)
  if not channel_ids:
    logger.info('No channel IDs to process')
  else:
    get_youtube_dataframe(channel_ids, sheet_id, customer_id, credentials)
  # The downstream excluder service is triggered even when there were no new
  # channels, so the rest of the pipeline always runs.
  send_messages_to_pubsub(customer_id, sheet_id)
  logger.info('Done')
112 |
113 |
def get_auth_credentials() -> google.auth.credentials.Credentials:
  """Return the application default credentials for Google APIs.

  Returns:
    The Google Auth default credentials for the current environment.
  """
  # The project ID returned alongside the credentials is not needed here.
  credentials, _ = google.auth.default()
  return credentials
118 |
119 |
def get_placements_query(
    customer_id: str,
    credentials: google.auth.credentials.Credentials
) -> List[str]:
  """Get the placements from the Google Ads report in BigQuery.

  Returns the channel IDs that appear in the Google Ads report but do not yet
  have a row in the YouTubeChannel table.

  Args:
    customer_id: the Google Ads customer ID to process.
    credentials: Google Auth credentials

  Returns:
    A list of placement IDs that need to be pulled from YouTube
  """
  logger.info('Getting Placements from Google Ads')
  logger.info('Connecting to: %s BigQuery', GOOGLE_CLOUD_PROJECT)
  client = bigquery.Client(
      project=GOOGLE_CLOUD_PROJECT, credentials=credentials)

  # The customer ID arrives via a pub/sub message, so pass it as a query
  # parameter instead of interpolating it into the SQL string.
  query = f"""
      SELECT DISTINCT
          Ads.channel_id
      FROM
          `{BQ_DATASET}.GoogleAdsReport` AS Ads
      LEFT JOIN
          `{BQ_DATASET}.YouTubeChannel` AS YouTube
      USING(channel_id)
      WHERE
          Ads.customer_id = @customer_id
          AND YouTube.channel_id IS NULL
  """
  job_config = bigquery.QueryJobConfig(query_parameters=[
      bigquery.ScalarQueryParameter('customer_id', 'STRING', customer_id),
  ])
  logger.info('Running query: %s', query)
  rows = client.query(query, job_config=job_config).result()
  channel_ids = [row.channel_id for row in rows]
  logger.info('Received %s channel_ids', len(channel_ids))
  return channel_ids
157 |
158 |
def get_youtube_dataframe(
    channel_ids: List[str],
    sheet_id: str,
    customer_id: str,
    credentials: google.auth.credentials.Credentials
) -> None:
  """Pull information on each of the provided channels from the YouTube API.

  The YouTube API only allows pulling up to 50 channels in each request, so
  multiple requests have to be made to pull all the data. Each chunk of
  results is written to GCS as its own CSV file before the next chunk is
  requested. See the docs for more details:
  https://developers.google.com/youtube/v3/docs/channels/list

  Args:
    channel_ids: the channel IDs to pull the info on from YouTube
    sheet_id: the ID of the Google Sheet containing the config.
    customer_id: the Google Ads customer ID to process.
    credentials: Google Auth credentials
  """
  logger.info('Getting YouTube data for channel IDs')
  # Maximum number of channels per YouTube request. See:
  # https://developers.google.com/youtube/v3/docs/channels/list
  chunk_size = 50
  chunks = split_list_to_chunks(channel_ids, chunk_size)
  number_of_chunks = len(chunks)

  logger.info('Connecting to the youtube API')
  youtube = build('youtube', 'v3', credentials=credentials)
  # Read the translation toggle once from the config sheet so the same
  # setting applies to every chunk.
  is_translated = get_translate_filter(sheet_id, credentials)

  for i, chunk in enumerate(chunks):
    logger.info(f'Processing chunk {i + 1} of {number_of_chunks}')
    chunk_list = list(chunk)
    request = youtube.channels().list(
        part='id, statistics, snippet, brandingSettings',
        id=chunk_list,
        maxResults=chunk_size)
    response = request.execute()
    channels = process_youtube_response(response, chunk_list, is_translated)
    # NOTE(review): this column order must match the per-row field order
    # built in process_youtube_response — verify the two stay in sync.
    youtube_df = pd.DataFrame(channels, columns=[
        'channel_id',
        'view_count',
        'video_count',
        'subscriber_count',
        'title',
        'title_language',
        'title_language_confidence',
        'country',
    ])
    youtube_df['datetime_updated'] = datetime.now()
    youtube_df = sanitise_youtube_dataframe(youtube_df)
    # Each chunk becomes its own blob (UUID-named), so chunks never
    # overwrite one another.
    write_results_to_gcs(youtube_df, customer_id)
  logger.info('YouTube channel info complete')
212 |
213 |
def sanitise_youtube_dataframe(youtube_df: pd.DataFrame) -> pd.DataFrame:
  """Clean the YouTube dataframe so it is safe to write out as a CSV.

  Args:
    youtube_df: the dataframe containing the YouTube data

  Returns:
    The YouTube dataframe, sanitised so it is safe to write to a CSV.
  """
  numeric_columns = {
      'view_count': 'int',
      'video_count': 'int',
      'subscriber_count': 'int',
      'title_language_confidence': 'float',
  }
  youtube_df = youtube_df.astype(numeric_columns)
  # Strip the characters from the title field that break BigQuery ingestion
  # even when escaped in the CSV, then trim surrounding whitespace.
  cleaned_titles = youtube_df['title'].str.replace(
      APE_CSV_PROBLEM_CHARACTERS_REGEX, '', regex=True)
  youtube_df['title'] = cleaned_titles.str.strip()
  return youtube_df
235 |
236 |
def split_list_to_chunks(
    lst: List[Any], max_size_of_chunk: int) -> List[np.ndarray]:
  """Split the list into X chunks with the maximum size as specified.

  Args:
    lst: The list to be split into chunks
    max_size_of_chunk: the maximum number of elements that should be in a
      chunk.

  Returns:
    A list containing numpy array chunks of the original list. An empty input
    list yields an empty list of chunks.
  """
  logger.info('Splitting list into chunks')
  if not lst:
    # np.array_split raises ValueError for 0 sections, so an empty input has
    # to be handled explicitly.
    logger.info('List is empty, nothing to split')
    return []
  num_of_chunks = math.ceil(len(lst) / max_size_of_chunk)
  chunks = np.array_split(lst, num_of_chunks)
  logger.info('Split list into %i chunks', num_of_chunks)
  return chunks
254 |
255 |
def process_youtube_response(
    response: Dict[str, Any],
    channel_ids: List[str],
    is_translated: bool,
) -> List[List[Any]]:
  """Process the YouTube response to extract the required information.

  The field order of each row must match the column order used when building
  the dataframe: channel_id, view_count, video_count, subscriber_count,
  title, title_language, title_language_confidence, country.

  Args:
    response: The YouTube channels list response
      https://developers.google.com/youtube/v3/docs/channels/list#response
    channel_ids: A list of the channel IDs passed in the request
    is_translated: A flag showing whether the YouTube channel title should be
      run through language detection or not

  Returns:
    A list of rows where each row holds the data for one channel
  """
  logger.info('Processing youtube response')
  data = []
  if response.get('pageInfo').get('totalResults') == 0:
    logger.warning('The YouTube response has no results: %s', response)
    logger.warning(channel_ids)
    return data

  for channel in response['items']:
    title = channel.get('snippet').get('title', '')
    if is_translated:
      title_language, confidence = detect_language(title)
    else:
      title_language = ''
      confidence = 0
    statistics = channel.get('statistics')
    data.append([
        channel.get('id'),
        statistics.get('viewCount', None),
        # Bug fix: videoCount and subscriberCount were previously appended in
        # the opposite order, so each landed in the other's dataframe column.
        statistics.get('videoCount', None),
        statistics.get('subscriberCount', None),
        title,
        title_language,
        confidence,
        channel.get('snippet').get('country', ''),
    ])
  return data
297 |
298 |
def get_translate_filter(
    sheet_id: str,
    credentials: google.auth.credentials.Credentials
) -> bool:
  """Get the filter for YouTube channel title translation.

  Args:
    sheet_id: the ID of the Google Sheet containing the config.
    credentials: Google Auth credentials

  Returns:
    True if filter is enabled, False otherwise
  """
  logger.info('Getting config from sheet %s', sheet_id)

  sheets_service = build('sheets', 'v4', credentials=credentials)
  sheet = sheets_service.spreadsheets()

  # Default to 'Disabled' when the named range is missing or empty.
  values = sheet.values().get(
      spreadsheetId=sheet_id,
      range='yt_translation_filter').execute().get('values', [['Disabled']])
  result = values[0][0]

  # Directly use the comparison result rather than `True if ... else False`.
  is_enabled = result == 'Enabled'
  logger.info('Translation filter enabled is %s', is_enabled)

  return is_enabled
325 |
326 |
def detect_language(text: str) -> Tuple[str, float]:
  """Detect which language the given text is written in.

  Args:
    text: the text to base the translation off of

  Returns:
    A tuple containing the language and the confidence.
  """
  logger.debug('Detecting language for %s', text)
  client = translate.Client()
  detection = client.detect_language(text)
  return detection['language'], detection['confidence']
340 |
341 |
def write_results_to_gcs(youtube_df: pd.DataFrame, customer_id: str) -> None:
  """Write the YouTube dataframe to GCS as a CSV file.

  Historical data is preserved so all file writes have a UUID appended to it.

  Args:
    youtube_df: the dataframe based on the YouTube data.
    customer_id: the customer ID to fetch the Google Ads data for.
  """
  logger.info('Writing results to GCS: %s', APE_GCS_DATA_BUCKET)
  number_of_rows = len(youtube_df.index)
  logger.info('There are %s rows', number_of_rows)
  if number_of_rows == 0:
    logger.info('There is nothing to write to GCS')
    return
  blob_name = f'youtube_channel/{customer_id}-{uuid.uuid4()}.csv'
  logger.info('Blob name: %s', blob_name)
  gcs.upload_blob_from_df(
      df=youtube_df,
      blob_name=blob_name,
      bucket=APE_GCS_DATA_BUCKET)
  logger.info('Blob uploaded to GCS')
365 |
366 |
def send_messages_to_pubsub(customer_id: str, sheet_id: str) -> None:
  """Push the customer ID to pub/sub when the job completes.

  Args:
    customer_id: the customer ID to fetch the Google Ads data for.
    sheet_id: the ID of the Google Sheet containing the config.
  """
  message_dict = {
      'customer_id': customer_id,
      'sheet_id': sheet_id,
  }
  # Bug fix: the original call passed message_dict as a lazy %-arg with no
  # placeholder in the format string, which makes the logging module report
  # an internal formatting error instead of logging the message.
  logger.info('Sending message to pub/sub: %s', message_dict)
  pubsub.send_dict_to_pubsub(
      message_dict=message_dict,
      topic=APE_ADS_EXCLUDER_PUBSUB_TOPIC,
      gcp_project=GOOGLE_CLOUD_PROJECT)
  logger.info('Message published')
384 |
--------------------------------------------------------------------------------