├── .gitignore
├── images
│   ├── copy_icon.png
│   └── architecture.png
├── plugins
│   └── cloud_utils
│       ├── images
│       │   └── module_overview.png
│       ├── __init__.py
│       ├── utils.py
│       ├── cloud_api.py
│       ├── cloud_auth.py
│       ├── cloud_storage.py
│       └── cloud_composer.py
├── requirements.txt
├── config.yaml
├── setup.sh
├── scripts
│   ├── 3_customer_view.sql
│   ├── targeted_products
│   │   ├── targeted_product_ddl.sql
│   │   └── construct_parsed_criteria.sql
│   ├── 2_product_metrics_view.sql
│   ├── materialize_product_historical.sql
│   ├── market_insights
│   │   ├── historical_view.sql
│   │   ├── best_sellers_workflow.sql
│   │   └── snapshot_view.sql
│   ├── 1_product_view.sql
│   ├── 4_product_detailed_view.sql
│   ├── materialize_product_detailed.sql
│   └── main_workflow.sql
├── CONTRIBUTING.md
├── config_parser.py
├── auth.py
├── data
│   └── language_codes.csv
├── cloud_env_setup.py
├── cloud_bigquery.py
├── README.md
├── LICENSE
└── cloud_data_transfer.py
/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache*
2 |
--------------------------------------------------------------------------------
/images/copy_icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/shopping-markup/HEAD/images/copy_icon.png
--------------------------------------------------------------------------------
/images/architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/shopping-markup/HEAD/images/architecture.png
--------------------------------------------------------------------------------
/plugins/cloud_utils/images/module_overview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/shopping-markup/HEAD/plugins/cloud_utils/images/module_overview.png
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | google-cloud-bigquery==3.4.2
2 | google-api-python-client==2.74.0
3 | google-cloud-bigquery-datatransfer==3.10.1
4 | grpcio==1.51.1
5 | grpcio-tools==1.51.1
6 | PyYAML==6.0
7 | pytz==2022.7.1
--------------------------------------------------------------------------------
/config.yaml:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2020 Google LLC.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | #     http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | # The list of configurations used by the MarkUp installation process.
17 |
18 | # The BigQuery dataset location.
19 | LOCATION: us
20 |
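21 | # Example (illustrative, not part of the original config): to keep the
22 | # dataset in the EU multi-region instead, the value could be set to:
23 | # LOCATION: eu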
--------------------------------------------------------------------------------
/setup.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Copyright 2020 Google LLC.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | #     http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | # MarkUp setup script.
17 |
18 | set -e
19 |
20 | VIRTUALENV_PATH=$HOME/"markup-venv"
21 |
22 | # Create virtual environment with python3
23 | if [[ ! -d "${VIRTUALENV_PATH}" ]]; then
24 | virtualenv -p python3 "${VIRTUALENV_PATH}"
25 | fi
26 |
27 |
28 | # Activate virtual environment.
29 | source "${VIRTUALENV_PATH}/bin/activate"
30 |
31 | # Install dependencies.
32 | pip install -r requirements.txt
33 |
34 | # Setup cloud environment.
35 | PYTHONPATH=src/plugins:$PYTHONPATH
36 | export PYTHONPATH
37 | python cloud_env_setup.py "$@"
38 |
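39 | # Example invocation (illustrative; the flag names come from the argparse
40 | # definitions in cloud_env_setup.py, which receives "$@"):
41 | # bash setup.sh --project_id=<project> --merchant_id=<merchant-id> \
42 | #   --ads_customer_id=<customer-id> --market_insights=true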
--------------------------------------------------------------------------------
/scripts/3_customer_view.sql:
--------------------------------------------------------------------------------
1 | # Copyright 2020 Google LLC.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | #     http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | CREATE OR REPLACE VIEW `{project_id}.{dataset}.customer_view`
16 | AS (
17 | WITH
18 | LatestDate AS (
19 | SELECT
20 | MAX(_DATA_DATE) AS latest_date
21 | FROM
22 | `{project_id}.{dataset}.Customer_{external_customer_id}`
23 | )
24 | SELECT DISTINCT
25 | _DATA_DATE AS data_date,
26 | LatestDate.latest_date,
27 | ExternalCustomerId,
28 | AccountDescriptiveName
29 | FROM
30 | `{project_id}.{dataset}.Customer_{external_customer_id}`,
31 | LatestDate
32 | );
33 |
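34 | -- Example (illustrative): every row carries `latest_date`, so the most recent
35 | -- snapshot can be selected with:
36 | --   SELECT * FROM `{project_id}.{dataset}.customer_view`
37 | --   WHERE data_date = latest_date;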
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # How to Contribute
2 |
3 | We'd love to accept your patches and contributions to this project. There are
4 | just a few small guidelines you need to follow.
5 |
6 | ## Contributor License Agreement
7 |
8 | Contributions to this project must be accompanied by a Contributor License
9 | Agreement. You (or your employer) retain the copyright to your contribution;
10 | this simply gives us permission to use and redistribute your contributions as
11 | part of the project. Head over to <https://cla.developers.google.com/> to see
12 | your current agreements on file or to sign a new one.
13 |
14 | You generally only need to submit a CLA once, so if you've already submitted one
15 | (even if it was for a different project), you probably don't need to do it
16 | again.
17 |
18 | ## Code reviews
19 |
20 | All submissions, including submissions by project members, require review. We
21 | use GitHub pull requests for this purpose. Consult
22 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
23 | information on using pull requests.
24 |
25 | ## Community Guidelines
26 |
27 | This project follows
28 | [Google's Open Source Community Guidelines](https://opensource.google/conduct/).
29 |
--------------------------------------------------------------------------------
/plugins/cloud_utils/__init__.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2020 Google LLC.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | #     http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | # Copyright 2020 Google LLC
17 | #
18 | # Licensed under the Apache License, Version 2.0 (the "License");
19 | # you may not use this file except in compliance with the License.
20 | # You may obtain a copy of the License at
21 | #
22 | #     http://www.apache.org/licenses/LICENSE-2.0
23 | #
24 | # Unless required by applicable law or agreed to in writing, software
25 | # distributed under the License is distributed on an "AS IS" BASIS,
26 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
27 | # See the License for the specific language governing permissions and
28 | # limitations under the License.
29 |
30 |
--------------------------------------------------------------------------------
/config_parser.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2020 Google LLC.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | #     http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | # python3
17 | """Config parser module.
18 |
19 | This module retrieves config values.
20 | """
21 |
22 | import functools
23 | import yaml
24 |
25 |
26 | @functools.lru_cache()
27 | def _get_config(config_key: str) -> str:
28 | """Returns value for a given config key.
29 |
30 |   The config values are retrieved from the "config.yaml" file on the
31 |   first invocation for a config key. Subsequent invocations return
32 |   the value from the cache.
33 |
34 | Args:
35 | config_key: The key to retrieve a value for.
36 |
37 | Returns:
38 | Value for key from config file.
39 | """
40 |
41 | with open('config.yaml') as config_file:
42 | configs = yaml.safe_load(config_file)
43 | return configs[config_key]
44 |
45 |
46 | def get_dataset_location() -> str:
47 | """Returns the dataset location."""
48 | return _get_config('LOCATION')
49 |
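50 | # Example usage (illustrative; assumes the config.yaml from this repository
51 | # is in the working directory):
52 | #   >>> get_dataset_location()
53 | #   'us'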
--------------------------------------------------------------------------------
/auth.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2020 Google LLC.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | #     http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | # python3
17 | """Contains cloud authentication related functionality."""
18 |
19 | import logging
20 | from typing import List
21 | from urllib import parse
22 |
23 | BASE_URL = 'https://www.gstatic.com/bigquerydatatransfer/oauthz/auth'
24 | REDIRECT_URI = 'urn:ietf:wg:oauth:2.0:oob'
25 |
26 |
27 | def retrieve_authorization_code(client_id: str, scopes: List[str],
28 |                                 app_name: str) -> str:
29 | """Returns authorization code.
30 |
31 | Args:
32 | client_id: The client id.
33 | scopes: The list of scopes.
34 | app_name: Name of the app.
35 | """
36 | scopes_str = ' '.join(scopes)
37 | authorization_code_request = {
38 | 'client_id': client_id,
39 | 'scope': scopes_str,
40 | 'redirect_uri': REDIRECT_URI
41 | }
42 |
43 | encoded_request = parse.urlencode(
44 | authorization_code_request, quote_via=parse.quote)
45 | url = f'{BASE_URL}?{encoded_request}'
46 | logging.info(
47 | 'Please click on the URL below to authorize %s and paste the '
48 | 'authorization code.', app_name)
49 | logging.info('URL - %s', url)
50 |
51 | return input('Authorization Code : ')
52 |
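53 | # Example usage (illustrative; the client id and scope are placeholders):
54 | #   code = retrieve_authorization_code(
55 | #       client_id='1234.apps.googleusercontent.com',
56 | #       scopes=['https://www.googleapis.com/auth/bigquery'],
57 | #       app_name='MarkUp')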
--------------------------------------------------------------------------------
/scripts/targeted_products/targeted_product_ddl.sql:
--------------------------------------------------------------------------------
1 | # Copyright 2020 Google LLC.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | #     http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | -- DDL definition for TargetedProduct table.
16 | --
17 | -- Using `CREATE TABLE IF NOT EXISTS` as the table is used in `product_detailed` views.
18 | CREATE TABLE IF NOT EXISTS `{project_id}.{dataset}.TargetedProduct_{external_customer_id}`
19 | (
20 | data_date DATE,
21 | product_id STRING,
22 | merchant_id INT64,
23 | target_country STRING
24 | );
25 |
26 | -- DDL definition for ParsedCriteria table.
27 | CREATE OR REPLACE TABLE `{project_id}.{dataset}.ParsedCriteria_{external_customer_id}`
28 | (
29 | criteria STRING,
30 | custom_label0 STRING,
31 | custom_label1 STRING,
32 | custom_label2 STRING,
33 | custom_label3 STRING,
34 | custom_label4 STRING,
35 | product_type_l1 STRING,
36 | product_type_l2 STRING,
37 | product_type_l3 STRING,
38 | product_type_l4 STRING,
39 | product_type_l5 STRING,
40 | google_product_category_l1 STRING,
41 | google_product_category_l2 STRING,
42 | google_product_category_l3 STRING,
43 | google_product_category_l4 STRING,
44 | google_product_category_l5 STRING,
45 | brand STRING,
46 | offer_id STRING,
47 | channel STRING,
48 | channel_exclusivity STRING,
49 | condition STRING
50 | );
51 |
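52 | -- Note: rows for the ParsedCriteria table are generated by the
53 | -- `constructParsedCriteria_*` function defined in
54 | -- scripts/targeted_products/construct_parsed_criteria.sql.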
--------------------------------------------------------------------------------
/scripts/2_product_metrics_view.sql:
--------------------------------------------------------------------------------
1 | # Copyright 2020 Google LLC.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | #     http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | CREATE OR REPLACE VIEW `{project_id}.{dataset}.product_metrics_view`
16 | AS (
17 | WITH
18 | CountryTable AS (
19 | SELECT DISTINCT
20 | parent_id AS country_criterion,
21 | country_code
22 | FROM
23 | `{project_id}.{dataset}.geo_targets`
24 | ),
25 | LanguageTable AS (
26 | SELECT DISTINCT
27 | criterion_id AS language_criterion,
28 | language_code
29 | FROM
30 | `{project_id}.{dataset}.language_codes`
31 | )
32 | SELECT
33 | _DATA_DATE AS data_date,
34 | externalcustomerid,
35 | merchantid,
36 | channel,
37 | LanguageTable.language_code,
38 | CountryTable.country_code,
39 | offerid AS offer_id,
40 | SUM(impressions) AS impressions,
41 | SUM(clicks) AS clicks,
42 | SUM(cost) AS cost,
43 | SUM(conversions) AS conversions,
44 | SUM(ConversionValue) AS conversions_value
45 | FROM
46 | `{project_id}.{dataset}.ShoppingProductStats_{external_customer_id}`
47 | AS ShoppingProductStats
48 | INNER JOIN
49 | CountryTable
50 | ON CountryTable.country_criterion = ShoppingProductStats.countrycriteriaid
51 | INNER JOIN
52 | LanguageTable
53 | ON CAST(LanguageTable.language_criterion AS STRING) = ShoppingProductStats.languagecriteriaid
54 | GROUP BY
55 | _DATA_DATE,
56 | externalcustomerid,
57 | merchantid,
58 | channel,
59 | LanguageTable.language_code,
60 | CountryTable.country_code,
61 | offer_id
62 | );
63 |
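64 | -- Example (illustrative): aggregate daily cost per country from this view.
65 | --   SELECT data_date, country_code, SUM(cost) AS total_cost
66 | --   FROM `{project_id}.{dataset}.product_metrics_view`
67 | --   GROUP BY data_date, country_code;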
--------------------------------------------------------------------------------
/scripts/materialize_product_historical.sql:
--------------------------------------------------------------------------------
1 | # Copyright 2020 Google LLC.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | #     http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | -- Stored procedure for creating a historical snapshot at the product category level.
16 |
17 | CREATE OR REPLACE PROCEDURE `{project_id}.{dataset}.product_historical_proc`()
18 | BEGIN
19 | CREATE OR REPLACE TABLE `{project_id}.{dataset}.product_historical_materialized`
20 | AS (
21 | SELECT
22 | data_date,
23 | account_id,
24 | product_type_l1,
25 | product_type_l2,
26 | product_type_l3,
27 | product_type_l4,
28 | product_type_l5,
29 | target_country,
30 | channel,
31 | COUNT(DISTINCT unique_product_id) AS total_products,
32 | COUNT(DISTINCT IF(is_approved = 1, unique_product_id, NULL)) AS total_approved,
33 | COUNT(DISTINCT IF(funnel_in_stock = 1, unique_product_id, NULL)) AS total_in_stock,
34 | COUNT(DISTINCT IF(funnel_targeted = 1, unique_product_id, NULL)) AS total_targeted,
35 | COUNT(DISTINCT IF(funnel_has_impression = 1, unique_product_id, NULL)) AS total_products_with_impressions_in_30_days,
36 | COUNT(DISTINCT IF(funnel_has_clicks = 1, unique_product_id, NULL)) AS total_products_with_clicks_in_30_days,
37 | IFNULL(SUM(impressions_30_days), 0) AS total_impressions_30_days,
38 | IFNULL(SUM(clicks_30_days), 0) AS total_clicks_30_days,
39 | IFNULL(SUM(cost_30_days), 0) AS total_cost_30_days
40 | FROM
41 | `{project_id}.{dataset}.product_detailed_view`
42 | WHERE
43 | data_date >= DATE_SUB(CURRENT_DATE(), INTERVAL 90 DAY)
44 | GROUP BY
45 | data_date,
46 | account_id,
47 | product_type_l1,
48 | product_type_l2,
49 | product_type_l3,
50 | product_type_l4,
51 | product_type_l5,
52 | target_country,
53 | channel
54 | );
55 | END;
56 |
57 | CALL `{project_id}.{dataset}.product_historical_proc`();
58 |
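59 | -- Example (illustrative): approval trend per country from the materialized
60 | -- snapshot.
61 | --   SELECT data_date, target_country, SUM(total_approved) AS approved_products
62 | --   FROM `{project_id}.{dataset}.product_historical_materialized`
63 | --   GROUP BY data_date, target_country;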
--------------------------------------------------------------------------------
/scripts/market_insights/historical_view.sql:
--------------------------------------------------------------------------------
1 | # Copyright 2020 Google LLC.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | #     http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | -- Creates a historical view for Performance, Status, Price & Price Benchmarks.
16 | CREATE OR REPLACE VIEW `{project_id}.{dataset}.market_insights_historical_view` AS (
17 | SELECT
18 | data_date,
19 | unique_product_id,
20 | target_country,
21 | price,
22 | price_currency,
23 | sale_price,
24 | sale_price_currency,
25 | price_benchmark_value,
26 | price_benchmark_currency,
27 | price_benchmark_timestamp,
28 | CASE
29 | WHEN price_benchmark_value IS NULL THEN ''
30 | WHEN (SAFE_DIVIDE(price, price_benchmark_value) - 1) < -0.01 THEN 'Less than PB' -- ASSUMPTION: Enter % as a decimal here
31 | WHEN (SAFE_DIVIDE(price, price_benchmark_value) - 1) > 0.01 THEN 'More than PB' -- ASSUMPTION: Enter % as a decimal here
32 | ELSE 'Equal to PB'
33 | END AS price_competitiveness_band,
34 | SAFE_DIVIDE(price, price_benchmark_value) - 1 AS price_vs_benchmark,
35 |     SAFE_DIVIDE(sale_price, price_benchmark_value) - 1 AS sale_price_vs_benchmark,
36 | FROM (
37 | SELECT DISTINCT
38 | _PARTITIONDATE as data_date,
39 | CONCAT(CAST(merchant_id AS STRING), '|', product_id) AS unique_product_id,
40 | target_country,
41 | price.value AS price,
42 | price.currency as price_currency,
43 | sale_price.value AS sale_price,
44 | sale_price.currency AS sale_price_currency,
45 | FROM `{project_id}.{dataset}.Products_{merchant_id}` AS Products,
46 | Products.destinations,
47 | UNNEST(ARRAY_CONCAT(destinations.approved_countries, destinations.pending_countries, destinations.disapproved_countries)) AS target_country
48 | )
49 | LEFT JOIN (
50 | SELECT
51 | _PARTITIONDATE as data_date,
52 | CONCAT(CAST(merchant_id AS STRING), '|', product_id) AS unique_product_id,
53 | country_of_sale as target_country,
54 | price_benchmark_value,
55 | price_benchmark_currency,
56 | price_benchmark_timestamp
57 | FROM `{project_id}.{dataset}.Products_PriceBenchmarks_{merchant_id}`
58 | )
59 | USING (data_date, unique_product_id, target_country)
60 | )
61 |
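62 | -- Example (illustrative): distribution of the price competitiveness band.
63 | --   SELECT price_competitiveness_band, COUNT(*) AS row_count
64 | --   FROM `{project_id}.{dataset}.market_insights_historical_view`
65 | --   GROUP BY price_competitiveness_band;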
--------------------------------------------------------------------------------
/scripts/market_insights/best_sellers_workflow.sql:
--------------------------------------------------------------------------------
1 | # Copyright 2021 Google LLC.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | #     http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | CREATE OR REPLACE TABLE `{project_id}.{dataset}.market_insights_best_sellers_materialized` AS (
16 | WITH
17 | best_sellers AS (
18 | SELECT
19 | _PARTITIONDATE as data_date,
20 | rank_id,
21 | rank,
22 | previous_rank,
23 | ranking_country,
24 | ranking_category,
25 | ranking_category_path.name as ranking_category_path,
26 | IF(
27 | ARRAY_LENGTH(SPLIT(ranking_category_path.name, ' > ')) = 1,
28 | ranking_category_path.name,
29 | NULL
30 | ) as ranking_category_name_l1,
31 | IF(
32 | ARRAY_LENGTH(SPLIT(ranking_category_path.name, ' > ')) = 2,
33 | ranking_category_path.name,
34 | NULL
35 | ) as ranking_category_name_l2,
36 | IF(
37 | ARRAY_LENGTH(SPLIT(ranking_category_path.name, ' > ')) = 3,
38 | ranking_category_path.name,
39 | NULL
40 | ) as ranking_category_name_l3,
41 | (SELECT ANY_VALUE(name) FROM b.product_title) AS product_title,
42 | gtins,
43 | brand,
44 | google_product_category_path.name as google_product_category_path,
45 | google_product_category,
46 | price_range.min,
47 | price_range.max,
48 | price_range.currency,
49 | FROM
50 | `{project_id}.{dataset}.BestSellers_TopProducts_{merchant_id}` b
51 | JOIN b.google_product_category_path google_product_category_path
52 | JOIN b.ranking_category_path ranking_category_path
53 | JOIN b.product_title product_title
54 | WHERE
55 | _PARTITIONDATE = DATE_SUB(CURRENT_DATE(), INTERVAL 2 DAY)
56 | # Adjust as necessary for other locales
57 | AND (product_title.locale IN ("en-US") OR product_title.locale IS NULL)
58 | AND google_product_category_path.locale = "en-US"
59 | AND ranking_category_path.locale = "en-US"
60 | ),
61 | inventory AS (
62 | SELECT DISTINCT
63 | rank_id
64 | FROM
65 | `{project_id}.{dataset}.BestSellers_TopProducts_Inventory_{merchant_id}`
66 | WHERE
67 | _PARTITIONDATE = DATE_SUB(CURRENT_DATE(), INTERVAL 2 DAY)
68 | )
69 | SELECT
70 | best_sellers.*,
71 | IF(inventory.rank_id IS NULL, False, True) AS is_in_inventory,
72 | FROM
73 | best_sellers
74 | LEFT JOIN
75 | inventory
76 | USING (rank_id)
77 | );
78 |
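79 | -- Example (illustrative): best-selling products that are not yet in the
80 | -- merchant's inventory.
81 | --   SELECT rank, product_title, brand
82 | --   FROM `{project_id}.{dataset}.market_insights_best_sellers_materialized`
83 | --   WHERE NOT is_in_inventory
84 | --   ORDER BY rank;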
--------------------------------------------------------------------------------
/data/language_codes.csv:
--------------------------------------------------------------------------------
1 | Language name,Language code,Criterion ID
2 | Arabic,ar,1019
3 | Bengali,bn,1056
4 | Brazilian Portuguese,pt_BR,1016
5 | Bulgarian,bg,1020
6 | Catalan,ca,1038
7 | Chinese,zh,1008
8 | Chinese (simplified),zh_CN,1017
9 | Chinese (traditional),zh_TW,1018
10 | Croatian,hr,1039
11 | Czech,cs,1021
12 | Danish,da,1009
13 | Dutch,nl,1010
14 | English,en,1000
15 | English (US),en_US,1006
16 | English (GB),en_GB,1007
17 | Estonian,et,1043
18 | Filipino,tl,1042
19 | Finnish,fi,1011
20 | French,fr,1002
21 | German,de,1001
22 | Greek,el,1022
23 | Hebrew,iw,1027
24 | Hindi,hi,1023
25 | Hungarian,hu,1024
26 | Icelandic,is,1026
27 | Indonesian,id,1025
28 | Italian,it,1004
29 | Japanese,ja,1005
30 | Korean,ko,1012
31 | Latvian,lv,1028
32 | Lithuanian,lt,1029
33 | Malay,ms,1102
34 | Norwegian,no,1013
35 | Persian,fa,1064
36 | Polish,pl,1030
37 | Portuguese,pt,1014
38 | Romanian,ro,1032
39 | Russian,ru,1031
40 | Serbian,sr,1035
41 | Slovak,sk,1033
42 | Slovenian,sl,1034
43 | Spanish,es,1003
44 | Swedish,sv,1015
45 | Tamil,ta,1130
46 | Telugu,te,1131
47 | Thai,th,1044
48 | Turkish,tr,1037
49 | Ukrainian,uk,1036
50 | Urdu,ur,1041
51 | Vietnamese,vi,1040
52 | N/A,aa,1045
53 | N/A,ab,1046
54 | N/A,af,1047
55 | N/A,am,1048
56 | N/A,as,1049
57 | N/A,ay,1050
58 | N/A,az,1051
59 | N/A,ba,1052
60 | N/A,be,1053
61 | N/A,bh,1054
62 | N/A,bi,1055
63 | N/A,bn,1056
64 | N/A,bo,1057
65 | N/A,br,1058
66 | N/A,co,1059
67 | N/A,cy,1060
68 | N/A,nn,1061
69 | N/A,eo,1062
70 | N/A,eu,1063
71 | N/A,fa,1064
72 | N/A,fj,1065
73 | N/A,fo,1066
74 | N/A,fy,1067
75 | N/A,ga,1068
76 | N/A,gd,1069
77 | N/A,gl,1070
78 | N/A,gn,1071
79 | N/A,gu,1072
80 | N/A,gv,1073
81 | N/A,ha,1074
82 | N/A,bs,1075
83 | N/A,hy,1076
84 | N/A,ia,1077
85 | N/A,ie,1078
86 | N/A,ik,1079
87 | N/A,iu,1080
88 | N/A,jv,1081
89 | N/A,ka,1082
90 | N/A,kk,1083
91 | N/A,kl,1084
92 | N/A,km,1085
93 | N/A,kn,1086
94 | N/A,ks,1087
95 | N/A,ku,1088
96 | N/A,kw,1089
97 | N/A,ky,1090
98 | N/A,la,1091
99 | N/A,lb,1092
100 | N/A,ln,1093
101 | N/A,lo,1094
102 | N/A,mg,1095
103 | N/A,mi,1096
104 | N/A,mk,1097
105 | N/A,ml,1098
106 | N/A,mn,1099
107 | N/A,mo,1100
108 | N/A,mr,1101
109 | N/A,ms,1102
110 | N/A,mt,1103
111 | N/A,my,1104
112 | N/A,na,1105
113 | N/A,ne,1106
114 | N/A,oc,1107
115 | N/A,om,1108
116 | N/A,or,1109
117 | N/A,pa,1110
118 | N/A,ps,1111
119 | N/A,qu,1112
120 | N/A,rm,1113
121 | N/A,rn,1114
122 | N/A,rw,1115
123 | N/A,sa,1116
124 | N/A,sd,1117
125 | N/A,se,1118
126 | N/A,sg,1119
127 | N/A,zu,1120
128 | N/A,si,1121
129 | N/A,sm,1122
130 | N/A,sn,1123
131 | N/A,so,1124
132 | N/A,sq,1125
133 | N/A,ss,1126
134 | N/A,st,1127
135 | N/A,su,1128
136 | N/A,sw,1129
137 | N/A,ta,1130
138 | N/A,te,1131
139 | N/A,tg,1132
140 | N/A,ti,1133
141 | N/A,tk,1134
142 | N/A,tn,1135
143 | N/A,to,1136
144 | N/A,ts,1137
145 | N/A,tt,1138
146 | N/A,tw,1139
147 | N/A,ug,1140
148 | N/A,uz,1141
149 | N/A,vo,1142
150 | N/A,wo,1143
151 | N/A,xh,1144
152 | N/A,yi,1145
153 | N/A,yo,1146
154 | N/A,za,1147
155 | N/A,en_AU,1148
156 | N/A,pt_PT,1149
157 | N/A,zh_HK,1150
--------------------------------------------------------------------------------
/scripts/market_insights/snapshot_view.sql:
--------------------------------------------------------------------------------
1 | # Copyright 2020 Google LLC.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | #     http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | -- Creates a latest snapshot view with Best Sellers & Price Benchmarks
16 | CREATE OR REPLACE VIEW `{project_id}.{dataset}.market_insights_snapshot_view` AS (
17 | WITH
18 | price_benchmarks AS (
19 | SELECT
20 | pb.data_date AS data_date,
21 | pb.unique_product_id,
22 | pb.target_country,
23 | pb.price_benchmark_value,
24 | pb.price_benchmark_currency,
25 | pb.price_benchmark_timestamp,
26 | CASE
27 | WHEN pb.price_benchmark_value IS NULL THEN ''
28 | WHEN (SAFE_DIVIDE(product.effective_price_value, pb.price_benchmark_value) - 1) < -0.01
29 | THEN 'Less than PB' -- ASSUMPTION: Enter % as a decimal here
30 | WHEN (SAFE_DIVIDE(product.effective_price_value, pb.price_benchmark_value) - 1) > 0.01
31 | THEN 'More than PB' -- ASSUMPTION: Enter % as a decimal here
32 | ELSE 'Equal to PB'
33 | END AS price_competitiveness_band,
34 | SAFE_DIVIDE(product.effective_price_value, pb.price_benchmark_value) - 1 AS price_vs_benchmark,
35 | product.effective_price_value AS effective_price,
36 | FROM (
37 | SELECT
38 | unique_product_id,
39 | target_country,
40 | latest_date,
41 | IF(
42 | sale_price_effective_start_date <= CURRENT_TIMESTAMP()
43 | AND sale_price_effective_end_date > CURRENT_TIMESTAMP(),
44 | sale_price.value,
45 | price.value) AS effective_price_value
46 | FROM
47 | `{project_id}.{dataset}.product_detailed_materialized`
48 | ) AS product
49 | INNER JOIN (
50 | SELECT
51 | _PARTITIONDATE as data_date,
52 | CONCAT(CAST(merchant_id AS STRING), '|', product_id) AS unique_product_id,
53 | country_of_sale as target_country,
54 | price_benchmark_value,
55 | price_benchmark_currency,
56 | price_benchmark_timestamp
57 | FROM `{project_id}.{dataset}.Products_PriceBenchmarks_{merchant_id}`
58 | ) pb
59 | ON
60 | product.unique_product_id = pb.unique_product_id
61 | AND product.target_country = pb.target_country
62 | AND product.latest_date = pb.data_date
63 | ),
64 | best_sellers AS (
65 | SELECT DISTINCT
66 | _PARTITIONDATE AS data_date,
67 | CONCAT(CAST(merchant_id AS STRING), '|', product_id) AS unique_product_id,
68 | SPLIT(rank_id, ':')[SAFE_ORDINAL(2)] AS target_country,
69 | TRUE as is_best_seller,
70 | FROM
71 | `{project_id}.{dataset}.BestSellers_TopProducts_Inventory_{merchant_id}`
72 | WHERE _PARTITIONDATE = DATE_SUB(CURRENT_DATE(), INTERVAL 2 DAY)
73 | )
74 | SELECT
75 | product,
76 | price_benchmarks,
77 | best_sellers,
78 | FROM (
79 | SELECT
80 | *,
81 | IF(
82 | sale_price_effective_start_date <= CURRENT_TIMESTAMP()
83 | AND sale_price_effective_end_date > CURRENT_TIMESTAMP(),
84 | sale_price.value,
85 | price.value) AS effective_price,
86 | FROM `{project_id}.{dataset}.product_detailed_materialized`
87 | ) AS product
88 | LEFT JOIN price_benchmarks
89 | ON product.unique_product_id = price_benchmarks.unique_product_id
90 | AND product.target_country = price_benchmarks.target_country
91 | AND product.latest_date = price_benchmarks.data_date
92 | LEFT JOIN best_sellers
93 | ON product.unique_product_id = best_sellers.unique_product_id
94 | AND product.target_country = best_sellers.target_country
95 | AND product.latest_date = best_sellers.data_date
96 | );
97 |
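98 | -- Example (illustrative): best sellers currently priced above the benchmark.
99 | --   SELECT product.unique_product_id, price_benchmarks.price_vs_benchmark
100 | --   FROM `{project_id}.{dataset}.market_insights_snapshot_view`
101 | --   WHERE best_sellers.is_best_seller
102 | --     AND price_benchmarks.price_vs_benchmark > 0;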
--------------------------------------------------------------------------------
/plugins/cloud_utils/utils.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2020 Google LLC.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | #     http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | # Lint as: python3
17 | # Copyright 2020 Google LLC
18 | #
19 | # Licensed under the Apache License, Version 2.0 (the "License");
20 | # you may not use this file except in compliance with the License.
21 | # You may obtain a copy of the License at
22 | #
23 | #     http://www.apache.org/licenses/LICENSE-2.0
24 | #
25 | # Unless required by applicable law or agreed to in writing, software
26 | # distributed under the License is distributed on an "AS IS" BASIS,
27 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
28 | # See the License for the specific language governing permissions and
29 | # limitations under the License.
30 |
31 | """A utility module for cloud_utils package.
32 |
33 | This module implements the common methods required for different modules under
34 | cloud_utils package.
35 | """
36 |
37 | import time
38 | from typing import Any, Dict, Text
39 |
40 | import logging
41 | import apiclient
42 | from googleapiclient import errors
43 | from googleapiclient import http
44 |
45 | from google.api_core import retry
46 |
47 |
48 | # Number of seconds to wait before re-checking for operation status.
49 | _WAIT_FOR_OPERATION_SLEEP_SECONDS = 10
50 | _RETRIABLE_STATUS_CODES = (
51 | 429, # Too Many Requests
52 | 500, # Internal Server Error
53 | 503) # Service Unavailable
54 |
55 |
56 | class Error(Exception):
57 | """A generic error thrown for any exception in utils module."""
58 | pass
59 |
60 |
61 | def _is_retriable_http_error(error: errors.HttpError) -> bool:
62 | """Checks if HttpError is in _RETRIABLE_STATUS_CODES.
63 |
64 | This function requires HttpError to have a valid response.
65 |
66 | Args:
67 | error: The http error to check.
68 |
69 | Returns:
70 | True if HttpError is retriable, otherwise False.
71 | """
72 |   if error.resp.status in _RETRIABLE_STATUS_CODES:
73 | return True
74 | return False
75 |
76 |
77 | @retry.Retry(predicate=_is_retriable_http_error)
78 | def execute_request(request: http.HttpRequest) -> Any:
79 | """Executes an HTTP request and return its response.
80 |
81 |   This method executes an HTTP request and retries if the response error code
82 |   is retriable. See `_RETRIABLE_STATUS_CODES` in this module for the list of
83 |   error codes for which the retry will be attempted.
84 |
85 | Args:
86 | request: HTTP request to be executed.
87 |
88 | Returns:
89 | response: Response from the HTTP request.
90 | """
91 | response = request.execute()
92 | return response
93 |
94 |
95 | def wait_for_operation(operation_client: apiclient.discovery.Resource,
96 | operation: Dict[Text, Any]) -> None:
97 | """Waits for the completion of operation.
98 |
99 | This method retrieves operation resource and checks for its status. If the
100 | operation is not completed, then the operation is re-checked after
101 | `_WAIT_FOR_OPERATION_SLEEP_SECONDS` seconds.
102 |
103 | Args:
104 | operation_client: Client with methods for interacting with the operation
105 | APIs. The `build_service_client` method from `cloud_auth` module can be
106 | used to build the client.
107 | operation: Resource representing long running operation.
108 |
109 | Raises:
110 | Error: If the operation is not successfully completed.
111 | """
112 | while True:
113 | request = operation_client.get(name=operation['name'])
114 | updated_operation = execute_request(request)
115 | if updated_operation.get('done'):
116 | logging.info(f'Operation {operation["name"]} successfully completed.')
117 | return
118 | if updated_operation.get('error'):
119 | logging.info(
120 | f'Operation {operation["name"]} failed to complete successfully.')
121 | raise Error(
122 | f'Operation {operation["name"]} not completed. Error Details - '
123 | f'{updated_operation["error"]}')
124 | logging.info(
125 | f'Operation {operation["name"]} still in progress. Sleeping for '
126 | f'{_WAIT_FOR_OPERATION_SLEEP_SECONDS} seconds before retrying.')
127 | time.sleep(_WAIT_FOR_OPERATION_SLEEP_SECONDS)
128 |
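129 | # Example usage (illustrative; mirrors how cloud_api.py polls a service-usage
130 | # operation until it completes):
131 | #   operation = execute_request(client.services().batchEnable(...))
132 | #   wait_for_operation(client.operations(), operation)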
--------------------------------------------------------------------------------
/plugins/cloud_utils/cloud_api.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2020 Google LLC.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | #     http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | # Lint as: python3
17 | # Copyright 2020 Google LLC
18 | #
19 | # Licensed under the Apache License, Version 2.0 (the "License");
20 | # you may not use this file except in compliance with the License.
21 | # You may obtain a copy of the License at
22 | #
23 | #     http://www.apache.org/licenses/LICENSE-2.0
24 | #
25 | # Unless required by applicable law or agreed to in writing, software
26 | # distributed under the License is distributed on an "AS IS" BASIS,
27 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
28 | # See the License for the specific language governing permissions and
29 | # limitations under the License.
30 |
31 | """Wrapper for Cloud APIs."""
32 |
33 | import json
34 | from typing import Any, List, Optional
35 |
36 | import logging
37 | from googleapiclient import errors
38 | import requests.exceptions
39 |
40 | from google.auth.transport.requests import AuthorizedSession
41 |
42 | from plugins.cloud_utils import cloud_auth
43 | from plugins.cloud_utils import utils
44 |
45 | _SERVICE_URL = 'https://serviceusage.googleapis.com/v1/projects'
46 |
47 |
48 | class Error(Exception):
49 | """A generic error thrown for any exceptions in cloud_api module."""
50 | pass
51 |
52 |
53 | class CloudApiUtils(object):
54 | """CloudApiUtils class provides methods to manage Cloud Services.
55 |
56 | This class manages Cloud APIs within a single GCP project.
57 |
58 | Typical usage example:
59 | >>> ca_utils = CloudApiUtils('project_id',
60 | 'service_account_key_file.json')
61 | >>> ca_utils.enable_apis(['storage-component.googleapis.com'])
62 | """
63 |
64 | def __init__(self,
65 | project_id: str,
66 |                service_account_key_file: Optional[str] = None) -> None:
67 |     """Initialises a new instance of CloudApiUtils.
68 |
69 | Args:
70 | project_id: GCP project id.
71 |       service_account_key_file: Optional. File containing the service account
72 |         key. If not passed, the default credentials will be used. There are
73 |         the following ways to create a service account key:
74 | 1. Use `create_service_account_key` method from `cloud_auth` module.
75 | 2. Use `gcloud` command line utility as documented here -
76 | https://cloud.google.com/iam/docs/creating-managing-service-account-keys
77 | """
78 | self.client = cloud_auth.build_service_client('serviceusage',
79 | service_account_key_file)
80 | self.project_id = project_id
81 |
82 | def enable_apis(self, apis: List[str]) -> None:
83 | """Enables multiple Cloud APIs for a GCP project.
84 |
85 | Args:
86 | apis: The list of APIs to be enabled.
87 |
88 | Raises:
89 | Error: If the request was not processed successfully.
90 | """
91 | parent = f'projects/{self.project_id}'
92 | request_body = {'serviceIds': apis}
93 | try:
94 | request = self.client.services().batchEnable(
95 | parent=parent, body=request_body)
96 | operation = utils.execute_request(request)
97 | utils.wait_for_operation(self.client.operations(), operation)
98 | except errors.HttpError:
99 | logging.exception('Error occurred while enabling Cloud APIs.')
100 | raise Error('Error occurred while enabling Cloud APIs.')
101 |
102 |
103 | def post_request(session: AuthorizedSession, url: str, data: Any) -> None:
104 | """Posts a request to the given url.
105 |
106 | Args:
107 | session: The authorised session.
108 | url: The url.
109 | data: The data to be posted.
110 |
111 | Raises:
112 | Error: If the request was not processed successfully.
113 | """
114 | try:
115 | response = session.post(url, data)
116 | response.raise_for_status()
117 | except requests.exceptions.HTTPError as error:
118 | logging.exception('HTTPError "%s" "%s": post_request failed',
119 | error.response.status_code, error.response.reason)
120 | raise Error('HTTPError {} {}: post_request failed.'.format(
121 | error.response.status_code, error.response.reason))
122 |
123 |
124 | def disable_api(session: AuthorizedSession, project_id: str, api: str) -> None:
125 | """Disables Cloud API for a given project.
126 |
127 | Args:
128 | session: The authorised session.
129 | project_id: GCP project id.
130 | api: The API to be disabled.
131 | """
132 | disable_api_url = '{}/{}/services/{}:disable'.format(_SERVICE_URL, project_id,
133 | api)
134 | logging.info('Disabling following API for "%s" project : "%s".', project_id,
135 | api)
136 | post_request(session, disable_api_url, {'disableDependentServices': True})
137 |
138 |
139 | def is_api_enabled(session: AuthorizedSession, project_id: str,
140 | api: str) -> bool:
141 | """Checks if Cloud API is enabled for given project.
142 |
143 | Args:
144 | session: The authorised session.
145 | project_id: GCP project id.
146 | api: The API to be checked.
147 |
148 | Returns:
149 | True: If the API is enabled.
150 | False: If the API is not enabled.
151 | """
152 | get_service_url = '{}/{}/services/{}'.format(_SERVICE_URL, project_id, api)
153 | response = session.get(get_service_url)
154 | service = json.loads(response.content)
155 | if service['state'] == 'ENABLED':
156 | return True
157 | return False
158 |
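159 | # Example usage (illustrative; `credentials` must be obtained separately, e.g.
160 | # via the cloud_auth module):
161 | #   session = AuthorizedSession(credentials)
162 | #   if is_api_enabled(session, 'my-project', 'bigquery.googleapis.com'):
163 | #     logging.info('BigQuery API is already enabled.')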
--------------------------------------------------------------------------------
/cloud_env_setup.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2020 Google LLC.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | #     http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | # python3
17 | """Cloud Environment setup module for MarkUp premium.
18 |
19 | This module automates the following steps:
20 |   1. Enable all the required Cloud APIs.
21 |   2. Create the GMC and Google Ads data transfer services.
22 |   3. Set up tables and views in BigQuery.
23 | """
24 |
25 | import argparse
26 | import logging
27 | import os
28 | from typing import Dict, Union
29 |
30 | import cloud_bigquery
31 | import cloud_data_transfer
32 | import config_parser
33 | from google.cloud import exceptions
34 | from plugins.cloud_utils import cloud_api
35 |
36 | # Set logging level.
37 | logging.getLogger().setLevel(logging.INFO)
38 | logging.getLogger('googleapiclient.discovery').setLevel(logging.WARNING)
39 |
40 | # Required Cloud APIs to be enabled.
41 | _APIS_TO_BE_ENABLED = [
42 | 'bigquery.googleapis.com', 'bigquerydatatransfer.googleapis.com'
43 | ]
44 | _DATASET_ID = 'markup'
45 | _MATERIALIZE_PRODUCT_DETAILED_SQL = 'scripts/materialize_product_detailed.sql'
46 | _MATERIALIZE_PRODUCT_HISTORICAL_SQL = (
47 | 'scripts/materialize_product_historical.sql')
48 |
49 |
50 | def enable_apis(project_id: str) -> None:
51 | """Enables list of cloud APIs for given cloud project.
52 |
53 | Args:
54 | project_id: A cloud project id.
55 | """
56 | cloud_api_utils = cloud_api.CloudApiUtils(project_id=project_id)
57 | cloud_api_utils.enable_apis(_APIS_TO_BE_ENABLED)
58 |
59 |
60 | def parse_boolean(arg: str):
61 | """Returns boolean representation of argument."""
62 | arg = str(arg).lower()
63 | if 'true'.startswith(arg):
64 | return True
65 | return False
66 |
67 |
68 | def parse_arguments() -> argparse.Namespace:
69 | """Initialize command line parser using argparse.
70 |
71 | Returns:
72 |     An argparse.Namespace with the parsed command-line arguments.
73 | """
74 | parser = argparse.ArgumentParser()
75 | parser.add_argument('--project_id', help='GCP project id.', required=True)
76 | parser.add_argument(
77 | '--dataset_id',
78 | help='BigQuery dataset id.',
79 | default=_DATASET_ID,
80 | required=False)
81 | parser.add_argument(
82 | '--merchant_id', help='Google Merchant Center Account Id.', required=True)
83 | parser.add_argument(
84 | '--ads_customer_id',
85 | help='Google Ads External Customer Id.',
86 | required=True)
87 | parser.add_argument(
88 | '--market_insights',
89 | help='Deploy Market Insights solution.',
90 | type=parse_boolean,
91 | required=True)
92 | return parser.parse_args()
93 |
94 |
95 | def main():
96 | args = parse_arguments()
97 | ads_customer_id = args.ads_customer_id.replace('-', '')
98 | data_transfer = cloud_data_transfer.CloudDataTransferUtils(args.project_id)
99 | logging.info('Enabling APIs.')
100 | enable_apis(args.project_id)
101 | logging.info('Enabled APIs.')
102 | logging.info('Creating %s dataset.', args.dataset_id)
103 | cloud_bigquery.create_dataset_if_not_exists(args.project_id, args.dataset_id)
104 | merchant_center_config = data_transfer.create_merchant_center_transfer(
105 | args.merchant_id, args.dataset_id, args.market_insights)
106 | ads_config = data_transfer.create_google_ads_transfer(ads_customer_id,
107 | args.dataset_id)
108 | try:
109 | logging.info('Checking the GMC data transfer status.')
110 | data_transfer.wait_for_transfer_completion(merchant_center_config)
111 | logging.info('The GMC data have been successfully transferred.')
112 | except cloud_data_transfer.DataTransferError:
113 |     logging.error('If you have just created the GMC transfer - you may need '
114 |                   'to wait for up to 90 minutes before the data of your '
115 |                   'Merchant account are prepared and available for the transfer.')
116 | raise
117 | logging.info('Checking the Google Ads data transfer status.')
118 | data_transfer.wait_for_transfer_completion(ads_config)
119 | logging.info('The Google Ads data have been successfully transferred.')
120 | cloud_bigquery.load_language_codes(args.project_id, args.dataset_id)
121 | cloud_bigquery.load_geo_targets(args.project_id, args.dataset_id)
122 | logging.info('Creating MarkUp specific views.')
123 | cloud_bigquery.execute_queries(args.project_id, args.dataset_id,
124 | args.merchant_id, ads_customer_id,
125 | args.market_insights)
126 | logging.info('Created MarkUp specific views.')
127 | logging.info('Updating targeted products')
128 | query = cloud_bigquery.get_main_workflow_sql(args.project_id, args.dataset_id,
129 | args.merchant_id,
130 | ads_customer_id)
131 | data_transfer.schedule_query(
132 | f'Main workflow - {args.dataset_id} - {ads_customer_id}', query)
133 | logging.info('Job created to run markup main workflow.')
134 | if args.market_insights:
135 | logging.info('Market insights requested, creating scheduled query')
136 | best_sellers_query = cloud_bigquery.get_best_sellers_workflow_sql(
137 | args.project_id, args.dataset_id, args.merchant_id)
138 | data_transfer.schedule_query(
139 | f'Best sellers workflow - {args.dataset_id} - {args.merchant_id}',
140 | best_sellers_query)
141 | logging.info('Job created to run best sellers workflow.')
142 | logging.info('MarkUp installation is complete!')
143 |
144 |
145 | if __name__ == '__main__':
146 | main()
147 |
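148 | # Example invocation (illustrative):
149 | #   python cloud_env_setup.py --project_id=my-project --merchant_id=12345 \
150 | #     --ads_customer_id=123-456-7890 --market_insights=false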
--------------------------------------------------------------------------------
/scripts/targeted_products/construct_parsed_criteria.sql:
--------------------------------------------------------------------------------
1 | # Copyright 2020 Google LLC.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | #     http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | -- Constructs insert statements to populate ParsedCriteria table.
16 | --
17 | -- @param criterions Array of product group criterions.
18 | -- @return String containing insert sql statements to construct ParsedCriteria
19 | -- table.
20 | CREATE OR REPLACE FUNCTION `{project_id}.{dataset}.constructParsedCriteria_{external_customer_id}`(criterions ARRAY<STRING>)
21 | RETURNS STRING
22 | LANGUAGE js AS """
23 | function getParsedCriteria(criterion) {{
24 | let parsedCriteria = {{}}
25 | parsedCriteria['criteria'] = criterion;
26 | subCriterions = criterion.split('&+');
27 | for (subCriterion of subCriterions) {{
28 | if(subCriterion.startsWith('custom')) {{
29 | const customLabelRegex = /custom(\\d+)/;
30 | result = subCriterion.split('==');
31 |
32 | index = result[0].match(customLabelRegex)[1];
33 | value = result[1]
34 | if(value != '*') {{
35 | parsedCriteria['custom_label' + index] = value;
36 | }}
37 | }}
38 | if(subCriterion.startsWith('brand==')) {{
39 | result = subCriterion.split('==');
40 | value = result[1];
41 | if(value != '*') {{
42 | parsedCriteria['brand'] = value;
43 | }}
44 | }}
45 | if(subCriterion.startsWith('product_type_')) {{
46 | const productTypeRegex = /product_type_l(\\d+)/;
47 | result = subCriterion.split('==');
48 |
49 | index = result[0].match(productTypeRegex)[1];
50 | value = result[1]
51 | if(value != '*') {{
52 | parsedCriteria['product_type_l' + index] = value;
53 | }}
54 | }}
55 | if(subCriterion.startsWith('category_')) {{
56 | const categoryRegex = /category_l(\\d+)/;
57 | result = subCriterion.split('==');
58 |
59 | index = result[0].match(categoryRegex)[1];
60 | value = result[1]
61 | if(value != '*') {{
62 | parsedCriteria['google_product_category_l' + index] = value;
63 | }}
64 | }}
65 | if(subCriterion.startsWith('id==')) {{
66 | result = subCriterion.split('==');
67 | value = result[1];
68 | if(value != '*') {{
69 | parsedCriteria['offer_id'] = value;
70 | }}
71 | }}
72 | if(subCriterion.startsWith('channel==')) {{
73 | result = subCriterion.split('==');
74 | value = result[1];
75 | if(value != '*') {{
76 | channel = value.split(':')[1];
77 | parsedCriteria['channel'] = channel;
78 | }}
79 | }}
80 | if(subCriterion.startsWith('channel_exclusivity==')) {{
81 | result = subCriterion.split('==');
82 | value = result[1];
83 | if(value != '*') {{
84 | channel_exclusivity = value.split(':')[1];
85 | parsedCriteria['channel_exclusivity'] = channel_exclusivity;
86 | }}
87 | }}
88 | if(subCriterion.startsWith('c_condition==')) {{
89 | result = subCriterion.split('==');
90 | value = result[1];
91 | if(value != '*') {{
92 | condition = value.split(':')[1];
93 | parsedCriteria['condition'] = condition;
94 | }}
95 | }}
96 | }}
97 | return parsedCriteria;
98 | }}
99 | sql = 'INSERT INTO `{project_id}.{dataset}.ParsedCriteria_{external_customer_id}` VALUES ';
100 | i = 0;
101 | for (criterion of criterions) {{
102 | criterion = criterion.replace(/"/g, '\\\\"');
103 | parsedCriteria = getParsedCriteria(criterion)
104 | if ( i!=0 ) {{
105 | sql += ',';
106 | }}
107 | sql += '('
108 | sql += '"' + criterion + '",';
109 | sql += (parsedCriteria['custom_label0'] ? '"' + parsedCriteria['custom_label0'] + '"' : 'NULL') + ',';
110 | sql += (parsedCriteria['custom_label1'] ? '"' + parsedCriteria['custom_label1'] + '"' : 'NULL') + ',';
111 | sql += (parsedCriteria['custom_label2'] ? '"' + parsedCriteria['custom_label2'] + '"' : 'NULL') + ',';
112 | sql += (parsedCriteria['custom_label3'] ? '"' + parsedCriteria['custom_label3'] + '"' : 'NULL') + ',';
113 | sql += (parsedCriteria['custom_label4'] ? '"' + parsedCriteria['custom_label4'] + '"' : 'NULL') + ',';
114 | sql += (parsedCriteria['product_type_l1'] ? '"' + parsedCriteria['product_type_l1'] + '"' : 'NULL') + ',';
115 | sql += (parsedCriteria['product_type_l2'] ? '"' + parsedCriteria['product_type_l2'] + '"' : 'NULL') + ',';
116 | sql += (parsedCriteria['product_type_l3'] ? '"' + parsedCriteria['product_type_l3'] + '"' : 'NULL') + ',';
117 | sql += (parsedCriteria['product_type_l4'] ? '"' + parsedCriteria['product_type_l4'] + '"' : 'NULL') + ',';
118 | sql += (parsedCriteria['product_type_l5'] ? '"' + parsedCriteria['product_type_l5'] + '"' : 'NULL') + ',';
119 | sql += (parsedCriteria['google_product_category_l1'] ? '"' + parsedCriteria['google_product_category_l1'] + '"' : 'NULL') + ',';
120 | sql += (parsedCriteria['google_product_category_l2'] ? '"' + parsedCriteria['google_product_category_l2'] + '"' : 'NULL') + ',';
121 | sql += (parsedCriteria['google_product_category_l3'] ? '"' + parsedCriteria['google_product_category_l3'] + '"' : 'NULL') + ',';
122 | sql += (parsedCriteria['google_product_category_l4'] ? '"' + parsedCriteria['google_product_category_l4'] + '"' : 'NULL') + ',';
123 | sql += (parsedCriteria['google_product_category_l5'] ? '"' + parsedCriteria['google_product_category_l5'] + '"' : 'NULL') + ',';
124 | sql += (parsedCriteria['brand'] ? '"' + parsedCriteria['brand'] + '"' : 'NULL') + ',';
125 | sql += (parsedCriteria['offer_id'] ? '"' + parsedCriteria['offer_id'] + '"' : 'NULL') + ',';
126 | sql += (parsedCriteria['channel'] ? '"' + parsedCriteria['channel'] + '"' : 'NULL') + ',';
127 | sql += (parsedCriteria['channel_exclusivity'] ? '"' + parsedCriteria['channel_exclusivity'] + '"' : 'NULL') + ',';
128 | sql += (parsedCriteria['condition'] ? '"' + parsedCriteria['condition'] + '"' : 'NULL');
129 | sql += ')';
130 | i += 1;
131 | }}
132 | return sql;
133 | """;
134 |
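135 | -- Example (illustrative): build the INSERT statement for two criteria that
136 | -- use the `&+` separator and `==` key/value format parsed above.
137 | --   SELECT `{project_id}.{dataset}.constructParsedCriteria_{external_customer_id}`(
138 | --     ['brand==google&+custom0==sale', 'id==sku123']);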
--------------------------------------------------------------------------------
/scripts/1_product_view.sql:
--------------------------------------------------------------------------------
1 | # Copyright 2020 Google LLC.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | #     http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | -- Creates a latest snapshot view of products.
16 | --
17 | -- The Products_ table has product data partitioned by date.
18 | -- This view gets the latest product data and creates derived columns useful
19 | -- for further processing.
20 |
21 | CREATE OR REPLACE VIEW `{project_id}.{dataset}.product_view_{merchant_id}`
22 | AS (
23 | WITH
24 | ApprovedOffer AS (
25 | SELECT DISTINCT
26 | _PARTITIONDATE,
27 | product_id,
28 | merchant_id,
29 | target_country
30 | FROM
31 | `{project_id}.{dataset}.Products_{merchant_id}` AS Products,
32 | Products.destinations,
33 | destinations.approved_countries AS target_country
34 | ),
35 | PendingOffer AS (
36 | SELECT DISTINCT
37 | _PARTITIONDATE,
38 | product_id,
39 | merchant_id,
40 | target_country
41 | FROM
42 | `{project_id}.{dataset}.Products_{merchant_id}` AS Products,
43 | Products.destinations,
44 | destinations.pending_countries AS target_country
45 | ),
46 | DisapprovedOffer AS (
47 | SELECT DISTINCT
48 | _PARTITIONDATE,
49 | product_id,
50 | merchant_id,
51 | target_country
52 | FROM
53 | `{project_id}.{dataset}.Products_{merchant_id}` AS Products,
54 | Products.destinations,
55 | destinations.disapproved_countries AS target_country
56 | ),
57 | OfferIssue AS (
58 | SELECT
59 | _PARTITIONDATE,
60 | product_id,
61 | merchant_id,
62 | target_country,
63 | STRING_AGG(
64 | IF(LOWER(issues.servability) = 'disapproved', issues.short_description, NULL), ", ")
65 | AS disapproval_issues,
66 | STRING_AGG(
67 | IF(LOWER(issues.servability) = 'demoted', issues.short_description, NULL), ", ")
68 | AS demotion_issues,
69 | STRING_AGG(
70 | IF(LOWER(issues.servability) = 'unaffected', issues.short_description, NULL), ", ")
71 | AS warning_issues
72 | FROM
73 | `{project_id}.{dataset}.Products_{merchant_id}` AS Products,
74 | Products.issues,
75 | issues.applicable_countries AS target_country
76 | GROUP BY
77 | 1, 2, 3, 4
78 | ),
79 | MultiChannelTable AS (
80 | SELECT DISTINCT
81 | _PARTITIONDATE,
82 | merchant_id,
83 | product_id
84 | FROM
85 | `{project_id}.{dataset}.Products_{merchant_id}`
86 | GROUP BY
87 | _PARTITIONDATE,
88 | merchant_id,
89 | product_id
90 | HAVING COUNT(DISTINCT(channel)) > 1
91 | ),
92 | LatestDate AS (
93 | SELECT
94 | MAX(_PARTITIONDATE) AS latest_date
95 | FROM
96 | `{project_id}.{dataset}.Products_{merchant_id}`
97 | ),
98 | ProductStatus AS (
99 | SELECT
100 | Products._PARTITIONDATE as data_date,
101 | LatestDate.latest_date,
102 | Products.product_id,
103 | Products.merchant_id,
104 | Products.aggregator_id,
105 | Products.offer_id,
106 | Products.title,
107 | Products.description,
108 | Products.link,
109 | Products.mobile_link,
110 | Products.image_link,
111 | Products.additional_image_links,
112 | Products.content_language,
113 | COALESCE(
114 | ApprovedOffer.target_country,
115 | PendingOffer.target_country,
116 | DisapprovedOffer.target_country) AS target_country,
117 | Products.channel,
118 | Products.expiration_date,
119 | Products.google_expiration_date,
120 | Products.adult,
121 | Products.age_group,
122 | Products.availability,
123 | Products.availability_date,
124 | Products.brand,
125 | Products.color,
126 | Products.condition,
127 | Products.custom_labels,
128 | Products.gender,
129 | Products.gtin,
130 | Products.item_group_id,
131 | Products.material,
132 | Products.mpn,
133 | Products.pattern,
134 | Products.price,
135 | Products.sale_price,
136 | Products.sale_price_effective_start_date,
137 | Products.sale_price_effective_end_date,
138 | Products.google_product_category,
139 | Products.google_product_category_path,
140 | Products.product_type,
141 | Products.additional_product_types,
142 | IF(ApprovedOffer.product_id IS NULL, 0, 1) is_approved,
143 | CONCAT(CAST(Products.merchant_id AS STRING), '|', Products.product_id)
144 | AS unique_product_id,
145 | IFNULL(SPLIT(Products.product_type, '>')[SAFE_OFFSET(0)], 'N/A') AS product_type_l1,
146 | IFNULL(SPLIT(Products.product_type, '>')[SAFE_OFFSET(1)], 'N/A') AS product_type_l2,
147 | IFNULL(SPLIT(Products.product_type, '>')[SAFE_OFFSET(2)], 'N/A') AS product_type_l3,
148 | IFNULL(SPLIT(Products.product_type, '>')[SAFE_OFFSET(3)], 'N/A') AS product_type_l4,
149 | IFNULL(SPLIT(Products.product_type, '>')[SAFE_OFFSET(4)], 'N/A') AS product_type_l5,
150 | IFNULL(SPLIT(Products.google_product_category_path, '>')[SAFE_OFFSET(0)], 'N/A')
151 | AS google_product_category_l1,
152 | IFNULL(SPLIT(Products.google_product_category_path, '>')[SAFE_OFFSET(1)], 'N/A')
153 | AS google_product_category_l2,
154 | IFNULL(SPLIT(Products.google_product_category_path, '>')[SAFE_OFFSET(2)], 'N/A')
155 | AS google_product_category_l3,
156 | IFNULL(SPLIT(Products.google_product_category_path, '>')[SAFE_OFFSET(3)], 'N/A')
157 | AS google_product_category_l4,
158 | IFNULL(SPLIT(Products.google_product_category_path, '>')[SAFE_OFFSET(4)], 'N/A')
159 | AS google_product_category_l5,
160 | IF(Products.availability = 'in stock', 1, 0) AS in_stock,
161 | IF(MultiChannelTable.product_id IS NULL, 'single_channel', 'multi_channel') AS channel_exclusivity
162 | FROM
163 | `{project_id}.{dataset}.Products_{merchant_id}` AS Products,
164 | LatestDate
165 | LEFT JOIN ApprovedOffer USING (_PARTITIONDATE, product_id, merchant_id)
166 | LEFT JOIN PendingOffer USING (_PARTITIONDATE, product_id, merchant_id)
167 | LEFT JOIN DisapprovedOffer USING (_PARTITIONDATE, product_id, merchant_id)
168 | LEFT JOIN MultiChannelTable USING (_PARTITIONDATE, product_id, merchant_id)
169 | )
170 | SELECT
171 | ProductStatus.*,
172 | OfferIssue.disapproval_issues,
173 | OfferIssue.demotion_issues,
174 | OfferIssue.warning_issues
175 | FROM
176 | ProductStatus
177 | LEFT JOIN OfferIssue
178 | ON
179 | OfferIssue._PARTITIONDATE = ProductStatus.data_date
180 | AND OfferIssue.product_id = ProductStatus.product_id
181 | AND OfferIssue.merchant_id = ProductStatus.merchant_id
182 | AND OfferIssue.target_country = ProductStatus.target_country
183 | );
184 |
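Editor's note: as an illustration of the SPLIT(...)[SAFE_OFFSET(n)] pattern above, a product_type such as "Apparel > Shoes > Sneakers" is broken into five level columns, with 'N/A' filling the missing levels. A Python sketch of the equivalent logic (not part of the repo; note the SQL does not trim whitespace around '>'):

```
def split_levels(path, levels=5, default='N/A'):
  """Mirrors IFNULL(SPLIT(path, '>')[SAFE_OFFSET(n)], 'N/A') for n in 0..4."""
  parts = path.split('>') if path else []
  return [parts[i] if i < len(parts) else default for i in range(levels)]

print(split_levels('Apparel > Shoes > Sneakers'))
# -> ['Apparel ', ' Shoes ', ' Sneakers', 'N/A', 'N/A']
```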
--------------------------------------------------------------------------------
/cloud_bigquery.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2020 Google LLC..
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | # python3
17 | """Cloud BigQuery module."""
18 |
19 | import logging
20 | import os
21 | from typing import Any, Dict
22 |
23 | import config_parser
24 | from google.cloud import bigquery
25 | from google.cloud import exceptions
26 |
27 | # Main workflow sql.
28 | _MAIN_WORKFLOW_SQL = 'scripts/main_workflow.sql'
29 | _BEST_SELLERS_WORKFLOW_SQL = 'scripts/market_insights/best_sellers_workflow.sql'
30 |
31 | # Set logging level.
32 | logging.getLogger().setLevel(logging.INFO)
33 | logging.getLogger('googleapiclient.discovery').setLevel(logging.WARNING)
34 |
35 |
36 | def create_dataset_if_not_exists(project_id: str, dataset_id: str) -> None:
37 | """Creates BigQuery dataset if it doesn't exists.
38 |
39 | Args:
40 | project_id: A cloud project id.
41 | dataset_id: BigQuery dataset id.
42 | """
43 | # Construct a BigQuery client object.
44 | client = bigquery.Client(project=project_id)
45 | fully_qualified_dataset_id = f'{project_id}.{dataset_id}'
46 | try:
47 | client.get_dataset(fully_qualified_dataset_id)
48 | logging.info('Dataset %s already exists.', fully_qualified_dataset_id)
49 | except exceptions.NotFound:
50 | logging.info('Dataset %s is not found.', fully_qualified_dataset_id)
51 | dataset = bigquery.Dataset(fully_qualified_dataset_id)
52 | dataset.location = config_parser.get_dataset_location()
53 | client.create_dataset(dataset)
54 | logging.info('Dataset %s created.', fully_qualified_dataset_id)
55 |
56 |
57 | def load_language_codes(project_id: str, dataset_id: str) -> None:
58 | """Loads language codes."""
59 | client = bigquery.Client(project=project_id)
60 | fully_qualified_table_id = f'{project_id}.{dataset_id}.language_codes'
61 | job_config = bigquery.LoadJobConfig(
62 | source_format=bigquery.SourceFormat.CSV,
63 | skip_leading_rows=1,
64 | autodetect=True,
65 | )
66 | file_name = 'data/language_codes.csv'
67 | with open(file_name, 'rb') as source_file:
68 | job = client.load_table_from_file(
69 | source_file, fully_qualified_table_id, job_config=job_config)
70 |
71 | job.result()
72 |
73 |
74 | def load_geo_targets(project_id: str, dataset_id: str) -> None:
75 | """Loads geo targets."""
76 | client = bigquery.Client(project=project_id)
77 | fully_qualified_table_id = f'{project_id}.{dataset_id}.geo_targets'
78 | job_config = bigquery.LoadJobConfig(
79 | source_format=bigquery.SourceFormat.CSV,
80 | skip_leading_rows=1,
81 | autodetect=True,
82 | )
83 | file_name = 'data/geo_targets.csv'
84 | with open(file_name, 'rb') as source_file:
85 | job = client.load_table_from_file(
86 | source_file, fully_qualified_table_id, job_config=job_config)
87 |
88 | job.result()
89 |
90 |
91 | def read_file(file_path: str) -> str:
92 | """Reads and returns contents of the file.
93 |
94 | Args:
95 | file_path: File path.
96 |
97 | Returns:
98 | content: File content.
99 |
100 | Raises:
101 | FileNotFoundError: If the provided file is not found.
102 | """
103 | try:
104 | with open(file_path, 'r') as stream:
105 | content = stream.read()
106 | except FileNotFoundError as e:
107 | raise FileNotFoundError(
108 | f'The file "{file_path}" could not be found.') from e
109 | else:
110 | return content
111 |
112 |
113 | def configure_sql(sql_path: str, query_params: Dict[str, Any]) -> str:
114 | """Configures parameters of SQL script with variables supplied.
115 |
116 | Args:
117 | sql_path: Path to SQL script.
118 | query_params: Configuration containing query parameter values.
119 |
120 | Returns:
121 | sql_script: String representation of SQL script with parameters assigned.
122 | """
123 | sql_script = read_file(sql_path)
124 |
125 | params = {}
126 | for param_key, param_value in query_params.items():
127 | # If given value is list of strings (ex. 'a,b,c'), create tuple of
128 | # strings (ex. ('a', 'b', 'c')) to pass to SQL IN operator.
129 | if isinstance(param_value, str) and ',' in param_value:
130 | params[param_key] = tuple(param_value.split(','))
131 | else:
132 | params[param_key] = param_value
133 |
134 | return sql_script.format(**params)
135 |
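# Editor's sketch (not part of the original module): hypothetical usage of
# configure_sql. A comma-separated string value becomes a tuple, and the
# tuple's Python repr is valid SQL for an IN operator once substituted by
# str.format. Given a hypothetical template file scripts/example.sql
# containing:
#   SELECT * FROM `{project_id}.{dataset}.t` WHERE country IN {countries}
# then:
#   sql = configure_sql('scripts/example.sql',
#                       {'project_id': 'my-project', 'dataset': 'markup',
#                        'countries': 'US,GB,DE'})
#   # -> SELECT * FROM `my-project.markup.t` WHERE country IN ('US', 'GB', 'DE')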
136 |
137 | def execute_queries(project_id: str, dataset_id: str, merchant_id: str,
138 | customer_id: str, enable_market_insights: bool) -> None:
139 | """Executes list of queries."""
140 | # Sql files to be executed in a specific order.
141 | # The prefix "scripts" should be omitted.
142 | sql_files = [
143 | '1_product_view.sql',
144 | 'targeted_products/targeted_product_ddl.sql',
145 | 'targeted_products/construct_parsed_criteria.sql',
146 | '2_product_metrics_view.sql',
147 | '3_customer_view.sql',
148 | '4_product_detailed_view.sql',
149 | 'materialize_product_detailed.sql',
150 | 'materialize_product_historical.sql',
151 | ]
152 | if enable_market_insights:
153 | market_insights_sql_files = [
154 | 'market_insights/snapshot_view.sql',
155 | 'market_insights/historical_view.sql'
156 | ]
157 | sql_files.extend(market_insights_sql_files)
158 | prefix = 'scripts'
159 | query_params = {
160 | 'project_id': project_id,
161 | 'dataset': dataset_id,
162 | 'merchant_id': merchant_id,
163 | 'external_customer_id': customer_id
164 | }
165 | location = config_parser.get_dataset_location()
166 | client = bigquery.Client(project=project_id)
167 | for sql_file in sql_files:
168 | try:
169 | query = configure_sql(os.path.join(prefix, sql_file), query_params)
170 | query_job = client.query(query, location=location)
171 | query_job.result()
172 | except Exception:
173 | logging.exception('Error in %s', sql_file)
174 | raise
175 |
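# Editor's sketch (not part of the original module): a hypothetical call to
# execute_queries with placeholder ids. The scripts run in the listed order
# because later views and procedures depend on the earlier ones.
#   execute_queries(project_id='my-project', dataset_id='markup',
#                   merchant_id='1234567', customer_id='1234567890',
#                   enable_market_insights=False)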
176 |
177 | def get_main_workflow_sql(project_id: str, dataset_id: str, merchant_id: str,
178 | customer_id: str) -> str:
179 | """Returns main workflow sql.
180 |
181 | Args:
182 | project_id: A cloud project id.
183 | dataset_id: BigQuery dataset id.
184 | merchant_id: Merchant center id.
185 | customer_id: Google Ads customer id.
186 | """
187 | query_params = {
188 | 'project_id': project_id,
189 | 'dataset': dataset_id,
190 | 'merchant_id': merchant_id,
191 | 'external_customer_id': customer_id
192 | }
193 | return configure_sql(_MAIN_WORKFLOW_SQL, query_params)
194 |
195 |
196 | def get_best_sellers_workflow_sql(project_id: str, dataset_id: str,
197 | merchant_id: str) -> str:
198 | """Returns main workflow sql.
199 |
200 | Args:
201 | project_id: A cloud project id.
202 | dataset_id: BigQuery dataset id.
203 | merchant_id: Merchant center id.
204 | """
205 | query_params = {
206 | 'project_id': project_id,
207 | 'dataset': dataset_id,
208 | 'merchant_id': merchant_id
209 | }
210 | return configure_sql(_BEST_SELLERS_WORKFLOW_SQL, query_params)
211 |
--------------------------------------------------------------------------------
/scripts/4_product_detailed_view.sql:
--------------------------------------------------------------------------------
1 | # Copyright 2020 Google LLC..
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | -- Creates a latest snapshot view of products combined with performance metrics.
16 | CREATE OR REPLACE VIEW
17 | `{project_id}.{dataset}.product_detailed_view` AS
18 | WITH
19 | ProductMetrics AS (
20 | SELECT
21 | product_view.data_date,
22 | product_view.unique_product_id,
23 | product_metrics_view.externalcustomerid,
24 | product_view.target_country,
25 | SUM(product_metrics_view.impressions) AS impressions_30_days,
26 | SUM(product_metrics_view.clicks) AS clicks_30_days,
27 | SUM(product_metrics_view.cost) AS cost_30_days,
28 | SUM(product_metrics_view.conversions) AS conversions_30_days,
29 | SUM(product_metrics_view.conversions_value) AS conversions_value_30_days
30 | FROM
31 | `{project_id}.{dataset}.product_metrics_view` product_metrics_view
32 | INNER JOIN
33 | `{project_id}.{dataset}.product_view_{merchant_id}` product_view
34 | ON
35 | product_metrics_view.merchantid = product_view.merchant_id
36 | AND LOWER(product_metrics_view.channel) = LOWER(product_view.channel)
37 | AND LOWER(product_metrics_view.language_code) = LOWER(product_view.content_language)
38 | AND LOWER(product_metrics_view.country_code) = LOWER(product_view.target_country)
39 | AND LOWER(product_metrics_view.offer_id) = LOWER(product_view.offer_id)
40 | AND product_metrics_view.data_date
41 | BETWEEN DATE_SUB(product_view.data_date, INTERVAL 30 DAY)
42 | AND product_view.data_date
43 | GROUP BY
44 | data_date,
45 | unique_product_id,
46 | externalcustomerid,
47 | target_country
48 | ),
49 | ProductData AS (
50 | SELECT
51 | product_view.data_date,
52 | product_view.latest_date,
53 | COALESCE(product_view.aggregator_id, product_view.merchant_id) AS account_id,
54 | MAX(customer_view.accountdescriptivename) AS account_display_name,
55 | product_view.merchant_id AS sub_account_id,
56 | product_view.unique_product_id,
57 | product_view.target_country,
58 | MAX(product_view.offer_id) AS offer_id,
59 | MAX(product_view.channel) AS channel,
60 | MAX(product_view.in_stock) AS in_stock,
61 | # An offer is labeled as approved when able to serve on all destinations
62 | MAX(is_approved) AS is_approved,
63 | # Aggregated Issues & Servability Statuses
64 | MAX(disapproval_issues) as disapproval_issues,
65 | MAX(demotion_issues) as demotion_issues,
66 | MAX(warning_issues) as warning_issues,
67 | MIN(IF(TargetedProduct.product_id IS NULL, 0, 1)) AS is_targeted,
68 | MAX(title) AS title,
69 | MAX(link) AS item_url,
70 | MAX(product_type_l1) AS product_type_l1,
71 | MAX(product_type_l2) AS product_type_l2,
72 | MAX(product_type_l3) AS product_type_l3,
73 | MAX(product_type_l4) AS product_type_l4,
74 | MAX(product_type_l5) AS product_type_l5,
75 | MAX(google_product_category_l1) AS google_product_category_l1,
76 | MAX(google_product_category_l2) AS google_product_category_l2,
77 | MAX(google_product_category_l3) AS google_product_category_l3,
78 | MAX(google_product_category_l4) AS google_product_category_l4,
79 | MAX(google_product_category_l5) AS google_product_category_l5,
80 | MAX(custom_labels.label_0) AS custom_label_0,
81 | MAX(custom_labels.label_1) AS custom_label_1,
82 | MAX(custom_labels.label_2) AS custom_label_2,
83 | MAX(custom_labels.label_3) AS custom_label_3,
84 | MAX(custom_labels.label_4) AS custom_label_4,
85 | MAX(product_view.brand) AS brand,
86 | MAX(ProductMetrics.impressions_30_days) AS impressions_30_days,
87 | MAX(ProductMetrics.clicks_30_days) AS clicks_30_days,
88 | MAX(ProductMetrics.cost_30_days) AS cost_30_days,
89 | MAX(ProductMetrics.conversions_30_days) AS conversions_30_days,
90 | MAX(ProductMetrics.conversions_value_30_days) AS conversions_value_30_days,
91 | MAX(description) AS description,
92 | MAX(mobile_link) AS mobile_link,
93 | MAX(image_link) AS image_link,
94 | ANY_VALUE(additional_image_links) AS additional_image_links,
95 | MAX(content_language) AS content_language,
96 | MAX(expiration_date) AS expiration_date,
97 | MAX(google_expiration_date) AS google_expiration_date,
98 | MAX(adult) AS adult,
99 | MAX(age_group) AS age_group,
100 | MAX(availability) AS availability,
101 | MAX(availability_date) AS availability_date,
102 | MAX(color) AS color,
103 | MAX(condition) AS condition,
104 | MAX(gender) AS gender,
105 | MAX(gtin) AS gtin,
106 | MAX(item_group_id) AS item_group_id,
107 | MAX(material) AS material,
108 | MAX(mpn) AS mpn,
109 | MAX(pattern) AS pattern,
110 | ANY_VALUE(price) AS price,
111 | ANY_VALUE(sale_price) AS sale_price,
112 | MAX(sale_price_effective_start_date) AS sale_price_effective_start_date,
113 | MAX(sale_price_effective_end_date) AS sale_price_effective_end_date,
114 | ANY_VALUE(additional_product_types) AS additional_product_types
115 | FROM
116 | `{project_id}.{dataset}.product_view_{merchant_id}` product_view
117 | LEFT JOIN
118 | ProductMetrics
119 | ON
120 | ProductMetrics.data_date = product_view.data_date
121 | AND ProductMetrics.unique_product_id = product_view.unique_product_id
122 | AND ProductMetrics.target_country = product_view.target_country
123 | LEFT JOIN
124 | `{project_id}.{dataset}.customer_view` customer_view
125 | ON
126 | customer_view.externalcustomerid = ProductMetrics.externalcustomerid
127 | AND customer_view.data_date = ProductMetrics.data_date
128 | LEFT JOIN
129 | `{project_id}.{dataset}.TargetedProduct_{external_customer_id}` TargetedProduct
130 | ON
131 | TargetedProduct.merchant_id = product_view.merchant_id
132 | AND TargetedProduct.product_id = product_view.product_id
133 | AND TargetedProduct.data_date = product_view.data_date
134 | AND TargetedProduct.target_country = product_view.target_country
135 | GROUP BY
136 | data_date,
137 | latest_date,
138 | account_id,
139 | product_view.merchant_id,
140 | product_view.unique_product_id,
141 | target_country
142 | )
143 | SELECT
144 | *,
145 | CASE
146 | WHEN is_approved = 1 AND in_stock = 1
147 | THEN 1
148 | ELSE 0
149 | END AS funnel_in_stock,
150 | CASE
151 | WHEN is_approved = 1 AND in_stock = 1 AND is_targeted = 1
152 | THEN 1
153 | ELSE 0
154 | END AS funnel_targeted,
155 | CASE
156 | WHEN
157 | is_approved = 1
158 | AND in_stock = 1
159 | AND is_targeted = 1
160 | AND impressions_30_days > 0
161 | THEN 1
162 | ELSE 0
163 | END AS funnel_has_impression,
164 | CASE
165 | WHEN
166 | is_approved = 1
167 | AND in_stock = 1
168 | AND is_targeted = 1
169 | AND impressions_30_days > 0
170 | AND clicks_30_days > 0
171 | THEN 1
172 | ELSE 0
173 | END AS funnel_has_clicks
174 | FROM
175 | ProductData;
176 |
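Editor's note: the four funnel_* columns above form a strict chain; each flag can be 1 only if every earlier stage is 1. A small Python sketch of the same AND-chain (illustrative, not part of the repo; NULL metrics behave like 0 here, as in the SQL):

```
def funnel_flags(is_approved, in_stock, is_targeted, impressions, clicks):
  """Mirrors the CASE expressions that compute the funnel_* columns."""
  funnel_in_stock = int(bool(is_approved and in_stock))
  funnel_targeted = int(bool(funnel_in_stock and is_targeted))
  funnel_has_impression = int(
      bool(funnel_targeted and impressions and impressions > 0))
  funnel_has_clicks = int(
      bool(funnel_has_impression and clicks and clicks > 0))
  return (funnel_in_stock, funnel_targeted,
          funnel_has_impression, funnel_has_clicks)

print(funnel_flags(1, 1, 1, 120, 0))  # -> (1, 1, 1, 0)
```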
--------------------------------------------------------------------------------
/scripts/materialize_product_detailed.sql:
--------------------------------------------------------------------------------
1 | # Copyright 2020 Google LLC..
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | -- Stored procedure for materializing product detailed data.
16 | --
17 | -- "product_detailed_materialized" differs from "product_detailed_view" in the
18 | -- following ways:
19 | -- 1. The two data sources (GMC and Google Ads) are joined based on the latest
20 | -- available data.
21 | -- 2. "product_detailed_materialized" is used by the MarkUp DataStudio
22 | -- dashboard in all the pages except the historical trends.
23 | -- 3. "product_detailed_view" is used to populate only the historical trends
24 | -- data.
25 | -- The main reason for this decision was the fragile nature of the data
26 | -- transfers. When either the Google Ads or the GMC transfer fails, which
27 | -- seems to happen quite often, the bulk of the MarkUp dashboard is unusable.
28 |
29 | CREATE OR REPLACE PROCEDURE `{project_id}.{dataset}.product_detailed_proc`()
30 | BEGIN
31 | CREATE OR REPLACE TABLE `{project_id}.{dataset}.product_detailed_materialized`
32 | AS (
33 | WITH
34 | TargetedProduct AS (
35 | SELECT
36 | merchant_id,
37 | product_id,
38 | target_country
39 | FROM
40 | `{project_id}.{dataset}.TargetedProduct_{external_customer_id}`
41 | WHERE
42 | data_date IN (
43 | (
44 | SELECT
45 | MAX(data_date)
46 | FROM
47 | `{project_id}.{dataset}.TargetedProduct_{external_customer_id}`
48 | )
49 | )
50 | ),
51 | ProductMetrics AS (
52 | SELECT
53 | product_view.latest_date,
54 | product_view.unique_product_id,
55 | product_metrics_view.externalcustomerid,
56 | product_view.target_country,
57 | SUM(product_metrics_view.impressions) AS impressions_30_days,
58 | SUM(product_metrics_view.clicks) AS clicks_30_days,
59 | SUM(product_metrics_view.cost) AS cost_30_days,
60 | SUM(product_metrics_view.conversions) AS conversions_30_days,
61 | SUM(product_metrics_view.conversions_value) AS conversions_value_30_days
62 | FROM
63 | `{project_id}.{dataset}.product_metrics_view` product_metrics_view
64 | INNER JOIN
65 | `{project_id}.{dataset}.product_view_{merchant_id}` product_view
66 | ON
67 | product_metrics_view.merchantid = product_view.merchant_id
68 | AND LOWER(product_metrics_view.channel) = LOWER(product_view.channel)
69 | AND LOWER(product_metrics_view.language_code) = LOWER(product_view.content_language)
70 | AND LOWER(product_metrics_view.country_code) = LOWER(product_view.target_country)
71 | AND LOWER(product_metrics_view.offer_id) = LOWER(product_view.offer_id)
72 | AND product_metrics_view.data_date
73 | BETWEEN DATE_SUB(product_view.latest_date, INTERVAL 30 DAY)
74 | AND product_view.latest_date
75 | WHERE
76 | product_view.data_date = product_view.latest_date
77 | GROUP BY
78 | latest_date,
79 | unique_product_id,
80 | externalcustomerid,
81 | target_country
82 | ),
83 | ProductData AS (
84 | SELECT
85 | product_view.data_date,
86 | product_view.latest_date,
87 | COALESCE(product_view.aggregator_id, product_view.merchant_id) AS account_id,
88 | MAX(customer_view.accountdescriptivename) AS account_display_name,
89 | product_view.merchant_id AS sub_account_id,
90 | product_view.unique_product_id,
91 | product_view.target_country,
92 | MAX(product_view.offer_id) AS offer_id,
93 | MAX(product_view.channel) AS channel,
94 | MAX(product_view.in_stock) AS in_stock,
95 | # An offer is labeled as approved when able to serve on all destinations
96 | MAX(is_approved) AS is_approved,
97 | # Aggregated Issues & Servability Statuses
98 | MAX(disapproval_issues) as disapproval_issues,
99 | MAX(demotion_issues) as demotion_issues,
100 | MAX(warning_issues) as warning_issues,
101 | MIN(IF(TargetedProduct.product_id IS NULL, 0, 1)) AS is_targeted,
102 | MAX(title) AS title,
103 | MAX(link) AS item_url,
104 | MAX(product_type_l1) AS product_type_l1,
105 | MAX(product_type_l2) AS product_type_l2,
106 | MAX(product_type_l3) AS product_type_l3,
107 | MAX(product_type_l4) AS product_type_l4,
108 | MAX(product_type_l5) AS product_type_l5,
109 | MAX(google_product_category_l1) AS google_product_category_l1,
110 | MAX(google_product_category_l2) AS google_product_category_l2,
111 | MAX(google_product_category_l3) AS google_product_category_l3,
112 | MAX(google_product_category_l4) AS google_product_category_l4,
113 | MAX(google_product_category_l5) AS google_product_category_l5,
114 | MAX(custom_labels.label_0) AS custom_label_0,
115 | MAX(custom_labels.label_1) AS custom_label_1,
116 | MAX(custom_labels.label_2) AS custom_label_2,
117 | MAX(custom_labels.label_3) AS custom_label_3,
118 | MAX(custom_labels.label_4) AS custom_label_4,
119 | MAX(product_view.brand) AS brand,
120 | MAX(ProductMetrics.impressions_30_days) AS impressions_30_days,
121 | MAX(ProductMetrics.clicks_30_days) AS clicks_30_days,
122 | MAX(ProductMetrics.cost_30_days) AS cost_30_days,
123 | MAX(ProductMetrics.conversions_30_days) AS conversions_30_days,
124 | MAX(ProductMetrics.conversions_value_30_days) AS conversions_value_30_days,
125 | MAX(description) AS description,
126 | MAX(mobile_link) AS mobile_link,
127 | MAX(image_link) AS image_link,
128 | ANY_VALUE(additional_image_links) AS additional_image_links,
129 | MAX(content_language) AS content_language,
130 | MAX(expiration_date) AS expiration_date,
131 | MAX(google_expiration_date) AS google_expiration_date,
132 | MAX(adult) AS adult,
133 | MAX(age_group) AS age_group,
134 | MAX(availability) AS availability,
135 | MAX(availability_date) AS availability_date,
136 | MAX(color) AS color,
137 | MAX(condition) AS condition,
138 | MAX(gender) AS gender,
139 | MAX(gtin) AS gtin,
140 | MAX(item_group_id) AS item_group_id,
141 | MAX(material) AS material,
142 | MAX(mpn) AS mpn,
143 | MAX(pattern) AS pattern,
144 | ANY_VALUE(price) AS price,
145 | ANY_VALUE(sale_price) AS sale_price,
146 | MAX(sale_price_effective_start_date) AS sale_price_effective_start_date,
147 | MAX(sale_price_effective_end_date) AS sale_price_effective_end_date,
148 | ANY_VALUE(additional_product_types) AS additional_product_types
149 | FROM
150 | `{project_id}.{dataset}.product_view_{merchant_id}` product_view
151 | LEFT JOIN
152 | ProductMetrics
153 | ON
154 | ProductMetrics.latest_date = product_view.data_date
155 | AND ProductMetrics.unique_product_id = product_view.unique_product_id
156 | AND ProductMetrics.target_country = product_view.target_country
157 | LEFT JOIN
158 | `{project_id}.{dataset}.customer_view` customer_view
159 | ON
160 | customer_view.externalcustomerid = ProductMetrics.externalcustomerid
161 | AND customer_view.data_date = customer_view.latest_date
162 | LEFT JOIN
163 | TargetedProduct
164 | ON
165 | TargetedProduct.merchant_id = product_view.merchant_id
166 | AND TargetedProduct.product_id = product_view.product_id
167 | AND TargetedProduct.target_country = product_view.target_country
168 | WHERE
169 | product_view.data_date = product_view.latest_date
170 | GROUP BY
171 | data_date,
172 | latest_date,
173 | account_id,
174 | product_view.merchant_id,
175 | product_view.unique_product_id,
176 | product_view.target_country
177 | )
178 | SELECT
179 | *,
180 | CASE
181 | WHEN is_approved = 1 AND in_stock = 1
182 | THEN 1
183 | ELSE 0
184 | END AS funnel_in_stock,
185 | CASE
186 | WHEN is_approved = 1 AND in_stock = 1 AND is_targeted = 1
187 | THEN 1
188 | ELSE 0
189 | END AS funnel_targeted,
190 | CASE
191 | WHEN
192 | is_approved = 1
193 | AND in_stock = 1
194 | AND is_targeted = 1
195 | AND impressions_30_days > 0
196 | THEN 1
197 | ELSE 0
198 | END AS funnel_has_impression,
199 | CASE
200 | WHEN
201 | is_approved = 1
202 | AND in_stock = 1
203 | AND is_targeted = 1
204 | AND impressions_30_days > 0
205 | AND clicks_30_days > 0
206 | THEN 1
207 | ELSE 0
208 | END AS funnel_has_clicks
209 | FROM
210 | ProductData
211 | );
212 | END;
213 |
214 | CALL `{project_id}.{dataset}.product_detailed_proc`();
215 |
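Editor's note: because the materialization is wrapped in a stored procedure, it can also be refreshed on demand outside the scheduled workflow. A sketch of invoking it from Python with the google-cloud-bigquery client (illustrative; the project and dataset names are placeholders):

```
from google.cloud import bigquery

client = bigquery.Client(project='my-project')
# Re-runs the materialization, replacing product_detailed_materialized.
client.query('CALL `my-project.markup.product_detailed_proc`()').result()
```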
--------------------------------------------------------------------------------
/scripts/main_workflow.sql:
--------------------------------------------------------------------------------
1 | # Copyright 2020 Google LLC..
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | -- Backfill main workflow.
16 | --
17 | -- The script parses targeting criteria to get the list of targeted products. It
18 | -- then materializes product detailed and product historical tables. The script
19 | -- uses @run_date parameter and hence can be backfilled on a specific date. This
20 | -- is useful when a Google Ads or GMC data transfer has failed on a specific
21 | -- day.
22 |
23 | DECLARE to_be_processed ARRAY<STRING> DEFAULT [];
24 | DECLARE where_clause STRING;
25 | DECLARE i INT64 DEFAULT 0;
26 | DECLARE BATCH_SIZE INT64 DEFAULT 500;
27 | DECLARE total_criterions DEFAULT 0;
28 |
29 | -- Clean-up existing tables.
30 | DELETE FROM
31 | `{project_id}.{dataset}.ParsedCriteria_{external_customer_id}`
32 | WHERE 1=1;
33 |
34 | DELETE FROM
35 | `{project_id}.{dataset}.TargetedProduct_{external_customer_id}`
36 | WHERE
37 | -- Delete data older than 90 days.
38 | data_date < DATE_SUB(@run_date, INTERVAL 90 DAY)
39 | OR data_date = @run_date;
40 |
41 | CREATE TEMPORARY TABLE IF NOT EXISTS DistinctCriterion AS (
42 | WITH DistinctCriterion AS (
43 | SELECT DISTINCT
44 | Criteria
45 | FROM
46 | `{project_id}.{dataset}.Criteria_{external_customer_id}` AS CriteriaTable
47 | WHERE
48 | CriteriaType = 'PRODUCT_PARTITION'
49 | -- If the run_date is not a backfill then use the latest available data.
50 | AND (
51 | (
52 | @run_date = CURRENT_DATE()
53 | AND CriteriaTable._DATA_DATE = CriteriaTable._LATEST_DATE)
54 | OR (
55 | @run_date <> CURRENT_DATE()
56 | AND CriteriaTable._DATA_DATE = @run_date))
57 | )
58 | SELECT
59 | Criteria,
60 | ROW_NUMBER() OVER (ORDER BY Criteria asc) as RowNum
61 | FROM
62 | DistinctCriterion
63 | );
64 |
65 | SET total_criterions = (SELECT COUNT(1) FROM DistinctCriterion);
66 |
67 | LOOP
68 | IF i >= total_criterions THEN
69 | BREAK;
70 | END IF;
71 | SET to_be_processed = (
72 | SELECT
73 | ARRAY_AGG(Criteria)
74 | FROM
75 | DistinctCriterion
76 | WHERE
77 | RowNum BETWEEN i AND i+BATCH_SIZE
78 | );
79 | SET i = i + BATCH_SIZE + 1;
80 | EXECUTE IMMEDIATE `{project_id}.{dataset}.constructParsedCriteria_{external_customer_id}`(to_be_processed);
81 | END LOOP;
82 |
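-- Editor's note (illustrative): with BATCH_SIZE = 500, the loop above
-- consumes RowNum windows [0, 500], [501, 1001], [1002, 1502], and so on.
-- Since RowNum starts at 1, the first pass covers 500 criteria and each
-- later pass covers 501, with no gaps or overlaps; e.g. 1,234 distinct
-- criteria are processed in three passes: rows 1-500, 501-1001, 1002-1234.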
83 |
84 | CREATE TEMP TABLE CriteriaInfo
85 | AS (
86 | WITH TargetedMerchantInfo AS (
87 | SELECT DISTINCT
88 | MerchantId AS merchant_id,
89 | AdGroupId AS ad_group_id,
90 | UPPER(GeoTargets.Country_Code) AS target_country
91 | FROM
92 | `{project_id}.{dataset}.ShoppingProductStats_{external_customer_id}` AS ShoppingProductStats
93 | INNER JOIN `{project_id}.{dataset}.geo_targets` GeoTargets
94 | ON GeoTargets.parent_id = ShoppingProductStats.CountryCriteriaId
95 | WHERE
96 | -- If the run_date is not a backfill then use the latest available data.
97 | (
98 | (
99 | @run_date = CURRENT_DATE()
100 | AND ShoppingProductStats._DATA_DATE = ShoppingProductStats._LATEST_DATE)
101 | OR (
102 | @run_date <> CURRENT_DATE()
103 | AND ShoppingProductStats._DATA_DATE = @run_date))
104 | )
105 | SELECT
106 | TargetedMerchantInfo.merchant_id,
107 | TargetedMerchantInfo.target_country,
108 | CriteriaTable.criteria
109 | FROM
110 | TargetedMerchantInfo
111 | INNER JOIN
112 | `{project_id}.{dataset}.Criteria_{external_customer_id}` AS CriteriaTable
113 | ON
114 | CriteriaTable.AdGroupId = TargetedMerchantInfo.ad_group_id
115 | -- If the run_date is not a backfill then use the latest available data.
116 | AND (
117 | (
118 | @run_date = CURRENT_DATE()
119 | AND CriteriaTable._DATA_DATE = CriteriaTable._LATEST_DATE)
120 | OR (
121 | @run_date <> CURRENT_DATE()
122 | AND CriteriaTable._DATA_DATE = @run_date))
123 | );
124 |
125 | CREATE TEMP TABLE IdTargetedOffer
126 | AS (
127 | SELECT DISTINCT
128 | CriteriaInfo.merchant_id,
129 | CriteriaInfo.target_country,
130 | ParsedCriteria.offer_id
131 | FROM
132 | `{project_id}.{dataset}.ParsedCriteria_{external_customer_id}` ParsedCriteria
133 | INNER JOIN CriteriaInfo
134 | ON ParsedCriteria.criteria = CriteriaInfo.criteria
135 | WHERE
136 | ParsedCriteria.offer_id IS NOT NULL
137 | );
138 |
139 |
140 | CREATE TEMP TABLE IdTargeted
141 | AS (
142 | SELECT
143 | ProductView.data_date,
144 | ProductView.product_id,
145 | ProductView.merchant_id,
146 | ProductView.target_country
147 | FROM
148 | `{project_id}.{dataset}.product_view_{merchant_id}` AS ProductView
149 | INNER JOIN IdTargetedOffer
150 | ON
151 | IdTargetedOffer.merchant_id = ProductView.merchant_id
152 | AND IdTargetedOffer.target_country = ProductView.target_country
153 | AND IdTargetedOffer.offer_id = ProductView.offer_id
154 | WHERE
155 | -- If the run_date is not a backfill then use the latest available data.
156 | (
157 | (
158 | @run_date = CURRENT_DATE()
159 | AND ProductView.data_date = ProductView.latest_date)
160 | OR (
161 | @run_date <> CURRENT_DATE()
162 | AND ProductView.data_date = @run_date))
163 | );
164 |
165 | CREATE TEMP TABLE NonIdTargeted
166 | AS (
167 | SELECT
168 | ProductView.data_date,
169 | ProductView.product_id,
170 | ProductView.merchant_id,
171 | ProductView.target_country
172 | FROM
173 | `{project_id}.{dataset}.product_view_{merchant_id}` AS ProductView
174 | INNER JOIN CriteriaInfo
175 | ON
176 | CriteriaInfo.merchant_id = ProductView.merchant_id
177 | AND CriteriaInfo.target_country = ProductView.target_country
178 | INNER JOIN `{project_id}.{dataset}.ParsedCriteria_{external_customer_id}` AS ParsedCriteria
179 | ON
180 | ParsedCriteria.criteria = CriteriaInfo.criteria
181 | AND (
182 | ParsedCriteria.custom_label0 IS NULL
183 | OR TRIM(LOWER(ParsedCriteria.custom_label0)) = TRIM(LOWER(ProductView.custom_labels.label_0)))
184 | AND (
185 | ParsedCriteria.custom_label1 IS NULL
186 | OR TRIM(LOWER(ParsedCriteria.custom_label1)) = TRIM(LOWER(ProductView.custom_labels.label_1)))
187 | AND (
188 | ParsedCriteria.custom_label2 IS NULL
189 | OR TRIM(LOWER(ParsedCriteria.custom_label2)) = TRIM(LOWER(ProductView.custom_labels.label_2)))
190 | AND (
191 | ParsedCriteria.custom_label3 IS NULL
192 | OR TRIM(LOWER(ParsedCriteria.custom_label3)) = TRIM(LOWER(ProductView.custom_labels.label_3)))
193 | AND (
194 | ParsedCriteria.custom_label4 IS NULL
195 | OR TRIM(LOWER(ParsedCriteria.custom_label4)) = TRIM(LOWER(ProductView.custom_labels.label_4)))
196 | AND (
197 | ParsedCriteria.product_type_l1 IS NULL
198 | OR TRIM(LOWER(ParsedCriteria.product_type_l1)) = TRIM(LOWER(ProductView.product_type_l1)))
199 | AND (
200 | ParsedCriteria.product_type_l2 IS NULL
201 | OR TRIM(LOWER(ParsedCriteria.product_type_l2)) = TRIM(LOWER(ProductView.product_type_l2)))
202 | AND (
203 | ParsedCriteria.product_type_l3 IS NULL
204 | OR TRIM(LOWER(ParsedCriteria.product_type_l3)) = TRIM(LOWER(ProductView.product_type_l3)))
205 | AND (
206 | ParsedCriteria.product_type_l4 IS NULL
207 | OR TRIM(LOWER(ParsedCriteria.product_type_l4)) = TRIM(LOWER(ProductView.product_type_l4)))
208 | AND (
209 | ParsedCriteria.product_type_l5 IS NULL
210 | OR TRIM(LOWER(ParsedCriteria.product_type_l5)) = TRIM(LOWER(ProductView.product_type_l5)))
211 | AND (
212 | ParsedCriteria.google_product_category_l1 IS NULL
213 | OR TRIM(LOWER(ParsedCriteria.google_product_category_l1)) = TRIM(LOWER(ProductView.google_product_category_l1)))
214 | AND (
215 | ParsedCriteria.google_product_category_l2 IS NULL
216 | OR TRIM(LOWER(ParsedCriteria.google_product_category_l2)) = TRIM(LOWER(ProductView.google_product_category_l2)))
217 | AND (
218 | ParsedCriteria.google_product_category_l3 IS NULL
219 | OR TRIM(LOWER(ParsedCriteria.google_product_category_l3)) = TRIM(LOWER(ProductView.google_product_category_l3)))
220 | AND (
221 | ParsedCriteria.google_product_category_l4 IS NULL
222 | OR TRIM(LOWER(ParsedCriteria.google_product_category_l4)) = TRIM(LOWER(ProductView.google_product_category_l4)))
223 | AND (
224 | ParsedCriteria.google_product_category_l5 IS NULL
225 | OR TRIM(LOWER(ParsedCriteria.google_product_category_l5)) = TRIM(LOWER(ProductView.google_product_category_l5)))
226 | AND (
227 | ParsedCriteria.brand IS NULL
228 | OR TRIM(LOWER(ParsedCriteria.brand)) = TRIM(LOWER(ProductView.brand)))
229 | AND (
230 | ParsedCriteria.channel IS NULL
231 | OR TRIM(LOWER(ParsedCriteria.channel)) = TRIM(LOWER(ProductView.channel)))
232 | AND (
233 | ParsedCriteria.channel_exclusivity IS NULL
234 | OR TRIM(LOWER(ParsedCriteria.channel_exclusivity)) = TRIM(LOWER(ProductView.channel_exclusivity)))
235 | AND (
236 | ParsedCriteria.condition IS NULL
237 | OR TRIM(LOWER(ParsedCriteria.condition)) = TRIM(LOWER(ProductView.condition)))
238 | WHERE
239 | ParsedCriteria.offer_id IS NULL
240 | -- If the run_date is not a backfill then use the latest available data.
241 | AND (
242 | (
243 | @run_date = CURRENT_DATE()
244 | AND ProductView.data_date = ProductView.latest_date)
245 | OR (
246 | @run_date <> CURRENT_DATE()
247 | AND ProductView.data_date = @run_date))
248 | );
249 |
250 | INSERT `{project_id}.{dataset}.TargetedProduct_{external_customer_id}`
251 | (
252 | data_date,
253 | product_id,
254 | merchant_id,
255 | target_country
256 | )
257 | SELECT
258 | data_date,
259 | product_id,
260 | merchant_id,
261 | target_country
262 | FROM
263 | IdTargeted
264 | UNION ALL
265 | SELECT
266 | data_date,
267 | product_id,
268 | merchant_id,
269 | target_country
270 | FROM
271 | NonIdTargeted;
272 |
273 |
274 | -- Update product detailed and product historical materialized tables.
275 | CALL `{project_id}.{dataset}.product_detailed_proc`();
276 | CALL `{project_id}.{dataset}.product_historical_proc`();
277 |
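Editor's note: every read in this script is gated on @run_date, so the workflow can be re-run for a single missed day. A sketch of binding @run_date from Python (illustrative; the ids are placeholders, and get_main_workflow_sql comes from cloud_bigquery.py):

```
import datetime

from google.cloud import bigquery

import cloud_bigquery

client = bigquery.Client(project='my-project')
# Render the workflow template with placeholder project/dataset/account ids.
sql = cloud_bigquery.get_main_workflow_sql(
    'my-project', 'markup', '1234567', '1234567890')
# Bind @run_date to the day being backfilled.
job_config = bigquery.QueryJobConfig(query_parameters=[
    bigquery.ScalarQueryParameter('run_date', 'DATE',
                                  datetime.date(2021, 10, 1)),
])
client.query(sql, job_config=job_config).result()
```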
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # MarkUp
2 |
3 | --------------------------------------------------------------------------------
4 |
5 | > NEW UPDATE: This tool is deprecated; please go to
6 | > [Shopping Insider](https://github.com/google/shopping_insider)
7 |
8 | > Disclaimer: This is not an officially supported Google product.
9 |
10 | ### Please re-install MarkUp if you installed it before Oct 2021. This is to support the breaking schema changes in the BigQuery Data Transfer Service.
11 |
12 |
13 |
14 | MarkUp is a tool that enables retailers to grow their business using
15 | [Google Merchant Center](https://www.google.com/retail/solutions/merchant-center/)
16 | by making actionable, data-driven decisions to optimize shopping feed health
17 | and ads performance.
18 |
19 | ## Contents
20 |
21 | * [1. Overview](#1-overview)
22 | * [1.1. Value Proposition](#1-1-value-proposition)
23 | * [1.2. Solution Architecture](#solution-architecture)
24 | * [1.3. Solution Options](#solution-options)
25 | * [2. Installation](#2-installation)
26 | * [2.1. Environment Setup](#environment-setup)
27 | * [2.2. Cloud Environment Setup](#cloud-environment-setup)
28 | * [2.3. Configure Data Sources](#configure-data-sources)
29 | * [2.4. Create Data-Studio Dashboard](#data-studio-dashboard)
30 |
31 | ## 1. Overview
32 |
33 | The MarkUp solution is built for Shopping Ads customers to make actionable,
34 | data-driven decisions to improve their feed health and shopping ads performance.
35 |
36 | ### 1.1. Value Proposition
37 |
38 | * Users can find opportunities and issues at each stage of the Shopping Funnel,
39 | both overall and in detailed data cuts.
40 |
41 | * Richer insights with data joins that provide overall and product-level
42 | performance information pivoted on custom attributes (product type,
43 | brand, etc.) for deeper analysis.
44 |
45 | * Near real-time dashboard to share data and insights across different teams
46 | and areas of the business seamlessly to address issues & optimize
47 | performance.
48 |
49 | ### 1.2 Solution Architecture
50 |
51 | The solution will export data from GMC and Google Ads to your Google Cloud
52 | Project on a daily basis and provide insights via a Data Studio dashboard.
53 |
54 | ![Solution architecture](images/architecture.png)
55 |
56 | ### 1.3 Solution Options
57 |
58 | At this time, there are two onboarding options available:
59 |
60 | #### Markup
61 |
62 | This is the base solution that exclusively uses the products and product issues
63 | tables available via the Merchant Center Transfer. This will allow you to set up
64 | the
65 | [Markup Dashboard Template](https://datastudio.google.com/reporting/806b9d3e-073a-43c2-bba0-53a0eb65302f/page/QK7kB/preview).
66 |
67 | #### Markup + Market Insights
68 |
69 | Enabling Market Insights during the installation process will additionally
70 | configure the Market Insights tables available via the Merchant Center
71 | Transfer (Price Benchmarks & Best Sellers), as well as three additional
72 | BigQuery views:
73 |
74 | * `market_insights_snapshot` - a snapshot view that joins the latest product
75 | feed data with available price benchmarks, best seller status, and Google
76 | Ads performance over the last 30 days.
77 | * `market_insights_historical` - a date-partitioned view that joins the latest
78 | product feed data with historical price, price benchmarks, and Google Ads
79 | performance over the entire transfer data set.
80 | * `market_insights_best_sellers` - a view that joins the latest Best Sellers
81 | Top Products table with inventory status to show a ranked list of Top
82 | Products broken out by category.
83 | * Please note: this view is set to only show data for the `en-US` locale.
84 | For other locales, you will need to adjust the view's filtering after
85 | installation.
86 |
87 | With these additional views, you will be able to set up the
88 | [Merchant Market Insights Dashboard Template](https://datastudio.google.com/reporting/37411ae9-b5f3-4062-89ea-ea521c885c30/page/QK7kB/preview)
89 | in addition to the above Markup Dashboard template.
90 |
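Once installed, these views can be queried directly in BigQuery, for example from Python (an illustrative sketch; replace the project id with your own):

```
from google.cloud import bigquery

client = bigquery.Client(project='my-project')
rows = client.query(
    'SELECT * FROM `my-project.markup.market_insights_snapshot_view` LIMIT 10'
).result()
for row in rows:
    print(dict(row))
```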
91 | ## 2. Installation
92 |
93 | ### 2.1. Google Cloud Platform (GCP) setup
94 |
95 | #### 2.1.1 Create a GCP project with billing account
96 |
97 | You may skip this step if you already have a GCP account with billing enabled.
98 |
99 | * How to [Create a GCP account](https://cloud.google.com/?authuser=1) (if you
100 | don't have one already!)
101 |
102 | * How to
103 | [Create and Manage Projects](https://cloud.google.com/resource-manager/docs/creating-managing-projects)
104 |
105 | * How to
106 | [Create, Modify, or Close Your Billing Account](https://cloud.google.com/billing/docs/how-to/manage-billing-account)
107 |
108 | #### 2.1.2 Check the permissions
109 |
110 | Make sure the user running the installation has the following permissions.
111 |
112 | * [Standard Access For GMC](https://support.google.com/merchants/answer/1637190?hl=en)
113 |
114 | * [Standard Access For Google Ads](https://support.google.com/google-ads/answer/7476552?hl=en)
115 |
116 | * [Editor(or Owner) Role in Google Cloud Project](https://cloud.google.com/iam/docs/understanding-roles)
117 |
118 | ### 2.2. Cloud environment setup
119 |
120 | #### 2.2.1 Setup local environment.
121 |
122 | [Download and authenticate gcloud.](https://cloud.google.com/sdk/#Quick_Start)
123 |
124 | Alternatively, if the GMC account has fewer than 50 million products, you could
125 | use [Cloud Shell](https://ssh.cloud.google.com/cloudshell?shellonly=true), which
126 | comes with gcloud already installed. Cloud Shell disconnects after 1 hour,
127 | so we recommend using a local environment for large accounts, since their
128 | installation could take more than 1 hour to finish.
129 |
130 | #### 2.2.2 Check out source codes
131 |
132 | Open the [cloud shell](https://ssh.cloud.google.com/cloudshell?shellonly=true)
133 | or your terminal (if running locally) and clone the repository.
134 |
135 | ```
136 | git clone https://github.com/google/shopping-markup
137 | ```
138 |
139 | #### 2.2.3 Run install script
140 |
141 | Please provide the following inputs when running the `setup.sh` script:
142 |
143 | * [GCP Project Id](https://cloud.google.com/resource-manager/docs/creating-managing-projects)
144 |
145 | * [Google Merchant Center Id](https://support.google.com/merchants/answer/188924?hl=en)
146 |
147 | * [Google Ads External Customer Id](https://support.google.com/google-ads/answer/1704344?hl=en)
148 |
149 | * Market Insights - whether to deploy the Market Insights solution. Allowed
150 | values - True or False
151 |
152 | ```
153 | cd shopping-markup;
154 | sh setup.sh --project_id= --merchant_id= --ads_customer_id= --market_insights=False
155 | ```
156 |
157 | When installing, the script will check whether the current user has the proper
158 | authorization to continue. It may ask you to open a cloud authorization URL in
159 | the browser. Please follow the instructions shown in the command line.
160 |
161 | #### Note - If the script fails the first time you run it, this might be due to a delay in preparing Merchant Center account data. Please wait 1-3 days before re-running the script.
162 |
163 | During the installation process, the script will do the following:
164 |
165 | * Enable Google Cloud Components and Google APIs
166 |
167 | * [BigQuery](https://console.cloud.google.com/bigquery)
168 |
169 | * [BigQuery Data Transfer](https://console.cloud.google.com/bigquery/transfers)
170 |
171 | * Create Google Merchant Center and Google Ads data transfers.
172 |
173 | * Create recurring data transfer jobs so that the latest data is imported in
174 | near real time.
175 |
176 | * Create following MarkUp specific SQL tables.
177 |
178 | * product_detailed_materialized - Latest snapshot view of products
179 | combined with performance metrics. Each offer is split into rows for
180 | each targeted country; rows are keyed by unique_product_id and
181 | target_country.
182 | * product_historical_materialized - Historical snapshot of performance
183 | metrics at a product category level.
184 |
185 | #### 2.2.4 [Optional] Update location and locales if different than US
186 |
187 | * If your data shouldn't be materialized in the US, change the BigQuery
188 | dataset location in config.yaml.
189 |
190 | * [Market Insights only] Adjust the locales in best_sellers_workflow.sql,
191 | which are set to "en-US" by default.
192 |
193 | * You can make the changes before or after running the install script.
194 |
195 | * If you're making the changes afterwards, re-run the install script
196 | * Check the scheduled queries in BigQuery and disable any older version of
197 | the Main Workflow
198 |
199 | ### 2.3. Configure Data Sources
200 |
201 | You will need to create or copy required Data Source(s) in Data Studio:
202 |
203 | ### For Markup:
204 |
205 | * Create `product_detailed_materialized` Data Source (linked to
206 | `markup.product_detailed_materialized`)
207 | * Create `product_historical_materialized` Data Source (linked to
208 | `markup.product_historical_materialized`)
209 |
210 | To create a data source:
211 |
212 | * Click on the
213 | [link](https://datastudio.google.com/c/u/0/datasources/create?connectorId=2)
214 |
215 | * Make sure you are using the BigQuery connector. If not, choose "`BigQuery`"
216 | from the list of available connectors.
217 |
218 | * Search your GCP Project Id under My Projects.
219 |
220 | * Under Dataset, click on "`markup`".
221 |
222 | * Under Table, choose the required table view.
223 |
224 | * Click `Connect` in the top right corner and wait for the data-source to be
225 | created.
226 |
227 | ### For Merchant Market Insights:
228 |
229 | * Copy
230 | [Market Insights Snapshot (TEMPLATE)](https://datastudio.google.com/datasources/9dbdb290-0ea6-4d81-90df-5e4f9ec6f504)
231 | and connect it to `markup.market_insights_snapshot_view`
232 | * Copy
233 | [Market Insights Historical (TEMPLATE)](https://datastudio.google.com/datasources/0397b27f-96b8-44cd-afca-645f64882a26)
234 | and connect it to `markup.market_insights_historical_view`
235 | * Copy
236 | [Market Insights Best Sellers (TEMPLATE)](https://datastudio.google.com/datasources/b2f5bafe-01e2-4e30-bfb3-022a6c2f3ad6)
237 | and connect it to `markup.market_insights_best_sellers_materialized`
238 |
239 | To copy a data source:
240 |
241 | * Click on the data source template link above.
242 |
243 | * Click on the copy icon (![copy icon](images/copy_icon.png)) in the top
244 | right corner next to "Create Report".
245 |
246 | * Click "Copy Data Source" on the "Copy Data Source" pop-up.
247 |
248 | * Select your Project, Dataset, and Table to be connected, then press
249 | "Reconnect" in the top right corner.
250 |
251 | * Click "Apply" on the "Apply Connection Changes" pop-up
252 |
253 | * Repeat this process for all three data source templates above.
254 |
255 | ### 2.4. Create Data-Studio Dashboard(s)
256 |
257 | ### For Markup:
258 |
259 | * Click on the following link to the Data Studio template:
260 | [link](https://datastudio.google.com/reporting/22c41768-140b-4572-9bf7-4e7e1891a8dd/preview)
261 |
262 | * Click "`Use Template`"
263 |
264 | * Choose the new "`product_detailed_materialized`" and
265 | "`product_historical_materialized`" data-sources created in the previous
266 | step
267 |
268 | * Click "`Copy Report`"
269 |
270 | ### For Merchant Market Insights:
271 |
272 | * Click on the following link to the Data Studio template:
273 | [link](https://datastudio.google.com/reporting/806b9d3e-073a-43c2-bba0-53a0eb65302f/page/QK7kB/preview)
274 |
275 | * Click "`Use Template`"
276 |
277 | * Choose the three data-sources copied in the previous step
278 |
279 | * Click "`Copy Report`"
280 |
281 | #### Note - The performance metrics in the dashboard might take 12-24 hours to appear.
282 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 |
2 | Apache License
3 | Version 2.0, January 2004
4 | http://www.apache.org/licenses/
5 |
6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
7 |
8 | 1. Definitions.
9 |
10 | "License" shall mean the terms and conditions for use, reproduction,
11 | and distribution as defined by Sections 1 through 9 of this document.
12 |
13 | "Licensor" shall mean the copyright owner or entity authorized by
14 | the copyright owner that is granting the License.
15 |
16 | "Legal Entity" shall mean the union of the acting entity and all
17 | other entities that control, are controlled by, or are under common
18 | control with that entity. For the purposes of this definition,
19 | "control" means (i) the power, direct or indirect, to cause the
20 | direction or management of such entity, whether by contract or
21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
22 | outstanding shares, or (iii) beneficial ownership of such entity.
23 |
24 | "You" (or "Your") shall mean an individual or Legal Entity
25 | exercising permissions granted by this License.
26 |
27 | "Source" form shall mean the preferred form for making modifications,
28 | including but not limited to software source code, documentation
29 | source, and configuration files.
30 |
31 | "Object" form shall mean any form resulting from mechanical
32 | transformation or translation of a Source form, including but
33 | not limited to compiled object code, generated documentation,
34 | and conversions to other media types.
35 |
36 | "Work" shall mean the work of authorship, whether in Source or
37 | Object form, made available under the License, as indicated by a
38 | copyright notice that is included in or attached to the work
39 | (an example is provided in the Appendix below).
40 |
41 | "Derivative Works" shall mean any work, whether in Source or Object
42 | form, that is based on (or derived from) the Work and for which the
43 | editorial revisions, annotations, elaborations, or other modifications
44 | represent, as a whole, an original work of authorship. For the purposes
45 | of this License, Derivative Works shall not include works that remain
46 | separable from, or merely link (or bind by name) to the interfaces of,
47 | the Work and Derivative Works thereof.
48 |
49 | "Contribution" shall mean any work of authorship, including
50 | the original version of the Work and any modifications or additions
51 | to that Work or Derivative Works thereof, that is intentionally
52 | submitted to Licensor for inclusion in the Work by the copyright owner
53 | or by an individual or Legal Entity authorized to submit on behalf of
54 | the copyright owner. For the purposes of this definition, "submitted"
55 | means any form of electronic, verbal, or written communication sent
56 | to the Licensor or its representatives, including but not limited to
57 | communication on electronic mailing lists, source code control systems,
58 | and issue tracking systems that are managed by, or on behalf of, the
59 | Licensor for the purpose of discussing and improving the Work, but
60 | excluding communication that is conspicuously marked or otherwise
61 | designated in writing by the copyright owner as "Not a Contribution."
62 |
63 | "Contributor" shall mean Licensor and any individual or Legal Entity
64 | on behalf of whom a Contribution has been received by Licensor and
65 | subsequently incorporated within the Work.
66 |
67 | 2. Grant of Copyright License. Subject to the terms and conditions of
68 | this License, each Contributor hereby grants to You a perpetual,
69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
70 | copyright license to reproduce, prepare Derivative Works of,
71 | publicly display, publicly perform, sublicense, and distribute the
72 | Work and such Derivative Works in Source or Object form.
73 |
74 | 3. Grant of Patent License. Subject to the terms and conditions of
75 | this License, each Contributor hereby grants to You a perpetual,
76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
77 | (except as stated in this section) patent license to make, have made,
78 | use, offer to sell, sell, import, and otherwise transfer the Work,
79 | where such license applies only to those patent claims licensable
80 | by such Contributor that are necessarily infringed by their
81 | Contribution(s) alone or by combination of their Contribution(s)
82 | with the Work to which such Contribution(s) was submitted. If You
83 | institute patent litigation against any entity (including a
84 | cross-claim or counterclaim in a lawsuit) alleging that the Work
85 | or a Contribution incorporated within the Work constitutes direct
86 | or contributory patent infringement, then any patent licenses
87 | granted to You under this License for that Work shall terminate
88 | as of the date such litigation is filed.
89 |
90 | 4. Redistribution. You may reproduce and distribute copies of the
91 | Work or Derivative Works thereof in any medium, with or without
92 | modifications, and in Source or Object form, provided that You
93 | meet the following conditions:
94 |
95 | (a) You must give any other recipients of the Work or
96 | Derivative Works a copy of this License; and
97 |
98 | (b) You must cause any modified files to carry prominent notices
99 | stating that You changed the files; and
100 |
101 | (c) You must retain, in the Source form of any Derivative Works
102 | that You distribute, all copyright, patent, trademark, and
103 | attribution notices from the Source form of the Work,
104 | excluding those notices that do not pertain to any part of
105 | the Derivative Works; and
106 |
107 | (d) If the Work includes a "NOTICE" text file as part of its
108 | distribution, then any Derivative Works that You distribute must
109 | include a readable copy of the attribution notices contained
110 | within such NOTICE file, excluding those notices that do not
111 | pertain to any part of the Derivative Works, in at least one
112 | of the following places: within a NOTICE text file distributed
113 | as part of the Derivative Works; within the Source form or
114 | documentation, if provided along with the Derivative Works; or,
115 | within a display generated by the Derivative Works, if and
116 | wherever such third-party notices normally appear. The contents
117 | of the NOTICE file are for informational purposes only and
118 | do not modify the License. You may add Your own attribution
119 | notices within Derivative Works that You distribute, alongside
120 | or as an addendum to the NOTICE text from the Work, provided
121 | that such additional attribution notices cannot be construed
122 | as modifying the License.
123 |
124 | You may add Your own copyright statement to Your modifications and
125 | may provide additional or different license terms and conditions
126 | for use, reproduction, or distribution of Your modifications, or
127 | for any such Derivative Works as a whole, provided Your use,
128 | reproduction, and distribution of the Work otherwise complies with
129 | the conditions stated in this License.
130 |
131 | 5. Submission of Contributions. Unless You explicitly state otherwise,
132 | any Contribution intentionally submitted for inclusion in the Work
133 | by You to the Licensor shall be under the terms and conditions of
134 | this License, without any additional terms or conditions.
135 | Notwithstanding the above, nothing herein shall supersede or modify
136 | the terms of any separate license agreement you may have executed
137 | with Licensor regarding such Contributions.
138 |
139 | 6. Trademarks. This License does not grant permission to use the trade
140 | names, trademarks, service marks, or product names of the Licensor,
141 | except as required for reasonable and customary use in describing the
142 | origin of the Work and reproducing the content of the NOTICE file.
143 |
144 | 7. Disclaimer of Warranty. Unless required by applicable law or
145 | agreed to in writing, Licensor provides the Work (and each
146 | Contributor provides its Contributions) on an "AS IS" BASIS,
147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148 | implied, including, without limitation, any warranties or conditions
149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150 | PARTICULAR PURPOSE. You are solely responsible for determining the
151 | appropriateness of using or redistributing the Work and assume any
152 | risks associated with Your exercise of permissions under this License.
153 |
154 | 8. Limitation of Liability. In no event and under no legal theory,
155 | whether in tort (including negligence), contract, or otherwise,
156 | unless required by applicable law (such as deliberate and grossly
157 | negligent acts) or agreed to in writing, shall any Contributor be
158 | liable to You for damages, including any direct, indirect, special,
159 | incidental, or consequential damages of any character arising as a
160 | result of this License or out of the use or inability to use the
161 | Work (including but not limited to damages for loss of goodwill,
162 | work stoppage, computer failure or malfunction, or any and all
163 | other commercial damages or losses), even if such Contributor
164 | has been advised of the possibility of such damages.
165 |
166 | 9. Accepting Warranty or Additional Liability. While redistributing
167 | the Work or Derivative Works thereof, You may choose to offer,
168 | and charge a fee for, acceptance of support, warranty, indemnity,
169 | or other liability obligations and/or rights consistent with this
170 | License. However, in accepting such obligations, You may act only
171 | on Your own behalf and on Your sole responsibility, not on behalf
172 | of any other Contributor, and only if You agree to indemnify,
173 | defend, and hold each Contributor harmless for any liability
174 | incurred by, or claims asserted against, such Contributor by reason
175 | of your accepting any such warranty or additional liability.
176 |
177 | END OF TERMS AND CONDITIONS
178 |
179 | APPENDIX: How to apply the Apache License to your work.
180 |
181 | To apply the Apache License to your work, attach the following
182 | boilerplate notice, with the fields enclosed by brackets "[]"
183 | replaced with your own identifying information. (Don't include
184 | the brackets!) The text should be enclosed in the appropriate
185 | comment syntax for the file format. We also recommend that a
186 | file or class name and description of purpose be included on the
187 | same "printed page" as the copyright notice for easier
188 | identification within third-party archives.
189 |
190 | Copyright [yyyy] [name of copyright owner]
191 |
192 | Licensed under the Apache License, Version 2.0 (the "License");
193 | you may not use this file except in compliance with the License.
194 | You may obtain a copy of the License at
195 |
196 | http://www.apache.org/licenses/LICENSE-2.0
197 |
198 | Unless required by applicable law or agreed to in writing, software
199 | distributed under the License is distributed on an "AS IS" BASIS,
200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201 | See the License for the specific language governing permissions and
202 | limitations under the License.
203 |
--------------------------------------------------------------------------------
/plugins/cloud_utils/cloud_auth.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2020 Google LLC.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | # Lint as: python3
17 | # Copyright 2020 Google LLC
18 | #
19 | # Licensed under the Apache License, Version 2.0 (the "License");
20 | # you may not use this file except in compliance with the License.
21 | # You may obtain a copy of the License at
22 | #
23 | # http://www.apache.org/licenses/LICENSE-2.0
24 | #
25 | # Unless required by applicable law or agreed to in writing, software
26 | # distributed under the License is distributed on an "AS IS" BASIS,
27 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
28 | # See the License for the specific language governing permissions and
29 | # limitations under the License.
30 |
31 | """Perform auth related operations in GCP."""
32 |
33 | import base64
34 | import json
35 | import os
36 | import textwrap
37 | from typing import Any, Dict, Optional
38 | import logging
39 |
40 | import apiclient
41 | from googleapiclient import discovery
42 | from googleapiclient import errors
43 | import google.auth
44 | from google.auth import credentials
45 | from google.auth.transport import requests
46 | from google.oauth2 import service_account
47 | from plugins.cloud_utils import utils
48 |
49 | # HTTP status code
50 | _NOT_FOUND_ERROR_CODE = 404
51 |
52 | # Scope to manage service accounts
53 | _SCOPE = 'https://www.googleapis.com/auth/cloud-platform'
54 |
55 |
56 | class Error(Exception):
57 | """A generic error thrown for any exceptions in cloud_auth module."""
58 | pass
59 |
60 |
61 | def get_credentials(
62 |     service_account_key_file: Optional[str] = None) -> credentials.Credentials:
63 | """Get credentials to authenticate while calling GCP APIs.
64 |
65 | Args:
66 | service_account_key_file: Optional. File containing service account key. If
67 |       not passed, the default credentials will be used.
68 |
69 | Returns:
70 | credential: Credential object to authenticate while calling GCP APIs.
71 |
72 | Raises:
73 | FileNotFoundError: If the provided file is not found.
74 | Error: If no default credentials are found and service account key file is
75 | not given.
76 | """
77 | if service_account_key_file is None:
78 | try:
79 | default_credentials, _ = google.auth.default()
80 | return default_credentials
81 | except google.auth.exceptions.DefaultCredentialsError as error:
82 | error_message = textwrap.dedent("""
83 | No default credentials found. Please run
84 | `gcloud auth application-default login` before continuing.
85 | Error: {}""".format(error))
86 | raise Error(error_message)
87 | if not os.path.isfile(service_account_key_file):
88 | raise FileNotFoundError(
89 | 'The service account private key file "{}" not found.'.format(
90 | service_account_key_file))
91 | return service_account.Credentials.from_service_account_file(
92 | service_account_key_file, scopes=[_SCOPE])
93 |
94 |
95 | def build_service_client(
96 | service_name: str,
97 |     service_account_key_file: Optional[str] = None) -> apiclient.discovery.Resource:
98 | """Construct a Resource for interacting with GCP service APIs.
99 |
100 | Args:
101 | service_name: Name of the service for which the client is created.
102 | service_account_key_file: Optional. File containing service account key. If
103 |       not passed, the default credentials will be used.
104 |
105 | Returns:
106 | client: A client with methods for interacting with the service APIs.
107 | """
108 | credentials_info = get_credentials(service_account_key_file)
109 | return discovery.build(
110 | service_name, 'v1', credentials=credentials_info, cache_discovery=False)
111 |
112 |
113 | def _get_resource_manager_client() -> apiclient.discovery.Resource:
114 | """Creates a new resource manager client to manage GCP projects.
115 |
116 | Returns:
117 | client: The newly created resource manager client.
118 | """
119 | return build_service_client('cloudresourcemanager')
120 |
121 |
122 | def _get_iam_client() -> apiclient.discovery.Resource:
123 | """Creates a new iam client.
124 |
125 | Returns:
126 | client: The newly created client.
127 | """
128 | return build_service_client('iam')
129 |
130 |
131 | def _get_service_account_client() -> apiclient.discovery.Resource:
132 | """Creates a new client to manage service accounts.
133 |
134 | Returns:
135 | client: The newly created service account client.
136 | """
137 | return _get_iam_client().projects().serviceAccounts()
138 |
139 |
140 | def _get_service_account_email(project_id: str,
141 | service_account_name: str) -> str:
142 | """Retrieves the service account email id.
143 |
144 | Args:
145 | project_id: GCP project id.
146 | service_account_name: The service account name.
147 |
148 | Returns:
149 | service_account_email: The service account email id.
150 | """
151 | service_account_email = '{}@{}.iam.gserviceaccount.com'.format(
152 | service_account_name, project_id)
153 | return service_account_email
154 |
155 |
156 | def _get_service_account_name(project_id: str,
157 | service_account_name: str) -> str:
158 | """Retrieves fully qualified service account name.
159 |
160 | Args:
161 | project_id: GCP project id.
162 | service_account_name: The service account name.
163 |
164 | Returns:
165 | service_account_name: The fully qualified service account name.
166 | """
167 | service_account_email = _get_service_account_email(project_id,
168 | service_account_name)
169 | service_account_name = 'projects/{}/serviceAccounts/{}'.format(
170 | project_id, service_account_email)
171 | return service_account_name
172 |
173 |
174 | def create_service_account(project_id: str, service_account_name: str,
175 | role_name: str, file_name: str) -> Dict[str, Any]:
176 | """Create a new service account.
177 |
178 | Args:
179 | project_id: GCP project id.
180 | service_account_name: The service account name.
181 | role_name: The role to be assigned to the service account.
182 | file_name: The file where service account key will be stored.
183 |
184 | Returns:
185 | service_account: The newly created service account.
186 |
187 | Raises:
188 | ValueError: If the service_account_name is empty.
189 | ValueError: If the file_name is empty.
190 | """
191 | if not service_account_name:
192 | raise ValueError('Service account name cannot be empty.')
193 | if not file_name:
194 | raise ValueError('The file name cannot be empty.')
195 | service_account_details = get_service_account(project_id,
196 | service_account_name)
197 | if service_account_details:
198 | return service_account_details
199 | logging.info('Creating "%s" service account in "%s" project',
200 | service_account_name, project_id)
201 | request = _get_service_account_client().create(
202 | name='projects/' + project_id,
203 | body={
204 | 'accountId': service_account_name,
205 | 'serviceAccount': {
206 | 'displayName': service_account_name.upper()
207 | },
208 | })
209 |
210 | service_account_details = utils.execute_request(request)
211 | set_service_account_role(project_id, service_account_name, role_name)
212 | create_service_account_key(project_id, service_account_name, file_name)
213 | return service_account_details
214 |
215 |
216 | def get_service_account(project_id: str,
217 |                         service_account_name: str) -> Optional[Dict[str, Any]]:
218 | """Find the service account with given name.
219 |
220 | Args:
221 | project_id: GCP project id.
222 | service_account_name: The service account name.
223 |
224 | Returns:
225 | service_account: If the service account is found in the cloud project.
226 | None: If no service account is found.
227 | """
228 | try:
229 | logging.info('Retrieving "%s" service account in "%s" project',
230 | service_account_name, project_id)
231 | name = 'projects/{}/serviceAccounts/{}@{}.iam.gserviceaccount.com'.format(
232 | project_id, service_account_name, project_id)
233 | service_account_details = _get_service_account_client().get(
234 | name=name).execute()
235 | return service_account_details
236 | except errors.HttpError as error:
237 | if error.resp.status == _NOT_FOUND_ERROR_CODE:
238 | return None
239 | logging.exception('Error occurred while retrieving service account: "%s".',
240 | error)
241 | raise Error('Error occurred while retrieving service account.')
242 |
243 |
244 | def create_service_account_key(project_id: str, service_account_name: str,
245 | file_name: str) -> None:
246 | """Creates key for service account and writes the private key to the file.
247 |
248 | Args:
249 | project_id: GCP project id.
250 | service_account_name: The service account name.
251 | file_name: The file to which the private key will be written.
252 | """
253 | with open(file_name, 'w+') as file_object:
254 | _create_service_account_key(project_id, service_account_name, file_object)
255 |
256 |
257 | def _create_service_account_key(project_id: str, service_account_name: str,
258 | file_object: Any) -> None:
259 | """Creates key for service account and writes private key to the file object.
260 |
261 | Args:
262 | project_id: GCP project id.
263 | service_account_name: The service account name.
264 | file_object: The file object to which the private key will be written.
265 | """
266 | name = 'projects/{}/serviceAccounts/{}@{}.iam.gserviceaccount.com'.format(
267 | project_id, service_account_name, project_id)
268 | logging.info(
269 | 'Creating service account key for "%s" service account in "%s" project',
270 | service_account_name, project_id)
271 | request = _get_service_account_client().keys().create(name=name, body={})
272 | service_account_key = utils.execute_request(request)
273 | private_key = base64.b64decode(service_account_key['privateKeyData']).decode()
274 | private_key_json = json.loads(private_key)
275 | json.dump(private_key_json, file_object)
276 |
277 |
278 | def set_service_account_role(project_id, service_account_name,
279 | role_name) -> None:
280 | """Adds role to a given service account.
281 |
282 | The roles grant service accounts appropriate permissions to use specific
283 |   resources. The role_name can be a primitive, predefined or custom role.
284 | Please see https://cloud.google.com/iam/docs/understanding-roles for list of
285 | allowed primitive and predefined roles.
286 |
287 | Args:
288 | project_id: GCP project id.
289 | service_account_name: The service account name.
290 | role_name: The role to be added to the service account. The role_name
291 | doesn't need "roles/" prefix to be added. Allowed values -
292 | https://cloud.google.com/iam/docs/understanding-roles. e.g - editor
293 | """
294 | logging.info('Adding "%s" role to "%s" service account in "%s" project',
295 | role_name, service_account_name, project_id)
296 | # Read existing binding.
297 | service_account_email = _get_service_account_email(project_id,
298 | service_account_name)
299 | member = 'serviceAccount:{}'.format(service_account_email)
300 | binding = {'role': f'roles/{role_name}', 'members': [member]}
301 | request = _get_resource_manager_client().projects().getIamPolicy(
302 | resource=project_id)
303 | policy = utils.execute_request(request)
304 |
305 | # Modify binding.
306 | policy['bindings'].append(binding)
307 |
308 | # Write binding.
309 | set_iam_policy_request_body = {'policy': policy}
310 | request = _get_resource_manager_client().projects().setIamPolicy(
311 | resource=project_id, body=set_iam_policy_request_body)
312 | utils.execute_request(request)
313 |
314 |
315 | def get_auth_session(
316 | service_account_key_file: str) -> requests.AuthorizedSession:
317 | """Creates AuthorizedSession for given service account.
318 |
319 | Args:
320 | service_account_key_file: File which contains service account private key.
321 |
322 | Returns:
323 | authorized_session: AuthorizedSession for service account.
324 |
325 | Raises:
326 | FileNotFoundError: If the provided file is not found.
327 | """
328 | credentials_info = get_credentials(service_account_key_file)
329 | return requests.AuthorizedSession(credentials_info)
330 |
--------------------------------------------------------------------------------
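
A minimal usage sketch for plugins/cloud_utils/cloud_auth.py above. The
project id, service account name, and key file path are hypothetical
placeholders; the role name follows the module's own convention of omitting
the "roles/" prefix.

    from plugins.cloud_utils import cloud_auth

    # Create the service account if it does not exist, grant it the
    # 'editor' role, and write its private key to a local JSON file.
    cloud_auth.create_service_account('my-project', 'markup-runner',
                                      'editor', '/tmp/markup-key.json')

    # Build a discovery client for a GCP service using that key; omitting
    # the key file falls back to application-default credentials.
    iam_client = cloud_auth.build_service_client('iam', '/tmp/markup-key.json')

    # An AuthorizedSession for raw REST calls with the same credentials.
    session = cloud_auth.get_auth_session('/tmp/markup-key.json')
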
/plugins/cloud_utils/cloud_storage.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2020 Google LLC.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | # Lint as: python3
17 | # Copyright 2020 Google LLC
18 | #
19 | # Licensed under the Apache License, Version 2.0 (the "License");
20 | # you may not use this file except in compliance with the License.
21 | # You may obtain a copy of the License at
22 | #
23 | # http://www.apache.org/licenses/LICENSE-2.0
24 | #
25 | # Unless required by applicable law or agreed to in writing, software
26 | # distributed under the License is distributed on an "AS IS" BASIS,
27 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
28 | # See the License for the specific language governing permissions and
29 | # limitations under the License.
30 |
31 | """Manage operations on Cloud Storage."""
32 |
33 | import os
34 | from typing import Optional, Tuple
35 | from urllib import parse
36 |
37 | import logging
38 |
39 | from google.api_core import exceptions
40 | from google.api_core import retry
41 | from google.cloud import storage
42 | from plugins.cloud_utils import cloud_auth
43 |
44 |
45 | class Error(Exception):
46 | """A generic error thrown for exceptions in cloud_storage module."""
47 | pass
48 |
49 |
50 | class CloudStorageUtils(object):
51 | """CloudStorageUtils provides methods to manage Cloud Storage.
52 |
53 | Typical usage example:
54 |     >>> cloud_storage = CloudStorageUtils('project_id', 'key_file.json')
55 |     >>> cloud_storage.upload_file_to_url('/tmp/file.txt', 'gs://bucket/file.txt')
56 | """
57 |
58 | def __init__(self,
59 | project_id: str,
60 | service_account_key_file: Optional[str] = None) -> None:
61 | """Initialize new instance of CloudStorageUtils.
62 |
63 | Args:
64 | project_id: GCP project id.
65 | service_account_key_file: File containing service account key. If not
66 |         passed, the default credentials will be used. Service accounts can be
67 |         created in the following ways -
68 | 1) Use `build_service_client` method from `cloud_auth` module.
69 | 2) Use `gcloud` command line utility as documented here -
70 | https://cloud.google.com/iam/docs/creating-managing-service-account-keys
71 | """
72 | credentials = cloud_auth.get_credentials(service_account_key_file)
73 | self.client = storage.Client(project=project_id, credentials=credentials)
74 |
75 | def _parse_blob_url(self, url: str) -> Tuple[str, str]:
76 | """"Parses Cloud Storage URL and returns a tuple of bucket name and path.
77 |
78 | Args:
79 | url: The full URL to blob, in the form of 'gs://bucket_name/path/to/blob'.
80 |
81 | Returns:
82 | Tuple: The bucket name and path tuple.
83 |
84 | Raises:
85 |       Error: If the URL is malformed (no bucket name could be parsed).
86 | """
87 | result = parse.urlparse(url)
88 | # result.path will be '/path/to/blob', we need to strip the leading '/'.
89 | bucket_name, path = result.hostname, result.path[1:]
90 | if not bucket_name:
91 | error_message = (f'Invalid URL - "{url}". The URL should be in the form '
92 | f'of "gs://bucket_name/path/to/blob"')
93 | logging.exception(error_message)
94 | raise Error(error_message)
95 | return bucket_name, path
96 |
97 | @retry.Retry()
98 | def _get_or_create_bucket(self, bucket_name: str) -> storage.bucket.Bucket:
99 | """Retrieves or creates Cloud Storage bucket given bucket name.
100 |
101 |     If the bucket doesn't exist in Cloud Storage, it will be created and the
102 | newly created bucket will be returned. The operation will be retried for
103 | transient errors e.g. - google.api_core.exceptions.InternalServerError.
104 |
105 | Args:
106 |       bucket_name: Name of the bucket to be retrieved.
107 |
108 | Returns:
109 | google.cloud.storage.bucket.Bucket: The bucket object.
110 | """
111 | try:
112 | return self.client.get_bucket(bucket_name)
113 | except exceptions.NotFound:
114 | logging.info(f'Cloud Storage bucket "{bucket_name}" not found. '
115 | 'Hence creating the bucket.')
116 | bucket = self.client.create_bucket(bucket_name)
117 | logging.info(
118 | f'Cloud Storage bucket "{bucket_name}" created successfully.')
119 | return bucket
120 |
121 | def upload_file_to_url(self, source_file_path: str,
122 | destination_file_url: str):
123 | """Uploads file from source file system to Cloud Storage.
124 |
125 | This is a convenience method that parses bucket name and path, and calls
126 | upload_file, so that the client can pass the full path as a whole.
127 |
128 | Args:
129 | source_file_path: Path to the file to be uploaded. e.g - /tmp/file.txt
130 | destination_file_url: The full URL to destination file, in the form of
131 | 'gs://bucket_name/path/to/file'.
132 | """
133 | bucket_name, path = self._parse_blob_url(destination_file_url)
134 | self.upload_file(source_file_path, bucket_name, path)
135 |
136 | def upload_file(self, source_file_path: str, bucket_name: str,
137 | destination_file_path: str) -> None:
138 | """Uploads file from source file system to Cloud Storage.
139 |
140 |     If the bucket doesn't exist in Cloud Storage, it will be created.
141 |
142 | Args:
143 | source_file_path: Path to the file to be uploaded. e.g - /tmp/file.txt
144 | bucket_name: Cloud Storage bucket to which the file should be uploaded. If
145 | the Cloud Storage URL is 'gs://bucket1/file1.txt', then the bucket_name
146 | would be 'bucket1'.
147 | destination_file_path: Path of the destination blob/object within the
148 | Cloud Storage bucket. If the Cloud Storage URL is
149 | 'gs://bucket1/dir1/file1.txt', then the destination_file_path would be
150 | 'dir1/file1.txt'.
151 | Raises:
152 | FileNotFoundError: If the provided file is not found.
153 | Error: If the upload was not successful.
154 | """
155 | if not os.path.isfile(source_file_path):
156 | logging.error(f'The file "{source_file_path}" could not be found.')
157 | raise FileNotFoundError(
158 | f'The file "{source_file_path}" could not be found.')
159 | try:
160 | logging.info('Uploading "%s" file to "gs://%s/%s"', source_file_path,
161 | bucket_name, destination_file_path)
162 | bucket = self._get_or_create_bucket(bucket_name)
163 | self._upload_file(source_file_path, bucket, destination_file_path)
164 | logging.info('Uploaded "%s" file to "gs://%s/%s"', source_file_path,
165 | bucket_name, destination_file_path)
166 | except exceptions.RetryError:
167 | error_message = (f'Error when uploading file "{source_file_path}" to '
168 | f'"gs://{bucket_name}/{destination_file_path}"')
169 | logging.exception(error_message)
170 | raise Error(error_message)
171 |
172 | @retry.Retry()
173 | def _upload_file(self, source_file_path: str, bucket: storage.bucket.Bucket,
174 | destination_file_path: str) -> None:
175 | """Uploads file to Cloud Storage with Retry logic.
176 |
177 | The Retry decorator will retry transient API errors. Following errors are
178 | some examples of transient errors:
179 | 1. google.api_core.exceptions.InternalServerError
180 | 2. google.api_core.exceptions.TooManyRequests
181 | 3. google.api_core.exceptions.ServiceUnavailable
182 |
183 | Args:
184 | source_file_path: Path to the file to be uploaded. e.g - /tmp/file.txt
185 | bucket: Cloud Storage bucket to which the file should be uploaded.
186 | destination_file_path: Path of the destination blob/object within the
187 | Cloud Storage bucket. If the Cloud Storage URL is
188 | 'gs://bucket1/dir1/file1.txt', then the destination_file_path would be
189 | 'dir1/file1.txt'.
190 | """
191 | destination_blob = bucket.blob(destination_file_path)
192 | destination_blob.upload_from_filename(source_file_path)
193 |
194 | def upload_directory_to_url(self, source_directory_path: str,
195 | destination_dir_url: str):
196 | """Uploads an entire directory to Cloud Storage.
197 |
198 | This is a convenience method that parses bucket name and path, and calls
199 | upload_directory, so that the client can pass the full path as a whole.
200 |
201 | Args:
202 | source_directory_path: Path to the directory to be uploaded: e.g -
203 | /tmp/dir1/dir2
204 | destination_dir_url: The full URL to destination directory, in the form of
205 | 'gs://bucket_name/path/to/dir'.
206 | """
207 | bucket_name, path = self._parse_blob_url(destination_dir_url)
208 | self.upload_directory(source_directory_path, bucket_name, path)
209 |
210 | def upload_directory(self, source_directory_path: str, bucket_name: str,
211 | destination_dir_path: str) -> None:
212 | """Uploads an entire directory to Cloud Storage.
213 |
214 |     All the files in the source directory are identified recursively and
215 |     uploaded to the Cloud Storage bucket. Symlinks in the source directory are
216 |     ignored to avoid infinite recursion. If the bucket doesn't exist in
217 |     Cloud Storage, it will be created.
218 |
219 | Args:
220 | source_directory_path: Path to the directory to be uploaded: e.g -
221 | /tmp/dir1/dir2
222 | bucket_name: Cloud Storage bucket to which the directory should be
223 | uploaded. If the Cloud Storage URL is 'gs://bucket1/dir1/dir2', then the
224 | bucket_name would be 'bucket1'.
225 | destination_dir_path: Path of the destination blob/object within the
226 | Cloud Storage bucket. If the Cloud Storage URL is
227 | 'gs://bucket/dir1/dir2', then the destination_dir_path would be
228 | 'dir1/dir2'.
229 |
230 | Raises:
231 | FileNotFoundError: If the provided directory is not found.
232 | """
233 | if not os.path.isdir(source_directory_path):
234 | logging.error(
235 | f'The directory "{source_directory_path}" could not be found.')
236 | raise FileNotFoundError(
237 | f'The directory "{source_directory_path}" could not be found.')
238 | logging.info('Uploading "%s" directory to "gs://%s/%s"',
239 | source_directory_path, bucket_name, destination_dir_path)
240 | files_to_upload = []
241 | for (root, _, files) in os.walk(source_directory_path):
242 | if not files:
243 | continue
244 | for file in files:
245 | full_path = os.path.join(root, file)
246 | files_to_upload.append(full_path)
247 | bucket = self._get_or_create_bucket(bucket_name)
248 | for file in files_to_upload:
249 | # Construct destination path by replacing source directory path:
250 | # If the source directory is `/tmp/dir1` and destination_dir_path is
251 | # `obj1/obj2` then file `/tmp/dir1/dir2/file.txt` will have a destination
252 | # file path `obj1/obj2/dir2/file.txt`
253 | destination_file_path = file.replace(source_directory_path,
254 | destination_dir_path)
255 | self._upload_file(file, bucket, destination_file_path)
256 | logging.info('Uploaded "%s" directory to "gs://%s/%s"',
257 | source_directory_path, bucket_name, destination_dir_path)
258 |
259 | def write_to_path(self, file_content: str, destination_file_path: str):
260 | """Writes file content to Cloud Storage file.
261 |
262 | This is a convenience method that parses bucket name and path, and calls
263 | write_to_file, so that the client can pass the full path as a whole.
264 |
265 | Args:
266 | file_content: The content to be written to the file.
267 | destination_file_path: The full path to destination file, in the form of
268 | 'gs://bucket_name/path/to/file'.
269 | """
270 | bucket_name, path = self._parse_blob_url(destination_file_path)
271 | self.write_to_file(file_content, bucket_name, path)
272 |
273 | @retry.Retry()
274 | def write_to_file(self, file_content: str, bucket_name: str,
275 | destination_file_path: str):
276 | """Writes file content to Cloud Storage file.
277 |
278 |     If the bucket doesn't exist in Cloud Storage, it will be created. If the
279 |     file already exists in Cloud Storage, its content will be
280 |     overwritten. The operation will be retried for transient errors, e.g.
281 |     google.api_core.exceptions.InternalServerError.
282 |
283 | Args:
284 | file_content: The content to be written to the file.
285 | bucket_name: Cloud Storage bucket to which the content should be written.
286 | If the Cloud Storage URL is 'gs://bucket1/file1.txt', then the
287 | bucket_name would be 'bucket1'.
288 | destination_file_path: Path of the destination blob/object within the
289 | Cloud Storage bucket. If the Cloud Storage URL is
290 | 'gs://bucket1/dir1/file1.txt', then the destination_file_path would be
291 | 'dir1/file1.txt'.
292 | """
293 | logging.info('Writing data to "gs://%s/%s"', bucket_name,
294 | destination_file_path)
295 | bucket = self._get_or_create_bucket(bucket_name)
296 | destination_blob = bucket.blob(destination_file_path)
297 | destination_blob.upload_from_string(file_content)
298 | logging.info('Successfully wrote data to "gs://%s/%s"', bucket_name,
299 | destination_file_path)
300 |
--------------------------------------------------------------------------------
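
A minimal usage sketch for plugins/cloud_utils/cloud_storage.py above,
assuming application-default credentials are available; the project id,
bucket, and file paths are hypothetical placeholders.

    from plugins.cloud_utils.cloud_storage import CloudStorageUtils

    storage_utils = CloudStorageUtils('my-project')

    # Upload a single file; the bucket is created if it does not exist.
    storage_utils.upload_file_to_url('/tmp/report.csv',
                                     'gs://my-bucket/reports/report.csv')

    # Upload a directory tree recursively (symlinks are skipped).
    storage_utils.upload_directory_to_url('/tmp/dags', 'gs://my-bucket/dags')

    # Write string content directly to a blob, overwriting any existing one.
    storage_utils.write_to_path('some file content', 'gs://my-bucket/notes.txt')
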
/plugins/cloud_utils/cloud_composer.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2020 Google LLC.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | # Lint as: python3
17 | # Copyright 2020 Google LLC
18 | #
19 | # Licensed under the Apache License, Version 2.0 (the "License");
20 | # you may not use this file except in compliance with the License.
21 | # You may obtain a copy of the License at
22 | #
23 | # http://www.apache.org/licenses/LICENSE-2.0
24 | #
25 | # Unless required by applicable law or agreed to in writing, software
26 | # distributed under the License is distributed on an "AS IS" BASIS,
27 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
28 | # See the License for the specific language governing permissions and
29 | # limitations under the License.
30 |
31 | """Manage operations on Cloud Composer."""
32 |
33 | from typing import Any, Dict, Optional
34 | import logging
35 |
36 | from googleapiclient import errors
37 | from plugins.cloud_utils import cloud_auth
38 | from plugins.cloud_utils import utils
39 |
40 | # Default Cloud Composer configuration.
41 | _LOCATION = 'us-central1'
42 | _DISK_SIZE = 20  # In GBs.
43 | _MACHINE_TYPE = 'n1-standard-1'
44 | _PYTHON_VERSION = '3'
45 | _HTTP_CONFLICT_CODE = 409
46 |
47 |
48 | class Error(Exception):
49 | """A generic error thrown for any exception in cloud_composer module."""
50 | pass
51 |
52 |
53 | class CloudComposerUtils(object):
54 | """CloudComposerUtils class provides methods to manage Composer environment.
55 |
56 |   This class manages the Cloud Composer service within a single GCP
57 | project.
58 |
59 | Typical usage example:
60 | >>> composer = CloudComposerUtils('project_id',
61 | 'us-west1',
62 | 'service_account_key_file.json')
63 |     >>> composer.create_environment('environment_name')
64 | """
65 |
66 | _CONFIG_KEY = 'config' # Key to get Composer configurations.
67 | _DAG_FOLDER_KEY = 'dagGcsPrefix' # Key to get Cloud Storage DAG folder URL.
68 |
69 | def __init__(self,
70 | project_id: str,
71 | location: str = _LOCATION,
72 |                service_account_key_file: Optional[str] = None) -> None:
73 | """Initialise new instance of CloudComposerUtils.
74 |
75 | Args:
76 | project_id: GCP project id.
77 | location: Optional. Region under which the Composer environment needs to
78 | be managed. It defaults to 'us-central1'. Allowed values -
79 | https://cloud.google.com/compute/docs/regions-zones/.
80 | service_account_key_file: Optional. File containing service account key.
81 |         If not passed, the default credentials will be used. Service accounts
82 |         can be created in the following ways:
83 | 1. Use `build_service_client` method from `cloud_auth` module.
84 | 2. Use `gcloud` command line utility as documented here -
85 | https://cloud.google.com/iam/docs/creating-managing-service-account-keys
86 | """
87 | self.client = cloud_auth.build_service_client('composer',
88 | service_account_key_file)
89 | self.project_id = project_id
90 | self.location = location
91 |
92 | def _get_fully_qualified_env_name(self, environment_name: str) -> str:
93 | """Constructs fully qualified environment name.
94 |
95 | Args:
96 | environment_name: Name of Composer environment.
97 |
98 | Returns:
99 | fully_qualified_name: Fully qualified environment name in the following
100 | format -
101 |         projects/{project_id}/locations/{location}/environments/{environment_name}
102 | """
103 | fully_qualified_name = (f'projects/{self.project_id}/locations/'
104 | f'{self.location}/environments/{environment_name}')
105 | return fully_qualified_name
106 |
107 | def create_environment(self,
108 | environment_name: str,
109 | zone: str = 'b',
110 |                          disk_size_gb: int = _DISK_SIZE,
111 | machine_type: str = _MACHINE_TYPE,
112 |                          image_version: Optional[str] = None,
113 | python_version: str = _PYTHON_VERSION) -> None:
114 | """Creates new Cloud Composer environment.
115 |
116 | Args:
117 | environment_name: Name of Composer environment.
118 | zone: Optional. Zone where the Composer environment will be created. It
119 | defaults to 'b' since zone 'b' is present in all the regions.
120 | Allowed values - https://cloud.google.com/compute/docs/regions-zones/.
121 | disk_size_gb: Optional. The disk size in GB used for node VMs. It defaults
122 | to 20GB since it is the minimum size.
123 |       machine_type: Optional. The machine type of the node VMs. It defaults
124 |         to 'n1-standard-1'. Allowed values -
125 | https://cloud.google.com/compute/docs/machine-types.
126 | image_version: The version of Composer and Airflow running in the
127 | environment. If this is not provided, a default version is used as per
128 | https://cloud.google.com/composer/docs/concepts/versioning/composer-versions.
129 |       python_version: The version of Python used to run Apache Airflow. It
130 | defaults to '3'.
131 |
132 | Raises:
133 | Error: If the provided disk size is less than 20GB.
134 | """
135 | if disk_size_gb < 20:
136 |       raise Error('The minimum disk size needs to be 20GB to create a Composer '
137 |                   'environment.')
138 | fully_qualified_name = self._get_fully_qualified_env_name(environment_name)
139 | parent = f'projects/{self.project_id}/locations/{self.location}'
140 | composer_zone = f'{self.location}-{zone}'
141 | location = f'projects/{self.project_id}/zones/{composer_zone}'
142 | machine_type = (f'projects/{self.project_id}/zones/{composer_zone}/'
143 | f'machineTypes/{machine_type}')
144 | software_config = {
145 | 'pythonVersion': python_version
146 | }
147 | if image_version:
148 | software_config['imageVersion'] = image_version
149 | request_body = {
150 | 'name': fully_qualified_name,
151 | 'config': {
152 | 'nodeConfig': {
153 | 'location': location,
154 | 'machineType': machine_type,
155 | 'diskSizeGb': disk_size_gb
156 | },
157 | 'softwareConfig': software_config
158 | }
159 | }
160 | logging.info('Creating "%s" Composer environment for "%s" project.',
161 | fully_qualified_name, self.project_id)
162 | try:
163 | request = self.client.projects().locations().environments().create(
164 | parent=parent, body=request_body)
165 | operation = utils.execute_request(request)
166 | operation_client = self.client.projects().locations().operations()
167 | utils.wait_for_operation(operation_client, operation)
168 | except errors.HttpError as error:
169 |       if error.resp.status == _HTTP_CONFLICT_CODE:
170 | logging.info('The Composer environment %s already exists.',
171 | fully_qualified_name)
172 | return
173 | logging.exception('Error occurred while creating Composer environment.')
174 | raise Error('Error occurred while creating Composer environment.')
175 |
176 | def install_python_packages(self, environment_name: str,
177 | packages: Dict[str, str]) -> None:
178 | """Install Python packages on the existing Composer environment.
179 |
180 | Args:
181 | environment_name: Name of the existing Composer environment. The fully
182 | qualified environment name will be constructed as follows -
183 | 'projects/{project_id}/locations/{location}/environments/
184 | {environment_name}'.
185 | packages: Dictionary of Python packages to be installed in the Composer
186 | environment. Each entry in the dictionary has dependency name as the key
187 | and version as the value. e.g -
188 |         {'tensorflow': '<=1.0.1', 'apache-beam': '==2.12.0', 'flask': '>1.0.3'}
189 |
190 | Raises:
191 | Error: If the list of packages is empty.
192 | """
193 | if not packages:
194 | raise Error('Package list cannot be empty.')
195 | fully_qualified_name = self._get_fully_qualified_env_name(environment_name)
196 | logging.info('Installing "%s" packages in "%s" Composer environment.',
197 | packages, fully_qualified_name)
198 | try:
199 | request_body = {
200 | 'name': fully_qualified_name,
201 | 'config': {
202 | 'softwareConfig': {
203 | 'pypiPackages': packages
204 | }
205 | }
206 | }
207 | request = (
208 | self.client.projects().locations().environments().patch(
209 | name=fully_qualified_name,
210 | body=request_body,
211 | updateMask='config.softwareConfig.pypiPackages'))
212 | operation = utils.execute_request(request)
213 | operation_client = self.client.projects().locations().operations()
214 | utils.wait_for_operation(operation_client, operation)
215 | logging.info('Installed "%s" packages in "%s" Composer environment.',
216 | packages, fully_qualified_name)
217 | except errors.HttpError:
218 | logging.exception('Error occurred while installing packages.')
219 | raise Error('Error occurred while installing python packages.')
220 |
221 | def set_environment_variables(self, environment_name: str,
222 | environment_variables: Dict[str, str]) -> None:
223 | """Sets environment variables on the existing Composer environment.
224 |
225 | Args:
226 | environment_name: Name of the existing Composer environment. The fully
227 | qualified environment name will be constructed as follows -
228 | 'projects/{project_id}/locations/{location}/environments/
229 | {environment_name}'.
230 | environment_variables: Environment variables to be added to the Composer
231 | environment.
232 |
233 | Raises:
234 | Error: If the request was not processed successfully.
235 | """
236 | fully_qualified_name = self._get_fully_qualified_env_name(environment_name)
237 | logging.info(
238 | 'Setting "%s" environment variables in "%s" Composer '
239 | 'environment.', environment_variables, fully_qualified_name)
240 | try:
241 | request_body = {
242 | 'name': fully_qualified_name,
243 | 'config': {
244 | 'softwareConfig': {
245 | 'envVariables': environment_variables
246 | }
247 | }
248 | }
249 | request = (
250 | self.client.projects().locations().environments().patch(
251 | name=fully_qualified_name,
252 | body=request_body,
253 | updateMask='config.softwareConfig.envVariables'))
254 | operation = utils.execute_request(request)
255 | operation_client = self.client.projects().locations().operations()
256 | utils.wait_for_operation(operation_client, operation)
257 | logging.info(
258 | 'Updated "%s" environment variables in "%s" Composer '
259 | 'environment.', environment_variables, fully_qualified_name)
260 | except errors.HttpError:
261 | logging.exception('Error occurred while setting environment variables.')
262 | raise Error('Error occurred while setting environment variables.')
263 |
264 | def override_airflow_configs(
265 | self, environment_name: str, airflow_config_overrides: Dict[str,
266 | str]) -> None:
267 | """Overrides Airflow configurations on the existing Composer environment.
268 |
269 | Args:
270 | environment_name: Name of the existing Composer environment. The fully
271 | qualified environment name will be constructed as follows -
272 | 'projects/{project_id}/locations/{location}/environments/
273 | {environment_name}'.
274 | airflow_config_overrides: Airflow configurations to be overridden in the
275 | Composer environment.
276 |
277 | Raises:
278 | Error: If the request was not processed successfully.
279 | """
280 | fully_qualified_name = self._get_fully_qualified_env_name(environment_name)
281 | logging.info(
282 | 'Overriding "%s" Airflow configurations in "%s" Composer '
283 | 'environment.', airflow_config_overrides, fully_qualified_name)
284 | try:
285 | request_body = {
286 | 'name': fully_qualified_name,
287 | 'config': {
288 | 'softwareConfig': {
289 | 'airflowConfigOverrides': airflow_config_overrides
290 | }
291 | }
292 | }
293 | request = (
294 | self.client.projects().locations().environments().patch(
295 | name=fully_qualified_name,
296 | body=request_body,
297 | updateMask='config.softwareConfig.airflowConfigOverrides'))
298 | operation = utils.execute_request(request)
299 | operation_client = self.client.projects().locations().operations()
300 | utils.wait_for_operation(operation_client, operation)
301 | logging.info(
302 | 'Airflow configurations "%s" has been overridden in "%s" Composer '
303 | 'environment.', airflow_config_overrides, fully_qualified_name)
304 | except errors.HttpError:
305 | logging.exception(
306 | 'Error occurred while overriding Airflow configurations.')
307 | raise Error('Error occurred while overriding Airflow configurations.')
308 |
309 | def get_environment(self, environment_name: str) -> Dict[str, Any]:
310 | """Retrieves details of a Composer environment.
311 |
312 | Args:
313 | environment_name: Name of the existing Composer environment. The fully
314 | qualified environment name will be constructed as follows -
315 | 'projects/{project_id}/locations/{location}/environments/
316 | {environment_name}'.
317 |
318 | Returns:
319 | environment: Details of Composer environment.
320 |
321 | Raises:
322 | Error: If the request was not processed successfully.
323 | """
324 | fully_qualified_name = self._get_fully_qualified_env_name(environment_name)
325 | logging.info('Retrieving Composer environment details for "%s"',
326 | fully_qualified_name)
327 | try:
328 | request = self.client.projects().locations().environments().get(
329 | name=fully_qualified_name)
330 | composer_environment_details = utils.execute_request(request)
331 | return composer_environment_details
332 | except errors.HttpError:
333 | logging.exception('Error while retrieving Composer environment details.')
334 | raise Error('Error while retrieving Composer environment details.')
335 |
336 | def get_dags_folder(self, environment_name: str) -> str:
337 | """Returns Cloud Storage URL for DAGs folder for a Composer environment.
338 |
339 | Args:
340 | environment_name: Name of the existing Composer environment. The fully
341 | qualified environment name will be constructed as follows -
342 | 'projects/{project_id}/locations/{location}/environments/
343 | {environment_name}'.
344 |
345 | Returns:
346 | The Cloud Storage DAGs URL.
347 | """
348 | environment_details = self.get_environment(environment_name)
349 | environment_config = environment_details[self._CONFIG_KEY]
350 | return environment_config[self._DAG_FOLDER_KEY]
351 |
--------------------------------------------------------------------------------
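
A minimal usage sketch for plugins/cloud_utils/cloud_composer.py above; the
project, environment name, package pin, and environment variable are
hypothetical placeholders. Each call blocks until the underlying Composer
operation completes.

    from plugins.cloud_utils.cloud_composer import CloudComposerUtils

    composer_utils = CloudComposerUtils('my-project', location='us-central1')

    # Create the environment (a no-op if it already exists).
    composer_utils.create_environment('markup-env')

    # Install PyPI packages and expose configuration to the DAGs.
    composer_utils.install_python_packages('markup-env',
                                           {'apache-beam': '==2.12.0'})
    composer_utils.set_environment_variables('markup-env',
                                             {'DATASET_ID': 'markup'})

    # Cloud Storage folder where DAG files should be uploaded.
    dags_folder_url = composer_utils.get_dags_folder('markup-env')
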
/cloud_data_transfer.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2020 Google LLC.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | # python3
17 | """Module for managing BigQuery data transfers."""
18 |
19 | import datetime
20 | import logging
21 | import time
22 | from typing import Any, Dict, Optional
23 |
24 | import auth
25 | import config_parser
26 | from google.cloud import bigquery_datatransfer
27 | from google.protobuf import struct_pb2
28 | from google.protobuf import timestamp_pb2
29 | import pytz
30 |
31 | _MERCHANT_CENTER_ID = 'merchant_center' # Data source id for Merchant Center.
32 | _GOOGLE_ADS_ID = 'adwords' # Data source id for Google Ads.
33 | _SLEEP_SECONDS = 60 # Seconds to sleep before checking resource status.
34 | _MAX_POLL_COUNTER = 100
35 | _PENDING_STATE = 2  # Numeric values of bigquery_datatransfer.TransferState.
36 | _RUNNING_STATE = 3
37 | _SUCCESS_STATE = 4
38 | _FAILED_STATE = 5
39 | _CANCELLED_STATE = 6
40 |
41 |
42 | class Error(Exception):
43 | """Base error for this module."""
44 |
45 |
46 | class DataTransferError(Error):
47 | """An exception to be raised when data transfer was not successful."""
48 |
49 |
50 | class CloudDataTransferUtils(object):
51 | """This class provides methods to manage BigQuery data transfers.
52 |
53 | Typical usage example:
54 | >>> data_transfer = CloudDataTransferUtils('project_id')
55 |     >>> data_transfer.create_merchant_center_transfer('12345', 'dataset_id', False)
56 | """
57 |
58 | def __init__(self, project_id: str):
59 | """Initialise new instance of CloudDataTransferUtils.
60 |
61 | Args:
62 | project_id: GCP project id.
63 | """
64 | self.project_id = project_id
65 | self.client = bigquery_datatransfer.DataTransferServiceClient()
66 |
67 |   def wait_for_transfer_completion(
68 |       self, transfer_config: bigquery_datatransfer.TransferConfig) -> None:
69 | """Waits for the completion of data transfer operation.
70 |
71 |     This method retrieves the data transfer operation and checks its status. If
72 |     the operation has not completed, it is re-checked after
73 | `_SLEEP_SECONDS` seconds.
74 |
75 | Args:
76 | transfer_config: Resource representing data transfer.
77 |
78 | Raises:
79 | DataTransferError: If the data transfer is not successfully completed.
80 | """
81 | transfer_config_name = transfer_config.name
82 | transfer_config_id = transfer_config_name.split('/')[-1]
83 | poll_counter = 0 # Counter to keep polling count.
84 | while True:
85 | transfer_config_path = ('projects/' + self.project_id + '/locations/' +
86 | config_parser.get_dataset_location() +
87 | '/transferConfigs/' + transfer_config_id)
88 | response = self.client.list_transfer_runs(
89 | {'parent': transfer_config_path})
90 | latest_transfer = None
91 | for transfer in response:
92 | latest_transfer = transfer
93 | break
94 | if not latest_transfer:
95 | return
96 | if latest_transfer.state == _SUCCESS_STATE:
97 | logging.info('Transfer %s was successful.', transfer_config_name)
98 | return
99 | if (latest_transfer.state == _FAILED_STATE or
100 | latest_transfer.state == _CANCELLED_STATE):
101 | error_message = (f'Transfer {transfer_config_name} was not successful. '
102 | f'Error - {latest_transfer.error_status}')
103 | logging.error(error_message)
104 | raise DataTransferError(error_message)
105 | logging.info(
106 | 'Transfer %s still in progress. Sleeping for %s seconds before '
107 | 'checking again.', transfer_config_name, _SLEEP_SECONDS)
108 | time.sleep(_SLEEP_SECONDS)
109 | poll_counter += 1
110 | if poll_counter >= _MAX_POLL_COUNTER:
111 | error_message = (f'Transfer {transfer_config_name} is taking too long'
112 | ' to finish. Hence failing the request.')
113 | logging.error(error_message)
114 | raise DataTransferError(error_message)
115 |
116 |   def _get_existing_transfer(
117 |       self, data_source_id: str,
118 |       destination_dataset_id: Optional[str] = None,
119 |       params: Optional[Dict[str, str]] = None,
120 |       name: Optional[str] = None) -> Optional[bigquery_datatransfer.TransferConfig]:
121 | """Gets data transfer if it already exists.
122 |
123 | Args:
124 | data_source_id: Data source id.
125 | destination_dataset_id: BigQuery dataset id.
126 | params: Data transfer specific parameters.
127 | name: The display name of the transfer.
128 |
129 | Returns:
130 | Data Transfer if the transfer already exists.
131 | None otherwise.
132 | """
133 | dataset_location = config_parser.get_dataset_location()
134 | parent = 'projects/' + self.project_id + '/locations/' + dataset_location
135 | for transfer_config in self.client.list_transfer_configs({'parent': parent
136 | }):
137 | if transfer_config.data_source_id != data_source_id:
138 | continue
139 | if (destination_dataset_id and
140 | transfer_config.destination_dataset_id != destination_dataset_id):
141 | continue
142 |       # If the transfer config is in a failed state, ignore it.
143 | is_valid_state = transfer_config.state in (_PENDING_STATE, _RUNNING_STATE,
144 | _SUCCESS_STATE)
145 | params_match = self._check_params_match(transfer_config, params)
146 | name_matches = name is None or name == transfer_config.display_name
147 | if params_match and is_valid_state and name_matches:
148 | return transfer_config
149 | return None
150 |
151 | def _check_params_match(self,
152 | transfer_config: bigquery_datatransfer.TransferConfig,
153 | params: Dict[str, str]) -> bool:
154 | """Checks if given parameters are present in transfer config.
155 |
156 | Args:
157 | transfer_config: Data transfer configuration.
158 | params: Data transfer specific parameters.
159 |
160 | Returns:
161 | True if given parameters are present in transfer config, False otherwise.
162 | """
163 | if not params:
164 | return True
165 | for key, value in params.items():
166 | config_params = transfer_config.params
167 | if key not in config_params or config_params[key] != value:
168 | return False
169 | return True
170 |
171 | def _update_existing_transfer(
172 | self, transfer_config: bigquery_datatransfer.TransferConfig,
173 | params: Dict[str, str]) -> bigquery_datatransfer.TransferConfig:
174 | """Updates existing data transfer.
175 |
176 | If the parameters are already present in the config, then the transfer
177 | config update is skipped.
178 |
179 | Args:
180 | transfer_config: Data transfer configuration to update.
181 | params: Data transfer specific parameters.
182 |
183 | Returns:
184 | Updated data transfer config.
185 | """
186 | if self._check_params_match(transfer_config, params):
187 | logging.info(
188 | 'The data transfer config "%s" parameters match. Hence '
189 | 'skipping update.', transfer_config.display_name)
190 | return transfer_config
191 | new_transfer_config = bigquery_datatransfer.TransferConfig()
192 | new_transfer_config.CopyFrom(transfer_config)
193 | # Clear existing parameter values.
194 | new_transfer_config.params.Clear()
195 | for key, value in params.items():
196 | new_transfer_config.params[key] = value
197 | # Only params field is updated.
198 | update_mask = {'paths': ['params']}
199 | new_transfer_config = self.client.update_transfer_config(
200 |         transfer_config=new_transfer_config, update_mask=update_mask)
201 | logging.info('The data transfer config "%s" parameters updated.',
202 | new_transfer_config.display_name)
203 | return new_transfer_config
204 |
205 | def create_merchant_center_transfer(
206 | self, merchant_id: str, destination_dataset: str,
207 | enable_market_insights: bool) -> bigquery_datatransfer.TransferConfig:
208 | """Creates a new merchant center transfer.
209 |
210 |     Merchant Center allows retailers to upload product data to Google. This
211 | method creates a data transfer config to copy the product data to BigQuery.
212 |
213 | Args:
214 |       merchant_id: Google Merchant Center (GMC) account id.
215 | destination_dataset: BigQuery dataset id.
216 | enable_market_insights: Whether to deploy market insights solution.
217 |
218 | Returns:
219 | Transfer config.
220 | """
221 | logging.info('Creating Merchant Center Transfer.')
222 | parameters = struct_pb2.Struct()
223 | parameters['merchant_id'] = merchant_id
224 | parameters['export_products'] = True
225 | if enable_market_insights:
226 | parameters['export_price_benchmarks'] = True
227 | parameters['export_best_sellers'] = True
228 | data_transfer_config = self._get_existing_transfer(_MERCHANT_CENTER_ID,
229 | destination_dataset,
230 | parameters)
231 | if data_transfer_config:
232 | logging.info(
233 | 'Data transfer for merchant id %s to destination dataset %s '
234 | 'already exists.', merchant_id, destination_dataset)
235 | return self._update_existing_transfer(data_transfer_config, parameters)
236 | logging.info(
237 | 'Creating data transfer for merchant id %s to destination dataset %s',
238 | merchant_id, destination_dataset)
239 | has_valid_credentials = self._check_valid_credentials(_MERCHANT_CENTER_ID)
240 | authorization_code = None
241 | if not has_valid_credentials:
242 | authorization_code = self._get_authorization_code(_MERCHANT_CENTER_ID)
243 | dataset_location = config_parser.get_dataset_location()
244 | parent = 'projects/' + self.project_id + '/locations/' + dataset_location
245 | input_config = {
246 | 'display_name': f'Merchant Center Transfer - {merchant_id}',
247 | 'data_source_id': _MERCHANT_CENTER_ID,
248 | 'destination_dataset_id': destination_dataset,
249 | 'params': parameters,
250 | 'data_refresh_window_days': 0,
251 | }
252 | request = bigquery_datatransfer.CreateTransferConfigRequest(
253 | parent=parent,
254 | transfer_config=input_config,
255 | authorization_code=authorization_code,
256 | )
257 | transfer_config = self.client.create_transfer_config(request)
258 | logging.info(
259 | 'Data transfer created for merchant id %s to destination dataset %s',
260 | merchant_id, destination_dataset)
261 | return transfer_config
262 |
263 | def create_google_ads_transfer(
264 | self,
265 | customer_id: str,
266 | destination_dataset: str,
267 | backfill_days: int = 30) -> bigquery_datatransfer.TransferConfig:
268 | """Creates a new Google Ads transfer.
269 |
270 | This method creates a data transfer config to copy Google Ads data to
271 | BigQuery dataset.
272 |
273 | Args:
274 | customer_id: Google Ads customer id.
275 | destination_dataset: BigQuery dataset id.
276 | backfill_days: Number of days to backfill.
277 |
278 | Returns:
279 | Transfer config.
280 | """
281 | logging.info('Creating Google Ads Transfer.')
282 |
283 | parameters = struct_pb2.Struct()
284 | parameters['customer_id'] = customer_id
285 | data_transfer_config = self._get_existing_transfer(_GOOGLE_ADS_ID,
286 | destination_dataset,
287 | parameters)
288 | if data_transfer_config:
289 | logging.info(
290 | 'Data transfer for Google Ads customer id %s to destination dataset '
291 | '%s already exists.', customer_id, destination_dataset)
292 | return data_transfer_config
293 | logging.info(
294 | 'Creating data transfer for Google Ads customer id %s to destination '
295 | 'dataset %s', customer_id, destination_dataset)
296 | has_valid_credentials = self._check_valid_credentials(_GOOGLE_ADS_ID)
297 | authorization_code = None
298 | if not has_valid_credentials:
299 | authorization_code = self._get_authorization_code(_GOOGLE_ADS_ID)
300 | dataset_location = config_parser.get_dataset_location()
301 | parent = 'projects/' + self.project_id + '/locations/' + dataset_location
302 | input_config = {
303 | 'display_name': f'Google Ads Transfer - {customer_id}',
304 | 'data_source_id': _GOOGLE_ADS_ID,
305 | 'destination_dataset_id': destination_dataset,
306 | 'params': parameters,
307 | 'data_refresh_window_days': 1,
308 | }
309 | request = bigquery_datatransfer.CreateTransferConfigRequest(
310 | parent=parent,
311 | transfer_config=input_config,
312 | authorization_code=authorization_code,
313 | )
314 | transfer_config = self.client.create_transfer_config(request=request)
315 | logging.info(
316 | 'Data transfer created for Google Ads customer id %s to destination '
317 | 'dataset %s', customer_id, destination_dataset)
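    |     # Backfill: schedule extra transfer runs covering the past
    |     # `backfill_days` days so historical Google Ads data is loaded. The
    |     # window is aligned to midnight UTC, matching the daily run cadence.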
318 | if backfill_days:
319 | transfer_config_name = transfer_config.name
320 | transfer_config_id = transfer_config_name.split('/')[-1]
321 | start_time = datetime.datetime.now(tz=pytz.utc) - datetime.timedelta(
322 | days=backfill_days)
323 | end_time = datetime.datetime.now(tz=pytz.utc)
324 | start_time = start_time.replace(hour=0, minute=0, second=0, microsecond=0)
325 | end_time = end_time.replace(hour=0, minute=0, second=0, microsecond=0)
326 | transfer_config_path = parent + '/transferConfigs/' + transfer_config_id
327 | start_time_pb = timestamp_pb2.Timestamp()
328 | end_time_pb = timestamp_pb2.Timestamp()
329 | start_time_pb.FromDatetime(start_time)
330 | end_time_pb.FromDatetime(end_time)
331 | self.client.schedule_transfer_runs(
332 | parent=transfer_config_path,
333 | start_time=start_time_pb,
334 | end_time=end_time_pb)
335 | return transfer_config
336 |
337 | def schedule_query(self, name: str,
338 | query_string: str) -> bigquery_datatransfer.TransferConfig:
339 |     """Schedules a query to run every day.
340 |
341 | Args:
342 | name: Name of the scheduled query.
343 | query_string: The query to be run.
344 |
345 | Returns:
346 | Transfer config.
347 | """
348 | data_transfer_config = self._get_existing_transfer(
349 | 'scheduled_query', name=name)
350 | parameters = struct_pb2.Struct()
351 | parameters['query'] = query_string
352 | if data_transfer_config:
353 | logging.info('Data transfer for scheduling query "%s" already exists.',
354 | name)
355 | updated_transfer_config = self._update_existing_transfer(
356 | data_transfer_config, parameters)
357 | logging.info('Data transfer for scheduling query "%s" updated.', name)
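    |       # Trigger a one-time manual run so the updated query is applied
    |       # immediately rather than waiting for the next scheduled run.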
358 | start_time_pb = timestamp_pb2.Timestamp()
359 | start_time = datetime.datetime.now(tz=pytz.utc)
360 | start_time_pb.FromDatetime(start_time)
361 | self.client.start_manual_transfer_runs(
362 | parent=updated_transfer_config.name, requested_run_time=start_time_pb)
363 |       logging.info('One-time manual run started. It might take up to 1 hour '
364 |                    'for performance data to appear on the dashboard.')
365 | return updated_transfer_config
366 | dataset_location = config_parser.get_dataset_location()
367 | has_valid_credentials = self._check_valid_credentials('scheduled_query')
368 | authorization_code = ''
369 | if not has_valid_credentials:
370 | authorization_code = self._get_authorization_code('scheduled_query')
371 | parent = 'projects/' + self.project_id + '/locations/' + dataset_location
372 | input_config = bigquery_datatransfer.TransferConfig(
373 | display_name=name,
374 | data_source_id='scheduled_query',
375 | params={'query': query_string},
376 | schedule='every 24 hours',
377 | )
378 | request = bigquery_datatransfer.CreateTransferConfigRequest(
379 | parent=parent,
380 | transfer_config=input_config,
381 | authorization_code=authorization_code,
382 | )
383 | transfer_config = self.client.create_transfer_config(request=request)
384 | return transfer_config
385 |
386 | def _get_data_source(self,
387 | data_source_id: str) -> bigquery_datatransfer.DataSource:
388 |     """Returns the data source.
389 |
390 | Args:
391 | data_source_id: Data source id.
392 | """
393 | dataset_location = config_parser.get_dataset_location()
394 | name = ('projects/' + self.project_id + '/locations/' + dataset_location +
395 | '/dataSources/' + data_source_id)
396 | return self.client.get_data_source({'name': name})
397 |
398 | def _check_valid_credentials(self, data_source_id: str) -> bool:
399 | """Returns true if valid credentials exist for the given data source.
400 |
401 | Args:
402 | data_source_id: Data source id.
403 | """
404 | dataset_location = config_parser.get_dataset_location()
405 | name = ('projects/' + self.project_id + '/locations/' + dataset_location +
406 | '/dataSources/' + data_source_id)
407 | response = self.client.check_valid_creds({'name': name})
408 | return response.has_valid_creds
409 |
410 | def _get_authorization_code(self, data_source_id: str) -> str:
411 |     """Returns the authorization code for a given data source.
412 |
413 | Args:
414 | data_source_id: Data source id.
415 | """
416 |     data_source = self._get_data_source(data_source_id)
417 |     if not data_source:
418 |       raise AssertionError('Invalid data source')
419 |
420 |     client_id = data_source.client_id
421 |     scopes = data_source.scopes
422 |     return auth.retrieve_authorization_code(client_id, scopes, data_source_id)
423 |
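424 | # Example usage (an illustrative sketch, not executed): the methods above
425 | # belong to the transfer-utils class defined earlier in this module. The
426 | # class name, constructor argument, ids, dataset, and query variable below
427 | # are assumptions for illustration, not verified API names.
428 | #
429 | #   data_transfer = CloudDataTransferUtils(project_id='my-gcp-project')
430 | #
431 | #   # Merchant Center -> BigQuery; an existing config is updated in place.
432 | #   data_transfer.create_merchant_center_transfer(
433 | #       merchant_id=12345, destination_dataset='markup',
434 | #       enable_market_insights=True)
435 | #
436 | #   # Google Ads -> BigQuery, with a 30-day backfill of historical data.
437 | #   data_transfer.create_google_ads_transfer(
438 | #       customer_id='1234567890', destination_dataset='markup')
439 | #
440 | #   # Materialize the dashboard views every 24 hours.
441 | #   data_transfer.schedule_query('main_workflow', main_workflow_query)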
--------------------------------------------------------------------------------