├── .gitignore
├── images
│   ├── copy_icon.png
│   └── architecture.png
├── plugins
│   └── cloud_utils
│       ├── images
│       │   └── module_overview.png
│       ├── __init__.py
│       ├── utils.py
│       ├── cloud_api.py
│       ├── cloud_auth.py
│       ├── cloud_storage.py
│       └── cloud_composer.py
├── requirements.txt
├── config.yaml
├── setup.sh
├── scripts
│   ├── 3_customer_view.sql
│   ├── targeted_products
│   │   ├── targeted_product_ddl.sql
│   │   └── construct_parsed_criteria.sql
│   ├── 2_product_metrics_view.sql
│   ├── materialize_product_historical.sql
│   ├── market_insights
│   │   ├── historical_view.sql
│   │   ├── best_sellers_workflow.sql
│   │   └── snapshot_view.sql
│   ├── 1_product_view.sql
│   ├── 4_product_detailed_view.sql
│   ├── materialize_product_detailed.sql
│   └── main_workflow.sql
├── CONTRIBUTING.md
├── config_parser.py
├── auth.py
├── data
│   └── language_codes.csv
├── cloud_env_setup.py
├── cloud_bigquery.py
├── README.md
├── LICENSE
└── cloud_data_transfer.py
/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache*
--------------------------------------------------------------------------------
/images/copy_icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/shopping-markup/HEAD/images/copy_icon.png
--------------------------------------------------------------------------------
/images/architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/shopping-markup/HEAD/images/architecture.png
--------------------------------------------------------------------------------
/plugins/cloud_utils/images/module_overview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/shopping-markup/HEAD/plugins/cloud_utils/images/module_overview.png
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | google-cloud-bigquery==3.4.2
2 | google-api-python-client==2.74.0
3 | google-cloud-bigquery-datatransfer==3.10.1
4 | grpcio==1.51.1
5 | grpcio-tools==1.51.1
6 | PyYAML==6.0
7 | pytz==2022.7.1
--------------------------------------------------------------------------------
/config.yaml:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2020 Google LLC.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | #      http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | # The list of configurations used by the MarkUp installation process.
17 |
18 | # The BigQuery dataset location.
19 | LOCATION: us
--------------------------------------------------------------------------------
/setup.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
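# Usage (illustrative; the flags are simply forwarded to cloud_env_setup.py
# via "$@", and the ids below are placeholders):
#   bash setup.sh --project_id=<gcp-project-id> --merchant_id=<gmc-id> \
#     --ads_customer_id=<ads-customer-id> --market_insights=true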
2 | # Copyright 2020 Google LLC.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | #      http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | # MarkUp setup script.
17 |
18 | set -e
19 |
20 | VIRTUALENV_PATH="$HOME/markup-venv"
21 |
22 | # Create virtual environment with python3.
23 | if [[ ! -d "${VIRTUALENV_PATH}" ]]; then
24 |   virtualenv -p python3 "${VIRTUALENV_PATH}"
25 | fi
26 |
27 |
28 | # Activate virtual environment.
29 | source "${VIRTUALENV_PATH}/bin/activate"
30 |
31 | # Install dependencies.
32 | pip install -r requirements.txt
33 |
34 | # Set up cloud environment.
35 | PYTHONPATH=.:$PYTHONPATH
36 | export PYTHONPATH
37 | python cloud_env_setup.py "$@"
--------------------------------------------------------------------------------
/scripts/3_customer_view.sql:
--------------------------------------------------------------------------------
1 | # Copyright 2020 Google LLC.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | #      http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | CREATE OR REPLACE VIEW `{project_id}.{dataset}.customer_view`
16 | AS (
17 |   WITH
18 |     LatestDate AS (
19 |       SELECT
20 |         MAX(_DATA_DATE) AS latest_date
21 |       FROM
22 |         `{project_id}.{dataset}.Customer_{external_customer_id}`
23 |     )
24 |   SELECT DISTINCT
25 |     _DATA_DATE AS data_date,
26 |     LatestDate.latest_date,
27 |     ExternalCustomerId,
28 |     AccountDescriptiveName
29 |   FROM
30 |     `{project_id}.{dataset}.Customer_{external_customer_id}`,
31 |     LatestDate
32 | );
33 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # How to Contribute
2 |
3 | We'd love to accept your patches and contributions to this project. There are
4 | just a few small guidelines you need to follow.
5 |
6 | ## Contributor License Agreement
7 |
8 | Contributions to this project must be accompanied by a Contributor License
9 | Agreement. You (or your employer) retain the copyright to your contribution;
10 | this simply gives us permission to use and redistribute your contributions as
11 | part of the project. Head over to <https://cla.developers.google.com/> to see
12 | your current agreements on file or to sign a new one.
13 |
14 | You generally only need to submit a CLA once, so if you've already submitted one
15 | (even if it was for a different project), you probably don't need to do it
16 | again.
17 |
18 | ## Code reviews
19 |
20 | All submissions, including submissions by project members, require review. We
21 | use GitHub pull requests for this purpose.
Consult 22 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more 23 | information on using pull requests. 24 | 25 | ## Community Guidelines 26 | 27 | This project follows 28 | [Google's Open Source Community Guidelines](https://opensource.google/conduct/). 29 | -------------------------------------------------------------------------------- /plugins/cloud_utils/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 Google LLC.. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # Copyright 2020 Google LLC 17 | # 18 | # Licensed under the Apache License, Version 2.0 (the "License"); 19 | # you may not use this file except in compliance with the License. 20 | # You may obtain a copy of the License at 21 | # 22 | # http://www.apache.org/licenses/LICENSE-2.0 23 | # 24 | # Unless required by applicable law or agreed to in writing, software 25 | # distributed under the License is distributed on an "AS IS" BASIS, 26 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 27 | # See the License for the specific language governing permissions and 28 | # limitations under the License. 29 | 30 | -------------------------------------------------------------------------------- /config_parser.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 Google LLC.. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # python3 17 | """Config parser module. 18 | 19 | This module retrieves config values. 20 | """ 21 | 22 | import functools 23 | import yaml 24 | 25 | 26 | @functools.lru_cache() 27 | def _get_config(config_key: str) -> str: 28 | """Returns value for a given config key. 29 | 30 | The config values are retrieved from "config.yaml" file for the 31 | first invocation of config key. The subsequent invocation returns 32 | the value from cache. 33 | 34 | Args: 35 | config_key: The key to retrieve a value for. 36 | 37 | Returns: 38 | Value for key from config file. 
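
  Example (with this repository's config.yaml, which sets LOCATION to "us"):
    >>> _get_config('LOCATION')
    'us'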
39 | """ 40 | 41 | with open('config.yaml') as config_file: 42 | configs = yaml.safe_load(config_file) 43 | return configs[config_key] 44 | 45 | 46 | def get_dataset_location() -> str: 47 | """Returns the dataset location.""" 48 | return _get_config('LOCATION') 49 | -------------------------------------------------------------------------------- /auth.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 Google LLC.. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # python3 17 | """Contains cloud authentication related functionality.""" 18 | 19 | import logging 20 | from typing import List 21 | from urllib import parse 22 | 23 | BASE_URL = 'https://www.gstatic.com/bigquerydatatransfer/oauthz/auth' 24 | REDIRECT_URI = 'urn:ietf:wg:oauth:2.0:oob' 25 | 26 | 27 | def retrieve_authorization_code(client_id: str, scopes: List[str], 28 | app_name: str): 29 | """Returns authorization code. 30 | 31 | Args: 32 | client_id: The client id. 33 | scopes: The list of scopes. 34 | app_name: Name of the app. 35 | """ 36 | scopes_str = ' '.join(scopes) 37 | authorization_code_request = { 38 | 'client_id': client_id, 39 | 'scope': scopes_str, 40 | 'redirect_uri': REDIRECT_URI 41 | } 42 | 43 | encoded_request = parse.urlencode( 44 | authorization_code_request, quote_via=parse.quote) 45 | url = f'{BASE_URL}?{encoded_request}' 46 | logging.info( 47 | 'Please click on the URL below to authorize %s and paste the ' 48 | 'authorization code.', app_name) 49 | logging.info('URL - %s', url) 50 | 51 | return input('Authorization Code : ') 52 | -------------------------------------------------------------------------------- /scripts/targeted_products/targeted_product_ddl.sql: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC.. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | -- DDL definition for TargetedProduct table. 16 | -- 17 | -- Using `CREATE IF NOT EXISTS` as the table is used in `product_detailed` views. 18 | CREATE OR REPLACE TABLE `{project_id}.{dataset}.TargetedProduct_{external_customer_id}` 19 | ( 20 | data_date DATE, 21 | product_id STRING, 22 | merchant_id INT64, 23 | target_country STRING 24 | ); 25 | 26 | -- DDL definition for ParsedCriteria table. 
27 | CREATE OR REPLACE TABLE `{project_id}.{dataset}.ParsedCriteria_{external_customer_id}` 28 | ( 29 | criteria STRING, 30 | custom_label0 STRING, 31 | custom_label1 STRING, 32 | custom_label2 STRING, 33 | custom_label3 STRING, 34 | custom_label4 STRING, 35 | product_type_l1 STRING, 36 | product_type_l2 STRING, 37 | product_type_l3 STRING, 38 | product_type_l4 STRING, 39 | product_type_l5 STRING, 40 | google_product_category_l1 STRING, 41 | google_product_category_l2 STRING, 42 | google_product_category_l3 STRING, 43 | google_product_category_l4 STRING, 44 | google_product_category_l5 STRING, 45 | brand STRING, 46 | offer_id STRING, 47 | channel STRING, 48 | channel_exclusivity STRING, 49 | condition STRING 50 | ); 51 | -------------------------------------------------------------------------------- /scripts/2_product_metrics_view.sql: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC.. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | CREATE OR REPLACE VIEW `{project_id}.{dataset}.product_metrics_view` 16 | AS ( 17 | WITH 18 | CountryTable AS ( 19 | SELECT DISTINCT 20 | parent_id AS country_criterion, 21 | country_code 22 | FROM 23 | `{project_id}.{dataset}.geo_targets` 24 | ), 25 | LanguageTable AS ( 26 | SELECT DISTINCT 27 | criterion_id AS language_criterion, 28 | language_code 29 | FROM 30 | `{project_id}.{dataset}.language_codes` 31 | ) 32 | SELECT 33 | _DATA_DATE AS data_date, 34 | externalcustomerid, 35 | merchantid, 36 | channel, 37 | LanguageTable.language_code, 38 | CountryTable.country_code, 39 | offerid AS offer_id, 40 | SUM(impressions) AS impressions, 41 | SUM(clicks) AS clicks, 42 | SUM(cost) AS cost, 43 | SUM(conversions) AS conversions, 44 | SUM(ConversionValue) AS conversions_value 45 | FROM 46 | `{project_id}.{dataset}.ShoppingProductStats_{external_customer_id}` 47 | AS ShoppingProductStats 48 | INNER JOIN 49 | CountryTable 50 | ON CountryTable.country_criterion = ShoppingProductStats.countrycriteriaid 51 | INNER JOIN 52 | LanguageTable 53 | ON CAST(LanguageTable.language_criterion AS STRING) = ShoppingProductStats.languagecriteriaid 54 | GROUP BY 55 | _DATA_DATE, 56 | externalcustomerid, 57 | merchantid, 58 | channel, 59 | LanguageTable.language_code, 60 | CountryTable.country_code, 61 | offer_id 62 | ); 63 | -------------------------------------------------------------------------------- /scripts/materialize_product_historical.sql: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC.. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | -- Stored procedure for creating historic snapshot at a product category level. 16 | 17 | CREATE OR REPLACE PROCEDURE `{project_id}.{dataset}.product_historical_proc`() 18 | BEGIN 19 | CREATE OR REPLACE TABLE `{project_id}.{dataset}.product_historical_materialized` 20 | AS ( 21 | SELECT 22 | data_date, 23 | account_id, 24 | product_type_l1, 25 | product_type_l2, 26 | product_type_l3, 27 | product_type_l4, 28 | product_type_l5, 29 | target_country, 30 | channel, 31 | COUNT(DISTINCT unique_product_id) AS total_products, 32 | COUNT(DISTINCT IF(is_approved = 1, unique_product_id, NULL)) AS total_approved, 33 | COUNT(DISTINCT IF(funnel_in_stock = 1, unique_product_id, NULL)) AS total_in_stock, 34 | COUNT(DISTINCT IF(funnel_targeted = 1, unique_product_id, NULL)) AS total_targeted, 35 | COUNT(DISTINCT IF(funnel_has_impression = 1, unique_product_id, NULL)) AS total_products_with_impressions_in_30_days, 36 | COUNT(DISTINCT IF(funnel_has_clicks = 1, unique_product_id, NULL)) AS total_products_with_clicks_in_30_days, 37 | IFNULL(SUM(impressions_30_days), 0) AS total_impressions_30_days, 38 | IFNULL(SUM(clicks_30_days), 0) AS total_clicks_30_days, 39 | IFNULL(SUM(cost_30_days), 0) AS total_cost_30_days 40 | FROM 41 | `{project_id}.{dataset}.product_detailed_view` 42 | WHERE 43 | data_date >= DATE_SUB(CURRENT_DATE(), INTERVAL 90 DAY) 44 | GROUP BY 45 | data_date, 46 | account_id, 47 | product_type_l1, 48 | product_type_l2, 49 | product_type_l3, 50 | product_type_l4, 51 | product_type_l5, 52 | target_country, 53 | channel 54 | ); 55 | END; 56 | 57 | CALL `{project_id}.{dataset}.product_historical_proc`(); 58 | -------------------------------------------------------------------------------- /scripts/market_insights/historical_view.sql: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC.. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | -- Creates a historical view for Performance, Status, Price & Price Benchmarks. 
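-- For example, with the 1% tolerance used below, a product priced at 10.50
-- against a price_benchmark_value of 10.00 gives
-- price_vs_benchmark = SAFE_DIVIDE(10.50, 10.00) - 1 = 0.05, so it falls in
-- the 'More than PB' band.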
16 | CREATE OR REPLACE VIEW `{project_id}.{dataset}.market_insights_historical_view` AS (
17 |   SELECT
18 |     data_date,
19 |     unique_product_id,
20 |     target_country,
21 |     price,
22 |     price_currency,
23 |     sale_price,
24 |     sale_price_currency,
25 |     price_benchmark_value,
26 |     price_benchmark_currency,
27 |     price_benchmark_timestamp,
28 |     CASE
29 |       WHEN price_benchmark_value IS NULL THEN ''
30 |       WHEN (SAFE_DIVIDE(price, price_benchmark_value) - 1) < -0.01 THEN 'Less than PB' -- ASSUMPTION: Enter % as a decimal here
31 |       WHEN (SAFE_DIVIDE(price, price_benchmark_value) - 1) > 0.01 THEN 'More than PB' -- ASSUMPTION: Enter % as a decimal here
32 |       ELSE 'Equal to PB'
33 |     END AS price_competitiveness_band,
34 |     SAFE_DIVIDE(price, price_benchmark_value) - 1 AS price_vs_benchmark,
35 |     SAFE_DIVIDE(sale_price, price_benchmark_value) - 1 AS sale_price_vs_benchmark,
36 |   FROM (
37 |     SELECT DISTINCT
38 |       _PARTITIONDATE AS data_date,
39 |       CONCAT(CAST(merchant_id AS STRING), '|', product_id) AS unique_product_id,
40 |       target_country,
41 |       price.value AS price,
42 |       price.currency AS price_currency,
43 |       sale_price.value AS sale_price,
44 |       sale_price.currency AS sale_price_currency,
45 |     FROM `{project_id}.{dataset}.Products_{merchant_id}` AS Products,
46 |       Products.destinations,
47 |       UNNEST(ARRAY_CONCAT(destinations.approved_countries, destinations.pending_countries, destinations.disapproved_countries)) AS target_country
48 |   )
49 |   LEFT JOIN (
50 |     SELECT
51 |       _PARTITIONDATE AS data_date,
52 |       CONCAT(CAST(merchant_id AS STRING), '|', product_id) AS unique_product_id,
53 |       country_of_sale AS target_country,
54 |       price_benchmark_value,
55 |       price_benchmark_currency,
56 |       price_benchmark_timestamp
57 |     FROM `{project_id}.{dataset}.Products_PriceBenchmarks_{merchant_id}`
58 |   )
59 |   USING (data_date, unique_product_id, target_country)
60 | )
--------------------------------------------------------------------------------
/scripts/market_insights/best_sellers_workflow.sql:
--------------------------------------------------------------------------------
1 | # Copyright 2021 Google LLC.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | #      http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 15 | CREATE OR REPLACE TABLE `{project_id}.{dataset}.market_insights_best_sellers_materialized` AS ( 16 | WITH 17 | best_sellers AS ( 18 | SELECT 19 | _PARTITIONDATE as data_date, 20 | rank_id, 21 | rank, 22 | previous_rank, 23 | ranking_country, 24 | ranking_category, 25 | ranking_category_path.name as ranking_category_path, 26 | IF( 27 | ARRAY_LENGTH(SPLIT(ranking_category_path.name, ' > ')) = 1, 28 | ranking_category_path.name, 29 | NULL 30 | ) as ranking_category_name_l1, 31 | IF( 32 | ARRAY_LENGTH(SPLIT(ranking_category_path.name, ' > ')) = 2, 33 | ranking_category_path.name, 34 | NULL 35 | ) as ranking_category_name_l2, 36 | IF( 37 | ARRAY_LENGTH(SPLIT(ranking_category_path.name, ' > ')) = 3, 38 | ranking_category_path.name, 39 | NULL 40 | ) as ranking_category_name_l3, 41 | (SELECT ANY_VALUE(name) FROM b.product_title) AS product_title, 42 | gtins, 43 | brand, 44 | google_product_category_path.name as google_product_category_path, 45 | google_product_category, 46 | price_range.min, 47 | price_range.max, 48 | price_range.currency, 49 | FROM 50 | `{project_id}.{dataset}.BestSellers_TopProducts_{merchant_id}` b 51 | JOIN b.google_product_category_path google_product_category_path 52 | JOIN b.ranking_category_path ranking_category_path 53 | JOIN b.product_title product_title 54 | WHERE 55 | _PARTITIONDATE = DATE_SUB(CURRENT_DATE(), INTERVAL 2 DAY) 56 | # Adjust as necessary for other locales 57 | AND (product_title.locale IN ("en-US") OR product_title.locale IS NULL) 58 | AND google_product_category_path.locale = "en-US" 59 | AND ranking_category_path.locale = "en-US" 60 | ), 61 | inventory AS ( 62 | SELECT DISTINCT 63 | rank_id 64 | FROM 65 | `{project_id}.{dataset}.BestSellers_TopProducts_Inventory_{merchant_id}` 66 | WHERE 67 | _PARTITIONDATE = DATE_SUB(CURRENT_DATE(), INTERVAL 2 DAY) 68 | ) 69 | SELECT 70 | best_sellers.*, 71 | IF(inventory.rank_id IS NULL, False, True) AS is_in_inventory, 72 | FROM 73 | best_sellers 74 | LEFT JOIN 75 | inventory 76 | USING (rank_id) 77 | ); 78 | -------------------------------------------------------------------------------- /data/language_codes.csv: -------------------------------------------------------------------------------- 1 | Language name,Language code,Criterion ID 2 | Arabic,ar,1019 3 | Bengali,bn,1056 4 | Brazilian Portuguese,pt_BR,1016 5 | Bulgarian,bg,1020 6 | Catalan,ca,1038 7 | Chinese,zh,1008 8 | Chinese (simplified),zh_CN,1017 9 | Chinese (traditional),zh_TW,1018 10 | Croatian,hr,1039 11 | Czech,cs,1021 12 | Danish,da,1009 13 | Dutch,nl,1010 14 | English,en,1000 15 | English (US),en_US,1006 16 | English (GB),en_GB,1007 17 | Estonian,et,1043 18 | Filipino,tl,1042 19 | Finnish,fi,1011 20 | French,fr,1002 21 | German,de,1001 22 | Greek,el,1022 23 | Hebrew,iw,1027 24 | Hindi,hi,1023 25 | Hungarian,hu,1024 26 | Icelandic,is,1026 27 | Indonesian,id,1025 28 | Italian,it,1004 29 | Japanese,ja,1005 30 | Korean,ko,1012 31 | Latvian,lv,1028 32 | Lithuanian,lt,1029 33 | Malay,ms,1102 34 | Norwegian,no,1013 35 | Persian,fa,1064 36 | Polish,pl,1030 37 | Portuguese,pt,1014 38 | Romanian,ro,1032 39 | Russian,ru,1031 40 | Serbian,sr,1035 41 | Slovak,sk,1033 42 | Slovenian,sl,1034 43 | Spanish,es,1003 44 | Swedish,sv,1015 45 | Tamil,ta,1130 46 | Telugu,te,1131 47 | Thai,th,1044 48 | Turkish,tr,1037 49 | Ukrainian,uk,1036 50 | Urdu,ur,1041 51 | Vietnamese,vi,1040 52 | N/A,aa,1045 53 | N/A,ab,1046 54 | N/A,af,1047 55 | N/A,am,1048 56 | N/A,as,1049 57 | N/A,ay,1050 58 | N/A,az,1051 59 | N/A,ba,1052 60 | N/A,be,1053 61 | N/A,bh,1054 62 | 
N/A,bi,1055 63 | N/A,bn,1056 64 | N/A,bo,1057 65 | N/A,br,1058 66 | N/A,co,1059 67 | N/A,cy,1060 68 | N/A,nn,1061 69 | N/A,eo,1062 70 | N/A,eu,1063 71 | N/A,fa,1064 72 | N/A,fj,1065 73 | N/A,fo,1066 74 | N/A,fy,1067 75 | N/A,ga,1068 76 | N/A,gd,1069 77 | N/A,gl,1070 78 | N/A,gn,1071 79 | N/A,gu,1072 80 | N/A,gv,1073 81 | N/A,ha,1074 82 | N/A,bs,1075 83 | N/A,hy,1076 84 | N/A,ia,1077 85 | N/A,ie,1078 86 | N/A,ik,1079 87 | N/A,iu,1080 88 | N/A,jv,1081 89 | N/A,ka,1082 90 | N/A,kk,1083 91 | N/A,kl,1084 92 | N/A,km,1085 93 | N/A,kn,1086 94 | N/A,ks,1087 95 | N/A,ku,1088 96 | N/A,kw,1089 97 | N/A,ky,1090 98 | N/A,la,1091 99 | N/A,lb,1092 100 | N/A,ln,1093 101 | N/A,lo,1094 102 | N/A,mg,1095 103 | N/A,mi,1096 104 | N/A,mk,1097 105 | N/A,ml,1098 106 | N/A,mn,1099 107 | N/A,mo,1100 108 | N/A,mr,1101 109 | N/A,ms,1102 110 | N/A,mt,1103 111 | N/A,my,1104 112 | N/A,na,1105 113 | N/A,ne,1106 114 | N/A,oc,1107 115 | N/A,om,1108 116 | N/A,or,1109 117 | N/A,pa,1110 118 | N/A,ps,1111 119 | N/A,qu,1112 120 | N/A,rm,1113 121 | N/A,rn,1114 122 | N/A,rw,1115 123 | N/A,sa,1116 124 | N/A,sd,1117 125 | N/A,se,1118 126 | N/A,sg,1119 127 | N/A,zu,1120 128 | N/A,si,1121 129 | N/A,sm,1122 130 | N/A,sn,1123 131 | N/A,so,1124 132 | N/A,sq,1125 133 | N/A,ss,1126 134 | N/A,st,1127 135 | N/A,su,1128 136 | N/A,sw,1129 137 | N/A,ta,1130 138 | N/A,te,1131 139 | N/A,tg,1132 140 | N/A,ti,1133 141 | N/A,tk,1134 142 | N/A,tn,1135 143 | N/A,to,1136 144 | N/A,ts,1137 145 | N/A,tt,1138 146 | N/A,tw,1139 147 | N/A,ug,1140 148 | N/A,uz,1141 149 | N/A,vo,1142 150 | N/A,wo,1143 151 | N/A,xh,1144 152 | N/A,yi,1145 153 | N/A,yo,1146 154 | N/A,za,1147 155 | N/A,en_AU,1148 156 | N/A,pt_PT,1149 157 | N/A,zh_HK,1150 -------------------------------------------------------------------------------- /scripts/market_insights/snapshot_view.sql: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC.. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | -- Creates a latest snapshot view with Best Sellers & Price Benchmarks 16 | CREATE OR REPLACE VIEW `{project_id}.{dataset}.market_insights_snapshot_view` AS ( 17 | WITH 18 | price_benchmarks AS ( 19 | SELECT 20 | pb.data_date AS data_date, 21 | pb.unique_product_id, 22 | pb.target_country, 23 | pb.price_benchmark_value, 24 | pb.price_benchmark_currency, 25 | pb.price_benchmark_timestamp, 26 | CASE 27 | WHEN pb.price_benchmark_value IS NULL THEN '' 28 | WHEN (SAFE_DIVIDE(product.effective_price_value, pb.price_benchmark_value) - 1) < -0.01 29 | THEN 'Less than PB' -- ASSUMPTION: Enter % as a decimal here 30 | WHEN (SAFE_DIVIDE(product.effective_price_value, pb.price_benchmark_value) - 1) > 0.01 31 | THEN 'More than PB' -- ASSUMPTION: Enter % as a decimal here 32 | ELSE 'Equal to PB' 33 | END AS price_competitiveness_band, 34 | SAFE_DIVIDE(product.effective_price_value, pb.price_benchmark_value) - 1 AS price_vs_benchmark, 35 | product.effective_price_value AS effective_price, 36 | FROM ( 37 | SELECT 38 | unique_product_id, 39 | target_country, 40 | latest_date, 41 | IF( 42 | sale_price_effective_start_date <= CURRENT_TIMESTAMP() 43 | AND sale_price_effective_end_date > CURRENT_TIMESTAMP(), 44 | sale_price.value, 45 | price.value) AS effective_price_value 46 | FROM 47 | `{project_id}.{dataset}.product_detailed_materialized` 48 | ) AS product 49 | INNER JOIN ( 50 | SELECT 51 | _PARTITIONDATE as data_date, 52 | CONCAT(CAST(merchant_id AS STRING), '|', product_id) AS unique_product_id, 53 | country_of_sale as target_country, 54 | price_benchmark_value, 55 | price_benchmark_currency, 56 | price_benchmark_timestamp 57 | FROM `{project_id}.{dataset}.Products_PriceBenchmarks_{merchant_id}` 58 | ) pb 59 | ON 60 | product.unique_product_id = pb.unique_product_id 61 | AND product.target_country = pb.target_country 62 | AND product.latest_date = pb.data_date 63 | ), 64 | best_sellers AS ( 65 | SELECT DISTINCT 66 | _PARTITIONDATE AS data_date, 67 | CONCAT(CAST(merchant_id AS STRING), '|', product_id) AS unique_product_id, 68 | SPLIT(rank_id, ':')[SAFE_ORDINAL(2)] AS target_country, 69 | TRUE as is_best_seller, 70 | FROM 71 | `{project_id}.{dataset}.BestSellers_TopProducts_Inventory_{merchant_id}` 72 | WHERE _PARTITIONDATE = DATE_SUB(CURRENT_DATE(), INTERVAL 2 DAY) 73 | ) 74 | SELECT 75 | product, 76 | price_benchmarks, 77 | best_sellers, 78 | FROM ( 79 | SELECT 80 | *, 81 | IF( 82 | sale_price_effective_start_date <= CURRENT_TIMESTAMP() 83 | AND sale_price_effective_end_date > CURRENT_TIMESTAMP(), 84 | sale_price.value, 85 | price.value) AS effective_price, 86 | FROM `{project_id}.{dataset}.product_detailed_materialized` 87 | ) AS product 88 | LEFT JOIN price_benchmarks 89 | ON product.unique_product_id = price_benchmarks.unique_product_id 90 | AND product.target_country = price_benchmarks.target_country 91 | AND product.latest_date = price_benchmarks.data_date 92 | LEFT JOIN best_sellers 93 | ON product.unique_product_id = best_sellers.unique_product_id 94 | AND product.target_country = best_sellers.target_country 95 | AND product.latest_date = best_sellers.data_date 96 | ); 97 | -------------------------------------------------------------------------------- /plugins/cloud_utils/utils.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 Google LLC.. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # Lint as: python3 17 | # Copyright 2020 Google LLC 18 | # 19 | # Licensed under the Apache License, Version 2.0 (the "License"); 20 | # you may not use this file except in compliance with the License. 21 | # You may obtain a copy of the License at 22 | # 23 | # http://www.apache.org/licenses/LICENSE-2.0 24 | # 25 | # Unless required by applicable law or agreed to in writing, software 26 | # distributed under the License is distributed on an "AS IS" BASIS, 27 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 28 | # See the License for the specific language governing permissions and 29 | # limitations under the License. 30 | 31 | """A utility module for cloud_utils package. 32 | 33 | This module implements the common methods required for different modules under 34 | cloud_utils package. 35 | """ 36 | 37 | import time 38 | from typing import Any, Dict, Text 39 | 40 | import logging 41 | import apiclient 42 | from googleapiclient import errors 43 | from googleapiclient import http 44 | 45 | from google.api_core import retry 46 | 47 | 48 | # Number of seconds to wait before re-checking for operation status. 49 | _WAIT_FOR_OPERATION_SLEEP_SECONDS = 10 50 | _RETRIABLE_STATUS_CODES = ( 51 | 429, # Too Many Requests 52 | 500, # Internal Server Error 53 | 503) # Service Unavailable 54 | 55 | 56 | class Error(Exception): 57 | """A generic error thrown for any exception in utils module.""" 58 | pass 59 | 60 | 61 | def _is_retriable_http_error(error: errors.HttpError) -> bool: 62 | """Checks if HttpError is in _RETRIABLE_STATUS_CODES. 63 | 64 | This function requires HttpError to have a valid response. 65 | 66 | Args: 67 | error: The http error to check. 68 | 69 | Returns: 70 | True if HttpError is retriable, otherwise False. 71 | """ 72 | if error.__dict__['resp'].status in _RETRIABLE_STATUS_CODES: 73 | return True 74 | return False 75 | 76 | 77 | @retry.Retry(predicate=_is_retriable_http_error) 78 | def execute_request(request: http.HttpRequest) -> Any: 79 | """Executes an HTTP request and return its response. 80 | 81 | This method executes an HTTP request and retries if the response error code is 82 | capable of being retried. Refer to `retry_utils.py` module to see the list of 83 | error codes for which the retry will be attempted. 84 | 85 | Args: 86 | request: HTTP request to be executed. 87 | 88 | Returns: 89 | response: Response from the HTTP request. 90 | """ 91 | response = request.execute() 92 | return response 93 | 94 | 95 | def wait_for_operation(operation_client: apiclient.discovery.Resource, 96 | operation: Dict[Text, Any]) -> None: 97 | """Waits for the completion of operation. 98 | 99 | This method retrieves operation resource and checks for its status. If the 100 | operation is not completed, then the operation is re-checked after 101 | `_WAIT_FOR_OPERATION_SLEEP_SECONDS` seconds. 102 | 103 | Args: 104 | operation_client: Client with methods for interacting with the operation 105 | APIs. The `build_service_client` method from `cloud_auth` module can be 106 | used to build the client. 
107 | operation: Resource representing long running operation. 108 | 109 | Raises: 110 | Error: If the operation is not successfully completed. 111 | """ 112 | while True: 113 | request = operation_client.get(name=operation['name']) 114 | updated_operation = execute_request(request) 115 | if updated_operation.get('done'): 116 | logging.info(f'Operation {operation["name"]} successfully completed.') 117 | return 118 | if updated_operation.get('error'): 119 | logging.info( 120 | f'Operation {operation["name"]} failed to complete successfully.') 121 | raise Error( 122 | f'Operation {operation["name"]} not completed. Error Details - ' 123 | f'{updated_operation["error"]}') 124 | logging.info( 125 | f'Operation {operation["name"]} still in progress. Sleeping for ' 126 | f'{_WAIT_FOR_OPERATION_SLEEP_SECONDS} seconds before retrying.') 127 | time.sleep(_WAIT_FOR_OPERATION_SLEEP_SECONDS) 128 | -------------------------------------------------------------------------------- /plugins/cloud_utils/cloud_api.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 Google LLC.. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # Lint as: python3 17 | # Copyright 2020 Google LLC 18 | # 19 | # Licensed under the Apache License, Version 2.0 (the "License"); 20 | # you may not use this file except in compliance with the License. 21 | # You may obtain a copy of the License at 22 | # 23 | # http://www.apache.org/licenses/LICENSE-2.0 24 | # 25 | # Unless required by applicable law or agreed to in writing, software 26 | # distributed under the License is distributed on an "AS IS" BASIS, 27 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 28 | # See the License for the specific language governing permissions and 29 | # limitations under the License. 30 | 31 | """Wrapper for Cloud APIs.""" 32 | 33 | import json 34 | from typing import Any, List 35 | 36 | import logging 37 | from googleapiclient import errors 38 | import requests.exceptions 39 | 40 | from google.auth.transport.requests import AuthorizedSession 41 | 42 | from plugins.cloud_utils import cloud_auth 43 | from plugins.cloud_utils import utils 44 | 45 | _SERVICE_URL = 'https://serviceusage.googleapis.com/v1/projects' 46 | 47 | 48 | class Error(Exception): 49 | """A generic error thrown for any exceptions in cloud_api module.""" 50 | pass 51 | 52 | 53 | class CloudApiUtils(object): 54 | """CloudApiUtils class provides methods to manage Cloud Services. 55 | 56 | This class manages Cloud APIs within a single GCP project. 57 | 58 | Typical usage example: 59 | >>> ca_utils = CloudApiUtils('project_id', 60 | 'service_account_key_file.json') 61 | >>> ca_utils.enable_apis(['storage-component.googleapis.com']) 62 | """ 63 | 64 | def __init__(self, 65 | project_id: str, 66 | service_account_key_file: str = None) -> None: 67 | """Initialise new instance of CloudApiUtils. 68 | 69 | Args: 70 | project_id: GCP project id. 
71 | service_account_key_file: Optional. File containing service account key. 72 | If not passed the default credential will be used. There are following 73 | ways to create service accounts: 74 | 1. Use `create_service_account_key` method from `cloud_auth` module. 75 | 2. Use `gcloud` command line utility as documented here - 76 | https://cloud.google.com/iam/docs/creating-managing-service-account-keys 77 | """ 78 | self.client = cloud_auth.build_service_client('serviceusage', 79 | service_account_key_file) 80 | self.project_id = project_id 81 | 82 | def enable_apis(self, apis: List[str]) -> None: 83 | """Enables multiple Cloud APIs for a GCP project. 84 | 85 | Args: 86 | apis: The list of APIs to be enabled. 87 | 88 | Raises: 89 | Error: If the request was not processed successfully. 90 | """ 91 | parent = f'projects/{self.project_id}' 92 | request_body = {'serviceIds': apis} 93 | try: 94 | request = self.client.services().batchEnable( 95 | parent=parent, body=request_body) 96 | operation = utils.execute_request(request) 97 | utils.wait_for_operation(self.client.operations(), operation) 98 | except errors.HttpError: 99 | logging.exception('Error occurred while enabling Cloud APIs.') 100 | raise Error('Error occurred while enabling Cloud APIs.') 101 | 102 | 103 | def post_request(session: AuthorizedSession, url: str, data: Any) -> None: 104 | """Posts a request to the given url. 105 | 106 | Args: 107 | session: The authorised session. 108 | url: The url. 109 | data: The data to be posted. 110 | 111 | Raises: 112 | Error: If the request was not processed successfully. 113 | """ 114 | try: 115 | response = session.post(url, data) 116 | response.raise_for_status() 117 | except requests.exceptions.HTTPError as error: 118 | logging.exception('HTTPError "%s" "%s": post_request failed', 119 | error.response.status_code, error.response.reason) 120 | raise Error('HTTPError {} {}: post_request failed.'.format( 121 | error.response.status_code, error.response.reason)) 122 | 123 | 124 | def disable_api(session: AuthorizedSession, project_id: str, api: str) -> None: 125 | """Disables Cloud API for a given project. 126 | 127 | Args: 128 | session: The authorised session. 129 | project_id: GCP project id. 130 | api: The API to be disabled. 131 | """ 132 | disable_api_url = '{}/{}/services/{}:disable'.format(_SERVICE_URL, project_id, 133 | api) 134 | logging.info('Disabling following API for "%s" project : "%s".', project_id, 135 | api) 136 | post_request(session, disable_api_url, {'disableDependentServices': True}) 137 | 138 | 139 | def is_api_enabled(session: AuthorizedSession, project_id: str, 140 | api: str) -> bool: 141 | """Checks if Cloud API is enabled for given project. 142 | 143 | Args: 144 | session: The authorised session. 145 | project_id: GCP project id. 146 | api: The API to be checked. 147 | 148 | Returns: 149 | True: If the API is enabled. 150 | False: If the API is not enabled. 151 | """ 152 | get_service_url = '{}/{}/services/{}'.format(_SERVICE_URL, project_id, api) 153 | response = session.get(get_service_url) 154 | service = json.loads(response.content) 155 | if service['state'] == 'ENABLED': 156 | return True 157 | return False 158 | -------------------------------------------------------------------------------- /cloud_env_setup.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 Google LLC.. 
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | #      http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | # python3
17 | """Cloud Environment setup module for MarkUp premium.
18 |
19 | This module automates the following steps:
20 |   1. Enable all the required Cloud APIs.
21 |   2. Create GMC and Google Ads data transfer services.
22 |   3. Set up tables and views in BigQuery.
23 | """
24 |
25 | import argparse
26 | import logging
27 | import os
28 | from typing import Dict, Union
29 |
30 | import cloud_bigquery
31 | import cloud_data_transfer
32 | import config_parser
33 | from google.cloud import exceptions
34 | from plugins.cloud_utils import cloud_api
35 |
36 | # Set logging level.
37 | logging.getLogger().setLevel(logging.INFO)
38 | logging.getLogger('googleapiclient.discovery').setLevel(logging.WARNING)
39 |
40 | # Required Cloud APIs to be enabled.
41 | _APIS_TO_BE_ENABLED = [
42 |     'bigquery.googleapis.com', 'bigquerydatatransfer.googleapis.com'
43 | ]
44 | _DATASET_ID = 'markup'
45 | _MATERIALIZE_PRODUCT_DETAILED_SQL = 'scripts/materialize_product_detailed.sql'
46 | _MATERIALIZE_PRODUCT_HISTORICAL_SQL = (
47 |     'scripts/materialize_product_historical.sql')
48 |
49 |
50 | def enable_apis(project_id: str) -> None:
51 |   """Enables list of cloud APIs for given cloud project.
52 |
53 |   Args:
54 |     project_id: A cloud project id.
55 |   """
56 |   cloud_api_utils = cloud_api.CloudApiUtils(project_id=project_id)
57 |   cloud_api_utils.enable_apis(_APIS_TO_BE_ENABLED)
58 |
59 |
60 | def parse_boolean(arg: str) -> bool:
61 |   """Returns boolean representation of argument."""
62 |   arg = str(arg).lower()
63 |   if 'true'.startswith(arg):
64 |     return True
65 |   return False
66 |
67 |
68 | def parse_arguments() -> argparse.Namespace:
69 |   """Initializes command line parser using argparse.
70 |
71 |   Returns:
72 |     An argparse.Namespace with the parsed command-line arguments.
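
  Example invocation (placeholder ids):
    python cloud_env_setup.py --project_id=my-project \
      --merchant_id=1234567 --ads_customer_id=123-456-7890 \
      --market_insights=true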
73 | """ 74 | parser = argparse.ArgumentParser() 75 | parser.add_argument('--project_id', help='GCP project id.', required=True) 76 | parser.add_argument( 77 | '--dataset_id', 78 | help='BigQuery dataset id.', 79 | default=_DATASET_ID, 80 | required=False) 81 | parser.add_argument( 82 | '--merchant_id', help='Google Merchant Center Account Id.', required=True) 83 | parser.add_argument( 84 | '--ads_customer_id', 85 | help='Google Ads External Customer Id.', 86 | required=True) 87 | parser.add_argument( 88 | '--market_insights', 89 | help='Deploy Market Insights solution.', 90 | type=parse_boolean, 91 | required=True) 92 | return parser.parse_args() 93 | 94 | 95 | def main(): 96 | args = parse_arguments() 97 | ads_customer_id = args.ads_customer_id.replace('-', '') 98 | data_transfer = cloud_data_transfer.CloudDataTransferUtils(args.project_id) 99 | logging.info('Enabling APIs.') 100 | enable_apis(args.project_id) 101 | logging.info('Enabled APIs.') 102 | logging.info('Creating %s dataset.', args.dataset_id) 103 | cloud_bigquery.create_dataset_if_not_exists(args.project_id, args.dataset_id) 104 | merchant_center_config = data_transfer.create_merchant_center_transfer( 105 | args.merchant_id, args.dataset_id, args.market_insights) 106 | ads_config = data_transfer.create_google_ads_transfer(ads_customer_id, 107 | args.dataset_id) 108 | try: 109 | logging.info('Checking the GMC data transfer status.') 110 | data_transfer.wait_for_transfer_completion(merchant_center_config) 111 | logging.info('The GMC data have been successfully transferred.') 112 | except cloud_data_transfer.DataTransferError: 113 | logging.error('If you have just created GMC transfer - you may need to' 114 | 'wait for up to 90 minutes before the data of your Merchant' 115 | 'account are prepared and available for the transfer.') 116 | raise 117 | logging.info('Checking the Google Ads data transfer status.') 118 | data_transfer.wait_for_transfer_completion(ads_config) 119 | logging.info('The Google Ads data have been successfully transferred.') 120 | cloud_bigquery.load_language_codes(args.project_id, args.dataset_id) 121 | cloud_bigquery.load_geo_targets(args.project_id, args.dataset_id) 122 | logging.info('Creating MarkUp specific views.') 123 | cloud_bigquery.execute_queries(args.project_id, args.dataset_id, 124 | args.merchant_id, ads_customer_id, 125 | args.market_insights) 126 | logging.info('Created MarkUp specific views.') 127 | logging.info('Updating targeted products') 128 | query = cloud_bigquery.get_main_workflow_sql(args.project_id, args.dataset_id, 129 | args.merchant_id, 130 | ads_customer_id) 131 | data_transfer.schedule_query( 132 | f'Main workflow - {args.dataset_id} - {ads_customer_id}', query) 133 | logging.info('Job created to run markup main workflow.') 134 | if args.market_insights: 135 | logging.info('Market insights requested, creating scheduled query') 136 | best_sellers_query = cloud_bigquery.get_best_sellers_workflow_sql( 137 | args.project_id, args.dataset_id, args.merchant_id) 138 | data_transfer.schedule_query( 139 | f'Best sellers workflow - {args.dataset_id} - {args.merchant_id}', 140 | best_sellers_query) 141 | logging.info('Job created to run best sellers workflow.') 142 | logging.info('MarkUp installation is complete!') 143 | 144 | 145 | if __name__ == '__main__': 146 | main() 147 | -------------------------------------------------------------------------------- /scripts/targeted_products/construct_parsed_criteria.sql: 
-------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC.. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | -- Constructs insert statements to populate ParsedCriteria table. 16 | -- 17 | -- @param criterions Array of product group criterions. 18 | -- @return String containing insert sql statements to construct ParsedCriteria 19 | -- table. 20 | CREATE OR REPLACE FUNCTION `{project_id}.{dataset}.constructParsedCriteria_{external_customer_id}`(criterions ARRAY) 21 | RETURNS STRING 22 | LANGUAGE js AS """ 23 | function getParsedCriteria(criterion) {{ 24 | let parsedCriteria = {{}} 25 | parsedCriteria['criteria'] = criterion; 26 | subCriterions = criterion.split('&+'); 27 | for (subCriterion of subCriterions) {{ 28 | if(subCriterion.startsWith('custom')) {{ 29 | const customLabelRegex = /custom(\\d+)/; 30 | result = subCriterion.split('=='); 31 | 32 | index = result[0].match(customLabelRegex)[1]; 33 | value = result[1] 34 | if(value != '*') {{ 35 | parsedCriteria['custom_label' + index] = value; 36 | }} 37 | }} 38 | if(subCriterion.startsWith('brand==')) {{ 39 | result = subCriterion.split('=='); 40 | value = result[1]; 41 | if(value != '*') {{ 42 | parsedCriteria['brand'] = value; 43 | }} 44 | }} 45 | if(subCriterion.startsWith('product_type_')) {{ 46 | const productTypeRegex = /product_type_l(\\d+)/; 47 | result = subCriterion.split('=='); 48 | 49 | index = result[0].match(productTypeRegex)[1]; 50 | value = result[1] 51 | if(value != '*') {{ 52 | parsedCriteria['product_type_l' + index] = value; 53 | }} 54 | }} 55 | if(subCriterion.startsWith('category_')) {{ 56 | const categoryRegex = /category_l(\\d+)/; 57 | result = subCriterion.split('=='); 58 | 59 | index = result[0].match(categoryRegex)[1]; 60 | value = result[1] 61 | if(value != '*') {{ 62 | parsedCriteria['google_product_category_l' + index] = value; 63 | }} 64 | }} 65 | if(subCriterion.startsWith('id==')) {{ 66 | result = subCriterion.split('=='); 67 | value = result[1]; 68 | if(value != '*') {{ 69 | parsedCriteria['offer_id'] = value; 70 | }} 71 | }} 72 | if(subCriterion.startsWith('channel==')) {{ 73 | result = subCriterion.split('=='); 74 | value = result[1]; 75 | if(value != '*') {{ 76 | channel = value.split(':')[1]; 77 | parsedCriteria['channel'] = channel; 78 | }} 79 | }} 80 | if(subCriterion.startsWith('channel_exclusivity==')) {{ 81 | result = subCriterion.split('=='); 82 | value = result[1]; 83 | if(value != '*') {{ 84 | channel_exclusivity = value.split(':')[1]; 85 | parsedCriteria['channel_exclusivity'] = channel_exclusivity; 86 | }} 87 | }} 88 | if(subCriterion.startsWith('c_condition==')) {{ 89 | result = subCriterion.split('=='); 90 | value = result[1]; 91 | if(value != '*') {{ 92 | condition = value.split(':')[1]; 93 | parsedCriteria['condition'] = condition; 94 | }} 95 | }} 96 | }} 97 | return parsedCriteria; 98 | }} 99 | sql = 'INSERT INTO `{project_id}.{dataset}.ParsedCriteria_{external_customer_id}` VALUES '; 100 
| i = 0; 101 | for (criterion of criterions) {{ 102 | criterion = criterion.replace(/"/g, '\\\\"'); 103 | parsedCriteria = getParsedCriteria(criterion) 104 | if ( i!=0 ) {{ 105 | sql += ','; 106 | }} 107 | sql += '(' 108 | sql += '"' + criterion + '",'; 109 | sql += (parsedCriteria['custom_label0'] ? '"' + parsedCriteria['custom_label0'] + '"' : 'NULL') + ','; 110 | sql += (parsedCriteria['custom_label1'] ? '"' + parsedCriteria['custom_label1'] + '"' : 'NULL') + ','; 111 | sql += (parsedCriteria['custom_label2'] ? '"' + parsedCriteria['custom_label2'] + '"' : 'NULL') + ','; 112 | sql += (parsedCriteria['custom_label3'] ? '"' + parsedCriteria['custom_label3'] + '"' : 'NULL') + ','; 113 | sql += (parsedCriteria['custom_label4'] ? '"' + parsedCriteria['custom_label4'] + '"' : 'NULL') + ','; 114 | sql += (parsedCriteria['product_type_l1'] ? '"' + parsedCriteria['product_type_l1'] + '"' : 'NULL') + ','; 115 | sql += (parsedCriteria['product_type_l2'] ? '"' + parsedCriteria['product_type_l2'] + '"' : 'NULL') + ','; 116 | sql += (parsedCriteria['product_type_l3'] ? '"' + parsedCriteria['product_type_l3'] + '"' : 'NULL') + ','; 117 | sql += (parsedCriteria['product_type_l4'] ? '"' + parsedCriteria['product_type_l4'] + '"' : 'NULL') + ','; 118 | sql += (parsedCriteria['product_type_l5'] ? '"' + parsedCriteria['product_type_l5'] + '"' : 'NULL') + ','; 119 | sql += (parsedCriteria['google_product_category_l1'] ? '"' + parsedCriteria['google_product_category_l1'] + '"' : 'NULL') + ','; 120 | sql += (parsedCriteria['google_product_category_l2'] ? '"' + parsedCriteria['google_product_category_l2'] + '"' : 'NULL') + ','; 121 | sql += (parsedCriteria['google_product_category_l3'] ? '"' + parsedCriteria['google_product_category_l3'] + '"' : 'NULL') + ','; 122 | sql += (parsedCriteria['google_product_category_l4'] ? '"' + parsedCriteria['google_product_category_l4'] + '"' : 'NULL') + ','; 123 | sql += (parsedCriteria['google_product_category_l5'] ? '"' + parsedCriteria['google_product_category_l5'] + '"' : 'NULL') + ','; 124 | sql += (parsedCriteria['brand'] ? '"' + parsedCriteria['brand'] + '"' : 'NULL') + ','; 125 | sql += (parsedCriteria['offer_id'] ? '"' + parsedCriteria['offer_id'] + '"' : 'NULL') + ','; 126 | sql += (parsedCriteria['channel'] ? '"' + parsedCriteria['channel'] + '"' : 'NULL') + ','; 127 | sql += (parsedCriteria['channel_exclusivity'] ? '"' + parsedCriteria['channel_exclusivity'] + '"' : 'NULL') + ','; 128 | sql += (parsedCriteria['condition'] ? '"' + parsedCriteria['condition'] + '"' : 'NULL'); 129 | sql += ')'; 130 | i += 1; 131 | }} 132 | return sql; 133 | """; 134 | -------------------------------------------------------------------------------- /scripts/1_product_view.sql: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC.. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | -- Creates a latest snapshot view of products. 
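-- (For example, the derived unique_product_id below concatenates merchant id
-- and product id: assuming a Content-API-style product id, merchant 567890
-- with product 'online:en:US:1234' yields '567890|online:en:US:1234'.)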
16 | -- 17 | -- The Products_ table has product data partitioned by date. 18 | -- This view will get latest product data and create derived columns useful 19 | -- for further processing of data. 20 | 21 | CREATE OR REPLACE VIEW `{project_id}.{dataset}.product_view_{merchant_id}` 22 | AS ( 23 | WITH 24 | ApprovedOffer AS ( 25 | SELECT DISTINCT 26 | _PARTITIONDATE, 27 | product_id, 28 | merchant_id, 29 | target_country 30 | FROM 31 | `{project_id}.{dataset}.Products_{merchant_id}` AS Products, 32 | Products.destinations, 33 | destinations.approved_countries AS target_country 34 | ), 35 | PendingOffer AS ( 36 | SELECT DISTINCT 37 | _PARTITIONDATE, 38 | product_id, 39 | merchant_id, 40 | target_country 41 | FROM 42 | `{project_id}.{dataset}.Products_{merchant_id}` AS Products, 43 | Products.destinations, 44 | destinations.pending_countries AS target_country 45 | ), 46 | DisapprovedOffer AS ( 47 | SELECT DISTINCT 48 | _PARTITIONDATE, 49 | product_id, 50 | merchant_id, 51 | target_country 52 | FROM 53 | `{project_id}.{dataset}.Products_{merchant_id}` AS Products, 54 | Products.destinations, 55 | destinations.disapproved_countries AS target_country 56 | ), 57 | OfferIssue AS ( 58 | SELECT 59 | _PARTITIONDATE, 60 | product_id, 61 | merchant_id, 62 | target_country, 63 | STRING_AGG( 64 | IF(LOWER(issues.servability) = 'disapproved', issues.short_description, NULL), ", ") 65 | AS disapproval_issues, 66 | STRING_AGG( 67 | IF(LOWER(issues.servability) = 'demoted', issues.short_description, NULL), ", ") 68 | AS demotion_issues, 69 | STRING_AGG( 70 | IF(LOWER(issues.servability) = 'unaffected', issues.short_description, NULL), ", ") 71 | AS warning_issues 72 | FROM 73 | `{project_id}.{dataset}.Products_{merchant_id}` AS Products, 74 | Products.issues, 75 | issues.applicable_countries AS target_country 76 | GROUP BY 77 | 1, 2, 3, 4 78 | ), 79 | MultiChannelTable AS ( 80 | SELECT DISTINCT 81 | _PARTITIONDATE, 82 | merchant_id, 83 | product_id 84 | FROM 85 | `{project_id}.{dataset}.Products_{merchant_id}` 86 | GROUP BY 87 | _PARTITIONDATE, 88 | merchant_id, 89 | product_id 90 | HAVING COUNT(DISTINCT(channel)) > 1 91 | ), 92 | LatestDate AS ( 93 | SELECT 94 | MAX(_PARTITIONDATE) AS latest_date 95 | FROM 96 | `{project_id}.{dataset}.Products_{merchant_id}` 97 | ), 98 | ProductStatus AS ( 99 | SELECT 100 | Products._PARTITIONDATE as data_date, 101 | LatestDate.latest_date, 102 | Products.product_id, 103 | Products.merchant_id, 104 | Products.aggregator_id, 105 | Products.offer_id, 106 | Products.title, 107 | Products.description, 108 | Products.link, 109 | Products.mobile_link, 110 | Products.image_link, 111 | Products.additional_image_links, 112 | Products.content_language, 113 | COALESCE( 114 | ApprovedOffer.target_country, 115 | PendingOffer.target_country, 116 | DisapprovedOffer.target_country) AS target_country, 117 | Products.channel, 118 | Products.expiration_date, 119 | Products.google_expiration_date, 120 | Products.adult, 121 | Products.age_group, 122 | Products.availability, 123 | Products.availability_date, 124 | Products.brand, 125 | Products.color, 126 | Products.condition, 127 | Products.custom_labels, 128 | Products.gender, 129 | Products.gtin, 130 | Products.item_group_id, 131 | Products.material, 132 | Products.mpn, 133 | Products.pattern, 134 | Products.price, 135 | Products.sale_price, 136 | Products.sale_price_effective_start_date, 137 | Products.sale_price_effective_end_date, 138 | Products.google_product_category, 139 | Products.google_product_category_path, 140 | Products.product_type, 
141 | Products.additional_product_types, 142 | IF(ApprovedOffer.product_id IS NULL, 0, 1) is_approved, 143 | CONCAT(CAST(Products.merchant_id AS STRING), '|', Products.product_id) 144 | AS unique_product_id, 145 | IFNULL(SPLIT(Products.product_type, '>')[SAFE_OFFSET(0)], 'N/A') AS product_type_l1, 146 | IFNULL(SPLIT(Products.product_type, '>')[SAFE_OFFSET(1)], 'N/A') AS product_type_l2, 147 | IFNULL(SPLIT(Products.product_type, '>')[SAFE_OFFSET(2)], 'N/A') AS product_type_l3, 148 | IFNULL(SPLIT(Products.product_type, '>')[SAFE_OFFSET(3)], 'N/A') AS product_type_l4, 149 | IFNULL(SPLIT(Products.product_type, '>')[SAFE_OFFSET(4)], 'N/A') AS product_type_l5, 150 | IFNULL(SPLIT(Products.google_product_category_path, '>')[SAFE_OFFSET(0)], 'N/A') 151 | AS google_product_category_l1, 152 | IFNULL(SPLIT(Products.google_product_category_path, '>')[SAFE_OFFSET(1)], 'N/A') 153 | AS google_product_category_l2, 154 | IFNULL(SPLIT(Products.google_product_category_path, '>')[SAFE_OFFSET(2)], 'N/A') 155 | AS google_product_category_l3, 156 | IFNULL(SPLIT(Products.google_product_category_path, '>')[SAFE_OFFSET(3)], 'N/A') 157 | AS google_product_category_l4, 158 | IFNULL(SPLIT(Products.google_product_category_path, '>')[SAFE_OFFSET(4)], 'N/A') 159 | AS google_product_category_l5, 160 | IF(Products.availability = 'in stock', 1, 0) AS in_stock, 161 | IF(MultiChannelTable.product_id IS NULL, 'single_channel', 'multi_channel') AS channel_exclusivity 162 | FROM 163 | `{project_id}.{dataset}.Products_{merchant_id}` AS Products, 164 | LatestDate 165 | LEFT JOIN ApprovedOffer USING (_PARTITIONDATE, product_id, merchant_id) 166 | LEFT JOIN PendingOffer USING (_PARTITIONDATE, product_id, merchant_id) 167 | LEFT JOIN DisapprovedOffer USING (_PARTITIONDATE, product_id, merchant_id) 168 | LEFT JOIN MultiChannelTable USING (_PARTITIONDATE, product_id, merchant_id) 169 | ) 170 | SELECT 171 | ProductStatus.*, 172 | OfferIssue.disapproval_issues, 173 | OfferIssue.demotion_issues, 174 | OfferIssue.warning_issues 175 | FROM 176 | ProductStatus 177 | LEFT JOIN OfferIssue 178 | ON 179 | OfferIssue._PARTITIONDATE = ProductStatus.data_date 180 | AND OfferIssue.product_id = ProductStatus.product_id 181 | AND OfferIssue.merchant_id = ProductStatus.merchant_id 182 | AND OfferIssue.target_country = ProductStatus.target_country 183 | ); 184 | -------------------------------------------------------------------------------- /cloud_bigquery.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 Google LLC.. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # python3 17 | """Cloud BigQuery module.""" 18 | 19 | import logging 20 | import os 21 | from typing import Any, Dict 22 | 23 | import config_parser 24 | from google.cloud import bigquery 25 | from google.cloud import exceptions 26 | 27 | # Main workflow sql. 
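# (Paths below are relative to the repository root; the {placeholder} fields
# inside the scripts are filled in by configure_sql() before execution.)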
28 | _MAIN_WORKFLOW_SQL = 'scripts/main_workflow.sql' 29 | _BEST_SELLERS_WORKFLOW_SQL = 'scripts/market_insights/best_sellers_workflow.sql' 30 | 31 | # Set logging level. 32 | logging.getLogger().setLevel(logging.INFO) 33 | logging.getLogger('googleapiclient.discovery').setLevel(logging.WARNING) 34 | 35 | 36 | def create_dataset_if_not_exists(project_id: str, dataset_id: str) -> None: 37 | """Creates BigQuery dataset if it doesn't exist. 38 | 39 | Args: 40 | project_id: A cloud project id. 41 | dataset_id: BigQuery dataset id. 42 | """ 43 | # Construct a BigQuery client object. 44 | client = bigquery.Client(project=project_id) 45 | fully_qualified_dataset_id = f'{project_id}.{dataset_id}' 46 | try: 47 | client.get_dataset(fully_qualified_dataset_id) 48 | logging.info('Dataset %s already exists.', fully_qualified_dataset_id) 49 | except exceptions.NotFound: 50 | logging.info('Dataset %s not found.', fully_qualified_dataset_id) 51 | dataset = bigquery.Dataset(fully_qualified_dataset_id) 52 | dataset.location = config_parser.get_dataset_location() 53 | client.create_dataset(dataset) 54 | logging.info('Dataset %s created.', fully_qualified_dataset_id) 55 | 56 | 57 | def load_language_codes(project_id: str, dataset_id: str) -> None: 58 | """Loads language codes.""" 59 | client = bigquery.Client(project=project_id) 60 | fully_qualified_table_id = f'{project_id}.{dataset_id}.language_codes' 61 | job_config = bigquery.LoadJobConfig( 62 | source_format=bigquery.SourceFormat.CSV, 63 | skip_leading_rows=1, 64 | autodetect=True, 65 | ) 66 | file_name = 'data/language_codes.csv' 67 | with open(file_name, 'rb') as source_file: 68 | job = client.load_table_from_file( 69 | source_file, fully_qualified_table_id, job_config=job_config) 70 | 71 | job.result() 72 | 73 | 74 | def load_geo_targets(project_id: str, dataset_id: str) -> None: 75 | """Loads geo targets.""" 76 | client = bigquery.Client(project=project_id) 77 | fully_qualified_table_id = f'{project_id}.{dataset_id}.geo_targets' 78 | job_config = bigquery.LoadJobConfig( 79 | source_format=bigquery.SourceFormat.CSV, 80 | skip_leading_rows=1, 81 | autodetect=True, 82 | ) 83 | file_name = 'data/geo_targets.csv' 84 | with open(file_name, 'rb') as source_file: 85 | job = client.load_table_from_file( 86 | source_file, fully_qualified_table_id, job_config=job_config) 87 | 88 | job.result() 89 | 90 | 91 | def read_file(file_path: str) -> str: 92 | """Reads and returns contents of the file. 93 | 94 | Args: 95 | file_path: File path. 96 | 97 | Returns: 98 | content: File content. 99 | 100 | Raises: 101 | FileNotFoundError: If the provided file is not found. 102 | """ 103 | try: 104 | with open(file_path, 'r') as stream: 105 | content = stream.read() 106 | except FileNotFoundError as e: 107 | raise FileNotFoundError( 108 | f'The file "{file_path}" could not be found.') from e 109 | else: 110 | return content 111 | 112 | 113 | def configure_sql(sql_path: str, query_params: Dict[str, Any]) -> str: 114 | """Configures parameters of SQL script with variables supplied. 115 | 116 | Args: 117 | sql_path: Path to SQL script. 118 | query_params: Configuration containing query parameter values. 119 | 120 | Returns: 121 | sql_script: String representation of SQL script with parameters assigned. 122 | """ 123 | sql_script = read_file(sql_path) 124 | 125 | params = {} 126 | for param_key, param_value in query_params.items(): 127 | # If the given value is a comma-separated string (e.g. 'a,b,c'), create a 128 | # tuple of strings (e.g. ('a', 'b', 'c')) to pass to the SQL IN operator.
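    # For example, a hypothetical {'merchant_id': '1,2,3'} entry is rendered as
    # ('1', '2', '3'), so a template clause such as "merchant_id IN {merchant_id}"
    # expands to valid SQL; values without a comma pass through unchanged.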
129 | if isinstance(param_value, str) and ',' in param_value: 130 | params[param_key] = tuple(param_value.split(',')) 131 | else: 132 | params[param_key] = param_value 133 | 134 | return sql_script.format(**params) 135 | 136 | 137 | def execute_queries(project_id: str, dataset_id: str, merchant_id: str, 138 | customer_id: str, enable_market_insights: bool) -> None: 139 | """Executes the solution's SQL scripts in order.""" 140 | # SQL files to be executed in a specific order. 141 | # File names are listed without the "scripts" prefix. 142 | sql_files = [ 143 | '1_product_view.sql', 144 | 'targeted_products/targeted_product_ddl.sql', 145 | 'targeted_products/construct_parsed_criteria.sql', 146 | '2_product_metrics_view.sql', 147 | '3_customer_view.sql', 148 | '4_product_detailed_view.sql', 149 | 'materialize_product_detailed.sql', 150 | 'materialize_product_historical.sql', 151 | ] 152 | if enable_market_insights: 153 | market_insights_sql_files = [ 154 | 'market_insights/snapshot_view.sql', 155 | 'market_insights/historical_view.sql' 156 | ] 157 | sql_files.extend(market_insights_sql_files) 158 | prefix = 'scripts' 159 | query_params = { 160 | 'project_id': project_id, 161 | 'dataset': dataset_id, 162 | 'merchant_id': merchant_id, 163 | 'external_customer_id': customer_id 164 | } 165 | location = config_parser.get_dataset_location() 166 | client = bigquery.Client(project=project_id) 167 | for sql_file in sql_files: 168 | try: 169 | query = configure_sql(os.path.join(prefix, sql_file), query_params) 170 | query_job = client.query(query, location=location) 171 | query_job.result() 172 | except Exception: 173 | logging.exception('Error in %s', sql_file) 174 | raise 175 | 176 | 177 | def get_main_workflow_sql(project_id: str, dataset_id: str, merchant_id: str, 178 | customer_id: str) -> str: 179 | """Returns main workflow sql. 180 | 181 | Args: 182 | project_id: A cloud project id. 183 | dataset_id: BigQuery dataset id. 184 | merchant_id: Merchant center id. 185 | customer_id: Google Ads customer id. 186 | """ 187 | query_params = { 188 | 'project_id': project_id, 189 | 'dataset': dataset_id, 190 | 'merchant_id': merchant_id, 191 | 'external_customer_id': customer_id 192 | } 193 | return configure_sql(_MAIN_WORKFLOW_SQL, query_params) 194 | 195 | 196 | def get_best_sellers_workflow_sql(project_id: str, dataset_id: str, 197 | merchant_id: str) -> str: 198 | """Returns best sellers workflow sql. 199 | 200 | Args: 201 | project_id: A cloud project id. 202 | dataset_id: BigQuery dataset id. 203 | merchant_id: Merchant center id. 204 | """ 205 | query_params = { 206 | 'project_id': project_id, 207 | 'dataset': dataset_id, 208 | 'merchant_id': merchant_id 209 | } 210 | return configure_sql(_BEST_SELLERS_WORKFLOW_SQL, query_params) 211 | -------------------------------------------------------------------------------- /scripts/4_product_detailed_view.sql: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC.. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | 15 | -- Creates a latest snapshot view of products combined with performance metrics. 16 | CREATE OR REPLACE VIEW 17 | `{project_id}.{dataset}.product_detailed_view` AS 18 | WITH 19 | ProductMetrics AS ( 20 | SELECT 21 | product_view.data_date, 22 | product_view.unique_product_id, 23 | product_metrics_view.externalcustomerid, 24 | product_view.target_country, 25 | SUM(product_metrics_view.impressions) AS impressions_30_days, 26 | SUM(product_metrics_view.clicks) AS clicks_30_days, 27 | SUM(product_metrics_view.cost) AS cost_30_days, 28 | SUM(product_metrics_view.conversions) AS conversions_30_days, 29 | SUM(product_metrics_view.conversions_value) AS conversions_value_30_days 30 | FROM 31 | `{project_id}.{dataset}.product_metrics_view` product_metrics_view 32 | INNER JOIN 33 | `{project_id}.{dataset}.product_view_{merchant_id}` product_view 34 | ON 35 | product_metrics_view.merchantid = product_view.merchant_id 36 | AND LOWER(product_metrics_view.channel) = LOWER(product_view.channel) 37 | AND LOWER(product_metrics_view.language_code) = LOWER(product_view.content_language) 38 | AND LOWER(product_metrics_view.country_code) = LOWER(product_view.target_country) 39 | AND LOWER(product_metrics_view.offer_id) = LOWER(product_view.offer_id) 40 | AND product_metrics_view.data_date 41 | BETWEEN DATE_SUB(product_view.data_date, INTERVAL 30 DAY) 42 | AND product_view.data_date 43 | GROUP BY 44 | data_date, 45 | unique_product_id, 46 | externalcustomerid, 47 | target_country 48 | ), 49 | ProductData AS ( 50 | SELECT 51 | product_view.data_date, 52 | product_view.latest_date, 53 | COALESCE(product_view.aggregator_id, product_view.merchant_id) AS account_id, 54 | MAX(customer_view.accountdescriptivename) AS account_display_name, 55 | product_view.merchant_id AS sub_account_id, 56 | product_view.unique_product_id, 57 | product_view.target_country, 58 | MAX(product_view.offer_id) AS offer_id, 59 | MAX(product_view.channel) AS channel, 60 | MAX(product_view.in_stock) AS in_stock, 61 | # An offer is labeled as approved when able to serve on all destinations 62 | MAX(is_approved) AS is_approved, 63 | # Aggregated Issues & Servability Statuses 64 | MAX(disapproval_issues) as disapproval_issues, 65 | MAX(demotion_issues) as demotion_issues, 66 | MAX(warning_issues) as warning_issues, 67 | MIN(IF(TargetedProduct.product_id IS NULL, 0, 1)) AS is_targeted, 68 | MAX(title) AS title, 69 | MAX(link) AS item_url, 70 | MAX(product_type_l1) AS product_type_l1, 71 | MAX(product_type_l2) AS product_type_l2, 72 | MAX(product_type_l3) AS product_type_l3, 73 | MAX(product_type_l4) AS product_type_l4, 74 | MAX(product_type_l5) AS product_type_l5, 75 | MAX(google_product_category_l1) AS google_product_category_l1, 76 | MAX(google_product_category_l2) AS google_product_category_l2, 77 | MAX(google_product_category_l3) AS google_product_category_l3, 78 | MAX(google_product_category_l4) AS google_product_category_l4, 79 | MAX(google_product_category_l5) AS google_product_category_l5, 80 | MAX(custom_labels.label_0) AS custom_label_0, 81 | MAX(custom_labels.label_1) AS custom_label_1, 82 | MAX(custom_labels.label_2) AS custom_label_2, 83 | MAX(custom_labels.label_3) AS custom_label_3, 84 | MAX(custom_labels.label_4) AS custom_label_4, 85 | MAX(product_view.brand) AS brand, 86 | MAX(ProductMetrics.impressions_30_days) AS impressions_30_days, 87 | MAX(ProductMetrics.clicks_30_days) AS clicks_30_days, 88 | MAX(ProductMetrics.cost_30_days) AS cost_30_days, 89 | MAX(ProductMetrics.conversions_30_days) AS conversions_30_days, 90 | 
MAX(ProductMetrics.conversions_value_30_days) AS conversions_value_30_days, 91 | MAX(description) AS description, 92 | MAX(mobile_link) AS mobile_link, 93 | MAX(image_link) AS image_link, 94 | ANY_VALUE(additional_image_links) AS additional_image_links, 95 | MAX(content_language) AS content_language, 96 | MAX(expiration_date) AS expiration_date, 97 | MAX(google_expiration_date) AS google_expiration_date, 98 | MAX(adult) AS adult, 99 | MAX(age_group) AS age_group, 100 | MAX(availability) AS availability, 101 | MAX(availability_date) AS availability_date, 102 | MAX(color) AS color, 103 | MAX(condition) AS condition, 104 | MAX(gender) AS gender, 105 | MAX(gtin) AS gtin, 106 | MAX(item_group_id) AS item_group_id, 107 | MAX(material) AS material, 108 | MAX(mpn) AS mpn, 109 | MAX(pattern) AS pattern, 110 | ANY_VALUE(price) AS price, 111 | ANY_VALUE(sale_price) AS sale_price, 112 | MAX(sale_price_effective_start_date) AS sale_price_effective_start_date, 113 | MAX(sale_price_effective_end_date) AS sale_price_effective_end_date, 114 | ANY_VALUE(additional_product_types) AS additional_product_types 115 | FROM 116 | `{project_id}.{dataset}.product_view_{merchant_id}` product_view 117 | LEFT JOIN 118 | ProductMetrics 119 | ON 120 | ProductMetrics.data_date = product_view.data_date 121 | AND ProductMetrics.unique_product_id = product_view.unique_product_id 122 | AND ProductMetrics.target_country = product_view.target_country 123 | LEFT JOIN 124 | `{project_id}.{dataset}.customer_view` customer_view 125 | ON 126 | customer_view.externalcustomerid = ProductMetrics.externalcustomerid 127 | AND customer_view.data_date = ProductMetrics.data_date 128 | LEFT JOIN 129 | `{project_id}.{dataset}.TargetedProduct_{external_customer_id}` TargetedProduct 130 | ON 131 | TargetedProduct.merchant_id = product_view.merchant_id 132 | AND TargetedProduct.product_id = product_view.product_id 133 | AND TargetedProduct.data_date = product_view.data_date 134 | AND TargetedProduct.target_country = product_view.target_country 135 | GROUP BY 136 | data_date, 137 | latest_date, 138 | account_id, 139 | product_view.merchant_id, 140 | product_view.unique_product_id, 141 | target_country 142 | ) 143 | SELECT 144 | *, 145 | CASE 146 | WHEN is_approved = 1 AND in_stock = 1 147 | THEN 1 148 | ELSE 0 149 | END AS funnel_in_stock, 150 | CASE 151 | WHEN is_approved = 1 AND in_stock = 1 AND is_targeted = 1 152 | THEN 1 153 | ELSE 0 154 | END AS funnel_targeted, 155 | CASE 156 | WHEN 157 | is_approved = 1 158 | AND in_stock = 1 159 | AND is_targeted = 1 160 | AND impressions_30_days > 0 161 | THEN 1 162 | ELSE 0 163 | END AS funnel_has_impression, 164 | CASE 165 | WHEN 166 | is_approved = 1 167 | AND in_stock = 1 168 | AND is_targeted = 1 169 | AND impressions_30_days > 0 170 | AND clicks_30_days > 0 171 | THEN 1 172 | ELSE 0 173 | END AS funnel_has_clicks 174 | FROM 175 | ProductData; 176 | -------------------------------------------------------------------------------- /scripts/materialize_product_detailed.sql: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC.. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | -- Stored procedure for materializing product detailed data. 16 | -- 17 | -- "product_detailed_materialized" differs from "product_detailed_view" in the 18 | -- following ways: 19 | -- 1. The two data sources (GMC and Google Ads) are joined based on the latest 20 | -- available data. 21 | -- 2. "product_detailed_materialized" is used by the MarkUp DataStudio 22 | -- dashboard in all the pages except the historical trends. 23 | -- 3. "product_detailed_view" is used to populate only the historical trends 24 | -- data. 25 | -- The main reason for this decision was the fragile nature of the data 26 | -- transfers: when either the Google Ads or the GMC transfer fails, which 27 | -- happens fairly often, the bulk of the MarkUp dashboard becomes unusable. 28 | 29 | CREATE OR REPLACE PROCEDURE `{project_id}.{dataset}.product_detailed_proc`() 30 | BEGIN 31 | CREATE OR REPLACE TABLE `{project_id}.{dataset}.product_detailed_materialized` 32 | AS ( 33 | WITH 34 | TargetedProduct AS ( 35 | SELECT 36 | merchant_id, 37 | product_id, 38 | target_country 39 | FROM 40 | `{project_id}.{dataset}.TargetedProduct_{external_customer_id}` 41 | WHERE 42 | data_date IN ( 43 | ( 44 | SELECT 45 | MAX(data_date) 46 | FROM 47 | `{project_id}.{dataset}.TargetedProduct_{external_customer_id}` 48 | ) 49 | ) 50 | ), 51 | ProductMetrics AS ( 52 | SELECT 53 | product_view.latest_date, 54 | product_view.unique_product_id, 55 | product_metrics_view.externalcustomerid, 56 | product_view.target_country, 57 | SUM(product_metrics_view.impressions) AS impressions_30_days, 58 | SUM(product_metrics_view.clicks) AS clicks_30_days, 59 | SUM(product_metrics_view.cost) AS cost_30_days, 60 | SUM(product_metrics_view.conversions) AS conversions_30_days, 61 | SUM(product_metrics_view.conversions_value) AS conversions_value_30_days 62 | FROM 63 | `{project_id}.{dataset}.product_metrics_view` product_metrics_view 64 | INNER JOIN 65 | `{project_id}.{dataset}.product_view_{merchant_id}` product_view 66 | ON 67 | product_metrics_view.merchantid = product_view.merchant_id 68 | AND LOWER(product_metrics_view.channel) = LOWER(product_view.channel) 69 | AND LOWER(product_metrics_view.language_code) = LOWER(product_view.content_language) 70 | AND LOWER(product_metrics_view.country_code) = LOWER(product_view.target_country) 71 | AND LOWER(product_metrics_view.offer_id) = LOWER(product_view.offer_id) 72 | AND product_metrics_view.data_date 73 | BETWEEN DATE_SUB(product_view.latest_date, INTERVAL 30 DAY) 74 | AND product_view.latest_date 75 | WHERE 76 | product_view.data_date = product_view.latest_date 77 | GROUP BY 78 | latest_date, 79 | unique_product_id, 80 | externalcustomerid, 81 | target_country 82 | ), 83 | ProductData AS ( 84 | SELECT 85 | product_view.data_date, 86 | product_view.latest_date, 87 | COALESCE(product_view.aggregator_id, product_view.merchant_id) AS account_id, 88 | MAX(customer_view.accountdescriptivename) AS account_display_name, 89 | product_view.merchant_id AS sub_account_id, 90 | product_view.unique_product_id, 91 | product_view.target_country, 92 | MAX(product_view.offer_id) AS
offer_id, 93 | MAX(product_view.channel) AS channel, 94 | MAX(product_view.in_stock) AS in_stock, 95 | # An offer is labeled as approved when able to serve on all destinations 96 | MAX(is_approved) AS is_approved, 97 | # Aggregated Issues & Servability Statuses 98 | MAX(disapproval_issues) as disapproval_issues, 99 | MAX(demotion_issues) as demotion_issues, 100 | MAX(warning_issues) as warning_issues, 101 | MIN(IF(TargetedProduct.product_id IS NULL, 0, 1)) AS is_targeted, 102 | MAX(title) AS title, 103 | MAX(link) AS item_url, 104 | MAX(product_type_l1) AS product_type_l1, 105 | MAX(product_type_l2) AS product_type_l2, 106 | MAX(product_type_l3) AS product_type_l3, 107 | MAX(product_type_l4) AS product_type_l4, 108 | MAX(product_type_l5) AS product_type_l5, 109 | MAX(google_product_category_l1) AS google_product_category_l1, 110 | MAX(google_product_category_l2) AS google_product_category_l2, 111 | MAX(google_product_category_l3) AS google_product_category_l3, 112 | MAX(google_product_category_l4) AS google_product_category_l4, 113 | MAX(google_product_category_l5) AS google_product_category_l5, 114 | MAX(custom_labels.label_0) AS custom_label_0, 115 | MAX(custom_labels.label_1) AS custom_label_1, 116 | MAX(custom_labels.label_2) AS custom_label_2, 117 | MAX(custom_labels.label_3) AS custom_label_3, 118 | MAX(custom_labels.label_4) AS custom_label_4, 119 | MAX(product_view.brand) AS brand, 120 | MAX(ProductMetrics.impressions_30_days) AS impressions_30_days, 121 | MAX(ProductMetrics.clicks_30_days) AS clicks_30_days, 122 | MAX(ProductMetrics.cost_30_days) AS cost_30_days, 123 | MAX(ProductMetrics.conversions_30_days) AS conversions_30_days, 124 | MAX(ProductMetrics.conversions_value_30_days) AS conversions_value_30_days, 125 | MAX(description) AS description, 126 | MAX(mobile_link) AS mobile_link, 127 | MAX(image_link) AS image_link, 128 | ANY_VALUE(additional_image_links) AS additional_image_links, 129 | MAX(content_language) AS content_language, 130 | MAX(expiration_date) AS expiration_date, 131 | MAX(google_expiration_date) AS google_expiration_date, 132 | MAX(adult) AS adult, 133 | MAX(age_group) AS age_group, 134 | MAX(availability) AS availability, 135 | MAX(availability_date) AS availability_date, 136 | MAX(color) AS color, 137 | MAX(condition) AS condition, 138 | MAX(gender) AS gender, 139 | MAX(gtin) AS gtin, 140 | MAX(item_group_id) AS item_group_id, 141 | MAX(material) AS material, 142 | MAX(mpn) AS mpn, 143 | MAX(pattern) AS pattern, 144 | ANY_VALUE(price) AS price, 145 | ANY_VALUE(sale_price) AS sale_price, 146 | MAX(sale_price_effective_start_date) AS sale_price_effective_start_date, 147 | MAX(sale_price_effective_end_date) AS sale_price_effective_end_date, 148 | ANY_VALUE(additional_product_types) AS additional_product_types 149 | FROM 150 | `{project_id}.{dataset}.product_view_{merchant_id}` product_view 151 | LEFT JOIN 152 | ProductMetrics 153 | ON 154 | ProductMetrics.latest_date = product_view.data_date 155 | AND ProductMetrics.unique_product_id = product_view.unique_product_id 156 | AND ProductMetrics.target_country = product_view.target_country 157 | LEFT JOIN 158 | `{project_id}.{dataset}.customer_view` customer_view 159 | ON 160 | customer_view.externalcustomerid = ProductMetrics.externalcustomerid 161 | AND customer_view.data_date = customer_view.latest_date 162 | LEFT JOIN 163 | TargetedProduct 164 | ON 165 | TargetedProduct.merchant_id = product_view.merchant_id 166 | AND TargetedProduct.product_id = product_view.product_id 167 | AND TargetedProduct.target_country 
= product_view.target_country 168 | WHERE 169 | product_view.data_date = product_view.latest_date 170 | GROUP BY 171 | data_date, 172 | latest_date, 173 | account_id, 174 | product_view.merchant_id, 175 | product_view.unique_product_id, 176 | product_view.target_country 177 | ) 178 | SELECT 179 | *, 180 | CASE 181 | WHEN is_approved = 1 AND in_stock = 1 182 | THEN 1 183 | ELSE 0 184 | END AS funnel_in_stock, 185 | CASE 186 | WHEN is_approved = 1 AND in_stock = 1 AND is_targeted = 1 187 | THEN 1 188 | ELSE 0 189 | END AS funnel_targeted, 190 | CASE 191 | WHEN 192 | is_approved = 1 193 | AND in_stock = 1 194 | AND is_targeted = 1 195 | AND impressions_30_days > 0 196 | THEN 1 197 | ELSE 0 198 | END AS funnel_has_impression, 199 | CASE 200 | WHEN 201 | is_approved = 1 202 | AND in_stock = 1 203 | AND is_targeted = 1 204 | AND impressions_30_days > 0 205 | AND clicks_30_days > 0 206 | THEN 1 207 | ELSE 0 208 | END AS funnel_has_clicks 209 | FROM 210 | ProductData 211 | ); 212 | END; 213 | 214 | CALL `{project_id}.{dataset}.product_detailed_proc`(); 215 | -------------------------------------------------------------------------------- /scripts/main_workflow.sql: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC.. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | -- Backfill main workflow. 16 | -- 17 | -- The script parses targeting criteria to get the list of targeted products. It 18 | -- then materializes the product detailed and product historical tables. The script 19 | -- uses the @run_date parameter and hence can be backfilled for a specific date. This 20 | -- is useful when a Google Ads or GMC data transfer has failed on a specific 21 | -- day. 22 | 23 | DECLARE to_be_processed ARRAY<STRING> DEFAULT []; 24 | DECLARE where_clause STRING; 25 | DECLARE i INT64 DEFAULT 0; 26 | DECLARE BATCH_SIZE INT64 DEFAULT 500; 27 | DECLARE total_criterions DEFAULT 0; 28 | 29 | -- Clean-up existing tables. 30 | DELETE FROM 31 | `{project_id}.{dataset}.ParsedCriteria_{external_customer_id}` 32 | WHERE 1=1; 33 | 34 | DELETE FROM 35 | `{project_id}.{dataset}.TargetedProduct_{external_customer_id}` 36 | WHERE 37 | -- Delete data older than 90 days. 38 | data_date < DATE_SUB(@run_date, INTERVAL 90 DAY) 39 | OR data_date = @run_date; 40 | 41 | CREATE TEMPORARY TABLE IF NOT EXISTS DistinctCriterion AS ( 42 | WITH DistinctCriterion AS ( 43 | SELECT DISTINCT 44 | Criteria 45 | FROM 46 | `{project_id}.{dataset}.Criteria_{external_customer_id}` AS CriteriaTable 47 | WHERE 48 | CriteriaType = 'PRODUCT_PARTITION' 49 | -- If the run_date is not a backfill then use the latest available data.
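      -- (A regular daily run passes @run_date = CURRENT_DATE() and so reads the
      -- _LATEST_DATE partition; a backfill pins the read to that day's partition.)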
50 | AND ( 51 | ( 52 | @run_date = CURRENT_DATE() 53 | AND CriteriaTable._DATA_DATE = CriteriaTable._LATEST_DATE) 54 | OR ( 55 | @run_date <> CURRENT_DATE() 56 | AND CriteriaTable._DATA_DATE = @run_date)) 57 | ) 58 | SELECT 59 | Criteria, 60 | ROW_NUMBER() OVER (ORDER BY Criteria asc) as RowNum 61 | FROM 62 | DistinctCriterion 63 | ); 64 | 65 | SET total_criterions = (SELECT COUNT(1) FROM DistinctCriterion); 66 | 67 | LOOP 68 | IF i >= total_criterions THEN 69 | BREAK; 70 | END IF; 71 | SET to_be_processed = ( 72 | SELECT 73 | ARRAY_AGG(Criteria) 74 | FROM 75 | DistinctCriterion 76 | WHERE 77 | RowNum BETWEEN i AND i+BATCH_SIZE 78 | ); 79 | SET i = i + BATCH_SIZE + 1; 80 | EXECUTE IMMEDIATE `{project_id}.{dataset}.constructParsedCriteria_{external_customer_id}`(to_be_processed); 81 | END LOOP; 82 | 83 | 84 | CREATE TEMP TABLE CriteriaInfo 85 | AS ( 86 | WITH TargetedMerchantInfo AS ( 87 | SELECT DISTINCT 88 | MerchantId AS merchant_id, 89 | AdGroupId AS ad_group_id, 90 | UPPER(GeoTargets.Country_Code) AS target_country 91 | FROM 92 | `{project_id}.{dataset}.ShoppingProductStats_{external_customer_id}` AS ShoppingProductStats 93 | INNER JOIN `{project_id}.{dataset}.geo_targets` GeoTargets 94 | ON GeoTargets.parent_id = ShoppingProductStats.CountryCriteriaId 95 | WHERE 96 | -- If the run_date is not a backfill then use the latest available data. 97 | ( 98 | ( 99 | @run_date = CURRENT_DATE() 100 | AND ShoppingProductStats._DATA_DATE = ShoppingProductStats._LATEST_DATE) 101 | OR ( 102 | @run_date <> CURRENT_DATE() 103 | AND ShoppingProductStats._DATA_DATE = @run_date)) 104 | ) 105 | SELECT 106 | TargetedMerchantInfo.merchant_id, 107 | TargetedMerchantInfo.target_country, 108 | CriteriaTable.criteria 109 | FROM 110 | TargetedMerchantInfo 111 | INNER JOIN 112 | `{project_id}.{dataset}.Criteria_{external_customer_id}` AS CriteriaTable 113 | ON 114 | CriteriaTable.AdGroupId = TargetedMerchantInfo.ad_group_id 115 | -- If the run_date is not a backfill then use the latest available data. 116 | AND ( 117 | ( 118 | @run_date = CURRENT_DATE() 119 | AND CriteriaTable._DATA_DATE = CriteriaTable._LATEST_DATE) 120 | OR ( 121 | @run_date <> CURRENT_DATE() 122 | AND CriteriaTable._DATA_DATE = @run_date)) 123 | ); 124 | 125 | CREATE TEMP TABLE IdTargetedOffer 126 | AS ( 127 | SELECT DISTINCT 128 | CriteriaInfo.merchant_id, 129 | CriteriaInfo.target_country, 130 | ParsedCriteria.offer_id 131 | FROM 132 | `{project_id}.{dataset}.ParsedCriteria_{external_customer_id}` ParsedCriteria 133 | INNER JOIN CriteriaInfo 134 | ON ParsedCriteria.criteria = CriteriaInfo.criteria 135 | WHERE 136 | ParsedCriteria.offer_id IS NOT NULL 137 | ); 138 | 139 | 140 | CREATE TEMP TABLE IdTargeted 141 | AS ( 142 | SELECT 143 | ProductView.data_date, 144 | ProductView.product_id, 145 | ProductView.merchant_id, 146 | ProductView.target_country 147 | FROM 148 | `{project_id}.{dataset}.product_view_{merchant_id}` AS ProductView 149 | INNER JOIN IdTargetedOffer 150 | ON 151 | IdTargetedOffer.merchant_id = ProductView.merchant_id 152 | AND IdTargetedOffer.target_country = ProductView.target_country 153 | AND IdTargetedOffer.offer_id = ProductView.offer_id 154 | WHERE 155 | -- If the run_date is not a backfill then use the latest available data. 
156 | ( 157 | ( 158 | @run_date = CURRENT_DATE() 159 | AND ProductView.data_date = ProductView.latest_date) 160 | OR ( 161 | @run_date <> CURRENT_DATE() 162 | AND ProductView.data_date = @run_date)) 163 | ); 164 | 165 | CREATE TEMP TABLE NonIdTargeted 166 | AS ( 167 | SELECT 168 | ProductView.data_date, 169 | ProductView.product_id, 170 | ProductView.merchant_id, 171 | ProductView.target_country 172 | FROM 173 | `{project_id}.{dataset}.product_view_{merchant_id}` AS ProductView 174 | INNER JOIN CriteriaInfo 175 | ON 176 | CriteriaInfo.merchant_id = ProductView.merchant_id 177 | AND CriteriaInfo.target_country = ProductView.target_country 178 | INNER JOIN `{project_id}.{dataset}.ParsedCriteria_{external_customer_id}` AS ParsedCriteria 179 | ON 180 | ParsedCriteria.criteria = CriteriaInfo.criteria 181 | AND ( 182 | ParsedCriteria.custom_label0 IS NULL 183 | OR TRIM(LOWER(ParsedCriteria.custom_label0)) = TRIM(LOWER(ProductView.custom_labels.label_0))) 184 | AND ( 185 | ParsedCriteria.custom_label1 IS NULL 186 | OR TRIM(LOWER(ParsedCriteria.custom_label1)) = TRIM(LOWER(ProductView.custom_labels.label_1))) 187 | AND ( 188 | ParsedCriteria.custom_label2 IS NULL 189 | OR TRIM(LOWER(ParsedCriteria.custom_label2)) = TRIM(LOWER(ProductView.custom_labels.label_2))) 190 | AND ( 191 | ParsedCriteria.custom_label3 IS NULL 192 | OR TRIM(LOWER(ParsedCriteria.custom_label3)) = TRIM(LOWER(ProductView.custom_labels.label_3))) 193 | AND ( 194 | ParsedCriteria.custom_label4 IS NULL 195 | OR TRIM(LOWER(ParsedCriteria.custom_label4)) = TRIM(LOWER(ProductView.custom_labels.label_4))) 196 | AND ( 197 | ParsedCriteria.product_type_l1 IS NULL 198 | OR TRIM(LOWER(ParsedCriteria.product_type_l1)) = TRIM(LOWER(ProductView.product_type_l1))) 199 | AND ( 200 | ParsedCriteria.product_type_l2 IS NULL 201 | OR TRIM(LOWER(ParsedCriteria.product_type_l2)) = TRIM(LOWER(ProductView.product_type_l2))) 202 | AND ( 203 | ParsedCriteria.product_type_l3 IS NULL 204 | OR TRIM(LOWER(ParsedCriteria.product_type_l3)) = TRIM(LOWER(ProductView.product_type_l3))) 205 | AND ( 206 | ParsedCriteria.product_type_l4 IS NULL 207 | OR TRIM(LOWER(ParsedCriteria.product_type_l4)) = TRIM(LOWER(ProductView.product_type_l4))) 208 | AND ( 209 | ParsedCriteria.product_type_l5 IS NULL 210 | OR TRIM(LOWER(ParsedCriteria.product_type_l5)) = TRIM(LOWER(ProductView.product_type_l5))) 211 | AND ( 212 | ParsedCriteria.google_product_category_l1 IS NULL 213 | OR TRIM(LOWER(ParsedCriteria.google_product_category_l1)) = TRIM(LOWER(ProductView.google_product_category_l1))) 214 | AND ( 215 | ParsedCriteria.google_product_category_l2 IS NULL 216 | OR TRIM(LOWER(ParsedCriteria.google_product_category_l2)) = TRIM(LOWER(ProductView.google_product_category_l2))) 217 | AND ( 218 | ParsedCriteria.google_product_category_l3 IS NULL 219 | OR TRIM(LOWER(ParsedCriteria.google_product_category_l3)) = TRIM(LOWER(ProductView.google_product_category_l3))) 220 | AND ( 221 | ParsedCriteria.google_product_category_l4 IS NULL 222 | OR TRIM(LOWER(ParsedCriteria.google_product_category_l4)) = TRIM(LOWER(ProductView.google_product_category_l4))) 223 | AND ( 224 | ParsedCriteria.google_product_category_l5 IS NULL 225 | OR TRIM(LOWER(ParsedCriteria.google_product_category_l5)) = TRIM(LOWER(ProductView.google_product_category_l5))) 226 | AND ( 227 | ParsedCriteria.brand IS NULL 228 | OR TRIM(LOWER(ParsedCriteria.brand)) = TRIM(LOWER(ProductView.brand))) 229 | AND ( 230 | ParsedCriteria.channel IS NULL 231 | OR TRIM(LOWER(ParsedCriteria.channel)) = TRIM(LOWER(ProductView.channel))) 232 | 
AND ( 233 | ParsedCriteria.channel_exclusivity IS NULL 234 | OR TRIM(LOWER(ParsedCriteria.channel_exclusivity)) = TRIM(LOWER(ProductView.channel_exclusivity))) 235 | AND ( 236 | ParsedCriteria.condition IS NULL 237 | OR TRIM(LOWER(ParsedCriteria.condition)) = TRIM(LOWER(ProductView.condition))) 238 | WHERE 239 | ParsedCriteria.offer_id IS NULL 240 | -- If the run_date is not a backfill then use the latest available data. 241 | AND ( 242 | ( 243 | @run_date = CURRENT_DATE() 244 | AND ProductView.data_date = ProductView.latest_date) 245 | OR ( 246 | @run_date <> CURRENT_DATE() 247 | AND ProductView.data_date = @run_date)) 248 | ); 249 | 250 | INSERT `{project_id}.{dataset}.TargetedProduct_{external_customer_id}` 251 | ( 252 | data_date, 253 | product_id, 254 | merchant_id, 255 | target_country 256 | ) 257 | SELECT 258 | data_date, 259 | product_id, 260 | merchant_id, 261 | target_country 262 | FROM 263 | IdTargeted 264 | UNION ALL 265 | SELECT 266 | data_date, 267 | product_id, 268 | merchant_id, 269 | target_country 270 | FROM 271 | NonIdTargeted; 272 | 273 | 274 | -- Update product detailed and product historical materialized tables. 275 | CALL `{project_id}.{dataset}.product_detailed_proc`(); 276 | CALL `{project_id}.{dataset}.product_historical_proc`(); 277 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MarkUp 2 | 3 | -------------------------------------------------------------------------------- 4 | 5 | > NEW UPDATE: This tool is deprecated, please go to 6 | > [Shopping Insider](https://github.com/google/shopping_insider) 7 | 8 | > Disclaimer: This is not an officially supported Google product. 9 | 10 | ### Please re-install MarkUp if you have installed it before Oct 2021. This is to support the breaking schema changes in BigQuery Data Transfer Service. 11 | 12 |
13 | 14 | MarkUp is a tool that enables retailers to grow their business using 15 | [Google Merchant Center](https://www.google.com/retail/solutions/merchant-center/) 16 | by making actionable, data-driven decisions to optimize shopping feed health and 17 | ads performance. 18 | 19 | ## Contents 20 | 21 | * [1. Overview](#1-overview) 22 | * [1.1. Value Proposition](#1-1-value-proposition) 23 | * [1.2. Solution Architecture](#solution-architecture) 24 | * [1.3. Solution Options](#solution-options) 25 | * [2. Installation](#2-installation) 26 | * [2.1. Environment Setup](#environment-setup) 27 | * [2.2. Cloud Environment Setup](#cloud-environment-setup) 28 | * [2.3. Configure Data Sources](#configure-data-sources) 29 | * [2.4. Create Data-Studio Dashboard](#data-studio-dashboard) 30 | 31 | ## 1. Overview 32 | 33 | The MarkUp solution is built for Shopping Ads customers to make actionable, 34 | data-driven decisions that improve their feed health and Shopping ads performance. 35 | 36 | ### 1.1. Value Proposition 37 | 38 | * Users can find opportunities and issues at each stage of the Shopping Funnel, 39 | both overall and in detailed data cuts. 40 | 41 | * Richer insights from data joins that provide overall and product-level 42 | performance information pivoted on custom attributes (product type, 43 | brand, etc.) for deeper insights. 44 | 45 | * A near real-time dashboard to share data and insights seamlessly across 46 | different teams and areas of the business to address issues and optimize 47 | performance. 48 | 49 | ### 1.2 Solution Architecture 50 | 51 | The solution exports data from GMC and Google Ads to your Google Cloud 52 | Project on a daily basis and provides insights via a Data Studio dashboard. 53 | 54 | ![Solution architecture](images/architecture.png) 55 | 56 | ### 1.3 Solution Options 57 | 58 | At this time, there are two onboarding options available: 59 | 60 | #### Markup 61 | 62 | This is the base solution that exclusively uses the products and product issues 63 | tables available via the Merchant Center Transfer. This allows you to set up 64 | the 65 | [Markup Dashboard Template](https://datastudio.google.com/reporting/806b9d3e-073a-43c2-bba0-53a0eb65302f/page/QK7kB/preview). 66 | 67 | #### Markup + Market Insights 68 | 69 | Enabling Market Insights during the installation process will 70 | additionally configure the Market Insights tables available via the Merchant 71 | Center Transfer (Price Benchmarks & Best Sellers), as well as three additional 72 | BigQuery views: 73 | 74 | * `market_insights_snapshot` - a snapshot view that joins the latest product 75 | feed data with available price benchmarks, best seller status, and Google 76 | Ads performance over the last 30 days. 77 | * `market_insights_historical` - a date-partitioned view that joins the latest 78 | product feed data with historical price, price benchmarks, and Google Ads 79 | performance over the entire transfer data set. 80 | * `market_insights_best_sellers` - a view that joins the latest Best Sellers 81 | Top Products table with inventory status to show a ranked list of Top 82 | Products broken out by category. 83 | * Please note: this view is set to show data only for the `en-US` locale. 84 | For other locales, you will need to adjust the view's filtering after 85 | installation. 86 | 87 | With these additional views, you will be able to set up the 88 | [Merchant Market Insights Dashboard Template](https://datastudio.google.com/reporting/37411ae9-b5f3-4062-89ea-ea521c885c30/page/QK7kB/preview) 89 | in addition to the above Markup Dashboard template.
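Once the installation finishes, these views can be queried directly in BigQuery. Below is a minimal sketch of a first look at the snapshot view, assuming the default `markup` dataset; the `price` and `price_benchmark_value` column names are illustrative assumptions, so check the view's schema in your project for the exact fields:

```
-- Illustrative only: offers currently priced above their price benchmark.
SELECT
  offer_id,
  title,
  price,                  -- assumed column name
  price_benchmark_value   -- assumed column name
FROM
  `markup.market_insights_snapshot_view`
WHERE
  price > price_benchmark_value
ORDER BY
  price - price_benchmark_value DESC
LIMIT 100;
```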
90 | 91 | ## 2. Installation 92 | 93 | ### 2.1. Google Cloud Platform (GCP) setup 94 | 95 | #### 2.1.1 Create a GCP project with billing account 96 | 97 | You may skip this step if you already have a GCP account with billing enabled. 98 | 99 | * How to [Create a GCP account](https://cloud.google.com/?authuser=1) (if you 100 | don't have one already!) 101 | 102 | * How to 103 | [Create and Manage Projects](https://cloud.google.com/resource-manager/docs/creating-managing-projects) 104 | 105 | * How to 106 | [Create, Modify, or Close Your Billing Account](https://cloud.google.com/billing/docs/how-to/manage-billing-account) 107 | 108 | #### 2.1.2 Check the permissions 109 | 110 | Make sure the user running the installation has the following permissions. 111 | 112 | * [Standard Access For GMC](https://support.google.com/merchants/answer/1637190?hl=en) 113 | 114 | * [Standard Access For Google Ads](https://support.google.com/google-ads/answer/7476552?hl=en) 115 | 116 | * [Editor (or Owner) Role in Google Cloud Project](https://cloud.google.com/iam/docs/understanding-roles) 117 | 118 | ### 2.2. Cloud environment setup 119 | 120 | #### 2.2.1 Set up the local environment 121 | 122 | [Download and authenticate gcloud.](https://cloud.google.com/sdk/#Quick_Start) 123 | 124 | Alternatively, if the GMC account has fewer than 50 million products, you could 125 | use [Cloud Shell](https://ssh.cloud.google.com/cloudshell?shellonly=true), which 126 | comes with gcloud already installed. Cloud Shell disconnects after 1 hour, 127 | so we recommend using a local environment for large accounts, since the 128 | installation could take more than 1 hour to finish. 129 | 130 | #### 2.2.2 Check out the source code 131 | 132 | Open the [cloud shell](https://ssh.cloud.google.com/cloudshell?shellonly=true) 133 | or your terminal (if running locally) and clone the repository. 134 | 135 | ``` 136 | git clone https://github.com/google/shopping-markup 137 | ``` 138 | 139 | #### 2.2.3 Run install script 140 | 141 | Please provide the following inputs when running the `setup.sh` script: 142 | 143 | * [GCP Project Id](https://cloud.google.com/resource-manager/docs/creating-managing-projects) 144 | 145 | * [Google Merchant Center Id](https://support.google.com/merchants/answer/188924?hl=en) 146 | 147 | * [Google Ads External Customer Id](https://support.google.com/google-ads/answer/1704344?hl=en) 148 | 149 | * Market Insights - whether to deploy the Market Insights solution. Allowed 150 | Values - True or False 151 | 152 | ``` 153 | cd shopping-markup; 154 | sh setup.sh --project_id=<project_id> --merchant_id=<merchant_id> --ads_customer_id=<ads_customer_id> --market_insights=False 155 | ``` 156 | 157 | When installing, the script will check whether the current user has the proper 158 | authorization to continue. It may ask you to open a cloud authorization URL in the 159 | browser. Please follow the instructions shown in the command line. 160 | 161 | #### Note - If the script fails when you run it for the first time, it might be due to a delay in preparing Merchant account data. Please wait up to 1-3 days before re-running the script. 162 | 163 | During the installation process, the script will do the following: 164 | 165 | * Enable Google Cloud Components and Google APIs 166 | 167 | * [BigQuery](https://console.cloud.google.com/bigquery) 168 | 169 | * [BigQuery Data Transfer](https://console.cloud.google.com/bigquery/transfers) 170 | 171 | * Create Google Merchant Center and Google Ads data transfers.
172 | 173 | * Create recurring data transfer jobs so that the latest data is imported in 174 | near real time. 175 | 176 | * Create the following MarkUp-specific SQL tables. 177 | 178 | * product_detailed_materialized - Latest snapshot view of products 179 | combined with performance metrics. Each offer is split into rows for 180 | each targeted country; rows are keyed by unique_product_id and 181 | target_country. 182 | * product_historical_materialized - Historic snapshot of performance 183 | metrics at a product category level. 184 | 185 | #### 2.2.4 [Optional] Update location and locales if different from US 186 | 187 | * If your data shouldn't be materialized in the US, change the BigQuery dataset 188 | location in config.yaml 189 | 190 | * [Market Insights only] Adjust the locales in best_sellers_workflow.sql, by 191 | default set to "en-US" 192 | 193 | * You can make the changes before or after running the install script 194 | 195 | * If you're making the changes afterwards, re-run the install script 196 | * Check the scheduled queries in BigQuery and disable any older version of 197 | the Main Workflow 198 | 199 | ## 2.3. Configure Data Sources 200 | 201 | You will need to create or copy the required Data Source(s) in Data Studio: 202 | 203 | ### For Markup: 204 | 205 | * Create `product_detailed_materialized` Data Source (linked to 206 | `markup.product_detailed_materialized`) 207 | * Create `product_historical_materialized` Data Source (linked to 208 | `markup.product_historical_materialized`) 209 | 210 | To create a data source: 211 | 212 | * Click on the 213 | [link](https://datastudio.google.com/c/u/0/datasources/create?connectorId=2) 214 | 215 | * Make sure you are using the BigQuery connector. If not, choose "`BigQuery`" from 216 | the list of available connectors. 217 | 218 | * Search for your GCP Project Id under My Projects. 219 | 220 | * Under Dataset, click on "`markup`". 221 | 222 | * Under Table, choose the required table view. 223 | 224 | * Click `Connect` in the top right corner and wait for the data source to be 225 | created. 226 | 227 | ### For Merchant Market Insights: 228 | 229 | * Copy 230 | [Market Insights Snapshot (TEMPLATE)](https://datastudio.google.com/datasources/9dbdb290-0ea6-4d81-90df-5e4f9ec6f504) 231 | and connect it to `markup.market_insights_snapshot_view` 232 | * Copy 233 | [Market Insights Historical (TEMPLATE)](https://datastudio.google.com/datasources/0397b27f-96b8-44cd-afca-645f64882a26) 234 | and connect it to `markup.market_insights_historical_view` 235 | * Copy 236 | [Market Insights Best Sellers (TEMPLATE)](https://datastudio.google.com/datasources/b2f5bafe-01e2-4e30-bfb3-022a6c2f3ad6) 237 | and connect it to `markup.market_insights_best_sellers_materialized` 238 | 239 | To copy a data source: 240 | 241 | * Click on the data source template link above. 242 | 243 | * Click on the ![copy icon](images/copy_icon.png) icon in the top right corner 244 | next to "Create Report". 245 | 246 | * Click "Copy Data Source" on the "Copy Data Source" pop-up. 247 | 248 | * Select your Project, Dataset, and Table to be connected, then press 249 | "Reconnect" in the top right corner. 250 | 251 | * Click "Apply" on the "Apply Connection Changes" pop-up. 252 | 253 | * Repeat this process for all three data source templates above. 254 | 255 | ## 2.4.
Create Data-Studio Dashboard(s) 256 | 257 | ### For Markup: 258 | 259 | * Click on the following link to the Data Studio template: 260 | [link](https://datastudio.google.com/reporting/22c41768-140b-4572-9bf7-4e7e1891a8dd/preview) 261 | 262 | * Click "`Use Template`" 263 | 264 | * Choose the new "`product_detailed_materialized`" and 265 | "`product_historical_materialized`" data-sources created in the previous 266 | step 267 | 268 | * Click "`Copy Report`" 269 | 270 | ### For Merchant Market Insights: 271 | 272 | * Click on the following link to the Data Studio template: 273 | [link](https://datastudio.google.com/reporting/806b9d3e-073a-43c2-bba0-53a0eb65302f/page/QK7kB/preview) 274 | 275 | * Click "`Use Template`" 276 | 277 | * Choose the three data-sources copied in the previous step 278 | 279 | * Click "`Copy Report`" 280 | 281 | #### Note - The performance metrics in the dashboard might take 12-24 hours to appear. 282 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 
48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /plugins/cloud_utils/cloud_auth.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 Google LLC.. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # Lint as: python3 17 | # Copyright 2020 Google LLC 18 | # 19 | # Licensed under the Apache License, Version 2.0 (the "License"); 20 | # you may not use this file except in compliance with the License. 21 | # You may obtain a copy of the License at 22 | # 23 | # http://www.apache.org/licenses/LICENSE-2.0 24 | # 25 | # Unless required by applicable law or agreed to in writing, software 26 | # distributed under the License is distributed on an "AS IS" BASIS, 27 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 28 | # See the License for the specific language governing permissions and 29 | # limitations under the License. 30 | 31 | """Perform auth related operations in GCP.""" 32 | 33 | import base64 34 | import json 35 | import os 36 | import textwrap 37 | from typing import Any, Dict 38 | import logging 39 | 40 | import apiclient 41 | from googleapiclient import discovery 42 | from googleapiclient import errors 43 | import google.auth 44 | from google.auth import credentials 45 | from google.auth.transport import requests 46 | from google.oauth2 import service_account 47 | from plugins.cloud_utils import utils 48 | 49 | # HTTP status code 50 | _NOT_FOUND_ERROR_CODE = 404 51 | 52 | # Scope to manage service accounts 53 | _SCOPE = 'https://www.googleapis.com/auth/cloud-platform' 54 | 55 | 56 | class Error(Exception): 57 | """A generic error thrown for any exceptions in cloud_auth module.""" 58 | pass 59 | 60 | 61 | def get_credentials( 62 | service_account_key_file: str = None) -> credentials.Credentials: 63 | """Get credentials to authenticate while calling GCP APIs. 64 | 65 | Args: 66 | service_account_key_file: Optional. File containing service account key. If 67 | not passed the default credential will be used. 68 | 69 | Returns: 70 | credential: Credential object to authenticate while calling GCP APIs. 71 | 72 | Raises: 73 | FileNotFoundError: If the provided file is not found. 74 | Error: If no default credentials are found and service account key file is 75 | not given. 76 | """ 77 | if service_account_key_file is None: 78 | try: 79 | default_credentials, _ = google.auth.default() 80 | return default_credentials 81 | except google.auth.exceptions.DefaultCredentialsError as error: 82 | error_message = textwrap.dedent(""" 83 | No default credentials found. Please run 84 | `gcloud auth application-default login` before continuing. 85 | Error: {}""".format(error)) 86 | raise Error(error_message) 87 | if not os.path.isfile(service_account_key_file): 88 | raise FileNotFoundError( 89 | 'The service account private key file "{}" not found.'.format( 90 | service_account_key_file)) 91 | return service_account.Credentials.from_service_account_file( 92 | service_account_key_file, scopes=[_SCOPE]) 93 | 94 | 95 | def build_service_client( 96 | service_name: str, 97 | service_account_key_file: str = None) -> apiclient.discovery.Resource: 98 | """Construct a Resource for interacting with GCP service APIs. 99 | 100 | Args: 101 | service_name: Name of the service for which the client is created. 
102 | service_account_key_file: Optional. File containing service account key. If 103 | not passed the default credential will be used. 104 | 105 | Returns: 106 | client: A client with methods for interacting with the service APIs. 107 | """ 108 | credentials_info = get_credentials(service_account_key_file) 109 | return discovery.build( 110 | service_name, 'v1', credentials=credentials_info, cache_discovery=False) 111 | 112 | 113 | def _get_resource_manager_client() -> apiclient.discovery.Resource: 114 | """Creates a new resource manager client to manage GCP projects. 115 | 116 | Returns: 117 | client: The newly created resource manager client. 118 | """ 119 | return build_service_client('cloudresourcemanager') 120 | 121 | 122 | def _get_iam_client() -> apiclient.discovery.Resource: 123 | """Creates a new iam client. 124 | 125 | Returns: 126 | client: The newly created client. 127 | """ 128 | return build_service_client('iam') 129 | 130 | 131 | def _get_service_account_client() -> apiclient.discovery.Resource: 132 | """Creates a new client to manage service accounts. 133 | 134 | Returns: 135 | client: The newly created service account client. 136 | """ 137 | return _get_iam_client().projects().serviceAccounts() 138 | 139 | 140 | def _get_service_account_email(project_id: str, 141 | service_account_name: str) -> str: 142 | """Retrieves the service account email id. 143 | 144 | Args: 145 | project_id: GCP project id. 146 | service_account_name: The service account name. 147 | 148 | Returns: 149 | service_account_email: The service account email id. 150 | """ 151 | service_account_email = '{}@{}.iam.gserviceaccount.com'.format( 152 | service_account_name, project_id) 153 | return service_account_email 154 | 155 | 156 | def _get_service_account_name(project_id: str, 157 | service_account_name: str) -> str: 158 | """Retrieves fully qualified service account name. 159 | 160 | Args: 161 | project_id: GCP project id. 162 | service_account_name: The service account name. 163 | 164 | Returns: 165 | service_account_name: The fully qualified service account name. 166 | """ 167 | service_account_email = _get_service_account_email(project_id, 168 | service_account_name) 169 | service_account_name = 'projects/{}/serviceAccounts/{}'.format( 170 | project_id, service_account_email) 171 | return service_account_name 172 | 173 | 174 | def create_service_account(project_id: str, service_account_name: str, 175 | role_name: str, file_name: str) -> Dict[str, Any]: 176 | """Create a new service account. 177 | 178 | Args: 179 | project_id: GCP project id. 180 | service_account_name: The service account name. 181 | role_name: The role to be assigned to the service account. 182 | file_name: The file where service account key will be stored. 183 | 184 | Returns: 185 | service_account: The newly created service account. 186 | 187 | Raises: 188 | ValueError: If the service_account_name is empty. 189 | ValueError: If the file_name is empty. 
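
  Illustrative usage sketch (the project, account, role and key path below
  are placeholder values, not part of the original file):
    >>> create_service_account('my-project', 'markup-sa', 'editor',
    ...                        '/tmp/markup-sa-key.json')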
190 | """ 191 | if not service_account_name: 192 | raise ValueError('Service account name cannot be empty.') 193 | if not file_name: 194 | raise ValueError('The file name cannot be empty.') 195 | service_account_details = get_service_account(project_id, 196 | service_account_name) 197 | if service_account_details: 198 | return service_account_details 199 | logging.info('Creating "%s" service account in "%s" project', 200 | service_account_name, project_id) 201 | request = _get_service_account_client().create( 202 | name='projects/' + project_id, 203 | body={ 204 | 'accountId': service_account_name, 205 | 'serviceAccount': { 206 | 'displayName': service_account_name.upper() 207 | }, 208 | }) 209 | 210 | service_account_details = utils.execute_request(request) 211 | set_service_account_role(project_id, service_account_name, role_name) 212 | create_service_account_key(project_id, service_account_name, file_name) 213 | return service_account_details 214 | 215 | 216 | def get_service_account(project_id: str, 217 | service_account_name: str) -> Dict[str, Any]: 218 | """Find the service account with given name. 219 | 220 | Args: 221 | project_id: GCP project id. 222 | service_account_name: The service account name. 223 | 224 | Returns: 225 | service_account: If the service account is found in the cloud project. 226 | None: If no service account is found. 227 | """ 228 | try: 229 | logging.info('Retrieving "%s" service account in "%s" project', 230 | service_account_name, project_id) 231 | name = 'projects/{}/serviceAccounts/{}@{}.iam.gserviceaccount.com'.format( 232 | project_id, service_account_name, project_id) 233 | service_account_details = _get_service_account_client().get( 234 | name=name).execute() 235 | return service_account_details 236 | except errors.HttpError as error: 237 | if error.resp.status == _NOT_FOUND_ERROR_CODE: 238 | return None 239 | logging.exception('Error occurred while retrieving service account: "%s".', 240 | error) 241 | raise Error('Error occurred while retrieving service account.') 242 | 243 | 244 | def create_service_account_key(project_id: str, service_account_name: str, 245 | file_name: str) -> None: 246 | """Creates key for service account and writes the private key to the file. 247 | 248 | Args: 249 | project_id: GCP project id. 250 | service_account_name: The service account name. 251 | file_name: The file to which the private key will be written. 252 | """ 253 | with open(file_name, 'w+') as file_object: 254 | _create_service_account_key(project_id, service_account_name, file_object) 255 | 256 | 257 | def _create_service_account_key(project_id: str, service_account_name: str, 258 | file_object: Any) -> None: 259 | """Creates key for service account and writes private key to the file object. 260 | 261 | Args: 262 | project_id: GCP project id. 263 | service_account_name: The service account name. 264 | file_object: The file object to which the private key will be written. 
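
  Illustrative sketch (assumes the service account already exists; the
  in-memory file object below is a placeholder):
    >>> import io
    >>> key_buffer = io.StringIO()
    >>> _create_service_account_key('my-project', 'markup-sa', key_buffer)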
265 | """ 266 | name = 'projects/{}/serviceAccounts/{}@{}.iam.gserviceaccount.com'.format( 267 | project_id, service_account_name, project_id) 268 | logging.info( 269 | 'Creating service account key for "%s" service account in "%s" project', 270 | service_account_name, project_id) 271 | request = _get_service_account_client().keys().create(name=name, body={}) 272 | service_account_key = utils.execute_request(request) 273 | private_key = base64.b64decode(service_account_key['privateKeyData']).decode() 274 | private_key_json = json.loads(private_key) 275 | json.dump(private_key_json, file_object) 276 | 277 | 278 | def set_service_account_role(project_id, service_account_name, 279 | role_name) -> None: 280 | """Adds role to a given service account. 281 | 282 | The roles grant service accounts appropriate permissions to use specific 283 | resource. The role_name can be either primitive, predefined or custom roles. 284 | Please see https://cloud.google.com/iam/docs/understanding-roles for list of 285 | allowed primitive and predefined roles. 286 | 287 | Args: 288 | project_id: GCP project id. 289 | service_account_name: The service account name. 290 | role_name: The role to be added to the service account. The role_name 291 | doesn't need "roles/" prefix to be added. Allowed values - 292 | https://cloud.google.com/iam/docs/understanding-roles. e.g - editor 293 | """ 294 | logging.info('Adding "%s" role to "%s" service account in "%s" project', 295 | role_name, service_account_name, project_id) 296 | # Read existing binding. 297 | service_account_email = _get_service_account_email(project_id, 298 | service_account_name) 299 | member = 'serviceAccount:{}'.format(service_account_email) 300 | binding = {'role': f'roles/{role_name}', 'members': [member]} 301 | request = _get_resource_manager_client().projects().getIamPolicy( 302 | resource=project_id) 303 | policy = utils.execute_request(request) 304 | 305 | # Modify binding. 306 | policy['bindings'].append(binding) 307 | 308 | # Write binding. 309 | set_iam_policy_request_body = {'policy': policy} 310 | request = _get_resource_manager_client().projects().setIamPolicy( 311 | resource=project_id, body=set_iam_policy_request_body) 312 | utils.execute_request(request) 313 | 314 | 315 | def get_auth_session( 316 | service_account_key_file: str) -> requests.AuthorizedSession: 317 | """Creates AuthorizedSession for given service account. 318 | 319 | Args: 320 | service_account_key_file: File which contains service account private key. 321 | 322 | Returns: 323 | authorized_session: AuthorizedSession for service account. 324 | 325 | Raises: 326 | FileNotFoundError: If the provided file is not found. 327 | """ 328 | credentials_info = get_credentials(service_account_key_file) 329 | return requests.AuthorizedSession(credentials_info) 330 | -------------------------------------------------------------------------------- /plugins/cloud_utils/cloud_storage.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 Google LLC.. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at
7 | #
8 | #     http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | # Lint as: python3
17 | # Copyright 2020 Google LLC
18 | #
19 | # Licensed under the Apache License, Version 2.0 (the "License");
20 | # you may not use this file except in compliance with the License.
21 | # You may obtain a copy of the License at
22 | #
23 | #     http://www.apache.org/licenses/LICENSE-2.0
24 | #
25 | # Unless required by applicable law or agreed to in writing, software
26 | # distributed under the License is distributed on an "AS IS" BASIS,
27 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
28 | # See the License for the specific language governing permissions and
29 | # limitations under the License.
30 | 
31 | """Manage operations on Cloud Storage."""
32 | 
33 | import os
34 | from typing import Optional, Tuple
35 | from urllib import parse
36 | 
37 | import logging
38 | 
39 | from google.api_core import exceptions
40 | from google.api_core import retry
41 | from google.cloud import storage
42 | from plugins.cloud_utils import cloud_auth
43 | 
44 | 
45 | class Error(Exception):
46 |   """A generic error thrown for exceptions in cloud_storage module."""
47 |   pass
48 | 
49 | 
50 | class CloudStorageUtils(object):
51 |   """CloudStorageUtils provides methods to manage Cloud Storage.
52 | 
53 |   Typical usage example:
54 |     >>> cloud_storage = CloudStorageUtils('project_id')
55 |     >>> cloud_storage.upload_file_to_url('/tmp/file.txt', 'gs://bucket/file.txt')
56 |   """
57 | 
58 |   def __init__(self,
59 |                project_id: str,
60 |                service_account_key_file: Optional[str] = None) -> None:
61 |     """Initialize new instance of CloudStorageUtils.
62 | 
63 |     Args:
64 |       project_id: GCP project id.
65 |       service_account_key_file: File containing service account key. If not
66 |         passed the default credential will be used. There are the following
67 |         ways to create a service account key -
68 |         1) Use `create_service_account_key` method from `cloud_auth` module.
69 |         2) Use `gcloud` command line utility as documented here -
70 |         https://cloud.google.com/iam/docs/creating-managing-service-account-keys
71 |     """
72 |     credentials = cloud_auth.get_credentials(service_account_key_file)
73 |     self.client = storage.Client(project=project_id, credentials=credentials)
74 | 
75 |   def _parse_blob_url(self, url: str) -> Tuple[str, str]:
76 |     """Parses a Cloud Storage URL into a tuple of bucket name and path.
77 | 
78 |     Args:
79 |       url: The full URL to blob, in the form of 'gs://bucket_name/path/to/blob'.
80 | 
81 |     Returns:
82 |       Tuple: The bucket name and path tuple.
83 | 
84 |     Raises:
85 |       Error: If the URL is not in the 'gs://bucket_name/path/to/blob' form.
86 |     """
87 |     result = parse.urlparse(url)
88 |     # result.path will be '/path/to/blob', we need to strip the leading '/'.
89 |     bucket_name, path = result.hostname, result.path[1:]
90 |     if not bucket_name:
91 |       error_message = (f'Invalid URL - "{url}". The URL should be in the form '
92 |                        f'of "gs://bucket_name/path/to/blob"')
93 |       logging.exception(error_message)
94 |       raise Error(error_message)
95 |     return bucket_name, path
96 | 
97 |   @retry.Retry()
98 |   def _get_or_create_bucket(self, bucket_name: str) -> storage.bucket.Bucket:
99 |     """Retrieves or creates Cloud Storage bucket given bucket name.
100 | 
101 |     If the bucket doesn't exist in the Cloud Storage, it will be created and the
102 |     newly created bucket will be returned. The operation will be retried for
103 |     transient errors e.g. - google.api_core.exceptions.InternalServerError.
104 | 
105 |     Args:
106 |       bucket_name: Name of the bucket to be retrieved.
107 | 
108 |     Returns:
109 |       google.cloud.storage.bucket.Bucket: The bucket object.
110 |     """
111 |     try:
112 |       return self.client.get_bucket(bucket_name)
113 |     except exceptions.NotFound:
114 |       logging.info(f'Cloud Storage bucket "{bucket_name}" not found. '
115 |                    'Hence creating the bucket.')
116 |       bucket = self.client.create_bucket(bucket_name)
117 |       logging.info(
118 |           f'Cloud Storage bucket "{bucket_name}" created successfully.')
119 |       return bucket
120 | 
121 |   def upload_file_to_url(self, source_file_path: str,
122 |                          destination_file_url: str):
123 |     """Uploads file from source file system to Cloud Storage.
124 | 
125 |     This is a convenience method that parses bucket name and path, and calls
126 |     upload_file, so that the client can pass the full path as a whole.
127 | 
128 |     Args:
129 |       source_file_path: Path to the file to be uploaded. e.g - /tmp/file.txt
130 |       destination_file_url: The full URL to destination file, in the form of
131 |         'gs://bucket_name/path/to/file'.
132 |     """
133 |     bucket_name, path = self._parse_blob_url(destination_file_url)
134 |     self.upload_file(source_file_path, bucket_name, path)
135 | 
136 |   def upload_file(self, source_file_path: str, bucket_name: str,
137 |                   destination_file_path: str) -> None:
138 |     """Uploads file from source file system to Cloud Storage.
139 | 
140 |     If the bucket doesn't exist in the Cloud Storage, it will be created.
141 | 
142 |     Args:
143 |       source_file_path: Path to the file to be uploaded. e.g - /tmp/file.txt
144 |       bucket_name: Cloud Storage bucket to which the file should be uploaded. If
145 |         the Cloud Storage URL is 'gs://bucket1/file1.txt', then the bucket_name
146 |         would be 'bucket1'.
147 |       destination_file_path: Path of the destination blob/object within the
148 |         Cloud Storage bucket. If the Cloud Storage URL is
149 |         'gs://bucket1/dir1/file1.txt', then the destination_file_path would be
150 |         'dir1/file1.txt'.
151 |     Raises:
152 |       FileNotFoundError: If the provided file is not found.
153 |       Error: If the upload was not successful.
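
    Typical usage example (an illustrative sketch; the bucket and file names
    below are placeholder values):
      >>> cloud_storage = CloudStorageUtils('my-project')
      >>> cloud_storage.upload_file('/tmp/file.txt', 'bucket1', 'dir1/file1.txt')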
154 |     """
155 |     if not os.path.isfile(source_file_path):
156 |       logging.error(f'The file "{source_file_path}" could not be found.')
157 |       raise FileNotFoundError(
158 |           f'The file "{source_file_path}" could not be found.')
159 |     try:
160 |       logging.info('Uploading "%s" file to "gs://%s/%s"', source_file_path,
161 |                    bucket_name, destination_file_path)
162 |       bucket = self._get_or_create_bucket(bucket_name)
163 |       self._upload_file(source_file_path, bucket, destination_file_path)
164 |       logging.info('Uploaded "%s" file to "gs://%s/%s"', source_file_path,
165 |                    bucket_name, destination_file_path)
166 |     except exceptions.RetryError:
167 |       error_message = (f'Error when uploading file "{source_file_path}" to '
168 |                        f'"gs://{bucket_name}/{destination_file_path}"')
169 |       logging.exception(error_message)
170 |       raise Error(error_message)
171 | 
172 |   @retry.Retry()
173 |   def _upload_file(self, source_file_path: str, bucket: storage.bucket.Bucket,
174 |                    destination_file_path: str) -> None:
175 |     """Uploads file to Cloud Storage with Retry logic.
176 | 
177 |     The Retry decorator will retry transient API errors. Following errors are
178 |     some examples of transient errors:
179 |     1. google.api_core.exceptions.InternalServerError
180 |     2. google.api_core.exceptions.TooManyRequests
181 |     3. google.api_core.exceptions.ServiceUnavailable
182 | 
183 |     Args:
184 |       source_file_path: Path to the file to be uploaded. e.g - /tmp/file.txt
185 |       bucket: Cloud Storage bucket to which the file should be uploaded.
186 |       destination_file_path: Path of the destination blob/object within the
187 |         Cloud Storage bucket. If the Cloud Storage URL is
188 |         'gs://bucket1/dir1/file1.txt', then the destination_file_path would be
189 |         'dir1/file1.txt'.
190 |     """
191 |     destination_blob = bucket.blob(destination_file_path)
192 |     destination_blob.upload_from_filename(source_file_path)
193 | 
194 |   def upload_directory_to_url(self, source_directory_path: str,
195 |                               destination_dir_url: str):
196 |     """Uploads an entire directory to Cloud Storage.
197 | 
198 |     This is a convenience method that parses bucket name and path, and calls
199 |     upload_directory, so that the client can pass the full path as a whole.
200 | 
201 |     Args:
202 |       source_directory_path: Path to the directory to be uploaded: e.g -
203 |         /tmp/dir1/dir2
204 |       destination_dir_url: The full URL to destination directory, in the form of
205 |         'gs://bucket_name/path/to/dir'.
206 |     """
207 |     bucket_name, path = self._parse_blob_url(destination_dir_url)
208 |     self.upload_directory(source_directory_path, bucket_name, path)
209 | 
210 |   def upload_directory(self, source_directory_path: str, bucket_name: str,
211 |                        destination_dir_path: str) -> None:
212 |     """Uploads an entire directory to Cloud Storage.
213 | 
214 |     All the files in the source directory are identified recursively and
215 |     uploaded to the Cloud Storage bucket. Symlinks in the source directory
216 |     are ignored to avoid infinite recursion. If the bucket doesn't exist in
217 |     the Cloud Storage it will be created.
218 | 
219 |     Args:
220 |       source_directory_path: Path to the directory to be uploaded: e.g -
221 |         /tmp/dir1/dir2
222 |       bucket_name: Cloud Storage bucket to which the directory should be
223 |         uploaded. If the Cloud Storage URL is 'gs://bucket1/dir1/dir2', then the
224 |         bucket_name would be 'bucket1'.
225 |       destination_dir_path: Path of the destination blob/object within the
226 |         Cloud Storage bucket. 
If the Cloud Storage URL is 227 | 'gs://bucket/dir1/dir2', then the destination_dir_path would be 228 | 'dir1/dir2'. 229 | 230 | Raises: 231 | FileNotFoundError: If the provided directory is not found. 232 | """ 233 | if not os.path.isdir(source_directory_path): 234 | logging.error( 235 | f'The directory "{source_directory_path}" could not be found.') 236 | raise FileNotFoundError( 237 | f'The directory "{source_directory_path}" could not be found.') 238 | logging.info('Uploading "%s" directory to "gs://%s/%s"', 239 | source_directory_path, bucket_name, destination_dir_path) 240 | files_to_upload = [] 241 | for (root, _, files) in os.walk(source_directory_path): 242 | if not files: 243 | continue 244 | for file in files: 245 | full_path = os.path.join(root, file) 246 | files_to_upload.append(full_path) 247 | bucket = self._get_or_create_bucket(bucket_name) 248 | for file in files_to_upload: 249 | # Construct destination path by replacing source directory path: 250 | # If the source directory is `/tmp/dir1` and destination_dir_path is 251 | # `obj1/obj2` then file `/tmp/dir1/dir2/file.txt` will have a destination 252 | # file path `obj1/obj2/dir2/file.txt` 253 | destination_file_path = file.replace(source_directory_path, 254 | destination_dir_path) 255 | self._upload_file(file, bucket, destination_file_path) 256 | logging.info('Uploaded "%s" directory to "gs://%s/%s"', 257 | source_directory_path, bucket_name, destination_dir_path) 258 | 259 | def write_to_path(self, file_content: str, destination_file_path: str): 260 | """Writes file content to Cloud Storage file. 261 | 262 | This is a convenience method that parses bucket name and path, and calls 263 | write_to_file, so that the client can pass the full path as a whole. 264 | 265 | Args: 266 | file_content: The content to be written to the file. 267 | destination_file_path: The full path to destination file, in the form of 268 | 'gs://bucket_name/path/to/file'. 269 | """ 270 | bucket_name, path = self._parse_blob_url(destination_file_path) 271 | self.write_to_file(file_content, bucket_name, path) 272 | 273 | @retry.Retry() 274 | def write_to_file(self, file_content: str, bucket_name: str, 275 | destination_file_path: str): 276 | """Writes file content to Cloud Storage file. 277 | 278 | If the bucket doesn't exist in the Cloud Storage, it will be created. If the 279 | file already exists in the Cloud Storage, the content of the file will be 280 | overwritten. The operation will be retried for transient errors e.g. - 281 | google.api_core.exceptions.InternalServerError. 282 | 283 | Args: 284 | file_content: The content to be written to the file. 285 | bucket_name: Cloud Storage bucket to which the content should be written. 286 | If the Cloud Storage URL is 'gs://bucket1/file1.txt', then the 287 | bucket_name would be 'bucket1'. 288 | destination_file_path: Path of the destination blob/object within the 289 | Cloud Storage bucket. If the Cloud Storage URL is 290 | 'gs://bucket1/dir1/file1.txt', then the destination_file_path would be 291 | 'dir1/file1.txt'. 
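
    Typical usage example (an illustrative sketch; the content, bucket and
    path below are placeholder values):
      >>> cloud_storage.write_to_file('col1,col2', 'bucket1', 'dir1/file1.csv')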
292 | """ 293 | logging.info('Writing data to "gs://%s/%s"', bucket_name, 294 | destination_file_path) 295 | bucket = self._get_or_create_bucket(bucket_name) 296 | destination_blob = bucket.blob(destination_file_path) 297 | destination_blob.upload_from_string(file_content) 298 | logging.info('Successfully wrote data to "gs://%s/%s"', bucket_name, 299 | destination_file_path) 300 | -------------------------------------------------------------------------------- /plugins/cloud_utils/cloud_composer.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 Google LLC.. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # Lint as: python3 17 | # Copyright 2020 Google LLC 18 | # 19 | # Licensed under the Apache License, Version 2.0 (the "License"); 20 | # you may not use this file except in compliance with the License. 21 | # You may obtain a copy of the License at 22 | # 23 | # http://www.apache.org/licenses/LICENSE-2.0 24 | # 25 | # Unless required by applicable law or agreed to in writing, software 26 | # distributed under the License is distributed on an "AS IS" BASIS, 27 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 28 | # See the License for the specific language governing permissions and 29 | # limitations under the License. 30 | 31 | """Manage operations on Cloud Composer.""" 32 | 33 | from typing import Any, Dict 34 | import logging 35 | 36 | from googleapiclient import errors 37 | from plugins.cloud_utils import cloud_auth 38 | from plugins.cloud_utils import utils 39 | 40 | # Default Cloud Composer configuration. 41 | _LOCATION = 'us-central1' 42 | _DISC_SIZE = 20 # In GBs. 43 | _MACHINE_TYPE = 'n1-standard-1' 44 | _PYTHON_VERSION = '3' 45 | _HTTP_CONFLICT_CODE = 409 46 | 47 | 48 | class Error(Exception): 49 | """A generic error thrown for any exception in cloud_composer module.""" 50 | pass 51 | 52 | 53 | class CloudComposerUtils(object): 54 | """CloudComposerUtils class provides methods to manage Composer environment. 55 | 56 | This class manages Cloud Composer service within a single GCP 57 | project. 58 | 59 | Typical usage example: 60 | >>> composer = CloudComposerUtils('project_id', 61 | 'us-west1', 62 | 'service_account_key_file.json') 63 | >>> composer.create_environment() 64 | """ 65 | 66 | _CONFIG_KEY = 'config' # Key to get Composer configurations. 67 | _DAG_FOLDER_KEY = 'dagGcsPrefix' # Key to get Cloud Storage DAG folder URL. 68 | 69 | def __init__(self, 70 | project_id: str, 71 | location: str = _LOCATION, 72 | service_account_key_file: str = None) -> None: 73 | """Initialise new instance of CloudComposerUtils. 74 | 75 | Args: 76 | project_id: GCP project id. 77 | location: Optional. Region under which the Composer environment needs to 78 | be managed. It defaults to 'us-central1'. Allowed values - 79 | https://cloud.google.com/compute/docs/regions-zones/. 80 | service_account_key_file: Optional. 
File containing service account key.
81 |         If not passed the default credential will be used. There are the
82 |         following ways to create a service account key:
83 |         1. Use `create_service_account_key` method from `cloud_auth` module.
84 |         2. Use `gcloud` command line utility as documented here -
85 |         https://cloud.google.com/iam/docs/creating-managing-service-account-keys
86 |     """
87 |     self.client = cloud_auth.build_service_client('composer',
88 |                                                   service_account_key_file)
89 |     self.project_id = project_id
90 |     self.location = location
91 | 
92 |   def _get_fully_qualified_env_name(self, environment_name: str) -> str:
93 |     """Constructs fully qualified environment name.
94 | 
95 |     Args:
96 |       environment_name: Name of Composer environment.
97 | 
98 |     Returns:
99 |       fully_qualified_name: Fully qualified environment name in the following
100 |         format -
101 |         projects/{project_id}/locations/{location}/environments/{environment_name}
102 |     """
103 |     fully_qualified_name = (f'projects/{self.project_id}/locations/'
104 |                             f'{self.location}/environments/{environment_name}')
105 |     return fully_qualified_name
106 | 
107 |   def create_environment(self,
108 |                          environment_name: str,
109 |                          zone: str = 'b',
110 |                          disk_size_gb: int = _DISC_SIZE,
111 |                          machine_type: str = _MACHINE_TYPE,
112 |                          image_version: str = None,
113 |                          python_version: str = _PYTHON_VERSION) -> None:
114 |     """Creates new Cloud Composer environment.
115 | 
116 |     Args:
117 |       environment_name: Name of Composer environment.
118 |       zone: Optional. Zone where the Composer environment will be created. It
119 |         defaults to 'b' since zone 'b' is present in all the regions.
120 |         Allowed values - https://cloud.google.com/compute/docs/regions-zones/.
121 |       disk_size_gb: Optional. The disk size in GB used for node VMs. It defaults
122 |         to 20GB since it is the minimum size.
123 |       machine_type: Optional. The type of VM to create for the environment's
124 |         nodes. It defaults to 'n1-standard-1'. Allowed values -
125 |         https://cloud.google.com/compute/docs/machine-types.
126 |       image_version: The version of Composer and Airflow running in the
127 |         environment. If this is not provided, a default version is used as per
128 |         https://cloud.google.com/composer/docs/concepts/versioning/composer-versions.
129 |       python_version: The version of Python used to run Apache Airflow. It
130 |         defaults to '3'.
131 | 
132 |     Raises:
133 |       Error: If the provided disk size is less than 20GB.
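
    Typical usage example (an illustrative sketch; the environment name below
    is a placeholder value):
      >>> composer = CloudComposerUtils('my-project')
      >>> composer.create_environment('markup-env', disk_size_gb=20)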
134 |     """
135 |     if disk_size_gb < _DISC_SIZE:
136 |       raise Error(('The minimum disk size needs to be 20GB to create Composer '
137 |                    'environment'))
138 |     fully_qualified_name = self._get_fully_qualified_env_name(environment_name)
139 |     parent = f'projects/{self.project_id}/locations/{self.location}'
140 |     composer_zone = f'{self.location}-{zone}'
141 |     location = f'projects/{self.project_id}/zones/{composer_zone}'
142 |     machine_type = (f'projects/{self.project_id}/zones/{composer_zone}/'
143 |                     f'machineTypes/{machine_type}')
144 |     software_config = {
145 |         'pythonVersion': python_version
146 |     }
147 |     if image_version:
148 |       software_config['imageVersion'] = image_version
149 |     request_body = {
150 |         'name': fully_qualified_name,
151 |         'config': {
152 |             'nodeConfig': {
153 |                 'location': location,
154 |                 'machineType': machine_type,
155 |                 'diskSizeGb': disk_size_gb
156 |             },
157 |             'softwareConfig': software_config
158 |         }
159 |     }
160 |     logging.info('Creating "%s" Composer environment for "%s" project.',
161 |                  fully_qualified_name, self.project_id)
162 |     try:
163 |       request = self.client.projects().locations().environments().create(
164 |           parent=parent, body=request_body)
165 |       operation = utils.execute_request(request)
166 |       operation_client = self.client.projects().locations().operations()
167 |       utils.wait_for_operation(operation_client, operation)
168 |     except errors.HttpError as error:
169 |       if error.resp.status == _HTTP_CONFLICT_CODE:
170 |         logging.info('The Composer environment %s already exists.',
171 |                      fully_qualified_name)
172 |         return
173 |       logging.exception('Error occurred while creating Composer environment.')
174 |       raise Error('Error occurred while creating Composer environment.')
175 | 
176 |   def install_python_packages(self, environment_name: str,
177 |                               packages: Dict[str, str]) -> None:
178 |     """Install Python packages on the existing Composer environment.
179 | 
180 |     Args:
181 |       environment_name: Name of the existing Composer environment. The fully
182 |         qualified environment name will be constructed as follows -
183 |         'projects/{project_id}/locations/{location}/environments/
184 |         {environment_name}'.
185 |       packages: Dictionary of Python packages to be installed in the Composer
186 |         environment. Each entry in the dictionary has dependency name as the key
187 |         and version as the value. e.g -
188 |         {'tensorflow': '<=1.0.1', 'apache-beam': '==2.12.0', 'flask': '>1.0.3'}
189 | 
190 |     Raises:
191 |       Error: If the list of packages is empty.
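
    Typical usage example (an illustrative sketch; the package pin below is a
    placeholder value):
      >>> composer.install_python_packages('markup-env', {'pyyaml': '==6.0'})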
192 | """ 193 | if not packages: 194 | raise Error('Package list cannot be empty.') 195 | fully_qualified_name = self._get_fully_qualified_env_name(environment_name) 196 | logging.info('Installing "%s" packages in "%s" Composer environment.', 197 | packages, fully_qualified_name) 198 | try: 199 | request_body = { 200 | 'name': fully_qualified_name, 201 | 'config': { 202 | 'softwareConfig': { 203 | 'pypiPackages': packages 204 | } 205 | } 206 | } 207 | request = ( 208 | self.client.projects().locations().environments().patch( 209 | name=fully_qualified_name, 210 | body=request_body, 211 | updateMask='config.softwareConfig.pypiPackages')) 212 | operation = utils.execute_request(request) 213 | operation_client = self.client.projects().locations().operations() 214 | utils.wait_for_operation(operation_client, operation) 215 | logging.info('Installed "%s" packages in "%s" Composer environment.', 216 | packages, fully_qualified_name) 217 | except errors.HttpError: 218 | logging.exception('Error occurred while installing packages.') 219 | raise Error('Error occurred while installing python packages.') 220 | 221 | def set_environment_variables(self, environment_name: str, 222 | environment_variables: Dict[str, str]) -> None: 223 | """Sets environment variables on the existing Composer environment. 224 | 225 | Args: 226 | environment_name: Name of the existing Composer environment. The fully 227 | qualified environment name will be constructed as follows - 228 | 'projects/{project_id}/locations/{location}/environments/ 229 | {environment_name}'. 230 | environment_variables: Environment variables to be added to the Composer 231 | environment. 232 | 233 | Raises: 234 | Error: If the request was not processed successfully. 235 | """ 236 | fully_qualified_name = self._get_fully_qualified_env_name(environment_name) 237 | logging.info( 238 | 'Setting "%s" environment variables in "%s" Composer ' 239 | 'environment.', environment_variables, fully_qualified_name) 240 | try: 241 | request_body = { 242 | 'name': fully_qualified_name, 243 | 'config': { 244 | 'softwareConfig': { 245 | 'envVariables': environment_variables 246 | } 247 | } 248 | } 249 | request = ( 250 | self.client.projects().locations().environments().patch( 251 | name=fully_qualified_name, 252 | body=request_body, 253 | updateMask='config.softwareConfig.envVariables')) 254 | operation = utils.execute_request(request) 255 | operation_client = self.client.projects().locations().operations() 256 | utils.wait_for_operation(operation_client, operation) 257 | logging.info( 258 | 'Updated "%s" environment variables in "%s" Composer ' 259 | 'environment.', environment_variables, fully_qualified_name) 260 | except errors.HttpError: 261 | logging.exception('Error occurred while setting environment variables.') 262 | raise Error('Error occurred while setting environment variables.') 263 | 264 | def override_airflow_configs( 265 | self, environment_name: str, airflow_config_overrides: Dict[str, 266 | str]) -> None: 267 | """Overrides Airflow configurations on the existing Composer environment. 268 | 269 | Args: 270 | environment_name: Name of the existing Composer environment. The fully 271 | qualified environment name will be constructed as follows - 272 | 'projects/{project_id}/locations/{location}/environments/ 273 | {environment_name}'. 274 | airflow_config_overrides: Airflow configurations to be overridden in the 275 | Composer environment. 276 | 277 | Raises: 278 | Error: If the request was not processed successfully. 
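
    Typical usage example (an illustrative sketch; the override below is a
    placeholder value in Composer's 'section-key' form):
      >>> composer.override_airflow_configs(
      ...     'markup-env', {'core-dags_are_paused_at_creation': 'False'})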
279 | """ 280 | fully_qualified_name = self._get_fully_qualified_env_name(environment_name) 281 | logging.info( 282 | 'Overriding "%s" Airflow configurations in "%s" Composer ' 283 | 'environment.', airflow_config_overrides, fully_qualified_name) 284 | try: 285 | request_body = { 286 | 'name': fully_qualified_name, 287 | 'config': { 288 | 'softwareConfig': { 289 | 'airflowConfigOverrides': airflow_config_overrides 290 | } 291 | } 292 | } 293 | request = ( 294 | self.client.projects().locations().environments().patch( 295 | name=fully_qualified_name, 296 | body=request_body, 297 | updateMask='config.softwareConfig.airflowConfigOverrides')) 298 | operation = utils.execute_request(request) 299 | operation_client = self.client.projects().locations().operations() 300 | utils.wait_for_operation(operation_client, operation) 301 | logging.info( 302 | 'Airflow configurations "%s" has been overridden in "%s" Composer ' 303 | 'environment.', airflow_config_overrides, fully_qualified_name) 304 | except errors.HttpError: 305 | logging.exception( 306 | 'Error occurred while overriding Airflow configurations.') 307 | raise Error('Error occurred while overriding Airflow configurations.') 308 | 309 | def get_environment(self, environment_name: str) -> Dict[str, Any]: 310 | """Retrieves details of a Composer environment. 311 | 312 | Args: 313 | environment_name: Name of the existing Composer environment. The fully 314 | qualified environment name will be constructed as follows - 315 | 'projects/{project_id}/locations/{location}/environments/ 316 | {environment_name}'. 317 | 318 | Returns: 319 | environment: Details of Composer environment. 320 | 321 | Raises: 322 | Error: If the request was not processed successfully. 323 | """ 324 | fully_qualified_name = self._get_fully_qualified_env_name(environment_name) 325 | logging.info('Retrieving Composer environment details for "%s"', 326 | fully_qualified_name) 327 | try: 328 | request = self.client.projects().locations().environments().get( 329 | name=fully_qualified_name) 330 | composer_environment_details = utils.execute_request(request) 331 | return composer_environment_details 332 | except errors.HttpError: 333 | logging.exception('Error while retrieving Composer environment details.') 334 | raise Error('Error while retrieving Composer environment details.') 335 | 336 | def get_dags_folder(self, environment_name: str) -> str: 337 | """Returns Cloud Storage URL for DAGs folder for a Composer environment. 338 | 339 | Args: 340 | environment_name: Name of the existing Composer environment. The fully 341 | qualified environment name will be constructed as follows - 342 | 'projects/{project_id}/locations/{location}/environments/ 343 | {environment_name}'. 344 | 345 | Returns: 346 | The Cloud Storage DAGs URL. 347 | """ 348 | environment_details = self.get_environment(environment_name) 349 | environment_config = environment_details[self._CONFIG_KEY] 350 | return environment_config[self._DAG_FOLDER_KEY] 351 | -------------------------------------------------------------------------------- /cloud_data_transfer.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 Google LLC.. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at
7 | #
8 | #     http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | # python3
17 | """Module for managing BigQuery data transfers."""
18 | 
19 | import datetime
20 | import logging
21 | import time
22 | from typing import Any, Dict, Optional
23 | 
24 | import auth
25 | import config_parser
26 | from google.cloud import bigquery_datatransfer
27 | from google.protobuf import struct_pb2
28 | from google.protobuf import timestamp_pb2
29 | import pytz
30 | 
31 | _MERCHANT_CENTER_ID = 'merchant_center'  # Data source id for Merchant Center.
32 | _GOOGLE_ADS_ID = 'adwords'  # Data source id for Google Ads.
33 | _SLEEP_SECONDS = 60  # Seconds to sleep before checking resource status.
34 | _MAX_POLL_COUNTER = 100
35 | _PENDING_STATE = 2
36 | _RUNNING_STATE = 3
37 | _SUCCESS_STATE = 4
38 | _FAILED_STATE = 5
39 | _CANCELLED_STATE = 6
40 | 
41 | 
42 | class Error(Exception):
43 |   """Base error for this module."""
44 | 
45 | 
46 | class DataTransferError(Error):
47 |   """An exception to be raised when data transfer was not successful."""
48 | 
49 | 
50 | class CloudDataTransferUtils(object):
51 |   """This class provides methods to manage BigQuery data transfers.
52 | 
53 |   Typical usage example:
54 |     >>> data_transfer = CloudDataTransferUtils('project_id')
55 |     >>> data_transfer.create_merchant_center_transfer('12345', 'dataset_id', False)
56 |   """
57 | 
58 |   def __init__(self, project_id: str):
59 |     """Initialise new instance of CloudDataTransferUtils.
60 | 
61 |     Args:
62 |       project_id: GCP project id.
63 |     """
64 |     self.project_id = project_id
65 |     self.client = bigquery_datatransfer.DataTransferServiceClient()
66 | 
67 |   def wait_for_transfer_completion(
68 |       self, transfer_config: bigquery_datatransfer.TransferConfig) -> None:
69 |     """Waits for the completion of data transfer operation.
70 | 
71 |     This method retrieves data transfer operation and checks for its status. If
72 |     the operation is not completed, then the operation is re-checked after
73 |     `_SLEEP_SECONDS` seconds.
74 | 
75 |     Args:
76 |       transfer_config: Resource representing data transfer.
77 | 
78 |     Raises:
79 |       DataTransferError: If the data transfer is not successfully completed.
80 |     """
81 |     transfer_config_name = transfer_config.name
82 |     transfer_config_id = transfer_config_name.split('/')[-1]
83 |     poll_counter = 0  # Number of times the transfer status has been polled.
84 |     while True:
85 |       transfer_config_path = ('projects/' + self.project_id + '/locations/' +
86 |                               config_parser.get_dataset_location() +
87 |                               '/transferConfigs/' + transfer_config_id)
88 |       response = self.client.list_transfer_runs(
89 |           {'parent': transfer_config_path})
90 |       latest_transfer = None
91 |       for transfer in response:
92 |         latest_transfer = transfer
93 |         break
94 |       if not latest_transfer:
95 |         return
96 |       if latest_transfer.state == _SUCCESS_STATE:
97 |         logging.info('Transfer %s was successful.', transfer_config_name)
98 |         return
99 |       if (latest_transfer.state == _FAILED_STATE or
100 |           latest_transfer.state == _CANCELLED_STATE):
101 |         error_message = (f'Transfer {transfer_config_name} was not successful. '
102 |                          f'Error - {latest_transfer.error_status}')
103 |         logging.error(error_message)
104 |         raise DataTransferError(error_message)
105 |       logging.info(
106 |           'Transfer %s still in progress. Sleeping for %s seconds before '
107 |           'checking again.', transfer_config_name, _SLEEP_SECONDS)
108 |       time.sleep(_SLEEP_SECONDS)
109 |       poll_counter += 1
110 |       if poll_counter >= _MAX_POLL_COUNTER:
111 |         error_message = (f'Transfer {transfer_config_name} is taking too long'
112 |                          ' to finish. Hence failing the request.')
113 |         logging.error(error_message)
114 |         raise DataTransferError(error_message)
115 | 
116 |   def _get_existing_transfer(
117 |       self, data_source_id: str,
118 |       destination_dataset_id: Optional[str] = None,
119 |       params: Optional[Dict[str, str]] = None, name: Optional[str] = None
120 |   ) -> Optional[bigquery_datatransfer.TransferConfig]:
121 |     """Gets data transfer if it already exists.
122 | 
123 |     Args:
124 |       data_source_id: Data source id.
125 |       destination_dataset_id: BigQuery dataset id.
126 |       params: Data transfer specific parameters.
127 |       name: The display name of the transfer.
128 | 
129 |     Returns:
130 |       Data Transfer if the transfer already exists.
131 |       None otherwise.
132 |     """
133 |     dataset_location = config_parser.get_dataset_location()
134 |     parent = 'projects/' + self.project_id + '/locations/' + dataset_location
135 |     for transfer_config in self.client.list_transfer_configs(
136 |         {'parent': parent}):
137 |       if transfer_config.data_source_id != data_source_id:
138 |         continue
139 |       if (destination_dataset_id and
140 |           transfer_config.destination_dataset_id != destination_dataset_id):
141 |         continue
142 |       # If the transfer config is in Failed state, we should ignore it.
143 |       is_valid_state = transfer_config.state in (_PENDING_STATE, _RUNNING_STATE,
144 |                                                  _SUCCESS_STATE)
145 |       params_match = self._check_params_match(transfer_config, params)
146 |       name_matches = name is None or name == transfer_config.display_name
147 |       if params_match and is_valid_state and name_matches:
148 |         return transfer_config
149 |     return None
150 | 
151 |   def _check_params_match(
152 |       self, transfer_config: bigquery_datatransfer.TransferConfig,
153 |       params: Optional[Dict[str, str]]) -> bool:
154 |     """Checks if given parameters are present in transfer config.
155 | 
156 |     Args:
157 |       transfer_config: Data transfer configuration.
158 |       params: Data transfer specific parameters.
159 | 
160 |     Returns:
161 |       True if given parameters are present in transfer config, False otherwise.
162 |     """
163 |     if not params:
164 |       return True
165 |     for key, value in params.items():
166 |       config_params = transfer_config.params
167 |       if key not in config_params or config_params[key] != value:
168 |         return False
169 |     return True
170 | 
171 |   def _update_existing_transfer(
172 |       self, transfer_config: bigquery_datatransfer.TransferConfig,
173 |       params: Dict[str, str]) -> bigquery_datatransfer.TransferConfig:
174 |     """Updates existing data transfer.
175 | 
176 |     If the parameters are already present in the config, then the transfer
177 |     config update is skipped.
178 | 
179 |     Args:
180 |       transfer_config: Data transfer configuration to update.
181 |       params: Data transfer specific parameters.
182 | 
183 |     Returns:
184 |       Updated data transfer config.
185 |     """
186 |     if self._check_params_match(transfer_config, params):
187 |       logging.info(
188 |           'The data transfer config "%s" parameters match. 
Hence ' 189 | 'skipping update.', transfer_config.display_name) 190 | return transfer_config 191 | new_transfer_config = bigquery_datatransfer.TransferConfig() 192 | new_transfer_config.CopyFrom(transfer_config) 193 | # Clear existing parameter values. 194 | new_transfer_config.params.Clear() 195 | for key, value in params.items(): 196 | new_transfer_config.params[key] = value 197 | # Only params field is updated. 198 | update_mask = {'paths': ['params']} 199 | new_transfer_config = self.client.update_transfer_config( 200 | new_transfer_config, update_mask) 201 | logging.info('The data transfer config "%s" parameters updated.', 202 | new_transfer_config.display_name) 203 | return new_transfer_config 204 | 205 | def create_merchant_center_transfer( 206 | self, merchant_id: str, destination_dataset: str, 207 | enable_market_insights: bool) -> bigquery_datatransfer.TransferConfig: 208 | """Creates a new merchant center transfer. 209 | 210 | Merchant center allows retailers to store product info into Google. This 211 | method creates a data transfer config to copy the product data to BigQuery. 212 | 213 | Args: 214 | merchant_id: Google Merchant Center(GMC) account id. 215 | destination_dataset: BigQuery dataset id. 216 | enable_market_insights: Whether to deploy market insights solution. 217 | 218 | Returns: 219 | Transfer config. 220 | """ 221 | logging.info('Creating Merchant Center Transfer.') 222 | parameters = struct_pb2.Struct() 223 | parameters['merchant_id'] = merchant_id 224 | parameters['export_products'] = True 225 | if enable_market_insights: 226 | parameters['export_price_benchmarks'] = True 227 | parameters['export_best_sellers'] = True 228 | data_transfer_config = self._get_existing_transfer(_MERCHANT_CENTER_ID, 229 | destination_dataset, 230 | parameters) 231 | if data_transfer_config: 232 | logging.info( 233 | 'Data transfer for merchant id %s to destination dataset %s ' 234 | 'already exists.', merchant_id, destination_dataset) 235 | return self._update_existing_transfer(data_transfer_config, parameters) 236 | logging.info( 237 | 'Creating data transfer for merchant id %s to destination dataset %s', 238 | merchant_id, destination_dataset) 239 | has_valid_credentials = self._check_valid_credentials(_MERCHANT_CENTER_ID) 240 | authorization_code = None 241 | if not has_valid_credentials: 242 | authorization_code = self._get_authorization_code(_MERCHANT_CENTER_ID) 243 | dataset_location = config_parser.get_dataset_location() 244 | parent = 'projects/' + self.project_id + '/locations/' + dataset_location 245 | input_config = { 246 | 'display_name': f'Merchant Center Transfer - {merchant_id}', 247 | 'data_source_id': _MERCHANT_CENTER_ID, 248 | 'destination_dataset_id': destination_dataset, 249 | 'params': parameters, 250 | 'data_refresh_window_days': 0, 251 | } 252 | request = bigquery_datatransfer.CreateTransferConfigRequest( 253 | parent=parent, 254 | transfer_config=input_config, 255 | authorization_code=authorization_code, 256 | ) 257 | transfer_config = self.client.create_transfer_config(request) 258 | logging.info( 259 | 'Data transfer created for merchant id %s to destination dataset %s', 260 | merchant_id, destination_dataset) 261 | return transfer_config 262 | 263 | def create_google_ads_transfer( 264 | self, 265 | customer_id: str, 266 | destination_dataset: str, 267 | backfill_days: int = 30) -> bigquery_datatransfer.TransferConfig: 268 | """Creates a new Google Ads transfer. 
269 | 270 | This method creates a data transfer config to copy Google Ads data to 271 | BigQuery dataset. 272 | 273 | Args: 274 | customer_id: Google Ads customer id. 275 | destination_dataset: BigQuery dataset id. 276 | backfill_days: Number of days to backfill. 277 | 278 | Returns: 279 | Transfer config. 280 | """ 281 | logging.info('Creating Google Ads Transfer.') 282 | 283 | parameters = struct_pb2.Struct() 284 | parameters['customer_id'] = customer_id 285 | data_transfer_config = self._get_existing_transfer(_GOOGLE_ADS_ID, 286 | destination_dataset, 287 | parameters) 288 | if data_transfer_config: 289 | logging.info( 290 | 'Data transfer for Google Ads customer id %s to destination dataset ' 291 | '%s already exists.', customer_id, destination_dataset) 292 | return data_transfer_config 293 | logging.info( 294 | 'Creating data transfer for Google Ads customer id %s to destination ' 295 | 'dataset %s', customer_id, destination_dataset) 296 | has_valid_credentials = self._check_valid_credentials(_GOOGLE_ADS_ID) 297 | authorization_code = None 298 | if not has_valid_credentials: 299 | authorization_code = self._get_authorization_code(_GOOGLE_ADS_ID) 300 | dataset_location = config_parser.get_dataset_location() 301 | parent = 'projects/' + self.project_id + '/locations/' + dataset_location 302 | input_config = { 303 | 'display_name': f'Google Ads Transfer - {customer_id}', 304 | 'data_source_id': _GOOGLE_ADS_ID, 305 | 'destination_dataset_id': destination_dataset, 306 | 'params': parameters, 307 | 'data_refresh_window_days': 1, 308 | } 309 | request = bigquery_datatransfer.CreateTransferConfigRequest( 310 | parent=parent, 311 | transfer_config=input_config, 312 | authorization_code=authorization_code, 313 | ) 314 | transfer_config = self.client.create_transfer_config(request=request) 315 | logging.info( 316 | 'Data transfer created for Google Ads customer id %s to destination ' 317 | 'dataset %s', customer_id, destination_dataset) 318 | if backfill_days: 319 | transfer_config_name = transfer_config.name 320 | transfer_config_id = transfer_config_name.split('/')[-1] 321 | start_time = datetime.datetime.now(tz=pytz.utc) - datetime.timedelta( 322 | days=backfill_days) 323 | end_time = datetime.datetime.now(tz=pytz.utc) 324 | start_time = start_time.replace(hour=0, minute=0, second=0, microsecond=0) 325 | end_time = end_time.replace(hour=0, minute=0, second=0, microsecond=0) 326 | transfer_config_path = parent + '/transferConfigs/' + transfer_config_id 327 | start_time_pb = timestamp_pb2.Timestamp() 328 | end_time_pb = timestamp_pb2.Timestamp() 329 | start_time_pb.FromDatetime(start_time) 330 | end_time_pb.FromDatetime(end_time) 331 | self.client.schedule_transfer_runs( 332 | parent=transfer_config_path, 333 | start_time=start_time_pb, 334 | end_time=end_time_pb) 335 | return transfer_config 336 | 337 | def schedule_query(self, name: str, 338 | query_string: str) -> bigquery_datatransfer.TransferConfig: 339 | """Schedules query to run every day. 340 | 341 | Args: 342 | name: Name of the scheduled query. 343 | query_string: The query to be run. 344 | 345 | Returns: 346 | Transfer config. 
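
    Typical usage example (an illustrative sketch; the name and query below
    are placeholder values):
      >>> data_transfer = CloudDataTransferUtils('my-project')
      >>> data_transfer.schedule_query('MarkUp query', 'SELECT 1')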
347 |     """
348 |     data_transfer_config = self._get_existing_transfer(
349 |         'scheduled_query', name=name)
350 |     parameters = struct_pb2.Struct()
351 |     parameters['query'] = query_string
352 |     if data_transfer_config:
353 |       logging.info('Data transfer for scheduling query "%s" already exists.',
354 |                    name)
355 |       updated_transfer_config = self._update_existing_transfer(
356 |           data_transfer_config, parameters)
357 |       logging.info('Data transfer for scheduling query "%s" updated.', name)
358 |       start_time_pb = timestamp_pb2.Timestamp()
359 |       start_time = datetime.datetime.now(tz=pytz.utc)
360 |       start_time_pb.FromDatetime(start_time)
361 |       self.client.start_manual_transfer_runs(
362 |           parent=updated_transfer_config.name, requested_run_time=start_time_pb)
363 |       logging.info('One-time manual run started. It might take up to 1 hour '
364 |                    'for performance data to reflect on the dashboard.')
365 |       return updated_transfer_config
366 |     dataset_location = config_parser.get_dataset_location()
367 |     has_valid_credentials = self._check_valid_credentials('scheduled_query')
368 |     authorization_code = ''
369 |     if not has_valid_credentials:
370 |       authorization_code = self._get_authorization_code('scheduled_query')
371 |     parent = 'projects/' + self.project_id + '/locations/' + dataset_location
372 |     input_config = bigquery_datatransfer.TransferConfig(
373 |         display_name=name,
374 |         data_source_id='scheduled_query',
375 |         params={'query': query_string},
376 |         schedule='every 24 hours',
377 |     )
378 |     request = bigquery_datatransfer.CreateTransferConfigRequest(
379 |         parent=parent,
380 |         transfer_config=input_config,
381 |         authorization_code=authorization_code,
382 |     )
383 |     transfer_config = self.client.create_transfer_config(request=request)
384 |     return transfer_config
385 | 
386 |   def _get_data_source(self,
387 |                        data_source_id: str) -> bigquery_datatransfer.DataSource:
388 |     """Returns data source.
389 | 
390 |     Args:
391 |       data_source_id: Data source id.
392 |     """
393 |     dataset_location = config_parser.get_dataset_location()
394 |     name = ('projects/' + self.project_id + '/locations/' + dataset_location +
395 |             '/dataSources/' + data_source_id)
396 |     return self.client.get_data_source({'name': name})
397 | 
398 |   def _check_valid_credentials(self, data_source_id: str) -> bool:
399 |     """Returns true if valid credentials exist for the given data source.
400 | 
401 |     Args:
402 |       data_source_id: Data source id.
403 |     """
404 |     dataset_location = config_parser.get_dataset_location()
405 |     name = ('projects/' + self.project_id + '/locations/' + dataset_location +
406 |             '/dataSources/' + data_source_id)
407 |     response = self.client.check_valid_creds({'name': name})
408 |     return response.has_valid_creds
409 | 
410 |   def _get_authorization_code(self, data_source_id: str) -> str:
411 |     """Returns authorization code for a given data source.
412 | 
413 |     Args:
414 |       data_source_id: Data source id.
415 |     """
416 |     data_source = self._get_data_source(data_source_id)
417 |     if not data_source:
418 |       raise AssertionError('Invalid data source')
419 | 
420 |     client_id = data_source.client_id
421 |     scopes = data_source.scopes
422 |     return auth.retrieve_authorization_code(client_id, scopes, data_source_id)
423 | --------------------------------------------------------------------------------
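
Taken together, the modules above follow one pattern: `cloud_auth` issues
credentials and service accounts, `cloud_storage` and `cloud_composer` wrap
their respective services, and `cloud_data_transfer` drives the BigQuery Data
Transfer Service. A minimal end-to-end sketch of the transfer flow, with
placeholder values for the project id, Merchant Center id, Google Ads customer
id and dataset name (illustrative only, not part of the repository):

    from cloud_data_transfer import CloudDataTransferUtils

    # All ids below are placeholders.
    data_transfer = CloudDataTransferUtils('my-project')
    # Copy Merchant Center product data into BigQuery.
    merchant_transfer = data_transfer.create_merchant_center_transfer(
        '12345', 'markup_dataset', enable_market_insights=False)
    # Copy Google Ads performance data (30-day backfill by default).
    ads_transfer = data_transfer.create_google_ads_transfer(
        '1234567890', 'markup_dataset')
    # Block until both transfers have produced a successful run.
    data_transfer.wait_for_transfer_completion(merchant_transfer)
    data_transfer.wait_for_transfer_completion(ads_transfer)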