├── README.md ├── tests └── unit │ ├── data │ └── gae │ │ ├── test_search.sql │ │ ├── test_requirements.txt │ │ └── test_config.json │ └── gae │ ├── test_factory.py │ ├── base.py │ ├── connector │ ├── test_gcp.py │ └── test_bigquery.py │ ├── test_main.py │ ├── test_scheduler.py │ ├── test_utils.py │ └── test_worker.py ├── gae ├── standard_requirements.txt ├── worker.yaml ├── config_template ├── __init__.py ├── queue.yaml ├── cron.yaml ├── connector │ ├── __init__.py │ ├── gcp.py │ └── bigquery.py ├── main.yaml ├── appengine_config.py ├── main.py ├── factory.py ├── worker.py ├── scheduler.py ├── utils.py └── queries │ └── search_kpis.sql ├── LICENSE ├── .gitignore └── nox.py /README.md: -------------------------------------------------------------------------------- 1 | # Phoenix-Search 2 | Repository to test Search Algorithms for eCommerces 3 | -------------------------------------------------------------------------------- /tests/unit/data/gae/test_search.sql: -------------------------------------------------------------------------------- 1 | SELECT 1 FROM `project123.source_dataset.source_table` WHERE date={date} 2 | -------------------------------------------------------------------------------- /gae/standard_requirements.txt: -------------------------------------------------------------------------------- 1 | flask==0.12 2 | werkzeug==0.12.2 3 | google-auth 4 | google-api-python-client 5 | google-auth-httplib2 6 | WebTest 7 | -------------------------------------------------------------------------------- /tests/unit/data/gae/test_requirements.txt: -------------------------------------------------------------------------------- 1 | flask==0.12 2 | werkzeug==0.12.2 3 | google-auth 4 | google-api-python-client 5 | google-auth-httplib2 6 | WebTest 7 | pyyaml 8 | -------------------------------------------------------------------------------- /gae/worker.yaml: -------------------------------------------------------------------------------- 1 | runtime: python27 2 
| api_version: 1 3 | threadsafe: true 4 | service: worker 5 | 6 | handlers: 7 | - url: /.* 8 | script: worker.app 9 | login: admin 10 | 11 | basic_scaling: 12 | max_instances: 1 13 | -------------------------------------------------------------------------------- /tests/unit/data/gae/test_config.json: -------------------------------------------------------------------------------- 1 | {"jobs":{ 2 | "update_dashboard_tables": { 3 | "table_id": "table_id", 4 | "dataset_id": "dataset_id", 5 | "project_id": "project123", 6 | "query_path": "tests/unit/data/gae/test_search.sql", 7 | "dest_table_id": "search_{}", 8 | "dest_dataset_id": "dest_dataset", 9 | "dest_project_id": "dest_project", 10 | "hostname": "hostname", 11 | "geonetworklocation": "geo_location", 12 | "total_days": 1 13 | } 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /gae/config_template: -------------------------------------------------------------------------------- 1 | config = {"jobs":{ 2 | "update_dashboard_tables": { 3 | "table_id": Which table to read data from, 4 | "dataset_id": Dataset Id from where to read from, 5 | "project_id": Project Id where data is located, 6 | "query_path": Path to sql query to run, something like "queries/path/to/query.sql", 7 | "dest_table_id": where to save results, such as "search_{}". The script formats the {} to the correspondent date string, 8 | "dest_dataset_id": where to save results, 9 | "dest_project_id": project where results will be saved, 10 | "hostname": this is a parameter in our query, it specifies what hostname is allowed in our ga data, 11 | "geonetworklocation": we use this so we can filter out everybody who belongs to a certain Network ISP, 12 | "total_days": how many days are allowed to exist in BQ. More than that and we delete. This number is an integer. 
13 | } 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Willian Fuks 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /gae/__init__.py: -------------------------------------------------------------------------------- 1 | #MIT License 2 | # 3 | #Copyright (c) 2017 Willian Fuks 4 | # 5 | #Permission is hereby granted, free of charge, to any person obtaining a copy 6 | #of this software and associated documentation files (the "Software"), to deal 7 | #in the Software without restriction, including without limitation the rights 8 | #to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | #copies of the Software, and to permit persons to whom the Software is 10 | #furnished to do so, subject to the following conditions: 11 | # 12 | #The above copyright notice and this permission notice shall be included in all 13 | #copies or substantial portions of the Software. 14 | # 15 | #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | #IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | #FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | #AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | #LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | #OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | #SOFTWARE. 
22 | -------------------------------------------------------------------------------- /gae/queue.yaml: -------------------------------------------------------------------------------- 1 | #MIT License 2 | # 3 | #Copyright (c) 2017 Willian Fuks 4 | # 5 | #Permission is hereby granted, free of charge, to any person obtaining a copy 6 | #of this software and associated documentation files (the "Software"), to deal 7 | #in the Software without restriction, including without limitation the rights 8 | #to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | #copies of the Software, and to permit persons to whom the Software is 10 | #furnished to do so, subject to the following conditions: 11 | # 12 | #The above copyright notice and this permission notice shall be included in all 13 | #copies or substantial portions of the Software. 14 | # 15 | #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | #IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | #FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | #AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | #LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | #OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | #SOFTWARE. 
22 | 23 | 24 | queue: 25 | - name: default 26 | rate: 1/m 27 | -------------------------------------------------------------------------------- /gae/cron.yaml: -------------------------------------------------------------------------------- 1 | #MIT License 2 | # 3 | #Copyright (c) 2017 Willian Fuks 4 | # 5 | #Permission is hereby granted, free of charge, to any person obtaining a copy 6 | #of this software and associated documentation files (the "Software"), to deal 7 | #in the Software without restriction, including without limitation the rights 8 | #to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | #copies of the Software, and to permit persons to whom the Software is 10 | #furnished to do so, subject to the following conditions: 11 | # 12 | #The above copyright notice and this permission notice shall be included in all 13 | #copies or substantial portions of the Software. 14 | # 15 | #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | #IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | #FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | #AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | #LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | #OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | #SOFTWARE. 
22 | 23 | 24 | cron: 25 | - description: daily create and delete tables for search report 26 | url: /run_job/update_dashboard_tables/ 27 | target: phoenix-search 28 | schedule: every day 07:00 29 | -------------------------------------------------------------------------------- /gae/connector/__init__.py: -------------------------------------------------------------------------------- 1 | #MIT License 2 | # 3 | #Copyright (c) 2017 Willian Fuks 4 | # 5 | #Permission is hereby granted, free of charge, to any person obtaining a copy 6 | #of this software and associated documentation files (the "Software"), to deal 7 | #in the Software without restriction, including without limitation the rights 8 | #to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | #copies of the Software, and to permit persons to whom the Software is 10 | #furnished to do so, subject to the following conditions: 11 | # 12 | #The above copyright notice and this permission notice shall be included in all 13 | #copies or substantial portions of the Software. 14 | # 15 | #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | #IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | #FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | #AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | #LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | #OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | #SOFTWARE. 
22 | 23 | 24 | try: 25 | from bigquery import BigQueryService 26 | except ImportError: 27 | # We do this so we don't have to install dependencies when using Flexible 28 | # environment 29 | pass 30 | -------------------------------------------------------------------------------- /gae/main.yaml: -------------------------------------------------------------------------------- 1 | #MIT License 2 | # 3 | #Copyright (c) 2017 Willian Fuks 4 | # 5 | #Permission is hereby granted, free of charge, to any person obtaining a copy 6 | #of this software and associated documentation files (the "Software"), to deal 7 | #in the Software without restriction, including without limitation the rights 8 | #to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | #copies of the Software, and to permit persons to whom the Software is 10 | #furnished to do so, subject to the following conditions: 11 | # 12 | #The above copyright notice and this permission notice shall be included in all 13 | #copies or substantial portions of the Software. 14 | # 15 | #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | #IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | #FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | #AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | #LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | #OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | #SOFTWARE. 
22 | 23 | 24 | service: phoenix-search 25 | runtime: python27 26 | api_version: 1 27 | threadsafe: true 28 | 29 | handlers: 30 | - url: /.*/ 31 | script: main.app 32 | login: admin 33 | 34 | basic_scaling: 35 | max_instances: 1 36 | -------------------------------------------------------------------------------- /gae/appengine_config.py: -------------------------------------------------------------------------------- 1 | #MIT License 2 | # 3 | #Copyright (c) 2017 Willian Fuks 4 | # 5 | #Permission is hereby granted, free of charge, to any person obtaining a copy 6 | #of this software and associated documentation files (the "Software"), to deal 7 | #in the Software without restriction, including without limitation the rights 8 | #to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | #copies of the Software, and to permit persons to whom the Software is 10 | #furnished to do so, subject to the following conditions: 11 | # 12 | #The above copyright notice and this permission notice shall be included in all 13 | #copies or substantial portions of the Software. 14 | # 15 | #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | #IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | #FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | #AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | #LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | #OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | #SOFTWARE. 
22 | 23 | 24 | from google.appengine.ext import vendor 25 | 26 | import os 27 | root_path = os.path.dirname(os.path.abspath(__file__)) 28 | lib_path = os.path.join(root_path, 'lib') 29 | if os.path.isdir(lib_path): 30 | vendor.add(lib_path) 31 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | *.swp 6 | 7 | # C extensions 8 | #*.so 9 | 10 | # Distribution / packaging 11 | .Python 12 | env/ 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .coverage 43 | .coverage.* 44 | .cache 45 | nosetests.xml 46 | coverage.xml 47 | *.cover 48 | .hypothesis/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | 58 | # Flask stuff: 59 | instance/ 60 | .webassets-cache 61 | 62 | # Scrapy stuff: 63 | .scrapy 64 | 65 | # Sphinx documentation 66 | docs/_build/ 67 | 68 | # PyBuilder 69 | target/ 70 | 71 | # Jupyter Notebook 72 | .ipynb_checkpoints 73 | 74 | # pyenv 75 | .python-version 76 | 77 | # celery beat schedule file 78 | celerybeat-schedule 79 | 80 | # SageMath parsed files 81 | *.sage.py 82 | 83 | # dotenv 84 | .env 85 | 86 | # virtualenv 87 | .venv 88 | venv/ 89 | ENV/ 90 | .nox 91 | 92 | # Spyder project settings 93 | .spyderproject 94 | .spyproject 95 | 96 | # Rope project settings 97 | .ropeproject 98 | 
99 | # mkdocs documentation 100 | /site 101 | 102 | # mypy 103 | .mypy_cache/ 104 | 105 | # gcloud stuff 106 | key.json 107 | 108 | # project config 109 | config.py 110 | 111 | # VIM tags 112 | tags 113 | -------------------------------------------------------------------------------- /tests/unit/gae/test_factory.py: -------------------------------------------------------------------------------- 1 | #MIT License 2 | # 3 | #Copyright (c) 2017 Willian Fuks 4 | # 5 | #Permission is hereby granted, free of charge, to any person obtaining a copy 6 | #of this software and associated documentation files (the "Software"), to deal 7 | #in the Software without restriction, including without limitation the rights 8 | #to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | #copies of the Software, and to permit persons to whom the Software is 10 | #furnished to do so, subject to the following conditions: 11 | # 12 | #The above copyright notice and this permission notice shall be included in all 13 | #copies or substantial portions of the Software. 14 | # 15 | #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | #IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | #FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | #AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | #LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | #OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | #SOFTWARE. 
class TestJobsFactory(unittest.TestCase):
    """Unit tests for ``factory.JobsFactory``."""

    @staticmethod
    def _get_target_klass():
        """Import ``JobsFactory`` lazily and return the class itself."""
        from factory import JobsFactory
        return JobsFactory

    def test_factor_job(self):
        """An unknown job name raises ``TypeError``; a known one builds a
        scheduler carrying the expected ``url`` and ``target``.
        """
        jobs_factory = self._get_target_klass()()
        self.assertRaises(TypeError, jobs_factory.factor_job, 'invalid_name')

        job = jobs_factory.factor_job('update_dashboard_tables')
        self.assertEqual(type(job).__name__, 'SchedulerJob')
        self.assertEqual(job.url, '/update_dashboard_tables')
        self.assertEqual(job.target, 'worker')
@app.route("/run_job/<job_name>/")
def run_job(job_name):
    """Enqueue the background job identified by ``job_name``.

    This view works as a central manager: the job name taken from the URL
    selects which scheduler to build, and the request arguments are forwarded
    to that scheduler as its input parameters.

    :type job_name: str
    :param job_name: specifies which job to run.

    :rtype: str or tuple
    :returns: the scheduler's string representation on success; on failure,
              the error message together with a 500 status code.
    """
    # Imported locally so this view does not rely on a module-level import.
    import logging

    try:
        scheduler = jobs_factory.factor_job(job_name)
        scheduler.run(request.args)
        return str(scheduler)
    except Exception as err:
        # Request boundary: record the full traceback and still hand Flask a
        # real response. Falling through would return ``None``, which Flask
        # rejects as an invalid view result.
        logging.exception("Job %r could not be scheduled", job_name)
        return str(err), 500
class BaseTests(object):
    """Mixin that swaps the project config file for a test fixture.

    ``prepare_environ`` installs the fixture at ``config1_path`` (backing up
    any file already there to ``config2_path``) and ``clean_environ`` undoes
    that. ``utils`` lazily imports and caches ``gae.utils``.
    """
    config1_path = 'gae/config.py'
    config2_path = 'gae/config2.py'
    test_config = 'tests/unit/data/gae/test_config.json'
    _recover_flg = False
    _utils = None

    def prepare_environ(self):
        """Copy the test config over ``config1_path``, backing up any
        existing file first and remembering that it must be restored.
        """
        live_path = self.config1_path
        if os.path.isfile(live_path):
            shutil.copyfile(live_path, self.config2_path)
            self._recover_flg = True
            os.remove(live_path)
        shutil.copyfile(self.test_config, live_path)

    def clean_environ(self):
        """Restore the backed-up config if there was one; otherwise just
        delete the test config that ``prepare_environ`` installed.
        """
        if not self._recover_flg:
            os.remove(self.config1_path)
            return
        shutil.copyfile(self.config2_path, self.config1_path)
        os.remove(self.config2_path)

    @property
    def utils(self):
        """Return the ``gae.utils`` module, importing it on first access."""
        if self._utils:
            return self._utils
        import gae.utils as utils
        self._utils = utils
        return self._utils
# Source file: /gae/factory.py
class JobsFactory(object):
    """Builds the specified job for GAE."""

    def factor_job(self, job_name):
        """Selects one of the available jobs.

        :type job_name: str
        :param job_name: name of job to build.

        :rtype: `SchedulerJob`
        :returns: scheduler that receives a `URL` and a `target` parameter
                  to run tasks in background.

        :raises TypeError: when `job_name` is not a key of `jobs_setup`.
        """
        job_setup = self.jobs_setup.get(job_name)
        if not job_setup:
            raise TypeError("Please choose a valid job name.")
        return SchedulerJob(url=job_setup['url'], target=job_setup['target'])

    @property
    def jobs_setup(self):
        """Jobs and their setup to run in GAE.

        :rtype: dict
        :returns: dict where keys are jobs names and values are their
                  description (the `url` and `target` of each job).
        """
        return {'update_dashboard_tables': {'url': '/update_dashboard_tables',
                                            'target': 'worker'}}


# Source file: /nox.py
def session_unit_gae(session):
    """This session runs only the tests associated to the google AppEngine
    folder. To run it, type `nox --session unit_gae`

    :param session: nox session object; it is configured in place and then
                    used to install dependencies and run ``py.test``.

    :raises RuntimeError: when the App Engine SDK folder is not found.
    """
    # Single definition of the SDK location so that every PYTHONPATH entry
    # derived from it is spelled the same way.
    sdk_path = '/google-cloud-sdk/platform/google_appengine/'

    session.interpreter = 'python2.7'
    session.virtualenv_dirname = 'unit-gae'

    session.install('-r', 'tests/unit/data/gae/test_requirements.txt')
    session.install('pytest', 'pytest-cov', 'mock')

    if not os.path.isdir(sdk_path):
        raise RuntimeError("Please install gcloud components for app engine"
                           " in order to simulate an AppEngine environment "
                           " for testing")

    # we set ``./gae/`` in PYTHONPATH as well since this becomes
    # the root directory when App Engine starts the wsgi server.
    # The leading empty entry keeps the path starting with ':'.
    session.env = {'PYTHONPATH': ':'.join(
        ['', sdk_path, './', './gae/', sdk_path + 'lib/yaml/lib'])}

    session.run(
        'py.test',
        'tests/unit/gae/',
        '--cov=.',
        '--cov-report=html')
@app.route("/update_dashboard_tables", methods=['POST'])
def update_search_tables():
    """Runs the dashboard query job in BigQuery, waits for it and then
    deletes the destination table dated ``1 + total_days`` days ago.

    The date to process comes from the ``date`` form field when present;
    otherwise yesterday's date is used.

    :rtype: str
    :returns: the literal ``"finished"`` once every step has been issued.
    """
    setup = config['jobs']['update_dashboard_tables']

    if 'date' in request.form:
        date = utils.process_url_date(request.form.get('date'))
    else:
        date = utils.yesterday_date().strftime("%Y%m%d")

    # Job settings plus the date to process become the query keyword args.
    query_kwargs = dict(setup, date=date)
    query_job_body = utils.search_query_job_body(**query_kwargs)

    job = gcp_service.bigquery.execute_job(setup['project_id'],
                                           query_job_body)
    gcp_service.bigquery.poll_job(job)

    # The table to drop is always computed from the current time, not from
    # the ``date`` field that may have been sent in the request.
    cutoff = datetime.now() - timedelta(days=1 + setup['total_days'])
    expired_table_id = setup['dest_table_id'].format(cutoff.strftime("%Y%m%d"))
    gcp_service.bigquery.delete_table(project_id=setup['dest_project_id'],
                                      dataset_id=setup['dest_dataset_id'],
                                      table_id=expired_table_id)

    return "finished"
class SchedulerJob(object):
    """Job queue tasks.

    :type url: str
    :param url: url to trigger for task call.

    :type target: str
    :param target: name of service to trigger when running background tasks.
    """

    def __init__(self, url, target):
        self.url = url
        self.target = target
        # Stays ``None`` until ``run`` enqueues something.
        self.task = None

    def run(self, args):
        """Enqueues a task for `self.url` on the service `self.target`.

        :type args: dict
        :param args: dictionary with arguments to setup the job, such as which
                     `date` to process data and so on. Sent as the task
                     `params`.

        :raises ValueError: on `self.url` or `self.target` being falsy.
        """
        if not self.url or not self.target:
            raise ValueError("Please specify `URL` and `target`")
        self.task = taskqueue.add(url=self.url, target=self.target,
                                  params=args)

    def __str__(self):
        """Describes the enqueued task, or says that none exists yet."""
        if self.task:
            return "Task {} enqued, ETA {}".format(self.task.name,
                                                   self.task.eta)
        return 'No task has been enqueued so far'
import unittest
import mock

import googleapiclient
import google.auth.credentials


class TestGCPFactory(unittest.TestCase):
    """Unit tests for ``gae.connector.gcp.GCPService``."""

    @staticmethod
    def _make_credentials():
        """Returns a mock built with the ``Credentials`` spec."""
        return mock.Mock(spec=google.auth.credentials.Credentials)

    @staticmethod
    def _get_target_klass():
        """Imports the class under test at call time and returns it."""
        from gae.connector.gcp import GCPService

        return GCPService

    @mock.patch('gae.connector.gcp.app_engine')
    def test_cto_no_credentials(self, app_mock):
        app_mock.Credentials.return_value = self._make_credentials()
        service = self._get_target_klass()()
        self.assertIsInstance(service._credentials,
                              google.auth.credentials.Credentials)

    def test_cto_with_credentials(self):
        service = self._get_target_klass()(self._make_credentials())
        self.assertIsInstance(service._credentials,
                              google.auth.credentials.Credentials)

    def test_cto_with_invalid_credentials(self):
        with self.assertRaises(TypeError):
            self._get_target_klass()('invalid credential')

    @mock.patch('gae.connector.gcp.BigQueryService')
    def test_bigquery_property(self, bq_mock):
        bq_mock.return_value = 'con'
        service = self._get_target_klass()(self._make_credentials())
        # Nothing is built until the property is read for the first time.
        self.assertIsNone(service._bigquery)
        con = service.bigquery
        self.assertEqual(con, 'con')
        self.assertIsNotNone(service.bigquery)


# --------------------------------------------------------------------------------
# /tests/unit/gae/test_main.py:
# --------------------------------------------------------------------------------
#MIT License
#
#Copyright (c) 2017 Willian Fuks
#
#Permission is hereby granted, free of charge, to any person obtaining a copy
#of this software and associated documentation files (the "Software"), to deal
#in the Software without restriction, including without limitation the rights
#to use, copy, modify, merge, publish,
#distribute, sublicense, and/or sell
#copies of the Software, and to permit persons to whom the Software is
#furnished to do so, subject to the following conditions:
#
#The above copyright notice and this permission notice shall be included in all
#copies or substantial portions of the Software.
#
#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
#IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
#FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
#AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
#LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
#OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
#SOFTWARE.


import sys
import os
import mock
import unittest
import json

import webtest
from google.appengine.ext import testbed
from werkzeug.datastructures import ImmutableMultiDict
from base import BaseTests


class TestMainService(unittest.TestCase, BaseTests):
    """Unit tests for the ``/run_job/<name>/`` endpoint of ``gae.main``."""

    test_app = None

    def setUp(self):
        self.prepare_environ()
        from gae.main import app
        self.test_app = webtest.TestApp(app)
        self.testbed = testbed.Testbed()
        self.testbed.activate()

    def tearDown(self):
        self.clean_environ()
        self.testbed.deactivate()

    @mock.patch('gae.main.jobs_factory')
    def test_run_job(self, factory_mock):
        import types

        def __istr__(self, scheduler_obj):
            return 'OK!'

        job_mock = mock.Mock()
        # Bound to the mock so that ``str(job_mock)`` yields 'OK!'.
        job_mock.__str__ = types.MethodType(__istr__, job_mock)
        factory_mock.factor_job.return_value = job_mock

        # Request without query string.
        response = self.test_app.get('/run_job/job_name_test/')
        factory_mock.factor_job.assert_called_once_with('job_name_test')
        job_mock.run.assert_called_once()
        self.assertEqual(response.status_int, 200)

        # Request carrying a ``date`` argument: it must reach ``run``.
        response = self.test_app.get('/run_job/job_name_test/?date=xxxx')
        job_mock.run.assert_called_with(
            ImmutableMultiDict([('date', 'xxxx')]))
        self.assertEqual(response.status_int, 200)

        self.assertEqual(response.text, "OK!")


# --------------------------------------------------------------------------------
# /gae/connector/gcp.py:
# --------------------------------------------------------------------------------
#MIT License
#
#Copyright (c) 2017 Willian Fuks
#
#Permission is hereby granted, free of charge, to any person obtaining a copy
#of this software and associated documentation files (the "Software"), to deal
#in the Software without restriction, including without limitation the rights
#to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
#copies of the Software, and to permit persons to whom the Software is
#furnished to do so, subject to the following conditions:
#
#The above copyright notice and this permission notice shall be included in all
#copies or substantial portions of the Software.
#
#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
#IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
#FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
#IN NO EVENT SHALL THE
#AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
#LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
#OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
#SOFTWARE.


"""Main class with connectors to different services available in GCP."""


import time

import google.auth.credentials
import googleapiclient.discovery as disco
from google.auth import app_engine
from . import BigQueryService


class GCPService(BigQueryService):
    """Entry point holding credentials and lazily built service connectors."""

    _credentials = None
    _bigquery = None

    def __init__(self, credentials=None):
        """Builds a connector to interact with Google Cloud tools.

        :type credentials: `google.auth.credentials.Credentials`
        :param credentials: certificates to connect to GCP. When omitted,
                            ``app_engine.Credentials()`` is used instead.

        :raises: TypeError if ``credentials`` is given and is not an
                 instance of `google.auth.credentials.Credentials`.
        """
        is_valid = isinstance(credentials,
                              google.auth.credentials.Credentials)
        if credentials is not None and not is_valid:
            raise TypeError("credentials must be of type "
                            "google.auth.credentials")
        # if no ``credentials`` is sent then assume we are running this
        # code in AppEngine environment
        if credentials:
            self._credentials = credentials
        else:
            self._credentials = app_engine.Credentials()
        #from google.oauth2 import service_account
        #self._credentials = (service_account.Credentials.\
        #    from_service_account_file('./key.json'))

    @property
    def bigquery(self):
        """Returns the BigQuery connector, building it on first access."""
        if not self._bigquery:
            self._bigquery = BigQueryService(self._credentials)
        return self._bigquery


# --------------------------------------------------------------------------------
# /tests/unit/gae/test_scheduler.py:
# --------------------------------------------------------------------------------
#MIT License
#
#Copyright (c) 2017 Willian Fuks 4 | # 5 | #Permission is hereby granted, free of charge, to any person obtaining a copy 6 | #of this software and associated documentation files (the "Software"), to deal 7 | #in the Software without restriction, including without limitation the rights 8 | #to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | #copies of the Software, and to permit persons to whom the Software is 10 | #furnished to do so, subject to the following conditions: 11 | # 12 | #The above copyright notice and this permission notice shall be included in all 13 | #copies or substantial portions of the Software. 14 | # 15 | #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | #IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | #FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | #AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | #LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | #OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | #SOFTWARE. 
import sys
import os
import mock
import unittest
from collections import namedtuple

from google.appengine.ext import testbed


class TestSchedulerJob(unittest.TestCase):
    """Unit tests for ``scheduler.SchedulerJob`` using the taskqueue stub."""

    def setUp(self):
        self.testbed = testbed.Testbed()
        self.testbed.activate()
        self.testbed.init_taskqueue_stub('./gae/')
        self.taskqueue_stub = self.testbed.get_stub(
            testbed.TASKQUEUE_SERVICE_NAME)

    def tearDown(self):
        self.testbed.deactivate()

    @staticmethod
    def _get_target_klass():
        """Imports the class under test at call time and returns it."""
        from scheduler import SchedulerJob

        return SchedulerJob

    def test_cto(self):
        job = self._get_target_klass()('/url', 'target')
        self.assertEqual(job.url, '/url')
        self.assertEqual(job.target, 'target')

    def test_run(self):
        # Missing url/target must be rejected before anything is enqueued.
        job = self._get_target_klass()(None, None)
        with self.assertRaises(ValueError):
            job.run({})

        job = self._get_target_klass()('/url', 'target')
        job.run({})
        first_task = self.taskqueue_stub.get_filtered_tasks()[0]
        self.assertEqual(first_task.url, '/url')
        self.assertIsNotNone(first_task.target)

        job.run({'date': 'xxxx'})
        last_task = self.taskqueue_stub.get_filtered_tasks()[-1]
        self.assertEqual(last_task.url, '/url')
        self.assertIsNotNone(last_task.target)
        self.assertEqual(last_task.payload, 'date=xxxx')

    def test___str__(self):
        job = self._get_target_klass()('/url', 'target')
        self.assertEqual(str(job), "No task has been enqueued so far")

        fake_task = namedtuple("task", ["name", "eta"])
        job.task = fake_task("1", "2")
        self.assertEqual(str(job), "Task 1 enqued, ETA 2")


# --------------------------------------------------------------------------------
# /gae/connector/bigquery.py:
# --------------------------------------------------------------------------------
#MIT License
#
#Copyright (c) 2017 Willian Fuks
#
#Permission is hereby granted, free of charge, to any person obtaining a copy
#of this software and associated documentation files (the "Software"), to deal
#in the Software without restriction, including without limitation the rights
#to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
#copies of the Software, and to permit persons to whom the Software is
#furnished to do so, subject to the following conditions:
#
#The above copyright notice and this permission notice shall be included in all
#copies or substantial portions of the Software.
#
#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
#IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
#FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
#AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
#LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
#OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
#SOFTWARE.


"""BigQuery Service used in googleapiclient to interact with the backend
system"""


import time

import googleapiclient.discovery as disco
from googleapiclient.errors import HttpError


class BigQueryService(object):
    """Class to interact with BigQuery's backend using googleapiclient api.

    :type credentials: `google.auth.credentials.Credentials`
    :param credentials: certificates to connect to GCP.
    """

    def __init__(self, credentials):
        self.con = disco.build('bigquery', 'v2', credentials=credentials)

    def execute_job(self, project_id, body):
        """Inserts a job to run in GCP.

        :type project_id: str
        :param project_id: name of project Id to run the job.

        :type body: dict
        :param body: dict that specifies the job configuration.

        :returns: whatever the ``jobs().insert`` request returns when
                  executed.
        """
        insert_request = self.con.jobs().insert(projectId=project_id,
                                                body=body)
        return insert_request.execute(num_retries=3)

    def poll_job(self, job):
        """Blocks until ``job`` reports the state "DONE".

        The job status is requested once per second; there is no upper bound
        on the number of attempts.

        :type job: dict
        :param job: job resource as returned by ``execute_job``; only
                    ``job['jobReference']`` (``projectId`` and ``jobId``)
                    is read.

        :raises: `RuntimeError` carrying the ``errorResult`` entry when the
                 finished job reports one.
        """
        reference = job['jobReference']
        request = self.con.jobs().get(projectId=reference['projectId'],
                                      jobId=reference['jobId'])
        while True:
            status = request.execute(num_retries=3)['status']
            if status['state'] != 'DONE':
                time.sleep(1)
                continue
            if 'errorResult' in status:
                raise RuntimeError(status['errorResult'])
            return

    def delete_table(self, project_id, dataset_id, table_id):
        """Deletes table in BQ, doing nothing when it cannot be fetched.

        :type project_id: str
        :param project_id: project where table is located.

        :type dataset_id: str
        :param dataset_id: dataset where table is located.

        :type table_id: str
        :param table_id: table name to delete.
        """
        table_ref = {'projectId': project_id,
                     'datasetId': dataset_id,
                     'tableId': table_id}
        # Any ``HttpError`` raised while fetching the table is taken to mean
        # there is nothing to delete.
        try:
            self.con.tables().get(**table_ref).execute(num_retries=3)
        except HttpError:
            return
        self.con.tables().delete(**table_ref).execute(num_retries=3)


# --------------------------------------------------------------------------------
# /tests/unit/gae/test_utils.py:
# --------------------------------------------------------------------------------
#MIT License
#
#Copyright (c) 2017 Willian Fuks
#
#Permission is hereby granted, free of charge, to any person obtaining a copy
#of this software and associated documentation files (the "Software"), to deal
#in the Software without restriction, including without limitation the rights
#to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
#copies of the Software, and to permit persons to whom the Software is
#furnished to do so, subject to the following conditions:
#
#The above copyright notice and this permission notice shall be included in all
#copies or substantial portions of the Software.
#
#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
#IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
#FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
#AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
#LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
#OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
#SOFTWARE.
import json
import unittest
import mock
import datetime
import os
import shutil
from collections import Counter

from base import BaseTests


class TestUtils(unittest.TestCase, BaseTests):
    """Unit tests for the helpers in ``gae.utils``."""

    def setUp(self):
        self.prepare_environ()

    def tearDown(self):
        self.clean_environ()

    @staticmethod
    def load_mock_config():
        """Parses the test config file, dropping its ``config = `` prefix.

        :rtype: dict
        :returns: the configuration stored in ``test_config.json``.
        """
        # ``with`` guarantees the handle is closed once the content is read.
        with open('tests/unit/data/gae/test_config.json') as config_file:
            data = config_file.read().replace("config = ", "")
        return json.loads(data)

    def test_yesterday_date(self):
        expected = datetime.datetime.now() + datetime.timedelta(days=-1)
        result = self.utils.yesterday_date()
        self.assertEqual(expected.date(), result.date())

    @mock.patch('gae.utils.uuid')
    def test_search_query_job_body(self, uuid_mock):
        query_str = ("SELECT 1 FROM `project123.source_dataset.source_table` "
                     "WHERE date={date}")

        expected = {
            'jobReference': {
                'projectId': 'project123',
                'jobId': 'name'
            },
            'configuration': {
                'query': {
                    'destinationTable': {
                        'datasetId': 'dest_dataset',
                        'tableId': 'search_20171010',
                        'projectId': 'dest_project'
                    },
                    'maximumBytesBilled': 100000000000,
                    'query': query_str.format(date="20171010"),
                    'useLegacySql': False
                }
            }
        }
        uuid_mock.uuid4.return_value = 'name'

        job_setup = self.load_mock_config()['jobs']['update_dashboard_tables']
        # ``dict(mapping, key=value)`` merges the extra argument without
        # concatenating ``items()`` with a list.
        result = self.utils.search_query_job_body(
            **dict(job_setup, date='20171010'))

        # Single-argument calls print the same text under Python 2 and 3.
        print('EXPECTED  {}'.format(expected))
        print('RESULT  {}'.format(result))
        self.assertEqual(expected, result)

    def test_format_date(self):
        result = self.utils.format_date("20171010")
        expected = "2017-10-10"
        self.assertEqual(result, expected)

    def test_process_url_date(self):
        expected = None
        result = self.utils.process_url_date(None)
        self.assertEqual(expected, result)

        expected = "20171010"
        result = self.utils.process_url_date("20171010")
        self.assertEqual(expected, result)

        with self.assertRaises(ValueError):
            self.utils.process_url_date("2017-10-10")


# --------------------------------------------------------------------------------
# /gae/utils.py:
# --------------------------------------------------------------------------------
#MIT License
#
#Copyright (c) 2017 Willian Fuks
#
#Permission is hereby granted, free of charge, to any person obtaining a copy
#of this software and associated documentation files (the "Software"), to deal
#in the Software without restriction, including without limitation the rights
#to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
#copies of the Software, and to permit persons to whom the Software is
#furnished to do so, subject to the following conditions:
#
#The above copyright notice and this permission notice shall be included in all
#copies or substantial portions of the Software.
#
#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
#IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
#FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
#AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
#LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
#OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
#SOFTWARE.
"""General functions to be used throughout the services modules"""


import datetime
import uuid
import time


def yesterday_date():
    """Returns datetime for yesterday value

    :rtype: `datetime.datetime`
    :returns: the current local datetime minus one day.
    """
    return datetime.datetime.now() - datetime.timedelta(days=1)


def search_query_job_body(**kwargs):
    """Returns the body to be used in a query job.

    The whole ``kwargs`` mapping is also used to render the query template,
    so any placeholder present in the query file must be available in it.

    :type query_path: str
    :param query_path: path of the file holding the query template to run
                       against BQ.

    :type date: str
    :param date: value used to format ``dest_table_id``.

    :type project_id: str
    :param project_id: project used in the job reference.

    :type dest_project_id: str
    :param dest_project_id: project_id where results should be saved.

    :type dest_dataset_id: str
    :param dest_dataset_id: dataset_id where results should be saved.

    :type dest_table_id: str
    :param dest_table_id: template for the table where results should be
                          saved; it is formatted with ``date``.

    :rtype: dict
    :returns: dict containing body to setup job execution.
    """
    query = load_file_content(**kwargs)
    destination_table = {
        'datasetId': kwargs['dest_dataset_id'],
        'tableId': kwargs['dest_table_id'].format(kwargs['date']),
        'projectId': kwargs['dest_project_id']
    }
    return {
        'jobReference': {
            'projectId': kwargs['project_id'],
            # random id generated for every job body
            'jobId': str(uuid.uuid4())
        },
        'configuration': {
            'query': {
                'destinationTable': destination_table,
                'maximumBytesBilled': 100000000000,  # 100 GBs max is allowed
                'query': query,
                'useLegacySql': False
            }
        }
    }


def load_file_content(**kwargs):
    """Reads the string from a source file and formats it using ``kwargs``.

    :type kwargs: dict
    :param kwargs: it contains keys and values to render query template;
                   ``query_path`` is the path of the file to read.

    :rtype: str
    :returns: file string after format processing with ``kwargs`` input,
              stripped of leading and trailing whitespace.
    """
    # ``with`` closes the handle deterministically instead of leaving it to
    # the garbage collector.
    with open(kwargs.get('query_path')) as query_file:
        template = query_file.read()
    return template.format(**kwargs).strip()


def format_date(input_date, format="%Y-%m-%d"):
    """Changes input date to a new format.

    :type input_date: str
    :param input_date: date string of format "%Y%m%d".

    :type format: str
    :param format: new format to port input date.

    :rtype: str
    :returns: date string in format `format`.
    """
    parsed = datetime.datetime.strptime(input_date, "%Y%m%d")
    return parsed.strftime(format)


def process_url_date(date):
    """Validates the variable ``date`` received from the URL.

    :type date: str
    :param date: date to process.

    :raises: `ValueError` if ``date`` is not in format "%Y%m%d" and is
             not empty.

    :rtype: str
    :returns: ``date`` itself, unchanged (``None`` or empty values are
              handed back without validation).
    """
    # if ``date`` is defined then it was sent as parameter in the URL request
    if date:
        # Parsing is only used as validation; ``strptime`` raises the
        # ``ValueError`` on its own.
        datetime.datetime.strptime(date, "%Y%m%d")
    return date


# --------------------------------------------------------------------------------
# /tests/unit/gae/connector/test_bigquery.py:
# --------------------------------------------------------------------------------
#MIT License
#
#Copyright (c) 2017 Willian Fuks
#
#Permission is hereby granted, free of charge, to any person obtaining a copy
#of this software and associated documentation files (the "Software"), to deal
#in the Software without restriction, including without limitation the rights
#to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
#copies of the Software, and to permit persons to whom the Software is
#furnished to do so, subject to the following conditions:
#
#The above copyright notice and this permission notice shall be included in all
#copies or substantial portions of the Software.
#
#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
#IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
#FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
#AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
#LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
#OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
#SOFTWARE.
import unittest
import mock

import googleapiclient
import google.auth.credentials
from googleapiclient.errors import HttpError


class TestBigqueryService(unittest.TestCase):
    """Unit tests for ``gae.connector.bigquery.BigQueryService``."""

    @staticmethod
    def _make_credentials():
        """Returns a mock built with the ``Credentials`` spec."""
        return mock.Mock(spec=google.auth.credentials.Credentials)

    @staticmethod
    def _get_target_klass():
        """Imports the class under test at call time and returns it."""
        from gae.connector.bigquery import BigQueryService

        return BigQueryService

    @mock.patch('gae.connector.bigquery.disco')
    def test_cto(self, disco_mock):
        mock_cre = self._make_credentials()
        disco_mock.build.return_value = 'con'
        klass = self._get_target_klass()(mock_cre)
        self.assertEqual(klass.con, 'con')
        disco_mock.build.assert_called_once_with('bigquery', 'v2',
                                                 credentials=mock_cre)

    @mock.patch('gae.connector.bigquery.disco')
    def test_execute_job(self, disco_mock):
        con_mock = mock.Mock()
        disco_mock.build.return_value = con_mock
        klass = self._get_target_klass()('cre')

        job_mock = mock.Mock()
        resource_mock = mock.Mock()
        execute_mock = mock.Mock()
        con_mock.jobs = job_mock
        job_mock.return_value = resource_mock
        resource_mock.insert.return_value = execute_mock

        project_id = 'project123'
        body = {'project_id': 'project123'}

        klass.execute_job(project_id, body)
        resource_mock.insert.assert_called_once_with(projectId=project_id,
                                                     body=body)
        execute_mock.execute.assert_called_once_with(num_retries=3)

    @mock.patch('gae.connector.bigquery.time')
    @mock.patch('gae.connector.bigquery.disco')
    def test_poll_job(self, disco_mock, time_mock):
        con_mock = mock.Mock()
        disco_mock.build.return_value = con_mock
        klass = self._get_target_klass()('cre')

        job_mock = mock.Mock()
        resource_mock = mock.Mock()
        request_mock = mock.Mock()
        job_mock.jobs.return_value = resource_mock
        resource_mock.get.return_value = request_mock
        request_mock.execute.return_value = {"status": {"state": "DONE"}}
        klass.con = job_mock

        job = {"jobReference": {"projectId": "project123", "jobId": "1"}}
        klass.poll_job(job)
        request_mock.execute.assert_called_once_with(num_retries=3)

    @mock.patch('gae.connector.bigquery.time')
    @mock.patch('gae.connector.bigquery.disco')
    def test_poll_job_running_time(self, disco_mock, time_mock):
        con_mock = mock.Mock()
        disco_mock.build.return_value = con_mock
        klass = self._get_target_klass()('cre')

        job_mock = mock.Mock()
        resource_mock = mock.Mock()
        request_mock = mock.Mock()
        job_mock.jobs.return_value = resource_mock
        resource_mock.get.return_value = request_mock
        klass.con = job_mock

        # First poll reports a running job, second one reports completion.
        request_mock.execute.side_effect = [{"status": {"state": "RUNNING"}},
                                            {"status": {"state": "DONE"}}]
        job = {"jobReference": {"projectId": "project123", "jobId": "1"}}
        klass.poll_job(job)
        request_mock.execute.assert_called_with(num_retries=3)
        time_mock.sleep.assert_called_once_with(1)

    # This test used to be named ``test_poll_job`` as well, which replaced the
    # success-path test defined above so that it never ran.
    @mock.patch('gae.connector.bigquery.time')
    @mock.patch('gae.connector.bigquery.disco')
    def test_poll_job_error_result(self, disco_mock, time_mock):
        con_mock = mock.Mock()
        disco_mock.build.return_value = con_mock
        klass = self._get_target_klass()('cre')

        job_mock = mock.Mock()
        resource_mock = mock.Mock()
        request_mock = mock.Mock()
        job_mock.jobs.return_value = resource_mock
        resource_mock.get.return_value = request_mock
        request_mock.execute.return_value = {"status": {"state": "DONE",
                                                        "errorResult": True}}
        klass.con = job_mock

        job = {"jobReference": {"projectId": "project123", "jobId": "1"}}
        with self.assertRaises(RuntimeError):
            klass.poll_job(job)

    @mock.patch('gae.connector.bigquery.disco')
    def test_delete_table(self, disco_mock):
        con_mock = mock.Mock()
        disco_mock.build.return_value = con_mock
        klass = self._get_target_klass()('cre')

        table_mock = mock.Mock()
        get_mock = mock.Mock()
        con_mock.tables.return_value = table_mock
        table_mock.get.return_value = get_mock
        # First lookup fails (nothing to delete), second one succeeds.
        get_mock.execute.side_effect = [HttpError(mock.Mock(), 'not found'),
                                        None]

        project_id = 'project123'
        dataset_id = 'dataset_id'
        table_id = 'table_id'

        klass.delete_table(project_id, dataset_id, table_id)
        get_mock.execute.assert_called_once_with(num_retries=3)
        table_mock.delete.assert_not_called()
        table_mock.get.assert_called_once_with(projectId=project_id,
                                               datasetId=dataset_id,
                                               tableId=table_id)

        delete_mock = mock.Mock()
        table_mock.delete.return_value = delete_mock

        klass.delete_table(project_id, dataset_id, table_id)
        table_mock.delete.assert_called_once_with(projectId=project_id,
                                                  datasetId=dataset_id,
                                                  tableId=table_id)
        delete_mock.execute.assert_called_once_with(num_retries=3)


# --------------------------------------------------------------------------------
# /tests/unit/gae/test_worker.py:
# --------------------------------------------------------------------------------
#MIT License
#
#Copyright (c) 2017 Willian Fuks
#
#Permission is hereby granted, free of charge, to any person obtaining a copy
#of this software and associated documentation files (the "Software"), to deal
#in the Software without restriction, including without limitation the rights
#to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
#copies of the Software, and to permit persons to whom the Software is
#furnished to do so, subject to the following conditions:
#
#The above
copyright notice and this permission notice shall be included in all 13 | #copies or substantial portions of the Software. 14 | # 15 | #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | #IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | #FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | #AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | #LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | #OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | #SOFTWARE. 22 | 23 | 24 | import json 25 | import datetime 26 | import unittest 27 | import mock 28 | import shutil 29 | import os 30 | 31 | import webtest 32 | 33 | 34 | class TestWorkerBase(object): 35 | _source_config = 'tests/unit/data/gae/test_config.json' 36 | _dest_config = 'gae/config.py' 37 | _remove_config_flag = False 38 | @classmethod 39 | def load_worker_setup(cls): 40 | try: 41 | import gae.worker as worker 42 | except ImportError: 43 | shutil.copyfile(cls._source_config, cls._dest_config) 44 | cls._remove_config_flag = True 45 | 46 | import gae.worker as worker 47 | from gae import utils 48 | cls.utils = utils 49 | cls.worker = worker 50 | 51 | @classmethod 52 | def clean_config(cls): 53 | if cls._remove_config_flag: 54 | os.remove(cls._dest_config) 55 | 56 | 57 | class TestWorkerService(unittest.TestCase, TestWorkerBase): 58 | @classmethod 59 | def setup_class(cls): 60 | cls.load_worker_setup() 61 | cls._test_app = webtest.TestApp(cls.worker.app) 62 | 63 | @classmethod 64 | def teardown_class(cls): 65 | cls.clean_config() 66 | 67 | @classmethod 68 | def load_mock_config(cls): 69 | return json.loads(open(cls._source_config).read().replace( 70 | "config = ", "")) 71 | 72 | @mock.patch('gae.utils.uuid') 73 | @mock.patch('gae.worker.gcp_service') 74 | def test_update_search_tables(self, service_mock, uuid_mock): 75 | 
uuid_mock.uuid4.return_value = 'name' 76 | # this means that the config file is a pre-defined one 77 | # so we need to replace it in this test 78 | if not self._remove_config_flag: 79 | self.worker.config = self.load_mock_config() 80 | 81 | query_job_body = self.utils.search_query_job_body( 82 | **dict(self.worker.config['jobs'][ 83 | 'update_dashboard_tables'].items() + 84 | [('date', '20171010')])) 85 | 86 | service_mock.bigquery.execute_job.return_value = 'job' 87 | response = self._test_app.post("/update_dashboard_tables", {'date': 88 | "20171010"}) 89 | 90 | service_mock.bigquery.execute_job.assert_any_call(*['project123', 91 | query_job_body]) 92 | service_mock.bigquery.poll_job.assert_called_once_with('job') 93 | self.assertEqual(response.status_int, 200) 94 | 95 | dt = (datetime.datetime.now() - datetime.timedelta(days=2)).strftime( 96 | "%Y%m%d") 97 | 98 | service_mock.bigquery.delete_table.assert_any_call( 99 | project_id='dest_project', dataset_id='dest_dataset', 100 | table_id='search_{}'.format(dt)) 101 | 102 | @mock.patch('gae.utils.uuid') 103 | @mock.patch('gae.worker.gcp_service') 104 | def test_update_search_tables_no_date(self, service_mock, uuid_mock): 105 | uuid_mock.uuid4.return_value = 'name' 106 | # this means that the config file is a pre-defined one 107 | # so we need to replace it in this test 108 | if not self._remove_config_flag: 109 | self.worker.config = self.load_mock_config() 110 | 111 | dt = (datetime.datetime.now() - datetime.timedelta(days=1)).strftime( 112 | "%Y%m%d") 113 | 114 | query_job_body = self.utils.search_query_job_body( 115 | **dict(self.worker.config['jobs'][ 116 | 'update_dashboard_tables'].items() + 117 | [('date', dt)])) 118 | 119 | service_mock.bigquery.execute_job.return_value = 'job' 120 | response = self._test_app.post("/update_dashboard_tables") 121 | 122 | service_mock.bigquery.execute_job.assert_any_call(*['project123', 123 | query_job_body]) 124 | service_mock.bigquery.poll_job.assert_called_once_with('job') 
125 | self.assertEqual(response.status_int, 200) 126 | 127 | dt = (datetime.datetime.now() - datetime.timedelta(days=2)).strftime( 128 | "%Y%m%d") 129 | 130 | service_mock.bigquery.delete_table.assert_any_call( 131 | project_id='dest_project', dataset_id='dest_dataset', 132 | table_id='search_{}'.format(dt)) 133 | -------------------------------------------------------------------------------- /gae/queries/search_kpis.sql: -------------------------------------------------------------------------------- 1 | #standardSQL 2 | CREATE TEMP FUNCTION removeAccents(phrase STRING) RETURNS STRING AS (( 3 | SELECT 4 | REGEXP_REPLACE( 5 | REGEXP_REPLACE( 6 | REGEXP_REPLACE( 7 | REGEXP_REPLACE( 8 | REGEXP_REPLACE( 9 | REGEXP_REPLACE( 10 | REGEXP_REPLACE( 11 | REGEXP_REPLACE(phrase, 12 | r'[àáâäåã]', 'a'), 13 | r'[èéêëẽ]', 'e'), 14 | r'[ìíîïĩ]', 'i'), 15 | r'[òóôöøõ]', 'o'), 16 | r'[ùúûüũ]', 'u'), 17 | r'ç', 'c'), 18 | r'ÿ', 'y'), 19 | r'ñ', 'n') 20 | )); 21 | 22 | CREATE TEMP FUNCTION slugify(phrase STRING) RETURNS STRING AS (( 23 | SELECT 24 | REGEXP_REPLACE( 25 | REGEXP_REPLACE( 26 | REGEXP_REPLACE( 27 | REGEXP_REPLACE( 28 | REGEXP_REPLACE( 29 | REGEXP_REPLACE(removeAccents(LOWER(phrase)), 30 | r'\s+', '-'), # replaces space with '-' 31 | r'&', '-e-'), # replaces & with '-e-' 32 | r'[^\w-]+', ''), # replaces non-word chars 33 | r'--+', '-'), # replaces multiple '-' with single one 34 | r'^-+', ''), # trim '-' from start of text 35 | r'-+$', '') # trim '-' from end of text 36 | )); 37 | 38 | CREATE TEMP FUNCTION isSearch(search_phrase STRING, URL STRING) RETURNS BOOL AS (( 39 | SELECT LOGICAL_AND(STRPOS(URL, x) > 0 OR STRPOS(LOWER(URL), removeAccents(LOWER(x))) > 0) FROM UNNEST(SPLIT(search_phrase, ' ')) x 40 | )); 41 | 42 | CREATE TEMP FUNCTION extractConfigSku(sku STRING) RETURNS STRING AS ( 43 | CASE WHEN (CHAR_LENGTH(sku) - CHAR_LENGTH(REGEXP_REPLACE(sku, r'-', '')) = 3) OR (CHAR_LENGTH(sku) - CHAR_LENGTH(REGEXP_REPLACE(sku, r'-', '')) = 1) THEN REGEXP_EXTRACT(sku, 
r'(.*)-[0-9A-Z]+') 44 | ELSE sku END 45 | ); 46 | 47 | CREATE TEMP FUNCTION processPurchases(skus_clicked ARRAY, purchased_skus ARRAY >) RETURNS FLOAT64 AS (( 48 | SELECT SUM(revenue) FROM UNNEST(purchased_skus) WHERE EXISTS(SELECT 1 FROM UNNEST(skus_clicked) sku_clicked WHERE sku_clicked = sku) 49 | )); 50 | 51 | CREATE TEMP FUNCTION buildFinalResult(hits ARRAY>, rvn FLOAT64) RETURNS STRUCT>, search_flg INT64, net_search_flg INT64, search_rvn FLOAT64, net_search_rvn FLOAT64, u_conversion INT64, u_search_conversion INT64, net_clicks INT64> AS (( 52 | # this solution is kinda ugly but we do so Datastudio can process final results reliably. 53 | STRUCT(ARRAY(SELECT AS STRUCT search, SUM(freq) AS freq, SUM(click) AS clicks, SUM(net_revenue) AS net_revenue, SUM(bounce) AS bounce FROM UNNEST(hits) GROUP BY search) AS data, CASE WHEN EXISTS(SELECT 1 FROM UNNEST(hits) WHERE freq > 0) THEN 1 END AS search_flg, CASE WHEN EXISTS(SELECT 1 FROM UNNEST(hits) WHERE net_revenue > 0) THEN 1 END AS net_search_flg, CASE WHEN EXISTS(SELECT 1 FROM UNNEST(hits) WHERE freq > 0) THEN rvn END AS search_rvn, (SELECT SUM(net_revenue) FROM UNNEST(hits)) AS net_search_rvn, IF(rvn > 0, 1, NULL) AS u_conversion, (CASE WHEN EXISTS(SELECT 1 FROM UNNEST(hits) WHERE freq > 0) AND rvn > 0 THEN 1 END) AS u_search_conversion, (SELECT SUM(click) FROM UNNEST(hits) WHERE freq > 0 and net_revenue > 0) AS net_clicks) 54 | )); 55 | 56 | 57 | WITH `data` AS( 58 | SELECT "1" AS fullvisitorid, 1 AS visitid, "20171220" AS date, STRUCT (100000000.0) AS totals, ARRAY, ecommerceAction STRUCT, eventInfo STRUCT, product ARRAY >>> 59 | [STRUCT(1 AS hitNumber, STRUCT("/" AS pagePath) AS page, 60 | STRUCT("0" AS action_type) AS ecommerceAction, 61 | STRUCT(NULL AS eventCategory, NULL AS eventAction, NULL AS eventLabel) AS eventInfo, 62 | [STRUCT("" AS productSku, False AS isClick, 0 AS productQuantity, 0.0 AS productPrice)] AS product), 63 | 64 | STRUCT(2 AS hitNumber, STRUCT("/?q=fake+search" AS pagePath) AS page, 65 
| STRUCT("0" AS action_type) AS ecommerceAction, 66 | STRUCT(NULL AS eventCategory, NULL AS eventAction, NULL AS eventLabel) AS eventInfo, 67 | [STRUCT("sku0" AS productSku, False AS isClick, 0 AS productQuantity, 0.0 AS productPrice), STRUCT("sku1" AS productSku, False AS isClick, 0 AS productQuantity, 0.0 AS productPrice)] AS product), 68 | 69 | STRUCT(3 AS hitNumber, STRUCT("/?q=fake+search" AS pagePath) AS page, 70 | STRUCT("0" AS action_type) AS ecommerceAction, 71 | STRUCT(NULL AS eventCategory, NULL AS eventAction, NULL AS eventLabel) AS eventInfo, 72 | [STRUCT("sku0" AS productSku, True AS isClick, 0 AS productQuantity, 0.0 AS productPrice)] AS product), 73 | 74 | STRUCT(4 AS hitNumber, STRUCT("/checkout" AS pagePath) AS page, 75 | STRUCT("6" AS action_type) AS ecommerceAction, 76 | STRUCT(NULL AS eventCategory, NULL AS eventAction, NULL AS eventLabel) AS eventInfo, 77 | [STRUCT("sku0-000" AS productSku, False AS isClick, 1 AS productQuantity, 100000000.0 AS productPrice)] AS product)] hits 78 | 79 | UNION ALL 80 | 81 | SELECT "2" AS fullvisitorid, 1 as visitid, "20171220" AS date, STRUCT (NULL) AS totals, ARRAY, ecommerceAction STRUCT, eventInfo STRUCT, product ARRAY >>> 82 | [STRUCT(1 AS hitNumber, STRUCT("/" AS pagePath) AS page, 83 | STRUCT("0" AS action_type) AS ecommerceAction, 84 | STRUCT(NULL AS eventCategory, NULL AS eventAction, NULL AS eventLabel) AS eventInfo, 85 | [STRUCT("" AS productSku, False AS isClick, 0 AS productQuantity, 0.0 AS productPrice)] AS product), 86 | 87 | STRUCT(2 AS hitNumber, STRUCT("/" AS pagePath) AS page, 88 | STRUCT("0" AS action_type) AS ecommerceAction, 89 | STRUCT("search" AS eventCategory, "submit" AS eventAction, "search string" AS eventLabel) AS eventInfo, 90 | NULL AS product), 91 | 92 | STRUCT(3 AS hitNumber, STRUCT("/?q=search+string" AS pagePath) AS page, 93 | STRUCT("0" AS action_type) AS ecommerceAction, 94 | STRUCT(NULL AS eventCategory, NULL AS eventAction, NULL AS eventLabel) AS eventInfo, 95 | [STRUCT("" 
AS productSku, False AS isClick, 0 AS productQuantity, 0.0 AS productPrice)] AS product)] hits 96 | 97 | UNION ALL 98 | 99 | SELECT "2" AS fullvisitorid, 2 as visitid, "20171220" AS date, STRUCT (NULL) AS totals, ARRAY, ecommerceAction STRUCT, eventInfo STRUCT, product ARRAY >>> 100 | [STRUCT(1 AS hitNumber, STRUCT("/" AS pagePath) AS page, 101 | STRUCT("0" AS action_type) AS ecommerceAction, 102 | STRUCT(NULL AS eventCategory, NULL AS eventAction, NULL AS eventLabel) AS eventInfo, 103 | [STRUCT("" AS productSku, False AS isClick, 0 AS productQuantity, 0.0 AS productPrice)] AS product), 104 | 105 | STRUCT(2 AS hitNumber, STRUCT("/" AS pagePath) AS page, 106 | STRUCT("0" AS action_type) AS ecommerceAction, 107 | STRUCT("search" AS eventCategory, "submit" AS eventAction, "search string" AS eventLabel) AS eventInfo, 108 | NULL AS product), 109 | 110 | STRUCT(3 AS hitNumber, STRUCT("/?q=search+string" AS pagePath) AS page, 111 | STRUCT("0" AS action_type) AS ecommerceAction, 112 | STRUCT(NULL AS eventCategory, NULL AS eventAction, NULL AS eventLabel) AS eventInfo, 113 | [STRUCT("" AS productSku, False AS isClick, 0 AS productQuantity, 0.0 AS productPrice)] AS product), 114 | 115 | 116 | STRUCT(4 AS hitNumber, STRUCT("/?q=search+string" AS pagePath) AS page, 117 | STRUCT("0" AS action_type) AS ecommerceAction, 118 | STRUCT(NULL AS eventCategory, NULL AS eventAction, NULL AS eventLabel) AS eventInfo, 119 | [STRUCT("sku0" AS productSku, True AS isClick, 0 AS productQuantity, 0.0 AS productPrice)] AS product), 120 | 121 | STRUCT(5 AS hitNumber, STRUCT("/" AS pagePath) AS page, 122 | STRUCT("0" AS action_type) AS ecommerceAction, 123 | STRUCT("search" AS eventCategory, "submit" AS eventAction, "search another string" AS eventLabel) AS eventInfo, 124 | NULL AS product), 125 | 126 | STRUCT(6 AS hitNumber, STRUCT("/?q=search+another+string" AS pagePath) AS page, 127 | STRUCT("0" AS action_type) AS ecommerceAction, 128 | STRUCT(NULL AS eventCategory, NULL AS eventAction, NULL 
AS eventLabel) AS eventInfo, 129 | [STRUCT("sku0" AS productSku, True AS isClick, 0 AS productQuantity, 0.0 AS productPrice)] AS product)] hits 130 | 131 | UNION ALL 132 | 133 | SELECT "3" AS fullvisitorid, 1 as visitid, "20171220" AS date, STRUCT (200000000.0) AS totals, ARRAY, ecommerceAction STRUCT, eventInfo STRUCT, product ARRAY >>> 134 | [STRUCT(1 AS hitNumber, STRUCT("/" AS pagePath) AS page, 135 | STRUCT("0" AS action_type) AS ecommerceAction, 136 | STRUCT(NULL AS eventCategory, NULL AS eventAction, NULL AS eventLabel) AS eventInfo, 137 | [STRUCT("" AS productSku, False AS isClick, 0 AS productQuantity, 0.0 AS productPrice)] AS product), 138 | 139 | STRUCT(2 AS hitNumber, STRUCT("/" AS pagePath) AS page, 140 | STRUCT("0" AS action_type) AS ecommerceAction, 141 | STRUCT("search" AS eventCategory, "submit" AS eventAction, "search string" AS eventLabel) AS eventInfo, 142 | [STRUCT("" AS productSku, False AS isClick, 0 AS productQuantity, 0.0 AS productPrice)] AS product), 143 | 144 | STRUCT(3 AS hitNumber, STRUCT("/?q=search+string" AS pagePath) AS page, 145 | STRUCT("0" AS action_type) AS ecommerceAction, 146 | STRUCT(NULL AS eventCategory, NULL AS eventAction, NULL AS eventLabel) AS eventInfo, 147 | [STRUCT("sku0" AS productSku, True AS isClick, 0 AS productQuantity, 0.0 AS productPrice)] AS product), 148 | 149 | STRUCT(4 AS hitNumber, STRUCT("/checkout" AS pagePath) AS page, 150 | STRUCT("6" AS action_type) AS ecommerceAction, 151 | STRUCT(NULL AS eventCategory, NULL AS eventAction, NULL AS eventLabel) AS eventInfo, 152 | [STRUCT("sku0-000" AS productSku, False AS isClick, 2 AS productQuantity, 100000000.0 AS productPrice)] AS product) 153 | ] hits 154 | 155 | UNION ALL 156 | 157 | SELECT "4" AS fullvisitorid, 1 as visitid, "20171220" AS date, STRUCT (100000000.0) AS totals, ARRAY, ecommerceAction STRUCT, eventInfo STRUCT, product ARRAY >>> 158 | [STRUCT(1 AS hitNumber, STRUCT("/" AS pagePath) AS page, 159 | STRUCT("0" AS action_type) AS ecommerceAction, 
160 | STRUCT(NULL AS eventCategory, NULL AS eventAction, NULL AS eventLabel) AS eventInfo, 161 | [STRUCT("" AS productSku, False AS isClick, 0 AS productQuantity, 0.0 AS productPrice)] AS product), 162 | 163 | STRUCT(2 AS hitNumber, STRUCT("/" AS pagePath) AS page, 164 | STRUCT("0" AS action_type) AS ecommerceAction, 165 | STRUCT("search" AS eventCategory, "submit" AS eventAction, "Sêãrchí CrÃzĩ Éstrìng" AS eventLabel) AS eventInfo, 166 | NULL AS product), 167 | 168 | STRUCT(3 AS hitNumber, STRUCT("/?q=Sêãrchí CrÃzĩ Éstrìng" AS pagePath) AS page, 169 | STRUCT("0" AS action_type) AS ecommerceAction, 170 | STRUCT(NULL AS eventCategory, NULL AS eventAction, NULL AS eventLabel) AS eventInfo, 171 | [STRUCT("sku0" AS productSku, True AS isClick, 0 AS productQuantity, 0.0 AS productPrice)] AS product), 172 | 173 | STRUCT(4 AS hitNumber, STRUCT("/?q=Searchi%20Crazi%20Estring&sort=discount" AS pagePath) AS page, 174 | STRUCT("0" AS action_type) AS ecommerceAction, 175 | STRUCT(NULL AS eventCategory, NULL AS eventAction, NULL AS eventLabel) AS eventInfo, 176 | [STRUCT("sku0" AS productSku, True AS isClick, 0 AS productQuantity, 0.0 AS productPrice)] AS product), 177 | 178 | STRUCT(5 AS hitNumber, STRUCT("/?q=Searchi%20Crazi%20Estring&sort=discount" AS pagePath) AS page, 179 | STRUCT("0" AS action_type) AS ecommerceAction, 180 | STRUCT("search" AS eventCategory, "submit" AS eventAction, "Searchi crÃzĩ Éstrìng" AS eventLabel) AS eventInfo, 181 | [STRUCT("" AS productSku, False AS isClick, 0 AS productQuantity, 0.0 AS productPrice)] AS product), 182 | 183 | STRUCT(6 AS hitNumber, STRUCT("/?q=Searchi%20crazi%20Estring&sort=discount" AS pagePath) AS page, 184 | STRUCT("0" AS action_type) AS ecommerceAction, 185 | STRUCT(NULL AS eventCategory, NULL AS eventAction, NULL AS eventLabel) AS eventInfo, 186 | [STRUCT("sku0" AS productSku, True AS isClick, 0 AS productQuantity, 0.0 AS productPrice)] AS product), 187 | 188 | STRUCT(7 AS hitNumber, STRUCT("/checkout" AS pagePath) AS page, 
189 | STRUCT("6" AS action_type) AS ecommerceAction, 190 | STRUCT(NULL AS eventCategory, NULL AS eventAction, NULL AS eventLabel) AS eventInfo, 191 | [STRUCT("sku0-000" AS productSku, False AS isClick, 1 AS productQuantity, 100000000.0 AS productPrice)] AS product) 192 | ] hits 193 | 194 | UNION ALL 195 | 196 | SELECT "4" AS fullvisitorid, 2 as visitid, "20171220" AS date, STRUCT (100000000.0) AS totals, ARRAY, ecommerceAction STRUCT, eventInfo STRUCT, product ARRAY >>> 197 | [STRUCT(1 AS hitNumber, STRUCT("/" AS pagePath) AS page, 198 | STRUCT("0" AS action_type) AS ecommerceAction, 199 | STRUCT(NULL AS eventCategory, NULL AS eventAction, NULL AS eventLabel) AS eventInfo, 200 | [STRUCT("" AS productSku, False AS isClick, 0 AS productQuantity, 0.0 AS productPrice)] AS product), 201 | 202 | STRUCT(2 AS hitNumber, STRUCT("/" AS pagePath) AS page, 203 | STRUCT("0" AS action_type) AS ecommerceAction, 204 | STRUCT("search" AS eventCategory, "submit" AS eventAction, "Seãrchí Crazĩ estrìng" AS eventLabel) AS eventInfo, 205 | [STRUCT("" AS productSku, False AS isClick, 0 AS productQuantity, 0.0 AS productPrice)] AS product), 206 | 207 | STRUCT(3 AS hitNumber, STRUCT("/?q=Seãrchí Crazĩ estrìng" AS pagePath) AS page, 208 | STRUCT("0" AS action_type) AS ecommerceAction, 209 | STRUCT(NULL AS eventCategory, NULL AS eventAction, NULL AS eventLabel) AS eventInfo, 210 | NULL AS product), 211 | 212 | STRUCT(4 AS hitNumber, STRUCT("/?q=Searchi%20Crazi%20estring&sort=discount" AS pagePath) AS page, 213 | STRUCT("0" AS action_type) AS ecommerceAction, 214 | STRUCT(NULL AS eventCategory, NULL AS eventAction, NULL AS eventLabel) AS eventInfo, 215 | [STRUCT("sku1" AS productSku, True AS isClick, 0 AS productQuantity, 0.0 AS productPrice)] AS product), 216 | 217 | STRUCT(5 AS hitNumber, STRUCT("/?q=Searchi%20Crazi%20estring&sort=discount" AS pagePath) AS page, 218 | STRUCT("0" AS action_type) AS ecommerceAction, 219 | STRUCT("search" AS eventCategory, "submit" AS eventAction, "search 
other string" AS eventLabel) AS eventInfo, 220 | NULL AS product), 221 | 222 | STRUCT(6 AS hitNumber, STRUCT("/?q=search%20other%20string&sort=discount" AS pagePath) AS page, 223 | STRUCT("0" AS action_type) AS ecommerceAction, 224 | STRUCT(NULL AS eventCategory, NULL AS eventAction, NULL AS eventLabel) AS eventInfo, 225 | [STRUCT("sku0" AS productSku, True AS isClick, 0 AS productQuantity, 0.0 AS productPrice)] AS product), 226 | 227 | STRUCT(7 AS hitNumber, STRUCT("/checkout" AS pagePath) AS page, 228 | STRUCT("6" AS action_type) AS ecommerceAction, 229 | STRUCT(NULL AS eventCategory, NULL AS eventAction, NULL AS eventLabel) AS eventInfo, 230 | [STRUCT("sku0-000" AS productSku, False AS isClick, 1 AS productQuantity, 100000000.0 AS productPrice), STRUCT("sku1-000" AS productSku, False AS isClick, 1 AS productQuantity, 150000000.0 AS productPrice)] AS product) 231 | ] hits 232 | 233 | UNION ALL 234 | 235 | SELECT "4" AS fullvisitorid, 1 as visitid, "20171221" AS date, STRUCT (100000000.0) AS totals, ARRAY, ecommerceAction STRUCT, eventInfo STRUCT, product ARRAY >>> 236 | [STRUCT(1 AS hitNumber, STRUCT("/" AS pagePath) AS page, 237 | STRUCT("0" AS action_type) AS ecommerceAction, 238 | STRUCT(NULL AS eventCategory, NULL AS eventAction, NULL AS eventLabel) AS eventInfo, 239 | [STRUCT("" AS productSku, False AS isClick, 0 AS productQuantity, 0.0 AS productPrice)] AS product), 240 | 241 | STRUCT(2 AS hitNumber, STRUCT("/" AS pagePath) AS page, 242 | STRUCT("0" AS action_type) AS ecommerceAction, 243 | STRUCT("search" AS eventCategory, "submit" AS eventAction, "Seãrchí Crazĩ estrìng" AS eventLabel) AS eventInfo, 244 | NULL AS product), 245 | 246 | STRUCT(3 AS hitNumber, STRUCT("/?q=Seãrchí Crazĩ estrìng" AS pagePath) AS page, 247 | STRUCT("0" AS action_type) AS ecommerceAction, 248 | STRUCT(NULL AS eventCategory, NULL AS eventAction, NULL AS eventLabel) AS eventInfo, 249 | [STRUCT("sku0" AS productSku, True AS isClick, 0 AS productQuantity, 0.0 AS productPrice)] AS 
product)] hits 250 | ) 251 | 252 | SELECT 253 | date, 254 | SUM(revenue) user_revenue, 255 | buildFinalResult(ARRAY_CONCAT_AGG(hits), SUM(revenue)) results 256 | FROM( 257 | SELECT 258 | fv, 259 | date, 260 | revenue, 261 | ARRAY(SELECT AS STRUCT search, 1 AS freq, MAX(IF(sku_clicked IS NOT NULL, 1, 0)) click, processPurchases(ARRAY_AGG(DISTINCT sku_clicked IGNORE NULLS), products_purchased) net_revenue, MAX(bounce) bounce FROM UNNEST(hits) WHERE search IS NOT NULL GROUP BY search) hits 262 | FROM( 263 | SELECT 264 | fv, 265 | date, 266 | revenue, 267 | ARRAY(SELECT AS STRUCT slugify(FIRST_VALUE(lbl) OVER (PARTITION BY flg ORDER BY hn)) search, IF(isSearch(FIRST_VALUE(lbl) OVER (PARTITION BY flg ORDER BY hn), pp) AND EXISTS(SELECT 1 FROM UNNEST(product) WHERE isClick), ARRAY(SELECT productSku FROM UNNEST(product))[SAFE_OFFSET(0)], NULL) sku_clicked, IF(isSearch(FIRST_VALUE(lbl) OVER (PARTITION BY flg ORDER BY hn), pp) AND hn = MAX(hn) OVER(), 1, NULL) bounce FROM UNNEST(hits)) hits, 268 | ARRAY(SELECT AS STRUCT extractConfigSku(productSku) sku, SUM(productQuantity * productPrice / 1e6) revenue FROM UNNEST(hits), UNNEST(product) WHERE act_type = '6' GROUP BY 1) products_purchased 269 | FROM( 270 | SELECT 271 | fullvisitorid fv, 272 | totals.totalTransactionRevenue / 1e6 revenue, 273 | date, 274 | ARRAY(SELECT AS STRUCT hitNumber hn, page.pagePath pp, eventInfo.eventCategory cat, IF(eventInfo.eventCategory = 'search', eventInfo.eventLabel, NULL) lbl, SUM(IF(eventInfo.eventCategory = 'search', 1, 0)) OVER(ORDER BY hitNumber) flg, ecommerceAction.action_type act_type, ARRAY(SELECT AS STRUCT productSku, isClick, productQuantity, productPrice FROM UNNEST(product)) product FROM UNNEST(hits)) hits 275 | #FROM `data` 276 | FROM `{project_id}.{dataset_id}.{table_id}` 277 | WHERE TRUE 278 | #AND REGEXP_EXTRACT(_TABLE_SUFFIX, r'.*_(\d+)$') BETWEEN FORMAT_DATE("%Y%m%d", DATE_SUB(CURRENT_DATE(), INTERVAL 7 DAY) ) AND FORMAT_DATE("%Y%m%d", DATE_SUB(CURRENT_DATE(), INTERVAL 1 
DAY)) 279 | AND REGEXP_EXTRACT(_TABLE_SUFFIX, r'.*_(\d+)') = '{date}' 280 | AND EXISTS(SELECT 1 FROM UNNEST(hits) WHERE REGEXP_CONTAINS(page.hostname, r'{hostname}')) 281 | AND NOT REGEXP_CONTAINS(LOWER(geonetwork.networklocation), r'{geonetworklocation}') 282 | ) 283 | ) 284 | ) 285 | GROUP BY fv, date 286 | --------------------------------------------------------------------------------