├── 1_call_paginated_api ├── .gitignore ├── Pipfile ├── Pipfile.lock ├── README.md ├── main.py ├── models.py └── ramapi.py ├── 2_load_to_bq ├── .gitignore ├── Pipfile ├── Pipfile.lock ├── README.md ├── main.py ├── models.py ├── ramapi.py └── transforms.py ├── 3_deploy_cloud_function ├── .gitignore ├── Pipfile ├── Pipfile.lock ├── README.md ├── fn_load_to_bq │ ├── .gcloudignore │ ├── cloud_test.sh │ ├── deploy.sh │ ├── local_test.sh │ ├── main.py │ ├── requirements.txt │ └── transforms.py ├── fn_ram_api_request │ ├── .gcloudignore │ ├── cloud_test.sh │ ├── deploy.sh │ ├── local_test.sh │ ├── main.py │ ├── models.py │ ├── ramapi.py │ └── requirements.txt └── test_deployed_functions.py ├── 4_continuous_deployment ├── .gitignore ├── Pipfile ├── Pipfile.lock ├── README.md ├── fn_load_to_bq │ ├── .gcloudignore │ ├── ci-cd │ │ └── cloudbuild.yaml │ ├── cloud_test.sh │ ├── deploy.sh │ ├── local_test.sh │ ├── main.py │ ├── requirements.txt │ └── transforms.py ├── fn_ram_api_request │ ├── .gcloudignore │ ├── ci-cd │ │ └── cloudbuild.yaml │ ├── cloud_test.sh │ ├── deploy.sh │ ├── local_test.sh │ ├── main.py │ ├── models.py │ ├── ramapi.py │ └── requirements.txt └── test_deployed_functions.py ├── 5_parallel_execution ├── .gitignore ├── fn_create_cloud_tasks │ ├── .gcloudignore │ ├── Pipfile │ ├── Pipfile.lock │ ├── README.md │ ├── ci-cd │ │ └── cloudbuild.yaml │ ├── deploy.sh │ ├── local_test.sh │ ├── main.py │ ├── requirements.txt │ └── tasks.py ├── fn_load_to_bq │ ├── .gcloudignore │ ├── Pipfile │ ├── Pipfile.lock │ ├── README.md │ ├── ci-cd │ │ └── cloudbuild.yaml │ ├── deploy.sh │ ├── local_test.sh │ ├── main.py │ ├── requirements.txt │ └── transforms.py ├── fn_ram_api_request │ ├── .gcloudignore │ ├── Pipfile │ ├── Pipfile.lock │ ├── README.md │ ├── ci-cd │ │ └── cloudbuild.yaml │ ├── deploy.sh │ ├── local_test.sh │ ├── main.py │ ├── models.py │ ├── ramapi.py │ └── requirements.txt └── initial_script.py ├── 6_prefect_orchestration ├── .gitignore └── prefect-flows │ ├── Pipfile │ ├── Pipfile.lock │ ├── trigger_ram_api.py │ └── trigger_ram_api_pipeline-deployment.py ├── 7_prefect_deployment ├── .gitignore └── prefect-flows │ ├── Dockerfile │ ├── Pipfile │ ├── Pipfile.lock │ ├── blocks │ ├── gcp_cloud_run.py │ ├── gcs_credentials.py │ └── gcs_prefect_deployments_bucket.py │ └── trigger-ram-api │ ├── .prefectignore │ ├── trigger_ram_api.py │ └── trigger_ram_api_pipeline-deployment.py └── 8_terraform_management ├── .gitignore ├── README.md └── terraform ├── .terraform.lock.hcl ├── artifact_registry.tf ├── bigquery.tf ├── cloud_build.tf ├── cloud_tasks.tf ├── compute.tf ├── main.tf ├── outputs.tf ├── service_accounts.tf ├── services.tf ├── sh_scripts └── prefect_agent.sh ├── storage.tf ├── terraform.tfstate └── variables.tf /1_call_paginated_api/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
160 | #.idea/ -------------------------------------------------------------------------------- /1_call_paginated_api/Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | url = "https://pypi.org/simple" 3 | verify_ssl = true 4 | name = "pypi" 5 | 6 | [packages] 7 | requests = "*" 8 | 9 | [dev-packages] 10 | black = "*" 11 | 12 | [requires] 13 | python_version = "3.8" 14 | -------------------------------------------------------------------------------- /1_call_paginated_api/Pipfile.lock: -------------------------------------------------------------------------------- 1 | { 2 | "_meta": { 3 | "hash": { 4 | "sha256": "54c2e2c6c02052766dd6d6342ec3098ed14d22ff470bd9ac35d91930e41c021f" 5 | }, 6 | "pipfile-spec": 6, 7 | "requires": { 8 | "python_version": "3.8" 9 | }, 10 | "sources": [ 11 | { 12 | "name": "pypi", 13 | "url": "https://pypi.org/simple", 14 | "verify_ssl": true 15 | } 16 | ] 17 | }, 18 | "default": { 19 | "certifi": { 20 | "hashes": [ 21 | "sha256:0d9c601124e5a6ba9712dbc60d9c53c21e34f5f641fe83002317394311bdce14", 22 | "sha256:90c1a32f1d68f940488354e36370f6cca89f0f106db09518524c88d6ed83f382" 23 | ], 24 | "markers": "python_version >= '3.6'", 25 | "version": "==2022.9.24" 26 | }, 27 | "charset-normalizer": { 28 | "hashes": [ 29 | "sha256:5a3d016c7c547f69d6f81fb0db9449ce888b418b5b9952cc5e6e66843e9dd845", 30 | "sha256:83e9a75d1911279afd89352c68b45348559d1fc0506b054b346651b5e7fee29f" 31 | ], 32 | "markers": "python_version >= '3.6'", 33 | "version": "==2.1.1" 34 | }, 35 | "idna": { 36 | "hashes": [ 37 | "sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4", 38 | "sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2" 39 | ], 40 | "markers": "python_version >= '3.5'", 41 | "version": "==3.4" 42 | }, 43 | "requests": { 44 | "hashes": [ 45 | "sha256:7c5599b102feddaa661c826c56ab4fee28bfd17f5abca1ebbe3e7f19d7c97983", 46 | "sha256:8fefa2a1a1365bf5520aac41836fbee479da67864514bdb821f31ce07ce65349" 47 | ], 48 | "index": "pypi", 49 | "version": "==2.28.1" 50 | }, 51 | "urllib3": { 52 | "hashes": [ 53 | "sha256:3fa96cf423e6987997fc326ae8df396db2a8b7c667747d47ddd8ecba91f4a74e", 54 | "sha256:b930dd878d5a8afb066a637fbb35144fe7901e3b209d1cd4f524bd0e9deee997" 55 | ], 56 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5' and python_version < '4'", 57 | "version": "==1.26.12" 58 | } 59 | }, 60 | "develop": { 61 | "black": { 62 | "hashes": [ 63 | "sha256:14ff67aec0a47c424bc99b71005202045dc09270da44a27848d534600ac64fc7", 64 | "sha256:197df8509263b0b8614e1df1756b1dd41be6738eed2ba9e9769f3880c2b9d7b6", 65 | "sha256:1e464456d24e23d11fced2bc8c47ef66d471f845c7b7a42f3bd77bf3d1789650", 66 | "sha256:2039230db3c6c639bd84efe3292ec7b06e9214a2992cd9beb293d639c6402edb", 67 | "sha256:21199526696b8f09c3997e2b4db8d0b108d801a348414264d2eb8eb2532e540d", 68 | "sha256:2644b5d63633702bc2c5f3754b1b475378fbbfb481f62319388235d0cd104c2d", 69 | "sha256:432247333090c8c5366e69627ccb363bc58514ae3e63f7fc75c54b1ea80fa7de", 70 | "sha256:444ebfb4e441254e87bad00c661fe32df9969b2bf224373a448d8aca2132b395", 71 | "sha256:5b9b29da4f564ba8787c119f37d174f2b69cdfdf9015b7d8c5c16121ddc054ae", 72 | "sha256:5cc42ca67989e9c3cf859e84c2bf014f6633db63d1cbdf8fdb666dcd9e77e3fa", 73 | "sha256:5d8f74030e67087b219b032aa33a919fae8806d49c867846bfacde57f43972ef", 74 | "sha256:72ef3925f30e12a184889aac03d77d031056860ccae8a1e519f6cbb742736383", 75 | "sha256:819dc789f4498ecc91438a7de64427c73b45035e2e3680c92e18795a839ebb66", 76 | 
"sha256:915ace4ff03fdfff953962fa672d44be269deb2eaf88499a0f8805221bc68c87", 77 | "sha256:9311e99228ae10023300ecac05be5a296f60d2fd10fff31cf5c1fa4ca4b1988d", 78 | "sha256:974308c58d057a651d182208a484ce80a26dac0caef2895836a92dd6ebd725e0", 79 | "sha256:b8b49776299fece66bffaafe357d929ca9451450f5466e997a7285ab0fe28e3b", 80 | "sha256:c957b2b4ea88587b46cf49d1dc17681c1e672864fd7af32fc1e9664d572b3458", 81 | "sha256:e41a86c6c650bcecc6633ee3180d80a025db041a8e2398dcc059b3afa8382cd4", 82 | "sha256:f513588da599943e0cde4e32cc9879e825d58720d6557062d1098c5ad80080e1", 83 | "sha256:fba8a281e570adafb79f7755ac8721b6cf1bbf691186a287e990c7929c7692ff" 84 | ], 85 | "index": "pypi", 86 | "version": "==22.10.0" 87 | }, 88 | "click": { 89 | "hashes": [ 90 | "sha256:7682dc8afb30297001674575ea00d1814d808d6a36af415a82bd481d37ba7b8e", 91 | "sha256:bb4d8133cb15a609f44e8213d9b391b0809795062913b383c62be0ee95b1db48" 92 | ], 93 | "markers": "python_version >= '3.7'", 94 | "version": "==8.1.3" 95 | }, 96 | "mypy-extensions": { 97 | "hashes": [ 98 | "sha256:090fedd75945a69ae91ce1303b5824f428daf5a028d2f6ab8a299250a846f15d", 99 | "sha256:2d82818f5bb3e369420cb3c4060a7970edba416647068eb4c5343488a6c604a8" 100 | ], 101 | "version": "==0.4.3" 102 | }, 103 | "pathspec": { 104 | "hashes": [ 105 | "sha256:46846318467efc4556ccfd27816e004270a9eeeeb4d062ce5e6fc7a87c573f93", 106 | "sha256:7ace6161b621d31e7902eb6b5ae148d12cfd23f4a249b9ffb6b9fee12084323d" 107 | ], 108 | "markers": "python_version >= '3.7'", 109 | "version": "==0.10.1" 110 | }, 111 | "platformdirs": { 112 | "hashes": [ 113 | "sha256:0cb405749187a194f444c25c82ef7225232f11564721eabffc6ec70df83b11cb", 114 | "sha256:6e52c21afff35cb659c6e52d8b4d61b9bd544557180440538f255d9382c8cbe0" 115 | ], 116 | "markers": "python_version >= '3.7'", 117 | "version": "==2.5.3" 118 | }, 119 | "tomli": { 120 | "hashes": [ 121 | "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc", 122 | "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f" 123 | ], 124 | "markers": "python_full_version < '3.11.0a7'", 125 | "version": "==2.0.1" 126 | }, 127 | "typing-extensions": { 128 | "hashes": [ 129 | "sha256:1511434bb92bf8dd198c12b1cc812e800d4181cfcb867674e0f8279cc93087aa", 130 | "sha256:16fa4864408f655d35ec496218b85f79b3437c829e93320c7c9215ccfd92489e" 131 | ], 132 | "markers": "python_version < '3.10'", 133 | "version": "==4.4.0" 134 | } 135 | } 136 | } 137 | -------------------------------------------------------------------------------- /1_call_paginated_api/README.md: -------------------------------------------------------------------------------- 1 | ## Summary 2 | 3 | This tutorial will show you how to paginate through API results with Python, using the Rick and Morty open API as an example. 4 | 5 | Most API's that return data have a concept of pagination, if there's too many records to display (e.g. number of pages on a website or individual records in a report), the API will most likely default to returning a fixed number of records and break down how many pages of records there are. Sometimes, you are also given an option to specify how many records to return with each call, but in this case, we're restricted to 20. 6 | 7 | In the example of the Rick and Morty `Character` endpoint, there are 826 total records and a total of 42 pages (42 x 20 = 840 meaning the last page will only show 6 records). 
8 | 9 | APIs, and pagination in particular, can be confusing concepts if you're new to working with them, or have never considered a "clean" way of dealing with them, so there are a few things I want to demonstrate in this example that will hopefully save you time and make your life easier in a real-world scenario. 10 | 11 | Note - obviously Rick and Morty data isn't a real-world use case, but the API demonstrates some very common features (excluding authentication) of an API, which can be adapted to fit most other situations. 12 | 13 | In this API, we're actually given an additional piece of information with each call: a field called `next`, which gives us the exact URL we need to call to get the next page of results. There's a reason I've chosen not to use this approach, which will make more sense if you continue following this series as I expand on the use case and bring it to life with more real-world approaches. 14 | 15 | ## Concepts to Cover 16 | 17 | 1. Dataclasses 18 | 1. Creating a dataclass to handle input parameters 19 | 2. Creating a dataclass to handle the API results 20 | 2. Paginating through all of the results for the API entity 21 | 22 | ## Logic of the Script 23 | - Get the total number of pages 24 | - Loop through a range of 1 to the number of pages, calling the endpoint with a `page` parameter. -------------------------------------------------------------------------------- /1_call_paginated_api/main.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from ramapi import get_endpoint 4 | from models import ApiParameters, CharacterSchema 5 | 6 | 7 | ENDPOINT = "character" 8 | 9 | 10 | def get_all_paginated_results( 11 | endpoint: str, pages: int, params: ApiParameters 12 | ) -> List[CharacterSchema]: 13 | results = [] 14 | for page in range(1, pages + 1): 15 | params.page = page 16 | print(f"Calling page {page}") 17 | response = get_endpoint(endpoint, params) 18 | results.extend(response.results) 19 | return results 20 | 21 | 22 | if __name__ == "__main__": 23 | params = ApiParameters() 24 | response = get_endpoint(ENDPOINT, params) 25 | results = get_all_paginated_results(ENDPOINT, response.info.pages, params) 26 | print(f"Total records: {len(results)}") 27 | -------------------------------------------------------------------------------- /1_call_paginated_api/models.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import Dict, List, Literal, Optional 3 | 4 | 5 | @dataclass 6 | class CharacterSchema: 7 | id: int 8 | name: str 9 | status: Literal["Alive", "Dead", "unknown"] 10 | species: str 11 | type: str 12 | gender: Literal["Female", "Male", "Genderless", "unknown"] 13 | origin: Dict[str, str] 14 | location: Dict[str, str] 15 | image: str 16 | episode: List[str] 17 | url: str 18 | created: str 19 | 20 | 21 | @dataclass 22 | class ApiInfo: 23 | count: int 24 | pages: int 25 | next: Optional[str] 26 | prev: Optional[str] 27 | 28 | 29 | @dataclass 30 | class ApiResponse: 31 | info: ApiInfo 32 | results: List[CharacterSchema] 33 | 34 | def __post_init__(self): 35 | self.info = ApiInfo(**self.info) 36 | self.results = [CharacterSchema(**x) for x in self.results] 37 | 38 | 39 | @dataclass 40 | class ApiParameters: 41 | page: Optional[str] = None 42 | name: Optional[str] = None 43 | status: Optional[Literal["alive", "dead", "unknown"]] = None 44 | species: Optional[str] = None 45 | type: Optional[str] = None 46 |
gender: Optional[Literal["female", "male", "genderless", "unknown"]] = None 47 | -------------------------------------------------------------------------------- /1_call_paginated_api/ramapi.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from dataclasses import asdict 3 | 4 | from models import ApiResponse, ApiParameters 5 | 6 | BASE_URL = "https://rickandmortyapi.com/api" 7 | 8 | 9 | def get_endpoint(endpoint: str, params: ApiParameters) -> ApiResponse: 10 | """Return `ApiResponse` from Rick and Morty `endpoint`""" 11 | response = requests.get(url=f"{BASE_URL}/{endpoint}", params=asdict(params)) 12 | response.raise_for_status() 13 | response = ApiResponse(**response.json()) 14 | 15 | return response 16 | -------------------------------------------------------------------------------- /2_load_to_bq/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 160 | #.idea/ -------------------------------------------------------------------------------- /2_load_to_bq/Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | url = "https://pypi.org/simple" 3 | verify_ssl = true 4 | name = "pypi" 5 | 6 | [packages] 7 | requests = "*" 8 | pandas = "*" 9 | google-cloud-bigquery = "*" 10 | 11 | [dev-packages] 12 | black = "*" 13 | 14 | [requires] 15 | python_version = "3.8" 16 | -------------------------------------------------------------------------------- /2_load_to_bq/README.md: -------------------------------------------------------------------------------- 1 | ## Summary 2 | 3 | This tutorial will show you how to take the next step and store the Rick and Morty API results in a central repository, in this case BigQuery. 4 | 5 | We load the data into Pandas to create a `row_hash` and an `ingestion_date`; Pandas makes creating these fields across the whole dataset really easy. 6 | 7 | These are common best practices: you get a unique identifier for each record in a dataset, as well as a date for when the data was loaded. In this case we already have a unique ID for each record we retrieve, so it's somewhat redundant, but in the real world, when you're extracting reporting data or data that's been combined, you aren't always given a record ID. Creating it yourself at this stage of the process can make downstream activities a lot simpler, and you'll thank yourself later on. Just to reiterate, it's here for demonstrative purposes. 8 | 9 | One thing we have in this dataset, as we will in a lot of data, is nested elements. We want to handle these during our import to BigQuery, since it can store nested and repeated objects. There are a few additional steps we need to go through to get the data into the correct format: denormalizing the columns back into a nested structure after we've done the transformations we need, converting the data to JSON (an easier way to load the data into BigQuery, as we can handle the nuances ourselves), and finally creating a schema for the nested elements.
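As a quick illustration of that flatten-then-renest round trip, here is a minimal sketch using a single hand-written character record (not data pulled from the API): `pd.json_normalize` produces the dotted columns, and `df_denormalize` in `transforms.py` later folds those columns back into nested objects.

```python
import pandas as pd

record = {
    "id": 1,
    "name": "Rick Sanchez",
    "origin": {"name": "Earth (C-137)", "url": "https://rickandmortyapi.com/api/location/1"},
}

# Flattening: nested dicts become dotted columns.
df = pd.json_normalize([record])
print(list(df.columns))  # ['id', 'name', 'origin.name', 'origin.url']

# df_denormalize() reverses this, rebuilding a single 'origin' column of
# dicts so BigQuery can load it as a RECORD (nested) field.
```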
10 | 11 | ### Data Types 12 | 13 | Generally, when we're loading data into a source table, we want to leave it unchanged so that we always have a raw version of the data to compare against if we ever need to do validation. In the case of dates (like `created`), if we know the returned string actually represents a datetime, it's better to store it in the correct date format. In this example, we parse the `created` string into a datetime object so that our script to generate the BigQuery schema works without having to check whether strings match a particular date format. 14 | 15 | ## Concepts to Cover 16 | 17 | 1. Setting up BigQuery in Google Cloud Platform 18 | 2. DataFrame transformations 19 | 3. BigQuery schema creation 20 | 4. Load to BigQuery 21 | 22 | ## Prerequisites 23 | 24 | - Be authenticated with `gcloud` and set your project to the one you want to load data to 25 | - Create the dataset `rick_and_morty` within BigQuery and set the location to "EU" -------------------------------------------------------------------------------- /2_load_to_bq/main.py: -------------------------------------------------------------------------------- 1 | from typing import List, Tuple 2 | from dataclasses import asdict 3 | import io 4 | 5 | import pandas as pd 6 | from google.cloud import bigquery 7 | from google.cloud.bigquery.schema import SchemaField 8 | 9 | from ramapi import get_endpoint 10 | from models import ApiParameters, CharacterSchema 11 | from transforms import transform_dataframe 12 | 13 | 14 | ENDPOINT = "character" 15 | 16 | 17 | def get_all_paginated_results( 18 | endpoint: str, pages: int, params: ApiParameters 19 | ) -> List[CharacterSchema]: 20 | results = [] 21 | print(f"Starting loop of {pages} pages") 22 | for page in range(1, pages + 1): 23 | params.page = page 24 | response = get_endpoint(endpoint, params) 25 | results.extend(response.results) 26 | print("Completed") 27 | return results 28 | 29 | 30 | def _generate_bigquery_schema(df: pd.DataFrame) -> List[SchemaField]: 31 | TYPE_MAPPING = { 32 | "i": "INTEGER", 33 | "u": "NUMERIC", 34 | "b": "BOOLEAN", 35 | "f": "FLOAT", 36 | "O": "STRING", 37 | "S": "STRING", 38 | "U": "STRING", 39 | "M": "TIMESTAMP", 40 | } 41 | schema = [] 42 | for column, dtype in df.dtypes.items(): 43 | val = df[column].iloc[0] 44 | mode = "REPEATED" if isinstance(val, list) else "NULLABLE" 45 | 46 | if isinstance(val, dict) or (mode == "REPEATED" and isinstance(val[0], dict)): 47 | fields = _generate_bigquery_schema(pd.json_normalize(val)) 48 | else: 49 | fields = () 50 | 51 | type = "RECORD" if fields else TYPE_MAPPING.get(dtype.kind) 52 | schema.append( 53 | SchemaField( 54 | name=column, 55 | field_type=type, 56 | mode=mode, 57 | fields=fields, 58 | ) 59 | ) 60 | return schema 61 | 62 | 63 | def prepare_data(data: List[CharacterSchema]) -> Tuple[str, List[SchemaField]]: 64 | df = pd.json_normalize([asdict(x) for x in data]) 65 | df = transform_dataframe(df) 66 | schema = _generate_bigquery_schema(df) 67 | json_records = df.to_json(orient="records", lines=True, date_format="iso") 68 | return json_records, schema 69 | 70 | 71 | def load_data_to_bq( 72 | client: bigquery.Client, 73 | data: str, 74 | table_id: str, 75 | load_config: bigquery.LoadJobConfig, 76 | ) -> int: 77 | load_job = client.load_table_from_file( 78 | io.StringIO(data), table_id, location="EU", job_config=load_config 79 | ) 80 | load_job.result()  # waits for the job to complete.
81 | destination_table = client.get_table(table_id) 82 | num_rows = destination_table.num_rows 83 | return num_rows 84 | 85 | 86 | if __name__ == "__main__": 87 | params = ApiParameters() 88 | response = get_endpoint(ENDPOINT, params) 89 | results = get_all_paginated_results(ENDPOINT, response.info.pages, params) 90 | json_records, schema = prepare_data(results) 91 | 92 | bigquery_client = bigquery.Client() 93 | load_config = bigquery.LoadJobConfig( 94 | schema=schema, 95 | write_disposition="WRITE_TRUNCATE", 96 | source_format=bigquery.SourceFormat.NEWLINE_DELIMITED_JSON, 97 | ) 98 | table_id = f"rick_and_morty.{ENDPOINT}" 99 | num_rows = load_data_to_bq(bigquery_client, json_records, table_id, load_config) 100 | print(f"Successfully loaded {num_rows} to {table_id}") 101 | -------------------------------------------------------------------------------- /2_load_to_bq/models.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import Dict, List, Literal, Optional 3 | from datetime import datetime, timezone 4 | 5 | 6 | @dataclass 7 | class CharacterSchema: 8 | id: int 9 | name: str 10 | status: Literal["Alive", "Dead", "unknown"] 11 | species: str 12 | type: str 13 | gender: Literal["Female", "Male", "Genderless", "unknown"] 14 | origin: Dict[str, str] 15 | location: Dict[str, str] 16 | image: str 17 | episode: List[str] 18 | url: str 19 | created: datetime 20 | 21 | def __post_init__(self): 22 | self.created = datetime.strptime(self.created, "%Y-%m-%dT%H:%M:%S.%fZ").replace( 23 | tzinfo=timezone.utc 24 | ) 25 | 26 | 27 | @dataclass 28 | class ApiInfo: 29 | count: int 30 | pages: int 31 | next: Optional[str] 32 | prev: Optional[str] 33 | 34 | 35 | @dataclass 36 | class ApiResponse: 37 | info: ApiInfo 38 | results: List[CharacterSchema] 39 | 40 | def __post_init__(self): 41 | self.info = ApiInfo(**self.info) 42 | self.results = [CharacterSchema(**x) for x in self.results] 43 | 44 | 45 | @dataclass 46 | class ApiParameters: 47 | page: Optional[str] = None 48 | name: Optional[str] = None 49 | status: Optional[Literal["alive", "dead", "unknown"]] = None 50 | species: Optional[str] = None 51 | type: Optional[str] = None 52 | gender: Optional[Literal["female", "male", "genderless", "unknown"]] = None 53 | -------------------------------------------------------------------------------- /2_load_to_bq/ramapi.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from dataclasses import asdict 3 | 4 | from models import ApiResponse, ApiParameters 5 | 6 | BASE_URL = "https://rickandmortyapi.com/api" 7 | 8 | 9 | def get_endpoint(endpoint: str, params: ApiParameters) -> ApiResponse: 10 | """Return `ApiResponse` from Rick and Morty `endpoint`""" 11 | response = requests.get(url=f"{BASE_URL}/{endpoint}", params=asdict(params)) 12 | response.raise_for_status() 13 | response = ApiResponse(**response.json()) 14 | 15 | return response 16 | -------------------------------------------------------------------------------- /2_load_to_bq/transforms.py: -------------------------------------------------------------------------------- 1 | from functools import reduce 2 | from typing import Callable, List 3 | import json 4 | 5 | import pandas as pd 6 | 7 | Preprocessor = Callable[[pd.DataFrame], pd.DataFrame] 8 | 9 | 10 | def create_row_hash(df: pd.DataFrame) -> pd.DataFrame: 11 | """Create unique hash of entire DataFrame row.""" 12 | 
df.set_index(pd.util.hash_pandas_object(df.astype("str")), drop=False, inplace=True) 13 | df = df.reset_index(names=["row_hash"]) 14 | return df 15 | 16 | 17 | def add_current_datetime(df: pd.DataFrame) -> pd.DataFrame: 18 | df.insert(0, "ingestion_date", pd.to_datetime("now", utc=True)) 19 | return df 20 | 21 | 22 | def _get_nested_fields(df: pd.DataFrame) -> List[str]: 23 | """Return a list of nested fields, sorted by the deepest level of nesting first.""" 24 | nested_fields = [*{field.rsplit(".", 1)[0] for field in df.columns if "." in field}] 25 | nested_fields.sort(key=lambda record: len(record.split(".")), reverse=True) 26 | return nested_fields 27 | 28 | 29 | def df_denormalize(df: pd.DataFrame) -> pd.DataFrame: 30 | """ 31 | Convert a normalised DataFrame into a nested structure. 32 | 33 | Fields separated by '.' are considered part of a nested structure. 34 | """ 35 | nested_fields = _get_nested_fields(df) 36 | for field in nested_fields: 37 | list_of_children = [column for column in df.columns if field in column] 38 | rename = { 39 | field_name: field_name.rsplit(".", 1)[1] for field_name in list_of_children 40 | } 41 | renamed_fields = df[list_of_children].rename(columns=rename) 42 | df[field] = json.loads(renamed_fields.to_json(orient="records")) 43 | df.drop(list_of_children, axis=1, inplace=True) 44 | return df 45 | 46 | 47 | def compose(*functions: Preprocessor) -> Preprocessor: 48 | return reduce(lambda f, g: lambda x: g(f(x)), functions) 49 | 50 | 51 | def transform_dataframe(df: pd.DataFrame) -> pd.DataFrame: 52 | preprocessor = compose( 53 | create_row_hash, 54 | add_current_datetime, 55 | df_denormalize, 56 | ) 57 | 58 | return preprocessor(df) 59 | -------------------------------------------------------------------------------- /3_deploy_cloud_function/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
160 | #.idea/ -------------------------------------------------------------------------------- /3_deploy_cloud_function/Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | url = "https://pypi.org/simple" 3 | verify_ssl = true 4 | name = "pypi" 5 | 6 | [packages] 7 | requests = "*" 8 | pandas = "*" 9 | google-cloud-bigquery = "*" 10 | functions-framework = "*" 11 | types-flask = "*" 12 | 13 | [dev-packages] 14 | black = "*" 15 | 16 | [requires] 17 | python_version = "3.8" 18 | -------------------------------------------------------------------------------- /3_deploy_cloud_function/README.md: -------------------------------------------------------------------------------- 1 | ## Summary 2 | 3 | We need to break the existing code out into two separate functions, based on their distinct responsibilities: 4 | 5 | 1. Calling the API and collecting data 6 | 2. Loading the data into BigQuery 7 | 8 | ## Testing 9 | 10 | fn_ram_api_request: `functions-framework --target send_api_request --debug`
11 | fn_load_to_bq: `functions-framework --target create_load_job --debug` 12 | 13 | ## Deploying 14 | 15 | Prerequisites: 16 | - Need to enable a few APIs in Google Cloud:
17 | Artifact Registry API
18 | Cloud Build API
19 | Cloud Run Admin API
20 | Cloud Functions API -------------------------------------------------------------------------------- /3_deploy_cloud_function/fn_load_to_bq/.gcloudignore: -------------------------------------------------------------------------------- 1 | deploy_function.sh 2 | local_test.sh 3 | cloud_test.sh -------------------------------------------------------------------------------- /3_deploy_cloud_function/fn_load_to_bq/cloud_test.sh: -------------------------------------------------------------------------------- 1 | curl -X POST \ 2 | -H "Content-Type: application/json" \ 3 | -d "{ 4 | \"data\" : { 5 | \"dataset\": \"rick_and_morty\", 6 | \"table\": \"character\", 7 | \"results\": [{ 8 | \"created\": \"Mon, 25 Oct 2021 09:18:48 GMT\", 9 | \"episode\": [ 10 | \"https://rickandmortyapi.com/api/episode/49\" 11 | ], 12 | \"gender\": \"Female\", 13 | \"id\": 781, 14 | \"image\": \"https://rickandmortyapi.com/api/character/avatar/781.jpeg\", 15 | \"location\": { 16 | \"name\": \"Earth (Replacement Dimension)\", 17 | \"url\": \"https://rickandmortyapi.com/api/location/20\" 18 | }, 19 | \"name\": \"Rick's Garage\", 20 | \"origin\": { 21 | \"name\": \"Earth (Replacement Dimension)\", 22 | \"url\": \"https://rickandmortyapi.com/api/location/20\" 23 | }, 24 | \"species\": \"Robot\", 25 | \"status\": \"Alive\", 26 | \"type\": \"Artificial Intelligence\", 27 | \"url\": \"https://rickandmortyapi.com/api/character/781\" 28 | }, 29 | { 30 | \"created\": \"Mon, 25 Oct 2021 09:20:57 GMT\", 31 | \"episode\": [ 32 | \"https://rickandmortyapi.com/api/episode/49\" 33 | ], 34 | \"gender\": \"Male\", 35 | \"id\": 782, 36 | \"image\": \"https://rickandmortyapi.com/api/character/avatar/782.jpeg\", 37 | \"location\": { 38 | \"name\": \"Birdperson's Consciousness\", 39 | \"url\": \"https://rickandmortyapi.com/api/location/120\" 40 | }, 41 | \"name\": \"Memory Squanchy\", 42 | \"origin\": { 43 | \"name\": \"Birdperson's Consciousness\", 44 | \"url\": \"https://rickandmortyapi.com/api/location/120\" 45 | }, 46 | \"species\": \"Alien\", 47 | \"status\": \"Dead\", 48 | \"type\": \"Memory\", 49 | \"url\": \"https://rickandmortyapi.com/api/character/782\" 50 | }, 51 | { 52 | \"created\": \"Mon, 25 Oct 2021 09:22:40 GMT\", 53 | \"episode\": [ 54 | \"https://rickandmortyapi.com/api/episode/49\" 55 | ], 56 | \"gender\": \"Male\", 57 | \"id\": 783, 58 | \"image\": \"https://rickandmortyapi.com/api/character/avatar/783.jpeg\", 59 | \"location\": { 60 | \"name\": \"Birdperson's Consciousness\", 61 | \"url\": \"https://rickandmortyapi.com/api/location/120\" 62 | }, 63 | \"name\": \"Memory Rick\", 64 | \"origin\": { 65 | \"name\": \"Birdperson's Consciousness\", 66 | \"url\": \"https://rickandmortyapi.com/api/location/120\" 67 | }, 68 | \"species\": \"Human\", 69 | \"status\": \"Dead\", 70 | \"type\": \"Memory\", 71 | \"url\": \"https://rickandmortyapi.com/api/character/783\" 72 | }, 73 | { 74 | \"created\": \"Mon, 25 Oct 2021 09:23:22 GMT\", 75 | \"episode\": [ 76 | \"https://rickandmortyapi.com/api/episode/49\" 77 | ], 78 | \"gender\": \"Male\", 79 | \"id\": 784, 80 | \"image\": \"https://rickandmortyapi.com/api/character/avatar/784.jpeg\", 81 | \"location\": { 82 | \"name\": \"Birdperson's Consciousness\", 83 | \"url\": \"https://rickandmortyapi.com/api/location/120\" 84 | }, 85 | \"name\": \"Memory Rick\", 86 | \"origin\": { 87 | \"name\": \"Birdperson's Consciousness\", 88 | \"url\": \"https://rickandmortyapi.com/api/location/120\" 89 | }, 90 | \"species\": \"Human\", 91 | \"status\": \"Dead\", 92 | \"type\": \"Memory\", 93 | 
\"url\": \"https://rickandmortyapi.com/api/character/784\" 94 | }, 95 | { 96 | \"created\": \"Mon, 25 Oct 2021 09:24:51 GMT\", 97 | \"episode\": [ 98 | \"https://rickandmortyapi.com/api/episode/49\" 99 | ], 100 | \"gender\": \"Male\", 101 | \"id\": 785, 102 | \"image\": \"https://rickandmortyapi.com/api/character/avatar/785.jpeg\", 103 | \"location\": { 104 | \"name\": \"Birdperson's Consciousness\", 105 | \"url\": \"https://rickandmortyapi.com/api/location/120\" 106 | }, 107 | \"name\": \"Memory Geardude\", 108 | \"origin\": { 109 | \"name\": \"Birdperson's Consciousness\", 110 | \"url\": \"https://rickandmortyapi.com/api/location/120\" 111 | }, 112 | \"species\": \"Alien\", 113 | \"status\": \"Dead\", 114 | \"type\": \"Memory\", 115 | \"url\": \"https://rickandmortyapi.com/api/character/785\" 116 | } 117 | ] 118 | } 119 | } 120 | " -------------------------------------------------------------------------------- /3_deploy_cloud_function/fn_load_to_bq/deploy.sh: -------------------------------------------------------------------------------- 1 | gcloud alpha functions deploy load-bq-data \ 2 | --gen2 \ 3 | --region=europe-west2 \ 4 | --runtime=python38 \ 5 | --source=. \ 6 | --entry-point=create_load_job \ 7 | --trigger-http \ 8 | --max-instances=5 \ 9 | --allow-unauthenticated -------------------------------------------------------------------------------- /3_deploy_cloud_function/fn_load_to_bq/local_test.sh: -------------------------------------------------------------------------------- 1 | curl -X POST localhost:8080 \ 2 | -H "Content-Type: application/json" \ 3 | -d "{ 4 | \"data\" : { 5 | \"dataset\": \"rick_and_morty\", 6 | \"table\": \"character\", 7 | \"results\": [{ 8 | \"created\": \"Mon, 25 Oct 2021 09:18:48 GMT\", 9 | \"episode\": [ 10 | \"https://rickandmortyapi.com/api/episode/49\" 11 | ], 12 | \"gender\": \"Female\", 13 | \"id\": 781, 14 | \"image\": \"https://rickandmortyapi.com/api/character/avatar/781.jpeg\", 15 | \"location\": { 16 | \"name\": \"Earth (Replacement Dimension)\", 17 | \"url\": \"https://rickandmortyapi.com/api/location/20\" 18 | }, 19 | \"name\": \"Rick's Garage\", 20 | \"origin\": { 21 | \"name\": \"Earth (Replacement Dimension)\", 22 | \"url\": \"https://rickandmortyapi.com/api/location/20\" 23 | }, 24 | \"species\": \"Robot\", 25 | \"status\": \"Alive\", 26 | \"type\": \"Artificial Intelligence\", 27 | \"url\": \"https://rickandmortyapi.com/api/character/781\" 28 | }, 29 | { 30 | \"created\": \"Mon, 25 Oct 2021 09:20:57 GMT\", 31 | \"episode\": [ 32 | \"https://rickandmortyapi.com/api/episode/49\" 33 | ], 34 | \"gender\": \"Male\", 35 | \"id\": 782, 36 | \"image\": \"https://rickandmortyapi.com/api/character/avatar/782.jpeg\", 37 | \"location\": { 38 | \"name\": \"Birdperson's Consciousness\", 39 | \"url\": \"https://rickandmortyapi.com/api/location/120\" 40 | }, 41 | \"name\": \"Memory Squanchy\", 42 | \"origin\": { 43 | \"name\": \"Birdperson's Consciousness\", 44 | \"url\": \"https://rickandmortyapi.com/api/location/120\" 45 | }, 46 | \"species\": \"Alien\", 47 | \"status\": \"Dead\", 48 | \"type\": \"Memory\", 49 | \"url\": \"https://rickandmortyapi.com/api/character/782\" 50 | }, 51 | { 52 | \"created\": \"Mon, 25 Oct 2021 09:22:40 GMT\", 53 | \"episode\": [ 54 | \"https://rickandmortyapi.com/api/episode/49\" 55 | ], 56 | \"gender\": \"Male\", 57 | \"id\": 783, 58 | \"image\": \"https://rickandmortyapi.com/api/character/avatar/783.jpeg\", 59 | \"location\": { 60 | \"name\": \"Birdperson's Consciousness\", 61 | \"url\": 
\"https://rickandmortyapi.com/api/location/120\" 62 | }, 63 | \"name\": \"Memory Rick\", 64 | \"origin\": { 65 | \"name\": \"Birdperson's Consciousness\", 66 | \"url\": \"https://rickandmortyapi.com/api/location/120\" 67 | }, 68 | \"species\": \"Human\", 69 | \"status\": \"Dead\", 70 | \"type\": \"Memory\", 71 | \"url\": \"https://rickandmortyapi.com/api/character/783\" 72 | }, 73 | { 74 | \"created\": \"Mon, 25 Oct 2021 09:23:22 GMT\", 75 | \"episode\": [ 76 | \"https://rickandmortyapi.com/api/episode/49\" 77 | ], 78 | \"gender\": \"Male\", 79 | \"id\": 784, 80 | \"image\": \"https://rickandmortyapi.com/api/character/avatar/784.jpeg\", 81 | \"location\": { 82 | \"name\": \"Birdperson's Consciousness\", 83 | \"url\": \"https://rickandmortyapi.com/api/location/120\" 84 | }, 85 | \"name\": \"Memory Rick\", 86 | \"origin\": { 87 | \"name\": \"Birdperson's Consciousness\", 88 | \"url\": \"https://rickandmortyapi.com/api/location/120\" 89 | }, 90 | \"species\": \"Human\", 91 | \"status\": \"Dead\", 92 | \"type\": \"Memory\", 93 | \"url\": \"https://rickandmortyapi.com/api/character/784\" 94 | }, 95 | { 96 | \"created\": \"Mon, 25 Oct 2021 09:24:51 GMT\", 97 | \"episode\": [ 98 | \"https://rickandmortyapi.com/api/episode/49\" 99 | ], 100 | \"gender\": \"Male\", 101 | \"id\": 785, 102 | \"image\": \"https://rickandmortyapi.com/api/character/avatar/785.jpeg\", 103 | \"location\": { 104 | \"name\": \"Birdperson's Consciousness\", 105 | \"url\": \"https://rickandmortyapi.com/api/location/120\" 106 | }, 107 | \"name\": \"Memory Geardude\", 108 | \"origin\": { 109 | \"name\": \"Birdperson's Consciousness\", 110 | \"url\": \"https://rickandmortyapi.com/api/location/120\" 111 | }, 112 | \"species\": \"Alien\", 113 | \"status\": \"Dead\", 114 | \"type\": \"Memory\", 115 | \"url\": \"https://rickandmortyapi.com/api/character/785\" 116 | } 117 | ] 118 | } 119 | } 120 | " -------------------------------------------------------------------------------- /3_deploy_cloud_function/fn_load_to_bq/main.py: -------------------------------------------------------------------------------- 1 | from typing import List, Tuple 2 | import io 3 | 4 | import pandas as pd 5 | from google.cloud import bigquery 6 | from google.cloud.bigquery.schema import SchemaField 7 | import functions_framework 8 | import flask 9 | 10 | from transforms import transform_dataframe 11 | 12 | 13 | def _generate_bigquery_schema(df: pd.DataFrame) -> List[SchemaField]: 14 | TYPE_MAPPING = { 15 | "i": "INTEGER", 16 | "u": "NUMERIC", 17 | "b": "BOOLEAN", 18 | "f": "FLOAT", 19 | "O": "STRING", 20 | "S": "STRING", 21 | "U": "STRING", 22 | "M": "TIMESTAMP", 23 | } 24 | schema = [] 25 | for column, dtype in df.dtypes.items(): 26 | val = df[column].iloc[0] 27 | mode = "REPEATED" if isinstance(val, list) else "NULLABLE" 28 | 29 | if isinstance(val, dict) or (mode == "REPEATED" and isinstance(val[0], dict)): 30 | fields = _generate_bigquery_schema(pd.json_normalize(val)) 31 | else: 32 | fields = () 33 | 34 | type = "RECORD" if fields else TYPE_MAPPING.get(dtype.kind) 35 | schema.append( 36 | SchemaField( 37 | name=column, 38 | field_type=type, 39 | mode=mode, 40 | fields=fields, 41 | ) 42 | ) 43 | return schema 44 | 45 | 46 | def prepare_data(data: List[dict]) -> Tuple[str, List[SchemaField]]: 47 | df = pd.json_normalize(data) 48 | df = transform_dataframe(df) 49 | schema = _generate_bigquery_schema(df) 50 | json_records = df.to_json(orient="records", lines=True, date_format="iso") 51 | return json_records, schema 52 | 53 | 54 | def load_data_to_bq( 55 | 
client: bigquery.Client, 56 | data: str, 57 | table_id: str, 58 | load_config: bigquery.LoadJobConfig, 59 | ) -> int: 60 | load_job = client.load_table_from_file( 61 | io.StringIO(data), table_id, location="EU", job_config=load_config 62 | ) 63 | load_job.result() # waits for the job to complete. 64 | destination_table = client.get_table(table_id) 65 | num_rows = destination_table.num_rows 66 | return num_rows 67 | 68 | 69 | @functions_framework.http 70 | def create_load_job(request: flask.Request) -> str: 71 | """ 72 | The request is a flask.Request object that contains a `data` record in the following format: 73 | { 74 | ..., 75 | data: { 76 | dataset: str, 77 | table: str, 78 | results: List[dict] 79 | } 80 | """ 81 | request_parameters = request.get_json().get("data") 82 | results = request_parameters["results"] 83 | dataset = request_parameters["dataset"] 84 | table = request_parameters["table"] 85 | json_records, schema = prepare_data(results) 86 | 87 | bigquery_client = bigquery.Client() 88 | load_config = bigquery.LoadJobConfig( 89 | schema=schema, 90 | write_disposition="WRITE_TRUNCATE", 91 | source_format=bigquery.SourceFormat.NEWLINE_DELIMITED_JSON, 92 | ) 93 | table_id = f"{dataset}.{table}" 94 | num_rows = load_data_to_bq(bigquery_client, json_records, table_id, load_config) 95 | print(f"Successfully loaded {num_rows} to {table_id}") 96 | return "DONE" 97 | -------------------------------------------------------------------------------- /3_deploy_cloud_function/fn_load_to_bq/requirements.txt: -------------------------------------------------------------------------------- 1 | cachetools==5.2.0 2 | certifi==2022.12.7 3 | charset-normalizer==2.1.1 4 | click==8.1.3 5 | cloudevents==1.7.1 6 | deprecation==2.1.0 7 | Flask==2.2.2 8 | functions-framework==3.2.1 9 | google-api-core==2.11.0 10 | google-auth==2.15.0 11 | google-cloud-bigquery==3.4.0 12 | google-cloud-bigquery-storage==2.16.2 13 | google-cloud-core==2.3.2 14 | google-crc32c==1.5.0 15 | google-resumable-media==2.4.0 16 | googleapis-common-protos==1.57.0 17 | grpcio==1.51.1 18 | grpcio-status==1.51.1 19 | gunicorn==20.1.0 20 | idna==3.4 21 | importlib-metadata==5.1.0 22 | itsdangerous==2.1.2 23 | Jinja2==3.1.2 24 | MarkupSafe==2.1.1 25 | numpy==1.23.5 26 | packaging==21.3 27 | pandas==1.5.2 28 | proto-plus==1.22.1 29 | protobuf==4.21.11 30 | pyarrow==10.0.1 31 | pyasn1==0.4.8 32 | pyasn1-modules==0.2.8 33 | pyparsing==3.0.9 34 | python-dateutil==2.8.2 35 | pytz==2022.6 36 | requests==2.28.1 37 | rsa==4.9 38 | six==1.16.0 39 | types-click==7.1.8 40 | types-Flask==1.1.6 41 | types-Jinja2==2.11.9 42 | types-MarkupSafe==1.1.10 43 | types-Werkzeug==1.0.9 44 | urllib3==1.26.13 45 | watchdog==2.2.0 46 | Werkzeug==2.2.2 47 | zipp==3.11.0 48 | -------------------------------------------------------------------------------- /3_deploy_cloud_function/fn_load_to_bq/transforms.py: -------------------------------------------------------------------------------- 1 | from functools import reduce 2 | from typing import Callable, List 3 | import json 4 | 5 | import pandas as pd 6 | 7 | Preprocessor = Callable[[pd.DataFrame], pd.DataFrame] 8 | 9 | 10 | def create_row_hash(df: pd.DataFrame) -> pd.DataFrame: 11 | """Create unique hash of entire DataFrame row.""" 12 | df.set_index(pd.util.hash_pandas_object(df.astype("str")), drop=False, inplace=True) 13 | df = df.reset_index(names=["row_hash"]) 14 | return df 15 | 16 | 17 | def add_current_datetime(df: pd.DataFrame) -> pd.DataFrame: 18 | df.insert(0, "ingestion_date", pd.to_datetime("now", 
utc=True)) 19 | return df 20 | 21 | 22 | def _get_nested_fields(df: pd.DataFrame) -> List[str]: 23 | """Return a list of nested fields, sorted by the deepest level of nesting first.""" 24 | nested_fields = [*{field.rsplit(".", 1)[0] for field in df.columns if "." in field}] 25 | nested_fields.sort(key=lambda record: len(record.split(".")), reverse=True) 26 | return nested_fields 27 | 28 | 29 | def df_denormalize(df: pd.DataFrame) -> pd.DataFrame: 30 | """ 31 | Convert a normalised DataFrame into a nested structure. 32 | 33 | Fields separated by '.' are considered part of a nested structure. 34 | """ 35 | nested_fields = _get_nested_fields(df) 36 | for field in nested_fields: 37 | list_of_children = [column for column in df.columns if field in column] 38 | rename = { 39 | field_name: field_name.rsplit(".", 1)[1] for field_name in list_of_children 40 | } 41 | renamed_fields = df[list_of_children].rename(columns=rename) 42 | df[field] = json.loads(renamed_fields.to_json(orient="records")) 43 | df.drop(list_of_children, axis=1, inplace=True) 44 | return df 45 | 46 | 47 | def compose(*functions: Preprocessor) -> Preprocessor: 48 | return reduce(lambda f, g: lambda x: g(f(x)), functions) 49 | 50 | 51 | def transform_dataframe(df: pd.DataFrame) -> pd.DataFrame: 52 | preprocessor = compose( 53 | create_row_hash, 54 | add_current_datetime, 55 | df_denormalize, 56 | ) 57 | 58 | return preprocessor(df) 59 | -------------------------------------------------------------------------------- /3_deploy_cloud_function/fn_ram_api_request/.gcloudignore: -------------------------------------------------------------------------------- 1 | deploy_function.sh 2 | local_test.sh 3 | cloud_test.sh -------------------------------------------------------------------------------- /3_deploy_cloud_function/fn_ram_api_request/cloud_test.sh: -------------------------------------------------------------------------------- 1 | curl -X POST \ 2 | -H "Content-Type: application/json" \ 3 | -d '{ 4 | "data" : { 5 | "endpoint": "character", 6 | "api_params": {} 7 | } 8 | } 9 | ' -------------------------------------------------------------------------------- /3_deploy_cloud_function/fn_ram_api_request/deploy.sh: -------------------------------------------------------------------------------- 1 | gcloud alpha functions deploy ram-api-request \ 2 | --gen2 \ 3 | --region=europe-west2 \ 4 | --runtime=python38 \ 5 | --source=. 
\ 6 | --entry-point=send_api_request \ 7 | --trigger-http \ 8 | --max-instances=5 \ 9 | --allow-unauthenticated -------------------------------------------------------------------------------- /3_deploy_cloud_function/fn_ram_api_request/local_test.sh: -------------------------------------------------------------------------------- 1 | curl -X POST localhost:8080 \ 2 | -H "Content-Type: application/json" \ 3 | -d '{ 4 | "data" : { 5 | "endpoint": "character", 6 | "api_params": {} 7 | } 8 | } 9 | ' -------------------------------------------------------------------------------- /3_deploy_cloud_function/fn_ram_api_request/main.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | from dataclasses import asdict 3 | import json 4 | import datetime 5 | 6 | import functions_framework 7 | import flask 8 | 9 | from ramapi import get_endpoint 10 | from models import ApiParameters, CharacterSchema 11 | 12 | 13 | def get_all_paginated_results( 14 | endpoint: str, pages: int, params: ApiParameters 15 | ) -> List[CharacterSchema]: 16 | results = [] 17 | print(f"Starting loop of {pages} pages") 18 | for page in range(1, pages + 1): 19 | params.page = page 20 | response = get_endpoint(endpoint, params) 21 | results.extend(response.results) 22 | print("Completed") 23 | return results 24 | 25 | 26 | def default(obj): 27 | """Create custom default function for json.dumps() method""" 28 | if isinstance(obj, (datetime.date, datetime.datetime)): 29 | return obj.isoformat() 30 | 31 | 32 | @functions_framework.http 33 | def send_api_request(request: flask.Request) -> str: 34 | """ 35 | Return a JSON serialised string representing a list of `CharacterSchema` records. 36 | 37 | The request is a flask.Request object that contains a `data` record in the following format: 38 | { 39 | ..., 40 | data: { 41 | endpoint: str, 42 | api_params: Fields defined in `ApiParameters` dataclass 43 | } 44 | """ 45 | data = request.get_json().get("data") 46 | params = ApiParameters(**data.get("api_params")) 47 | response = get_endpoint(data.get("endpoint"), params) 48 | results = get_all_paginated_results(data.get("endpoint"), response.info.pages, params) 49 | results = [asdict(result) for result in results] 50 | 51 | return json.dumps(results, default=default) 52 | -------------------------------------------------------------------------------- /3_deploy_cloud_function/fn_ram_api_request/models.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import Dict, List, Literal, Optional 3 | from datetime import datetime, timezone 4 | 5 | 6 | @dataclass 7 | class CharacterSchema: 8 | id: int 9 | name: str 10 | status: Literal["Alive", "Dead", "unknown"] 11 | species: str 12 | type: str 13 | gender: Literal["Female", "Male", "Genderless", "unknown"] 14 | origin: Dict[str, str] 15 | location: Dict[str, str] 16 | image: str 17 | episode: List[str] 18 | url: str 19 | created: datetime 20 | 21 | def __post_init__(self): 22 | self.created = datetime.strptime(self.created, "%Y-%m-%dT%H:%M:%S.%fZ").replace( 23 | tzinfo=timezone.utc 24 | ) 25 | 26 | 27 | @dataclass 28 | class ApiInfo: 29 | count: int 30 | pages: int 31 | next: Optional[str] 32 | prev: Optional[str] 33 | 34 | 35 | @dataclass 36 | class ApiResponse: 37 | info: ApiInfo 38 | results: List[CharacterSchema] 39 | 40 | def __post_init__(self): 41 | self.info = ApiInfo(**self.info) 42 | self.results = [CharacterSchema(**x) for x in 
self.results] 43 | 44 | 45 | @dataclass 46 | class ApiParameters: 47 | page: Optional[str] = None 48 | name: Optional[str] = None 49 | status: Optional[Literal["alive", "dead", "unknown"]] = None 50 | species: Optional[str] = None 51 | type: Optional[str] = None 52 | gender: Optional[Literal["female", "male", "genderless", "unknown"]] = None 53 | -------------------------------------------------------------------------------- /3_deploy_cloud_function/fn_ram_api_request/ramapi.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from dataclasses import asdict 3 | 4 | from models import ApiResponse, ApiParameters 5 | 6 | BASE_URL = "https://rickandmortyapi.com/api" 7 | 8 | 9 | def get_endpoint(endpoint: str, params: ApiParameters) -> ApiResponse: 10 | """Return `ApiResponse` from Rick and Morty `endpoint`""" 11 | response = requests.get(url=f"{BASE_URL}/{endpoint}", params=asdict(params)) 12 | response.raise_for_status() 13 | response = ApiResponse(**response.json()) 14 | 15 | return response 16 | -------------------------------------------------------------------------------- /3_deploy_cloud_function/fn_ram_api_request/requirements.txt: -------------------------------------------------------------------------------- 1 | certifi==2022.9.24 2 | charset-normalizer==2.1.1 3 | click==8.1.3 4 | idna==3.4 5 | mypy-extensions==0.4.3 6 | pathspec==0.10.1 7 | platformdirs==2.5.3 8 | requests==2.28.1 9 | tomli==2.0.1 10 | typing_extensions==4.4.0 11 | urllib3==1.26.12 12 | types-click==7.1.8 13 | types-Flask==1.1.6 14 | types-Jinja2==2.11.9 15 | types-MarkupSafe==1.1.10 16 | types-Werkzeug==1.0.9 -------------------------------------------------------------------------------- /3_deploy_cloud_function/test_deployed_functions.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import json 3 | 4 | def get_character_results(): 5 | data = {"data": {"endpoint": "character", "api_params": {}}} 6 | headers = {"Content-Type": "application/json"} 7 | results = requests.post("", data=json.dumps(data), headers=headers) 8 | return results.json() 9 | 10 | def load_bq_data(results): 11 | data = {"data": {"dataset": "rick_and_morty", "table": "character", "results": results}} 12 | headers = {"Content-Type": "application/json"} 13 | results = requests.post("", data=json.dumps(data), headers=headers) 14 | return "DONE" 15 | 16 | if __name__ == "__main__": 17 | results = get_character_results() 18 | load_bq_data(results) -------------------------------------------------------------------------------- /4_continuous_deployment/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
160 | #.idea/ -------------------------------------------------------------------------------- /4_continuous_deployment/Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | url = "https://pypi.org/simple" 3 | verify_ssl = true 4 | name = "pypi" 5 | 6 | [packages] 7 | requests = "*" 8 | pandas = "*" 9 | google-cloud-bigquery = "*" 10 | functions-framework = "*" 11 | types-flask = "*" 12 | 13 | [dev-packages] 14 | black = "*" 15 | 16 | [requires] 17 | python_version = "3.8" 18 | -------------------------------------------------------------------------------- /4_continuous_deployment/README.md: -------------------------------------------------------------------------------- 1 | ## Summary 2 | 3 | We need to break the existing code out into two separate functions based on their unique responsibilities. 4 | 5 | 1. Calling the API and collecting data 6 | 2. Loading the data into BigQuery 7 | 8 | ## Testing 9 | 10 | fn_ram_api_request: `functions-framework --target send_api_request --debug`
11 | fn_load_to_bq: `functions-framework --target create_load_job --debug` 12 | 13 | ## Deploying 14 | 15 | Prerequisites: 16 | - Enable the following APIs in Google Cloud:
17 |   - Artifact Registry API
18 |   - Cloud Build API
19 |   - Cloud Run Admin API
20 | Cloud Functions API -------------------------------------------------------------------------------- /4_continuous_deployment/fn_load_to_bq/.gcloudignore: -------------------------------------------------------------------------------- 1 | deploy_function.sh 2 | local_test.sh 3 | cloud_test.sh -------------------------------------------------------------------------------- /4_continuous_deployment/fn_load_to_bq/ci-cd/cloudbuild.yaml: -------------------------------------------------------------------------------- 1 | steps: 2 | - name: 'gcr.io/google.com/cloudsdktool/cloud-sdk' 3 | entrypoint: gcloud 4 | args: 5 | - 'alpha' 6 | - 'functions' 7 | - 'deploy' 8 | - 'load-bq-data' 9 | - '--gen2' 10 | - '--region' 11 | - 'europe-west2' 12 | - '--runtime' 13 | - 'python38' 14 | - '--source' 15 | - '4_continuous_deployment/fn_load_to_bq' 16 | - '--entry-point' 17 | - 'create_load_job' 18 | - '--trigger-http' 19 | - '--max-instances' 20 | - '5' 21 | - '--service-account' 22 | - '${_SERVICE_ACCOUNT}' 23 | - '--allow-unauthenticated' 24 | options: 25 | logging: CLOUD_LOGGING_ONLY -------------------------------------------------------------------------------- /4_continuous_deployment/fn_load_to_bq/cloud_test.sh: -------------------------------------------------------------------------------- 1 | curl -X POST \ 2 | -H "Content-Type: application/json" \ 3 | -d "{ 4 | \"data\" : { 5 | \"dataset\": \"rick_and_morty\", 6 | \"table\": \"character\", 7 | \"results\": [{ 8 | \"created\": \"Mon, 25 Oct 2021 09:18:48 GMT\", 9 | \"episode\": [ 10 | \"https://rickandmortyapi.com/api/episode/49\" 11 | ], 12 | \"gender\": \"Female\", 13 | \"id\": 781, 14 | \"image\": \"https://rickandmortyapi.com/api/character/avatar/781.jpeg\", 15 | \"location\": { 16 | \"name\": \"Earth (Replacement Dimension)\", 17 | \"url\": \"https://rickandmortyapi.com/api/location/20\" 18 | }, 19 | \"name\": \"Rick's Garage\", 20 | \"origin\": { 21 | \"name\": \"Earth (Replacement Dimension)\", 22 | \"url\": \"https://rickandmortyapi.com/api/location/20\" 23 | }, 24 | \"species\": \"Robot\", 25 | \"status\": \"Alive\", 26 | \"type\": \"Artificial Intelligence\", 27 | \"url\": \"https://rickandmortyapi.com/api/character/781\" 28 | }, 29 | { 30 | \"created\": \"Mon, 25 Oct 2021 09:20:57 GMT\", 31 | \"episode\": [ 32 | \"https://rickandmortyapi.com/api/episode/49\" 33 | ], 34 | \"gender\": \"Male\", 35 | \"id\": 782, 36 | \"image\": \"https://rickandmortyapi.com/api/character/avatar/782.jpeg\", 37 | \"location\": { 38 | \"name\": \"Birdperson's Consciousness\", 39 | \"url\": \"https://rickandmortyapi.com/api/location/120\" 40 | }, 41 | \"name\": \"Memory Squanchy\", 42 | \"origin\": { 43 | \"name\": \"Birdperson's Consciousness\", 44 | \"url\": \"https://rickandmortyapi.com/api/location/120\" 45 | }, 46 | \"species\": \"Alien\", 47 | \"status\": \"Dead\", 48 | \"type\": \"Memory\", 49 | \"url\": \"https://rickandmortyapi.com/api/character/782\" 50 | }, 51 | { 52 | \"created\": \"Mon, 25 Oct 2021 09:22:40 GMT\", 53 | \"episode\": [ 54 | \"https://rickandmortyapi.com/api/episode/49\" 55 | ], 56 | \"gender\": \"Male\", 57 | \"id\": 783, 58 | \"image\": \"https://rickandmortyapi.com/api/character/avatar/783.jpeg\", 59 | \"location\": { 60 | \"name\": \"Birdperson's Consciousness\", 61 | \"url\": \"https://rickandmortyapi.com/api/location/120\" 62 | }, 63 | \"name\": \"Memory Rick\", 64 | \"origin\": { 65 | \"name\": \"Birdperson's Consciousness\", 66 | \"url\": \"https://rickandmortyapi.com/api/location/120\" 67 | }, 68 | \"species\": 
\"Human\", 69 | \"status\": \"Dead\", 70 | \"type\": \"Memory\", 71 | \"url\": \"https://rickandmortyapi.com/api/character/783\" 72 | }, 73 | { 74 | \"created\": \"Mon, 25 Oct 2021 09:23:22 GMT\", 75 | \"episode\": [ 76 | \"https://rickandmortyapi.com/api/episode/49\" 77 | ], 78 | \"gender\": \"Male\", 79 | \"id\": 784, 80 | \"image\": \"https://rickandmortyapi.com/api/character/avatar/784.jpeg\", 81 | \"location\": { 82 | \"name\": \"Birdperson's Consciousness\", 83 | \"url\": \"https://rickandmortyapi.com/api/location/120\" 84 | }, 85 | \"name\": \"Memory Rick\", 86 | \"origin\": { 87 | \"name\": \"Birdperson's Consciousness\", 88 | \"url\": \"https://rickandmortyapi.com/api/location/120\" 89 | }, 90 | \"species\": \"Human\", 91 | \"status\": \"Dead\", 92 | \"type\": \"Memory\", 93 | \"url\": \"https://rickandmortyapi.com/api/character/784\" 94 | }, 95 | { 96 | \"created\": \"Mon, 25 Oct 2021 09:24:51 GMT\", 97 | \"episode\": [ 98 | \"https://rickandmortyapi.com/api/episode/49\" 99 | ], 100 | \"gender\": \"Male\", 101 | \"id\": 785, 102 | \"image\": \"https://rickandmortyapi.com/api/character/avatar/785.jpeg\", 103 | \"location\": { 104 | \"name\": \"Birdperson's Consciousness\", 105 | \"url\": \"https://rickandmortyapi.com/api/location/120\" 106 | }, 107 | \"name\": \"Memory Geardude\", 108 | \"origin\": { 109 | \"name\": \"Birdperson's Consciousness\", 110 | \"url\": \"https://rickandmortyapi.com/api/location/120\" 111 | }, 112 | \"species\": \"Alien\", 113 | \"status\": \"Dead\", 114 | \"type\": \"Memory\", 115 | \"url\": \"https://rickandmortyapi.com/api/character/785\" 116 | } 117 | ] 118 | } 119 | } 120 | " -------------------------------------------------------------------------------- /4_continuous_deployment/fn_load_to_bq/deploy.sh: -------------------------------------------------------------------------------- 1 | gcloud alpha functions deploy load-bq-data \ 2 | --gen2 \ 3 | --region=europe-west2 \ 4 | --runtime=python38 \ 5 | --source=. 
\ 6 | --entry-point=create_load_job \ 7 | --trigger-http \ 8 | --max-instances=5 \ 9 | --allow-unauthenticated -------------------------------------------------------------------------------- /4_continuous_deployment/fn_load_to_bq/local_test.sh: -------------------------------------------------------------------------------- 1 | curl -X POST localhost:8080 \ 2 | -H "Content-Type: application/json" \ 3 | -d "{ 4 | \"data\" : { 5 | \"dataset\": \"rick_and_morty\", 6 | \"table\": \"character\", 7 | \"results\": [{ 8 | \"created\": \"Mon, 25 Oct 2021 09:18:48 GMT\", 9 | \"episode\": [ 10 | \"https://rickandmortyapi.com/api/episode/49\" 11 | ], 12 | \"gender\": \"Female\", 13 | \"id\": 781, 14 | \"image\": \"https://rickandmortyapi.com/api/character/avatar/781.jpeg\", 15 | \"location\": { 16 | \"name\": \"Earth (Replacement Dimension)\", 17 | \"url\": \"https://rickandmortyapi.com/api/location/20\" 18 | }, 19 | \"name\": \"Rick's Garage\", 20 | \"origin\": { 21 | \"name\": \"Earth (Replacement Dimension)\", 22 | \"url\": \"https://rickandmortyapi.com/api/location/20\" 23 | }, 24 | \"species\": \"Robot\", 25 | \"status\": \"Alive\", 26 | \"type\": \"Artificial Intelligence\", 27 | \"url\": \"https://rickandmortyapi.com/api/character/781\" 28 | }, 29 | { 30 | \"created\": \"Mon, 25 Oct 2021 09:20:57 GMT\", 31 | \"episode\": [ 32 | \"https://rickandmortyapi.com/api/episode/49\" 33 | ], 34 | \"gender\": \"Male\", 35 | \"id\": 782, 36 | \"image\": \"https://rickandmortyapi.com/api/character/avatar/782.jpeg\", 37 | \"location\": { 38 | \"name\": \"Birdperson's Consciousness\", 39 | \"url\": \"https://rickandmortyapi.com/api/location/120\" 40 | }, 41 | \"name\": \"Memory Squanchy\", 42 | \"origin\": { 43 | \"name\": \"Birdperson's Consciousness\", 44 | \"url\": \"https://rickandmortyapi.com/api/location/120\" 45 | }, 46 | \"species\": \"Alien\", 47 | \"status\": \"Dead\", 48 | \"type\": \"Memory\", 49 | \"url\": \"https://rickandmortyapi.com/api/character/782\" 50 | }, 51 | { 52 | \"created\": \"Mon, 25 Oct 2021 09:22:40 GMT\", 53 | \"episode\": [ 54 | \"https://rickandmortyapi.com/api/episode/49\" 55 | ], 56 | \"gender\": \"Male\", 57 | \"id\": 783, 58 | \"image\": \"https://rickandmortyapi.com/api/character/avatar/783.jpeg\", 59 | \"location\": { 60 | \"name\": \"Birdperson's Consciousness\", 61 | \"url\": \"https://rickandmortyapi.com/api/location/120\" 62 | }, 63 | \"name\": \"Memory Rick\", 64 | \"origin\": { 65 | \"name\": \"Birdperson's Consciousness\", 66 | \"url\": \"https://rickandmortyapi.com/api/location/120\" 67 | }, 68 | \"species\": \"Human\", 69 | \"status\": \"Dead\", 70 | \"type\": \"Memory\", 71 | \"url\": \"https://rickandmortyapi.com/api/character/783\" 72 | }, 73 | { 74 | \"created\": \"Mon, 25 Oct 2021 09:23:22 GMT\", 75 | \"episode\": [ 76 | \"https://rickandmortyapi.com/api/episode/49\" 77 | ], 78 | \"gender\": \"Male\", 79 | \"id\": 784, 80 | \"image\": \"https://rickandmortyapi.com/api/character/avatar/784.jpeg\", 81 | \"location\": { 82 | \"name\": \"Birdperson's Consciousness\", 83 | \"url\": \"https://rickandmortyapi.com/api/location/120\" 84 | }, 85 | \"name\": \"Memory Rick\", 86 | \"origin\": { 87 | \"name\": \"Birdperson's Consciousness\", 88 | \"url\": \"https://rickandmortyapi.com/api/location/120\" 89 | }, 90 | \"species\": \"Human\", 91 | \"status\": \"Dead\", 92 | \"type\": \"Memory\", 93 | \"url\": \"https://rickandmortyapi.com/api/character/784\" 94 | }, 95 | { 96 | \"created\": \"Mon, 25 Oct 2021 09:24:51 GMT\", 97 | \"episode\": [ 98 | 
\"https://rickandmortyapi.com/api/episode/49\" 99 | ], 100 | \"gender\": \"Male\", 101 | \"id\": 785, 102 | \"image\": \"https://rickandmortyapi.com/api/character/avatar/785.jpeg\", 103 | \"location\": { 104 | \"name\": \"Birdperson's Consciousness\", 105 | \"url\": \"https://rickandmortyapi.com/api/location/120\" 106 | }, 107 | \"name\": \"Memory Geardude\", 108 | \"origin\": { 109 | \"name\": \"Birdperson's Consciousness\", 110 | \"url\": \"https://rickandmortyapi.com/api/location/120\" 111 | }, 112 | \"species\": \"Alien\", 113 | \"status\": \"Dead\", 114 | \"type\": \"Memory\", 115 | \"url\": \"https://rickandmortyapi.com/api/character/785\" 116 | } 117 | ] 118 | } 119 | } 120 | " -------------------------------------------------------------------------------- /4_continuous_deployment/fn_load_to_bq/main.py: -------------------------------------------------------------------------------- 1 | from typing import List, Tuple 2 | import io 3 | 4 | import pandas as pd 5 | from google.cloud import bigquery 6 | from google.cloud.bigquery.schema import SchemaField 7 | import functions_framework 8 | import flask 9 | 10 | from transforms import transform_dataframe 11 | 12 | 13 | def _generate_bigquery_schema(df: pd.DataFrame) -> List[SchemaField]: 14 | TYPE_MAPPING = { 15 | "i": "INTEGER", 16 | "u": "NUMERIC", 17 | "b": "BOOLEAN", 18 | "f": "FLOAT", 19 | "O": "STRING", 20 | "S": "STRING", 21 | "U": "STRING", 22 | "M": "TIMESTAMP", 23 | } 24 | schema = [] 25 | for column, dtype in df.dtypes.items(): 26 | val = df[column].iloc[0] 27 | mode = "REPEATED" if isinstance(val, list) else "NULLABLE" 28 | 29 | if isinstance(val, dict) or (mode == "REPEATED" and isinstance(val[0], dict)): 30 | fields = _generate_bigquery_schema(pd.json_normalize(val)) 31 | else: 32 | fields = () 33 | 34 | 35 | 36 | type = "RECORD" if fields else TYPE_MAPPING.get(dtype.kind) 37 | schema.append( 38 | SchemaField( 39 | name=column, 40 | field_type=type, 41 | mode=mode, 42 | fields=fields, 43 | ) 44 | ) 45 | return schema 46 | 47 | 48 | def prepare_data(data: List[dict]) -> Tuple[str, List[SchemaField]]: 49 | df = pd.json_normalize(data) 50 | df = transform_dataframe(df) 51 | schema = _generate_bigquery_schema(df) 52 | json_records = df.to_json(orient="records", lines=True, date_format="iso") 53 | return json_records, schema 54 | 55 | 56 | def load_data_to_bq( 57 | client: bigquery.Client, 58 | data: str, 59 | table_id: str, 60 | load_config: bigquery.LoadJobConfig, 61 | ) -> int: 62 | load_job = client.load_table_from_file( 63 | io.StringIO(data), table_id, location="EU", job_config=load_config 64 | ) 65 | load_job.result() # waits for the job to complete. 
66 | destination_table = client.get_table(table_id) 67 | num_rows = destination_table.num_rows 68 | return num_rows 69 | 70 | 71 | @functions_framework.http 72 | def create_load_job(request: flask.Request) -> str: 73 | """ 74 | The request is a flask.Request object that contains a `data` record in the following format: 75 | { 76 | ..., 77 | data: { 78 | dataset: str, 79 | table: str, 80 | results: List[dict] 81 | } 82 | """ 83 | request_parameters = request.get_json().get("data") 84 | results = request_parameters["results"] 85 | dataset = request_parameters["dataset"] 86 | table = request_parameters["table"] 87 | json_records, schema = prepare_data(results) 88 | 89 | bigquery_client = bigquery.Client() 90 | load_config = bigquery.LoadJobConfig( 91 | schema=schema, 92 | write_disposition="WRITE_TRUNCATE", 93 | source_format=bigquery.SourceFormat.NEWLINE_DELIMITED_JSON, 94 | ) 95 | table_id = f"{dataset}.{table}" 96 | num_rows = load_data_to_bq(bigquery_client, json_records, table_id, load_config) 97 | print(f"Successfully loaded {num_rows} to {table_id}") 98 | return "DONE" 99 | -------------------------------------------------------------------------------- /4_continuous_deployment/fn_load_to_bq/requirements.txt: -------------------------------------------------------------------------------- 1 | cachetools==5.2.0 2 | certifi==2022.12.7 3 | charset-normalizer==2.1.1 4 | click==8.1.3 5 | cloudevents==1.7.1 6 | deprecation==2.1.0 7 | Flask==2.2.2 8 | functions-framework==3.2.1 9 | google-api-core==2.11.0 10 | google-auth==2.15.0 11 | google-cloud-bigquery==3.4.0 12 | google-cloud-bigquery-storage==2.16.2 13 | google-cloud-core==2.3.2 14 | google-crc32c==1.5.0 15 | google-resumable-media==2.4.0 16 | googleapis-common-protos==1.57.0 17 | grpcio==1.51.1 18 | grpcio-status==1.51.1 19 | gunicorn==20.1.0 20 | idna==3.4 21 | importlib-metadata==5.1.0 22 | itsdangerous==2.1.2 23 | Jinja2==3.1.2 24 | MarkupSafe==2.1.1 25 | numpy==1.23.5 26 | packaging==21.3 27 | pandas==1.5.2 28 | proto-plus==1.22.1 29 | protobuf==4.21.11 30 | pyarrow==10.0.1 31 | pyasn1==0.4.8 32 | pyasn1-modules==0.2.8 33 | pyparsing==3.0.9 34 | python-dateutil==2.8.2 35 | pytz==2022.6 36 | requests==2.28.1 37 | rsa==4.9 38 | six==1.16.0 39 | types-click==7.1.8 40 | types-Flask==1.1.6 41 | types-Jinja2==2.11.9 42 | types-MarkupSafe==1.1.10 43 | types-Werkzeug==1.0.9 44 | urllib3==1.26.13 45 | watchdog==2.2.0 46 | Werkzeug==2.2.2 47 | zipp==3.11.0 48 | -------------------------------------------------------------------------------- /4_continuous_deployment/fn_load_to_bq/transforms.py: -------------------------------------------------------------------------------- 1 | from functools import reduce 2 | from typing import Callable, List 3 | import json 4 | 5 | import pandas as pd 6 | 7 | Preprocessor = Callable[[pd.DataFrame], pd.DataFrame] 8 | 9 | 10 | def create_row_hash(df: pd.DataFrame) -> pd.DataFrame: 11 | """Create unique hash of entire DataFrame row.""" 12 | df.set_index(pd.util.hash_pandas_object(df.astype("str")), drop=False, inplace=True) 13 | df = df.reset_index(names=["row_hash"]) 14 | return df 15 | 16 | 17 | def add_current_datetime(df: pd.DataFrame) -> pd.DataFrame: 18 | df.insert(0, "ingestion_date", pd.to_datetime("now", utc=True)) 19 | return df 20 | 21 | 22 | def _get_nested_fields(df: pd.DataFrame) -> List[str]: 23 | """Return a list of nested fields, sorted by the deepest level of nesting first.""" 24 | nested_fields = [*{field.rsplit(".", 1)[0] for field in df.columns if "." 
in field}] 25 | nested_fields.sort(key=lambda record: len(record.split(".")), reverse=True) 26 | return nested_fields 27 | 28 | 29 | def df_denormalize(df: pd.DataFrame) -> pd.DataFrame: 30 | """ 31 | Convert a normalised DataFrame into a nested structure. 32 | 33 | Fields separated by '.' are considered part of a nested structure. 34 | """ 35 | nested_fields = _get_nested_fields(df) 36 | for field in nested_fields: 37 | list_of_children = [column for column in df.columns if field in column] 38 | rename = { 39 | field_name: field_name.rsplit(".", 1)[1] for field_name in list_of_children 40 | } 41 | renamed_fields = df[list_of_children].rename(columns=rename) 42 | df[field] = json.loads(renamed_fields.to_json(orient="records")) 43 | df.drop(list_of_children, axis=1, inplace=True) 44 | return df 45 | 46 | 47 | def compose(*functions: Preprocessor) -> Preprocessor: 48 | return reduce(lambda f, g: lambda x: g(f(x)), functions) 49 | 50 | 51 | def transform_dataframe(df: pd.DataFrame) -> pd.DataFrame: 52 | preprocessor = compose( 53 | create_row_hash, 54 | add_current_datetime, 55 | df_denormalize, 56 | ) 57 | 58 | return preprocessor(df) 59 | -------------------------------------------------------------------------------- /4_continuous_deployment/fn_ram_api_request/.gcloudignore: -------------------------------------------------------------------------------- 1 | deploy_function.sh 2 | local_test.sh 3 | cloud_test.sh -------------------------------------------------------------------------------- /4_continuous_deployment/fn_ram_api_request/ci-cd/cloudbuild.yaml: -------------------------------------------------------------------------------- 1 | steps: 2 | - name: 'gcr.io/google.com/cloudsdktool/cloud-sdk' 3 | entrypoint: gcloud 4 | args: 5 | - 'alpha' 6 | - 'functions' 7 | - 'deploy' 8 | - 'ram-api-request' 9 | - '--gen2' 10 | - '--region' 11 | - 'europe-west2' 12 | - '--runtime' 13 | - 'python38' 14 | - '--source' 15 | - '4_continuous_deployment/fn_ram_api_request' 16 | - '--entry-point' 17 | - 'send_api_request' 18 | - '--trigger-http' 19 | - '--max-instances' 20 | - '5' 21 | - '--service-account' 22 | - '${_SERVICE_ACCOUNT}' 23 | - '--allow-unauthenticated' 24 | options: 25 | logging: CLOUD_LOGGING_ONLY -------------------------------------------------------------------------------- /4_continuous_deployment/fn_ram_api_request/cloud_test.sh: -------------------------------------------------------------------------------- 1 | curl -X POST \ 2 | -H "Content-Type: application/json" \ 3 | -d '{ 4 | "data" : { 5 | "endpoint": "character", 6 | "api_params": {} 7 | } 8 | } 9 | ' -------------------------------------------------------------------------------- /4_continuous_deployment/fn_ram_api_request/deploy.sh: -------------------------------------------------------------------------------- 1 | gcloud alpha functions deploy ram-api-request \ 2 | --gen2 \ 3 | --region=europe-west2 \ 4 | --runtime=python38 \ 5 | --source=. 
\ 6 | --entry-point=send_api_request \ 7 | --trigger-http \ 8 | --max-instances=5 \ 9 | --allow-unauthenticated -------------------------------------------------------------------------------- /4_continuous_deployment/fn_ram_api_request/local_test.sh: -------------------------------------------------------------------------------- 1 | curl -X POST localhost:8080 \ 2 | -H "Content-Type: application/json" \ 3 | -d '{ 4 | "data" : { 5 | "endpoint": "character", 6 | "api_params": {} 7 | } 8 | } 9 | ' -------------------------------------------------------------------------------- /4_continuous_deployment/fn_ram_api_request/main.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | from dataclasses import asdict 3 | import json 4 | import datetime 5 | 6 | import functions_framework 7 | import flask 8 | 9 | from ramapi import get_endpoint 10 | from models import ApiParameters, CharacterSchema 11 | 12 | 13 | def get_all_paginated_results( 14 | endpoint: str, pages: int, params: ApiParameters 15 | ) -> List[CharacterSchema]: 16 | results = [] 17 | print(f"Starting loop of {pages} pages") 18 | for page in range(1, pages + 1): 19 | params.page = page 20 | response = get_endpoint(endpoint, params) 21 | results.extend(response.results) 22 | print("Completed") 23 | return results 24 | 25 | 26 | def default(obj): 27 | """Create custom default function for json.dumps() method""" 28 | if isinstance(obj, (datetime.date, datetime.datetime)): 29 | return obj.isoformat() 30 | 31 | 32 | @functions_framework.http 33 | def send_api_request(request: flask.Request) -> str: 34 | """ 35 | Return a JSON serialised string representing a list of `CharacterSchema` records. 36 | 37 | The request is a flask.Request object that contains a `data` record in the following format: 38 | { 39 | ..., 40 | data: { 41 | endpoint: str, 42 | api_params: Fields defined in `ApiParameters` dataclass 43 | } 44 | """ 45 | data = request.get_json().get("data") 46 | params = ApiParameters(**data.get("api_params")) 47 | response = get_endpoint(data.get("endpoint"), params) 48 | results = get_all_paginated_results(data.get("endpoint"), response.info.pages, params) 49 | results = [asdict(result) for result in results] 50 | 51 | return json.dumps(results, default=default) 52 | -------------------------------------------------------------------------------- /4_continuous_deployment/fn_ram_api_request/models.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import Dict, List, Literal, Optional 3 | from datetime import datetime, timezone 4 | 5 | 6 | @dataclass 7 | class CharacterSchema: 8 | id: int 9 | name: str 10 | status: Literal["Alive", "Dead", "unknown"] 11 | species: str 12 | type: str 13 | gender: Literal["Female", "Male", "Genderless", "unknown"] 14 | origin: Dict[str, str] 15 | location: Dict[str, str] 16 | image: str 17 | episode: List[str] 18 | url: str 19 | created: datetime 20 | 21 | def __post_init__(self): 22 | self.created = datetime.strptime(self.created, "%Y-%m-%dT%H:%M:%S.%fZ").replace( 23 | tzinfo=timezone.utc 24 | ) 25 | 26 | 27 | @dataclass 28 | class ApiInfo: 29 | count: int 30 | pages: int 31 | next: Optional[str] 32 | prev: Optional[str] 33 | 34 | 35 | @dataclass 36 | class ApiResponse: 37 | info: ApiInfo 38 | results: List[CharacterSchema] 39 | 40 | def __post_init__(self): 41 | self.info = ApiInfo(**self.info) 42 | self.results = [CharacterSchema(**x) for x in 
self.results] 43 | 44 | 45 | @dataclass 46 | class ApiParameters: 47 | page: Optional[str] = None 48 | name: Optional[str] = None 49 | status: Optional[Literal["alive", "dead", "unknown"]] = None 50 | species: Optional[str] = None 51 | type: Optional[str] = None 52 | gender: Optional[Literal["female", "male", "genderless", "unknown"]] = None 53 | -------------------------------------------------------------------------------- /4_continuous_deployment/fn_ram_api_request/ramapi.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from dataclasses import asdict 3 | 4 | from models import ApiResponse, ApiParameters 5 | 6 | BASE_URL = "https://rickandmortyapi.com/api" 7 | 8 | 9 | def get_endpoint(endpoint: str, params: ApiParameters) -> ApiResponse: 10 | """Return `ApiResponse` from Rick and Morty `endpoint`""" 11 | response = requests.get(url=f"{BASE_URL}/{endpoint}", params=asdict(params)) 12 | response.raise_for_status() 13 | response = ApiResponse(**response.json()) 14 | 15 | return response 16 | -------------------------------------------------------------------------------- /4_continuous_deployment/fn_ram_api_request/requirements.txt: -------------------------------------------------------------------------------- 1 | certifi==2022.9.24 2 | charset-normalizer==2.1.1 3 | click==8.1.3 4 | idna==3.4 5 | mypy-extensions==0.4.3 6 | pathspec==0.10.1 7 | platformdirs==2.5.3 8 | requests==2.28.1 9 | tomli==2.0.1 10 | typing_extensions==4.4.0 11 | urllib3==1.26.12 12 | types-click==7.1.8 13 | types-Flask==1.1.6 14 | types-Jinja2==2.11.9 15 | types-MarkupSafe==1.1.10 16 | types-Werkzeug==1.0.9 -------------------------------------------------------------------------------- /4_continuous_deployment/test_deployed_functions.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import json 3 | 4 | def get_character_results(): 5 | data = {"data": {"endpoint": "character", "api_params": {}}} 6 | headers = {"Content-Type": "application/json"} 7 | results = requests.post("", data=json.dumps(data), headers=headers) 8 | return results.json() 9 | 10 | def load_bq_data(results): 11 | data = {"data": {"dataset": "rick_and_morty", "table": "character", "results": results}} 12 | headers = {"Content-Type": "application/json"} 13 | results = requests.post("", data=json.dumps(data), headers=headers) 14 | return "DONE" 15 | 16 | if __name__ == "__main__": 17 | results = get_character_results() 18 | load_bq_data(results) -------------------------------------------------------------------------------- /5_parallel_execution/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
160 | #.idea/ -------------------------------------------------------------------------------- /5_parallel_execution/fn_create_cloud_tasks/.gcloudignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danilo-nzyte/real_world_python_tutorials/e2649a8b862b80a7f27db1bc5af029c213fcc4e9/5_parallel_execution/fn_create_cloud_tasks/.gcloudignore -------------------------------------------------------------------------------- /5_parallel_execution/fn_create_cloud_tasks/Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | url = "https://pypi.org/simple" 3 | verify_ssl = true 4 | name = "pypi" 5 | 6 | [packages] 7 | functions-framework = "*" 8 | types-flask = "*" 9 | google-cloud-tasks = "*" 10 | 11 | [dev-packages] 12 | black = "*" 13 | 14 | [requires] 15 | python_version = "3.8" 16 | -------------------------------------------------------------------------------- /5_parallel_execution/fn_create_cloud_tasks/Pipfile.lock: -------------------------------------------------------------------------------- 1 | { 2 | "_meta": { 3 | "hash": { 4 | "sha256": "d1bf8db57b06f78191b2a8d01523e24b21b781d6c2ac4a7bfd6596f2e3ac12a0" 5 | }, 6 | "pipfile-spec": 6, 7 | "requires": { 8 | "python_version": "3.8" 9 | }, 10 | "sources": [ 11 | { 12 | "name": "pypi", 13 | "url": "https://pypi.org/simple", 14 | "verify_ssl": true 15 | } 16 | ] 17 | }, 18 | "default": { 19 | "cachetools": { 20 | "hashes": [ 21 | "sha256:6a94c6402995a99c3970cc7e4884bb60b4a8639938157eeed436098bf9831757", 22 | "sha256:f9f17d2aec496a9aa6b76f53e3b614c965223c061982d434d160f930c698a9db" 23 | ], 24 | "markers": "python_version ~= '3.7'", 25 | "version": "==5.2.0" 26 | }, 27 | "certifi": { 28 | "hashes": [ 29 | "sha256:35824b4c3a97115964b408844d64aa14db1cc518f6562e8d7261699d1350a9e3", 30 | "sha256:4ad3232f5e926d6718ec31cfc1fcadfde020920e278684144551c91769c7bc18" 31 | ], 32 | "markers": "python_version >= '3.6'", 33 | "version": "==2022.12.7" 34 | }, 35 | "charset-normalizer": { 36 | "hashes": [ 37 | "sha256:5a3d016c7c547f69d6f81fb0db9449ce888b418b5b9952cc5e6e66843e9dd845", 38 | "sha256:83e9a75d1911279afd89352c68b45348559d1fc0506b054b346651b5e7fee29f" 39 | ], 40 | "markers": "python_full_version >= '3.6.0'", 41 | "version": "==2.1.1" 42 | }, 43 | "click": { 44 | "hashes": [ 45 | "sha256:7682dc8afb30297001674575ea00d1814d808d6a36af415a82bd481d37ba7b8e", 46 | "sha256:bb4d8133cb15a609f44e8213d9b391b0809795062913b383c62be0ee95b1db48" 47 | ], 48 | "markers": "python_version >= '3.7'", 49 | "version": "==8.1.3" 50 | }, 51 | "cloudevents": { 52 | "hashes": [ 53 | "sha256:03009fbff3192fa1a794bdf78b0c752e4649b6c8c4eae49c1855b309a937d264", 54 | "sha256:ecdcc791581817a48a86b9253f19b025dbf794afcbd9f2abb9721a420e7b719c" 55 | ], 56 | "version": "==1.8.0" 57 | }, 58 | "deprecation": { 59 | "hashes": [ 60 | "sha256:72b3bde64e5d778694b0cf68178aed03d15e15477116add3fb773e581f9518ff", 61 | "sha256:a10811591210e1fb0e768a8c25517cabeabcba6f0bf96564f8ff45189f90b14a" 62 | ], 63 | "version": "==2.1.0" 64 | }, 65 | "flask": { 66 | "hashes": [ 67 | "sha256:642c450d19c4ad482f96729bd2a8f6d32554aa1e231f4f6b4e7e5264b16cca2b", 68 | "sha256:b9c46cc36662a7949f34b52d8ec7bb59c0d74ba08ba6cb9ce9adc1d8676d9526" 69 | ], 70 | "markers": "python_version >= '3.7'", 71 | "version": "==2.2.2" 72 | }, 73 | "functions-framework": { 74 | "hashes": [ 75 | "sha256:b61ef4a25f2bec0e035551171d9910875df0794433339998abceac95fc2e259b", 76 | 
"sha256:e6c97d2b009f4f0428c483c5a23bae3192a96107c9f1217cb1ecfa456c5375cc" 77 | ], 78 | "index": "pypi", 79 | "version": "==3.3.0" 80 | }, 81 | "google-api-core": { 82 | "extras": [ 83 | "grpc" 84 | ], 85 | "hashes": [ 86 | "sha256:4b9bb5d5a380a0befa0573b302651b8a9a89262c1730e37bf423cec511804c22", 87 | "sha256:ce222e27b0de0d7bc63eb043b956996d6dccab14cc3b690aaea91c9cc99dc16e" 88 | ], 89 | "markers": "python_version >= '3.7'", 90 | "version": "==2.11.0" 91 | }, 92 | "google-auth": { 93 | "hashes": [ 94 | "sha256:6897b93556d8d807ad70701bb89f000183aea366ca7ed94680828b37437a4994", 95 | "sha256:72f12a6cfc968d754d7bdab369c5c5c16032106e52d32c6dfd8484e4c01a6d1f" 96 | ], 97 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'", 98 | "version": "==2.15.0" 99 | }, 100 | "google-cloud-tasks": { 101 | "hashes": [ 102 | "sha256:4c2dae0e70fde4d531d4b2cb2e8b592bdb4977a25665ae0c8e2a5c13a934ecca", 103 | "sha256:d1af8d235cc535965337e97f5824f0758a4a8dd92e373a23a0f5785e9f7ecd84" 104 | ], 105 | "index": "pypi", 106 | "version": "==2.11.0" 107 | }, 108 | "googleapis-common-protos": { 109 | "hashes": [ 110 | "sha256:27a849d6205838fb6cc3c1c21cb9800707a661bb21c6ce7fb13e99eb1f8a0c46", 111 | "sha256:a9f4a1d7f6d9809657b7f1316a1aa527f6664891531bcfcc13b6696e685f443c" 112 | ], 113 | "markers": "python_version >= '3.7'", 114 | "version": "==1.57.0" 115 | }, 116 | "grpc-google-iam-v1": { 117 | "hashes": [ 118 | "sha256:312801ae848aeb8408c099ea372b96d253077e7851aae1a9e745df984f81f20c", 119 | "sha256:3f0ac2c940b9a855d7ce7e31fde28bddb0d9ac362d32d07c67148306931a0e30" 120 | ], 121 | "markers": "python_version >= '3.6'", 122 | "version": "==0.12.4" 123 | }, 124 | "grpcio": { 125 | "hashes": [ 126 | "sha256:094e64236253590d9d4075665c77b329d707b6fca864dd62b144255e199b4f87", 127 | "sha256:0dc5354e38e5adf2498312f7241b14c7ce3484eefa0082db4297189dcbe272e6", 128 | "sha256:0e1a9e1b4a23808f1132aa35f968cd8e659f60af3ffd6fb00bcf9a65e7db279f", 129 | "sha256:0fb93051331acbb75b49a2a0fd9239c6ba9528f6bdc1dd400ad1cb66cf864292", 130 | "sha256:16c71740640ba3a882f50b01bf58154681d44b51f09a5728180a8fdc66c67bd5", 131 | "sha256:172405ca6bdfedd6054c74c62085946e45ad4d9cec9f3c42b4c9a02546c4c7e9", 132 | "sha256:17ec9b13cec4a286b9e606b48191e560ca2f3bbdf3986f91e480a95d1582e1a7", 133 | "sha256:22b011674090594f1f3245960ced7386f6af35485a38901f8afee8ad01541dbd", 134 | "sha256:24ac1154c4b2ab4a0c5326a76161547e70664cd2c39ba75f00fc8a2170964ea2", 135 | "sha256:257478300735ce3c98d65a930bbda3db172bd4e00968ba743e6a1154ea6edf10", 136 | "sha256:29cb97d41a4ead83b7bcad23bdb25bdd170b1e2cba16db6d3acbb090bc2de43c", 137 | "sha256:2b170eaf51518275c9b6b22ccb59450537c5a8555326fd96ff7391b5dd75303c", 138 | "sha256:31bb6bc7ff145e2771c9baf612f4b9ebbc9605ccdc5f3ff3d5553de7fc0e0d79", 139 | "sha256:3c2b3842dcf870912da31a503454a33a697392f60c5e2697c91d133130c2c85d", 140 | "sha256:3f9b0023c2c92bebd1be72cdfca23004ea748be1813a66d684d49d67d836adde", 141 | "sha256:471d39d3370ca923a316d49c8aac66356cea708a11e647e3bdc3d0b5de4f0a40", 142 | "sha256:49d680356a975d9c66a678eb2dde192d5dc427a7994fb977363634e781614f7c", 143 | "sha256:4c4423ea38a7825b8fed8934d6d9aeebdf646c97e3c608c3b0bcf23616f33877", 144 | "sha256:506b9b7a4cede87d7219bfb31014d7b471cfc77157da9e820a737ec1ea4b0663", 145 | "sha256:538d981818e49b6ed1e9c8d5e5adf29f71c4e334e7d459bf47e9b7abb3c30e09", 146 | "sha256:59dffade859f157bcc55243714d57b286da6ae16469bf1ac0614d281b5f49b67", 147 | "sha256:5a6ebcdef0ef12005d56d38be30f5156d1cb3373b52e96f147f4a24b0ddb3a9d", 148 | 
"sha256:5dca372268c6ab6372d37d6b9f9343e7e5b4bc09779f819f9470cd88b2ece3c3", 149 | "sha256:6df3b63538c362312bc5fa95fb965069c65c3ea91d7ce78ad9c47cab57226f54", 150 | "sha256:6f0b89967ee11f2b654c23b27086d88ad7bf08c0b3c2a280362f28c3698b2896", 151 | "sha256:75e29a90dc319f0ad4d87ba6d20083615a00d8276b51512e04ad7452b5c23b04", 152 | "sha256:7942b32a291421460d6a07883033e392167d30724aa84987e6956cd15f1a21b9", 153 | "sha256:9235dcd5144a83f9ca6f431bd0eccc46b90e2c22fe27b7f7d77cabb2fb515595", 154 | "sha256:97d67983189e2e45550eac194d6234fc38b8c3b5396c153821f2d906ed46e0ce", 155 | "sha256:9ff42c5620b4e4530609e11afefa4a62ca91fa0abb045a8957e509ef84e54d30", 156 | "sha256:a8a0b77e992c64880e6efbe0086fe54dfc0bbd56f72a92d9e48264dcd2a3db98", 157 | "sha256:aacb54f7789ede5cbf1d007637f792d3e87f1c9841f57dd51abf89337d1b8472", 158 | "sha256:bc59f7ba87972ab236f8669d8ca7400f02a0eadf273ca00e02af64d588046f02", 159 | "sha256:cc2bece1737b44d878cc1510ea04469a8073dbbcdd762175168937ae4742dfb3", 160 | "sha256:cd3baccea2bc5c38aeb14e5b00167bd4e2373a373a5e4d8d850bd193edad150c", 161 | "sha256:dad6533411d033b77f5369eafe87af8583178efd4039c41d7515d3336c53b4f1", 162 | "sha256:e223a9793522680beae44671b9ed8f6d25bbe5ddf8887e66aebad5e0686049ef", 163 | "sha256:e473525c28251558337b5c1ad3fa969511e42304524a4e404065e165b084c9e4", 164 | "sha256:e4ef09f8997c4be5f3504cefa6b5c6cc3cf648274ce3cede84d4342a35d76db6", 165 | "sha256:e6dfc2b6567b1c261739b43d9c59d201c1b89e017afd9e684d85aa7a186c9f7a", 166 | "sha256:eacad297ea60c72dd280d3353d93fb1dcca952ec11de6bb3c49d12a572ba31dd", 167 | "sha256:f1158bccbb919da42544a4d3af5d9296a3358539ffa01018307337365a9a0c64", 168 | "sha256:f1fec3abaf274cdb85bf3878167cfde5ad4a4d97c68421afda95174de85ba813", 169 | "sha256:f96ace1540223f26fbe7c4ebbf8a98e3929a6aa0290c8033d12526847b291c0f", 170 | "sha256:fbdbe9a849854fe484c00823f45b7baab159bdd4a46075302281998cb8719df5" 171 | ], 172 | "version": "==1.51.1" 173 | }, 174 | "grpcio-status": { 175 | "hashes": [ 176 | "sha256:a52cbdc4b18f325bfc13d319ae7c7ae7a0fee07f3d9a005504d6097896d7a495", 177 | "sha256:ac2617a3095935ebd785e2228958f24b10a0d527a0c9eb5a0863c784f648a816" 178 | ], 179 | "version": "==1.51.1" 180 | }, 181 | "gunicorn": { 182 | "hashes": [ 183 | "sha256:9dcc4547dbb1cb284accfb15ab5667a0e5d1881cc443e0677b4882a4067a807e", 184 | "sha256:e0a968b5ba15f8a328fdfd7ab1fcb5af4470c28aaf7e55df02a99bc13138e6e8" 185 | ], 186 | "markers": "platform_system != 'Windows'", 187 | "version": "==20.1.0" 188 | }, 189 | "idna": { 190 | "hashes": [ 191 | "sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4", 192 | "sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2" 193 | ], 194 | "markers": "python_version >= '3.5'", 195 | "version": "==3.4" 196 | }, 197 | "importlib-metadata": { 198 | "hashes": [ 199 | "sha256:0eafa39ba42bf225fc00e67f701d71f85aead9f878569caf13c3724f704b970f", 200 | "sha256:404d48d62bba0b7a77ff9d405efd91501bef2e67ff4ace0bed40a0cf28c3c7cd" 201 | ], 202 | "markers": "python_version < '3.10'", 203 | "version": "==5.2.0" 204 | }, 205 | "itsdangerous": { 206 | "hashes": [ 207 | "sha256:2c2349112351b88699d8d4b6b075022c0808887cb7ad10069318a8b0bc88db44", 208 | "sha256:5dbbc68b317e5e42f327f9021763545dc3fc3bfe22e6deb96aaf1fc38874156a" 209 | ], 210 | "markers": "python_version >= '3.7'", 211 | "version": "==2.1.2" 212 | }, 213 | "jinja2": { 214 | "hashes": [ 215 | "sha256:31351a702a408a9e7595a8fc6150fc3f43bb6bf7e319770cbc0db9df9437e852", 216 | "sha256:6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61" 217 | ], 218 | "markers": 
"python_version >= '3.7'", 219 | "version": "==3.1.2" 220 | }, 221 | "markupsafe": { 222 | "hashes": [ 223 | "sha256:0212a68688482dc52b2d45013df70d169f542b7394fc744c02a57374a4207003", 224 | "sha256:089cf3dbf0cd6c100f02945abeb18484bd1ee57a079aefd52cffd17fba910b88", 225 | "sha256:10c1bfff05d95783da83491be968e8fe789263689c02724e0c691933c52994f5", 226 | "sha256:33b74d289bd2f5e527beadcaa3f401e0df0a89927c1559c8566c066fa4248ab7", 227 | "sha256:3799351e2336dc91ea70b034983ee71cf2f9533cdff7c14c90ea126bfd95d65a", 228 | "sha256:3ce11ee3f23f79dbd06fb3d63e2f6af7b12db1d46932fe7bd8afa259a5996603", 229 | "sha256:421be9fbf0ffe9ffd7a378aafebbf6f4602d564d34be190fc19a193232fd12b1", 230 | "sha256:43093fb83d8343aac0b1baa75516da6092f58f41200907ef92448ecab8825135", 231 | "sha256:46d00d6cfecdde84d40e572d63735ef81423ad31184100411e6e3388d405e247", 232 | "sha256:4a33dea2b688b3190ee12bd7cfa29d39c9ed176bda40bfa11099a3ce5d3a7ac6", 233 | "sha256:4b9fe39a2ccc108a4accc2676e77da025ce383c108593d65cc909add5c3bd601", 234 | "sha256:56442863ed2b06d19c37f94d999035e15ee982988920e12a5b4ba29b62ad1f77", 235 | "sha256:671cd1187ed5e62818414afe79ed29da836dde67166a9fac6d435873c44fdd02", 236 | "sha256:694deca8d702d5db21ec83983ce0bb4b26a578e71fbdbd4fdcd387daa90e4d5e", 237 | "sha256:6a074d34ee7a5ce3effbc526b7083ec9731bb3cbf921bbe1d3005d4d2bdb3a63", 238 | "sha256:6d0072fea50feec76a4c418096652f2c3238eaa014b2f94aeb1d56a66b41403f", 239 | "sha256:6fbf47b5d3728c6aea2abb0589b5d30459e369baa772e0f37a0320185e87c980", 240 | "sha256:7f91197cc9e48f989d12e4e6fbc46495c446636dfc81b9ccf50bb0ec74b91d4b", 241 | "sha256:86b1f75c4e7c2ac2ccdaec2b9022845dbb81880ca318bb7a0a01fbf7813e3812", 242 | "sha256:8dc1c72a69aa7e082593c4a203dcf94ddb74bb5c8a731e4e1eb68d031e8498ff", 243 | "sha256:8e3dcf21f367459434c18e71b2a9532d96547aef8a871872a5bd69a715c15f96", 244 | "sha256:8e576a51ad59e4bfaac456023a78f6b5e6e7651dcd383bcc3e18d06f9b55d6d1", 245 | "sha256:96e37a3dc86e80bf81758c152fe66dbf60ed5eca3d26305edf01892257049925", 246 | "sha256:97a68e6ada378df82bc9f16b800ab77cbf4b2fada0081794318520138c088e4a", 247 | "sha256:99a2a507ed3ac881b975a2976d59f38c19386d128e7a9a18b7df6fff1fd4c1d6", 248 | "sha256:a49907dd8420c5685cfa064a1335b6754b74541bbb3706c259c02ed65b644b3e", 249 | "sha256:b09bf97215625a311f669476f44b8b318b075847b49316d3e28c08e41a7a573f", 250 | "sha256:b7bd98b796e2b6553da7225aeb61f447f80a1ca64f41d83612e6139ca5213aa4", 251 | "sha256:b87db4360013327109564f0e591bd2a3b318547bcef31b468a92ee504d07ae4f", 252 | "sha256:bcb3ed405ed3222f9904899563d6fc492ff75cce56cba05e32eff40e6acbeaa3", 253 | "sha256:d4306c36ca495956b6d568d276ac11fdd9c30a36f1b6eb928070dc5360b22e1c", 254 | "sha256:d5ee4f386140395a2c818d149221149c54849dfcfcb9f1debfe07a8b8bd63f9a", 255 | "sha256:dda30ba7e87fbbb7eab1ec9f58678558fd9a6b8b853530e176eabd064da81417", 256 | "sha256:e04e26803c9c3851c931eac40c695602c6295b8d432cbe78609649ad9bd2da8a", 257 | "sha256:e1c0b87e09fa55a220f058d1d49d3fb8df88fbfab58558f1198e08c1e1de842a", 258 | "sha256:e72591e9ecd94d7feb70c1cbd7be7b3ebea3f548870aa91e2732960fa4d57a37", 259 | "sha256:e8c843bbcda3a2f1e3c2ab25913c80a3c5376cd00c6e8c4a86a89a28c8dc5452", 260 | "sha256:efc1913fd2ca4f334418481c7e595c00aad186563bbc1ec76067848c7ca0a933", 261 | "sha256:f121a1420d4e173a5d96e47e9a0c0dcff965afdf1626d28de1460815f7c4ee7a", 262 | "sha256:fc7b548b17d238737688817ab67deebb30e8073c95749d55538ed473130ec0c7" 263 | ], 264 | "markers": "python_version >= '3.7'", 265 | "version": "==2.1.1" 266 | }, 267 | "packaging": { 268 | "hashes": [ 269 | "sha256:2198ec20bd4c017b8f9717e00f0c8714076fc2fd93816750ab48e2c41de2cfd3", 270 
| "sha256:957e2148ba0e1a3b282772e791ef1d8083648bc131c8ab0c1feba110ce1146c3" 271 | ], 272 | "markers": "python_version >= '3.7'", 273 | "version": "==22.0" 274 | }, 275 | "proto-plus": { 276 | "hashes": [ 277 | "sha256:6c7dfd122dfef8019ff654746be4f5b1d9c80bba787fe9611b508dd88be3a2fa", 278 | "sha256:ea8982669a23c379f74495bc48e3dcb47c822c484ce8ee1d1d7beb339d4e34c5" 279 | ], 280 | "markers": "python_version >= '3.6'", 281 | "version": "==1.22.1" 282 | }, 283 | "protobuf": { 284 | "hashes": [ 285 | "sha256:1f22ac0ca65bb70a876060d96d914dae09ac98d114294f77584b0d2644fa9c30", 286 | "sha256:237216c3326d46808a9f7c26fd1bd4b20015fb6867dc5d263a493ef9a539293b", 287 | "sha256:27f4d15021da6d2b706ddc3860fac0a5ddaba34ab679dc182b60a8bb4e1121cc", 288 | "sha256:299ea899484ee6f44604deb71f424234f654606b983cb496ea2a53e3c63ab791", 289 | "sha256:3d164928ff0727d97022957c2b849250ca0e64777ee31efd7d6de2e07c494717", 290 | "sha256:6ab80df09e3208f742c98443b6166bcb70d65f52cfeb67357d52032ea1ae9bec", 291 | "sha256:78a28c9fa223998472886c77042e9b9afb6fe4242bd2a2a5aced88e3f4422aa7", 292 | "sha256:7cd532c4566d0e6feafecc1059d04c7915aec8e182d1cf7adee8b24ef1e2e6ab", 293 | "sha256:89f9149e4a0169cddfc44c74f230d7743002e3aa0b9472d8c28f0388102fc4c2", 294 | "sha256:a53fd3f03e578553623272dc46ac2f189de23862e68565e83dde203d41b76fc5", 295 | "sha256:b135410244ebe777db80298297a97fbb4c862c881b4403b71bac9d4107d61fd1", 296 | "sha256:b98d0148f84e3a3c569e19f52103ca1feacdac0d2df8d6533cf983d1fda28462", 297 | "sha256:d1736130bce8cf131ac7957fa26880ca19227d4ad68b4888b3be0dea1f95df97", 298 | "sha256:f45460f9ee70a0ec1b6694c6e4e348ad2019275680bd68a1d9314b8c7e01e574" 299 | ], 300 | "markers": "python_version >= '3.7'", 301 | "version": "==4.21.12" 302 | }, 303 | "pyasn1": { 304 | "hashes": [ 305 | "sha256:014c0e9976956a08139dc0712ae195324a75e142284d5f87f1a87ee1b068a359", 306 | "sha256:03840c999ba71680a131cfaee6fab142e1ed9bbd9c693e285cc6aca0d555e576", 307 | "sha256:0458773cfe65b153891ac249bcf1b5f8f320b7c2ce462151f8fa74de8934becf", 308 | "sha256:08c3c53b75eaa48d71cf8c710312316392ed40899cb34710d092e96745a358b7", 309 | "sha256:39c7e2ec30515947ff4e87fb6f456dfc6e84857d34be479c9d4a4ba4bf46aa5d", 310 | "sha256:5c9414dcfede6e441f7e8f81b43b34e834731003427e5b09e4e00e3172a10f00", 311 | "sha256:6e7545f1a61025a4e58bb336952c5061697da694db1cae97b116e9c46abcf7c8", 312 | "sha256:78fa6da68ed2727915c4767bb386ab32cdba863caa7dbe473eaae45f9959da86", 313 | "sha256:7ab8a544af125fb704feadb008c99a88805126fb525280b2270bb25cc1d78a12", 314 | "sha256:99fcc3c8d804d1bc6d9a099921e39d827026409a58f2a720dcdb89374ea0c776", 315 | "sha256:aef77c9fb94a3ac588e87841208bdec464471d9871bd5050a287cc9a475cd0ba", 316 | "sha256:e89bf84b5437b532b0803ba5c9a5e054d21fec423a89952a74f87fa2c9b7bce2", 317 | "sha256:fec3e9d8e36808a28efb59b489e4528c10ad0f480e57dcc32b4de5c9d8c9fdf3" 318 | ], 319 | "version": "==0.4.8" 320 | }, 321 | "pyasn1-modules": { 322 | "hashes": [ 323 | "sha256:0845a5582f6a02bb3e1bde9ecfc4bfcae6ec3210dd270522fee602365430c3f8", 324 | "sha256:0fe1b68d1e486a1ed5473f1302bd991c1611d319bba158e98b106ff86e1d7199", 325 | "sha256:15b7c67fabc7fc240d87fb9aabf999cf82311a6d6fb2c70d00d3d0604878c811", 326 | "sha256:426edb7a5e8879f1ec54a1864f16b882c2837bfd06eee62f2c982315ee2473ed", 327 | "sha256:65cebbaffc913f4fe9e4808735c95ea22d7a7775646ab690518c056784bc21b4", 328 | "sha256:905f84c712230b2c592c19470d3ca8d552de726050d1d1716282a1f6146be65e", 329 | "sha256:a50b808ffeb97cb3601dd25981f6b016cbb3d31fbf57a8b8a87428e6158d0c74", 330 | "sha256:a99324196732f53093a84c4369c996713eb8c89d360a496b599fb1a9c47fc3eb", 331 | 
"sha256:b80486a6c77252ea3a3e9b1e360bc9cf28eaac41263d173c032581ad2f20fe45", 332 | "sha256:c29a5e5cc7a3f05926aff34e097e84f8589cd790ce0ed41b67aed6857b26aafd", 333 | "sha256:cbac4bc38d117f2a49aeedec4407d23e8866ea4ac27ff2cf7fb3e5b570df19e0", 334 | "sha256:f39edd8c4ecaa4556e989147ebf219227e2cd2e8a43c7e7fcb1f1c18c5fd6a3d", 335 | "sha256:fe0644d9ab041506b62782e92b06b8c68cca799e1a9636ec398675459e031405" 336 | ], 337 | "version": "==0.2.8" 338 | }, 339 | "requests": { 340 | "hashes": [ 341 | "sha256:7c5599b102feddaa661c826c56ab4fee28bfd17f5abca1ebbe3e7f19d7c97983", 342 | "sha256:8fefa2a1a1365bf5520aac41836fbee479da67864514bdb821f31ce07ce65349" 343 | ], 344 | "markers": "python_version >= '3.7' and python_version < '4'", 345 | "version": "==2.28.1" 346 | }, 347 | "rsa": { 348 | "hashes": [ 349 | "sha256:90260d9058e514786967344d0ef75fa8727eed8a7d2e43ce9f4bcf1b536174f7", 350 | "sha256:e38464a49c6c85d7f1351b0126661487a7e0a14a50f1675ec50eb34d4f20ef21" 351 | ], 352 | "markers": "python_version >= '3.6'", 353 | "version": "==4.9" 354 | }, 355 | "setuptools": { 356 | "hashes": [ 357 | "sha256:57f6f22bde4e042978bcd50176fdb381d7c21a9efa4041202288d3737a0c6a54", 358 | "sha256:a7620757bf984b58deaf32fc8a4577a9bbc0850cf92c20e1ce41c38c19e5fb75" 359 | ], 360 | "markers": "python_version >= '3.7'", 361 | "version": "==65.6.3" 362 | }, 363 | "six": { 364 | "hashes": [ 365 | "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926", 366 | "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254" 367 | ], 368 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", 369 | "version": "==1.16.0" 370 | }, 371 | "types-click": { 372 | "hashes": [ 373 | "sha256:8cb030a669e2e927461be9827375f83c16b8178c365852c060a34e24871e7e81", 374 | "sha256:b6604968be6401dc516311ca50708a0a28baa7a0cb840efd7412f0dbbff4e092" 375 | ], 376 | "version": "==7.1.8" 377 | }, 378 | "types-flask": { 379 | "hashes": [ 380 | "sha256:6ab8a9a5e258b76539d652f6341408867298550b19b81f0e41e916825fc39087", 381 | "sha256:aac777b3abfff9436e6b01f6d08171cf23ea6e5be71cbf773aaabb1c5763e9cf" 382 | ], 383 | "index": "pypi", 384 | "version": "==1.1.6" 385 | }, 386 | "types-jinja2": { 387 | "hashes": [ 388 | "sha256:60a1e21e8296979db32f9374d8a239af4cb541ff66447bb915d8ad398f9c63b2", 389 | "sha256:dbdc74a40aba7aed520b7e4d89e8f0fe4286518494208b35123bcf084d4b8c81" 390 | ], 391 | "version": "==2.11.9" 392 | }, 393 | "types-markupsafe": { 394 | "hashes": [ 395 | "sha256:85b3a872683d02aea3a5ac2a8ef590193c344092032f58457287fbf8e06711b1", 396 | "sha256:ca2bee0f4faafc45250602567ef38d533e877d2ddca13003b319c551ff5b3cc5" 397 | ], 398 | "version": "==1.1.10" 399 | }, 400 | "types-werkzeug": { 401 | "hashes": [ 402 | "sha256:194bd5715a13c598f05c63e8a739328657590943bce941e8a3619a6b5d4a54ec", 403 | "sha256:5cc269604c400133d452a40cee6397655f878fc460e03fde291b9e3a5eaa518c" 404 | ], 405 | "version": "==1.0.9" 406 | }, 407 | "urllib3": { 408 | "hashes": [ 409 | "sha256:47cc05d99aaa09c9e72ed5809b60e7ba354e64b59c9c173ac3018642d8bb41fc", 410 | "sha256:c083dd0dce68dbfbe1129d5271cb90f9447dea7d52097c6e0126120c521ddea8" 411 | ], 412 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'", 413 | "version": "==1.26.13" 414 | }, 415 | "watchdog": { 416 | "hashes": [ 417 | "sha256:1893d425ef4fb4f129ee8ef72226836619c2950dd0559bba022b0818c63a7b60", 418 | "sha256:1a410dd4d0adcc86b4c71d1317ba2ea2c92babaf5b83321e4bde2514525544d5", 419 | 
"sha256:1f2b0665c57358ce9786f06f5475bc083fea9d81ecc0efa4733fd0c320940a37", 420 | "sha256:1f8eca9d294a4f194ce9df0d97d19b5598f310950d3ac3dd6e8d25ae456d4c8a", 421 | "sha256:27e49268735b3c27310883012ab3bd86ea0a96dcab90fe3feb682472e30c90f3", 422 | "sha256:28704c71afdb79c3f215c90231e41c52b056ea880b6be6cee035c6149d658ed1", 423 | "sha256:2ac0bd7c206bb6df78ef9e8ad27cc1346f2b41b1fef610395607319cdab89bc1", 424 | "sha256:2af1a29fd14fc0a87fb6ed762d3e1ae5694dcde22372eebba50e9e5be47af03c", 425 | "sha256:3a048865c828389cb06c0bebf8a883cec3ae58ad3e366bcc38c61d8455a3138f", 426 | "sha256:441024df19253bb108d3a8a5de7a186003d68564084576fecf7333a441271ef7", 427 | "sha256:56fb3f40fc3deecf6e518303c7533f5e2a722e377b12507f6de891583f1b48aa", 428 | "sha256:619d63fa5be69f89ff3a93e165e602c08ed8da402ca42b99cd59a8ec115673e1", 429 | "sha256:74535e955359d79d126885e642d3683616e6d9ab3aae0e7dcccd043bd5a3ff4f", 430 | "sha256:76a2743402b794629a955d96ea2e240bd0e903aa26e02e93cd2d57b33900962b", 431 | "sha256:83cf8bc60d9c613b66a4c018051873d6273d9e45d040eed06d6a96241bd8ec01", 432 | "sha256:920a4bda7daa47545c3201a3292e99300ba81ca26b7569575bd086c865889090", 433 | "sha256:9e99c1713e4436d2563f5828c8910e5ff25abd6ce999e75f15c15d81d41980b6", 434 | "sha256:a5bd9e8656d07cae89ac464ee4bcb6f1b9cecbedc3bf1334683bed3d5afd39ba", 435 | "sha256:ad0150536469fa4b693531e497ffe220d5b6cd76ad2eda474a5e641ee204bbb6", 436 | "sha256:af4b5c7ba60206759a1d99811b5938ca666ea9562a1052b410637bb96ff97512", 437 | "sha256:c7bd98813d34bfa9b464cf8122e7d4bec0a5a427399094d2c17dd5f70d59bc61", 438 | "sha256:ceaa9268d81205876bedb1069f9feab3eccddd4b90d9a45d06a0df592a04cae9", 439 | "sha256:cf05e6ff677b9655c6e9511d02e9cc55e730c4e430b7a54af9c28912294605a4", 440 | "sha256:d0fb5f2b513556c2abb578c1066f5f467d729f2eb689bc2db0739daf81c6bb7e", 441 | "sha256:d6ae890798a3560688b441ef086bb66e87af6b400a92749a18b856a134fc0318", 442 | "sha256:e5aed2a700a18c194c39c266900d41f3db0c1ebe6b8a0834b9995c835d2ca66e", 443 | "sha256:e722755d995035dd32177a9c633d158f2ec604f2a358b545bba5bed53ab25bca", 444 | "sha256:ed91c3ccfc23398e7aa9715abf679d5c163394b8cad994f34f156d57a7c163dc" 445 | ], 446 | "markers": "python_version >= '3.6'", 447 | "version": "==2.2.0" 448 | }, 449 | "werkzeug": { 450 | "hashes": [ 451 | "sha256:7ea2d48322cc7c0f8b3a215ed73eabd7b5d75d0b50e31ab006286ccff9e00b8f", 452 | "sha256:f979ab81f58d7318e064e99c4506445d60135ac5cd2e177a2de0089bfd4c9bd5" 453 | ], 454 | "markers": "python_version >= '3.7'", 455 | "version": "==2.2.2" 456 | }, 457 | "zipp": { 458 | "hashes": [ 459 | "sha256:83a28fcb75844b5c0cdaf5aa4003c2d728c77e05f5aeabe8e95e56727005fbaa", 460 | "sha256:a7a22e05929290a67401440b39690ae6563279bced5f314609d9d03798f56766" 461 | ], 462 | "markers": "python_version >= '3.7'", 463 | "version": "==3.11.0" 464 | } 465 | }, 466 | "develop": { 467 | "black": { 468 | "hashes": [ 469 | "sha256:101c69b23df9b44247bd88e1d7e90154336ac4992502d4197bdac35dd7ee3320", 470 | "sha256:159a46a4947f73387b4d83e87ea006dbb2337eab6c879620a3ba52699b1f4351", 471 | "sha256:1f58cbe16dfe8c12b7434e50ff889fa479072096d79f0a7f25e4ab8e94cd8350", 472 | "sha256:229351e5a18ca30f447bf724d007f890f97e13af070bb6ad4c0a441cd7596a2f", 473 | "sha256:436cc9167dd28040ad90d3b404aec22cedf24a6e4d7de221bec2730ec0c97bcf", 474 | "sha256:559c7a1ba9a006226f09e4916060982fd27334ae1998e7a38b3f33a37f7a2148", 475 | "sha256:7412e75863aa5c5411886804678b7d083c7c28421210180d67dfd8cf1221e1f4", 476 | "sha256:77d86c9f3db9b1bf6761244bc0b3572a546f5fe37917a044e02f3166d5aafa7d", 477 | "sha256:82d9fe8fee3401e02e79767016b4907820a7dc28d70d137eb397b92ef3cc5bfc", 
478 | "sha256:9eedd20838bd5d75b80c9f5487dbcb06836a43833a37846cf1d8c1cc01cef59d", 479 | "sha256:c116eed0efb9ff870ded8b62fe9f28dd61ef6e9ddd28d83d7d264a38417dcee2", 480 | "sha256:d30b212bffeb1e252b31dd269dfae69dd17e06d92b87ad26e23890f3efea366f" 481 | ], 482 | "index": "pypi", 483 | "version": "==22.12.0" 484 | }, 485 | "click": { 486 | "hashes": [ 487 | "sha256:7682dc8afb30297001674575ea00d1814d808d6a36af415a82bd481d37ba7b8e", 488 | "sha256:bb4d8133cb15a609f44e8213d9b391b0809795062913b383c62be0ee95b1db48" 489 | ], 490 | "markers": "python_version >= '3.7'", 491 | "version": "==8.1.3" 492 | }, 493 | "mypy-extensions": { 494 | "hashes": [ 495 | "sha256:090fedd75945a69ae91ce1303b5824f428daf5a028d2f6ab8a299250a846f15d", 496 | "sha256:2d82818f5bb3e369420cb3c4060a7970edba416647068eb4c5343488a6c604a8" 497 | ], 498 | "version": "==0.4.3" 499 | }, 500 | "pathspec": { 501 | "hashes": [ 502 | "sha256:3c95343af8b756205e2aba76e843ba9520a24dd84f68c22b9f93251507509dd6", 503 | "sha256:56200de4077d9d0791465aa9095a01d421861e405b5096955051deefd697d6f6" 504 | ], 505 | "markers": "python_version >= '3.7'", 506 | "version": "==0.10.3" 507 | }, 508 | "platformdirs": { 509 | "hashes": [ 510 | "sha256:83c8f6d04389165de7c9b6f0c682439697887bca0aa2f1c87ef1826be3584490", 511 | "sha256:e1fea1fe471b9ff8332e229df3cb7de4f53eeea4998d3b6bfff542115e998bd2" 512 | ], 513 | "markers": "python_version >= '3.7'", 514 | "version": "==2.6.2" 515 | }, 516 | "tomli": { 517 | "hashes": [ 518 | "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc", 519 | "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f" 520 | ], 521 | "markers": "python_full_version < '3.11.0a7'", 522 | "version": "==2.0.1" 523 | }, 524 | "typing-extensions": { 525 | "hashes": [ 526 | "sha256:1511434bb92bf8dd198c12b1cc812e800d4181cfcb867674e0f8279cc93087aa", 527 | "sha256:16fa4864408f655d35ec496218b85f79b3437c829e93320c7c9215ccfd92489e" 528 | ], 529 | "markers": "python_version < '3.10'", 530 | "version": "==4.4.0" 531 | } 532 | } 533 | } 534 | -------------------------------------------------------------------------------- /5_parallel_execution/fn_create_cloud_tasks/README.md: -------------------------------------------------------------------------------- 1 | ## Details 2 | There must be a payload sent with the request to trigger the function. 3 | 4 | The JSON object must contain a `data` record with the following arguments: 5 | 6 | - **project**: The GCP project ID 7 | - **region**: The GCP region e.g. "europe-west2" 8 | - **service_account**: The service account to use to authenticate with Google Cloud Tasks 9 | - **number_of_iterations**: How many tasks to create with a different iteration passed along with each task 10 | - **function_url**: The GCP Function URL 11 | - **queue**: The name of the Cloud Task queue 12 | - **task_request**: The payload to send in the task 13 | 14 | 15 | ## Setup 16 | Prepare environment with pipenv. 
17 | 
18 | ```shell
19 | pipenv install
20 | pipenv shell
21 | ```
22 | 
23 | ## Functions-Framework
24 | 
25 | ```shell
26 | functions-framework --target generate_tasks --debug
27 | ```
28 | 
29 | ## Deploy Function
30 | ```shell
31 | ./deploy.sh
32 | ```
-------------------------------------------------------------------------------- /5_parallel_execution/fn_create_cloud_tasks/ci-cd/cloudbuild.yaml:
--------------------------------------------------------------------------------
1 | steps:
2 | - name: 'gcr.io/google.com/cloudsdktool/cloud-sdk'
3 |   entrypoint: gcloud
4 |   args:
5 |   - 'alpha'
6 |   - 'functions'
7 |   - 'deploy'
8 |   - 'create-cloud-tasks'
9 |   - '--gen2'
10 |   - '--region'
11 |   - 'europe-west2'
12 |   - '--runtime'
13 |   - 'python38'
14 |   - '--source'
15 |   - '5_parallel_execution/fn_create_cloud_tasks'
16 |   - '--entry-point'
17 |   - 'generate_tasks'
18 |   - '--trigger-http'
19 |   - '--max-instances'
20 |   - '5'
21 |   - '--service-account'
22 |   - '${_SERVICE_ACCOUNT}'
23 |   - '--allow-unauthenticated'
24 | options:
25 |   logging: CLOUD_LOGGING_ONLY
-------------------------------------------------------------------------------- /5_parallel_execution/fn_create_cloud_tasks/deploy.sh:
--------------------------------------------------------------------------------
1 | gcloud alpha functions deploy create-cloud-tasks \
2 | --gen2 \
3 | --region=europe-west2 \
4 | --runtime=python38 \
5 | --source=. \
6 | --entry-point=generate_tasks \
7 | --trigger-http \
8 | --max-instances=5 \
9 | --service-account= \
10 | --allow-unauthenticated
-------------------------------------------------------------------------------- /5_parallel_execution/fn_create_cloud_tasks/local_test.sh:
--------------------------------------------------------------------------------
1 | curl -X POST localhost:8080 \
2 | -H "Content-Type: application/json" \
3 | -d '{
4 |   "data" : {
5 |     "project": ,
6 |     "region": ,
7 |     "service_account" : ,
8 |     "number_of_iterations" : 42,
9 |     "function_url" : ,
10 |     "queue" : "fn-ram-api-request",
11 |     "task_request" : {
12 |       "data" : {
13 |         "endpoint": "character",
14 |         "api_params": {}
15 |       }
16 |     }
17 |   }
18 | }
19 | '
-------------------------------------------------------------------------------- /5_parallel_execution/fn_create_cloud_tasks/main.py:
--------------------------------------------------------------------------------
1 | import os
2 | import math
3 | from dataclasses import dataclass
4 | from multiprocessing import Pool
5 | from functools import partial
6 | 
7 | import functions_framework
8 | import flask
9 | 
10 | from tasks import create_task
11 | 
12 | 
13 | @dataclass
14 | class Request:
15 |     project: str
16 |     region: str
17 |     service_account: str
18 |     number_of_iterations: int
19 |     function_url: str
20 |     queue: str
21 |     task_request: dict
22 | 
23 | 
24 | def _run_partial_func(function):
25 |     return function()
26 | 
27 | 
28 | @functions_framework.http
29 | def generate_tasks(request: flask.Request) -> str:
30 |     """
31 |     Create a single Google Cloud Task for each `number_of_iterations` with a `task_request` as the payload.
32 | 
33 |     The `request` is a flask.Request object that contains a `data` record in the following format:
34 |     {
35 |         ...,
36 |         data: {
37 |             project: str,
38 |             region: str,
39 |             service_account: str,
40 |             number_of_iterations: int,
41 |             function_url: str,
42 |             queue: str,
43 |             task_request: {
44 |                 data: {
45 |                     ...
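                    (an "iteration": int key is injected into this record for each task created)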
46 |                 }
47 |             }
48 |         }
49 |     """
50 |     print(f"Incoming Request: {request.get_json()}")
51 |     try:
52 |         data = request.get_json().get("data")
53 |         args = Request(**data)
54 |     except (TypeError, AttributeError):
55 |         return 'Invalid request. No "data" supplied in request.', 400
56 |     tasks = []
57 |     for iteration in range(1, args.number_of_iterations + 1):
58 |         task_request = args.task_request
59 |         task_request["data"] = {**task_request["data"], "iteration": iteration}
60 |         in_seconds = math.floor(iteration / 10) * 30  # 30 second delay every 10 tasks.
61 | 
62 |         tasks.append(
63 |             partial(
64 |                 create_task,
65 |                 args.project,
66 |                 args.queue,
67 |                 args.region,
68 |                 args.function_url,
69 |                 args.service_account,
70 |                 dict(args.task_request),
71 |                 in_seconds,
72 |             )
73 |         )
74 | 
75 |     with Pool(len(os.sched_getaffinity(0))) as p:
76 |         p.map(_run_partial_func, tasks)
77 | 
78 |     return "DONE"
79 | 
-------------------------------------------------------------------------------- /5_parallel_execution/fn_create_cloud_tasks/requirements.txt:
--------------------------------------------------------------------------------
1 | -i https://pypi.org/simple
2 | cachetools==5.2.0 ; python_version ~= '3.7'
3 | certifi==2022.12.7 ; python_version >= '3.6'
4 | charset-normalizer==2.1.1 ; python_full_version >= '3.6.0'
5 | click==8.1.3 ; python_version >= '3.7'
6 | cloudevents==1.8.0
7 | deprecation==2.1.0
8 | flask==2.2.2 ; python_version >= '3.7'
9 | functions-framework==3.3.0
10 | google-api-core[grpc]==2.11.0 ; python_version >= '3.7'
11 | google-auth==2.15.0 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'
12 | google-cloud-tasks==2.11.0
13 | googleapis-common-protos==1.57.0 ; python_version >= '3.7'
14 | grpc-google-iam-v1==0.12.4 ; python_version >= '3.6'
15 | grpcio==1.51.1
16 | grpcio-status==1.51.1
17 | gunicorn==20.1.0 ; platform_system != 'Windows'
18 | idna==3.4 ; python_version >= '3.5'
19 | importlib-metadata==5.2.0 ; python_version < '3.10'
20 | itsdangerous==2.1.2 ; python_version >= '3.7'
21 | jinja2==3.1.2 ; python_version >= '3.7'
22 | markupsafe==2.1.1 ; python_version >= '3.7'
23 | packaging==22.0 ; python_version >= '3.7'
24 | proto-plus==1.22.1 ; python_version >= '3.6'
25 | protobuf==4.21.12 ; python_version >= '3.7'
26 | pyasn1==0.4.8
27 | pyasn1-modules==0.2.8
28 | requests==2.28.1 ; python_version >= '3.7' and python_version < '4'
29 | rsa==4.9 ; python_version >= '3.6'
30 | setuptools==65.6.3 ; python_version >= '3.7'
31 | six==1.16.0 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
32 | types-click==7.1.8
33 | types-flask==1.1.6
34 | types-jinja2==2.11.9
35 | types-markupsafe==1.1.10
36 | types-werkzeug==1.0.9
37 | urllib3==1.26.13 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'
38 | watchdog==2.2.0 ; python_version >= '3.6'
39 | werkzeug==2.2.2 ; python_version >= '3.7'
40 | zipp==3.11.0 ; python_version >= '3.7'
41 | 
-------------------------------------------------------------------------------- /5_parallel_execution/fn_create_cloud_tasks/tasks.py:
--------------------------------------------------------------------------------
1 | import datetime
2 | import json
3 | 
4 | from google.cloud import tasks_v2
5 | from google.protobuf import duration_pb2, timestamp_pb2
6 | 
7 | def create_task(project, queue, location, url, service_account, payload=None, in_seconds=None):
8 |     # Create a client.
9 |     client = tasks_v2.CloudTasksClient()
10 | 
11 |     deadline = 900
12 | 
13 |     # Construct the fully qualified queue name.
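    # (queue_path renders "projects/{project}/locations/{location}/queues/{queue}")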
14 | parent = client.queue_path(project, location, queue) 15 | 16 | # Construct the request body. 17 | task = { 18 | "http_request": { # Specify the type of request. 19 | "http_method": tasks_v2.HttpMethod.POST, 20 | "url": url, # The full url path that the task will be sent to. 21 | "oidc_token": { 22 | "service_account_email": service_account, 23 | "audience": url, 24 | }, 25 | } 26 | } 27 | 28 | if payload is not None: 29 | if isinstance(payload, dict): 30 | # Convert dict to JSON string 31 | payload = json.dumps(payload) 32 | # specify http content-type to application/json 33 | task["http_request"]["headers"] = {"Content-type": "application/json"} 34 | 35 | # The API expects a payload of type bytes. 36 | converted_payload = payload.encode() 37 | 38 | # Add the payload to the request. 39 | task["http_request"]["body"] = converted_payload 40 | 41 | if in_seconds is not None: 42 | # Convert "seconds from now" into an rfc3339 datetime string. 43 | d = datetime.datetime.utcnow() + datetime.timedelta(seconds=in_seconds) 44 | 45 | # Create Timestamp protobuf. 46 | timestamp = timestamp_pb2.Timestamp() 47 | timestamp.FromDatetime(d) 48 | 49 | # Add the timestamp to the tasks. 50 | task["schedule_time"] = timestamp 51 | 52 | if deadline is not None: 53 | # Add dispatch deadline for requests sent to the worker. 54 | duration = duration_pb2.Duration() 55 | duration.FromSeconds(deadline) 56 | task["dispatch_deadline"] = duration 57 | 58 | # Use the client to build and send the task. 59 | response = client.create_task(request={"parent": parent, "task": task}) 60 | 61 | print("Created task {}".format(response.name)) -------------------------------------------------------------------------------- /5_parallel_execution/fn_load_to_bq/.gcloudignore: -------------------------------------------------------------------------------- 1 | deploy_function.sh 2 | local_test.sh 3 | cloud_test.sh -------------------------------------------------------------------------------- /5_parallel_execution/fn_load_to_bq/Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | url = "https://pypi.org/simple" 3 | verify_ssl = true 4 | name = "pypi" 5 | 6 | [packages] 7 | pandas = "*" 8 | google-cloud-bigquery = "*" 9 | functions-framework = "*" 10 | types-flask = "*" 11 | 12 | [dev-packages] 13 | black = "*" 14 | 15 | [requires] 16 | python_version = "3.8" 17 | -------------------------------------------------------------------------------- /5_parallel_execution/fn_load_to_bq/README.md: -------------------------------------------------------------------------------- 1 | ## Details 2 | There must be a payload sent with the request to trigger the function. 3 | 4 | The JSON object must contain a `data` record with the following arguments: 5 | 6 | - **dataset**: The BigQuery dataset to use 7 | - **table**: The table name to use 8 | - **results**: A list of dictionaries which represent each row in the table 9 | 10 | ## Setup 11 | Prepare environment with pipenv. 
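`pipenv shell` below activates the project virtualenv; if you prefer not to enter a subshell, `pipenv run` is an equivalent one-off alternative, e.g.:

```shell
pipenv run functions-framework --target create_load_job --debug
```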
12 | 
13 | ```shell
14 | pipenv install
15 | pipenv shell
16 | ```
17 | 
18 | ## Functions-Framework
19 | 
20 | ```shell
21 | functions-framework --target create_load_job --debug
22 | ```
23 | 
24 | ## Deploy Function
25 | ```shell
26 | ./deploy.sh
27 | ```
-------------------------------------------------------------------------------- /5_parallel_execution/fn_load_to_bq/ci-cd/cloudbuild.yaml:
--------------------------------------------------------------------------------
1 | steps:
2 | - name: 'gcr.io/google.com/cloudsdktool/cloud-sdk'
3 |   entrypoint: gcloud
4 |   args:
5 |   - 'alpha'
6 |   - 'functions'
7 |   - 'deploy'
8 |   - 'load-bq-data'
9 |   - '--gen2'
10 |   - '--region'
11 |   - 'europe-west2'
12 |   - '--runtime'
13 |   - 'python38'
14 |   - '--source'
15 |   - '5_parallel_execution/fn_load_to_bq'
16 |   - '--entry-point'
17 |   - 'create_load_job'
18 |   - '--trigger-http'
19 |   - '--max-instances'
20 |   - '5'
21 |   - '--service-account'
22 |   - '${_SERVICE_ACCOUNT}'
23 |   - '--allow-unauthenticated'
24 | options:
25 |   logging: CLOUD_LOGGING_ONLY
-------------------------------------------------------------------------------- /5_parallel_execution/fn_load_to_bq/deploy.sh:
--------------------------------------------------------------------------------
1 | gcloud alpha functions deploy load-bq-data \
2 | --gen2 \
3 | --region=europe-west2 \
4 | --runtime=python38 \
5 | --source=. \
6 | --entry-point=create_load_job \
7 | --trigger-http \
8 | --max-instances=5 \
9 | --allow-unauthenticated
-------------------------------------------------------------------------------- /5_parallel_execution/fn_load_to_bq/local_test.sh:
--------------------------------------------------------------------------------
1 | curl -X POST localhost:8080 \
2 | -H "Content-Type: application/json" \
3 | -d "{
4 |   \"data\" : {
5 |     \"dataset\": \"rick_and_morty\",
6 |     \"table\": \"character\",
7 |     \"results\": [{
8 |       \"created\": \"Mon, 25 Oct 2021 09:18:48 GMT\",
9 |       \"episode\": [
10 |         \"https://rickandmortyapi.com/api/episode/49\"
11 |       ],
12 |       \"gender\": \"Female\",
13 |       \"id\": 781,
14 |       \"image\": \"https://rickandmortyapi.com/api/character/avatar/781.jpeg\",
15 |       \"location\": {
16 |         \"name\": \"Earth (Replacement Dimension)\",
17 |         \"url\": \"https://rickandmortyapi.com/api/location/20\"
18 |       },
19 |       \"name\": \"Rick's Garage\",
20 |       \"origin\": {
21 |         \"name\": \"Earth (Replacement Dimension)\",
22 |         \"url\": \"https://rickandmortyapi.com/api/location/20\"
23 |       },
24 |       \"species\": \"Robot\",
25 |       \"status\": \"Alive\",
26 |       \"type\": \"Artificial Intelligence\",
27 |       \"url\": \"https://rickandmortyapi.com/api/character/781\"
28 |     },
29 |     {
30 |       \"created\": \"Mon, 25 Oct 2021 09:20:57 GMT\",
31 |       \"episode\": [
32 |         \"https://rickandmortyapi.com/api/episode/49\"
33 |       ],
34 |       \"gender\": \"Male\",
35 |       \"id\": 782,
36 |       \"image\": \"https://rickandmortyapi.com/api/character/avatar/782.jpeg\",
37 |       \"location\": {
38 |         \"name\": \"Birdperson's Consciousness\",
39 |         \"url\": \"https://rickandmortyapi.com/api/location/120\"
40 |       },
41 |       \"name\": \"Memory Squanchy\",
42 |       \"origin\": {
43 |         \"name\": \"Birdperson's Consciousness\",
44 |         \"url\": \"https://rickandmortyapi.com/api/location/120\"
45 |       },
46 |       \"species\": \"Alien\",
47 |       \"status\": \"Dead\",
48 |       \"type\": \"Memory\",
49 |       \"url\": \"https://rickandmortyapi.com/api/character/782\"
50 |     },
51 |     {
52 |       \"created\": \"Mon, 25 Oct 2021 09:22:40 GMT\",
53 |       \"episode\": [
54 |         \"https://rickandmortyapi.com/api/episode/49\"
55 |       ],
56 |       \"gender\": \"Male\",
57 |       \"id\":
783, 58 | \"image\": \"https://rickandmortyapi.com/api/character/avatar/783.jpeg\", 59 | \"location\": { 60 | \"name\": \"Birdperson's Consciousness\", 61 | \"url\": \"https://rickandmortyapi.com/api/location/120\" 62 | }, 63 | \"name\": \"Memory Rick\", 64 | \"origin\": { 65 | \"name\": \"Birdperson's Consciousness\", 66 | \"url\": \"https://rickandmortyapi.com/api/location/120\" 67 | }, 68 | \"species\": \"Human\", 69 | \"status\": \"Dead\", 70 | \"type\": \"Memory\", 71 | \"url\": \"https://rickandmortyapi.com/api/character/783\" 72 | }, 73 | { 74 | \"created\": \"Mon, 25 Oct 2021 09:23:22 GMT\", 75 | \"episode\": [ 76 | \"https://rickandmortyapi.com/api/episode/49\" 77 | ], 78 | \"gender\": \"Male\", 79 | \"id\": 784, 80 | \"image\": \"https://rickandmortyapi.com/api/character/avatar/784.jpeg\", 81 | \"location\": { 82 | \"name\": \"Birdperson's Consciousness\", 83 | \"url\": \"https://rickandmortyapi.com/api/location/120\" 84 | }, 85 | \"name\": \"Memory Rick\", 86 | \"origin\": { 87 | \"name\": \"Birdperson's Consciousness\", 88 | \"url\": \"https://rickandmortyapi.com/api/location/120\" 89 | }, 90 | \"species\": \"Human\", 91 | \"status\": \"Dead\", 92 | \"type\": \"Memory\", 93 | \"url\": \"https://rickandmortyapi.com/api/character/784\" 94 | }, 95 | { 96 | \"created\": \"Mon, 25 Oct 2021 09:24:51 GMT\", 97 | \"episode\": [ 98 | \"https://rickandmortyapi.com/api/episode/49\" 99 | ], 100 | \"gender\": \"Male\", 101 | \"id\": 785, 102 | \"image\": \"https://rickandmortyapi.com/api/character/avatar/785.jpeg\", 103 | \"location\": { 104 | \"name\": \"Birdperson's Consciousness\", 105 | \"url\": \"https://rickandmortyapi.com/api/location/120\" 106 | }, 107 | \"name\": \"Memory Geardude\", 108 | \"origin\": { 109 | \"name\": \"Birdperson's Consciousness\", 110 | \"url\": \"https://rickandmortyapi.com/api/location/120\" 111 | }, 112 | \"species\": \"Alien\", 113 | \"status\": \"Dead\", 114 | \"type\": \"Memory\", 115 | \"url\": \"https://rickandmortyapi.com/api/character/785\" 116 | } 117 | ] 118 | } 119 | } 120 | " -------------------------------------------------------------------------------- /5_parallel_execution/fn_load_to_bq/main.py: -------------------------------------------------------------------------------- 1 | from typing import List, Tuple 2 | import io 3 | 4 | import pandas as pd 5 | from google.cloud import bigquery 6 | from google.cloud.bigquery.schema import SchemaField 7 | import functions_framework 8 | import flask 9 | 10 | from transforms import transform_dataframe 11 | 12 | 13 | def _generate_bigquery_schema(df: pd.DataFrame) -> List[SchemaField]: 14 | TYPE_MAPPING = { 15 | "i": "INTEGER", 16 | "u": "NUMERIC", 17 | "b": "BOOLEAN", 18 | "f": "FLOAT", 19 | "O": "STRING", 20 | "S": "STRING", 21 | "U": "STRING", 22 | "M": "TIMESTAMP", 23 | } 24 | schema = [] 25 | for column, dtype in df.dtypes.items(): 26 | val = df[column].iloc[0] 27 | mode = "REPEATED" if isinstance(val, list) else "NULLABLE" 28 | 29 | if isinstance(val, dict) or (mode == "REPEATED" and isinstance(val[0], dict)): 30 | fields = _generate_bigquery_schema(pd.json_normalize(val)) 31 | else: 32 | fields = () 33 | 34 | 35 | 36 | type = "RECORD" if fields else TYPE_MAPPING.get(dtype.kind) 37 | schema.append( 38 | SchemaField( 39 | name=column, 40 | field_type=type, 41 | mode=mode, 42 | fields=fields, 43 | ) 44 | ) 45 | return schema 46 | 47 | 48 | def prepare_data(data: List[dict]) -> Tuple[str, List[SchemaField]]: 49 | df = pd.json_normalize(data) 50 | df = transform_dataframe(df) 51 | schema = 
_generate_bigquery_schema(df)
52 |     json_records = df.to_json(orient="records", lines=True, date_format="iso")
53 |     return json_records, schema
54 | 
55 | 
56 | def load_data_to_bq(
57 |     client: bigquery.Client,
58 |     data: str,
59 |     table_id: str,
60 |     load_config: bigquery.LoadJobConfig,
61 | ) -> int:
62 |     load_job = client.load_table_from_file(
63 |         io.StringIO(data), table_id, location="EU", job_config=load_config
64 |     )
65 |     load_job.result()  # waits for the job to complete.
66 |     destination_table = client.get_table(table_id)
67 |     num_rows = destination_table.num_rows
68 |     return num_rows
69 | 
70 | 
71 | @functions_framework.http
72 | def create_load_job(request: flask.Request) -> str:
73 |     """
74 |     The request is a flask.Request object that contains a `data` record in the following format:
75 |     {
76 |         ...,
77 |         data: {
78 |             dataset: str,
79 |             table: str,
80 |             results: List[dict]
81 |         }
82 |     """
83 |     print(f"Incoming Request: {request.get_json()}")
84 |     request_parameters = request.get_json().get("data")
85 |     results = request_parameters["results"]
86 |     dataset = request_parameters["dataset"]
87 |     table = request_parameters["table"]
88 |     json_records, schema = prepare_data(results)
89 | 
90 |     bigquery_client = bigquery.Client()
91 |     load_config = bigquery.LoadJobConfig(
92 |         schema=schema,
93 |         write_disposition="WRITE_APPEND",
94 |         source_format=bigquery.SourceFormat.NEWLINE_DELIMITED_JSON,
95 |     )
96 |     table_id = f"{dataset}.{table}"
97 |     num_rows = load_data_to_bq(bigquery_client, json_records, table_id, load_config)
98 |     print(f"Successfully loaded data to {table_id}; the table now contains {num_rows} rows")
99 |     return "DONE"
100 | 
-------------------------------------------------------------------------------- /5_parallel_execution/fn_load_to_bq/requirements.txt:
--------------------------------------------------------------------------------
1 | -i https://pypi.org/simple
2 | cachetools==5.2.0 ; python_version ~= '3.7'
3 | certifi==2022.12.7 ; python_version >= '3.6'
4 | charset-normalizer==2.1.1 ; python_full_version >= '3.6.0'
5 | click==8.1.3 ; python_version >= '3.7'
6 | cloudevents==1.8.0
7 | deprecation==2.1.0
8 | flask==2.2.2 ; python_version >= '3.7'
9 | functions-framework==3.3.0
10 | google-api-core[grpc]==2.11.0 ; python_version >= '3.7'
11 | google-auth==2.15.0 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'
12 | google-cloud-bigquery==3.4.1
13 | google-cloud-core==2.3.2 ; python_version >= '3.7'
14 | google-crc32c==1.5.0 ; python_version >= '3.7'
15 | google-resumable-media==2.4.0 ; python_version >= '3.7'
16 | googleapis-common-protos==1.57.0 ; python_version >= '3.7'
17 | grpcio==1.51.1 ; python_version >= '3.7'
18 | grpcio-status==1.51.1
19 | gunicorn==20.1.0 ; platform_system != 'Windows'
20 | idna==3.4 ; python_version >= '3.5'
21 | importlib-metadata==5.2.0 ; python_version < '3.10'
22 | itsdangerous==2.1.2 ; python_version >= '3.7'
23 | jinja2==3.1.2 ; python_version >= '3.7'
24 | markupsafe==2.1.1 ; python_version >= '3.7'
25 | numpy==1.24.1 ; python_version < '3.10'
26 | packaging==21.3 ; python_version >= '3.6'
27 | pandas==1.5.2
28 | proto-plus==1.22.1 ; python_version >= '3.6'
29 | protobuf==4.21.12 ; python_version >= '3.7'
30 | pyasn1==0.4.8
31 | pyasn1-modules==0.2.8
32 | pyparsing==3.0.9 ; python_full_version >= '3.6.8'
33 | python-dateutil==2.8.2 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2'
34 | pytz==2022.7
35 | requests==2.28.1 ; python_version >= '3.7' and python_version < '4'
36 | rsa==4.9 ; python_version >= '3.6'
37 | 
setuptools==65.6.3 ; python_version >= '3.7' 38 | six==1.16.0 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2' 39 | types-click==7.1.8 40 | types-flask==1.1.6 41 | types-jinja2==2.11.9 42 | types-markupsafe==1.1.10 43 | types-werkzeug==1.0.9 44 | urllib3==1.26.13 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5' 45 | watchdog==2.2.0 ; python_version >= '3.6' 46 | werkzeug==2.2.2 ; python_version >= '3.7' 47 | zipp==3.11.0 ; python_version >= '3.7' 48 | -------------------------------------------------------------------------------- /5_parallel_execution/fn_load_to_bq/transforms.py: -------------------------------------------------------------------------------- 1 | from functools import reduce 2 | from typing import Callable, List 3 | import json 4 | 5 | import pandas as pd 6 | 7 | Preprocessor = Callable[[pd.DataFrame], pd.DataFrame] 8 | 9 | 10 | def create_row_hash(df: pd.DataFrame) -> pd.DataFrame: 11 | """Create unique hash of entire DataFrame row.""" 12 | df.set_index(pd.util.hash_pandas_object(df.astype("str")), drop=False, inplace=True) 13 | df = df.reset_index(names=["row_hash"]) 14 | return df 15 | 16 | 17 | def add_current_datetime(df: pd.DataFrame) -> pd.DataFrame: 18 | df.insert(0, "ingestion_date", pd.to_datetime("now", utc=True)) 19 | return df 20 | 21 | 22 | def _get_nested_fields(df: pd.DataFrame) -> List[str]: 23 | """Return a list of nested fields, sorted by the deepest level of nesting first.""" 24 | nested_fields = [*{field.rsplit(".", 1)[0] for field in df.columns if "." in field}] 25 | nested_fields.sort(key=lambda record: len(record.split(".")), reverse=True) 26 | return nested_fields 27 | 28 | 29 | def df_denormalize(df: pd.DataFrame) -> pd.DataFrame: 30 | """ 31 | Convert a normalised DataFrame into a nested structure. 32 | 33 | Fields separated by '.' are considered part of a nested structure. 
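    For example, columns "origin.name" and "origin.url" are collapsed back into a single "origin" column holding {"name": ..., "url": ...} records.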
34 | """ 35 | nested_fields = _get_nested_fields(df) 36 | for field in nested_fields: 37 | list_of_children = [column for column in df.columns if field in column] 38 | rename = { 39 | field_name: field_name.rsplit(".", 1)[1] for field_name in list_of_children 40 | } 41 | renamed_fields = df[list_of_children].rename(columns=rename) 42 | df[field] = json.loads(renamed_fields.to_json(orient="records")) 43 | df.drop(list_of_children, axis=1, inplace=True) 44 | return df 45 | 46 | 47 | def compose(*functions: Preprocessor) -> Preprocessor: 48 | return reduce(lambda f, g: lambda x: g(f(x)), functions) 49 | 50 | 51 | def transform_dataframe(df: pd.DataFrame) -> pd.DataFrame: 52 | preprocessor = compose( 53 | create_row_hash, 54 | add_current_datetime, 55 | df_denormalize, 56 | ) 57 | 58 | return preprocessor(df) 59 | -------------------------------------------------------------------------------- /5_parallel_execution/fn_ram_api_request/.gcloudignore: -------------------------------------------------------------------------------- 1 | deploy_function.sh 2 | local_test.sh 3 | cloud_test.sh -------------------------------------------------------------------------------- /5_parallel_execution/fn_ram_api_request/Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | url = "https://pypi.org/simple" 3 | verify_ssl = true 4 | name = "pypi" 5 | 6 | [packages] 7 | requests = "*" 8 | functions-framework = "*" 9 | types-flask = "*" 10 | 11 | [dev-packages] 12 | black = "*" 13 | 14 | [requires] 15 | python_version = "3.8" 16 | -------------------------------------------------------------------------------- /5_parallel_execution/fn_ram_api_request/Pipfile.lock: -------------------------------------------------------------------------------- 1 | { 2 | "_meta": { 3 | "hash": { 4 | "sha256": "ea835b567f6ea10215783b2d52d479f9e6c52c36b46673fd441baa5ce32a7b7e" 5 | }, 6 | "pipfile-spec": 6, 7 | "requires": { 8 | "python_version": "3.8" 9 | }, 10 | "sources": [ 11 | { 12 | "name": "pypi", 13 | "url": "https://pypi.org/simple", 14 | "verify_ssl": true 15 | } 16 | ] 17 | }, 18 | "default": { 19 | "certifi": { 20 | "hashes": [ 21 | "sha256:35824b4c3a97115964b408844d64aa14db1cc518f6562e8d7261699d1350a9e3", 22 | "sha256:4ad3232f5e926d6718ec31cfc1fcadfde020920e278684144551c91769c7bc18" 23 | ], 24 | "markers": "python_version >= '3.6'", 25 | "version": "==2022.12.7" 26 | }, 27 | "charset-normalizer": { 28 | "hashes": [ 29 | "sha256:5a3d016c7c547f69d6f81fb0db9449ce888b418b5b9952cc5e6e66843e9dd845", 30 | "sha256:83e9a75d1911279afd89352c68b45348559d1fc0506b054b346651b5e7fee29f" 31 | ], 32 | "markers": "python_full_version >= '3.6.0'", 33 | "version": "==2.1.1" 34 | }, 35 | "click": { 36 | "hashes": [ 37 | "sha256:7682dc8afb30297001674575ea00d1814d808d6a36af415a82bd481d37ba7b8e", 38 | "sha256:bb4d8133cb15a609f44e8213d9b391b0809795062913b383c62be0ee95b1db48" 39 | ], 40 | "markers": "python_version >= '3.7'", 41 | "version": "==8.1.3" 42 | }, 43 | "cloudevents": { 44 | "hashes": [ 45 | "sha256:03009fbff3192fa1a794bdf78b0c752e4649b6c8c4eae49c1855b309a937d264", 46 | "sha256:ecdcc791581817a48a86b9253f19b025dbf794afcbd9f2abb9721a420e7b719c" 47 | ], 48 | "version": "==1.8.0" 49 | }, 50 | "deprecation": { 51 | "hashes": [ 52 | "sha256:72b3bde64e5d778694b0cf68178aed03d15e15477116add3fb773e581f9518ff", 53 | "sha256:a10811591210e1fb0e768a8c25517cabeabcba6f0bf96564f8ff45189f90b14a" 54 | ], 55 | "version": "==2.1.0" 56 | }, 57 | "flask": { 58 | "hashes": [ 59 | 
"sha256:642c450d19c4ad482f96729bd2a8f6d32554aa1e231f4f6b4e7e5264b16cca2b", 60 | "sha256:b9c46cc36662a7949f34b52d8ec7bb59c0d74ba08ba6cb9ce9adc1d8676d9526" 61 | ], 62 | "markers": "python_version >= '3.7'", 63 | "version": "==2.2.2" 64 | }, 65 | "functions-framework": { 66 | "hashes": [ 67 | "sha256:b61ef4a25f2bec0e035551171d9910875df0794433339998abceac95fc2e259b", 68 | "sha256:e6c97d2b009f4f0428c483c5a23bae3192a96107c9f1217cb1ecfa456c5375cc" 69 | ], 70 | "index": "pypi", 71 | "version": "==3.3.0" 72 | }, 73 | "gunicorn": { 74 | "hashes": [ 75 | "sha256:9dcc4547dbb1cb284accfb15ab5667a0e5d1881cc443e0677b4882a4067a807e", 76 | "sha256:e0a968b5ba15f8a328fdfd7ab1fcb5af4470c28aaf7e55df02a99bc13138e6e8" 77 | ], 78 | "markers": "platform_system != 'Windows'", 79 | "version": "==20.1.0" 80 | }, 81 | "idna": { 82 | "hashes": [ 83 | "sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4", 84 | "sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2" 85 | ], 86 | "markers": "python_version >= '3.5'", 87 | "version": "==3.4" 88 | }, 89 | "importlib-metadata": { 90 | "hashes": [ 91 | "sha256:0eafa39ba42bf225fc00e67f701d71f85aead9f878569caf13c3724f704b970f", 92 | "sha256:404d48d62bba0b7a77ff9d405efd91501bef2e67ff4ace0bed40a0cf28c3c7cd" 93 | ], 94 | "markers": "python_version < '3.10'", 95 | "version": "==5.2.0" 96 | }, 97 | "itsdangerous": { 98 | "hashes": [ 99 | "sha256:2c2349112351b88699d8d4b6b075022c0808887cb7ad10069318a8b0bc88db44", 100 | "sha256:5dbbc68b317e5e42f327f9021763545dc3fc3bfe22e6deb96aaf1fc38874156a" 101 | ], 102 | "markers": "python_version >= '3.7'", 103 | "version": "==2.1.2" 104 | }, 105 | "jinja2": { 106 | "hashes": [ 107 | "sha256:31351a702a408a9e7595a8fc6150fc3f43bb6bf7e319770cbc0db9df9437e852", 108 | "sha256:6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61" 109 | ], 110 | "markers": "python_version >= '3.7'", 111 | "version": "==3.1.2" 112 | }, 113 | "markupsafe": { 114 | "hashes": [ 115 | "sha256:0212a68688482dc52b2d45013df70d169f542b7394fc744c02a57374a4207003", 116 | "sha256:089cf3dbf0cd6c100f02945abeb18484bd1ee57a079aefd52cffd17fba910b88", 117 | "sha256:10c1bfff05d95783da83491be968e8fe789263689c02724e0c691933c52994f5", 118 | "sha256:33b74d289bd2f5e527beadcaa3f401e0df0a89927c1559c8566c066fa4248ab7", 119 | "sha256:3799351e2336dc91ea70b034983ee71cf2f9533cdff7c14c90ea126bfd95d65a", 120 | "sha256:3ce11ee3f23f79dbd06fb3d63e2f6af7b12db1d46932fe7bd8afa259a5996603", 121 | "sha256:421be9fbf0ffe9ffd7a378aafebbf6f4602d564d34be190fc19a193232fd12b1", 122 | "sha256:43093fb83d8343aac0b1baa75516da6092f58f41200907ef92448ecab8825135", 123 | "sha256:46d00d6cfecdde84d40e572d63735ef81423ad31184100411e6e3388d405e247", 124 | "sha256:4a33dea2b688b3190ee12bd7cfa29d39c9ed176bda40bfa11099a3ce5d3a7ac6", 125 | "sha256:4b9fe39a2ccc108a4accc2676e77da025ce383c108593d65cc909add5c3bd601", 126 | "sha256:56442863ed2b06d19c37f94d999035e15ee982988920e12a5b4ba29b62ad1f77", 127 | "sha256:671cd1187ed5e62818414afe79ed29da836dde67166a9fac6d435873c44fdd02", 128 | "sha256:694deca8d702d5db21ec83983ce0bb4b26a578e71fbdbd4fdcd387daa90e4d5e", 129 | "sha256:6a074d34ee7a5ce3effbc526b7083ec9731bb3cbf921bbe1d3005d4d2bdb3a63", 130 | "sha256:6d0072fea50feec76a4c418096652f2c3238eaa014b2f94aeb1d56a66b41403f", 131 | "sha256:6fbf47b5d3728c6aea2abb0589b5d30459e369baa772e0f37a0320185e87c980", 132 | "sha256:7f91197cc9e48f989d12e4e6fbc46495c446636dfc81b9ccf50bb0ec74b91d4b", 133 | "sha256:86b1f75c4e7c2ac2ccdaec2b9022845dbb81880ca318bb7a0a01fbf7813e3812", 134 | 
"sha256:8dc1c72a69aa7e082593c4a203dcf94ddb74bb5c8a731e4e1eb68d031e8498ff", 135 | "sha256:8e3dcf21f367459434c18e71b2a9532d96547aef8a871872a5bd69a715c15f96", 136 | "sha256:8e576a51ad59e4bfaac456023a78f6b5e6e7651dcd383bcc3e18d06f9b55d6d1", 137 | "sha256:96e37a3dc86e80bf81758c152fe66dbf60ed5eca3d26305edf01892257049925", 138 | "sha256:97a68e6ada378df82bc9f16b800ab77cbf4b2fada0081794318520138c088e4a", 139 | "sha256:99a2a507ed3ac881b975a2976d59f38c19386d128e7a9a18b7df6fff1fd4c1d6", 140 | "sha256:a49907dd8420c5685cfa064a1335b6754b74541bbb3706c259c02ed65b644b3e", 141 | "sha256:b09bf97215625a311f669476f44b8b318b075847b49316d3e28c08e41a7a573f", 142 | "sha256:b7bd98b796e2b6553da7225aeb61f447f80a1ca64f41d83612e6139ca5213aa4", 143 | "sha256:b87db4360013327109564f0e591bd2a3b318547bcef31b468a92ee504d07ae4f", 144 | "sha256:bcb3ed405ed3222f9904899563d6fc492ff75cce56cba05e32eff40e6acbeaa3", 145 | "sha256:d4306c36ca495956b6d568d276ac11fdd9c30a36f1b6eb928070dc5360b22e1c", 146 | "sha256:d5ee4f386140395a2c818d149221149c54849dfcfcb9f1debfe07a8b8bd63f9a", 147 | "sha256:dda30ba7e87fbbb7eab1ec9f58678558fd9a6b8b853530e176eabd064da81417", 148 | "sha256:e04e26803c9c3851c931eac40c695602c6295b8d432cbe78609649ad9bd2da8a", 149 | "sha256:e1c0b87e09fa55a220f058d1d49d3fb8df88fbfab58558f1198e08c1e1de842a", 150 | "sha256:e72591e9ecd94d7feb70c1cbd7be7b3ebea3f548870aa91e2732960fa4d57a37", 151 | "sha256:e8c843bbcda3a2f1e3c2ab25913c80a3c5376cd00c6e8c4a86a89a28c8dc5452", 152 | "sha256:efc1913fd2ca4f334418481c7e595c00aad186563bbc1ec76067848c7ca0a933", 153 | "sha256:f121a1420d4e173a5d96e47e9a0c0dcff965afdf1626d28de1460815f7c4ee7a", 154 | "sha256:fc7b548b17d238737688817ab67deebb30e8073c95749d55538ed473130ec0c7" 155 | ], 156 | "markers": "python_version >= '3.7'", 157 | "version": "==2.1.1" 158 | }, 159 | "packaging": { 160 | "hashes": [ 161 | "sha256:2198ec20bd4c017b8f9717e00f0c8714076fc2fd93816750ab48e2c41de2cfd3", 162 | "sha256:957e2148ba0e1a3b282772e791ef1d8083648bc131c8ab0c1feba110ce1146c3" 163 | ], 164 | "markers": "python_version >= '3.7'", 165 | "version": "==22.0" 166 | }, 167 | "requests": { 168 | "hashes": [ 169 | "sha256:7c5599b102feddaa661c826c56ab4fee28bfd17f5abca1ebbe3e7f19d7c97983", 170 | "sha256:8fefa2a1a1365bf5520aac41836fbee479da67864514bdb821f31ce07ce65349" 171 | ], 172 | "index": "pypi", 173 | "version": "==2.28.1" 174 | }, 175 | "setuptools": { 176 | "hashes": [ 177 | "sha256:57f6f22bde4e042978bcd50176fdb381d7c21a9efa4041202288d3737a0c6a54", 178 | "sha256:a7620757bf984b58deaf32fc8a4577a9bbc0850cf92c20e1ce41c38c19e5fb75" 179 | ], 180 | "markers": "python_version >= '3.7'", 181 | "version": "==65.6.3" 182 | }, 183 | "types-click": { 184 | "hashes": [ 185 | "sha256:8cb030a669e2e927461be9827375f83c16b8178c365852c060a34e24871e7e81", 186 | "sha256:b6604968be6401dc516311ca50708a0a28baa7a0cb840efd7412f0dbbff4e092" 187 | ], 188 | "version": "==7.1.8" 189 | }, 190 | "types-flask": { 191 | "hashes": [ 192 | "sha256:6ab8a9a5e258b76539d652f6341408867298550b19b81f0e41e916825fc39087", 193 | "sha256:aac777b3abfff9436e6b01f6d08171cf23ea6e5be71cbf773aaabb1c5763e9cf" 194 | ], 195 | "index": "pypi", 196 | "version": "==1.1.6" 197 | }, 198 | "types-jinja2": { 199 | "hashes": [ 200 | "sha256:60a1e21e8296979db32f9374d8a239af4cb541ff66447bb915d8ad398f9c63b2", 201 | "sha256:dbdc74a40aba7aed520b7e4d89e8f0fe4286518494208b35123bcf084d4b8c81" 202 | ], 203 | "version": "==2.11.9" 204 | }, 205 | "types-markupsafe": { 206 | "hashes": [ 207 | "sha256:85b3a872683d02aea3a5ac2a8ef590193c344092032f58457287fbf8e06711b1", 208 | 
"sha256:ca2bee0f4faafc45250602567ef38d533e877d2ddca13003b319c551ff5b3cc5" 209 | ], 210 | "version": "==1.1.10" 211 | }, 212 | "types-werkzeug": { 213 | "hashes": [ 214 | "sha256:194bd5715a13c598f05c63e8a739328657590943bce941e8a3619a6b5d4a54ec", 215 | "sha256:5cc269604c400133d452a40cee6397655f878fc460e03fde291b9e3a5eaa518c" 216 | ], 217 | "version": "==1.0.9" 218 | }, 219 | "urllib3": { 220 | "hashes": [ 221 | "sha256:47cc05d99aaa09c9e72ed5809b60e7ba354e64b59c9c173ac3018642d8bb41fc", 222 | "sha256:c083dd0dce68dbfbe1129d5271cb90f9447dea7d52097c6e0126120c521ddea8" 223 | ], 224 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'", 225 | "version": "==1.26.13" 226 | }, 227 | "watchdog": { 228 | "hashes": [ 229 | "sha256:1893d425ef4fb4f129ee8ef72226836619c2950dd0559bba022b0818c63a7b60", 230 | "sha256:1a410dd4d0adcc86b4c71d1317ba2ea2c92babaf5b83321e4bde2514525544d5", 231 | "sha256:1f2b0665c57358ce9786f06f5475bc083fea9d81ecc0efa4733fd0c320940a37", 232 | "sha256:1f8eca9d294a4f194ce9df0d97d19b5598f310950d3ac3dd6e8d25ae456d4c8a", 233 | "sha256:27e49268735b3c27310883012ab3bd86ea0a96dcab90fe3feb682472e30c90f3", 234 | "sha256:28704c71afdb79c3f215c90231e41c52b056ea880b6be6cee035c6149d658ed1", 235 | "sha256:2ac0bd7c206bb6df78ef9e8ad27cc1346f2b41b1fef610395607319cdab89bc1", 236 | "sha256:2af1a29fd14fc0a87fb6ed762d3e1ae5694dcde22372eebba50e9e5be47af03c", 237 | "sha256:3a048865c828389cb06c0bebf8a883cec3ae58ad3e366bcc38c61d8455a3138f", 238 | "sha256:441024df19253bb108d3a8a5de7a186003d68564084576fecf7333a441271ef7", 239 | "sha256:56fb3f40fc3deecf6e518303c7533f5e2a722e377b12507f6de891583f1b48aa", 240 | "sha256:619d63fa5be69f89ff3a93e165e602c08ed8da402ca42b99cd59a8ec115673e1", 241 | "sha256:74535e955359d79d126885e642d3683616e6d9ab3aae0e7dcccd043bd5a3ff4f", 242 | "sha256:76a2743402b794629a955d96ea2e240bd0e903aa26e02e93cd2d57b33900962b", 243 | "sha256:83cf8bc60d9c613b66a4c018051873d6273d9e45d040eed06d6a96241bd8ec01", 244 | "sha256:920a4bda7daa47545c3201a3292e99300ba81ca26b7569575bd086c865889090", 245 | "sha256:9e99c1713e4436d2563f5828c8910e5ff25abd6ce999e75f15c15d81d41980b6", 246 | "sha256:a5bd9e8656d07cae89ac464ee4bcb6f1b9cecbedc3bf1334683bed3d5afd39ba", 247 | "sha256:ad0150536469fa4b693531e497ffe220d5b6cd76ad2eda474a5e641ee204bbb6", 248 | "sha256:af4b5c7ba60206759a1d99811b5938ca666ea9562a1052b410637bb96ff97512", 249 | "sha256:c7bd98813d34bfa9b464cf8122e7d4bec0a5a427399094d2c17dd5f70d59bc61", 250 | "sha256:ceaa9268d81205876bedb1069f9feab3eccddd4b90d9a45d06a0df592a04cae9", 251 | "sha256:cf05e6ff677b9655c6e9511d02e9cc55e730c4e430b7a54af9c28912294605a4", 252 | "sha256:d0fb5f2b513556c2abb578c1066f5f467d729f2eb689bc2db0739daf81c6bb7e", 253 | "sha256:d6ae890798a3560688b441ef086bb66e87af6b400a92749a18b856a134fc0318", 254 | "sha256:e5aed2a700a18c194c39c266900d41f3db0c1ebe6b8a0834b9995c835d2ca66e", 255 | "sha256:e722755d995035dd32177a9c633d158f2ec604f2a358b545bba5bed53ab25bca", 256 | "sha256:ed91c3ccfc23398e7aa9715abf679d5c163394b8cad994f34f156d57a7c163dc" 257 | ], 258 | "markers": "python_version >= '3.6'", 259 | "version": "==2.2.0" 260 | }, 261 | "werkzeug": { 262 | "hashes": [ 263 | "sha256:7ea2d48322cc7c0f8b3a215ed73eabd7b5d75d0b50e31ab006286ccff9e00b8f", 264 | "sha256:f979ab81f58d7318e064e99c4506445d60135ac5cd2e177a2de0089bfd4c9bd5" 265 | ], 266 | "markers": "python_version >= '3.7'", 267 | "version": "==2.2.2" 268 | }, 269 | "zipp": { 270 | "hashes": [ 271 | "sha256:83a28fcb75844b5c0cdaf5aa4003c2d728c77e05f5aeabe8e95e56727005fbaa", 272 | 
"sha256:a7a22e05929290a67401440b39690ae6563279bced5f314609d9d03798f56766" 273 | ], 274 | "markers": "python_version >= '3.7'", 275 | "version": "==3.11.0" 276 | } 277 | }, 278 | "develop": { 279 | "black": { 280 | "hashes": [ 281 | "sha256:101c69b23df9b44247bd88e1d7e90154336ac4992502d4197bdac35dd7ee3320", 282 | "sha256:159a46a4947f73387b4d83e87ea006dbb2337eab6c879620a3ba52699b1f4351", 283 | "sha256:1f58cbe16dfe8c12b7434e50ff889fa479072096d79f0a7f25e4ab8e94cd8350", 284 | "sha256:229351e5a18ca30f447bf724d007f890f97e13af070bb6ad4c0a441cd7596a2f", 285 | "sha256:436cc9167dd28040ad90d3b404aec22cedf24a6e4d7de221bec2730ec0c97bcf", 286 | "sha256:559c7a1ba9a006226f09e4916060982fd27334ae1998e7a38b3f33a37f7a2148", 287 | "sha256:7412e75863aa5c5411886804678b7d083c7c28421210180d67dfd8cf1221e1f4", 288 | "sha256:77d86c9f3db9b1bf6761244bc0b3572a546f5fe37917a044e02f3166d5aafa7d", 289 | "sha256:82d9fe8fee3401e02e79767016b4907820a7dc28d70d137eb397b92ef3cc5bfc", 290 | "sha256:9eedd20838bd5d75b80c9f5487dbcb06836a43833a37846cf1d8c1cc01cef59d", 291 | "sha256:c116eed0efb9ff870ded8b62fe9f28dd61ef6e9ddd28d83d7d264a38417dcee2", 292 | "sha256:d30b212bffeb1e252b31dd269dfae69dd17e06d92b87ad26e23890f3efea366f" 293 | ], 294 | "index": "pypi", 295 | "version": "==22.12.0" 296 | }, 297 | "click": { 298 | "hashes": [ 299 | "sha256:7682dc8afb30297001674575ea00d1814d808d6a36af415a82bd481d37ba7b8e", 300 | "sha256:bb4d8133cb15a609f44e8213d9b391b0809795062913b383c62be0ee95b1db48" 301 | ], 302 | "markers": "python_version >= '3.7'", 303 | "version": "==8.1.3" 304 | }, 305 | "mypy-extensions": { 306 | "hashes": [ 307 | "sha256:090fedd75945a69ae91ce1303b5824f428daf5a028d2f6ab8a299250a846f15d", 308 | "sha256:2d82818f5bb3e369420cb3c4060a7970edba416647068eb4c5343488a6c604a8" 309 | ], 310 | "version": "==0.4.3" 311 | }, 312 | "pathspec": { 313 | "hashes": [ 314 | "sha256:3c95343af8b756205e2aba76e843ba9520a24dd84f68c22b9f93251507509dd6", 315 | "sha256:56200de4077d9d0791465aa9095a01d421861e405b5096955051deefd697d6f6" 316 | ], 317 | "markers": "python_version >= '3.7'", 318 | "version": "==0.10.3" 319 | }, 320 | "platformdirs": { 321 | "hashes": [ 322 | "sha256:83c8f6d04389165de7c9b6f0c682439697887bca0aa2f1c87ef1826be3584490", 323 | "sha256:e1fea1fe471b9ff8332e229df3cb7de4f53eeea4998d3b6bfff542115e998bd2" 324 | ], 325 | "markers": "python_version >= '3.7'", 326 | "version": "==2.6.2" 327 | }, 328 | "tomli": { 329 | "hashes": [ 330 | "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc", 331 | "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f" 332 | ], 333 | "markers": "python_full_version < '3.11.0a7'", 334 | "version": "==2.0.1" 335 | }, 336 | "typing-extensions": { 337 | "hashes": [ 338 | "sha256:1511434bb92bf8dd198c12b1cc812e800d4181cfcb867674e0f8279cc93087aa", 339 | "sha256:16fa4864408f655d35ec496218b85f79b3437c829e93320c7c9215ccfd92489e" 340 | ], 341 | "markers": "python_version < '3.10'", 342 | "version": "==4.4.0" 343 | } 344 | } 345 | } 346 | -------------------------------------------------------------------------------- /5_parallel_execution/fn_ram_api_request/README.md: -------------------------------------------------------------------------------- 1 | ## Details 2 | There must be a payload sent with the request to trigger the function. 3 | 4 | The JSON object must contain a `data` record with the following arguments: 5 | 6 | - **endpoint**: The RAM API endpoint e.g. 
"character" 7 | - **api_params**: Fields defined in `ApiParameters` dataclass 8 | - **iteration**: This is provided by the task from Cloud Tasks and represents the page number to call the API for. 9 | 10 | There are constants in `main.py` that need to be updated with your relevant values. 11 | 12 | ```shell 13 | TASK_FUNCTION_URL = "INSERT_CLOUD_TASK_FUNCTION_URL" 14 | PROJECT = "INSERT_PROJECT_ID" 15 | REGION = "INSERT_REGION" 16 | SERVICE_ACCOUNT = "INSERT_SERVICE_ACCOUNT" 17 | LOAD_TO_BQ_FUNCTION = "INSERT_LOAD_TO_BQ_FUNCTION_URL" 18 | LOAD_TO_BQ_QUEUE = "INSERT_QUEUE_NAME" 19 | ``` 20 | 21 | ## Setup 22 | Prepare environment with pipenv. 23 | 24 | ```shell 25 | pipenv install 26 | pipenv activate 27 | ``` 28 | 29 | ## Functions-Framework 30 | 31 | ```shell 32 | functions-framework --target send_api_request --debug 33 | ``` 34 | 35 | ### Deploy Function 36 | ```shell 37 | ./deploy.sh 38 | ``` -------------------------------------------------------------------------------- /5_parallel_execution/fn_ram_api_request/ci-cd/cloudbuild.yaml: -------------------------------------------------------------------------------- 1 | steps: 2 | - name: 'gcr.io/google.com/cloudsdktool/cloud-sdk' 3 | entrypoint: gcloud 4 | args: 5 | - 'alpha' 6 | - 'functions' 7 | - 'deploy' 8 | - 'ram-api-request' 9 | - '--gen2' 10 | - '--region' 11 | - 'europe-west2' 12 | - '--runtime' 13 | - 'python38' 14 | - '--source' 15 | - '5_parallel_execution/fn_ram_api_request' 16 | - '--entry-point' 17 | - 'send_api_request' 18 | - '--trigger-http' 19 | - '--max-instances' 20 | - '5' 21 | - '--service-account' 22 | - '${_SERVICE_ACCOUNT}' 23 | - '--allow-unauthenticated' 24 | options: 25 | logging: CLOUD_LOGGING_ONLY -------------------------------------------------------------------------------- /5_parallel_execution/fn_ram_api_request/deploy.sh: -------------------------------------------------------------------------------- 1 | gcloud alpha functions deploy ram-api-request \ 2 | --gen2 \ 3 | --region=europe-west2 \ 4 | --runtime=python38 \ 5 | --source=. 
\ 6 | --entry-point=send_api_request \ 7 | --trigger-http \ 8 | --max-instances=5 \ 9 | --service-account= \ 10 | --allow-unauthenticated -------------------------------------------------------------------------------- /5_parallel_execution/fn_ram_api_request/local_test.sh: -------------------------------------------------------------------------------- 1 | curl -X POST localhost:8080 \ 2 | -H "Content-Type: application/json" \ 3 | -d '{ 4 | "data" : { 5 | "endpoint": "character", 6 | "api_params": {}, 7 | "iteration": 10 8 | } 9 | } 10 | ' -------------------------------------------------------------------------------- /5_parallel_execution/fn_ram_api_request/main.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | from dataclasses import asdict 3 | import json 4 | import datetime 5 | 6 | import requests 7 | import functions_framework 8 | import flask 9 | 10 | from ramapi import get_endpoint 11 | from models import ApiParameters, CharacterSchema 12 | 13 | 14 | TASK_FUNCTION_URL = "INSERT_CLOUD_TASK_FUNCTION_URL" 15 | PROJECT = "INSERT_PROJECT_ID" 16 | REGION = "INSERT_REGION" 17 | SERVICE_ACCOUNT = "INSERT_SERVICE_ACCOUNT" 18 | LOAD_TO_BQ_FUNCTION = "INSERT_LOAD_TO_BQ_FUNCTION_URL" 19 | LOAD_TO_BQ_QUEUE = "INSERT_QUEUE_NAME" 20 | 21 | 22 | def _default(obj): 23 | """Create custom default function for json.dumps() method""" 24 | if isinstance(obj, (datetime.date, datetime.datetime)): 25 | return obj.isoformat() 26 | 27 | 28 | def create_load_bq_data_task(table: str, results: List[CharacterSchema]): 29 | results = [asdict(result) for result in results] 30 | task_request = { 31 | "data": { 32 | "dataset": "rick_and_morty", 33 | "table": table, 34 | "results": results, 35 | } 36 | } 37 | data = { 38 | "data": { 39 | "project": PROJECT, 40 | "region": REGION, 41 | "service_account": SERVICE_ACCOUNT, 42 | "number_of_iterations": 1, 43 | "function_url": LOAD_TO_BQ_FUNCTION, 44 | "queue": LOAD_TO_BQ_QUEUE, 45 | "task_request": task_request 46 | } 47 | } 48 | headers = {"Content-Type": "application/json"} 49 | 50 | response = requests.post(TASK_FUNCTION_URL, data=json.dumps(data, default=_default), headers=headers) 51 | response.raise_for_status() 52 | return "DONE" 53 | 54 | 55 | @functions_framework.http 56 | def send_api_request(request: flask.Request) -> str: 57 | """ 58 | Return a JSON serialised string representing a list of `CharacterSchema` records. 
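    (In practice this fetches a single API page — page = the "iteration" set by Cloud Tasks — enqueues a follow-up task that loads the results into BigQuery, then returns "DONE".)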
59 | 60 | The request is a flask.Request object that contains a `data` record in the following format: 61 | { 62 | ..., 63 | data: { 64 | endpoint: str, 65 | api_params: Fields defined in `ApiParameters` dataclass 66 | iteration: int 67 | } 68 | """ 69 | data = request.get_json().get("data") 70 | endpoint = data.get("endpoint") 71 | params = ApiParameters(**data.get("api_params")) 72 | params.page = data.get("iteration") 73 | 74 | print(f"Calling {endpoint} with {asdict(params)}") 75 | response = get_endpoint(endpoint, params) 76 | create_load_bq_data_task(endpoint, response.results) 77 | 78 | return "DONE" 79 | -------------------------------------------------------------------------------- /5_parallel_execution/fn_ram_api_request/models.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import Dict, List, Literal, Optional 3 | from datetime import datetime, timezone 4 | 5 | 6 | @dataclass 7 | class CharacterSchema: 8 | id: int 9 | name: str 10 | status: Literal["Alive", "Dead", "unknown"] 11 | species: str 12 | type: str 13 | gender: Literal["Female", "Male", "Genderless", "unknown"] 14 | origin: Dict[str, str] 15 | location: Dict[str, str] 16 | image: str 17 | episode: List[str] 18 | url: str 19 | created: datetime 20 | 21 | def __post_init__(self): 22 | self.created = datetime.strptime(self.created, "%Y-%m-%dT%H:%M:%S.%fZ").replace( 23 | tzinfo=timezone.utc 24 | ) 25 | 26 | 27 | @dataclass 28 | class ApiInfo: 29 | count: int 30 | pages: int 31 | next: Optional[str] 32 | prev: Optional[str] 33 | 34 | 35 | @dataclass 36 | class ApiResponse: 37 | info: ApiInfo 38 | results: List[CharacterSchema] 39 | 40 | def __post_init__(self): 41 | self.info = ApiInfo(**self.info) 42 | self.results = [CharacterSchema(**x) for x in self.results] 43 | 44 | 45 | @dataclass 46 | class ApiParameters: 47 | page: Optional[str] = None 48 | name: Optional[str] = None 49 | status: Optional[Literal["alive", "dead", "unknown"]] = None 50 | species: Optional[str] = None 51 | type: Optional[str] = None 52 | gender: Optional[Literal["female", "male", "genderless", "unknown"]] = None 53 | -------------------------------------------------------------------------------- /5_parallel_execution/fn_ram_api_request/ramapi.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from dataclasses import asdict 3 | 4 | from models import ApiResponse, ApiParameters 5 | 6 | BASE_URL = "https://rickandmortyapi.com/api" 7 | 8 | 9 | def get_endpoint(endpoint: str, params: ApiParameters) -> ApiResponse: 10 | """Return `ApiResponse` from Rick and Morty `endpoint`""" 11 | response = requests.get(url=f"{BASE_URL}/{endpoint}", params=asdict(params)) 12 | response.raise_for_status() 13 | response = ApiResponse(**response.json()) 14 | 15 | return response 16 | -------------------------------------------------------------------------------- /5_parallel_execution/fn_ram_api_request/requirements.txt: -------------------------------------------------------------------------------- 1 | -i https://pypi.org/simple 2 | certifi==2022.12.7 ; python_version >= '3.6' 3 | charset-normalizer==2.1.1 ; python_full_version >= '3.6.0' 4 | click==8.1.3 ; python_version >= '3.7' 5 | cloudevents==1.8.0 6 | deprecation==2.1.0 7 | flask==2.2.2 ; python_version >= '3.7' 8 | functions-framework==3.3.0 9 | gunicorn==20.1.0 ; platform_system != 'Windows' 10 | idna==3.4 ; python_version >= '3.5' 11 | importlib-metadata==5.2.0 ; 
python_version < '3.10' 12 | itsdangerous==2.1.2 ; python_version >= '3.7' 13 | jinja2==3.1.2 ; python_version >= '3.7' 14 | markupsafe==2.1.1 ; python_version >= '3.7' 15 | packaging==22.0 ; python_version >= '3.7' 16 | requests==2.28.1 17 | setuptools==65.6.3 ; python_version >= '3.7' 18 | types-click==7.1.8 19 | types-flask==1.1.6 20 | types-jinja2==2.11.9 21 | types-markupsafe==1.1.10 22 | types-werkzeug==1.0.9 23 | urllib3==1.26.13 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5' 24 | watchdog==2.2.0 ; python_version >= '3.6' 25 | werkzeug==2.2.2 ; python_version >= '3.7' 26 | zipp==3.11.0 ; python_version >= '3.7' 27 | -------------------------------------------------------------------------------- /5_parallel_execution/initial_script.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import requests 4 | 5 | 6 | BASE_URL = "https://rickandmortyapi.com/api" 7 | CLOUD_TASK_FUNCTION_URL = "INSERT_FUNCTION_URL" 8 | 9 | def get_pages(endpoint: str): 10 | response = requests.get(url=f"{BASE_URL}/{endpoint}") 11 | response.raise_for_status() 12 | response = response.json() 13 | pages = response.get("info").get("pages") 14 | return pages 15 | 16 | 17 | def start_process(pages: int): 18 | task_request = { 19 | "data" : { 20 | "endpoint": "character", 21 | "api_params": {} 22 | } 23 | } 24 | data = { 25 | "data" : { 26 | "project": "INSERT_PROJECT", 27 | "region": "INSERT_REGION", 28 | "service_account" : "INSERT_SERVICE_ACCOUNT", 29 | "number_of_iterations" : pages, 30 | "function_url" : "INSERT_FUNCTION_URL", 31 | "queue" : "fn-ram-api-request", 32 | "task_request" : task_request 33 | } 34 | } 35 | headers = {"Content-Type": "application/json"} 36 | 37 | response = requests.post(CLOUD_TASK_FUNCTION_URL, data=json.dumps(data), headers=headers) 38 | response.raise_for_status() 39 | return "DONE" 40 | 41 | if __name__ == "__main__": 42 | pages = get_pages("character") 43 | start_process(pages) -------------------------------------------------------------------------------- /6_prefect_orchestration/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
160 | #.idea/ -------------------------------------------------------------------------------- /6_prefect_orchestration/prefect-flows/Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | url = "https://pypi.org/simple" 3 | verify_ssl = true 4 | name = "pypi" 5 | 6 | [packages] 7 | prefect = "==2.7.6" 8 | requests = "*" 9 | 10 | [dev-packages] 11 | 12 | [requires] 13 | python_version = "3.8" 14 | -------------------------------------------------------------------------------- /6_prefect_orchestration/prefect-flows/trigger_ram_api.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import requests 4 | from prefect import flow, task 5 | from pydantic import BaseModel 6 | 7 | 8 | BASE_URL = "https://rickandmortyapi.com/api" 9 | CLOUD_TASK_FUNCTION_URL = "INSERT_CLOUD_TASK_FUNCTION_URL" 10 | 11 | 12 | class Model(BaseModel): 13 | project: str 14 | region: str 15 | service_account: str 16 | function_url: str 17 | task_queue: str 18 | endpoint: str 19 | 20 | 21 | @task() 22 | def get_pages(endpoint: str): 23 | response = requests.get(url=f"{BASE_URL}/{endpoint}") 24 | response.raise_for_status() 25 | response = response.json() 26 | pages = response.get("info").get("pages") 27 | return pages 28 | 29 | 30 | @task(log_prints=True) 31 | def start_process(pages: int, model: Model): 32 | task_request = {"data": {"endpoint": model.endpoint, "api_params": {}}} 33 | data = { 34 | "data": { 35 | "project": model.project, 36 | "region": model.region, 37 | "service_account": model.service_account, 38 | "number_of_iterations": pages, 39 | "function_url": model.function_url, 40 | "queue": model.task_queue, 41 | "task_request": task_request, 42 | } 43 | } 44 | headers = {"Content-Type": "application/json"} 45 | 46 | print(f"LOGGING: Sending request with data: {json.dumps(data)}") 47 | response = requests.post( 48 | CLOUD_TASK_FUNCTION_URL, data=json.dumps(data), headers=headers 49 | ) 50 | response.raise_for_status() 51 | return "DONE" 52 | 53 | 54 | @flow(log_prints=True) 55 | def trigger_ram_api_pipeline(model: Model): 56 | print("LOGGING: Triggering RAM API Request Pipeline") 57 | pages = get_pages("character") 58 | start_process(pages, model) 59 | return "DONE" 60 | 61 | 62 | if __name__ == "__main__": 63 | model = Model( 64 | project="INSERT_PROJECT", 65 | region="INSERT_REGION", 66 | service_account="INSERT_SERVICE_ACCOUNT", 67 | function_url="INSERT_FUNCTION_URL", 68 | task_queue="INSERT_TASK_QUEUE_NAME", 69 | endpoint="character", 70 | ) 71 | trigger_ram_api_pipeline(model) 72 | -------------------------------------------------------------------------------- /6_prefect_orchestration/prefect-flows/trigger_ram_api_pipeline-deployment.py: -------------------------------------------------------------------------------- 1 | from prefect.deployments import Deployment 2 | from prefect.orion.schemas.schedules import RRuleSchedule 3 | 4 | from trigger_ram_api import trigger_ram_api_pipeline 5 | 6 | 7 | deployment = Deployment.build_from_flow( 8 | flow=trigger_ram_api_pipeline, 9 | name="trigger-ram-api-pipeline-deployment", 10 | version=1, 11 | work_queue_name="main", 12 | schedule=RRuleSchedule(rrule="RRULE:FREQ=DAILY"), 13 | parameters={ 14 | "model": { 15 | "project": "INSERT_PROJECT", 16 | "region": "INSERT_REGION", 17 | "service_account": "INSERT_SERVICE_ACCOUNT", 18 | "function_url": "INSERT_FUNCTION_URL", 19 | "task_queue": "INSERT_TASK_QUEUE_NAME", 20 | "endpoint": "character", 21 | } 22 | }, 
23 | ) 24 | 25 | deployment.apply() 26 | -------------------------------------------------------------------------------- /7_prefect_deployment/.gitignore: -------------------------------------------------------------------------------- 1 | service_account.json 2 | 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | share/python-wheels/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | MANIFEST 30 | 31 | # PyInstaller 32 | # Usually these files are written by a python script from a template 33 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .nox/ 45 | .coverage 46 | .coverage.* 47 | .cache 48 | nosetests.xml 49 | coverage.xml 50 | *.cover 51 | *.py,cover 52 | .hypothesis/ 53 | .pytest_cache/ 54 | cover/ 55 | 56 | # Translations 57 | *.mo 58 | *.pot 59 | 60 | # Django stuff: 61 | *.log 62 | local_settings.py 63 | db.sqlite3 64 | db.sqlite3-journal 65 | 66 | # Flask stuff: 67 | instance/ 68 | .webassets-cache 69 | 70 | # Scrapy stuff: 71 | .scrapy 72 | 73 | # Sphinx documentation 74 | docs/_build/ 75 | 76 | # PyBuilder 77 | .pybuilder/ 78 | target/ 79 | 80 | # Jupyter Notebook 81 | .ipynb_checkpoints 82 | 83 | # IPython 84 | profile_default/ 85 | ipython_config.py 86 | 87 | # pyenv 88 | # For a library or package, you might want to ignore these files since the code is 89 | # intended to run in multiple environments; otherwise, check them in: 90 | # .python-version 91 | 92 | # pipenv 93 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 94 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 95 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 96 | # install all needed dependencies. 97 | #Pipfile.lock 98 | 99 | # poetry 100 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 101 | # This is especially recommended for binary packages to ensure reproducibility, and is more 102 | # commonly ignored for libraries. 103 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 104 | #poetry.lock 105 | 106 | # pdm 107 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 108 | #pdm.lock 109 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 110 | # in version control. 111 | # https://pdm.fming.dev/#use-with-ide 112 | .pdm.toml 113 | 114 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 115 | __pypackages__/ 116 | 117 | # Celery stuff 118 | celerybeat-schedule 119 | celerybeat.pid 120 | 121 | # SageMath parsed files 122 | *.sage.py 123 | 124 | # Environments 125 | .env 126 | .venv 127 | env/ 128 | venv/ 129 | ENV/ 130 | env.bak/ 131 | venv.bak/ 132 | 133 | # Spyder project settings 134 | .spyderproject 135 | .spyproject 136 | 137 | # Rope project settings 138 | .ropeproject 139 | 140 | # mkdocs documentation 141 | /site 142 | 143 | # mypy 144 | .mypy_cache/ 145 | .dmypy.json 146 | dmypy.json 147 | 148 | # Pyre type checker 149 | .pyre/ 150 | 151 | # pytype static type analyzer 152 | .pytype/ 153 | 154 | # Cython debug symbols 155 | cython_debug/ 156 | 157 | # PyCharm 158 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 159 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 160 | # and can be added to the global gitignore or merged into this file. For a more nuclear 161 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 162 | #.idea/ -------------------------------------------------------------------------------- /7_prefect_deployment/prefect-flows/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM prefecthq/prefect:2.7.7-python3.8 2 | RUN pip install -U gcsfs -------------------------------------------------------------------------------- /7_prefect_deployment/prefect-flows/Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | url = "https://pypi.org/simple" 3 | verify_ssl = true 4 | name = "pypi" 5 | 6 | [packages] 7 | prefect = "~=2.7.7" 8 | requests = "*" 9 | prefect-gcp = "*" 10 | gcsfs = "*" 11 | 12 | [dev-packages] 13 | 14 | [requires] 15 | python_version = "3.8" 16 | -------------------------------------------------------------------------------- /7_prefect_deployment/prefect-flows/blocks/gcp_cloud_run.py: -------------------------------------------------------------------------------- 1 | from prefect_gcp.cloud_run import CloudRunJob 2 | from prefect_gcp import GcpCredentials 3 | 4 | credentials = GcpCredentials.load("default-credentials") 5 | 6 | block = CloudRunJob( 7 | credentials=credentials, 8 | project="", 9 | image="/ram-api-flow:2.7.7-python3.8", 10 | region="" 11 | ) 12 | 13 | block.save("cloud-run-infrastructure") -------------------------------------------------------------------------------- /7_prefect_deployment/prefect-flows/blocks/gcs_credentials.py: -------------------------------------------------------------------------------- 1 | from prefect_gcp import GcpCredentials 2 | 3 | with open("service_account.json") as f: 4 | service_account = f.read() 5 | 6 | block = GcpCredentials( 7 | service_account_info=service_account, 8 | project="INSERT_PROJECT_ID" 9 | ) 10 | 11 | block.save("default-credentials") -------------------------------------------------------------------------------- /7_prefect_deployment/prefect-flows/blocks/gcs_prefect_deployments_bucket.py: -------------------------------------------------------------------------------- 1 | from prefect.filesystems import GCS 2 | 3 | with open("service_account.json") as f: 4 | service_account = f.read() 5 | 6 | block = GCS( 7 | bucket_path="prefect-deployments/dev/", 8 | service_account_info=service_account, 9 | project="INSERT_PROJECT_ID" 10 | ) 11 | 12 | block.save("dev") 
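The three scripts above register reusable Prefect blocks by name, and `gcp_cloud_run.py` calls `GcpCredentials.load("default-credentials")`, so the credentials block must be registered first. A minimal sketch of registering and verifying the blocks, assuming `service_account.json` sits in the working directory and that a `PREFECT_API_KEY` environment variable (a placeholder name here) holds your Prefect Cloud key:

```bash
# Authenticate against Prefect Cloud before saving any blocks.
prefect cloud login --key "$PREFECT_API_KEY"

# Register the credentials block first; the Cloud Run block depends on it.
python blocks/gcs_credentials.py
python blocks/gcp_cloud_run.py
python blocks/gcs_prefect_deployments_bucket.py

# Confirm all three blocks exist under the names the deployment script loads.
prefect block ls
```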
-------------------------------------------------------------------------------- /7_prefect_deployment/prefect-flows/trigger-ram-api/.prefectignore: -------------------------------------------------------------------------------- 1 | # prefect artifacts 2 | .prefectignore 3 | 4 | # python artifacts 5 | __pycache__/ 6 | *.py[cod] 7 | *$py.class 8 | *.egg-info/ 9 | *.egg 10 | 11 | # Type checking artifacts 12 | .mypy_cache/ 13 | .dmypy.json 14 | dmypy.json 15 | .pyre/ 16 | 17 | # IPython 18 | profile_default/ 19 | ipython_config.py 20 | *.ipynb_checkpoints/* 21 | 22 | # Environments 23 | .python-version 24 | .env 25 | .venv 26 | env/ 27 | venv/ 28 | 29 | # MacOS 30 | .DS_Store 31 | 32 | # Dask 33 | dask-worker-space/ 34 | 35 | # Editors 36 | .idea/ 37 | .vscode/ 38 | 39 | # VCS 40 | .git/ 41 | .hg/ 42 | -------------------------------------------------------------------------------- /7_prefect_deployment/prefect-flows/trigger-ram-api/trigger_ram_api.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import requests 4 | from prefect import flow, task 5 | from pydantic import BaseModel 6 | 7 | 8 | BASE_URL = "https://rickandmortyapi.com/api" 9 | CLOUD_TASK_FUNCTION_URL = "INSERT_CLOUD_TASK_FUNCTION_URL" 10 | 11 | 12 | class Model(BaseModel): 13 | project: str 14 | region: str 15 | service_account: str 16 | function_url: str 17 | task_queue: str 18 | endpoint: str 19 | 20 | 21 | @task() 22 | def get_pages(endpoint: str): 23 | response = requests.get(url=f"{BASE_URL}/{endpoint}") 24 | response.raise_for_status() 25 | response = response.json() 26 | pages = response.get("info").get("pages") 27 | return pages 28 | 29 | 30 | @task(log_prints=True) 31 | def start_process(pages: int, model: Model): 32 | task_request = {"data": {"endpoint": model.endpoint, "api_params": {}}} 33 | data = { 34 | "data": { 35 | "project": model.project, 36 | "region": model.region, 37 | "service_account": model.service_account, 38 | "number_of_iterations": pages, 39 | "function_url": model.function_url, 40 | "queue": model.task_queue, 41 | "task_request": task_request, 42 | } 43 | } 44 | headers = {"Content-Type": "application/json"} 45 | 46 | print(f"LOGGING: Sending request with data: {json.dumps(data)}") 47 | response = requests.post( 48 | CLOUD_TASK_FUNCTION_URL, data=json.dumps(data), headers=headers 49 | ) 50 | response.raise_for_status() 51 | return "DONE" 52 | 53 | 54 | @flow(log_prints=True) 55 | def trigger_ram_api_pipeline(model: Model): 56 | print("LOGGING: Triggering RAM API Request Pipeline") 57 | pages = get_pages("character") 58 | start_process(pages, model) 59 | return "DONE" 60 | 61 | 62 | if __name__ == "__main__": 63 | model = Model( 64 | project="INSERT_PROJECT", 65 | region="INSERT_REGION", 66 | service_account="INSERT_SERVICE_ACCOUNT", 67 | function_url="INSERT_FUNCTION_URL", 68 | task_queue="INSERT_TASK_QUEUE_NAME", 69 | endpoint="character", 70 | ) 71 | trigger_ram_api_pipeline(model) 72 | -------------------------------------------------------------------------------- /7_prefect_deployment/prefect-flows/trigger-ram-api/trigger_ram_api_pipeline-deployment.py: -------------------------------------------------------------------------------- 1 | from prefect.deployments import Deployment 2 | from prefect.orion.schemas.schedules import RRuleSchedule 3 | from prefect.filesystems import GCS 4 | from prefect_gcp.cloud_run import CloudRunJob 5 | 6 | from trigger_ram_api import trigger_ram_api_pipeline 7 | 8 | storage = GCS.load("dev") 9 | 
cloud_run_block = CloudRunJob.load("cloud-run-infrastructure") 10 | 11 | deployment = Deployment.build_from_flow( 12 | flow=trigger_ram_api_pipeline, 13 | name="trigger-ram-api-pipeline-deployment", 14 | version=1, 15 | work_queue_name="main", 16 | schedule=RRuleSchedule(rrule="RRULE:FREQ=DAILY"), 17 | storage=storage, 18 | infrastructure=cloud_run_block, 19 | parameters={ 20 | "model": { 21 | "project": "INSERT_PROJECT_ID", 22 | "region": "INSERT_REGION", 23 | "service_account": "INSERT_SERVICE_ACCOUNT", 24 | "function_url": "INSERT_FUNCTION_URL", 25 | "task_queue": "INSERT_TASK_QUEUE_NAME", 26 | "endpoint": "character", 27 | } 28 | }, 29 | ) 30 | 31 | deployment.apply() 32 | -------------------------------------------------------------------------------- /8_terraform_management/.gitignore: -------------------------------------------------------------------------------- 1 | # Terraform 2 | *.tfvars 3 | */.terraform/ 4 | **.tfstate.backup 5 | *.backup 6 | 7 | service_account.json 8 | 9 | # Byte-compiled / optimized / DLL files 10 | __pycache__/ 11 | *.py[cod] 12 | *$py.class 13 | 14 | # C extensions 15 | *.so 16 | 17 | # Distribution / packaging 18 | .Python 19 | build/ 20 | develop-eggs/ 21 | dist/ 22 | downloads/ 23 | eggs/ 24 | .eggs/ 25 | lib/ 26 | lib64/ 27 | parts/ 28 | sdist/ 29 | var/ 30 | wheels/ 31 | share/python-wheels/ 32 | *.egg-info/ 33 | .installed.cfg 34 | *.egg 35 | MANIFEST 36 | 37 | # PyInstaller 38 | # Usually these files are written by a python script from a template 39 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 40 | *.manifest 41 | *.spec 42 | 43 | # Installer logs 44 | pip-log.txt 45 | pip-delete-this-directory.txt 46 | 47 | # Unit test / coverage reports 48 | htmlcov/ 49 | .tox/ 50 | .nox/ 51 | .coverage 52 | .coverage.* 53 | .cache 54 | nosetests.xml 55 | coverage.xml 56 | *.cover 57 | *.py,cover 58 | .hypothesis/ 59 | .pytest_cache/ 60 | cover/ 61 | 62 | # Translations 63 | *.mo 64 | *.pot 65 | 66 | # Django stuff: 67 | *.log 68 | local_settings.py 69 | db.sqlite3 70 | db.sqlite3-journal 71 | 72 | # Flask stuff: 73 | instance/ 74 | .webassets-cache 75 | 76 | # Scrapy stuff: 77 | .scrapy 78 | 79 | # Sphinx documentation 80 | docs/_build/ 81 | 82 | # PyBuilder 83 | .pybuilder/ 84 | target/ 85 | 86 | # Jupyter Notebook 87 | .ipynb_checkpoints 88 | 89 | # IPython 90 | profile_default/ 91 | ipython_config.py 92 | 93 | # pyenv 94 | # For a library or package, you might want to ignore these files since the code is 95 | # intended to run in multiple environments; otherwise, check them in: 96 | # .python-version 97 | 98 | # pipenv 99 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 100 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 101 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 102 | # install all needed dependencies. 103 | #Pipfile.lock 104 | 105 | # poetry 106 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 107 | # This is especially recommended for binary packages to ensure reproducibility, and is more 108 | # commonly ignored for libraries. 109 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 110 | #poetry.lock 111 | 112 | # pdm 113 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
114 | #pdm.lock 115 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 116 | # in version control. 117 | # https://pdm.fming.dev/#use-with-ide 118 | .pdm.toml 119 | 120 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 121 | __pypackages__/ 122 | 123 | # Celery stuff 124 | celerybeat-schedule 125 | celerybeat.pid 126 | 127 | # SageMath parsed files 128 | *.sage.py 129 | 130 | # Environments 131 | .env 132 | .venv 133 | env/ 134 | venv/ 135 | ENV/ 136 | env.bak/ 137 | venv.bak/ 138 | 139 | # Spyder project settings 140 | .spyderproject 141 | .spyproject 142 | 143 | # Rope project settings 144 | .ropeproject 145 | 146 | # mkdocs documentation 147 | /site 148 | 149 | # mypy 150 | .mypy_cache/ 151 | .dmypy.json 152 | dmypy.json 153 | 154 | # Pyre type checker 155 | .pyre/ 156 | 157 | # pytype static type analyzer 158 | .pytype/ 159 | 160 | # Cython debug symbols 161 | cython_debug/ 162 | 163 | # PyCharm 164 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 165 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 166 | # and can be added to the global gitignore or merged into this file. For a more nuclear 167 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 168 | #.idea/ -------------------------------------------------------------------------------- /8_terraform_management/README.md: -------------------------------------------------------------------------------- 1 | # Setup 2 | ## 1. Install Terraform 3 | Follow the official Terraform installation guide: https://developer.hashicorp.com/terraform/install 4 | 5 | ## 2. Create a project 6 | ```bash 7 | gcloud projects create ... 8 | ``` 9 | 10 | ## 3. Create a service account 11 | Set the correct project variable: 12 | ```bash 13 | gcloud config set project real-world-python-tf 14 | ``` 15 | 16 | Create the service account: 17 | ```bash 18 | gcloud iam service-accounts create terraform \ 19 | --description="Service Account to use with Terraform" 20 | ``` 21 | 22 | Create the key file: 23 | ```bash 24 | gcloud iam service-accounts keys create service_account.json \ 25 | --iam-account=terraform@real-world-python-tf.iam.gserviceaccount.com 26 | ``` 27 | 28 | Grant the Editor role: 29 | ```bash 30 | gcloud projects add-iam-policy-binding real-world-python-tf \ 31 | --member=serviceAccount:terraform@real-world-python-tf.iam.gserviceaccount.com \ 32 | --role=roles/editor 33 | ``` 34 | 35 | # Resources 36 | - IAM 37 | - Service Accounts 38 | - Google Cloud Storage 39 | - BigQuery Dataset 40 | - Cloud Run 41 | - Cloud Functions 42 | - Compute Engine 43 | - Artifact Registry 44 | - Cloud Build 45 | - Cloud Tasks 46 | 47 | ## Notes 48 | - We don't deploy Cloud Functions with Terraform because the separate CI/CD pipeline in `4_continuous_deployment` already redeploys them from the codebase. -------------------------------------------------------------------------------- /8_terraform_management/terraform/.terraform.lock.hcl: -------------------------------------------------------------------------------- 1 | # This file is maintained automatically by "terraform init". 2 | # Manual edits may be lost in future updates.
3 | 4 | provider "registry.terraform.io/hashicorp/google" { 5 | version = "4.49.0" 6 | constraints = "4.49.0" 7 | hashes = [ 8 | "h1:DOYre+TiAErDprRnWy3HB1skET0rNtTqVT6HxNtY11M=", 9 | "zh:06759e2760d4c462d1c29ffae91f14be05b478570804c38fdac343bf4e40362e", 10 | "zh:3728742e19d8df05954c48d09e0ed88943f940c18d200f0e872028d576567d63", 11 | "zh:5678d0d22e7345adaa0f780e8160a828fd7a6d11fc5be8fb57f079214b9bc99a", 12 | "zh:5e855410c1ccbe3e6d75d1d0fb7dde8aef79b45b885cfa3204a1af157a5dc39f", 13 | "zh:64338003cc5b8ab93a504ae8cdd05d1c60deef411b2ae357ae5f8af8b7592bdf", 14 | "zh:9fa9c1ae81faf8158f46845e92871fa12900ff04620479f0162619383cf737a4", 15 | "zh:b73ee8222bcb9fbd73ca4c0c56664d57467c9371bc7bce61988e958771d8e697", 16 | "zh:c94cfec61659cd20921acb1dd9b2694f88f14fbb4191b8c55700ccba26464828", 17 | "zh:d3231ad73401d735cf9323347a277d44028038ec5cde29f2ca77e3be1ea9e68a", 18 | "zh:d4a15dee6a2d5b1cb73db94d6b8b16d29f7a7295b2a6ffdc8cb50caa968f763c", 19 | "zh:e7f8d35a982986cef712da221bf5f1e309397559c397a11758c30c45919188b0", 20 | "zh:f569b65999264a9416862bca5cd2a6177d94ccb0424f3a4ef424428912b9cb3c", 21 | ] 22 | } 23 | 24 | provider "registry.terraform.io/hashicorp/google-beta" { 25 | version = "4.49.0" 26 | hashes = [ 27 | "h1:RTBBszooJGdqyPJE0pQGVWwCslTAEwSCQGLIRTDuPrc=", 28 | "zh:03f7d936d581dff14bb963b05a444a9c0b1fd41d26375ce27918fb7c45614689", 29 | "zh:3c64e80aa346a812de3ff0b351c3d50e7fe1c44747426bfd2087bd1a887eb45b", 30 | "zh:3fe17786069603a212feacc2f1c816b0f491dcff1a6d8c5385ecbdee993d35a0", 31 | "zh:50355aa91cf3f9a534aaabf71d4aa89bd59c1553bec0030359a320e468647b76", 32 | "zh:53f71886192e2a579f202f1f337d95793f1c9222d5e577a2dac3e29f2cde22a3", 33 | "zh:871560b92de695c876203ccc771ea25934c142a227401b7f125e8f3926715648", 34 | "zh:8771df7f2fc1c3125da0861dd7270104c0cd529f1c27b9a1691b13a43413f47a", 35 | "zh:ad45781ac59d0a3a7b7ecae07240c40c876e31d4bf72e7d0a26e09e96b854417", 36 | "zh:da7830cdcbeecae58954c95ff717944dcb40666039cb6dc13a771ceb3fdf5a2b", 37 | "zh:dc5965b64312f5065fac91e0dbcbbafab6e871902295a4711a5bfbb84f3a7ed4", 38 | "zh:df93f8cbdf6d77feccb57f79f074a5821266c9cb8f5b1749e578a575a178eb3f", 39 | "zh:f569b65999264a9416862bca5cd2a6177d94ccb0424f3a4ef424428912b9cb3c", 40 | ] 41 | } 42 | -------------------------------------------------------------------------------- /8_terraform_management/terraform/artifact_registry.tf: -------------------------------------------------------------------------------- 1 | resource "google_artifact_registry_repository" "prefect-flows-docker" { 2 | provider = google-beta 3 | location = var.region 4 | repository_id = "prefect-flows-docker" 5 | description = "Docker repository for Prefect flows" 6 | format = "DOCKER" 7 | depends_on = [google_project_service.api_services] 8 | } -------------------------------------------------------------------------------- /8_terraform_management/terraform/bigquery.tf: -------------------------------------------------------------------------------- 1 | resource "google_bigquery_dataset" "rick_and_morty" { 2 | dataset_id = "rick_and_morty" 3 | location = "EU" 4 | depends_on = [google_project_service.api_services] 5 | } -------------------------------------------------------------------------------- /8_terraform_management/terraform/cloud_build.tf: -------------------------------------------------------------------------------- 1 | resource "google_cloudbuild_trigger" "fn-ram-api-request" { 2 | location = "europe-west1" 3 | name = "build-fn-ram-api-request" 4 | filename = "4_continuous_deployment/fn_ram_api_request/ci-cd/cloudbuild.yaml" 5 | substitutions 
= { 6 | _SERVICE_ACCOUNT = google_service_account.real-world-python.email 7 | } 8 | github { 9 | owner = "danilo-nzyte" 10 | name = "real_world_python_tutorials" 11 | push { 12 | branch = "^main$" 13 | } 14 | } 15 | included_files = [ 16 | "4_continuous_deployment/fn_ram_api_request/**", 17 | ] 18 | ignored_files = [ 19 | "*.sh", 20 | ] 21 | service_account = google_service_account.real-world-python.id 22 | } 23 | 24 | resource "google_cloudbuild_trigger" "fn-load-to-bq" { 25 | location = "europe-west1" 26 | name = "build-fn-load-to-bq" 27 | filename = "4_continuous_deployment/fn_load_to_bq/ci-cd/cloudbuild.yaml" 28 | substitutions = { 29 | _SERVICE_ACCOUNT = google_service_account.real-world-python.email 30 | } 31 | github { 32 | owner = "danilo-nzyte" 33 | name = "real_world_python_tutorials" 34 | push { 35 | branch = "^main$" 36 | } 37 | } 38 | included_files = [ 39 | "4_continuous_deployment/fn_load_to_bq/**", 40 | ] 41 | ignored_files = [ 42 | "*.sh", 43 | ] 44 | service_account = google_service_account.real-world-python.id 45 | } -------------------------------------------------------------------------------- /8_terraform_management/terraform/cloud_tasks.tf: -------------------------------------------------------------------------------- 1 | resource "google_cloud_tasks_queue" "fn-ram-api-request" { 2 | name = "fn-ram-api-request" 3 | location = var.region 4 | rate_limits { 5 | max_dispatches_per_second = 500 6 | } 7 | retry_config { 8 | max_attempts = -1 9 | min_backoff = "0.100s" 10 | max_backoff = "3600s" 11 | max_doublings = 16 12 | } 13 | depends_on = [google_project_service.api_services] 14 | } 15 | 16 | resource "google_cloud_tasks_queue" "fn-load-bq-data" { 17 | name = "fn-load-bq-data" 18 | location = var.region 19 | rate_limits { 20 | max_dispatches_per_second = 500 21 | } 22 | retry_config { 23 | max_attempts = -1 24 | min_backoff = "0.100s" 25 | max_backoff = "3600s" 26 | max_doublings = 16 27 | } 28 | depends_on = [google_project_service.api_services] 29 | } -------------------------------------------------------------------------------- /8_terraform_management/terraform/compute.tf: -------------------------------------------------------------------------------- 1 | resource "google_compute_instance" "prefect-agent" { 2 | name = "${var.project_id}-prefect-agent" 3 | zone = var.zone 4 | machine_type = "e2-micro" 5 | metadata_startup_script = file("./sh_scripts/prefect_agent.sh") 6 | 7 | boot_disk { 8 | initialize_params { 9 | image = "ubuntu-2004-focal-v20230104" 10 | } 11 | } 12 | network_interface { 13 | network = "default" 14 | subnetwork = "default" 15 | access_config { 16 | network_tier = "PREMIUM" 17 | } 18 | } 19 | 20 | service_account { 21 | scopes = [ 22 | "cloud-platform", 23 | ] 24 | email = google_service_account.prefect.email 25 | } 26 | 27 | metadata = { 28 | PREFECT_API_KEY = var.prefect_api_key 29 | PREFECT_WORKSPACE = var.prefect_workspace 30 | } 31 | 32 | depends_on = [google_project_service.api_services] 33 | } 34 | -------------------------------------------------------------------------------- /8_terraform_management/terraform/main.tf: -------------------------------------------------------------------------------- 1 | terraform { 2 | required_providers { 3 | google = { 4 | source = "hashicorp/google" 5 | version = "4.49.0" 6 | } 7 | } 8 | } 9 | 10 | provider "google" { 11 | credentials = file("service_account.json") 12 | 13 | project = "real-world-python-tf" 14 | region = "europe-west2" 15 | zone = "europe-west2-b" 16 | } 17 | 18 | provider 
"google-beta" { 19 | credentials = file("service_account.json") 20 | 21 | project = "real-world-python-tf" 22 | region = "europe-west2" 23 | zone = "europe-west2-b" 24 | } 25 | 26 | data "google_project" "project" { 27 | } -------------------------------------------------------------------------------- /8_terraform_management/terraform/outputs.tf: -------------------------------------------------------------------------------- 1 | output "prefect_service_account_key" { 2 | value = google_service_account_key.prefect_service_account_key.private_key 3 | sensitive = true 4 | } -------------------------------------------------------------------------------- /8_terraform_management/terraform/service_accounts.tf: -------------------------------------------------------------------------------- 1 | resource "google_service_account" "real-world-python" { 2 | account_id = "real-world-python" 3 | display_name = "real-world-python" 4 | description = "Service Account for Real World Python Project" 5 | depends_on = [google_project_service.api_services] 6 | } 7 | 8 | resource "google_project_iam_member" "real-world-python-service-account-iam" { 9 | for_each = toset([ 10 | "roles/iam.serviceAccountUser", 11 | "roles/bigquery.dataEditor", 12 | "roles/bigquery.jobUser", 13 | "roles/cloudfunctions.admin", 14 | "roles/storage.admin", 15 | "roles/run.developer", 16 | "roles/logging.logWriter" 17 | ]) 18 | role = each.value 19 | project = var.project_id 20 | member = "serviceAccount:${google_service_account.real-world-python.email}" 21 | } 22 | 23 | resource "google_service_account" "prefect" { 24 | account_id = "prefect" 25 | display_name = "prefect" 26 | description = "Authorisation to use with Prefect Cloud and Prefect Agent" 27 | depends_on = [google_project_service.api_services] 28 | } 29 | 30 | resource "google_project_iam_member" "prefect-service-account-iam" { 31 | for_each = toset([ 32 | "roles/iam.serviceAccountUser", 33 | "roles/run.admin", 34 | "roles/logging.admin" 35 | ]) 36 | role = each.value 37 | project = var.project_id 38 | member = "serviceAccount:${google_service_account.prefect.email}" 39 | depends_on = [google_project_service.api_services] 40 | } 41 | 42 | resource "google_service_account_key" "prefect_service_account_key" { 43 | service_account_id = google_service_account.prefect.name 44 | public_key_type = "TYPE_X509_PEM_FILE" 45 | depends_on = [google_project_service.api_services] 46 | } 47 | 48 | resource "google_service_account_iam_member" "real-world-python-cloud-build-default" { 49 | 50 | service_account_id = "projects/${var.project_id}/serviceAccounts/${data.google_project.project.number}-compute@developer.gserviceaccount.com" 51 | for_each = toset([ 52 | "roles/iam.serviceAccountUser", 53 | ]) 54 | role = each.value 55 | member = "serviceAccount:${google_service_account.real-world-python.email}" 56 | } 57 | -------------------------------------------------------------------------------- /8_terraform_management/terraform/services.tf: -------------------------------------------------------------------------------- 1 | resource "google_project_service" "api_services" { 2 | project = var.project_id 3 | for_each = toset( 4 | [ 5 | "iam.googleapis.com", 6 | "compute.googleapis.com", 7 | "bigquery.googleapis.com", 8 | "run.googleapis.com", 9 | "cloudfunctions.googleapis.com", 10 | "artifactregistry.googleapis.com", 11 | "cloudbuild.googleapis.com", 12 | "cloudtasks.googleapis.com", 13 | "cloudresourcemanager.googleapis.com", 14 | ] 15 | ) 16 | service = each.key 17 | disable_on_destroy = 
false 18 | disable_dependent_services = true 19 | } 20 | -------------------------------------------------------------------------------- /8_terraform_management/terraform/sh_scripts/prefect_agent.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | curl -sSO https://dl.google.com/cloudagents/install-logging-agent.sh && sudo bash install-logging-agent.sh 3 | sudo apt-get update -y 4 | sudo apt-get upgrade -y 5 | sudo apt-get install -y \ 6 | ca-certificates \ 7 | curl \ 8 | gnupg \ 9 | lsb-release \ 10 | software-properties-common \ 11 | python3-dateutil \ 12 | python3.8 \ 13 | python3.8-dev \ 14 | python3.8-distutils \ 15 | python3.8-venv 16 | sudo ln -s /usr/bin/python3 /usr/bin/python 17 | python3.8 -m venv prefect-env 18 | source prefect-env/bin/activate 19 | wget https://bootstrap.pypa.io/get-pip.py 20 | python3 get-pip.py 21 | PATH="$HOME/.local/bin:$PATH" 22 | export PATH 23 | pip3 install prefect==2.7.7 prefect-gcp 24 | export PREFECT_API_KEY=`curl -H "Metadata-Flavor: Google" "http://metadata.google.internal/computeMetadata/v1/instance/attributes/PREFECT_API_KEY"` 25 | export PREFECT_WORKSPACE=`curl -H "Metadata-Flavor: Google" "http://metadata.google.internal/computeMetadata/v1/instance/attributes/PREFECT_WORKSPACE"` 26 | prefect cloud login -k $PREFECT_API_KEY 27 | prefect cloud workspace set -w $PREFECT_WORKSPACE 28 | prefect agent start -q main -------------------------------------------------------------------------------- /8_terraform_management/terraform/storage.tf: -------------------------------------------------------------------------------- 1 | resource "google_storage_bucket" "prefect-deployments" { 2 | name = "${var.project_id}-prefect-deployments" 3 | location = "EU" 4 | storage_class = "STANDARD" 5 | } 6 | 7 | resource "google_storage_bucket_iam_member" "prefect-deployments-prefect-service-account" { 8 | for_each = toset([ 9 | "roles/storage.objectAdmin", 10 | ]) 11 | role = each.value 12 | bucket = google_storage_bucket.prefect-deployments.name 13 | member = "serviceAccount:${google_service_account.prefect.email}" 14 | depends_on = [google_project_service.api_services] 15 | } 16 | -------------------------------------------------------------------------------- /8_terraform_management/terraform/terraform.tfstate: -------------------------------------------------------------------------------- 1 | { 2 | "version": 4, 3 | "terraform_version": "1.3.7", 4 | "serial": 152, 5 | "lineage": "f00765c8-e5ae-5055-db59-9949d3735520", 6 | "outputs": {}, 7 | "resources": [], 8 | "check_results": null 9 | } 10 | -------------------------------------------------------------------------------- /8_terraform_management/terraform/variables.tf: -------------------------------------------------------------------------------- 1 | variable "project_id" { 2 | description = "GCP Project ID" 3 | type = string 4 | } 5 | 6 | variable "region" { 7 | description = "GCP Region" 8 | type = string 9 | } 10 | 11 | variable "zone" { 12 | description = "GCP Zone" 13 | type = string 14 | } 15 | 16 | variable "prefect_api_key" { 17 | description = "Prefect API Key" 18 | type = string 19 | sensitive = true 20 | } 21 | 22 | variable "prefect_workspace" { 23 | description = "Prefect Workspace ID" 24 | type = string 25 | sensitive = true 26 | } --------------------------------------------------------------------------------
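Since `variables.tf` declares `project_id`, `region`, `zone`, `prefect_api_key`, and `prefect_workspace` without defaults, the usual workflow is to supply them through a `terraform.tfvars` file, which the `.gitignore` above already excludes via `*.tfvars`. A minimal sketch of the apply cycle with placeholder values (the project/region/zone mirror the values hardcoded in `main.tf`; the Prefect entries are placeholders):

```bash
cd 8_terraform_management/terraform

# Placeholder values; substitute your own project and Prefect details.
cat > terraform.tfvars <<'EOF'
project_id        = "real-world-python-tf"
region            = "europe-west2"
zone              = "europe-west2-b"
prefect_api_key   = "INSERT_PREFECT_API_KEY"
prefect_workspace = "INSERT_PREFECT_WORKSPACE"
EOF

terraform init     # installs the pinned google/google-beta 4.49.0 providers
terraform plan     # review the resources before creating them
terraform apply

# outputs.tf marks the Prefect key as sensitive; it is a base64-encoded
# JSON key, so decode it if you need the key file locally.
terraform output -raw prefect_service_account_key | base64 --decode > prefect_sa.json
```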