├── .gitignore ├── CI_CD_CT.png ├── pubsub-payload.json ├── .github └── workflows │ └── github-actions-demo.yml ├── compile-hello-wrold-pipeline.py ├── README.md ├── hello_world_scheduled_pipeline.json └── end-to-end-pytorch.json /.gitignore: -------------------------------------------------------------------------------- 1 | venv/ 2 | .idea/ 3 | .DS_Store 4 | .env 5 | .ipynb_checkpoints/ 6 | -------------------------------------------------------------------------------- /CI_CD_CT.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jy2k/Git-action-to-GCP-pubsub/main/CI_CD_CT.png -------------------------------------------------------------------------------- /pubsub-payload.json: -------------------------------------------------------------------------------- 1 | {"pipeline_spec_uri": "gs://pipelines-b4536/hello_world_scheduled_pipeline.json","parameter_values": {"greet_name": "something something darkside"}} 2 | -------------------------------------------------------------------------------- /.github/workflows/github-actions-demo.yml: -------------------------------------------------------------------------------- 1 | name: GitHub to pub sub action 2 | on: [push] 3 | jobs: 4 | Pipeline-trigger-actions: 5 | runs-on: ubuntu-latest 6 | env: 7 | GOOGLE_APPLICATION_CREDENTIALS: key.json 8 | steps: 9 | - uses: google-github-actions/setup-gcloud@master 10 | - run: | 11 | gcloud config set project test-cloud-day 12 | echo -n ${{ secrets.GCP_SA_KEY }} | base64 --decode > $GOOGLE_APPLICATION_CREDENTIALS 13 | gcloud auth activate-service-account --key-file=$GOOGLE_APPLICATION_CREDENTIALS 14 | gcloud pubsub topics publish projects/test-cloud-day/topics/test --message='{"pipeline_spec_uri": "gs://pipelines-b4536/dag-20211213214804.json","parameter_values": {}}' 15 | -------------------------------------------------------------------------------- /compile-hello-wrold-pipeline.py: 
-------------------------------------------------------------------------------- 1 | import json 2 | from kfp.v2 import compiler 3 | from kfp.v2 import dsl 4 | from kfp.v2.dsl import component 5 | 6 | # A simple component that prints and returns a greeting string 7 | @component 8 | def hello_world(message: str) -> str: 9 | greeting_str = f'Hello, {message}' 10 | #change something 11 | #change another thing 12 | print(greeting_str) 13 | return greeting_str 14 | 15 | # A simple pipeline that contains a single hello_world task 16 | @dsl.pipeline( 17 | name='hello-world-scheduled-pipeline') 18 | def hello_world_scheduled_pipeline(greet_name: str): 19 | hello_world_task = hello_world(greet_name) 20 | 21 | # Compile the pipeline and generate a JSON file 22 | compiler.Compiler().compile(pipeline_func=hello_world_scheduled_pipeline, 23 | package_path='hello_world_scheduled_pipeline.json') 24 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # From Script to training pipeline 2 | 3 | A Github Workflow located at [.github/workflows/github-actions-demo.yml](https://github.com/jy2k/github-actions-test/tree/main/.github/workflows). 4 | 5 | The Workflow: 6 | 1. Once a push is made to the repo the workflow triggers 7 | 2. The workflow sends a message to a pub/sub topic 8 | 3. Cloud Function trigger with base64 encoded JSON (see example pubsub-payload.json) 9 | 4. Cloud Function decodes message and starts pipelines 10 | 5. 
Pipeline trains a model 11 | 12 | Important to configure your project to run with vertex pipelines - [some APIs need to be enabled](https://console.cloud.google.com/vertex-ai/locations/us-central1/pipelines/runs/hello-world-scheduled-pipeline-20211120225731?project=1065652454167) 13 | 14 | This is a rough diagram of the end-to-end architecture: 15 | ![Screenshot](CI_CD_CT.png) 16 | 17 | [Here](https://cloud.google.com/vertex-ai/docs/pipelines/trigger-pubsub) is how to write a cloud function that picks up on a pub/sub message and triggers a pipeline. 18 | 19 | ## Optional 20 | The message in pub/sub can trigger any precompiled pipeline. 21 | Potentially you could start any piece of code in the cloud function - for example [here](https://github.com/jy2k/Kubeflow-v2-end-to-end) is an end-to-end pipeline that can be triggered. 22 | 23 | ### Secrets 24 | The pub/sub topic, Service account (in base 64), and project id are defined as secrets in the repo 25 | [Here](https://damienaicheh.github.io/github/actions/2021/04/15/environment-variables-secrets-github-actions-en.html) is how to create secrets in GitHub (also shows how to convert key.json to base64) 26 | [Here](https://medium.com/firebase-developers/create-automatic-firestore-backups-with-github-actions-abb12eef86a0) is how to save the service account JSON key in base64 27 | 28 | ### Service account 29 | [Create a Service account](https://cloud.google.com/iam/docs/creating-managing-service-accounts#creating) in your Google Project and download a JSON key. Encode it as base64 and save it to the repo's secrets. 30 | The minimum role that the service account needs is ["Pub/Sub Publisher"](https://cloud.google.com/iam/docs/understanding-roles#pub-sub-roles). 
31 | 32 | ### Testing 33 | Trigger a git commit and push 34 | Publish a message through the pub/sub UI 35 | 36 | Generally based the Github actions workflow on [this](https://github.com/google-github-actions/setup-gcloud) gcloud setup 37 | -------------------------------------------------------------------------------- /hello_world_scheduled_pipeline.json: -------------------------------------------------------------------------------- 1 | { 2 | "pipelineSpec": { 3 | "components": { 4 | "comp-hello-world": { 5 | "executorLabel": "exec-hello-world", 6 | "inputDefinitions": { 7 | "parameters": { 8 | "message": { 9 | "type": "STRING" 10 | } 11 | } 12 | }, 13 | "outputDefinitions": { 14 | "parameters": { 15 | "Output": { 16 | "type": "STRING" 17 | } 18 | } 19 | } 20 | } 21 | }, 22 | "deploymentSpec": { 23 | "executors": { 24 | "exec-hello-world": { 25 | "container": { 26 | "args": [ 27 | "--executor_input", 28 | "{{$}}", 29 | "--function_to_execute", 30 | "hello_world" 31 | ], 32 | "command": [ 33 | "sh", 34 | "-c", 35 | "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.8.9' && \"$0\" \"$@\"\n", 36 | "sh", 37 | "-ec", 38 | "program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.v2.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n", 39 | "\nimport kfp\nfrom kfp.v2 import dsl\nfrom kfp.v2.dsl import *\nfrom typing import *\n\ndef hello_world(message: str) -> str:\n greeting_str = f'Hello, {message}'\n print(greeting_str)\n return greeting_str\n\n" 40 | ], 41 | "image": "python:3.7" 42 | } 43 | } 44 | } 45 | }, 46 | "pipelineInfo": { 47 | "name": "hello-world-scheduled-pipeline" 48 | }, 49 | "root": { 50 | "dag": { 51 | "tasks": { 52 | "hello-world": { 53 | "cachingOptions": { 54 | "enableCache": true 55 | }, 56 | "componentRef": { 57 | "name": "comp-hello-world" 58 | }, 59 | "inputs": { 60 | "parameters": { 61 | "message": { 62 | "componentInputParameter": "greet_name" 63 | } 64 | } 65 | }, 66 | "taskInfo": { 67 | "name": "hello-world" 68 | } 69 | } 70 | } 71 | }, 72 | "inputDefinitions": { 73 | "parameters": { 74 | "greet_name": { 75 | "type": "STRING" 76 | } 77 | } 78 | } 79 | }, 80 | "schemaVersion": "2.0.0", 81 | "sdkVersion": "kfp-1.8.9" 82 | }, 83 | "runtimeConfig": {} 84 | } -------------------------------------------------------------------------------- /end-to-end-pytorch.json: -------------------------------------------------------------------------------- 1 | { 2 | "pipelineSpec": { 3 | "components": { 4 | "comp-endpoint-create": { 5 | "executorLabel": "exec-endpoint-create", 6 | "outputDefinitions": { 7 | "artifacts": { 8 | "endpoint": { 9 | "artifactType": { 10 | "schemaTitle": "system.Artifact", 11 | "schemaVersion": "0.0.1" 12 | } 13 | } 14 | } 15 | } 16 | }, 17 | "comp-model-deploy": { 18 | 
"executorLabel": "exec-model-deploy", 19 | "inputDefinitions": { 20 | "artifacts": { 21 | "endpoint": { 22 | "artifactType": { 23 | "schemaTitle": "system.Artifact", 24 | "schemaVersion": "0.0.1" 25 | } 26 | }, 27 | "model": { 28 | "artifactType": { 29 | "schemaTitle": "system.Model", 30 | "schemaVersion": "0.0.1" 31 | } 32 | } 33 | } 34 | }, 35 | "outputDefinitions": { 36 | "artifacts": { 37 | "endpoint": { 38 | "artifactType": { 39 | "schemaTitle": "system.Artifact", 40 | "schemaVersion": "0.0.1" 41 | } 42 | } 43 | } 44 | } 45 | }, 46 | "comp-model-upload": { 47 | "executorLabel": "exec-model-upload", 48 | "inputDefinitions": { 49 | "parameters": { 50 | "artifact_uri": { 51 | "type": "STRING" 52 | } 53 | } 54 | }, 55 | "outputDefinitions": { 56 | "artifacts": { 57 | "model": { 58 | "artifactType": { 59 | "schemaTitle": "system.Model", 60 | "schemaVersion": "0.0.1" 61 | } 62 | } 63 | } 64 | } 65 | }, 66 | "comp-preprocess": { 67 | "executorLabel": "exec-preprocess", 68 | "outputDefinitions": { 69 | "artifacts": { 70 | "output_csv_path": { 71 | "artifactType": { 72 | "schemaTitle": "system.Artifact", 73 | "schemaVersion": "0.0.1" 74 | } 75 | } 76 | } 77 | } 78 | }, 79 | "comp-train": { 80 | "executorLabel": "exec-train", 81 | "inputDefinitions": { 82 | "artifacts": { 83 | "input_csv_path": { 84 | "artifactType": { 85 | "schemaTitle": "system.Artifact", 86 | "schemaVersion": "0.0.1" 87 | } 88 | } 89 | }, 90 | "parameters": { 91 | "bucket": { 92 | "type": "STRING" 93 | }, 94 | "experiment_name": { 95 | "type": "STRING" 96 | }, 97 | "num_epochs": { 98 | "type": "INT" 99 | }, 100 | "project_id": { 101 | "type": "STRING" 102 | }, 103 | "run_name": { 104 | "type": "STRING" 105 | } 106 | } 107 | }, 108 | "outputDefinitions": { 109 | "artifacts": { 110 | "saved_model": { 111 | "artifactType": { 112 | "schemaTitle": "system.Model", 113 | "schemaVersion": "0.0.1" 114 | } 115 | } 116 | }, 117 | "parameters": { 118 | "artifact_uri": { 119 | "type": "STRING" 120 | } 121 | } 122 
| } 123 | } 124 | }, 125 | "deploymentSpec": { 126 | "executors": { 127 | "exec-endpoint-create": { 128 | "container": { 129 | "args": [ 130 | "--method.project", 131 | "test-cloud-day", 132 | "--method.display_name", 133 | "pipelines20211213151347", 134 | "--executor_input", 135 | "{{$}}", 136 | "--resource_name_output_artifact_uri", 137 | "{{$.outputs.artifacts['endpoint'].uri}}" 138 | ], 139 | "command": [ 140 | "python3", 141 | "-m", 142 | "google_cloud_pipeline_components.aiplatform.remote_runner", 143 | "--cls_name", 144 | "Endpoint", 145 | "--method_name", 146 | "create" 147 | ], 148 | "image": "gcr.io/ml-pipeline/google-cloud-pipeline-components:0.1.1" 149 | } 150 | }, 151 | "exec-model-deploy": { 152 | "container": { 153 | "args": [ 154 | "--init.project", 155 | "test-cloud-day", 156 | "--method.deployed_model_display_name", 157 | "model_display_name", 158 | "--method.machine_type", 159 | "n1-standard-4", 160 | "--executor_input", 161 | "{{$}}", 162 | "--resource_name_output_artifact_uri", 163 | "{{$.outputs.artifacts['endpoint'].uri}}", 164 | "--method.endpoint", 165 | "{{$.inputs.artifacts['endpoint'].uri}}", 166 | "--init.model_name", 167 | "{{$.inputs.artifacts['model'].uri}}" 168 | ], 169 | "command": [ 170 | "python3", 171 | "-m", 172 | "google_cloud_pipeline_components.aiplatform.remote_runner", 173 | "--cls_name", 174 | "Model", 175 | "--method_name", 176 | "deploy" 177 | ], 178 | "image": "gcr.io/ml-pipeline/google-cloud-pipeline-components:0.1.1" 179 | } 180 | }, 181 | "exec-model-upload": { 182 | "container": { 183 | "args": [ 184 | "--method.project", 185 | "test-cloud-day", 186 | "--method.display_name", 187 | "model20211213151347", 188 | "--method.serving_container_image_uri", 189 | "us-docker.pkg.dev/vertex-ai/prediction/sklearn-cpu.0-24:latest", 190 | "--executor_input", 191 | "{{$}}", 192 | "--resource_name_output_artifact_uri", 193 | "{{$.outputs.artifacts['model'].uri}}", 194 | "--method.artifact_uri", 195 | 
"{{$.inputs.parameters['artifact_uri']}}" 196 | ], 197 | "command": [ 198 | "python3", 199 | "-m", 200 | "google_cloud_pipeline_components.aiplatform.remote_runner", 201 | "--cls_name", 202 | "Model", 203 | "--method_name", 204 | "upload" 205 | ], 206 | "image": "gcr.io/ml-pipeline/google-cloud-pipeline-components:0.1.1" 207 | } 208 | }, 209 | "exec-preprocess": { 210 | "container": { 211 | "args": [ 212 | "--executor_input", 213 | "{{$}}", 214 | "--function_to_execute", 215 | "preprocess" 216 | ], 217 | "command": [ 218 | "sh", 219 | "-c", 220 | "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'pandas' 'google-cloud-aiplatform' 'google-cloud-bigquery-storage' 'google-cloud-bigquery' 'pyarrow' 'kfp==1.8.10' && \"$0\" \"$@\"\n", 221 | "sh", 222 | "-ec", 223 | "program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.v2.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n", 224 | "\nimport kfp\nfrom kfp.v2 import dsl\nfrom kfp.v2.dsl import *\nfrom typing import *\n\ndef preprocess(output_csv_path: OutputPath('CSV')):\n #1\n from google.cloud import bigquery\n import google.auth\n\n creds, project = google.auth.default()\n client = bigquery.Client(project='test-cloud-day', credentials=creds)\n\n query = \"\"\"\n SELECT * FROM `test-cloud-day.imdb.imdb_review_sentiment_strings`\n \"\"\"\n print(query)\n\n dataframe = client.query(query).to_dataframe()\n print(dataframe.head())\n\n dataframe.to_csv(output_csv_path)\n print(\"done\")\n\n" 225 | ], 226 | "image": "python:3.7" 227 | } 228 | }, 229 | "exec-train": { 230 | "container": { 231 | "args": [ 232 | "--executor_input", 233 | "{{$}}", 234 | "--function_to_execute", 235 | "train" 236 | ], 237 | "command": [ 238 | "sh", 239 | "-c", 240 | "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'pandas' 'imbalanced-learn' 'google-cloud-aiplatform' 'pyarrow' 'kfp==1.8.10' && \"$0\" \"$@\"\n", 241 | "sh", 242 | "-ec", 243 | "program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.v2.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n", 244 | "\nimport kfp\nfrom kfp.v2 import dsl\nfrom kfp.v2.dsl import *\nfrom typing import *\n\ndef train(input_csv_path: InputPath('CSV'), saved_model: Output[Model], artifact_uri: OutputPath(str), experiment_name: str, run_name: str, num_epochs: int, project_id: str, bucket: str):\n from google.cloud import aiplatform\n from typing import NamedTuple\n\n aiplatform.init(\n project=project_id,\n location='us-central1',\n staging_bucket=bucket,\n experiment=experiment_name,\n )\n\n aiplatform.start_run(run_name)\n parameters = {\"epochs\": num_epochs}\n aiplatform.log_params(parameters)\n\n #1\n import pandas as pd\n from imblearn.under_sampling import RandomUnderSampler\n df_review = pd.read_csv(input_csv_path)\n print(len(df_review))\n\n df_positive = df_review[df_review['string_field_1']=='positive'][:9000]\n df_negative = df_review[df_review['string_field_1']=='negative'][:1000]\n\n df_review_imb = pd.concat([df_positive, df_negative])\n df_review_imb.value_counts(['string_field_1'])\n\n print(len(df_review_imb))\n rus = RandomUnderSampler(random_state=0)\n df_review_bal, df_review_bal['string_field_1']=rus.fit_resample(df_review_imb[['string_field_0']], df_review_imb['string_field_1'])\n\n print(len(df_review_bal))\n from sklearn.model_selection import train_test_split\n\n train, test = train_test_split(df_review_bal, test_size=0.33, random_state=42)\n train_x, train_y = train['string_field_0'], 
train['string_field_1']\n test_x, test_y = test['string_field_0'], test['string_field_1']\n\n print(\"train x values count\")\n print(len(train_x))\n print(\"train y values count\")\n print(train_y.value_counts())\n\n from sklearn.feature_extraction.text import TfidfVectorizer\n\n tfidf = TfidfVectorizer(stop_words='english')\n train_x_vector = tfidf.fit_transform(train_x)\n test_x_vector = tfidf.transform(test_x)\n\n print(train_x_vector)\n from sklearn.svm import SVC\n\n svc = SVC(kernel='linear')\n svc.fit(train_x_vector, train_y)\n\n print(svc.score(test_x_vector, test_y))\n\n #aiplatform.log_metrics({\"accuracy\": accu})\n import joblib\n import os\n\n joblib.dump(svc, os.path.join(saved_model.path.replace(\"saved_model\",\"\"), 'model.joblib'))\n print(\" saved_model.path: \"+ saved_model.path)\n print(\" saved_model.uri: \"+ saved_model.uri)\n with open(artifact_uri, 'w') as f:\n f.write(saved_model.uri.replace(\"saved_model\",\"\"))\n\n print(saved_model.uri) \n\n" 245 | ], 246 | "image": "python:3.7" 247 | } 248 | } 249 | } 250 | }, 251 | "pipelineInfo": { 252 | "name": "train-scikit2e149bd9-c559-40b6-84be-25223b08b420" 253 | }, 254 | "root": { 255 | "dag": { 256 | "tasks": { 257 | "endpoint-create": { 258 | "cachingOptions": { 259 | "enableCache": true 260 | }, 261 | "componentRef": { 262 | "name": "comp-endpoint-create" 263 | }, 264 | "taskInfo": { 265 | "name": "endpoint-create" 266 | } 267 | }, 268 | "model-deploy": { 269 | "cachingOptions": { 270 | "enableCache": true 271 | }, 272 | "componentRef": { 273 | "name": "comp-model-deploy" 274 | }, 275 | "dependentTasks": [ 276 | "endpoint-create", 277 | "model-upload" 278 | ], 279 | "inputs": { 280 | "artifacts": { 281 | "endpoint": { 282 | "taskOutputArtifact": { 283 | "outputArtifactKey": "endpoint", 284 | "producerTask": "endpoint-create" 285 | } 286 | }, 287 | "model": { 288 | "taskOutputArtifact": { 289 | "outputArtifactKey": "model", 290 | "producerTask": "model-upload" 291 | } 292 | } 293 | } 294 | 
}, 295 | "taskInfo": { 296 | "name": "model-deploy" 297 | } 298 | }, 299 | "model-upload": { 300 | "cachingOptions": { 301 | "enableCache": true 302 | }, 303 | "componentRef": { 304 | "name": "comp-model-upload" 305 | }, 306 | "dependentTasks": [ 307 | "train" 308 | ], 309 | "inputs": { 310 | "parameters": { 311 | "artifact_uri": { 312 | "taskOutputParameter": { 313 | "outputParameterKey": "artifact_uri", 314 | "producerTask": "train" 315 | } 316 | } 317 | } 318 | }, 319 | "taskInfo": { 320 | "name": "model-upload" 321 | } 322 | }, 323 | "preprocess": { 324 | "cachingOptions": { 325 | "enableCache": true 326 | }, 327 | "componentRef": { 328 | "name": "comp-preprocess" 329 | }, 330 | "taskInfo": { 331 | "name": "preprocess" 332 | } 333 | }, 334 | "train": { 335 | "cachingOptions": { 336 | "enableCache": true 337 | }, 338 | "componentRef": { 339 | "name": "comp-train" 340 | }, 341 | "dependentTasks": [ 342 | "preprocess" 343 | ], 344 | "inputs": { 345 | "artifacts": { 346 | "input_csv_path": { 347 | "taskOutputArtifact": { 348 | "outputArtifactKey": "output_csv_path", 349 | "producerTask": "preprocess" 350 | } 351 | } 352 | }, 353 | "parameters": { 354 | "bucket": { 355 | "runtimeValue": { 356 | "constantValue": { 357 | "stringValue": "pipelines-b4536" 358 | } 359 | } 360 | }, 361 | "experiment_name": { 362 | "runtimeValue": { 363 | "constantValue": { 364 | "stringValue": "test-20211213151347" 365 | } 366 | } 367 | }, 368 | "num_epochs": { 369 | "runtimeValue": { 370 | "constantValue": { 371 | "intValue": "1" 372 | } 373 | } 374 | }, 375 | "project_id": { 376 | "runtimeValue": { 377 | "constantValue": { 378 | "stringValue": "test-cloud-day" 379 | } 380 | } 381 | }, 382 | "run_name": { 383 | "runtimeValue": { 384 | "constantValue": { 385 | "stringValue": "test-run-20211213151347" 386 | } 387 | } 388 | } 389 | } 390 | }, 391 | "taskInfo": { 392 | "name": "train" 393 | } 394 | } 395 | } 396 | }, 397 | "inputDefinitions": { 398 | "parameters": { 399 | "bucket": { 400 | 
"type": "STRING" 401 | }, 402 | "project": { 403 | "type": "STRING" 404 | } 405 | } 406 | } 407 | }, 408 | "schemaVersion": "2.0.0", 409 | "sdkVersion": "kfp-1.8.10" 410 | }, 411 | "runtimeConfig": { 412 | "parameters": { 413 | "bucket": { 414 | "stringValue": "pipelines-b4536" 415 | }, 416 | "project": { 417 | "stringValue": "test-cloud-day" 418 | } 419 | } 420 | } 421 | } --------------------------------------------------------------------------------