├── README.md
├── iris.csv
├── etl-Yarkoni.ipynb
└── Schedule_notebook_Vertex_training.ipynb
/README.md:
--------------------------------------------------------------------------------
# Vertex-workbench-schedule-notebook

1. Start a Vertex AI Workbench notebook instance.
2. Clone this repo into the instance using the built-in Git integration.
3. Open "Schedule_notebook_Vertex_training.ipynb" and execute (or schedule) it from the notebook UI.
--------------------------------------------------------------------------------
/iris.csv:
--------------------------------------------------------------------------------
,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2
5,5.4,3.9,1.7,0.4
6,4.6,3.4,1.4,0.3
7,5.0,3.4,1.5,0.2
8,4.4,2.9,1.4,0.2
9,4.9,3.1,1.5,0.1
10,5.4,3.7,1.5,0.2
11,4.8,3.4,1.6,0.2
12,4.8,3.0,1.4,0.1
13,4.3,3.0,1.1,0.1
14,5.8,4.0,1.2,0.2
15,5.7,4.4,1.5,0.4
16,5.4,3.9,1.3,0.4
17,5.1,3.5,1.4,0.3
18,5.7,3.8,1.7,0.3
19,5.1,3.8,1.5,0.3
20,5.4,3.4,1.7,0.2
21,5.1,3.7,1.5,0.4
22,4.6,3.6,1.0,0.2
23,5.1,3.3,1.7,0.5
24,4.8,3.4,1.9,0.2
25,5.0,3.0,1.6,0.2
26,5.0,3.4,1.6,0.4
27,5.2,3.5,1.5,0.2
28,5.2,3.4,1.4,0.2
29,4.7,3.2,1.6,0.2
30,4.8,3.1,1.6,0.2
31,5.4,3.4,1.5,0.4
32,5.2,4.1,1.5,0.1
33,5.5,4.2,1.4,0.2
34,4.9,3.1,1.5,0.2
35,5.0,3.2,1.2,0.2
36,5.5,3.5,1.3,0.2
37,4.9,3.6,1.4,0.1
38,4.4,3.0,1.3,0.2
39,5.1,3.4,1.5,0.2
40,5.0,3.5,1.3,0.3
41,4.5,2.3,1.3,0.3
42,4.4,3.2,1.3,0.2
43,5.0,3.5,1.6,0.6
44,5.1,3.8,1.9,0.4
45,4.8,3.0,1.4,0.3
46,5.1,3.8,1.6,0.2
47,4.6,3.2,1.4,0.2
48,5.3,3.7,1.5,0.2
49,5.0,3.3,1.4,0.2
50,7.0,3.2,4.7,1.4
51,6.4,3.2,4.5,1.5
52,6.9,3.1,4.9,1.5
53,5.5,2.3,4.0,1.3
54,6.5,2.8,4.6,1.5
55,5.7,2.8,4.5,1.3
56,6.3,3.3,4.7,1.6
57,4.9,2.4,3.3,1.0
58,6.6,2.9,4.6,1.3
59,5.2,2.7,3.9,1.4
60,5.0,2.0,3.5,1.0
61,5.9,3.0,4.2,1.5
62,6.0,2.2,4.0,1.0
63,6.1,2.9,4.7,1.4
64,5.6,2.9,3.6,1.3
65,6.7,3.1,4.4,1.4
66,5.6,3.0,4.5,1.5
67,5.8,2.7,4.1,1.0
68,6.2,2.2,4.5,1.5
69,5.6,2.5,3.9,1.1
70,5.9,3.2,4.8,1.8
71,6.1,2.8,4.0,1.3
72,6.3,2.5,4.9,1.5
73,6.1,2.8,4.7,1.2
74,6.4,2.9,4.3,1.3
75,6.6,3.0,4.4,1.4
76,6.8,2.8,4.8,1.4
77,6.7,3.0,5.0,1.7
78,6.0,2.9,4.5,1.5
79,5.7,2.6,3.5,1.0
80,5.5,2.4,3.8,1.1
81,5.5,2.4,3.7,1.0
82,5.8,2.7,3.9,1.2
83,6.0,2.7,5.1,1.6
84,5.4,3.0,4.5,1.5
85,6.0,3.4,4.5,1.6
86,6.7,3.1,4.7,1.5
87,6.3,2.3,4.4,1.3
88,5.6,3.0,4.1,1.3
89,5.5,2.5,4.0,1.3
90,5.5,2.6,4.4,1.2
91,6.1,3.0,4.6,1.4
92,5.8,2.6,4.0,1.2
93,5.0,2.3,3.3,1.0
94,5.6,2.7,4.2,1.3
95,5.7,3.0,4.2,1.2
96,5.7,2.9,4.2,1.3
97,6.2,2.9,4.3,1.3
98,5.1,2.5,3.0,1.1
99,5.7,2.8,4.1,1.3
100,6.3,3.3,6.0,2.5
101,5.8,2.7,5.1,1.9
102,7.1,3.0,5.9,2.1
103,6.3,2.9,5.6,1.8
104,6.5,3.0,5.8,2.2
105,7.6,3.0,6.6,2.1
106,4.9,2.5,4.5,1.7
107,7.3,2.9,6.3,1.8
108,6.7,2.5,5.8,1.8
109,7.2,3.6,6.1,2.5
110,6.5,3.2,5.1,2.0
111,6.4,2.7,5.3,1.9
112,6.8,3.0,5.5,2.1
113,5.7,2.5,5.0,2.0
114,5.8,2.8,5.1,2.4
115,6.4,3.2,5.3,2.3
116,6.5,3.0,5.5,1.8
117,7.7,3.8,6.7,2.2
118,7.7,2.6,6.9,2.3
119,6.0,2.2,5.0,1.5
120,6.9,3.2,5.7,2.3
121,5.6,2.8,4.9,2.0
122,7.7,2.8,6.7,2.0
123,6.3,2.7,4.9,1.8
124,6.7,3.3,5.7,2.1
125,7.2,3.2,6.0,1.8
126,6.2,2.8,4.8,1.8
127,6.1,3.0,4.9,1.8
128,6.4,2.8,5.6,2.1
129,7.2,3.0,5.8,1.6
130,7.4,2.8,6.1,1.9
131,7.9,3.8,6.4,2.0
132,6.4,2.8,5.6,2.2
133,6.3,2.8,5.1,1.5
134,6.1,2.6,5.6,1.4
135,7.7,3.0,6.1,2.3
136,6.3,3.4,5.6,2.4
137,6.4,3.1,5.5,1.8
138,6.0,3.0,4.8,1.8
139,6.9,3.1,5.4,2.1
140,6.7,3.1,5.6,2.4
141,6.9,3.1,5.1,2.3
142,5.8,2.7,5.1,1.9
143,6.8,3.2,5.9,2.3
144,6.7,3.3,5.7,2.5
145,6.7,3.0,5.2,2.3
146,6.3,2.5,5.0,1.9
147,6.5,3.0,5.2,2.0
148,6.2,3.4,5.4,2.3
149,5.9,3.0,5.1,1.8
--------------------------------------------------------------------------------
/etl-Yarkoni.ipynb:
--------------------------------------------------------------------------------
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "c3ab7921-a2d2-47bd-91c3-8a7dec41af17",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import pickle\n",
    "import sklearn\n",
    "\n",
    "from sklearn.datasets import load_iris\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "from sklearn.pipeline import Pipeline\n",
    "from sklearn.ensemble import GradientBoostingClassifier\n",
    "from sklearn.metrics import accuracy_score"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "ae3a75ed-4ac8-4f30-9117-2d1f0ebdc58a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The scikit-learn version is 1.0.\n"
     ]
    }
   ],
   "source": [
    "print('The scikit-learn version is {}.'.format(sklearn.__version__))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "28fa4755-4aa6-4b8a-ad54-879cbb668093",
   "metadata": {},
   "outputs": [],
   "source": [
    "from datetime import datetime\n",
    "\n",
    "TIMESTAMP = str(datetime.now())\n",
    "BUCKET_NAME = \"automl-output-mlops\"\n",
    "MODEL_FILENAME = \"model.pkl\"\n",
    "CSV_FILENAME = \"test.csv\"\n",
    "LOCATION = \"us-central1\"\n",
    "REGION = LOCATION\n",
    "#pre-built containers\n",
    "DOCKER_IMAGE_URI = \"us-docker.pkg.dev/vertex-ai/prediction/sklearn-cpu.1-0:latest\" # https://cloud.google.com/vertex-ai/docs/predictions/pre-built-containers\n",
    "\n",
    "#TODO: change to actual project\n",
    "PROJECT_ID = \"mlops-demos-306914\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "c966fddc-50f7-4cfe-8a53-73a3f7d8df0e",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(150, 4)\n",
      "(150,)\n"
     ]
    }
   ],
   "source": [
    "iris_data = load_iris(as_frame=True)\n",
    "df = pd.DataFrame(data=iris_data.data)\n",
    "df.to_csv(CSV_FILENAME, index=False)\n",
    "\n",
    "data = iris_data.data\n",
    "labels = iris_data.target\n",
    "print(data.shape)\n",
    "print(labels.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "7ecf79cf-3235-46c1-96bf-49c24f0b5c96",
   "metadata": {},
"outputs": [], 96 | "source": [ 97 | "x_train, x_test, y_train, y_test = train_test_split(data, labels, test_size=0.2, random_state=13)" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": 5, 103 | "id": "53d20366-6500-4ca7-b573-3c9603b9733d", 104 | "metadata": {}, 105 | "outputs": [ 106 | { 107 | "data": { 108 | "text/plain": [ 109 | "Pipeline(steps=[('std_scaler', StandardScaler()),\n", 110 | " ('gbtrees', GradientBoostingClassifier())])" 111 | ] 112 | }, 113 | "execution_count": 5, 114 | "metadata": {}, 115 | "output_type": "execute_result" 116 | } 117 | ], 118 | "source": [ 119 | "clf = Pipeline([\n", 120 | " \n", 121 | " # Scaler\n", 122 | " ('std_scaler', StandardScaler()),\n", 123 | " \n", 124 | " # Classifier\n", 125 | " ('gbtrees', GradientBoostingClassifier())\n", 126 | "\n", 127 | "])\n", 128 | "clf.fit(X=x_train, y=y_train)" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": 6, 134 | "id": "a94f3f55-50c3-4243-a7d0-8310f14598cf", 135 | "metadata": {}, 136 | "outputs": [ 137 | { 138 | "name": "stdout", 139 | "output_type": "stream", 140 | "text": [ 141 | "Model train accuracy:1.0\n", 142 | "Model test accuracy:0.9\n" 143 | ] 144 | } 145 | ], 146 | "source": [ 147 | "print(f'Model train accuracy:{accuracy_score(y_train, clf.predict(x_train))}')\n", 148 | "print(f'Model test accuracy:{accuracy_score(y_test, clf.predict(x_test))}')" 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": 8, 154 | "id": "6a43b454-35fc-4b41-b50f-7665bd72235d", 155 | "metadata": {}, 156 | "outputs": [], 157 | "source": [ 158 | "pickle.dump(clf, open(MODEL_FILENAME, \"wb\"))" 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": 9, 164 | "id": "d4da1272-954e-4f35-bfcc-c5727338ce8c", 165 | "metadata": {}, 166 | "outputs": [ 167 | { 168 | "name": "stdout", 169 | "output_type": "stream", 170 | "text": [ 171 | "0.9\n" 172 | ] 173 | } 174 | ], 175 | "source": [ 176 | "loaded_model = pickle.load(open(MODEL_FILENAME, 'rb'))\n", 177 | "result = loaded_model.score(x_test, y_test)\n", 178 | "print(result)" 179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "execution_count": 10, 184 | "id": "bb09b263-fb9a-4d6a-bae6-9eb6f44d6006", 185 | "metadata": {}, 186 | "outputs": [], 187 | "source": [ 188 | "#https://cloud.google.com/storage/docs/uploading-objects\n", 189 | "from google.cloud import storage\n", 190 | "\n", 191 | "def upload_blob(bucket_name, source_file_name, destination_blob_name):\n", 192 | "\n", 193 | " storage_client = storage.Client()\n", 194 | " bucket = storage_client.bucket(bucket_name)\n", 195 | " blob = bucket.blob(destination_blob_name)\n", 196 | "\n", 197 | " blob.upload_from_filename(source_file_name)" 198 | ] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "execution_count": 11, 203 | "id": "77eb4fab-f4f3-4d9f-adf1-0b229092f7d3", 204 | "metadata": {}, 205 | "outputs": [], 206 | "source": [ 207 | "upload_blob(bucket_name=BUCKET_NAME, source_file_name=MODEL_FILENAME, destination_blob_name=MODEL_FILENAME)" 208 | ] 209 | }, 210 | { 211 | "cell_type": "code", 212 | "execution_count": 14, 213 | "id": "52d0190e-64d0-47ba-9aa8-b5fb279a5675", 214 | "metadata": {}, 215 | "outputs": [], 216 | "source": [ 217 | "upload_blob(bucket_name=BUCKET_NAME, source_file_name=CSV_FILENAME, destination_blob_name=CSV_FILENAME)" 218 | ] 219 | }, 220 | { 221 | "cell_type": "code", 222 | "execution_count": null, 223 | "id": "9cd15b58-d90d-4628-8999-a4483370a838", 224 | "metadata": {}, 225 | "outputs": [], 226 | "source": 
[ 227 | "#https://cloud.google.com/vertex-ai/docs/model-registry/import-model#pre-built-container\n", 228 | "#https://github.com/googleapis/python-aiplatform/blob/HEAD/samples/model-builder/upload_model_sample.py\n", 229 | "from google.cloud import aiplatform\n", 230 | "\n", 231 | "aiplatform.init(project=PROJECT_ID, location=LOCATION)\n", 232 | "\n", 233 | "model = aiplatform.Model.upload(\n", 234 | " display_name=TIMESTAMP,\n", 235 | " artifact_uri=\"gs://\"+BUCKET_NAME+\"/\",\n", 236 | " serving_container_image_uri=DOCKER_IMAGE_URI)\n", 237 | "\n", 238 | "model.wait()\n", 239 | "\n", 240 | "print(model.display_name)\n", 241 | "print(model.resource_name)" 242 | ] 243 | }, 244 | { 245 | "cell_type": "code", 246 | "execution_count": 26, 247 | "id": "c5e145c0-09e6-4cdd-a218-fa7d0d21b56e", 248 | "metadata": { 249 | "tags": [ 250 | "parameters" 251 | ] 252 | }, 253 | "outputs": [], 254 | "source": [ 255 | "learning_rate=0.01" 256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": 27, 261 | "id": "91103c64-3c63-4337-8cfb-ab0e9bd01abe", 262 | "metadata": {}, 263 | "outputs": [ 264 | { 265 | "name": "stdout", 266 | "output_type": "stream", 267 | "text": [ 268 | "0.01\n" 269 | ] 270 | } 271 | ], 272 | "source": [ 273 | "print(str(learning_rate))" 274 | ] 275 | }, 276 | { 277 | "cell_type": "code", 278 | "execution_count": 17, 279 | "id": "fcba180d-78d1-4d36-bfcd-bc14dd67ecfc", 280 | "metadata": { 281 | "tags": [] 282 | }, 283 | "outputs": [ 284 | { 285 | "name": "stdout", 286 | "output_type": "stream", 287 | "text": [ 288 | "gs://automl-output-mlops/test.csv\n", 289 | "Creating BatchPredictionJob\n", 290 | "BatchPredictionJob created. Resource name: projects/983707479002/locations/us-central1/batchPredictionJobs/8561588357602213888\n", 291 | "To use this BatchPredictionJob in another session:\n", 292 | "bpj = aiplatform.BatchPredictionJob('projects/983707479002/locations/us-central1/batchPredictionJobs/8561588357602213888')\n", 293 | "View Batch Prediction Job:\n", 294 | "https://console.cloud.google.com/ai/platform/locations/us-central1/batch-predictions/8561588357602213888?project=983707479002\n", 295 | "BatchPredictionJob projects/983707479002/locations/us-central1/batchPredictionJobs/8561588357602213888 current state:\n", 296 | "JobState.JOB_STATE_RUNNING\n", 297 | "BatchPredictionJob projects/983707479002/locations/us-central1/batchPredictionJobs/8561588357602213888 current state:\n", 298 | "JobState.JOB_STATE_RUNNING\n", 299 | "BatchPredictionJob projects/983707479002/locations/us-central1/batchPredictionJobs/8561588357602213888 current state:\n", 300 | "JobState.JOB_STATE_RUNNING\n", 301 | "BatchPredictionJob projects/983707479002/locations/us-central1/batchPredictionJobs/8561588357602213888 current state:\n", 302 | "JobState.JOB_STATE_RUNNING\n", 303 | "BatchPredictionJob projects/983707479002/locations/us-central1/batchPredictionJobs/8561588357602213888 current state:\n", 304 | "JobState.JOB_STATE_RUNNING\n", 305 | "BatchPredictionJob projects/983707479002/locations/us-central1/batchPredictionJobs/8561588357602213888 current state:\n", 306 | "JobState.JOB_STATE_RUNNING\n", 307 | "BatchPredictionJob projects/983707479002/locations/us-central1/batchPredictionJobs/8561588357602213888 current state:\n", 308 | "JobState.JOB_STATE_RUNNING\n", 309 | "BatchPredictionJob projects/983707479002/locations/us-central1/batchPredictionJobs/8561588357602213888 current state:\n", 310 | "JobState.JOB_STATE_RUNNING\n", 311 | "BatchPredictionJob 
      "BatchPredictionJob projects/983707479002/locations/us-central1/batchPredictionJobs/8561588357602213888 current state:\n",
      "JobState.JOB_STATE_RUNNING\n",
      "BatchPredictionJob projects/983707479002/locations/us-central1/batchPredictionJobs/8561588357602213888 current state:\n",
      "JobState.JOB_STATE_SUCCEEDED\n",
      "BatchPredictionJob run completed. Resource name: projects/983707479002/locations/us-central1/batchPredictionJobs/8561588357602213888\n",
      "test-3\n",
      "projects/983707479002/locations/us-central1/batchPredictionJobs/8561588357602213888\n",
      "JobState.JOB_STATE_SUCCEEDED\n"
     ]
    }
   ],
   "source": [
    "#https://github.com/googleapis/python-aiplatform/blob/2bc9b2b0d048c29ba43c8b4c3ea51370515d08c3/samples/model-builder/create_batch_prediction_job_sample.py\n",
    "from google.cloud import aiplatform\n",
    "\n",
    "#No need to re-init, but I wanted the cells to be standalone\n",
    "aiplatform.init(project=PROJECT_ID, location=LOCATION)\n",
    "\n",
    "#TODO: add monitoring\n",
    "#https://cloud.google.com/vertex-ai/docs/model-monitoring/model-monitoring-batch-predictions#console\n",
    "#https://colab.sandbox.google.com/github/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/community/model_monitoring/batch_prediction_model_monitoring.ipynb\n",
    "my_model = aiplatform.Model(model.resource_name)\n",
    "source = \"gs://\"+BUCKET_NAME+\"/test.csv\"\n",
    "print(source)\n",
    "batch_prediction_job = my_model.batch_predict(\n",
    "    job_display_name=TIMESTAMP,\n",
    "    gcs_source=[source],\n",
    "    gcs_destination_prefix=\"gs://\"+BUCKET_NAME,\n",
    "    machine_type=\"n1-standard-32\",\n",
    "    starting_replica_count=1,\n",
    "    max_replica_count=2,\n",
    "    sync=False,\n",
    "    instances_format=\"csv\" #https://googleapis.dev/python/aiplatform/latest/aiplatform.html\n",
    ")\n",
    "\n",
    "batch_prediction_job.wait()\n",
    "\n",
    "print(batch_prediction_job.display_name)\n",
    "print(batch_prediction_job.resource_name)\n",
    "print(batch_prediction_job.state)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bae9b75c-e462-4a75-9bde-7cace504bc11",
   "metadata": {},
   "outputs": [],
   "source": [
    "#download CSV from bucket (see the download_results sketch appended after the notebook listings)\n",
    "#Write to database X"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python (Local)",
   "language": "python",
   "name": "local-base"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
--------------------------------------------------------------------------------
/Schedule_notebook_Vertex_training.ipynb:
--------------------------------------------------------------------------------
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5786316f-6f84-403a-b62d-cbf49fdeccd5",
   "metadata": {},
   "outputs": [],
   "source": [
    "#To push the branch back to GitHub you will probably need a personal access token\n",
    "#https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/creating-a-personal-access-token\n",
"\n", 13 | "#Before pushing to repo don't forget to right click the file on the left pane and git add" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 1, 19 | "id": "afffe79e-d764-402c-ab98-92c7b9e19abc", 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "import tensorflow as tf\n", 24 | "import tensorflow_datasets as tfds\n", 25 | "import tensorflow_hub as hub" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": null, 31 | "id": "b6b3f786-234b-492f-8c64-74aa8e88b257", 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "#Download data\n", 36 | "clientBQ = client(credentials=)\n", 37 | "dataframe =clientBQ.query() #/ snowflake / SQL\n", 38 | "\n", 39 | "#use this to train later on\n", 40 | "#https://cloud.google.com/vertex-ai/docs/training/understanding-training-service\n", 41 | "#Instead of using tfds.load you can download the dataset from anywhere else." 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 15, 47 | "id": "1bf0c3a6-e4e1-4c5b-a4f4-b391e8f6dfd9", 48 | "metadata": {}, 49 | "outputs": [ 50 | { 51 | "name": "stdout", 52 | "output_type": "stream", 53 | "text": [ 54 | "1000\n" 55 | ] 56 | } 57 | ], 58 | "source": [ 59 | "data, info = tfds.load(name='cifar10', as_supervised=True, with_info=True)\n", 60 | "NUM_CLASSES = info.features['label'].num_classes\n", 61 | "DATASET_SIZE = info.splits['train'].num_examples\n", 62 | "print(DATASET_SIZE)" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 16, 68 | "id": "3b55e542-3b83-4f2a-9c39-f14625ae3f75", 69 | "metadata": {}, 70 | "outputs": [], 71 | "source": [ 72 | "def preprocess_data(image, label):\n", 73 | " image = tf.image.resize(image, (300,300))\n", 74 | " return tf.cast(image, tf.float32) / 255., label" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 17, 80 | "id": "04d44b35-c9c1-4af3-9963-2437ec5c6353", 81 | "metadata": {}, 82 | "outputs": [], 83 | "source": [ 84 | "# Create train/validation splits\n", 85 | "\n", 86 | "# Shuffle dataset\n", 87 | "dataset = data['train'].shuffle(1000)\n", 88 | "\n", 89 | "train_split = 0.8\n", 90 | "val_split = 0.2\n", 91 | "train_size = int(train_split * DATASET_SIZE)\n", 92 | "val_size = int(val_split * DATASET_SIZE)\n", 93 | "\n", 94 | "train_data = dataset.take(train_size)\n", 95 | "train_data = train_data.map(preprocess_data)\n", 96 | "train_data = train_data.batch(64)\n", 97 | "\n", 98 | "validation_data = dataset.skip(train_size)\n", 99 | "validation_data = validation_data.map(preprocess_data)\n", 100 | "validation_data = validation_data.batch(64)" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": 18, 106 | "id": "2e0a1561-421f-4c6c-b39f-dff35f89320f", 107 | "metadata": { 108 | "tags": [ 109 | "parameters" 110 | ] 111 | }, 112 | "outputs": [], 113 | "source": [ 114 | "feature_extractor_model = \"inception_v3\"" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": 19, 120 | "id": "f3d67a3a-e976-441e-afef-e566e8fb768a", 121 | "metadata": {}, 122 | "outputs": [], 123 | "source": [ 124 | "tf_hub_uri = f\"https://tfhub.dev/google/imagenet/{feature_extractor_model}/feature_vector/5\"\n" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": 20, 130 | "id": "a36fd04b-be39-45a2-9438-fa6ab92c9de6", 131 | "metadata": {}, 132 | "outputs": [], 133 | "source": [ 134 | "feature_extractor_layer = hub.KerasLayer(\n", 135 | " tf_hub_uri,\n", 136 | " trainable=False)" 137 | ] 138 | }, 139 | { 140 | 
"cell_type": "code", 141 | "execution_count": 21, 142 | "id": "d1dc6d6a-8bd5-4856-8ad3-8fedac38f146", 143 | "metadata": {}, 144 | "outputs": [], 145 | "source": [ 146 | "model = tf.keras.Sequential([\n", 147 | " feature_extractor_layer,\n", 148 | " tf.keras.layers.Dense(units=NUM_CLASSES)\n", 149 | "])" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": null, 155 | "id": "d2be4d35-4cce-47c5-a4ac-8e3b484ce40f", 156 | "metadata": {}, 157 | "outputs": [ 158 | { 159 | "name": "stderr", 160 | "output_type": "stream", 161 | "text": [ 162 | "2022-09-12 08:24:21.613006: W tensorflow/core/kernels/data/cache_dataset_ops.cc:856] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.\n" 163 | ] 164 | }, 165 | { 166 | "name": "stdout", 167 | "output_type": "stream", 168 | "text": [ 169 | "10/13 [======================>.......] - ETA: 21s - loss: 2.1005 - acc: 0.2922" 170 | ] 171 | }, 172 | { 173 | "name": "stderr", 174 | "output_type": "stream", 175 | "text": [ 176 | "2022-09-12 08:25:35.270372: W tensorflow/core/kernels/data/cache_dataset_ops.cc:856] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.\n" 177 | ] 178 | }, 179 | { 180 | "name": "stdout", 181 | "output_type": "stream", 182 | "text": [ 183 | "13/13 [==============================] - ETA: 0s - loss: 2.0105 - acc: 0.3375" 184 | ] 185 | } 186 | ], 187 | "source": [ 188 | "model.compile(\n", 189 | " optimizer=tf.keras.optimizers.Adam(),\n", 190 | " loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),\n", 191 | " metrics=['acc'])\n", 192 | "\n", 193 | "model.fit(train_data, validation_data=validation_data, epochs=1)" 194 | ] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "execution_count": null, 199 | "id": "d967c497-f78f-4d4c-9913-74d370f811f0", 200 | "metadata": {}, 201 | "outputs": [], 202 | "source": [ 203 | "#this should create a local folder with all the assets\n", 204 | "#https://www.tensorflow.org/guide/keras/save_and_serialize\n", 205 | "model.save(\"my_model\")" 206 | ] 207 | }, 208 | { 209 | "cell_type": "code", 210 | "execution_count": null, 211 | "id": "996e217a-568e-4d0f-b7e8-ff83187fbffa", 212 | "metadata": {}, 213 | "outputs": [], 214 | "source": [ 215 | "#https://cloud.google.com/storage/docs/uploading-objects\n", 216 | "from google.cloud import storage\n", 217 | "\n", 218 | "bucket_name = \"automl-output-mlops\"\n", 219 | "\n", 220 | "def upload_blob(bucket_name, source_file_name, destination_blob_name):\n", 221 | " \"\"\"Uploads a file to the bucket.\"\"\"\n", 222 | " # The ID of your GCS bucket\n", 223 | " # bucket_name = \"your-bucket-name\"\n", 224 | " # The path to your file to upload\n", 225 | " # source_file_name = \"local/path/to/file\"\n", 226 | " # The ID of your GCS object\n", 227 | " # destination_blob_name = \"storage-object-name\"\n", 228 | "\n", 229 | " storage_client = storage.Client()\n", 230 | " bucket = storage_client.bucket(bucket_name)\n", 231 | " blob = bucket.blob(destination_blob_name)\n", 232 | 
"\n", 233 | " blob.upload_from_filename(source_file_name)\n", 234 | "\n", 235 | " print(\n", 236 | " f\"File {source_file_name} uploaded to {destination_blob_name}.\"\n", 237 | " )\n", 238 | " \n", 239 | "#TODO: fill in the arguments to use the funciton above\n", 240 | "source_file = \"my_model/saved_model.pb\"\n", 241 | "dest_blob = \"saved_model.pb\"\n", 242 | "upload_blob(bucket_name, source_file, dest_blob)" 243 | ] 244 | }, 245 | { 246 | "cell_type": "code", 247 | "execution_count": null, 248 | "id": "7dff7917-5de1-4227-8b78-2cbdd8935639", 249 | "metadata": {}, 250 | "outputs": [], 251 | "source": [ 252 | "#https://cloud.google.com/vertex-ai/docs/model-registry/import-model#pre-built-container\n", 253 | "#https://github.com/googleapis/python-aiplatform/blob/HEAD/samples/model-builder/upload_model_sample.py\n", 254 | "\n", 255 | "from typing import Dict, Optional, Sequence\n", 256 | "\n", 257 | "from google.cloud import aiplatform\n", 258 | "\n", 259 | "PROJECT_ID = \"mlops-demos-306914\"\n", 260 | "\n", 261 | "aiplatform.init(project=PROJECT_ID)\n", 262 | " \n", 263 | "def upload_model_to_model_registry(\n", 264 | " project: str,\n", 265 | " display_name: str,\n", 266 | " serving_container_image_uri: str =\"us-docker.pkg.dev/vertex-ai/prediction/sklearn-cpu.0-23:latest\", # https://cloud.google.com/vertex-ai/docs/predictions/pre-built-containers\n", 267 | " location: str = \"us-central1\",\n", 268 | " artifact_uri: Optional[str] = None,\n", 269 | " serving_container_predict_route: Optional[str] = None,\n", 270 | " serving_container_health_route: Optional[str] = None,\n", 271 | " description: Optional[str] = None,\n", 272 | " serving_container_command: Optional[Sequence[str]] = None,\n", 273 | " serving_container_args: Optional[Sequence[str]] = None,\n", 274 | " serving_container_environment_variables: Optional[Dict[str, str]] = None,\n", 275 | " serving_container_ports: Optional[Sequence[int]] = None,\n", 276 | " instance_schema_uri: Optional[str] = None,\n", 277 | " parameters_schema_uri: Optional[str] = None,\n", 278 | " prediction_schema_uri: Optional[str] = None,\n", 279 | " explanation_metadata: Optional[explain.ExplanationMetadata] = None,\n", 280 | " explanation_parameters: Optional[explain.ExplanationParameters] = None,\n", 281 | " sync: bool = True,\n", 282 | "):\n", 283 | "\n", 284 | " aiplatform.init(project=project, location=location)\n", 285 | "\n", 286 | " model = aiplatform.Model.upload(\n", 287 | " display_name=display_name,\n", 288 | " artifact_uri=artifact_uri,\n", 289 | " serving_container_image_uri=serving_container_image_uri,\n", 290 | " serving_container_predict_route=serving_container_predict_route,\n", 291 | " serving_container_health_route=serving_container_health_route,\n", 292 | " instance_schema_uri=instance_schema_uri,\n", 293 | " parameters_schema_uri=parameters_schema_uri,\n", 294 | " prediction_schema_uri=prediction_schema_uri,\n", 295 | " description=description,\n", 296 | " serving_container_command=serving_container_command,\n", 297 | " serving_container_args=serving_container_args,\n", 298 | " serving_container_environment_variables=serving_container_environment_variables,\n", 299 | " serving_container_ports=serving_container_ports,\n", 300 | " explanation_metadata=explanation_metadata,\n", 301 | " explanation_parameters=explanation_parameters,\n", 302 | " sync=sync,\n", 303 | " )\n", 304 | "\n", 305 | " model.wait()\n", 306 | "\n", 307 | " print(model.display_name)\n", 308 | " print(model.resource_name)\n", 309 | " return model\n", 310 | "\n", 311 
| "#TODO: fill in the arguments to use the funciton above\n", 312 | "upload_model_to_model_registry(project=PROJECT_ID,display_name=\"my_display_name\")" 313 | ] 314 | }, 315 | { 316 | "cell_type": "code", 317 | "execution_count": null, 318 | "id": "39e9984f-5d75-4776-b99d-8cc4bcd53339", 319 | "metadata": {}, 320 | "outputs": [], 321 | "source": [ 322 | "#https://github.com/googleapis/python-aiplatform/blob/2bc9b2b0d048c29ba43c8b4c3ea51370515d08c3/samples/model-builder/create_batch_prediction_job_sample.py\n", 323 | "from typing import Sequence, Union\n", 324 | "\n", 325 | "from google.cloud import aiplatform\n", 326 | "\n", 327 | "def create_batch_prediction_job_sample(\n", 328 | " project: str,\n", 329 | " location: str,\n", 330 | " model_resource_name: str,\n", 331 | " job_display_name: str,\n", 332 | " gcs_source: Union[str, Sequence[str]],\n", 333 | " gcs_destination: str,\n", 334 | " sync: bool = True,\n", 335 | "):\n", 336 | " aiplatform.init(project=project, location=location)\n", 337 | "\n", 338 | " my_model = aiplatform.Model(model_resource_name)\n", 339 | "\n", 340 | " batch_prediction_job = my_model.batch_predict(\n", 341 | " job_display_name=job_display_name,\n", 342 | " gcs_source=gcs_source,\n", 343 | " gcs_destination_prefix=gcs_destination,\n", 344 | " sync=sync,\n", 345 | " )\n", 346 | "\n", 347 | " batch_prediction_job.wait()\n", 348 | "\n", 349 | " print(batch_prediction_job.display_name)\n", 350 | " print(batch_prediction_job.resource_name)\n", 351 | " print(batch_prediction_job.state)\n", 352 | " return batch_prediction_job\n", 353 | "\n", 354 | "create_batch_prediction_job_sample()" 355 | ] 356 | }, 357 | { 358 | "cell_type": "code", 359 | "execution_count": null, 360 | "id": "83312cd9-5fe2-416e-b6fc-4c7cd498bd94", 361 | "metadata": {}, 362 | "outputs": [], 363 | "source": [] 364 | }, 365 | { 366 | "cell_type": "code", 367 | "execution_count": null, 368 | "id": "1f7a9a5a-4c60-4b59-8c0c-a7623f1ae81b", 369 | "metadata": {}, 370 | "outputs": [], 371 | "source": [] 372 | }, 373 | { 374 | "cell_type": "code", 375 | "execution_count": null, 376 | "id": "38e1885c-133b-4cba-aed3-d11371e9914e", 377 | "metadata": {}, 378 | "outputs": [], 379 | "source": [] 380 | } 381 | ], 382 | "metadata": { 383 | "environment": { 384 | "kernel": "python3", 385 | "name": "managed-notebooks.m87", 386 | "type": "gcloud", 387 | "uri": "gcr.io/deeplearning-platform-release/tf2-gpu:latest" 388 | }, 389 | "kernelspec": { 390 | "display_name": "TensorFlow 2 (Local)", 391 | "language": "python", 392 | "name": "local-tf2" 393 | }, 394 | "language_info": { 395 | "codemirror_mode": { 396 | "name": "ipython", 397 | "version": 3 398 | }, 399 | "file_extension": ".py", 400 | "mimetype": "text/x-python", 401 | "name": "python", 402 | "nbconvert_exporter": "python", 403 | "pygments_lexer": "ipython3", 404 | "version": "3.7.12" 405 | } 406 | }, 407 | "nbformat": 4, 408 | "nbformat_minor": 5 409 | } 410 | --------------------------------------------------------------------------------