├── README.md
├── iris.csv
├── etl-Yarkoni.ipynb
└── Schedule_notebook_Vertex_training.ipynb
/README.md:
--------------------------------------------------------------------------------
# Vertex-workbench-schedule-notebook

1. Start a Vertex AI Workbench notebook instance.
2. Clone this repo into the instance using the built-in Git integration.
3. Open "Schedule_notebook_Vertex_training.ipynb" and execute (or schedule) it from the notebook UI.
--------------------------------------------------------------------------------
/iris.csv:
--------------------------------------------------------------------------------
,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2
5,5.4,3.9,1.7,0.4
6,4.6,3.4,1.4,0.3
7,5.0,3.4,1.5,0.2
8,4.4,2.9,1.4,0.2
9,4.9,3.1,1.5,0.1
10,5.4,3.7,1.5,0.2
11,4.8,3.4,1.6,0.2
12,4.8,3.0,1.4,0.1
13,4.3,3.0,1.1,0.1
14,5.8,4.0,1.2,0.2
15,5.7,4.4,1.5,0.4
16,5.4,3.9,1.3,0.4
17,5.1,3.5,1.4,0.3
18,5.7,3.8,1.7,0.3
19,5.1,3.8,1.5,0.3
20,5.4,3.4,1.7,0.2
21,5.1,3.7,1.5,0.4
22,4.6,3.6,1.0,0.2
23,5.1,3.3,1.7,0.5
24,4.8,3.4,1.9,0.2
25,5.0,3.0,1.6,0.2
26,5.0,3.4,1.6,0.4
27,5.2,3.5,1.5,0.2
28,5.2,3.4,1.4,0.2
29,4.7,3.2,1.6,0.2
30,4.8,3.1,1.6,0.2
31,5.4,3.4,1.5,0.4
32,5.2,4.1,1.5,0.1
33,5.5,4.2,1.4,0.2
34,4.9,3.1,1.5,0.2
35,5.0,3.2,1.2,0.2
36,5.5,3.5,1.3,0.2
37,4.9,3.6,1.4,0.1
38,4.4,3.0,1.3,0.2
39,5.1,3.4,1.5,0.2
40,5.0,3.5,1.3,0.3
41,4.5,2.3,1.3,0.3
42,4.4,3.2,1.3,0.2
43,5.0,3.5,1.6,0.6
44,5.1,3.8,1.9,0.4
45,4.8,3.0,1.4,0.3
46,5.1,3.8,1.6,0.2
47,4.6,3.2,1.4,0.2
48,5.3,3.7,1.5,0.2
49,5.0,3.3,1.4,0.2
50,7.0,3.2,4.7,1.4
51,6.4,3.2,4.5,1.5
52,6.9,3.1,4.9,1.5
53,5.5,2.3,4.0,1.3
54,6.5,2.8,4.6,1.5
55,5.7,2.8,4.5,1.3
56,6.3,3.3,4.7,1.6
57,4.9,2.4,3.3,1.0
58,6.6,2.9,4.6,1.3
59,5.2,2.7,3.9,1.4
60,5.0,2.0,3.5,1.0
61,5.9,3.0,4.2,1.5
62,6.0,2.2,4.0,1.0
63,6.1,2.9,4.7,1.4
64,5.6,2.9,3.6,1.3
65,6.7,3.1,4.4,1.4
66,5.6,3.0,4.5,1.5
67,5.8,2.7,4.1,1.0
68,6.2,2.2,4.5,1.5
69,5.6,2.5,3.9,1.1
70,5.9,3.2,4.8,1.8
71,6.1,2.8,4.0,1.3
72,6.3,2.5,4.9,1.5
73,6.1,2.8,4.7,1.2
74,6.4,2.9,4.3,1.3
75,6.6,3.0,4.4,1.4
76,6.8,2.8,4.8,1.4
77,6.7,3.0,5.0,1.7
78,6.0,2.9,4.5,1.5
79,5.7,2.6,3.5,1.0
80,5.5,2.4,3.8,1.1
81,5.5,2.4,3.7,1.0
82,5.8,2.7,3.9,1.2
83,6.0,2.7,5.1,1.6
84,5.4,3.0,4.5,1.5
85,6.0,3.4,4.5,1.6
86,6.7,3.1,4.7,1.5
87,6.3,2.3,4.4,1.3
88,5.6,3.0,4.1,1.3
89,5.5,2.5,4.0,1.3
90,5.5,2.6,4.4,1.2
91,6.1,3.0,4.6,1.4
92,5.8,2.6,4.0,1.2
93,5.0,2.3,3.3,1.0
94,5.6,2.7,4.2,1.3
95,5.7,3.0,4.2,1.2
96,5.7,2.9,4.2,1.3
97,6.2,2.9,4.3,1.3
98,5.1,2.5,3.0,1.1
99,5.7,2.8,4.1,1.3
100,6.3,3.3,6.0,2.5
101,5.8,2.7,5.1,1.9
102,7.1,3.0,5.9,2.1
103,6.3,2.9,5.6,1.8
104,6.5,3.0,5.8,2.2
105,7.6,3.0,6.6,2.1
106,4.9,2.5,4.5,1.7
107,7.3,2.9,6.3,1.8
108,6.7,2.5,5.8,1.8
109,7.2,3.6,6.1,2.5
110,6.5,3.2,5.1,2.0
111,6.4,2.7,5.3,1.9
112,6.8,3.0,5.5,2.1
113,5.7,2.5,5.0,2.0
114,5.8,2.8,5.1,2.4
115,6.4,3.2,5.3,2.3
116,6.5,3.0,5.5,1.8
117,7.7,3.8,6.7,2.2
118,7.7,2.6,6.9,2.3
119,6.0,2.2,5.0,1.5
120,6.9,3.2,5.7,2.3
121,5.6,2.8,4.9,2.0
122,7.7,2.8,6.7,2.0
123,6.3,2.7,4.9,1.8
124,6.7,3.3,5.7,2.1
125,7.2,3.2,6.0,1.8
126,6.2,2.8,4.8,1.8
127,6.1,3.0,4.9,1.8
128,6.4,2.8,5.6,2.1
129,7.2,3.0,5.8,1.6
130,7.4,2.8,6.1,1.9
131,7.9,3.8,6.4,2.0
132,6.4,2.8,5.6,2.2
133,6.3,2.8,5.1,1.5
134,6.1,2.6,5.6,1.4
135,7.7,3.0,6.1,2.3
136,6.3,3.4,5.6,2.4
137,6.4,3.1,5.5,1.8
138,6.0,3.0,4.8,1.8
139,6.9,3.1,5.4,2.1
140,6.7,3.1,5.6,2.4
141,6.9,3.1,5.1,2.3
142,5.8,2.7,5.1,1.9
143,6.8,3.2,5.9,2.3
144,6.7,3.3,5.7,2.5
145,6.7,3.0,5.2,2.3
146,6.3,2.5,5.0,1.9
147,6.5,3.0,5.2,2.0
148,6.2,3.4,5.4,2.3
149,5.9,3.0,5.1,1.8
--------------------------------------------------------------------------------
/etl-Yarkoni.ipynb:
--------------------------------------------------------------------------------
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "c3ab7921-a2d2-47bd-91c3-8a7dec41af17",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import pickle\n",
    "import sklearn\n",
    "\n",
    "from sklearn.datasets import load_iris\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "from sklearn.pipeline import Pipeline\n",
    "from sklearn.ensemble import GradientBoostingClassifier\n",
    "from sklearn.metrics import accuracy_score"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "ae3a75ed-4ac8-4f30-9117-2d1f0ebdc58a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The scikit-learn version is 1.0.\n"
     ]
    }
   ],
   "source": [
    "print('The scikit-learn version is {}.'.format(sklearn.__version__))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "28fa4755-4aa6-4b8a-ad54-879cbb668093",
   "metadata": {},
   "outputs": [],
   "source": [
    "from datetime import datetime\n",
    "\n",
    "TIMESTAMP = str(datetime.now())\n",
    "BUCKET_NAME = \"automl-output-mlops\"\n",
    "MODEL_FILENAME = \"model.pkl\"\n",
    "CSV_FILENAME = \"test.csv\"\n",
    "LOCATION = \"us-central1\"\n",
    "REGION = LOCATION\n",
    "#pre-built containers\n",
    "DOCKER_IMAGE_URI = \"us-docker.pkg.dev/vertex-ai/prediction/sklearn-cpu.1-0:latest\" # https://cloud.google.com/vertex-ai/docs/predictions/pre-built-containers\n",
    "\n",
    "#TODO: change to actual project\n",
    "PROJECT_ID = \"mlops-demos-306914\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "c966fddc-50f7-4cfe-8a53-73a3f7d8df0e",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(150, 4)\n",
      "(150,)\n"
     ]
    }
   ],
   "source": [
    "iris_data = load_iris(as_frame=True)\n",
    "df = pd.DataFrame(data=iris_data.data)\n",
    "df.to_csv(CSV_FILENAME, index=False)\n",
    "\n",
    "data = iris_data.data\n",
    "labels = iris_data.target\n",
    "print(data.shape)\n",
    "print(labels.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "7ecf79cf-3235-46c1-96bf-49c24f0b5c96",
   "metadata": {},
"outputs": [], 96 | "source": [ 97 | "x_train, x_test, y_train, y_test = train_test_split(data, labels, test_size=0.2, random_state=13)" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": 5, 103 | "id": "53d20366-6500-4ca7-b573-3c9603b9733d", 104 | "metadata": {}, 105 | "outputs": [ 106 | { 107 | "data": { 108 | "text/plain": [ 109 | "Pipeline(steps=[('std_scaler', StandardScaler()),\n", 110 | " ('gbtrees', GradientBoostingClassifier())])" 111 | ] 112 | }, 113 | "execution_count": 5, 114 | "metadata": {}, 115 | "output_type": "execute_result" 116 | } 117 | ], 118 | "source": [ 119 | "clf = Pipeline([\n", 120 | " \n", 121 | " # Scaler\n", 122 | " ('std_scaler', StandardScaler()),\n", 123 | " \n", 124 | " # Classifier\n", 125 | " ('gbtrees', GradientBoostingClassifier())\n", 126 | "\n", 127 | "])\n", 128 | "clf.fit(X=x_train, y=y_train)" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": 6, 134 | "id": "a94f3f55-50c3-4243-a7d0-8310f14598cf", 135 | "metadata": {}, 136 | "outputs": [ 137 | { 138 | "name": "stdout", 139 | "output_type": "stream", 140 | "text": [ 141 | "Model train accuracy:1.0\n", 142 | "Model test accuracy:0.9\n" 143 | ] 144 | } 145 | ], 146 | "source": [ 147 | "print(f'Model train accuracy:{accuracy_score(y_train, clf.predict(x_train))}')\n", 148 | "print(f'Model test accuracy:{accuracy_score(y_test, clf.predict(x_test))}')" 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": 8, 154 | "id": "6a43b454-35fc-4b41-b50f-7665bd72235d", 155 | "metadata": {}, 156 | "outputs": [], 157 | "source": [ 158 | "pickle.dump(clf, open(MODEL_FILENAME, \"wb\"))" 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": 9, 164 | "id": "d4da1272-954e-4f35-bfcc-c5727338ce8c", 165 | "metadata": {}, 166 | "outputs": [ 167 | { 168 | "name": "stdout", 169 | "output_type": "stream", 170 | "text": [ 171 | "0.9\n" 172 | ] 173 | } 174 | ], 175 | "source": [ 176 | "loaded_model = pickle.load(open(MODEL_FILENAME, 'rb'))\n", 177 | "result = loaded_model.score(x_test, y_test)\n", 178 | "print(result)" 179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "execution_count": 10, 184 | "id": "bb09b263-fb9a-4d6a-bae6-9eb6f44d6006", 185 | "metadata": {}, 186 | "outputs": [], 187 | "source": [ 188 | "#https://cloud.google.com/storage/docs/uploading-objects\n", 189 | "from google.cloud import storage\n", 190 | "\n", 191 | "def upload_blob(bucket_name, source_file_name, destination_blob_name):\n", 192 | "\n", 193 | " storage_client = storage.Client()\n", 194 | " bucket = storage_client.bucket(bucket_name)\n", 195 | " blob = bucket.blob(destination_blob_name)\n", 196 | "\n", 197 | " blob.upload_from_filename(source_file_name)" 198 | ] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "execution_count": 11, 203 | "id": "77eb4fab-f4f3-4d9f-adf1-0b229092f7d3", 204 | "metadata": {}, 205 | "outputs": [], 206 | "source": [ 207 | "upload_blob(bucket_name=BUCKET_NAME, source_file_name=MODEL_FILENAME, destination_blob_name=MODEL_FILENAME)" 208 | ] 209 | }, 210 | { 211 | "cell_type": "code", 212 | "execution_count": 14, 213 | "id": "52d0190e-64d0-47ba-9aa8-b5fb279a5675", 214 | "metadata": {}, 215 | "outputs": [], 216 | "source": [ 217 | "upload_blob(bucket_name=BUCKET_NAME, source_file_name=CSV_FILENAME, destination_blob_name=CSV_FILENAME)" 218 | ] 219 | }, 220 | { 221 | "cell_type": "code", 222 | "execution_count": null, 223 | "id": "9cd15b58-d90d-4628-8999-a4483370a838", 224 | "metadata": {}, 225 | "outputs": [], 226 | "source": 
[ 227 | "#https://cloud.google.com/vertex-ai/docs/model-registry/import-model#pre-built-container\n", 228 | "#https://github.com/googleapis/python-aiplatform/blob/HEAD/samples/model-builder/upload_model_sample.py\n", 229 | "from google.cloud import aiplatform\n", 230 | "\n", 231 | "aiplatform.init(project=PROJECT_ID, location=LOCATION)\n", 232 | "\n", 233 | "model = aiplatform.Model.upload(\n", 234 | " display_name=TIMESTAMP,\n", 235 | " artifact_uri=\"gs://\"+BUCKET_NAME+\"/\",\n", 236 | " serving_container_image_uri=DOCKER_IMAGE_URI)\n", 237 | "\n", 238 | "model.wait()\n", 239 | "\n", 240 | "print(model.display_name)\n", 241 | "print(model.resource_name)" 242 | ] 243 | }, 244 | { 245 | "cell_type": "code", 246 | "execution_count": 26, 247 | "id": "c5e145c0-09e6-4cdd-a218-fa7d0d21b56e", 248 | "metadata": { 249 | "tags": [ 250 | "parameters" 251 | ] 252 | }, 253 | "outputs": [], 254 | "source": [ 255 | "learning_rate=0.01" 256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": 27, 261 | "id": "91103c64-3c63-4337-8cfb-ab0e9bd01abe", 262 | "metadata": {}, 263 | "outputs": [ 264 | { 265 | "name": "stdout", 266 | "output_type": "stream", 267 | "text": [ 268 | "0.01\n" 269 | ] 270 | } 271 | ], 272 | "source": [ 273 | "print(str(learning_rate))" 274 | ] 275 | }, 276 | { 277 | "cell_type": "code", 278 | "execution_count": 17, 279 | "id": "fcba180d-78d1-4d36-bfcd-bc14dd67ecfc", 280 | "metadata": { 281 | "tags": [] 282 | }, 283 | "outputs": [ 284 | { 285 | "name": "stdout", 286 | "output_type": "stream", 287 | "text": [ 288 | "gs://automl-output-mlops/test.csv\n", 289 | "Creating BatchPredictionJob\n", 290 | "BatchPredictionJob created. Resource name: projects/983707479002/locations/us-central1/batchPredictionJobs/8561588357602213888\n", 291 | "To use this BatchPredictionJob in another session:\n", 292 | "bpj = aiplatform.BatchPredictionJob('projects/983707479002/locations/us-central1/batchPredictionJobs/8561588357602213888')\n", 293 | "View Batch Prediction Job:\n", 294 | "https://console.cloud.google.com/ai/platform/locations/us-central1/batch-predictions/8561588357602213888?project=983707479002\n", 295 | "BatchPredictionJob projects/983707479002/locations/us-central1/batchPredictionJobs/8561588357602213888 current state:\n", 296 | "JobState.JOB_STATE_RUNNING\n", 297 | "BatchPredictionJob projects/983707479002/locations/us-central1/batchPredictionJobs/8561588357602213888 current state:\n", 298 | "JobState.JOB_STATE_RUNNING\n", 299 | "BatchPredictionJob projects/983707479002/locations/us-central1/batchPredictionJobs/8561588357602213888 current state:\n", 300 | "JobState.JOB_STATE_RUNNING\n", 301 | "BatchPredictionJob projects/983707479002/locations/us-central1/batchPredictionJobs/8561588357602213888 current state:\n", 302 | "JobState.JOB_STATE_RUNNING\n", 303 | "BatchPredictionJob projects/983707479002/locations/us-central1/batchPredictionJobs/8561588357602213888 current state:\n", 304 | "JobState.JOB_STATE_RUNNING\n", 305 | "BatchPredictionJob projects/983707479002/locations/us-central1/batchPredictionJobs/8561588357602213888 current state:\n", 306 | "JobState.JOB_STATE_RUNNING\n", 307 | "BatchPredictionJob projects/983707479002/locations/us-central1/batchPredictionJobs/8561588357602213888 current state:\n", 308 | "JobState.JOB_STATE_RUNNING\n", 309 | "BatchPredictionJob projects/983707479002/locations/us-central1/batchPredictionJobs/8561588357602213888 current state:\n", 310 | "JobState.JOB_STATE_RUNNING\n", 311 | "BatchPredictionJob 
      "BatchPredictionJob projects/983707479002/locations/us-central1/batchPredictionJobs/8561588357602213888 current state:\n",
      "JobState.JOB_STATE_RUNNING\n",
      "BatchPredictionJob projects/983707479002/locations/us-central1/batchPredictionJobs/8561588357602213888 current state:\n",
      "JobState.JOB_STATE_SUCCEEDED\n",
      "BatchPredictionJob run completed. Resource name: projects/983707479002/locations/us-central1/batchPredictionJobs/8561588357602213888\n",
      "test-3\n",
      "projects/983707479002/locations/us-central1/batchPredictionJobs/8561588357602213888\n",
      "JobState.JOB_STATE_SUCCEEDED\n"
     ]
    }
   ],
   "source": [
    "#https://github.com/googleapis/python-aiplatform/blob/2bc9b2b0d048c29ba43c8b4c3ea51370515d08c3/samples/model-builder/create_batch_prediction_job_sample.py\n",
    "from google.cloud import aiplatform\n",
    "\n",
    "#No need to re-init, but I wanted the cells to be standalone\n",
    "aiplatform.init(project=PROJECT_ID, location=LOCATION)\n",
    "\n",
    "#TODO: add monitoring\n",
    "#https://cloud.google.com/vertex-ai/docs/model-monitoring/model-monitoring-batch-predictions#console\n",
    "#https://colab.sandbox.google.com/github/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/community/model_monitoring/batch_prediction_model_monitoring.ipynb\n",
    "my_model = aiplatform.Model(model.resource_name)\n",
    "source = \"gs://\"+BUCKET_NAME+\"/test.csv\"\n",
    "print(source)\n",
    "batch_prediction_job = my_model.batch_predict(\n",
    "    job_display_name=TIMESTAMP,\n",
    "    gcs_source=[source],\n",
    "    gcs_destination_prefix=\"gs://\"+BUCKET_NAME,\n",
    "    machine_type=\"n1-standard-32\",\n",
    "    starting_replica_count=1,\n",
    "    max_replica_count=2,\n",
    "    sync=False,\n",
    "    instances_format=\"csv\" #https://googleapis.dev/python/aiplatform/latest/aiplatform.html\n",
    ")\n",
    "\n",
    "batch_prediction_job.wait()\n",
    "\n",
    "print(batch_prediction_job.display_name)\n",
    "print(batch_prediction_job.resource_name)\n",
    "print(batch_prediction_job.state)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bae9b75c-e462-4a75-9bde-7cace504bc11",
   "metadata": {},
   "outputs": [],
   "source": [
    "#download CSV from bucket (see the download_results sketch appended after the notebook listings)\n",
    "#Write to database X"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python (Local)",
   "language": "python",
   "name": "local-base"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
--------------------------------------------------------------------------------
/Schedule_notebook_Vertex_training.ipynb:
--------------------------------------------------------------------------------
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5786316f-6f84-403a-b62d-cbf49fdeccd5",
   "metadata": {},
   "outputs": [],
   "source": [
    "#To push the branch back to GitHub you will probably need a personal access token\n",
    "#https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/creating-a-personal-access-token\n",
"\n", 13 | "#Before pushing to repo don't forget to right click the file on the left pane and git add" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 1, 19 | "id": "afffe79e-d764-402c-ab98-92c7b9e19abc", 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "import tensorflow as tf\n", 24 | "import tensorflow_datasets as tfds\n", 25 | "import tensorflow_hub as hub" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": null, 31 | "id": "b6b3f786-234b-492f-8c64-74aa8e88b257", 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "#Download data\n", 36 | "clientBQ = client(credentials=)\n", 37 | "dataframe =clientBQ.query() #/ snowflake / SQL\n", 38 | "\n", 39 | "#use this to train later on\n", 40 | "#https://cloud.google.com/vertex-ai/docs/training/understanding-training-service\n", 41 | "#Instead of using tfds.load you can download the dataset from anywhere else." 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 15, 47 | "id": "1bf0c3a6-e4e1-4c5b-a4f4-b391e8f6dfd9", 48 | "metadata": {}, 49 | "outputs": [ 50 | { 51 | "name": "stdout", 52 | "output_type": "stream", 53 | "text": [ 54 | "1000\n" 55 | ] 56 | } 57 | ], 58 | "source": [ 59 | "data, info = tfds.load(name='cifar10', as_supervised=True, with_info=True)\n", 60 | "NUM_CLASSES = info.features['label'].num_classes\n", 61 | "DATASET_SIZE = info.splits['train'].num_examples\n", 62 | "print(DATASET_SIZE)" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 16, 68 | "id": "3b55e542-3b83-4f2a-9c39-f14625ae3f75", 69 | "metadata": {}, 70 | "outputs": [], 71 | "source": [ 72 | "def preprocess_data(image, label):\n", 73 | " image = tf.image.resize(image, (300,300))\n", 74 | " return tf.cast(image, tf.float32) / 255., label" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 17, 80 | "id": "04d44b35-c9c1-4af3-9963-2437ec5c6353", 81 | "metadata": {}, 82 | "outputs": [], 83 | "source": [ 84 | "# Create train/validation splits\n", 85 | "\n", 86 | "# Shuffle dataset\n", 87 | "dataset = data['train'].shuffle(1000)\n", 88 | "\n", 89 | "train_split = 0.8\n", 90 | "val_split = 0.2\n", 91 | "train_size = int(train_split * DATASET_SIZE)\n", 92 | "val_size = int(val_split * DATASET_SIZE)\n", 93 | "\n", 94 | "train_data = dataset.take(train_size)\n", 95 | "train_data = train_data.map(preprocess_data)\n", 96 | "train_data = train_data.batch(64)\n", 97 | "\n", 98 | "validation_data = dataset.skip(train_size)\n", 99 | "validation_data = validation_data.map(preprocess_data)\n", 100 | "validation_data = validation_data.batch(64)" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": 18, 106 | "id": "2e0a1561-421f-4c6c-b39f-dff35f89320f", 107 | "metadata": { 108 | "tags": [ 109 | "parameters" 110 | ] 111 | }, 112 | "outputs": [], 113 | "source": [ 114 | "feature_extractor_model = \"inception_v3\"" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": 19, 120 | "id": "f3d67a3a-e976-441e-afef-e566e8fb768a", 121 | "metadata": {}, 122 | "outputs": [], 123 | "source": [ 124 | "tf_hub_uri = f\"https://tfhub.dev/google/imagenet/{feature_extractor_model}/feature_vector/5\"\n" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": 20, 130 | "id": "a36fd04b-be39-45a2-9438-fa6ab92c9de6", 131 | "metadata": {}, 132 | "outputs": [], 133 | "source": [ 134 | "feature_extractor_layer = hub.KerasLayer(\n", 135 | " tf_hub_uri,\n", 136 | " trainable=False)" 137 | ] 138 | }, 139 | { 140 | 
"cell_type": "code", 141 | "execution_count": 21, 142 | "id": "d1dc6d6a-8bd5-4856-8ad3-8fedac38f146", 143 | "metadata": {}, 144 | "outputs": [], 145 | "source": [ 146 | "model = tf.keras.Sequential([\n", 147 | " feature_extractor_layer,\n", 148 | " tf.keras.layers.Dense(units=NUM_CLASSES)\n", 149 | "])" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": null, 155 | "id": "d2be4d35-4cce-47c5-a4ac-8e3b484ce40f", 156 | "metadata": {}, 157 | "outputs": [ 158 | { 159 | "name": "stderr", 160 | "output_type": "stream", 161 | "text": [ 162 | "2022-09-12 08:24:21.613006: W tensorflow/core/kernels/data/cache_dataset_ops.cc:856] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.\n" 163 | ] 164 | }, 165 | { 166 | "name": "stdout", 167 | "output_type": "stream", 168 | "text": [ 169 | "10/13 [======================>.......] - ETA: 21s - loss: 2.1005 - acc: 0.2922" 170 | ] 171 | }, 172 | { 173 | "name": "stderr", 174 | "output_type": "stream", 175 | "text": [ 176 | "2022-09-12 08:25:35.270372: W tensorflow/core/kernels/data/cache_dataset_ops.cc:856] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.\n" 177 | ] 178 | }, 179 | { 180 | "name": "stdout", 181 | "output_type": "stream", 182 | "text": [ 183 | "13/13 [==============================] - ETA: 0s - loss: 2.0105 - acc: 0.3375" 184 | ] 185 | } 186 | ], 187 | "source": [ 188 | "model.compile(\n", 189 | " optimizer=tf.keras.optimizers.Adam(),\n", 190 | " loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),\n", 191 | " metrics=['acc'])\n", 192 | "\n", 193 | "model.fit(train_data, validation_data=validation_data, epochs=1)" 194 | ] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "execution_count": null, 199 | "id": "d967c497-f78f-4d4c-9913-74d370f811f0", 200 | "metadata": {}, 201 | "outputs": [], 202 | "source": [ 203 | "#this should create a local folder with all the assets\n", 204 | "#https://www.tensorflow.org/guide/keras/save_and_serialize\n", 205 | "model.save(\"my_model\")" 206 | ] 207 | }, 208 | { 209 | "cell_type": "code", 210 | "execution_count": null, 211 | "id": "996e217a-568e-4d0f-b7e8-ff83187fbffa", 212 | "metadata": {}, 213 | "outputs": [], 214 | "source": [ 215 | "#https://cloud.google.com/storage/docs/uploading-objects\n", 216 | "from google.cloud import storage\n", 217 | "\n", 218 | "bucket_name = \"automl-output-mlops\"\n", 219 | "\n", 220 | "def upload_blob(bucket_name, source_file_name, destination_blob_name):\n", 221 | " \"\"\"Uploads a file to the bucket.\"\"\"\n", 222 | " # The ID of your GCS bucket\n", 223 | " # bucket_name = \"your-bucket-name\"\n", 224 | " # The path to your file to upload\n", 225 | " # source_file_name = \"local/path/to/file\"\n", 226 | " # The ID of your GCS object\n", 227 | " # destination_blob_name = \"storage-object-name\"\n", 228 | "\n", 229 | " storage_client = storage.Client()\n", 230 | " bucket = storage_client.bucket(bucket_name)\n", 231 | " blob = bucket.blob(destination_blob_name)\n", 232 | 
"\n", 233 | " blob.upload_from_filename(source_file_name)\n", 234 | "\n", 235 | " print(\n", 236 | " f\"File {source_file_name} uploaded to {destination_blob_name}.\"\n", 237 | " )\n", 238 | " \n", 239 | "#TODO: fill in the arguments to use the funciton above\n", 240 | "source_file = \"my_model/saved_model.pb\"\n", 241 | "dest_blob = \"saved_model.pb\"\n", 242 | "upload_blob(bucket_name, source_file, dest_blob)" 243 | ] 244 | }, 245 | { 246 | "cell_type": "code", 247 | "execution_count": null, 248 | "id": "7dff7917-5de1-4227-8b78-2cbdd8935639", 249 | "metadata": {}, 250 | "outputs": [], 251 | "source": [ 252 | "#https://cloud.google.com/vertex-ai/docs/model-registry/import-model#pre-built-container\n", 253 | "#https://github.com/googleapis/python-aiplatform/blob/HEAD/samples/model-builder/upload_model_sample.py\n", 254 | "\n", 255 | "from typing import Dict, Optional, Sequence\n", 256 | "\n", 257 | "from google.cloud import aiplatform\n", 258 | "\n", 259 | "PROJECT_ID = \"mlops-demos-306914\"\n", 260 | "\n", 261 | "aiplatform.init(project=PROJECT_ID)\n", 262 | " \n", 263 | "def upload_model_to_model_registry(\n", 264 | " project: str,\n", 265 | " display_name: str,\n", 266 | " serving_container_image_uri: str =\"us-docker.pkg.dev/vertex-ai/prediction/sklearn-cpu.0-23:latest\", # https://cloud.google.com/vertex-ai/docs/predictions/pre-built-containers\n", 267 | " location: str = \"us-central1\",\n", 268 | " artifact_uri: Optional[str] = None,\n", 269 | " serving_container_predict_route: Optional[str] = None,\n", 270 | " serving_container_health_route: Optional[str] = None,\n", 271 | " description: Optional[str] = None,\n", 272 | " serving_container_command: Optional[Sequence[str]] = None,\n", 273 | " serving_container_args: Optional[Sequence[str]] = None,\n", 274 | " serving_container_environment_variables: Optional[Dict[str, str]] = None,\n", 275 | " serving_container_ports: Optional[Sequence[int]] = None,\n", 276 | " instance_schema_uri: Optional[str] = None,\n", 277 | " parameters_schema_uri: Optional[str] = None,\n", 278 | " prediction_schema_uri: Optional[str] = None,\n", 279 | " explanation_metadata: Optional[explain.ExplanationMetadata] = None,\n", 280 | " explanation_parameters: Optional[explain.ExplanationParameters] = None,\n", 281 | " sync: bool = True,\n", 282 | "):\n", 283 | "\n", 284 | " aiplatform.init(project=project, location=location)\n", 285 | "\n", 286 | " model = aiplatform.Model.upload(\n", 287 | " display_name=display_name,\n", 288 | " artifact_uri=artifact_uri,\n", 289 | " serving_container_image_uri=serving_container_image_uri,\n", 290 | " serving_container_predict_route=serving_container_predict_route,\n", 291 | " serving_container_health_route=serving_container_health_route,\n", 292 | " instance_schema_uri=instance_schema_uri,\n", 293 | " parameters_schema_uri=parameters_schema_uri,\n", 294 | " prediction_schema_uri=prediction_schema_uri,\n", 295 | " description=description,\n", 296 | " serving_container_command=serving_container_command,\n", 297 | " serving_container_args=serving_container_args,\n", 298 | " serving_container_environment_variables=serving_container_environment_variables,\n", 299 | " serving_container_ports=serving_container_ports,\n", 300 | " explanation_metadata=explanation_metadata,\n", 301 | " explanation_parameters=explanation_parameters,\n", 302 | " sync=sync,\n", 303 | " )\n", 304 | "\n", 305 | " model.wait()\n", 306 | "\n", 307 | " print(model.display_name)\n", 308 | " print(model.resource_name)\n", 309 | " return model\n", 310 | "\n", 311 
| "#TODO: fill in the arguments to use the funciton above\n", 312 | "upload_model_to_model_registry(project=PROJECT_ID,display_name=\"my_display_name\")" 313 | ] 314 | }, 315 | { 316 | "cell_type": "code", 317 | "execution_count": null, 318 | "id": "39e9984f-5d75-4776-b99d-8cc4bcd53339", 319 | "metadata": {}, 320 | "outputs": [], 321 | "source": [ 322 | "#https://github.com/googleapis/python-aiplatform/blob/2bc9b2b0d048c29ba43c8b4c3ea51370515d08c3/samples/model-builder/create_batch_prediction_job_sample.py\n", 323 | "from typing import Sequence, Union\n", 324 | "\n", 325 | "from google.cloud import aiplatform\n", 326 | "\n", 327 | "def create_batch_prediction_job_sample(\n", 328 | " project: str,\n", 329 | " location: str,\n", 330 | " model_resource_name: str,\n", 331 | " job_display_name: str,\n", 332 | " gcs_source: Union[str, Sequence[str]],\n", 333 | " gcs_destination: str,\n", 334 | " sync: bool = True,\n", 335 | "):\n", 336 | " aiplatform.init(project=project, location=location)\n", 337 | "\n", 338 | " my_model = aiplatform.Model(model_resource_name)\n", 339 | "\n", 340 | " batch_prediction_job = my_model.batch_predict(\n", 341 | " job_display_name=job_display_name,\n", 342 | " gcs_source=gcs_source,\n", 343 | " gcs_destination_prefix=gcs_destination,\n", 344 | " sync=sync,\n", 345 | " )\n", 346 | "\n", 347 | " batch_prediction_job.wait()\n", 348 | "\n", 349 | " print(batch_prediction_job.display_name)\n", 350 | " print(batch_prediction_job.resource_name)\n", 351 | " print(batch_prediction_job.state)\n", 352 | " return batch_prediction_job\n", 353 | "\n", 354 | "create_batch_prediction_job_sample()" 355 | ] 356 | }, 357 | { 358 | "cell_type": "code", 359 | "execution_count": null, 360 | "id": "83312cd9-5fe2-416e-b6fc-4c7cd498bd94", 361 | "metadata": {}, 362 | "outputs": [], 363 | "source": [] 364 | }, 365 | { 366 | "cell_type": "code", 367 | "execution_count": null, 368 | "id": "1f7a9a5a-4c60-4b59-8c0c-a7623f1ae81b", 369 | "metadata": {}, 370 | "outputs": [], 371 | "source": [] 372 | }, 373 | { 374 | "cell_type": "code", 375 | "execution_count": null, 376 | "id": "38e1885c-133b-4cba-aed3-d11371e9914e", 377 | "metadata": {}, 378 | "outputs": [], 379 | "source": [] 380 | } 381 | ], 382 | "metadata": { 383 | "environment": { 384 | "kernel": "python3", 385 | "name": "managed-notebooks.m87", 386 | "type": "gcloud", 387 | "uri": "gcr.io/deeplearning-platform-release/tf2-gpu:latest" 388 | }, 389 | "kernelspec": { 390 | "display_name": "TensorFlow 2 (Local)", 391 | "language": "python", 392 | "name": "local-tf2" 393 | }, 394 | "language_info": { 395 | "codemirror_mode": { 396 | "name": "ipython", 397 | "version": 3 398 | }, 399 | "file_extension": ".py", 400 | "mimetype": "text/x-python", 401 | "name": "python", 402 | "nbconvert_exporter": "python", 403 | "pygments_lexer": "ipython3", 404 | "version": "3.7.12" 405 | } 406 | }, 407 | "nbformat": 4, 408 | "nbformat_minor": 5 409 | } 410 | --------------------------------------------------------------------------------