├── README.md ├── .gitignore ├── .env └── batch_job.ipynb /README.md: -------------------------------------------------------------------------------- 1 | # Kubeflow-v2-batch-prediction 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | venv/ 2 | .idea/ 3 | .ipynb_checkpoints/ 4 | -------------------------------------------------------------------------------- /.env: -------------------------------------------------------------------------------- 1 | PROJECT_ID=kubeflow-demos 2 | BUCKET=user-group-demo 3 | -------------------------------------------------------------------------------- /batch_job.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "eb9f6f24-92f1-4f60-8d9b-43732cd87fcb", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "%%capture\n", 11 | "!pip3 install google-cloud-aiplatform==1.0.0 --upgrade\n", 12 | "!pip3 install kfp google-cloud-pipeline-components==0.1.1 --upgrade\n", 13 | "!pip3 install scikit-learn\n", 14 | "!pip3 install pandas" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 2, 20 | "id": "03a3a9ce-10a1-4f28-8d7b-193adf9c9900", 21 | "metadata": {}, 22 | "outputs": [], 23 | "source": [ 24 | "import uuid\n", 25 | "from kfp import dsl\n", 26 | "import kfp\n", 27 | "from google.cloud import aiplatform\n", 28 | "from kfp.v2.dsl import component\n", 29 | "from kfp.v2.dsl import (Artifact, Dataset, Input, InputPath, Model, Output, OutputPath, component, ClassificationMetrics, Metrics)\n", 30 | "from google_cloud_pipeline_components import aiplatform as gcc_aip\n", 31 | "from typing import NamedTuple" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 3, 37 | "id": "d44a30e3-d0f8-4c36-bcaa-a084a57d90ab", 38 | "metadata": {}, 39 | "outputs": 
# --- Project configuration (notebook cell 3) --------------------------------
# Load PROJECT_ID / BUCKET from the repository's .env file.
# https://stackoverflow.com/a/54028874
get_ipython().run_line_magic("load_ext", "dotenv")  # notebook form: %load_ext dotenv
get_ipython().run_line_magic("dotenv", "")  # notebook form: %dotenv

import os
PROJECT_ID = os.environ['PROJECT_ID']
BUCKET_NAME = os.environ['BUCKET']

PIPELINE_ROOT = 'gs://{}/pipeline_root'.format(BUCKET_NAME)
REGION = 'us-central1'

print(PROJECT_ID)
print(BUCKET_NAME)
print(PIPELINE_ROOT)


# --- Batch prediction component (notebook cell 4) ---------------------------
@component(packages_to_install=["google-cloud-aiplatform"])
def run_batch_job(
    project: str = "kubeflow-demos",
    location: str = "us-central1",
    model_resource_name: str = "4789441864266678272",
    job_display_name: str = "test3",
    gcs_source: str = "gs://test-fast/batch_test.csv",
    gcs_destination: str = "gs://test-fast",
):
    """Run a batch prediction job for an existing model on dedicated resources.

    Every default equals the value that used to be hard-coded in the body,
    so calling ``run_batch_job()`` with no arguments requests the same job
    as before. Exposing them as component inputs lets the pipeline (and
    therefore ``parameter_values`` at submission time) choose the model,
    project and data locations.

    Args:
        project: Project passed to ``aiplatform.init``.
        location: Region passed to ``aiplatform.init``.
        model_resource_name: Identifier handed to ``aiplatform.Model``.
        job_display_name: Display name of the batch prediction job.
        gcs_source: GCS URI of the input instances.
        gcs_destination: GCS prefix under which predictions are written.
    """
    # Imported inside the function because the component body is executed
    # on its own in the component's container, not in the notebook kernel.
    from google.cloud import aiplatform

    aiplatform.init(project=project, location=location)

    model = aiplatform.Model(model_resource_name)

    # Hardware settings are unchanged from the original sample defaults.
    batch_prediction_job = model.batch_predict(
        job_display_name=job_display_name,
        gcs_source=gcs_source,
        gcs_destination_prefix=gcs_destination,
        machine_type="n1-standard-2",
        accelerator_count=1,
        accelerator_type="NVIDIA_TESLA_K80",
        starting_replica_count=1,
        max_replica_count=1,
        sync=True,
    )

    # Kept from the original: wait explicitly before reporting the state,
    # so the printed values do not depend on the `sync` flag above.
    batch_prediction_job.wait()

    print(batch_prediction_job.display_name)
    print(batch_prediction_job.resource_name)
    print(batch_prediction_job.state)


# --- Run timestamp (notebook cell 5) -----------------------------------------
from datetime import datetime

# Used to give each compiled job spec a unique file name.
TIMESTAMP = datetime.now().strftime("%Y%m%d%H%M%S")


# --- Pipeline definition (notebook cell 6) -----------------------------------
@kfp.dsl.pipeline(name="batch-test" + str(uuid.uuid4()))
def pipeline(
    project: str = PROJECT_ID,
    bucket: str = BUCKET_NAME,
    model_id: str = "4789441864266678272"
):
    """Single-step pipeline that runs the batch prediction component.

    Args:
        project: Project the batch prediction job is created in.
        bucket: Not used by the pipeline body; kept so the pipeline's
            parameter list stays unchanged for existing job specs/callers.
        model_id: Model to run batch prediction with; forwarded to
            ``run_batch_job`` so the value supplied at submission time is
            actually honoured.
    """
    # Every input is passed explicitly so the compiled job spec does not
    # rely on how component-level defaults are resolved.
    run_batch_job(
        project=project,
        location=REGION,
        model_resource_name=model_id,
        job_display_name="test3",
        gcs_source="gs://test-fast/batch_test.csv",
        gcs_destination="gs://test-fast",
    )
# --- Compile and submit the pipeline (notebook cells 7-9) --------------------
from kfp.v2 import compiler
from kfp.v2.google.client import AIPlatformClient

# Build the job-spec file name once; the same path is compiled to and
# then submitted below.
job_spec_path = "dag-" + TIMESTAMP + ".json"

pipeline_compiler = compiler.Compiler()
pipeline_compiler.compile(pipeline_func=pipeline, package_path=job_spec_path)

# NOTE: the notebook's recorded output shows a FutureWarning that
# AIPlatformClient will be deprecated in v2.0.0 in favour of PipelineJob
# from the Vertex SDK.
api_client = AIPlatformClient(project_id=PROJECT_ID, region=REGION)

# Submit the compiled spec, overriding the pipeline's `model_id` parameter.
run_parameters = {"model_id": "4789441864266678272"}
response = api_client.create_run_from_job_spec(
    job_spec_path,
    pipeline_root=PIPELINE_ROOT,
    parameter_values=run_parameters,
)