├── README.md
├── .gitignore
├── .env
└── batch_job.ipynb
/README.md:
--------------------------------------------------------------------------------
1 | # Kubeflow-v2-batch-prediction
2 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | venv/
2 | .idea/
3 | .ipynb_checkpoints/
4 |
--------------------------------------------------------------------------------
/.env:
--------------------------------------------------------------------------------
1 | PROJECT_ID=kubeflow-demos
2 | BUCKET=user-group-demo
3 |
--------------------------------------------------------------------------------
/batch_job.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "id": "eb9f6f24-92f1-4f60-8d9b-43732cd87fcb",
7 | "metadata": {},
8 | "outputs": [],
9 | "source": [
10 | "%%capture\n",
11 | "# %pip (rather than !pip3) installs into the environment of the running kernel.\n",
12 | "%pip install google-cloud-aiplatform==1.0.0 --upgrade\n",
13 | "%pip install kfp google-cloud-pipeline-components==0.1.1 --upgrade\n",
14 | "%pip install scikit-learn pandas"
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": 2,
20 | "id": "03a3a9ce-10a1-4f28-8d7b-193adf9c9900",
21 | "metadata": {},
22 | "outputs": [],
23 | "source": [
24 | "import uuid\n",
25 | "from kfp import dsl\n",
26 | "import kfp\n",
27 | "from google.cloud import aiplatform\n",
28 | "from kfp.v2.dsl import component\n",
29 | "from kfp.v2.dsl import (Artifact, Dataset, Input, InputPath, Model, Output, OutputPath, component, ClassificationMetrics, Metrics)\n",
30 | "from google_cloud_pipeline_components import aiplatform as gcc_aip\n",
31 | "from typing import NamedTuple"
32 | ]
33 | },
34 | {
35 | "cell_type": "code",
36 | "execution_count": 3,
37 | "id": "d44a30e3-d0f8-4c36-bcaa-a084a57d90ab",
38 | "metadata": {},
39 | "outputs": [
40 | {
41 | "name": "stdout",
42 | "output_type": "stream",
43 | "text": [
44 | "kubeflow-demos\n",
45 | "user-group-demo\n",
46 | "gs://user-group-demo/pipeline_root\n"
47 | ]
48 | }
49 | ],
50 | "source": [
51 | "# Environment loading via the dotenv IPython extension; see\n",
52 | "# https://stackoverflow.com/a/54028874\n",
53 | "%load_ext dotenv\n",
54 | "%dotenv\n",
55 | "\n",
56 | "import os\n",
57 | "\n",
58 | "PROJECT_ID = os.environ['PROJECT_ID']\n",
59 | "BUCKET_NAME = os.environ['BUCKET']\n",
60 | "PIPELINE_ROOT = 'gs://{}/pipeline_root'.format(BUCKET_NAME)\n",
61 | "REGION = 'us-central1'\n",
62 | "\n",
63 | "# Show the resolved settings, one per line.\n",
64 | "print(PROJECT_ID, BUCKET_NAME, PIPELINE_ROOT, sep='\\n')"
65 | ]
66 | },
67 | {
68 | "cell_type": "code",
69 | "execution_count": 18,
70 | "id": "6912d7c9-79f4-4fff-bf97-8f98b17b6103",
71 | "metadata": {},
72 | "outputs": [],
73 | "source": [
74 | "@component(packages_to_install=[\"google-cloud-aiplatform\"])\n",
75 | "def run_batch_job():\n",
76 | "    # Component body: run one hard-coded Vertex AI batch prediction and wait for it.\n",
77 | "    # Imports are kept inside the function body rather than at notebook level.\n",
78 | "    from typing import Sequence, Union\n",
79 | "    from google.cloud import aiplatform, aiplatform_v1\n",
80 | "\n",
81 | "    def _launch_batch_prediction(\n",
82 | "        project: str,\n",
83 | "        location: str,\n",
84 | "        model_resource_name: str,\n",
85 | "        job_display_name: str,\n",
86 | "        gcs_source: Union[str, Sequence[str]],\n",
87 | "        gcs_destination: str,\n",
88 | "        machine_type: str = \"n1-standard-2\",\n",
89 | "        accelerator_count: int = 1,\n",
90 | "        accelerator_type: Union[str, aiplatform_v1.AcceleratorType] = \"NVIDIA_TESLA_K80\",\n",
91 | "        starting_replica_count: int = 1,\n",
92 | "        max_replica_count: int = 1,\n",
93 | "        sync: bool = True,\n",
94 | "    ):\n",
95 | "        \"\"\"Submit a batch prediction for the given model, wait, print and return the job.\"\"\"\n",
96 | "        aiplatform.init(project=project, location=location)\n",
97 | "        resource_options = dict(\n",
98 | "            machine_type=machine_type,\n",
99 | "            accelerator_count=accelerator_count,\n",
100 | "            accelerator_type=accelerator_type,\n",
101 | "            starting_replica_count=starting_replica_count,\n",
102 | "            max_replica_count=max_replica_count,\n",
103 | "        )\n",
104 | "        job = aiplatform.Model(model_resource_name).batch_predict(\n",
105 | "            job_display_name=job_display_name,\n",
106 | "            gcs_source=gcs_source,\n",
107 | "            gcs_destination_prefix=gcs_destination,\n",
108 | "            sync=sync,\n",
109 | "            **resource_options,\n",
110 | "        )\n",
111 | "        job.wait()\n",
112 | "\n",
113 | "        # Report the job's display name, resource name and state, in that order.\n",
114 | "        for field in (\"display_name\", \"resource_name\", \"state\"):\n",
115 | "            print(getattr(job, field))\n",
116 | "        return job\n",
117 | "\n",
118 | "    _launch_batch_prediction(\n",
119 | "        project=\"kubeflow-demos\",\n",
120 | "        location=\"us-central1\",\n",
121 | "        model_resource_name=\"4789441864266678272\",\n",
122 | "        job_display_name=\"test3\",\n",
123 | "        gcs_source=\"gs://test-fast/batch_test.csv\",\n",
124 | "        gcs_destination=\"gs://test-fast\",\n",
125 | "    )"
126 | ]
127 | },
128 | {
129 | "cell_type": "code",
130 | "execution_count": 19,
131 | "id": "2182528d-7a63-4676-829f-4792c8f6f463",
132 | "metadata": {},
133 | "outputs": [],
134 | "source": [
135 | "from datetime import datetime\n",
136 | "# Second-resolution timestamp; later cells use it in the compiled job spec's file name.\n",
137 | "TIMESTAMP = format(datetime.now(), \"%Y%m%d%H%M%S\")"
138 | ]
139 | },
140 | {
141 | "cell_type": "code",
142 | "execution_count": 20,
143 | "id": "77a95a99-1857-433d-9c15-30ac1e5d7079",
144 | "metadata": {},
145 | "outputs": [],
146 | "source": [
147 | "# One-step pipeline; its name gets a fresh UUID suffix each time this cell runs.\n",
148 | "# The three parameters are declared but not forwarded to the task below.\n",
149 | "@dsl.pipeline(name=\"batch-test\" + str(uuid.uuid4()))\n",
150 | "def pipeline(project: str = PROJECT_ID,\n",
151 | "             bucket: str = BUCKET_NAME,\n",
152 | "             model_id: str = \"4789441864266678272\"):\n",
153 | "    run_batch_job()"
154 | ]
155 | },
156 | {
157 | "cell_type": "code",
158 | "execution_count": 21,
159 | "id": "e300a2b7-85cd-4699-84d0-1d9188fe4d1a",
160 | "metadata": {},
161 | "outputs": [
162 | {
163 | "name": "stderr",
164 | "output_type": "stream",
165 | "text": [
166 | "/Users/yarkoni/projects/ds-churn/venv/lib/python3.9/site-packages/kfp/v2/compiler/compiler.py:1263: FutureWarning: APIs imported from the v1 namespace (e.g. kfp.dsl, kfp.components, etc) will not be supported by the v2 compiler since v2.0.0\n",
167 | " warnings.warn(\n"
168 | ]
169 | }
170 | ],
171 | "source": [
172 | "from kfp.v2 import compiler\n",
173 | "# Compile the pipeline function into a JSON job spec whose file name embeds TIMESTAMP.\n",
174 | "compiler.Compiler().compile(\n",
175 | "    package_path=\"dag-\" + TIMESTAMP + \".json\", pipeline_func=pipeline)"
176 | ]
177 | },
178 | {
179 | "cell_type": "code",
180 | "execution_count": 22,
181 | "id": "67b81eed-ad42-425a-a663-69746ec7a117",
182 | "metadata": {},
183 | "outputs": [
184 | {
185 | "name": "stderr",
186 | "output_type": "stream",
187 | "text": [
188 | "/Users/yarkoni/projects/ds-churn/venv/lib/python3.9/site-packages/kfp/v2/google/client/client.py:169: FutureWarning: AIPlatformClient will be deprecated in v2.0.0. Please use PipelineJob https://googleapis.dev/python/aiplatform/latest/_modules/google/cloud/aiplatform/pipeline_jobs.html in Vertex SDK. Install the SDK using \"pip install google-cloud-aiplatform\"\n",
189 | " warnings.warn(\n"
190 | ]
191 | }
192 | ],
193 | "source": [
194 | "from kfp.v2.google.client import AIPlatformClient\n",
195 | "\n",
196 | "# Pipeline-run client bound to the configured project and region.\n",
197 | "# kfp emits a FutureWarning that AIPlatformClient will be deprecated in v2.0.0\n",
198 | "# and points to PipelineJob in the Vertex SDK as the replacement.\n",
199 | "api_client = AIPlatformClient(region=REGION, project_id=PROJECT_ID)"
200 | ]
201 | },
202 | {
203 | "cell_type": "code",
204 | "execution_count": 23,
205 | "id": "95321b37-0318-4ac0-9e5a-21ccc88d541c",
206 | "metadata": {},
207 | "outputs": [
208 | {
209 | "data": {
210 | "text/html": [
211 | "See the Pipeline job here."
212 | ],
213 | "text/plain": [
214 | ""
215 | ]
216 | },
217 | "metadata": {},
218 | "output_type": "display_data"
219 | }
220 | ],
221 | "source": [
222 | "# Submit the job spec compiled for this TIMESTAMP, passing model_id explicitly.\n",
223 | "response = api_client.create_run_from_job_spec(\n",
224 | "    \"dag-\" + TIMESTAMP + \".json\",\n",
225 | "    parameter_values=dict(model_id=\"4789441864266678272\"),\n",
226 | "    pipeline_root=PIPELINE_ROOT)"
227 | ]
228 | },
229 | {
230 | "cell_type": "code",
231 | "execution_count": null,
232 | "id": "ac4f232e-172c-4e36-874d-74ebc0c857ca",
233 | "metadata": {},
234 | "outputs": [],
235 | "source": []
236 | }
237 | ],
238 | "metadata": {
239 | "kernelspec": {
240 | "display_name": "Python 3",
241 | "language": "python",
242 | "name": "python3"
243 | },
244 | "language_info": {
245 | "codemirror_mode": {
246 | "name": "ipython",
247 | "version": 3
248 | },
249 | "file_extension": ".py",
250 | "mimetype": "text/x-python",
251 | "name": "python",
252 | "nbconvert_exporter": "python",
253 | "pygments_lexer": "ipython3",
254 | "version": "3.9.4"
255 | }
256 | },
257 | "nbformat": 4,
258 | "nbformat_minor": 5
259 | }
260 |
--------------------------------------------------------------------------------