├── Cloud_AutoML_Vision
│   ├── Data Preparation.ipynb
│   ├── Kaggle Data Download.ipynb
│   ├── README.md
│   └── predict.ipynb
├── GCP_Serverless_AppEngine
│   ├── ComplaintsFlask.py
│   ├── ComplaintsServer.py
│   ├── Dockerfile
│   ├── README.md
│   ├── Text_Classification_using_TFIDF_AutoML_scikit_learn.ipynb
│   ├── app.yaml
│   ├── preprocessing
│   │   └── functions.py
│   └── requirements.txt
├── Google_Kubernetes_Engine
│   ├── ComplaintsFlask.py
│   ├── Dockerfile
│   ├── README.md
│   ├── Text_Classification_using_TFIDF_AutoML_scikit_learn.ipynb
│   ├── deployment.yaml
│   ├── preprocessing
│   │   └── functions.py
│   ├── requirements.txt
│   └── service.yaml
├── README.md
├── Telecom_churn_AI_Platform
│   ├── Churn_model.ipynb
│   ├── README.md
│   ├── data
│   │   ├── README.md
│   │   └── telecom_customer_churn.csv
│   ├── input.json
│   ├── predict_setup.ipynb
│   ├── predictor.py
│   ├── serving.ipynb
│   └── setup.py
├── bigquery_logisticsregression
│   └── README.md
├── bigqueryml_datastudio
│   └── README.md
└── gke_autopilot
    ├── Dockerfile
    ├── README.md
    ├── deployment.yaml
    ├── imgwebapp.py
    ├── models
    │   ├── assets
    │   │   └── README.md
    │   ├── saved_model.pb
    │   └── variables
    │       ├── variables.data-00000-of-00002
    │       ├── variables.data-00001-of-00002
    │       └── variables.index
    ├── requirements.txt
    └── service.yaml
/Cloud_AutoML_Vision/Data Preparation.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 36,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import pandas as pd"
10 | ]
11 | },
12 | {
13 | "cell_type": "code",
14 | "execution_count": 37,
15 | "metadata": {},
16 | "outputs": [],
17 | "source": [
18 | "df=pd.read_csv('train.csv')"
19 | ]
20 | },
21 | {
22 | "cell_type": "code",
23 | "execution_count": 38,
24 | "metadata": {},
25 | "outputs": [
26 | {
27 | "data": {
28 | "text/html": [
29 | "
\n",
30 | "\n",
43 | "
\n",
44 | " \n",
45 | " \n",
46 | " | \n",
47 | " image_id | \n",
48 | " label | \n",
49 | "
\n",
50 | " \n",
51 | " \n",
52 | " \n",
53 | " 0 | \n",
54 | " 1000015157.jpg | \n",
55 | " 0 | \n",
56 | "
\n",
57 | " \n",
58 | " 1 | \n",
59 | " 1000201771.jpg | \n",
60 | " 3 | \n",
61 | "
\n",
62 | " \n",
63 | " 2 | \n",
64 | " 100042118.jpg | \n",
65 | " 1 | \n",
66 | "
\n",
67 | " \n",
68 | " 3 | \n",
69 | " 1000723321.jpg | \n",
70 | " 1 | \n",
71 | "
\n",
72 | " \n",
73 | " 4 | \n",
74 | " 1000812911.jpg | \n",
75 | " 3 | \n",
76 | "
\n",
77 | " \n",
78 | " ... | \n",
79 | " ... | \n",
80 | " ... | \n",
81 | "
\n",
82 | " \n",
83 | " 21392 | \n",
84 | " 999068805.jpg | \n",
85 | " 3 | \n",
86 | "
\n",
87 | " \n",
88 | " 21393 | \n",
89 | " 999329392.jpg | \n",
90 | " 3 | \n",
91 | "
\n",
92 | " \n",
93 | " 21394 | \n",
94 | " 999474432.jpg | \n",
95 | " 1 | \n",
96 | "
\n",
97 | " \n",
98 | " 21395 | \n",
99 | " 999616605.jpg | \n",
100 | " 4 | \n",
101 | "
\n",
102 | " \n",
103 | " 21396 | \n",
104 | " 999998473.jpg | \n",
105 | " 4 | \n",
106 | "
\n",
107 | " \n",
108 | "
\n",
109 | "
21397 rows × 2 columns
\n",
110 | "
"
111 | ],
112 | "text/plain": [
113 | " image_id label\n",
114 | "0 1000015157.jpg 0\n",
115 | "1 1000201771.jpg 3\n",
116 | "2 100042118.jpg 1\n",
117 | "3 1000723321.jpg 1\n",
118 | "4 1000812911.jpg 3\n",
119 | "... ... ...\n",
120 | "21392 999068805.jpg 3\n",
121 | "21393 999329392.jpg 3\n",
122 | "21394 999474432.jpg 1\n",
123 | "21395 999616605.jpg 4\n",
124 | "21396 999998473.jpg 4\n",
125 | "\n",
126 | "[21397 rows x 2 columns]"
127 | ]
128 | },
129 | "execution_count": 38,
130 | "metadata": {},
131 | "output_type": "execute_result"
132 | }
133 | ],
134 | "source": [
135 | "df"
136 | ]
137 | },
138 | {
139 | "cell_type": "code",
140 | "execution_count": null,
141 | "metadata": {},
142 | "outputs": [],
143 | "source": [
144 | "label ={0:'CBB', 1:'CBSD', 2:'CGM', 3:'CMD', 4:'Healthy'}"
145 | ]
146 | },
147 | {
148 | "cell_type": "code",
149 | "execution_count": 39,
150 | "metadata": {},
151 | "outputs": [],
152 | "source": [
153 | "df['image_path'] = 'gs://srivatsan-project-vcm/train_images/' + df['image_id']"
154 | ]
155 | },
156 | {
157 | "cell_type": "code",
158 | "execution_count": 40,
159 | "metadata": {},
160 | "outputs": [
161 | {
162 | "data": {
163 | "text/html": [
164 | "\n",
165 | "\n",
178 | "
\n",
179 | " \n",
180 | " \n",
181 | " | \n",
182 | " image_id | \n",
183 | " label | \n",
184 | " image_path | \n",
185 | "
\n",
186 | " \n",
187 | " \n",
188 | " \n",
189 | " 0 | \n",
190 | " 1000015157.jpg | \n",
191 | " 0 | \n",
192 | " gs://srivatsan-project-vcm/train_images/100001... | \n",
193 | "
\n",
194 | " \n",
195 | " 1 | \n",
196 | " 1000201771.jpg | \n",
197 | " 3 | \n",
198 | " gs://srivatsan-project-vcm/train_images/100020... | \n",
199 | "
\n",
200 | " \n",
201 | " 2 | \n",
202 | " 100042118.jpg | \n",
203 | " 1 | \n",
204 | " gs://srivatsan-project-vcm/train_images/100042... | \n",
205 | "
\n",
206 | " \n",
207 | " 3 | \n",
208 | " 1000723321.jpg | \n",
209 | " 1 | \n",
210 | " gs://srivatsan-project-vcm/train_images/100072... | \n",
211 | "
\n",
212 | " \n",
213 | " 4 | \n",
214 | " 1000812911.jpg | \n",
215 | " 3 | \n",
216 | " gs://srivatsan-project-vcm/train_images/100081... | \n",
217 | "
\n",
218 | " \n",
219 | " ... | \n",
220 | " ... | \n",
221 | " ... | \n",
222 | " ... | \n",
223 | "
\n",
224 | " \n",
225 | " 21392 | \n",
226 | " 999068805.jpg | \n",
227 | " 3 | \n",
228 | " gs://srivatsan-project-vcm/train_images/999068... | \n",
229 | "
\n",
230 | " \n",
231 | " 21393 | \n",
232 | " 999329392.jpg | \n",
233 | " 3 | \n",
234 | " gs://srivatsan-project-vcm/train_images/999329... | \n",
235 | "
\n",
236 | " \n",
237 | " 21394 | \n",
238 | " 999474432.jpg | \n",
239 | " 1 | \n",
240 | " gs://srivatsan-project-vcm/train_images/999474... | \n",
241 | "
\n",
242 | " \n",
243 | " 21395 | \n",
244 | " 999616605.jpg | \n",
245 | " 4 | \n",
246 | " gs://srivatsan-project-vcm/train_images/999616... | \n",
247 | "
\n",
248 | " \n",
249 | " 21396 | \n",
250 | " 999998473.jpg | \n",
251 | " 4 | \n",
252 | " gs://srivatsan-project-vcm/train_images/999998... | \n",
253 | "
\n",
254 | " \n",
255 | "
\n",
256 | "
21397 rows × 3 columns
\n",
257 | "
"
258 | ],
259 | "text/plain": [
260 | " image_id label \\\n",
261 | "0 1000015157.jpg 0 \n",
262 | "1 1000201771.jpg 3 \n",
263 | "2 100042118.jpg 1 \n",
264 | "3 1000723321.jpg 1 \n",
265 | "4 1000812911.jpg 3 \n",
266 | "... ... ... \n",
267 | "21392 999068805.jpg 3 \n",
268 | "21393 999329392.jpg 3 \n",
269 | "21394 999474432.jpg 1 \n",
270 | "21395 999616605.jpg 4 \n",
271 | "21396 999998473.jpg 4 \n",
272 | "\n",
273 | " image_path \n",
274 | "0 gs://srivatsan-project-vcm/train_images/100001... \n",
275 | "1 gs://srivatsan-project-vcm/train_images/100020... \n",
276 | "2 gs://srivatsan-project-vcm/train_images/100042... \n",
277 | "3 gs://srivatsan-project-vcm/train_images/100072... \n",
278 | "4 gs://srivatsan-project-vcm/train_images/100081... \n",
279 | "... ... \n",
280 | "21392 gs://srivatsan-project-vcm/train_images/999068... \n",
281 | "21393 gs://srivatsan-project-vcm/train_images/999329... \n",
282 | "21394 gs://srivatsan-project-vcm/train_images/999474... \n",
283 | "21395 gs://srivatsan-project-vcm/train_images/999616... \n",
284 | "21396 gs://srivatsan-project-vcm/train_images/999998... \n",
285 | "\n",
286 | "[21397 rows x 3 columns]"
287 | ]
288 | },
289 | "execution_count": 40,
290 | "metadata": {},
291 | "output_type": "execute_result"
292 | }
293 | ],
294 | "source": [
295 | "df"
296 | ]
297 | },
298 | {
299 | "cell_type": "code",
300 | "execution_count": 41,
301 | "metadata": {},
302 | "outputs": [],
303 | "source": [
304 | "df=df.replace({'label':label})"
305 | ]
306 | },
307 | {
308 | "cell_type": "code",
309 | "execution_count": 42,
310 | "metadata": {},
311 | "outputs": [
312 | {
313 | "data": {
314 | "text/html": [
315 | "\n",
316 | "\n",
329 | "
\n",
330 | " \n",
331 | " \n",
332 | " | \n",
333 | " image_id | \n",
334 | " label | \n",
335 | " image_path | \n",
336 | "
\n",
337 | " \n",
338 | " \n",
339 | " \n",
340 | " 0 | \n",
341 | " 1000015157.jpg | \n",
342 | " CBB | \n",
343 | " gs://srivatsan-project-vcm/train_images/100001... | \n",
344 | "
\n",
345 | " \n",
346 | " 1 | \n",
347 | " 1000201771.jpg | \n",
348 | " CMD | \n",
349 | " gs://srivatsan-project-vcm/train_images/100020... | \n",
350 | "
\n",
351 | " \n",
352 | " 2 | \n",
353 | " 100042118.jpg | \n",
354 | " CBSD | \n",
355 | " gs://srivatsan-project-vcm/train_images/100042... | \n",
356 | "
\n",
357 | " \n",
358 | " 3 | \n",
359 | " 1000723321.jpg | \n",
360 | " CBSD | \n",
361 | " gs://srivatsan-project-vcm/train_images/100072... | \n",
362 | "
\n",
363 | " \n",
364 | " 4 | \n",
365 | " 1000812911.jpg | \n",
366 | " CMD | \n",
367 | " gs://srivatsan-project-vcm/train_images/100081... | \n",
368 | "
\n",
369 | " \n",
370 | " ... | \n",
371 | " ... | \n",
372 | " ... | \n",
373 | " ... | \n",
374 | "
\n",
375 | " \n",
376 | " 21392 | \n",
377 | " 999068805.jpg | \n",
378 | " CMD | \n",
379 | " gs://srivatsan-project-vcm/train_images/999068... | \n",
380 | "
\n",
381 | " \n",
382 | " 21393 | \n",
383 | " 999329392.jpg | \n",
384 | " CMD | \n",
385 | " gs://srivatsan-project-vcm/train_images/999329... | \n",
386 | "
\n",
387 | " \n",
388 | " 21394 | \n",
389 | " 999474432.jpg | \n",
390 | " CBSD | \n",
391 | " gs://srivatsan-project-vcm/train_images/999474... | \n",
392 | "
\n",
393 | " \n",
394 | " 21395 | \n",
395 | " 999616605.jpg | \n",
396 | " Healthy | \n",
397 | " gs://srivatsan-project-vcm/train_images/999616... | \n",
398 | "
\n",
399 | " \n",
400 | " 21396 | \n",
401 | " 999998473.jpg | \n",
402 | " Healthy | \n",
403 | " gs://srivatsan-project-vcm/train_images/999998... | \n",
404 | "
\n",
405 | " \n",
406 | "
\n",
407 | "
21397 rows × 3 columns
\n",
408 | "
"
409 | ],
410 | "text/plain": [
411 | " image_id label \\\n",
412 | "0 1000015157.jpg CBB \n",
413 | "1 1000201771.jpg CMD \n",
414 | "2 100042118.jpg CBSD \n",
415 | "3 1000723321.jpg CBSD \n",
416 | "4 1000812911.jpg CMD \n",
417 | "... ... ... \n",
418 | "21392 999068805.jpg CMD \n",
419 | "21393 999329392.jpg CMD \n",
420 | "21394 999474432.jpg CBSD \n",
421 | "21395 999616605.jpg Healthy \n",
422 | "21396 999998473.jpg Healthy \n",
423 | "\n",
424 | " image_path \n",
425 | "0 gs://srivatsan-project-vcm/train_images/100001... \n",
426 | "1 gs://srivatsan-project-vcm/train_images/100020... \n",
427 | "2 gs://srivatsan-project-vcm/train_images/100042... \n",
428 | "3 gs://srivatsan-project-vcm/train_images/100072... \n",
429 | "4 gs://srivatsan-project-vcm/train_images/100081... \n",
430 | "... ... \n",
431 | "21392 gs://srivatsan-project-vcm/train_images/999068... \n",
432 | "21393 gs://srivatsan-project-vcm/train_images/999329... \n",
433 | "21394 gs://srivatsan-project-vcm/train_images/999474... \n",
434 | "21395 gs://srivatsan-project-vcm/train_images/999616... \n",
435 | "21396 gs://srivatsan-project-vcm/train_images/999998... \n",
436 | "\n",
437 | "[21397 rows x 3 columns]"
438 | ]
439 | },
440 | "execution_count": 42,
441 | "metadata": {},
442 | "output_type": "execute_result"
443 | }
444 | ],
445 | "source": [
446 | "df"
447 | ]
448 | },
449 | {
450 | "cell_type": "code",
451 | "execution_count": 43,
452 | "metadata": {},
453 | "outputs": [
454 | {
455 | "data": {
456 | "text/plain": [
457 | "CMD 13158\n",
458 | "Healthy 2577\n",
459 | "CGM 2386\n",
460 | "CBSD 2189\n",
461 | "CBB 1087\n",
462 | "Name: label, dtype: int64"
463 | ]
464 | },
465 | "execution_count": 43,
466 | "metadata": {},
467 | "output_type": "execute_result"
468 | }
469 | ],
470 | "source": [
471 | "df['label'].value_counts()"
472 | ]
473 | },
474 | {
475 | "cell_type": "code",
476 | "execution_count": 44,
477 | "metadata": {},
478 | "outputs": [
479 | {
480 | "data": {
481 | "text/html": [
482 | "\n",
483 | "\n",
496 | "
\n",
497 | " \n",
498 | " \n",
499 | " | \n",
500 | " image_id | \n",
501 | " label | \n",
502 | " image_path | \n",
503 | "
\n",
504 | " \n",
505 | " \n",
506 | " \n",
507 | " 0 | \n",
508 | " 1000015157.jpg | \n",
509 | " CBB | \n",
510 | " gs://srivatsan-project-vcm/train_images/100001... | \n",
511 | "
\n",
512 | " \n",
513 | " 1 | \n",
514 | " 1000201771.jpg | \n",
515 | " CMD | \n",
516 | " gs://srivatsan-project-vcm/train_images/100020... | \n",
517 | "
\n",
518 | " \n",
519 | " 2 | \n",
520 | " 100042118.jpg | \n",
521 | " CBSD | \n",
522 | " gs://srivatsan-project-vcm/train_images/100042... | \n",
523 | "
\n",
524 | " \n",
525 | " 3 | \n",
526 | " 1000723321.jpg | \n",
527 | " CBSD | \n",
528 | " gs://srivatsan-project-vcm/train_images/100072... | \n",
529 | "
\n",
530 | " \n",
531 | " 4 | \n",
532 | " 1000812911.jpg | \n",
533 | " CMD | \n",
534 | " gs://srivatsan-project-vcm/train_images/100081... | \n",
535 | "
\n",
536 | " \n",
537 | " ... | \n",
538 | " ... | \n",
539 | " ... | \n",
540 | " ... | \n",
541 | "
\n",
542 | " \n",
543 | " 21392 | \n",
544 | " 999068805.jpg | \n",
545 | " CMD | \n",
546 | " gs://srivatsan-project-vcm/train_images/999068... | \n",
547 | "
\n",
548 | " \n",
549 | " 21393 | \n",
550 | " 999329392.jpg | \n",
551 | " CMD | \n",
552 | " gs://srivatsan-project-vcm/train_images/999329... | \n",
553 | "
\n",
554 | " \n",
555 | " 21394 | \n",
556 | " 999474432.jpg | \n",
557 | " CBSD | \n",
558 | " gs://srivatsan-project-vcm/train_images/999474... | \n",
559 | "
\n",
560 | " \n",
561 | " 21395 | \n",
562 | " 999616605.jpg | \n",
563 | " Healthy | \n",
564 | " gs://srivatsan-project-vcm/train_images/999616... | \n",
565 | "
\n",
566 | " \n",
567 | " 21396 | \n",
568 | " 999998473.jpg | \n",
569 | " Healthy | \n",
570 | " gs://srivatsan-project-vcm/train_images/999998... | \n",
571 | "
\n",
572 | " \n",
573 | "
\n",
574 | "
21397 rows × 3 columns
\n",
575 | "
"
576 | ],
577 | "text/plain": [
578 | " image_id label \\\n",
579 | "0 1000015157.jpg CBB \n",
580 | "1 1000201771.jpg CMD \n",
581 | "2 100042118.jpg CBSD \n",
582 | "3 1000723321.jpg CBSD \n",
583 | "4 1000812911.jpg CMD \n",
584 | "... ... ... \n",
585 | "21392 999068805.jpg CMD \n",
586 | "21393 999329392.jpg CMD \n",
587 | "21394 999474432.jpg CBSD \n",
588 | "21395 999616605.jpg Healthy \n",
589 | "21396 999998473.jpg Healthy \n",
590 | "\n",
591 | " image_path \n",
592 | "0 gs://srivatsan-project-vcm/train_images/100001... \n",
593 | "1 gs://srivatsan-project-vcm/train_images/100020... \n",
594 | "2 gs://srivatsan-project-vcm/train_images/100042... \n",
595 | "3 gs://srivatsan-project-vcm/train_images/100072... \n",
596 | "4 gs://srivatsan-project-vcm/train_images/100081... \n",
597 | "... ... \n",
598 | "21392 gs://srivatsan-project-vcm/train_images/999068... \n",
599 | "21393 gs://srivatsan-project-vcm/train_images/999329... \n",
600 | "21394 gs://srivatsan-project-vcm/train_images/999474... \n",
601 | "21395 gs://srivatsan-project-vcm/train_images/999616... \n",
602 | "21396 gs://srivatsan-project-vcm/train_images/999998... \n",
603 | "\n",
604 | "[21397 rows x 3 columns]"
605 | ]
606 | },
607 | "execution_count": 44,
608 | "metadata": {},
609 | "output_type": "execute_result"
610 | }
611 | ],
612 | "source": [
613 | "df"
614 | ]
615 | },
616 | {
617 | "cell_type": "code",
618 | "execution_count": 45,
619 | "metadata": {},
620 | "outputs": [],
621 | "source": [
622 | "df[['image_path','label']].to_csv('labels.csv', index=False, header=False)"
623 | ]
624 | },
625 | {
626 | "cell_type": "code",
627 | "execution_count": 46,
628 | "metadata": {},
629 | "outputs": [
630 | {
631 | "name": "stdout",
632 | "output_type": "stream",
633 | "text": [
634 | "Copying file://labels.csv [Content-Type=text/csv]...\n",
635 | "/ [1 files][ 1.2 MiB/ 1.2 MiB] \n",
636 | "Operation completed over 1 objects/1.2 MiB. \n"
637 | ]
638 | }
639 | ],
640 | "source": [
641 | "!gsutil cp labels.csv gs://srivatsan-project-vcm/"
642 | ]
643 | },
644 | {
645 | "cell_type": "code",
646 | "execution_count": null,
647 | "metadata": {},
648 | "outputs": [],
649 | "source": []
650 | },
651 | {
652 | "cell_type": "code",
653 | "execution_count": null,
654 | "metadata": {},
655 | "outputs": [],
656 | "source": []
657 | },
658 | {
659 | "cell_type": "code",
660 | "execution_count": null,
661 | "metadata": {},
662 | "outputs": [],
663 | "source": []
664 | },
665 | {
666 | "cell_type": "code",
667 | "execution_count": null,
668 | "metadata": {},
669 | "outputs": [],
670 | "source": []
671 | },
672 | {
673 | "cell_type": "code",
674 | "execution_count": null,
675 | "metadata": {},
676 | "outputs": [],
677 | "source": []
678 | },
679 | {
680 | "cell_type": "code",
681 | "execution_count": null,
682 | "metadata": {},
683 | "outputs": [],
684 | "source": []
685 | },
686 | {
687 | "cell_type": "code",
688 | "execution_count": null,
689 | "metadata": {},
690 | "outputs": [],
691 | "source": []
692 | },
693 | {
694 | "cell_type": "code",
695 | "execution_count": null,
696 | "metadata": {},
697 | "outputs": [],
698 | "source": []
699 | },
700 | {
701 | "cell_type": "code",
702 | "execution_count": null,
703 | "metadata": {},
704 | "outputs": [],
705 | "source": [
706 | "!curl -X GET -H \"Authorization: Bearer \"$(gcloud auth application-default print-access-token) https://automl.googleapis.com/v1/projects/srivatsan-project/locations/us-central1/operations/8525331686798393344"
707 | ]
708 | },
709 | {
710 | "cell_type": "code",
711 | "execution_count": null,
712 | "metadata": {},
713 | "outputs": [],
714 | "source": []
715 | }
716 | ],
717 | "metadata": {
718 | "environment": {
719 | "name": "common-cpu.m59",
720 | "type": "gcloud",
721 | "uri": "gcr.io/deeplearning-platform-release/base-cpu:m59"
722 | },
723 | "kernelspec": {
724 | "display_name": "Python 3",
725 | "language": "python",
726 | "name": "python3"
727 | },
728 | "language_info": {
729 | "codemirror_mode": {
730 | "name": "ipython",
731 | "version": 3
732 | },
733 | "file_extension": ".py",
734 | "mimetype": "text/x-python",
735 | "name": "python",
736 | "nbconvert_exporter": "python",
737 | "pygments_lexer": "ipython3",
738 | "version": "3.7.8"
739 | }
740 | },
741 | "nbformat": 4,
742 | "nbformat_minor": 4
743 | }
744 |
--------------------------------------------------------------------------------
/Cloud_AutoML_Vision/README.md:
--------------------------------------------------------------------------------
1 | This repository contains code and steps to train an image classification model on Google Cloud AutoML Vision
2 | 
3 | The AutoML Vision model is trained on the Kaggle Cassava Leaf Disease Classification dataset - https://www.kaggle.com/c/cassava-leaf-disease-classification
4 | 
5 | Kaggle Data Download.ipynb - contains code to pull the data from Kaggle and move it to Google Cloud Storage
6 | 
7 | Data Preparation.ipynb - contains code to prepare a CSV file with the image paths and label information. This file is used for training Cloud AutoML Vision models (a condensed sketch of the preparation step is at the end of this README)
8 | 
9 | For how to use the Google Cloud UI to create an AutoML Vision model, you can check this video - https://youtu.be/XZMU9uNbQvs
10 | 
11 | Once the model is trained, you can deploy it and use predict.ipynb to predict on new input instances
12 |
13 |
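14 | A condensed sketch of the preparation done in Data Preparation.ipynb (it assumes train.csv from the Kaggle download and the same Cloud Storage bucket used in the notebook):
15 | 
16 | ```python
17 | import pandas as pd
18 | 
19 | # class index to class name mapping used by the notebook
20 | label = {0: 'CBB', 1: 'CBSD', 2: 'CGM', 3: 'CMD', 4: 'Healthy'}
21 | 
22 | df = pd.read_csv('train.csv')
23 | df['image_path'] = 'gs://srivatsan-project-vcm/train_images/' + df['image_id']
24 | df = df.replace({'label': label})
25 | 
26 | # AutoML Vision import format: Cloud Storage path and label, no header row
27 | df[['image_path', 'label']].to_csv('labels.csv', index=False, header=False)
28 | ```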
--------------------------------------------------------------------------------
/Cloud_AutoML_Vision/predict.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 12,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "from google.cloud import aiplatform\n",
10 | "\n",
11 | "def predict_image_classification_sample(\n",
12 | " endpoint: str, instance: dict, parameters_dict: dict\n",
13 | "):\n",
14 | " client_options = dict(api_endpoint=\"us-central1-prediction-aiplatform.googleapis.com\")\n",
15 | " client = aiplatform.PredictionServiceClient(client_options=client_options)\n",
16 | " from google.protobuf import json_format\n",
17 | " from google.protobuf.struct_pb2 import Value\n",
18 | "\n",
19 | " # See gs://google-cloud-aiplatform/schema/predict/params/image_classification_1.0.0.yaml for the format of the parameters.\n",
20 | " parameters = json_format.ParseDict(parameters_dict, Value())\n",
21 | "\n",
22 | " # See gs://google-cloud-aiplatform/schema/predict/instance/image_classification_1.0.0.yaml for the format of the instances.\n",
23 | " instances_list = [instance]\n",
24 | " instances = [json_format.ParseDict(s, Value()) for s in instances_list]\n",
25 | " response = client.predict(\n",
26 | " endpoint=endpoint, instances=instances, parameters=parameters\n",
27 | " )\n",
28 | "\n",
29 | " print(\"response\")\n",
30 | " print(\" deployed_model_id:\", response.deployed_model_id)\n",
31 | " predictions = response.predictions\n",
32 | " print(\"predictions\")\n",
33 | " for prediction in predictions:\n",
34 | " # See gs://google-cloud-aiplatform/schema/predict/prediction/classification_1.0.0.yaml for the format of the predictions.\n",
35 | " print(\" prediction:\", dict(prediction))"
36 | ]
37 | },
38 | {
39 | "cell_type": "code",
40 | "execution_count": 13,
41 | "metadata": {},
42 | "outputs": [],
43 | "source": [
44 | "import base64\n",
45 | "\n",
46 | "with open('test_images/2216849948.jpg', \"rb\") as imageFile:\n",
47 | " str = base64.b64encode(imageFile.read())"
48 | ]
49 | },
50 | {
51 | "cell_type": "code",
52 | "execution_count": 30,
53 | "metadata": {},
54 | "outputs": [],
55 | "source": [
56 | "from google.cloud import automl\n",
57 | "\n",
58 | "def predict():\n",
59 | " \"\"\"Predict.\"\"\"\n",
60 | " # [START automl_vision_classification_predict]\n",
61 | "\n",
62 | " project_id = \"srivatsan-project\"\n",
63 | " model_id = \"cassava\"\n",
64 | "\n",
65 | " file_path = \"test_images/2216849948.jpg\"\n",
66 | "\n",
67 | " prediction_client = automl.PredictionServiceClient()\n",
68 | "\n",
69 | " # Get the full path of the model.\n",
70 | " model_full_id = automl.AutoMlClient.model_path(\n",
71 | " project_id, \"us-central1\", model_id\n",
72 | " )\n",
73 | "\n",
74 | " # Read the file.\n",
75 | " with open(file_path, \"rb\") as content_file:\n",
76 | " content = content_file.read()\n",
77 | "\n",
78 | " image = automl.Image(image_bytes=content)\n",
79 | " payload = automl.ExamplePayload(image=image)\n",
80 | "\n",
81 | " # params is additional domain-specific parameters.\n",
82 | " # score_threshold is used to filter the result\n",
83 | " # https://cloud.google.com/automl/docs/reference/rpc/google.cloud.automl.v1#predictrequest\n",
84 | " params = {\"score_threshold\": \"0.8\"}\n",
85 | "\n",
86 | " request = automl.PredictRequest(\n",
87 | " name=model_full_id,\n",
88 | " payload=payload,\n",
89 | " params=params\n",
90 | " )\n",
91 | " response = prediction_client.predict(request=request)\n",
92 | "\n",
93 | " print(\"Prediction results:\")\n",
94 | " for result in response.payload:\n",
95 | " print(\"Predicted class name: {}\".format(result.display_name))\n",
96 | " print(\"Predicted class score: {}\".format(result.classification.score))"
97 | ]
98 | },
99 | {
100 | "cell_type": "code",
101 | "execution_count": 31,
102 | "metadata": {},
103 | "outputs": [
104 | {
105 | "ename": "NotFound",
106 | "evalue": "404 Invalid resource ID",
107 | "output_type": "error",
108 | "traceback": [
109 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
110 | "\u001b[0;31m_InactiveRpcError\u001b[0m Traceback (most recent call last)",
111 | "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/google/api_core/grpc_helpers.py\u001b[0m in \u001b[0;36merror_remapped_callable\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 56\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 57\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mcallable_\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 58\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mgrpc\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mRpcError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mexc\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
112 | "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/grpc/_channel.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, request, timeout, metadata, credentials, wait_for_ready, compression)\u001b[0m\n\u001b[1;32m 922\u001b[0m wait_for_ready, compression)\n\u001b[0;32m--> 923\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0m_end_unary_response_blocking\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstate\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcall\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 924\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
113 | "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/grpc/_channel.py\u001b[0m in \u001b[0;36m_end_unary_response_blocking\u001b[0;34m(state, call, with_call, deadline)\u001b[0m\n\u001b[1;32m 825\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 826\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0m_InactiveRpcError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstate\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 827\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
114 | "\u001b[0;31m_InactiveRpcError\u001b[0m: <_InactiveRpcError of RPC that terminated with:\n\tstatus = StatusCode.NOT_FOUND\n\tdetails = \"Invalid resource ID\"\n\tdebug_error_string = \"{\"created\":\"@1608932170.899453921\",\"description\":\"Error received from peer ipv4:74.125.124.95:443\",\"file\":\"src/core/lib/surface/call.cc\",\"file_line\":1062,\"grpc_message\":\"Invalid resource ID\",\"grpc_status\":5}\"\n>",
115 | "\nThe above exception was the direct cause of the following exception:\n",
116 | "\u001b[0;31mNotFound\u001b[0m Traceback (most recent call last)",
117 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
118 | "\u001b[0;32m\u001b[0m in \u001b[0;36mpredict\u001b[0;34m()\u001b[0m\n\u001b[1;32m 34\u001b[0m \u001b[0mparams\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mparams\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 35\u001b[0m )\n\u001b[0;32m---> 36\u001b[0;31m \u001b[0mresponse\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mprediction_client\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 37\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 38\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Prediction results:\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
119 | "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/google/cloud/automl_v1/services/prediction_service/client.py\u001b[0m in \u001b[0;36mpredict\u001b[0;34m(self, request, name, payload, params, retry, timeout, metadata)\u001b[0m\n\u001b[1;32m 411\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 412\u001b[0m \u001b[0;31m# Send the request.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 413\u001b[0;31m \u001b[0mresponse\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mrpc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mretry\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mretry\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtimeout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmetadata\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmetadata\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 414\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 415\u001b[0m \u001b[0;31m# Done; return the response.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
120 | "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/google/api_core/gapic_v1/method.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 143\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"metadata\"\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmetadata\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 144\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 145\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mwrapped_func\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 146\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 147\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
121 | "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/google/api_core/grpc_helpers.py\u001b[0m in \u001b[0;36merror_remapped_callable\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 57\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mcallable_\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 58\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mgrpc\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mRpcError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mexc\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 59\u001b[0;31m \u001b[0msix\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mraise_from\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mexceptions\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfrom_grpc_error\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mexc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mexc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 60\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 61\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0merror_remapped_callable\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
122 | "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/six.py\u001b[0m in \u001b[0;36mraise_from\u001b[0;34m(value, from_value)\u001b[0m\n",
123 | "\u001b[0;31mNotFound\u001b[0m: 404 Invalid resource ID"
124 | ]
125 | }
126 | ],
127 | "source": [
128 | "predict()"
129 | ]
130 | },
131 | {
132 | "cell_type": "code",
133 | "execution_count": null,
134 | "metadata": {},
135 | "outputs": [],
136 | "source": []
137 | }
138 | ],
139 | "metadata": {
140 | "environment": {
141 | "name": "common-cpu.m59",
142 | "type": "gcloud",
143 | "uri": "gcr.io/deeplearning-platform-release/base-cpu:m59"
144 | },
145 | "kernelspec": {
146 | "display_name": "Python 3",
147 | "language": "python",
148 | "name": "python3"
149 | },
150 | "language_info": {
151 | "codemirror_mode": {
152 | "name": "ipython",
153 | "version": 3
154 | },
155 | "file_extension": ".py",
156 | "mimetype": "text/x-python",
157 | "name": "python",
158 | "nbconvert_exporter": "python",
159 | "pygments_lexer": "ipython3",
160 | "version": "3.7.8"
161 | }
162 | },
163 | "nbformat": 4,
164 | "nbformat_minor": 4
165 | }
166 |
--------------------------------------------------------------------------------
/GCP_Serverless_AppEngine/ComplaintsFlask.py:
--------------------------------------------------------------------------------
1 | from flask import Flask, jsonify, request
2 | from preprocessing.functions import tokenize
3 | import xgboost as xgb
4 | import joblib
5 | from healthcheck import HealthCheck
6 |
7 | import os
8 | import logging
9 |
10 | logging.basicConfig(format='%(message)s', level=logging.INFO)
11 | app = Flask(__name__)
12 |
13 | target={0:'Debt collection', 1:'Credit card or prepaid card', 2:'Mortgage',
14 | 3:'Checking or savings account', 4:'Student loan',
15 | 5:'Vehicle loan or lease'}
16 |
17 | tfvectorizer = joblib.load('models/tfvectroizer.pkl')
18 | xgb_clf = xgb.Booster({'nthread': 3})
19 | xgb_clf.load_model('models/complaints.booster')
20 |
21 | logging.info('All models loaded successfully')
22 |
23 | health = HealthCheck(app, "/hcheck")
24 |
25 | def howami():
26 | return True, "I am alive. Thanks for checking.."
27 |
28 | health.add_check(howami)
29 |
30 | def scorer(text):
31 | encoded_text = tfvectorizer.transform([text])
32 | score = xgb_clf.predict(xgb.DMatrix(encoded_text))
33 | return score
34 |
35 | @app.route('/score', methods=['POST'])
36 | def predict_fn():
37 | text = request.get_json()['text']
38 | logging.info('Received incoming message - '+ text)
39 | predictions = scorer(text)
40 | predictions = predictions.argmax(axis=1)[0]
41 | return jsonify({'predictions ': str(predictions), 'Category ': target.get(predictions)})
42 |
43 | @app.route('/')
44 | def hello():
45 | return 'Welcome to Complaints Prediction Application'
46 |
47 | if __name__ == "__main__":
48 | app.run(host='0.0.0.0', port=int(os.environ.get('PORT', 5000)))
--------------------------------------------------------------------------------
/GCP_Serverless_AppEngine/ComplaintsServer.py:
--------------------------------------------------------------------------------
1 | from ComplaintsFlask import app
2 |
3 | if __name__ == "__main__":
4 | app.run()
--------------------------------------------------------------------------------
/GCP_Serverless_AppEngine/Dockerfile:
--------------------------------------------------------------------------------
1 | # lightweight python
2 | FROM python:3.7-slim
3 |
4 | RUN apt-get update && apt-get install -y libgomp1
5 |
6 | # Copy local code to the container image.
7 | ENV APP_HOME /app
8 | WORKDIR $APP_HOME
9 | COPY . ./
10 |
11 | RUN ls -la $APP_HOME/
12 |
13 | ENV NLTK_DATA $APP_HOME/nltk_data
14 |
15 | # Install dependencies
16 | RUN pip install -r requirements.txt
17 |
18 | ENV PORT 5000
19 |
20 | # Run the flask service on container startup
21 | #CMD exec gunicorn --bind :$PORT --workers 1 --threads 8 ComplaintsServer
22 | CMD [ "python", "ComplaintsFlask.py" ]
23 |
--------------------------------------------------------------------------------
/GCP_Serverless_AppEngine/README.md:
--------------------------------------------------------------------------------
1 | This repository contains details and code for deploying machine learning models on Google Cloud's serverless platform (Cloud Run) and Google App Engine
2 | 
3 | Details of the code and how it works are covered in detail on my YouTube channel (AIEngineering) here - https://youtu.be/kyQH71pB0vI
4 | 
5 | Before getting started with deployment, the container expects the trained models and the downloaded NLTK corpora
6 | 
7 | For the model files, you can run the associated notebook in this repository, or download the trained models using the two commands below
8 | 
9 | wget --no-check-certificate 'https://docs.google.com/uc?export=download&id=1m1bVxlUjrJ_tmWApYJHlk2q5bikGyIxr' -O complaints.booster
10 | 
11 | wget --no-check-certificate 'https://docs.google.com/uc?export=download&id=1WURdboJjF27g9bZG_CGCCGSYWi0EvxJR' -O tfvectroizer.pkl
12 | 
13 | If you want more details on model training, you can check the video here - https://youtu.be/EHt_x8r1exU
14 | 
15 | For NLTK, you have to download the stopwords and tokenizer corpora. You can download them using nltk.download() (see the sketch at the end of this README)
16 |
17 |
18 | To deploy the model on serverless infrastructure (Cloud Run), execute the below commands
19 | ------------------------------------------------------------------------------------------
20 |
21 | Build the container image - gcloud builds submit --tag gcr.io/<PROJECT_ID>/complaintsapi .
22 | 
23 | List the image - gcloud builds list --filter complaints
24 | 
25 | Check the build logs - gcloud builds log <BUILD_ID>
26 | 
27 | Deploy the container on Google Cloud Run - gcloud run deploy complaintsapi --image gcr.io/<PROJECT_ID>/complaintsapi --platform managed --memory 1G
28 |
29 | To deploy the model on App Engine, run the commands below
30 | -----------------------------------------------------
31 |
32 | gcloud app create
33 |
34 | gcloud app deploy
35 |
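36 | A sketch of staging the NLTK corpora before building the image (the Dockerfile sets NLTK_DATA to $APP_HOME/nltk_data, so the corpora are downloaded into an nltk_data folder in the project root and copied into the container):
37 | 
38 | ```python
39 | import nltk
40 | 
41 | # tokenizer and stopword corpora used by preprocessing/functions.py
42 | nltk.download('punkt', download_dir='nltk_data')
43 | nltk.download('stopwords', download_dir='nltk_data')
44 | ```
45 | 
46 | Once deployed, the /score endpoint from ComplaintsFlask.py can be smoke tested with a small client like this sketch (the URL is a placeholder for the Cloud Run or App Engine URL printed by the deploy command):
47 | 
48 | ```python
49 | import requests
50 | 
51 | SERVICE_URL = 'https://<SERVICE_URL>'  # placeholder - replace with the deployed URL
52 | 
53 | payload = {'text': 'I keep getting collection calls about a debt that is not mine'}
54 | resp = requests.post(SERVICE_URL + '/score', json=payload)
55 | print(resp.json())  # predicted class index and complaint category
56 | ```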
--------------------------------------------------------------------------------
/GCP_Serverless_AppEngine/app.yaml:
--------------------------------------------------------------------------------
1 | runtime: custom
2 | env: flex
--------------------------------------------------------------------------------
/GCP_Serverless_AppEngine/preprocessing/functions.py:
--------------------------------------------------------------------------------
1 | import re
2 | import nltk
3 |
4 | stemmer = nltk.stem.SnowballStemmer('english')
5 | stop_words = set(nltk.corpus.stopwords.words('english'))
6 |
7 | def tokenize(text):
8 | tokens = [word for word in nltk.word_tokenize(text) if (len(word) > 3 and len(word.strip('Xx/')) > 2 and len(re.sub(r'\d+', '', word.strip('Xx/'))) > 3) ]
9 | tokens = map(str.lower, tokens)
10 | stems = [stemmer.stem(item) for item in tokens if (item not in stop_words)]
11 | return stems
12 |
--------------------------------------------------------------------------------
/GCP_Serverless_AppEngine/requirements.txt:
--------------------------------------------------------------------------------
1 | flask
2 | scikit-learn==0.22
3 | xgboost==0.90
4 | nltk
5 | joblib
6 | gunicorn
7 | healthcheck
8 | six
9 |
10 | #Run this file with - pip3 install -r requirements.txt
--------------------------------------------------------------------------------
/Google_Kubernetes_Engine/ComplaintsFlask.py:
--------------------------------------------------------------------------------
1 | from flask import Flask, jsonify, request
2 | from preprocessing.functions import tokenize
3 | import xgboost as xgb
4 | import joblib
5 | from healthcheck import HealthCheck
6 |
7 | import os
8 | import logging
9 |
10 | logging.basicConfig(format='%(message)s', level=logging.INFO)
11 | app = Flask(__name__)
12 |
13 | target={0:'Debt collection', 1:'Credit card or prepaid card', 2:'Mortgage',
14 | 3:'Checking or savings account', 4:'Student loan',
15 | 5:'Vehicle loan or lease'}
16 |
17 | tfvectorizer = joblib.load('models/tfvectroizer.pkl')
18 | xgb_clf = xgb.Booster({'nthread': 3})
19 | xgb_clf.load_model('models/complaints.booster')
20 |
21 | logging.info('All models loaded successfully')
22 |
23 | health = HealthCheck(app, "/hcheck")
24 |
25 | def howami():
26 | return True, "I am alive. Thanks for checking.."
27 |
28 | health.add_check(howami)
29 |
30 | def scorer(text):
31 | encoded_text = tfvectorizer.transform([text])
32 | score = xgb_clf.predict(xgb.DMatrix(encoded_text))
33 | return score
34 |
35 | @app.route('/score', methods=['POST'])
36 | def predict_fn():
37 | text = request.get_json()['text']
38 | logging.info('Received incoming message - '+ text)
39 | predictions = scorer(text)
40 | predictions = predictions.argmax(axis=1)[0]
41 | return jsonify({'predictions ': str(predictions), 'Category ': target.get(predictions)})
42 |
43 | @app.route('/')
44 | def hello():
45 | return 'Welcome to Complaints Prediction Application'
46 |
47 | if __name__ == "__main__":
48 | app.run(host='0.0.0.0', port=int(os.environ.get('PORT', 5000)))
--------------------------------------------------------------------------------
/Google_Kubernetes_Engine/Dockerfile:
--------------------------------------------------------------------------------
1 | # lightweight python
2 | FROM python:3.7-slim
3 |
4 | RUN apt-get update && apt-get install -y libgomp1
5 |
6 | # Copy local code to the container image.
7 | ENV APP_HOME /app
8 | WORKDIR $APP_HOME
9 | COPY . ./
10 |
11 | RUN ls -la $APP_HOME/
12 |
13 | ENV NLTK_DATA $APP_HOME/nltk_data
14 |
15 | # Install dependencies
16 | RUN pip install -r requirements.txt
17 |
18 | ENV PORT 5000
19 |
20 | # Run the flask service on container startup
21 | #CMD exec gunicorn --bind :$PORT --workers 1 --threads 8 ComplaintsServer
22 | CMD [ "python", "ComplaintsFlask.py" ]
23 |
--------------------------------------------------------------------------------
/Google_Kubernetes_Engine/README.md:
--------------------------------------------------------------------------------
1 | This repository contains details and code for deploying machine learning models on Google Kubernetes Engine
2 | 
3 | Details of the code and how it works are covered in detail on my YouTube channel (AIEngineering) here - https://youtu.be/Hfgla4ViIwU
4 | 
5 | Before getting started with deployment, the container expects the trained models and the downloaded NLTK corpora
6 | 
7 | For the model files, you can run the associated notebook in this repository, or download the trained models using the two commands below
8 | 
9 | wget --no-check-certificate 'https://docs.google.com/uc?export=download&id=1m1bVxlUjrJ_tmWApYJHlk2q5bikGyIxr' -O complaints.booster
10 | 
11 | wget --no-check-certificate 'https://docs.google.com/uc?export=download&id=1WURdboJjF27g9bZG_CGCCGSYWi0EvxJR' -O tfvectroizer.pkl
12 | 
13 | If you want more details on model training, you can check the video here - https://youtu.be/EHt_x8r1exU
14 | 
15 | For NLTK, you have to download the stopwords and tokenizer corpora. You can download them using nltk.download()
16 |
17 |
18 | To deploy the model on Google Kubernetes Engine, execute the below commands
19 | ------------------------------------------------------------------------------------------
20 |
21 | Build the container image - gcloud builds submit --tag gcr.io/<PROJECT_ID>/complaintsapi .
22 | 
23 | List the image - gcloud builds list --filter complaints
24 | 
25 | Check the build logs - gcloud builds log <BUILD_ID>
26 |
27 | Create Kubernetes Cluster - gcloud container clusters create complaints-gke --zone "us-west1-b" --machine-type "n1-standard-1" --num-nodes "1" --service-account srivatsan-gke@srivatsan-project.iam.gserviceaccount.com (Change to your service account)
28 |
29 | Create Kubernetes Deployment - kubectl apply -f deployment.yaml
30 |
31 | Get details on deployed application - kubectl get deployments
32 |
33 | Get info of created pods via deployment - kubectl get pods
34 |
35 | Describe a deployed pod - kubectl describe pod <POD_NAME>
36 | 
37 | Get pod logs - kubectl logs <POD_NAME>
38 |
39 | Create service for deployment - kubectl apply -f service.yaml
40 |
41 | Get service details - kubectl get services
42 |
43 | Add nodes to cluster - gcloud container clusters resize complaints-gke --num-nodes 3 --zone us-west1-b
44 |
45 | Get details on cluster - gcloud container clusters list
46 |
47 | Scale pod replicas - kubectl scale deployment complaints --replicas 2
48 |
49 | Auto Scale setting in deployment - kubectl autoscale deployment complaints --max 6 --min 2 --cpu-percent 50
50 |
51 | Get details on horizontal pod autoscaler - kubectl get hpa
52 |
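53 | Once the LoadBalancer service is up, kubectl get services shows an EXTERNAL-IP for complaintsclassifier. The sketch below (the IP is a placeholder) sends a request to the /score endpoint, which service.yaml exposes on port 80 and forwards to the container on port 8080:
54 | 
55 | ```python
56 | import requests
57 | 
58 | EXTERNAL_IP = '<EXTERNAL_IP>'  # placeholder - take this from `kubectl get services`
59 | 
60 | payload = {'text': 'My mortgage payment was applied to the wrong account'}
61 | resp = requests.post('http://' + EXTERNAL_IP + '/score', json=payload)
62 | print(resp.json())  # predicted class index and complaint category
63 | ```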
--------------------------------------------------------------------------------
/Google_Kubernetes_Engine/deployment.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: apps/v1
2 | kind: Deployment
3 | metadata:
4 | name: complaints
5 | spec:
6 | replicas: 1
7 | selector:
8 | matchLabels:
9 | app: complaintsclassifier
10 | template:
11 | metadata:
12 | labels:
13 | app: complaintsclassifier
14 | spec:
15 | containers:
16 | - name: complaints-app
17 | image: gcr.io/srivatsan-project/complaintsapi
18 | ports:
19 | - containerPort: 8080
20 | env:
21 | - name: PORT
22 | value: "8080"
--------------------------------------------------------------------------------
/Google_Kubernetes_Engine/preprocessing/functions.py:
--------------------------------------------------------------------------------
1 | import re
2 | import nltk
3 |
4 | stemmer = nltk.stem.SnowballStemmer('english')
5 | stop_words = set(nltk.corpus.stopwords.words('english'))
6 |
7 | def tokenize(text):
8 | tokens = [word for word in nltk.word_tokenize(text) if (len(word) > 3 and len(word.strip('Xx/')) > 2 and len(re.sub(r'\d+', '', word.strip('Xx/'))) > 3) ]
9 | tokens = map(str.lower, tokens)
10 | stems = [stemmer.stem(item) for item in tokens if (item not in stop_words)]
11 | return stems
12 |
--------------------------------------------------------------------------------
/Google_Kubernetes_Engine/requirements.txt:
--------------------------------------------------------------------------------
1 | flask
2 | scikit-learn==0.22
3 | xgboost==0.90
4 | nltk
5 | joblib
6 | gunicorn
7 | healthcheck
8 | six
9 |
10 | #Run this file with - pip3 install -r requirements.txt
--------------------------------------------------------------------------------
/Google_Kubernetes_Engine/service.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: Service
3 | metadata:
4 | name: complaintsclassifier
5 | spec:
6 | type: LoadBalancer
7 | selector:
8 | app: complaintsclassifier
9 | ports:
10 | - port: 80
11 | targetPort: 8080
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # google_cloud_AI_ML
2 | This repository contains code to learn AI and ML on Google Cloud Platform
3 |
4 | Detailed code walkthrough of different Google Cloud AI services available in this repo can be found in my video playlist - https://www.youtube.com/playlist?list=PL3N9eeOlCrP6Nhv4UFp67IsQ_TVDpXqXK
5 |
6 | GCP AI Project Details
7 |
8 | Project 1 - Telecom Churn on AI Platform Notebook and AI Platform Prediction
9 |
10 | In this project we will be building a scikit-learn model using an AI Platform Notebook and deploying it on the AI Platform Prediction service
11 |
12 | Project 2 - Model Deployment on Google Cloud Platform
13 |
14 | In this project we will be deploying machine learning models on Google Cloud Serverless platform (Cloud Run) and Google Cloud App Engine
15 |
16 | Project 3 - Building Time Series model using BigQuery ML and Data Analysis using Cloud Datastudio
17 |
18 | In this project we will be analyzing the SFO bikeshare dataset and building multiple time series models using BigQuery ML
19 |
20 | Project 4 - Image Classification using Google Cloud AutoML Vision
21 |
22 | In this project we will be training an image classifier using Google Cloud AutoML Vision on the Kaggle Cassava Leaf Disease detection dataset
23 | 
24 | Project 5 - Logistic Regression and Feature Engineering using BigQuery ML
25 | 
26 | This project is an end-to-end demo of using BigQuery ML for feature engineering and for building a Logistic Regression model
27 |
28 | Quick Start Projects to learn GCP
29 |
30 | Speech to Text API - https://www.youtube.com/watch?v=ZkTiKPUSYj4
31 |
32 | Big Query Data Q&A - https://www.youtube.com/watch?v=nhAm7q826qk
33 |
34 | Google Cloud Data Products Overview and Professional ML Engineer Certification Discussion - https://www.youtube.com/watch?v=pIzX7wk56iA&list=PL3N9eeOlCrP6Nhv4UFp67IsQ_TVDpXqXK&index=9
35 |
36 | Google Cloud AI Platform overview - https://www.youtube.com/watch?v=dx5kyKZ7Q0I
37 |
38 | Google data studio getting started - https://www.youtube.com/watch?v=DMRC90qvwFo
39 |
40 |
41 |
--------------------------------------------------------------------------------
/Telecom_churn_AI_Platform/README.md:
--------------------------------------------------------------------------------
1 | Video accompanying this notebook can be found here - https://www.youtube.com/watch?v=y63OIKPe52Y
2 |
3 | This repository demonstrates how to get started with Google Cloud AI Platform and then use the AI Platform Prediction service for deploying the trained model
4 | 
5 | Churn_model.ipynb is the main file for building the model and saving the trained model to Google Cloud Storage
6 | 
7 | predictor.py is the standalone prediction code used to test the model before deploying it on the AI Platform Prediction service
8 | 
9 | serving.ipynb is a client program used to invoke the model deployed on the AI Platform Prediction service
10 |
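11 | A minimal local smoke test, condensed from predict_setup.ipynb (it assumes the exported model.joblib is in the current directory):
12 | 
13 | ```python
14 | from predictor import ChurnPredictor
15 | 
16 | # load the trained model saved by Churn_model.ipynb (model.joblib)
17 | model = ChurnPredictor.from_path('.')
18 | 
19 | instance = ['7317-GGVPB', 'Male', 0, 'Yes', 'No', 71, 'Yes', 'Yes', 'Fiber optic',
20 |             'No', 'Yes', 'Yes', 'Yes', 'Yes', 'Yes', 'Two year', 'Yes',
21 |             'Credit card (automatic)', 108.6, '7690.9']
22 | print(model.predict(instance))  # prints ['No'] for this example in predict_setup.ipynb
23 | ```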
--------------------------------------------------------------------------------
/Telecom_churn_AI_Platform/data/README.md:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/Telecom_churn_AI_Platform/input.json:
--------------------------------------------------------------------------------
1 | {"instances":["7317-GGVPB", "Male", 0, "Yes", "No", 71, "Yes", "Yes", "Fiber optic", "No", "Yes", "Yes", "Yes", "Yes", "Yes", "Two year", "Yes", "Credit card (automatic)", 108.6, "7690.9"]}
--------------------------------------------------------------------------------
/Telecom_churn_AI_Platform/predict_setup.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [
8 | {
9 | "name": "stdout",
10 | "output_type": "stream",
11 | "text": [
12 | "Writing predictor.py\n"
13 | ]
14 | }
15 | ],
16 | "source": [
17 | "%%writefile predictor.py\n",
18 | "\n",
19 | "import os\n",
20 | "\n",
21 | "import numpy as np\n",
22 | "import joblib\n",
23 | "import pandas as pd\n",
24 | "\n",
25 | "class ChurnPredictor(object):\n",
26 | "\n",
27 | " _COLUMN_NAMES=['customerID', 'gender', 'SeniorCitizen', 'Partner', 'Dependents', 'tenure', 'PhoneService', 'MultipleLines', 'InternetService', 'OnlineSecurity', 'OnlineBackup', 'DeviceProtection', 'TechSupport', 'StreamingTV', 'StreamingMovies', 'Contract', 'PaperlessBilling','PaymentMethod', 'MonthlyCharges', 'TotalCharges']\n",
28 | "\n",
29 | " def __init__(self, model):\n",
30 | " self._model = model\n",
31 | "\n",
32 | " def predict(self, instances, **kwargs):\n",
33 | " inputs = pd.DataFrame(data=[instances], columns=self._COLUMN_NAMES)\n",
34 | " outputs = self._model.predict(inputs)\n",
35 | " return outputs.tolist()\n",
36 | "\n",
37 | " @classmethod\n",
38 | " def from_path(cls, model_dir):\n",
39 | " model_path = os.path.join(model_dir, 'model.joblib')\n",
40 | " model = joblib.load(model_path)\n",
41 | " return cls(model)"
42 | ]
43 | },
44 | {
45 | "cell_type": "code",
46 | "execution_count": 2,
47 | "metadata": {},
48 | "outputs": [
49 | {
50 | "name": "stdout",
51 | "output_type": "stream",
52 | "text": [
53 | "['No']\n"
54 | ]
55 | }
56 | ],
57 | "source": [
58 | "from predictor import ChurnPredictor\n",
59 | "model = ChurnPredictor.from_path('.')\n",
60 | "instance = ['7317-GGVPB', 'Male', 0, 'Yes', 'No', 71, 'Yes', 'Yes', 'Fiber optic', 'No', 'Yes', 'Yes', 'Yes', 'Yes', 'Yes', 'Two year', 'Yes', 'Credit card (automatic)', 108.6, '7690.9']\n",
61 | "print(model.predict(instance))"
62 | ]
63 | },
64 | {
65 | "cell_type": "code",
66 | "execution_count": 3,
67 | "metadata": {},
68 | "outputs": [
69 | {
70 | "name": "stdout",
71 | "output_type": "stream",
72 | "text": [
73 | "Writing setup.py\n"
74 | ]
75 | }
76 | ],
77 | "source": [
78 | "%%writefile setup.py\n",
79 | "from setuptools import setup\n",
80 | "from setuptools import find_packages\n",
81 | "\n",
82 | "REQUIRED_PACKAGES = ['xgboost']\n",
83 | "\n",
84 | "setup(\n",
85 | " name='custom_predict',\n",
86 | " version='0.1',\n",
87 | " install_requires=REQUIRED_PACKAGES,\n",
88 | " packages=find_packages(),\n",
89 | " include_package_data=True,\n",
90 | " scripts=['predictor.py'])"
91 | ]
92 | },
93 | {
94 | "cell_type": "code",
95 | "execution_count": 4,
96 | "metadata": {},
97 | "outputs": [
98 | {
99 | "name": "stdout",
100 | "output_type": "stream",
101 | "text": [
102 | "running sdist\n",
103 | "running egg_info\n",
104 | "creating custom_predict.egg-info\n",
105 | "writing custom_predict.egg-info/PKG-INFO\n",
106 | "writing dependency_links to custom_predict.egg-info/dependency_links.txt\n",
107 | "writing requirements to custom_predict.egg-info/requires.txt\n",
108 | "writing top-level names to custom_predict.egg-info/top_level.txt\n",
109 | "writing manifest file 'custom_predict.egg-info/SOURCES.txt'\n",
110 | "reading manifest file 'custom_predict.egg-info/SOURCES.txt'\n",
111 | "writing manifest file 'custom_predict.egg-info/SOURCES.txt'\n",
112 | "running check\n",
113 | "warning: check: missing required meta-data: url\n",
114 | "\n",
115 | "warning: check: missing meta-data: either (author and author_email) or (maintainer and maintainer_email) must be supplied\n",
116 | "\n",
117 | "creating custom_predict-0.1\n",
118 | "creating custom_predict-0.1/custom_predict.egg-info\n",
119 | "copying files to custom_predict-0.1...\n",
120 | "copying README.md -> custom_predict-0.1\n",
121 | "copying predictor.py -> custom_predict-0.1\n",
122 | "copying setup.py -> custom_predict-0.1\n",
123 | "copying custom_predict.egg-info/PKG-INFO -> custom_predict-0.1/custom_predict.egg-info\n",
124 | "copying custom_predict.egg-info/SOURCES.txt -> custom_predict-0.1/custom_predict.egg-info\n",
125 | "copying custom_predict.egg-info/dependency_links.txt -> custom_predict-0.1/custom_predict.egg-info\n",
126 | "copying custom_predict.egg-info/requires.txt -> custom_predict-0.1/custom_predict.egg-info\n",
127 | "copying custom_predict.egg-info/top_level.txt -> custom_predict-0.1/custom_predict.egg-info\n",
128 | "Writing custom_predict-0.1/setup.cfg\n",
129 | "creating dist\n",
130 | "Creating tar archive\n",
131 | "removing 'custom_predict-0.1' (and everything under it)\n"
132 | ]
133 | }
134 | ],
135 | "source": [
136 | "!python setup.py sdist --formats=gztar"
137 | ]
138 | },
139 | {
140 | "cell_type": "code",
141 | "execution_count": 5,
142 | "metadata": {},
143 | "outputs": [
144 | {
145 | "name": "stdout",
146 | "output_type": "stream",
147 | "text": [
148 | "Copying file://./dist/custom_predict-0.1.tar.gz [Content-Type=application/x-tar]...\n",
149 | "/ [1 files][ 1.2 KiB/ 1.2 KiB] \n",
150 | "Operation completed over 1 objects/1.2 KiB. \n"
151 | ]
152 | }
153 | ],
154 | "source": [
155 | "!gsutil cp ./dist/custom_predict-0.1.tar.gz gs://churn-model-sri/custom_predict-0.1.tar.gz"
156 | ]
157 | },
158 | {
159 | "cell_type": "code",
160 | "execution_count": null,
161 | "metadata": {},
162 | "outputs": [],
163 | "source": []
164 | },
165 | {
166 | "cell_type": "code",
167 | "execution_count": 6,
168 | "metadata": {},
169 | "outputs": [
170 | {
171 | "name": "stdout",
172 | "output_type": "stream",
173 | "text": [
174 | "Using endpoint [https://ml.googleapis.com/]\n",
175 | "Created ml engine model [projects/srivatsan-project/models/ChurnPredictor].\n"
176 | ]
177 | }
178 | ],
179 | "source": [
180 | "!gcloud beta ai-platform models create ChurnPredictor --regions us-central1 --enable-console-logging"
181 | ]
182 | },
183 | {
184 | "cell_type": "code",
185 | "execution_count": 7,
186 | "metadata": {},
187 | "outputs": [
188 | {
189 | "name": "stdout",
190 | "output_type": "stream",
191 | "text": [
192 | "Using endpoint [https://ml.googleapis.com/]\n",
193 | "Creating version (this might take a few minutes)......done. \n"
194 | ]
195 | }
196 | ],
197 | "source": [
198 | "! gcloud --quiet beta ai-platform versions create V1 \\\n",
199 | " --model ChurnPredictor \\\n",
200 | " --runtime-version 2.3 \\\n",
201 | " --python-version 3.7 \\\n",
202 | " --origin gs://churn-model-sri/ \\\n",
203 | " --package-uris gs://churn-model-sri/custom_predict-0.1.tar.gz \\\n",
204 | " --prediction-class predictor.ChurnPredictor "
205 | ]
206 | },
207 | {
208 | "cell_type": "code",
209 | "execution_count": null,
210 | "metadata": {},
211 | "outputs": [],
212 | "source": [
213 | "#{\"instances\": [\"7317-GGVPB\", \"Male\", 0, \"Yes\", \"No\", 71, \"Yes\", \"Yes\", \"Fiber optic\", \"No\", \"Yes\", \"Yes\", \"Yes\", \"Yes\", \"Yes\", \"Two year\", \"Yes\", \"Credit card (automatic)\", 108.6, \"7690.9\"]}"
214 | ]
215 | },
216 | {
217 | "cell_type": "code",
218 | "execution_count": 8,
219 | "metadata": {},
220 | "outputs": [
221 | {
222 | "name": "stdout",
223 | "output_type": "stream",
224 | "text": [
225 | "Using endpoint [https://ml.googleapis.com/]\n",
226 | "['No']\n"
227 | ]
228 | }
229 | ],
230 | "source": [
231 | "!gcloud ai-platform predict --model ChurnPredictor --version V1 --json-request input.json"
232 | ]
233 | },
234 | {
235 | "cell_type": "code",
236 | "execution_count": null,
237 | "metadata": {},
238 | "outputs": [],
239 | "source": []
240 | }
241 | ],
242 | "metadata": {
243 | "environment": {
244 | "name": "common-cpu.m59",
245 | "type": "gcloud",
246 | "uri": "gcr.io/deeplearning-platform-release/base-cpu:m59"
247 | },
248 | "kernelspec": {
249 | "display_name": "Python 3",
250 | "language": "python",
251 | "name": "python3"
252 | },
253 | "language_info": {
254 | "codemirror_mode": {
255 | "name": "ipython",
256 | "version": 3
257 | },
258 | "file_extension": ".py",
259 | "mimetype": "text/x-python",
260 | "name": "python",
261 | "nbconvert_exporter": "python",
262 | "pygments_lexer": "ipython3",
263 | "version": "3.7.8"
264 | }
265 | },
266 | "nbformat": 4,
267 | "nbformat_minor": 4
268 | }
269 |
--------------------------------------------------------------------------------
/Telecom_churn_AI_Platform/predictor.py:
--------------------------------------------------------------------------------
1 |
2 | import os
3 |
4 | import numpy as np
5 | import joblib
6 | import pandas as pd
7 |
8 | class ChurnPredictor(object):
9 |
10 | _COLUMN_NAMES=['customerID', 'gender', 'SeniorCitizen', 'Partner', 'Dependents', 'tenure', 'PhoneService', 'MultipleLines', 'InternetService', 'OnlineSecurity', 'OnlineBackup', 'DeviceProtection', 'TechSupport', 'StreamingTV', 'StreamingMovies', 'Contract', 'PaperlessBilling','PaymentMethod', 'MonthlyCharges', 'TotalCharges']
11 |
12 | def __init__(self, model):
13 | self._model = model
14 |
15 | def predict(self, instances, **kwargs):
16 | inputs = pd.DataFrame(data=[instances], columns=self._COLUMN_NAMES)
17 | outputs = self._model.predict(inputs)
18 | return outputs.tolist()
19 |
20 | @classmethod
21 | def from_path(cls, model_dir):
22 | model_path = os.path.join(model_dir, 'model.joblib')
23 | model = joblib.load(model_path)
24 | return cls(model)
25 |
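26 | # Hypothetical local smoke test (not part of the AI Platform flow; assumes model.joblib
27 | # sits in the current directory and reuses the sample instance from serving.ipynb):
28 | #
29 | # if __name__ == '__main__':
30 | #     predictor = ChurnPredictor.from_path('.')
31 | #     print(predictor.predict(["7317-GGVPB", "Male", 0, "Yes", "No", 71, "Yes", "Yes",
32 | #                              "Fiber optic", "No", "Yes", "Yes", "Yes", "Yes", "Yes",
33 | #                              "Two year", "Yes", "Credit card (automatic)", 108.6, "7690.9"]))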
--------------------------------------------------------------------------------
/Telecom_churn_AI_Platform/serving.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import googleapiclient.discovery"
10 | ]
11 | },
12 | {
13 | "cell_type": "code",
14 | "execution_count": 2,
15 | "metadata": {},
16 | "outputs": [],
17 | "source": [
18 | "instances = [\"7317-GGVPB\", \"Male\", 0, \"Yes\", \"No\", 71, \"Yes\", \"Yes\", \"Fiber optic\", \"No\", \"Yes\", \"Yes\", \"Yes\", \"Yes\", \"Yes\", \"Two year\", \"Yes\", \"Credit card (automatic)\", 108.6, \"7690.9\"]"
19 | ]
20 | },
21 | {
22 | "cell_type": "code",
23 | "execution_count": 3,
24 | "metadata": {},
25 | "outputs": [],
26 | "source": [
27 | "service = googleapiclient.discovery.build('ml', 'v1')\n",
28 | "name = 'projects/{}/models/{}/versions/{}'.format(\"srivatsan-project\", \"ChurnPredictor\", \"V1\")"
29 | ]
30 | },
31 | {
32 | "cell_type": "code",
33 | "execution_count": 4,
34 | "metadata": {},
35 | "outputs": [
36 | {
37 | "name": "stdout",
38 | "output_type": "stream",
39 | "text": [
40 | "['No']\n"
41 | ]
42 | }
43 | ],
44 | "source": [
45 | "response = service.projects().predict(\n",
46 | " name=name,\n",
47 | " body={'instances': instances}\n",
48 | ").execute()\n",
49 | "\n",
50 | "if 'error' in response:\n",
51 | " raise RuntimeError(response['error'])\n",
52 | "else:\n",
53 | " print(response['predictions'])"
54 | ]
55 | },
56 | {
57 | "cell_type": "code",
58 | "execution_count": null,
59 | "metadata": {},
60 | "outputs": [],
61 | "source": [
62 | "#end"
63 | ]
64 | }
65 | ],
66 | "metadata": {
67 | "environment": {
68 | "name": "common-cpu.m59",
69 | "type": "gcloud",
70 | "uri": "gcr.io/deeplearning-platform-release/base-cpu:m59"
71 | },
72 | "kernelspec": {
73 | "display_name": "Python 3",
74 | "language": "python",
75 | "name": "python3"
76 | },
77 | "language_info": {
78 | "codemirror_mode": {
79 | "name": "ipython",
80 | "version": 3
81 | },
82 | "file_extension": ".py",
83 | "mimetype": "text/x-python",
84 | "name": "python",
85 | "nbconvert_exporter": "python",
86 | "pygments_lexer": "ipython3",
87 | "version": "3.7.8"
88 | }
89 | },
90 | "nbformat": 4,
91 | "nbformat_minor": 4
92 | }
93 |
--------------------------------------------------------------------------------
/Telecom_churn_AI_Platform/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup
2 | from setuptools import find_packages
3 |
4 | REQUIRED_PACKAGES = ['xgboost']
5 |
6 | setup(
7 | name='custom_predict',
8 | version='0.1',
9 | install_requires=REQUIRED_PACKAGES,
10 | packages=find_packages(),
11 | include_package_data=True,
12 | scripts=['predictor.py'])
13 |
--------------------------------------------------------------------------------
/bigquery_logisticsregression/README.md:
--------------------------------------------------------------------------------
1 | This folder contains a demonstration of using BigQuery ML for feature engineering and for building a logistic regression model
2 |
3 | The dataset used for this demonstration is available in the UCI ML repository - https://archive.ics.uci.edu/ml/datasets/Bank+Marketing
4 |
5 | You can watch the video demonstration of the code here - https://youtu.be/pX4P6uG1CuU
6 |
7 | You can store the data in a GCS bucket and import it into BigQuery (a sample load command is sketched below). Once done, follow the steps below
8 |
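A minimal sketch of such a load, assuming the UCI CSV (semicolon-delimited) has been copied to a bucket you own - the bucket path and file name are placeholders:

    bq load --source_format=CSV --skip_leading_rows=1 --field_delimiter=';' --autodetect \
      bank.bank_marketing gs://<your-bucket>/bank-full.csv
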
9 | Step 1 - Query the table to quickly validate the load and get an understanding of the data
10 |
11 | select
12 | age,
13 | job,
14 | marital,
15 | education,
16 | `default`,
17 | balance,
18 | housing,
19 | loan,
20 | contact,
21 | day,
22 | month,
23 | campaign,
24 | pdays,
25 | previous,
26 | poutcome,
27 | y as target
28 | from
29 | `srivatsan-project.bank.bank_marketing`
30 |
31 | Step 2 - Check the target value distribution. The target classes in this dataset are imbalanced
32 |
33 | select
34 | y as target, count(*)
35 | from
36 | `srivatsan-project.bank.bank_marketing`
37 | group by y
38 |
39 | Step 3 - Query to split the data into training, evaluation and prediction sets
40 |
41 | select
42 | age, job, marital, education, `default`, balance, housing, loan,
43 | contact, day, month, campaign, pdays, previous,
44 | poutcome, target,
45 | CASE
46 | WHEN split_field < 0.8 THEN 'training'
47 | WHEN split_field = 0.8 THEN 'evaluation'
48 | WHEN split_field > 0.8 THEN 'prediction'
49 | END AS dataframe
50 | from (
51 | select
52 | age, job, marital, education, `default`, balance, housing, loan,
53 | contact, day, month, campaign, pdays, previous,
54 | poutcome, y as target,
55 | ROUND(ABS(RAND()),1) as split_field
56 | from
57 | `srivatsan-project.bank.bank_marketing` )
58 |
59 | Query 4: Store the data split into a new table for use in the model. Creating a physical table keeps the data sets consistent across runs, since the split field is generated randomly
60 |
61 |
62 | CREATE OR REPLACE table `bank.marketing_tab` AS
63 | select
64 | age, job, marital, education, `default` as derog, balance, housing, loan,
65 | contact, day, month, campaign, pdays, previous,
66 | poutcome, target,
67 | CASE
68 | WHEN split_field < 0.8 THEN 'training'
69 | WHEN split_field = 0.8 THEN 'evaluation'
70 | WHEN split_field > 0.8 THEN 'prediction'
71 | END AS dataframe
72 | from (
73 | select
74 | age, job, marital, education, `default`, balance, housing, loan,
75 | contact, day, month, campaign, pdays, previous,
76 | poutcome, y as target,
77 | ROUND(ABS(RAND()),1) as split_field
78 | from
79 | `srivatsan-project.bank.bank_marketing` )
80 |
81 | Query 5: Validate the target variable distribution across the splits
82 |
83 | select
84 | dataframe, target, count(*)
85 | from `srivatsan-project.bank.marketing_tab`
86 | group by dataframe, target
87 | order by dataframe
88 |
89 | Query 6: Create a logistic regression model
90 |
91 |
92 | CREATE OR REPLACE MODEL
93 | `bank.marketing_model`
94 | OPTIONS
95 | ( model_type='LOGISTIC_REG',
96 | auto_class_weights=TRUE,
97 | input_label_cols=['target']
98 | ) AS
99 | SELECT
100 | * EXCEPT(dataframe)
101 | FROM
102 | `bank.marketing_tab`
103 | WHERE
104 | dataframe = 'training'
105 |
106 | Query 7: Get training info, feature info and weights from the trained model
107 |
108 | SELECT
109 | *
110 | FROM
111 | ML.TRAINING_INFO(MODEL `bank.marketing_model`)
112 |
113 | SELECT
114 | *
115 | FROM
116 | ML.FEATURE_INFO(MODEL `bank.marketing_model`)
117 |
118 |
119 | SELECT
120 | *
121 | FROM
122 | ML.WEIGHTS(MODEL `bank.marketing_model`)
123 |
124 | Query 8: Evaluate the trained model on the evaluation split
125 |
126 |
127 | SELECT
128 | *
129 | FROM
130 | ML.EVALUATE (MODEL `bank.marketing_model`,
131 | (
132 | SELECT
133 | *
134 | FROM
135 | `bank.marketing_tab`
136 | WHERE
137 | dataframe = 'evaluation'
138 | )
139 | )
140 |
141 | Query 9: Predict on new data using the trained model
142 |
143 |
144 | SELECT
145 | *
146 | FROM
147 | ML.PREDICT (MODEL `bank.marketing_model`,
148 | (
149 | SELECT
150 | *
151 | FROM
152 | `bank.marketing_tab`
153 | WHERE
154 | dataframe = 'prediction'
155 | )
156 | )
157 |
158 |
159 | Query 10: Add feature engineering to the model to improve performance
160 |
161 |
162 | CREATE OR REPLACE MODEL
163 | `bank.marketing_model_feat`
164 | TRANSFORM(
165 | ML.QUANTILE_BUCKETIZE(age,5) OVER() AS bucketized_age,
166 | ML.FEATURE_CROSS(STRUCT(job, education)) job_education,
167 | marital, balance, housing, loan,
168 | contact, day, month, pdays, previous,
169 | poutcome, target)
170 | OPTIONS
171 | ( model_type='LOGISTIC_REG',
172 | auto_class_weights=TRUE,
173 | input_label_cols=['target']
174 | ) AS
175 | SELECT
176 | * EXCEPT(dataframe, campaign, derog)
177 | FROM
178 | `bank.marketing_tab`
179 | WHERE
180 | dataframe = 'training'
181 |
182 | Query 11: Get training info, feature info and weights from the newly trained model
183 |
184 | SELECT
185 | *
186 | FROM
187 | ML.TRAINING_INFO(MODEL `bank.marketing_model_feat`)
188 |
189 | SELECT
190 | *
191 | FROM
192 | ML.FEATURE_INFO(MODEL `bank.marketing_model_feat`)
193 |
194 | SELECT
195 | *
196 | FROM
197 | ML.WEIGHTS(MODEL `bank.marketing_model_feat`)
198 |
199 | Query 12: Evaluate the feature-engineered model
200 |
201 |
202 | SELECT
203 | *
204 | FROM
205 | ML.EVALUATE (MODEL `bank.marketing_model_feat`,
206 | (
207 | SELECT
208 | *
209 | FROM
210 | `bank.marketing_tab`
211 | WHERE
212 | dataframe = 'evaluation'
213 | )
214 | )
215 |
216 |
--------------------------------------------------------------------------------
/bigqueryml_datastudio/README.md:
--------------------------------------------------------------------------------
1 |
2 | This repo contains SQL queries for my live session on BigQuery, BigQuery ML and Google Data Studio
3 |
4 | Live session video is available here - https://youtu.be/5l4Qb6Fy3E0
5 |
6 | In the set of SQL queries below we will analyze the SFO bikeshare dataset and also build multiple time series models (one per station) using BigQuery ML
7 |
8 | Query 1: Select a limited set of columns for analysis in Data Studio
9 |
10 | SELECT
11 | start_date, duration_sec, start_station_name, subscriber_type, zip_code
12 | FROM
13 | `bigquery-public-data.san_francisco_bikeshare.bikeshare_trips`
14 |
15 | Query 2: Select columns for understanding the time series pattern (daily trip counts per station)
16 |
17 | SELECT
18 | start_station_name,
19 | EXTRACT(DATE from start_date) AS date,
20 | COUNT(*) AS num_trips
21 | FROM
22 | `bigquery-public-data.san_francisco_bikeshare.bikeshare_trips`
23 | GROUP BY start_station_name, date
24 | order by start_station_name, date
25 |
26 | Query 3: Find the minimum and maximum dates, grouped by station
27 |
28 | SELECT
29 | start_station_name,
30 | min(EXTRACT(DATE from start_date)) as min_date,
31 | max(EXTRACT(DATE from start_date)) as max_date
32 | FROM
33 | `bigquery-public-data.san_francisco_bikeshare.bikeshare_trips`
34 | WHERE
35 | start_station_name IN ('Harry Bridges Plaza (Ferry Building)','Embarcadero at Sansome','2nd at Townsend')
36 | group by start_station_name
37 |
38 | Query 4: Create multiple time series models using the SFO bikeshare data
39 |
40 | CREATE OR REPLACE MODEL bike_share_ml.sfo_bike
41 | OPTIONS
42 | (model_type = 'ARIMA',
43 | time_series_timestamp_col = 'date',
44 | time_series_data_col = 'num_trips',
45 | time_series_id_col = 'start_station_name'
46 | ) AS
47 | SELECT
48 | start_station_name,
49 | EXTRACT(DATE from start_date) AS date,
50 | COUNT(*) AS num_trips
51 | FROM
52 | `bigquery-public-data.san_francisco_bikeshare.bikeshare_trips`
53 | WHERE
54 | start_station_name IN ('Harry Bridges Plaza (Ferry Building)','Embarcadero at Sansome','2nd at Townsend') AND
55 | EXTRACT(DATE from start_date) <= '2016-07-31'
56 | GROUP BY start_station_name, date
57 |
58 | Query 5: Evaluate the developed model
59 |
60 | SELECT
61 | *
62 | FROM
63 | ML.EVALUATE(MODEL `bike_share_ml.sfo_bike`)
64 |
65 | Query 6: Inspect the trained model coefficients
66 |
67 | SELECT
68 | *
69 | FROM
70 | ML.ARIMA_COEFFICIENTS(MODEL `bike_share_ml.sfo_bike`)
71 |
72 |
73 | Query 7: Forecast 3 future time periods
74 |
75 | SELECT
76 | *
77 | FROM
78 | ML.FORECAST(MODEL `bike_share_ml.sfo_bike`,
79 | STRUCT(3 AS horizon, 0.9 AS confidence_level))
80 |
81 | Query 8: Combine historical data with a 365-day forecast for visualization
82 |
83 | SELECT
84 | start_station_name,
85 | date AS timestamp,
86 | num_trips AS history_value,
87 | NULL AS forecast_value,
88 | NULL AS prediction_interval_lower_bound,
89 | NULL AS prediction_interval_upper_bound
90 | FROM
91 | (
92 | SELECT
93 | start_station_name,
94 | EXTRACT(DATE from start_date) AS date,
95 | COUNT(*) AS num_trips
96 | FROM
97 | `bigquery-public-data.san_francisco_bikeshare.bikeshare_trips`
98 | WHERE
99 | start_station_name IN ('Harry Bridges Plaza (Ferry Building)','Embarcadero at Sansome','2nd at Townsend')
100 | GROUP BY start_station_name, date
101 | )
102 | UNION ALL
103 | SELECT
104 | start_station_name,
105 | EXTRACT(DATE from forecast_timestamp) AS timestamp,
106 | NULL AS history_value,
107 | forecast_value,
108 | prediction_interval_lower_bound,
109 | prediction_interval_upper_bound
110 | FROM
111 | ML.FORECAST(MODEL `bike_share_ml.sfo_bike`,
112 | STRUCT(365 AS horizon, 0.9 AS confidence_level))
113 | WHERE
114 | start_station_name IN ('Harry Bridges Plaza (Ferry Building)','Embarcadero at Sansome','2nd at Townsend')
115 |
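116 | If you want Data Studio to always pick up a fresh forecast, one option (not part of the original session) is to persist the forecast as a view and add that view as a BigQuery data source. A minimal sketch - the view name is a placeholder:
117 |
118 | CREATE OR REPLACE VIEW `bike_share_ml.sfo_bike_forecast_vw` AS
119 | SELECT
120 | start_station_name,
121 | EXTRACT(DATE from forecast_timestamp) AS timestamp,
122 | forecast_value,
123 | prediction_interval_lower_bound,
124 | prediction_interval_upper_bound
125 | FROM
126 | ML.FORECAST(MODEL `bike_share_ml.sfo_bike`,
127 | STRUCT(365 AS horizon, 0.9 AS confidence_level))
128 |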
--------------------------------------------------------------------------------
/gke_autopilot/Dockerfile:
--------------------------------------------------------------------------------
1 | # lightweight python
2 | FROM python:3.7-slim
3 |
4 | RUN apt-get update
5 |
6 | # Copy local code to the container image.
7 | ENV APP_HOME /app
8 | WORKDIR $APP_HOME
9 | COPY . ./
10 |
11 | RUN ls -la $APP_HOME/
12 |
13 | # Install dependencies
14 | RUN pip install -r requirements.txt
15 |
16 | # Run the Streamlit app on container startup
17 | CMD [ "streamlit", "run", "imgwebapp.py" ]
--------------------------------------------------------------------------------
/gke_autopilot/README.md:
--------------------------------------------------------------------------------
1 | TBF
2 |
--------------------------------------------------------------------------------
/gke_autopilot/deployment.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: apps/v1
2 | kind: Deployment
3 | metadata:
4 | name: imgclass
5 | spec:
6 | replicas: 2
7 | selector:
8 | matchLabels:
9 | app: imageclassifier
10 | template:
11 | metadata:
12 | labels:
13 | app: imageclassifier
14 | spec:
15 | containers:
16 | - name: cv-app
17 | image: gcr.io/srivatsan-project/imgclassifier
18 | ports:
19 | - containerPort: 8501
--------------------------------------------------------------------------------
/gke_autopilot/imgwebapp.py:
--------------------------------------------------------------------------------
1 | import warnings
2 | warnings.simplefilter(action='ignore', category=FutureWarning)
3 |
4 | import tensorflow as tf
5 | import numpy as np
6 | import streamlit as st
7 | from PIL import Image
8 | import requests
9 | from io import BytesIO
10 |
11 | st.set_option('deprecation.showfileUploaderEncoding', False)
12 | st.title("Bean Image Classifier")
13 | st.text("Provide URL of bean Image for image classification")
14 |
15 | @st.cache(allow_output_mutation=True)
16 | def load_model():
17 | model = tf.keras.models.load_model('./models')
18 | return model
19 |
20 | with st.spinner('Loading Model Into Memory....'):
21 | model = load_model()
22 |
23 | classes=['angular_leaf_spot','bean_rust','healthy']
24 |
25 | def scale(image):
26 | image = tf.cast(image, tf.float32)
27 | image /= 255.0
28 |
29 | return tf.image.resize(image,[224,224])
30 |
31 | def decode_img(image):
32 | img = tf.image.decode_jpeg(image, channels=3)
33 | img = scale(img)
34 | return np.expand_dims(img, axis=0)
35 |
36 | #path = st.text_input('Enter Image URL to Classify.. ','http://barmac.com.au/wp-content/uploads/sites/3/2016/01/Angular-Leaf-Spot-Beans1.jpg')
37 | img_file_buffer = st.file_uploader("Upload Image to Classify....")
38 |
39 | if img_file_buffer is not None:
40 | image = img_file_buffer
41 | image_out = Image.open(img_file_buffer)
42 | image = image.getvalue()
43 | else:
44 | test_image = 'http://barmac.com.au/wp-content/uploads/sites/3/2016/01/Angular-Leaf-Spot-Beans1.jpg'
45 | image = requests.get(test_image).content
46 | image_out = Image.open(BytesIO(image))
47 |
48 | st.write("Predicted Class :")
49 | with st.spinner('classifying.....'):
50 | label =np.argmax(model.predict(decode_img(image)),axis=1)
51 | st.write(classes[label[0]])
52 | st.write("")
53 | st.image(image_out, caption='Classifying Bean Image', use_column_width=True)
--------------------------------------------------------------------------------
/gke_autopilot/models/assets/README.md:
--------------------------------------------------------------------------------
1 | TBF
2 |
--------------------------------------------------------------------------------
/gke_autopilot/models/saved_model.pb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/srivatsan88/google_cloud_AI_ML/1ef1fdb2208cb5db66d43a39a46ffb3597255e0c/gke_autopilot/models/saved_model.pb
--------------------------------------------------------------------------------
/gke_autopilot/models/variables/variables.data-00000-of-00002:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/srivatsan88/google_cloud_AI_ML/1ef1fdb2208cb5db66d43a39a46ffb3597255e0c/gke_autopilot/models/variables/variables.data-00000-of-00002
--------------------------------------------------------------------------------
/gke_autopilot/models/variables/variables.data-00001-of-00002:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/srivatsan88/google_cloud_AI_ML/1ef1fdb2208cb5db66d43a39a46ffb3597255e0c/gke_autopilot/models/variables/variables.data-00001-of-00002
--------------------------------------------------------------------------------
/gke_autopilot/models/variables/variables.index:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/srivatsan88/google_cloud_AI_ML/1ef1fdb2208cb5db66d43a39a46ffb3597255e0c/gke_autopilot/models/variables/variables.index
--------------------------------------------------------------------------------
/gke_autopilot/requirements.txt:
--------------------------------------------------------------------------------
1 | tensorflow
2 | streamlit
3 | pillow
4 | numpy
5 | requests
6 |
7 | # Install these dependencies with: pip3 install -r requirements.txt
--------------------------------------------------------------------------------
/gke_autopilot/service.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: Service
3 | metadata:
4 | name: imageclassifier
5 | spec:
6 | type: LoadBalancer
7 | selector:
8 | app: imageclassifier
9 | ports:
10 | - port: 80
11 | targetPort: 8501
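12 |
13 | # A possible way to roll this out on the Autopilot cluster (kubectl must point at the cluster):
14 | #   kubectl apply -f deployment.yaml
15 | #   kubectl apply -f service.yaml
16 | #   kubectl get service imageclassifier   # the external IP serves the app on port 80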
--------------------------------------------------------------------------------