├── Cloud_AutoML_Vision ├── Data Preparation.ipynb ├── Kaggle Data Download.ipynb ├── README.md └── predict.ipynb ├── GCP_Serverless_AppEngine ├── ComplaintsFlask.py ├── ComplaintsServer.py ├── Dockerfile ├── README.md ├── Text_Classification_using_TFIDF_AutoML_scikit_learn.ipynb ├── app.yaml ├── preprocessing │ └── functions.py └── requirements.txt ├── Google_Kubernetes_Engine ├── ComplaintsFlask.py ├── Dockerfile ├── README.md ├── Text_Classification_using_TFIDF_AutoML_scikit_learn.ipynb ├── deployment.yaml ├── preprocessing │ └── functions.py ├── requirements.txt └── service.yaml ├── README.md ├── Telecom_churn_AI_Platform ├── Churn_model.ipynb ├── README.md ├── data │ ├── README.md │ └── telecom_customer_churn.csv ├── input.json ├── predict_setup.ipynb ├── predictor.py ├── serving.ipynb └── setup.py ├── bigquery_logisticsregression └── README.md ├── bigqueryml_datastudio └── README.md └── gke_autopilot ├── Dockerfile ├── README.md ├── deployment.yaml ├── imgwebapp.py ├── models ├── assets │ └── README.md ├── saved_model.pb └── variables │ ├── variables.data-00000-of-00002 │ ├── variables.data-00001-of-00002 │ └── variables.index ├── requirements.txt └── service.yaml /Cloud_AutoML_Vision/Data Preparation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 36, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 37, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "df=pd.read_csv('train.csv')" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 38, 24 | "metadata": {}, 25 | "outputs": [ 26 | { 27 | "data": { 28 | "text/html": [ 29 | "
\n", 30 | "\n", 43 | "\n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | "
image_idlabel
01000015157.jpg0
11000201771.jpg3
2100042118.jpg1
31000723321.jpg1
41000812911.jpg3
.........
21392999068805.jpg3
21393999329392.jpg3
21394999474432.jpg1
21395999616605.jpg4
21396999998473.jpg4
\n", 109 | "

21397 rows × 2 columns

\n", 110 | "
" 111 | ], 112 | "text/plain": [ 113 | " image_id label\n", 114 | "0 1000015157.jpg 0\n", 115 | "1 1000201771.jpg 3\n", 116 | "2 100042118.jpg 1\n", 117 | "3 1000723321.jpg 1\n", 118 | "4 1000812911.jpg 3\n", 119 | "... ... ...\n", 120 | "21392 999068805.jpg 3\n", 121 | "21393 999329392.jpg 3\n", 122 | "21394 999474432.jpg 1\n", 123 | "21395 999616605.jpg 4\n", 124 | "21396 999998473.jpg 4\n", 125 | "\n", 126 | "[21397 rows x 2 columns]" 127 | ] 128 | }, 129 | "execution_count": 38, 130 | "metadata": {}, 131 | "output_type": "execute_result" 132 | } 133 | ], 134 | "source": [ 135 | "df" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": null, 141 | "metadata": {}, 142 | "outputs": [], 143 | "source": [ 144 | "label ={0:'CBB', 1:'CBSD', 2:'CGM', 3:'CMD', 4:'Healthy'}" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": 39, 150 | "metadata": {}, 151 | "outputs": [], 152 | "source": [ 153 | "df['image_path'] = 'gs://srivatsan-project-vcm/train_images/' + df['image_id']" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": 40, 159 | "metadata": {}, 160 | "outputs": [ 161 | { 162 | "data": { 163 | "text/html": [ 164 | "
\n", 165 | "\n", 178 | "\n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | "
image_idlabelimage_path
01000015157.jpg0gs://srivatsan-project-vcm/train_images/100001...
11000201771.jpg3gs://srivatsan-project-vcm/train_images/100020...
2100042118.jpg1gs://srivatsan-project-vcm/train_images/100042...
31000723321.jpg1gs://srivatsan-project-vcm/train_images/100072...
41000812911.jpg3gs://srivatsan-project-vcm/train_images/100081...
............
21392999068805.jpg3gs://srivatsan-project-vcm/train_images/999068...
21393999329392.jpg3gs://srivatsan-project-vcm/train_images/999329...
21394999474432.jpg1gs://srivatsan-project-vcm/train_images/999474...
21395999616605.jpg4gs://srivatsan-project-vcm/train_images/999616...
21396999998473.jpg4gs://srivatsan-project-vcm/train_images/999998...
\n", 256 | "

21397 rows × 3 columns

\n", 257 | "
" 258 | ], 259 | "text/plain": [ 260 | " image_id label \\\n", 261 | "0 1000015157.jpg 0 \n", 262 | "1 1000201771.jpg 3 \n", 263 | "2 100042118.jpg 1 \n", 264 | "3 1000723321.jpg 1 \n", 265 | "4 1000812911.jpg 3 \n", 266 | "... ... ... \n", 267 | "21392 999068805.jpg 3 \n", 268 | "21393 999329392.jpg 3 \n", 269 | "21394 999474432.jpg 1 \n", 270 | "21395 999616605.jpg 4 \n", 271 | "21396 999998473.jpg 4 \n", 272 | "\n", 273 | " image_path \n", 274 | "0 gs://srivatsan-project-vcm/train_images/100001... \n", 275 | "1 gs://srivatsan-project-vcm/train_images/100020... \n", 276 | "2 gs://srivatsan-project-vcm/train_images/100042... \n", 277 | "3 gs://srivatsan-project-vcm/train_images/100072... \n", 278 | "4 gs://srivatsan-project-vcm/train_images/100081... \n", 279 | "... ... \n", 280 | "21392 gs://srivatsan-project-vcm/train_images/999068... \n", 281 | "21393 gs://srivatsan-project-vcm/train_images/999329... \n", 282 | "21394 gs://srivatsan-project-vcm/train_images/999474... \n", 283 | "21395 gs://srivatsan-project-vcm/train_images/999616... \n", 284 | "21396 gs://srivatsan-project-vcm/train_images/999998... \n", 285 | "\n", 286 | "[21397 rows x 3 columns]" 287 | ] 288 | }, 289 | "execution_count": 40, 290 | "metadata": {}, 291 | "output_type": "execute_result" 292 | } 293 | ], 294 | "source": [ 295 | "df" 296 | ] 297 | }, 298 | { 299 | "cell_type": "code", 300 | "execution_count": 41, 301 | "metadata": {}, 302 | "outputs": [], 303 | "source": [ 304 | "df=df.replace({'label':label})" 305 | ] 306 | }, 307 | { 308 | "cell_type": "code", 309 | "execution_count": 42, 310 | "metadata": {}, 311 | "outputs": [ 312 | { 313 | "data": { 314 | "text/html": [ 315 | "
\n", 316 | "\n", 329 | "\n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | "
image_idlabelimage_path
01000015157.jpgCBBgs://srivatsan-project-vcm/train_images/100001...
11000201771.jpgCMDgs://srivatsan-project-vcm/train_images/100020...
2100042118.jpgCBSDgs://srivatsan-project-vcm/train_images/100042...
31000723321.jpgCBSDgs://srivatsan-project-vcm/train_images/100072...
41000812911.jpgCMDgs://srivatsan-project-vcm/train_images/100081...
............
21392999068805.jpgCMDgs://srivatsan-project-vcm/train_images/999068...
21393999329392.jpgCMDgs://srivatsan-project-vcm/train_images/999329...
21394999474432.jpgCBSDgs://srivatsan-project-vcm/train_images/999474...
21395999616605.jpgHealthygs://srivatsan-project-vcm/train_images/999616...
21396999998473.jpgHealthygs://srivatsan-project-vcm/train_images/999998...
\n", 407 | "

21397 rows × 3 columns

\n", 408 | "
" 409 | ], 410 | "text/plain": [ 411 | " image_id label \\\n", 412 | "0 1000015157.jpg CBB \n", 413 | "1 1000201771.jpg CMD \n", 414 | "2 100042118.jpg CBSD \n", 415 | "3 1000723321.jpg CBSD \n", 416 | "4 1000812911.jpg CMD \n", 417 | "... ... ... \n", 418 | "21392 999068805.jpg CMD \n", 419 | "21393 999329392.jpg CMD \n", 420 | "21394 999474432.jpg CBSD \n", 421 | "21395 999616605.jpg Healthy \n", 422 | "21396 999998473.jpg Healthy \n", 423 | "\n", 424 | " image_path \n", 425 | "0 gs://srivatsan-project-vcm/train_images/100001... \n", 426 | "1 gs://srivatsan-project-vcm/train_images/100020... \n", 427 | "2 gs://srivatsan-project-vcm/train_images/100042... \n", 428 | "3 gs://srivatsan-project-vcm/train_images/100072... \n", 429 | "4 gs://srivatsan-project-vcm/train_images/100081... \n", 430 | "... ... \n", 431 | "21392 gs://srivatsan-project-vcm/train_images/999068... \n", 432 | "21393 gs://srivatsan-project-vcm/train_images/999329... \n", 433 | "21394 gs://srivatsan-project-vcm/train_images/999474... \n", 434 | "21395 gs://srivatsan-project-vcm/train_images/999616... \n", 435 | "21396 gs://srivatsan-project-vcm/train_images/999998... \n", 436 | "\n", 437 | "[21397 rows x 3 columns]" 438 | ] 439 | }, 440 | "execution_count": 42, 441 | "metadata": {}, 442 | "output_type": "execute_result" 443 | } 444 | ], 445 | "source": [ 446 | "df" 447 | ] 448 | }, 449 | { 450 | "cell_type": "code", 451 | "execution_count": 43, 452 | "metadata": {}, 453 | "outputs": [ 454 | { 455 | "data": { 456 | "text/plain": [ 457 | "CMD 13158\n", 458 | "Healthy 2577\n", 459 | "CGM 2386\n", 460 | "CBSD 2189\n", 461 | "CBB 1087\n", 462 | "Name: label, dtype: int64" 463 | ] 464 | }, 465 | "execution_count": 43, 466 | "metadata": {}, 467 | "output_type": "execute_result" 468 | } 469 | ], 470 | "source": [ 471 | "df['label'].value_counts()" 472 | ] 473 | }, 474 | { 475 | "cell_type": "code", 476 | "execution_count": 44, 477 | "metadata": {}, 478 | "outputs": [ 479 | { 480 | "data": { 481 | "text/html": [ 482 | "
\n", 483 | "\n", 496 | "\n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | "
image_idlabelimage_path
01000015157.jpgCBBgs://srivatsan-project-vcm/train_images/100001...
11000201771.jpgCMDgs://srivatsan-project-vcm/train_images/100020...
2100042118.jpgCBSDgs://srivatsan-project-vcm/train_images/100042...
31000723321.jpgCBSDgs://srivatsan-project-vcm/train_images/100072...
41000812911.jpgCMDgs://srivatsan-project-vcm/train_images/100081...
............
21392999068805.jpgCMDgs://srivatsan-project-vcm/train_images/999068...
21393999329392.jpgCMDgs://srivatsan-project-vcm/train_images/999329...
21394999474432.jpgCBSDgs://srivatsan-project-vcm/train_images/999474...
21395999616605.jpgHealthygs://srivatsan-project-vcm/train_images/999616...
21396999998473.jpgHealthygs://srivatsan-project-vcm/train_images/999998...
\n", 574 | "

21397 rows × 3 columns

\n", 575 | "
" 576 | ], 577 | "text/plain": [ 578 | " image_id label \\\n", 579 | "0 1000015157.jpg CBB \n", 580 | "1 1000201771.jpg CMD \n", 581 | "2 100042118.jpg CBSD \n", 582 | "3 1000723321.jpg CBSD \n", 583 | "4 1000812911.jpg CMD \n", 584 | "... ... ... \n", 585 | "21392 999068805.jpg CMD \n", 586 | "21393 999329392.jpg CMD \n", 587 | "21394 999474432.jpg CBSD \n", 588 | "21395 999616605.jpg Healthy \n", 589 | "21396 999998473.jpg Healthy \n", 590 | "\n", 591 | " image_path \n", 592 | "0 gs://srivatsan-project-vcm/train_images/100001... \n", 593 | "1 gs://srivatsan-project-vcm/train_images/100020... \n", 594 | "2 gs://srivatsan-project-vcm/train_images/100042... \n", 595 | "3 gs://srivatsan-project-vcm/train_images/100072... \n", 596 | "4 gs://srivatsan-project-vcm/train_images/100081... \n", 597 | "... ... \n", 598 | "21392 gs://srivatsan-project-vcm/train_images/999068... \n", 599 | "21393 gs://srivatsan-project-vcm/train_images/999329... \n", 600 | "21394 gs://srivatsan-project-vcm/train_images/999474... \n", 601 | "21395 gs://srivatsan-project-vcm/train_images/999616... \n", 602 | "21396 gs://srivatsan-project-vcm/train_images/999998... \n", 603 | "\n", 604 | "[21397 rows x 3 columns]" 605 | ] 606 | }, 607 | "execution_count": 44, 608 | "metadata": {}, 609 | "output_type": "execute_result" 610 | } 611 | ], 612 | "source": [ 613 | "df" 614 | ] 615 | }, 616 | { 617 | "cell_type": "code", 618 | "execution_count": 45, 619 | "metadata": {}, 620 | "outputs": [], 621 | "source": [ 622 | "df[['image_path','label']].to_csv('labels.csv', index=False, header=False)" 623 | ] 624 | }, 625 | { 626 | "cell_type": "code", 627 | "execution_count": 46, 628 | "metadata": {}, 629 | "outputs": [ 630 | { 631 | "name": "stdout", 632 | "output_type": "stream", 633 | "text": [ 634 | "Copying file://labels.csv [Content-Type=text/csv]...\n", 635 | "/ [1 files][ 1.2 MiB/ 1.2 MiB] \n", 636 | "Operation completed over 1 objects/1.2 MiB. 
\n" 637 | ] 638 | } 639 | ], 640 | "source": [ 641 | "!gsutil cp labels.csv gs://srivatsan-project-vcm/" 642 | ] 643 | }, 644 | { 645 | "cell_type": "code", 646 | "execution_count": null, 647 | "metadata": {}, 648 | "outputs": [], 649 | "source": [] 650 | }, 651 | { 652 | "cell_type": "code", 653 | "execution_count": null, 654 | "metadata": {}, 655 | "outputs": [], 656 | "source": [] 657 | }, 658 | { 659 | "cell_type": "code", 660 | "execution_count": null, 661 | "metadata": {}, 662 | "outputs": [], 663 | "source": [] 664 | }, 665 | { 666 | "cell_type": "code", 667 | "execution_count": null, 668 | "metadata": {}, 669 | "outputs": [], 670 | "source": [] 671 | }, 672 | { 673 | "cell_type": "code", 674 | "execution_count": null, 675 | "metadata": {}, 676 | "outputs": [], 677 | "source": [] 678 | }, 679 | { 680 | "cell_type": "code", 681 | "execution_count": null, 682 | "metadata": {}, 683 | "outputs": [], 684 | "source": [] 685 | }, 686 | { 687 | "cell_type": "code", 688 | "execution_count": null, 689 | "metadata": {}, 690 | "outputs": [], 691 | "source": [] 692 | }, 693 | { 694 | "cell_type": "code", 695 | "execution_count": null, 696 | "metadata": {}, 697 | "outputs": [], 698 | "source": [] 699 | }, 700 | { 701 | "cell_type": "code", 702 | "execution_count": null, 703 | "metadata": {}, 704 | "outputs": [], 705 | "source": [ 706 | "!curl -X GET -H \"Authorization: Bearer \"$(gcloud auth application-default print-access-token) https://automl.googleapis.com/v1/projects/srivatsan-project/locations/us-central1/operations/8525331686798393344" 707 | ] 708 | }, 709 | { 710 | "cell_type": "code", 711 | "execution_count": null, 712 | "metadata": {}, 713 | "outputs": [], 714 | "source": [] 715 | } 716 | ], 717 | "metadata": { 718 | "environment": { 719 | "name": "common-cpu.m59", 720 | "type": "gcloud", 721 | "uri": "gcr.io/deeplearning-platform-release/base-cpu:m59" 722 | }, 723 | "kernelspec": { 724 | "display_name": "Python 3", 725 | "language": "python", 726 | "name": "python3" 727 | }, 728 | "language_info": { 729 | "codemirror_mode": { 730 | "name": "ipython", 731 | "version": 3 732 | }, 733 | "file_extension": ".py", 734 | "mimetype": "text/x-python", 735 | "name": "python", 736 | "nbconvert_exporter": "python", 737 | "pygments_lexer": "ipython3", 738 | "version": "3.7.8" 739 | } 740 | }, 741 | "nbformat": 4, 742 | "nbformat_minor": 4 743 | } 744 | -------------------------------------------------------------------------------- /Cloud_AutoML_Vision/README.md: -------------------------------------------------------------------------------- 1 | This repository contains code and steps to train a image classifier model on Google Cloud AutoML Vision 2 | 3 | Cloud ML Vision is trained on Kaggle Cassava Leaf Disease detection dataset - https://www.kaggle.com/c/cassava-leaf-disease-classification 4 | 5 | Kaggle Data Download.ipynb - contains code to pull data from Kaggle and move the data to google cloud storage 6 | 7 | Data Preparation.ipynb - contains code to prepare csv file containing image file and label information. 
This file will be used for training Cloud AutoML Vision models 8 | 9 | On how to use google cloud UI to create AutoML vision model you can check this video - https://youtu.be/XZMU9uNbQvs 10 | 11 | Once model is train you can deploy it and use predict.ipynb file to predict new instances of input 12 | 13 | -------------------------------------------------------------------------------- /Cloud_AutoML_Vision/predict.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 12, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from google.cloud import aiplatform\n", 10 | "\n", 11 | "def predict_image_classification_sample(\n", 12 | " endpoint: str, instance: dict, parameters_dict: dict\n", 13 | "):\n", 14 | " client_options = dict(api_endpoint=\"us-central1-prediction-aiplatform.googleapis.com\")\n", 15 | " client = aiplatform.PredictionServiceClient(client_options=client_options)\n", 16 | " from google.protobuf import json_format\n", 17 | " from google.protobuf.struct_pb2 import Value\n", 18 | "\n", 19 | " # See gs://google-cloud-aiplatform/schema/predict/params/image_classification_1.0.0.yaml for the format of the parameters.\n", 20 | " parameters = json_format.ParseDict(parameters_dict, Value())\n", 21 | "\n", 22 | " # See gs://google-cloud-aiplatform/schema/predict/instance/image_classification_1.0.0.yaml for the format of the instances.\n", 23 | " instances_list = [instance]\n", 24 | " instances = [json_format.ParseDict(s, Value()) for s in instances_list]\n", 25 | " response = client.predict(\n", 26 | " endpoint=endpoint, instances=instances, parameters=parameters\n", 27 | " )\n", 28 | "\n", 29 | " print(\"response\")\n", 30 | " print(\" deployed_model_id:\", response.deployed_model_id)\n", 31 | " predictions = response.predictions\n", 32 | " print(\"predictions\")\n", 33 | " for prediction in predictions:\n", 34 | " # See gs://google-cloud-aiplatform/schema/predict/prediction/classification_1.0.0.yaml for the format of the predictions.\n", 35 | " print(\" prediction:\", dict(prediction))" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 13, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "import base64\n", 45 | "\n", 46 | "with open('test_images/2216849948.jpg', \"rb\") as imageFile:\n", 47 | " str = base64.b64encode(imageFile.read())" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 30, 53 | "metadata": {}, 54 | "outputs": [], 55 | "source": [ 56 | "from google.cloud import automl\n", 57 | "\n", 58 | "def predict():\n", 59 | " \"\"\"Predict.\"\"\"\n", 60 | " # [START automl_vision_classification_predict]\n", 61 | "\n", 62 | " project_id = \"srivatsan-project\"\n", 63 | " model_id = \"cassava\"\n", 64 | "\n", 65 | " file_path = \"test_images/2216849948.jpg\"\n", 66 | "\n", 67 | " prediction_client = automl.PredictionServiceClient()\n", 68 | "\n", 69 | " # Get the full path of the model.\n", 70 | " model_full_id = automl.AutoMlClient.model_path(\n", 71 | " project_id, \"us-central1\", model_id\n", 72 | " )\n", 73 | "\n", 74 | " # Read the file.\n", 75 | " with open(file_path, \"rb\") as content_file:\n", 76 | " content = content_file.read()\n", 77 | "\n", 78 | " image = automl.Image(image_bytes=content)\n", 79 | " payload = automl.ExamplePayload(image=image)\n", 80 | "\n", 81 | " # params is additional domain-specific parameters.\n", 82 | " # score_threshold is used to filter the result\n", 83 | " # 
https://cloud.google.com/automl/docs/reference/rpc/google.cloud.automl.v1#predictrequest\n", 84 | " params = {\"score_threshold\": \"0.8\"}\n", 85 | "\n", 86 | " request = automl.PredictRequest(\n", 87 | " name=model_full_id,\n", 88 | " payload=payload,\n", 89 | " params=params\n", 90 | " )\n", 91 | " response = prediction_client.predict(request=request)\n", 92 | "\n", 93 | " print(\"Prediction results:\")\n", 94 | " for result in response.payload:\n", 95 | " print(\"Predicted class name: {}\".format(result.display_name))\n", 96 | " print(\"Predicted class score: {}\".format(result.classification.score))" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": 31, 102 | "metadata": {}, 103 | "outputs": [ 104 | { 105 | "ename": "NotFound", 106 | "evalue": "404 Invalid resource ID", 107 | "output_type": "error", 108 | "traceback": [ 109 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 110 | "\u001b[0;31m_InactiveRpcError\u001b[0m Traceback (most recent call last)", 111 | "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/google/api_core/grpc_helpers.py\u001b[0m in \u001b[0;36merror_remapped_callable\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 56\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 57\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mcallable_\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 58\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mgrpc\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mRpcError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mexc\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 112 | "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/grpc/_channel.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, request, timeout, metadata, credentials, wait_for_ready, compression)\u001b[0m\n\u001b[1;32m 922\u001b[0m wait_for_ready, compression)\n\u001b[0;32m--> 923\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0m_end_unary_response_blocking\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstate\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcall\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 924\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 113 | "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/grpc/_channel.py\u001b[0m in \u001b[0;36m_end_unary_response_blocking\u001b[0;34m(state, call, with_call, deadline)\u001b[0m\n\u001b[1;32m 825\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 826\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0m_InactiveRpcError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstate\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 827\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 114 | "\u001b[0;31m_InactiveRpcError\u001b[0m: <_InactiveRpcError of RPC that terminated with:\n\tstatus = StatusCode.NOT_FOUND\n\tdetails = \"Invalid resource ID\"\n\tdebug_error_string = \"{\"created\":\"@1608932170.899453921\",\"description\":\"Error received from peer 
ipv4:74.125.124.95:443\",\"file\":\"src/core/lib/surface/call.cc\",\"file_line\":1062,\"grpc_message\":\"Invalid resource ID\",\"grpc_status\":5}\"\n>", 115 | "\nThe above exception was the direct cause of the following exception:\n", 116 | "\u001b[0;31mNotFound\u001b[0m Traceback (most recent call last)", 117 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 118 | "\u001b[0;32m\u001b[0m in \u001b[0;36mpredict\u001b[0;34m()\u001b[0m\n\u001b[1;32m 34\u001b[0m \u001b[0mparams\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mparams\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 35\u001b[0m )\n\u001b[0;32m---> 36\u001b[0;31m \u001b[0mresponse\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mprediction_client\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 37\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 38\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Prediction results:\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 119 | "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/google/cloud/automl_v1/services/prediction_service/client.py\u001b[0m in \u001b[0;36mpredict\u001b[0;34m(self, request, name, payload, params, retry, timeout, metadata)\u001b[0m\n\u001b[1;32m 411\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 412\u001b[0m \u001b[0;31m# Send the request.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 413\u001b[0;31m \u001b[0mresponse\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mrpc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mretry\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mretry\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtimeout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmetadata\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmetadata\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 414\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 415\u001b[0m \u001b[0;31m# Done; return the response.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 120 | "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/google/api_core/gapic_v1/method.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 143\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"metadata\"\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmetadata\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 144\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 145\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mwrapped_func\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 146\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 147\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 121 | "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/google/api_core/grpc_helpers.py\u001b[0m 
in \u001b[0;36merror_remapped_callable\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 57\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mcallable_\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 58\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mgrpc\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mRpcError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mexc\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 59\u001b[0;31m \u001b[0msix\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mraise_from\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mexceptions\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfrom_grpc_error\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mexc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mexc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 60\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 61\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0merror_remapped_callable\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 122 | "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/six.py\u001b[0m in \u001b[0;36mraise_from\u001b[0;34m(value, from_value)\u001b[0m\n", 123 | "\u001b[0;31mNotFound\u001b[0m: 404 Invalid resource ID" 124 | ] 125 | } 126 | ], 127 | "source": [ 128 | "predict()" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": null, 134 | "metadata": {}, 135 | "outputs": [], 136 | "source": [] 137 | } 138 | ], 139 | "metadata": { 140 | "environment": { 141 | "name": "common-cpu.m59", 142 | "type": "gcloud", 143 | "uri": "gcr.io/deeplearning-platform-release/base-cpu:m59" 144 | }, 145 | "kernelspec": { 146 | "display_name": "Python 3", 147 | "language": "python", 148 | "name": "python3" 149 | }, 150 | "language_info": { 151 | "codemirror_mode": { 152 | "name": "ipython", 153 | "version": 3 154 | }, 155 | "file_extension": ".py", 156 | "mimetype": "text/x-python", 157 | "name": "python", 158 | "nbconvert_exporter": "python", 159 | "pygments_lexer": "ipython3", 160 | "version": "3.7.8" 161 | } 162 | }, 163 | "nbformat": 4, 164 | "nbformat_minor": 4 165 | } 166 | -------------------------------------------------------------------------------- /GCP_Serverless_AppEngine/ComplaintsFlask.py: -------------------------------------------------------------------------------- 1 | from flask import Flask, jsonify, request 2 | from preprocessing.functions import tokenize 3 | import xgboost as xgb 4 | import joblib 5 | from healthcheck import HealthCheck 6 | 7 | import os 8 | import logging 9 | 10 | logging.basicConfig(format='%(message)s', level=logging.INFO) 11 | app = Flask(__name__) 12 | 13 | target={0:'Debt collection', 1:'Credit card or prepaid card', 2:'Mortgage', 14 | 3:'Checking or savings account', 4:'Student loan', 15 | 5:'Vehicle loan or lease'} 16 | 17 | tfvectorizer = joblib.load('models/tfvectroizer.pkl') 18 | xgb_clf = xgb.Booster({'nthread': 3}) 19 | xgb_clf.load_model('models/complaints.booster') 20 | 21 | logging.info('All models loaded succcessfully') 22 | 23 | health = HealthCheck(app, "/hcheck") 24 | 25 | def howami(): 26 | return True, "I am alive. Thanks for checking.." 
27 | 28 | health.add_check(howami) 29 | 30 | def scorer(text): 31 | encoded_text = tfvectorizer.transform([text]) 32 | score = xgb_clf.predict(xgb.DMatrix(encoded_text)) 33 | return score 34 | 35 | @app.route('/score', methods=['POST']) 36 | def predict_fn(): 37 | text = request.get_json()['text'] 38 | logging.info('Received incoming message - '+ text) 39 | predictions = scorer(text) 40 | predictions = predictions.argmax(axis=1)[0] 41 | return jsonify({'predictions ': str(predictions), 'Category ': target.get(predictions)}) 42 | 43 | @app.route('/') 44 | def hello(): 45 | return 'Welcome to Complaints Prediction Application' 46 | 47 | if __name__ == "__main__": 48 | app.run(host='0.0.0.0', port=int(os.environ.get('PORT', 5000))) -------------------------------------------------------------------------------- /GCP_Serverless_AppEngine/ComplaintsServer.py: -------------------------------------------------------------------------------- 1 | from ComplaintsFlask import app 2 | 3 | if __name__ == "__main__": 4 | app.run() -------------------------------------------------------------------------------- /GCP_Serverless_AppEngine/Dockerfile: -------------------------------------------------------------------------------- 1 | # lightweight python 2 | FROM python:3.7-slim 3 | 4 | RUN apt-get update && apt-get install -y libgomp1 5 | 6 | # Copy local code to the container image. 7 | ENV APP_HOME /app 8 | WORKDIR $APP_HOME 9 | COPY . ./ 10 | 11 | RUN ls -la $APP_HOME/ 12 | 13 | ENV NLTK_DATA $APP_HOME/nltk_data 14 | 15 | # Install dependencies 16 | RUN pip install -r requirements.txt 17 | 18 | ENV PORT 5000 19 | 20 | # Run the flask service on container startup 21 | #CMD exec gunicorn --bind :$PORT --workers 1 --threads 8 ComplaintsServer 22 | CMD [ "python", "ComplaintsFlask.py" ] 23 | -------------------------------------------------------------------------------- /GCP_Serverless_AppEngine/README.md: -------------------------------------------------------------------------------- 1 | This repository contains details and code for deploying machine learning models on Google Cloud Serverless platform and Google Cloud App Engine 2 | 3 | Details of the code and working is covered in detailed in my YouTube channel (AIEngineering) here - https://youtu.be/kyQH71pB0vI 4 | 5 | Before getting started with deployment the container expects trained models and also downloaded nltk corpus 6 | 7 | For model files you are run associated notebook in this repository or else download the trained models and use it using below 2 command 8 | 9 | wget --no-check-certificate 'https://docs.google.com/uc?export=download&id=1m1bVxlUjrJ_tmWApYJHlk2q5bikGyIxr' -O complaints.booster 10 | 11 | wget --no-check-certificate 'https://docs.google.com/uc?export=download&id=1WURdboJjF27g9bZG_CGCCGSYWi0EvxJR' -O tfvectroizer.pkl 12 | 13 | If you want more details on model training you can check the video here - https://youtu.be/EHt_x8r1exU 14 | 15 | For NLTK you have to download stopwords and tokenizer corpa. One can download it using nltk.download() 16 | 17 | 18 | To deploy the model on serverless infrastructure (Cloud Run), execute the below commands 19 | ------------------------------------------------------------------------------------------ 20 | 21 | Building the container image - gcloud builds submit --tag gcr.io//complaintsapi . 
22 | 23 | List the image - gcloud builds list --filter complaints 24 | 25 | Checking logs of built image - gcloud builds log 26 | 27 | Deploy the container on google cloud run - gcloud run deploy complaintsapi --image gcr.io//complaintsapi --platform managed --memory 1G 28 | 29 | To deploy the model on App Engine run below commands 30 | ----------------------------------------------------- 31 | 32 | gcloud app create 33 | 34 | gcloud app deploy 35 | -------------------------------------------------------------------------------- /GCP_Serverless_AppEngine/app.yaml: -------------------------------------------------------------------------------- 1 | runtime: custom 2 | env: flex -------------------------------------------------------------------------------- /GCP_Serverless_AppEngine/preprocessing/functions.py: -------------------------------------------------------------------------------- 1 | import re 2 | import nltk 3 | 4 | stemmer = nltk.stem.SnowballStemmer('english') 5 | stop_words = set(nltk.corpus.stopwords.words('english')) 6 | 7 | def tokenize(text): 8 | tokens = [word for word in nltk.word_tokenize(text) if (len(word) > 3 and len(word.strip('Xx/')) > 2 and len(re.sub('\d+', '', word.strip('Xx/'))) > 3) ] 9 | tokens = map(str.lower, tokens) 10 | stems = [stemmer.stem(item) for item in tokens if (item not in stop_words)] 11 | return stems 12 | -------------------------------------------------------------------------------- /GCP_Serverless_AppEngine/requirements.txt: -------------------------------------------------------------------------------- 1 | flask 2 | scikit-learn==0.22 3 | xgboost==0.90 4 | nltk 5 | joblib 6 | gunicorn 7 | healthcheck 8 | six 9 | 10 | #Run this file with - pip3 install -r requirements.txt -------------------------------------------------------------------------------- /Google_Kubernetes_Engine/ComplaintsFlask.py: -------------------------------------------------------------------------------- 1 | from flask import Flask, jsonify, request 2 | from preprocessing.functions import tokenize 3 | import xgboost as xgb 4 | import joblib 5 | from healthcheck import HealthCheck 6 | 7 | import os 8 | import logging 9 | 10 | logging.basicConfig(format='%(message)s', level=logging.INFO) 11 | app = Flask(__name__) 12 | 13 | target={0:'Debt collection', 1:'Credit card or prepaid card', 2:'Mortgage', 14 | 3:'Checking or savings account', 4:'Student loan', 15 | 5:'Vehicle loan or lease'} 16 | 17 | tfvectorizer = joblib.load('models/tfvectroizer.pkl') 18 | xgb_clf = xgb.Booster({'nthread': 3}) 19 | xgb_clf.load_model('models/complaints.booster') 20 | 21 | logging.info('All models loaded succcessfully') 22 | 23 | health = HealthCheck(app, "/hcheck") 24 | 25 | def howami(): 26 | return True, "I am alive. Thanks for checking.." 
27 | 28 | health.add_check(howami) 29 | 30 | def scorer(text): 31 | encoded_text = tfvectorizer.transform([text]) 32 | score = xgb_clf.predict(xgb.DMatrix(encoded_text)) 33 | return score 34 | 35 | @app.route('/score', methods=['POST']) 36 | def predict_fn(): 37 | text = request.get_json()['text'] 38 | logging.info('Received incoming message - '+ text) 39 | predictions = scorer(text) 40 | predictions = predictions.argmax(axis=1)[0] 41 | return jsonify({'predictions ': str(predictions), 'Category ': target.get(predictions)}) 42 | 43 | @app.route('/') 44 | def hello(): 45 | return 'Welcome to Complaints Prediction Application' 46 | 47 | if __name__ == "__main__": 48 | app.run(host='0.0.0.0', port=int(os.environ.get('PORT', 5000))) -------------------------------------------------------------------------------- /Google_Kubernetes_Engine/Dockerfile: -------------------------------------------------------------------------------- 1 | # lightweight python 2 | FROM python:3.7-slim 3 | 4 | RUN apt-get update && apt-get install -y libgomp1 5 | 6 | # Copy local code to the container image. 7 | ENV APP_HOME /app 8 | WORKDIR $APP_HOME 9 | COPY . ./ 10 | 11 | RUN ls -la $APP_HOME/ 12 | 13 | ENV NLTK_DATA $APP_HOME/nltk_data 14 | 15 | # Install dependencies 16 | RUN pip install -r requirements.txt 17 | 18 | ENV PORT 5000 19 | 20 | # Run the flask service on container startup 21 | #CMD exec gunicorn --bind :$PORT --workers 1 --threads 8 ComplaintsServer 22 | CMD [ "python", "ComplaintsFlask.py" ] 23 | -------------------------------------------------------------------------------- /Google_Kubernetes_Engine/README.md: -------------------------------------------------------------------------------- 1 | This repository contains details and code for deploying machine learning models on Google Kubernetes Engine Engine 2 | 3 | Details of the code and working is covered in detailed in my YouTube channel (AIEngineering) here - https://youtu.be/Hfgla4ViIwU 4 | 5 | Before getting started with deployment the container expects trained models and also downloaded nltk corpus 6 | 7 | For model files you are run associated notebook in this repository or else download the trained models and use it using below 2 command 8 | 9 | wget --no-check-certificate 'https://docs.google.com/uc?export=download&id=1m1bVxlUjrJ_tmWApYJHlk2q5bikGyIxr' -O complaints.booster 10 | 11 | wget --no-check-certificate 'https://docs.google.com/uc?export=download&id=1WURdboJjF27g9bZG_CGCCGSYWi0EvxJR' -O tfvectroizer.pkl 12 | 13 | If you want more details on model training you can check the video here - https://youtu.be/EHt_x8r1exU 14 | 15 | For NLTK you have to download stopwords and tokenizer corpa. One can download it using nltk.download() 16 | 17 | 18 | To deploy the model on Google Kubernetes Engine, execute the below commands 19 | ------------------------------------------------------------------------------------------ 20 | 21 | Building the container image - gcloud builds submit --tag gcr.io//complaintsapi . 
22 | 23 | List the image - gcloud builds list --filter complaints 24 | 25 | Checking logs of built image - gcloud builds log 26 | 27 | Create Kubernetes Cluster - gcloud container clusters create complaints-gke --zone "us-west1-b" --machine-type "n1-standard-1" --num-nodes "1" --service-account srivatsan-gke@srivatsan-project.iam.gserviceaccount.com (Change to your service account) 28 | 29 | Create Kubernetes Deployment - kubectl apply -f deployment.yaml 30 | 31 | Get details on deployed application - kubectl get deployments 32 | 33 | Get info of created pods via deployment - kubectl get pods 34 | 35 | Decribe deployed pod - kubectl describe pod 36 | 37 | Get pod logs - kubectl logs 38 | 39 | Create service for deployment - kubectl apply -f service.yaml 40 | 41 | Get service details - kubectl get services 42 | 43 | Add nodes to cluster - gcloud container clusters resize complaints-gke --num-nodes 3 --zone us-west1-b 44 | 45 | Get details on cluster - gcloud container clusters list 46 | 47 | Scale pod replicas - kubectl scale deployment complaints --replicas 2 48 | 49 | Auto Scale setting in deployment - kubectl autoscale deployment complaints --max 6 --min 2 --cpu-percent 50 50 | 51 | Get details on horizontal pod autoscaler - kubectl get hpa 52 | -------------------------------------------------------------------------------- /Google_Kubernetes_Engine/deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: complaints 5 | spec: 6 | replicas: 1 7 | selector: 8 | matchLabels: 9 | app: complaintsclassifier 10 | template: 11 | metadata: 12 | labels: 13 | app: complaintsclassifier 14 | spec: 15 | containers: 16 | - name: complaints-app 17 | image: gcr.io/srivatsan-project/complaintsapi 18 | ports: 19 | - containerPort: 8080 20 | env: 21 | - name: PORT 22 | value: "8080" -------------------------------------------------------------------------------- /Google_Kubernetes_Engine/preprocessing/functions.py: -------------------------------------------------------------------------------- 1 | import re 2 | import nltk 3 | 4 | stemmer = nltk.stem.SnowballStemmer('english') 5 | stop_words = set(nltk.corpus.stopwords.words('english')) 6 | 7 | def tokenize(text): 8 | tokens = [word for word in nltk.word_tokenize(text) if (len(word) > 3 and len(word.strip('Xx/')) > 2 and len(re.sub('\d+', '', word.strip('Xx/'))) > 3) ] 9 | tokens = map(str.lower, tokens) 10 | stems = [stemmer.stem(item) for item in tokens if (item not in stop_words)] 11 | return stems 12 | -------------------------------------------------------------------------------- /Google_Kubernetes_Engine/requirements.txt: -------------------------------------------------------------------------------- 1 | flask 2 | scikit-learn==0.22 3 | xgboost==0.90 4 | nltk 5 | joblib 6 | gunicorn 7 | healthcheck 8 | six 9 | 10 | #Run this file with - pip3 install -r requirements.txt -------------------------------------------------------------------------------- /Google_Kubernetes_Engine/service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: complaintsclassifier 5 | spec: 6 | type: LoadBalancer 7 | selector: 8 | app: complaintsclassifier 9 | ports: 10 | - port: 80 11 | targetPort: 8080 -------------------------------------------------------------------------------- /README.md: 
-------------------------------------------------------------------------------- 1 | # google_cloud_AI_ML 2 | This repository contains code to learn AI and ML on Google Cloud Platform 3 | 4 | Detailed code walkthrough of different Google Cloud AI services available in this repo can be found in my video playlist - https://www.youtube.com/playlist?list=PL3N9eeOlCrP6Nhv4UFp67IsQ_TVDpXqXK 5 | 6 |
<h2>GCP AI Project Details</h2>
7 | 8 | Project 1 - Telecom Churn on AI Platform Notebook and AI Platform Prediction 9 | 10 | In this project we will build a scikit-learn model in an AI Platform Notebook and deploy it on the AI Platform Prediction service 11 | 12 | Project 2 - Model Deployment on Google Cloud Platform 13 | 14 | In this project we will deploy machine learning models on Google Cloud's serverless platform (Cloud Run) and Google Cloud App Engine 15 | 16 | Project 3 - Building Time Series Models using BigQuery ML and Data Analysis using Cloud Data Studio 17 | 18 | In this project we will analyze the SFO bike share dataset and build multiple time series models using BigQuery ML 19 | 20 | Project 4 - Image Classification using Google Cloud AutoML Vision 21 | 22 | In this project we will train an image classifier using Google Cloud AutoML Vision on the Kaggle Cassava Leaf Disease detection dataset 23 | 24 | Project 5 - Logistic Regression and Feature Engineering using BigQuery ML 25 | 26 | This project is an end-to-end demo of using BigQuery ML for feature engineering and for building a logistic regression model 27 | 28 | 
<h2>Quick Start Projects to learn GCP</h2>
29 | 30 | Speech to Text API - https://www.youtube.com/watch?v=ZkTiKPUSYj4 31 | 32 | Big Query Data Q&A - https://www.youtube.com/watch?v=nhAm7q826qk 33 | 34 | Google Cloud Data Products Overview and Professional ML Engineer Certification Discussion - https://www.youtube.com/watch?v=pIzX7wk56iA&list=PL3N9eeOlCrP6Nhv4UFp67IsQ_TVDpXqXK&index=9 35 | 36 | Google Cloud AI Platform overview - https://www.youtube.com/watch?v=dx5kyKZ7Q0I 37 | 38 | Google data studio getting started - https://www.youtube.com/watch?v=DMRC90qvwFo 39 | 40 | 41 | -------------------------------------------------------------------------------- /Telecom_churn_AI_Platform/README.md: -------------------------------------------------------------------------------- 1 | Video accompanying this notebook can be found here - https://www.youtube.com/watch?v=y63OIKPe52Y 2 | 3 | This repository demonstrated how to get started with Google Cloud AI Platform and further use AI Prediction Service for deploying the trained model 4 | 5 | Churn_model.ipynb is the main file for building the model and further saving the trained model in Google Cloud Storage 6 | 7 | predictor.py is standalone prediction code to test the model before deploying it on AI Prediction Platform 8 | 9 | serving.ipynb is client program used to invoke model deployed on AI prediction service 10 | -------------------------------------------------------------------------------- /Telecom_churn_AI_Platform/data/README.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Telecom_churn_AI_Platform/input.json: -------------------------------------------------------------------------------- 1 | {"instances":["7317-GGVPB", "Male", 0, "Yes", "No", 71, "Yes", "Yes", "Fiber optic", "No", "Yes", "Yes", "Yes", "Yes", "Yes", "Two year", "Yes", "Credit card (automatic)", 108.6, "7690.9"]} -------------------------------------------------------------------------------- /Telecom_churn_AI_Platform/predict_setup.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stdout", 10 | "output_type": "stream", 11 | "text": [ 12 | "Writing predictor.py\n" 13 | ] 14 | } 15 | ], 16 | "source": [ 17 | "%%writefile predictor.py\n", 18 | "\n", 19 | "import os\n", 20 | "\n", 21 | "import numpy as np\n", 22 | "import joblib\n", 23 | "import pandas as pd\n", 24 | "\n", 25 | "class ChurnPredictor(object):\n", 26 | "\n", 27 | " _COLUMN_NAMES=['customerID', 'gender', 'SeniorCitizen', 'Partner', 'Dependents', 'tenure', 'PhoneService', 'MultipleLines', 'InternetService', 'OnlineSecurity', 'OnlineBackup', 'DeviceProtection', 'TechSupport', 'StreamingTV', 'StreamingMovies', 'Contract', 'PaperlessBilling','PaymentMethod', 'MonthlyCharges', 'TotalCharges']\n", 28 | "\n", 29 | " def __init__(self, model):\n", 30 | " self._model = model\n", 31 | "\n", 32 | " def predict(self, instances, **kwargs):\n", 33 | " inputs = pd.DataFrame(data=[instances], columns=self._COLUMN_NAMES)\n", 34 | " outputs = self._model.predict(inputs)\n", 35 | " return outputs.tolist()\n", 36 | "\n", 37 | " @classmethod\n", 38 | " def from_path(cls, model_dir):\n", 39 | " model_path = os.path.join(model_dir, 'model.joblib')\n", 40 | " model = joblib.load(model_path)\n", 41 | " return cls(model)" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | 
"execution_count": 2, 47 | "metadata": {}, 48 | "outputs": [ 49 | { 50 | "name": "stdout", 51 | "output_type": "stream", 52 | "text": [ 53 | "['No']\n" 54 | ] 55 | } 56 | ], 57 | "source": [ 58 | "from predictor import ChurnPredictor\n", 59 | "model = ChurnPredictor.from_path('.')\n", 60 | "instance = ['7317-GGVPB', 'Male', 0, 'Yes', 'No', 71, 'Yes', 'Yes', 'Fiber optic', 'No', 'Yes', 'Yes', 'Yes', 'Yes', 'Yes', 'Two year', 'Yes', 'Credit card (automatic)', 108.6, '7690.9']\n", 61 | "print(model.predict(instance))" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": 3, 67 | "metadata": {}, 68 | "outputs": [ 69 | { 70 | "name": "stdout", 71 | "output_type": "stream", 72 | "text": [ 73 | "Writing setup.py\n" 74 | ] 75 | } 76 | ], 77 | "source": [ 78 | "%%writefile setup.py\n", 79 | "from setuptools import setup\n", 80 | "from setuptools import find_packages\n", 81 | "\n", 82 | "REQUIRED_PACKAGES = ['xgboost']\n", 83 | "\n", 84 | "setup(\n", 85 | " name='custom_predict',\n", 86 | " version='0.1',\n", 87 | " install_requires=REQUIRED_PACKAGES,\n", 88 | " packages=find_packages(),\n", 89 | " include_package_data=True,\n", 90 | " scripts=['predictor.py'])" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": 4, 96 | "metadata": {}, 97 | "outputs": [ 98 | { 99 | "name": "stdout", 100 | "output_type": "stream", 101 | "text": [ 102 | "running sdist\n", 103 | "running egg_info\n", 104 | "creating custom_predict.egg-info\n", 105 | "writing custom_predict.egg-info/PKG-INFO\n", 106 | "writing dependency_links to custom_predict.egg-info/dependency_links.txt\n", 107 | "writing requirements to custom_predict.egg-info/requires.txt\n", 108 | "writing top-level names to custom_predict.egg-info/top_level.txt\n", 109 | "writing manifest file 'custom_predict.egg-info/SOURCES.txt'\n", 110 | "reading manifest file 'custom_predict.egg-info/SOURCES.txt'\n", 111 | "writing manifest file 'custom_predict.egg-info/SOURCES.txt'\n", 112 | "running check\n", 113 | "warning: check: missing required meta-data: url\n", 114 | "\n", 115 | "warning: check: missing meta-data: either (author and author_email) or (maintainer and maintainer_email) must be supplied\n", 116 | "\n", 117 | "creating custom_predict-0.1\n", 118 | "creating custom_predict-0.1/custom_predict.egg-info\n", 119 | "copying files to custom_predict-0.1...\n", 120 | "copying README.md -> custom_predict-0.1\n", 121 | "copying predictor.py -> custom_predict-0.1\n", 122 | "copying setup.py -> custom_predict-0.1\n", 123 | "copying custom_predict.egg-info/PKG-INFO -> custom_predict-0.1/custom_predict.egg-info\n", 124 | "copying custom_predict.egg-info/SOURCES.txt -> custom_predict-0.1/custom_predict.egg-info\n", 125 | "copying custom_predict.egg-info/dependency_links.txt -> custom_predict-0.1/custom_predict.egg-info\n", 126 | "copying custom_predict.egg-info/requires.txt -> custom_predict-0.1/custom_predict.egg-info\n", 127 | "copying custom_predict.egg-info/top_level.txt -> custom_predict-0.1/custom_predict.egg-info\n", 128 | "Writing custom_predict-0.1/setup.cfg\n", 129 | "creating dist\n", 130 | "Creating tar archive\n", 131 | "removing 'custom_predict-0.1' (and everything under it)\n" 132 | ] 133 | } 134 | ], 135 | "source": [ 136 | "!python setup.py sdist --formats=gztar" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": 5, 142 | "metadata": {}, 143 | "outputs": [ 144 | { 145 | "name": "stdout", 146 | "output_type": "stream", 147 | "text": [ 148 | "Copying file://./dist/custom_predict-0.1.tar.gz 
[Content-Type=application/x-tar]...\n", 149 | "/ [1 files][ 1.2 KiB/ 1.2 KiB] \n", 150 | "Operation completed over 1 objects/1.2 KiB. \n" 151 | ] 152 | } 153 | ], 154 | "source": [ 155 | "!gsutil cp ./dist/custom_predict-0.1.tar.gz gs://churn-model-sri/custom_predict-0.1.tar.gz" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": null, 161 | "metadata": {}, 162 | "outputs": [], 163 | "source": [] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": 6, 168 | "metadata": {}, 169 | "outputs": [ 170 | { 171 | "name": "stdout", 172 | "output_type": "stream", 173 | "text": [ 174 | "Using endpoint [https://ml.googleapis.com/]\n", 175 | "Created ml engine model [projects/srivatsan-project/models/ChurnPredictor].\n" 176 | ] 177 | } 178 | ], 179 | "source": [ 180 | "!gcloud beta ai-platform models create ChurnPredictor --regions us-central1 --enable-console-logging" 181 | ] 182 | }, 183 | { 184 | "cell_type": "code", 185 | "execution_count": 7, 186 | "metadata": {}, 187 | "outputs": [ 188 | { 189 | "name": "stdout", 190 | "output_type": "stream", 191 | "text": [ 192 | "Using endpoint [https://ml.googleapis.com/]\n", 193 | "Creating version (this might take a few minutes)......done. \n" 194 | ] 195 | } 196 | ], 197 | "source": [ 198 | "! gcloud --quiet beta ai-platform versions create V1 \\\n", 199 | " --model ChurnPredictor \\\n", 200 | " --runtime-version 2.3 \\\n", 201 | " --python-version 3.7 \\\n", 202 | " --origin gs://churn-model-sri/ \\\n", 203 | " --package-uris gs://churn-model-sri/custom_predict-0.1.tar.gz \\\n", 204 | " --prediction-class predictor.ChurnPredictor " 205 | ] 206 | }, 207 | { 208 | "cell_type": "code", 209 | "execution_count": null, 210 | "metadata": {}, 211 | "outputs": [], 212 | "source": [ 213 | "#{\"instances\": [\"7317-GGVPB\", \"Male\", 0, \"Yes\", \"No\", 71, \"Yes\", \"Yes\", \"Fiber optic\", \"No\", \"Yes\", \"Yes\", \"Yes\", \"Yes\", \"Yes\", \"Two year\", \"Yes\", \"Credit card (automatic)\", 108.6, \"7690.9\"]}" 214 | ] 215 | }, 216 | { 217 | "cell_type": "code", 218 | "execution_count": 8, 219 | "metadata": {}, 220 | "outputs": [ 221 | { 222 | "name": "stdout", 223 | "output_type": "stream", 224 | "text": [ 225 | "Using endpoint [https://ml.googleapis.com/]\n", 226 | "['No']\n" 227 | ] 228 | } 229 | ], 230 | "source": [ 231 | "!gcloud ai-platform predict --model ChurnPredictor --version V1 --json-request input.json" 232 | ] 233 | }, 234 | { 235 | "cell_type": "code", 236 | "execution_count": null, 237 | "metadata": {}, 238 | "outputs": [], 239 | "source": [] 240 | } 241 | ], 242 | "metadata": { 243 | "environment": { 244 | "name": "common-cpu.m59", 245 | "type": "gcloud", 246 | "uri": "gcr.io/deeplearning-platform-release/base-cpu:m59" 247 | }, 248 | "kernelspec": { 249 | "display_name": "Python 3", 250 | "language": "python", 251 | "name": "python3" 252 | }, 253 | "language_info": { 254 | "codemirror_mode": { 255 | "name": "ipython", 256 | "version": 3 257 | }, 258 | "file_extension": ".py", 259 | "mimetype": "text/x-python", 260 | "name": "python", 261 | "nbconvert_exporter": "python", 262 | "pygments_lexer": "ipython3", 263 | "version": "3.7.8" 264 | } 265 | }, 266 | "nbformat": 4, 267 | "nbformat_minor": 4 268 | } 269 | -------------------------------------------------------------------------------- /Telecom_churn_AI_Platform/predictor.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | 4 | import numpy as np 5 | import joblib 6 | import pandas as pd 7 | 8 | class 
ChurnPredictor(object): 9 | 10 | _COLUMN_NAMES=['customerID', 'gender', 'SeniorCitizen', 'Partner', 'Dependents', 'tenure', 'PhoneService', 'MultipleLines', 'InternetService', 'OnlineSecurity', 'OnlineBackup', 'DeviceProtection', 'TechSupport', 'StreamingTV', 'StreamingMovies', 'Contract', 'PaperlessBilling','PaymentMethod', 'MonthlyCharges', 'TotalCharges'] 11 | 12 | def __init__(self, model): 13 | self._model = model 14 | 15 | def predict(self, instances, **kwargs): 16 | inputs = pd.DataFrame(data=[instances], columns=self._COLUMN_NAMES) 17 | outputs = self._model.predict(inputs) 18 | return outputs.tolist() 19 | 20 | @classmethod 21 | def from_path(cls, model_dir): 22 | model_path = os.path.join(model_dir, 'model.joblib') 23 | model = joblib.load(model_path) 24 | return cls(model) 25 | -------------------------------------------------------------------------------- /Telecom_churn_AI_Platform/serving.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import googleapiclient.discovery" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 2, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "instances = [\"7317-GGVPB\", \"Male\", 0, \"Yes\", \"No\", 71, \"Yes\", \"Yes\", \"Fiber optic\", \"No\", \"Yes\", \"Yes\", \"Yes\", \"Yes\", \"Yes\", \"Two year\", \"Yes\", \"Credit card (automatic)\", 108.6, \"7690.9\"]" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 3, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "service = googleapiclient.discovery.build('ml', 'v1')\n", 28 | "name = 'projects/{}/models/{}/versions/{}'.format(\"srivatsan-project\", \"ChurnPredictor\", \"V1\")" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 4, 34 | "metadata": {}, 35 | "outputs": [ 36 | { 37 | "name": "stdout", 38 | "output_type": "stream", 39 | "text": [ 40 | "['No']\n" 41 | ] 42 | } 43 | ], 44 | "source": [ 45 | "response = service.projects().predict(\n", 46 | " name=name,\n", 47 | " body={'instances': instances}\n", 48 | ").execute()\n", 49 | "\n", 50 | "if 'error' in response:\n", 51 | " raise RuntimeError(response['error'])\n", 52 | "else:\n", 53 | " print(response['predictions'])" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": null, 59 | "metadata": {}, 60 | "outputs": [], 61 | "source": [ 62 | "#end" 63 | ] 64 | } 65 | ], 66 | "metadata": { 67 | "environment": { 68 | "name": "common-cpu.m59", 69 | "type": "gcloud", 70 | "uri": "gcr.io/deeplearning-platform-release/base-cpu:m59" 71 | }, 72 | "kernelspec": { 73 | "display_name": "Python 3", 74 | "language": "python", 75 | "name": "python3" 76 | }, 77 | "language_info": { 78 | "codemirror_mode": { 79 | "name": "ipython", 80 | "version": 3 81 | }, 82 | "file_extension": ".py", 83 | "mimetype": "text/x-python", 84 | "name": "python", 85 | "nbconvert_exporter": "python", 86 | "pygments_lexer": "ipython3", 87 | "version": "3.7.8" 88 | } 89 | }, 90 | "nbformat": 4, 91 | "nbformat_minor": 4 92 | } 93 | -------------------------------------------------------------------------------- /Telecom_churn_AI_Platform/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from setuptools import find_packages 3 | 4 | REQUIRED_PACKAGES = ['xgboost'] 5 | 6 | setup( 7 | name='custom_predict', 8 | version='0.1', 9 | 
install_requires=REQUIRED_PACKAGES, 10 | packages=find_packages(), 11 | include_package_data=True, 12 | scripts=['predictor.py']) 13 | -------------------------------------------------------------------------------- /bigquery_logisticsregression/README.md: -------------------------------------------------------------------------------- 1 | This folder contains a demonstration of using BigQuery ML for feature engineering and for building a logistic regression model 2 | 3 | The dataset used for this demonstration is available in the UCI ML repository - https://archive.ics.uci.edu/ml/datasets/Bank+Marketing 4 | 5 | You can watch the video demonstration of the code here - https://youtu.be/pX4P6uG1CuU 6 | 7 | Store the data in a GCS bucket and import it into BigQuery. Once the table is loaded, work through the queries below. Note that default is a reserved word in BigQuery, so that column is referenced with backticks 8 | 9 | Query 1: Query the table to quickly validate the load and get an understanding of the data 10 | 11 | select 12 | age, 13 | job, 14 | marital, 15 | education, 16 | `default`, 17 | balance, 18 | housing, 19 | loan, 20 | contact, 21 | day, 22 | month, 23 | campaign, 24 | pdays, 25 | previous, 26 | poutcome, 27 | y as target 28 | from 29 | `srivatsan-project.bank.bank_marketing` 30 | 31 | Query 2: Check the target value distribution. The target classes in this dataset are imbalanced 32 | 33 | select 34 | y as target, count(*) 35 | from 36 | `srivatsan-project.bank.bank_marketing` 37 | group by y 38 | 39 | Query 3: Split the data into training, evaluation and prediction sets 40 | 41 | select 42 | age, job, marital, education, `default`, balance, housing, loan, 43 | contact, day, month, campaign, pdays, previous, 44 | poutcome, target, 45 | CASE 46 | WHEN split_field < 0.8 THEN 'training' 47 | WHEN split_field = 0.8 THEN 'evaluation' 48 | WHEN split_field > 0.8 THEN 'prediction' 49 | END AS dataframe 50 | from ( 51 | select 52 | age, job, marital, education, `default`, balance, housing, loan, 53 | contact, day, month, campaign, pdays, previous, 54 | poutcome, y as target, 55 | ROUND(ABS(RAND()),1) as split_field 56 | from 57 | `srivatsan-project.bank.bank_marketing` ) 58 | 59 | Query 4: Store the data splits in a new table for use by the model. 
Creating a physical table keeps the splits consistent across queries, since RAND() would otherwise regenerate different splits on every run 60 | 61 | 62 | CREATE OR REPLACE table `bank.marketing_tab` AS 63 | select 64 | age, job, marital, education, `default` as derog, balance, housing, loan, 65 | contact, day, month, campaign, pdays, previous, 66 | poutcome, target, 67 | CASE 68 | WHEN split_field < 0.8 THEN 'training' 69 | WHEN split_field = 0.8 THEN 'evaluation' 70 | WHEN split_field > 0.8 THEN 'prediction' 71 | END AS dataframe 72 | from ( 73 | select 74 | age, job, marital, education, `default`, balance, housing, loan, 75 | contact, day, month, campaign, pdays, previous, 76 | poutcome, y as target, 77 | ROUND(ABS(RAND()),1) as split_field 78 | from 79 | `srivatsan-project.bank.bank_marketing` ) 80 | 81 | Query 5: Validate the target variable distribution across the splits 82 | 83 | select 84 | dataframe, target, count(*) 85 | from `srivatsan-project.bank.marketing_tab` 86 | group by dataframe, target 87 | order by dataframe 88 | 89 | Query 6: Create the logistic regression model 90 | 91 | 92 | CREATE OR REPLACE MODEL 93 | `bank.marketing_model` 94 | OPTIONS 95 | ( model_type='LOGISTIC_REG', 96 | auto_class_weights=TRUE, 97 | input_label_cols=['target'] 98 | ) AS 99 | SELECT 100 | * EXCEPT(dataframe) 101 | FROM 102 | `bank.marketing_tab` 103 | WHERE 104 | dataframe = 'training' 105 | 106 | Query 7: Get training, feature and weight info from the trained model 107 | 108 | SELECT 109 | * 110 | FROM 111 | ML.TRAINING_INFO(MODEL `bank.marketing_model`) 112 | 113 | SELECT 114 | * 115 | FROM 116 | ML.FEATURE_INFO(MODEL `bank.marketing_model`) 117 | 118 | 119 | SELECT 120 | * 121 | FROM 122 | ML.WEIGHTS(MODEL `bank.marketing_model`) 123 | 124 | Query 8: Evaluate the trained model on the evaluation split 125 | 126 | 127 | SELECT 128 | * 129 | FROM 130 | ML.EVALUATE (MODEL `bank.marketing_model`, 131 | ( 132 | SELECT 133 | * 134 | FROM 135 | `bank.marketing_tab` 136 | WHERE 137 | dataframe = 'evaluation' 138 | ) 139 | ) 140 | 141 | Query 9: Predict on new data using the trained model 142 | 143 | 144 | SELECT 145 | * 146 | FROM 147 | ML.PREDICT (MODEL `bank.marketing_model`, 148 | ( 149 | SELECT 150 | * 151 | FROM 152 | `bank.marketing_tab` 153 | WHERE 154 | dataframe = 'prediction' 155 | ) 156 | ) 157 | 158 | 159 | Query 10: Add feature engineering to the model to improve performance 160 | 161 | 162 | CREATE OR REPLACE MODEL 163 | `bank.marketing_model_feat` 164 | TRANSFORM( 165 | ML.QUANTILE_BUCKETIZE(age,5) OVER() AS bucketized_age, 166 | ML.FEATURE_CROSS(STRUCT(job, education)) job_education, 167 | marital, balance, housing, loan, 168 | contact, day, month, pdays, previous, 169 | poutcome, target) 170 | OPTIONS 171 | ( model_type='LOGISTIC_REG', 172 | auto_class_weights=TRUE, 173 | input_label_cols=['target'] 174 | ) AS 175 | SELECT 176 | * EXCEPT(dataframe, campaign, derog) 177 | FROM 178 | `bank.marketing_tab` 179 | WHERE 180 | dataframe = 'training' 181 | 182 | Query 11: Get training and feature info from the newly trained model 183 | 184 | SELECT 185 | * 186 | FROM 187 | ML.TRAINING_INFO(MODEL `bank.marketing_model_feat`) 188 | 189 | SELECT 190 | * 191 | FROM 192 | ML.FEATURE_INFO(MODEL `bank.marketing_model_feat`) 193 | 194 | SELECT 195 | * 196 | FROM 197 | ML.WEIGHTS(MODEL `bank.marketing_model_feat`) 198 | 199 | Query 12: Evaluate the feature engineered model 200 | 201 | 202 | SELECT 203 | * 204 | FROM 205 | ML.EVALUATE (MODEL `bank.marketing_model_feat`, 206 | ( 207 | SELECT 208 | * 209 | FROM 210 | `bank.marketing_tab` 211 | WHERE 212 | dataframe = 'evaluation' 213 | ) 214 | ) 215 | 216 | 
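217 | Query 13 (optional): Inspect the confusion matrix of the feature engineered model on the evaluation split. This is a minimal follow-up sketch, assuming the same `bank.marketing_model_feat` model and `bank.marketing_tab` table created above; ML.CONFUSION_MATRIX reports actual vs predicted label counts at a 0.5 classification threshold unless a different threshold is supplied 218 | 219 | SELECT 220 | * 221 | FROM 222 | ML.CONFUSION_MATRIX (MODEL `bank.marketing_model_feat`, 223 | ( 224 | SELECT 225 | * 226 | FROM 227 | `bank.marketing_tab` 228 | WHERE 229 | dataframe = 'evaluation' 230 | ) 231 | ) 232 | 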
-------------------------------------------------------------------------------- /bigqueryml_datastudio/README.md: -------------------------------------------------------------------------------- 1 | 2 | This folder contains the SQL queries for my live session on BigQuery, BigQuery ML and Google Data Studio 3 | 4 | The live session video is available here - https://youtu.be/5l4Qb6Fy3E0 5 | 6 | The SQL queries below analyze the SFO bikeshare dataset and build time series models for multiple stations using BigQuery ML 7 | 8 | Query 1: Select a limited set of columns for analysis in Data Studio 9 | 10 | SELECT 11 | start_date, duration_sec, start_station_name, subscriber_type, zip_code 12 | FROM 13 | `bigquery-public-data.san_francisco_bikeshare.bikeshare_trips` 14 | 15 | Query 2: Aggregate daily trip counts per station to understand the time series pattern 16 | 17 | SELECT 18 | start_station_name, 19 | EXTRACT(DATE from start_date) AS date, 20 | COUNT(*) AS num_trips 21 | FROM 22 | `bigquery-public-data.san_francisco_bikeshare.bikeshare_trips` 23 | GROUP BY start_station_name, date 24 | order by start_station_name, date 25 | 26 | Query 3: Find the minimum and maximum trip dates for each station 27 | 28 | SELECT 29 | start_station_name, 30 | min(EXTRACT(DATE from start_date)) as min_date, 31 | max(EXTRACT(DATE from start_date)) as max_date 32 | FROM 33 | `bigquery-public-data.san_francisco_bikeshare.bikeshare_trips` 34 | WHERE 35 | start_station_name IN ('Harry Bridges Plaza (Ferry Building)','Embarcadero at Sansome','2nd at Townsend') 36 | group by start_station_name 37 | 38 | Query 4: Create time series models for multiple stations using the SFO bikeshare data 39 | 40 | CREATE OR REPLACE MODEL bike_share_ml.sfo_bike 41 | OPTIONS 42 | (model_type = 'ARIMA', 43 | time_series_timestamp_col = 'date', 44 | time_series_data_col = 'num_trips', 45 | time_series_id_col = 'start_station_name' 46 | ) AS 47 | SELECT 48 | start_station_name, 49 | EXTRACT(DATE from start_date) AS date, 50 | COUNT(*) AS num_trips 51 | FROM 52 | `bigquery-public-data.san_francisco_bikeshare.bikeshare_trips` 53 | WHERE 54 | start_station_name IN ('Harry Bridges Plaza (Ferry Building)','Embarcadero at Sansome','2nd at Townsend') AND 55 | EXTRACT(DATE from start_date) <= '2016-07-31' 56 | GROUP BY start_station_name, date 57 | 58 | Query 5: Evaluate the trained model 59 | 60 | SELECT 61 | * 62 | FROM 63 | ML.EVALUATE(MODEL `bike_share_ml.sfo_bike`) 64 | 65 | Query 6: Inspect the trained model coefficients 66 | 67 | SELECT 68 | * 69 | FROM 70 | ML.ARIMA_COEFFICIENTS(MODEL `bike_share_ml.sfo_bike`) 71 | 72 | 73 | Query 7: Forecast 3 future time periods 74 | 75 | SELECT 76 | * 77 | FROM 78 | ML.FORECAST(MODEL `bike_share_ml.sfo_bike`, 79 | STRUCT(3 AS horizon, 0.9 AS confidence_level)) 80 | 81 | Query 8: Combine historical data with a 365 day forecast for visualization 82 | 83 | SELECT 84 | start_station_name, 85 | date AS timestamp, 86 | num_trips AS history_value, 87 | NULL AS forecast_value, 88 | NULL AS prediction_interval_lower_bound, 89 | NULL AS prediction_interval_upper_bound 90 | FROM 91 | ( 92 | SELECT 93 | start_station_name, 94 | EXTRACT(DATE from start_date) AS date, 95 | COUNT(*) AS num_trips 96 | FROM 97 | `bigquery-public-data.san_francisco_bikeshare.bikeshare_trips` 98 | WHERE 99 | start_station_name IN ('Harry Bridges Plaza (Ferry Building)','Embarcadero at Sansome','2nd at Townsend') 100 | GROUP BY start_station_name, date 101 | ) 102 | UNION ALL 103 | SELECT 104 | start_station_name, 105 | EXTRACT(DATE from forecast_timestamp) AS 
timestamp, 106 | NULL AS history_value, 107 | forecast_value, 108 | prediction_interval_lower_bound, 109 | prediction_interval_upper_bound 110 | FROM 111 | ML.FORECAST(MODEL `bike_share_ml.sfo_bike`, 112 | STRUCT(365 AS horizon, 0.9 AS confidence_level)) 113 | WHERE 114 | start_station_name IN ('Harry Bridges Plaza (Ferry Building)','Embarcadero at Sansome','2nd at Townsend') 115 | -------------------------------------------------------------------------------- /gke_autopilot/Dockerfile: -------------------------------------------------------------------------------- 1 | # lightweight python 2 | FROM python:3.7-slim 3 | 4 | RUN apt-get update 5 | 6 | # Copy local code to the container image. 7 | ENV APP_HOME /app 8 | WORKDIR $APP_HOME 9 | COPY . ./ 10 | 11 | RUN ls -la $APP_HOME/ 12 | 13 | # Install dependencies 14 | RUN pip install -r requirements.txt 15 | 16 | # Run the streamlit on container startup 17 | CMD [ "streamlit", "run","imgwebapp.py" ] -------------------------------------------------------------------------------- /gke_autopilot/README.md: -------------------------------------------------------------------------------- 1 | TBF 2 | -------------------------------------------------------------------------------- /gke_autopilot/deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: imgclass 5 | spec: 6 | replicas: 2 7 | selector: 8 | matchLabels: 9 | app: imageclassifier 10 | template: 11 | metadata: 12 | labels: 13 | app: imageclassifier 14 | spec: 15 | containers: 16 | - name: cv-app 17 | image: gcr.io/srivatsan-project/imgclassifier 18 | ports: 19 | - containerPort: 8501 -------------------------------------------------------------------------------- /gke_autopilot/imgwebapp.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | warnings.simplefilter(action='ignore', category=FutureWarning) 3 | 4 | import tensorflow as tf 5 | import numpy as np 6 | import streamlit as st 7 | from PIL import Image 8 | import requests 9 | from io import BytesIO 10 | 11 | st.set_option('deprecation.showfileUploaderEncoding', False) 12 | st.title("Bean Image Classifier") 13 | st.text("Provide URL of bean Image for image classification") 14 | 15 | @st.cache(allow_output_mutation=True) 16 | def load_model(): 17 | model = tf.keras.models.load_model('./models') 18 | return model 19 | 20 | with st.spinner('Loading Model Into Memory....'): 21 | model = load_model() 22 | 23 | classes=['angular_leaf_spot','bean_rust','healthy'] 24 | 25 | def scale(image): 26 | image = tf.cast(image, tf.float32) 27 | image /= 255.0 28 | 29 | return tf.image.resize(image,[224,224]) 30 | 31 | def decode_img(image): 32 | img = tf.image.decode_jpeg(image, channels=3) 33 | img = scale(img) 34 | return np.expand_dims(img, axis=0) 35 | 36 | #path = st.text_input('Enter Image URL to Classify.. 
','http://barmac.com.au/wp-content/uploads/sites/3/2016/01/Angular-Leaf-Spot-Beans1.jpg') 37 | img_file_buffer = st.file_uploader("Upload Image to Classify....") 38 | 39 | if img_file_buffer is not None: 40 | image = img_file_buffer 41 | image_out = Image.open(img_file_buffer) 42 | image = image.getvalue() 43 | else: 44 | test_image = 'http://barmac.com.au/wp-content/uploads/sites/3/2016/01/Angular-Leaf-Spot-Beans1.jpg' 45 | image = requests.get(test_image).content 46 | image_out = Image.open(BytesIO(image)) 47 | 48 | st.write("Predicted Class :") 49 | with st.spinner('classifying.....'): 50 | label =np.argmax(model.predict(decode_img(image)),axis=1) 51 | st.write(classes[label[0]]) 52 | st.write("") 53 | st.image(image_out, caption='Classifying Bean Image', use_column_width=True) -------------------------------------------------------------------------------- /gke_autopilot/models/assets/README.md: -------------------------------------------------------------------------------- 1 | TBF 2 | -------------------------------------------------------------------------------- /gke_autopilot/models/saved_model.pb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srivatsan88/google_cloud_AI_ML/1ef1fdb2208cb5db66d43a39a46ffb3597255e0c/gke_autopilot/models/saved_model.pb -------------------------------------------------------------------------------- /gke_autopilot/models/variables/variables.data-00000-of-00002: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srivatsan88/google_cloud_AI_ML/1ef1fdb2208cb5db66d43a39a46ffb3597255e0c/gke_autopilot/models/variables/variables.data-00000-of-00002 -------------------------------------------------------------------------------- /gke_autopilot/models/variables/variables.data-00001-of-00002: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srivatsan88/google_cloud_AI_ML/1ef1fdb2208cb5db66d43a39a46ffb3597255e0c/gke_autopilot/models/variables/variables.data-00001-of-00002 -------------------------------------------------------------------------------- /gke_autopilot/models/variables/variables.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srivatsan88/google_cloud_AI_ML/1ef1fdb2208cb5db66d43a39a46ffb3597255e0c/gke_autopilot/models/variables/variables.index -------------------------------------------------------------------------------- /gke_autopilot/requirements.txt: -------------------------------------------------------------------------------- 1 | tensorflow 2 | streamlit 3 | pillow 4 | numpy 5 | requests 6 | 7 | #Run this file with - pip3 install -r requirements.txt -------------------------------------------------------------------------------- /gke_autopilot/service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: imageclassifier 5 | spec: 6 | type: LoadBalancer 7 | selector: 8 | app: imageclassifier 9 | ports: 10 | - port: 80 11 | targetPort: 8501 --------------------------------------------------------------------------------
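The gke_autopilot README is still marked TBF, so here is a minimal deployment sketch for the files above. The project id and cluster name below are placeholders, and the image tag you build must match the gcr.io/srivatsan-project/imgclassifier image referenced in deployment.yaml (or update deployment.yaml to point at your own tag):

    gcloud builds submit --tag gcr.io/<your-project-id>/imgclassifier
    gcloud container clusters create-auto <autopilot-cluster-name> --region us-central1
    gcloud container clusters get-credentials <autopilot-cluster-name> --region us-central1
    kubectl apply -f deployment.yaml
    kubectl apply -f service.yaml
    kubectl get service imageclassifier

Once the LoadBalancer service reports an external IP, the Streamlit app defined in imgwebapp.py is reachable on port 80 of that address, which forwards to the container on port 8501.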