├── lambda_config.py ├── docker_build_lambda_env.sh ├── Dockerfile.ultimate_ml ├── LICENSE ├── README.md ├── add_lambda_and_upload.sh ├── .gitignore ├── Dockerfile.aws_ami_for_lambda ├── lambda_function.py └── Serverless-ML-API-example.ipynb /lambda_config.py: -------------------------------------------------------------------------------- 1 | S3_BUCKET_NAME="lambda-deploy-ml" 2 | VENV_EXTRA_FILE="venv_extra_201805041413.zip" 3 | MODEL_FILE="xgb_mnist_pipe.pckl" 4 | REGION_NAME="us-east-2" 5 | -------------------------------------------------------------------------------- /docker_build_lambda_env.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # description: This script is used to build the AWS lambda function's python environment with Docker, 4 | # and copy the zipped environment out of the container (as well as delete it, but not the image). 5 | # author: Roy Shilkrot 6 | # date: 4/16/2018 7 | # 8 | # Assumes user has Docker installed, and the file `Dockerfile.aws_ami_for_lambda` exists 9 | # 10 | # Usage: ./docker_build_lambda_env.sh 11 | # 12 | 13 | set -ex 14 | 15 | OUTPUT_VENV=./venv_$(date +"%Y%m%d%H%M").zip 16 | OUTPUT_VENV_EXTRA=./venv_extra_$(date +"%Y%m%d%H%M").zip 17 | DOCKER_IMAGE=ami_lambda_env 18 | 19 | docker build -f Dockerfile.aws_ami_for_lambda -t ${DOCKER_IMAGE} . 20 | CONTAINER_ID=$(docker run -d ${DOCKER_IMAGE}:latest) 21 | docker cp ${CONTAINER_ID}:/lambda_build/outputs/venv.zip ${OUTPUT_VENV} 22 | docker cp ${CONTAINER_ID}:/lambda_build/outputs/venv_extra.zip ${OUTPUT_VENV_EXTRA} 23 | docker rm ${CONTAINER_ID} 24 | 25 | echo ${OUTPUT_VENV} 26 | echo ${OUTPUT_VENV_EXTRA} -------------------------------------------------------------------------------- /Dockerfile.ultimate_ml: -------------------------------------------------------------------------------- 1 | FROM tensorflow/tensorflow:latest-py3 2 | RUN apt-get update && apt-get install -y python-opencv python-skimage git wget cmake 3 | 4 | RUN apt-get install -y python3-pip 5 | RUN python3 -m pip install --upgrade pip 6 | 7 | RUN python3 -m pip install ipykernel 8 | RUN python3 -m ipykernel install --user 9 | 10 | RUN python3 -m pip install requests ipywidgets RISE nbconvert opencv-python opencv-contrib-python 11 | 12 | RUN jupyter nbextension enable --py widgetsnbextension 13 | RUN jupyter-nbextension install rise --py --sys-prefix && jupyter-nbextension enable rise --py --sys-prefix 14 | 15 | RUN git clone git://github.com/keras-team/keras.git && python3 -m pip install keras[tests] && rm -rf keras 16 | 17 | RUN python3 -m pip install seaborn progressbar2 xgboost 18 | 19 | RUN python3 -m pip install http://download.pytorch.org/whl/cu80/torch-0.3.1-cp35-cp35m-linux_x86_64.whl 20 | RUN python3 -m pip install torchvision 21 | 22 | # passed = 'docker4ml' 23 | CMD ["/run_jupyter.sh", "--allow-root", "--NotebookApp.token=''", "--NotebookApp.password='sha1:1b28d13060f7:fa96a61b082354475b51b81b4dc3b8e293e61511'"] -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Roy Shilkrot 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Docker4MLTutorial 2 | A hands on tutorial on Docker and AWS Lambda for Machine Learning applications. 3 | 4 | Videos and slides: http://hi.cs.stonybrook.edu/teaching/docker4ml 5 | 6 | ## Using the scripts 7 | General sequence: 8 | 1. Use `Dockerfile.ultimate_ml` to build a container with your favorite ML framework. 9 | 2. Build a model you'd like to serve in the cloud. You can follow the script in `Serverless-ML-API-example.ipynb` to build an MNIST classifier with `sklearn` and `XGBoost`. Pickle the model to a file. 10 | 3. Modify `Dockerfile.aws_ami_for_lambda` to reflect a python execution environment for AWS Lambda that can run your model (e.g. if you used XGBoost you'd want XGBoost installed) 11 | 4. Use `docker_build_lambda_env.sh` to build the AWS Linux container and extract the packaged virtual environment files (`venv*.zip`) 12 | 5. Modify `lambda_function.py` to perform the prediction on your model depending on the inputs and outputs you expect (e.g. input MNIST digit pixels in a JSON array and output the predicted digit) 13 | 6. Create a Lambda function on AWS, give it a unique name. 14 | 7. Create an S3 bucket on AWS, give it a unique name. 15 | 8. Modify `lambda_config.py` with the names of the function, S3 bucket, region, etc. 16 | 9. Use `add_lambda_and_upload.sh` to upload the Lambda code, environment and ML model to S3 and then deploy everything to your lambda function. 17 | 10. Optionally create an AWS API Gateway to trigger the Lambda function on an HTTP request. 18 | 19 | -------------------------------------------------------------------------------- /add_lambda_and_upload.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # description: This script is used to build the AWS lambda function given the lambda_function.py script 4 | # and the venv.zip file with the environment, and upload to S3. 5 | # author: Roy Shilkrot 6 | # date: 11/13/2017 7 | # 8 | # Assumes user has AWS CLI installed: https://aws.amazon.com/cli/ 9 | # This assumes the user has `aid` profile configured for the AWS CLI, look in ~/.aws/credentials 10 | # 11 | # Usage: ./add_lambda_and_upload.sh 12 | # 13 | 14 | set -ex 15 | 16 | if [ "$#" -ne 5 ]; then 17 | echo "Usage: ./add_lambda_and_upload.sh " 18 | exit 19 | fi 20 | 21 | BUCKET_NAME=lambda-deploy-ml 22 | REGION_NAME=us-east-2 23 | AWS_CLI_PROFILE=default 24 | 25 | # prepare configuration file for the lambda function 26 | cat << EOF > lambda_config.py 27 | S3_BUCKET_NAME="${BUCKET_NAME}" 28 | VENV_EXTRA_FILE="$2" 29 | MODEL_FILE="$5" 30 | REGION_NAME="${REGION_NAME}" 31 | EOF 32 | 33 | # update the lambda venv zip with the function code and config 34 | NEW_FILE=lambda_venv_$(date +%s).zip 35 | cp $1 $NEW_FILE 36 | zip -ju $NEW_FILE $3 lambda_config.py 37 | 38 | # --- upload to S3 39 | # remove old versions: 40 | aws --profile ${AWS_CLI_PROFILE} s3 rm s3://${BUCKET_NAME}/ --recursive --exclude "*" --include "lambda_venv*" 41 | # lambda function: 42 | aws --profile ${AWS_CLI_PROFILE} s3 cp $NEW_FILE s3://${BUCKET_NAME}/ 43 | # extra environment 44 | aws --profile ${AWS_CLI_PROFILE} s3 sync . s3://${BUCKET_NAME}/ --exclude '*' --include "$2" --include "$5" # upload if not exists 45 | 46 | # reload the lambda functions 47 | aws --profile ${AWS_CLI_PROFILE} --region ${REGION_NAME} lambda update-function-code --function-name $4 --s3-bucket ${BUCKET_NAME} --s3-key "$NEW_FILE" 48 | 49 | rm $NEW_FILE 50 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | 103 | # General 104 | .DS_Store 105 | .AppleDouble 106 | .LSOverride 107 | 108 | # Icon must end with two \r 109 | Icon 110 | 111 | 112 | # Thumbnails 113 | ._* 114 | 115 | # Files that might appear in the root of a volume 116 | .DocumentRevisions-V100 117 | .fseventsd 118 | .Spotlight-V100 119 | .TemporaryItems 120 | .Trashes 121 | .VolumeIcon.icns 122 | .com.apple.timemachine.donotpresent 123 | 124 | # Directories potentially created on remote AFP share 125 | .AppleDB 126 | .AppleDesktop 127 | Network Trash Folder 128 | Temporary Items 129 | .apdisk 130 | 131 | -------------------------------------------------------------------------------- /Dockerfile.aws_ami_for_lambda: -------------------------------------------------------------------------------- 1 | # 2 | # description: This dockerfile builds the AWS lambda execution environment for running our risk service. 3 | # author: Roy Shilkrot 4 | # date: 4/14/2018 5 | # 6 | FROM amazonlinux:latest 7 | 8 | RUN yum update -y 9 | RUN yum install -y \ 10 | atlas-devel \ 11 | atlas-sse3-devel \ 12 | blas-devel \ 13 | gcc44 \ 14 | gcc44-c++ \ 15 | gcc44-gfortran \ 16 | lapack-devel \ 17 | python36-devel \ 18 | python36-virtualenv \ 19 | findutils \ 20 | zip 21 | 22 | 23 | RUN virtualenv-3.6 \ 24 | --python /usr/bin/python3.6 /lambda_build \ 25 | --always-copy \ 26 | --no-site-packages 27 | 28 | ENV VIRTUAL_ENV="/lambda_build" 29 | ENV ENV_EXTRA="$VIRTUAL_ENV/env_extra" 30 | ENV SITE_PACKAGES="$VIRTUAL_ENV/lib64/python3.6/site-packages/" 31 | RUN mkdir -p $ENV_EXTRA 32 | 33 | RUN /bin/bash -c "source /lambda_build/bin/activate && python3 -m pip install --upgrade pip wheel setuptools" 34 | RUN /bin/bash -c "source /lambda_build/bin/activate && PYTHONPATH=$PYTHONPATH:$ENV_EXTRA python3 -m pip install -t $ENV_EXTRA/ --no-binary numpy numpy" 35 | RUN /bin/bash -c "source /lambda_build/bin/activate && PYTHONPATH=$PYTHONPATH:$ENV_EXTRA python3 -m pip install -t $ENV_EXTRA/ --no-binary scipy scipy" 36 | RUN /bin/bash -c "source /lambda_build/bin/activate && PYTHONPATH=$PYTHONPATH:$ENV_EXTRA python3 -m pip install -t $ENV_EXTRA/ --no-binary sklean sklearn" 37 | # RUN /bin/bash -c "source /lambda_build/bin/activate && PYTHONPATH=$PYTHONPATH:$ENV_EXTRA python3 -m pip install -t $ENV_EXTRA/ pandas" 38 | # RUN /bin/bash -c "source /lambda_build/bin/activate && PYTHONPATH=$PYTHONPATH:$ENV_EXTRA python3 -m pip install -t $ENV_EXTRA/ sklearn_pandas" 39 | # RUN /bin/bash -c "source /lambda_build/bin/activate && PYTHONPATH=$PYTHONPATH:$ENV_EXTRA python3 -m pip install -t $ENV_EXTRA/ tensorflow" 40 | # RUN yum install -y git 41 | # RUN git clone git://github.com/keras-team/keras.git && python3 -m pip install -t $ENV_EXTRA/ keras[tests] && rm -rf keras 42 | 43 | RUN yum install -y gcc48 gcc48-c++ 44 | 45 | RUN /bin/bash -c "source /lambda_build/bin/activate && PYTHONPATH=$PYTHONPATH:$ENV_EXTRA python3 -m pip install -t $ENV_EXTRA/ xgboost" 46 | # RUN /bin/bash -c "source /lambda_build/bin/activate && PYTHONPATH=$PYTHONPATH:$ENV_EXTRA python3 -m pip install -t $ENV_EXTRA/ imblearn" 47 | 48 | ENV libdir="$ENV_EXTRA/lib/" 49 | RUN mkdir -p $libdir 50 | RUN cp /usr/lib64/atlas/* $libdir 51 | RUN cp /usr/lib64/libquadmath.so.0 $libdir 52 | RUN cp /usr/lib64/libgfortran.so.3 $libdir 53 | 54 | RUN mkdir -p $VIRTUAL_ENV/outputs 55 | RUN find $ENV_EXTRA/ -name "*.so" | xargs strip 56 | 57 | RUN rm -rf $ENV_EXTRA/setup_tools* $ENV_EXTRA/wheel* 58 | 59 | RUN echo UEsFBgAAAAAAAAAAAAAAAAAAAAAAAA== | base64 -d > $VIRTUAL_ENV/outputs/venv.zip 60 | RUN pushd $SITE_PACKAGES && zip -r -9 -q $VIRTUAL_ENV/outputs/venv.zip * ; popd 61 | RUN echo UEsFBgAAAAAAAAAAAAAAAAAAAAAAAA== | base64 -d > $VIRTUAL_ENV/outputs/venv_extra.zip 62 | RUN pushd $ENV_EXTRA && zip -r -9 -q $VIRTUAL_ENV/outputs/venv_extra.zip * ; popd 63 | 64 | CMD ["/bin/bash"] -------------------------------------------------------------------------------- /lambda_function.py: -------------------------------------------------------------------------------- 1 | import boto3 2 | import botocore.config 3 | import sys 4 | import logging 5 | import traceback 6 | import random 7 | import json 8 | import pickle 9 | import os 10 | import ctypes 11 | import uuid 12 | import zipfile 13 | import io 14 | import imp 15 | import math 16 | 17 | logger = logging.getLogger() 18 | logger.setLevel(logging.DEBUG) 19 | 20 | from lambda_config import * 21 | 22 | config = botocore.config.Config() 23 | config.region_name = REGION_NAME 24 | config.connection_timeout = 60 25 | config.read_timeout = 60 26 | 27 | s3_client = boto3.client('s3') 28 | 29 | def download_peripherals(keys): 30 | global s3_client 31 | 32 | #load files from S3 33 | download_path = {} 34 | for k in keys: 35 | download_path[k] = '/tmp/{}'.format(k) 36 | if not os.path.exists(download_path[k]): 37 | logger.debug('download %s'%(k)) 38 | s3_client.download_file(S3_BUCKET_NAME, k, download_path[k]) 39 | else: 40 | logger.debug('file `%s` already exists, skip download'%(download_path[k])) 41 | 42 | return download_path 43 | 44 | download_keys = [MODEL_FILE, VENV_EXTRA_FILE] 45 | 46 | logger.debug("download dependencies from S3") 47 | 48 | try: 49 | download_paths = download_peripherals(download_keys) 50 | except Exception as e: 51 | logger.error(traceback.format_exc()) 52 | logger.error("ERROR: Unexpected error: Could not download dependencies from S3.") 53 | sys.exit() 54 | 55 | logger.debug("unzip python dependencies: %s"%(download_paths[VENV_EXTRA_FILE])) 56 | 57 | try: 58 | # unzip py dependencies from S3 59 | with open(download_paths[VENV_EXTRA_FILE],'rb') as tf: 60 | # rewind the file 61 | tf.seek(0) 62 | 63 | # Read the file as a zipfile and process the members 64 | with zipfile.ZipFile(tf, mode='r') as zipf: 65 | for zmember in zipf.infolist(): 66 | if not os.path.exists('/tmp/lambda_packages/%s'%(zmember.filename)): 67 | zipf.extract(zmember, "/tmp/lambda_packages/") 68 | 69 | except Exception as e: 70 | logger.error(traceback.format_exc()) 71 | logger.error("ERROR: Unexpected error: Could not unzip python dependencies.") 72 | sys.exit() 73 | 74 | logger.debug("load shared libraries") 75 | 76 | os.chdir('/tmp/lambda_packages/lib/') 77 | for libfile in ['/tmp/lambda_packages/lib/libatlas.so.3', 78 | '/tmp/lambda_packages/lib/libcblas.so.3', 79 | '/tmp/lambda_packages/lib/libquadmath.so.0', 80 | '/tmp/lambda_packages/lib/libgfortran.so.3', 81 | '/tmp/lambda_packages/lib/libf77blas.so.3' 82 | ]: 83 | logger.debug('load lib: %s'%(libfile)) 84 | ctypes.cdll.LoadLibrary(libfile) 85 | 86 | for d, _, files in os.walk('/tmp/lambda_packages/lib'): 87 | for f in files: 88 | if f.endswith('.a'): 89 | continue 90 | logger.debug("load %s"%(os.path.join(d, f))) 91 | ctypes.cdll.LoadLibrary(os.path.join(d, f)) 92 | 93 | logger.debug("import downloaded python dependecies") 94 | 95 | sys.path.append("/tmp/lambda_packages/") 96 | 97 | import numpy as np 98 | 99 | mnist_model = pickle.load(open(download_paths[MODEL_FILE],'rb')) 100 | 101 | def predict(txbody): 102 | X = np.expand_dims(txbody['digit'], axis=0) 103 | return int(mnist_model.predict(X)[0]) 104 | 105 | def respond(err, res=None): 106 | return { 107 | 'statusCode': '400' if err else '200', 108 | 'body': err.message if err else json.dumps(res), 109 | 'headers': { 110 | 'Content-Type': 'application/json', 111 | }, 112 | } 113 | 114 | def lambda_handler(event, context): 115 | txbody_json = event['body'] 116 | 117 | txbody = json.loads(txbody_json) 118 | 119 | return respond(False, {"prediction": predict(txbody)}) 120 | -------------------------------------------------------------------------------- /Serverless-ML-API-example.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Serverless API example" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "import..." 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 1, 20 | "metadata": {}, 21 | "outputs": [ 22 | { 23 | "name": "stderr", 24 | "output_type": "stream", 25 | "text": [ 26 | "/usr/local/lib/python3.5/dist-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.\n", 27 | " from ._conv import register_converters as _register_converters\n", 28 | "Using TensorFlow backend.\n" 29 | ] 30 | } 31 | ], 32 | "source": [ 33 | "from __future__ import print_function\n", 34 | "from keras.datasets import mnist\n", 35 | "from sklearn.pipeline import Pipeline\n", 36 | "from sklearn import metrics\n", 37 | "import xgboost\n", 38 | "from sklearn.decomposition import PCA\n", 39 | "import pickle\n", 40 | "import numpy as np\n", 41 | "import matplotlib.pyplot as plt" 42 | ] 43 | }, 44 | { 45 | "cell_type": "markdown", 46 | "metadata": {}, 47 | "source": [ 48 | "## MNIST dataset" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 2, 54 | "metadata": {}, 55 | "outputs": [ 56 | { 57 | "name": "stdout", 58 | "output_type": "stream", 59 | "text": [ 60 | "Downloading data from https://s3.amazonaws.com/img-datasets/mnist.npz\n", 61 | "11493376/11490434 [==============================] - 1s 0us/step\n", 62 | "x_train shape: (60000, 784)\n", 63 | "60000 train samples\n", 64 | "10000 test samples\n" 65 | ] 66 | } 67 | ], 68 | "source": [ 69 | "# the data, split between train and test sets\n", 70 | "(x_train, y_train), (x_test, y_test) = mnist.load_data()\n", 71 | "\n", 72 | "x_train = x_train.reshape(60000, 784)\n", 73 | "x_test = x_test.reshape(10000, 784)\n", 74 | "\n", 75 | "x_train = x_train.astype('float32')\n", 76 | "x_test = x_test.astype('float32')\n", 77 | "x_train /= 255\n", 78 | "x_test /= 255\n", 79 | "print('x_train shape:', x_train.shape)\n", 80 | "print(x_train.shape[0], 'train samples')\n", 81 | "print(x_test.shape[0], 'test samples')" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": 3, 87 | "metadata": {}, 88 | "outputs": [ 89 | { 90 | "data": { 91 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAP8AAAD8CAYAAAC4nHJkAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAADaVJREFUeJzt3X+MXOV1xvHnib1e4jU0GILrGgcnhKA6NDjVxiSCVo4IKZAgEyWhWKrlSpRFLUhQRW2Rq6iWWqUUhSC3SSM5wY1BBGgCCCtx01CrrYVKHS/I2IBpTajT2DVewLQ2AfwDn/6x19EGdt5d5ted9fl+pNXO3HPv3KPrfXzvzDszryNCAPJ5R90NAKgH4QeSIvxAUoQfSIrwA0kRfiApwg8kRfiBpAg/kNT0bu5shvvjJA10c5dAKq/rZzochzyZdVsKv+1LJa2WNE3SNyPiltL6J2lAF/jiVnYJoGBzbJz0uk1f9tueJulrki6TtFDSMtsLm308AN3VynP+xZKejYjnIuKwpHslLW1PWwA6rZXwz5P00zH3d1fLfoHtIdvDtoeP6FALuwPQTh1/tT8i1kTEYEQM9qm/07sDMEmthH+PpPlj7p9ZLQMwBbQS/i2SzrH9XtszJF0taX172gLQaU0P9UXEUds3SPpHjQ71rY2Ip9rWGYCOammcPyI2SNrQpl4AdBFv7wWSIvxAUoQfSIrwA0kRfiApwg8kRfiBpAg/kBThB5Ii/EBShB9IivADSRF+ICnCDyRF+IGkCD+QFOEHkiL8QFKEH0iK8ANJEX4gKcIPJEX4gaQIP5AU4QeSIvxAUoQfSIrwA0kRfiCplmbptb1L0kFJb0g6GhGD7WgKQOe1FP7KxyPixTY8DoAu4rIfSKrV8IekH9p+zPZQOxoC0B2tXvZfFBF7bJ8h6WHbz0TEprErVP8pDEnSSZrZ4u4AtEtLZ/6I2FP9HpH0oKTF46yzJiIGI2KwT/2t7A5AGzUdftsDtk8+flvSJyU92a7GAHRWK5f9cyQ9aPv443w7In7Qlq4AdFzT4Y+I5ySd38ZeAHQRQ31AUoQfSIrwA0kRfiApwg8kRfiBpNrxqb4UXrr2Yw1r71n+bHHbZ0bmFOuHD/UV6/PuKddn7n6lYe3Y1qeL2yIvzvxAUoQfSIrwA0kRfiApwg8kRfiBpAg/kBTj/JP0x3/07Ya1zw68XN747BZ3vqRc3nX01Ya11S98vMWdT10/GjmrYW3gtl8qbjt942PtbqfncOYHkiL8QFKEH0iK8ANJEX4gKcIPJEX4gaQcEV3b2SmeHRf44q7tr51+9rkLGtZe/FD5/9BTd5SP8cu/6mJ9xof+t1i/9bwHGtYueedrxW2//+qsYv1TMxt/V0CrXovDxfrmQwPF+pKTjjS97/d//7pi/QNDW5p+7Dptjo06EPvLf1AVzvxAUoQfSIrwA0kRfiApwg8kRfiBpAg/kNSEn+e3vVbSpyWNRMR51bLZku6TtEDSLklXRcQEH2qf2ga+u7lQa+2xT2ltc/3NLy9pWPuLCxeU9/2v5TkHbl3y/iY6mpzprx0r1ge27S3WT9t0f7H+azMaz3cwc1d5LoQMJnPm/5akS9+07GZJGyPiHEkbq/sAppAJwx8RmyTtf9PipZLWVbfXSbqyzX0B6LBmn/PPiYjj12TPSyrPRwWg57T8gl+Mfjig4ZvXbQ/ZHrY9fESHWt0dgDZpNvz7bM+VpOr3SKMVI2JNRAxGxGCf+pvcHYB2azb86yWtqG6vkPRQe9oB0C0Tht/2PZIelXSu7d22r5F0i6RLbO+U9InqPoApZMJx/ohY1qA0NT+YfwI6+vy+hrWB+xvXJOmNCR574LsvNdFRe+z7vY8V6x+cUf7z/fL+cxvWFvzdc8VtjxarJwbe4QckRfiBpAg/kBThB5Ii/EBShB9Iiim6UZvpZ80v1r+68qvFep+nFevfWf2JhrXT9j5a3DYDzvxAUoQfSIrwA0kRfiApwg8kRfiBpAg/kBTj/KjNM384r1j/SH95pumnDpenH5/99Ktvu6dMOPMDSRF+ICnCDyRF+IGkCD+QFOEHkiL8QFKM86OjDn3qIw1rj3/u9gm2Ls/w9Ps33lisv/PffjTB4+fGmR9IivADSRF+ICnCDyRF+IGkCD+QFOEHkppwnN/2WkmfljQSEedVy1ZJulbSC9VqKyNiQ6eaxNT135c1Pr/Mcnkcf9l/XVKsz/zBE8V6FKuYzJn/W5IuHWf57RGxqPoh+MAUM2H4I2KTpP1d6AVAF7XynP8G29tsr7V9ats6AtAVzYb/65LOlrRI0l5JtzVa0faQ7WHbw0d0qMndAWi3psIfEfsi4o2IOCbpG5IWF9ZdExGDETHYN8EHNQB0T1Phtz13zN3PSHqyPe0A6JbJDPXdI2mJpNNt75b0Z5KW2F6k0dGUXZKu62CPADpgwvBHxLJxFt/RgV4wBb3j5JOL9eW/8UjD2oFjrxe3HfnS+4r1/kNbinWU8Q4/ICnCDyRF+IGkCD+QFOEHkiL8QFJ8dTdasnPVB4v1753+tw1rS3d+trht/waG8jqJMz+QFOEHkiL8QFKEH0iK8ANJEX4gKcIPJMU4P4r+73c+Wqxv++2/LtZ/fPRIw9orf3Vmcdt+7S3W0RrO/EBShB9IivADSRF+ICnCDyRF+IGkCD+QFOP8yU2f9yvF+k1fvK9Y73f5T+jqJ5Y3rL37H/i8fp048wNJEX4gKcIPJEX4gaQIP5AU4QeSIvxAUhOO89ueL+lOSXMkhaQ1EbHa9mxJ90laIGmXpKsi4uXOtYpmeHr5n/j87+0u1j8/66Vi/e6DZxTrc77Y+PxyrLglOm0yZ/6jkr4QEQslfVTS9bYXSrpZ0saIOEfSxuo+gCliwvBHxN6IeLy6fVDSDknzJC2VtK5abZ2kKzvVJID2e1vP+W0vkPRhSZslzYmI49+z9LxGnxYAmCImHX7bsyTdL+mmiDgwthYRodHXA8bbbsj2sO3hIzrUUrMA2mdS4bfdp9Hg3x0RD1SL99meW9XnShoZb9uIWBMRgxEx2Kf+dvQMoA0mDL9tS7pD0o6I+MqY0npJK6rbKyQ91P72AHTKZD7Se6Gk5ZK2295aLVsp6RZJf2/7Gkk/kXRVZ1pES84/t1j+8zPuaunhv/alzxfr73ri0ZYeH50zYfgj4hFJblC+uL3tAOgW3uEHJEX4gaQIP5AU4QeSIvxAUoQfSIqv7j4BTFv4gYa1oXtbe+/VwrXXF+sL7vr3lh4f9eHMDyRF+IGkCD+QFOEHkiL8QFKEH0iK8ANJMc5/AnjmD05tWLti5oGGtck4818Ol1eIcb+9DVMAZ34gKcIPJEX4gaQIP5AU4QeSIvxAUoQfSIpx/ing9SsWF+sbr7itUJ3Z3mZwwuDMDyRF+IGkCD+QFOEHkiL8QFKEH0iK8ANJTTjOb3u+pDslzZEUktZExGrbqyRdK+mFatWVEbGhU41m9j8XTivW3zO9+bH8uw+eUaz3HSh/np9P809dk3mTz1FJX4iIx22fLOkx2w9Xtdsj4sudaw9Ap0wY/ojYK2lvdfug7R2S5nW6MQCd9bae89teIOnDkjZXi26wvc32WtvjfpeU7SHbw7aHj+hQS80CaJ9Jh9/2LEn3S7opIg5I+rqksyUt0uiVwbhvMI+INRExGBGDfepvQ8sA2mFS4bfdp9Hg3x0RD0hSROyLiDci4pikb0gqf/oEQE+ZMPy2LekOSTsi4itjls8ds9pnJD3Z/vYAdMpkXu2/UNJySdttb62WrZS0zPYijY727JJ0XUc6REv+8qWFxfqjv7WgWI+929vYDXrJZF7tf0SSxykxpg9MYbzDD0iK8ANJEX4gKcIPJEX4gaQIP5CUo4tTLJ/i2XGBL+7a/oBsNsdGHYj94w3NvwVnfiApwg8kRfiBpAg/kBThB5Ii/EBShB9Iqqvj/LZfkPSTMYtOl/Ri1xp4e3q1t17tS6K3ZrWzt7Mi4t2TWbGr4X/Lzu3hiBisrYGCXu2tV/uS6K1ZdfXGZT+QFOEHkqo7/Gtq3n9Jr/bWq31J9NasWnqr9Tk/gPrUfeYHUJNawm/7Utv/YftZ2zfX0UMjtnfZ3m57q+3hmntZa3vE9pNjls22/bDtndXvcadJq6m3Vbb3VMduq+3La+ptvu1/tv207ads31gtr/XYFfqq5bh1/bLf9jRJ/ynpEkm7JW2RtCwinu5qIw3Y3iVpMCJqHxO2/ZuSXpF0Z0ScVy27VdL+iLil+o/z1Ij4kx7pbZWkV+qeubmaUGbu2JmlJV0p6XdV47Er9HWVajhudZz5F0t6NiKei4jDku6VtLSGPnpeRGyStP9Ni5dKWlfdXqfRP56ua9BbT4iIvRHxeHX7oKTjM0vXeuwKfdWijvDPk/TTMfd3q7em/A5JP7T9mO2hupsZx5xq2nRJel7SnDqbGceEMzd305tmlu6ZY9fMjNftxgt+b3VRRPy6pMskXV9d3vakGH3O1kvDNZOaublbxplZ+ufqPHbNznjdbnWEf4+k+WPun1kt6wkRsaf6PSLpQfXe7MP7jk+SWv0eqbmfn+ulmZvHm1laPXDsemnG6zrCv0XSObbfa3uGpKslra+hj7ewPVC9ECPbA5I+qd6bfXi9pBXV7RWSHqqxl1/QKzM3N5pZWjUfu56b8Toiuv4j6XKNvuL/Y0l/WkcPDfp6n6Qnqp+n6u5N0j0avQw8otHXRq6RdJqkjZJ2SvonSbN7qLe7JG2XtE2jQZtbU28XafSSfpukrdXP5XUfu0JftRw33uEHJMULfkBShB9IivADSRF+ICnCDyRF+IGkCD+QFOEHkvp/uK0ZUt56JeQAAAAASUVORK5CYII=\n", 92 | "text/plain": [ 93 | "
" 94 | ] 95 | }, 96 | "metadata": {}, 97 | "output_type": "display_data" 98 | } 99 | ], 100 | "source": [ 101 | "plt.imshow(x_test[0].reshape(28,28));" 102 | ] 103 | }, 104 | { 105 | "cell_type": "markdown", 106 | "metadata": {}, 107 | "source": [ 108 | "---\n", 109 | "## Build Classifier with PCA-XGBoost" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": 4, 115 | "metadata": {}, 116 | "outputs": [], 117 | "source": [ 118 | "pipeline = Pipeline([('pca',PCA(n_components=10)), ('xgb',xgboost.XGBClassifier(max_depth=10,n_estimators=10))])" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": 5, 124 | "metadata": {}, 125 | "outputs": [ 126 | { 127 | "data": { 128 | "text/plain": [ 129 | "Pipeline(memory=None,\n", 130 | " steps=[('pca', PCA(copy=True, iterated_power='auto', n_components=10, random_state=None,\n", 131 | " svd_solver='auto', tol=0.0, whiten=False)), ('xgb', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,\n", 132 | " colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0,\n", 133 | " max_dept...\n", 134 | " reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,\n", 135 | " silent=True, subsample=1))])" 136 | ] 137 | }, 138 | "execution_count": 5, 139 | "metadata": {}, 140 | "output_type": "execute_result" 141 | } 142 | ], 143 | "source": [ 144 | "pipeline.fit(x_train,y_train)" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": 6, 150 | "metadata": {}, 151 | "outputs": [ 152 | { 153 | "name": "stderr", 154 | "output_type": "stream", 155 | "text": [ 156 | "/usr/local/lib/python3.5/dist-packages/sklearn/preprocessing/label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n", 157 | " if diff:\n" 158 | ] 159 | }, 160 | { 161 | "data": { 162 | "text/plain": [ 163 | "0.8836" 164 | ] 165 | }, 166 | "execution_count": 6, 167 | "metadata": {}, 168 | "output_type": "execute_result" 169 | } 170 | ], 171 | "source": [ 172 | "metrics.accuracy_score(y_test, pipeline.predict(x_test))" 173 | ] 174 | }, 175 | { 176 | "cell_type": "markdown", 177 | "metadata": {}, 178 | "source": [ 179 | "Save classifier file to be uploaded to Lambda" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": 7, 185 | "metadata": {}, 186 | "outputs": [], 187 | "source": [ 188 | "pickle.dump(file=open('xgb_mnist_pipe.pckl','wb'),obj=pipeline)" 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": 9, 194 | "metadata": {}, 195 | "outputs": [ 196 | { 197 | "data": { 198 | "image/png": "\n", 199 | "text/plain": [ 200 | "
" 201 | ] 202 | }, 203 | "metadata": {}, 204 | "output_type": "display_data" 205 | } 206 | ], 207 | "source": [ 208 | "sample_id = 21\n", 209 | "plt.figure(figsize=(10,5))\n", 210 | "plt.subplot(121),plt.imshow(x_test[sample_id].reshape(28,28))\n", 211 | "plt.subplot(122),plt.bar(range(10),pipeline.predict_proba(np.expand_dims(x_test[sample_id],axis=0)).tolist()[0]);" 212 | ] 213 | }, 214 | { 215 | "cell_type": "code", 216 | "execution_count": 79, 217 | "metadata": {}, 218 | "outputs": [ 219 | { 220 | "name": "stderr", 221 | "output_type": "stream", 222 | "text": [ 223 | "/usr/local/lib/python3.5/dist-packages/sklearn/preprocessing/label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n", 224 | " if diff:\n" 225 | ] 226 | }, 227 | { 228 | "data": { 229 | "text/plain": [ 230 | "9" 231 | ] 232 | }, 233 | "execution_count": 79, 234 | "metadata": {}, 235 | "output_type": "execute_result" 236 | } 237 | ], 238 | "source": [ 239 | "int(pipeline.predict(np.expand_dims(x_test[sample_id],axis=0))[0])" 240 | ] 241 | }, 242 | { 243 | "cell_type": "markdown", 244 | "metadata": {}, 245 | "source": [ 246 | "---\n", 247 | "### Testing the API service" 248 | ] 249 | }, 250 | { 251 | "cell_type": "code", 252 | "execution_count": 10, 253 | "metadata": {}, 254 | "outputs": [], 255 | "source": [ 256 | "import json\n", 257 | "import requests" 258 | ] 259 | }, 260 | { 261 | "cell_type": "code", 262 | "execution_count": 11, 263 | "metadata": {}, 264 | "outputs": [], 265 | "source": [ 266 | "service_url = 'https://6t4p4l2bed.execute-api.us-east-2.amazonaws.com/serverless_deploy'" 267 | ] 268 | }, 269 | { 270 | "cell_type": "code", 271 | "execution_count": 24, 272 | "metadata": {}, 273 | "outputs": [ 274 | { 275 | "name": "stdout", 276 | "output_type": "stream", 277 | "text": [ 278 | "{\"prediction\": 4} real: 4\n" 279 | ] 280 | } 281 | ], 282 | "source": [ 283 | "sample_id = 109\n", 284 | "digit_json = json.dumps({\"digit\":x_test[sample_id].tolist()})\n", 285 | "\n", 286 | "r = requests.post(service_url, json={\"body\":digit_json})\n", 287 | "print(r.json()['body'], \"real: %d\"%(y_test[sample_id]))" 288 | ] 289 | }, 290 | { 291 | "cell_type": "code", 292 | "execution_count": null, 293 | "metadata": {}, 294 | "outputs": [], 295 | "source": [] 296 | } 297 | ], 298 | "metadata": { 299 | "kernelspec": { 300 | "display_name": "Python 3", 301 | "language": "python", 302 | "name": "python3" 303 | }, 304 | "language_info": { 305 | "codemirror_mode": { 306 | "name": "ipython", 307 | "version": 3 308 | }, 309 | "file_extension": ".py", 310 | "mimetype": "text/x-python", 311 | "name": "python", 312 | "nbconvert_exporter": "python", 313 | "pygments_lexer": "ipython3", 314 | "version": "3.5.2" 315 | } 316 | }, 317 | "nbformat": 4, 318 | "nbformat_minor": 2 319 | } 320 | --------------------------------------------------------------------------------