├── .gitignore
├── Dockerfile
├── LICENSE
├── Model_training.ipynb
├── README.md
├── app.py
├── iris_trained_model.pkl
└── requirements.txt

/.gitignore:
--------------------------------------------------------------------------------
1 | # Created by https://www.gitignore.io/api/linux,macos,python,pycharm
2 |
3 | ### Linux ###
4 | *~
5 |
6 | # temporary files which can be created if a process still has a handle open of a deleted file
7 | .fuse_hidden*
8 |
9 | # KDE directory preferences
10 | .directory
11 |
12 | # Linux trash folder which might appear on any partition or disk
13 | .Trash-*
14 |
15 | # .nfs files are created when an open file is removed but is still being accessed
16 | .nfs*
17 |
18 | ### macOS ###
19 | # General
20 | .DS_Store
21 | .AppleDouble
22 | .LSOverride
23 |
24 | # Icon must end with two \r
25 | Icon
26 |
27 | # Thumbnails
28 | ._*
29 |
30 | # Files that might appear in the root of a volume
31 | .DocumentRevisions-V100
32 | .fseventsd
33 | .Spotlight-V100
34 | .TemporaryItems
35 | .Trashes
36 | .VolumeIcon.icns
37 | .com.apple.timemachine.donotpresent
38 |
39 | # Directories potentially created on remote AFP share
40 | .AppleDB
41 | .AppleDesktop
42 | Network Trash Folder
43 | Temporary Items
44 | .apdisk
45 |
46 | ### PyCharm ###
47 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm
48 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
49 |
50 | # User-specific stuff
51 | .idea
52 | .idea/**/workspace.xml
53 | .idea/**/tasks.xml
54 | .idea/**/usage.statistics.xml
55 | .idea/**/dictionaries
56 | .idea/**/shelf
57 |
58 | # Sensitive or high-churn files
59 | .idea/**/dataSources/
60 | .idea/**/dataSources.ids
61 | .idea/**/dataSources.local.xml
62 | .idea/**/sqlDataSources.xml
63 | .idea/**/dynamic.xml
64 | .idea/**/uiDesigner.xml
65 | .idea/**/dbnavigator.xml
66 |
67 | # Gradle
68 | .idea/**/gradle.xml
69 | .idea/**/libraries
70 |
71 | # CMake
72 | cmake-build-*/
73 |
74 | # Mongo Explorer plugin
75 | .idea/**/mongoSettings.xml
76 |
77 | # File-based project format
78 | *.iws
79 |
80 | # IntelliJ
81 | out/
82 |
83 | # mpeltonen/sbt-idea plugin
84 | .idea_modules/
85 |
86 | # JIRA plugin
87 | atlassian-ide-plugin.xml
88 |
89 | # Cursive Clojure plugin
90 | .idea/replstate.xml
91 |
92 | # Crashlytics plugin (for Android Studio and IntelliJ)
93 | com_crashlytics_export_strings.xml
94 | crashlytics.properties
95 | crashlytics-build.properties
96 | fabric.properties
97 |
98 | # Editor-based Rest Client
99 | .idea/httpRequests
100 |
101 | ### PyCharm Patch ###
102 | # Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721
103 |
104 | # *.iml
105 | # modules.xml
106 | # .idea/misc.xml
107 | # *.ipr
108 |
109 | # Sonarlint plugin
110 | .idea/sonarlint
111 |
112 | ### Python ###
113 | # Byte-compiled / optimized / DLL files
114 | __pycache__/
115 | *.py[cod]
116 | *$py.class
117 |
118 | # C extensions
119 | *.so
120 |
121 | # Distribution / packaging
122 | .Python
123 | build/
124 | develop-eggs/
125 | dist/
126 | downloads/
127 | eggs/
128 | .eggs/
129 | lib/
130 | lib64/
131 | parts/
132 | sdist/
133 | var/
134 | wheels/
135 | *.egg-info/
136 | .installed.cfg
137 | *.egg
138 | MANIFEST
139 |
140 | # PyInstaller
141 | # Usually these files are written by a python script from a template
142 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
143 | *.manifest
144 | *.spec
145 |
146 | # Installer logs
147 | pip-log.txt
148 | pip-delete-this-directory.txt
149 |
150 | # Unit test / coverage reports
151 | htmlcov/
152 | .tox/
153 | .coverage
154 | .coverage.*
155 | .cache
156 | nosetests.xml
157 | coverage.xml
158 | *.cover
159 | .hypothesis/
160 | .pytest_cache/
161 |
162 | # Translations
163 | *.mo
164 | *.pot
165 |
166 | # Django stuff:
167 | *.log
168 | local_settings.py
169 | db.sqlite3
170 |
171 | # Flask stuff:
172 | instance/
173 | .webassets-cache
174 |
175 | # Scrapy stuff:
176 | .scrapy
177 |
178 | # Sphinx documentation
179 | docs/_build/
180 |
181 | # PyBuilder
182 | target/
183 |
184 | # Jupyter Notebook
185 | .ipynb_checkpoints
186 |
187 | # pyenv
188 | .python-version
189 |
190 | # celery beat schedule file
191 | celerybeat-schedule
192 |
193 | # SageMath parsed files
194 | *.sage.py
195 |
196 | # Environments
197 | .env
198 | .venv
199 | env/
200 | venv/
201 | ENV/
202 | env.bak/
203 | venv.bak/
204 |
205 | # Spyder project settings
206 | .spyderproject
207 | .spyproject
208 |
209 | # Rope project settings
210 | .ropeproject
211 |
212 | # mkdocs documentation
213 | /site
214 |
215 | # mypy
216 | .mypy_cache/
217 |
218 | ### Python Patch ###
219 | .venv/
220 |
221 |
222 | # End of https://www.gitignore.io/api/linux,macos,python,pycharm
223 |
224 | # Customized
225 | Dockerfile_2
226 | images
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM python:3.6-slim
2 | COPY ./app.py /deploy/
3 | COPY ./requirements.txt /deploy/
4 | COPY ./iris_trained_model.pkl /deploy/
5 | WORKDIR /deploy/
6 | RUN pip install -r requirements.txt
7 | EXPOSE 80
8 | ENTRYPOINT ["python", "app.py"]
9 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019 Tanuj Jain
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
--------------------------------------------------------------------------------
/Model_training.ipynb:
--------------------------------------------------------------------------------
1 | {
2 |  "cells": [
3 |   {
4 |    "cell_type": "code",
5 |    "execution_count": 1,
6 |    "metadata": {},
7 |    "outputs": [],
8 |    "source": [
9 |     "from sklearn.datasets import load_iris\n",
10 |     "from sklearn.linear_model import LogisticRegression"
11 |    ]
12 |   },
13 |   {
14 |    "cell_type": "markdown",
15 |    "metadata": {},
16 |    "source": [
17 |     "### Load iris Dataset"
18 |    ]
19 |   },
20 |   {
21 |    "cell_type": "code",
22 |    "execution_count": 2,
23 |    "metadata": {},
24 |    "outputs": [],
25 |    "source": [
26 |     "iris_dict = load_iris()\n",
27 |     "X = iris_dict['data']\n",
28 |     "y = iris_dict['target']"
29 |    ]
30 |   },
31 |   {
32 |    "cell_type": "code",
33 |    "execution_count": 3,
34 |    "metadata": {},
35 |    "outputs": [],
36 |    "source": [
37 |     "# shuffle arrays since y values are in order\n",
38 |     "\n",
39 |     "from sklearn.utils import shuffle\n",
40 |     "X_new, y_new = shuffle(X, y, random_state=0)"
41 |    ]
42 |   },
43 |   {
44 |    "cell_type": "code",
45 |    "execution_count": 4,
46 |    "metadata": {},
47 |    "outputs": [],
48 |    "source": [
49 |     "# Divide samples into train and test "
50 |    ]
51 |   },
52 |   {
53 |    "cell_type": "code",
54 |    "execution_count": 5,
55 |    "metadata": {},
56 |    "outputs": [],
57 |    "source": [
58 |     "n_samples_train = 120 # number of samples for training (--> #samples for testing = len(y_new) - 120 = 30)\n",
59 |     "X_train = X_new[:n_samples_train, :]\n",
60 |     "y_train = y_new[:n_samples_train]\n",
61 |     "\n",
62 |     "X_test = X_new[n_samples_train:, :]\n",
63 |     "y_test = y_new[n_samples_train:]"
64 |    ]
65 |   },
66 |   {
67 |    "cell_type": "markdown",
68 |    "metadata": {},
69 |    "source": [
70 |     "#### Fit logistic regression model"
71 |    ]
72 |   },
73 |   {
74 |    "cell_type": "code",
75 |    "execution_count": 6,
76 |    "metadata": {},
77 |    "outputs": [
78 |     {
79 |      "data": {
80 |       "text/plain": [
81 |        "LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n",
82 |        "          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,\n",
83 |        "          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,\n",
84 |        "          verbose=0, warm_start=False)"
85 |       ]
86 |      },
87 |      "execution_count": 6,
88 |      "metadata": {},
89 |      "output_type": "execute_result"
90 |     }
91 |    ],
92 |    "source": [
93 |     "clf = LogisticRegression()\n",
94 |     "clf.fit(X_train, y_train)"
95 |    ]
96 |   },
97 |   {
98 |    "cell_type": "code",
99 |    "execution_count": 7,
100 |    "metadata": {},
101 |    "outputs": [],
102 |    "source": [
103 |     "y_pred = clf.predict(X_test)"
104 |    ]
105 |   },
106 |   {
107 |    "cell_type": "markdown",
108 |    "metadata": {},
109 |    "source": [
110 |     "#### Metrics"
111 |    ]
112 |   },
113 |   {
114 |    "cell_type": "code",
115 |    "execution_count": 8,
116 |    "metadata": {},
117 |    "outputs": [],
118 |    "source": [
119 |     "from sklearn.metrics import accuracy_score"
120 |    ]
121 |   },
122 |   {
123 |    "cell_type": "code",
124 |    "execution_count": 9,
125 |    "metadata": {},
126 |    "outputs": [
127 |     {
128 |      "data": {
129 |       "text/plain": [
130 |        "0.9333333333333333"
131 |       ]
132 |      },
133 |      "execution_count": 9,
134 |      "metadata": {},
135 |      "output_type": "execute_result"
136 |     }
137 |    ],
138 |    "source": [
139 |     "accuracy_score(y_test, y_pred)"
140 |    ]
141 |   },
142 |   {
143 |    "cell_type": "markdown",
144 |    "metadata": {},
145 |    "source": [
146 |     "#### Save Model"
147 |    ]
148 |   },
149 |   {
150 |    "cell_type": "code",
151 |    "execution_count": 10,
152 |    "metadata": {},
153 |    "outputs": [],
"source": [ 155 | "import pickle" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": 11, 161 | "metadata": {}, 162 | "outputs": [], 163 | "source": [ 164 | "with open('iris_trained_model.pkl', 'wb') as f:\n", 165 | " pickle.dump(clf, f)" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": 12, 171 | "metadata": {}, 172 | "outputs": [], 173 | "source": [ 174 | "with open('iris_trained_model.pkl', 'rb') as f:\n", 175 | " clf_loaded = pickle.load(f)" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": 13, 181 | "metadata": {}, 182 | "outputs": [ 183 | { 184 | "data": { 185 | "text/plain": [ 186 | "LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n", 187 | " intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,\n", 188 | " penalty='l2', random_state=None, solver='liblinear', tol=0.0001,\n", 189 | " verbose=0, warm_start=False)" 190 | ] 191 | }, 192 | "execution_count": 13, 193 | "metadata": {}, 194 | "output_type": "execute_result" 195 | } 196 | ], 197 | "source": [ 198 | "clf_loaded" 199 | ] 200 | } 201 | ], 202 | "metadata": { 203 | "kernelspec": { 204 | "display_name": "Python 3", 205 | "language": "python", 206 | "name": "python3" 207 | }, 208 | "language_info": { 209 | "codemirror_mode": { 210 | "name": "ipython", 211 | "version": 3 212 | }, 213 | "file_extension": ".py", 214 | "mimetype": "text/x-python", 215 | "name": "python", 216 | "nbconvert_exporter": "python", 217 | "pygments_lexer": "ipython3", 218 | "version": "3.6.6" 219 | } 220 | }, 221 | "nbformat": 4, 222 | "nbformat_minor": 2 223 | } 224 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Serve a Machine Learning Model as a Webservice 2 | Serving a simple machine learning model as a webservice using [flask](http://flask.pocoo.org/) and [docker](https://www.docker.com/). 3 | 4 | ## Getting Started 5 | 1. Use Model_training.ipynb to train a logistic regression model on the [iris dataset](http://archive.ics.uci.edu/ml/datasets/iris) and generate a pickled model file (iris_trained_model.pkl) 6 | 2. Use app.py to wrap the inference logic in a flask server to serve the model as a REST webservice: 7 | * Execute the command `python app.py` to run the flask app. 8 | * Go to the browser and hit the url `0.0.0.0:80` to get a message `Hello World!` displayed. **NOTE**: A permission error may be received at this point. In this case, change the port number to 5000 in `app.run()` command in `app.py`. 9 | (Port 80 is a privileged port, so change it to some port that isn't, eg: 5000) 10 | * Next, run the below command in terminal to query the flask server to get a reply ```2``` for the model file provided in this repo: 11 | ``` 12 | curl -X POST \ 13 | 0.0.0.0:80/predict \ 14 | -H 'Content-Type: application/json' \ 15 | -d '[5.9,3.0,5.1,1.8]' 16 | ``` 17 | 3. Run ```docker build -t app-iris .``` to build the docker image using ```Dockerfile```. (Pay attention to the period in the docker build command) 18 | 4. Run ```docker run -p 80:80 app-iris``` to run the docker container that got generated using the `app-iris` docker image. (This assumes that the port in app.py is set to 80) 19 | 5. 
41 |
42 | For details on deploying the containerized app to an [AWS EC2 instance](https://aws.amazon.com/ec2/), see the accompanying [blog post](https://medium.com/@tanuj.jain.10/simple-way-to-deploy-machine-learning-models-to-cloud-fd58b771fdcf).
43 |
44 | ## LICENSE
45 | See [LICENSE](LICENSE) for details.
--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
1 | # Serve the model as a Flask application
2 |
3 | import pickle
4 | import numpy as np
5 | from flask import Flask, request
6 |
7 | model = None
8 | app = Flask(__name__)
9 |
10 |
11 | def load_model():
12 |     global model
13 |     # populate the module-level model variable
14 |     with open('iris_trained_model.pkl', 'rb') as f:
15 |         model = pickle.load(f)
16 |
17 |
18 | @app.route('/')
19 | def home_endpoint():
20 |     return 'Hello World!'
21 |
22 |
23 | @app.route('/predict', methods=['POST'])
24 | def get_prediction():
25 |     # Works only for a single sample
26 |     if request.method == 'POST':
27 |         data = request.get_json()  # get the data POSTed as JSON
28 |         data = np.array(data)[np.newaxis, :]  # convert shape from (4,) to (1, 4)
29 |         prediction = model.predict(data)  # run the globally loaded model on the data
30 |         return str(prediction[0])
31 |
32 |
33 | if __name__ == '__main__':
34 |     load_model()  # load the model once, at startup
35 |     app.run(host='0.0.0.0', port=80)  # port 80 is privileged; see the README note
36 |
--------------------------------------------------------------------------------
/iris_trained_model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tanujjain/deploy-ml-model/2361acdb9a4f5ecb50be38e07c7aa86a3156b513/iris_trained_model.pkl
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | Flask==1.0.2
2 | numpy==1.16.1
3 | scikit-learn==0.20.2
4 |
--------------------------------------------------------------------------------
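As the comment in `app.py` notes, `get_prediction` handles only a single sample per request. Below is a minimal sketch of a batch-capable variant; it is not part of the repo, and it assumes `iris_trained_model.pkl` sits in the working directory and that an unprivileged port (5000) is acceptable:
```
# Hypothetical batch-capable variant of app.py (a sketch, not the repo's code)
import pickle

import numpy as np
from flask import Flask, jsonify, request

app = Flask(__name__)
with open('iris_trained_model.pkl', 'rb') as f:  # assumes the pickle is in the working directory
    model = pickle.load(f)


@app.route('/predict', methods=['POST'])
def get_prediction():
    data = np.array(request.get_json())  # shape (4,) for one sample, (n, 4) for a batch
    if data.ndim == 1:
        data = data[np.newaxis, :]  # promote a single sample to a batch of one
    return jsonify(model.predict(data).tolist())  # e.g. [2] or [2, 0, 1]


if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5000)  # unprivileged port; see the README note
```
A client would then POST either `[5.9,3.0,5.1,1.8]` or `[[5.9,3.0,5.1,1.8],[5.1,3.5,1.4,0.2]]` to `/predict` and receive a JSON list of class indices back.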