├── .gitignore ├── CS329s-deploying-ml-models-tutorial.pdf ├── LICENSE ├── README.md ├── food-images ├── coffee.jpeg ├── donuts.jpeg ├── hamburger.jpeg ├── ice_cream.jpeg ├── pizza.jpeg ├── plant.jpeg ├── plant_2.jpeg ├── steak.jpeg ├── truck.jpeg └── xbox.jpg ├── food-vision ├── .dockerignore ├── Dockerfile ├── Makefile ├── SessionState.py ├── app.py ├── app.yaml ├── requirements.txt └── utils.py ├── images ├── food-vision-demo-cropped.gif ├── gcp-connecting-a-model-version-to-google-storage.png ├── gcp-creating-a-bucket.png ├── gcp-creating-a-model-on-ai-platform.png ├── gcp-creating-a-model-version.png ├── gcp-ml-engine-permissions.png ├── make-gcloud-deploy.png ├── streamlit-app-first-error-youll-run-into.png ├── streamlit-app-on-app-engine.png ├── streamlit-app-what-you-should-see.png └── streamlit-predict-button-clicked.png └── model_training.ipynb /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Created by https://www.toptal.com/developers/gitignore/api/python 3 | # Edit at https://www.toptal.com/developers/gitignore?templates=python 4 | 5 | ### Python ### 6 | # Byte-compiled / optimized / DLL files 7 | __pycache__/ 8 | *.py[cod] 9 | *$py.class 10 | 11 | # C extensions 12 | *.so 13 | 14 | # Distribution / packaging 15 | .Python 16 | build/ 17 | develop-eggs/ 18 | dist/ 19 | downloads/ 20 | eggs/ 21 | .eggs/ 22 | lib/ 23 | lib64/ 24 | parts/ 25 | sdist/ 26 | var/ 27 | wheels/ 28 | pip-wheel-metadata/ 29 | share/python-wheels/ 30 | *.egg-info/ 31 | .installed.cfg 32 | *.egg 33 | MANIFEST 34 | 35 | # PyInstaller 36 | # Usually these files are written by a python script from a template 37 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
38 | *.manifest 39 | *.spec 40 | 41 | # Installer logs 42 | pip-log.txt 43 | pip-delete-this-directory.txt 44 | 45 | # Unit test / coverage reports 46 | htmlcov/ 47 | .tox/ 48 | .nox/ 49 | .coverage 50 | .coverage.* 51 | .cache 52 | nosetests.xml 53 | coverage.xml 54 | *.cover 55 | *.py,cover 56 | .hypothesis/ 57 | .pytest_cache/ 58 | pytestdebug.log 59 | 60 | # Translations 61 | *.mo 62 | *.pot 63 | 64 | # Django stuff: 65 | *.log 66 | local_settings.py 67 | db.sqlite3 68 | db.sqlite3-journal 69 | 70 | # Flask stuff: 71 | instance/ 72 | .webassets-cache 73 | 74 | # Scrapy stuff: 75 | .scrapy 76 | 77 | # Sphinx documentation 78 | docs/_build/ 79 | doc/_build/ 80 | 81 | # PyBuilder 82 | target/ 83 | 84 | # Jupyter Notebook 85 | .ipynb_checkpoints 86 | 87 | # IPython 88 | profile_default/ 89 | ipython_config.py 90 | 91 | # pyenv 92 | .python-version 93 | 94 | # pipenv 95 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 96 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 97 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 98 | # install all needed dependencies. 99 | #Pipfile.lock 100 | 101 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 102 | __pypackages__/ 103 | 104 | # Celery stuff 105 | celerybeat-schedule 106 | celerybeat.pid 107 | 108 | # SageMath parsed files 109 | *.sage.py 110 | 111 | # Environments 112 | .env 113 | .venv 114 | env/ 115 | venv/ 116 | ENV/ 117 | env.bak/ 118 | venv.bak/ 119 | pythonenv* 120 | 121 | # Spyder project settings 122 | .spyderproject 123 | .spyproject 124 | 125 | # Rope project settings 126 | .ropeproject 127 | 128 | # mkdocs documentation 129 | /site 130 | 131 | # mypy 132 | .mypy_cache/ 133 | .dmypy.json 134 | dmypy.json 135 | 136 | # Pyre type checker 137 | .pyre/ 138 | 139 | # pytype static type analyzer 140 | .pytype/ 141 | 142 | # profiling data 143 | .prof 144 | 145 | # misc 146 | *.json 147 | .DS_Store 148 | *.key 149 | keynote-images 150 | 151 | # End of https://www.toptal.com/developers/gitignore/api/python 152 | -------------------------------------------------------------------------------- /CS329s-deploying-ml-models-tutorial.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mrdbourke/cs329s-ml-deployment-tutorial/a941ecbdf5bd09a00bf19e9d68fed56273fffa2f/CS329s-deploying-ml-models-tutorial.pdf -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Daniel Bourke 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be 
included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # CS329s Machine Learning Model Deployment Tutorial 2 | 3 | **Warning:** Following the steps of what's in here may cost you money (Google Cloud is a paid service), be sure to shut down any Google Cloud service you no longer need to use to avoid charges. 4 | 5 | **Thank you to:** [Mark Douthwaite's incredible ML + software engineering blog](https://mark.douthwaite.io/), [Lj Miranda's amazing post on software engineering tools for data scientists](https://ljvmiranda921.github.io/notebook/2020/11/15/data-science-swe/), [Chip Huyen](https://huyenchip.com/) and Ashik Shafi's gracious feedback on the raw materials of this tutorial. 6 | 7 | ## What is in here? 8 | 9 | Code and files to go along with [CS329s machine learning model deployment tutorial](https://stanford-cs329s.github.io/syllabus.html). 10 | 11 | * Watch the [video tutorial on YouTube](https://youtu.be/fw6NMQrYc6w) 12 | * See the [slides](https://github.com/mrdbourke/cs329s-ml-deployment-tutorial/blob/main/CS329s-deploying-ml-models-tutorial.pdf) 13 | * Get the [model training code](https://github.com/mrdbourke/cs329s-ml-deployment-tutorial/blob/main/model_training.ipynb) 14 | 15 | ## What do I need to get started? 
16 | 17 | * A [Google Cloud account](https://cloud.google.com/gcp) and a [Google Cloud Project](https://cloud.google.com/resource-manager/docs/creating-managing-projects) 18 | * [Google Cloud SDK installed](https://cloud.google.com/sdk/docs/install) (gcloud CLI utility) 19 | * Trained [machine learning model(s)](https://github.com/mrdbourke/cs329s-ml-deployment-tutorial/blob/main/model_training.ipynb), our app uses an image classification model trained on a number of different classes of food from [Food101 dataset](https://www.kaggle.com/dansbecker/food-101) 20 | * [Docker installed](https://docs.docker.com/get-docker/) 21 | 22 | **Warning (again):** Using Google Cloud services costs money. If you don't have credits (you get $300USD when you first sign up), you will be charged. Delete and shut down your work when finished to avoid charges. 23 | 24 | ## What will I end up with? 25 | 26 | If you go through the steps below without fail, you should end up with a [Streamlit](http://streamlit.io/)-powered web application (Food Vision 🍔👁) for classifying images of food (deployed on Google Cloud if you want). 27 | 28 | Our app running locally making a prediction on an image of ice cream (using a machine learning model deployed on Google Cloud): 29 | ![food vision demo](https://github.com/mrdbourke/cs329s-ml-deployment-tutorial/raw/main/images/food-vision-demo-cropped.gif) 30 | 31 | ## Okay, I'm in, how can I use it? 32 | 33 | We're going to tackle this in 3 parts: 34 | 1. Getting the app running (running Streamlit on our local machines) 35 | 2. Deploying a machine learning model to AI Platform (getting Google Cloud to host one of our models) 36 | 3. Deploying our app to App Engine (getting our app on the internet) 37 | 38 | ### 1. Getting the app running 39 | 40 | 1. Clone this repo 41 | ``` 42 | git clone https://github.com/mrdbourke/cs329s-ml-deployment-tutorial 43 | ``` 44 | 45 | 2. Change into the `food-vision` directory 46 | ``` 47 | cd food-vision 48 | ``` 49 | 50 | 3. 
Create and activate a virtual environment (call it what you want, I called mine "env") 51 | ``` 52 | pip install virtualenv 53 | virtualenv env 54 | source env/bin/activate 55 | ``` 56 | 4. Install the required dependencies (Streamlit, TensorFlow, etc) 57 | ``` 58 | pip install -r requirements.txt 59 | ``` 60 | 5. Activate Streamlit and run `app.py` 61 | ``` 62 | streamlit run app.py 63 | ``` 64 | Running the above command should result in you seeing the following: 65 | ![](https://raw.githubusercontent.com/mrdbourke/cs329s-ml-deployment-tutorial/main/images/streamlit-app-what-you-should-see.png) 66 | 67 | This is Food Vision 🍔👁 the app we're making. 68 | 69 | 6. Try to upload an image (e.g. one of the ones in [`food-images/`](https://github.com/mrdbourke/cs329s-ml-deployment-tutorial/tree/main/food-images) such as [`ice_cream.jpeg`](https://github.com/mrdbourke/cs329s-ml-deployment-tutorial/blob/main/food-images/ice_cream.jpeg)) and it should load. 70 | 71 | 7. Notice a "Predict" button appears when you upload an image to the app, click it and see what happens. 72 | 73 | 8. The app breaks because it tries to contact Google Cloud Platform (GCP) looking for a machine learning model and it either: 74 | * won't be able to find the model (wrong API call or the model doesn't exist) 75 | * won't be able to use the existing model because the credentials are wrong (seen below) 76 | ![credential error](https://raw.githubusercontent.com/mrdbourke/cs329s-ml-deployment-tutorial/main/images/streamlit-app-first-error-youll-run-into.png) 77 | 78 | This is a good thing! It means our app is trying to contact GCP (using functions in `food-vision/app.py` and `food-vision/utils.py`). 79 | 80 | Now let's learn how to get a model hosted on GCP. 81 | 82 | ### 2. Getting a machine learning model hosted on GCP 83 | 84 | > How do I fix this error? 
(Streamlit can't access your model) 85 | 86 | To fix it, we're going to need a couple of things: 87 | * A trained machine learning model (suited to our problem, we'll be uploading this to Google Storage) 88 | * A Google Storage bucket (to store our trained model) 89 | * A hosted model on Google AI Platform (we'll connect the model in our Google Storage bucket to here) 90 | * A service key to access our hosted model on Google AI Platform 91 | 92 | Let's see how we can get the above. 93 | 94 | 1. To train a machine learning model and save it in the [`SavedModel`](https://www.tensorflow.org/guide/saved_model) format (this is TensorFlow specific, do what you need for PyTorch), we can follow the steps in [`model_training.ipynb`](https://github.com/mrdbourke/cs329s-ml-deployment-tutorial/blob/main/model_training.ipynb). 95 | 96 | 2. Once we've got a `SavedModel`, we'll upload it to Google Storage but before we do that, we'll need to [create a Google Storage Bucket](https://cloud.google.com/storage/docs/creating-buckets) (a bucket is like a hard drive on the cloud). 97 | 98 | ![creating a bucket on google cloud](https://raw.githubusercontent.com/mrdbourke/cs329s-ml-deployment-tutorial/main/images/gcp-creating-a-bucket.png) 99 | 100 | Call your bucket whatever you like (e.g. my_cool_bucket_name). You'll want to store your data in a region which is either closest to you or wherever you're allowed to store data (if this doesn't make sense, store it in the US). 101 | 102 | 3. With a bucket created, we can [copy our model to the bucket](https://cloud.google.com/storage/docs/uploading-objects#gsutil). 
103 | ``` 104 | ## Uploading a model to Google Storage from within Colab ## 105 | 106 | # Authorize Colab and initialize gcloud (enter the appropriate inputs when asked) 107 | from google.colab import auth 108 | auth.authenticate_user() 109 | !curl https://sdk.cloud.google.com | bash 110 | !gcloud init 111 | 112 | # Upload SavedModel to Google Storage Bucket 113 | !gsutil cp -r YOUR_MODEL_PATH gs://YOUR_GOOGLE_STORAGE_BUCKET 114 | ``` 115 | 116 | 4. [Connect model in bucket to AI Platform](https://cloud.google.com/ai-platform/prediction/docs/deploying-models) (this'll make our model accessible via an API call, if you're not sure what an API call is, imagine writing a function that could trigger our model from anywhere on the internet) 117 | * Don't like clicking around Google Cloud's console? You can also [use `gcloud` to create a model in AI Platform](https://cloud.google.com/sdk/gcloud/reference/ai-platform/models/create) on the command line 118 | * Create a model on AI Platform (choose a region which is closest to you or where you'd like your model to be accessed from): 119 | ![creating a model on AI Platform](https://raw.githubusercontent.com/mrdbourke/cs329s-ml-deployment-tutorial/main/images/gcp-creating-a-model-on-ai-platform.png) 120 | * Once you've got a model on AI Platform (above), you'll need to create a model version which matches up with what your model was trained with (e.g. choose TensorFlow if your model is trained with TensorFlow): 121 | ![creating a model version on AI Platform](https://raw.githubusercontent.com/mrdbourke/cs329s-ml-deployment-tutorial/main/images/gcp-creating-a-model-version.png) 122 | * And then link your model version to your trained model in Google Storage: 123 | ![linking a model version to Google Storage](https://raw.githubusercontent.com/mrdbourke/cs329s-ml-deployment-tutorial/main/images/gcp-connecting-a-model-version-to-google-storage.png) 124 | 125 | 5. 
Create a [service account to access AI Platform](https://cloud.google.com/iam/docs/creating-managing-service-accounts) (GCP loves permissions, it's for the security of your app) 126 | * You'll want to make a service account with permissions to use the "ML Engine Developer" role 127 | 128 | ![ml developer role permission](https://raw.githubusercontent.com/mrdbourke/cs329s-ml-deployment-tutorial/main/images/gcp-ml-engine-permissions.png) 129 | 130 | 6. Once you've got an active service account, [create and download its key](https://cloud.google.com/iam/docs/creating-managing-service-account-keys) (this will come in the form of a .JSON file) 131 | * 🔑 **Note:** Service keys grant access to your GCP account, keep this file private (e.g. add `*.json` to your `.gitignore` so you don't accidentally add it to GitHub) 132 | 133 | 7. Update the following variables: 134 | * In `app.py`, change the existing GCP key path to your key path: 135 | ``` 136 | # Google Cloud Services look for these when your app runs 137 | 138 | # Old 139 | os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "daniels-dl-playground-4edbcb2e6e37.json" 140 | 141 | # New 142 | os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "PATH_TO_YOUR_KEY.json" 143 | ``` 144 | * In `app.py`, change the GCP project and region to your GCP project and region 145 | ``` 146 | # Old 147 | PROJECT = "daniels-dl-playground" 148 | REGION = "us-central1" 149 | 150 | # New 151 | PROJECT = "YOUR_GCP_PROJECT_NAME" 152 | REGION = "YOUR_GCP_REGION" 153 | ``` 154 | * In `utils.py`, change the value of the `"model_name"` key of `"model_1"` to your model name: 155 | ``` 156 | # Old 157 | classes_and_models = { 158 | "model_1": { 159 | "classes": base_classes, 160 | "model_name": "efficientnet_model_1_10_classes" 161 | } 162 | } 163 | 164 | # New 165 | classes_and_models = { 166 | "model_1": { 167 | "classes": base_classes, 168 | "model_name": "YOUR_MODEL_NAME" 169 | } 170 | } 171 | ``` 172 | 173 | 8. 
Retry the app to see if it works (refresh the Streamlit app by pressing R or refreshing the page and then reupload an image and click "Predict") 174 | 175 | ![what you'll see when you click the predict button and your model is hosted correctly](https://raw.githubusercontent.com/mrdbourke/cs329s-ml-deployment-tutorial/main/images/streamlit-predict-button-clicked.png) 176 | 177 | ### 3. Deploying the whole app to GCP 178 | 179 | > Okay, I've fixed the permissions error, how do I deploy my model/app? 180 | 181 | I'm glad you asked... 182 | 183 | 1. run `make gcloud-deploy`... wait 5-10 mins and your app will be on App Engine (as long as you've activated the App Engine API) 184 | 185 | ...and you're done 186 | 187 | > But wait, what happens when you run `make gcloud-deploy`? 188 | 189 | When you run `make gcloud-deploy`, the `gcloud-deploy` command within the Makefile ([`food-vision/Makefile`](https://github.com/mrdbourke/cs329s-ml-deployment-tutorial/blob/main/food-vision/Makefile)) gets triggered. 190 | 191 | `make gcloud-deploy` is actually an alias for running: 192 | 193 | ``` 194 | gcloud app deploy app.yaml 195 | ``` 196 | 197 | This is `gcloud`'s way of saying "Hey, Google Cloud, kick off the steps you need to do to get our locally running app (`food-vision/app.py`) running on App Engine." 198 | 199 | To do this, the `gcloud app deploy` command does a number of things: 200 | * Our app is put into a [Docker container](https://www.docker.com/resources/what-container) defined by [`[food-vision/Dockerfile]`](https://github.com/mrdbourke/cs329s-ml-deployment-tutorial/blob/main/food-vision/Dockerfile) (imagine a Docker container as a box which contains our locally running app and everything it needs to run, once it's in the box, the box can be run anywhere Docker is available and it should work and the Dockerfile defines how the container should be created). 
201 | * Once the Docker container is created, it becomes a Docker image (confusing, I know but think of a Docker image as an immutable Docker container, e.g. it won't change when we move it somewhere). 202 | * The Docker image is then uploaded to [Google Container Registry (GCR)](https://cloud.google.com/container-registry), Google's place for hosting Docker images. 203 | * Once our Docker image is hosted on GCR, it gets deployed to an App Engine instance (think a computer just like ours but running online, where other people can access it). 204 | * The App Engine instance is defined by the instructions in [`food-vision/app.yaml`](https://github.com/mrdbourke/cs329s-ml-deployment-tutorial/blob/main/food-vision/app.yaml), if you check out this file you'll notice it's quite simple, it has two lines: 205 | ``` 206 | runtime: custom # we want to run our own custom Docker container 207 | env: flex # we want our App Engine to be flexible and install our various dependencies (in requirements.txt) 208 | ``` 209 | 210 | Seems like a lot right? 211 | 212 | And it is, but once you've had a little practice with each, you'll start to realise there's a specific reason behind each of them. 213 | 214 | If all the steps executed correctly, you should see your app running live on App Engine under a URL similar to: 215 | 216 | ``` 217 | http://YOUR_PROJECT_NAME.ue.r.appspot.com/ 218 | ``` 219 | 220 | Which should look exactly like our app running locally! 221 | 222 | ![our streamlit app running on App Engine](https://raw.githubusercontent.com/mrdbourke/cs329s-ml-deployment-tutorial/main/images/streamlit-app-on-app-engine.png) 223 | 224 | ## Breaking down `food-vision` 225 | 226 | > What do all the files in `food-vision` do? 227 | 228 | There's a bunch of files in our [`food-vision` directory](https://github.com/mrdbourke/cs329s-ml-deployment-tutorial/tree/main/food-vision) and seeing them for the first time can be confusing. So here's a quick one-liner for each. 
229 | 230 | * `.dockerignore` - files/folders to ignore when our Docker container is being created (similar to how `.gitignore` tells what files/folders to ignore when committing). 231 | * `Dockerfile` - instructions for how our Docker container (a box with all of what our app needs to run) should be created. 232 | * `Makefile` - a handy script for executing commands like `make gcloud-deploy` on the command line which run larger commands (this saves us typing large commands all the time, see [What is a Makefile?](https://www.google.com/search?client=safari&rls=en&q=what+is+a+makefile&ie=UTF-8&oe=UTF-8) for more). 233 | * `SessionState.py`- a Python script to help our Streamlit app maintain state (not delete everything) when we click a button, see the [Streamlit forums for more](https://discuss.streamlit.io/t/is-there-any-working-example-for-session-state-for-streamlit-version-0-63-1/4551/2). 234 | * `app.py` - our Food Vision 👁🍔 app built with [Streamlit](http://streamlit.io/). 235 | * `app.yaml` - the instructions for what type of instance App Engine should create when we deploy our app. 236 | * `requirements.txt`- all of the dependencies required to run `app.py`. 237 | * `utils.py` - helper functions used in `app.py` (this prevents our app from getting too large). 238 | 239 | ## Where else your app will break 240 | 241 | During the tutorial (see [timestamp 1:32:31](https://youtu.be/fw6NMQrYc6w?t=5551)), we saw the app we've deployed is far from perfect and we saw a couple of places where our app will break, but there's one more: 242 | 243 | The default app (the one you'll get when you clone the repo) works with 3 models: 244 | * Model 1: 10 food classes from [Food101](https://www.kaggle.com/dansbecker/food-101). 245 | * Model 2: 11 food classes from Food101. 246 | * Model 3: 11 food classes from Food101 + 1 not_food class (random images from ImageNet). 
247 | 248 | All of these models can be trained using [`model_training.ipynb`](https://github.com/mrdbourke/cs329s-ml-deployment-tutorial/blob/main/model_training.ipynb), however, if you don't have access to all 3, your app will break if you choose anything other than Model 1 in the sidebar (the app requires at least 1 model to run). 249 | 250 | ## Learn more 251 | 252 | > Where can I learn all of this? 253 | 254 | Just like there's an infinite number of ways you can construct deep learning neural networks with different layers, what we've done here is only *one* way you can deploy machine learning models/applications with Google Cloud (other cloud services have similar offerings as well). 255 | 256 | If you'd like to learn more about Google Cloud, I'd recommend [Google's Qwiklabs](https://google.qwiklabs.com/), here you'll get hands-on experience using Google Cloud for different use cases (all for free). 257 | 258 | If you'd like to learn more about how software engineering crosses over with machine learning, I'd recommend the following blogs: 259 | 260 | * LJ Miranda's [How to improve software engineering skills as a researcher](https://ljvmiranda921.github.io/notebook/2020/11/15/data-science-swe/) 261 | * Mark Douthwaite's [software engineering and machine learning blog](https://mark.douthwaite.io/) 262 | 263 | For more on the concept of the "data flywheel" (discussed during the tutorial), check out Josh Tobin's talk [A Missing Link in the Machine Learning Infrastructure Stack](https://youtu.be/o4q_ljRkXqw). 264 | 265 | ## Extensions 266 | 267 | > How can I extend this app? 268 | 269 | **CI/CD** - you'll hear this a lot when you start building and shipping software. It stands for "continuous integration/continuous delivery". I think of it like this, say you make a change to your app and you'd like to push it to your users immediately, you could have a service such as [GitHub Actions](https://github.com/features/actions) watch for changes in your GitHub repo. 
If a change occurs on a certain branch, GitHub Actions performs steps very similar to what we've done here and redeploys your (updated) app automatically. 270 | * Mark Douthwaite has a great blog post on [CI/CD with GitHub Actions](https://mark.douthwaite.io/continuous-training-and-delivery/). 271 | 272 | **Codify everything!** - when deploying our app, we did a lot of clicking around the Google Cloud console, however you can do all of what we did using the [`gcloud` SDK](https://cloud.google.com/sdk), this means you could automate everything we've done and make the whole process far less manual! 273 | 274 | ## Questions? 275 | 276 | Start a [discussion](https://github.com/mrdbourke/cs329s-ml-deployment-tutorial/discussions) or send me a message: daniel at mrdbourke dot com. 277 | -------------------------------------------------------------------------------- /food-images/coffee.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mrdbourke/cs329s-ml-deployment-tutorial/a941ecbdf5bd09a00bf19e9d68fed56273fffa2f/food-images/coffee.jpeg -------------------------------------------------------------------------------- /food-images/donuts.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mrdbourke/cs329s-ml-deployment-tutorial/a941ecbdf5bd09a00bf19e9d68fed56273fffa2f/food-images/donuts.jpeg -------------------------------------------------------------------------------- /food-images/hamburger.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mrdbourke/cs329s-ml-deployment-tutorial/a941ecbdf5bd09a00bf19e9d68fed56273fffa2f/food-images/hamburger.jpeg -------------------------------------------------------------------------------- /food-images/ice_cream.jpeg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mrdbourke/cs329s-ml-deployment-tutorial/a941ecbdf5bd09a00bf19e9d68fed56273fffa2f/food-images/ice_cream.jpeg -------------------------------------------------------------------------------- /food-images/pizza.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mrdbourke/cs329s-ml-deployment-tutorial/a941ecbdf5bd09a00bf19e9d68fed56273fffa2f/food-images/pizza.jpeg -------------------------------------------------------------------------------- /food-images/plant.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mrdbourke/cs329s-ml-deployment-tutorial/a941ecbdf5bd09a00bf19e9d68fed56273fffa2f/food-images/plant.jpeg -------------------------------------------------------------------------------- /food-images/plant_2.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mrdbourke/cs329s-ml-deployment-tutorial/a941ecbdf5bd09a00bf19e9d68fed56273fffa2f/food-images/plant_2.jpeg -------------------------------------------------------------------------------- /food-images/steak.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mrdbourke/cs329s-ml-deployment-tutorial/a941ecbdf5bd09a00bf19e9d68fed56273fffa2f/food-images/steak.jpeg -------------------------------------------------------------------------------- /food-images/truck.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mrdbourke/cs329s-ml-deployment-tutorial/a941ecbdf5bd09a00bf19e9d68fed56273fffa2f/food-images/truck.jpeg -------------------------------------------------------------------------------- /food-images/xbox.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mrdbourke/cs329s-ml-deployment-tutorial/a941ecbdf5bd09a00bf19e9d68fed56273fffa2f/food-images/xbox.jpg -------------------------------------------------------------------------------- /food-vision/.dockerignore: -------------------------------------------------------------------------------- 1 | # *.json 2 | *.jpg 3 | *.jpeg 4 | *.git 5 | *.key 6 | env 7 | images 8 | keynote-images 9 | 10 | # Python stuff 11 | *.pyc 12 | *.pyo 13 | *.pyd 14 | __pycache__ 15 | .pytest_cache 16 | -------------------------------------------------------------------------------- /food-vision/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.7 2 | 3 | ## App engine stuff 4 | # Expose port you want your app on 5 | EXPOSE 8080 6 | 7 | # Upgrade pip 8 | RUN pip install -U pip 9 | 10 | COPY requirements.txt app/requirements.txt 11 | RUN pip install -r app/requirements.txt 12 | 13 | # Create a new directory for app (keep it in its own directory) 14 | COPY . /app 15 | WORKDIR app 16 | 17 | # Run 18 | ENTRYPOINT ["streamlit", "run", "app.py", "--server.port=8080", "--server.address=0.0.0.0"] 19 | -------------------------------------------------------------------------------- /food-vision/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: run run-container gcloud-deploy 2 | 3 | APP_NAME ?= food-vision 4 | 5 | run: 6 | @streamlit run app.py --server.port=8080 --server.address=0.0.0.0 7 | 8 | run-container: 9 | @docker build . -t ${APP_NAME} 10 | @docker run -p 8080:8080 ${APP_NAME} 11 | 12 | gcloud-deploy: 13 | @gcloud app deploy app.yaml 14 | 15 | -------------------------------------------------------------------------------- /food-vision/SessionState.py: -------------------------------------------------------------------------------- 1 | """Hack to add per-session state to Streamlit. 
2 | 3 | Usage 4 | ----- 5 | 6 | >>> import SessionState 7 | >>> 8 | >>> session_state = SessionState.get(user_name='', favorite_color='black') 9 | >>> session_state.user_name 10 | '' 11 | >>> session_state.user_name = 'Mary' 12 | >>> session_state.favorite_color 13 | 'black' 14 | 15 | Since you set user_name above, next time your script runs this will be the 16 | result: 17 | >>> session_state = get(user_name='', favorite_color='black') 18 | >>> session_state.user_name 19 | 'Mary' 20 | 21 | """ 22 | try: 23 | import streamlit.ReportThread as ReportThread 24 | from streamlit.server.Server import Server 25 | except Exception: 26 | # Streamlit >= 0.65.0 27 | import streamlit.report_thread as ReportThread 28 | from streamlit.server.server import Server 29 | 30 | 31 | class SessionState(object): 32 | def __init__(self, **kwargs): 33 | """A new SessionState object. 34 | 35 | Parameters 36 | ---------- 37 | **kwargs : any 38 | Default values for the session state. 39 | 40 | Example 41 | ------- 42 | >>> session_state = SessionState(user_name='', favorite_color='black') 43 | >>> session_state.user_name = 'Mary' 44 | '' 45 | >>> session_state.favorite_color 46 | 'black' 47 | 48 | """ 49 | for key, val in kwargs.items(): 50 | setattr(self, key, val) 51 | 52 | 53 | def get(**kwargs): 54 | """Gets a SessionState object for the current session. 55 | 56 | Creates a new object if necessary. 57 | 58 | Parameters 59 | ---------- 60 | **kwargs : any 61 | Default values you want to add to the session state, if we're creating a 62 | new one. 
63 | 64 | Example 65 | ------- 66 | >>> session_state = get(user_name='', favorite_color='black') 67 | >>> session_state.user_name 68 | '' 69 | >>> session_state.user_name = 'Mary' 70 | >>> session_state.favorite_color 71 | 'black' 72 | 73 | Since you set user_name above, next time your script runs this will be the 74 | result: 75 | >>> session_state = get(user_name='', favorite_color='black') 76 | >>> session_state.user_name 77 | 'Mary' 78 | 79 | """ 80 | # Hack to get the session object from Streamlit. 81 | 82 | ctx = ReportThread.get_report_ctx() 83 | 84 | this_session = None 85 | 86 | current_server = Server.get_current() 87 | if hasattr(current_server, '_session_infos'): 88 | # Streamlit < 0.56 89 | session_infos = Server.get_current()._session_infos.values() 90 | else: 91 | session_infos = Server.get_current()._session_info_by_id.values() 92 | 93 | for session_info in session_infos: 94 | s = session_info.session 95 | if ( 96 | # Streamlit < 0.54.0 97 | (hasattr(s, '_main_dg') and s._main_dg == ctx.main_dg) 98 | or 99 | # Streamlit >= 0.54.0 100 | (not hasattr(s, '_main_dg') and s.enqueue == ctx.enqueue) 101 | or 102 | # Streamlit >= 0.65.2 103 | (not hasattr(s, '_main_dg') and s._uploaded_file_mgr == ctx.uploaded_file_mgr) 104 | ): 105 | this_session = s 106 | 107 | if this_session is None: 108 | raise RuntimeError( 109 | "Oh noes. Couldn't get your Streamlit Session object. " 110 | 'Are you doing something fancy with threads?') 111 | 112 | # Got the session object! Now let's attach some state into it. 
113 | 114 | if not hasattr(this_session, '_custom_session_state'): 115 | this_session._custom_session_state = SessionState(**kwargs) 116 | 117 | return this_session._custom_session_state -------------------------------------------------------------------------------- /food-vision/app.py: -------------------------------------------------------------------------------- 1 | ### Script for CS329s ML Deployment Lec 2 | import os 3 | import json 4 | import requests 5 | import SessionState 6 | import streamlit as st 7 | import tensorflow as tf 8 | from utils import load_and_prep_image, classes_and_models, update_logger, predict_json 9 | 10 | # Setup environment credentials (you'll need to change these) 11 | os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "daniels-dl-playground-4edbcb2e6e37.json" # change for your GCP key 12 | PROJECT = "daniels-dl-playground" # change for your GCP project 13 | REGION = "us-central1" # change for your GCP region (where your model is hosted) 14 | 15 | ### Streamlit code (works as a straigtht-forward script) ### 16 | st.title("Welcome to Food Vision 🍔📸") 17 | st.header("Identify what's in your food photos!") 18 | 19 | @st.cache # cache the function so predictions aren't always redone (Streamlit refreshes every click) 20 | def make_prediction(image, model, class_names): 21 | """ 22 | Takes an image and uses model (a trained TensorFlow model) to make a 23 | prediction. 
24 | 25 | Returns: 26 | image (preproccessed) 27 | pred_class (prediction class from class_names) 28 | pred_conf (model confidence) 29 | """ 30 | image = load_and_prep_image(image) 31 | # Turn tensors into int16 (saves a lot of space, ML Engine has a limit of 1.5MB per request) 32 | image = tf.cast(tf.expand_dims(image, axis=0), tf.int16) 33 | # image = tf.expand_dims(image, axis=0) 34 | preds = predict_json(project=PROJECT, 35 | region=REGION, 36 | model=model, 37 | instances=image) 38 | pred_class = class_names[tf.argmax(preds[0])] 39 | pred_conf = tf.reduce_max(preds[0]) 40 | return image, pred_class, pred_conf 41 | 42 | # Pick the model version 43 | choose_model = st.sidebar.selectbox( 44 | "Pick model you'd like to use", 45 | ("Model 1 (10 food classes)", # original 10 classes 46 | "Model 2 (11 food classes)", # original 10 classes + donuts 47 | "Model 3 (11 food classes + non-food class)") # 11 classes (same as above) + not_food class 48 | ) 49 | 50 | # Model choice logic 51 | if choose_model == "Model 1 (10 food classes)": 52 | CLASSES = classes_and_models["model_1"]["classes"] 53 | MODEL = classes_and_models["model_1"]["model_name"] 54 | elif choose_model == "Model 2 (11 food classes)": 55 | CLASSES = classes_and_models["model_2"]["classes"] 56 | MODEL = classes_and_models["model_2"]["model_name"] 57 | else: 58 | CLASSES = classes_and_models["model_3"]["classes"] 59 | MODEL = classes_and_models["model_3"]["model_name"] 60 | 61 | # Display info about model and classes 62 | if st.checkbox("Show classes"): 63 | st.write(f"You chose {MODEL}, these are the classes of food it can identify:\n", CLASSES) 64 | 65 | # File uploader allows user to add their own image 66 | uploaded_file = st.file_uploader(label="Upload an image of food", 67 | type=["png", "jpeg", "jpg"]) 68 | 69 | # Setup session state to remember state of app so refresh isn't always needed 70 | # See: https://discuss.streamlit.io/t/the-button-inside-a-button-seems-to-reset-the-whole-app-why/1051/11 71 
| session_state = SessionState.get(pred_button=False) 72 | 73 | # Create logic for app flow 74 | if not uploaded_file: 75 | st.warning("Please upload an image.") 76 | st.stop() 77 | else: 78 | session_state.uploaded_image = uploaded_file.read() 79 | st.image(session_state.uploaded_image, use_column_width=True) 80 | pred_button = st.button("Predict") 81 | 82 | # Did the user press the predict button? 83 | if pred_button: 84 | session_state.pred_button = True 85 | 86 | # And if they did... 87 | if session_state.pred_button: 88 | session_state.image, session_state.pred_class, session_state.pred_conf = make_prediction(session_state.uploaded_image, model=MODEL, class_names=CLASSES) 89 | st.write(f"Prediction: {session_state.pred_class}, \ 90 | Confidence: {session_state.pred_conf:.3f}") 91 | 92 | # Create feedback mechanism (building a data flywheel) 93 | session_state.feedback = st.selectbox( 94 | "Is this correct?", 95 | ("Select an option", "Yes", "No")) 96 | if session_state.feedback == "Select an option": 97 | pass 98 | elif session_state.feedback == "Yes": 99 | st.write("Thank you for your feedback!") 100 | # Log prediction information to terminal (this could be stored in Big Query or something...) 101 | print(update_logger(image=session_state.image, 102 | model_used=MODEL, 103 | pred_class=session_state.pred_class, 104 | pred_conf=session_state.pred_conf, 105 | correct=True)) 106 | elif session_state.feedback == "No": 107 | session_state.correct_class = st.text_input("What should the correct label be?") 108 | if session_state.correct_class: 109 | st.write("Thank you for that, we'll use your help to make our model better!") 110 | # Log prediction information to terminal (this could be stored in Big Query or something...) 
111 | print(update_logger(image=session_state.image, 112 | model_used=MODEL, 113 | pred_class=session_state.pred_class, 114 | pred_conf=session_state.pred_conf, 115 | correct=False, 116 | user_label=session_state.correct_class)) 117 | 118 | # TODO: code could be cleaned up to work with a main() function... 119 | # if __name__ == "__main__": 120 | # main() -------------------------------------------------------------------------------- /food-vision/app.yaml: -------------------------------------------------------------------------------- 1 | runtime: custom 2 | env: flex 3 | -------------------------------------------------------------------------------- /food-vision/requirements.txt: -------------------------------------------------------------------------------- 1 | requests==2.25.1 2 | tensorflow==2.4.1 3 | streamlit==0.76.0 4 | google_api_python_client==1.12.8 5 | protobuf==3.14.0 6 | -------------------------------------------------------------------------------- /food-vision/utils.py: -------------------------------------------------------------------------------- 1 | # Utils for preprocessing data etc 2 | import tensorflow as tf 3 | import googleapiclient.discovery 4 | from google.api_core.client_options import ClientOptions 5 | 6 | base_classes = ['chicken_curry', 7 | 'chicken_wings', 8 | 'fried_rice', 9 | 'grilled_salmon', 10 | 'hamburger', 11 | 'ice_cream', 12 | 'pizza', 13 | 'ramen', 14 | 'steak', 15 | 'sushi'] 16 | 17 | classes_and_models = { 18 | "model_1": { 19 | "classes": base_classes, 20 | "model_name": "efficientnet_model_1_10_classes" # change to be your model name 21 | }, 22 | "model_2": { 23 | "classes": sorted(base_classes + ["donut"]), 24 | "model_name": "efficientnet_model_2_11_classes" 25 | }, 26 | "model_3": { 27 | "classes": sorted(base_classes + ["donut", "not_food"]), 28 | "model_name": "efficientnet_model_3_12_classes" 29 | } 30 | } 31 | 32 | def predict_json(project, region, model, instances, version=None): 33 | """Send json data to a 
deployed model for prediction. 34 | 35 | Args: 36 | project (str): project where the Cloud ML Engine Model is deployed. 37 | model (str): model name. 38 | instances ([Mapping[str: Any]]): Keys should be the names of Tensors 39 | your deployed model expects as inputs. Values should be datatypes 40 | convertible to Tensors, or (potentially nested) lists of datatypes 41 | convertible to Tensors. 42 | version (str): version of the model to target. 43 | Returns: 44 | Mapping[str: any]: dictionary of prediction results defined by the 45 | model. 46 | """ 47 | # Create the ML Engine service object 48 | prefix = "{}-ml".format(region) if region else "ml" 49 | api_endpoint = "https://{}.googleapis.com".format(prefix) 50 | client_options = ClientOptions(api_endpoint=api_endpoint) 51 | 52 | # Setup model path 53 | model_path = "projects/{}/models/{}".format(project, model) 54 | if version is not None: 55 | model_path += "/versions/{}".format(version) 56 | 57 | # Create ML engine resource endpoint and input data 58 | ml_resource = googleapiclient.discovery.build( 59 | "ml", "v1", cache_discovery=False, client_options=client_options).projects() 60 | instances_list = instances.numpy().tolist() # turn input into list (ML Engine wants JSON) 61 | 62 | input_data_json = {"signature_name": "serving_default", 63 | "instances": instances_list} 64 | 65 | request = ml_resource.predict(name=model_path, body=input_data_json) 66 | response = request.execute() 67 | 68 | # # ALT: Create model api 69 | # model_api = api_endpoint + model_path + ":predict" 70 | # headers = {"Authorization": "Bearer " + token} 71 | # response = requests.post(model_api, json=input_data_json, headers=headers) 72 | 73 | if "error" in response: 74 | raise RuntimeError(response["error"]) 75 | 76 | return response["predictions"] 77 | 78 | # Create a function to import an image and resize it to be able to be used with our model 79 | def load_and_prep_image(filename, img_shape=224, rescale=False): 80 | """ 81 | Reads in 
an image from filename, turns it into a tensor and reshapes into 82 | (224, 224, 3). 83 | """ 84 | # Decode it into a tensor 85 | # img = tf.io.decode_image(filename) # no channels=3 means model will break for some PNG's (4 channels) 86 | img = tf.io.decode_image(filename, channels=3) # make sure there's 3 colour channels (for PNG's) 87 | # Resize the image 88 | img = tf.image.resize(img, [img_shape, img_shape]) 89 | # Rescale the image (get all values between 0 and 1) 90 | if rescale: 91 | return img/255. 92 | else: 93 | return img 94 | 95 | def update_logger(image, model_used, pred_class, pred_conf, correct=False, user_label=None): 96 | """ 97 | Function for tracking feedback given in app, updates and reutrns 98 | logger dictionary. 99 | """ 100 | logger = { 101 | "image": image, 102 | "model_used": model_used, 103 | "pred_class": pred_class, 104 | "pred_conf": pred_conf, 105 | "correct": correct, 106 | "user_label": user_label 107 | } 108 | return logger 109 | -------------------------------------------------------------------------------- /images/food-vision-demo-cropped.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mrdbourke/cs329s-ml-deployment-tutorial/a941ecbdf5bd09a00bf19e9d68fed56273fffa2f/images/food-vision-demo-cropped.gif -------------------------------------------------------------------------------- /images/gcp-connecting-a-model-version-to-google-storage.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mrdbourke/cs329s-ml-deployment-tutorial/a941ecbdf5bd09a00bf19e9d68fed56273fffa2f/images/gcp-connecting-a-model-version-to-google-storage.png -------------------------------------------------------------------------------- /images/gcp-creating-a-bucket.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mrdbourke/cs329s-ml-deployment-tutorial/a941ecbdf5bd09a00bf19e9d68fed56273fffa2f/images/gcp-creating-a-bucket.png -------------------------------------------------------------------------------- /images/gcp-creating-a-model-on-ai-platform.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mrdbourke/cs329s-ml-deployment-tutorial/a941ecbdf5bd09a00bf19e9d68fed56273fffa2f/images/gcp-creating-a-model-on-ai-platform.png -------------------------------------------------------------------------------- /images/gcp-creating-a-model-version.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mrdbourke/cs329s-ml-deployment-tutorial/a941ecbdf5bd09a00bf19e9d68fed56273fffa2f/images/gcp-creating-a-model-version.png -------------------------------------------------------------------------------- /images/gcp-ml-engine-permissions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mrdbourke/cs329s-ml-deployment-tutorial/a941ecbdf5bd09a00bf19e9d68fed56273fffa2f/images/gcp-ml-engine-permissions.png -------------------------------------------------------------------------------- /images/make-gcloud-deploy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mrdbourke/cs329s-ml-deployment-tutorial/a941ecbdf5bd09a00bf19e9d68fed56273fffa2f/images/make-gcloud-deploy.png -------------------------------------------------------------------------------- /images/streamlit-app-first-error-youll-run-into.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mrdbourke/cs329s-ml-deployment-tutorial/a941ecbdf5bd09a00bf19e9d68fed56273fffa2f/images/streamlit-app-first-error-youll-run-into.png 
-------------------------------------------------------------------------------- /images/streamlit-app-on-app-engine.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mrdbourke/cs329s-ml-deployment-tutorial/a941ecbdf5bd09a00bf19e9d68fed56273fffa2f/images/streamlit-app-on-app-engine.png -------------------------------------------------------------------------------- /images/streamlit-app-what-you-should-see.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mrdbourke/cs329s-ml-deployment-tutorial/a941ecbdf5bd09a00bf19e9d68fed56273fffa2f/images/streamlit-app-what-you-should-see.png -------------------------------------------------------------------------------- /images/streamlit-predict-button-clicked.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mrdbourke/cs329s-ml-deployment-tutorial/a941ecbdf5bd09a00bf19e9d68fed56273fffa2f/images/streamlit-predict-button-clicked.png --------------------------------------------------------------------------------