├── LICENSE ├── README.md ├── doc └── source │ └── images │ ├── EnterIBMID.jpg │ ├── PowerAITrial2.jpg │ ├── architecture.png │ ├── demo.gif │ ├── launchtrialbutton.jpg │ ├── powerai-notebook-terminal.png │ └── welcomepage.png ├── notebooks └── Hotdog_NotHotdog_CNN.ipynb └── seefood.mlmodel /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | 203 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # seeFOOD CNN, a binary classifier written in Keras and converted to CoreML 2 | 3 | In this Code Pattern, we will walk you through how to use GPU hardware in the Cloud with Nimbix, to quickly train and deploy a Convolutional Neural Network model that can tell you whether your lunchtime nutritional choice is the right one -- all with the camera of the mobile phone in your pocket. 
All you need are some photos and descriptions of them. You can be up and running with a model you can stream video through in no time flat. 4 | 5 | I'm sure you've seen the episode of Silicon Valley; to give you an idea of the amazing technology we are going to share with you today, here's a clip: 6 | 7 |

8 | 9 | 10 | 11 |

12 | 13 | So you want to identify hotdogs -- great! Summer is just around the corner, and you can never be too careful with what you're eating. You too can develop an app that identifies **Hot Dog** and the alternatives... **Not Hot Dog**. 14 | 15 | This repo will walk you through the steps and technologies needed to train a Deep Learning model using a Convolutional Neural Network, evaluate its accuracy, and save it into a format that can be loaded on an iOS device. With a model converted to Apple's CoreML format we will load a `.mlmodel` into an open source project: [Lumina](https://github.com/dokun1/lumina). Within Lumina you can quickly import and activate your `.mlmodel`, and stream object predictions in real time from the camera feed... Let me repeat: you can stream object predictions from the camera feed in real time -- and you can do this with one line of code. 16 | 17 | When the reader has completed this Code Pattern, they will understand how to: 18 | 19 | * Run a Jupyter Notebook with PowerAI 20 | * Train a Convolutional Neural Network model with Keras 21 | * Convert a Keras model to Apple's CoreML 22 | * Use Lumina to create an iOS app that uses your binary classifier 23 | 24 | ![](doc/source/images/architecture.png) 25 | 26 | ## Flow 27 | 1. Run the Jupyter Notebook with PowerAI 28 | 2. Download the `seefood.mlmodel` file from Nimbix 29 | 3. Implement your model with Lumina 30 | 4. Run your app 31 | 32 | ## Included components 33 | * [IBM Power Systems](https://www-03.ibm.com/systems/power/): A server built with open technologies and designed for mission-critical applications. 34 | * [IBM Power AI](https://www.ibm.com/ms-en/marketplace/deep-learning-platform): A software platform that makes deep learning, machine learning, and AI more accessible and better performing. 35 | * [Jupyter Notebook](http://jupyter.org/): An open source web application that allows you to create and share documents that contain live code, equations, visualizations, and explanatory text. 36 | * [Nimbix Cloud Computing Platform](https://www.nimbix.net/): An HPC & Cloud Supercomputing platform enabling engineers, scientists & developers to build, compute, analyze, and scale simulations in the cloud. 37 | * [Keras](https://keras.io/): A high-level neural networks API, written in Python, and capable of running on top of TensorFlow, CNTK, or Theano. 38 | * [TensorFlow](https://www.tensorflow.org/): An open source software library for numerical computation using data flow graphs. 39 | 40 | ## Featured technologies 41 | * [CoreMLTools](https://github.com/apple/coremltools): Integrates trained machine learning models into your iOS app. 42 | * [Lumina](https://github.com/dokun1/lumina): An iOS camera designed in Swift that can use any CoreML model for object recognition, as well as streaming video, images, and QR/bar codes. 43 | * [Artificial Intelligence](https://medium.com/ibm-data-science-experience): Artificial intelligence can be applied to disparate solution spaces to deliver disruptive technologies. 44 | * [Cloud](https://www.ibm.com/developerworks/learn/cloud/): Accessing computer and information technology resources through the Internet. 45 | * [Data Science](https://medium.com/ibm-data-science-experience/): Systems and scientific methods to analyze structured and unstructured data in order to extract knowledge and insights. 46 | * [Mobile](https://mobilefirstplatform.ibmcloud.com/): Systems of engagement are increasingly using mobile technology as the platform for delivery.
47 | * [Python](https://www.python.org/): Python is a programming language that lets you work more quickly and integrate your systems more effectively. 48 | 49 | 53 | 54 | # Steps 55 | 56 | Follow these steps to set up and run this *phenomenon sweeping the vegan meat industry*. The steps are described in detail below. 57 | 58 | ## Using Nimbix and PowerAI 59 | 60 | 1. [Get 24 hours of free access to the PowerAI platform](#1-get-24-hours-of-free-access-to-the-powerai-platform) 61 | 1. [Access and start the Jupyter Notebook](#2-access-and-start-the-jupyter-notebook) 62 | 1. [Run the notebook](#3-run-the-notebook) 63 | 1. [Save and share your model](#4-save-and-share-your-model) 64 | 1. [Implement your model with Lumina](#5-implement-your-model-with-lumina) 65 | 1. [End your trial](#6-end-your-trial) 66 | 67 | ### 1. Get 24 hours of free access to the PowerAI platform 68 | 69 | IBM has partnered with Nimbix to provide cognitive developers a trial 70 | account that provides 24 hours of free processing time on the PowerAI 71 | platform. Follow these steps to register for access to Nimbix to try 72 | the PowerAI Cognitive Code Patterns and explore the platform. 73 | 74 | Go to the [IBM Marketplace PowerAI Portal](https://www.ibm.com/us-en/marketplace/deep-learning-platform), and click `Start your free trial`. 75 | 76 | On the IBM PowerAI Trial page, shown below, enter the required information to sign up for an IBM account and click `Continue`. If you already have an IBM ID, click `Already have an account? Log in`, enter your credentials and click `Continue`. 77 | 78 | ![](doc/source/images/EnterIBMID.jpg) 79 | 80 | On the **Almost there…** page, shown below, enter the required information and click `Continue` to complete the registration and launch the **IBM Marketplace Products and Services** page. 81 | 82 | ![](doc/source/images/PowerAITrial2.jpg) 83 | 84 | Your **IBM Marketplace Products and Services** page displays all offerings that are available to you; the PowerAI Trial should now be one of them. From the PowerAI Trial section, click `Launch`, as shown below, to launch the **IBM PowerAI trial** page. 85 | 86 | ![](doc/source/images/launchtrialbutton.jpg) 87 | 88 | The **Welcome to IBM PowerAI Trial** page provides instructions for accessing the trial, as shown below. Alternatively, you will receive an email confirming your registration with similar instructions that you can follow to start the trial. 89 | 90 | ![](doc/source/images/welcomepage.png) 91 | 92 | Summary of steps for starting the trial: 93 | 94 | * Use your browser to go to the URL as directed, using the IP address provided in the table (it may be a fully qualified domain name). 95 | 96 | * Log in with the user ID and password given in the table. 97 | 98 | ### 2. Access and start the Jupyter Notebook 99 | 100 | Use `git clone` to download the example notebook. 101 | 102 | * Get a new terminal window by clicking on the `New` pull-down and selecting `Terminal`. 103 | 104 | ![](doc/source/images/powerai-notebook-terminal.png) 105 | 106 | * Run the following command to clone the git repo: 107 | 108 | ```commandline 109 | git clone https://github.com/justinmccoy/keras-binary-classifier 110 | ``` 111 | 112 | * Once done, you can exit the terminal and return to the notebook browser. Use the `Files` tab and click on `keras-binary-classifier`, then `notebooks`, and then `Hotdog_NotHotdog_CNN.ipynb` to open the notebook. 113 | 114 | ### 3.
Run the notebook 115 | 116 | When a notebook is executed, each code cell in 117 | the notebook is run, in order, from top to bottom. 118 | 119 | Each code cell is selectable and is preceded by a tag in the left margin. The tag 120 | format is `In [x]:`. Depending on the state of the notebook, the `x` can be: 121 | 122 | * A blank, which indicates that the cell has never been executed. 123 | * A number, which represents the relative order in which this code step was executed. 124 | * A `*`, which indicates that the cell is currently executing. 125 | 126 | There are several ways to execute the code cells in your notebook: 127 | 128 | * One cell at a time. 129 | * Select the cell, and then press the `Play` button in the toolbar. 130 | * Batch mode, in sequential order. 131 | * From the `Cell` menu bar, there are several options available. For example, you 132 | can `Run All` cells in your notebook, or you can `Run All Below`, which will 133 | start executing from the first cell under the currently selected cell, and then 134 | continue executing all cells that follow. 135 | 136 | ### 4. Save and share your model 137 | 138 | Because this notebook is running temporarily on a Nimbix 139 | Cloud server, use the following options to save your work: 140 | 141 | Under the `File` menu, there are options to: 142 | 143 | * `Download as...` will download the notebook to your local system. 144 | * `Print Preview` will allow you to print the current state of the 145 | notebook. 146 | 147 | Select `Download as...` and then `seefood.mlmodel` to download your trained food classifier. 148 | 149 | ### 5. Implement your model with Lumina 150 | 151 | You'll need to start an iOS project that uses the **Lumina** framework. You can either clone the repository [here](https://github.com/dokun1/lumina) and use the `LuminaSample` app in the main workspace, or you can make your own iOS app using the framework. Watch [this](https://www.youtube.com/watch?v=8eEAvcy708s) video for more information on using Lumina. 152 | 153 | Once you have a project open with Lumina integrated, make sure you implement a camera with at least the following code: 154 | 155 | ```swift 156 | let camera = LuminaViewController() 157 | camera.delegate = self 158 | camera.streamingModelTypes = [seefood()] 159 | present(camera, animated: true) 160 | ``` 161 | 162 | At this point, your iOS app is already making use of the `CoreML` functionality embedded in Lumina. Now, you need to actually do something with the data returned from it. 163 | 164 | Extend your class to conform to `LuminaDelegate` like so: 165 | 166 | ```swift 167 | extension ViewController: LuminaDelegate { 168 | func streamed(videoFrame: UIImage, with predictions: [LuminaRecognitionResult]?, from controller: LuminaViewController) { 169 | 170 | } 171 | } 172 | ``` 173 | 174 | Results streamed from each video frame are given to you in this delegate method. In this example, you have created a binary classifier, so you should only expect a single result of either `1.0` or `0.0`.
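If it helps to see where that `1.0` or `0.0` comes from, here is a minimal sketch of the Keras side, mirroring the notebook's prediction cells (the weights filename is a placeholder for whichever file you downloaded from the notebook's output directory):

```python
# Minimal sketch: how the notebook's binary classifier maps its sigmoid output to labels.
# 'seefood_model_weights.h5' is a placeholder for the weights file saved by the notebook.
import numpy as np
from keras.models import load_model
from keras.preprocessing import image

CLASSIFICATION = ['HOTDOG', 'NOTHOTDOG']  # index 0 = hot dog, index 1 = not hot dog

model = load_model('seefood_model_weights.h5')
img = image.load_img('hotdog.jpg', target_size=(128, 128))   # same 128x128 input size used in training
x = np.expand_dims(image.img_to_array(img), axis=0)

score = model.predict(x)[0][0]             # sigmoid output in [0, 1]
print(CLASSIFICATION[int(score > 0.5)])    # above 0.5 means "not hot dog", as in the notebook's confusion matrix
```

The Swift code below applies the same idea on the phone: a value at or near `0.0` means hot dog, anything else means not hot dog.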
Lumina has a built-in text label you can use as a prompt, so update your method to make use of it like so: 175 | 176 | ```swift 177 | func streamed(videoFrame: UIImage, with predictions: [LuminaRecognitionResult]?, from controller: LuminaViewController) { 178 | guard let predicted = predictions else { 179 | return 180 | } 181 | guard let value = predicted.first?.predictions?.first else { 182 | return 183 | } 184 | if value.confidence > 0 { 185 | controller.textPrompt = "\(String(describing: predicted.first?.type)): Not Hot Dog" 186 | } else { 187 | controller.textPrompt = "\(String(describing: predicted.first?.type)): Hot Dog" 188 | } 189 | } 190 | ``` 191 | 192 | Run your app, and point the camera at a hot dog, then at anything that isn't a hot dog. The results speak for themselves! 193 | 194 | ### 6. End your trial 195 | 196 | When you are done with your work, please cancel your subscription by issuing the following command in your SSH session or by visiting the `Manage` link on the **My Products and Services** page. 197 | 198 | ```sh 199 | sudo poweroff --force 200 | ``` 201 | 202 | # See the result 203 | 204 |

205 | ![](doc/source/images/demo.gif) 206 |

207 | 208 | 209 | # Links 210 | * [Free Trial to GPU Accelerated HW in the Cloud](https://developer.ibm.com/linuxonpower/cloud-resources/) 211 | * [Learn how to use Lumina in an iOS app](https://www.youtube.com/watch?v=8eEAvcy708s) 212 | 213 | # Learn more 214 | * **Artificial Intelligence Code Patterns**: Enjoyed this Code Pattern? Check out our other [AI Code Patterns](https://developer.ibm.com/code/technologies/artificial-intelligence/). 215 | * **Data Analytics Code Patterns**: Enjoyed this Code Pattern? Check out our other [Data Analytics Code Patterns](https://developer.ibm.com/code/technologies/data-science/) 216 | * **AI and Data Code Pattern Playlist**: Bookmark our [playlist](https://www.youtube.com/playlist?list=PLzUbsvIyrNfknNewObx5N7uGZ5FKH0Fde) with all of our Code Pattern videos 217 | * **Data Science Experience**: Master the art of data science with IBM's [Data Science Experience](https://datascience.ibm.com/) 218 | * **PowerAI**: Get started or get scaling, faster, with a software distribution for machine learning running on the Enterprise Platform for AI: [IBM Power Systems](https://www.ibm.com/ms-en/marketplace/deep-learning-platform) 219 | 220 | # License 221 | [Apache 2.0](LICENSE) 222 | 223 | -------------------------------------------------------------------------------- /doc/source/images/EnterIBMID.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/justinmccoy/keras-binary-classifier/f850420f872936b338259d0cf6dc9a892614e293/doc/source/images/EnterIBMID.jpg -------------------------------------------------------------------------------- /doc/source/images/PowerAITrial2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/justinmccoy/keras-binary-classifier/f850420f872936b338259d0cf6dc9a892614e293/doc/source/images/PowerAITrial2.jpg -------------------------------------------------------------------------------- /doc/source/images/architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/justinmccoy/keras-binary-classifier/f850420f872936b338259d0cf6dc9a892614e293/doc/source/images/architecture.png -------------------------------------------------------------------------------- /doc/source/images/demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/justinmccoy/keras-binary-classifier/f850420f872936b338259d0cf6dc9a892614e293/doc/source/images/demo.gif -------------------------------------------------------------------------------- /doc/source/images/launchtrialbutton.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/justinmccoy/keras-binary-classifier/f850420f872936b338259d0cf6dc9a892614e293/doc/source/images/launchtrialbutton.jpg -------------------------------------------------------------------------------- /doc/source/images/powerai-notebook-terminal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/justinmccoy/keras-binary-classifier/f850420f872936b338259d0cf6dc9a892614e293/doc/source/images/powerai-notebook-terminal.png -------------------------------------------------------------------------------- /doc/source/images/welcomepage.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/justinmccoy/keras-binary-classifier/f850420f872936b338259d0cf6dc9a892614e293/doc/source/images/welcomepage.png -------------------------------------------------------------------------------- /notebooks/Hotdog_NotHotdog_CNN.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Hotdog or NotHotdog?\n", 8 | "## Using PowerAI to Deploy an iOS Model for CoreML\n", 9 | "\n", 10 | "Index 2018 \n", 11 | "\n", 12 | "Yes, really. Justin McCoy and David Okun will walk you through how to use PowerAI in the Cloud with Nimbix, to quickly train and deploy a Model that can tell you whether or not your lunchtime nutritional choice is the right one - all with the camera of the mobile phone in your pocket. All you need are some photos, descriptions of them, and you can be up and running with a model to stream video through in no time flat." 13 | ] 14 | }, 15 | { 16 | "cell_type": "markdown", 17 | "metadata": {}, 18 | "source": [ 19 | "So you want to identify hotdogs, great, summer is just around the corner, and you can never be too careful of what you're eating. \n", 20 | "\n", 21 | "During this demo we will walk you through the steps, and technologies necessary to train a Deep Learning model using a Convolutional Neural Network, saving it into a format that can be loaded on an iOS device.\n", 22 | "\n", 23 | "I'm sure you've seen the eposide of Silicon Valley, let's get started.\n" 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": {}, 29 | "source": [ 30 | "### GPU Resources on the Cloud\n", 31 | "\n", 32 | "![NVIDIA](https://vignette.wikia.nocookie.net/logopedia/images/3/38/Nvidia_logo.png/revision/latest?cb=20120829072950) ![NIMBIX](https://www.nimbix.net/wp-content/uploads/2014/06/LogoRemailer.png)\n", 33 | "![POWERAI](https://mp.s81c.com/8034F2C/dal05/v1/AUTH_db1cfc7b-a055-460b-9274-1fd3f11fe689/cbaa77deff9000906b880d05f1a65f4d/hero_55462c4a-dd4c-4d67-ba05-643f030d51b6.png)\n", 34 | "Using the NIMBIX High Performance Computing Cloud\n", 35 | "\n", 36 | "Signup for a free trial to use IBM\"s Latest AI Hardware and get started." 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": {}, 42 | "source": [ 43 | "## Agenda\n", 44 | "1. Setup Environment \n", 45 | "2. Acquire Data\n", 46 | "3. Build Convolutional Neural Network\n", 47 | "4. Train Convolutional Neural Network\n", 48 | "5. Evaluate Model\n", 49 | "6. Make predictions\n", 50 | "7. Take a closer look at generated filters\n", 51 | "8. Save to CoreML\n", 52 | "9. Transfering Learning" 53 | ] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "metadata": {}, 58 | "source": [ 59 | "# 1. 
Setup Environment" 60 | ] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "metadata": {}, 65 | "source": [ 66 | "### Install and import libraries" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "metadata": { 73 | "collapsed": true 74 | }, 75 | "outputs": [], 76 | "source": [ 77 | "# Late in this Notebook the Keras Model is converted to Apple's coreML format; only Keras 2.0.4\n", 78 | "# is supported as of creation of this notebook\n", 79 | "!sudo pip install keras==2.0.4\n", 80 | "!sudo pip install coremltools" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": null, 86 | "metadata": { 87 | "collapsed": true 88 | }, 89 | "outputs": [], 90 | "source": [ 91 | "from __future__ import division\n", 92 | "\n", 93 | "import json\n", 94 | "from matplotlib import pyplot\n", 95 | "import math\n", 96 | "import numpy as np\n", 97 | "import os\n", 98 | "import sys\n", 99 | "import time\n", 100 | "\n", 101 | "import keras\n", 102 | "from keras.layers import Conv2D\n", 103 | "from keras.layers import MaxPooling2D\n", 104 | "from keras.layers import Flatten\n", 105 | "from keras.layers import Dense\n", 106 | "from keras.layers import Activation\n", 107 | "from keras.layers import Dropout\n", 108 | "from keras.layers import BatchNormalization\n", 109 | "\n", 110 | "from keras.models import Model\n", 111 | "from keras.models import model_from_json\n", 112 | "from keras.models import Sequential\n", 113 | "from keras.models import load_model\n", 114 | "\n", 115 | "from keras.preprocessing import image\n", 116 | "from keras.preprocessing.image import ImageDataGenerator \n", 117 | "\n", 118 | "from keras import callbacks\n", 119 | "from keras.callbacks import ModelCheckpoint\n", 120 | "from keras.callbacks import EarlyStopping\n", 121 | "\n", 122 | "from keras import backend as K\n", 123 | "\n", 124 | "\n", 125 | "from tensorflow import Tensor\n", 126 | "\n", 127 | "from keras.engine import InputLayer\n", 128 | "import coremltools\n", 129 | "\n", 130 | "%matplotlib inline" 131 | ] 132 | }, 133 | { 134 | "cell_type": "markdown", 135 | "metadata": {}, 136 | "source": [ 137 | "### Setup global variables" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": null, 143 | "metadata": { 144 | "collapsed": true 145 | }, 146 | "outputs": [], 147 | "source": [ 148 | "ITERATION = 1\n", 149 | "\n", 150 | "DATA_PATH = 'hotdog_data'\n", 151 | "TRAINING_DATA_PATH = DATA_PATH + '/training_set'\n", 152 | "VALIDATION_DATA_PATH = DATA_PATH + '/validation_set'\n", 153 | "TEST_DATA_PATH = DATA_PATH + '/test_set'\n", 154 | "\n", 155 | "OUTPUT_PATH = 'output_' + time.strftime(\"%d-%m-%Y_\") + str(ITERATION)\n", 156 | "MODEL_JSON_PATH = OUTPUT_PATH + '/seefood_model.json'\n", 157 | "MODEL_WEIGHTS_PATH = OUTPUT_PATH + '/seefood_model_weights.h5'\n", 158 | "MODEL_EPOCH_PATH = OUTPUT_PATH + '/seefood_weights-{epoch:02d}-{val_acc}.hdf5'\n", 159 | "\n", 160 | "COREML_MODEL_PATH = OUTPUT_PATH + '/seefood_model.mlmodel'\n", 161 | "COREML_META_DESCRIPTION = 'SeeFood: Model to classify images as either hotdog or nothotdog'\n", 162 | "COREML_META_AUTHOR = 'Justin A. McCoy'\n", 163 | "COREML_META_INPUT_DESCRIPTION = 'Image of food that might be a hotdog'\n", 164 | "\n", 165 | "\n", 166 | "# input image dimensions\n", 167 | "IMG_ROWS, IMG_COLS = 128, 128\n", 168 | "IMG_CHANNELS = 3\n", 169 | "\n", 170 | "# Number of images gererated at each invocation of the ImageDataGenerator\n", 171 | "# Batchsize is an important value when training a CNN. 
too large a number can lead to out of memory errors, \n", 172 | "# and lower accuracy. https://arxiv.org/abs/1606.02228 \n", 173 | "# When you have a high batch size in compairison to the number of training samples you make bigger jumps during\n", 174 | "# graident descent, this can lead you to the minimuma faster but it's possib\n", 175 | "BATCH_SIZE = 16\n", 176 | "\n", 177 | "EPOCHS = 50\n", 178 | "\n", 179 | "HOTDOG = 0\n", 180 | "NOTHOTDOG = 1\n", 181 | "\n", 182 | "CLASSIFICATION = ['HOTDOG', 'NOTHOTDOG']" 183 | ] 184 | }, 185 | { 186 | "cell_type": "markdown", 187 | "metadata": {}, 188 | "source": [ 189 | "### Create output directory" 190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": null, 195 | "metadata": { 196 | "collapsed": true 197 | }, 198 | "outputs": [], 199 | "source": [ 200 | "!mkdir \"$OUTPUT_PATH\"\n", 201 | "print('Created path: {}'.format(OUTPUT_PATH))" 202 | ] 203 | }, 204 | { 205 | "cell_type": "markdown", 206 | "metadata": {}, 207 | "source": [ 208 | "### Common methods used throughout notebook" 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": null, 214 | "metadata": { 215 | "collapsed": true 216 | }, 217 | "outputs": [], 218 | "source": [ 219 | "def show_images(images, cols=1, title=\"\"):\n", 220 | " \"\"\"Display a list of images in a single plot with matplotlib.\n", 221 | " \n", 222 | " Parameters\n", 223 | " ---------\n", 224 | " images: List of np.arrays compatible with plt.imshow.\n", 225 | " \n", 226 | " cols (Default = 1): Number of columns in figure (number of rows is \n", 227 | " set to np.ceil(n_images/float(cols))).\n", 228 | " \"\"\"\n", 229 | " n_images = len(images)\n", 230 | " fig = pyplot.figure()\n", 231 | " fig.suptitle(title, fontsize=16)\n", 232 | " for n, image in enumerate(images):\n", 233 | " pyplot.subplot(cols, np.ceil(n_images/float(cols)), n + 1)\n", 234 | " pyplot.imshow(image)\n", 235 | " pyplot.axis('off')\n", 236 | " pyplot.show()\n", 237 | " \n", 238 | " \n", 239 | "def get_best_model():\n", 240 | " return load_model(MODEL_WEIGHTS_PATH)" 241 | ] 242 | }, 243 | { 244 | "cell_type": "markdown", 245 | "metadata": {}, 246 | "source": [ 247 | "# 2. Acquire Data" 248 | ] 249 | }, 250 | { 251 | "cell_type": "markdown", 252 | "metadata": {}, 253 | "source": [ 254 | "### Aquire and classify training data for Hotdogs" 255 | ] 256 | }, 257 | { 258 | "cell_type": "markdown", 259 | "metadata": {}, 260 | "source": [ 261 | "There are several places to aquire datasets that are prelabeled. for this example I've used Image.net to aquire images of hotdogs and food items that are not hotdogs\n", 262 | "\n", 263 | "www.image-net.org/\n", 264 | "\n", 265 | "ImageNet is a database of images organized according to the WordNet hierarchy, where each concept described by multiple words is grouped in a 'synonym set'. The synsets are thumbnails of images and contain links to images that have been classified.\n", 266 | "\n", 267 | "A major drawback of ImageNet revolves around where images are located; ImageNet doesn't keep a database of images but links to where images are, or were at one time. ImageNet doesn't save you from the task of cleaning and reviewing your data before jumping into the fun stuff." 
268 | ] 269 | }, 270 | { 271 | "cell_type": "markdown", 272 | "metadata": {}, 273 | "source": [ 274 | "#### Downloading images\n" 275 | ] 276 | }, 277 | { 278 | "cell_type": "code", 279 | "execution_count": null, 280 | "metadata": { 281 | "collapsed": true 282 | }, 283 | "outputs": [], 284 | "source": [ 285 | "!wget -L -O hotdog_data.tar.gz https://ibm.box.com/shared/static/ig5ao996ew9mqutfbgs0o2otophuwe1h.gz" 286 | ] 287 | }, 288 | { 289 | "cell_type": "code", 290 | "execution_count": null, 291 | "metadata": {}, 292 | "outputs": [], 293 | "source": [ 294 | "!tar -xzvf hotdog_data.tar.gz" 295 | ] 296 | }, 297 | { 298 | "cell_type": "markdown", 299 | "metadata": {}, 300 | "source": [ 301 | "### Look at the training and validation data\n", 302 | "It's important to understand your data balance between classes." 303 | ] 304 | }, 305 | { 306 | "cell_type": "code", 307 | "execution_count": null, 308 | "metadata": { 309 | "collapsed": true 310 | }, 311 | "outputs": [], 312 | "source": [ 313 | "# How many images do we have to work with in our training and validation sets?\n", 314 | "\n", 315 | "hd_training_set_size = !ls -alR '$TRAINING_DATA_PATH'/hotdog | wc -l\n", 316 | "hd_training_set_size = hd_training_set_size[0]\n", 317 | "\n", 318 | "nhd_training_set_size = !ls -alR '$TRAINING_DATA_PATH'/not-hotdog | wc -l\n", 319 | "nhd_training_set_size = nhd_training_set_size[0]\n", 320 | "\n", 321 | "validation_set_size = !ls -alR '$VALIDATION_DATA_PATH'/hotdog | wc -l\n", 322 | "validation_set_size = validation_set_size[0]\n", 323 | "\n", 324 | "test_set_size = !ls -alR '$TEST_DATA_PATH'/ | wc -l\n", 325 | "test_set_size = test_set_size[0]\n", 326 | "\n", 327 | "print(\"Hotdog training examples:\\t{}\".format(hd_training_set_size))\n", 328 | "print(\"NotHotdog training examples:\\t{}\".format(nhd_training_set_size))\n", 329 | "print(\"Hotdog validation examples:\\t{}\".format(validation_set_size))\n", 330 | "print(\"Test examples:\\t{}\".format(test_set_size))" 331 | ] 332 | }, 333 | { 334 | "cell_type": "markdown", 335 | "metadata": { 336 | "collapsed": true 337 | }, 338 | "source": [ 339 | " \n", 340 | " " 341 | ] 342 | }, 343 | { 344 | "cell_type": "markdown", 345 | "metadata": {}, 346 | "source": [ 347 | "### Data Augmentation with Generators\n", 348 | "Using Keras's ImageDataGenerator we solve several problems with large datasets\n", 349 | "\n", 350 | "1. Eliminate the need to store our entire dataset in memory\n", 351 | "2. Supplement dataset with additional training images\n", 352 | "3. Improve model robustness by producing different color and oritntation profiles of images\n", 353 | "4. Mean-Normalization - Image pixel values are usually integers between the range of 1 and 255. Using such large numbers in models can cause overflow. To fix this the ImageDataGenerator provides a rescale function to scale the original pixel values by a sacaling factor, in our case this is 255. Leaving values between 0 and 1 for each pixel value.\n", 354 | "\n", 355 | "Here we’ve rescaled the image data so that each pixel lies in the interval [0, 1] instead of [0, 255]. It is always a good idea to normalize the input so that each dimension has approximately the same scale." 
356 | ] 357 | }, 358 | { 359 | "cell_type": "code", 360 | "execution_count": null, 361 | "metadata": { 362 | "collapsed": true 363 | }, 364 | "outputs": [], 365 | "source": [ 366 | "train_datagen = ImageDataGenerator(rescale = 1./255,\n", 367 | " rotation_range=40,\n", 368 | "# width_shift_range=0.2,\n", 369 | "# height_shift_range=0.2,\n", 370 | "# shear_range=0.2,\n", 371 | " zoom_range=0.2,\n", 372 | " horizontal_flip=True,\n", 373 | " fill_mode='nearest')\n", 374 | "\n", 375 | "validation_datagen = ImageDataGenerator(rescale = 1./255)\n", 376 | "\n", 377 | "print(\"training_set\")\n", 378 | "training_set = train_datagen.flow_from_directory(TRAINING_DATA_PATH,\n", 379 | " target_size = (IMG_ROWS, IMG_COLS),\n", 380 | " batch_size = BATCH_SIZE,\n", 381 | " class_mode = 'binary')\n", 382 | " \n", 383 | "print(\"validation_set\")\n", 384 | "validation_set = validation_datagen.flow_from_directory(VALIDATION_DATA_PATH,\n", 385 | " target_size = (IMG_ROWS, IMG_COLS),\n", 386 | " batch_size = BATCH_SIZE,\n", 387 | " class_mode = 'binary')\n", 388 | "\n", 389 | "print(\"test_set\")\n", 390 | "test_datagen = ImageDataGenerator()\n", 391 | "test_set = test_datagen.flow_from_directory(TEST_DATA_PATH,\n", 392 | " target_size=(IMG_ROWS, IMG_COLS),\n", 393 | " batch_size=488,\n", 394 | " class_mode='binary')" 395 | ] 396 | }, 397 | { 398 | "cell_type": "code", 399 | "execution_count": null, 400 | "metadata": { 401 | "collapsed": true 402 | }, 403 | "outputs": [], 404 | "source": [ 405 | "# Using Keras's ImageDataGenerator we generate new images from transformations of our initial training set\n", 406 | "TRAINING_IMAGES = 20000\n", 407 | "VALIDATION_IMAGES = validation_set.samples # No augmentation of the validation images, just use what's there" 408 | ] 409 | }, 410 | { 411 | "cell_type": "markdown", 412 | "metadata": {}, 413 | "source": [ 414 | "### Look at subset of sample generated images in the training_set" 415 | ] 416 | }, 417 | { 418 | "cell_type": "code", 419 | "execution_count": null, 420 | "metadata": { 421 | "collapsed": true 422 | }, 423 | "outputs": [], 424 | "source": [] 425 | }, 426 | { 427 | "cell_type": "code", 428 | "execution_count": null, 429 | "metadata": { 430 | "collapsed": true 431 | }, 432 | "outputs": [], 433 | "source": [ 434 | "from matplotlib import pyplot\n", 435 | "%matplotlib inline\n", 436 | " \n", 437 | "x,y = training_set.next() # Generates BATCH_SIZE images on every invocation\n", 438 | "\n", 439 | "# Only interested in a subset of the images\n", 440 | "x = x[:9]\n", 441 | "y = y[:9]\n", 442 | "\n", 443 | "fig = pyplot.figure()\n", 444 | "title = fig.suptitle('training_set examples', fontsize=16)\n", 445 | "title.set_position([.5, 1.05])\n", 446 | "for n, (img, title) in enumerate(zip(x,y)):\n", 447 | " a = fig.add_subplot(3, 3, n+1)\n", 448 | " pyplot.imshow(img)\n", 449 | " a.set_title(title)\n", 450 | " pyplot.axis('off')\n", 451 | "#fig.set_dpi(300)\n", 452 | "pyplot.show()\n", 453 | "\n", 454 | "print(training_set.class_indices)" 455 | ] 456 | }, 457 | { 458 | "cell_type": "markdown", 459 | "metadata": {}, 460 | "source": [ 461 | "# 3. Build Convolutional Neural Network" 462 | ] 463 | }, 464 | { 465 | "cell_type": "markdown", 466 | "metadata": { 467 | "collapsed": true 468 | }, 469 | "source": [ 470 | "### A Convolutional Neural Network (CNN) \n", 471 | "\n", 472 | "Each filter in a CNN, learns different characteristic of an image.\n", 473 | "\n", 474 | "Keras allows us to specify the number of **filters** we want and the size of the filters. 
So, in our first layer, 32 is number of filters and (3, 3) is the size of the filter. We also need to specify the shape of the input which is (28, 28, 1), but we have to specify it only once.\n", 475 | "\n", 476 | "The second layer is the **Activation layer**. We have used ReLU (rectified linear unit) as our activation function. ReLU function is f(x) = max(0, x), where x is the input. It sets all negative values in the matrix ‘x’ to 0 and keeps all the other values constant. It is the most used activation function since it reduces training time and prevents the problem of vanishing gradients.\n", 477 | "\n", 478 | "The third layer is the **MaxPooling layer**. MaxPooling layer is used to down-sample the input to enable the model to make assumptions about the features so as to reduce overfitting. It also reduces the number of parameters to learn, reducing the training time.\n", 479 | "\n", 480 | "After creating all the **convolutional layers**, we need to flatten them, so that they can act as an input to the Dense layers.\n", 481 | "\n", 482 | "**Dense layers** are keras’s alias for Fully connected layers. These layers give the ability to classify the features learned by the CNN.\n", 483 | "\n", 484 | "**Dropout** is the method used to reduce overfitting. It forces the model to learn multiple independent representations of the same data by randomly disabling neurons in the learning phase. In our model, dropout will randomly disable 20% of the neurons.\n", 485 | "\n", 486 | "The second last layer is the Dense layer with 1 neuron. The neurons in this layer should be equal to the number of classes we want to predict as this is the output layer.\n", 487 | "\n", 488 | "\n" 489 | ] 490 | }, 491 | { 492 | "cell_type": "code", 493 | "execution_count": null, 494 | "metadata": { 495 | "collapsed": true 496 | }, 497 | "outputs": [], 498 | "source": [ 499 | "K.set_learning_phase(1) # https://github.com/keras-team/keras/issues/2310\n", 500 | "classifier = Sequential()" 501 | ] 502 | }, 503 | { 504 | "cell_type": "code", 505 | "execution_count": null, 506 | "metadata": { 507 | "collapsed": true 508 | }, 509 | "outputs": [], 510 | "source": [ 511 | "classifier.add(Conv2D(64, (3, 3), input_shape = (IMG_ROWS, IMG_COLS, IMG_CHANNELS), padding='same', activation = 'relu'))\n", 512 | "classifier.add(MaxPooling2D(pool_size=(2, 2)))\n", 513 | "classifier.add(Conv2D(64, (3, 3), activation='relu'))\n", 514 | "classifier.add(Dropout(0.4))\n", 515 | "classifier.add(MaxPooling2D(pool_size=(2, 2)))\n", 516 | "classifier.add(Conv2D(128, (3, 3), activation='relu'))\n", 517 | "classifier.add(Dropout(0.4))\n", 518 | "classifier.add(MaxPooling2D(pool_size=(2, 2)))\n", 519 | "\n", 520 | "classifier.add(Flatten())\n", 521 | "classifier.add(Dense(64))\n", 522 | "classifier.add(Activation('relu'))\n", 523 | "classifier.add(Dropout(0.2))\n", 524 | "classifier.add(Dense(1))\n", 525 | "classifier.add(Activation('sigmoid'))\n" 526 | ] 527 | }, 528 | { 529 | "cell_type": "markdown", 530 | "metadata": {}, 531 | "source": [ 532 | "The cross-entropy loss calculates the error rate between the predicted value and the original value. The formula for calculating cross-entropy loss is given here. Because we have two classes we used binary_crossentropy.\n", 533 | "\n", 534 | "The Adam optimizer is an improvement over SGD(Stochastic Gradient Descent). The optimizer is responsible for updating the weights of the neurons via backpropagation. 
It calculates the derivative of the loss function with respect to each weight and subtracts it from the weight. This is how a neural network learns.\n" 535 | ] 536 | }, 537 | { 538 | "cell_type": "code", 539 | "execution_count": null, 540 | "metadata": { 541 | "collapsed": true 542 | }, 543 | "outputs": [], 544 | "source": [ 545 | "classifier.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])" 546 | ] 547 | }, 548 | { 549 | "cell_type": "markdown", 550 | "metadata": {}, 551 | "source": [ 552 | "### Save CNN as JSON\n" 553 | ] 554 | }, 555 | { 556 | "cell_type": "code", 557 | "execution_count": null, 558 | "metadata": { 559 | "collapsed": true 560 | }, 561 | "outputs": [], 562 | "source": [ 563 | "#serialize model to JSON\n", 564 | "classifier_json = classifier.to_json()\n", 565 | "with open(MODEL_JSON_PATH, \"w\") as json_file:\n", 566 | " json_file.write(classifier_json)\n", 567 | " " 568 | ] 569 | }, 570 | { 571 | "cell_type": "markdown", 572 | "metadata": {}, 573 | "source": [ 574 | "# 4. Train Convolutional Neural Network" 575 | ] 576 | }, 577 | { 578 | "cell_type": "markdown", 579 | "metadata": {}, 580 | "source": [ 581 | "### Metrics to monitor\n", 582 | "When training your convolutional neural network you're monitoring two things at each epoch for both the training and validation stages: **accuracy**, and **loss**. As you monitor these metrics hopefully the accuracy goes up and the loss goes down. \n", 583 | "\n", 584 | "When training it's possible to **underfit** and **overfit**. Underfitting occurs when the accuracy on the training set is lower then the accuracy on the validation set; a poor performing model. Overfitting occurs when the training loss contines to go down, but the validation loss continues to rise.\n", 585 | "\n", 586 | "Finding the sweet spot between underfitting and overfitting is crucial to developing a model that generalizes well with unseen data.\n", 587 | "\n", 588 | "**Dropout Regularization** is one method that's used to reduce overfitting. Dropout randomly turns off neurons during training, reducing their weight further down the network. This encourages the network to find additional neurons building separate representations of the class within the network. Increasing the dropout rate between layers combats against overfitting; a good starting point is between 0.20 and 0.50. \n", 589 | "\n", 590 | "#### *Finding the right balance is more of an art then science, and takes some time to experiment.*\n", 591 | "\n", 592 | "![XKCD.com/1838](https://imgs.xkcd.com/comics/machine_learning.png)\n" 593 | ] 594 | }, 595 | { 596 | "cell_type": "markdown", 597 | "metadata": {}, 598 | "source": [ 599 | "### Setup callbacks to monitor metrics\n", 600 | "\n", 601 | "**Callbacks** are functions applied at given stages of training. Below is an example demonstrating how to create a new callback, reporting the *validation loss* and *validation accuracy* at the end of each epoch.\n", 602 | "\n", 603 | "Keras comes with many callback functions, including ModelCheckpoint used to save the weights after every epoch as seen below." 
604 | ] 605 | }, 606 | { 607 | "cell_type": "code", 608 | "execution_count": null, 609 | "metadata": { 610 | "collapsed": true 611 | }, 612 | "outputs": [], 613 | "source": [ 614 | "class LossHistory(callbacks.Callback):\n", 615 | " def on_train_begin(self, logs={}):\n", 616 | " self.losses = []\n", 617 | "\n", 618 | " def on_epoch_end(self, epoch, logs={}):\n", 619 | " self.losses.append(logs.get('val_loss'))\n", 620 | " print('-----------------------------------------------------------------------')\n", 621 | " print('Epoch ' + str(epoch) + ' - Validation loss: ' + str(logs.get('val_loss')) + ' accuracy : ' + str(logs.get('val_acc')))\n", 622 | " print('-----------------------------------------------------------------------')\n" 623 | ] 624 | }, 625 | { 626 | "cell_type": "code", 627 | "execution_count": null, 628 | "metadata": { 629 | "collapsed": true 630 | }, 631 | "outputs": [], 632 | "source": [ 633 | "checkpoint_all = ModelCheckpoint(filepath= MODEL_EPOCH_PATH, verbose=1, save_best_only=False)\n", 634 | "checkpoint_best = ModelCheckpoint(filepath= MODEL_WEIGHTS_PATH, verbose=1, save_best_only=True)\n", 635 | "loss_history = LossHistory()" 636 | ] 637 | }, 638 | { 639 | "cell_type": "code", 640 | "execution_count": null, 641 | "metadata": { 642 | "collapsed": true 643 | }, 644 | "outputs": [], 645 | "source": [ 646 | "history = classifier.fit_generator(training_set,\n", 647 | " steps_per_epoch = TRAINING_IMAGES//BATCH_SIZE,\n", 648 | " epochs = 50,\n", 649 | " validation_data = validation_set,\n", 650 | " validation_steps = VALIDATION_IMAGES//BATCH_SIZE,\n", 651 | " callbacks = [loss_history, checkpoint_all, checkpoint_best])" 652 | ] 653 | }, 654 | { 655 | "cell_type": "markdown", 656 | "metadata": {}, 657 | "source": [ 658 | "**Hyperparameter optimization is necessary to improve the models accuracy**" 659 | ] 660 | }, 661 | { 662 | "cell_type": "markdown", 663 | "metadata": { 664 | "collapsed": true 665 | }, 666 | "source": [ 667 | "# 5. 
Evaluate Model" 668 | ] 669 | }, 670 | { 671 | "cell_type": "code", 672 | "execution_count": null, 673 | "metadata": { 674 | "collapsed": true 675 | }, 676 | "outputs": [], 677 | "source": [ 678 | "classifier.summary()" 679 | ] 680 | }, 681 | { 682 | "cell_type": "markdown", 683 | "metadata": {}, 684 | "source": [ 685 | "### Plot Accuracy and Loss over Epochs\n", 686 | "Plot the validation accuracy vs training accuracy, and the validation_loss vs training_loss" 687 | ] 688 | }, 689 | { 690 | "cell_type": "code", 691 | "execution_count": null, 692 | "metadata": { 693 | "collapsed": true 694 | }, 695 | "outputs": [], 696 | "source": [ 697 | "\n", 698 | "pyplot.plot(history.epoch,history.history['val_acc'],label='validation accuracy')\n", 699 | "pyplot.plot(history.epoch,history.history['acc'],label='training accuracy')\n", 700 | "\n", 701 | "\n", 702 | "pyplot.legend(loc=0)\n", 703 | "pyplot.xlabel('epoch')\n", 704 | "pyplot.ylabel('accuracy')\n", 705 | "pyplot.grid(True)\n", 706 | "pyplot.show()" 707 | ] 708 | }, 709 | { 710 | "cell_type": "code", 711 | "execution_count": null, 712 | "metadata": { 713 | "collapsed": true 714 | }, 715 | "outputs": [], 716 | "source": [ 717 | "pyplot.plot(history.epoch,history.history['val_loss'],label='validation loss')\n", 718 | "pyplot.plot(history.epoch,history.history['loss'],label='training loss')\n", 719 | "\n", 720 | "\n", 721 | "pyplot.legend(loc=0)\n", 722 | "pyplot.xlabel('epoch')\n", 723 | "pyplot.ylabel('loss')\n", 724 | "pyplot.grid(True)\n", 725 | "pyplot.show()" 726 | ] 727 | }, 728 | { 729 | "cell_type": "markdown", 730 | "metadata": {}, 731 | "source": [ 732 | "### Load the best weights from the training " 733 | ] 734 | }, 735 | { 736 | "cell_type": "code", 737 | "execution_count": null, 738 | "metadata": { 739 | "collapsed": true 740 | }, 741 | "outputs": [], 742 | "source": [ 743 | "model = get_best_model()" 744 | ] 745 | }, 746 | { 747 | "cell_type": "code", 748 | "execution_count": null, 749 | "metadata": { 750 | "collapsed": true 751 | }, 752 | "outputs": [], 753 | "source": [ 754 | "# The evaluate_generator method returns a tuple including the loss and accuracy of a model\n", 755 | "results = model.evaluate_generator(validation_set, 1)\n", 756 | "print(\"The model has a {}% accuracy, with a loss of {}.\".format(results[1]*100, results[0])) " 757 | ] 758 | }, 759 | { 760 | "cell_type": "markdown", 761 | "metadata": {}, 762 | "source": [ 763 | "### Create a Confusion Matrix on with test data\n", 764 | "\n", 765 | "It's important to test your model with data that hasn't been used during training for validation. \n", 766 | "\n", 767 | "| | **HOTDOG** | **NOTHOTDOG** |\n", 768 | "| ------------- | -------------- | -------------- |\n", 769 | "| **HOTDOG** | True Positive | False Positive |\n", 770 | "| **NOTHOTDOG** | False Negative | True Negative |" 771 | ] 772 | }, 773 | { 774 | "cell_type": "code", 775 | "execution_count": null, 776 | "metadata": { 777 | "collapsed": true 778 | }, 779 | "outputs": [], 780 | "source": [ 781 | "# Creating the Confusion Matrix on our test data\n", 782 | "\n", 783 | "X_test, y_test = test_set.next()\n", 784 | "y_pred = model.predict(X_test)\n", 785 | "y_pred = (y_pred > 0.5)\n", 786 | "from sklearn.metrics import confusion_matrix\n", 787 | "cm = confusion_matrix(y_test, y_pred)\n", 788 | "cm" 789 | ] 790 | }, 791 | { 792 | "cell_type": "markdown", 793 | "metadata": {}, 794 | "source": [ 795 | "# 6. 
Make Predictions" 796 | ] 797 | }, 798 | { 799 | "cell_type": "code", 800 | "execution_count": null, 801 | "metadata": { 802 | "collapsed": true 803 | }, 804 | "outputs": [], 805 | "source": [ 806 | "#Load the best weights from the training \n", 807 | "model = get_best_model()" 808 | ] 809 | }, 810 | { 811 | "cell_type": "code", 812 | "execution_count": null, 813 | "metadata": { 814 | "collapsed": true 815 | }, 816 | "outputs": [], 817 | "source": [ 818 | "def make_prediction(model, img):\n", 819 | " \"\"\"Display a list of images in a single plot with matplotlib.\n", 820 | " \n", 821 | " Parameters\n", 822 | " ---------\n", 823 | " images: List of np.arrays compatible with plt.imshow.\n", 824 | " \n", 825 | " cols (Default = 1): Number of columns in figure (number of rows is \n", 826 | " set to np.ceil(n_images/float(cols))).\n", 827 | " \"\"\"\n", 828 | " test_image = image.img_to_array(img)\n", 829 | " test_image = np.expand_dims(test_image, axis = 0)\n", 830 | " return model.predict(test_image, verbose=0)" 831 | ] 832 | }, 833 | { 834 | "cell_type": "code", 835 | "execution_count": null, 836 | "metadata": { 837 | "collapsed": true 838 | }, 839 | "outputs": [], 840 | "source": [ 841 | "false_positives=[]\n", 842 | "for file in os.listdir(TEST_DATA_PATH + '/nothotdog'):\n", 843 | " img = image.load_img(TEST_DATA_PATH + '/nothotdog/' + file, target_size = (IMG_COLS, IMG_COLS))\n", 844 | " if make_prediction(model, img) == HOTDOG:\n", 845 | " false_positives.append(img) \n", 846 | " \n", 847 | "show_images(false_positives, 3, title=\"False Positives\")\n", 848 | "\n" 849 | ] 850 | }, 851 | { 852 | "cell_type": "code", 853 | "execution_count": null, 854 | "metadata": { 855 | "collapsed": true 856 | }, 857 | "outputs": [], 858 | "source": [ 859 | "false_negatives=[]\n", 860 | "for file in os.listdir(TEST_DATA_PATH + '/hotdog'):\n", 861 | " img = image.load_img(TEST_DATA_PATH + '/hotdog/' + file, target_size = (IMG_COLS, IMG_COLS))\n", 862 | " if make_prediction(model, img) == NOTHOTDOG:\n", 863 | " false_negatives.append(img)\n", 864 | " \n", 865 | "\n", 866 | "show_images(false_negatives, 10, title=\"False Negatives\")" 867 | ] 868 | }, 869 | { 870 | "cell_type": "markdown", 871 | "metadata": {}, 872 | "source": [ 873 | "### Download images from the web to evaluate model" 874 | ] 875 | }, 876 | { 877 | "cell_type": "code", 878 | "execution_count": null, 879 | "metadata": { 880 | "collapsed": true 881 | }, 882 | "outputs": [], 883 | "source": [ 884 | "!wget https://www.dietsinreview.com/diet_column/wp-content/uploads/2010/07/joey-chestnut-nathans-famous-hot-dog-eating-contest.jpg -O hotdog.jpg " 885 | ] 886 | }, 887 | { 888 | "cell_type": "code", 889 | "execution_count": null, 890 | "metadata": { 891 | "collapsed": true 892 | }, 893 | "outputs": [], 894 | "source": [ 895 | "test_image = image.load_img('hotdog.jpg', target_size = (IMG_ROWS, IMG_COLS))\n", 896 | "pyplot.imshow(test_image)\n", 897 | "pyplot.show()\n", 898 | "\n", 899 | "test_image = image.img_to_array(test_image)\n", 900 | "test_image = np.expand_dims(test_image, axis = 0)\n", 901 | "\n", 902 | "result = classifier.predict(test_image, verbose=1)\n", 903 | "\n", 904 | "print(CLASSIFICATION[int(result[0][0])])\n" 905 | ] 906 | }, 907 | { 908 | "cell_type": "code", 909 | "execution_count": null, 910 | "metadata": { 911 | "collapsed": true 912 | }, 913 | "outputs": [], 914 | "source": [ 915 | "!wget 
915 | "!wget https://img.webmd.com/dtmcms/live/webmd/consumer_assets/site_images/dam/editorial/childrens-health/miscellaneous/how-to-change-teen-eating-habits/graphics/thumbnails/final/how-to-change-teen-eating-habits-375x321.jpg -O nothotdog.jpg"
916 | ]
917 | },
918 | {
919 | "cell_type": "code",
920 | "execution_count": null,
921 | "metadata": {
922 | "collapsed": true
923 | },
924 | "outputs": [],
925 | "source": [
926 | "test_image = image.load_img('nothotdog.jpg', target_size = (IMG_ROWS, IMG_COLS))\n",
927 | "pyplot.imshow(test_image)\n",
928 | "pyplot.show()\n",
929 | "\n",
930 | "test_image = image.img_to_array(test_image)\n",
931 | "test_image = np.expand_dims(test_image, axis = 0)\n",
932 | "\n",
933 | "result = classifier.predict(test_image, verbose=1)\n",
934 | "\n",
935 | "print(CLASSIFICATION[int(result[0][0])])"
936 | ]
937 | },
938 | {
939 | "cell_type": "markdown",
940 | "metadata": {},
941 | "source": [
942 | "# 7. Take a closer look at generated filters\n",
943 | "The CNN has several layers, including various filtering layers that identify important features of an image for the classification task. Let's look at the filters to see which areas of an image are identified as important for the hotdog/nothotdog classification.\n",
944 | "Inspired by code here: https://github.com/mingruimingrui/Convolution-neural-networks-made-easy-with-keras"
945 | ]
946 | },
947 | {
948 | "cell_type": "code",
949 | "execution_count": null,
950 | "metadata": {
951 | "collapsed": true
952 | },
953 | "outputs": [],
954 | "source": [
955 | "model = get_best_model()"
956 | ]
957 | },
958 | {
959 | "cell_type": "code",
960 | "execution_count": null,
961 | "metadata": {
962 | "collapsed": true
963 | },
964 | "outputs": [],
965 | "source": [
966 | "def get_layer_dict(model):\n",
967 | " return dict([(layer.name, layer) for layer in model.layers if (layer.name.find('dense') > -1) or (layer.name.find('conv') > -1)])"
968 | ]
969 | },
970 | {
971 | "cell_type": "code",
972 | "execution_count": null,
973 | "metadata": {
974 | "collapsed": true
975 | },
976 | "outputs": [],
977 | "source": [
978 | "layers = get_layer_dict(model)\n",
979 | "layers"
980 | ]
981 | },
982 | {
983 | "cell_type": "code",
984 | "execution_count": null,
985 | "metadata": {
986 | "collapsed": true
987 | },
988 | "outputs": [],
989 | "source": [
990 | "def deprocess_image(img):\n",
991 | " # normalize tensor: center on 0., ensure std is 0.1\n",
992 | " img -= img.mean()\n",
993 | " img /= (img.std() + 1e-5)\n",
994 | " img *= 0.1\n",
995 | "\n",
996 | " # clip to [0, 1]\n",
997 | " img += 0.5\n",
998 | " img = np.clip(img, 0, 1)\n",
999 | "\n",
1000 | " # convert to RGB array\n",
1001 | " img *= 255\n",
1002 | " img = np.clip(img, 0, 255).astype('uint8')\n",
1003 | " return img\n"
1004 | ]
1005 | },
1006 | {
1007 | "cell_type": "code",
1008 | "execution_count": null,
1009 | "metadata": {
1010 | "collapsed": true
1011 | },
1012 | "outputs": [],
1013 | "source": [
1014 | "def plot_hidden_filter_layers(model, layer, num_plot=16):\n",
1015 | " _ = pyplot.suptitle(layer.name)\n",
1016 | "\n",
1017 | " # plot only num_plot (default 16) filters, as there are too many to visualize\n",
1018 | "\n",
1019 | " layer_output = layer.output\n",
1020 | " output_shape = layer.output_shape\n",
1021 | " sub_plot_height = math.ceil(np.sqrt(num_plot))\n",
1022 | " nb_filters = output_shape[len(output_shape) - 1]\n",
1023 | "\n",
1024 | " # here we need to conduct gradient ascent on each filter\n",
1025 | " counter = 0\n",
1026 | " for i in range(nb_filters):\n",
1027 | " if counter < num_plot:\n",
1028 | " # conv layers have a different output shape than dense layers, so the loss is indexed differently\n",
1029 | " if layer.name.find('conv') != -1:\n",
1030 | " loss = K.mean(layer_output[:,:,:,np.random.randint(nb_filters)])\n",
1031 | " else:\n",
1032 | " loss = K.mean(layer_output[:,np.random.randint(nb_filters)])\n",
1033 | "\n",
1034 | " # take the model input tensor and calculate the gradient of the loss with respect to it\n",
1035 | " input_img = model.input\n",
1036 | " grads = K.gradients(loss, input_img)[0]\n",
1037 | "\n",
1038 | " # normalize gradient\n",
1039 | " grads /= (K.sqrt(K.mean(K.square(grads))) + 1e-5)\n",
1040 | "\n",
1041 | " # this function returns the loss and grads given the input picture\n",
1042 | " iterate = K.function([input_img], [loss, grads])\n",
1043 | "\n",
1044 | " # we start from a gray image with some noise\n",
1045 | " input_img_data = np.random.rand(1,IMG_ROWS, IMG_COLS, IMG_CHANNELS) * 0.1 + 0.5\n",
1046 | "\n",
1047 | " # run gradient ascent for 40 steps\n",
1048 | " for j in range(40):\n",
1049 | " loss_value, grads_value = iterate([input_img_data])\n",
1050 | " input_img_data += grads_value\n",
1051 | "\n",
1052 | " # deprocess the image and plot it if the filter produced a positive activation\n",
1053 | " if loss_value > 0:\n",
1054 | " img = deprocess_image(input_img_data[0])\n",
1055 | " ax = pyplot.subplot(sub_plot_height, sub_plot_height, counter+1)\n",
1056 | " _ = pyplot.axis('off')\n",
1057 | " _ = ax.set_xticklabels([])\n",
1058 | " _ = ax.set_yticklabels([])\n",
1059 | " _ = ax.set_aspect('equal')\n",
1060 | " _ = pyplot.imshow(img.squeeze(), cmap='inferno')\n",
1061 | "\n",
1062 | " counter += 1\n",
1063 | "\n",
1064 | " _ = pyplot.show()"
1065 | ]
1066 | },
1067 | {
1068 | "cell_type": "code",
1069 | "execution_count": null,
1070 | "metadata": {
1071 | "collapsed": true
1072 | },
1073 | "outputs": [],
1074 | "source": [
1075 | "for layer_name in layers:\n",
1076 | " plot_hidden_filter_layers(model, layers[layer_name])"
1077 | ]
1078 | },
1079 | {
1080 | "cell_type": "markdown",
1081 | "metadata": {
1082 | "collapsed": true
1083 | },
1084 | "source": [
1085 | "### Visualize image through each filter layer\n",
1086 | "Run an image through each convolutional layer and display the different filters applied to the image. Each filter focuses on features within the image. This gives you a better understanding of what the CNN is looking for when making its final decision: hotdog or nothotdog."
1087 | ]
1088 | },
1089 | {
1090 | "cell_type": "code",
1091 | "execution_count": null,
1092 | "metadata": {
1093 | "collapsed": true
1094 | },
1095 | "outputs": [],
1096 | "source": [
1097 | "model = get_best_model()"
1098 | ]
1099 | },
1100 | {
1101 | "cell_type": "code",
1102 | "execution_count": null,
1103 | "metadata": {
1104 | "collapsed": true
1105 | },
1106 | "outputs": [],
1107 | "source": [
1108 | "def get_image_from_generator():\n",
1109 | " '''Generate a batch of validation data and select a single sample image'''\n",
1110 | " X, y = validation_set.next()\n",
1111 | " img = X[0].reshape(-1,IMG_ROWS,IMG_COLS,IMG_CHANNELS)\n",
1112 | " return img"
1113 | ]
1114 | },
1115 | {
1116 | "cell_type": "code",
1117 | "execution_count": null,
1118 | "metadata": {
1119 | "collapsed": true
1120 | },
1121 | "outputs": [],
1122 | "source": [
1123 | "def get_conv_intermediate_layers(my_model):\n",
1124 | " '''Return a Keras Model for the output of each convolutional layer in the model'''\n",
1125 | " conv_layers = []\n",
1126 | " for layer in my_model.layers:\n",
1127 | " if 'conv' in layer.name:\n",
1128 | " # Build a new model with the input from the original \n",
1129 | " # model, but with the output of the specific layer.\n",
1130 | " conv_layers.append(Model(inputs=my_model.input,\n",
1131 | " outputs=my_model.get_layer(layer.name).output)) \n",
1132 | " return conv_layers"
1133 | ]
1134 | },
1135 | {
1136 | "cell_type": "code",
1137 | "execution_count": null,
1138 | "metadata": {
1139 | "collapsed": true
1140 | },
1141 | "outputs": [],
1142 | "source": [
1143 | "def plot_hidden_layers(my_model, img):\n",
1144 | " to_visual = my_model.predict(img)\n",
1145 | " to_visual = to_visual.reshape(to_visual.shape[1:])\n",
1146 | " _ = pyplot.figure()\n",
1147 | "\n",
1148 | " sub_plot_height = math.ceil(np.sqrt(to_visual.shape[2]))\n",
1149 | " for i in range(to_visual.shape[2]):\n",
1150 | " ax = pyplot.subplot(sub_plot_height, sub_plot_height, i+1)\n",
1151 | " _ = pyplot.axis('off')\n",
1152 | " _ = ax.set_xticklabels([])\n",
1153 | " _ = ax.set_yticklabels([])\n",
1154 | " _ = ax.set_aspect('equal')\n",
1155 | " _ = pyplot.imshow(to_visual[:, :, i], cmap='inferno')\n",
1156 | " "
1157 | ]
1158 | },
1159 | {
1160 | "cell_type": "code",
1161 | "execution_count": null,
1162 | "metadata": {
1163 | "collapsed": true
1164 | },
1165 | "outputs": [],
1166 | "source": [
1167 | "conv_models = get_conv_intermediate_layers(model)\n",
1168 | "img = get_image_from_generator()\n",
1169 | "\n",
1170 | "_ = pyplot.imshow(img.reshape(img.shape[1:]))\n",
1171 | "_ = pyplot.title('Sample validation image')\n",
1172 | "\n",
1173 | "index = 0\n",
1174 | "for my_model in conv_models:\n",
1175 | " index += 1\n",
1176 | " \n",
1177 | " plot_hidden_layers(my_model, img)\n",
1178 | "\n",
1179 | "pyplot.show()"
1180 | ]
1181 | },
1182 | {
1183 | "cell_type": "markdown",
1184 | "metadata": {},
1185 | "source": [
1186 | "# 8. Convert the Keras Model to Apple's Core ML"
1187 | ]
1188 | },
1189 | {
1190 | "cell_type": "markdown",
1191 | "metadata": {},
1192 | "source": [
1193 | "#### Load the existing Keras model from disk"
1194 | ]
1195 | },
1196 | {
1197 | "cell_type": "code",
1198 | "execution_count": null,
1199 | "metadata": {
1200 | "collapsed": true
1201 | },
1202 | "outputs": [],
1203 | "source": [
1204 | "# Load the best weights from the training\n",
1205 | "model = get_best_model()"
1206 | ]
1207 | },
1208 | {
1209 | "cell_type": "markdown",
1210 | "metadata": {},
1211 | "source": [
1212 | "#### Set model properties and save as Core ML"
1213 | ]
1214 | },
1215 | {
1216 | "cell_type": "code",
1217 | "execution_count": null,
1218 | "metadata": {
1219 | "collapsed": true
1220 | },
1221 | "outputs": [],
1222 | "source": [
1223 | "output_labels = ['nothotdog']\n",
1224 | "coreml_model = coremltools.converters.keras.convert(MODEL_WEIGHTS_PATH, input_names='image', image_input_names='image', class_labels=output_labels)\n",
1225 | "coreml_model.author = AUTHOR\n",
1226 | "coreml_model.short_description = COREML_META_DESCRIPTION\n",
1227 | "coreml_model.input_description['image'] = COREML_META_INPUT_DESCRIPTION\n",
1228 | "\n",
1229 | "\n",
1230 | "coreml_model.save(COREML_MODEL_PATH)\n",
1231 | "\n",
1232 | "print(coreml_model)"
1233 | ]
1234 | }
1235 | ],
1236 | "metadata": {
1237 | "kernelspec": {
1238 | "display_name": "Python 2 with Spark 1.6 (Unsupported)",
1239 | "language": "python",
1240 | "name": "python2"
1241 | },
1242 | "language_info": {
1243 | "codemirror_mode": {
1244 | "name": "ipython",
1245 | "version": 2
1246 | },
1247 | "file_extension": ".py",
1248 | "mimetype": "text/x-python",
1249 | "name": "python",
1250 | "nbconvert_exporter": "python",
1251 | "pygments_lexer": "ipython2",
1252 | "version": "2.7.11"
1253 | }
1254 | },
1255 | "nbformat": 4,
1256 | "nbformat_minor": 2
1257 | }
1258 | 
--------------------------------------------------------------------------------
/seefood.mlmodel:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/justinmccoy/keras-binary-classifier/f850420f872936b338259d0cf6dc9a892614e293/seefood.mlmodel
--------------------------------------------------------------------------------