├── .github └── workflows │ └── manual.yml ├── .gitignore ├── CODEOWNERS ├── LICENSE.md ├── README.md ├── deep-learning-with-pytorch ├── Part 1 - Tensors in PyTorch (Exercises).ipynb ├── Part 1 - Tensors in PyTorch (Solution).ipynb ├── Part 2 - Neural Networks in PyTorch (Exercises).ipynb ├── Part 2 - Neural Networks in PyTorch (Solution).ipynb ├── Part 3 - Training Neural Networks (Exercises).ipynb ├── Part 3 - Training Neural Networks (Solution).ipynb ├── Part 4 - Fashion-MNIST (Exercises).ipynb ├── Part 4 - Fashion-MNIST (Solution).ipynb ├── Part 5 - Inference and Validation (Exercises).ipynb ├── Part 5 - Inference and Validation (Solution).ipynb ├── Part 6 - Saving and Loading Models.ipynb ├── Part 7 - Loading Image Data (Exercises).ipynb ├── Part 7 - Loading Image Data (Solution).ipynb ├── Part 8 - Transfer Learning (Exercises).ipynb ├── Part 8 - Transfer Learning (Solution).ipynb ├── README.md ├── __pycache__ │ └── helper.cpython-37.pyc ├── assets │ ├── ImageNet_example.png │ ├── Pooling_Simple_max.png │ ├── activation.png │ ├── autoencoder_1.png │ ├── backprop_diagram.png │ ├── cat.70.jpg │ ├── cat_cropped.png │ ├── conv_net.jpg │ ├── dog.128.jpg │ ├── dog_cat.png │ ├── examples_new.png │ ├── fashion-mnist-sprite.png │ ├── full_padding_no_strides_transposed.gif │ ├── function_approx.png │ ├── gradient_descent.png │ ├── image_distribution.png │ ├── infographic.pdf │ ├── lenet.png │ ├── mlp_mnist.png │ ├── mnist.png │ ├── multilayer_diagram_weights.png │ ├── network_diagram.png │ ├── overfitting.png │ ├── padding_strides.gif │ ├── simple_neuron.png │ ├── tensor_examples.svg │ ├── test_examples.png │ ├── train_examples.png │ └── w1_backprop_graph.png ├── fc_model.py └── helper.py ├── gradient-descent ├── GradientDescent.ipynb ├── GradientDescentSolutions.ipynb ├── data.csv └── points.png └── student-admissions ├── StudentAdmissions.ipynb ├── StudentAdmissionsSolutions.ipynb └── student_data.csv /.github/workflows/manual.yml: -------------------------------------------------------------------------------- 1 | # Workflow to ensure whenever a Github PR is submitted, 2 | # a JIRA ticket gets created automatically. 3 | name: Manual Workflow 4 | 5 | # Controls when the action will run. 6 | on: 7 | # Triggers the workflow on pull request events but only for the master branch 8 | pull_request_target: 9 | types: [assigned, opened, reopened] 10 | 11 | # Allows you to run this workflow manually from the Actions tab 12 | workflow_dispatch: 13 | 14 | jobs: 15 | test-transition-issue: 16 | name: Convert Github Issue to Jira Issue 17 | runs-on: ubuntu-latest 18 | steps: 19 | - name: Checkout 20 | uses: actions/checkout@master 21 | 22 | - name: Login 23 | uses: atlassian/gajira-login@master 24 | env: 25 | JIRA_BASE_URL: ${{ secrets.JIRA_BASE_URL }} 26 | JIRA_USER_EMAIL: ${{ secrets.JIRA_USER_EMAIL }} 27 | JIRA_API_TOKEN: ${{ secrets.JIRA_API_TOKEN }} 28 | 29 | - name: Create NEW JIRA ticket 30 | id: create 31 | uses: atlassian/gajira-create@master 32 | with: 33 | project: CONUPDATE 34 | issuetype: Task 35 | summary: | 36 | Github PR - cd0281 Introduction to Neural Networks with Pytorch | Repo: ${{ github.repository }} | PR# ${{github.event.number}} 37 | description: | 38 | Repo link: https://github.com/${{ github.repository }} 39 | PR no. ${{ github.event.pull_request.number }} 40 | PR title: ${{ github.event.pull_request.title }} 41 | PR description: ${{ github.event.pull_request.description }} 42 | In addition, please resolve other issues, if any. 
43 | fields: '{"components": [{"name":"cd0281 - Neural Network"}], "customfield_16449":"https://classroom.udacity.com/", "customfield_16450":"Resolve the PR", "labels": ["github"], "priority":{"id": "4"}}' 44 | 45 | - name: Log created issue 46 | run: echo "Issue ${{ steps.create.outputs.issue }} was created" 47 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_store 2 | .ipynb_checkpoints -------------------------------------------------------------------------------- /CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @udacity/active-public-content -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | 2 | Copyright © 2012 - 2020, Udacity, Inc. 3 | 4 | Udacity hereby grants you a license in and to the Educational Content, including but not limited to homework assignments, programming assignments, code samples, and other educational materials and tools (as further described in the Udacity Terms of Use), subject to, as modified herein, the terms and conditions of the Creative Commons Attribution-NonCommercial- NoDerivs 3.0 License located at http://creativecommons.org/licenses/by-nc-nd/4.0 and successor locations for such license (the "CC License") provided that, in each case, the Educational Content is specifically marked as being subject to the CC License. 5 | Udacity expressly defines the following as falling outside the definition of "non-commercial": 6 | (a) the sale or rental of (i) any part of the Educational Content, (ii) any derivative works based at least in part on the Educational Content, or (iii) any collective work that includes any part of the Educational Content; 7 | (b) the sale of access or a link to any part of the Educational Content without first obtaining informed consent from the buyer (that the buyer is aware that the Educational Content, or such part thereof, is available at the Website free of charge); 8 | (c) providing training, support, or editorial services that use or reference the Educational Content in exchange for a fee; 9 | (d) the sale of advertisements, sponsorships, or promotions placed on the Educational Content, or any part thereof, or the sale of advertisements, sponsorships, or promotions on any website or blog containing any part of the Educational Material, including without limitation any "pop-up advertisements"; 10 | (e) the use of Educational Content by a college, university, school, or other educational institution for instruction where tuition is charged; and 11 | (f) the use of Educational Content by a for-profit corporation or non-profit entity for internal professional development or training. 12 | 13 | 14 | 15 | THE SERVICES AND ONLINE COURSES (INCLUDING ANY CONTENT) ARE PROVIDED "AS IS" AND "AS AVAILABLE" WITH NO REPRESENTATIONS OR WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT. YOU ASSUME TOTAL RESPONSIBILITY AND THE ENTIRE RISK FOR YOUR USE OF THE SERVICES, ONLINE COURSES, AND CONTENT. 
WITHOUT LIMITING THE FOREGOING, WE DO NOT WARRANT THAT (A) THE SERVICES, WEBSITES, CONTENT, OR THE ONLINE COURSES WILL MEET YOUR REQUIREMENTS OR EXPECTATIONS OR ACHIEVE THE INTENDED PURPOSES, (B) THE WEBSITES OR THE ONLINE COURSES WILL NOT EXPERIENCE OUTAGES OR OTHERWISE BE UNINTERRUPTED, TIMELY, SECURE OR ERROR-FREE, (C) THE INFORMATION OR CONTENT OBTAINED THROUGH THE SERVICES, SUCH AS CHAT ROOM SERVICES, WILL BE ACCURATE, COMPLETE, CURRENT, ERROR- FREE, COMPLETELY SECURE OR RELIABLE, OR (D) THAT DEFECTS IN OR ON THE SERVICES OR CONTENT WILL BE CORRECTED. YOU ASSUME ALL RISK OF PERSONAL INJURY, INCLUDING DEATH AND DAMAGE TO PERSONAL PROPERTY, SUSTAINED FROM USE OF SERVICES. 16 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Introduction to Neural Networks with Pytorch 2 | This repo contains starter code and notebooks for Udacity's Introduction to Neural Networks with Pytorch course. 3 | -------------------------------------------------------------------------------- /deep-learning-with-pytorch/Part 1 - Tensors in PyTorch (Exercises).ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Introduction to Deep Learning with PyTorch\n", 8 | "\n", 9 | "In this notebook, you'll get introduced to [PyTorch](http://pytorch.org/), a framework for building and training neural networks. PyTorch in a lot of ways behaves like the arrays you love from Numpy. These Numpy arrays, after all, are just tensors. PyTorch takes these tensors and makes it simple to move them to GPUs for the faster processing needed when training neural networks. It also provides a module that automatically calculates gradients (for backpropagation!) and another module specifically for building neural networks. All together, PyTorch ends up being more coherent with Python and the Numpy/Scipy stack compared to TensorFlow and other frameworks.\n", 10 | "\n" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": [ 17 | "## Neural Networks\n", 18 | "\n", 19 | "Deep Learning is based on artificial neural networks which have been around in some form since the late 1950s. The networks are built from individual parts approximating neurons, typically called units or simply \"neurons.\" Each unit has some number of weighted inputs. These weighted inputs are summed together (a linear combination) then passed through an activation function to get the unit's output.\n", 20 | "\n", 21 | "\n", 22 | "\n", 23 | "Mathematically this looks like: \n", 24 | "\n", 25 | "$$\n", 26 | "\\begin{align}\n", 27 | "y &= f(w_1 x_1 + w_2 x_2 + b) \\\\\n", 28 | "y &= f\\left(\\sum_i w_i x_i +b \\right)\n", 29 | "\\end{align}\n", 30 | "$$\n", 31 | "\n", 32 | "With vectors this is the dot/inner product of two vectors:\n", 33 | "\n", 34 | "$$\n", 35 | "h = \\begin{bmatrix}\n", 36 | "x_1 \\, x_2 \\cdots x_n\n", 37 | "\\end{bmatrix}\n", 38 | "\\cdot \n", 39 | "\\begin{bmatrix}\n", 40 | " w_1 \\\\\n", 41 | " w_2 \\\\\n", 42 | " \\vdots \\\\\n", 43 | " w_n\n", 44 | "\\end{bmatrix}\n", 45 | "$$" 46 | ] 47 | }, 48 | { 49 | "cell_type": "markdown", 50 | "metadata": {}, 51 | "source": [ 52 | "## Tensors\n", 53 | "\n", 54 | "It turns out neural network computations are just a bunch of linear algebra operations on *tensors*, a generalization of matrices. 
A vector is a 1-dimensional tensor, a matrix is a 2-dimensional tensor, an array with three indices is a 3-dimensional tensor (RGB color images for example). The fundamental data structure for neural networks are tensors and PyTorch (as well as pretty much every other deep learning framework) is built around tensors.\n", 55 | "\n", 56 | "\n", 57 | "\n", 58 | "With the basics covered, it's time to explore how we can use PyTorch to build a simple neural network." 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": null, 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "# First, import PyTorch\n", 68 | "import torch" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": null, 74 | "metadata": {}, 75 | "outputs": [], 76 | "source": [ 77 | "def activation(x):\n", 78 | " \"\"\" Sigmoid activation function \n", 79 | " \n", 80 | " Arguments\n", 81 | " ---------\n", 82 | " x: torch.Tensor\n", 83 | " \"\"\"\n", 84 | " return 1/(1+torch.exp(-x))" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": null, 90 | "metadata": {}, 91 | "outputs": [], 92 | "source": [ 93 | "### Generate some data\n", 94 | "torch.manual_seed(7) # Set the random seed so things are predictable\n", 95 | "\n", 96 | "# Features are 5 random normal variables\n", 97 | "features = torch.randn((1, 5))\n", 98 | "# True weights for our data, random normal variables again\n", 99 | "weights = torch.randn_like(features)\n", 100 | "# and a true bias term\n", 101 | "bias = torch.randn((1, 1))" 102 | ] 103 | }, 104 | { 105 | "cell_type": "markdown", 106 | "metadata": {}, 107 | "source": [ 108 | "Above I generated data we can use to get the output of our simple network. This is all just random for now, going forward we'll start using normal data. Going through each relevant line:\n", 109 | "\n", 110 | "`features = torch.randn((1, 5))` creates a tensor with shape `(1, 5)`, one row and five columns, that contains values randomly distributed according to the normal distribution with a mean of zero and standard deviation of one. \n", 111 | "\n", 112 | "`weights = torch.randn_like(features)` creates another tensor with the same shape as `features`, again containing values from a normal distribution.\n", 113 | "\n", 114 | "Finally, `bias = torch.randn((1, 1))` creates a single value from a normal distribution.\n", 115 | "\n", 116 | "PyTorch tensors can be added, multiplied, subtracted, etc, just like Numpy arrays. In general, you'll use PyTorch tensors pretty much the same way you'd use Numpy arrays. They come with some nice benefits though such as GPU acceleration which we'll get to later. For now, use the generated data to calculate the output of this simple single layer network. \n", 117 | "> **Exercise**: Calculate the output of the network with input features `features`, weights `weights`, and bias `bias`. Similar to Numpy, PyTorch has a [`torch.sum()`](https://pytorch.org/docs/stable/torch.html#torch.sum) function, as well as a `.sum()` method on tensors, for taking sums. Use the function `activation` defined above as the activation function." 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": null, 123 | "metadata": {}, 124 | "outputs": [], 125 | "source": [ 126 | "## Calculate the output of this network using the weights and bias tensors" 127 | ] 128 | }, 129 | { 130 | "cell_type": "markdown", 131 | "metadata": {}, 132 | "source": [ 133 | "You can do the multiplication and sum in the same operation using a matrix multiplication. 
In general, you'll want to use matrix multiplications since they are more efficient and accelerated using modern libraries and high-performance computing on GPUs.\n", 134 | "\n", 135 | "Here, we want to do a matrix multiplication of the features and the weights. For this we can use [`torch.mm()`](https://pytorch.org/docs/stable/torch.html#torch.mm) or [`torch.matmul()`](https://pytorch.org/docs/stable/torch.html#torch.matmul) which is somewhat more complicated and supports broadcasting. If we try to do it with `features` and `weights` as they are, we'll get an error\n", 136 | "\n", 137 | "```python\n", 138 | ">> torch.mm(features, weights)\n", 139 | "\n", 140 | "---------------------------------------------------------------------------\n", 141 | "RuntimeError Traceback (most recent call last)\n", 142 | " in ()\n", 143 | "----> 1 torch.mm(features, weights)\n", 144 | "\n", 145 | "RuntimeError: size mismatch, m1: [1 x 5], m2: [1 x 5] at /Users/soumith/minicondabuild3/conda-bld/pytorch_1524590658547/work/aten/src/TH/generic/THTensorMath.c:2033\n", 146 | "```\n", 147 | "\n", 148 | "As you're building neural networks in any framework, you'll see this often. Really often. What's happening here is our tensors aren't the correct shapes to perform a matrix multiplication. Remember that for matrix multiplications, the number of columns in the first tensor must equal the number of rows in the second tensor. Both `features` and `weights` have the same shape, `(1, 5)`. This means we need to change the shape of `weights` to get the matrix multiplication to work.\n", 149 | "\n", 150 | "**Note:** To see the shape of a tensor called `tensor`, use `tensor.shape`. If you're building neural networks, you'll be using this method often.\n", 151 | "\n", 152 | "There are a few options here: [`weights.reshape()`](https://pytorch.org/docs/stable/tensors.html#torch.Tensor.reshape), [`weights.resize_()`](https://pytorch.org/docs/stable/tensors.html#torch.Tensor.resize_), and [`weights.view()`](https://pytorch.org/docs/stable/tensors.html#torch.Tensor.view).\n", 153 | "\n", 154 | "* `weights.reshape(a, b)` will return a new tensor with the same data as `weights` with size `(a, b)` sometimes, and sometimes a clone, as in it copies the data to another part of memory.\n", 155 | "* `weights.resize_(a, b)` returns the same tensor with a different shape. However, if the new shape results in fewer elements than the original tensor, some elements will be removed from the tensor (but not from memory). If the new shape results in more elements than the original tensor, new elements will be uninitialized in memory. Here I should note that the underscore at the end of the method denotes that this method is performed **in-place**. Here is a great forum thread to [read more about in-place operations](https://discuss.pytorch.org/t/what-is-in-place-operation/16244) in PyTorch.\n", 156 | "* `weights.view(a, b)` will return a new tensor with the same data as `weights` with size `(a, b)`.\n", 157 | "\n", 158 | "I usually use `.view()`, but any of the three methods will work for this. So, now we can reshape `weights` to have five rows and one column with something like `weights.view(5, 1)`.\n", 159 | "\n", 160 | "> **Exercise**: Calculate the output of our little network using matrix multiplication."
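, "\n", "\n", "As a quick, illustrative sanity check of the shapes involved (this only restates what the text above already gives away):\n", "\n", "```python\n", "print(features.shape) # torch.Size([1, 5])\n", "print(weights.view(5, 1).shape) # torch.Size([5, 1])\n", "# a (1, 5) matrix times a (5, 1) matrix gives a (1, 1) output\n", "```"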
161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": null, 166 | "metadata": {}, 167 | "outputs": [], 168 | "source": [ 169 | "## Calculate the output of this network using matrix multiplication" 170 | ] 171 | }, 172 | { 173 | "cell_type": "markdown", 174 | "metadata": {}, 175 | "source": [ 176 | "### Stack them up!\n", 177 | "\n", 178 | "That's how you can calculate the output for a single neuron. The real power of this algorithm happens when you start stacking these individual units into layers and stacks of layers, into a network of neurons. The output of one layer of neurons becomes the input for the next layer. With multiple input units and output units, we now need to express the weights as a matrix.\n", 179 | "\n", 180 | "\n", 181 | "\n", 182 | "The first layer shown on the bottom here are the inputs, understandably called the **input layer**. The middle layer is called the **hidden layer**, and the final layer (on the right) is the **output layer**. We can express this network mathematically with matrices again and use matrix multiplication to get linear combinations for each unit in one operation. For example, the hidden layer ($h_1$ and $h_2$ here) can be calculated \n", 183 | "\n", 184 | "$$\n", 185 | "\\vec{h} = [h_1 \\, h_2] = \n", 186 | "\\begin{bmatrix}\n", 187 | "x_1 \\, x_2 \\cdots \\, x_n\n", 188 | "\\end{bmatrix}\n", 189 | "\\cdot \n", 190 | "\\begin{bmatrix}\n", 191 | " w_{11} & w_{12} \\\\\n", 192 | " w_{21} &w_{22} \\\\\n", 193 | " \\vdots &\\vdots \\\\\n", 194 | " w_{n1} &w_{n2}\n", 195 | "\\end{bmatrix}\n", 196 | "$$\n", 197 | "\n", 198 | "The output for this small network is found by treating the hidden layer as inputs for the output unit. The network output is expressed simply\n", 199 | "\n", 200 | "$$\n", 201 | "y = f_2 \\! \\left(\\, f_1 \\! \\left(\\vec{x} \\, \\mathbf{W_1}\\right) \\mathbf{W_2} \\right)\n", 202 | "$$" 203 | ] 204 | }, 205 | { 206 | "cell_type": "code", 207 | "execution_count": null, 208 | "metadata": {}, 209 | "outputs": [], 210 | "source": [ 211 | "### Generate some data\n", 212 | "torch.manual_seed(7) # Set the random seed so things are predictable\n", 213 | "\n", 214 | "# Features are 3 random normal variables\n", 215 | "features = torch.randn((1, 3))\n", 216 | "\n", 217 | "# Define the size of each layer in our network\n", 218 | "n_input = features.shape[1] # Number of input units, must match number of input features\n", 219 | "n_hidden = 2 # Number of hidden units \n", 220 | "n_output = 1 # Number of output units\n", 221 | "\n", 222 | "# Weights for inputs to hidden layer\n", 223 | "W1 = torch.randn(n_input, n_hidden)\n", 224 | "# Weights for hidden layer to output layer\n", 225 | "W2 = torch.randn(n_hidden, n_output)\n", 226 | "\n", 227 | "# and bias terms for hidden and output layers\n", 228 | "B1 = torch.randn((1, n_hidden))\n", 229 | "B2 = torch.randn((1, n_output))" 230 | ] 231 | }, 232 | { 233 | "cell_type": "markdown", 234 | "metadata": {}, 235 | "source": [ 236 | "> **Exercise:** Calculate the output for this multi-layer network using the weights `W1` & `W2`, and the biases, `B1` & `B2`. 
" 237 | ] 238 | }, 239 | { 240 | "cell_type": "code", 241 | "execution_count": null, 242 | "metadata": {}, 243 | "outputs": [], 244 | "source": [ 245 | "## Your solution here" 246 | ] 247 | }, 248 | { 249 | "cell_type": "markdown", 250 | "metadata": {}, 251 | "source": [ 252 | "If you did this correctly, you should see the output `tensor([[ 0.3171]])`.\n", 253 | "\n", 254 | "The number of hidden units is a parameter of the network, often called a **hyperparameter** to differentiate it from the weights and biases parameters. As you'll see later when we discuss training a neural network, the more hidden units a network has, and the more layers, the better able it is to learn from data and make accurate predictions." 255 | ] 256 | }, 257 | { 258 | "cell_type": "markdown", 259 | "metadata": {}, 260 | "source": [ 261 | "## Numpy to Torch and back\n", 262 | "\n", 263 | "Special bonus section! PyTorch has a great feature for converting between Numpy arrays and Torch tensors. To create a tensor from a Numpy array, use `torch.from_numpy()`. To convert a tensor to a Numpy array, use the `.numpy()` method." 264 | ] 265 | }, 266 | { 267 | "cell_type": "code", 268 | "execution_count": null, 269 | "metadata": {}, 270 | "outputs": [], 271 | "source": [ 272 | "import numpy as np\n", 273 | "np.set_printoptions(precision=8)\n", 274 | "a = np.random.rand(4,3)\n", 275 | "a" 276 | ] 277 | }, 278 | { 279 | "cell_type": "code", 280 | "execution_count": null, 281 | "metadata": {}, 282 | "outputs": [], 283 | "source": [ 284 | "torch.set_printoptions(precision=8)\n", 285 | "b = torch.from_numpy(a)\n", 286 | "b" 287 | ] 288 | }, 289 | { 290 | "cell_type": "code", 291 | "execution_count": null, 292 | "metadata": {}, 293 | "outputs": [], 294 | "source": [ 295 | "b.numpy()" 296 | ] 297 | }, 298 | { 299 | "cell_type": "markdown", 300 | "metadata": {}, 301 | "source": [ 302 | "The memory is shared between the Numpy array and Torch tensor, so if you change the values in-place of one object, the other will change as well." 303 | ] 304 | }, 305 | { 306 | "cell_type": "code", 307 | "execution_count": null, 308 | "metadata": {}, 309 | "outputs": [], 310 | "source": [ 311 | "# Multiply PyTorch Tensor by 2, in place\n", 312 | "b.mul_(2)" 313 | ] 314 | }, 315 | { 316 | "cell_type": "code", 317 | "execution_count": null, 318 | "metadata": {}, 319 | "outputs": [], 320 | "source": [ 321 | "# Numpy array matches new values from Tensor\n", 322 | "a" 323 | ] 324 | } 325 | ], 326 | "metadata": { 327 | "kernelspec": { 328 | "display_name": "Python 3", 329 | "language": "python", 330 | "name": "python3" 331 | }, 332 | "language_info": { 333 | "codemirror_mode": { 334 | "name": "ipython", 335 | "version": 3 336 | }, 337 | "file_extension": ".py", 338 | "mimetype": "text/x-python", 339 | "name": "python", 340 | "nbconvert_exporter": "python", 341 | "pygments_lexer": "ipython3", 342 | "version": "3.6.6" 343 | } 344 | }, 345 | "nbformat": 4, 346 | "nbformat_minor": 2 347 | } 348 | -------------------------------------------------------------------------------- /deep-learning-with-pytorch/Part 1 - Tensors in PyTorch (Solution).ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Introduction to Deep Learning with PyTorch\n", 8 | "\n", 9 | "In this notebook, you'll get introduced to [PyTorch](http://pytorch.org/), a framework for building and training neural networks. 
PyTorch in a lot of ways behaves like the arrays you love from Numpy. These Numpy arrays, after all, are just tensors. PyTorch takes these tensors and makes it simple to move them to GPUs for the faster processing needed when training neural networks. It also provides a module that automatically calculates gradients (for backpropagation!) and another module specifically for building neural networks. All together, PyTorch ends up being more coherent with Python and the Numpy/Scipy stack compared to TensorFlow and other frameworks.\n", 10 | "\n" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": [ 17 | "## Neural Networks\n", 18 | "\n", 19 | "Deep Learning is based on artificial neural networks which have been around in some form since the late 1950s. The networks are built from individual parts approximating neurons, typically called units or simply \"neurons.\" Each unit has some number of weighted inputs. These weighted inputs are summed together (a linear combination) then passed through an activation function to get the unit's output.\n", 20 | "\n", 21 | "\n", 22 | "\n", 23 | "Mathematically this looks like: \n", 24 | "\n", 25 | "$$\n", 26 | "\\begin{align}\n", 27 | "y &= f(w_1 x_1 + w_2 x_2 + b) \\\\\n", 28 | "y &= f\\left(\\sum_i w_i x_i +b \\right)\n", 29 | "\\end{align}\n", 30 | "$$\n", 31 | "\n", 32 | "With vectors this is the dot/inner product of two vectors:\n", 33 | "\n", 34 | "$$\n", 35 | "h = \\begin{bmatrix}\n", 36 | "x_1 \\, x_2 \\cdots x_n\n", 37 | "\\end{bmatrix}\n", 38 | "\\cdot \n", 39 | "\\begin{bmatrix}\n", 40 | " w_1 \\\\\n", 41 | " w_2 \\\\\n", 42 | " \\vdots \\\\\n", 43 | " w_n\n", 44 | "\\end{bmatrix}\n", 45 | "$$" 46 | ] 47 | }, 48 | { 49 | "cell_type": "markdown", 50 | "metadata": {}, 51 | "source": [ 52 | "## Tensors\n", 53 | "\n", 54 | "It turns out neural network computations are just a bunch of linear algebra operations on *tensors*, a generalization of matrices. A vector is a 1-dimensional tensor, a matrix is a 2-dimensional tensor, an array with three indices is a 3-dimensional tensor (RGB color images for example). The fundamental data structure for neural networks are tensors and PyTorch (as well as pretty much every other deep learning framework) is built around tensors.\n", 55 | "\n", 56 | "\n", 57 | "\n", 58 | "With the basics covered, it's time to explore how we can use PyTorch to build a simple neural network." 
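, "\n", "\n", "For instance, here are tensors of a few different dimensionalities (a quick illustration; it assumes `torch` has been imported as in the next cell):\n", "\n", "```python\n", "v = torch.tensor([1., 2., 3.]) # 1-D tensor (vector), shape (3,)\n", "m = torch.randn(2, 3) # 2-D tensor (matrix), shape (2, 3)\n", "t = torch.randn(3, 28, 28) # 3-D tensor, like an RGB image\n", "print(v.shape, m.shape, t.shape)\n", "```"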
59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 1, 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "# First, import PyTorch\n", 68 | "import torch" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 2, 74 | "metadata": {}, 75 | "outputs": [], 76 | "source": [ 77 | "def activation(x):\n", 78 | " \"\"\" Sigmoid activation function \n", 79 | " \n", 80 | " Arguments\n", 81 | " ---------\n", 82 | " x: torch.Tensor\n", 83 | " \"\"\"\n", 84 | " return 1/(1+torch.exp(-x))" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": 3, 90 | "metadata": {}, 91 | "outputs": [], 92 | "source": [ 93 | "### Generate some data\n", 94 | "torch.manual_seed(7) # Set the random seed so things are predictable\n", 95 | "\n", 96 | "# Features are 5 random normal variables\n", 97 | "features = torch.randn((1, 5))\n", 98 | "# True weights for our data, random normal variables again\n", 99 | "weights = torch.randn_like(features)\n", 100 | "# and a true bias term\n", 101 | "bias = torch.randn((1, 1))" 102 | ] 103 | }, 104 | { 105 | "cell_type": "markdown", 106 | "metadata": {}, 107 | "source": [ 108 | "Above I generated data we can use to get the output of our simple network. This is all just random for now, going forward we'll start using normal data. Going through each relevant line:\n", 109 | "\n", 110 | "`features = torch.randn((1, 5))` creates a tensor with shape `(1, 5)`, one row and five columns, that contains values randomly distributed according to the normal distribution with a mean of zero and standard deviation of one. \n", 111 | "\n", 112 | "`weights = torch.randn_like(features)` creates another tensor with the same shape as `features`, again containing values from a normal distribution.\n", 113 | "\n", 114 | "Finally, `bias = torch.randn((1, 1))` creates a single value from a normal distribution.\n", 115 | "\n", 116 | "PyTorch tensors can be added, multiplied, subtracted, etc, just like Numpy arrays. In general, you'll use PyTorch tensors pretty much the same way you'd use Numpy arrays. They come with some nice benefits though such as GPU acceleration which we'll get to later. For now, use the generated data to calculate the output of this simple single layer network. \n", 117 | "> **Exercise**: Calculate the output of the network with input features `features`, weights `weights`, and bias `bias`. Similar to Numpy, PyTorch has a [`torch.sum()`](https://pytorch.org/docs/stable/torch.html#torch.sum) function, as well as a `.sum()` method on tensors, for taking sums. Use the function `activation` defined above as the activation function." 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": 4, 123 | "metadata": {}, 124 | "outputs": [], 125 | "source": [ 126 | "### Solution\n", 127 | "\n", 128 | "# Now, make our labels from our data and true weights\n", 129 | "\n", 130 | "y = activation(torch.sum(features * weights) + bias)\n", 131 | "y = activation((features * weights).sum() + bias)" 132 | ] 133 | }, 134 | { 135 | "cell_type": "markdown", 136 | "metadata": {}, 137 | "source": [ 138 | "You can do the multiplication and sum in the same operation using a matrix multiplication. In general, you'll want to use matrix multiplications since they are more efficient and accelerated using modern libraries and high-performance computing on GPUs.\n", 139 | "\n", 140 | "Here, we want to do a matrix multiplication of the features and the weights. 
For this we can use [`torch.mm()`](https://pytorch.org/docs/stable/torch.html#torch.mm) or [`torch.matmul()`](https://pytorch.org/docs/stable/torch.html#torch.matmul) which is somewhat more complicated and supports broadcasting. If we try to do it with `features` and `weights` as they are, we'll get an error\n", 141 | "\n", 142 | "```python\n", 143 | ">> torch.mm(features, weights)\n", 144 | "\n", 145 | "---------------------------------------------------------------------------\n", 146 | "RuntimeError Traceback (most recent call last)\n", 147 | " in ()\n", 148 | "----> 1 torch.mm(features, weights)\n", 149 | "\n", 150 | "RuntimeError: size mismatch, m1: [1 x 5], m2: [1 x 5] at /Users/soumith/minicondabuild3/conda-bld/pytorch_1524590658547/work/aten/src/TH/generic/THTensorMath.c:2033\n", 151 | "```\n", 152 | "\n", 153 | "As you're building neural networks in any framework, you'll see this often. Really often. What's happening here is our tensors aren't the correct shapes to perform a matrix multiplication. Remember that for matrix multiplications, the number of columns in the first tensor must equal to the number of rows in the second tensor. Both `features` and `weights` have the same shape, `(1, 5)`. This means we need to change the shape of `weights` to get the matrix multiplication to work.\n", 154 | "\n", 155 | "**Note:** To see the shape of a tensor called `tensor`, use `tensor.shape`. If you're building neural networks, you'll be using this method often.\n", 156 | "\n", 157 | "There are a few options here: [`weights.reshape()`](https://pytorch.org/docs/stable/tensors.html#torch.Tensor.reshape), [`weights.resize_()`](https://pytorch.org/docs/stable/tensors.html#torch.Tensor.resize_), and [`weights.view()`](https://pytorch.org/docs/stable/tensors.html#torch.Tensor.view).\n", 158 | "\n", 159 | "* `weights.reshape(a, b)` will return a new tensor with the same data as `weights` with size `(a, b)` sometimes, and sometimes a clone, as in it copies the data to another part of memory.\n", 160 | "* `weights.resize_(a, b)` returns the same tensor with a different shape. However, if the new shape results in fewer elements than the original tensor, some elements will be removed from the tensor (but not from memory). If the new shape results in more elements than the original tensor, new elements will be uninitialized in memory. Here I should note that the underscore at the end of the method denotes that this method is performed **in-place**. Here is a great forum thread to [read more about in-place operations](https://discuss.pytorch.org/t/what-is-in-place-operation/16244) in PyTorch.\n", 161 | "* `weights.view(a, b)` will return a new tensor with the same data as `weights` with size `(a, b)`.\n", 162 | "\n", 163 | "I usually use `.view()`, but any of the three methods will work for this. So, now we can reshape `weights` to have five rows and one column with something like `weights.view(5, 1)`.\n", 164 | "\n", 165 | "> **Exercise**: Calculate the output of our little network using matrix multiplication." 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": 5, 171 | "metadata": {}, 172 | "outputs": [], 173 | "source": [ 174 | "## Solution\n", 175 | "\n", 176 | "y = activation(torch.mm(features, weights.view(5,1)) + bias)" 177 | ] 178 | }, 179 | { 180 | "cell_type": "markdown", 181 | "metadata": {}, 182 | "source": [ 183 | "### Stack them up!\n", 184 | "\n", 185 | "That's how you can calculate the output for a single neuron. 
The real power of this algorithm happens when you start stacking these individual units into layers and stacks of layers, into a network of neurons. The output of one layer of neurons becomes the input for the next layer. With multiple input units and output units, we now need to express the weights as a matrix.\n", 186 | "\n", 187 | "\n", 188 | "\n", 189 | "The first layer shown on the bottom here are the inputs, understandably called the **input layer**. The middle layer is called the **hidden layer**, and the final layer (on the right) is the **output layer**. We can express this network mathematically with matrices again and use matrix multiplication to get linear combinations for each unit in one operation. For example, the hidden layer ($h_1$ and $h_2$ here) can be calculated \n", 190 | "\n", 191 | "$$\n", 192 | "\\vec{h} = [h_1 \\, h_2] = \n", 193 | "\\begin{bmatrix}\n", 194 | "x_1 \\, x_2 \\cdots \\, x_n\n", 195 | "\\end{bmatrix}\n", 196 | "\\cdot \n", 197 | "\\begin{bmatrix}\n", 198 | " w_{11} & w_{12} \\\\\n", 199 | " w_{21} &w_{22} \\\\\n", 200 | " \\vdots &\\vdots \\\\\n", 201 | " w_{n1} &w_{n2}\n", 202 | "\\end{bmatrix}\n", 203 | "$$\n", 204 | "\n", 205 | "The output for this small network is found by treating the hidden layer as inputs for the output unit. The network output is expressed simply\n", 206 | "\n", 207 | "$$\n", 208 | "y = f_2 \\! \\left(\\, f_1 \\! \\left(\\vec{x} \\, \\mathbf{W_1}\\right) \\mathbf{W_2} \\right)\n", 209 | "$$" 210 | ] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "execution_count": 6, 215 | "metadata": {}, 216 | "outputs": [], 217 | "source": [ 218 | "### Generate some data\n", 219 | "torch.manual_seed(7) # Set the random seed so things are predictable\n", 220 | "\n", 221 | "# Features are 3 random normal variables\n", 222 | "features = torch.randn((1, 3))\n", 223 | "\n", 224 | "# Define the size of each layer in our network\n", 225 | "n_input = features.shape[1] # Number of input units, must match number of input features\n", 226 | "n_hidden = 2 # Number of hidden units \n", 227 | "n_output = 1 # Number of output units\n", 228 | "\n", 229 | "# Weights for inputs to hidden layer\n", 230 | "W1 = torch.randn(n_input, n_hidden)\n", 231 | "# Weights for hidden layer to output layer\n", 232 | "W2 = torch.randn(n_hidden, n_output)\n", 233 | "\n", 234 | "# and bias terms for hidden and output layers\n", 235 | "B1 = torch.randn((1, n_hidden))\n", 236 | "B2 = torch.randn((1, n_output))" 237 | ] 238 | }, 239 | { 240 | "cell_type": "markdown", 241 | "metadata": {}, 242 | "source": [ 243 | "> **Exercise:** Calculate the output for this multi-layer network using the weights `W1` & `W2`, and the biases, `B1` & `B2`. " 244 | ] 245 | }, 246 | { 247 | "cell_type": "code", 248 | "execution_count": 7, 249 | "metadata": {}, 250 | "outputs": [ 251 | { 252 | "name": "stdout", 253 | "output_type": "stream", 254 | "text": [ 255 | "tensor([[ 0.3171]])\n" 256 | ] 257 | } 258 | ], 259 | "source": [ 260 | "### Solution\n", 261 | "\n", 262 | "h = activation(torch.mm(features, W1) + B1)\n", 263 | "output = activation(torch.mm(h, W2) + B2)\n", 264 | "print(output)" 265 | ] 266 | }, 267 | { 268 | "cell_type": "markdown", 269 | "metadata": {}, 270 | "source": [ 271 | "If you did this correctly, you should see the output `tensor([[ 0.3171]])`.\n", 272 | "\n", 273 | "The number of hidden units is a parameter of the network, often called a **hyperparameter** to differentiate it from the weights and biases parameters. 
As you'll see later when we discuss training a neural network, the more hidden units a network has, and the more layers, the better able it is to learn from data and make accurate predictions." 274 | ] 275 | }, 276 | { 277 | "cell_type": "markdown", 278 | "metadata": {}, 279 | "source": [ 280 | "## Numpy to Torch and back\n", 281 | "\n", 282 | "Special bonus section! PyTorch has a great feature for converting between Numpy arrays and Torch tensors. To create a tensor from a Numpy array, use `torch.from_numpy()`. To convert a tensor to a Numpy array, use the `.numpy()` method." 283 | ] 284 | }, 285 | { 286 | "cell_type": "code", 287 | "execution_count": 16, 288 | "metadata": {}, 289 | "outputs": [ 290 | { 291 | "data": { 292 | "text/plain": [ 293 | "array([[ 0.33669496, 0.59531562, 0.65433944],\n", 294 | " [ 0.86531224, 0.59945364, 0.28043973],\n", 295 | " [ 0.48409303, 0.98357622, 0.33884284],\n", 296 | " [ 0.25591391, 0.51081783, 0.39986403]])" 297 | ] 298 | }, 299 | "execution_count": 16, 300 | "metadata": {}, 301 | "output_type": "execute_result" 302 | } 303 | ], 304 | "source": [ 305 | "import numpy as np\n", 306 | "a = np.random.rand(4,3)\n", 307 | "a" 308 | ] 309 | }, 310 | { 311 | "cell_type": "code", 312 | "execution_count": 17, 313 | "metadata": {}, 314 | "outputs": [ 315 | { 316 | "data": { 317 | "text/plain": [ 318 | "\n", 319 | " 0.3367 0.5953 0.6543\n", 320 | " 0.8653 0.5995 0.2804\n", 321 | " 0.4841 0.9836 0.3388\n", 322 | " 0.2559 0.5108 0.3999\n", 323 | "[torch.DoubleTensor of size 4x3]" 324 | ] 325 | }, 326 | "execution_count": 17, 327 | "metadata": {}, 328 | "output_type": "execute_result" 329 | } 330 | ], 331 | "source": [ 332 | "b = torch.from_numpy(a)\n", 333 | "b" 334 | ] 335 | }, 336 | { 337 | "cell_type": "code", 338 | "execution_count": 18, 339 | "metadata": {}, 340 | "outputs": [ 341 | { 342 | "data": { 343 | "text/plain": [ 344 | "array([[ 0.33669496, 0.59531562, 0.65433944],\n", 345 | " [ 0.86531224, 0.59945364, 0.28043973],\n", 346 | " [ 0.48409303, 0.98357622, 0.33884284],\n", 347 | " [ 0.25591391, 0.51081783, 0.39986403]])" 348 | ] 349 | }, 350 | "execution_count": 18, 351 | "metadata": {}, 352 | "output_type": "execute_result" 353 | } 354 | ], 355 | "source": [ 356 | "b.numpy()" 357 | ] 358 | }, 359 | { 360 | "cell_type": "markdown", 361 | "metadata": {}, 362 | "source": [ 363 | "The memory is shared between the Numpy array and Torch tensor, so if you change the values in-place of one object, the other will change as well." 
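, "\n", "\n", "If you want an independent copy rather than shared memory, `torch.tensor()` copies the data (a small illustrative sketch; `c` is just a new name introduced here):\n", "\n", "```python\n", "c = torch.tensor(a) # copies the data instead of sharing it\n", "c.mul_(2) # modifies c only; a is unchanged\n", "```"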
364 | ] 365 | }, 366 | { 367 | "cell_type": "code", 368 | "execution_count": 19, 369 | "metadata": {}, 370 | "outputs": [ 371 | { 372 | "data": { 373 | "text/plain": [ 374 | "\n", 375 | " 0.6734 1.1906 1.3087\n", 376 | " 1.7306 1.1989 0.5609\n", 377 | " 0.9682 1.9672 0.6777\n", 378 | " 0.5118 1.0216 0.7997\n", 379 | "[torch.DoubleTensor of size 4x3]" 380 | ] 381 | }, 382 | "execution_count": 19, 383 | "metadata": {}, 384 | "output_type": "execute_result" 385 | } 386 | ], 387 | "source": [ 388 | "# Multiply PyTorch Tensor by 2, in place\n", 389 | "b.mul_(2)" 390 | ] 391 | }, 392 | { 393 | "cell_type": "code", 394 | "execution_count": 20, 395 | "metadata": {}, 396 | "outputs": [ 397 | { 398 | "data": { 399 | "text/plain": [ 400 | "array([[ 0.67338991, 1.19063124, 1.30867888],\n", 401 | " [ 1.73062448, 1.19890728, 0.56087946],\n", 402 | " [ 0.96818606, 1.96715243, 0.67768568],\n", 403 | " [ 0.51182782, 1.02163565, 0.79972807]])" 404 | ] 405 | }, 406 | "execution_count": 20, 407 | "metadata": {}, 408 | "output_type": "execute_result" 409 | } 410 | ], 411 | "source": [ 412 | "# Numpy array matches new values from Tensor\n", 413 | "a" 414 | ] 415 | } 416 | ], 417 | "metadata": { 418 | "kernelspec": { 419 | "display_name": "Python 3", 420 | "language": "python", 421 | "name": "python3" 422 | }, 423 | "language_info": { 424 | "codemirror_mode": { 425 | "name": "ipython", 426 | "version": 3 427 | }, 428 | "file_extension": ".py", 429 | "mimetype": "text/x-python", 430 | "name": "python", 431 | "nbconvert_exporter": "python", 432 | "pygments_lexer": "ipython3", 433 | "version": "3.6.6" 434 | } 435 | }, 436 | "nbformat": 4, 437 | "nbformat_minor": 2 438 | } 439 | -------------------------------------------------------------------------------- /deep-learning-with-pytorch/Part 2 - Neural Networks in PyTorch (Exercises).ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Neural networks with PyTorch\n", 8 | "\n", 9 | "Deep learning networks tend to be massive with dozens or hundreds of layers, that's where the term \"deep\" comes from. You can build one of these deep networks using only weight matrices as we did in the previous notebook, but in general it's very cumbersome and difficult to implement. PyTorch has a nice module `nn` that provides a nice way to efficiently build large neural networks." 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": { 16 | "collapsed": true 17 | }, 18 | "outputs": [], 19 | "source": [ 20 | "# Import necessary packages\n", 21 | "\n", 22 | "%matplotlib inline\n", 23 | "%config InlineBackend.figure_format = 'retina'\n", 24 | "\n", 25 | "import numpy as np\n", 26 | "import torch\n", 27 | "\n", 28 | "import helper\n", 29 | "\n", 30 | "import matplotlib.pyplot as plt" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": {}, 36 | "source": [ 37 | "\n", 38 | "Now we're going to build a larger network that can solve a (formerly) difficult problem, identifying text in an image. Here we'll use the MNIST dataset which consists of greyscale handwritten digits. Each image is 28x28 pixels, you can see a sample below\n", 39 | "\n", 40 | "\n", 41 | "\n", 42 | "Our goal is to build a neural network that can take one of these images and predict the digit in the image.\n", 43 | "\n", 44 | "First up, we need to get our dataset. This is provided through the `torchvision` package. 
The code below will download the MNIST dataset, then create training and test datasets for us. Don't worry too much about the details here, you'll learn more about this later." 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "metadata": {}, 51 | "outputs": [], 52 | "source": [ 53 | "# The MNIST datasets are hosted on yann.lecun.com, which has moved under CloudFlare protection\n", 54 | "# Run this script to enable the datasets download\n", 55 | "# Reference: https://github.com/pytorch/vision/issues/1938\n", 56 | "\n", 57 | "from six.moves import urllib\n", 58 | "opener = urllib.request.build_opener()\n", 59 | "opener.addheaders = [('User-agent', 'Mozilla/5.0')]\n", 60 | "urllib.request.install_opener(opener)" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": null, 66 | "metadata": { 67 | "collapsed": true 68 | }, 69 | "outputs": [], 70 | "source": [ 71 | "### Run this cell\n", 72 | "\n", 73 | "from torchvision import datasets, transforms\n", 74 | "\n", 75 | "# Define a transform to normalize the data\n", 76 | "transform = transforms.Compose([transforms.ToTensor(),\n", 77 | " transforms.Normalize((0.5,), (0.5,)),\n", 78 | " ])\n", 79 | "\n", 80 | "# Download and load the training data\n", 81 | "trainset = datasets.MNIST('~/.pytorch/MNIST_data/', download=True, train=True, transform=transform)\n", 82 | "trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)" 83 | ] 84 | }, 85 | { 86 | "cell_type": "markdown", 87 | "metadata": {}, 88 | "source": [ 89 | "We have the training data loaded into `trainloader` and we make that an iterator with `iter(trainloader)`. Later, we'll use this to loop through the dataset for training, like\n", 90 | "\n", 91 | "```python\n", 92 | "for image, label in trainloader:\n", 93 | " ## do things with images and labels\n", 94 | "```\n", 95 | "\n", 96 | "You'll notice I created the `trainloader` with a batch size of 64, and `shuffle=True`. The batch size is the number of images we get in one iteration from the data loader and pass through our network, often called a *batch*. And `shuffle=True` tells it to shuffle the dataset every time we start going through the data loader again. But here I'm just grabbing the first batch so we can check out the data. We can see below that `images` is just a tensor with size `(64, 1, 28, 28)`. So, 64 images per batch, 1 color channel, and 28x28 images." 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": null, 102 | "metadata": { 103 | "collapsed": true 104 | }, 105 | "outputs": [], 106 | "source": [ 107 | "dataiter = iter(trainloader)\n", 108 | "images, labels = next(dataiter)\n", 109 | "print(type(images))\n", 110 | "print(images.shape)\n", 111 | "print(labels.shape)" 112 | ] 113 | }, 114 | { 115 | "cell_type": "markdown", 116 | "metadata": {}, 117 | "source": [ 118 | "This is what one of the images looks like. " 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": null, 124 | "metadata": { 125 | "collapsed": true 126 | }, 127 | "outputs": [], 128 | "source": [ 129 | "plt.imshow(images[1].numpy().squeeze(), cmap='Greys_r');" 130 | ] 131 | }, 132 | { 133 | "cell_type": "markdown", 134 | "metadata": {}, 135 | "source": [ 136 | "First, let's try to build a simple network for this dataset using weight matrices and matrix multiplications. 
Then, we'll see how to do it using PyTorch's `nn` module which provides a much more convenient and powerful method for defining network architectures.\n", 137 | "\n", 138 | "The networks you've seen so far are called *fully-connected* or *dense* networks. Each unit in one layer is connected to each unit in the next layer. In fully-connected networks, the input to each layer must be a one-dimensional vector (which can be stacked into a 2D tensor as a batch of multiple examples). However, our images are 28x28 2D tensors, so we need to convert them into 1D vectors. Thinking about sizes, we need to convert the batch of images with shape `(64, 1, 28, 28)` to have a shape of `(64, 784)`, where 784 is 28 times 28. This is typically called *flattening*, we flattened the 2D images into 1D vectors.\n", 139 | "\n", 140 | "Previously you built a network with one output unit. Here we need 10 output units, one for each digit. We want our network to predict the digit shown in an image, so what we'll do is calculate probabilities that the image is of any one digit or class. This ends up being a discrete probability distribution over the classes (digits) that tells us the most likely class for the image. That means we need 10 output units for the 10 classes (digits). We'll see how to convert the network output into a probability distribution next.\n", 141 | "\n", 142 | "> **Exercise:** Flatten the batch of images `images`. Then build a multi-layer network with 784 input units, 256 hidden units, and 10 output units using random tensors for the weights and biases. For now, use a sigmoid activation for the hidden layer. Leave the output layer without an activation, we'll add one that gives us a probability distribution next." 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": null, 148 | "metadata": { 149 | "collapsed": true 150 | }, 151 | "outputs": [], 152 | "source": [ 153 | "## Your solution\n", 154 | "\n", 155 | "\n", 156 | "out = # output of your network, should have shape (64,10)" 157 | ] 158 | }, 159 | { 160 | "cell_type": "markdown", 161 | "metadata": {}, 162 | "source": [ 163 | "Now we have 10 outputs for our network. We want to pass in an image to our network and get out a probability distribution over the classes that tells us the likely class(es) the image belongs to. Something that looks like this:\n", 164 | "\n", 165 | "\n", 166 | "Here we see that the probability for each class is roughly the same. This is representing an untrained network, it hasn't seen any data yet so it just returns a uniform distribution with equal probabilities for each class.\n", 167 | "\n", 168 | "To calculate this probability distribution, we often use the [**softmax** function](https://en.wikipedia.org/wiki/Softmax_function). Mathematically this looks like\n", 169 | "\n", 170 | "$$\n", 171 | "\\Large \\sigma(x_i) = \\cfrac{e^{x_i}}{\\sum_{k=1}^{K}{e^{x_k}}}\n", 172 | "$$\n", 173 | "\n", 174 | "What this does is squish each input $x_i$ between 0 and 1 and normalizes the values to give you a proper probability distribution where the probabilities sum up to one.\n", 175 | "\n", 176 | "> **Exercise:** Implement a function `softmax` that performs the softmax calculation and returns probability distributions for each example in the batch. Note that you'll need to pay attention to the shapes when doing this. 
If you have a tensor `a` with shape `(64, 10)` and a tensor `b` with shape `(64,)`, doing `a/b` will give you an error because PyTorch will try to do the division across the columns (called broadcasting) but you'll get a size mismatch. The way to think about this is for each of the 64 examples, you only want to divide by one value, the sum in the denominator. So you need `b` to have a shape of `(64, 1)`. This way PyTorch will divide the 10 values in each row of `a` by the one value in each row of `b`. Pay attention to how you take the sum as well. You'll need to define the `dim` keyword in `torch.sum`. Setting `dim=0` takes the sum across the rows while `dim=1` takes the sum across the columns." 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": null, 182 | "metadata": { 183 | "collapsed": true 184 | }, 185 | "outputs": [], 186 | "source": [ 187 | "def softmax(x):\n", 188 | " ## TODO: Implement the softmax function here\n", 189 | "\n", 190 | "# Here, out should be the output of the network in the previous exercise with shape (64,10)\n", 191 | "probabilities = softmax(out)\n", 192 | "\n", 193 | "# Does it have the right shape? Should be (64, 10)\n", 194 | "print(probabilities.shape)\n", 195 | "# Does it sum to 1?\n", 196 | "print(probabilities.sum(dim=1))" 197 | ] 198 | }, 199 | { 200 | "cell_type": "markdown", 201 | "metadata": {}, 202 | "source": [ 203 | "## Building networks with PyTorch\n", 204 | "\n", 205 | "PyTorch provides a module `nn` that makes building networks much simpler. Here I'll show you how to build the same one as above with 784 inputs, 256 hidden units, 10 output units and a softmax output." 206 | ] 207 | }, 208 | { 209 | "cell_type": "code", 210 | "execution_count": null, 211 | "metadata": { 212 | "collapsed": true 213 | }, 214 | "outputs": [], 215 | "source": [ 216 | "from torch import nn" 217 | ] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": null, 222 | "metadata": { 223 | "collapsed": true 224 | }, 225 | "outputs": [], 226 | "source": [ 227 | "class Network(nn.Module):\n", 228 | " def __init__(self):\n", 229 | " super().__init__()\n", 230 | " \n", 231 | " # Inputs to hidden layer linear transformation\n", 232 | " self.hidden = nn.Linear(784, 256)\n", 233 | " # Output layer, 10 units - one for each digit\n", 234 | " self.output = nn.Linear(256, 10)\n", 235 | " \n", 236 | " # Define sigmoid activation and softmax output \n", 237 | " self.sigmoid = nn.Sigmoid()\n", 238 | " self.softmax = nn.Softmax(dim=1)\n", 239 | " \n", 240 | " def forward(self, x):\n", 241 | " # Pass the input tensor through each of our operations\n", 242 | " x = self.hidden(x)\n", 243 | " x = self.sigmoid(x)\n", 244 | " x = self.output(x)\n", 245 | " x = self.softmax(x)\n", 246 | " \n", 247 | " return x" 248 | ] 249 | }, 250 | { 251 | "cell_type": "markdown", 252 | "metadata": {}, 253 | "source": [ 254 | "Let's go through this bit by bit.\n", 255 | "\n", 256 | "```python\n", 257 | "class Network(nn.Module):\n", 258 | "```\n", 259 | "\n", 260 | "Here we're inheriting from `nn.Module`. Combined with `super().__init__()` this creates a class that tracks the architecture and provides a lot of useful methods and attributes. It is mandatory to inherit from `nn.Module` when you're creating a class for your network. 
The name of the class itself can be anything.\n", 261 | "\n", 262 | "```python\n", 263 | "self.hidden = nn.Linear(784, 256)\n", 264 | "```\n", 265 | "\n", 266 | "This line creates a module for a linear transformation, $x\mathbf{W} + b$, with 784 inputs and 256 outputs and assigns it to `self.hidden`. The module automatically creates the weight and bias tensors which we'll use in the `forward` method. You can access the weight and bias tensors once the network (`net`) is created with `net.hidden.weight` and `net.hidden.bias`.\n", 267 | "\n", 268 | "```python\n", 269 | "self.output = nn.Linear(256, 10)\n", 270 | "```\n", 271 | "\n", 272 | "Similarly, this creates another linear transformation with 256 inputs and 10 outputs.\n", 273 | "\n", 274 | "```python\n", 275 | "self.sigmoid = nn.Sigmoid()\n", 276 | "self.softmax = nn.Softmax(dim=1)\n", 277 | "```\n", 278 | "\n", 279 | "Here I defined operations for the sigmoid activation and softmax output. Setting `dim=1` in `nn.Softmax(dim=1)` calculates softmax across the columns.\n", 280 | "\n", 281 | "```python\n", 282 | "def forward(self, x):\n", 283 | "```\n", 284 | "\n", 285 | "PyTorch networks created with `nn.Module` must have a `forward` method defined. It takes in a tensor `x` and passes it through the operations you defined in the `__init__` method.\n", 286 | "\n", 287 | "```python\n", 288 | "x = self.hidden(x)\n", 289 | "x = self.sigmoid(x)\n", 290 | "x = self.output(x)\n", 291 | "x = self.softmax(x)\n", 292 | "```\n", 293 | "\n", 294 | "Here the input tensor `x` is passed through each operation and reassigned to `x`. We can see that the input tensor goes through the hidden layer, then a sigmoid function, then the output layer, and finally the softmax function. It doesn't matter what you name the variables here, as long as the inputs and outputs of the operations match the network architecture you want to build. The order in which you define things in the `__init__` method doesn't matter, but you'll need to sequence the operations correctly in the `forward` method.\n", 295 | "\n", 296 | "Now we can create a `Network` object." 297 | ] 298 | }, 299 | { 300 | "cell_type": "code", 301 | "execution_count": null, 302 | "metadata": { 303 | "collapsed": true 304 | }, 305 | "outputs": [], 306 | "source": [ 307 | "# Create the network and look at its text representation\n", 308 | "model = Network()\n", 309 | "model" 310 | ] 311 | }, 312 | { 313 | "cell_type": "markdown", 314 | "metadata": {}, 315 | "source": [ 316 | "You can define the network somewhat more concisely and clearly using the `torch.nn.functional` module. This is the most common way you'll see networks defined as many operations are simple element-wise functions. We normally import this module as `F`, `import torch.nn.functional as F`."
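, "\n", "\n", "**Note:** in more recent PyTorch releases, `F.sigmoid` is deprecated in favor of `torch.sigmoid`, so the cell below may print a deprecation warning depending on your installed version."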
317 | ] 318 | }, 319 | { 320 | "cell_type": "code", 321 | "execution_count": null, 322 | "metadata": { 323 | "collapsed": true 324 | }, 325 | "outputs": [], 326 | "source": [ 327 | "import torch.nn.functional as F\n", 328 | "\n", 329 | "class Network(nn.Module):\n", 330 | " def __init__(self):\n", 331 | " super().__init__()\n", 332 | " # Inputs to hidden layer linear transformation\n", 333 | " self.hidden = nn.Linear(784, 256)\n", 334 | " # Output layer, 10 units - one for each digit\n", 335 | " self.output = nn.Linear(256, 10)\n", 336 | " \n", 337 | " def forward(self, x):\n", 338 | " # Hidden layer with sigmoid activation\n", 339 | " x = F.sigmoid(self.hidden(x))\n", 340 | " # Output layer with softmax activation\n", 341 | " x = F.softmax(self.output(x), dim=1)\n", 342 | " \n", 343 | " return x" 344 | ] 345 | }, 346 | { 347 | "cell_type": "markdown", 348 | "metadata": {}, 349 | "source": [ 350 | "### Activation functions\n", 351 | "\n", 352 | "So far we've only been looking at the sigmoid activation function, but in general any function can be used as an activation function. The only requirement is that for a network to approximate a non-linear function, the activation functions must be non-linear. Here are a few more examples of common activation functions: Tanh (hyperbolic tangent), and ReLU (rectified linear unit).\n", 353 | "\n", 354 | "\n", 355 | "\n", 356 | "In practice, the ReLU function is used almost exclusively as the activation function for hidden layers." 357 | ] 358 | }, 359 | { 360 | "cell_type": "markdown", 361 | "metadata": {}, 362 | "source": [ 363 | "### Your Turn to Build a Network\n", 364 | "\n", 365 | "\n", 366 | "\n", 367 | "> **Exercise:** Create a network with 784 input units, a hidden layer with 128 units and a ReLU activation, then a hidden layer with 64 units and a ReLU activation, and finally an output layer with a softmax activation as shown above. You can use a ReLU activation with the `nn.ReLU` module or `F.relu` function.\n", 368 | "\n", 369 | "It's good practice to name your layers by their type of network, for instance 'fc' to represent a fully-connected layer. As you code your solution, use `fc1`, `fc2`, and `fc3` as your layer names." 370 | ] 371 | }, 372 | { 373 | "cell_type": "code", 374 | "execution_count": null, 375 | "metadata": { 376 | "collapsed": true, 377 | "scrolled": true 378 | }, 379 | "outputs": [], 380 | "source": [ 381 | "## Your solution here\n" 382 | ] 383 | }, 384 | { 385 | "cell_type": "markdown", 386 | "metadata": {}, 387 | "source": [ 388 | "### Initializing weights and biases\n", 389 | "\n", 390 | "The weights and such are automatically initialized for you, but it's possible to customize how they are initialized. The weights and biases are tensors attached to the layer you defined, you can get them with `model.fc1.weight` for instance." 391 | ] 392 | }, 393 | { 394 | "cell_type": "code", 395 | "execution_count": null, 396 | "metadata": { 397 | "collapsed": true 398 | }, 399 | "outputs": [], 400 | "source": [ 401 | "print(model.fc1.weight)\n", 402 | "print(model.fc1.bias)" 403 | ] 404 | }, 405 | { 406 | "cell_type": "markdown", 407 | "metadata": {}, 408 | "source": [ 409 | "For custom initialization, we want to modify these tensors in place. These are actually autograd *Variables*, so we need to get back the actual tensors with `model.fc1.weight.data`. Once we have the tensors, we can fill them with zeros (for biases) or random normal values." 
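, "\n", "\n", "In newer PyTorch versions the same customization is usually written with the `torch.nn.init` helpers, which also work in place; a roughly equivalent sketch of the two cells below:\n", "\n", "```python\n", "from torch import nn\n", "\n", "nn.init.zeros_(model.fc1.bias) # set the biases to zero, in place\n", "nn.init.normal_(model.fc1.weight, std=0.01) # weights from N(0, 0.01^2)\n", "```"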
410 | ] 411 | }, 412 | { 413 | "cell_type": "code", 414 | "execution_count": null, 415 | "metadata": { 416 | "collapsed": true 417 | }, 418 | "outputs": [], 419 | "source": [ 420 | "# Set biases to all zeros\n", 421 | "model.fc1.bias.data.fill_(0)" 422 | ] 423 | }, 424 | { 425 | "cell_type": "code", 426 | "execution_count": null, 427 | "metadata": { 428 | "collapsed": true 429 | }, 430 | "outputs": [], 431 | "source": [ 432 | "# sample from random normal with standard dev = 0.01\n", 433 | "model.fc1.weight.data.normal_(std=0.01)" 434 | ] 435 | }, 436 | { 437 | "cell_type": "markdown", 438 | "metadata": {}, 439 | "source": [ 440 | "### Forward pass\n", 441 | "\n", 442 | "Now that we have a network, let's see what happens when we pass in an image." 443 | ] 444 | }, 445 | { 446 | "cell_type": "code", 447 | "execution_count": null, 448 | "metadata": { 449 | "collapsed": true 450 | }, 451 | "outputs": [], 452 | "source": [ 453 | "# Grab some data \n", 454 | "dataiter = iter(trainloader)\n", 455 | "images, labels = next(dataiter)\n", 456 | "\n", 457 | "# Resize images into a 1D vector, new shape is (batch size, color channels, image pixels) \n", 458 | "images.resize_(64, 1, 784)\n", 459 | "# or images.resize_(images.shape[0], 1, 784) to automatically get batch size\n", 460 | "\n", 461 | "# Forward pass through the network\n", 462 | "img_idx = 0\n", 463 | "ps = model(images[img_idx,:])\n", 464 | "\n", 465 | "img = images[img_idx]\n", 466 | "helper.view_classify(img.view(1, 28, 28), ps)" 467 | ] 468 | }, 469 | { 470 | "cell_type": "markdown", 471 | "metadata": {}, 472 | "source": [ 473 | "As you can see above, our network has basically no idea what this digit is. That's because we haven't trained it yet; all the weights are random!\n", 474 | "\n", 475 | "### Using `nn.Sequential`\n", 476 | "\n", 477 | "PyTorch provides a convenient way to build networks like this where a tensor is passed sequentially through operations, `nn.Sequential` ([documentation](https://pytorch.org/docs/master/nn.html#torch.nn.Sequential)). Using this to build the equivalent network:" 478 | ] 479 | }, 480 | { 481 | "cell_type": "code", 482 | "execution_count": null, 483 | "metadata": { 484 | "collapsed": true 485 | }, 486 | "outputs": [], 487 | "source": [ 488 | "# Hyperparameters for our network\n", 489 | "input_size = 784\n", 490 | "hidden_sizes = [128, 64]\n", 491 | "output_size = 10\n", 492 | "\n", 493 | "# Build a feed-forward network\n", 494 | "model = nn.Sequential(nn.Linear(input_size, hidden_sizes[0]),\n", 495 | " nn.ReLU(),\n", 496 | " nn.Linear(hidden_sizes[0], hidden_sizes[1]),\n", 497 | " nn.ReLU(),\n", 498 | " nn.Linear(hidden_sizes[1], output_size),\n", 499 | " nn.Softmax(dim=1))\n", 500 | "print(model)\n", 501 | "\n", 502 | "# Forward pass through the network and display output\n", 503 | "images, labels = next(iter(trainloader))\n", 504 | "images.resize_(images.shape[0], 1, 784)\n", 505 | "ps = model(images[0,:])\n", 506 | "helper.view_classify(images[0].view(1, 28, 28), ps)" 507 | ] 508 | }, 509 | { 510 | "cell_type": "markdown", 511 | "metadata": {}, 512 | "source": [ 513 | "Here our model is the same as before: 784 input units, a hidden layer with 128 units, ReLU activation, 64 unit hidden layer, another ReLU, then the output layer with 10 units, and the softmax output.\n", 514 | "\n", 515 | "The operations are available by passing in the appropriate index. For example, if you want to get the first Linear operation and look at the weights, you'd use `model[0]`."
516 | ] 517 | }, 518 | { 519 | "cell_type": "code", 520 | "execution_count": null, 521 | "metadata": { 522 | "collapsed": true 523 | }, 524 | "outputs": [], 525 | "source": [ 526 | "print(model[0])\n", 527 | "model[0].weight" 528 | ] 529 | }, 530 | { 531 | "cell_type": "markdown", 532 | "metadata": {}, 533 | "source": [ 534 | "You can also pass in an `OrderedDict` to name the individual layers and operations, instead of using incremental integers. Note that dictionary keys must be unique, so _each operation must have a different name_." 535 | ] 536 | }, 537 | { 538 | "cell_type": "code", 539 | "execution_count": null, 540 | "metadata": { 541 | "collapsed": true 542 | }, 543 | "outputs": [], 544 | "source": [ 545 | "from collections import OrderedDict\n", 546 | "model = nn.Sequential(OrderedDict([\n", 547 | " ('fc1', nn.Linear(input_size, hidden_sizes[0])),\n", 548 | " ('relu1', nn.ReLU()),\n", 549 | " ('fc2', nn.Linear(hidden_sizes[0], hidden_sizes[1])),\n", 550 | " ('relu2', nn.ReLU()),\n", 551 | " ('output', nn.Linear(hidden_sizes[1], output_size)),\n", 552 | " ('softmax', nn.Softmax(dim=1))]))\n", 553 | "model" 554 | ] 555 | }, 556 | { 557 | "cell_type": "markdown", 558 | "metadata": {}, 559 | "source": [ 560 | "Now you can access layers either by integer index or by name." 561 | ] 562 | }, 563 | { 564 | "cell_type": "code", 565 | "execution_count": null, 566 | "metadata": { 567 | "collapsed": true 568 | }, 569 | "outputs": [], 570 | "source": [ 571 | "print(model[0])\n", 572 | "print(model.fc1)" 573 | ] 574 | }, 575 | { 576 | "cell_type": "markdown", 577 | "metadata": {}, 578 | "source": [ 579 | "In the next notebook, we'll see how we can train a neural network to accurately predict the numbers appearing in the MNIST images." 580 | ] 581 | } 582 | ], 583 | "metadata": { 584 | "kernelspec": { 585 | "display_name": "Python [default]", 586 | "language": "python", 587 | "name": "python3" 588 | }, 589 | "language_info": { 590 | "codemirror_mode": { 591 | "name": "ipython", 592 | "version": 3 593 | }, 594 | "file_extension": ".py", 595 | "mimetype": "text/x-python", 596 | "name": "python", 597 | "nbconvert_exporter": "python", 598 | "pygments_lexer": "ipython3", 599 | "version": "3.6.4" 600 | } 601 | }, 602 | "nbformat": 4, 603 | "nbformat_minor": 2 604 | } 605 | -------------------------------------------------------------------------------- /deep-learning-with-pytorch/Part 3 - Training Neural Networks (Exercises).ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Training Neural Networks\n", 8 | "\n", 9 | "The network we built in the previous part isn't so smart; it doesn't know anything about our handwritten digits. Neural networks with non-linear activations work like universal function approximators. There is some function that maps your input to the output. For example, images of handwritten digits to class probabilities. The power of neural networks is that we can train them to approximate this function, and basically any function given enough data and compute time.\n", 10 | "\n", 11 | "\n", 12 | "\n", 13 | "At first the network is naive; it doesn't know the function mapping the inputs to the outputs. We train the network by showing it examples of real data, then adjusting the network parameters such that it approximates this function.\n", 14 | "\n", 15 | "To find these parameters, we need to know how poorly the network is predicting the real outputs. 
For this we calculate a **loss function** (also called the cost), a measure of our prediction error. For example, the mean squared loss is often used in regression and binary classification problems\n", 16 | "\n", 17 | "$$\n", 18 | "\\large \\ell = \\frac{1}{2n}\\sum_i^n{\\left(y_i - \\hat{y}_i\\right)^2}\n", 19 | "$$\n", 20 | "\n", 21 | "where $n$ is the number of training examples, $y_i$ are the true labels, and $\\hat{y}_i$ are the predicted labels.\n", 22 | "\n", 23 | "By minimizing this loss with respect to the network parameters, we can find configurations where the loss is at a minimum and the network is able to predict the correct labels with high accuracy. We find this minimum using a process called **gradient descent**. The gradient is the slope of the loss function and points in the direction of fastest change. To get to the minimum in the least amount of time, we then want to follow the gradient (downwards). You can think of this like descending a mountain by following the steepest slope to the base.\n", 24 | "\n", 25 | "" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "## Backpropagation\n", 33 | "\n", 34 | "For single layer networks, gradient descent is straightforward to implement. However, it's more complicated for deeper, multilayer neural networks like the one we've built. Complicated enough that it took about 30 years before researchers figured out how to train multilayer networks.\n", 35 | "\n", 36 | "Training multilayer networks is done through **backpropagation** which is really just an application of the chain rule from calculus. It's easiest to understand if we convert a two layer network into a graph representation.\n", 37 | "\n", 38 | "\n", 39 | "\n", 40 | "In the forward pass through the network, our data and operations go from bottom to top here. We pass the input $x$ through a linear transformation $L_1$ with weights $W_1$ and biases $b_1$. The output then goes through the sigmoid operation $S$ and another linear transformation $L_2$. Finally we calculate the loss $\\ell$. We use the loss as a measure of how bad the network's predictions are. The goal then is to adjust the weights and biases to minimize the loss.\n", 41 | "\n", 42 | "To train the weights with gradient descent, we propagate the gradient of the loss backwards through the network. Each operation has some gradient between the inputs and outputs. As we send the gradients backwards, we multiply the incoming gradient with the gradient for the operation. Mathematically, this is really just calculating the gradient of the loss with respect to the weights using the chain rule.\n", 43 | "\n", 44 | "$$\n", 45 | "\\large \\frac{\\partial \\ell}{\\partial W_1} = \\frac{\\partial L_1}{\\partial W_1} \\frac{\\partial S}{\\partial L_1} \\frac{\\partial L_2}{\\partial S} \\frac{\\partial \\ell}{\\partial L_2}\n", 46 | "$$\n", 47 | "\n", 48 | "**Note:** I'm glossing over a few details here that require some knowledge of vector calculus, but they aren't necessary to understand what's going on.\n", 49 | "\n", 50 | "We update our weights using this gradient with some learning rate $\\alpha$. \n", 51 | "\n", 52 | "$$\n", 53 | "\\large W^\\prime_1 = W_1 - \\alpha \\frac{\\partial \\ell}{\\partial W_1}\n", 54 | "$$\n", 55 | "\n", 56 | "The learning rate $\\alpha$ is set such that the weight update steps are small enough that the iterative method settles in a minimum." 
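To make the update rule concrete, here's a minimal sketch of a single gradient descent step on a made-up one-parameter loss. It's just the arithmetic of $W^\prime = W - \alpha \, \partial \ell / \partial W$; the loss and the numbers are invented for illustration:

```python
# Toy loss l(w) = (w - 1)**2, which is minimized at w = 1
w = 3.0      # current weight (made up)
alpha = 0.1  # learning rate

grad = 2 * (w - 1)    # dl/dw = 2(w - 1) = 4 at w = 3
w = w - alpha * grad  # w' = 3 - 0.1 * 4 = 2.6

print(w)  # 2.6 -- one step closer to the minimum at w = 1
```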
57 | ] 58 | }, 59 | { 60 | "cell_type": "markdown", 61 | "metadata": {}, 62 | "source": [ 63 | "## Losses in PyTorch\n", 64 | "\n", 65 | "Let's start by seeing how we calculate the loss with PyTorch. Through the `nn` module, PyTorch provides losses such as the cross-entropy loss (`nn.CrossEntropyLoss`). You'll usually see the loss assigned to `criterion`. As noted in the last part, with a classification problem such as MNIST, we're using the softmax function to predict class probabilities. With a softmax output, you want to use cross-entropy as the loss. To actually calculate the loss, you first define the criterion then pass in the output of your network and the correct labels.\n", 66 | "\n", 67 | "There's something really important to note here. Looking at [the documentation for `nn.CrossEntropyLoss`](https://pytorch.org/docs/stable/nn.html#torch.nn.CrossEntropyLoss),\n", 68 | "\n", 69 | "> This criterion combines `nn.LogSoftmax()` and `nn.NLLLoss()` in one single class.\n", 70 | ">\n", 71 | "> The input is expected to contain scores for each class.\n", 72 | "\n", 73 | "This means we need to pass in the raw output of our network into the loss, not the output of the softmax function. This raw output is usually called the *logits* or *scores*. We use the logits because softmax gives you probabilities which will often be very close to zero or one, but floating-point numbers can't accurately represent values near zero or one ([read more here](https://docs.python.org/3/tutorial/floatingpoint.html)). It's usually best to avoid doing calculations with probabilities; typically we use log-probabilities instead." 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "metadata": {}, 80 | "outputs": [], 81 | "source": [ 82 | "# The MNIST datasets are hosted on yann.lecun.com, which has moved under CloudFlare protection\n", 83 | "# Run this cell to enable downloading the datasets\n", 84 | "# Reference: https://github.com/pytorch/vision/issues/1938\n", 85 | "\n", 86 | "from six.moves import urllib\n", 87 | "opener = urllib.request.build_opener()\n", 88 | "opener.addheaders = [('User-agent', 'Mozilla/5.0')]\n", 89 | "urllib.request.install_opener(opener)" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": null, 95 | "metadata": {}, 96 | "outputs": [], 97 | "source": [ 98 | "import torch\n", 99 | "from torch import nn\n", 100 | "import torch.nn.functional as F\n", 101 | "from torchvision import datasets, transforms\n", 102 | "\n", 103 | "# Define a transform to normalize the data\n", 104 | "transform = transforms.Compose([transforms.ToTensor(),\n", 105 | " transforms.Normalize((0.5,), (0.5,)),\n", 106 | " ])\n", 107 | "# Download and load the training data\n", 108 | "trainset = datasets.MNIST('~/.pytorch/MNIST_data/', download=True, train=True, transform=transform)\n", 109 | "trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)" 110 | ] 111 | }, 112 | { 113 | "cell_type": "markdown", 114 | "metadata": {}, 115 | "source": [ 116 | "### Note\n", 117 | "If you haven't seen `nn.Sequential` yet, please finish the end of the Part 2 notebook." 
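As a quick sanity check of the documentation claim quoted above (that `nn.CrossEntropyLoss` combines `nn.LogSoftmax` and `nn.NLLLoss`), here's a minimal sketch with made-up scores and labels; it should print `tensor(True)`:

```python
import torch
from torch import nn
import torch.nn.functional as F

logits = torch.randn(3, 10)       # made-up scores: 3 examples, 10 classes
labels = torch.tensor([1, 4, 7])  # made-up labels

ce = nn.CrossEntropyLoss()(logits, labels)
nll = nn.NLLLoss()(F.log_softmax(logits, dim=1), labels)

print(torch.isclose(ce, nll))     # tensor(True) -- the two losses agree
```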
118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": null, 123 | "metadata": {}, 124 | "outputs": [], 125 | "source": [ 126 | "# Build a feed-forward network\n", 127 | "model = nn.Sequential(nn.Linear(784, 128),\n", 128 | " nn.ReLU(),\n", 129 | " nn.Linear(128, 64),\n", 130 | " nn.ReLU(),\n", 131 | " nn.Linear(64, 10))\n", 132 | "\n", 133 | "# Define the loss\n", 134 | "criterion = nn.CrossEntropyLoss()\n", 135 | "\n", 136 | "# Get our data\n", 137 | "dataiter = iter(trainloader)\n", 138 | "\n", 139 | "images, labels = next(dataiter)\n", 140 | "\n", 141 | "# Flatten images\n", 142 | "images = images.view(images.shape[0], -1)\n", 143 | "\n", 144 | "# Forward pass, get our logits\n", 145 | "logits = model(images)\n", 146 | "# Calculate the loss with the logits and the labels\n", 147 | "loss = criterion(logits, labels)\n", 148 | "\n", 149 | "print(loss)" 150 | ] 151 | }, 152 | { 153 | "cell_type": "markdown", 154 | "metadata": {}, 155 | "source": [ 156 | "In my experience it's more convenient to build the model with a log-softmax output using `nn.LogSoftmax` or `F.log_softmax` ([documentation](https://pytorch.org/docs/stable/nn.html#torch.nn.LogSoftmax)). Then you can get the actual probabilities by taking the exponential `torch.exp(output)`. With a log-softmax output, you want to use the negative log likelihood loss, `nn.NLLLoss` ([documentation](https://pytorch.org/docs/stable/nn.html#torch.nn.NLLLoss)).\n", 157 | "\n", 158 | ">**Exercise:** Build a model that returns the log-softmax as the output and calculate the loss using the negative log likelihood loss. Note that for `nn.LogSoftmax` and `F.log_softmax` you'll need to set the `dim` keyword argument appropriately. `dim=0` calculates softmax across the rows, so each column sums to 1, while `dim=1` calculates across the columns so each row sums to 1. Think about what you want the output to be and choose `dim` appropriately." 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": null, 164 | "metadata": {}, 165 | "outputs": [], 166 | "source": [ 167 | "# TODO: Build a feed-forward network\n", 168 | "model = \n", 169 | "\n", 170 | "# TODO: Define the loss\n", 171 | "criterion = \n", 172 | "\n", 173 | "### Run this to check your work\n", 174 | "# Get our data\n", 175 | "dataiter = iter(trainloader)\n", 176 | "\n", 177 | "images, labels = next(dataiter)\n", 178 | "\n", 179 | "# Flatten images\n", 180 | "images = images.view(images.shape[0], -1)\n", 181 | "\n", 182 | "# Forward pass, get our logits\n", 183 | "logits = model(images)\n", 184 | "# Calculate the loss with the logits and the labels\n", 185 | "loss = criterion(logits, labels)\n", 186 | "\n", 187 | "print(loss)" 188 | ] 189 | }, 190 | { 191 | "cell_type": "markdown", 192 | "metadata": {}, 193 | "source": [ 194 | "## Autograd\n", 195 | "\n", 196 | "Now that we know how to calculate a loss, how do we use it to perform backpropagation? Torch provides a module, `autograd`, for automatically calculating the gradients of tensors. We can use it to calculate the gradients of all our parameters with respect to the loss. Autograd works by keeping track of operations performed on tensors, then going backwards through those operations, calculating gradients along the way. To make sure PyTorch keeps track of operations on a tensor and calculates the gradients, you need to set `requires_grad = True` on a tensor. 
You can do this at creation with the `requires_grad` keyword, or at any time with `x.requires_grad_(True)`.\n", 197 | "\n", 198 | "You can turn off gradients for a block of code with the `torch.no_grad()` context manager:\n", 199 | "```python\n", 200 | ">>> x = torch.zeros(1, requires_grad=True)\n", 201 | ">>> with torch.no_grad():\n", 202 | "... y = x * 2\n", 203 | ">>> y.requires_grad\n", 204 | "False\n", 205 | "```\n", 206 | "\n", 207 | "Also, you can turn on or off gradients altogether with `torch.set_grad_enabled(True|False)`.\n", 208 | "\n", 209 | "The gradients are computed with respect to some variable `z` with `z.backward()`. This does a backward pass through the operations that created `z`." 210 | ] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "execution_count": null, 215 | "metadata": {}, 216 | "outputs": [], 217 | "source": [ 218 | "x = torch.randn(2,2, requires_grad=True)\n", 219 | "print(x)" 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": null, 225 | "metadata": {}, 226 | "outputs": [], 227 | "source": [ 228 | "y = x**2\n", 229 | "print(y)" 230 | ] 231 | }, 232 | { 233 | "cell_type": "markdown", 234 | "metadata": {}, 235 | "source": [ 236 | "Below we can see the operation that created `y`, a power operation `PowBackward0`." 237 | ] 238 | }, 239 | { 240 | "cell_type": "code", 241 | "execution_count": null, 242 | "metadata": {}, 243 | "outputs": [], 244 | "source": [ 245 | "## grad_fn shows the function that generated this variable\n", 246 | "print(y.grad_fn)" 247 | ] 248 | }, 249 | { 250 | "cell_type": "markdown", 251 | "metadata": {}, 252 | "source": [ 253 | "The autograd module keeps track of these operations and knows how to calculate the gradient for each one. In this way, it's able to calculate the gradients for a chain of operations, with respect to any one tensor. Let's reduce the tensor `y` to a scalar value, the mean." 254 | ] 255 | }, 256 | { 257 | "cell_type": "code", 258 | "execution_count": null, 259 | "metadata": {}, 260 | "outputs": [], 261 | "source": [ 262 | "z = y.mean()\n", 263 | "print(z)" 264 | ] 265 | }, 266 | { 267 | "cell_type": "markdown", 268 | "metadata": {}, 269 | "source": [ 270 | "You can check the gradients for `x` and `y`, but they are currently empty." 271 | ] 272 | }, 273 | { 274 | "cell_type": "code", 275 | "execution_count": null, 276 | "metadata": {}, 277 | "outputs": [], 278 | "source": [ 279 | "print(x.grad)" 280 | ] 281 | }, 282 | { 283 | "cell_type": "markdown", 284 | "metadata": {}, 285 | "source": [ 286 | "To calculate the gradients, you need to run the `.backward` method on a Variable, `z` for example. This will calculate the gradient of `z` with respect to `x`:\n", 287 | "\n", 288 | "$$\n", 289 | "\\frac{\\partial z}{\\partial x} = \\frac{\\partial}{\\partial x}\\left[\\frac{1}{n}\\sum_i^n x_i^2\\right] = \\frac{x}{2}\n", 290 | "$$" 291 | ] 292 | }, 293 | { 294 | "cell_type": "code", 295 | "execution_count": null, 296 | "metadata": {}, 297 | "outputs": [], 298 | "source": [ 299 | "z.backward()\n", 300 | "print(x.grad)\n", 301 | "print(x/2)" 302 | ] 303 | }, 304 | { 305 | "cell_type": "markdown", 306 | "metadata": {}, 307 | "source": [ 308 | "These gradient calculations are particularly useful for neural networks. For training we need the gradients of the cost with respect to the weights. With PyTorch, we run data forward through the network to calculate the loss, then go backwards to calculate the gradients of the loss with respect to the parameters. Once we have the gradients, we can make a gradient descent step. 
" 309 | ] 310 | }, 311 | { 312 | "cell_type": "markdown", 313 | "metadata": {}, 314 | "source": [ 315 | "## Loss and Autograd together\n", 316 | "\n", 317 | "When we create a network with PyTorch, all of the parameters are initialized with `requires_grad = True`. This means that when we calculate the loss and call `loss.backward()`, the gradients for the parameters are calculated. These gradients are used to update the weights with gradient descent. Below you can see an example of calculating the gradients using a backwards pass." 318 | ] 319 | }, 320 | { 321 | "cell_type": "code", 322 | "execution_count": null, 323 | "metadata": {}, 324 | "outputs": [], 325 | "source": [ 326 | "# Build a feed-forward network\n", 327 | "model = nn.Sequential(nn.Linear(784, 128),\n", 328 | " nn.ReLU(),\n", 329 | " nn.Linear(128, 64),\n", 330 | " nn.ReLU(),\n", 331 | " nn.Linear(64, 10),\n", 332 | " nn.LogSoftmax(dim=1))\n", 333 | "\n", 334 | "criterion = nn.NLLLoss()\n", 335 | "dataiter = iter(trainloader)\n", 336 | "images, labels = next(dataiter)\n", 337 | "images = images.view(images.shape[0], -1)\n", 338 | "\n", 339 | "logits = model(images)\n", 340 | "loss = criterion(logits, labels)" 341 | ] 342 | }, 343 | { 344 | "cell_type": "code", 345 | "execution_count": null, 346 | "metadata": {}, 347 | "outputs": [], 348 | "source": [ 349 | "print('Before backward pass: \\n', model[0].weight.grad)\n", 350 | "\n", 351 | "loss.backward()\n", 352 | "\n", 353 | "print('After backward pass: \\n', model[0].weight.grad)" 354 | ] 355 | }, 356 | { 357 | "cell_type": "markdown", 358 | "metadata": {}, 359 | "source": [ 360 | "## Training the network!\n", 361 | "\n", 362 | "There's one last piece we need to start training, an optimizer that we'll use to update the weights with the gradients. We get these from PyTorch's [`optim` package](https://pytorch.org/docs/stable/optim.html). For example we can use stochastic gradient descent with `optim.SGD`. You can see how to define an optimizer below." 363 | ] 364 | }, 365 | { 366 | "cell_type": "code", 367 | "execution_count": null, 368 | "metadata": {}, 369 | "outputs": [], 370 | "source": [ 371 | "from torch import optim\n", 372 | "\n", 373 | "# Optimizers require the parameters to optimize and a learning rate\n", 374 | "optimizer = optim.SGD(model.parameters(), lr=0.01)" 375 | ] 376 | }, 377 | { 378 | "cell_type": "markdown", 379 | "metadata": {}, 380 | "source": [ 381 | "Now we know how to use all the individual parts so it's time to see how they work together. Let's consider just one learning step before looping through all the data. The general process with PyTorch:\n", 382 | "\n", 383 | "* Make a forward pass through the network \n", 384 | "* Use the network output to calculate the loss\n", 385 | "* Perform a backward pass through the network with `loss.backward()` to calculate the gradients\n", 386 | "* Take a step with the optimizer to update the weights\n", 387 | "\n", 388 | "Below I'll go through one training step and print out the weights and gradients so you can see how it changes. Note that I have a line of code `optimizer.zero_grad()`. When you do multiple backwards passes with the same parameters, the gradients are accumulated. This means that you need to zero the gradients on each training pass or you'll retain gradients from previous training batches." 
389 | ] 390 | }, 391 | { 392 | "cell_type": "code", 393 | "execution_count": null, 394 | "metadata": {}, 395 | "outputs": [], 396 | "source": [ 397 | "print('Initial weights - ', model[0].weight)\n", 398 | "\n", 399 | "dataiter = iter(trainloader)\n", 400 | "images, labels = next(dataiter)\n", 401 | "images.resize_(64, 784)\n", 402 | "\n", 403 | "# Clear the gradients; do this because gradients are accumulated\n", 404 | "optimizer.zero_grad()\n", 405 | "\n", 406 | "# Forward pass, then backward pass, then update weights\n", 407 | "output = model(images)\n", 408 | "loss = criterion(output, labels)\n", 409 | "loss.backward()\n", 410 | "print('Gradient -', model[0].weight.grad)" 411 | ] 412 | }, 413 | { 414 | "cell_type": "code", 415 | "execution_count": null, 416 | "metadata": {}, 417 | "outputs": [], 418 | "source": [ 419 | "# Take an update step and view the new weights\n", 420 | "optimizer.step()\n", 421 | "print('Updated weights - ', model[0].weight)" 422 | ] 423 | }, 424 | { 425 | "cell_type": "markdown", 426 | "metadata": {}, 427 | "source": [ 428 | "### Training for real\n", 429 | "\n", 430 | "Now we'll put this algorithm into a loop so we can go through all the images. Some nomenclature: one pass through the entire dataset is called an *epoch*. So here we're going to loop through `trainloader` to get our training batches. For each batch, we'll be doing a training pass where we calculate the loss, do a backwards pass, and update the weights.\n", 431 | "\n", 432 | ">**Exercise:** Implement the training pass for our network. If you implemented it correctly, you should see the training loss drop with each epoch." 433 | ] 434 | }, 435 | { 436 | "cell_type": "code", 437 | "execution_count": null, 438 | "metadata": {}, 439 | "outputs": [], 440 | "source": [ 441 | "## Your solution here\n", 442 | "\n", 443 | "model = nn.Sequential(nn.Linear(784, 128),\n", 444 | " nn.ReLU(),\n", 445 | " nn.Linear(128, 64),\n", 446 | " nn.ReLU(),\n", 447 | " nn.Linear(64, 10),\n", 448 | " nn.LogSoftmax(dim=1))\n", 449 | "\n", 450 | "criterion = nn.NLLLoss()\n", 451 | "optimizer = optim.SGD(model.parameters(), lr=0.003)\n", 452 | "\n", 453 | "epochs = 5\n", 454 | "for e in range(epochs):\n", 455 | " running_loss = 0\n", 456 | " for images, labels in trainloader:\n", 457 | " # Flatten MNIST images into a 784 long vector\n", 458 | " images = images.view(images.shape[0], -1)\n", 459 | " \n", 460 | " # TODO: Training pass\n", 461 | " \n", 462 | " loss = \n", 463 | " \n", 464 | " running_loss += loss.item()\n", 465 | " else:\n", 466 | " print(f\"Training loss: {running_loss/len(trainloader)}\")" 467 | ] 468 | }, 469 | { 470 | "cell_type": "markdown", 471 | "metadata": {}, 472 | "source": [ 473 | "With the network trained, we can check out its predictions." 
474 | ] 475 | }, 476 | { 477 | "cell_type": "code", 478 | "execution_count": null, 479 | "metadata": {}, 480 | "outputs": [], 481 | "source": [ 482 | "%matplotlib inline\n", 483 | "import helper\n", 484 | "\n", 485 | "dataiter = iter(trainloader)\n", 486 | "images, labels = next(dataiter)\n", 487 | "\n", 488 | "img = images[0].view(1, 784)\n", 489 | "# Turn off gradients to speed up this part\n", 490 | "with torch.no_grad():\n", 491 | " logps = model(img)\n", 492 | "\n", 493 | "# The network outputs log-probabilities; take the exponential to get probabilities\n", 494 | "ps = torch.exp(logps)\n", 495 | "helper.view_classify(img.view(1, 28, 28), ps)" 496 | ] 497 | }, 498 | { 499 | "cell_type": "markdown", 500 | "metadata": {}, 501 | "source": [ 502 | "Now our network is brilliant. It can accurately predict the digits in our images. Next up, you'll write the code for training a neural network on a more complex dataset." 503 | ] 504 | } 505 | ], 506 | "metadata": { 507 | "kernelspec": { 508 | "display_name": "Python 3", 509 | "language": "python", 510 | "name": "python3" 511 | }, 512 | "language_info": { 513 | "codemirror_mode": { 514 | "name": "ipython", 515 | "version": 3 516 | }, 517 | "file_extension": ".py", 518 | "mimetype": "text/x-python", 519 | "name": "python", 520 | "nbconvert_exporter": "python", 521 | "pygments_lexer": "ipython3", 522 | "version": "3.7.1" 523 | } 524 | }, 525 | "nbformat": 4, 526 | "nbformat_minor": 2 527 | } 528 | -------------------------------------------------------------------------------- /deep-learning-with-pytorch/Part 4 - Fashion-MNIST (Exercises).ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Classifying Fashion-MNIST\n", 8 | "\n", 9 | "Now it's your turn to build and train a neural network. You'll be using the [Fashion-MNIST dataset](https://github.com/zalandoresearch/fashion-mnist), a drop-in replacement for the MNIST dataset. MNIST is actually quite trivial for neural networks; you can easily achieve better than 97% accuracy. Fashion-MNIST is a set of 28x28 greyscale images of clothes. It's more complex than MNIST, so it's a better representation of the actual performance of your network, and of the datasets you'll use in the real world.\n", 10 | "\n", 11 | "\n", 12 | "\n", 13 | "In this notebook, you'll build your own neural network. For the most part, you could just copy and paste the code from Part 3, but you wouldn't be learning. It's important for you to write the code yourself and get it to work. Feel free to consult the previous notebooks though as you work through this.\n", 14 | "\n", 15 | "First off, let's load the dataset through torchvision." 
16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": null, 21 | "metadata": {}, 22 | "outputs": [], 23 | "source": [ 24 | "import torch\n", 25 | "from torchvision import datasets, transforms\n", 26 | "import helper\n", 27 | "\n", 28 | "# Define a transform to normalize the data\n", 29 | "transform = transforms.Compose([transforms.ToTensor(),\n", 30 | " transforms.Normalize((0.5,), (0.5,))])\n", 31 | "# Download and load the training data\n", 32 | "trainset = datasets.FashionMNIST('~/.pytorch/F_MNIST_data/', download=True, train=True, transform=transform)\n", 33 | "trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)\n", 34 | "\n", 35 | "# Download and load the test data\n", 36 | "testset = datasets.FashionMNIST('~/.pytorch/F_MNIST_data/', download=True, train=False, transform=transform)\n", 37 | "testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=True)" 38 | ] 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "metadata": {}, 43 | "source": [ 44 | "Here we can see one of the images." 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "metadata": {}, 51 | "outputs": [], 52 | "source": [ 53 | "image, label = next(iter(trainloader))\n", 54 | "helper.imshow(image[0,:]);" 55 | ] 56 | }, 57 | { 58 | "cell_type": "markdown", 59 | "metadata": {}, 60 | "source": [ 61 | "## Building the network\n", 62 | "\n", 63 | "Here you should define your network. As with MNIST, each image is 28x28, which is a total of 784 pixels, and there are 10 classes. You should include at least one hidden layer. We suggest you use ReLU activations for the layers and return the logits or log-softmax from the forward pass. It's up to you how many layers you add and the size of those layers." 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": null, 69 | "metadata": {}, 70 | "outputs": [], 71 | "source": [ 72 | "# TODO: Define your network architecture here\n" 73 | ] 74 | }, 75 | { 76 | "cell_type": "markdown", 77 | "metadata": {}, 78 | "source": [ 79 | "# Train the network\n", 80 | "\n", 81 | "Now you should create your network and train it. First you'll want to define [the criterion](http://pytorch.org/docs/master/nn.html#loss-functions) (something like `nn.CrossEntropyLoss`) and [the optimizer](http://pytorch.org/docs/master/optim.html) (typically `optim.SGD` or `optim.Adam`).\n", 82 | "\n", 83 | "Then write the training code. Remember the training pass is a fairly straightforward process:\n", 84 | "\n", 85 | "* Make a forward pass through the network to get the logits \n", 86 | "* Use the logits to calculate the loss\n", 87 | "* Perform a backward pass through the network with `loss.backward()` to calculate the gradients\n", 88 | "* Take a step with the optimizer to update the weights\n", 89 | "\n", 90 | "By adjusting the hyperparameters (hidden units, learning rate, etc), you should be able to get the training loss below 0.4." 
91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": null, 96 | "metadata": {}, 97 | "outputs": [], 98 | "source": [ 99 | "# TODO: Create the network, define the criterion and optimizer\n" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": null, 105 | "metadata": {}, 106 | "outputs": [], 107 | "source": [ 108 | "# TODO: Train the network here\n" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": null, 114 | "metadata": {}, 115 | "outputs": [], 116 | "source": [ 117 | "%matplotlib inline\n", 118 | "%config InlineBackend.figure_format = 'retina'\n", 119 | "\n", 120 | "import helper\n", 121 | "\n", 122 | "# Test out your network!\n", 123 | "\n", 124 | "dataiter = iter(testloader)\n", 125 | "images, labels = next(dataiter)\n", 126 | "img = images[0]\n", 127 | "# Convert 2D image to 1D vector\n", 128 | "img = img.resize_(1, 784)\n", 129 | "\n", 130 | "# TODO: Calculate the class probabilities (softmax) for img\n", 131 | "ps = \n", 132 | "\n", 133 | "# Plot the image and probabilities\n", 134 | "helper.view_classify(img.resize_(1, 28, 28), ps, version='Fashion')" 135 | ] 136 | } 137 | ], 138 | "metadata": { 139 | "kernelspec": { 140 | "display_name": "Python 3", 141 | "language": "python", 142 | "name": "python3" 143 | }, 144 | "language_info": { 145 | "codemirror_mode": { 146 | "name": "ipython", 147 | "version": 3 148 | }, 149 | "file_extension": ".py", 150 | "mimetype": "text/x-python", 151 | "name": "python", 152 | "nbconvert_exporter": "python", 153 | "pygments_lexer": "ipython3", 154 | "version": "3.6.7" 155 | } 156 | }, 157 | "nbformat": 4, 158 | "nbformat_minor": 2 159 | } 160 | -------------------------------------------------------------------------------- /deep-learning-with-pytorch/Part 6 - Saving and Loading Models.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Saving and Loading Models\n", 8 | "\n", 9 | "In this notebook, I'll show you how to save and load models with PyTorch. This is important because you'll often want to load previously trained models to use in making predictions or to continue training on new data." 
10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "%matplotlib inline\n", 19 | "%config InlineBackend.figure_format = 'retina'\n", 20 | "\n", 21 | "import matplotlib.pyplot as plt\n", 22 | "\n", 23 | "import torch\n", 24 | "from torch import nn\n", 25 | "from torch import optim\n", 26 | "import torch.nn.functional as F\n", 27 | "from torchvision import datasets, transforms\n", 28 | "\n", 29 | "import helper\n", 30 | "import fc_model" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "# Define a transform to normalize the data\n", 40 | "transform = transforms.Compose([transforms.ToTensor(),\n", 41 | " transforms.Normalize((0.5,), (0.5,))])\n", 42 | "# Download and load the training data\n", 43 | "trainset = datasets.FashionMNIST('~/.pytorch/F_MNIST_data/', download=True, train=True, transform=transform)\n", 44 | "trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)\n", 45 | "\n", 46 | "# Download and load the test data\n", 47 | "testset = datasets.FashionMNIST('~/.pytorch/F_MNIST_data/', download=True, train=False, transform=transform)\n", 48 | "testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=True)" 49 | ] 50 | }, 51 | { 52 | "cell_type": "markdown", 53 | "metadata": {}, 54 | "source": [ 55 | "Here we can see one of the images." 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 12, 61 | "metadata": {}, 62 | "outputs": [ 63 | { 64 | "data": { 65 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAdMAAAHTCAYAAAB8/vKtAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAAWJQAAFiUBSVIk8AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4wLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvqOYd8AAADZJJREFUeJzt3VuPneV5gOFvPJ7xZsaRd8AUMBgcGxpSVapRZGIO2lCaqM1B0v62Kj+jpVKhKSrKQQ+bgxaixhsIpJINxcab8VizXdNfUAneG2Vp5Os6f3i+WTPLN9/Rs7C/vz8BAOMOzfsBAOCgE1MAiMQUACIxBYBITAEgElMAiMQUACIxBYBITAEgElMAiMQUACIxBYBITAEgElMAiMQUAKLD9T/wozf/1EFUAA60D/79PxfKvDdTAIjEFAAiMQWASEwBIBJTAIjEFAAiMQWASEwBIBJTAIjEFAAiMQWASEwBIBJTAIjEFAAiMQWASEwBIBJTAIjEFAAiMQWASEwBIBJTAIjEFAAiMQWASEwBIBJTAIjEFAAiMQWASEwBIBJTAIjEFAAiMQWASEwBIBJTAIjEFAAiMQWASEwBIBJTAIjEFAAiMQWASEwBIBJTAIjEFAAiMQWASEwBIBJTAIjEFAAiMQWASEwBIBJTAIjEFAAiMQWASEwBIBJTAIjEFAAiMQWASEwBIBJTAIjEFAAiMQWASEwBIBJTAIjEFAAiMQWASEwBIBJTAIjEFAAiMQWASEwBIBJTAIjEFAAiMQWASEwBIBJTAIjEFAAiMQWASEwBIBJTAIgOz/sBAP4/CwsLw7P7+/vf4pN8M+W5p2m+z/7mD68Oz978+Gba/fkXXwzPHoqfeeXNFAAiMQWASEwBIBJTAIjEFAAiMQWASEwBIBJTAIjEFAAiMQWASEwBIBJTAIjEFAAiMQWAyAk2+BoO8kmt1y9fTvP/8etff0tP8s3N83Mr5vncP/mrH6f5V1+5NDy78Xgj7S4n2ObNmykARGIKAJGYAkAkpgAQiSkARGIKAJGYAkAkpgAQiSkARGIKAJGYAkAkpgAQiSkARGIKAJGYAkDknil8De2a6TSV65YvnDuXdl+6eDHNnzx5cnj2448/Sbs/+/1nw7Oz2Sztnqe/+/nPh2efe/a5tPuX778/PPvfv/1t2l3M5nz71pspAERiCgCRmAJAJKYAEIkpAERiCgCRmAJAJKYAEIkpAERiCgCRmAJAJKYAEIkpAERiCgCRE2zwNSwsxCNs4TzUqVOn0urd3d00v/bM2vDshZdeTru3traGZ4+vrKTd//hP7wzPXvnBD9LuZ8Jn/sv3/zXtvnb9epp/UnkzBYBITAEgElMAiMQUACIxBYBITAEgElMAiMQUACIxBYBITAEgElMAiMQUACIxBYBITAEgElMAiNwzha9hbzab2+61tfHbltM0Tf/87rtpfuPx4+HZeov1jStXxnfHO65vv/XW8OzxY8fT7jt37gzPHuR7pOVu8H64Gfxt8GYKAJGYAkAkpgAQiSkARGIKAJGYAkAkpgAQiSkARGIKAJGYAkAkpgAQiSkARGIKAJGYAkDkBBsHRjnPNE3zPdF06eLF4dlbt26l3eWEWnXv3r00/+577w3PvvzSS2n3j/78L4Zn92Z7affOznaaP6jmfUat8GYKAJGYAkAkpgAQiSkARGIKAJGYAkAkpgAQiSkARGIKAJGYAkAkpgAQiSkARGIKAJGYAkAkpgAQuWfKN1Jvipbp2RxvHV787nfT/OLi4vDshx99lHZX5Xc+z/uUP/3rv0nzW9tbw7MPHz5Mu//hnXfS/EF15vTp4dl6O7fyZgoAkZgCQCSmABCJKQBEYgoAkZgCQCSmABCJKQBEYgoAkZgCQCSmABCJKQBEYgoAkZgC
QOQE25wc1LNWdff8nnyaXr98eXj21u3bafetW7fS/DyVs3n1933mzJnh2fVHj9Lucm3wzp27cff48gsvv5x2P/3008Oz9UTj8tLy8Oz1mzfS7sqbKQBEYgoAkZgCQCSmABCJKQBEYgoAkZgCQCSmABCJKQBEYgoAkZgCQCSmABCJKQBEYgoAkZgCQJTvmdb7dfO8zck3V3/ff/uznw3P3rt3L+3+4Fe/SvNPqtkcv6PnX3xxeHZxsb0r7O7sDs8+++wfpd1X3/jh8OzO7k7a/eDBg+HZR/GG7NLy+D3TxxsbaXflzRQAIjEFgEhMASASUwCIxBQAIjEFgEhMASASUwCIxBQAIjEFgEhMASASUwCIxBQAIjEFgCifYHNCbcxB/dx+/Pbbab6ccFtbW0u7154Zn//8i8/T7ifV8tL4Sa1pmqbX/+zy8Oz6+nraff3mjeHZ++GM2TRNUzl0OJvN0u5iaWkpza+urAzP3r59O+2uvJkCQCSmABCJKQBEYgoAkZgCQCSmABCJKQBEYgoAkZgCQCSmABCJKQBEYgoAkZgCQCSmABCJKQBE+Z7pQVZua9Z7pGfPnBmevXDhQtp9586d4dljx46l3VtbW8Oz9+/fT7vfuHJlePa9f3kv7d4MP/e8/fGrrw7PvnLpUtr90W9+Mzxbb9Bubm4Oz54+fTrtPnx4/J/m8tzTNE07OzvDs/Xfh0OHxt/v6s9deTMFgEhMASASUwCIxBQAIjEFgEhMASASUwCIxBQAIjEFgEhMASASUwCIxBQAIjEFgEhMASASUwCInuh7pvN04sR3hmeffuqptPva9evDs+Xe4DRN0+bm+F3PU6fajcjZbG949urVq2n3v33wQZov3ozPvrqyMjx74+bNtHt7e3t4dnn5SNq9uro6PLuzs5t2b25uDM8ePryYdpfv+O7u+Hes7t7ba7srb6YAEIkpAERiCgCRmAJAJKYAEIkpAERiCgCRmAJAJKYAEIkpAERiCgCRmAJAJKYAEIkpAET5BFs9ydXO/bQzR/v7+2m+uPvV3eHZ+w8epN1PnT07PHv3q6/S7mvXrg3Pnjt3Lu3+/muvDc/Wv7UXwrO/cO6FtHuKf+a/+/TT4dnl5eW0u8wvLCyk3Ts7O8Oz9d+WxcX5veeUR6+f+dbW+InGefNmCgCRmAJAJKYAEIkpAERiCgCRmAJAJKYAEIkpAERiCgCRmAJAJKYAEIkpAERiCgCRmAJAJKYAEOV7pqurq2n+lUuXhmc3Nh6n3eVe4e3bt9Lu5597fnj2T177ftr90vnzw7Obm5tp92vf+97w7N//4hdp9/kXzw/Prq2tpd3PPz/++364vp52b2+3G5Gz2Wx4tnzHpqndx6w3RettzmJvb/wzr7dQy/zxY8fS7s/+5/dpfp68mQJAJKYAEIkpAERiCgCRmAJAJKYAEIkpAERiCgCRmAJAJKYAEIkpAERiCgCRmAJAJKYAEOUTbA8fPkzzt29/Pjx7+PBi2n0snAu6cOFC2r0/Gz8P9V8ffph2l7N5Z8+eSbvLiaa33/rLtPuTTz4Znl2Mf2vle1LPHB45spzm93b3hme38vm3ckatnWCrJ9yKo0ePDM/Ws3dF/czWY0/myZspAERiCgCRmAJAJKYAEIkpAERiCgCRmAJAJKYAEIkpAERiCgCRmAJAJKYAEIkpAERiCgCRmAJAlO+ZVuXe4dGj30m7H4TbeQsLC2n34qHx/4/58s6Xaffe3vh9ynqvcGVlZXi23J+dpmlaWCj/79h+7uPHjw/P7u7upt31d1bmZ7NZ2l2Uv/Npmu/PXW6SLi+P30KdpnZLdW/WPvPNrXb/dp68mQJAJKYAEIkpAERiCgCRmAJAJKYAEIkpAERiCgCRmAJAJKYAEIkpAERiCgCRmAJAJKYAEM39BNvdu3eHZ0+dPJl2nz51anh2K54KWlxcTPPzcuRIO++0Pxs/a7Ufz6AV5STWNLW/l6WlpbR7nqfIqnLKrH7HypnF+pkdCicat+Pf6tLueBY2Hm+k3QeZN1MAiMQUACIxBYBITAEgElMAiMQUACIxBYBITAEgElMAiMQUACIxBYBITAEgElMAiMQUACIxBYBo7vdMy73C6zdupN0nVleHZ0+FW6jTNE0rKyvDs0uH233LKdxpzDciF8Puw213uTFZf+7FQ+Pz9Z7p8vJymi/f0XrXc563VMtN0fxzh7u/R48eTbtPrJ4Ynv3dZ5+m3QeZN1MAiMQUACIxBYBITAEgElMAiMQUACIxBYBITAEgElMAiMQUACIxBYBITAEgElMAiMQUAKK5n2Cbp/VHj+YyW9VzYOVE0/Fjx9LupXAO7FA4HVeVM2R1fnt7O+3e3tlJ8+WcWP2NzfMEWzl9dyx+T5L4PfnfQ18Oz66vr6fdB5k3UwCIxBQAIjEFgEhMASASUwCIxBQAIjEFgEhMASASUwCIxBQAIjEFgEhMASASUwCIxBQAIjEFgOiJvmd6UO3t7aX5jY2NuczCk+L+gwfzfgT+wLyZAkAkpgAQiSkARGIKAJGYAkAkpgAQiSkARGIKAJGYAkAkpgAQiSkARGIKAJGYAkAkpgAQiSkARGIKAJGYAkAkpgAQiSkARGIKAJGYAkAkpgAQiSkARGIKAJGYAkAkpgAQiSkARGIKAJGYAkAkpgAQiSkARGIKAJGYAkAkpgAQiSkARGIKAJGYAkAkpgAQiSkARGIKAJGYAkAkpgAQiSkARGIKAJGYAkAkpgAQiSkARGIKAJGYAkAkpgAQiSkARGIKAJGYAkAkpgAQiSkARGIKAJGYAkAkpgAQiSkARGIKAJGYAkC0sL+/P+9nAIADzZspAERiCgCRmAJAJKYAEIkpAERiCgCRmAJAJKYAEIkpAERiCgCRmAJAJKYAEIkpAERiCgCRmAJA9H8ahSabBrOrHAAAAABJRU5ErkJggg==\n", 66 | "text/plain": [ 67 | "
" 68 | ] 69 | }, 70 | "metadata": { 71 | "image/png": { 72 | "height": 233, 73 | "width": 233 74 | }, 75 | "needs_background": "light" 76 | }, 77 | "output_type": "display_data" 78 | } 79 | ], 80 | "source": [ 81 | "image, label = next(iter(trainloader))\n", 82 | "helper.imshow(image[0,:]);" 83 | ] 84 | }, 85 | { 86 | "cell_type": "markdown", 87 | "metadata": {}, 88 | "source": [ 89 | "# Train a network\n", 90 | "\n", 91 | "To make things more concise here, I moved the model architecture and training code from the last part to a file called `fc_model`. Importing this, we can easily create a fully-connected network with `fc_model.Network`, and train the network using `fc_model.train`. I'll use this model (once it's trained) to demonstrate how we can save and load models." 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": null, 97 | "metadata": {}, 98 | "outputs": [], 99 | "source": [ 100 | "# Create the network, define the criterion and optimizer\n", 101 | "\n", 102 | "model = fc_model.Network(784, 10, [512, 256, 128])\n", 103 | "criterion = nn.NLLLoss()\n", 104 | "optimizer = optim.Adam(model.parameters(), lr=0.001)" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": null, 110 | "metadata": {}, 111 | "outputs": [], 112 | "source": [ 113 | "fc_model.train(model, trainloader, testloader, criterion, optimizer, epochs=2)" 114 | ] 115 | }, 116 | { 117 | "cell_type": "markdown", 118 | "metadata": {}, 119 | "source": [ 120 | "## Saving and loading networks\n", 121 | "\n", 122 | "As you can imagine, it's impractical to train a network every time you need to use it. Instead, we can save trained networks then load them later to train more or use them for predictions.\n", 123 | "\n", 124 | "The parameters for PyTorch networks are stored in a model's `state_dict`. We can see the state dict contains the weight and bias matrices for each of our layers." 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": null, 130 | "metadata": {}, 131 | "outputs": [], 132 | "source": [ 133 | "print(\"Our model: \\n\\n\", model, '\\n')\n", 134 | "print(\"The state dict keys: \\n\\n\", model.state_dict().keys())" 135 | ] 136 | }, 137 | { 138 | "cell_type": "markdown", 139 | "metadata": {}, 140 | "source": [ 141 | "The simplest thing to do is simply save the state dict with `torch.save`. For example, we can save it to a file `'checkpoint.pth'`." 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": null, 147 | "metadata": {}, 148 | "outputs": [], 149 | "source": [ 150 | "torch.save(model.state_dict(), 'checkpoint.pth')" 151 | ] 152 | }, 153 | { 154 | "cell_type": "markdown", 155 | "metadata": {}, 156 | "source": [ 157 | "Then we can load the state dict with `torch.load`." 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": null, 163 | "metadata": {}, 164 | "outputs": [], 165 | "source": [ 166 | "state_dict = torch.load('checkpoint.pth')\n", 167 | "print(state_dict.keys())" 168 | ] 169 | }, 170 | { 171 | "cell_type": "markdown", 172 | "metadata": {}, 173 | "source": [ 174 | "And to load the state dict in to the network, you do `model.load_state_dict(state_dict)`." 
175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": null, 180 | "metadata": {}, 181 | "outputs": [], 182 | "source": [ 183 | "model.load_state_dict(state_dict)" 184 | ] 185 | }, 186 | { 187 | "cell_type": "markdown", 188 | "metadata": {}, 189 | "source": [ 190 | "Seems pretty straightforward, but as usual it's a bit more complicated. Loading the state dict works only if the model architecture is exactly the same as the checkpoint architecture. If I create a model with a different architecture, this fails." 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": null, 196 | "metadata": {}, 197 | "outputs": [], 198 | "source": [ 199 | "# Try this\n", 200 | "model = fc_model.Network(784, 10, [400, 200, 100])\n", 201 | "# This will throw an error because the tensor sizes are wrong!\n", 202 | "model.load_state_dict(state_dict)" 203 | ] 204 | }, 205 | { 206 | "cell_type": "markdown", 207 | "metadata": {}, 208 | "source": [ 209 | "This means we need to rebuild the model exactly as it was when trained. Information about the model architecture needs to be saved in the checkpoint, along with the state dict. To do this, you build a dictionary with all the information you need to completely rebuild the model." 210 | ] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "execution_count": null, 215 | "metadata": {}, 216 | "outputs": [], 217 | "source": [ 218 | "checkpoint = {'input_size': 784,\n", 219 | " 'output_size': 10,\n", 220 | " 'hidden_layers': [each.out_features for each in model.hidden_layers],\n", 221 | " 'state_dict': model.state_dict()}\n", 222 | "\n", 223 | "torch.save(checkpoint, 'checkpoint.pth')" 224 | ] 225 | }, 226 | { 227 | "cell_type": "markdown", 228 | "metadata": {}, 229 | "source": [ 230 | "Now the checkpoint has all the necessary information to rebuild the trained model. You can easily make that a function if you want; a possible save helper is sketched below. Similarly, we can write a function to load checkpoints, which follows the sketch. 
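For instance, a save helper might look like the sketch below. This is just one possible wrapper around the checkpoint dictionary above, and it assumes the `fc_model.Network` attributes already used in this notebook (`hidden_layers` holding the hidden `nn.Linear` modules):

```python
def save_checkpoint(model, filepath):
    # Sketch only: mirrors the checkpoint dictionary built above,
    # hardcoding this notebook's input and output sizes.
    checkpoint = {'input_size': 784,
                  'output_size': 10,
                  'hidden_layers': [each.out_features for each in model.hidden_layers],
                  'state_dict': model.state_dict()}
    torch.save(checkpoint, filepath)

save_checkpoint(model, 'checkpoint.pth')
```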
" 231 | ] 232 | }, 233 | { 234 | "cell_type": "code", 235 | "execution_count": null, 236 | "metadata": {}, 237 | "outputs": [], 238 | "source": [ 239 | "def load_checkpoint(filepath):\n", 240 | " checkpoint = torch.load(filepath)\n", 241 | " model = fc_model.Network(checkpoint['input_size'],\n", 242 | " checkpoint['output_size'],\n", 243 | " checkpoint['hidden_layers'])\n", 244 | " model.load_state_dict(checkpoint['state_dict'])\n", 245 | " \n", 246 | " return model" 247 | ] 248 | }, 249 | { 250 | "cell_type": "code", 251 | "execution_count": null, 252 | "metadata": {}, 253 | "outputs": [], 254 | "source": [ 255 | "model = load_checkpoint('checkpoint.pth')\n", 256 | "print(model)" 257 | ] 258 | } 259 | ], 260 | "metadata": { 261 | "kernelspec": { 262 | "display_name": "Python 3", 263 | "language": "python", 264 | "name": "python3" 265 | }, 266 | "language_info": { 267 | "codemirror_mode": { 268 | "name": "ipython", 269 | "version": 3 270 | }, 271 | "file_extension": ".py", 272 | "mimetype": "text/x-python", 273 | "name": "python", 274 | "nbconvert_exporter": "python", 275 | "pygments_lexer": "ipython3", 276 | "version": "3.6.6" 277 | } 278 | }, 279 | "nbformat": 4, 280 | "nbformat_minor": 2 281 | } 282 | -------------------------------------------------------------------------------- /deep-learning-with-pytorch/Part 7 - Loading Image Data (Exercises).ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Loading Image Data\n", 8 | "\n", 9 | "So far we've been working with fairly artificial datasets that you wouldn't typically be using in real projects. Instead, you'll likely be dealing with full-sized images like you'd get from smart phone cameras. In this notebook, we'll look at how to load images and use them to train neural networks.\n", 10 | "\n", 11 | "We'll be using a [dataset of cat and dog photos](https://www.kaggle.com/c/dogs-vs-cats) available from Kaggle. Here are a couple example images:\n", 12 | "\n", 13 | "\n", 14 | "\n", 15 | "We'll use this dataset to train a neural network that can differentiate between cats and dogs. These days it doesn't seem like a big accomplishment, but five years ago it was a serious challenge for computer vision systems." 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": null, 21 | "metadata": {}, 22 | "outputs": [], 23 | "source": [ 24 | "%matplotlib inline\n", 25 | "%config InlineBackend.figure_format = 'retina'\n", 26 | "\n", 27 | "import matplotlib.pyplot as plt\n", 28 | "\n", 29 | "import torch\n", 30 | "from torchvision import datasets, transforms\n", 31 | "\n", 32 | "import helper" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [ 39 | "The easiest way to load image data is with `datasets.ImageFolder` from `torchvision` ([documentation](http://pytorch.org/docs/master/torchvision/datasets.html#imagefolder)). In general you'll use `ImageFolder` like so:\n", 40 | "\n", 41 | "```python\n", 42 | "dataset = datasets.ImageFolder('path/to/data', transform=transform)\n", 43 | "```\n", 44 | "\n", 45 | "where `'path/to/data'` is the file path to the data directory and `transform` is a list of processing steps built with the [`transforms`](http://pytorch.org/docs/master/torchvision/transforms.html) module from `torchvision`. 
ImageFolder expects the files and directories to be constructed like so:\n", 46 | "```\n", 47 | "root/dog/xxx.png\n", 48 | "root/dog/xxy.png\n", 49 | "root/dog/xxz.png\n", 50 | "\n", 51 | "root/cat/123.png\n", 52 | "root/cat/nsdf3.png\n", 53 | "root/cat/asd932_.png\n", 54 | "```\n", 55 | "\n", 56 | "where each class has its own directory (`cat` and `dog`) for the images. The images are then labeled with the class taken from the directory name. So here, the image `123.png` would be loaded with the class label `cat`. You can download the dataset already structured like this [from here](https://s3.amazonaws.com/content.udacity-data.com/nd089/Cat_Dog_data.zip). I've also split it into a training set and test set.\n", 57 | "\n", 58 | "### Transforms\n", 59 | "\n", 60 | "When you load in the data with `ImageFolder`, you'll need to define some transforms. For example, the images are different sizes, but we'll need them all to be the same size for training. You can either resize them with `transforms.Resize()` or crop with `transforms.CenterCrop()`, `transforms.RandomResizedCrop()`, etc. We'll also need to convert the images to PyTorch tensors with `transforms.ToTensor()`. Typically you'll combine these transforms into a pipeline with `transforms.Compose()`, which accepts a list of transforms and runs them in sequence. It looks something like this to scale, then crop, then convert to a tensor:\n", 61 | "\n", 62 | "```python\n", 63 | "transform = transforms.Compose([transforms.Resize(255),\n", 64 | " transforms.CenterCrop(224),\n", 65 | " transforms.ToTensor()])\n", 66 | "\n", 67 | "```\n", 68 | "\n", 69 | "There are plenty of transforms available; I'll cover more in a bit, and you can read through the [documentation](http://pytorch.org/docs/master/torchvision/transforms.html). \n", 70 | "\n", 71 | "### Data Loaders\n", 72 | "\n", 73 | "With the `ImageFolder` dataset loaded, you have to pass it to a [`DataLoader`](http://pytorch.org/docs/master/data.html#torch.utils.data.DataLoader). The `DataLoader` takes a dataset (such as you would get from `ImageFolder`) and returns batches of images and the corresponding labels. You can set various parameters like the batch size and whether the data is shuffled after each epoch.\n", 74 | "\n", 75 | "```python\n", 76 | "dataloader = torch.utils.data.DataLoader(dataset, batch_size=32, shuffle=True)\n", 77 | "```\n", 78 | "\n", 79 | "Here `dataloader` is a [generator](https://jeffknupp.com/blog/2013/04/07/improve-your-python-yield-and-generators-explained/). To get data out of it, you need to loop through it or convert it to an iterator and call `next()`.\n", 80 | "\n", 81 | "```python\n", 82 | "# Looping through it, get a batch on each loop \n", 83 | "for images, labels in dataloader:\n", 84 | " pass\n", 85 | "\n", 86 | "# Get one batch\n", 87 | "images, labels = next(iter(dataloader))\n", 88 | "```\n", 89 | " \n", 90 | ">**Exercise:** Load images from the `Cat_Dog_data/train` folder, define a few transforms, then build the dataloader." 
91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": null, 96 | "metadata": {}, 97 | "outputs": [], 98 | "source": [ 99 | "data_dir = 'Cat_Dog_data/train'\n", 100 | "\n", 101 | "transform = # TODO: compose transforms here\n", 102 | "dataset = # TODO: create the ImageFolder\n", 103 | "dataloader = # TODO: use the ImageFolder dataset to create the DataLoader" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": null, 109 | "metadata": {}, 110 | "outputs": [], 111 | "source": [ 112 | "# Run this to test your data loader\n", 113 | "images, labels = next(iter(dataloader))\n", 114 | "helper.imshow(images[0], normalize=False)" 115 | ] 116 | }, 117 | { 118 | "cell_type": "markdown", 119 | "metadata": {}, 120 | "source": [ 121 | "If you loaded the data correctly, you should see something like this (your image will be different):\n", 122 | "\n", 123 | "" 124 | ] 125 | }, 126 | { 127 | "cell_type": "markdown", 128 | "metadata": {}, 129 | "source": [ 130 | "## Data Augmentation\n", 131 | "\n", 132 | "A common strategy for training neural networks is to introduce randomness in the input data itself. For example, you can randomly rotate, mirror, scale, and/or crop your images during training. This will help your network generalize as it's seeing the same images but in different locations, with different sizes, in different orientations, etc.\n", 133 | "\n", 134 | "To randomly rotate, scale, and crop, then flip your images, you would define your transforms like this:\n", 135 | "\n", 136 | "```python\n", 137 | "train_transforms = transforms.Compose([transforms.RandomRotation(30),\n", 138 | " transforms.RandomResizedCrop(224),\n", 139 | " transforms.RandomHorizontalFlip(),\n", 140 | " transforms.ToTensor(),\n", 141 | " transforms.Normalize([0.5, 0.5, 0.5], \n", 142 | " [0.5, 0.5, 0.5])])\n", 143 | "```\n", 144 | "\n", 145 | "You'll also typically want to normalize images with `transforms.Normalize`. You pass in a list of means and list of standard deviations, then the color channels are normalized like so:\n", 146 | "\n", 147 | "```input[channel] = (input[channel] - mean[channel]) / std[channel]```\n", 148 | "\n", 149 | "Subtracting `mean` centers the data around zero and dividing by `std` squishes the values to be between -1 and 1 (for example, with `mean = 0.5` and `std = 0.5`, an input of 0 maps to -1 and an input of 1 maps to 1). Normalizing helps keep the network weights near zero, which in turn makes backpropagation more stable. Without normalization, networks will tend to fail to learn.\n", 150 | "\n", 151 | "You can find a list of all [the available transforms here](http://pytorch.org/docs/0.3.0/torchvision/transforms.html). When you're testing, however, you'll want to use images that aren't altered (except that you'll need to normalize them the same way). So, for validation/test images, you'll typically just resize and crop.\n", 152 | "\n", 153 | ">**Exercise:** Define transforms for training data and testing data below. Leave off normalization for now." 
154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": null, 159 | "metadata": {}, 160 | "outputs": [], 161 | "source": [ 162 | "data_dir = 'Cat_Dog_data'\n", 163 | "\n", 164 | "# TODO: Define transforms for the training data and testing data\n", 165 | "train_transforms = \n", 166 | "\n", 167 | "test_transforms = \n", 168 | "\n", 169 | "\n", 170 | "# Pass transforms in here, then run the next cell to see how the transforms look\n", 171 | "train_data = datasets.ImageFolder(data_dir + '/train', transform=train_transforms)\n", 172 | "test_data = datasets.ImageFolder(data_dir + '/test', transform=test_transforms)\n", 173 | "\n", 174 | "trainloader = torch.utils.data.DataLoader(train_data, batch_size=32)\n", 175 | "testloader = torch.utils.data.DataLoader(test_data, batch_size=32)" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": null, 181 | "metadata": {}, 182 | "outputs": [], 183 | "source": [ 184 | "# change this to the trainloader or testloader \n", 185 | "data_iter = iter(testloader)\n", 186 | "\n", 187 | "images, labels = next(data_iter)\n", 188 | "fig, axes = plt.subplots(figsize=(10,4), ncols=4)\n", 189 | "for ii in range(4):\n", 190 | " ax = axes[ii]\n", 191 | " helper.imshow(images[ii], ax=ax, normalize=False)" 192 | ] 193 | }, 194 | { 195 | "cell_type": "markdown", 196 | "metadata": {}, 197 | "source": [ 198 | "Your transformed images should look something like this.\n", 199 | "\n", 200 | "
Training examples:
\n", 201 | "\n", 202 | "\n", 203 | "
Testing examples:
\n", 204 | "" 205 | ] 206 | }, 207 | { 208 | "cell_type": "markdown", 209 | "metadata": {}, 210 | "source": [ 211 | "At this point you should be able to load data for training and testing. Now, you should try building a network that can classify cats vs dogs. This is quite a bit more complicated than before with the MNIST and Fashion-MNIST datasets. To be honest, you probably won't get it to work with a fully-connected network, no matter how deep. These images have three color channels and at a higher resolution (so far you've seen 28x28 images which are tiny).\n", 212 | "\n", 213 | "In the next part, I'll show you how to use a pre-trained network to build a model that can actually solve this problem." 214 | ] 215 | }, 216 | { 217 | "cell_type": "code", 218 | "execution_count": null, 219 | "metadata": {}, 220 | "outputs": [], 221 | "source": [ 222 | "# Optional TODO: Attempt to build a network to classify cats vs dogs from this dataset" 223 | ] 224 | } 225 | ], 226 | "metadata": { 227 | "kernelspec": { 228 | "display_name": "Python 3", 229 | "language": "python", 230 | "name": "python3" 231 | }, 232 | "language_info": { 233 | "codemirror_mode": { 234 | "name": "ipython", 235 | "version": 3 236 | }, 237 | "file_extension": ".py", 238 | "mimetype": "text/x-python", 239 | "name": "python", 240 | "nbconvert_exporter": "python", 241 | "pygments_lexer": "ipython3", 242 | "version": "3.6.6" 243 | } 244 | }, 245 | "nbformat": 4, 246 | "nbformat_minor": 2 247 | } 248 | -------------------------------------------------------------------------------- /deep-learning-with-pytorch/Part 7 - Loading Image Data (Solution).ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Loading Image Data\n", 8 | "\n", 9 | "So far we've been working with fairly artificial datasets that you wouldn't typically be using in real projects. Instead, you'll likely be dealing with full-sized images like you'd get from smart phone cameras. In this notebook, we'll look at how to load images and use them to train neural networks.\n", 10 | "\n", 11 | "We'll be using a [dataset of cat and dog photos](https://www.kaggle.com/c/dogs-vs-cats) available from Kaggle. Here are a couple example images:\n", 12 | "\n", 13 | "\n", 14 | "\n", 15 | "We'll use this dataset to train a neural network that can differentiate between cats and dogs. These days it doesn't seem like a big accomplishment, but five years ago it was a serious challenge for computer vision systems." 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": null, 21 | "metadata": {}, 22 | "outputs": [], 23 | "source": [ 24 | "%matplotlib inline\n", 25 | "%config InlineBackend.figure_format = 'retina'\n", 26 | "\n", 27 | "import matplotlib.pyplot as plt\n", 28 | "\n", 29 | "import torch\n", 30 | "from torchvision import datasets, transforms\n", 31 | "\n", 32 | "import helper" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [ 39 | "The easiest way to load image data is with `datasets.ImageFolder` from `torchvision` ([documentation](http://pytorch.org/docs/master/torchvision/datasets.html#imagefolder)). 
In general you'll use `ImageFolder` like so:\n", 40 | "\n", 41 | "```python\n", 42 | "dataset = datasets.ImageFolder('path/to/data', transform=transform)\n", 43 | "```\n", 44 | "\n", 45 | "where `'path/to/data'` is the file path to the data directory and `transform` is a sequence of processing steps built with the [`transforms`](http://pytorch.org/docs/master/torchvision/transforms.html) module from `torchvision`. ImageFolder expects the files and directories to be constructed like so:\n", 46 | "```\n", 47 | "root/dog/xxx.png\n", 48 | "root/dog/xxy.png\n", 49 | "root/dog/xxz.png\n", 50 | "\n", 51 | "root/cat/123.png\n", 52 | "root/cat/nsdf3.png\n", 53 | "root/cat/asd932_.png\n", 54 | "```\n", 55 | "\n", 56 | "where each class has its own directory (`cat` and `dog`) for the images. The images are then labeled with the class taken from the directory name. So here, the image `123.png` would be loaded with the class label `cat`. You can download the dataset already structured like this [from here](https://s3.amazonaws.com/content.udacity-data.com/nd089/Cat_Dog_data.zip). I've also split it into a training set and test set.\n", 57 | "\n", 58 | "### Transforms\n", 59 | "\n", 60 | "When you load in the data with `ImageFolder`, you'll need to define some transforms. For example, the images are different sizes but we'll need them to all be the same size for training. You can either resize them with `transforms.Resize()` or crop with `transforms.CenterCrop()`, `transforms.RandomResizedCrop()`, etc. We'll also need to convert the images to PyTorch tensors with `transforms.ToTensor()`. Typically you'll combine these transforms into a pipeline with `transforms.Compose()`, which accepts a list of transforms and runs them in sequence. Scaling, then cropping, then converting to a tensor looks something like this:\n", 61 | "\n", 62 | "```python\n", 63 | "transform = transforms.Compose([transforms.Resize(255),\n", 64 | " transforms.CenterCrop(224),\n", 65 | " transforms.ToTensor()])\n", 66 | "\n", 67 | "```\n", 68 | "\n", 69 | "There are plenty of transforms available; I'll cover more in a bit, and you can read through the [documentation](http://pytorch.org/docs/master/torchvision/transforms.html). \n", 70 | "\n", 71 | "### Data Loaders\n", 72 | "\n", 73 | "With the `ImageFolder` dataset loaded, you have to pass it to a [`DataLoader`](http://pytorch.org/docs/master/data.html#torch.utils.data.DataLoader). The `DataLoader` takes a dataset (such as you would get from `ImageFolder`) and returns batches of images and the corresponding labels. You can set various parameters like the batch size and whether the data is shuffled after each epoch.\n", 74 | "\n", 75 | "```python\n", 76 | "dataloader = torch.utils.data.DataLoader(dataset, batch_size=32, shuffle=True)\n", 77 | "```\n", 78 | "\n", 79 | "Here `dataloader` is an iterable that works much like a [generator](https://jeffknupp.com/blog/2013/04/07/improve-your-python-yield-and-generators-explained/). To get data out of it, you need to loop through it or convert it to an iterator and call `next()`.\n", 80 | "\n", 81 | "```python\n", 82 | "# Looping through it, get a batch on each loop \n", 83 | "for images, labels in dataloader:\n", 84 | " pass\n", 85 | "\n", 86 | "# Get one batch\n", 87 | "images, labels = next(iter(dataloader))\n", 88 | "```\n", 89 | " \n", 90 | ">**Exercise:** Load images from the `Cat_Dog_data/train` folder, define a few transforms, then build the dataloader."
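As an aside on the labeling described above, here's a minimal sketch showing where `ImageFolder` gets its class labels; it assumes the `Cat_Dog_data` folder from this lesson is in place, and uses the transform from the earlier example:

```python
from torchvision import datasets, transforms

transform = transforms.Compose([transforms.Resize(255),
                                transforms.CenterCrop(224),
                                transforms.ToTensor()])
dataset = datasets.ImageFolder('Cat_Dog_data/train', transform=transform)

# Labels come from the subdirectory names, sorted alphabetically
print(dataset.classes)       # ['cat', 'dog']
print(dataset.class_to_idx)  # {'cat': 0, 'dog': 1}
```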
91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": null, 96 | "metadata": {}, 97 | "outputs": [], 98 | "source": [ 99 | "data_dir = 'Cat_Dog_data/train'\n", 100 | "\n", 101 | "transform = transforms.Compose([transforms.Resize(255),\n", 102 | " transforms.CenterCrop(224),\n", 103 | " transforms.ToTensor()])\n", 104 | "dataset = datasets.ImageFolder(data_dir, transform=transform)\n", 105 | "dataloader = torch.utils.data.DataLoader(dataset, batch_size=32, shuffle=True)" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": null, 111 | "metadata": {}, 112 | "outputs": [], 113 | "source": [ 114 | "# Run this to test your data loader\n", 115 | "images, labels = next(iter(dataloader))\n", 116 | "helper.imshow(images[0], normalize=False)" 117 | ] 118 | }, 119 | { 120 | "cell_type": "markdown", 121 | "metadata": {}, 122 | "source": [ 123 | "If you loaded the data correctly, you should see something like this (your image will be different):\n", 124 | "\n", 125 | "" 126 | ] 127 | }, 128 | { 129 | "cell_type": "markdown", 130 | "metadata": {}, 131 | "source": [ 132 | "## Data Augmentation\n", 133 | "\n", 134 | "A common strategy for training neural networks is to introduce randomness in the input data itself. For example, you can randomly rotate, mirror, scale, and/or crop your images during training. This will help your network generalize as it's seeing the same images but in different locations, with different sizes, in different orientations, etc.\n", 135 | "\n", 136 | "To randomly rotate, scale, and crop, then flip your images, you would define your transforms like this:\n", 137 | "\n", 138 | "```python\n", 139 | "train_transforms = transforms.Compose([transforms.RandomRotation(30),\n", 140 | " transforms.RandomResizedCrop(224),\n", 141 | " transforms.RandomHorizontalFlip(),\n", 142 | " transforms.ToTensor(),\n", 143 | " transforms.Normalize([0.5, 0.5, 0.5], \n", 144 | " [0.5, 0.5, 0.5])])\n", 145 | "```\n", 146 | "\n", 147 | "You'll also typically want to normalize images with `transforms.Normalize`. You pass in a list of means and a list of standard deviations; the color channels are then normalized like so:\n", 148 | "\n", 149 | "```input[channel] = (input[channel] - mean[channel]) / std[channel]```\n", 150 | "\n", 151 | "Subtracting `mean` centers the data around zero and dividing by `std` squishes the values to be between -1 and 1. Normalizing helps keep the network weights near zero, which in turn makes backpropagation more stable. Without normalization, networks will tend to fail to learn.\n", 152 | "\n", 153 | "You can find a list of all [the available transforms here](http://pytorch.org/docs/0.3.0/torchvision/transforms.html). When you're testing, however, you'll want to use images that aren't altered other than normalizing. So, for validation/test images, you'll typically just resize and crop.\n", 154 | "\n", 155 | ">**Exercise:** Define transforms for training data and testing data below. Leave off normalization for now."
156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": null, 161 | "metadata": {}, 162 | "outputs": [], 163 | "source": [ 164 | "data_dir = 'Cat_Dog_data'\n", 165 | "\n", 166 | "# TODO: Define transforms for the training data and testing data\n", 167 | "train_transforms = transforms.Compose([transforms.RandomRotation(30),\n", 168 | " transforms.RandomResizedCrop(224),\n", 169 | " transforms.RandomHorizontalFlip(),\n", 170 | " transforms.ToTensor()]) \n", 171 | "\n", 172 | "test_transforms = transforms.Compose([transforms.Resize(255),\n", 173 | " transforms.CenterCrop(224),\n", 174 | " transforms.ToTensor()])\n", 175 | "\n", 176 | "\n", 177 | "# Pass transforms in here, then run the next cell to see how the transforms look\n", 178 | "train_data = datasets.ImageFolder(data_dir + '/train', transform=train_transforms)\n", 179 | "test_data = datasets.ImageFolder(data_dir + '/test', transform=test_transforms)\n", 180 | "\n", 181 | "trainloader = torch.utils.data.DataLoader(train_data, batch_size=32)\n", 182 | "testloader = torch.utils.data.DataLoader(test_data, batch_size=32)" 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": null, 188 | "metadata": {}, 189 | "outputs": [], 190 | "source": [ 191 | "# change this to the trainloader or testloader \n", 192 | "data_iter = iter(testloader)\n", 193 | "\n", 194 | "images, labels = next(data_iter)\n", 195 | "fig, axes = plt.subplots(figsize=(10,4), ncols=4)\n", 196 | "for ii in range(4):\n", 197 | " ax = axes[ii]\n", 198 | " helper.imshow(images[ii], ax=ax, normalize=False)" 199 | ] 200 | }, 201 | { 202 | "cell_type": "markdown", 203 | "metadata": {}, 204 | "source": [ 205 | "Your transformed images should look something like this.\n", 206 | "\n", 207 | "
Training examples:
\n", 208 | "\n", 209 | "\n", 210 | "
Testing examples:
\n", 211 | "" 212 | ] 213 | }, 214 | { 215 | "cell_type": "markdown", 216 | "metadata": {}, 217 | "source": [ 218 | "At this point you should be able to load data for training and testing. Now, you should try building a network that can classify cats vs dogs. This is quite a bit more complicated than before with the MNIST and Fashion-MNIST datasets. To be honest, you probably won't get it to work with a fully-connected network, no matter how deep. These images have three color channels and at a higher resolution (so far you've seen 28x28 images which are tiny).\n", 219 | "\n", 220 | "In the next part, I'll show you how to use a pre-trained network to build a model that can actually solve this problem." 221 | ] 222 | }, 223 | { 224 | "cell_type": "code", 225 | "execution_count": null, 226 | "metadata": {}, 227 | "outputs": [], 228 | "source": [ 229 | "# Optional TODO: Attempt to build a network to classify cats vs dogs from this dataset" 230 | ] 231 | } 232 | ], 233 | "metadata": { 234 | "kernelspec": { 235 | "display_name": "Python 3", 236 | "language": "python", 237 | "name": "python3" 238 | }, 239 | "language_info": { 240 | "codemirror_mode": { 241 | "name": "ipython", 242 | "version": 3 243 | }, 244 | "file_extension": ".py", 245 | "mimetype": "text/x-python", 246 | "name": "python", 247 | "nbconvert_exporter": "python", 248 | "pygments_lexer": "ipython3", 249 | "version": "3.6.6" 250 | } 251 | }, 252 | "nbformat": 4, 253 | "nbformat_minor": 2 254 | } 255 | -------------------------------------------------------------------------------- /deep-learning-with-pytorch/Part 8 - Transfer Learning (Exercises).ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Transfer Learning\n", 8 | "\n", 9 | "In this notebook, you'll learn how to use pre-trained networks to solved challenging problems in computer vision. Specifically, you'll use networks trained on [ImageNet](http://www.image-net.org/) [available from torchvision](http://pytorch.org/docs/0.3.0/torchvision/models.html). \n", 10 | "\n", 11 | "ImageNet is a massive dataset with over 1 million labeled images in 1000 categories. It's used to train deep neural networks using an architecture called convolutional layers. I'm not going to get into the details of convolutional networks here, but if you want to learn more about them, please [watch this](https://www.youtube.com/watch?v=2-Ol7ZB0MmU).\n", 12 | "\n", 13 | "Once trained, these models work astonishingly well as feature detectors for images they weren't trained on. Using a pre-trained network on images not in the training set is called transfer learning. Here we'll use transfer learning to train a network that can classify our cat and dog photos with near perfect accuracy.\n", 14 | "\n", 15 | "With `torchvision.models` you can download these pre-trained networks and use them in your applications. We'll include `models` in our imports now." 
16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": null, 21 | "metadata": {}, 22 | "outputs": [], 23 | "source": [ 24 | "%matplotlib inline\n", 25 | "%config InlineBackend.figure_format = 'retina'\n", 26 | "\n", 27 | "import matplotlib.pyplot as plt\n", 28 | "\n", 29 | "import torch\n", 30 | "from torch import nn\n", 31 | "from torch import optim\n", 32 | "import torch.nn.functional as F\n", 33 | "from torchvision import datasets, transforms, models" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": {}, 39 | "source": [ 40 | "Most of the pretrained models require the input to be 224x224 images. Also, we'll need to match the normalization used when the models were trained. Each color channel was normalized separately; the means are `[0.485, 0.456, 0.406]` and the standard deviations are `[0.229, 0.224, 0.225]`." 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [ 49 | "data_dir = 'assets/Cat_Dog_data'\n", 50 | "\n", 51 | "# TODO: Define transforms for the training data and testing data\n", 52 | "train_transforms =\n", 53 | "\n", 54 | "test_transforms =\n", 55 | "\n", 56 | "# Pass transforms in here, then run the next cell to see how the transforms look\n", 57 | "train_data = datasets.ImageFolder(data_dir + '/train', transform=train_transforms)\n", 58 | "test_data = datasets.ImageFolder(data_dir + '/test', transform=test_transforms)\n", 59 | "\n", 60 | "trainloader = torch.utils.data.DataLoader(train_data, batch_size=64, shuffle=True)\n", 61 | "testloader = torch.utils.data.DataLoader(test_data, batch_size=64)" 62 | ] 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "metadata": {}, 67 | "source": [ 68 | "We can load in a model such as [DenseNet](http://pytorch.org/docs/0.3.0/torchvision/models.html#id5). Let's print out the model architecture so we can see what's going on." 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": null, 74 | "metadata": { 75 | "scrolled": true 76 | }, 77 | "outputs": [], 78 | "source": [ 79 | "model = models.densenet121(pretrained=True)\n", 80 | "model" 81 | ] 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "metadata": {}, 86 | "source": [ 87 | "This model is built out of two main parts: the features and the classifier. The features part is a stack of convolutional layers and overall works as a feature detector that can be fed into a classifier. The classifier part is a single fully-connected layer `(classifier): Linear(in_features=1024, out_features=1000)`. This layer was trained on the ImageNet dataset, so it won't work for our specific problem. That means we need to replace the classifier, but the features will work perfectly on their own. In general, I think about pre-trained networks as amazingly good feature detectors that can be used as the input for simple feed-forward classifiers."
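Before swapping anything out, it can help to confirm what the stock head looks like. A quick check, assuming the DenseNet-121 loaded above:

```python
# The ImageNet-specific head we're about to replace
print(model.classifier)  # Linear(in_features=1024, out_features=1000, bias=True)
```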
88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": null, 93 | "metadata": {}, 94 | "outputs": [], 95 | "source": [ 96 | "# Freeze parameters so we don't backprop through them\n", 97 | "for param in model.parameters():\n", 98 | " param.requires_grad = False\n", 99 | "\n", 100 | "from collections import OrderedDict\n", 101 | "classifier = nn.Sequential(OrderedDict([\n", 102 | " ('fc1', nn.Linear(1024, 500)),\n", 103 | " ('relu', nn.ReLU()),\n", 104 | " ('fc2', nn.Linear(500, 2)),\n", 105 | " ('output', nn.LogSoftmax(dim=1))\n", 106 | " ]))\n", 107 | " \n", 108 | "model.classifier = classifier" 109 | ] 110 | }, 111 | { 112 | "cell_type": "markdown", 113 | "metadata": {}, 114 | "source": [ 115 | "With our model built, we need to train the classifier. However, now we're using a **really deep** neural network. If you try to train this on a CPU like normal, it will take a long, long time. Instead, we're going to use the GPU to do the calculations. The linear algebra computations are done in parallel on the GPU, which can lead to roughly 100x faster training. It's also possible to train on multiple GPUs, further decreasing training time.\n", 116 | "\n", 117 | "PyTorch, along with pretty much every other deep learning framework, uses [CUDA](https://developer.nvidia.com/cuda-zone) to efficiently compute the forward and backward passes on the GPU. In PyTorch, you move your model parameters and other tensors to the GPU memory using `model.to('cuda')`. You can move them back from the GPU with `model.to('cpu')`, which you'll commonly do when you need to operate on the network output outside of PyTorch. As a demonstration of the increased speed, I'll compare how long it takes to perform a forward and backward pass with and without a GPU." 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": null, 123 | "metadata": {}, 124 | "outputs": [], 125 | "source": [ 126 | "import time" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": null, 132 | "metadata": {}, 133 | "outputs": [], 134 | "source": [ 135 | "for device in ['cpu', 'cuda']:\n", 136 | "\n", 137 | " criterion = nn.NLLLoss()\n", 138 | " # Only train the classifier parameters, feature parameters are frozen\n", 139 | " optimizer = optim.Adam(model.classifier.parameters(), lr=0.001)\n", 140 | "\n", 141 | " model.to(device)\n", 142 | " start = time.time() # start the timer before the first batch\n", 143 | " for ii, (inputs, labels) in enumerate(trainloader):\n", 144 | "\n", 145 | " # Move input and label tensors to the GPU\n", 146 | " inputs, labels = inputs.to(device), labels.to(device)\n", 147 | "\n", 148 | "\n", 149 | "\n", 150 | " outputs = model.forward(inputs)\n", 151 | " loss = criterion(outputs, labels)\n", 152 | "\n", 153 | " optimizer.zero_grad()\n", 154 | " loss.backward()\n", 155 | " optimizer.step()\n", 156 | "\n", 157 | " if ii == 2: # stop after three timed batches\n", 158 | " break\n", 159 | " \n", 160 | " print(f\"Device = {device}; Time per batch: {(time.time() - start)/3:.3f} seconds\")" 161 | ] 162 | }, 163 | { 164 | "cell_type": "markdown", 165 | "metadata": {}, 166 | "source": [ 167 | "You can write device-agnostic code, which will automatically use CUDA if it's enabled, like so:\n", 168 | "```python\n", 169 | "# at beginning of the script\n", 170 | "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n", 171 | "\n", 172 | "...\n", 173 | "\n", 174 | "# then whenever you get a new Tensor or Module\n", 175 | "# this won't copy if they are already on the desired device\n", 176 | "input = data.to(device)\n", 177 | "model = 
MyModule(...).to(device)\n", 178 | "```\n", 179 | "\n", 180 | "From here, I'll let you finish training the model. The process is the same as before, except now your model is much more powerful. You should get better than 95% accuracy easily.\n", 181 | "\n", 182 | ">**Exercise:** Train a pretrained model to classify the cat and dog images. Continue with the DenseNet model, or try ResNet, which is also a good model to start with. Make sure you are only training the classifier and that the parameters for the features part are frozen." 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": null, 188 | "metadata": {}, 189 | "outputs": [], 190 | "source": [ 191 | "## TODO: Use a pretrained model to classify the cat and dog images" 192 | ] 193 | } 194 | ], 195 | "metadata": { 196 | "kernelspec": { 197 | "display_name": "Python 3", 198 | "language": "python", 199 | "name": "python3" 200 | }, 201 | "language_info": { 202 | "codemirror_mode": { 203 | "name": "ipython", 204 | "version": 3 205 | }, 206 | "file_extension": ".py", 207 | "mimetype": "text/x-python", 208 | "name": "python", 209 | "nbconvert_exporter": "python", 210 | "pygments_lexer": "ipython3", 211 | "version": "3.6.6" 212 | } 213 | }, 214 | "nbformat": 4, 215 | "nbformat_minor": 2 216 | } 217 | -------------------------------------------------------------------------------- /deep-learning-with-pytorch/Part 8 - Transfer Learning (Solution).ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Transfer Learning\n", 8 | "\n", 9 | "In this notebook, you'll learn how to use pre-trained networks to solve challenging problems in computer vision. Specifically, you'll use networks trained on [ImageNet](http://www.image-net.org/) [available from torchvision](http://pytorch.org/docs/0.3.0/torchvision/models.html). \n", 10 | "\n", 11 | "ImageNet is a massive dataset with over 1 million labeled images in 1000 categories. It's used to train deep neural networks built from convolutional layers. I'm not going to get into the details of convolutional networks here, but if you want to learn more about them, please [watch this](https://www.youtube.com/watch?v=2-Ol7ZB0MmU).\n", 12 | "\n", 13 | "Once trained, these models work astonishingly well as feature detectors for images they weren't trained on. Using a pre-trained network on images not in the training set is called transfer learning. Here we'll use transfer learning to train a network that can classify our cat and dog photos with near-perfect accuracy.\n", 14 | "\n", 15 | "With `torchvision.models` you can download these pre-trained networks and use them in your applications. We'll include `models` in our imports now." 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": null, 21 | "metadata": {}, 22 | "outputs": [], 23 | "source": [ 24 | "%matplotlib inline\n", 25 | "%config InlineBackend.figure_format = 'retina'\n", 26 | "\n", 27 | "import matplotlib.pyplot as plt\n", 28 | "\n", 29 | "import torch\n", 30 | "from torch import nn\n", 31 | "from torch import optim\n", 32 | "import torch.nn.functional as F\n", 33 | "from torchvision import datasets, transforms, models" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": {}, 39 | "source": [ 40 | "Most of the pretrained models require the input to be 224x224 images. Also, we'll need to match the normalization used when the models were trained. 
Each color channel was normalized separately; the means are `[0.485, 0.456, 0.406]` and the standard deviations are `[0.229, 0.224, 0.225]`." 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [ 49 | "data_dir = 'Cat_Dog_data'\n", 50 | "\n", 51 | "# TODO: Define transforms for the training data and testing data\n", 52 | "train_transforms = transforms.Compose([transforms.RandomRotation(30),\n", 53 | " transforms.RandomResizedCrop(224),\n", 54 | " transforms.RandomHorizontalFlip(),\n", 55 | " transforms.ToTensor(),\n", 56 | " transforms.Normalize([0.485, 0.456, 0.406],\n", 57 | " [0.229, 0.224, 0.225])])\n", 58 | "\n", 59 | "test_transforms = transforms.Compose([transforms.Resize(255),\n", 60 | " transforms.CenterCrop(224),\n", 61 | " transforms.ToTensor(),\n", 62 | " transforms.Normalize([0.485, 0.456, 0.406],\n", 63 | " [0.229, 0.224, 0.225])])\n", 64 | "\n", 65 | "# Pass transforms in here, then run the next cell to see how the transforms look\n", 66 | "train_data = datasets.ImageFolder(data_dir + '/train', transform=train_transforms)\n", 67 | "test_data = datasets.ImageFolder(data_dir + '/test', transform=test_transforms)\n", 68 | "\n", 69 | "trainloader = torch.utils.data.DataLoader(train_data, batch_size=64, shuffle=True)\n", 70 | "testloader = torch.utils.data.DataLoader(test_data, batch_size=64)" 71 | ] 72 | }, 73 | { 74 | "cell_type": "markdown", 75 | "metadata": {}, 76 | "source": [ 77 | "We can load in a model such as [DenseNet](http://pytorch.org/docs/0.3.0/torchvision/models.html#id5). Let's print out the model architecture so we can see what's going on." 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "metadata": { 84 | "scrolled": true 85 | }, 86 | "outputs": [], 87 | "source": [ 88 | "model = models.densenet121(pretrained=True)\n", 89 | "model" 90 | ] 91 | }, 92 | { 93 | "cell_type": "markdown", 94 | "metadata": {}, 95 | "source": [ 96 | "This model is built out of two main parts: the features and the classifier. The features part is a stack of convolutional layers and overall works as a feature detector that can be fed into a classifier. The classifier part is a single fully-connected layer `(classifier): Linear(in_features=1024, out_features=1000)`. This layer was trained on the ImageNet dataset, so it won't work for our specific problem. That means we need to replace the classifier, but the features will work perfectly on their own. In general, I think about pre-trained networks as amazingly good feature detectors that can be used as the input for simple feed-forward classifiers." 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": null, 102 | "metadata": {}, 103 | "outputs": [], 104 | "source": [ 105 | "# Freeze parameters so we don't backprop through them\n", 106 | "for param in model.parameters():\n", 107 | " param.requires_grad = False\n", 108 | "\n", 109 | "from collections import OrderedDict\n", 110 | "classifier = nn.Sequential(OrderedDict([\n", 111 | " ('fc1', nn.Linear(1024, 500)),\n", 112 | " ('relu', nn.ReLU()),\n", 113 | " ('fc2', nn.Linear(500, 2)),\n", 114 | " ('output', nn.LogSoftmax(dim=1))\n", 115 | " ]))\n", 116 | " \n", 117 | "model.classifier = classifier" 118 | ] 119 | }, 120 | { 121 | "cell_type": "markdown", 122 | "metadata": {}, 123 | "source": [ 124 | "With our model built, we need to train the classifier. However, now we're using a **really deep** neural network. 
If you try to train this on a CPU like normal, it will take a long, long time. Instead, we're going to use the GPU to do the calculations. The linear algebra computations are done in parallel on the GPU, which can lead to roughly 100x faster training. It's also possible to train on multiple GPUs, further decreasing training time.\n", 125 | "\n", 126 | "PyTorch, along with pretty much every other deep learning framework, uses [CUDA](https://developer.nvidia.com/cuda-zone) to efficiently compute the forward and backward passes on the GPU. In PyTorch, you move your model parameters and other tensors to the GPU memory using `model.to('cuda')`. You can move them back from the GPU with `model.to('cpu')`, which you'll commonly do when you need to operate on the network output outside of PyTorch. As a demonstration of the increased speed, I'll compare how long it takes to perform a forward and backward pass with and without a GPU." 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": null, 132 | "metadata": {}, 133 | "outputs": [], 134 | "source": [ 135 | "import time" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": null, 141 | "metadata": {}, 142 | "outputs": [], 143 | "source": [ 144 | "for device in ['cpu', 'cuda']:\n", 145 | "\n", 146 | " criterion = nn.NLLLoss()\n", 147 | " # Only train the classifier parameters, feature parameters are frozen\n", 148 | " optimizer = optim.Adam(model.classifier.parameters(), lr=0.001)\n", 149 | "\n", 150 | " model.to(device)\n", 151 | " start = time.time() # start the timer before the first batch\n", 152 | " for ii, (inputs, labels) in enumerate(trainloader):\n", 153 | "\n", 154 | " # Move input and label tensors to the GPU\n", 155 | " inputs, labels = inputs.to(device), labels.to(device)\n", 156 | "\n", 157 | "\n", 158 | "\n", 159 | " outputs = model.forward(inputs)\n", 160 | " loss = criterion(outputs, labels)\n", 161 | "\n", 162 | " optimizer.zero_grad()\n", 163 | " loss.backward()\n", 164 | " optimizer.step()\n", 165 | "\n", 166 | " if ii == 2: # stop after three timed batches\n", 167 | " break\n", 168 | " \n", 169 | " print(f\"Device = {device}; Time per batch: {(time.time() - start)/3:.3f} seconds\")" 170 | ] 171 | }, 172 | { 173 | "cell_type": "markdown", 174 | "metadata": {}, 175 | "source": [ 176 | "You can write device-agnostic code, which will automatically use CUDA if it's enabled, like so:\n", 177 | "```python\n", 178 | "# at beginning of the script\n", 179 | "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n", 180 | "\n", 181 | "...\n", 182 | "\n", 183 | "# then whenever you get a new Tensor or Module\n", 184 | "# this won't copy if they are already on the desired device\n", 185 | "input = data.to(device)\n", 186 | "model = MyModule(...).to(device)\n", 187 | "```\n", 188 | "\n", 189 | "From here, I'll let you finish training the model. The process is the same as before, except now your model is much more powerful. You should get better than 95% accuracy easily.\n", 190 | "\n", 191 | ">**Exercise:** Train a pretrained model to classify the cat and dog images. Continue with the DenseNet model, or try ResNet, which is also a good model to start with. Make sure you are only training the classifier and that the parameters for the features part are frozen."
192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": null, 197 | "metadata": {}, 198 | "outputs": [], 199 | "source": [ 200 | "# Use GPU if it's available\n", 201 | "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", 202 | "\n", 203 | "model = models.densenet121(pretrained=True)\n", 204 | "\n", 205 | "# Freeze parameters so we don't backprop through them\n", 206 | "for param in model.parameters():\n", 207 | " param.requires_grad = False\n", 208 | " \n", 209 | "model.classifier = nn.Sequential(nn.Linear(1024, 256),\n", 210 | " nn.ReLU(),\n", 211 | " nn.Dropout(0.2),\n", 212 | " nn.Linear(256, 2),\n", 213 | " nn.LogSoftmax(dim=1))\n", 214 | "\n", 215 | "criterion = nn.NLLLoss()\n", 216 | "\n", 217 | "# Only train the classifier parameters, feature parameters are frozen\n", 218 | "optimizer = optim.Adam(model.classifier.parameters(), lr=0.003)\n", 219 | "\n", 220 | "model.to(device);" 221 | ] 222 | }, 223 | { 224 | "cell_type": "code", 225 | "execution_count": null, 226 | "metadata": {}, 227 | "outputs": [], 228 | "source": [ 229 | "epochs = 1\n", 230 | "steps = 0\n", 231 | "running_loss = 0\n", 232 | "print_every = 5\n", 233 | "for epoch in range(epochs):\n", 234 | " for inputs, labels in trainloader:\n", 235 | " steps += 1\n", 236 | " # Move input and label tensors to the default device\n", 237 | " inputs, labels = inputs.to(device), labels.to(device)\n", 238 | " \n", 239 | " logps = model.forward(inputs)\n", 240 | " loss = criterion(logps, labels)\n", 241 | " \n", 242 | " optimizer.zero_grad()\n", 243 | " loss.backward()\n", 244 | " optimizer.step()\n", 245 | "\n", 246 | " running_loss += loss.item()\n", 247 | " \n", 248 | " if steps % print_every == 0:\n", 249 | " test_loss = 0\n", 250 | " accuracy = 0\n", 251 | " model.eval()\n", 252 | " with torch.no_grad():\n", 253 | " for inputs, labels in testloader:\n", 254 | " inputs, labels = inputs.to(device), labels.to(device)\n", 255 | " logps = model.forward(inputs)\n", 256 | " batch_loss = criterion(logps, labels)\n", 257 | " \n", 258 | " test_loss += batch_loss.item()\n", 259 | " \n", 260 | " # Calculate accuracy\n", 261 | " ps = torch.exp(logps)\n", 262 | " top_p, top_class = ps.topk(1, dim=1)\n", 263 | " equals = top_class == labels.view(*top_class.shape)\n", 264 | " accuracy += torch.mean(equals.type(torch.FloatTensor)).item()\n", 265 | " \n", 266 | " print(f\"Epoch {epoch+1}/{epochs}.. \"\n", 267 | " f\"Train loss: {running_loss/print_every:.3f}.. \"\n", 268 | " f\"Test loss: {test_loss/len(testloader):.3f}.. \"\n", 269 | " f\"Test accuracy: {accuracy/len(testloader):.3f}\")\n", 270 | " running_loss = 0\n", 271 | " model.train()" 272 | ] 273 | } 274 | ], 275 | "metadata": { 276 | "kernelspec": { 277 | "display_name": "Python 3", 278 | "language": "python", 279 | "name": "python3" 280 | }, 281 | "language_info": { 282 | "codemirror_mode": { 283 | "name": "ipython", 284 | "version": 3 285 | }, 286 | "file_extension": ".py", 287 | "mimetype": "text/x-python", 288 | "name": "python", 289 | "nbconvert_exporter": "python", 290 | "pygments_lexer": "ipython3", 291 | "version": "3.6.6" 292 | } 293 | }, 294 | "nbformat": 4, 295 | "nbformat_minor": 2 296 | } 297 | -------------------------------------------------------------------------------- /deep-learning-with-pytorch/README.md: -------------------------------------------------------------------------------- 1 | # Deep Learning with PyTorch 2 | 3 | This repo contains notebooks and related code for Udacity's Deep Learning with PyTorch lesson. 
This lesson appears in our [AI Programming with Python Nanodegree program](https://www.udacity.com/course/ai-programming-python-nanodegree--nd089). 4 | 5 | * **Part 1:** Introduction to PyTorch and using tensors 6 | * **Part 2:** Building fully-connected neural networks with PyTorch 7 | * **Part 3:** How to train a fully-connected network with backpropagation on MNIST 8 | * **Part 4:** Exercise - train a neural network on Fashion-MNIST 9 | * **Part 5:** Using a trained network for making predictions and validating networks 10 | * **Part 6:** How to save and load trained models 11 | * **Part 7:** Load image data with torchvision, also data augmentation 12 | * **Part 8:** Use transfer learning to train a state-of-the-art image classifier for dogs and cats -------------------------------------------------------------------------------- /deep-learning-with-pytorch/__pycache__/helper.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/udacity/cd0281-Introduction-to-Neural-Networks-with-PyTorch/b9077645c089fd3865c0e5d3b992e6f1bfd8a98a/deep-learning-with-pytorch/__pycache__/helper.cpython-37.pyc -------------------------------------------------------------------------------- /deep-learning-with-pytorch/assets/ImageNet_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/udacity/cd0281-Introduction-to-Neural-Networks-with-PyTorch/b9077645c089fd3865c0e5d3b992e6f1bfd8a98a/deep-learning-with-pytorch/assets/ImageNet_example.png -------------------------------------------------------------------------------- /deep-learning-with-pytorch/assets/Pooling_Simple_max.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/udacity/cd0281-Introduction-to-Neural-Networks-with-PyTorch/b9077645c089fd3865c0e5d3b992e6f1bfd8a98a/deep-learning-with-pytorch/assets/Pooling_Simple_max.png -------------------------------------------------------------------------------- /deep-learning-with-pytorch/assets/activation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/udacity/cd0281-Introduction-to-Neural-Networks-with-PyTorch/b9077645c089fd3865c0e5d3b992e6f1bfd8a98a/deep-learning-with-pytorch/assets/activation.png -------------------------------------------------------------------------------- /deep-learning-with-pytorch/assets/autoencoder_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/udacity/cd0281-Introduction-to-Neural-Networks-with-PyTorch/b9077645c089fd3865c0e5d3b992e6f1bfd8a98a/deep-learning-with-pytorch/assets/autoencoder_1.png -------------------------------------------------------------------------------- /deep-learning-with-pytorch/assets/backprop_diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/udacity/cd0281-Introduction-to-Neural-Networks-with-PyTorch/b9077645c089fd3865c0e5d3b992e6f1bfd8a98a/deep-learning-with-pytorch/assets/backprop_diagram.png -------------------------------------------------------------------------------- /deep-learning-with-pytorch/assets/cat.70.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/udacity/cd0281-Introduction-to-Neural-Networks-with-PyTorch/b9077645c089fd3865c0e5d3b992e6f1bfd8a98a/deep-learning-with-pytorch/assets/cat.70.jpg -------------------------------------------------------------------------------- /deep-learning-with-pytorch/assets/cat_cropped.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/udacity/cd0281-Introduction-to-Neural-Networks-with-PyTorch/b9077645c089fd3865c0e5d3b992e6f1bfd8a98a/deep-learning-with-pytorch/assets/cat_cropped.png -------------------------------------------------------------------------------- /deep-learning-with-pytorch/assets/conv_net.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/udacity/cd0281-Introduction-to-Neural-Networks-with-PyTorch/b9077645c089fd3865c0e5d3b992e6f1bfd8a98a/deep-learning-with-pytorch/assets/conv_net.jpg -------------------------------------------------------------------------------- /deep-learning-with-pytorch/assets/dog.128.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/udacity/cd0281-Introduction-to-Neural-Networks-with-PyTorch/b9077645c089fd3865c0e5d3b992e6f1bfd8a98a/deep-learning-with-pytorch/assets/dog.128.jpg -------------------------------------------------------------------------------- /deep-learning-with-pytorch/assets/dog_cat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/udacity/cd0281-Introduction-to-Neural-Networks-with-PyTorch/b9077645c089fd3865c0e5d3b992e6f1bfd8a98a/deep-learning-with-pytorch/assets/dog_cat.png -------------------------------------------------------------------------------- /deep-learning-with-pytorch/assets/examples_new.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/udacity/cd0281-Introduction-to-Neural-Networks-with-PyTorch/b9077645c089fd3865c0e5d3b992e6f1bfd8a98a/deep-learning-with-pytorch/assets/examples_new.png -------------------------------------------------------------------------------- /deep-learning-with-pytorch/assets/fashion-mnist-sprite.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/udacity/cd0281-Introduction-to-Neural-Networks-with-PyTorch/b9077645c089fd3865c0e5d3b992e6f1bfd8a98a/deep-learning-with-pytorch/assets/fashion-mnist-sprite.png -------------------------------------------------------------------------------- /deep-learning-with-pytorch/assets/full_padding_no_strides_transposed.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/udacity/cd0281-Introduction-to-Neural-Networks-with-PyTorch/b9077645c089fd3865c0e5d3b992e6f1bfd8a98a/deep-learning-with-pytorch/assets/full_padding_no_strides_transposed.gif -------------------------------------------------------------------------------- /deep-learning-with-pytorch/assets/function_approx.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/udacity/cd0281-Introduction-to-Neural-Networks-with-PyTorch/b9077645c089fd3865c0e5d3b992e6f1bfd8a98a/deep-learning-with-pytorch/assets/function_approx.png -------------------------------------------------------------------------------- 
/deep-learning-with-pytorch/assets/gradient_descent.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/udacity/cd0281-Introduction-to-Neural-Networks-with-PyTorch/b9077645c089fd3865c0e5d3b992e6f1bfd8a98a/deep-learning-with-pytorch/assets/gradient_descent.png -------------------------------------------------------------------------------- /deep-learning-with-pytorch/assets/image_distribution.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/udacity/cd0281-Introduction-to-Neural-Networks-with-PyTorch/b9077645c089fd3865c0e5d3b992e6f1bfd8a98a/deep-learning-with-pytorch/assets/image_distribution.png -------------------------------------------------------------------------------- /deep-learning-with-pytorch/assets/infographic.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/udacity/cd0281-Introduction-to-Neural-Networks-with-PyTorch/b9077645c089fd3865c0e5d3b992e6f1bfd8a98a/deep-learning-with-pytorch/assets/infographic.pdf -------------------------------------------------------------------------------- /deep-learning-with-pytorch/assets/lenet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/udacity/cd0281-Introduction-to-Neural-Networks-with-PyTorch/b9077645c089fd3865c0e5d3b992e6f1bfd8a98a/deep-learning-with-pytorch/assets/lenet.png -------------------------------------------------------------------------------- /deep-learning-with-pytorch/assets/mlp_mnist.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/udacity/cd0281-Introduction-to-Neural-Networks-with-PyTorch/b9077645c089fd3865c0e5d3b992e6f1bfd8a98a/deep-learning-with-pytorch/assets/mlp_mnist.png -------------------------------------------------------------------------------- /deep-learning-with-pytorch/assets/mnist.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/udacity/cd0281-Introduction-to-Neural-Networks-with-PyTorch/b9077645c089fd3865c0e5d3b992e6f1bfd8a98a/deep-learning-with-pytorch/assets/mnist.png -------------------------------------------------------------------------------- /deep-learning-with-pytorch/assets/multilayer_diagram_weights.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/udacity/cd0281-Introduction-to-Neural-Networks-with-PyTorch/b9077645c089fd3865c0e5d3b992e6f1bfd8a98a/deep-learning-with-pytorch/assets/multilayer_diagram_weights.png -------------------------------------------------------------------------------- /deep-learning-with-pytorch/assets/network_diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/udacity/cd0281-Introduction-to-Neural-Networks-with-PyTorch/b9077645c089fd3865c0e5d3b992e6f1bfd8a98a/deep-learning-with-pytorch/assets/network_diagram.png -------------------------------------------------------------------------------- /deep-learning-with-pytorch/assets/overfitting.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/udacity/cd0281-Introduction-to-Neural-Networks-with-PyTorch/b9077645c089fd3865c0e5d3b992e6f1bfd8a98a/deep-learning-with-pytorch/assets/overfitting.png 
-------------------------------------------------------------------------------- /deep-learning-with-pytorch/assets/padding_strides.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/udacity/cd0281-Introduction-to-Neural-Networks-with-PyTorch/b9077645c089fd3865c0e5d3b992e6f1bfd8a98a/deep-learning-with-pytorch/assets/padding_strides.gif -------------------------------------------------------------------------------- /deep-learning-with-pytorch/assets/simple_neuron.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/udacity/cd0281-Introduction-to-Neural-Networks-with-PyTorch/b9077645c089fd3865c0e5d3b992e6f1bfd8a98a/deep-learning-with-pytorch/assets/simple_neuron.png -------------------------------------------------------------------------------- /deep-learning-with-pytorch/assets/test_examples.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/udacity/cd0281-Introduction-to-Neural-Networks-with-PyTorch/b9077645c089fd3865c0e5d3b992e6f1bfd8a98a/deep-learning-with-pytorch/assets/test_examples.png -------------------------------------------------------------------------------- /deep-learning-with-pytorch/assets/train_examples.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/udacity/cd0281-Introduction-to-Neural-Networks-with-PyTorch/b9077645c089fd3865c0e5d3b992e6f1bfd8a98a/deep-learning-with-pytorch/assets/train_examples.png -------------------------------------------------------------------------------- /deep-learning-with-pytorch/assets/w1_backprop_graph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/udacity/cd0281-Introduction-to-Neural-Networks-with-PyTorch/b9077645c089fd3865c0e5d3b992e6f1bfd8a98a/deep-learning-with-pytorch/assets/w1_backprop_graph.png -------------------------------------------------------------------------------- /deep-learning-with-pytorch/fc_model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class Network(nn.Module): 7 | def __init__(self, input_size, output_size, hidden_layers, drop_p=0.5): 8 | ''' Builds a feedforward network with arbitrary hidden layers. 
9 | 10 | Arguments 11 | --------- 12 | input_size: integer, size of the input layer 13 | output_size: integer, size of the output layer 14 | hidden_layers: list of integers, the sizes of the hidden layers 15 | drop_p: float between 0 and 1, dropout probability for the hidden layers 16 | ''' 17 | super().__init__() 18 | # Input to the first hidden layer 19 | self.hidden_layers = nn.ModuleList([nn.Linear(input_size, hidden_layers[0])]) 20 | 21 | # Add a variable number of more hidden layers 22 | layer_sizes = zip(hidden_layers[:-1], hidden_layers[1:]) 23 | self.hidden_layers.extend([nn.Linear(h1, h2) for h1, h2 in layer_sizes]) 24 | 25 | self.output = nn.Linear(hidden_layers[-1], output_size) 26 | 27 | self.dropout = nn.Dropout(p=drop_p) 28 | 29 | def forward(self, x): 30 | ''' Forward pass through the network, returns the log-softmax output ''' 31 | 32 | for each in self.hidden_layers: 33 | x = F.relu(each(x)) 34 | x = self.dropout(x) 35 | x = self.output(x) 36 | 37 | return F.log_softmax(x, dim=1) 38 | 39 | 40 | def validation(model, testloader, criterion): 41 | accuracy = 0 42 | test_loss = 0 43 | for images, labels in testloader: 44 | 45 | images = images.resize_(images.size()[0], 784) 46 | 47 | output = model.forward(images) 48 | test_loss += criterion(output, labels).item() 49 | 50 | ## Calculating the accuracy 51 | # Model's output is log-softmax, take exponential to get the probabilities 52 | ps = torch.exp(output) 53 | # Class with highest probability is our predicted class, compare with true label 54 | equality = (labels.data == ps.max(1)[1]) 55 | # Accuracy is number of correct predictions divided by all predictions, just take the mean 56 | accuracy += equality.type_as(torch.FloatTensor()).mean() 57 | 58 | return test_loss, accuracy 59 | 60 | 61 | def train(model, trainloader, testloader, criterion, optimizer, epochs=5, print_every=40): 62 | 63 | steps = 0 64 | running_loss = 0 65 | for e in range(epochs): 66 | # Model in training mode, dropout is on 67 | model.train() 68 | for images, labels in trainloader: 69 | steps += 1 70 | 71 | # Flatten images into a 784 long vector 72 | images.resize_(images.size()[0], 784) 73 | 74 | optimizer.zero_grad() 75 | 76 | output = model.forward(images) 77 | loss = criterion(output, labels) 78 | loss.backward() 79 | optimizer.step() 80 | 81 | running_loss += loss.item() 82 | 83 | if steps % print_every == 0: 84 | # Model in inference mode, dropout is off 85 | model.eval() 86 | 87 | # Turn off gradients for validation, will speed up inference 88 | with torch.no_grad(): 89 | test_loss, accuracy = validation(model, testloader, criterion) 90 | 91 | print("Epoch: {}/{}.. ".format(e+1, epochs), 92 | "Training Loss: {:.3f}.. ".format(running_loss/print_every), 93 | "Test Loss: {:.3f}.. 
".format(test_loss/len(testloader)), 94 | "Test Accuracy: {:.3f}".format(accuracy/len(testloader))) 95 | 96 | running_loss = 0 97 | 98 | # Make sure dropout and grads are on for training 99 | model.train() 100 | -------------------------------------------------------------------------------- /deep-learning-with-pytorch/helper.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | from torch import nn, optim 4 | from torch.autograd import Variable 5 | 6 | 7 | def test_network(net, trainloader): 8 | 9 | criterion = nn.MSELoss() 10 | optimizer = optim.Adam(net.parameters(), lr=0.001) 11 | 12 | dataiter = iter(trainloader) 13 | images, labels = dataiter.next() 14 | 15 | # Create Variables for the inputs and targets 16 | inputs = Variable(images) 17 | targets = Variable(images) 18 | 19 | # Clear the gradients from all Variables 20 | optimizer.zero_grad() 21 | 22 | # Forward pass, then backward pass, then update weights 23 | output = net.forward(inputs) 24 | loss = criterion(output, targets) 25 | loss.backward() 26 | optimizer.step() 27 | 28 | return True 29 | 30 | 31 | def imshow(image, ax=None, title=None, normalize=True): 32 | """Imshow for Tensor.""" 33 | if ax is None: 34 | fig, ax = plt.subplots() 35 | image = image.numpy().transpose((1, 2, 0)) 36 | 37 | if normalize: 38 | mean = np.array([0.485, 0.456, 0.406]) 39 | std = np.array([0.229, 0.224, 0.225]) 40 | image = std * image + mean 41 | image = np.clip(image, 0, 1) 42 | 43 | ax.imshow(image) 44 | ax.spines['top'].set_visible(False) 45 | ax.spines['right'].set_visible(False) 46 | ax.spines['left'].set_visible(False) 47 | ax.spines['bottom'].set_visible(False) 48 | ax.tick_params(axis='both', length=0) 49 | ax.set_xticklabels('') 50 | ax.set_yticklabels('') 51 | 52 | return ax 53 | 54 | 55 | def view_recon(img, recon): 56 | ''' Function for displaying an image (as a PyTorch Tensor) and its 57 | reconstruction also a PyTorch Tensor 58 | ''' 59 | 60 | fig, axes = plt.subplots(ncols=2, sharex=True, sharey=True) 61 | axes[0].imshow(img.numpy().squeeze()) 62 | axes[1].imshow(recon.data.numpy().squeeze()) 63 | for ax in axes: 64 | ax.axis('off') 65 | ax.set_adjustable('box-forced') 66 | 67 | def view_classify(img, ps, version="MNIST"): 68 | ''' Function for viewing an image and it's predicted classes. 
69 | ''' 70 | ps = ps.data.numpy().squeeze() 71 | 72 | fig, (ax1, ax2) = plt.subplots(figsize=(6,9), ncols=2) 73 | ax1.imshow(img.resize_(1, 28, 28).numpy().squeeze()) 74 | ax1.axis('off') 75 | ax2.barh(np.arange(10), ps) 76 | ax2.set_aspect(0.1) 77 | ax2.set_yticks(np.arange(10)) 78 | if version == "MNIST": 79 | ax2.set_yticklabels(np.arange(10)) 80 | elif version == "Fashion": 81 | ax2.set_yticklabels(['T-shirt/top', 82 | 'Trouser', 83 | 'Pullover', 84 | 'Dress', 85 | 'Coat', 86 | 'Sandal', 87 | 'Shirt', 88 | 'Sneaker', 89 | 'Bag', 90 | 'Ankle Boot'], size='small'); 91 | ax2.set_title('Class Probability') 92 | ax2.set_xlim(0, 1.1) 93 | 94 | plt.tight_layout() 95 | -------------------------------------------------------------------------------- /gradient-descent/GradientDescent.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Implementing the Gradient Descent Algorithm\n", 8 | "\n", 9 | "In this notebook, you'll be implementing the functions that build the gradient descent algorithm, namely:\n", 10 | "\n", 11 | "* `sigmoid`: The sigmoid activation function.\n", 12 | "* `output_formula`: The formula for the prediction.\n", 13 | "* `error_formula`: The formula for the error at a point.\n", 14 | "* `update_weights`: The function that updates the parameters with one gradient descent step.\n", 15 | "\n", 16 | "Your goal is to find the boundary on a small dataset that has two classes:\n", 17 | "\n", 18 | "![Scatter plot of the two classes of points](points.png \"The dataset\")\n", 19 | "\n", 20 | "After you implement the gradient descent functions, be sure to run the `train` function. This will graph several of the lines that are drawn in successive gradient descent steps. It will also graph the error function, and you'll be able to see it decreasing as the number of epochs grows.\n", 21 | "\n", 22 | "First, we'll start with some functions that will help us plot and visualize the data." 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "import matplotlib.pyplot as plt\n", 32 | "import numpy as np\n", 33 | "import pandas as pd\n", 34 | "\n", 35 | "# Some helper functions for plotting and drawing lines\n", 36 | "\n", 37 | "def plot_points(X, y):\n", 38 | " admitted = X[np.argwhere(y==1)]\n", 39 | " rejected = X[np.argwhere(y==0)]\n", 40 | " plt.scatter([s[0][0] for s in rejected], [s[0][1] for s in rejected], s = 25, color = 'blue', edgecolor = 'k')\n", 41 | " plt.scatter([s[0][0] for s in admitted], [s[0][1] for s in admitted], s = 25, color = 'red', edgecolor = 'k')\n", 42 | "\n", 43 | "def display(m, b, color='g--'):\n", 44 | " plt.xlim(-0.05,1.05)\n", 45 | " plt.ylim(-0.05,1.05)\n", 46 | " x = np.arange(-10, 10, 0.1)\n", 47 | " plt.plot(x, m*x+b, color)" 48 | ] 49 | }, 50 | { 51 | "cell_type": "markdown", 52 | "metadata": {}, 53 | "source": [ 54 | "## Reading and plotting the data" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": null, 60 | "metadata": {}, 61 | "outputs": [], 62 | "source": [ 63 | "data = pd.read_csv('data.csv', header=None)\n", 64 | "X = np.array(data[[0,1]])\n", 65 | "y = np.array(data[2])\n", 66 | "plot_points(X,y)\n", 67 | "plt.show()" 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": {}, 73 | "source": [ 74 | "## TODO: Implementing the basic functions\n", 75 | "Now it's your turn to shine. 
Implement the following formulas, as explained in the text.\n", 76 | "- Sigmoid activation function\n", 77 | "\n", 78 | "$$\\sigma(x) = \\frac{1}{1+e^{-x}}$$\n", 79 | "\n", 80 | "- Output (prediction) formula\n", 81 | "\n", 82 | "$$\\hat{y} = \\sigma(w_1 x_1 + w_2 x_2 + b)$$\n", 83 | "\n", 84 | "- Error function\n", 85 | "\n", 86 | "$$Error(y, \\hat{y}) = - y \\log(\\hat{y}) - (1-y) \\log(1-\\hat{y})$$\n", 87 | "\n", 88 | "- The function that updates the weights\n", 89 | "\n", 90 | "$$ w_i \\longrightarrow w_i + \\alpha (y - \\hat{y}) x_i$$\n", 91 | "\n", 92 | "$$ b \\longrightarrow b + \\alpha (y - \\hat{y})$$" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": null, 98 | "metadata": {}, 99 | "outputs": [], 100 | "source": [ 101 | "# Activation (sigmoid) function\n", 102 | "def sigmoid(x):\n", 103 | " pass\n", 104 | "\n", 105 | "# Output (prediction) formula\n", 106 | "def output_formula(features, weights, bias):\n", 107 | " pass\n", 108 | "\n", 109 | "# Error (log-loss) formula\n", 110 | "def error_formula(y, output):\n", 111 | " pass\n", 112 | "\n", 113 | "# Gradient descent step\n", 114 | "def update_weights(x, y, weights, bias, learnrate):\n", 115 | " pass" 116 | ] 117 | }, 118 | { 119 | "cell_type": "markdown", 120 | "metadata": {}, 121 | "source": [ 122 | "## Training function\n", 123 | "This function will help us iterate the gradient descent algorithm through all the data, for a number of epochs. It will also plot the data, and some of the boundary lines obtained as we run the algorithm." 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": null, 129 | "metadata": {}, 130 | "outputs": [], 131 | "source": [ 132 | "np.random.seed(44)\n", 133 | "\n", 134 | "epochs = 100\n", 135 | "learnrate = 0.01\n", 136 | "\n", 137 | "def train(features, targets, epochs, learnrate, graph_lines=False):\n", 138 | " \n", 139 | " errors = []\n", 140 | " n_records, n_features = features.shape\n", 141 | " last_loss = None\n", 142 | " weights = np.random.normal(scale=1 / n_features**.5, size=n_features)\n", 143 | " bias = 0\n", 144 | " for e in range(epochs):\n", 145 | " del_w = np.zeros(weights.shape)\n", 146 | " for x, y in zip(features, targets):\n", 147 | " output = output_formula(x, weights, bias)\n", 148 | " error = error_formula(y, output)\n", 149 | " weights, bias = update_weights(x, y, weights, bias, learnrate)\n", 150 | " \n", 151 | " # Printing out the log-loss error on the training set\n", 152 | " out = output_formula(features, weights, bias)\n", 153 | " loss = np.mean(error_formula(targets, out))\n", 154 | " errors.append(loss)\n", 155 | " if e % (epochs / 10) == 0:\n", 156 | " print(\"\\n========== Epoch\", e,\"==========\")\n", 157 | " if last_loss and last_loss < loss:\n", 158 | " print(\"Train loss: \", loss, \" WARNING - Loss Increasing\")\n", 159 | " else:\n", 160 | " print(\"Train loss: \", loss)\n", 161 | " last_loss = loss\n", 162 | " predictions = out > 0.5\n", 163 | " accuracy = np.mean(predictions == targets)\n", 164 | " print(\"Accuracy: \", accuracy)\n", 165 | " if graph_lines and e % (epochs / 100) == 0:\n", 166 | " display(-weights[0]/weights[1], -bias/weights[1])\n", 167 | " \n", 168 | "\n", 169 | " # Plotting the solution boundary\n", 170 | " plt.title(\"Solution boundary\")\n", 171 | " display(-weights[0]/weights[1], -bias/weights[1], 'black')\n", 172 | "\n", 173 | " # Plotting the data\n", 174 | " plot_points(features, targets)\n", 175 | " plt.show()\n", 176 | "\n", 177 | " # Plotting the error\n", 178 | " plt.title(\"Error 
Plot\")\n", 179 | " plt.xlabel('Number of epochs')\n", 180 | " plt.ylabel('Error')\n", 181 | " plt.plot(errors)\n", 182 | " plt.show()" 183 | ] 184 | }, 185 | { 186 | "cell_type": "markdown", 187 | "metadata": {}, 188 | "source": [ 189 | "## Time to train the algorithm!\n", 190 | "When we run the function, we'll obtain the following:\n", 191 | "- 10 updates with the current training loss and accuracy\n", 192 | "- A plot of the data and some of the boundary lines obtained. The final one is in black. Notice how the lines get closer and closer to the best fit, as we go through more epochs.\n", 193 | "- A plot of the error function. Notice how it decreases as we go through more epochs." 194 | ] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "execution_count": null, 199 | "metadata": {}, 200 | "outputs": [], 201 | "source": [ 202 | "train(X, y, epochs, learnrate, True)" 203 | ] 204 | } 205 | ], 206 | "metadata": { 207 | "kernelspec": { 208 | "display_name": "Python 3 (ipykernel)", 209 | "language": "python", 210 | "name": "python3" 211 | }, 212 | "language_info": { 213 | "codemirror_mode": { 214 | "name": "ipython", 215 | "version": 3 216 | }, 217 | "file_extension": ".py", 218 | "mimetype": "text/x-python", 219 | "name": "python", 220 | "nbconvert_exporter": "python", 221 | "pygments_lexer": "ipython3", 222 | "version": "3.8.2" 223 | } 224 | }, 225 | "nbformat": 4, 226 | "nbformat_minor": 2 227 | } 228 | -------------------------------------------------------------------------------- /gradient-descent/GradientDescentSolutions.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Solution" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": { 14 | "collapsed": true 15 | }, 16 | "outputs": [], 17 | "source": [ 18 | "# Activation (sigmoid) function\n", 19 | "def sigmoid(x):\n", 20 | " return 1 / (1 + np.exp(-x))\n", 21 | "\n", 22 | "# Output (prediction) formula\n", 23 | "def output_formula(features, weights, bias):\n", 24 | " return sigmoid(np.dot(features, weights) + bias)\n", 25 | "\n", 26 | "# Error (log-loss) formula\n", 27 | "def error_formula(y, output):\n", 28 | " return - y*np.log(output) - (1 - y) * np.log(1-output)\n", 29 | "\n", 30 | "# Gradient descent step\n", 31 | "def update_weights(x, y, weights, bias, learnrate):\n", 32 | " output = output_formula(x, weights, bias)\n", 33 | " d_error = y - output\n", 34 | " weights += learnrate * d_error * x\n", 35 | " bias += learnrate * d_error\n", 36 | " return weights, bias" 37 | ] 38 | } 39 | ], 40 | "metadata": { 41 | "kernelspec": { 42 | "display_name": "Python 3 (ipykernel)", 43 | "language": "python", 44 | "name": "python3" 45 | }, 46 | "language_info": { 47 | "codemirror_mode": { 48 | "name": "ipython", 49 | "version": 3 50 | }, 51 | "file_extension": ".py", 52 | "mimetype": "text/x-python", 53 | "name": "python", 54 | "nbconvert_exporter": "python", 55 | "pygments_lexer": "ipython3", 56 | "version": "3.8.2" 57 | } 58 | }, 59 | "nbformat": 4, 60 | "nbformat_minor": 2 61 | } 62 | -------------------------------------------------------------------------------- /gradient-descent/data.csv: -------------------------------------------------------------------------------- 1 | 0.78051,-0.063669,1 2 | 0.28774,0.29139,1 3 | 0.40714,0.17878,1 4 | 0.2923,0.4217,1 5 | 0.50922,0.35256,1 6 | 0.27785,0.10802,1 7 | 0.27527,0.33223,1 8 | 0.43999,0.31245,1 9 | 0.33557,0.42984,1 10 
| 0.23448,0.24986,1 11 | 0.0084492,0.13658,1 12 | 0.12419,0.33595,1 13 | 0.25644,0.42624,1 14 | 0.4591,0.40426,1 15 | 0.44547,0.45117,1 16 | 0.42218,0.20118,1 17 | 0.49563,0.21445,1 18 | 0.30848,0.24306,1 19 | 0.39707,0.44438,1 20 | 0.32945,0.39217,1 21 | 0.40739,0.40271,1 22 | 0.3106,0.50702,1 23 | 0.49638,0.45384,1 24 | 0.10073,0.32053,1 25 | 0.69907,0.37307,1 26 | 0.29767,0.69648,1 27 | 0.15099,0.57341,1 28 | 0.16427,0.27759,1 29 | 0.33259,0.055964,1 30 | 0.53741,0.28637,1 31 | 0.19503,0.36879,1 32 | 0.40278,0.035148,1 33 | 0.21296,0.55169,1 34 | 0.48447,0.56991,1 35 | 0.25476,0.34596,1 36 | 0.21726,0.28641,1 37 | 0.67078,0.46538,1 38 | 0.3815,0.4622,1 39 | 0.53838,0.32774,1 40 | 0.4849,0.26071,1 41 | 0.37095,0.38809,1 42 | 0.54527,0.63911,1 43 | 0.32149,0.12007,1 44 | 0.42216,0.61666,1 45 | 0.10194,0.060408,1 46 | 0.15254,0.2168,1 47 | 0.45558,0.43769,1 48 | 0.28488,0.52142,1 49 | 0.27633,0.21264,1 50 | 0.39748,0.31902,1 51 | 0.5533,1,0 52 | 0.44274,0.59205,0 53 | 0.85176,0.6612,0 54 | 0.60436,0.86605,0 55 | 0.68243,0.48301,0 56 | 1,0.76815,0 57 | 0.72989,0.8107,0 58 | 0.67377,0.77975,0 59 | 0.78761,0.58177,0 60 | 0.71442,0.7668,0 61 | 0.49379,0.54226,0 62 | 0.78974,0.74233,0 63 | 0.67905,0.60921,0 64 | 0.6642,0.72519,0 65 | 0.79396,0.56789,0 66 | 0.70758,0.76022,0 67 | 0.59421,0.61857,0 68 | 0.49364,0.56224,0 69 | 0.77707,0.35025,0 70 | 0.79785,0.76921,0 71 | 0.70876,0.96764,0 72 | 0.69176,0.60865,0 73 | 0.66408,0.92075,0 74 | 0.65973,0.66666,0 75 | 0.64574,0.56845,0 76 | 0.89639,0.7085,0 77 | 0.85476,0.63167,0 78 | 0.62091,0.80424,0 79 | 0.79057,0.56108,0 80 | 0.58935,0.71582,0 81 | 0.56846,0.7406,0 82 | 0.65912,0.71548,0 83 | 0.70938,0.74041,0 84 | 0.59154,0.62927,0 85 | 0.45829,0.4641,0 86 | 0.79982,0.74847,0 87 | 0.60974,0.54757,0 88 | 0.68127,0.86985,0 89 | 0.76694,0.64736,0 90 | 0.69048,0.83058,0 91 | 0.68122,0.96541,0 92 | 0.73229,0.64245,0 93 | 0.76145,0.60138,0 94 | 0.58985,0.86955,0 95 | 0.73145,0.74516,0 96 | 0.77029,0.7014,0 97 | 0.73156,0.71782,0 98 | 0.44556,0.57991,0 99 | 0.85275,0.85987,0 100 | 0.51912,0.62359,0 101 | -------------------------------------------------------------------------------- /gradient-descent/points.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/udacity/cd0281-Introduction-to-Neural-Networks-with-PyTorch/b9077645c089fd3865c0e5d3b992e6f1bfd8a98a/gradient-descent/points.png -------------------------------------------------------------------------------- /student-admissions/StudentAdmissions.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Predicting Student Admissions with Neural Networks\n", 8 | "In this notebook, we predict student admissions to graduate school at UCLA based on three pieces of data:\n", 9 | "- GRE Scores (Test)\n", 10 | "- GPA Scores (Grades)\n", 11 | "- Class rank (1-4)\n", 12 | "\n", 13 | "The dataset originally came from here: http://www.ats.ucla.edu/\n", 14 | "\n", 15 | "## Loading the data\n", 16 | "To load the data and format it nicely, we will use two very useful packages called Pandas and Numpy. 
You can read the documentation here:\n", 17 | "- https://pandas.pydata.org/pandas-docs/stable/\n", 18 | "- https://docs.scipy.org/" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "metadata": { 25 | "collapsed": true 26 | }, 27 | "outputs": [], 28 | "source": [ 29 | "# Importing pandas and numpy\n", 30 | "import pandas as pd\n", 31 | "import numpy as np\n", 32 | "\n", 33 | "# Reading the csv file into a pandas DataFrame\n", 34 | "data = pd.read_csv('student_data.csv')\n", 35 | "\n", 36 | "# Printing out the first 10 rows of our data\n", 37 | "data[:10]" 38 | ] 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "metadata": {}, 43 | "source": [ 44 | "## Plotting the data\n", 45 | "\n", 46 | "First, let's make a plot of our data to see how it looks. In order to have a 2D plot, let's ignore the rank." 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": { 53 | "collapsed": true 54 | }, 55 | "outputs": [], 56 | "source": [ 57 | "# Importing matplotlib\n", 58 | "import matplotlib.pyplot as plt\n", 59 | "\n", 60 | "# Function to help us plot\n", 61 | "def plot_points(data):\n", 62 | "    X = np.array(data[[\"gre\",\"gpa\"]])\n", 63 | "    y = np.array(data[\"admit\"])\n", 64 | "    admitted = X[np.argwhere(y==1)]\n", 65 | "    rejected = X[np.argwhere(y==0)]\n", 66 | "    plt.scatter([s[0][0] for s in rejected], [s[0][1] for s in rejected], s = 25, color = 'red', edgecolor = 'k')\n", 67 | "    plt.scatter([s[0][0] for s in admitted], [s[0][1] for s in admitted], s = 25, color = 'cyan', edgecolor = 'k')\n", 68 | "    plt.xlabel('Test (GRE)')\n", 69 | "    plt.ylabel('Grades (GPA)')\n", 70 | "    \n", 71 | "# Plotting the points\n", 72 | "plot_points(data)\n", 73 | "plt.show()" 74 | ] 75 | }, 76 | { 77 | "cell_type": "markdown", 78 | "metadata": {}, 79 | "source": [ 80 | "Roughly, it looks like the students with high scores in the grades and test passed, while the ones with low scores didn't, but the data is not as nicely separable as we hoped it would be. Maybe it would help to take the rank into account? Let's make four plots, one for each rank." 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": null, 86 | "metadata": { 87 | "collapsed": true 88 | }, 89 | "outputs": [], 90 | "source": [ 91 | "# Separating the ranks\n", 92 | "data_rank1 = data[data[\"rank\"]==1]\n", 93 | "data_rank2 = data[data[\"rank\"]==2]\n", 94 | "data_rank3 = data[data[\"rank\"]==3]\n", 95 | "data_rank4 = data[data[\"rank\"]==4]\n", 96 | "\n", 97 | "# Plotting the graphs\n", 98 | "plot_points(data_rank1)\n", 99 | "plt.title(\"Rank 1\")\n", 100 | "plt.show()\n", 101 | "plot_points(data_rank2)\n", 102 | "plt.title(\"Rank 2\")\n", 103 | "plt.show()\n", 104 | "plot_points(data_rank3)\n", 105 | "plt.title(\"Rank 3\")\n", 106 | "plt.show()\n", 107 | "plot_points(data_rank4)\n", 108 | "plt.title(\"Rank 4\")\n", 109 | "plt.show()" 110 | ] 111 | }, 112 | { 113 | "cell_type": "markdown", 114 | "metadata": {}, 115 | "source": [ 116 | "This looks more promising, as it seems that the lower the rank, the higher the acceptance rate. Let's use the rank as one of our inputs. In order to do this, we should one-hot encode it.\n", 117 | "\n", 118 | "## TODO: One-hot encoding the rank\n", 119 | "Use the `get_dummies` function in Pandas in order to one-hot encode the data." 
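As a hint, a minimal sketch using `get_dummies` — matching the bundled `StudentAdmissionsSolutions.ipynb`, and assuming `data` is the DataFrame loaded above — looks like this; `prefix='rank'` names the new columns `rank_1` through `rank_4`:

```python
# Make dummy variables for rank and append them to the original columns
one_hot_data = pd.concat([data, pd.get_dummies(data['rank'], prefix='rank')], axis=1)

# Drop the previous rank column, now that it is encoded
one_hot_data = one_hot_data.drop('rank', axis=1)
```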
120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": null, 125 | "metadata": { 126 | "collapsed": true 127 | }, 128 | "outputs": [], 129 | "source": [ 130 | "# TODO: Make dummy variables for rank\n", 131 | "one_hot_data = None\n", 132 | "\n", 133 | "# TODO: Drop the previous rank column\n", 134 | "one_hot_data = None\n", 135 | "\n", 136 | "# Print the first 10 rows of our data\n", 137 | "one_hot_data[:10]" 138 | ] 139 | }, 140 | { 141 | "cell_type": "markdown", 142 | "metadata": {}, 143 | "source": [ 144 | "## TODO: Scaling the data\n", 145 | "The next step is to scale the data. We notice that the range for grades is 1.0-4.0, whereas the range for test scores is roughly 200-800, which is much larger. This means the two features live on very different scales, which makes training a neural network harder. Let's fit our two features into a range of 0-1, by dividing the grades by 4.0, and the test score by 800." 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": null, 151 | "metadata": { 152 | "collapsed": true 153 | }, 154 | "outputs": [], 155 | "source": [ 156 | "# Making a copy of our data\n", 157 | "processed_data = one_hot_data[:]\n", 158 | "\n", 159 | "# TODO: Scale the columns\n", 160 | "\n", 161 | "# Printing the first 10 rows of our processed data\n", 162 | "processed_data[:10]" 163 | ] 164 | }, 165 | { 166 | "cell_type": "markdown", 167 | "metadata": {}, 168 | "source": [ 169 | "## Splitting the data into Training and Testing" 170 | ] 171 | }, 172 | { 173 | "cell_type": "markdown", 174 | "metadata": {}, 175 | "source": [ 176 | "In order to test our algorithm, we'll split the data into a Training and a Testing set. The size of the testing set will be 10% of the total data." 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": null, 182 | "metadata": { 183 | "collapsed": true 184 | }, 185 | "outputs": [], 186 | "source": [ 187 | "sample = np.random.choice(processed_data.index, size=int(len(processed_data)*0.9), replace=False)\n", 188 | "train_data, test_data = processed_data.iloc[sample], processed_data.drop(sample)\n", 189 | "\n", 190 | "print(\"Number of training samples is\", len(train_data))\n", 191 | "print(\"Number of testing samples is\", len(test_data))\n", 192 | "print(train_data[:10])\n", 193 | "print(test_data[:10])" 194 | ] 195 | }, 196 | { 197 | "cell_type": "markdown", 198 | "metadata": {}, 199 | "source": [ 200 | "## Splitting the data into features and targets (labels)\n", 201 | "Now, as a final step before the training, we'll split the data into features (X) and targets (y)." 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": null, 207 | "metadata": { 208 | "collapsed": true 209 | }, 210 | "outputs": [], 211 | "source": [ 212 | "features = train_data.drop('admit', axis=1)\n", 213 | "targets = train_data['admit']\n", 214 | "features_test = test_data.drop('admit', axis=1)\n", 215 | "targets_test = test_data['admit']\n", 216 | "\n", 217 | "print(features[:10])\n", 218 | "print(targets[:10])" 219 | ] 220 | }, 221 | { 222 | "cell_type": "markdown", 223 | "metadata": {}, 224 | "source": [ 225 | "## Training the 2-layer Neural Network\n", 226 | "The following function trains the 2-layer neural network. First, we'll write some helper functions." 
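One of the helpers defined next, `sigmoid_prime`, relies on the standard identity for the derivative of the sigmoid, which follows from the chain rule:

$$\sigma'(x) = \frac{d}{dx}\left(1+e^{-x}\right)^{-1} = \frac{e^{-x}}{\left(1+e^{-x}\right)^{2}} = \sigma(x)\left(1-\sigma(x)\right)$$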
227 | ] 228 | }, 229 | { 230 | "cell_type": "code", 231 | "execution_count": null, 232 | "metadata": { 233 | "collapsed": true 234 | }, 235 | "outputs": [], 236 | "source": [ 237 | "# Activation (sigmoid) function\n", 238 | "def sigmoid(x):\n", 239 | "    return 1 / (1 + np.exp(-x))\n", 240 | "def sigmoid_prime(x):\n", 241 | "    return sigmoid(x) * (1-sigmoid(x))\n", 242 | "def error_formula(y, output):\n", 243 | "    return - y*np.log(output) - (1 - y) * np.log(1-output)" 244 | ] 245 | }, 246 | { 247 | "cell_type": "markdown", 248 | "metadata": {}, 249 | "source": [ 250 | "## TODO: Backpropagate the error\n", 251 | "Now it's your turn to shine. Write the error term. Remember that this is given by the equation $$ (y-\\hat{y}) \\sigma'(x) $$" 252 | ] 253 | }, 254 | { 255 | "cell_type": "code", 256 | "execution_count": null, 257 | "metadata": { 258 | "collapsed": true 259 | }, 260 | "outputs": [], 261 | "source": [ 262 | "# TODO: Write the error term formula\n", 263 | "def error_term_formula(x, y, output):\n", 264 | "    pass" 265 | ] 266 | }, 267 | { 268 | "cell_type": "code", 269 | "execution_count": null, 270 | "metadata": { 271 | "collapsed": true 272 | }, 273 | "outputs": [], 274 | "source": [ 275 | "# Neural Network hyperparameters\n", 276 | "epochs = 1000\n", 277 | "learnrate = 0.5\n", 278 | "\n", 279 | "# Training function\n", 280 | "def train_nn(features, targets, epochs, learnrate):\n", 281 | "    \n", 282 | "    # Use the same seed to make debugging easier\n", 283 | "    np.random.seed(42)\n", 284 | "\n", 285 | "    n_records, n_features = features.shape\n", 286 | "    last_loss = None\n", 287 | "\n", 288 | "    # Initialize weights\n", 289 | "    weights = np.random.normal(scale=1 / n_features**.5, size=n_features)\n", 290 | "\n", 291 | "    for e in range(epochs):\n", 292 | "        del_w = np.zeros(weights.shape)\n", 293 | "        for x, y in zip(features.values, targets):\n", 294 | "            # Loop through all records, x is the input, y is the target\n", 295 | "\n", 296 | "            # Activation of the output unit\n", 297 | "            #   Notice we multiply the inputs and the weights here \n", 298 | "            #   rather than storing h as a separate variable \n", 299 | "            output = sigmoid(np.dot(x, weights))\n", 300 | "\n", 301 | "            # The error (log-loss) at this point\n", 302 | "            error = error_formula(y, output)\n", 303 | "\n", 304 | "            # The error term\n", 305 | "            error_term = error_term_formula(x, y, output)\n", 306 | "\n", 307 | "            # The gradient descent step: accumulate the error term times the inputs\n", 308 | "            del_w += error_term * x\n", 309 | "\n", 310 | "        # Update the weights here. 
The learning rate times the \n", 311 | " # change in weights, divided by the number of records to average\n", 312 | " weights += learnrate * del_w / n_records\n", 313 | "\n", 314 | " # Printing out the mean square error on the training set\n", 315 | " if e % (epochs / 10) == 0:\n", 316 | " out = sigmoid(np.dot(features, weights))\n", 317 | " loss = np.mean((out - targets) ** 2)\n", 318 | " print(\"Epoch:\", e)\n", 319 | " if last_loss and last_loss < loss:\n", 320 | " print(\"Train loss: \", loss, \" WARNING - Loss Increasing\")\n", 321 | " else:\n", 322 | " print(\"Train loss: \", loss)\n", 323 | " last_loss = loss\n", 324 | " print(\"=========\")\n", 325 | " print(\"Finished training!\")\n", 326 | " return weights\n", 327 | " \n", 328 | "weights = train_nn(features, targets, epochs, learnrate)" 329 | ] 330 | }, 331 | { 332 | "cell_type": "markdown", 333 | "metadata": {}, 334 | "source": [ 335 | "## Calculating the Accuracy on the Test Data" 336 | ] 337 | }, 338 | { 339 | "cell_type": "code", 340 | "execution_count": null, 341 | "metadata": { 342 | "collapsed": true 343 | }, 344 | "outputs": [], 345 | "source": [ 346 | "# Calculate accuracy on test data\n", 347 | "test_out = sigmoid(np.dot(features_test, weights))\n", 348 | "predictions = test_out > 0.5\n", 349 | "accuracy = np.mean(predictions == targets_test)\n", 350 | "print(\"Prediction accuracy: {:.3f}\".format(accuracy))" 351 | ] 352 | }, 353 | { 354 | "cell_type": "code", 355 | "execution_count": null, 356 | "metadata": { 357 | "collapsed": true 358 | }, 359 | "outputs": [], 360 | "source": [] 361 | } 362 | ], 363 | "metadata": { 364 | "kernelspec": { 365 | "display_name": "Python 3", 366 | "language": "python", 367 | "name": "python3" 368 | }, 369 | "language_info": { 370 | "codemirror_mode": { 371 | "name": "ipython", 372 | "version": 3 373 | }, 374 | "file_extension": ".py", 375 | "mimetype": "text/x-python", 376 | "name": "python", 377 | "nbconvert_exporter": "python", 378 | "pygments_lexer": "ipython3", 379 | "version": "3.6.3" 380 | } 381 | }, 382 | "nbformat": 4, 383 | "nbformat_minor": 2 384 | } 385 | -------------------------------------------------------------------------------- /student-admissions/StudentAdmissionsSolutions.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Solutions" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "### One-hot encoding the rank" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": { 21 | "collapsed": true 22 | }, 23 | "outputs": [], 24 | "source": [ 25 | "# Make dummy variables for rank\n", 26 | "one_hot_data = pd.concat([data, pd.get_dummies(data['rank'], prefix='rank')], axis=1)\n", 27 | "\n", 28 | "# Drop the previous rank column\n", 29 | "one_hot_data = one_hot_data.drop('rank', axis=1)\n", 30 | "\n", 31 | "# Print the first 10 rows of our data\n", 32 | "one_hot_data[:10]" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [ 39 | "### Scaling the data" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": null, 45 | "metadata": { 46 | "collapsed": true 47 | }, 48 | "outputs": [], 49 | "source": [ 50 | "# Copying our data\n", 51 | "processed_data = one_hot_data[:]\n", 52 | "\n", 53 | "# Scaling the columns\n", 54 | "processed_data['gre'] = processed_data['gre']/800\n", 55 | "processed_data['gpa'] = 
processed_data['gpa']/4.0\n", 56 | "processed_data[:10]" 57 | ] 58 | }, 59 | { 60 | "cell_type": "markdown", 61 | "metadata": {}, 62 | "source": [ 63 | "### Backpropagating the data" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": null, 69 | "metadata": { 70 | "collapsed": true 71 | }, 72 | "outputs": [], 73 | "source": [ 74 | "def error_term_formula(x, y, output):\n", 75 | " return (y - output)*sigmoid_prime(x)" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": null, 81 | "metadata": {}, 82 | "outputs": [], 83 | "source": [ 84 | "## alternative solution ##\n", 85 | "# you could also *only* use y and the output \n", 86 | "# and calculate sigmoid_prime directly from the activated output!\n", 87 | "\n", 88 | "# below is an equally valid solution (it doesn't utilize x)\n", 89 | "def error_term_formula(x, y, output):\n", 90 | " return (y-output) * output * (1 - output)" 91 | ] 92 | } 93 | ], 94 | "metadata": { 95 | "kernelspec": { 96 | "display_name": "Python 3", 97 | "language": "python", 98 | "name": "python3" 99 | }, 100 | "language_info": { 101 | "codemirror_mode": { 102 | "name": "ipython", 103 | "version": 3 104 | }, 105 | "file_extension": ".py", 106 | "mimetype": "text/x-python", 107 | "name": "python", 108 | "nbconvert_exporter": "python", 109 | "pygments_lexer": "ipython3", 110 | "version": "3.6.3" 111 | } 112 | }, 113 | "nbformat": 4, 114 | "nbformat_minor": 2 115 | } 116 | -------------------------------------------------------------------------------- /student-admissions/student_data.csv: -------------------------------------------------------------------------------- 1 | admit,gre,gpa,rank 2 | 0,380,3.61,3 3 | 1,660,3.67,3 4 | 1,800,4,1 5 | 1,640,3.19,4 6 | 0,520,2.93,4 7 | 1,760,3,2 8 | 1,560,2.98,1 9 | 0,400,3.08,2 10 | 1,540,3.39,3 11 | 0,700,3.92,2 12 | 0,800,4,4 13 | 0,440,3.22,1 14 | 1,760,4,1 15 | 0,700,3.08,2 16 | 1,700,4,1 17 | 0,480,3.44,3 18 | 0,780,3.87,4 19 | 0,360,2.56,3 20 | 0,800,3.75,2 21 | 1,540,3.81,1 22 | 0,500,3.17,3 23 | 1,660,3.63,2 24 | 0,600,2.82,4 25 | 0,680,3.19,4 26 | 1,760,3.35,2 27 | 1,800,3.66,1 28 | 1,620,3.61,1 29 | 1,520,3.74,4 30 | 1,780,3.22,2 31 | 0,520,3.29,1 32 | 0,540,3.78,4 33 | 0,760,3.35,3 34 | 0,600,3.4,3 35 | 1,800,4,3 36 | 0,360,3.14,1 37 | 0,400,3.05,2 38 | 0,580,3.25,1 39 | 0,520,2.9,3 40 | 1,500,3.13,2 41 | 1,520,2.68,3 42 | 0,560,2.42,2 43 | 1,580,3.32,2 44 | 1,600,3.15,2 45 | 0,500,3.31,3 46 | 0,700,2.94,2 47 | 1,460,3.45,3 48 | 1,580,3.46,2 49 | 0,500,2.97,4 50 | 0,440,2.48,4 51 | 0,400,3.35,3 52 | 0,640,3.86,3 53 | 0,440,3.13,4 54 | 0,740,3.37,4 55 | 1,680,3.27,2 56 | 0,660,3.34,3 57 | 1,740,4,3 58 | 0,560,3.19,3 59 | 0,380,2.94,3 60 | 0,400,3.65,2 61 | 0,600,2.82,4 62 | 1,620,3.18,2 63 | 0,560,3.32,4 64 | 0,640,3.67,3 65 | 1,680,3.85,3 66 | 0,580,4,3 67 | 0,600,3.59,2 68 | 0,740,3.62,4 69 | 0,620,3.3,1 70 | 0,580,3.69,1 71 | 0,800,3.73,1 72 | 0,640,4,3 73 | 0,300,2.92,4 74 | 0,480,3.39,4 75 | 0,580,4,2 76 | 0,720,3.45,4 77 | 0,720,4,3 78 | 0,560,3.36,3 79 | 1,800,4,3 80 | 0,540,3.12,1 81 | 1,620,4,1 82 | 0,700,2.9,4 83 | 0,620,3.07,2 84 | 0,500,2.71,2 85 | 0,380,2.91,4 86 | 1,500,3.6,3 87 | 0,520,2.98,2 88 | 0,600,3.32,2 89 | 0,600,3.48,2 90 | 0,700,3.28,1 91 | 1,660,4,2 92 | 0,700,3.83,2 93 | 1,720,3.64,1 94 | 0,800,3.9,2 95 | 0,580,2.93,2 96 | 1,660,3.44,2 97 | 0,660,3.33,2 98 | 0,640,3.52,4 99 | 0,480,3.57,2 100 | 0,700,2.88,2 101 | 0,400,3.31,3 102 | 0,340,3.15,3 103 | 0,580,3.57,3 104 | 0,380,3.33,4 105 | 0,540,3.94,3 106 | 1,660,3.95,2 107 | 1,740,2.97,2 108 | 1,700,3.56,1 
109 | 0,480,3.13,2 110 | 0,400,2.93,3 111 | 0,480,3.45,2 112 | 0,680,3.08,4 113 | 0,420,3.41,4 114 | 0,360,3,3 115 | 0,600,3.22,1 116 | 0,720,3.84,3 117 | 0,620,3.99,3 118 | 1,440,3.45,2 119 | 0,700,3.72,2 120 | 1,800,3.7,1 121 | 0,340,2.92,3 122 | 1,520,3.74,2 123 | 1,480,2.67,2 124 | 0,520,2.85,3 125 | 0,500,2.98,3 126 | 0,720,3.88,3 127 | 0,540,3.38,4 128 | 1,600,3.54,1 129 | 0,740,3.74,4 130 | 0,540,3.19,2 131 | 0,460,3.15,4 132 | 1,620,3.17,2 133 | 0,640,2.79,2 134 | 0,580,3.4,2 135 | 0,500,3.08,3 136 | 0,560,2.95,2 137 | 0,500,3.57,3 138 | 0,560,3.33,4 139 | 0,700,4,3 140 | 0,620,3.4,2 141 | 1,600,3.58,1 142 | 0,640,3.93,2 143 | 1,700,3.52,4 144 | 0,620,3.94,4 145 | 0,580,3.4,3 146 | 0,580,3.4,4 147 | 0,380,3.43,3 148 | 0,480,3.4,2 149 | 0,560,2.71,3 150 | 1,480,2.91,1 151 | 0,740,3.31,1 152 | 1,800,3.74,1 153 | 0,400,3.38,2 154 | 1,640,3.94,2 155 | 0,580,3.46,3 156 | 0,620,3.69,3 157 | 1,580,2.86,4 158 | 0,560,2.52,2 159 | 1,480,3.58,1 160 | 0,660,3.49,2 161 | 0,700,3.82,3 162 | 0,600,3.13,2 163 | 0,640,3.5,2 164 | 1,700,3.56,2 165 | 0,520,2.73,2 166 | 0,580,3.3,2 167 | 0,700,4,1 168 | 0,440,3.24,4 169 | 0,720,3.77,3 170 | 0,500,4,3 171 | 0,600,3.62,3 172 | 0,400,3.51,3 173 | 0,540,2.81,3 174 | 0,680,3.48,3 175 | 1,800,3.43,2 176 | 0,500,3.53,4 177 | 1,620,3.37,2 178 | 0,520,2.62,2 179 | 1,620,3.23,3 180 | 0,620,3.33,3 181 | 0,300,3.01,3 182 | 0,620,3.78,3 183 | 0,500,3.88,4 184 | 0,700,4,2 185 | 1,540,3.84,2 186 | 0,500,2.79,4 187 | 0,800,3.6,2 188 | 0,560,3.61,3 189 | 0,580,2.88,2 190 | 0,560,3.07,2 191 | 0,500,3.35,2 192 | 1,640,2.94,2 193 | 0,800,3.54,3 194 | 0,640,3.76,3 195 | 0,380,3.59,4 196 | 1,600,3.47,2 197 | 0,560,3.59,2 198 | 0,660,3.07,3 199 | 1,400,3.23,4 200 | 0,600,3.63,3 201 | 0,580,3.77,4 202 | 0,800,3.31,3 203 | 1,580,3.2,2 204 | 1,700,4,1 205 | 0,420,3.92,4 206 | 1,600,3.89,1 207 | 1,780,3.8,3 208 | 0,740,3.54,1 209 | 1,640,3.63,1 210 | 0,540,3.16,3 211 | 0,580,3.5,2 212 | 0,740,3.34,4 213 | 0,580,3.02,2 214 | 0,460,2.87,2 215 | 0,640,3.38,3 216 | 1,600,3.56,2 217 | 1,660,2.91,3 218 | 0,340,2.9,1 219 | 1,460,3.64,1 220 | 0,460,2.98,1 221 | 1,560,3.59,2 222 | 0,540,3.28,3 223 | 0,680,3.99,3 224 | 1,480,3.02,1 225 | 0,800,3.47,3 226 | 0,800,2.9,2 227 | 1,720,3.5,3 228 | 0,620,3.58,2 229 | 0,540,3.02,4 230 | 0,480,3.43,2 231 | 1,720,3.42,2 232 | 0,580,3.29,4 233 | 0,600,3.28,3 234 | 0,380,3.38,2 235 | 0,420,2.67,3 236 | 1,800,3.53,1 237 | 0,620,3.05,2 238 | 1,660,3.49,2 239 | 0,480,4,2 240 | 0,500,2.86,4 241 | 0,700,3.45,3 242 | 0,440,2.76,2 243 | 1,520,3.81,1 244 | 1,680,2.96,3 245 | 0,620,3.22,2 246 | 0,540,3.04,1 247 | 0,800,3.91,3 248 | 0,680,3.34,2 249 | 0,440,3.17,2 250 | 0,680,3.64,3 251 | 0,640,3.73,3 252 | 0,660,3.31,4 253 | 0,620,3.21,4 254 | 1,520,4,2 255 | 1,540,3.55,4 256 | 1,740,3.52,4 257 | 0,640,3.35,3 258 | 1,520,3.3,2 259 | 1,620,3.95,3 260 | 0,520,3.51,2 261 | 0,640,3.81,2 262 | 0,680,3.11,2 263 | 0,440,3.15,2 264 | 1,520,3.19,3 265 | 1,620,3.95,3 266 | 1,520,3.9,3 267 | 0,380,3.34,3 268 | 0,560,3.24,4 269 | 1,600,3.64,3 270 | 1,680,3.46,2 271 | 0,500,2.81,3 272 | 1,640,3.95,2 273 | 0,540,3.33,3 274 | 1,680,3.67,2 275 | 0,660,3.32,1 276 | 0,520,3.12,2 277 | 1,600,2.98,2 278 | 0,460,3.77,3 279 | 1,580,3.58,1 280 | 1,680,3,4 281 | 1,660,3.14,2 282 | 0,660,3.94,2 283 | 0,360,3.27,3 284 | 0,660,3.45,4 285 | 0,520,3.1,4 286 | 1,440,3.39,2 287 | 0,600,3.31,4 288 | 1,800,3.22,1 289 | 1,660,3.7,4 290 | 0,800,3.15,4 291 | 0,420,2.26,4 292 | 1,620,3.45,2 293 | 0,800,2.78,2 294 | 0,680,3.7,2 295 | 0,800,3.97,1 296 | 0,480,2.55,1 297 | 0,520,3.25,3 298 | 
0,560,3.16,1 299 | 0,460,3.07,2 300 | 0,540,3.5,2 301 | 0,720,3.4,3 302 | 0,640,3.3,2 303 | 1,660,3.6,3 304 | 1,400,3.15,2 305 | 1,680,3.98,2 306 | 0,220,2.83,3 307 | 0,580,3.46,4 308 | 1,540,3.17,1 309 | 0,580,3.51,2 310 | 0,540,3.13,2 311 | 0,440,2.98,3 312 | 0,560,4,3 313 | 0,660,3.67,2 314 | 0,660,3.77,3 315 | 1,520,3.65,4 316 | 0,540,3.46,4 317 | 1,300,2.84,2 318 | 1,340,3,2 319 | 1,780,3.63,4 320 | 1,480,3.71,4 321 | 0,540,3.28,1 322 | 0,460,3.14,3 323 | 0,460,3.58,2 324 | 0,500,3.01,4 325 | 0,420,2.69,2 326 | 0,520,2.7,3 327 | 0,680,3.9,1 328 | 0,680,3.31,2 329 | 1,560,3.48,2 330 | 0,580,3.34,2 331 | 0,500,2.93,4 332 | 0,740,4,3 333 | 0,660,3.59,3 334 | 0,420,2.96,1 335 | 0,560,3.43,3 336 | 1,460,3.64,3 337 | 1,620,3.71,1 338 | 0,520,3.15,3 339 | 0,620,3.09,4 340 | 0,540,3.2,1 341 | 1,660,3.47,3 342 | 0,500,3.23,4 343 | 1,560,2.65,3 344 | 0,500,3.95,4 345 | 0,580,3.06,2 346 | 0,520,3.35,3 347 | 0,500,3.03,3 348 | 0,600,3.35,2 349 | 0,580,3.8,2 350 | 0,400,3.36,2 351 | 0,620,2.85,2 352 | 1,780,4,2 353 | 0,620,3.43,3 354 | 1,580,3.12,3 355 | 0,700,3.52,2 356 | 1,540,3.78,2 357 | 1,760,2.81,1 358 | 0,700,3.27,2 359 | 0,720,3.31,1 360 | 1,560,3.69,3 361 | 0,720,3.94,3 362 | 1,520,4,1 363 | 1,540,3.49,1 364 | 0,680,3.14,2 365 | 0,460,3.44,2 366 | 1,560,3.36,1 367 | 0,480,2.78,3 368 | 0,460,2.93,3 369 | 0,620,3.63,3 370 | 0,580,4,1 371 | 0,800,3.89,2 372 | 1,540,3.77,2 373 | 1,680,3.76,3 374 | 1,680,2.42,1 375 | 1,620,3.37,1 376 | 0,560,3.78,2 377 | 0,560,3.49,4 378 | 0,620,3.63,2 379 | 1,800,4,2 380 | 0,640,3.12,3 381 | 0,540,2.7,2 382 | 0,700,3.65,2 383 | 1,540,3.49,2 384 | 0,540,3.51,2 385 | 0,660,4,1 386 | 1,480,2.62,2 387 | 0,420,3.02,1 388 | 1,740,3.86,2 389 | 0,580,3.36,2 390 | 0,640,3.17,2 391 | 0,640,3.51,2 392 | 1,800,3.05,2 393 | 1,660,3.88,2 394 | 1,600,3.38,3 395 | 1,620,3.75,2 396 | 1,460,3.99,3 397 | 0,620,4,2 398 | 0,560,3.04,3 399 | 0,460,2.63,2 400 | 0,700,3.65,2 401 | 0,600,3.89,3 402 | --------------------------------------------------------------------------------
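A closing note on the two `error_term_formula` variants in the solutions above: they produce the same value provided `sigmoid_prime` is evaluated at the pre-activation $h = x \cdot w$, since the network output is $\sigma(h)$ and therefore $\sigma'(h) = \text{output} \times (1 - \text{output})$. A quick numerical check, using hypothetical values for the record, weights, and label:

```python
import numpy as np

def sigmoid(h):
    return 1 / (1 + np.exp(-h))

def sigmoid_prime(h):
    return sigmoid(h) * (1 - sigmoid(h))

# Hypothetical record, weights, and label (illustration only)
x = np.array([0.5, 1.0, 0.25])
w = np.array([0.1, -0.2, 0.3])
y = 1.0

h = np.dot(x, w)     # pre-activation
output = sigmoid(h)  # network output

print((y - output) * sigmoid_prime(h))       # first variant, evaluated at h
print((y - output) * output * (1 - output))  # second variant: same value
```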