├── .gitignore ├── .idea ├── misc.xml ├── modules.xml ├── neural-networks-and-deep-learning.iml └── vcs.xml ├── README.md ├── data └── mnist.pkl.gz ├── fig ├── backprop_magnitude_nabla.png ├── backprop_magnitude_nabla.py ├── data_1000.json ├── digits.png ├── digits_separate.png ├── false_minima.png ├── false_minima.py ├── generate_gradient.py ├── initial_gradient.json ├── misleading_gradient.png ├── misleading_gradient.py ├── misleading_gradient_contours.png ├── misleading_gradient_contours.py ├── mnist.py ├── mnist_100_digits.png ├── mnist_2_and_1.png ├── mnist_complete_zero.png ├── mnist_first_digit.png ├── mnist_other_features.png ├── mnist_really_bad_images.png ├── mnist_top_left_feature.png ├── more_data.json ├── more_data.png ├── more_data.py ├── more_data_5.png ├── more_data_comparison.png ├── more_data_log.png ├── more_data_rotated_5.png ├── more_data_svm.json ├── multiple_eta.json ├── multiple_eta.png ├── multiple_eta.py ├── norms_during_training_2_layers.json ├── norms_during_training_3_layers.json ├── norms_during_training_4_layers.json ├── overfitting.json ├── overfitting.py ├── overfitting1.png ├── overfitting2.png ├── overfitting3.png ├── overfitting4.png ├── overfitting_full.json ├── overfitting_full.png ├── pca_hard_data.png ├── pca_hard_data_fit.png ├── pca_limitations.py ├── regularized.json ├── regularized1.png ├── regularized2.png ├── regularized_full.json ├── regularized_full.png ├── replaced_by_d3 │ ├── README.md │ ├── relu.png │ ├── relu.py │ ├── sigmoid.png │ ├── sigmoid.py │ ├── step.png │ ├── step.py │ ├── tanh.png │ └── tanh.py ├── serialize_images_to_json.py ├── test.png ├── training_speed_2_layers.png ├── training_speed_3_layers.png ├── training_speed_4_layers.png ├── valley.png ├── valley.py ├── valley2.png ├── valley2.py ├── weight_initialization.py ├── weight_initialization_100.json ├── weight_initialization_100.png ├── weight_initialization_30.json └── weight_initialization_30.png ├── requirements.txt └── src ├── conv.py ├── 
expand_mnist.py ├── mnist_average_darkness.py ├── mnist_loader.py ├── mnist_svm.py ├── network.py ├── network2.py ├── network3.py ├── old ├── blog │ ├── __init__.py │ └── common_knowledge.py ├── cost_vs_iterations.png ├── cost_vs_iterations_trapped.png ├── deep_autoencoder.py ├── deep_learning.py ├── gradient_descent_hack.py ├── mnist_100_30_deep_autoencoder.png ├── mnist_100_unit_autoencoder.png ├── mnist_10_unit_autoencoder.png ├── mnist_30_component_pca.png ├── mnist_30_unit_autoencoder.png ├── mnist_autoencoder.py ├── mnist_pca.py └── perceptron_learning.py └── test_mnist.py /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | *.org 3 | *.pem 4 | *.pkl 5 | *.pyc 6 | .DS_Store 7 | loc.py 8 | src/ec2 9 | -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/neural-networks-and-deep-learning.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 11 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Code samples for "Neural Networks and Deep Learning" (Python 3.x version) 2 | 3 | This repository contains code samples for my (forthcoming) book 
on 4 | "Neural Networks and Deep Learning". 5 | 6 | As the code is written to accompany the book, I don't intend to add 7 | new features. However, bug reports are welcome, and you should feel 8 | free to fork and modify the code. 9 | 10 | ## Changes 11 | This is the code for the online book "Neural Networks and Deep Learning", modified to run on Python 3.x. 12 | 13 | If you are interested in the book but prefer Python 3, you can use this version. 14 | 15 | My homepage: http://www.liuxiao.org 16 | 17 | ## License 18 | 19 | MIT License 20 | 21 | Copyright (c) 2012-2015 Michael Nielsen 22 | 23 | Permission is hereby granted, free of charge, to any person obtaining 24 | a copy of this software and associated documentation files (the 25 | "Software"), to deal in the Software without restriction, including 26 | without limitation the rights to use, copy, modify, merge, publish, 27 | distribute, sublicense, and/or sell copies of the Software, and to 28 | permit persons to whom the Software is furnished to do so, subject to 29 | the following conditions: 30 | 31 | The above copyright notice and this permission notice shall be 32 | included in all copies or substantial portions of the Software. 33 | 34 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 35 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 36 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 37 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 38 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 39 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 40 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
41 | -------------------------------------------------------------------------------- /data/mnist.pkl.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/data/mnist.pkl.gz -------------------------------------------------------------------------------- /fig/backprop_magnitude_nabla.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/backprop_magnitude_nabla.png -------------------------------------------------------------------------------- /fig/backprop_magnitude_nabla.py: -------------------------------------------------------------------------------- 1 | """ 2 | backprop_magnitude_nabla 3 | ~~~~~~~~~~~~~~~~~~~~~~~~ 4 | 5 | Using backprop2 I constructed a 784-30-30-30-30-30-10 network to classify 6 | MNIST data. 
I ran ten mini-batches of size 100, with eta = 0.01 and 7 | lambda = 0.05, using: 8 | 9 | net.SGD(otd[:1000], 1, 100, 0.01, 0.05, 10 | 11 | I obtained the following norms for the (unregularized) nabla_w for the 12 | respective mini-batches: 13 | 14 | [0.90845722175923671, 2.8852730656073566, 10.696793986223632, 37.75701921183488, 157.7365422527995, 304.43990075227839] 15 | [0.22493835119537842, 0.6555126517964851, 2.6036801277234076, 11.408825365731225, 46.882319190445472, 70.499637502698221] 16 | [0.11935180022357521, 0.19756069137133489, 0.8152794148335869, 3.4590802543293977, 15.470507965493903, 31.032396017142556] 17 | [0.15130005837653659, 0.39687135985664701, 1.4810006139254532, 4.392519005642268, 16.831939776937311, 34.082104455938733] 18 | [0.11594085276308999, 0.17177668061395848, 0.72204558746599512, 3.05062409378366, 14.133001132214286, 29.776204839994385] 19 | [0.10790389807606221, 0.20707152756018626, 0.96348134037828603, 3.9043824079499561, 15.986873430586924, 39.195258080490895] 20 | [0.088613291101645356, 0.129173436407863, 0.4242933114455002, 1.6154682713449411, 7.5451567587160069, 20.180545544006566] 21 | [0.086175380639289575, 0.12571016850457151, 0.44231149185805047, 1.8435833504677326, 7.61973813981073, 19.474539356281781] 22 | [0.095372080184163904, 0.15854489503205446, 0.70244235144444678, 2.6294803575724157, 10.427062019753425, 24.309420272033819] 23 | [0.096453131000155692, 0.13574642196947601, 0.53551377709415471, 2.0247466793066895, 9.4503978546018068, 21.73772148470092] 24 | 25 | Note that results are listed in order of layer. They clearly show how 26 | the magnitude of nabla_w decreases as we go back through layers. 27 | 28 | In this program I take mini-batches 7, 8, 9 as representative and plot 29 | them. I omit the results from the first and final layers since they 30 | correspond to 784 input neurons and 10 output neurons, not 30 as in 31 | the other layers, making it difficult to compare results. 
32 | 33 | Note that I haven't attempted to preserve the whole workflow here. It 34 | involved some minor hacking around with backprop2, which messed up 35 | that code. That's why I've simply put the results in by hand below. 36 | """ 37 | 38 | # Third-party libraries 39 | import matplotlib.pyplot as plt 40 | 41 | nw1 = [0.129173436407863, 0.4242933114455002, 42 | 1.6154682713449411, 7.5451567587160069] 43 | nw2 = [0.12571016850457151, 0.44231149185805047, 44 | 1.8435833504677326, 7.61973813981073] 45 | nw3 = [0.15854489503205446, 0.70244235144444678, 46 | 2.6294803575724157, 10.427062019753425] 47 | plt.plot(range(1, 5), nw1, "ro-", range(1, 5), nw2, "go-", 48 | range(1, 5), nw3, "bo-") 49 | plt.xlabel('Layer $l$') 50 | plt.ylabel(r"$\Vert\nabla C^l_w\Vert$") 51 | plt.xticks([1, 2, 3, 4]) 52 | plt.show() 53 | -------------------------------------------------------------------------------- /fig/digits.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/digits.png -------------------------------------------------------------------------------- /fig/digits_separate.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/digits_separate.png -------------------------------------------------------------------------------- /fig/false_minima.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/false_minima.png -------------------------------------------------------------------------------- /fig/false_minima.py: -------------------------------------------------------------------------------- 1 | """ 2 | false_minimum 3 | 
"""
false_minimum
~~~~~~~~~~~~~

Plots a function of two variables with many false minima."""

#### Libraries
# Third party libraries
import numpy


def checkerboard_colors(shape, colortuple=('w', 'b')):
    """Return an array of colour codes forming a checkerboard.

    ``shape`` is the (rows, columns) shape of the grid.  Entry ``[i, j]``
    of the result is ``colortuple[(i + j) % len(colortuple)]``, so with
    the default two colours neighbouring cells alternate between white
    and blue.
    """
    rows, cols = numpy.indices(shape)
    return numpy.array(colortuple)[(rows + cols) % len(colortuple)]


def main():
    """Draw the surface z = sin(x)*sin(y) + 0.2*x on [-5, 5) x [-5, 5)."""
    # Matplotlib is imported here so that ``checkerboard_colors`` can be
    # used without a plotting backend being installed.
    from matplotlib.ticker import LinearLocator
    # Note that axes3d is not explicitly used in the code, but older
    # Matplotlib releases need it to register the 3d plot type
    from mpl_toolkits.mplot3d import axes3d  # noqa: F401
    import matplotlib.pyplot as plt

    fig = plt.figure()
    # ``fig.gca(projection='3d')`` is no longer accepted by current
    # Matplotlib; ``add_subplot`` works on old and new releases alike.
    ax = fig.add_subplot(111, projection='3d')
    X = numpy.arange(-5, 5, 0.1)
    Y = numpy.arange(-5, 5, 0.1)
    X, Y = numpy.meshgrid(X, Y)
    Z = numpy.sin(X)*numpy.sin(Y)+0.2*X

    colors = checkerboard_colors(X.shape)

    ax.plot_surface(X, Y, Z, rstride=1, cstride=1, facecolors=colors,
                    linewidth=0)

    ax.set_xlim3d(-5, 5)
    ax.set_ylim3d(-5, 5)
    ax.set_zlim3d(-2, 2)
    # ``ax.w_xaxis`` etc. were aliases of ``ax.xaxis`` etc. and have been
    # removed from Matplotlib, so use the plain attributes.
    for axis in (ax.xaxis, ax.yaxis, ax.zaxis):
        axis.set_major_locator(LinearLocator(3))

    plt.show()


if __name__ == "__main__":
    main()
r"""generate_gradient.py
~~~~~~~~~~~~~~~~~~~~~~~

Use network2 to figure out the average starting values of the gradient
error terms \delta^l_j = \partial C / \partial z^l_j = \partial C /
\partial b^l_j.

"""

#### Libraries
# Standard library
from functools import reduce
import json
import math
import random
import shutil
import sys

# Third-party libraries
import numpy as np


def main():
    """Run the experiment for networks with two, three and four hidden
    layers, saving the gradient norms and the training-speed plots."""
    # The project modules are imported here (rather than at module level)
    # so that the numeric helpers below can be imported without the MNIST
    # code being on the path.
    sys.path.append("../src/")
    import mnist_loader
    import network2

    # Load the data
    full_td, _, _ = mnist_loader.load_data_wrapper()
    # list() keeps the slice working should the loader hand back an
    # iterator instead of a list -- NOTE(review): confirm against
    # mnist_loader.
    td = list(full_td)[:1000]  # Just use the first 1000 items of training data
    epochs = 500  # Number of epochs to train for

    print("\nTwo hidden layers:")
    net = network2.Network([784, 30, 30, 10])
    initial_norms(td, net)
    # First six entries of the averaged gradient for each layer except
    # the last one.
    abbreviated_gradient = [
        ag[:6] for ag in get_average_gradient(net, td)[:-1]]
    print("Saving the averaged gradient for the top six neurons in each "
          "layer.\nWARNING: This will affect the look of the book, so be "
          "sure to check the\nrelevant material (early chapter 5).")
    with open("initial_gradient.json", "w") as f:
        json.dump(abbreviated_gradient, f)
    shutil.copy("initial_gradient.json", "../../js/initial_gradient.json")
    training(td, net, epochs, "norms_during_training_2_layers.json")
    plot_training(
        epochs, "norms_during_training_2_layers.json", 2)

    print("\nThree hidden layers:")
    net = network2.Network([784, 30, 30, 30, 10])
    initial_norms(td, net)
    training(td, net, epochs, "norms_during_training_3_layers.json")
    plot_training(
        epochs, "norms_during_training_3_layers.json", 3)

    print("\nFour hidden layers:")
    net = network2.Network([784, 30, 30, 30, 30, 10])
    initial_norms(td, net)
    training(td, net, epochs,
             "norms_during_training_4_layers.json")
    plot_training(
        epochs, "norms_during_training_4_layers.json", 4)


def initial_norms(training_data, net):
    """Print the norm of the averaged bias gradient of every layer of
    ``net`` except the last one."""
    average_gradient = get_average_gradient(net, training_data)
    norms = [list_norm(avg) for avg in average_gradient[:-1]]
    print("Average gradient for the hidden layers: "+str(norms))


def training(training_data, net, epochs, filename):
    """Train ``net`` one epoch at a time for ``epochs`` epochs.

    Before each epoch the per-layer norms of the averaged gradient
    (all layers but the last) are recorded; the full list of records is
    written to ``filename`` as JSON once training has finished.
    """
    norms = []
    for j in range(epochs):
        average_gradient = get_average_gradient(net, training_data)
        norms.append([list_norm(avg) for avg in average_gradient[:-1]])
        print("Epoch: %s" % j)
        net.SGD(training_data, 1, 1000, 0.1, lmbda=5.0)
    with open(filename, "w") as f:
        json.dump(norms, f)


def plot_training(epochs, filename, num_layers):
    """Plot the norms stored in ``filename`` against the epoch number.

    One curve is drawn per layer (``num_layers`` of them, at most five
    since only five colours are defined).  The figure is saved as
    ``training_speed_<num_layers>_layers.png``, copied to
    ``../../images/`` and then shown.
    """
    # Imported lazily so the rest of the module works without Matplotlib.
    import matplotlib.pyplot as plt

    with open(filename, "r") as f:
        norms = json.load(f)
    fig = plt.figure()
    ax = fig.add_subplot(111)
    colors = ["#2A6EA6", "#FFA933", "#FF5555", "#55FF55", "#5555FF"]
    for j in range(num_layers):
        ax.plot(np.arange(epochs),
                [n[j] for n in norms],
                color=colors[j],
                label="Hidden layer %s" % (j+1,))
    ax.set_xlim([0, epochs])
    ax.grid(True)
    ax.set_xlabel('Number of epochs of training')
    ax.set_title('Speed of learning: %s hidden layers' % num_layers)
    ax.set_yscale('log')
    plt.legend(loc="upper right")
    fig_filename = "training_speed_%s_layers.png" % num_layers
    plt.savefig(fig_filename)
    shutil.copy(fig_filename, "../../images/"+fig_filename)
    plt.show()


def get_average_gradient(net, training_data):
    """Return the first component of ``net.backprop(x, y)`` averaged
    over ``training_data``.

    The result is a list with one entry per array returned by backprop;
    each entry is a flat Python list of floats.
    """
    nabla_b_results = [net.backprop(x, y)[0] for x, y in training_data]
    gradient = list_sum(nabla_b_results)
    return [(np.reshape(g, len(g))/len(training_data)).tolist()
            for g in gradient]


def zip_sum(a, b):
    """Return the element-wise sum of the sequences ``a`` and ``b``."""
    return [x+y for (x, y) in zip(a, b)]


def list_sum(l):
    """Return the element-wise sum of all the sequences in ``l``.

    ``l`` must not be empty.
    """
    # ``reduce`` is no longer a builtin on Python 3; it comes from
    # functools.
    return reduce(zip_sum, l)


def list_norm(l):
    """Return the Euclidean norm of the numbers in ``l``."""
    return math.sqrt(sum(x*x for x in l))


if __name__ == "__main__":
    main()
-------------------------------------------------------------------------------- 1 | [[-0.003970677333144113, -0.0031684316985881185, 0.008103235909196014, 0.012598010584130365, -0.026465907331998335, 0.0017583319323150341], [0.04152906589960523, 0.044025552524932406, -0.009669682279354514, 0.046736871369353235, 0.03877302528270452, 0.012336459551975156]] -------------------------------------------------------------------------------- /fig/misleading_gradient.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/misleading_gradient.png -------------------------------------------------------------------------------- /fig/misleading_gradient.py: -------------------------------------------------------------------------------- 1 | """ 2 | misleading_gradient 3 | ~~~~~~~~~~~~~~~~~~~ 4 | 5 | Plots a function which misleads the gradient descent algorithm.""" 6 | 7 | #### Libraries 8 | # Third party libraries 9 | from matplotlib.ticker import LinearLocator 10 | # Note that axes3d is not explicitly used in the code, but is needed 11 | # to register the 3d plot type correctly 12 | from mpl_toolkits.mplot3d import axes3d 13 | import matplotlib.pyplot as plt 14 | import numpy 15 | 16 | fig = plt.figure() 17 | ax = fig.gca(projection='3d') 18 | X = numpy.arange(-1, 1, 0.025) 19 | Y = numpy.arange(-1, 1, 0.025) 20 | X, Y = numpy.meshgrid(X, Y) 21 | Z = X**2 + 10*Y**2 22 | 23 | colortuple = ('w', 'b') 24 | colors = numpy.empty(X.shape, dtype=str) 25 | for x in xrange(len(X)): 26 | for y in xrange(len(Y)): 27 | colors[x, y] = colortuple[(x + y) % 2] 28 | 29 | surf = ax.plot_surface(X, Y, Z, rstride=1, cstride=1, facecolors=colors, 30 | linewidth=0) 31 | 32 | ax.set_xlim3d(-1, 1) 33 | ax.set_ylim3d(-1, 1) 34 | ax.set_zlim3d(0, 12) 35 | ax.w_xaxis.set_major_locator(LinearLocator(3)) 36 | 
ax.w_yaxis.set_major_locator(LinearLocator(3)) 37 | ax.w_zaxis.set_major_locator(LinearLocator(3)) 38 | ax.text(0.05, -1.8, 0, "$w_1$", fontsize=20) 39 | ax.text(1.5, -0.25, 0, "$w_2$", fontsize=20) 40 | ax.text(1.79, 0, 9.62, "$C$", fontsize=20) 41 | 42 | plt.show() 43 | 44 | -------------------------------------------------------------------------------- /fig/misleading_gradient_contours.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/misleading_gradient_contours.png -------------------------------------------------------------------------------- /fig/misleading_gradient_contours.py: -------------------------------------------------------------------------------- 1 | """ 2 | misleading_gradient_contours 3 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 4 | 5 | Plots the contours of the function from misleading_gradient.py""" 6 | 7 | #### Libraries 8 | # Third party libraries 9 | import matplotlib.pyplot as plt 10 | import numpy 11 | 12 | X = numpy.arange(-1, 1, 0.02) 13 | Y = numpy.arange(-1, 1, 0.02) 14 | X, Y = numpy.meshgrid(X, Y) 15 | Z = X**2 + 10*Y**2 16 | 17 | plt.figure() 18 | CS = plt.contour(X, Y, Z, levels=[0.5, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]) 19 | plt.xlabel("$w_1$", fontsize=16) 20 | plt.ylabel("$w_2$", fontsize=16) 21 | plt.show() 22 | -------------------------------------------------------------------------------- /fig/mnist.py: -------------------------------------------------------------------------------- 1 | """ 2 | mnist 3 | ~~~~~ 4 | 5 | Draws images based on the MNIST data.""" 6 | 7 | #### Libraries 8 | # Standard library 9 | import cPickle 10 | import sys 11 | 12 | # My library 13 | sys.path.append('../src/') 14 | import mnist_loader 15 | 16 | # Third-party libraries 17 | import matplotlib 18 | import matplotlib.pyplot as plt 19 | import numpy as np 20 | 21 | def main(): 22 | training_set, 
def main():
    """Load MNIST via ``mnist_loader`` and plot the first training digit
    together with a rotated copy."""
    training_set, validation_set, test_set = mnist_loader.load_data()
    images = get_images(training_set)
    plot_rotated_image(images[0])

#### Plotting
def _show_digit(fig, rows, cols, index, image):
    """Draw ``image`` in cell ``index`` (counted from 1) of a
    ``rows`` x ``cols`` grid on ``fig`` with the tick marks hidden, and
    return the new axes."""
    ax = fig.add_subplot(rows, cols, index)
    ax.matshow(image, cmap = matplotlib.cm.binary)
    plt.xticks(np.array([]))
    plt.yticks(np.array([]))
    return ax

def plot_images_together(images):
    """ Plot a single image containing all the given MNIST images, one
    after the other.  Note that we crop the sides of the images so that
    they appear reasonably close together."""
    fig = plt.figure()
    cropped = [image[:, 3:25] for image in images]
    _show_digit(fig, 1, 1, 1, np.concatenate(cropped, axis=1))
    plt.show()

def plot_10_by_10_images(images):
    """ Plot the first 100 MNIST images in a 10 by 10 table.  Note that
    we crop the images so that they appear reasonably close together."""
    fig = plt.figure()
    images = [image[3:25, 3:25] for image in images]
    for x in range(10):
        for y in range(10):
            # Subplot positions are counted from 1, image indices from 0.
            _show_digit(fig, 10, 10, 10*y+x+1, images[10*y+x])
    plt.show()

def plot_images_separately(images):
    "Plot the first six MNIST images separately, side by side."
    fig = plt.figure()
    for j in range(1, 7):
        _show_digit(fig, 1, 6, j, images[j-1])
    plt.show()

def plot_mnist_digit(image):
    """ Plot a single MNIST image."""
    fig = plt.figure()
    _show_digit(fig, 1, 1, 1, image)
    plt.show()

def plot_2_and_1(images):
    "Plot ``images[5]`` and ``images[3]`` (a 2 and a 1) side by side."
    fig = plt.figure()
    _show_digit(fig, 1, 2, 1, images[5])
    _show_digit(fig, 1, 2, 2, images[3])
    plt.show()

def plot_top_left(image):
    "Plot the top left quarter of ``image``; the rest is blanked out."
    # Work on a copy so the caller's array is left untouched.
    image = np.copy(image)
    image[14:,:] = np.zeros((14,28))
    image[:,14:] = np.zeros((28,14))
    fig = plt.figure()
    _show_digit(fig, 1, 1, 1, image)
    plt.show()

def plot_bad_images(images):
    """This takes a list of images misclassified by a pretty good
    neural network --- one achieving over 93 percent accuracy --- and
    turns them into a figure.  Each image is titled with its index in
    ``images``."""
    bad_image_indices = [
        8, 18, 33, 92, 119, 124, 149, 151, 193, 233, 241, 247, 259, 300,
        313, 321, 324, 341, 349, 352, 359, 362, 381, 412, 435, 445, 449,
        478, 479, 495, 502, 511, 528, 531, 547, 571, 578, 582, 597, 610,
        619, 628, 629, 659, 667, 691, 707, 717, 726, 740, 791, 810, 844,
        846, 898, 938, 939, 947, 956, 959, 965, 982, 1014, 1033, 1039,
        1044, 1050, 1055, 1107, 1112, 1124, 1147, 1181, 1191, 1192, 1198,
        1202, 1204, 1206, 1224, 1226, 1232, 1242, 1243, 1247, 1256, 1260,
        1263, 1283, 1289, 1299, 1310, 1319, 1326, 1328, 1357, 1378, 1393,
        1413, 1422, 1435, 1467, 1469, 1494, 1500, 1522, 1523, 1525, 1527,
        1530, 1549, 1553, 1609, 1611, 1634, 1641, 1676, 1678, 1681, 1709,
        1717, 1722, 1730, 1732, 1737, 1741, 1754, 1759, 1772, 1773, 1790,
        1808, 1813, 1823, 1843, 1850, 1857, 1868, 1878, 1880, 1883, 1901,
        1913, 1930, 1938, 1940, 1952, 1969, 1970, 1984, 2001, 2009, 2016,
        2018, 2035, 2040, 2043, 2044, 2053, 2063, 2098, 2105, 2109, 2118,
        2129, 2130, 2135, 2148, 2161, 2168, 2174, 2182, 2185, 2186, 2189,
        2224, 2229, 2237, 2266, 2272, 2293, 2299, 2319, 2325, 2326, 2334,
        2369, 2371, 2380, 2381, 2387, 2393, 2395, 2406, 2408, 2414, 2422,
        2433, 2450, 2488, 2514, 2526, 2548, 2574, 2589, 2598, 2607, 2610,
        2631, 2648, 2654, 2695, 2713, 2720, 2721, 2730, 2770, 2771, 2780,
        2863, 2866, 2896, 2907, 2925, 2927, 2939, 2995, 3005, 3023, 3030,
        3060, 3073, 3102, 3108, 3110, 3114, 3115, 3117, 3130, 3132, 3157,
        3160, 3167, 3183, 3189, 3206, 3240, 3254, 3260, 3280, 3329, 3330,
        3333, 3383, 3384, 3475, 3490, 3503, 3520, 3525, 3559, 3567, 3573,
        3597, 3598, 3604, 3629, 3664, 3702, 3716, 3718, 3725, 3726, 3727,
        3751, 3752, 3757, 3763, 3766, 3767, 3769, 3776, 3780, 3798, 3806,
        3808, 3811, 3817, 3821, 3838, 3848, 3853, 3855, 3869, 3876, 3902,
        3906, 3926, 3941, 3943, 3951, 3954, 3962, 3976, 3985, 3995, 4000,
        4002, 4007, 4017, 4018, 4065, 4075, 4078, 4093, 4102, 4139, 4140,
        4152, 4154, 4163, 4165, 4176, 4199, 4201, 4205, 4207, 4212, 4224,
        4238, 4248, 4256, 4284, 4289, 4297, 4300, 4306, 4344, 4355, 4356,
        4359, 4360, 4369, 4405, 4425, 4433, 4435, 4449, 4487, 4497, 4498,
        4500, 4521, 4536, 4548, 4563, 4571, 4575, 4601, 4615, 4620, 4633,
        4639, 4662, 4690, 4722, 4731, 4735, 4737, 4739, 4740, 4761, 4798,
        4807, 4814, 4823, 4833, 4837, 4874, 4876, 4879, 4880, 4886, 4890,
        4910, 4950, 4951, 4952, 4956, 4963, 4966, 4968, 4978, 4990, 5001,
        5020, 5054, 5067, 5068, 5078, 5135, 5140, 5143, 5176, 5183, 5201,
        5210, 5331, 5409, 5457, 5495, 5600, 5601, 5617, 5623, 5634, 5642,
        5677, 5678, 5718, 5734, 5735, 5749, 5752, 5771, 5787, 5835, 5842,
        5845, 5858, 5887, 5888, 5891, 5906, 5913, 5936, 5937, 5945, 5955,
        5957, 5972, 5973, 5985, 5987, 5997, 6035, 6042, 6043, 6045, 6053,
        6059, 6065, 6071, 6081, 6091, 6112, 6124, 6157, 6166, 6168, 6172,
        6173, 6347, 6370, 6386, 6390, 6391, 6392, 6421, 6426, 6428, 6505,
        6542, 6555, 6556, 6560, 6564, 6568, 6571, 6572, 6597, 6598, 6603,
        6608, 6625, 6651, 6694, 6706, 6721, 6725, 6740, 6746, 6768, 6783,
        6785, 6796, 6817, 6827, 6847, 6870, 6872, 6926, 6945, 7002, 7035,
        7043, 7089, 7121, 7130, 7198, 7216, 7233, 7248, 7265, 7426, 7432,
        7434, 7494, 7498, 7691, 7777, 7779, 7797, 7800, 7809, 7812, 7821,
        7849, 7876, 7886, 7897, 7902, 7905, 7917, 7921, 7945, 7999, 8020,
        8059, 8081, 8094, 8095, 8115, 8246, 8256, 8262, 8272, 8273, 8278,
        8279, 8293, 8322, 8339, 8353, 8408, 8453, 8456, 8502, 8520, 8522,
        8607, 9009, 9010, 9013, 9015, 9019, 9022, 9024, 9026, 9036, 9045,
        9046, 9128, 9214, 9280, 9316, 9342, 9382, 9433, 9446, 9506, 9540,
        9544, 9587, 9614, 9634, 9642, 9645, 9700, 9716, 9719, 9729, 9732,
        9738, 9740, 9741, 9742, 9744, 9745, 9749, 9752, 9768, 9770, 9777,
        9779, 9792, 9808, 9831, 9839, 9856, 9858, 9867, 9879, 9883, 9888,
        9890, 9893, 9905, 9944, 9970, 9982]
    n = len(bad_image_indices)
    bad_images = [images[j] for j in bad_image_indices]
    fig = plt.figure(figsize=(10, 15))
    for j in range(1, n+1):
        ax = fig.add_subplot(25, 125, j)
        ax.matshow(bad_images[j-1], cmap = matplotlib.cm.binary)
        ax.set_title(str(bad_image_indices[j-1]))
        plt.xticks(np.array([]))
        plt.yticks(np.array([]))
    plt.subplots_adjust(hspace = 1.2)
    plt.show()

def plot_really_bad_images(images):
    """This takes a list of the worst images from plot_bad_images and
    turns them into a figure (two rows of nine)."""
    really_bad_image_indices = [
        324, 582, 659, 726, 846, 956, 1124, 1393,
        1773, 1868, 2018, 2109, 2654, 4199, 4201, 4620, 5457, 5642]
    n = len(really_bad_image_indices)
    really_bad_images = [images[j] for j in really_bad_image_indices]
    fig = plt.figure(figsize=(10, 2))
    for j in range(1, n+1):
        _show_digit(fig, 2, 9, j, really_bad_images[j-1])
    plt.show()

def plot_features(image):
    "Plot the top right, bottom left, and bottom right of ``image``."
    image_1, image_2, image_3 = np.copy(image), np.copy(image), np.copy(image)
    # Top right: blank the left half and the bottom half
    image_1[:,:14] = np.zeros((28,14))
    image_1[14:,:] = np.zeros((14,28))
    # Bottom left: blank the right half and the top half
    image_2[:,14:] = np.zeros((28,14))
    image_2[:14,:] = np.zeros((14,28))
    # Bottom right: blank the top half and the left half
    image_3[:14,:] = np.zeros((14,28))
    image_3[:,:14] = np.zeros((28,14))
    fig = plt.figure()
    for position, quarter in enumerate((image_1, image_2, image_3), 1):
        _show_digit(fig, 1, 3, position, quarter)
    plt.show()

def rotate_image(image, theta, fudge=1.3):
    """Return a copy of the 28 by 28 array ``image`` rotated by ``theta``
    radians.

    There are fast matrix techniques for doing this, but we take a
    pedestrian approach: each entry of the result is mapped to x, y
    co-ordinates (with the 13, 14 matrix entry as the origin), rotated
    backward by ``theta``, and filled with a weighted average of the four
    surrounding entries of ``image``.  Entries that map outside the image
    are left at zero.  This misses a boundary row and column, but that's
    okay, MNIST digits usually don't go that near the boundary.

    Every value is rescaled by ``fudge``, a hand-set factor.  It seems to
    be necessary because the averaging doesn't quite work right; it
    should probably be theta-dependent.
    """
    def image_value(x, y):
        # Value of ``image`` at co-ordinate x, y: row -y+14, column x+13
        return image[-y+14, x+13]
    rot_image = np.zeros((28,28))
    cos_theta, sin_theta = np.cos(theta), np.sin(theta)
    for j in range(28):
        for k in range(28):
            # Matrix indices to co-ordinates; both x and y run over -13..14
            x, y = k-13, -j+14
            # rotate by -theta
            x1 = cos_theta*x + sin_theta*y
            y1 = -sin_theta*x + cos_theta*y
            # Nearest integer x entries are x2 and x2+1.  delta_x
            # measures how to interpolate.  The int() conversion matters:
            # NumPy arrays cannot be indexed with floats.
            x2 = int(np.floor(x1))
            delta_x = x1-x2
            # Similarly for y
            y2 = int(np.floor(y1))
            delta_y = y1-y2
            # Out of bounds: leave the entry at zero
            if x2 < -13 or x2 > 13 or y2 < -13 or y2 > 13: continue
            value = ((1-delta_x)*(1-delta_y)*image_value(x2, y2)
                     + (1-delta_x)*delta_y*image_value(x2, y2+1)
                     + delta_x*(1-delta_y)*image_value(x2+1, y2)
                     + delta_x*delta_y*image_value(x2+1, y2+1))
            rot_image[j, k] = fudge*value
    return rot_image

def plot_rotated_image(image, degrees=15):
    """ Plot an MNIST digit, and then a version of it rotated by
    ``degrees`` degrees (15 by default)."""
    # Do the initial plot
    plot_mnist_digit(image)
    # Then the rotated one
    plot_mnist_digit(rotate_image(image, degrees*np.pi/180))

#### Miscellanea
def load_data():
    """ Return the MNIST data as a tuple containing the training data,
    the validation data, and the test data."""
    # Python 3 has no cPickle module; pickle is its replacement.
    import pickle
    # NOTE(review): the repository listing only shows data/mnist.pkl.gz,
    # so this uncompressed file may need to be created first -- confirm.
    # SECURITY: unpickling runs arbitrary code; only load trusted files.
    with open('../data/mnist.pkl', 'rb') as f:
        # encoding='latin1' is what Python 3 needs to read NumPy arrays
        # pickled by Python 2 -- presumably how this file was written.
        training_set, validation_set, test_set = pickle.load(
            f, encoding='latin1')
    return (training_set, validation_set, test_set)

def get_images(training_set):
    """ Return a list containing the images from the MNIST data
    set.  Each image is represented as a 2-d numpy array with rows of
    28 entries."""
    flattened_images = training_set[0]
    return [np.reshape(f, (-1, 28)) for f in flattened_images]
Each image is represented as a 2-d numpy array.""" 229 | flattened_images = training_set[0] 230 | return [np.reshape(f, (-1, 28)) for f in flattened_images] 231 | 232 | #### Main 233 | if __name__ == "__main__": 234 | main() 235 | -------------------------------------------------------------------------------- /fig/mnist_100_digits.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/mnist_100_digits.png -------------------------------------------------------------------------------- /fig/mnist_2_and_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/mnist_2_and_1.png -------------------------------------------------------------------------------- /fig/mnist_complete_zero.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/mnist_complete_zero.png -------------------------------------------------------------------------------- /fig/mnist_first_digit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/mnist_first_digit.png -------------------------------------------------------------------------------- /fig/mnist_other_features.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/mnist_other_features.png -------------------------------------------------------------------------------- 
/fig/mnist_really_bad_images.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/mnist_really_bad_images.png -------------------------------------------------------------------------------- /fig/mnist_top_left_feature.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/mnist_top_left_feature.png -------------------------------------------------------------------------------- /fig/more_data.json: -------------------------------------------------------------------------------- 1 | [69.09, 76.37, 85.29, 88.85, 91.27, 93.24, 94.89, 95.85, 95.97] -------------------------------------------------------------------------------- /fig/more_data.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/more_data.png -------------------------------------------------------------------------------- /fig/more_data.py: -------------------------------------------------------------------------------- 1 | """more_data 2 | ~~~~~~~~~~~~ 3 | 4 | Plot graphs to illustrate the performance of MNIST when different size 5 | training sets are used. 
6 | 7 | """ 8 | 9 | # Standard library 10 | import json 11 | import random 12 | import sys 13 | 14 | # My library 15 | sys.path.append('../src/') 16 | import mnist_loader 17 | import network2 18 | 19 | # Third-party libraries 20 | import matplotlib.pyplot as plt 21 | import numpy as np 22 | from sklearn import svm 23 | 24 | # The sizes to use for the different training sets 25 | SIZES = [100, 200, 500, 1000, 2000, 5000, 10000, 20000, 50000] 26 | 27 | def main(): 28 | run_networks() 29 | run_svms() 30 | make_plots() 31 | 32 | def run_networks(): 33 | # Make results more easily reproducible 34 | random.seed(12345678) 35 | np.random.seed(12345678) 36 | training_data, validation_data, test_data = mnist_loader.load_data_wrapper() 37 | net = network2.Network([784, 30, 10], cost=network2.CrossEntropyCost()) 38 | accuracies = [] 39 | for size in SIZES: 40 | print "\n\nTraining network with data set size %s" % size 41 | net.large_weight_initializer() 42 | num_epochs = 1500000 / size 43 | net.SGD(training_data[:size], num_epochs, 10, 0.5, lmbda = size*0.0001) 44 | accuracy = net.accuracy(validation_data) / 100.0 45 | print "Accuracy was %s percent" % accuracy 46 | accuracies.append(accuracy) 47 | f = open("more_data.json", "w") 48 | json.dump(accuracies, f) 49 | f.close() 50 | 51 | def run_svms(): 52 | svm_training_data, svm_validation_data, svm_test_data \ 53 | = mnist_loader.load_data() 54 | accuracies = [] 55 | for size in SIZES: 56 | print "\n\nTraining SVM with data set size %s" % size 57 | clf = svm.SVC() 58 | clf.fit(svm_training_data[0][:size], svm_training_data[1][:size]) 59 | predictions = [int(a) for a in clf.predict(svm_validation_data[0])] 60 | accuracy = sum(int(a == y) for a, y in 61 | zip(predictions, svm_validation_data[1])) / 100.0 62 | print "Accuracy was %s percent" % accuracy 63 | accuracies.append(accuracy) 64 | f = open("more_data_svm.json", "w") 65 | json.dump(accuracies, f) 66 | f.close() 67 | 68 | def make_plots(): 69 | f = open("more_data.json", 
"r") 70 | accuracies = json.load(f) 71 | f.close() 72 | f = open("more_data_svm.json", "r") 73 | svm_accuracies = json.load(f) 74 | f.close() 75 | make_linear_plot(accuracies) 76 | make_log_plot(accuracies) 77 | make_combined_plot(accuracies, svm_accuracies) 78 | 79 | def make_linear_plot(accuracies): 80 | fig = plt.figure() 81 | ax = fig.add_subplot(111) 82 | ax.plot(SIZES, accuracies, color='#2A6EA6') 83 | ax.plot(SIZES, accuracies, "o", color='#FFA933') 84 | ax.set_xlim(0, 50000) 85 | ax.set_ylim(60, 100) 86 | ax.grid(True) 87 | ax.set_xlabel('Training set size') 88 | ax.set_title('Accuracy (%) on the validation data') 89 | plt.show() 90 | 91 | def make_log_plot(accuracies): 92 | fig = plt.figure() 93 | ax = fig.add_subplot(111) 94 | ax.plot(SIZES, accuracies, color='#2A6EA6') 95 | ax.plot(SIZES, accuracies, "o", color='#FFA933') 96 | ax.set_xlim(100, 50000) 97 | ax.set_ylim(60, 100) 98 | ax.set_xscale('log') 99 | ax.grid(True) 100 | ax.set_xlabel('Training set size') 101 | ax.set_title('Accuracy (%) on the validation data') 102 | plt.show() 103 | 104 | def make_combined_plot(accuracies, svm_accuracies): 105 | fig = plt.figure() 106 | ax = fig.add_subplot(111) 107 | ax.plot(SIZES, accuracies, color='#2A6EA6') 108 | ax.plot(SIZES, accuracies, "o", color='#2A6EA6', 109 | label='Neural network accuracy (%)') 110 | ax.plot(SIZES, svm_accuracies, color='#FFA933') 111 | ax.plot(SIZES, svm_accuracies, "o", color='#FFA933', 112 | label='SVM accuracy (%)') 113 | ax.set_xlim(100, 50000) 114 | ax.set_ylim(25, 100) 115 | ax.set_xscale('log') 116 | ax.grid(True) 117 | ax.set_xlabel('Training set size') 118 | plt.legend(loc="lower right") 119 | plt.show() 120 | 121 | if __name__ == "__main__": 122 | main() 123 | -------------------------------------------------------------------------------- /fig/more_data_5.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/more_data_5.png -------------------------------------------------------------------------------- /fig/more_data_comparison.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/more_data_comparison.png -------------------------------------------------------------------------------- /fig/more_data_log.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/more_data_log.png -------------------------------------------------------------------------------- /fig/more_data_rotated_5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/more_data_rotated_5.png -------------------------------------------------------------------------------- /fig/more_data_svm.json: -------------------------------------------------------------------------------- 1 | [25.07, 48.93, 75.13, 83.87, 88.49, 91.46, 92.45, 93.47, 94.48] -------------------------------------------------------------------------------- /fig/multiple_eta.json: -------------------------------------------------------------------------------- 1 | [[[], [], [0.87809508908377998, 0.67406552530098141, 0.59798920430275404, 0.55533015743656189, 0.51751101003208144, 0.4942033354556824, 0.47255041042913526, 0.46069879353359433, 0.44304475294352064, 0.43099562372228112, 0.42310993427766375, 0.41408265298981006, 0.40573464183982105, 0.40110722961828227, 0.39162028064538967, 0.38705015774740958, 0.38116357043417587, 0.37603986695304614, 0.37297012040237154, 
0.37057334627661631, 0.36551756338853658, 0.36335674264586654, 0.35745296185579917, 0.35535960956849127, 0.35365591135061097, 0.35011353300568238, 0.34946519495897871, 0.34604661988238178, 0.34386077098862522, 0.33919980880230349], []], [[], [], [0.49501954654296704, 0.4063145129425576, 0.40482383242804637, 0.37156577828840276, 0.37380111172151681, 0.37152751786000143, 0.35371985224004426, 0.3557161388797867, 0.34323780090168027, 0.3433514311156789, 0.3367645441708797, 0.34532085892085329, 0.33506383267050244, 0.34760988079085842, 0.34921493732996928, 0.33853424834583179, 0.32837282561262077, 0.33175599401109612, 0.33132920379429243, 0.33024353325326034, 0.32736756892399654, 0.3259638557593546, 0.32004264784244907, 0.33424319076405928, 0.33878125802305081, 0.32521839878261177, 0.32679267619514646, 0.32488571435373748, 0.33056367198473002, 0.33879633130932685], []], [[], [], [0.92489293305102116, 0.83919130289246469, 0.88748421594232696, 0.79625231780396133, 0.78117959228699174, 1.1365919079387048, 0.78787239608336346, 0.76778614131217449, 0.73689525303227721, 0.80127437393519696, 0.74433665287336681, 0.73725544607013882, 0.80249602203179993, 0.85190338199210014, 0.79872168623645712, 0.80243104440756152, 0.80649160680410659, 0.81467254023600921, 0.82526467696100858, 0.75042379852601759, 0.93658673378777402, 0.88236662906752283, 0.86121396033520892, 0.72492681699401829, 0.80405009868466648, 0.83959963179208197, 0.83387510808276821, 0.88282498566307899, 0.88583473645177979, 0.86068501713490919], []]] -------------------------------------------------------------------------------- /fig/multiple_eta.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/multiple_eta.png -------------------------------------------------------------------------------- /fig/multiple_eta.py: 
-------------------------------------------------------------------------------- 1 | """multiple_eta 2 | ~~~~~~~~~~~~~~~ 3 | 4 | This program shows how different values for the learning rate affect 5 | training. In particular, we'll plot out how the cost changes using 6 | three different values for eta. 7 | 8 | """ 9 | 10 | # Standard library 11 | import json 12 | import random 13 | import sys 14 | 15 | # My library 16 | sys.path.append('../src/') 17 | import mnist_loader 18 | import network2 19 | 20 | # Third-party libraries 21 | import matplotlib.pyplot as plt 22 | import numpy as np 23 | 24 | # Constants 25 | LEARNING_RATES = [0.025, 0.25, 2.5] 26 | COLORS = ['#2A6EA6', '#FFCD33', '#FF7033'] 27 | NUM_EPOCHS = 30 28 | 29 | def main(): 30 | run_networks() 31 | make_plot() 32 | 33 | def run_networks(): 34 | """Train networks using three different values for the learning rate, 35 | and store the cost curves in the file ``multiple_eta.json``, where 36 | they can later be used by ``make_plot``. 
37 | 38 | """ 39 | # Make results more easily reproducible 40 | random.seed(12345678) 41 | np.random.seed(12345678) 42 | training_data, validation_data, test_data = mnist_loader.load_data_wrapper() 43 | results = [] 44 | for eta in LEARNING_RATES: 45 | print "\nTrain a network using eta = "+str(eta) 46 | net = network2.Network([784, 30, 10]) 47 | results.append( 48 | net.SGD(training_data, NUM_EPOCHS, 10, eta, lmbda=5.0, 49 | evaluation_data=validation_data, 50 | monitor_training_cost=True)) 51 | f = open("multiple_eta.json", "w") 52 | json.dump(results, f) 53 | f.close() 54 | 55 | def make_plot(): 56 | f = open("multiple_eta.json", "r") 57 | results = json.load(f) 58 | f.close() 59 | fig = plt.figure() 60 | ax = fig.add_subplot(111) 61 | for eta, result, color in zip(LEARNING_RATES, results, COLORS): 62 | _, _, training_cost, _ = result 63 | ax.plot(np.arange(NUM_EPOCHS), training_cost, "o-", 64 | label="$\eta$ = "+str(eta), 65 | color=color) 66 | ax.set_xlim([0, NUM_EPOCHS]) 67 | ax.set_xlabel('Epoch') 68 | ax.set_ylabel('Cost') 69 | plt.legend(loc='upper right') 70 | plt.show() 71 | 72 | if __name__ == "__main__": 73 | main() 74 | -------------------------------------------------------------------------------- /fig/norms_during_training_2_layers.json: -------------------------------------------------------------------------------- 1 | [[0.06574134326503182, 0.3092184942703712], [0.042965950225229275, 0.21697470825384765], [0.03285743853560062, 0.1661475616404329], [0.02683217541545865, 0.13284259725100744], [0.022444963188347787, 0.10869569875668701], [0.01898702292286868, 0.09026907616072426], [0.016176320606231937, 0.0757817449583434], [0.013856769047630028, 0.06415813857855861], [0.011923755327842544, 0.05468929179740756], [0.010300852608835414, 0.04688240462739417], [0.008929980735740226, 0.04038231936325889], [0.007766037293739668, 0.03492602136501374], [0.0067734782810398185, 0.0303144328294002], [0.005923963369769822, 0.02639409406963878], 
[0.0051946687903396006, 0.02304484718674352], [0.004567050423835018, 0.02017132438924394], [0.004025922146371357, 0.017696929143574278], [0.003558759740350878, 0.015559495043546933], [0.003155168513245846, 0.01370809963803681], [0.0028064709792152993, 0.012100689100100913], [0.0025053833194008273, 0.010702282128687351], [0.0022457579431536864, 0.009483594139974067], [0.0020223756268692833, 0.008419970767157997], [0.0018307752663026288, 0.007490551952524308], [0.0016671128100467073, 0.006677609993253912], [0.0015280437877996404, 0.00596602024352487], [0.001410626139902204, 0.005342833987422243], [0.0013122417372158585, 0.004796930719247236], [0.001230535877050377, 0.0043187326553456445], [0.0011633740094011671, 0.003899968399564661], [0.0011088140690242762, 0.003533475735331852], [0.0010650914693569541, 0.003213035828461012], [0.0010306126820953377, 0.002933232910393291], [0.0010039529391577253, 0.0026893349185429013], [0.0009838541507884253, 0.002477191693569685], [0.0009692204018936975, 0.0022931482255388073], [0.0009591098842916142, 0.0021339711220004043], [0.0009527233875527112, 0.001996786940464415], [0.000949390258845377, 0.001879031281725876], [0.0009485530386537979, 0.0017784075917766545], [0.0009497519150621335, 0.0016928545134905027], [0.0009526098833938276, 0.00162052044418621], [0.0009568191883996328, 0.0015597437895911916], [0.0009621293468216554, 0.0015090373462108967], [0.0009683368349615602, 0.0014670753405878112], [0.0009752763835047726, 0.0014326818995832435], [0.0009828137394213656, 0.0014048200688868667], [0.0009908397165437146, 0.001382580864127233], [0.0009992653475108188, 0.0013651721629557073], [0.0010080179583493688, 0.0013519074848511415], [0.0010170380046287963, 0.0013421948454926998], [0.0010262765293966452, 0.0013355259254374328], [0.0010356931246733264, 0.0013314657823426134], [0.0010452542983669293, 0.001329643288455763], [0.0010549321662308622, 0.0013297424128337033], [0.0010647034036817776, 0.0013314944062056567], 
[0.0010745484049984948, 0.001334670894282615], [0.0010844506078711124, 0.001339077846101557], [0.0010943959497621623, 0.00134455035772627], [0.0011043724293854176, 0.001350948176527696], [0.0011143697520880728, 0.0013581518848623535], [0.0011243790422903717, 0.0013660596617759066], [0.0011343926096075954, 0.001374584545237105], [0.0011444037580310062, 0.0013836521236696693], [0.0011544066297232028, 0.0013931985929649267], [0.001164396076707179, 0.0014031691228828121], [0.0011743675550923705, 0.0014135164842454385], [0.001184317037560411, 0.001424199895282673], [0.0011942409406884003, 0.0014351840517464725], [0.0012041360643654548, 0.001446438310918101], [0.00121399954109664, 0.0014579360044009737], [0.0012238287934166479, 0.001469653858672124], [0.001233621497976716, 0.0014815715058239837], [0.0012433755551407887, 0.0014936710698406808], [0.0012530890631449795, 0.00150593681619279], [0.0012627602960492853, 0.0015183548545708195], [0.0012723876848512645, 0.0015309128862728684], [0.0012819698012447925, 0.0015435999891708265], [0.0012915053435987739, 0.0015564064343490218], [0.0013009931248051125, 0.0015693235294801865], [0.0013104320617056788, 0.0015823434848093638], [0.0013198211658574318, 0.0015954592982856857], [0.001329159535435146, 0.0016086646569381795], [0.0013384463481043436, 0.0016219538520547598], [0.0013476808547242748, 0.0016353217061091565], [0.0013568623737632026, 0.0016487635097024642], [0.0013659902863269694, 0.0016622749670549338], [0.0013750640317171125, 0.0016758521488088555], [0.0013840831034477462, 0.001689491451092161], [0.0013930470456610636, 0.0017031895599511186], [0.0014019554498903382, 0.0017169434203938478], [0.001410807952126751, 0.001730750209399147], [0.0014196042301527312, 0.0017446073123398938], [0.001428344001109794, 0.0017585123023509564], [0.0014370270192733724, 0.0017724629222394034], [0.0014456530740109097, 0.0017864570685928126], [0.0014542219879027463, 0.0018004927777905224], [0.0014627336150080232, 0.001814568213664344], 
[0.0014711878392602328, 0.00182868165659123], [0.0014795845729789635, 0.001842831493830357], [0.0014879237554861209, 0.001857016210943517], [0.001496205351816403, 0.0018712343841595458], [0.001504429351512978, 0.0018854846735628997], [0.001512595767500488, 0.0018997658170025018], [0.0015207046350283473, 0.001914076624631347], [0.0015287560106781513, 0.0019284159739990133], [0.0015367499714297151, 0.0019427828056299383], [0.001544686613780801, 0.0019571761190289314], [0.0015525660529162172, 0.00197159496906334], [0.0015603884219223176, 0.0019860384626777115], [0.0015681538710434048, 0.002000505755902608], [0.0015758625669768783, 0.0020149960511241243], [0.0015835146922042135, 0.0020295085945849795], [0.0015911104443552297, 0.002044042674091702], [0.001598650035603237, 0.0020585976169056313], [0.0016061336920889376, 0.0020731727877982367], [0.001613561653371099, 0.002087767587253655], [0.0016209341719021985, 0.0021023814498034505], [0.0016282515125274055, 0.0021170138424803724], [0.001635513952005339, 0.0021316642633795106], [0.0016427217785492264, 0.0021463322403165704], [0.0016498752913871664, 0.002161017329574194], [0.0016569748003402733, 0.0021757191147283752], [0.0016640206254176105, 0.00219043720554777], [0.0016710130964268687, 0.002205171236959631], [0.0016779525525998347, 0.0022199208680767388], [0.001684839342231744, 0.002234685781280274], [0.0016916738223337158, 0.0022494656813542074], [0.0016984563582974415, 0.002264260294667135], [0.0017051873235714623, 0.002279069368397964], [0.0017118670993483138, 0.0022938926698022047], [0.0017184960742619357, 0.002308729985515969], [0.001725074644094738, 0.0023235811208949593], [0.0017316032114938027, 0.0023384458993861265], [0.001738082185695673, 0.0023533241619297417], [0.0017445119822592711, 0.0023682157663899766], [0.0017508930228065083, 0.0023831205870120933], [0.001757225734770155, 0.0023980385139046865], [0.0017635105511485838, 0.002412969452545351], [0.0017697479102670496, 0.0024279133233084706], 
[0.0017759382555451366, 0.002442870061013807], [0.0017820820352700965, 0.002457839614494661], [0.0017881797023757589, 0.002472821946184576], [0.0017942317142267865, 0.002487817031721519], [0.0018002385324079868, 0.0025028248595686305], [0.0018062006225184927, 0.002517845430650605], [0.001812118453970572, 0.002532878758004949], [0.0018179924997929005, 0.0025479248664473006], [0.0018238232364381085, 0.0025629837922500893], [0.0018296111435944535, 0.0025780555828339136], [0.0018353567040014507, 0.0025931402964709826], [0.0018410604032693685, 0.002608238001999926], [0.0018467227297024314, 0.002623348778551656], [0.001852344174125658, 0.0026384727152854465], [0.0018579252297151955, 0.0026536099111349864], [0.0018634663918321254, 0.0026687604745637963], [0.0018689681578596077, 0.002683924523329614], [0.0018744310270433402, 0.0026991021842573027], [0.0018798555003352415, 0.0027142935930199196], [0.001885242080240342, 0.0027294988939275022], [0.0018905912706668024, 0.002744718239723253], [0.001895903576779071, 0.002759951791386862], [0.0019011795048541078, 0.0027751997179443952], [0.0019064195621406662, 0.002790462196284726], [0.001911624256721632, 0.0028057394109820286], [0.0019167940973793586, 0.0028210315541241206], [0.0019219295934640498, 0.0028363388251463613], [0.0019270312547651242, 0.0028516614306708394], [0.001932099591385598, 0.002866999584350647], [0.0019371351136194554, 0.002882353506718987], [0.0019421383318320318, 0.0028977234250428295], [0.0019471097563433955, 0.002913109573180972], [0.0019520498973147331, 0.0029285121914463023], [0.001956959264637746, 0.002943931526472021], [0.0019618383678270644, 0.0029593678310816003], [0.0019666877159156756, 0.002974821364162418], [0.0019715078173533898, 0.0029902923905428024], [0.001976299179908307, 0.0030057811808722997], [0.0019810623105713573, 0.0030212880115050643], [0.001985797715463823, 0.0030368131643862127], [0.0019905058997479567, 0.0030523569269409264], [0.0019951873675405837, 0.0030679195919662114], 
[0.0019998426218297647, 0.003083501457525218], [0.0020044721643944826, 0.0030991028268438415], [0.0020090764957273553, 0.003114724008209653], [0.0020136561149603598, 0.0031303653148729107], [0.002018211519793561, 0.003146027064949554], [0.0020227432064268357, 0.0031617095813261313], [0.0020272516694945634, 0.003177413191566421], [0.0020317374020032944, 0.0031931382278198197], [0.0020362008952723117, 0.0032088850267311727], [0.0020406426388771466, 0.0032246539293521105], [0.002045063120595933, 0.0032404452810537725], [0.002049462826358647, 0.0032562594314406923], [0.00205384224019913, 0.0032720967342659444], [0.002058201844209906, 0.003287957547347296], [0.002062542118499728, 0.0033038422324843707], [0.002066863541153808, 0.0033197511553766943], [0.0020711665881966946, 0.0033356846855425656], [0.0020754517335577396, 0.003351643196238663], [0.0020797194490391057, 0.0033676270643802518], [0.002083970204286235, 0.0033836366704620515], [0.002088204466760761, 0.003399672398479515], [0.0020924227017157433, 0.0034157346358505767], [0.0020966253721732215, 0.003431823773337752], [0.0021008129389039577, 0.0034479402049704716], [0.0021049858604093342, 0.0034640843279676843], [0.0021091445929053232, 0.0034802565426605417], [0.0021132895903084527, 0.003496457252415226], [0.0021174213042236528, 0.0035126868635556737], [0.0021215401839339712, 0.003528945785286369], [0.002125646676391995, 0.00354523442961492], [0.0021297412262129456, 0.0035615532112745186], [0.002133824275669347, 0.00357790254764607], [0.0021378962646871474, 0.003594282858680137], [0.0021419576308432365, 0.003610694566818363], [0.0021460088093642558, 0.003627138096914653], [0.002150050233126582, 0.003643613876155697], [0.002154082332657443, 0.0036601223339810694], [0.0021581055361369977, 0.0036766639020027005], [0.0021621202694013285, 0.0036932390139236514], [0.0021661269559462466, 0.0037098481054562895], [0.0021701260169317894, 0.003726491614239563], [0.002174117871187324, 0.0037431699797555932], 
[0.0021781029352171515, 0.0037598836432453132], [0.0021820816232065265, 0.0037766330476232263], [0.0021860543470279595, 0.0037934186373911826], [0.002190021516247749, 0.0038102408585511584], [0.002193983538132606, 0.0038271001585168936], [0.0021979408176562884, 0.003843996986024524], [0.0022018937575061484, 0.0038609317910419383], [0.0022058427580895048, 0.0038779050246769663], [0.002209788217539725, 0.0038949171390842974], [0.0022137305317219465, 0.003911968587371091], [0.002217670094238327, 0.0039290598235012155], [0.0022216072964327377, 0.003946191302198092], [0.002225542527394841, 0.0039633634788460706], [0.0022294761739634085, 0.003980576809390355], [0.0022334086207288447, 0.0039978317502353386], [0.0022373402500348184, 0.004015128758141401], [0.002241271441978918, 0.004032468290120044], [0.0022452025744122447, 0.004049850803327413], [0.0022491340229379045, 0.004067276754956062], [0.002253066160908286, 0.0040847466021249985], [0.0022569993594210693, 0.004102260801767939], [0.00226093398731391, 0.004119819810519758], [0.0022648704111577263, 0.004137424084601017], [0.0022688089952485196, 0.004155074079700713], [0.002272750101597681, 0.004172770250856938], [0.0022766940899207363, 0.00419051305233571], [0.002280641317624457, 0.004208302937507712], [0.0022845921397922925, 0.004226140358723033], [0.002288546909168105, 0.004244025767183831], [0.0022925059761381293, 0.004261959612814915], [0.0022964696887111365, 0.004279942344132131], [0.0023004383924967553, 0.004297974408108706], [0.0023044124306819376, 0.004316056250039291], [0.0023083921440055144, 0.004334188313401842], [0.0023123778707308316, 0.0043523710397172805], [0.0023163699466164206, 0.0043706048684068475], [0.0023203687048847367, 0.004388890236647192], [0.002324374476188852, 0.0044072275792231734], [0.002328387588577205, 0.004425617328378256], [0.002332408367456302, 0.00444405991366268], [0.002336437135551399, 0.004462555761779136], [0.002340474212865165, 0.004481105296426132], [0.002344519916634313, 
0.004499708938138957], [0.0023485745612841953, 0.004518367104128216], [0.0023526384583813687, 0.004537080208115914], [0.002356711916584148, 0.0045558486601691706], [0.0023607952415911305, 0.004574672866531417], [0.0023648887360877228, 0.004593553229451196], [0.0023689926996906716, 0.004612490147008489], [0.002373107428890626, 0.0046314840129385105], [0.0023772332169927127, 0.004650535216453183], [0.002381370354055213, 0.00466964414206], [0.0023855191268262756, 0.00468881116937846], [0.002389679818678771, 0.004708036672954117], [0.0023938527095432355, 0.004727321022070043], [0.00239803807583901, 0.004746664580555933], [0.0024022361904035426, 0.004766067706594688], [0.002406447322419903, 0.00478553075252663], [0.00241067173734256, 0.004805054064651225], [0.0024149096968214294, 0.0048246379830264126], [0.002419161458624243, 0.004844282841265573], [0.002423427276557266, 0.004863988966332031], [0.0024277074003844113, 0.004883756678331269], [0.002432002075744773, 0.004903586290300732], [0.0024363115440686556, 0.004923478107997383], [0.002440636042492077, 0.004943432429682883], [0.0024449758037698543, 0.00496344954590655], [0.0024493310561872718, 0.004983529739286014], [0.0024537020234704065, 0.005003673284285789], [0.0024580889246951163, 0.005023880446993503], [0.002462491974194787, 0.005044151484894066], [0.002466911381466829, 0.0050644866466417445], [0.002471347351078043, 0.005084886171830108], [0.002475800082568811, 0.005105350290759991], [0.002480269770356263, 0.005125879224205454], [0.0024847566036363747, 0.005146473183177764], [0.0024892607662851274, 0.005167132368687558], [0.0024937824367587164, 0.005187856971505075], [0.0024983217879929066, 0.005208647171918635], [0.002502878987301572, 0.005229503139491423], [0.00250745419627445, 0.0052504250328164375], [0.002512047570674216, 0.005271412999270007], [0.0025166592603328656, 0.005292467174763562], [0.002521289409047543, 0.005313587683494025], [0.0025259381544757834, 0.005334774637692681], [0.002530605628030294, 
0.005356028137372697], [0.0025352919547732976, 0.005377348270075373], [0.0025399972533105206, 0.005398735110615056], [0.002544721635684845, 0.005420188720823024], [0.0025494652072697325, 0.005441709149290172], [0.002554228066662461, 0.005463296431108753], [0.002559010305577223, 0.005484950587613131], [0.002563812008738162, 0.005506671626119766], [0.0025686332537724317, 0.005528459539666374], [0.0025734741111032786, 0.005550314306750434], [0.0025783346438433175, 0.005572235891067092], [0.0025832149076879425, 0.005594224241246602], [0.002588114950809057, 0.005616279290591347], [0.0025930348137491147, 0.0056384009568125615], [0.0025979745293155743, 0.005660589141766809], [0.002602934122475831, 0.005682843731192474], [0.002607913610252721, 0.005705164594446077], [0.002612913001620622, 0.005727551584238853], [0.0026179322974022668, 0.005750004536373475], [0.0026229714901663545, 0.005772523269481117], [0.002628030564125997, 0.005795107584758912], [0.0026331094950380945, 0.00581775726570805], [0.002638208250103759, 0.0058404720778724864], [0.002643326787869787, 0.005863251768578484], [0.002648465058131364, 0.0058860960666750175], [0.0026536230018359866, 0.00590900468227531], [0.002658800550988757, 0.005931977306499468], [0.0026639976285591067, 0.005955013611218456], [0.0026692141483890316, 0.005978113248799521], [0.0026744500151029385, 0.006001275851853173], [0.0026797051240192004, 0.0060245010329819], [0.0026849793610634813, 0.006047788384530743], [0.0026902726026839423, 0.006071137478339861], [0.0026955847157684207, 0.006094547865499277], [0.0027009155575636696, 0.006118019076105893], [0.0027062649755967313, 0.006141550619023013], [0.0027116328075985773, 0.006165141981642434], [0.0027170188814300816, 0.0061887926296493095], [0.0027224230150104177, 0.006212502006790031], [0.0027278450162479717, 0.0062362695346431015], [0.002733284682973905, 0.0062600946123933425], [0.0027387418028783873, 0.006283976616609525], [0.002744216153449669, 0.006307914901025608], 
[0.002749707501916029, 0.006331908796325816], [0.0027552156051907084, 0.006355957609933584], [0.002760740209819949, 0.0063800606258047595], [0.0027662810519341746, 0.0064042171042251045], [0.002771837857202434, 0.006428426281612287], [0.0027774103407902243, 0.006452687370322627], [0.0027829982073206906, 0.006476999558462748], [0.002788601150839403, 0.006501362009706332], [0.0027942188547827, 0.006525773863116187], [0.002799850991949728, 0.006550234232971763], [0.0028054972244782395, 0.006574742208602484], [0.0028111572038242425, 0.006599296854226912], [0.002816830570745557, 0.006623897208798031], [0.0028225169552893617, 0.006648542285854994], [0.002828215976783803, 0.00667323107338124], [0.002833927243833722, 0.006697962533669548], [0.0028396503543205768, 0.006722735603193962], [0.002845384895406613, 0.006747549192489009], [0.002851130443543345, 0.006772402186036285], [0.002856886564484383, 0.006797293442158781], [0.0028626528133026993, 0.006822221792922986], [0.002868428734412329, 0.00684718604404922], [0.002874213861594579, 0.006872184974830269], [0.0028800077180287957, 0.006897217338058531], [0.0028858098163276804, 0.006922281859962103], [0.0028916196585772355, 0.006947377240149799], [0.002897436736381345, 0.006972502151565546], [0.0029032605309109905, 0.0069976552404521905], [0.0029090905129581864, 0.007022835126325175], [0.0029149261429945754, 0.007048040401956119], [0.0029207668712347323, 0.0070732696333666465], [0.0029266121377042017, 0.007098521359832755], [0.0029324613723122235, 0.007123794093899752], [0.0029383139949291925, 0.007149086321408257], [0.0029441694154688112, 0.0071743965015313126], [0.0029500270339749505, 0.007199723066822926], [0.002955886240713191, 0.007225064423278256], [0.002961746416267046, 0.007250418950405634], [0.0029676069316388162, 0.007275785001310789], [0.002973467148355095, 0.007301160902793266], [0.00297932641857685, 0.007326544955455544], [0.002985184085214092, 0.0073519354338248254], [0.0029910394820450784, 
0.007377330586487867], [0.0029968919338400238, 0.007402728636239055], [0.0030027407564892613, 0.0074281277802418205], [0.0030085852571358606, 0.007453526190203774], [0.003014424734312599, 0.007478922012565629], [0.0030202584780832896, 0.007504313368704147], [0.003026085770188423, 0.007529698355149343], [0.003031905884195024, 0.007555075043816072], [0.0030377180856507547, 0.007580441482250205], [0.0030435216322421445, 0.007605795693889583], [0.0030493157739569453, 0.007631135678339849], [0.0030550997532505586, 0.007656459411665481], [0.0030608728052164543, 0.0076817648466958955], [0.0030666341577606053, 0.007707049913347047], [0.003072383031779785, 0.007732312518958536], [0.0030781186413438083, 0.007757550548646268], [0.0030838401938815666, 0.007782761865670929], [0.003089546890370893, 0.007807944311822333], [0.003095237925532164, 0.007833095707819617], [0.0031009124880256394, 0.007858213853727558], [0.0031065697606524763, 0.007883296529388973], [0.003112208920559412, 0.007908341494873227], [0.003117829139447088, 0.007933346490940978], [0.0031234295837819443, 0.007958309239525193], [0.0031290094150117314, 0.007983227444228353], [0.0031345677897845654, 0.008008098790835989], [0.0031401038601715564, 0.008032920947846427], [0.003145616773892945, 0.008057691567016832], [0.003151105674547794, 0.008082408283925395], [0.0031565697018472124, 0.008107068718549694], [0.0031620079918510805, 0.008131670475861174], [0.0031674196772083587, 0.008156211146435586], [0.0031728038874008703, 0.00818068830707935], [0.0031781597489907083, 0.008205099521471734], [0.0031834863858711568, 0.00822944234082266], [0.003188782919521227, 0.008253714304546031], [0.0031940484692637937, 0.008277912940948463], [0.003199282152527358, 0.008302035767933084], [0.003204483085111488, 0.008326080293718377], [0.0032096503814559156, 0.008350044017571764], [0.00321478315491339, 0.00837392443055768], [0.0032198805180262396, 0.008397719016299926], [0.003224941582806784, 0.008421425251757972], 
[0.0032299654610214766, 0.008445040608017042], [0.003234951264478985, 0.00846856255109149], [0.003239898105322112, 0.008491988542741325], [0.0032448050963236478, 0.008515316041301367], [0.0032496713511861865, 0.008538542502522846], [0.003254495984845906, 0.008561665380426933], [0.003259278113780387, 0.008584682128169829], [0.0032640168563204284, 0.008607590198918996], [0.0032687113329659496, 0.008630387046740198], [0.003273360666705946, 0.00865307012749472], [0.0032779639833425303, 0.008675636899746382], [0.0032825204118190417, 0.008698084825677976], [0.003287029084552262, 0.008720411372016288], [0.0032914891377686735, 0.0087426140109657], [0.003295899711844795, 0.008764690221149226], [0.0033002599516515174, 0.008786637488556974], [0.003304569006902456, 0.008808453307501136], [0.003308826032506215, 0.008830135181577006], [0.0033130301889225614, 0.008851680624629514], [0.0033171806425224127, 0.008873087161724488], [0.0033212765659515527, 0.00889435233012424], [0.003325317138497986, 0.008915473680266628], [0.0033293015464628613, 0.008936448776747088], [0.0033332289835347776, 0.008957275199302915], [0.003337098651167397, 0.008977950543799138], [0.00334090975896019, 0.0089984724232153], [0.003344661525042164, 0.009018838468632452], [0.0033483531764583814, 0.00903904633021964], [0.0033519839495590852, 0.009059093678219316], [0.0033555530903912312, 0.00907897820393063], [0.003359059855092182, 0.009098697620690337], [0.0033625035102853467, 0.009118249664850217], [0.003365883333477502, 0.009137632096750422], [0.0033691986134575026, 0.009156842701688046], [0.0033724486506961386, 0.009175879290880175], [0.0033756327577467954, 0.009194739702420565], [0.003378750259646627, 0.009213421802229387], [0.0033818004943178886, 0.009231923484995163], [0.003384782812969105, 0.009250242675108208], [0.0033876965804956857, 0.009268377327584836], [0.003390541175879653, 0.009286325428981633], [0.0033933159925880257, 0.00930408499829904], [0.0033960204389695586, 0.009321654087873438], 
[0.0033986539386493208, 0.009339030784257189], [0.0034012159309207645, 0.009356213209085801], [0.0034037058711348213, 0.009373199519931572], [0.0034061232310855644, 0.009389987911142941], [0.003408467499392047, 0.009406576614668954], [0.003410738181875776, 0.009422963900868135], [0.003412934801933416, 0.009439148079301062], [0.0034150569009042436, 0.00945512749950608], [0.00341710403843184, 0.009470900551757401], [0.00341907579281959, 0.009486465667805097], [0.003420971761379469, 0.009501821321596259], [0.0034227915607736494, 0.009516966029976797], [0.003424534827348476, 0.00953189835337327], [0.0034262012174602332, 0.00954661689645427], [0.0034277904077923576, 0.009561120308770606], [0.0034293020956635053, 0.009575407285374095], [0.0034307359993260692, 0.009589476567414058], [0.003432091858254652, 0.009603326942711399], [0.0034333694334240427, 0.00961695724630957], [0.003434568507576292, 0.009630366361002016]] -------------------------------------------------------------------------------- /fig/overfitting.json: -------------------------------------------------------------------------------- 1 | [[2.0762772323329082, 1.8232334122685845, 1.6640751933146665, 1.4913409287162824, 1.4626645665352562, 1.5608730982986192, 1.3270934349008427, 1.3031689400520545, 1.2737198013316875, 1.2353298430277617, 1.2781249875365142, 1.2587094591590358, 1.2236320447498565, 1.2049258878595992, 1.202838510821453, 1.2175903284804579, 1.2166038163981336, 1.2302002518540471, 1.2284292747614989, 1.2284082512336671, 1.2206853894877705, 1.21982789310683, 1.2416121174277031, 1.2386867792565612, 1.2590040086618466, 1.2442300811597213, 1.256214615756384, 1.2688359032682412, 1.262880085921275, 1.2580241299014177, 1.2715941639378459, 1.2704272355044199, 1.2713173651241083, 1.2883075453227311, 1.309478393757302, 1.2884464353157816, 1.2992864292684581, 1.2995723853510095, 1.3057065520137037, 1.2996067919082652, 1.3113694262185569, 1.3180980499575814, 1.3224531791316712, 1.3288895530170624, 
1.333896011747612, 1.3342655386450013, 1.3507230411896862, 1.349523874760193, 1.3486221834113297, 1.3492461107256304, 1.371103940789913, 1.363742107503537, 1.3591970586051429, 1.3628600849625045, 1.3715455620910941, 1.3762306334690999, 1.374571326678441, 1.3797013054519787, 1.3852744476067012, 1.3910542555139365, 1.3898246384066992, 1.3990962884350675, 1.4024643563307768, 1.4090064214871054, 1.4084778553386963, 1.4088023031568424, 1.4191800292184851, 1.4231642276020737, 1.4249505527888344, 1.4243906063296523, 1.4329048311102033, 1.4324957023669891, 1.4393786314154042, 1.4422194928893282, 1.4464979740530604, 1.4456638780161708, 1.4510268869602028, 1.4569921654272227, 1.4501932854980157, 1.4586286646321187, 1.4641932180653843, 1.4627134909477864, 1.4695342388383457, 1.4677910970476582, 1.4741781001557179, 1.4716445971833882, 1.4801857605543194, 1.4824209608683785, 1.4850739585015795, 1.4862526009107158, 1.4891994206257972, 1.4911449111215642, 1.4895703546607124, 1.4965483049082178, 1.498475574872792, 1.4983442080951213, 1.5006331103383848, 1.5026762910773346, 1.5056581200232744, 1.5052999301275902, 1.5101971277214103, 1.5052100436149518, 1.5122319812343581, 1.5140032252405993, 1.5124829079283069, 1.5155523493173086, 1.5173126087446009, 1.5182757080707934, 1.5226359219963441, 1.5219631288706079, 1.5280382373274002, 1.5305864446018889, 1.5308400057540297, 1.5298912015444006, 1.5299337876516403, 1.5359447484558009, 1.5373272763323333, 1.5357721830268081, 1.5411296193695012, 1.5408039410712637, 1.5435982660843079, 1.5448396233143082, 1.54640053335383, 1.5497924569986792, 1.5505113782392206, 1.5510873904758762, 1.5522372758736236, 1.5551534205865707, 1.5579037629164372, 1.5562529728398988, 1.557078203694122, 1.5605381965411875, 1.5615119183693009, 1.5620335052988363, 1.5636399289779603, 1.5666556667329066, 1.565967137736715, 1.5692899251189862, 1.569455366800145, 1.570201924893627, 1.5724338032777558, 1.5743182022210231, 1.5779052215415215, 1.5765908208317501, 
1.5777906843645095, 1.5807065832710021, 1.5826042798570108, 1.5834282733757874, 1.5828727942783614, 1.5856236197268949, 1.5865253156344346, 1.5870711209440933, 1.5901102159916298, 1.589831374782438, 1.5903365893797863, 1.5922289915737446, 1.5937242093943276, 1.5957483404630928, 1.5961125976163055, 1.5970150572826043, 1.5978109963232512, 1.5992551729862305, 1.6002494658191888, 1.6011359241282295, 1.6026033918901981, 1.6036086580112265, 1.6048749030222984, 1.6074694961111247, 1.606561408369042, 1.6080270858679979, 1.6070173269387908, 1.6073846856343872, 1.6108499262729024, 1.6142347957554661, 1.6123239687358804, 1.6161556699815407, 1.6165460098238449, 1.6175561633900144, 1.6189398493899978, 1.6196154264250775, 1.6203787165288632, 1.621001492936847, 1.6202434861259687, 1.6245124200435654, 1.6239635265755512, 1.6241046070201524, 1.6252458899999425, 1.6270324298456853, 1.6251838072169713, 1.6275992851310326, 1.6284252702633855, 1.63017431200309, 1.6310654169645247, 1.6317526370318307, 1.6338220017737439, 1.6337496047773168, 1.6341251344927106, 1.6352945051866614, 1.6350194166439092, 1.6370499538617151, 1.6398726421890257, 1.6392502241532569, 1.6407454538992294, 1.6403690089576106, 1.6407818413256172, 1.6422394672335026, 1.6440166122111106, 1.6445019840290256, 1.6454238395620735, 1.6446012999992357, 1.646388125891822, 1.647477767725033, 1.6489903681956257, 1.6495670111667955, 1.6518317266949349, 1.6521411302385651, 1.6516324900159436, 1.6519268509016449, 1.6536769664130893, 1.6557231011758236, 1.6556135178242031, 1.6563575299485291, 1.6585034371713305, 1.6590068125468771, 1.6594819047820268, 1.6602894618927027, 1.6610971710252704, 1.6614742412028516, 1.662062758010538, 1.6618888908042855, 1.664003604166757, 1.6650773987115881, 1.6655027680401031, 1.6669239090330996, 1.6670229476327978, 1.6673755478034697, 1.668631108043269, 1.6691187845751598, 1.6698574553969809, 1.6699987129628646, 1.6718587417171702, 1.6728186780767957, 1.6735770871095164, 1.6751317162887345, 
1.6754619738035605, 1.6757497694666139, 1.6767453551142881, 1.6773935555529487, 1.6790329446692798, 1.6796379042981611, 1.6798597028202431, 1.6814275113531045, 1.681540584668908, 1.6825376023031897, 1.6838370704483998, 1.6839490568545408, 1.6849619150773361, 1.6856698652111073, 1.6863000466757747, 1.6871126637371965, 1.6880316020404877, 1.6879506729766618, 1.6894802944100824, 1.6905853787788423, 1.6917216848211414, 1.6926191212187904, 1.6929352076880684, 1.6942338256895795, 1.6948350819305742, 1.6947248010331575, 1.6956565470999065, 1.6966226855434137, 1.6977135512214465, 1.6976628333622414, 1.6992728088551838, 1.6995188736719555, 1.7004758991163513, 1.7003390074918037, 1.701757593590616, 1.7030610036769165, 1.7032572445905845, 1.70424379410192, 1.7050439989771855, 1.7050460437739656, 1.705340680240933, 1.7063172030736129, 1.7074694863569662, 1.7081060241444701, 1.7083152915972599, 1.7100598915164169, 1.7098580352207235, 1.7110452055463516, 1.7118315825579393, 1.7119458704259569, 1.7128193121191575, 1.714088357669219, 1.7143137710846792, 1.7151310218423155, 1.7158160290566882, 1.7164824543799349, 1.7170992986317428, 1.7176374854282062, 1.7180675777622618, 1.7183529546663991, 1.7183913489385712, 1.7200745183529782, 1.7201860622786533, 1.7209378765189278, 1.7211755130028632, 1.7227001954459273, 1.7233033322709161, 1.724388593301251, 1.7249286817001739, 1.7257212014285681, 1.7264759882752161, 1.7263145509431113, 1.727169750737453, 1.7274545949025009, 1.728138187570482, 1.7284329943827041, 1.7291647307556921, 1.7297691124388797, 1.7303999198392592, 1.7309397188198092, 1.731884218614588, 1.732752372009305, 1.7337895563134313, 1.7338046236202502, 1.7343991319697829, 1.7354045120011685, 1.7359328966782865, 1.7363186919712537, 1.7368728425169133, 1.7376556264901872, 1.738218355695242, 1.7389320558428096, 1.7392586695357521, 1.7395726489260961, 1.7403457853492119, 1.7411144686251934, 1.7418114756639416, 1.7423787115928511, 1.7429156859372819, 1.7433516620794796, 
1.7445995405595869, 1.7449650517928348, 1.7454219936222521, 1.7454049499805062, 1.7461754045631253, 1.747238770079671, 1.7480364894800848, 1.7481891743633657, 1.7484612615979531, 1.7492265370334927, 1.7499314246477431, 1.7503425435026281, 1.7509597451899421, 1.7513546402678131, 1.7521155770124217, 1.7527284609234106, 1.7529769148484364, 1.7538672981186787, 1.7544452588346211, 1.7549061780496615, 1.7553447817113197, 1.755809458463981, 1.7558520671233728, 1.7568602710475358, 1.7568136319142174, 1.7575653625849685, 1.758205144851257, 1.7587151972026469, 1.7591737337097375, 1.7594811212041248, 1.7599224471680641, 1.7604958546917258, 1.7614357150479159, 1.7620773477904375, 1.7629340224321914, 1.7634360517269456, 1.7634779671556928, 1.7642836857118194, 1.7646825015144432, 1.7652084365396346, 1.765550476840142, 1.766323427364384, 1.7671269295963092, 1.7674831990461801, 1.7679902398030436, 1.7688556765701444, 1.7693755350034828, 1.7691087563919485, 1.7699593793502248, 1.7702219339149627, 1.7709157134395872, 1.7709574156060244, 1.7720375325001132, 1.7722910641140253, 1.7728105919575348, 1.7731493757222807], [5887, 6505, 6970, 7271, 7433, 7198, 7710, 7747, 7850, 7899, 7853, 7848, 7986, 8020, 8046, 8039, 8056, 8090, 8090, 8107, 8086, 8126, 8104, 8107, 8116, 8121, 8128, 8121, 8135, 8126, 8137, 8149, 8146, 8124, 8118, 8146, 8134, 8156, 8148, 8165, 8170, 8146, 8157, 8157, 8156, 8161, 8134, 8156, 8166, 8145, 8140, 8154, 8156, 8147, 8144, 8147, 8144, 8156, 8154, 8157, 8148, 8137, 8144, 8145, 8148, 8149, 8147, 8152, 8154, 8152, 8136, 8151, 8145, 8152, 8150, 8155, 8152, 8147, 8159, 8148, 8164, 8160, 8153, 8149, 8153, 8158, 8153, 8160, 8154, 8165, 8158, 8155, 8159, 8164, 8170, 8176, 8174, 8176, 8165, 8173, 8163, 8189, 8181, 8175, 8180, 8185, 8177, 8179, 8184, 8178, 8185, 8177, 8182, 8194, 8189, 8174, 8184, 8188, 8180, 8192, 8180, 8181, 8197, 8183, 8184, 8194, 8186, 8188, 8189, 8195, 8192, 8184, 8194, 8197, 8197, 8185, 8196, 8192, 8200, 8200, 8199, 8197, 8191, 8196, 8193, 8193, 8192, 
8194, 8201, 8195, 8196, 8195, 8196, 8197, 8190, 8196, 8197, 8197, 8193, 8190, 8195, 8196, 8199, 8195, 8191, 8192, 8187, 8189, 8192, 8193, 8189, 8192, 8194, 8187, 8189, 8193, 8197, 8193, 8194, 8195, 8197, 8191, 8198, 8196, 8196, 8196, 8194, 8192, 8196, 8192, 8193, 8192, 8197, 8195, 8191, 8192, 8191, 8194, 8191, 8190, 8192, 8191, 8195, 8190, 8197, 8193, 8194, 8191, 8196, 8192, 8192, 8198, 8199, 8199, 8200, 8199, 8195, 8196, 8199, 8191, 8194, 8199, 8205, 8195, 8204, 8206, 8207, 8208, 8204, 8203, 8203, 8207, 8207, 8200, 8206, 8206, 8203, 8203, 8206, 8210, 8208, 8207, 8208, 8208, 8209, 8209, 8208, 8211, 8210, 8209, 8208, 8213, 8205, 8207, 8211, 8210, 8213, 8212, 8213, 8211, 8216, 8213, 8214, 8214, 8216, 8214, 8218, 8218, 8218, 8217, 8220, 8217, 8221, 8217, 8217, 8218, 8217, 8217, 8218, 8223, 8221, 8223, 8222, 8226, 8218, 8221, 8217, 8219, 8218, 8221, 8219, 8218, 8221, 8220, 8220, 8214, 8220, 8218, 8218, 8220, 8221, 8220, 8222, 8220, 8223, 8220, 8221, 8223, 8219, 8217, 8221, 8217, 8218, 8220, 8221, 8220, 8219, 8221, 8219, 8219, 8223, 8223, 8222, 8221, 8221, 8221, 8219, 8220, 8220, 8221, 8222, 8222, 8221, 8222, 8222, 8223, 8222, 8222, 8222, 8220, 8221, 8219, 8220, 8219, 8219, 8219, 8220, 8222, 8220, 8218, 8218, 8220, 8222, 8221, 8220, 8221, 8219, 8222, 8220, 8218, 8221, 8220, 8221, 8223, 8224, 8223, 8225, 8224, 8223, 8223, 8224, 8226, 8223, 8226, 8227, 8223, 8222, 8222, 8223, 8222, 8222, 8223, 8221, 8222, 8221, 8222, 8223, 8222, 8223, 8221, 8221, 8222, 8221, 8220, 8223, 8223, 8221, 8220, 8220, 8223], [1.8433647860328504, 1.4777434227600235, 1.2072861295975754, 1.0133122929166287, 0.90463155805724549, 0.88914085245876628, 0.69510736399672024, 0.60063282069902524, 0.54091603110413877, 0.47790622727830795, 0.45205834534806816, 0.41823557259928568, 0.36353886658117263, 0.34097725702984655, 0.30704391068438625, 0.28505238206157008, 0.27569888987164376, 0.24768957063883623, 0.23176895744869463, 0.21675013886039948, 0.20851633183590543, 0.19581288303288452, 0.1883450862028723, 
0.17598212888313519, 0.17148361118265443, 0.1579893219777749, 0.15232727768047913, 0.14908652369052086, 0.13959561541748497, 0.13575056473237712, 0.13038402753584682, 0.12633393160050335, 0.12301377811155474, 0.11760787528185435, 0.11768063532050554, 0.1116090975652263, 0.10594482960527374, 0.10146350864471576, 0.098950522532661414, 0.097470601335692755, 0.092587258119849816, 0.08890875709297294, 0.087334066191656873, 0.083968079007007201, 0.080425567454585997, 0.077388473929440482, 0.075732073394689639, 0.073264736306067349, 0.071041388148701201, 0.069457129692052144, 0.068501170653270813, 0.066032566370952353, 0.063988473365526183, 0.062795927261155279, 0.060741640047227284, 0.059231989932508271, 0.05768849915339639, 0.05665547621362698, 0.054958317927951077, 0.053790448746231824, 0.052736286230260894, 0.051837510403181575, 0.050246365862518169, 0.049157161612436856, 0.048399496486416158, 0.047223244213956135, 0.0462029122464475, 0.045362346946900868, 0.044991942955692463, 0.043548301006926415, 0.042619315582575576, 0.041872679888486676, 0.041353003773040818, 0.040343023072558423, 0.039610640543463034, 0.038827894599382441, 0.038165321713376478, 0.037555764670128743, 0.036994307367393418, 0.036209725814626531, 0.035681364944148621, 0.035011174581168515, 0.034291592495846304, 0.033547709156373326, 0.032718263569078379, 0.032023904582162074, 0.031191659853705877, 0.030626708962014425, 0.029958425643681459, 0.029353265126344338, 0.028797964387740602, 0.028422012718391305, 0.027812191605286883, 0.02735501112071697, 0.026993564391853634, 0.026569113029157501, 0.026092064055110505, 0.025674781762346367, 0.025305894007476161, 0.024889083317963079, 0.024558131841685489, 0.024199329722505512, 0.023811240945016011, 0.02349334595548817, 0.023144448162318861, 0.022855591970993343, 0.022543488743196019, 0.022249708906225588, 0.021965529914739868, 0.021698026254864673, 0.021378123124622207, 0.021088144829246657, 0.020886774671724655, 0.020580654801762799, 0.020340928401971565, 
0.020106832060966386, 0.019866100581400464, 0.019595427663293804, 0.019357863817740469, 0.019161344170354123, 0.018904348055376753, 0.018717446712462619, 0.018465695920465829, 0.018272597088603904, 0.018074038569034587, 0.017902496111286439, 0.017688582191512987, 0.017482922233189643, 0.017310611695394759, 0.017115500371685799, 0.016952389472265048, 0.016780096680463093, 0.016597920851012388, 0.016464051880113292, 0.016282898613349604, 0.016136333558525773, 0.015962599748449359, 0.015804226601263859, 0.01564233479142425, 0.015498832345390763, 0.015356876162963705, 0.01521854563645951, 0.015073417138070878, 0.014934450228727312, 0.014825699216383425, 0.014665671908121175, 0.014541603837007016, 0.014406468550218309, 0.014283810530787627, 0.014161122679261126, 0.014032574364961678, 0.013910840101390157, 0.013798885792050663, 0.013687896782864042, 0.013560391660895389, 0.013438577986090204, 0.013334228955099942, 0.013211877601969076, 0.013097885924483273, 0.012989409068889089, 0.012879749073942919, 0.012764357708175841, 0.012655707745460402, 0.012552080081974131, 0.012437201020647024, 0.012337374519593609, 0.012235799102028641, 0.012145033948497589, 0.012041173192129051, 0.011950396057231671, 0.011858981944298361, 0.011775080348835348, 0.011680489496468773, 0.011599470483943787, 0.011508039313397542, 0.011428340295649663, 0.011349428130039004, 0.011268838312831554, 0.011189912165110935, 0.011109723837424776, 0.011034560780687212, 0.010957160107627907, 0.010882577878407575, 0.010811098528082833, 0.010735442936960134, 0.010665821648065388, 0.010595437817822002, 0.010527971065975287, 0.010460804790718484, 0.010390628071234642, 0.010324849341451231, 0.010260426245422116, 0.01019672721940704, 0.010130572405193659, 0.010072420053693198, 0.010006628636997746, 0.0099438229901934062, 0.0098844582257840465, 0.0098246362024759087, 0.0097663800115457729, 0.0097127946154311525, 0.0096488779061362981, 0.0095934368030428964, 0.0095357715395345787, 0.0094828105181004665, 
0.0094248109827411788, 0.0093706712239598373, 0.0093193323548417979, 0.0092640297508175557, 0.0092140979255875208, 0.0091599683295718951, 0.0091085532532171488, 0.0090566144373361267, 0.0090078660753137273, 0.0089574895292539591, 0.0089102414421480258, 0.0088602907747430508, 0.0088130951045928482, 0.0087627473899933809, 0.0087169057599027176, 0.008669602206522074, 0.0086245638096965112, 0.008578829590782322, 0.0085337682055181534, 0.0084904627352943417, 0.0084437487803313786, 0.0084005496486743488, 0.0083564344772356029, 0.0083141617581171147, 0.0082711428471498073, 0.0082280554148385712, 0.0081861058590978232, 0.0081453445960328139, 0.0081050739787153214, 0.0080629964697627073, 0.0080227532574564074, 0.0079824692727665826, 0.0079430728092899933, 0.0079037010818164485, 0.007866500840720405, 0.0078262183123015989, 0.0077880045569414854, 0.0077501375585374184, 0.0077130927824958879, 0.0076749447769945724, 0.0076383206771078876, 0.0076037234598087793, 0.0075656978732744681, 0.0075306312245213185, 0.0074933959545258758, 0.007457863479961753, 0.0074224044906936416, 0.0073875427008978636, 0.0073533040575986478, 0.0073188636048639828, 0.0072851975374219257, 0.007249551514341885, 0.0072155976990766037, 0.0071837716588880659, 0.0071490490354368446, 0.0071149465640242852, 0.0070813394799004378, 0.007047562934784808, 0.0070148336131211995, 0.0069818923170584749, 0.0069501733149712605, 0.0069160116872818119, 0.0068840401735022448, 0.0068514561156714704, 0.0068196243684198622, 0.0067863217789309715, 0.0067534474287550073, 0.0067200631321686754, 0.0066885181859545314, 0.0066568666894365278, 0.0066240210906374454, 0.0065926113197570003, 0.0065626818950437473, 0.0065315628190666742, 0.0065011591385262619, 0.0064707220819024717, 0.0064414830189593493, 0.006412153696041689, 0.0063833172680227371, 0.0063550710682834894, 0.0063263283024680974, 0.006298472160087395, 0.0062705864326000225, 0.0062432859687904735, 0.0062170035873869919, 0.0061896468954568112, 0.0061633789845077242, 
0.0061370029472117751, 0.0061113940805202389, 0.006084934659958091, 0.0060594838733285623, 0.0060344559813401231, 0.0060094203492223955, 0.0059846013866920586, 0.0059595844553596081, 0.0059354063125516329, 0.0059106299326143925, 0.0058865500049247376, 0.0058629576783263884, 0.0058398259656488255, 0.0058157681197817893, 0.0057925894220656601, 0.005769719671300088, 0.0057465133779847615, 0.005723885452264303, 0.0057011088426493122, 0.0056788750789765873, 0.0056565840835991754, 0.0056345091063197805, 0.0056126545497789098, 0.0055907478836642096, 0.0055695879730558121, 0.0055480499521874576, 0.0055269393518896613, 0.0055055760390316528, 0.0054846407395392127, 0.0054639492319568522, 0.0054430681469272582, 0.0054228564773156733, 0.0054023245806928445, 0.0053821589279841198, 0.0053622701718764919, 0.0053422962061517205, 0.0053225545608313832, 0.0053025727602387341, 0.0052830117459178542, 0.0052637900268743395, 0.0052445911331395157, 0.005225301628550106, 0.0052063734092377542, 0.0051873981768633098, 0.0051686550874710333, 0.0051501308303700411, 0.0051317159223978588, 0.0051132407038391343, 0.0050949646868208237, 0.0050768163679191418, 0.0050588920830417876, 0.0050410603343375689, 0.0050234460109072039, 0.0050058479424628429, 0.0049883551396970116, 0.0049709905024850117, 0.0049534380633703065, 0.0049362622084869086, 0.0049191825923840336, 0.0049019339002642093, 0.0048852638935989387, 0.0048685355903025033, 0.0048516783369022421, 0.004835176074289591, 0.0048187414702752064, 0.0048024676248850815, 0.004786205475115702, 0.0047700818966571806, 0.0047540229511325634, 0.0047381312460395321, 0.0047222819541272538, 0.004706582163886538, 0.0046908199271972883, 0.004675294500871013, 0.0046600271163220889, 0.004644560115930613, 0.0046292967748754361, 0.0046142583741101018, 0.0045990679936898007, 0.0045841369248796251, 0.0045692503625969364, 0.0045545855753471856, 0.0045398450796813317, 0.0045253266069212915, 0.0045109305522560966, 0.0044964925773122444, 0.0044821543135014023, 
0.0044678460394820168, 0.0044536907146381774, 0.0044396992221706283, 0.0044257701614943187, 0.0044119841879101604, 0.0043980561122206668, 0.0043843449981956339, 0.0043706979320343045, 0.004357190360999688, 0.0043441270550987543, 0.0043307263717703463, 0.0043174162674548587, 0.004304214971579858, 0.0042909772791592301, 0.004277917818300953, 0.0042651151252712065, 0.0042522204909684596, 0.0042395626122975951, 0.0042267964052173168, 0.0042141543979121142, 0.0042016444290295845], [690, 749, 812, 848, 872, 865, 914, 932, 948, 949, 957, 966, 970, 973, 979, 977, 981, 982, 982, 985, 984, 988, 989, 990, 988, 990, 991, 991, 992, 992, 993, 993, 992, 992, 993, 994, 994, 995, 996, 995, 995, 996, 996, 996, 997, 997, 998, 998, 998, 998, 998, 998, 997, 997, 998, 998, 998, 998, 998, 998, 998, 998, 998, 998, 998, 999, 999, 1000, 1000, 1000, 1000, 1000, 999, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 
1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]] -------------------------------------------------------------------------------- /fig/overfitting.py: -------------------------------------------------------------------------------- 1 | """ 2 | overfitting 3 | ~~~~~~~~~~~ 4 | 5 | Plot graphs to illustrate the problem of overfitting. 6 | """ 7 | 8 | # Standard library 9 | import json 10 | import random 11 | import sys 12 | 13 | # My library 14 | sys.path.append('../src/') 15 | import mnist_loader 16 | import network2 17 | 18 | # Third-party libraries 19 | import matplotlib.pyplot as plt 20 | import numpy as np 21 | 22 | 23 | def main(filename, num_epochs, 24 | training_cost_xmin=200, 25 | test_accuracy_xmin=200, 26 | test_cost_xmin=0, 27 | training_accuracy_xmin=0, 28 | training_set_size=1000, 29 | lmbda=0.0): 30 | """``filename`` is the name of the file where the results will be 31 | stored. ``num_epochs`` is the number of epochs to train for. 32 | ``training_set_size`` is the number of images to train on. 33 | ``lmbda`` is the regularization parameter. The other parameters 34 | set the epochs at which to start plotting on the x axis. 
35 | """ 36 | run_network(filename, num_epochs, training_set_size, lmbda) 37 | make_plots(filename, num_epochs, 38 | test_accuracy_xmin, 39 | training_cost_xmin, 40 | test_accuracy_xmin, 41 | training_accuracy_xmin, 42 | training_set_size) 43 | 44 | def run_network(filename, num_epochs, training_set_size=1000, lmbda=0.0): 45 | """Train the network for ``num_epochs`` on ``training_set_size`` 46 | images, and store the results in ``filename``. Those results can 47 | later be used by ``make_plots``. Note that the results are stored 48 | to disk in large part because it's convenient not to have to 49 | ``run_network`` each time we want to make a plot (it's slow). 50 | 51 | """ 52 | # Make results more easily reproducible 53 | random.seed(12345678) 54 | np.random.seed(12345678) 55 | training_data, validation_data, test_data = mnist_loader.load_data_wrapper() 56 | net = network2.Network([784, 30, 10], cost=network2.CrossEntropyCost()) 57 | net.large_weight_initializer() 58 | test_cost, test_accuracy, training_cost, training_accuracy \ 59 | = net.SGD(training_data[:training_set_size], num_epochs, 10, 0.5, 60 | evaluation_data=test_data, lmbda = lmbda, 61 | monitor_evaluation_cost=True, 62 | monitor_evaluation_accuracy=True, 63 | monitor_training_cost=True, 64 | monitor_training_accuracy=True) 65 | f = open(filename, "w") 66 | json.dump([test_cost, test_accuracy, training_cost, training_accuracy], f) 67 | f.close() 68 | 69 | def make_plots(filename, num_epochs, 70 | training_cost_xmin=200, 71 | test_accuracy_xmin=200, 72 | test_cost_xmin=0, 73 | training_accuracy_xmin=0, 74 | training_set_size=1000): 75 | """Load the results from ``filename``, and generate the corresponding 76 | plots. 
""" 77 | f = open(filename, "r") 78 | test_cost, test_accuracy, training_cost, training_accuracy \ 79 | = json.load(f) 80 | f.close() 81 | plot_training_cost(training_cost, num_epochs, training_cost_xmin) 82 | plot_test_accuracy(test_accuracy, num_epochs, test_accuracy_xmin) 83 | plot_test_cost(test_cost, num_epochs, test_cost_xmin) 84 | plot_training_accuracy(training_accuracy, num_epochs, 85 | training_accuracy_xmin, training_set_size) 86 | plot_overlay(test_accuracy, training_accuracy, num_epochs, 87 | min(test_accuracy_xmin, training_accuracy_xmin), 88 | training_set_size) 89 | 90 | def plot_training_cost(training_cost, num_epochs, training_cost_xmin): 91 | fig = plt.figure() 92 | ax = fig.add_subplot(111) 93 | ax.plot(np.arange(training_cost_xmin, num_epochs), 94 | training_cost[training_cost_xmin:num_epochs], 95 | color='#2A6EA6') 96 | ax.set_xlim([training_cost_xmin, num_epochs]) 97 | ax.grid(True) 98 | ax.set_xlabel('Epoch') 99 | ax.set_title('Cost on the training data') 100 | plt.show() 101 | 102 | def plot_test_accuracy(test_accuracy, num_epochs, test_accuracy_xmin): 103 | fig = plt.figure() 104 | ax = fig.add_subplot(111) 105 | ax.plot(np.arange(test_accuracy_xmin, num_epochs), 106 | [accuracy/100.0 107 | for accuracy in test_accuracy[test_accuracy_xmin:num_epochs]], 108 | color='#2A6EA6') 109 | ax.set_xlim([test_accuracy_xmin, num_epochs]) 110 | ax.grid(True) 111 | ax.set_xlabel('Epoch') 112 | ax.set_title('Accuracy (%) on the test data') 113 | plt.show() 114 | 115 | def plot_test_cost(test_cost, num_epochs, test_cost_xmin): 116 | fig = plt.figure() 117 | ax = fig.add_subplot(111) 118 | ax.plot(np.arange(test_cost_xmin, num_epochs), 119 | test_cost[test_cost_xmin:num_epochs], 120 | color='#2A6EA6') 121 | ax.set_xlim([test_cost_xmin, num_epochs]) 122 | ax.grid(True) 123 | ax.set_xlabel('Epoch') 124 | ax.set_title('Cost on the test data') 125 | plt.show() 126 | 127 | def plot_training_accuracy(training_accuracy, num_epochs, 128 | training_accuracy_xmin, 
training_set_size): 129 | fig = plt.figure() 130 | ax = fig.add_subplot(111) 131 | ax.plot(np.arange(training_accuracy_xmin, num_epochs), 132 | [accuracy*100.0/training_set_size 133 | for accuracy in training_accuracy[training_accuracy_xmin:num_epochs]], 134 | color='#2A6EA6') 135 | ax.set_xlim([training_accuracy_xmin, num_epochs]) 136 | ax.grid(True) 137 | ax.set_xlabel('Epoch') 138 | ax.set_title('Accuracy (%) on the training data') 139 | plt.show() 140 | 141 | def plot_overlay(test_accuracy, training_accuracy, num_epochs, xmin, 142 | training_set_size): 143 | fig = plt.figure() 144 | ax = fig.add_subplot(111) 145 | ax.plot(np.arange(xmin, num_epochs), 146 | [accuracy/100.0 for accuracy in test_accuracy], 147 | color='#2A6EA6', 148 | label="Accuracy on the test data") 149 | ax.plot(np.arange(xmin, num_epochs), 150 | [accuracy*100.0/training_set_size 151 | for accuracy in training_accuracy], 152 | color='#FFA933', 153 | label="Accuracy on the training data") 154 | ax.grid(True) 155 | ax.set_xlim([xmin, num_epochs]) 156 | ax.set_xlabel('Epoch') 157 | ax.set_ylim([90, 100]) 158 | plt.legend(loc="lower right") 159 | plt.show() 160 | 161 | if __name__ == "__main__": 162 | filename = raw_input("Enter a file name: ") 163 | num_epochs = int(raw_input( 164 | "Enter the number of epochs to run for: ")) 165 | training_cost_xmin = int(raw_input( 166 | "training_cost_xmin (suggest 200): ")) 167 | test_accuracy_xmin = int(raw_input( 168 | "test_accuracy_xmin (suggest 200): ")) 169 | test_cost_xmin = int(raw_input( 170 | "test_cost_xmin (suggest 0): ")) 171 | training_accuracy_xmin = int(raw_input( 172 | "training_accuracy_xmin (suggest 0): ")) 173 | training_set_size = int(raw_input( 174 | "Training set size (suggest 1000): ")) 175 | lmbda = float(raw_input( 176 | "Enter the regularization parameter, lambda (suggest: 5.0): ")) 177 | main(filename, num_epochs, training_cost_xmin, 178 | test_accuracy_xmin, test_cost_xmin, training_accuracy_xmin, 179 | training_set_size, lmbda) 
180 | -------------------------------------------------------------------------------- /fig/overfitting1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/overfitting1.png -------------------------------------------------------------------------------- /fig/overfitting2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/overfitting2.png -------------------------------------------------------------------------------- /fig/overfitting3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/overfitting3.png -------------------------------------------------------------------------------- /fig/overfitting4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/overfitting4.png -------------------------------------------------------------------------------- /fig/overfitting_full.json: -------------------------------------------------------------------------------- 1 | [[0.56135590058630858, 0.47806921271034553, 0.457510836259925, 0.42504920544144992, 0.39449553344420019, 0.39810448800345, 0.37017079712250733, 0.37403997639944547, 0.36290253019659285, 0.4006868170859208, 0.36817548958488616, 0.37299310675826219, 0.36871967242261605, 0.37146610246666006, 0.35704621996697938, 0.35821464151288968, 0.38622103466509744, 0.37010939716781127, 0.36539832104327125, 0.35511546847032671, 0.3828088676932585, 0.36160025922354638, 0.37028708356461698, 0.37605182846277163, 
0.36634313696187393, 0.36129044456360238, 0.37531885586439506, 0.36415225595876555, 0.35707895858237054, 0.36631987373588193], [9136, 9275, 9307, 9377, 9450, 9429, 9468, 9488, 9494, 9424, 9483, 9483, 9505, 9499, 9508, 9508, 9445, 9524, 9524, 9524, 9494, 9527, 9518, 9505, 9533, 9529, 9512, 9530, 9532, 9531], [0.55994588582554705, 0.44664870303435988, 0.42455329174078477, 0.38578320429266705, 0.33992291017592285, 0.33162477096795895, 0.3137480626518645, 0.30028971890544093, 0.27353890048167528, 0.30236927117202678, 0.26487026303889277, 0.2661714884193439, 0.24734280015146709, 0.26355551438395558, 0.23088530423416964, 0.22618350577327287, 0.25137541006767478, 0.23085585354651994, 0.21417931191800957, 0.20049587923059808, 0.23713128948069295, 0.20327728799861464, 0.21953883029836488, 0.20264436321820509, 0.19643949703516961, 0.18467980669870671, 0.18788606162530633, 0.18535916502880764, 0.18466759834259142, 0.17218286758911475], [45708, 46605, 46797, 47190, 47543, 47570, 47638, 47838, 48061, 47825, 48160, 48195, 48265, 48156, 48439, 48449, 48267, 48433, 48598, 48697, 48380, 48648, 48500, 48669, 48734, 48796, 48802, 48837, 48810, 48932]] -------------------------------------------------------------------------------- /fig/overfitting_full.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/overfitting_full.png -------------------------------------------------------------------------------- /fig/pca_hard_data.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/pca_hard_data.png -------------------------------------------------------------------------------- /fig/pca_hard_data_fit.png: -------------------------------------------------------------------------------- 
"""
pca_limitations
~~~~~~~~~~~~~~~

Plot graphs to illustrate the limitations of PCA.

Two figures are shown one after the other: 20 noisy sample points lying
on a helix, and then the same points together with the noise-free
helix.
"""

# Third-party libraries
# Axes3D is not referenced directly; the import is kept so that the
# '3d' projection is available on Matplotlib releases that only
# register it when mplot3d is imported.
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
import numpy as np

# Plot just the data
fig = plt.figure()
# Figure.gca() no longer accepts keyword arguments in current
# Matplotlib, so the 3d axes are created with add_subplot(), which
# works on old and new releases alike.
ax = fig.add_subplot(111, projection='3d')
z = np.linspace(-2, 2, 20)
theta = np.linspace(-4 * np.pi, 4 * np.pi, 20)
# Points on the unit-radius helix, perturbed by Gaussian noise with
# standard deviation 0.03 in x and y.
x = np.sin(theta)+0.03*np.random.randn(20)
y = np.cos(theta)+0.03*np.random.randn(20)
ax.plot(x, y, z, 'ro')
plt.show()

# Plot the data and the helix together
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
z_helix = np.linspace(-2, 2, 100)
theta_helix = np.linspace(-4 * np.pi, 4 * np.pi, 100)
x_helix = np.sin(theta_helix)
y_helix = np.cos(theta_helix)
ax.plot(x, y, z, 'ro')
ax.plot(x_helix, y_helix, z_helix, '')
plt.show()
1.2805727395923963, 1.2722125928642627, 1.2638997694524687, 1.269084743039151, 1.2659230794396108, 1.2641539416626584, 1.2752029232558733, 1.298792042906463, 1.2702727329507408, 1.2711688340896683, 1.2666235616415085, 1.2674371283336907, 1.2595334250676924, 1.2648151045633766, 1.2671334304994974, 1.266707046535049, 1.2656886025740639, 1.2691346809011976, 1.2584242684159623, 1.2740111893572812, 1.2663064744488597, 1.2615652820429444, 1.2582241448899749, 1.2739320485437491, 1.2712439351275546, 1.2545617120441739, 1.2508444846333084, 1.2591492899022321, 1.2536296905827162, 1.2468994556717075, 1.2484355031942465, 1.2471530150490704, 1.2505186156687369, 1.2388584688991651, 1.2430898462249378, 1.2398539196469682, 1.242251547050703, 1.2367804592809712, 1.2298331549219981, 1.2395688633999551, 1.2354039648955897, 1.2344606308529216, 1.2266843921216057, 1.2265644119519759, 1.2245581771451541, 1.2248119226544247, 1.2243084822111481, 1.2212309349638009, 1.2161717307338038, 1.2225008376402358, 1.2193471795614443, 1.2066653202233959, 1.2123964143141386, 1.2122530086322325, 1.2045089672810716, 1.2071180830593222, 1.1989333713336954, 1.1986255015016054, 1.1956459321325097, 1.2033704463208892, 1.2021008432740117, 1.1955283612344483, 1.1952024132825856, 1.1911013449298988, 1.1876590163918093, 1.1799304033665541, 1.1872817515808496, 1.1853155824780264, 1.1801518934970823, 1.1775421394848582, 1.1789945000698789, 1.1796464383843646, 1.1712541492322308, 1.1690597759769423, 1.1620993711661565, 1.169520268455509, 1.1706984323357481, 1.16142157165315, 1.1594443722282681, 1.1551307036588288, 1.1560927746464871, 1.1558458569577748, 1.1495662430692957, 1.1484728277323, 1.1500520790631064, 1.1505873482743494, 1.1434314002731534, 1.1399993109386326, 1.1374729793443425, 1.1387912606672026, 1.1355838671499126, 1.1407036755338451, 1.1300401447223136, 1.1322867822750577, 1.1292371015493867, 1.128702809951617, 1.1294136504785579, 1.1283079440929984, 1.1220916685170936, 1.1201001413539298, 
1.115187080898234, 1.1209986528548488, 1.1105355050898427, 1.1038970751483002, 1.1107296608773507, 1.1081484849851853, 1.1024815938527315, 1.1049008621060641, 1.1049124053518378, 1.0973347585372921, 1.0989702833881896, 1.0982371727299989, 1.093818667730126, 1.0928775595886391, 1.0896907351586045, 1.0924381381269741, 1.08208790851767, 1.0886157209258644, 1.0871294743556819, 1.0874176569695346, 1.0810207377200443, 1.0741795043940798, 1.0758584646112539, 1.0751401687109641, 1.0742516957490034, 1.0782702961220325, 1.0722235429082911, 1.0676018687662321, 1.0681955051837146, 1.0659656804470576, 1.065476631432785, 1.0568244036884478, 1.0563633108751844, 1.0531856804602515, 1.0538622665809867, 1.0529003762157403, 1.0518237229645757, 1.0532673191813249, 1.0470748079542624, 1.0473355198727492, 1.0478358455529726, 1.0465867871958874, 1.0427114422019927, 1.0327604365967915, 1.0302075306662013, 1.0421326895298604, 1.0461320167330266, 1.0327502589606541, 1.0460621333327711, 1.0316861390185799, 1.0312932803708881, 1.0354453434095872, 1.0288234156512026, 1.0220427243291068, 1.0236529852899188, 1.022506499230259, 1.0327268485891929, 1.0183557109961541, 1.0166409797026996, 1.0201653625824105, 1.0181942185781696, 1.0059281072990294, 1.0151120898053012, 1.0158007330308185, 1.0076227765153161, 1.0139342289012658, 1.0108626749865222, 1.0133465258769703, 1.0093790455178744, 1.0032704939881629, 1.0065849479396327, 0.99239921410409282, 1.0078593577776735, 1.0056533242624528, 0.99382804473952768, 1.0000313327944634, 0.99267979287687835, 0.98996333995056751, 0.98771450417037443, 0.98835879860887599, 0.98707998292775845, 0.99162222526658916, 0.98196648125555774, 0.98830619571563316, 0.98659881401490612, 0.99273488773050456, 0.98124338898029595, 0.98701911078915805, 0.97950085339921733, 0.97754982301824889, 0.97393880231450858, 0.97728343447809918, 0.9822446382489719, 0.97264123514085032, 0.97552252049617039, 0.97787781129407614, 0.97911317538881426, 0.9685897325254843, 0.97031228863494989, 
0.97280520294417017, 0.96809562498274027, 0.96329522921298438, 0.96499204553248841, 0.96872124082325861, 0.97279943472003694, 0.96380722647531536, 0.9681111619139029, 0.95255860371790635, 0.95685889512103384, 0.95490880778293086, 0.96165060698066118, 0.95429007422149059, 0.9485254903573368, 0.95964565458504292, 0.9463922864368649, 0.95301119244822674, 0.96000201462323909, 0.9533654233889125, 0.94897620981608699, 0.95449157578848032, 0.95096516926024288, 0.9540996353802198, 0.94724765894116203, 0.94778948958239595, 0.94591749106378031, 0.94877930991320225, 0.94627724201870156, 0.94527196978974903, 0.93990172135237238, 0.93976092189395699, 0.94174955547339068, 0.94168247039593567, 0.94014241402470755, 0.9407513673596033, 0.9404657385931684, 0.93660325056571514, 0.93963125525499047, 0.94507818868542304, 0.93515283399520432, 0.93426655514408952, 0.94036084535630249, 0.94278800241797989, 0.9303464277379202, 0.93946074265767165, 0.94011896731994538, 0.93219548844993971, 0.92901047418070248, 0.9413051979588738, 0.9262554563794031, 0.93609567274753203, 0.93033762483130578, 0.92717362481531107, 0.92953512970255081, 0.92597921058408705, 0.92944035872481756, 0.92961945644634048, 0.92707208850656797, 0.92662815136112076, 0.92539822828480234, 0.92880970077986702, 0.92243586760987628, 0.9222750133422748, 0.92256361764295314, 0.92112697126740262, 0.92800921486682297, 0.92926106782568352, 0.9153004667869965, 0.91094443103923883, 0.93496572342069606, 0.91974312407475323, 0.91821212221209769, 0.91941566704066269, 0.92743446987850353, 0.91503638999300818, 0.9165744471959929, 0.9173574311901096, 0.91424899909475399, 0.9105910983179335, 0.91705676725576613, 0.9153198466244068, 0.90864738371647913, 0.91433392116041334, 0.91730855979513537, 0.91943446809871454, 0.91559373840877156, 0.9117409480398676, 0.91824229192475937, 0.91601442202251138, 0.90785487652541763, 0.91036044345459455, 0.90611631831996731, 0.90776253175168242, 0.91042057926704512, 0.90874629789034811, 0.9149280544309798, 
0.90782476705166226, 0.9033169594105388, 0.91714376925946128, 0.91281316274517155, 0.90717852083845951, 0.90094537088101878, 0.92254053072003039, 0.90983334489593448, 0.90708146627494168, 0.90476333243913687, 0.9103459736117403, 0.90187539543204431, 0.9131362289361491, 0.90586625644843533, 0.90304462354844051, 0.90181187136526952, 0.91019459725955099, 0.90469807709268235, 0.90432952876471406, 0.90262631470543575, 0.90744672937259074, 0.90508781907365621, 0.91257822910795028, 0.90056483900994111, 0.91093355429606471, 0.90310791000879842, 0.90693900875758127, 0.90544454108106043, 0.90810441816334841, 0.89820458428510686, 0.90551853969183604, 0.89597859386528622, 0.90023866788648133, 0.89957093023412082, 0.89800265641361166, 0.89390398408977567, 0.90802340168738838, 0.89924965362547771, 0.89428545643713697, 0.90096102794054678, 0.89998055268679522, 0.90162280275845208, 0.90437904400674751, 0.89893950123068544, 0.89953917921889903, 0.89979142882290919, 0.89255702183771879, 0.90218120233559551, 0.90059584479083477, 0.90604923024967621, 0.89808476276490912, 0.9035111823998403, 0.88812459864856652, 0.88732828138284314, 0.8990176842324713, 0.89889007193703918, 0.90199559435268073, 0.89687789130803197, 0.89551173581908239, 0.90166333865434023, 0.89319951769196892, 0.89123147694826832, 0.8999938689076068, 0.89347788349594881, 0.90133726872596209, 0.88893467356964939, 0.89150929244536248, 0.89309087673528165, 0.8964350026673984, 0.88372758347780378, 0.89708257630045563, 0.8937592373076666, 0.89910169411630581, 0.89041595406974294, 0.90399067374724229, 0.8891242242836932, 0.893623077380578, 0.89104131698590328], [5898, 6512, 6986, 7289, 7455, 7227, 7723, 7767, 7890, 7942, 7879, 7893, 8036, 8052, 8089, 8079, 8132, 8148, 8138, 8163, 8152, 8199, 8160, 8160, 8152, 8176, 8201, 8184, 8203, 8207, 8232, 8219, 8240, 8221, 8177, 8250, 8250, 8240, 8250, 8271, 8269, 8254, 8256, 8268, 8248, 8283, 8275, 8278, 8302, 8278, 8287, 8293, 8302, 8304, 8305, 8317, 8329, 8316, 8316, 8324, 8323, 
8332, 8336, 8319, 8336, 8346, 8324, 8344, 8337, 8352, 8353, 8351, 8349, 8360, 8364, 8359, 8363, 8371, 8371, 8360, 8350, 8369, 8355, 8368, 8375, 8393, 8366, 8385, 8374, 8393, 8383, 8381, 8392, 8387, 8379, 8395, 8389, 8382, 8395, 8409, 8412, 8415, 8404, 8389, 8414, 8405, 8417, 8417, 8416, 8415, 8416, 8418, 8414, 8425, 8437, 8445, 8429, 8454, 8442, 8441, 8456, 8452, 8446, 8458, 8452, 8465, 8451, 8470, 8455, 8467, 8476, 8464, 8473, 8500, 8485, 8480, 8479, 8485, 8484, 8491, 8486, 8494, 8481, 8515, 8476, 8495, 8485, 8498, 8518, 8504, 8510, 8508, 8505, 8502, 8514, 8520, 8500, 8519, 8525, 8512, 8532, 8531, 8534, 8533, 8536, 8536, 8530, 8526, 8521, 8530, 8544, 8572, 8536, 8543, 8545, 8542, 8559, 8558, 8548, 8551, 8567, 8557, 8554, 8535, 8566, 8573, 8554, 8554, 8589, 8571, 8554, 8582, 8572, 8571, 8569, 8574, 8573, 8580, 8603, 8558, 8590, 8595, 8599, 8594, 8594, 8599, 8594, 8594, 8588, 8607, 8594, 8606, 8594, 8609, 8606, 8597, 8612, 8618, 8617, 8602, 8624, 8625, 8601, 8605, 8620, 8622, 8613, 8622, 8621, 8612, 8613, 8609, 8614, 8613, 8627, 8626, 8627, 8622, 8636, 8637, 8624, 8632, 8616, 8610, 8632, 8613, 8617, 8626, 8610, 8626, 8620, 8630, 8619, 8629, 8633, 8637, 8631, 8627, 8638, 8634, 8636, 8633, 8633, 8620, 8630, 8637, 8638, 8630, 8626, 8647, 8630, 8625, 8646, 8637, 8623, 8645, 8625, 8631, 8649, 8644, 8645, 8642, 8632, 8644, 8638, 8641, 8636, 8642, 8643, 8646, 8638, 8635, 8637, 8650, 8671, 8632, 8655, 8657, 8651, 8632, 8649, 8655, 8653, 8659, 8654, 8657, 8655, 8652, 8651, 8666, 8648, 8647, 8663, 8644, 8651, 8678, 8672, 8668, 8663, 8654, 8665, 8655, 8668, 8676, 8660, 8662, 8674, 8674, 8648, 8666, 8673, 8679, 8658, 8680, 8666, 8674, 8674, 8675, 8673, 8678, 8676, 8671, 8668, 8673, 8647, 8676, 8656, 8673, 8675, 8669, 8658, 8689, 8675, 8691, 8683, 8691, 8684, 8695, 8683, 8689, 8693, 8691, 8687, 8680, 8674, 8692, 8685, 8688, 8698, 8685, 8688, 8663, 8685, 8691, 8694, 8701, 8678, 8679, 8686, 8694, 8690, 8685, 8689, 8710, 8677, 8699, 8681, 8694, 8703, 8691, 8690, 8703, 8699, 8703, 
8686, 8692, 8678, 8707, 8692, 8695], [3.0054668602173189, 2.630540182112914, 2.3510873884747334, 2.1474705267659759, 2.0293529993091846, 2.0032380926060593, 1.8042553867762137, 1.7003373477735675, 1.6321855821328803, 1.5610897547444651, 1.5281925730670125, 1.4856976518536389, 1.4217730925966259, 1.3909369689936708, 1.3493713532353451, 1.3200187301521982, 1.301800278663491, 1.2676513467349682, 1.2429301255926062, 1.2185580400821583, 1.2050358080246586, 1.1832267788099187, 1.1685930523656132, 1.1484535916807292, 1.1359697154554902, 1.114320381224261, 1.1014631295982018, 1.089492003214271, 1.0727926078697159, 1.0613717416584318, 1.0469730951133949, 1.0327418635716941, 1.021579545763369, 1.007466532996552, 1.0019118318591766, 0.98744520456502871, 0.97474616908673617, 0.96296146445044695, 0.95371837313859686, 0.94627032091708685, 0.93498894772954433, 0.92371303592517773, 0.91598769285379622, 0.90572616038790177, 0.89742104654365473, 0.88871852051305211, 0.88017134729428881, 0.87050654917655768, 0.86212122128894098, 0.85338982152113285, 0.84517324140670691, 0.83628724752074646, 0.82688131184478686, 0.81882973565637929, 0.81057777549741772, 0.80277250205231088, 0.79480020668910989, 0.78780832187387972, 0.78001287137204445, 0.77319156351717266, 0.76596973399820345, 0.75937155578658067, 0.75198198829961649, 0.74542036500766451, 0.73896269018433358, 0.73189672823515506, 0.72511198497639617, 0.71858339754179046, 0.71283671972744378, 0.70572537262239332, 0.69919661822074053, 0.6932705965074516, 0.68720037824228397, 0.68083868446687623, 0.67490287294715556, 0.6688525492359253, 0.66319307295604624, 0.65751891741935764, 0.65160400038475963, 0.64606741109074506, 0.64065241850535526, 0.63470824020889005, 0.62942800255997755, 0.6238131388272512, 0.61821534837327441, 0.6131143735593132, 0.60801454092208473, 0.60307257257391134, 0.597472701650819, 0.5922276823975956, 0.58718276628487431, 0.58219953746425901, 0.57721941418480194, 0.57251776210593941, 0.56770248252692224, 
0.56293171594753588, 0.5583703980639495, 0.55356242427910862, 0.54911205675728603, 0.54434678517468216, 0.53983964079181412, 0.53560546501131556, 0.53104610602400237, 0.52690257757896453, 0.52244249181950919, 0.51837040316250427, 0.51406707535766105, 0.50997890157700754, 0.50580316759379418, 0.50197927328326941, 0.49765475959114985, 0.49363114894282656, 0.49013948810957153, 0.48574563160725903, 0.481996433456091, 0.47836254150930335, 0.47438049006936467, 0.47063209526452893, 0.46700010263520492, 0.46326294173821808, 0.45941178891664192, 0.45579944000986117, 0.45222495753341951, 0.44874146785608826, 0.44545391416308638, 0.44181467115194983, 0.43828437942217396, 0.43497413417170505, 0.4314933212391851, 0.42806639128733814, 0.4249830118133005, 0.4217067271918078, 0.41822647856750578, 0.41518744647089212, 0.41184644098125406, 0.40883008626966744, 0.40553823067182421, 0.40244746846822144, 0.39928585652511611, 0.39626977647492767, 0.39331263919868908, 0.39019492871393685, 0.38732420260579697, 0.38435150189678424, 0.38177223040643438, 0.37853457408790142, 0.37569449510498465, 0.37295407280298576, 0.37017541614667188, 0.36735907932787892, 0.36474742614606914, 0.36195502625628007, 0.35947915235047068, 0.35675421962663562, 0.35401004282520687, 0.35144133746409423, 0.34889387731183558, 0.34641383158012634, 0.34392904811658986, 0.34130203990500291, 0.33888366962785094, 0.33638822418723674, 0.33396129048481904, 0.33175452278050255, 0.32933814039393, 0.32694391946859563, 0.32460086680314448, 0.32231863682007983, 0.32010480841447037, 0.31790147822028658, 0.31571847553356108, 0.31361078068871434, 0.31122285863559135, 0.30943747241499886, 0.30710922580369193, 0.30523966755164406, 0.30284937095608283, 0.3007194800484288, 0.29894516963898177, 0.29668334988284817, 0.29487017447397706, 0.29263559416080354, 0.29070159725793254, 0.28909427829661305, 0.28678884984900743, 0.28490027458015588, 0.2830863666836243, 0.28117503586861597, 0.27940625519366591, 0.27761490071595862, 
0.27571735158201321, 0.27400939202985924, 0.2720994498803232, 0.27041895644243802, 0.26870944341767583, 0.26692503810639295, 0.26514216579715211, 0.26350425688621143, 0.26194775244796215, 0.26031808133537176, 0.25867644728947803, 0.25701548157578263, 0.2553925131758501, 0.2537805110290578, 0.25233205572342077, 0.25059011155916222, 0.24900995023173669, 0.24750970679163162, 0.24603574581720411, 0.24453517928837162, 0.24297118556626207, 0.24163254815954366, 0.24010060406903927, 0.23858447318485032, 0.23709355381025712, 0.23575693213477292, 0.23430242910697505, 0.23300424062786362, 0.23146492791787876, 0.23028619269651887, 0.22872263832614514, 0.22757051865720562, 0.22612070285691649, 0.22486518055185545, 0.22356486396463154, 0.22215084040399144, 0.22116285480573897, 0.21954904035047973, 0.21853554445024348, 0.21730688614947213, 0.21600390968478739, 0.21473510420394584, 0.21359511875939363, 0.21250417810571734, 0.21119670161592852, 0.20998559164773262, 0.208837489317242, 0.20768479087034092, 0.20659155500937021, 0.20558367284244244, 0.20435335237628477, 0.2032464875641013, 0.20219802987124882, 0.20127901932926417, 0.20007579622717186, 0.19898737190593724, 0.19804803849997596, 0.19689315580825217, 0.19610885980109602, 0.19479852999276509, 0.1940612380895031, 0.19292786347284827, 0.19185809503845005, 0.19096893321163924, 0.19003230799124482, 0.18913859695629501, 0.1881291695604892, 0.18718973645719555, 0.18627742977977121, 0.18541142438497082, 0.18448857944953381, 0.18359351692378717, 0.18261368242334564, 0.18180375921357417, 0.18093506139675003, 0.17998546923834557, 0.17915124316640865, 0.17852974081084139, 0.17753670237988683, 0.17675502721943334, 0.17596267360975426, 0.17532135165792365, 0.1742351851435536, 0.17358483897004762, 0.1729350931986928, 0.1720805519195413, 0.17109064918417963, 0.17027331927806791, 0.16963634485184267, 0.16895358154624776, 0.1682839465581161, 0.16747701705369528, 0.16667755231392004, 0.16591178893673617, 0.16538283429287487, 
0.16444040613503746, 0.16373895724486862, 0.16308076465326077, 0.16233620430635892, 0.16176781013586375, 0.16104183013834106, 0.16046925595808373, 0.15987647357985665, 0.15926248578219754, 0.15861934606502726, 0.15813634134480264, 0.15728566055545845, 0.15654044850087195, 0.15576910232092384, 0.15542825510458746, 0.15471291341581561, 0.15393039632238567, 0.15338574586063461, 0.15289796647946724, 0.15237239387638404, 0.15167106792711721, 0.15117419365146861, 0.15053294284800808, 0.14984558772800877, 0.14939067983754478, 0.14873871322311416, 0.14816358361629323, 0.14764993080499633, 0.147081011815695, 0.14665877802009278, 0.146330915059212, 0.14558165958404565, 0.14485851618233073, 0.14468885149823432, 0.1438703517822042, 0.14344305879631206, 0.14286592428666159, 0.14233498842740988, 0.14196829563110053, 0.14137537114428306, 0.14093879724708491, 0.14032845116761775, 0.13999255024009272, 0.13970462455415528, 0.13891699772284177, 0.13845977036853441, 0.13800758209633504, 0.13755412286780858, 0.13710288863433284, 0.1366941098396158, 0.13620566631276054, 0.13574747139784185, 0.1354018600833839, 0.13509926299887218, 0.13449907813149148, 0.13414481985203083, 0.13360822141476442, 0.13319767720007691, 0.13296506419947443, 0.13254819602035237, 0.13205345942670241, 0.13167698206691389, 0.13129565313440172, 0.13088461421366027, 0.13059579877002456, 0.13013895016529287, 0.12974813771716298, 0.12942570940664525, 0.12894055787725606, 0.12857812443876793, 0.12831920733468941, 0.12787017352245639, 0.12757677057644992, 0.12716368111937662, 0.12684805248186204, 0.12642447555752939, 0.12619325054698116, 0.12568699211395123, 0.1255825925227049, 0.12531188331355458, 0.12477983663711714, 0.12448255939185263, 0.12410353700139506, 0.12374131975287532, 0.12346266645310616, 0.12310787618062269, 0.12306935609380343, 0.12260837265741077, 0.12213474763430163, 0.12209935233252732, 0.12180246601285891, 0.1212539118889692, 0.12108968375061299, 0.12068240811734519, 0.12041266804252408, 
0.12016957520782154, 0.11993799531844312, 0.1196710285606524, 0.11935732791087648, 0.11929515289226478, 0.11889389635415669, 0.11862785428698526, 0.11817071954732133, 0.11814865141172132, 0.11763340847971795, 0.11749514446259617, 0.11725747872816564, 0.11695850963543949, 0.1166363293817957, 0.11644033569813024, 0.11615572825807459, 0.11608765530562379, 0.11580834205760465, 0.1154832427731588, 0.1153905733766095], [690, 749, 810, 849, 874, 869, 912, 932, 948, 950, 958, 964, 971, 975, 980, 979, 980, 981, 983, 986, 982, 986, 987, 990, 990, 990, 992, 991, 992, 991, 993, 992, 993, 994, 994, 995, 995, 995, 996, 994, 997, 997, 996, 998, 998, 997, 998, 998, 998, 998, 998, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 1000, 999, 1000, 999, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 
1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]] -------------------------------------------------------------------------------- /fig/regularized1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/regularized1.png -------------------------------------------------------------------------------- /fig/regularized2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/regularized2.png -------------------------------------------------------------------------------- /fig/regularized_full.json: -------------------------------------------------------------------------------- 1 | [[4.3072791918656037, 2.9331304641086344, 2.1348073553576041, 1.6588303607817259, 1.330889938797851, 1.1963223601928472, 1.1170765304219505, 1.0170754480838433, 0.99110935015398149, 1.0071179800661803, 0.96280080386971378, 0.99226609521675169, 0.96023984363523895, 0.97253784945751276, 0.93966545596520334, 0.95330563342376551, 0.96378529404233837, 0.97367336858037301, 0.94435985290781166, 
0.94622931411839994, 0.98392022263201184, 0.94091005661041272, 0.9496551347987412, 0.94714964684453073, 0.95026655456196552, 0.92915894672179755, 0.95831053042987979, 1.0153994919718721, 0.92940339906358749, 0.97682851862658082], [9212, 9341, 9375, 9424, 9532, 9537, 9504, 9541, 9578, 9538, 9579, 9530, 9590, 9543, 9607, 9597, 9576, 9546, 9600, 9634, 9544, 9606, 9614, 9607, 9621, 9637, 9620, 9511, 9649, 9561], [1.2925405259017666, 0.92479539229795305, 0.72611252037165497, 0.61618944188425839, 0.49142410439713557, 0.46552608507795468, 0.46074829841290343, 0.40775149802551902, 0.39671750686791218, 0.42031570708192345, 0.38057096091326847, 0.40768033915334978, 0.3895210257834103, 0.40585871820346864, 0.36003072887701948, 0.37700037701783806, 0.39300003862768451, 0.40774598935627593, 0.37194215157507704, 0.3662415845761452, 0.40722309031673021, 0.36476961463606117, 0.36988528906574514, 0.36112644707329011, 0.380710641602238, 0.35700998663848571, 0.37724740623797381, 0.44991741876110503, 0.35820321110078079, 0.39226034353556583], [45919, 46835, 47204, 47434, 47989, 47930, 47839, 48157, 48218, 48105, 48313, 48089, 48282, 48111, 48463, 48362, 48243, 48123, 48416, 48533, 48123, 48483, 48435, 48548, 48434, 48524, 48417, 47797, 48561, 48235]] -------------------------------------------------------------------------------- /fig/regularized_full.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/regularized_full.png -------------------------------------------------------------------------------- /fig/replaced_by_d3/README.md: -------------------------------------------------------------------------------- 1 | # Replaced by d3 directory 2 | 3 | This directory contains python code which generated png figures which 4 | were later replaced by d3 in the live version of the site. 
"""
relu
~~~~

Plots a graph of the squashing function used by a rectified linear
unit."""

import numpy as np
import matplotlib.pyplot as plt

# Sample the input on [-2, 2) in steps of 0.1 and clamp the negative
# part to zero: the output is the elementwise maximum of z and 0.
z = np.arange(-2, 2, .1)
zero = np.zeros(len(z))
y = np.maximum(zero, z)

fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(z, y)
ax.set_xlim([-2.0, 2.0])
ax.set_ylim([-2.0, 2.0])
ax.set_xlabel('z')
ax.set_title('Rectified linear unit')
ax.grid(True)

plt.show()
"""
step
~~~~~~~

Plots a graph of a step function."""

import numpy
import matplotlib.pyplot as plt

# Sample the input on [-5, 5) in steps of 0.02.  The step function is
# 1.0 wherever the input is non-negative and 0.0 everywhere else.
z = numpy.arange(-5, 5, .02)
step = numpy.where(z >= 0.0, 1.0, 0.0)

fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(z, step)
ax.set_xlim([-5,5])
ax.set_ylim([-0.5, 1.5])
ax.set_xlabel('z')
ax.set_title('step function')
ax.grid(True)

plt.show()
"""
serialize_images_to_json
~~~~~~~~~~~~~~~~~~~~~~~~

Utility to serialize parts of the training and validation data to JSON,
for use with Javascript.

The first NTD training examples and the first NVD validation examples
returned by ``mnist_loader.load_data_wrapper()`` are written to
``data_1000.json`` in the current directory.
"""

#### Libraries
# Standard library
from itertools import islice
import json
import sys

# My library
sys.path.append('../src/')
import mnist_loader

# Third-party libraries
import numpy as np


# Number of training and validation data images to serialize
NTD = 1000
NVD = 100

training_data, validation_data, test_data = mnist_loader.load_data_wrapper()

def make_data_integer(td):
    """Return the 784 entries of ``td``, scaled by 256, as Python ints.

    ``td`` is multiplied by 256, reshaped to a flat array of 784
    entries, and each entry is passed through ``int()``.  The function
    is not called anywhere in this script.
    """
    # This will be slow, due to the loop.  It'd be better if numpy did
    # this directly.  But numpy.rint followed by tolist() doesn't
    # convert to a standard Python int.
    return [int(x) for x in (td*256).reshape(784).tolist()]

def _first_entries(column):
    """Return the first element of every row of ``column`` as a list."""
    return [row[0] for row in column.tolist()]

# islice() takes the leading examples whether the loader returns lists
# or one-shot iterators.
# NOTE(review): presumably a Python 3 port of mnist_loader returns zip
# objects, which cannot be indexed -- confirm against the loader.
data = {
    "training": [
        {"x": _first_entries(example[0]),
         "y": _first_entries(example[1])}
        for example in islice(training_data, NTD)],
    "validation": [
        {"x": _first_entries(example[0]),
         # NOTE(review): the label is presumably a numpy integer, which
         # the json module cannot serialize on Python 3; int() is a
         # no-op for a plain Python int.
         "y": int(example[1])}
        for example in islice(validation_data, NVD)]}

# The context manager closes the file even if json.dump() raises.
with open("data_1000.json", "w") as f:
    json.dump(data, f)
"""
valley
~~~~~~

Plots a function of two variables to minimize.  The function is a
fairly generic valley function, C(v1, v2) = v1**2 + v2**2, drawn as a
3d surface with a blue/white checkerboard colouring."""

#### Libraries
# Third party libraries
from matplotlib.ticker import LinearLocator
# Note that axes3d is not explicitly used in the code, but is needed
# to register the 3d plot type correctly (on older Matplotlib versions)
from mpl_toolkits.mplot3d import axes3d
import matplotlib.pyplot as plt
import numpy

fig = plt.figure()
# ``fig.gca(projection='3d')`` no longer accepts keyword arguments in
# recent Matplotlib releases; ``add_subplot`` creates the same single
# 3d axes and works on old and new versions alike.
ax = fig.add_subplot(111, projection='3d')
X = numpy.arange(-1, 1, 0.1)
Y = numpy.arange(-1, 1, 0.1)
X, Y = numpy.meshgrid(X, Y)
Z = X**2 + Y**2

# Alternate the face colours so the surface reads as a checkerboard.
colortuple = ('w', 'b')
colors = numpy.empty(X.shape, dtype=str)
for x in range(len(X)):
    for y in range(len(Y)):
        colors[x, y] = colortuple[(x + y) % 2]

surf = ax.plot_surface(X, Y, Z, rstride=1, cstride=1, facecolors=colors,
                       linewidth=0)

ax.set_xlim3d(-1, 1)
ax.set_ylim3d(-1, 1)
ax.set_zlim3d(0, 2)
# Three ticks per axis.  ``xaxis``/``yaxis``/``zaxis`` replace the
# deprecated ``w_xaxis``/``w_yaxis``/``w_zaxis`` aliases.
ax.xaxis.set_major_locator(LinearLocator(3))
ax.yaxis.set_major_locator(LinearLocator(3))
ax.zaxis.set_major_locator(LinearLocator(3))
# Axis labels are placed by hand as free text in data coordinates.
ax.text(1.79, 0, 1.62, "$C$", fontsize=20)
ax.text(0.05, -1.8, 0, "$v_1$", fontsize=20)
ax.text(1.5, -0.25, 0, "$v_2$", fontsize=20)

plt.show()
-------------------------------------------------------------------------------- /fig/valley2.py: -------------------------------------------------------------------------------- 1 | """valley2.py 2 | ~~~~~~~~~~~~~ 3 | 4 | Plots a function of two variables to minimize. The function is a 5 | fairly generic valley function. 6 | 7 | Note that this is a duplicate of valley.py, but omits labels on the 8 | axis. It's bad practice to duplicate in this way, but I had 9 | considerable trouble getting matplotlib to update a graph in the way I 10 | needed (adding or removing labels), so finally fell back on this as a 11 | kludge solution. 12 | 13 | """ 14 | 15 | #### Libraries 16 | # Third party libraries 17 | from matplotlib.ticker import LinearLocator 18 | # Note that axes3d is not explicitly used in the code, but is needed 19 | # to register the 3d plot type correctly 20 | from mpl_toolkits.mplot3d import axes3d 21 | import matplotlib.pyplot as plt 22 | import numpy 23 | 24 | fig = plt.figure() 25 | ax = fig.gca(projection='3d') 26 | X = numpy.arange(-1, 1, 0.1) 27 | Y = numpy.arange(-1, 1, 0.1) 28 | X, Y = numpy.meshgrid(X, Y) 29 | Z = X**2 + Y**2 30 | 31 | colortuple = ('w', 'b') 32 | colors = numpy.empty(X.shape, dtype=str) 33 | for x in xrange(len(X)): 34 | for y in xrange(len(Y)): 35 | colors[x, y] = colortuple[(x + y) % 2] 36 | 37 | surf = ax.plot_surface(X, Y, Z, rstride=1, cstride=1, facecolors=colors, 38 | linewidth=0) 39 | 40 | ax.set_xlim3d(-1, 1) 41 | ax.set_ylim3d(-1, 1) 42 | ax.set_zlim3d(0, 2) 43 | ax.w_xaxis.set_major_locator(LinearLocator(3)) 44 | ax.w_yaxis.set_major_locator(LinearLocator(3)) 45 | ax.w_zaxis.set_major_locator(LinearLocator(3)) 46 | ax.text(1.79, 0, 1.62, "$C$", fontsize=20) 47 | 48 | plt.show() 49 | -------------------------------------------------------------------------------- /fig/weight_initialization.py: -------------------------------------------------------------------------------- 1 | """weight_initialization 2 | 
~~~~~~~~~~~~~~~~~~~~~~~~ 3 | 4 | This program shows how weight initialization affects training. In 5 | particular, we'll plot out how the classification accuracies improve 6 | using either large starting weights, whose standard deviation is 1, or 7 | the default starting weights, whose standard deviation is 1 over the 8 | square root of the number of input neurons. 9 | 10 | """ 11 | 12 | # Standard library 13 | import json 14 | import random 15 | import sys 16 | 17 | # My library 18 | sys.path.append('../src/') 19 | import mnist_loader 20 | import network2 21 | 22 | # Third-party libraries 23 | import matplotlib.pyplot as plt 24 | import numpy as np 25 | 26 | def main(filename, n, eta): 27 | run_network(filename, n, eta) 28 | make_plot(filename) 29 | 30 | def run_network(filename, n, eta): 31 | """Train the network using both the default and the large starting 32 | weights. Store the results in the file with name ``filename``, 33 | where they can later be used by ``make_plots``. 34 | 35 | """ 36 | # Make results more easily reproducible 37 | random.seed(12345678) 38 | np.random.seed(12345678) 39 | training_data, validation_data, test_data = mnist_loader.load_data_wrapper() 40 | net = network2.Network([784, n, 10], cost=network2.CrossEntropyCost) 41 | print "Train the network using the default starting weights." 42 | default_vc, default_va, default_tc, default_ta \ 43 | = net.SGD(training_data, 30, 10, eta, lmbda=5.0, 44 | evaluation_data=validation_data, 45 | monitor_evaluation_accuracy=True) 46 | print "Train the network using the large starting weights." 
47 | net.large_weight_initializer() 48 | large_vc, large_va, large_tc, large_ta \ 49 | = net.SGD(training_data, 30, 10, eta, lmbda=5.0, 50 | evaluation_data=validation_data, 51 | monitor_evaluation_accuracy=True) 52 | f = open(filename, "w") 53 | json.dump({"default_weight_initialization": 54 | [default_vc, default_va, default_tc, default_ta], 55 | "large_weight_initialization": 56 | [large_vc, large_va, large_tc, large_ta]}, 57 | f) 58 | f.close() 59 | 60 | def make_plot(filename): 61 | """Load the results from the file ``filename``, and generate the 62 | corresponding plot. 63 | 64 | """ 65 | f = open(filename, "r") 66 | results = json.load(f) 67 | f.close() 68 | default_vc, default_va, default_tc, default_ta = results[ 69 | "default_weight_initialization"] 70 | large_vc, large_va, large_tc, large_ta = results[ 71 | "large_weight_initialization"] 72 | # Convert raw classification numbers to percentages, for plotting 73 | default_va = [x/100.0 for x in default_va] 74 | large_va = [x/100.0 for x in large_va] 75 | fig = plt.figure() 76 | ax = fig.add_subplot(111) 77 | ax.plot(np.arange(0, 30, 1), large_va, color='#2A6EA6', 78 | label="Old approach to weight initialization") 79 | ax.plot(np.arange(0, 30, 1), default_va, color='#FFA933', 80 | label="New approach to weight initialization") 81 | ax.set_xlim([0, 30]) 82 | ax.set_xlabel('Epoch') 83 | ax.set_ylim([85, 100]) 84 | ax.set_title('Classification accuracy') 85 | plt.legend(loc="lower right") 86 | plt.show() 87 | 88 | if __name__ == "__main__": 89 | main() 90 | -------------------------------------------------------------------------------- /fig/weight_initialization_100.json: -------------------------------------------------------------------------------- 1 | {"default_weight_initialization": [[], [9295, 9481, 9547, 9592, 9664, 9673, 9702, 9719, 9726, 9726, 9732, 9732, 9730, 9734, 9745, 9751, 9757, 9761, 9764, 9766, 9758, 9767, 9756, 9752, 9777, 9775, 9770, 9770, 9771, 9781], [], []], 
"large_weight_initialization": [[], [8994, 9181, 9260, 9364, 9427, 9449, 9497, 9512, 9560, 9578, 9603, 9616, 9626, 9629, 9644, 9671, 9674, 9679, 9700, 9708, 9707, 9717, 9729, 9720, 9719, 9745, 9751, 9754, 9755, 9742], [], []]} -------------------------------------------------------------------------------- /fig/weight_initialization_100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/weight_initialization_100.png -------------------------------------------------------------------------------- /fig/weight_initialization_30.json: -------------------------------------------------------------------------------- 1 | {"default_weight_initialization": [[], [9270, 9414, 9470, 9504, 9537, 9550, 9587, 9594, 9596, 9594, 9616, 9595, 9622, 9630, 9636, 9641, 9625, 9652, 9637, 9634, 9642, 9639, 9649, 9646, 9646, 9653, 9646, 9653, 9640, 9650], [], []], "large_weight_initialization": [[], [8643, 9044, 9141, 9231, 9299, 9327, 9385, 9416, 9433, 9449, 9476, 9489, 9500, 9535, 9521, 9548, 9564, 9573, 9585, 9592, 9596, 9615, 9607, 9605, 9606, 9622, 9637, 9648, 9635, 9637], [], []]} -------------------------------------------------------------------------------- /fig/weight_initialization_30.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/weight_initialization_30.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | scikit-learn 3 | scipy 4 | Theano 5 | -------------------------------------------------------------------------------- /src/conv.py: -------------------------------------------------------------------------------- 1 | 
"""conv.py 2 | ~~~~~~~~~~ 3 | 4 | Code for many of the experiments involving convolutional networks in 5 | Chapter 6 of the book 'Neural Networks and Deep Learning', by Michael 6 | Nielsen. The code essentially duplicates (and parallels) what is in 7 | the text, so this is simply a convenience, and has not been commented 8 | in detail. Consult the original text for more details. 9 | 10 | """ 11 | 12 | from collections import Counter 13 | 14 | import matplotlib 15 | matplotlib.use('Agg') 16 | import matplotlib.pyplot as plt 17 | import numpy as np 18 | import theano 19 | import theano.tensor as T 20 | 21 | import network3 22 | from network3 import sigmoid, tanh, ReLU, Network 23 | from network3 import ConvPoolLayer, FullyConnectedLayer, SoftmaxLayer 24 | 25 | training_data, validation_data, test_data = network3.load_data_shared() 26 | mini_batch_size = 10 27 | 28 | def shallow(n=3, epochs=60): 29 | nets = [] 30 | for j in range(n): 31 | print "A shallow net with 100 hidden neurons" 32 | net = Network([ 33 | FullyConnectedLayer(n_in=784, n_out=100), 34 | SoftmaxLayer(n_in=100, n_out=10)], mini_batch_size) 35 | net.SGD( 36 | training_data, epochs, mini_batch_size, 0.1, 37 | validation_data, test_data) 38 | nets.append(net) 39 | return nets 40 | 41 | def basic_conv(n=3, epochs=60): 42 | for j in range(n): 43 | print "Conv + FC architecture" 44 | net = Network([ 45 | ConvPoolLayer(image_shape=(mini_batch_size, 1, 28, 28), 46 | filter_shape=(20, 1, 5, 5), 47 | poolsize=(2, 2)), 48 | FullyConnectedLayer(n_in=20*12*12, n_out=100), 49 | SoftmaxLayer(n_in=100, n_out=10)], mini_batch_size) 50 | net.SGD( 51 | training_data, epochs, mini_batch_size, 0.1, validation_data, test_data) 52 | return net 53 | 54 | def omit_FC(): 55 | for j in range(3): 56 | print "Conv only, no FC" 57 | net = Network([ 58 | ConvPoolLayer(image_shape=(mini_batch_size, 1, 28, 28), 59 | filter_shape=(20, 1, 5, 5), 60 | poolsize=(2, 2)), 61 | SoftmaxLayer(n_in=20*12*12, n_out=10)], mini_batch_size) 62 | 
net.SGD(training_data, 60, mini_batch_size, 0.1, validation_data, test_data) 63 | return net 64 | 65 | def dbl_conv(activation_fn=sigmoid): 66 | for j in range(3): 67 | print "Conv + Conv + FC architecture" 68 | net = Network([ 69 | ConvPoolLayer(image_shape=(mini_batch_size, 1, 28, 28), 70 | filter_shape=(20, 1, 5, 5), 71 | poolsize=(2, 2), 72 | activation_fn=activation_fn), 73 | ConvPoolLayer(image_shape=(mini_batch_size, 20, 12, 12), 74 | filter_shape=(40, 20, 5, 5), 75 | poolsize=(2, 2), 76 | activation_fn=activation_fn), 77 | FullyConnectedLayer( 78 | n_in=40*4*4, n_out=100, activation_fn=activation_fn), 79 | SoftmaxLayer(n_in=100, n_out=10)], mini_batch_size) 80 | net.SGD(training_data, 60, mini_batch_size, 0.1, validation_data, test_data) 81 | return net 82 | 83 | # The following experiment was eventually omitted from the chapter, 84 | # but I've left it in here, since it's an important negative result: 85 | # basic l2 regularization didn't help much. The reason (I believe) is 86 | # that using convolutional-pooling layers is already a pretty strong 87 | # regularizer. 
88 | def regularized_dbl_conv(): 89 | for lmbda in [0.00001, 0.0001, 0.001, 0.01, 0.1, 1.0, 10.0, 100.0]: 90 | for j in range(3): 91 | print "Conv + Conv + FC num %s, with regularization %s" % (j, lmbda) 92 | net = Network([ 93 | ConvPoolLayer(image_shape=(mini_batch_size, 1, 28, 28), 94 | filter_shape=(20, 1, 5, 5), 95 | poolsize=(2, 2)), 96 | ConvPoolLayer(image_shape=(mini_batch_size, 20, 12, 12), 97 | filter_shape=(40, 20, 5, 5), 98 | poolsize=(2, 2)), 99 | FullyConnectedLayer(n_in=40*4*4, n_out=100), 100 | SoftmaxLayer(n_in=100, n_out=10)], mini_batch_size) 101 | net.SGD(training_data, 60, mini_batch_size, 0.1, validation_data, test_data, lmbda=lmbda) 102 | 103 | def dbl_conv_relu(): 104 | for lmbda in [0.0, 0.00001, 0.0001, 0.001, 0.01, 0.1, 1.0, 10.0, 100.0]: 105 | for j in range(3): 106 | print "Conv + Conv + FC num %s, relu, with regularization %s" % (j, lmbda) 107 | net = Network([ 108 | ConvPoolLayer(image_shape=(mini_batch_size, 1, 28, 28), 109 | filter_shape=(20, 1, 5, 5), 110 | poolsize=(2, 2), 111 | activation_fn=ReLU), 112 | ConvPoolLayer(image_shape=(mini_batch_size, 20, 12, 12), 113 | filter_shape=(40, 20, 5, 5), 114 | poolsize=(2, 2), 115 | activation_fn=ReLU), 116 | FullyConnectedLayer(n_in=40*4*4, n_out=100, activation_fn=ReLU), 117 | SoftmaxLayer(n_in=100, n_out=10)], mini_batch_size) 118 | net.SGD(training_data, 60, mini_batch_size, 0.03, validation_data, test_data, lmbda=lmbda) 119 | 120 | #### Some subsequent functions may make use of the expanded MNIST 121 | #### data. That can be generated by running expand_mnist.py. 122 | 123 | def expanded_data(n=100): 124 | """n is the number of neurons in the fully-connected layer. We'll try 125 | n=100, 300, and 1000. 
126 | 127 | """ 128 | expanded_training_data, _, _ = network3.load_data_shared( 129 | "../data/mnist_expanded.pkl.gz") 130 | for j in range(3): 131 | print "Training with expanded data, %s neurons in the FC layer, run num %s" % (n, j) 132 | net = Network([ 133 | ConvPoolLayer(image_shape=(mini_batch_size, 1, 28, 28), 134 | filter_shape=(20, 1, 5, 5), 135 | poolsize=(2, 2), 136 | activation_fn=ReLU), 137 | ConvPoolLayer(image_shape=(mini_batch_size, 20, 12, 12), 138 | filter_shape=(40, 20, 5, 5), 139 | poolsize=(2, 2), 140 | activation_fn=ReLU), 141 | FullyConnectedLayer(n_in=40*4*4, n_out=n, activation_fn=ReLU), 142 | SoftmaxLayer(n_in=n, n_out=10)], mini_batch_size) 143 | net.SGD(expanded_training_data, 60, mini_batch_size, 0.03, 144 | validation_data, test_data, lmbda=0.1) 145 | return net 146 | 147 | def expanded_data_double_fc(n=100): 148 | """n is the number of neurons in both fully-connected layers. We'll 149 | try n=100, 300, and 1000. 150 | 151 | """ 152 | expanded_training_data, _, _ = network3.load_data_shared( 153 | "../data/mnist_expanded.pkl.gz") 154 | for j in range(3): 155 | print "Training with expanded data, %s neurons in two FC layers, run num %s" % (n, j) 156 | net = Network([ 157 | ConvPoolLayer(image_shape=(mini_batch_size, 1, 28, 28), 158 | filter_shape=(20, 1, 5, 5), 159 | poolsize=(2, 2), 160 | activation_fn=ReLU), 161 | ConvPoolLayer(image_shape=(mini_batch_size, 20, 12, 12), 162 | filter_shape=(40, 20, 5, 5), 163 | poolsize=(2, 2), 164 | activation_fn=ReLU), 165 | FullyConnectedLayer(n_in=40*4*4, n_out=n, activation_fn=ReLU), 166 | FullyConnectedLayer(n_in=n, n_out=n, activation_fn=ReLU), 167 | SoftmaxLayer(n_in=n, n_out=10)], mini_batch_size) 168 | net.SGD(expanded_training_data, 60, mini_batch_size, 0.03, 169 | validation_data, test_data, lmbda=0.1) 170 | 171 | def double_fc_dropout(p0, p1, p2, repetitions): 172 | expanded_training_data, _, _ = network3.load_data_shared( 173 | "../data/mnist_expanded.pkl.gz") 174 | nets = [] 175 | for j 
in range(repetitions): 176 | print "\n\nTraining using a dropout network with parameters ",p0,p1,p2 177 | print "Training with expanded data, run num %s" % j 178 | net = Network([ 179 | ConvPoolLayer(image_shape=(mini_batch_size, 1, 28, 28), 180 | filter_shape=(20, 1, 5, 5), 181 | poolsize=(2, 2), 182 | activation_fn=ReLU), 183 | ConvPoolLayer(image_shape=(mini_batch_size, 20, 12, 12), 184 | filter_shape=(40, 20, 5, 5), 185 | poolsize=(2, 2), 186 | activation_fn=ReLU), 187 | FullyConnectedLayer( 188 | n_in=40*4*4, n_out=1000, activation_fn=ReLU, p_dropout=p0), 189 | FullyConnectedLayer( 190 | n_in=1000, n_out=1000, activation_fn=ReLU, p_dropout=p1), 191 | SoftmaxLayer(n_in=1000, n_out=10, p_dropout=p2)], mini_batch_size) 192 | net.SGD(expanded_training_data, 40, mini_batch_size, 0.03, 193 | validation_data, test_data) 194 | nets.append(net) 195 | return nets 196 | 197 | def ensemble(nets): 198 | """Takes as input a list of nets, and then computes the accuracy on 199 | the test data when classifications are computed by taking a vote 200 | amongst the nets. Returns a tuple containing a list of indices 201 | for test data which is erroneously classified, and a list of the 202 | corresponding erroneous predictions. 203 | 204 | Note that this is a quick-and-dirty kluge: it'd be more reusable 205 | (and faster) to define a Theano function taking the vote. But 206 | this works. 
207 | 208 | """ 209 | 210 | test_x, test_y = test_data 211 | for net in nets: 212 | i = T.lscalar() # mini-batch index 213 | net.test_mb_predictions = theano.function( 214 | [i], net.layers[-1].y_out, 215 | givens={ 216 | net.x: 217 | test_x[i*net.mini_batch_size: (i+1)*net.mini_batch_size] 218 | }) 219 | net.test_predictions = list(np.concatenate( 220 | [net.test_mb_predictions(i) for i in xrange(1000)])) 221 | all_test_predictions = zip(*[net.test_predictions for net in nets]) 222 | def plurality(p): return Counter(p).most_common(1)[0][0] 223 | plurality_test_predictions = [plurality(p) 224 | for p in all_test_predictions] 225 | test_y_eval = test_y.eval() 226 | error_locations = [j for j in xrange(10000) 227 | if plurality_test_predictions[j] != test_y_eval[j]] 228 | erroneous_predictions = [plurality(all_test_predictions[j]) 229 | for j in error_locations] 230 | print "Accuracy is {:.2%}".format((1-len(error_locations)/10000.0)) 231 | return error_locations, erroneous_predictions 232 | 233 | def plot_errors(error_locations, erroneous_predictions=None): 234 | test_x, test_y = test_data[0].eval(), test_data[1].eval() 235 | fig = plt.figure() 236 | error_images = [np.array(test_x[i]).reshape(28, -1) for i in error_locations] 237 | n = min(40, len(error_locations)) 238 | for j in range(n): 239 | ax = plt.subplot2grid((5, 8), (j/8, j % 8)) 240 | ax.matshow(error_images[j], cmap = matplotlib.cm.binary) 241 | ax.text(24, 5, test_y[error_locations[j]]) 242 | if erroneous_predictions: 243 | ax.text(24, 24, erroneous_predictions[j]) 244 | plt.xticks(np.array([])) 245 | plt.yticks(np.array([])) 246 | plt.tight_layout() 247 | return plt 248 | 249 | def plot_filters(net, layer, x, y): 250 | 251 | """Plot the filters for net after the (convolutional) layer number 252 | layer. They are plotted in x by y format. 
So, for example, if we 253 | have 20 filters after layer 0, then we can call show_filters(net, 0, 5, 4) to 254 | get a 5 by 4 plot of all filters.""" 255 | filters = net.layers[layer].w.eval() 256 | fig = plt.figure() 257 | for j in range(len(filters)): 258 | ax = fig.add_subplot(y, x, j) 259 | ax.matshow(filters[j][0], cmap = matplotlib.cm.binary) 260 | plt.xticks(np.array([])) 261 | plt.yticks(np.array([])) 262 | plt.tight_layout() 263 | return plt 264 | 265 | 266 | #### Helper method to run all experiments in the book 267 | 268 | def run_experiments(): 269 | 270 | """Run the experiments described in the book. Note that the later 271 | experiments require access to the expanded training data, which 272 | can be generated by running expand_mnist.py. 273 | 274 | """ 275 | shallow() 276 | basic_conv() 277 | omit_FC() 278 | dbl_conv(activation_fn=sigmoid) 279 | # omitted, but still interesting: regularized_dbl_conv() 280 | dbl_conv_relu() 281 | expanded_data(n=100) 282 | expanded_data(n=300) 283 | expanded_data(n=1000) 284 | expanded_data_double_fc(n=100) 285 | expanded_data_double_fc(n=300) 286 | expanded_data_double_fc(n=1000) 287 | nets = double_fc_dropout(0.5, 0.5, 0.5, 5) 288 | # plot the erroneous digits in the ensemble of nets just trained 289 | error_locations, erroneous_predictions = ensemble(nets) 290 | plt = plot_errors(error_locations, erroneous_predictions) 291 | plt.savefig("ensemble_errors.png") 292 | # plot the filters learned by the first of the nets just trained 293 | plt = plot_filters(nets[0], 0, 5, 4) 294 | plt.savefig("net_full_layer_0.png") 295 | plt = plot_filters(nets[0], 1, 8, 5) 296 | plt.savefig("net_full_layer_1.png") 297 | 298 | -------------------------------------------------------------------------------- /src/expand_mnist.py: -------------------------------------------------------------------------------- 1 | """expand_mnist.py 2 | ~~~~~~~~~~~~~~~~~~ 3 | 4 | Take the 50,000 MNIST training images, and create an expanded set of 5 | 
250,000 images, by displacing each training image up, down, left and 6 | right, by one pixel. Save the resulting file to 7 | ../data/mnist_expanded.pkl.gz. 8 | 9 | Note that this program is memory intensive, and may not run on small 10 | systems. 11 | 12 | """ 13 | 14 | from __future__ import print_function 15 | 16 | #### Libraries 17 | 18 | # Standard library 19 | import cPickle 20 | import gzip 21 | import os.path 22 | import random 23 | 24 | # Third-party libraries 25 | import numpy as np 26 | 27 | print("Expanding the MNIST training set") 28 | 29 | if os.path.exists("../data/mnist_expanded.pkl.gz"): 30 | print("The expanded training set already exists. Exiting.") 31 | else: 32 | f = gzip.open("../data/mnist.pkl.gz", 'rb') 33 | training_data, validation_data, test_data = cPickle.load(f) 34 | f.close() 35 | expanded_training_pairs = [] 36 | j = 0 # counter 37 | for x, y in zip(training_data[0], training_data[1]): 38 | expanded_training_pairs.append((x, y)) 39 | image = np.reshape(x, (-1, 28)) 40 | j += 1 41 | if j % 1000 == 0: print("Expanding image number", j) 42 | # iterate over data telling us the details of how to 43 | # do the displacement 44 | for d, axis, index_position, index in [ 45 | (1, 0, "first", 0), 46 | (-1, 0, "first", 27), 47 | (1, 1, "last", 0), 48 | (-1, 1, "last", 27)]: 49 | new_img = np.roll(image, d, axis) 50 | if index_position == "first": 51 | new_img[index, :] = np.zeros(28) 52 | else: 53 | new_img[:, index] = np.zeros(28) 54 | expanded_training_pairs.append((np.reshape(new_img, 784), y)) 55 | random.shuffle(expanded_training_pairs) 56 | expanded_training_data = [list(d) for d in zip(*expanded_training_pairs)] 57 | print("Saving expanded data. 
This may take a few minutes.") 58 | f = gzip.open("../data/mnist_expanded.pkl.gz", "w") 59 | cPickle.dump((expanded_training_data, validation_data, test_data), f) 60 | f.close() 61 | -------------------------------------------------------------------------------- /src/mnist_average_darkness.py: -------------------------------------------------------------------------------- 1 | """ 2 | mnist_average_darkness 3 | ~~~~~~~~~~~~~~~~~~~~~~ 4 | 5 | A naive classifier for recognizing handwritten digits from the MNIST 6 | data set. The program classifies digits based on how dark they are 7 | --- the idea is that digits like "1" tend to be less dark than digits 8 | like "8", simply because the latter has a more complex shape. When 9 | shown an image the classifier returns whichever digit in the training 10 | data had the closest average darkness. 11 | 12 | The program works in two steps: first it trains the classifier, and 13 | then it applies the classifier to the MNIST test data to see how many 14 | digits are correctly classified. 15 | 16 | Needless to say, this isn't a very good way of recognizing handwritten 17 | digits! Still, it's useful to show what sort of performance we get 18 | from naive ideas.""" 19 | 20 | #### Libraries 21 | # Standard library 22 | from collections import defaultdict 23 | 24 | # My libraries 25 | import mnist_loader 26 | 27 | def main(): 28 | training_data, validation_data, test_data = mnist_loader.load_data() 29 | # training phase: compute the average darknesses for each digit, 30 | # based on the training data 31 | avgs = avg_darknesses(training_data) 32 | # testing phase: see how many of the test images are classified 33 | # correctly 34 | num_correct = sum(int(guess_digit(image, avgs) == digit) 35 | for image, digit in zip(test_data[0], test_data[1])) 36 | print "Baseline classifier using average darkness of image." 37 | print "%s of %s values correct." 
% (num_correct, len(test_data[1])) 38 | 39 | def avg_darknesses(training_data): 40 | """ Return a defaultdict whose keys are the digits 0 through 9. 41 | For each digit we compute a value which is the average darkness of 42 | training images containing that digit. The darkness for any 43 | particular image is just the sum of the darknesses for each pixel.""" 44 | digit_counts = defaultdict(int) 45 | darknesses = defaultdict(float) 46 | for image, digit in zip(training_data[0], training_data[1]): 47 | digit_counts[digit] += 1 48 | darknesses[digit] += sum(image) 49 | avgs = defaultdict(float) 50 | for digit, n in digit_counts.iteritems(): 51 | avgs[digit] = darknesses[digit] / n 52 | return avgs 53 | 54 | def guess_digit(image, avgs): 55 | """Return the digit whose average darkness in the training data is 56 | closest to the darkness of ``image``. Note that ``avgs`` is 57 | assumed to be a defaultdict whose keys are 0...9, and whose values 58 | are the corresponding average darknesses across the training data.""" 59 | darkness = sum(image) 60 | distances = {k: abs(v-darkness) for k, v in avgs.iteritems()} 61 | return min(distances, key=distances.get) 62 | 63 | if __name__ == "__main__": 64 | main() 65 | -------------------------------------------------------------------------------- /src/mnist_loader.py: -------------------------------------------------------------------------------- 1 | """ 2 | mnist_loader 3 | ~~~~~~~~~~~~ 4 | 5 | A library to load the MNIST image data. For details of the data 6 | structures that are returned, see the doc strings for ``load_data`` 7 | and ``load_data_wrapper``. In practice, ``load_data_wrapper`` is the 8 | function usually called by our neural network code. 
9 | """ 10 | 11 | #### Libraries 12 | # Standard library 13 | import pickle 14 | import gzip 15 | 16 | # Third-party libraries 17 | import numpy as np 18 | 19 | def load_data(): 20 | """Return the MNIST data as a tuple containing the training data, 21 | the validation data, and the test data. 22 | 23 | The ``training_data`` is returned as a tuple with two entries. 24 | The first entry contains the actual training images. This is a 25 | numpy ndarray with 50,000 entries. Each entry is, in turn, a 26 | numpy ndarray with 784 values, representing the 28 * 28 = 784 27 | pixels in a single MNIST image. 28 | 29 | The second entry in the ``training_data`` tuple is a numpy ndarray 30 | containing 50,000 entries. Those entries are just the digit 31 | values (0...9) for the corresponding images contained in the first 32 | entry of the tuple. 33 | 34 | The ``validation_data`` and ``test_data`` are similar, except 35 | each contains only 10,000 images. 36 | 37 | This is a nice data format, but for use in neural networks it's 38 | helpful to modify the format of the ``training_data`` a little. 39 | That's done in the wrapper function ``load_data_wrapper()``, see 40 | below. 41 | """ 42 | f = gzip.open('../data/mnist.pkl.gz', 'rb') 43 | training_data, validation_data, test_data = pickle.load(f, encoding='bytes') 44 | f.close() 45 | return (training_data, validation_data, test_data) 46 | 47 | def load_data_wrapper(): 48 | """Return a tuple containing ``(training_data, validation_data, 49 | test_data)``. Based on ``load_data``, but the format is more 50 | convenient for use in our implementation of neural networks. 51 | 52 | In particular, ``training_data`` is a list containing 50,000 53 | 2-tuples ``(x, y)``. ``x`` is a 784-dimensional numpy.ndarray 54 | containing the input image. ``y`` is a 10-dimensional 55 | numpy.ndarray representing the unit vector corresponding to the 56 | correct digit for ``x``. 
57 | 58 | ``validation_data`` and ``test_data`` are lists containing 10,000 59 | 2-tuples ``(x, y)``. In each case, ``x`` is a 784-dimensional 60 | numpy.ndarry containing the input image, and ``y`` is the 61 | corresponding classification, i.e., the digit values (integers) 62 | corresponding to ``x``. 63 | 64 | Obviously, this means we're using slightly different formats for 65 | the training data and the validation / test data. These formats 66 | turn out to be the most convenient for use in our neural network 67 | code.""" 68 | tr_d, va_d, te_d = load_data() 69 | training_inputs = [np.reshape(x, (784, 1)) for x in tr_d[0]] 70 | training_results = [vectorized_result(y) for y in tr_d[1]] 71 | training_data = zip(training_inputs, training_results) 72 | validation_inputs = [np.reshape(x, (784, 1)) for x in va_d[0]] 73 | validation_data = zip(validation_inputs, va_d[1]) 74 | test_inputs = [np.reshape(x, (784, 1)) for x in te_d[0]] 75 | test_data = zip(test_inputs, te_d[1]) 76 | return (training_data, validation_data, test_data) 77 | 78 | def vectorized_result(j): 79 | """Return a 10-dimensional unit vector with a 1.0 in the jth 80 | position and zeroes elsewhere. 
This is used to convert a digit 81 | (0...9) into a corresponding desired output from the neural 82 | network.""" 83 | e = np.zeros((10, 1)) 84 | e[j] = 1.0 85 | return e 86 | -------------------------------------------------------------------------------- /src/mnist_svm.py: -------------------------------------------------------------------------------- 1 | """ 2 | mnist_svm 3 | ~~~~~~~~~ 4 | 5 | A classifier program for recognizing handwritten digits from the MNIST 6 | data set, using an SVM classifier.""" 7 | 8 | #### Libraries 9 | # My libraries 10 | import mnist_loader 11 | 12 | # Third-party libraries 13 | from sklearn import svm 14 | 15 | def svm_baseline(): 16 | training_data, validation_data, test_data = mnist_loader.load_data() 17 | # train 18 | clf = svm.SVC() 19 | clf.fit(training_data[0], training_data[1]) 20 | # test 21 | predictions = [int(a) for a in clf.predict(test_data[0])] 22 | num_correct = sum(int(a == y) for a, y in zip(predictions, test_data[1])) 23 | print "Baseline classifier using an SVM." 24 | print "%s of %s values correct." % (num_correct, len(test_data[1])) 25 | 26 | if __name__ == "__main__": 27 | svm_baseline() 28 | 29 | -------------------------------------------------------------------------------- /src/network.py: -------------------------------------------------------------------------------- 1 | """ 2 | network.py 3 | ~~~~~~~~~~ 4 | 5 | A module to implement the stochastic gradient descent learning 6 | algorithm for a feedforward neural network. Gradients are calculated 7 | using backpropagation. Note that I have focused on making the code 8 | simple, easily readable, and easily modifiable. It is not optimized, 9 | and omits many desirable features. 
# /src/network.py
"""
network.py
~~~~~~~~~~

A module to implement the stochastic gradient descent learning
algorithm for a feedforward neural network. Gradients are calculated
using backpropagation. Note that I have focused on making the code
simple, easily readable, and easily modifiable. It is not optimized,
and omits many desirable features.
"""

#### Libraries
# Standard library
import random

# Third-party libraries
import numpy as np


class Network(object):

    def __init__(self, sizes):
        """The list ``sizes`` contains the number of neurons in the
        respective layers of the network. For example, if the list
        was [2, 3, 1] then it would be a three-layer network, with the
        first layer containing 2 neurons, the second layer 3 neurons,
        and the third layer 1 neuron. The biases and weights for the
        network are initialized randomly, using a Gaussian
        distribution with mean 0, and variance 1. Note that the first
        layer is assumed to be an input layer, and by convention we
        won't set any biases for those neurons, since biases are only
        ever used in computing the outputs from later layers."""
        self.num_layers = len(sizes)
        self.sizes = sizes
        self.biases = [np.random.randn(y, 1) for y in sizes[1:]]
        self.weights = [np.random.randn(y, x)
                        for x, y in zip(sizes[:-1], sizes[1:])]

    def feedforward(self, a):
        """Return the output of the network if ``a`` is input."""
        for b, w in zip(self.biases, self.weights):
            a = sigmoid(np.dot(w, a)+b)
        return a

    def SGD(self, training_data, epochs, mini_batch_size, eta,
            test_data=None):
        """Train the neural network using mini-batch stochastic
        gradient descent. The ``training_data`` is an iterable of
        tuples ``(x, y)`` representing the training inputs and the
        desired outputs. The other non-optional parameters are
        self-explanatory. If ``test_data`` is provided then the
        network will be evaluated against the test data after each
        epoch, and partial progress printed out. This is useful for
        tracking progress, but slows things down substantially.

        Both data arguments are copied into lists, so one-shot
        iterators such as ``zip(...)`` are accepted and the caller's
        own sequence is not shuffled."""
        training_data = list(training_data)
        n = len(training_data)
        # Only materialize the test set when one was supplied:
        # list(None) raises TypeError, which made the documented default
        # ``test_data=None`` unusable.
        if test_data is not None:
            test_data = list(test_data)
            n_test = len(test_data)
        for j in range(epochs):
            random.shuffle(training_data)
            mini_batches = [
                training_data[k:k+mini_batch_size]
                for k in range(0, n, mini_batch_size)]
            for mini_batch in mini_batches:
                self.update_mini_batch(mini_batch, eta)
            if test_data:
                print("Epoch {0}: {1} / {2}".format(
                    j, self.evaluate(test_data), n_test))
            else:
                print("Epoch {0} complete".format(j))

    def update_mini_batch(self, mini_batch, eta):
        """Update the network's weights and biases by applying
        gradient descent using backpropagation to a single mini batch.
        The ``mini_batch`` is a list of tuples ``(x, y)``, and ``eta``
        is the learning rate."""
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        # Accumulate the per-example gradients over the mini batch.
        for x, y in mini_batch:
            delta_nabla_b, delta_nabla_w = self.backprop(x, y)
            nabla_b = [nb+dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
            nabla_w = [nw+dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
        # Step against the averaged gradient.
        self.weights = [w-(eta/len(mini_batch))*nw
                        for w, nw in zip(self.weights, nabla_w)]
        self.biases = [b-(eta/len(mini_batch))*nb
                       for b, nb in zip(self.biases, nabla_b)]

    def backprop(self, x, y):
        """Return a tuple ``(nabla_b, nabla_w)`` representing the
        gradient for the cost function C_x. ``nabla_b`` and
        ``nabla_w`` are layer-by-layer lists of numpy arrays, similar
        to ``self.biases`` and ``self.weights``."""
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        # feedforward
        activation = x
        activations = [x]  # list to store all the activations, layer by layer
        zs = []  # list to store all the z vectors, layer by layer
        for b, w in zip(self.biases, self.weights):
            z = np.dot(w, activation)+b
            zs.append(z)
            activation = sigmoid(z)
            activations.append(activation)
        # backward pass
        delta = self.cost_derivative(activations[-1], y) * \
            sigmoid_prime(zs[-1])
        nabla_b[-1] = delta
        nabla_w[-1] = np.dot(delta, activations[-2].transpose())
        # Note that the variable l in the loop below is used a little
        # differently to the notation in Chapter 2 of the book. Here,
        # l = 1 means the last layer of neurons, l = 2 is the
        # second-last layer, and so on. It's a renumbering of the
        # scheme in the book, used here to take advantage of the fact
        # that Python can use negative indices in lists.
        for l in range(2, self.num_layers):
            z = zs[-l]
            sp = sigmoid_prime(z)
            delta = np.dot(self.weights[-l+1].transpose(), delta) * sp
            nabla_b[-l] = delta
            nabla_w[-l] = np.dot(delta, activations[-l-1].transpose())
        return (nabla_b, nabla_w)

    def evaluate(self, test_data):
        """Return the number of test inputs for which the neural
        network outputs the correct result. Note that the neural
        network's output is assumed to be the index of whichever
        neuron in the final layer has the highest activation."""
        test_results = [(np.argmax(self.feedforward(x)), y)
                        for (x, y) in test_data]
        return sum(int(x == y) for (x, y) in test_results)

    def cost_derivative(self, output_activations, y):
        r"""Return the vector of partial derivatives \partial C_x /
        \partial a for the output activations."""
        # Raw docstring: "\p" is not a valid escape in a normal string.
        return (output_activations-y)


#### Miscellaneous functions
def sigmoid(z):
    """The sigmoid function."""
    return 1.0/(1.0+np.exp(-z))


def sigmoid_prime(z):
    """Derivative of the sigmoid function."""
    return sigmoid(z)*(1-sigmoid(z))
# /src/network2.py
"""network2.py
~~~~~~~~~~~~~~

An improved version of network.py, implementing the stochastic
gradient descent learning algorithm for a feedforward neural network.
Improvements include the addition of the cross-entropy cost function,
regularization, and better initialization of network weights. Note
that I have focused on making the code simple, easily readable, and
easily modifiable. It is not optimized, and omits many desirable
features.

"""

#### Libraries
# Standard library
import json
import random
import sys

# Third-party libraries
import numpy as np


#### Define the quadratic and cross-entropy cost functions

class QuadraticCost(object):

    @staticmethod
    def fn(a, y):
        """Return the cost associated with an output ``a`` and desired output
        ``y``.

        """
        return 0.5*np.linalg.norm(a-y)**2

    @staticmethod
    def delta(z, a, y):
        """Return the error delta from the output layer."""
        return (a-y) * sigmoid_prime(z)


class CrossEntropyCost(object):

    @staticmethod
    def fn(a, y):
        """Return the cost associated with an output ``a`` and desired output
        ``y``. Note that np.nan_to_num is used to ensure numerical
        stability. In particular, if both ``a`` and ``y`` have a 1.0
        in the same slot, then the expression (1-y)*np.log(1-a)
        returns nan. The np.nan_to_num ensures that that is converted
        to the correct value (0.0).

        """
        return np.sum(np.nan_to_num(-y*np.log(a)-(1-y)*np.log(1-a)))

    @staticmethod
    def delta(z, a, y):
        """Return the error delta from the output layer. Note that the
        parameter ``z`` is not used by the method. It is included in
        the method's parameters in order to make the interface
        consistent with the delta method for other cost classes.

        """
        return (a-y)


#### Main Network class
class Network(object):

    def __init__(self, sizes, cost=CrossEntropyCost):
        """The list ``sizes`` contains the number of neurons in the respective
        layers of the network. For example, if the list was [2, 3, 1]
        then it would be a three-layer network, with the first layer
        containing 2 neurons, the second layer 3 neurons, and the
        third layer 1 neuron. The biases and weights for the network
        are initialized randomly, using
        ``self.default_weight_initializer`` (see docstring for that
        method).

        """
        self.num_layers = len(sizes)
        self.sizes = sizes
        self.default_weight_initializer()
        self.cost = cost

    def default_weight_initializer(self):
        """Initialize each weight using a Gaussian distribution with mean 0
        and standard deviation 1 over the square root of the number of
        weights connecting to the same neuron. Initialize the biases
        using a Gaussian distribution with mean 0 and standard
        deviation 1.

        Note that the first layer is assumed to be an input layer, and
        by convention we won't set any biases for those neurons, since
        biases are only ever used in computing the outputs from later
        layers.

        """
        self.biases = [np.random.randn(y, 1) for y in self.sizes[1:]]
        self.weights = [np.random.randn(y, x)/np.sqrt(x)
                        for x, y in zip(self.sizes[:-1], self.sizes[1:])]

    def large_weight_initializer(self):
        """Initialize the weights using a Gaussian distribution with mean 0
        and standard deviation 1. Initialize the biases using a
        Gaussian distribution with mean 0 and standard deviation 1.

        Note that the first layer is assumed to be an input layer, and
        by convention we won't set any biases for those neurons, since
        biases are only ever used in computing the outputs from later
        layers.

        This weight and bias initializer uses the same approach as in
        Chapter 1, and is included for purposes of comparison. It
        will usually be better to use the default weight initializer
        instead.

        """
        self.biases = [np.random.randn(y, 1) for y in self.sizes[1:]]
        self.weights = [np.random.randn(y, x)
                        for x, y in zip(self.sizes[:-1], self.sizes[1:])]

    def feedforward(self, a):
        """Return the output of the network if ``a`` is input."""
        for b, w in zip(self.biases, self.weights):
            a = sigmoid(np.dot(w, a)+b)
        return a

    def SGD(self, training_data, epochs, mini_batch_size, eta,
            lmbda=0.0,
            evaluation_data=None,
            monitor_evaluation_cost=False,
            monitor_evaluation_accuracy=False,
            monitor_training_cost=False,
            monitor_training_accuracy=False):
        """Train the neural network using mini-batch stochastic gradient
        descent. The ``training_data`` is an iterable of tuples ``(x, y)``
        representing the training inputs and the desired outputs. The
        other non-optional parameters are self-explanatory, as is the
        regularization parameter ``lmbda``. The method also accepts
        ``evaluation_data``, usually either the validation or test
        data. We can monitor the cost and accuracy on either the
        evaluation data or the training data, by setting the
        appropriate flags. The method returns a tuple containing four
        lists: the (per-epoch) costs on the evaluation data, the
        accuracies on the evaluation data, the costs on the training
        data, and the accuracies on the training data. All values are
        evaluated at the end of each training epoch. So, for example,
        if we train for 30 epochs, then the first element of the tuple
        will be a 30-element list containing the cost on the
        evaluation data at the end of each epoch. Note that the lists
        are empty if the corresponding flag is not set.

        Both data arguments are copied into lists, so one-shot
        iterators such as ``zip(...)`` are accepted and the caller's
        own sequence is not shuffled.

        """
        # zip() returns an iterator on Python 3; it has no len() and
        # cannot be shuffled or traversed once per epoch.
        training_data = list(training_data)
        n = len(training_data)
        if evaluation_data is not None:
            evaluation_data = list(evaluation_data)
            n_data = len(evaluation_data)
        evaluation_cost, evaluation_accuracy = [], []
        training_cost, training_accuracy = [], []
        for j in range(epochs):
            random.shuffle(training_data)
            mini_batches = [
                training_data[k:k+mini_batch_size]
                for k in range(0, n, mini_batch_size)]
            for mini_batch in mini_batches:
                self.update_mini_batch(mini_batch, eta, lmbda, n)
            print("Epoch %s training complete" % j)
            if monitor_training_cost:
                cost = self.total_cost(training_data, lmbda)
                training_cost.append(cost)
                print("Cost on training data: {}".format(cost))
            if monitor_training_accuracy:
                accuracy = self.accuracy(training_data, convert=True)
                training_accuracy.append(accuracy)
                print("Accuracy on training data: {} / {}".format(
                    accuracy, n))
            if monitor_evaluation_cost:
                cost = self.total_cost(evaluation_data, lmbda, convert=True)
                evaluation_cost.append(cost)
                print("Cost on evaluation data: {}".format(cost))
            if monitor_evaluation_accuracy:
                accuracy = self.accuracy(evaluation_data)
                evaluation_accuracy.append(accuracy)
                # Reuse the value just computed instead of running a
                # second full pass over the evaluation data.
                print("Accuracy on evaluation data: {} / {}".format(
                    accuracy, n_data))
            print()
        return evaluation_cost, evaluation_accuracy, \
            training_cost, training_accuracy

    def update_mini_batch(self, mini_batch, eta, lmbda, n):
        """Update the network's weights and biases by applying gradient
        descent using backpropagation to a single mini batch. The
        ``mini_batch`` is a list of tuples ``(x, y)``, ``eta`` is the
        learning rate, ``lmbda`` is the regularization parameter, and
        ``n`` is the total size of the training data set.

        """
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        for x, y in mini_batch:
            delta_nabla_b, delta_nabla_w = self.backprop(x, y)
            nabla_b = [nb+dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
            nabla_w = [nw+dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
        # The (1 - eta*lmbda/n) factor is the L2 weight-decay term; the
        # biases are not regularized.
        self.weights = [(1-eta*(lmbda/n))*w-(eta/len(mini_batch))*nw
                        for w, nw in zip(self.weights, nabla_w)]
        self.biases = [b-(eta/len(mini_batch))*nb
                       for b, nb in zip(self.biases, nabla_b)]

    def backprop(self, x, y):
        """Return a tuple ``(nabla_b, nabla_w)`` representing the
        gradient for the cost function C_x. ``nabla_b`` and
        ``nabla_w`` are layer-by-layer lists of numpy arrays, similar
        to ``self.biases`` and ``self.weights``."""
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        # feedforward
        activation = x
        activations = [x]  # list to store all the activations, layer by layer
        zs = []  # list to store all the z vectors, layer by layer
        for b, w in zip(self.biases, self.weights):
            z = np.dot(w, activation)+b
            zs.append(z)
            activation = sigmoid(z)
            activations.append(activation)
        # backward pass
        delta = (self.cost).delta(zs[-1], activations[-1], y)
        nabla_b[-1] = delta
        nabla_w[-1] = np.dot(delta, activations[-2].transpose())
        # Note that the variable l in the loop below is used a little
        # differently to the notation in Chapter 2 of the book. Here,
        # l = 1 means the last layer of neurons, l = 2 is the
        # second-last layer, and so on. It's a renumbering of the
        # scheme in the book, used here to take advantage of the fact
        # that Python can use negative indices in lists.
        for l in range(2, self.num_layers):
            z = zs[-l]
            sp = sigmoid_prime(z)
            delta = np.dot(self.weights[-l+1].transpose(), delta) * sp
            nabla_b[-l] = delta
            nabla_w[-l] = np.dot(delta, activations[-l-1].transpose())
        return (nabla_b, nabla_w)

    def accuracy(self, data, convert=False):
        """Return the number of inputs in ``data`` for which the neural
        network outputs the correct result. The neural network's
        output is assumed to be the index of whichever neuron in the
        final layer has the highest activation.

        The flag ``convert`` should be set to False if the data set is
        validation or test data (the usual case), and to True if the
        data set is the training data. The need for this flag arises
        due to differences in the way the results ``y`` are
        represented in the different data sets. In particular, it
        flags whether we need to convert between the different
        representations. It may seem strange to use different
        representations for the different data sets. Why not use the
        same representation for all three data sets? It's done for
        efficiency reasons -- the program usually evaluates the cost
        on the training data and the accuracy on other data sets.
        These are different types of computations, and using different
        representations speeds things up. More details on the
        representations can be found in
        mnist_loader.load_data_wrapper.

        """
        if convert:
            results = [(np.argmax(self.feedforward(x)), np.argmax(y))
                       for (x, y) in data]
        else:
            results = [(np.argmax(self.feedforward(x)), y)
                       for (x, y) in data]
        return sum(int(x == y) for (x, y) in results)

    def total_cost(self, data, lmbda, convert=False):
        """Return the total cost for the data set ``data``. The flag
        ``convert`` should be set to False if the data set is the
        training data (the usual case), and to True if the data set is
        the validation or test data. See comments on the similar (but
        reversed) convention for the ``accuracy`` method, above.

        ``data`` must support ``len()`` (e.g. a list), since the cost
        is averaged over its size.
        """
        size = len(data)  # hoisted: invariant across the loop
        cost = 0.0
        for x, y in data:
            a = self.feedforward(x)
            if convert:
                y = vectorized_result(y)
            cost += self.cost.fn(a, y)/size
        cost += 0.5*(lmbda/size)*sum(
            np.linalg.norm(w)**2 for w in self.weights)
        return cost

    def save(self, filename):
        """Save the neural network to the file ``filename``."""
        data = {"sizes": self.sizes,
                "weights": [w.tolist() for w in self.weights],
                "biases": [b.tolist() for b in self.biases],
                "cost": str(self.cost.__name__)}
        # Context manager closes the file even if serialization fails.
        with open(filename, "w") as f:
            json.dump(data, f)


#### Loading a Network
def load(filename):
    """Load a neural network from the file ``filename``. Returns an
    instance of Network.

    The cost class is looked up by the name stored in the file among
    this module's attributes.

    """
    with open(filename, "r") as f:
        data = json.load(f)
    cost = getattr(sys.modules[__name__], data["cost"])
    net = Network(data["sizes"], cost=cost)
    net.weights = [np.array(w) for w in data["weights"]]
    net.biases = [np.array(b) for b in data["biases"]]
    return net


#### Miscellaneous functions
def vectorized_result(j):
    """Return a 10-dimensional unit vector with a 1.0 in the j'th position
    and zeroes elsewhere. This is used to convert a digit (0...9)
    into a corresponding desired output from the neural network.

    """
    e = np.zeros((10, 1))
    e[j] = 1.0
    return e


def sigmoid(z):
    """The sigmoid function."""
    return 1.0/(1.0+np.exp(-z))


def sigmoid_prime(z):
    """Derivative of the sigmoid function."""
    return sigmoid(z)*(1-sigmoid(z))
# /src/network3.py
"""network3.py
~~~~~~~~~~~~~~

A Theano-based program for training and running simple neural
networks.

Supports several layer types (fully connected, convolutional, max
pooling, softmax), and activation functions (sigmoid, tanh, and
rectified linear units, with more easily added).

When run on a CPU, this program is much faster than network.py and
network2.py. However, unlike network.py and network2.py it can also
be run on a GPU, which makes it faster still.

Because the code is based on Theano, the code is different in many
ways from network.py and network2.py. However, where possible I have
tried to maintain consistency with the earlier programs. In
particular, the API is similar to network2.py. Note that I have
focused on making the code simple, easily readable, and easily
modifiable. It is not optimized, and omits many desirable features.

This program incorporates ideas from the Theano documentation on
convolutional neural nets (notably,
http://deeplearning.net/tutorial/lenet.html ), from Misha Denil's
implementation of dropout (https://github.com/mdenil/dropout ), and
from Chris Olah (http://colah.github.io ).

"""

#### Libraries
# Standard library
import gzip
import pickle  # Python 3 replacement for the Python 2-only cPickle

# Third-party libraries
import numpy as np
import theano
import theano.tensor as T
from theano.tensor.nnet import conv
from theano.tensor.nnet import softmax
from theano.tensor import shared_randomstreams
from theano.tensor.signal import downsample

# Activation functions for neurons
def linear(z): return z
def ReLU(z): return T.maximum(0.0, z)
from theano.tensor.nnet import sigmoid
from theano.tensor import tanh


#### Constants
GPU = True
if GPU:
    print("Trying to run under a GPU. If this is not desired, then modify " +
          "network3.py\nto set the GPU flag to False.")
    try:
        theano.config.device = 'gpu'
    except Exception:
        # Best effort: it's already set.  Narrowed from a bare except so
        # KeyboardInterrupt / SystemExit are not swallowed.
        pass
    theano.config.floatX = 'float32'
else:
    print("Running with a CPU. If this is not desired, then the modify " +
          "network3.py to set\nthe GPU flag to True.")


#### Load the MNIST data
def load_data_shared(filename="../data/mnist.pkl.gz"):
    """Load the gzipped MNIST pickle ``filename`` and return a list of
    three ``(x, y)`` pairs (training, validation, test) whose members
    are Theano shared variables; ``y`` is cast to ``int32``.

    """
    # NOTE(review): unpickling can execute arbitrary code -- only load
    # trusted data files.  encoding="latin1" is what Python 3 needs to
    # read a pickle written by Python 2; presumably that is how
    # mnist.pkl.gz was produced -- confirm against the data file.
    with gzip.open(filename, 'rb') as f:
        training_data, validation_data, test_data = pickle.load(
            f, encoding="latin1")

    def shared(data):
        """Place the data into shared variables. This allows Theano to copy
        the data to the GPU, if one is available.

        """
        shared_x = theano.shared(
            np.asarray(data[0], dtype=theano.config.floatX), borrow=True)
        shared_y = theano.shared(
            np.asarray(data[1], dtype=theano.config.floatX), borrow=True)
        return shared_x, T.cast(shared_y, "int32")
    return [shared(training_data), shared(validation_data), shared(test_data)]


#### Main class used to construct and train networks
class Network(object):

    def __init__(self, layers, mini_batch_size):
        """Takes a list of `layers`, describing the network architecture, and
        a value for the `mini_batch_size` to be used during training
        by stochastic gradient descent.

        """
        self.layers = layers
        self.mini_batch_size = mini_batch_size
        self.params = [param for layer in self.layers for param in layer.params]
        self.x = T.matrix("x")
        self.y = T.ivector("y")
        # Wire the layers together: each layer receives both the plain
        # and the dropout output of its predecessor.
        init_layer = self.layers[0]
        init_layer.set_inpt(self.x, self.x, self.mini_batch_size)
        for j in range(1, len(self.layers)):
            prev_layer, layer = self.layers[j-1], self.layers[j]
            layer.set_inpt(
                prev_layer.output, prev_layer.output_dropout,
                self.mini_batch_size)
        self.output = self.layers[-1].output
        self.output_dropout = self.layers[-1].output_dropout

    def SGD(self, training_data, epochs, mini_batch_size, eta,
            validation_data, test_data, lmbda=0.0):
        """Train the network using mini-batch stochastic gradient descent.

        Each data argument is an ``(x, y)`` pair as returned by
        ``load_data_shared``.  Progress is printed; nothing is
        returned.  After training, ``self.test_mb_predictions`` is a
        compiled function giving the predictions for a test mini-batch
        index.
        """
        training_x, training_y = training_data
        validation_x, validation_y = validation_data
        test_x, test_y = test_data

        # compute number of minibatches for training, validation and testing
        # (floor division: these counts are passed to range() below)
        num_training_batches = size(training_data)//mini_batch_size
        num_validation_batches = size(validation_data)//mini_batch_size
        num_test_batches = size(test_data)//mini_batch_size

        # define the (regularized) cost function, symbolic gradients, and updates
        l2_norm_squared = sum([(layer.w**2).sum() for layer in self.layers])
        cost = self.layers[-1].cost(self)+\
            0.5*lmbda*l2_norm_squared/num_training_batches
        grads = T.grad(cost, self.params)
        updates = [(param, param-eta*grad)
                   for param, grad in zip(self.params, grads)]

        # define functions to train a mini-batch, and to compute the
        # accuracy in validation and test mini-batches.
        i = T.lscalar()  # mini-batch index
        train_mb = theano.function(
            [i], cost, updates=updates,
            givens={
                self.x:
                training_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size],
                self.y:
                training_y[i*self.mini_batch_size: (i+1)*self.mini_batch_size]
            })
        validate_mb_accuracy = theano.function(
            [i], self.layers[-1].accuracy(self.y),
            givens={
                self.x:
                validation_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size],
                self.y:
                validation_y[i*self.mini_batch_size: (i+1)*self.mini_batch_size]
            })
        test_mb_accuracy = theano.function(
            [i], self.layers[-1].accuracy(self.y),
            givens={
                self.x:
                test_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size],
                self.y:
                test_y[i*self.mini_batch_size: (i+1)*self.mini_batch_size]
            })
        self.test_mb_predictions = theano.function(
            [i], self.layers[-1].y_out,
            givens={
                self.x:
                test_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size]
            })
        # Do the actual training
        best_validation_accuracy = 0.0
        # Initialized up front so the summary below cannot hit an unbound
        # name when no epoch completes.
        best_iteration = 0
        test_accuracy = None
        for epoch in range(epochs):
            for minibatch_index in range(num_training_batches):
                iteration = num_training_batches*epoch+minibatch_index
                if iteration % 1000 == 0:
                    print("Training mini-batch number {0}".format(iteration))
                train_mb(minibatch_index)
                if (iteration+1) % num_training_batches == 0:
                    validation_accuracy = np.mean(
                        [validate_mb_accuracy(j)
                         for j in range(num_validation_batches)])
                    print("Epoch {0}: validation accuracy {1:.2%}".format(
                        epoch, validation_accuracy))
                    if validation_accuracy >= best_validation_accuracy:
                        print("This is the best validation accuracy to date.")
                        best_validation_accuracy = validation_accuracy
                        best_iteration = iteration
                        if test_data:
                            test_accuracy = np.mean(
                                [test_mb_accuracy(j)
                                 for j in range(num_test_batches)])
                            print('The corresponding test accuracy is {0:.2%}'.format(
                                test_accuracy))
        print("Finished training network.")
        print("Best validation accuracy of {0:.2%} obtained at iteration {1}".format(
            best_validation_accuracy, best_iteration))
        if test_accuracy is not None:
            print("Corresponding test accuracy of {0:.2%}".format(test_accuracy))


#### Define layer types

class ConvPoolLayer(object):
    """Used to create a combination of a convolutional and a max-pooling
    layer. A more sophisticated implementation would separate the
    two, but for our purposes we'll always use them together, and it
    simplifies the code, so it makes sense to combine them.

    """

    def __init__(self, filter_shape, image_shape, poolsize=(2, 2),
                 activation_fn=sigmoid):
        """`filter_shape` is a tuple of length 4, whose entries are the number
        of filters, the number of input feature maps, the filter height, and the
        filter width.

        `image_shape` is a tuple of length 4, whose entries are the
        mini-batch size, the number of input feature maps, the image
        height, and the image width.

        `poolsize` is a tuple of length 2, whose entries are the y and
        x pooling sizes.

        """
        self.filter_shape = filter_shape
        self.image_shape = image_shape
        self.poolsize = poolsize
        self.activation_fn = activation_fn
        # initialize weights and biases
        n_out = (filter_shape[0]*np.prod(filter_shape[2:])/np.prod(poolsize))
        self.w = theano.shared(
            np.asarray(
                np.random.normal(loc=0, scale=np.sqrt(1.0/n_out), size=filter_shape),
                dtype=theano.config.floatX),
            borrow=True)
        self.b = theano.shared(
            np.asarray(
                np.random.normal(loc=0, scale=1.0, size=(filter_shape[0],)),
                dtype=theano.config.floatX),
            borrow=True)
        self.params = [self.w, self.b]

    def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
        """Build the symbolic output: convolve ``inpt`` (reshaped to
        ``image_shape``) with the filters, max-pool, add the per-filter
        bias and apply the activation.  ``inpt_dropout`` and
        ``mini_batch_size`` are accepted for interface parity with the
        other layers but are not used here."""
        self.inpt = inpt.reshape(self.image_shape)
        conv_out = conv.conv2d(
            input=self.inpt, filters=self.w, filter_shape=self.filter_shape,
            image_shape=self.image_shape)
        pooled_out = downsample.max_pool_2d(
            input=conv_out, ds=self.poolsize, ignore_border=True)
        self.output = self.activation_fn(
            pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))
        self.output_dropout = self.output  # no dropout in the convolutional layers


class FullyConnectedLayer(object):
    """A dense layer with ``n_in`` inputs, ``n_out`` outputs, an
    activation function and optional dropout on its input."""

    def __init__(self, n_in, n_out, activation_fn=sigmoid, p_dropout=0.0):
        self.n_in = n_in
        self.n_out = n_out
        self.activation_fn = activation_fn
        self.p_dropout = p_dropout
        # Initialize weights and biases
        self.w = theano.shared(
            np.asarray(
                np.random.normal(
                    loc=0.0, scale=np.sqrt(1.0/n_out), size=(n_in, n_out)),
                dtype=theano.config.floatX),
            name='w', borrow=True)
        self.b = theano.shared(
            np.asarray(np.random.normal(loc=0.0, scale=1.0, size=(n_out,)),
                       dtype=theano.config.floatX),
            name='b', borrow=True)
        self.params = [self.w, self.b]

    def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
        """Build two symbolic outputs: ``output`` from ``inpt`` with the
        weighted input scaled by ``1 - p_dropout``, and
        ``output_dropout`` from ``inpt_dropout`` passed through
        ``dropout_layer``."""
        self.inpt = inpt.reshape((mini_batch_size, self.n_in))
        self.output = self.activation_fn(
            (1-self.p_dropout)*T.dot(self.inpt, self.w) + self.b)
        self.y_out = T.argmax(self.output, axis=1)
        self.inpt_dropout = dropout_layer(
            inpt_dropout.reshape((mini_batch_size, self.n_in)), self.p_dropout)
        self.output_dropout = self.activation_fn(
            T.dot(self.inpt_dropout, self.w) + self.b)

    def accuracy(self, y):
        """Return the accuracy for the mini-batch."""
        return T.mean(T.eq(y, self.y_out))


class SoftmaxLayer(object):
    """A softmax output layer with zero-initialized weights and biases
    and optional dropout on its input."""

    def __init__(self, n_in, n_out, p_dropout=0.0):
        self.n_in = n_in
        self.n_out = n_out
        self.p_dropout = p_dropout
        # Initialize weights and biases
        self.w = theano.shared(
            np.zeros((n_in, n_out), dtype=theano.config.floatX),
            name='w', borrow=True)
        self.b = theano.shared(
            np.zeros((n_out,), dtype=theano.config.floatX),
            name='b', borrow=True)
        self.params = [self.w, self.b]

    def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
        """Build ``output`` / ``y_out`` from ``inpt`` (weighted input
        scaled by ``1 - p_dropout``) and ``output_dropout`` from
        ``inpt_dropout`` passed through ``dropout_layer``."""
        self.inpt = inpt.reshape((mini_batch_size, self.n_in))
        self.output = softmax((1-self.p_dropout)*T.dot(self.inpt, self.w) + self.b)
        self.y_out = T.argmax(self.output, axis=1)
        self.inpt_dropout = dropout_layer(
            inpt_dropout.reshape((mini_batch_size, self.n_in)), self.p_dropout)
        self.output_dropout = softmax(T.dot(self.inpt_dropout, self.w) + self.b)

    def cost(self, net):
        """Return the log-likelihood cost."""
        return -T.mean(T.log(self.output_dropout)[T.arange(net.y.shape[0]), net.y])

    def accuracy(self, y):
        """Return the accuracy for the mini-batch."""
        return T.mean(T.eq(y, self.y_out))


#### Miscellanea
def size(data):
    """Return the size of the dataset `data`."""
    return data[0].get_value(borrow=True).shape[0]


def dropout_layer(layer, p_dropout):
    """Return ``layer`` multiplied elementwise by a random 0/1 mask in
    which each entry is 1 with probability ``1 - p_dropout``."""
    srng = shared_randomstreams.RandomStreams(
        np.random.RandomState(0).randint(999999))
    mask = srng.binomial(n=1, p=1-p_dropout, size=layer.shape)
    return layer*T.cast(mask, theano.config.floatX)
SIZE = 5000
# Number of hidden units in the autoencoder
HIDDEN = 30


def _encode(autoencoder, inputs):
    """Return the hidden-layer activations of ``autoencoder`` for ``inputs``."""
    return [sigmoid_vec(np.dot(autoencoder.weights[0], x)
                        + autoencoder.biases[0])
            for x in inputs]


print("\nGenerating training data")
training_data, _, _ = mnist_loader.load_data_nn()
# Four disjoint slices of the training data: one per autoencoder, one to
# learn the mapping between the two encodings, and one to evaluate it.
td_1 = [(x, x) for x, _ in training_data[0:SIZE]]
td_2 = [(x, x) for x, _ in training_data[12500:12500+SIZE]]
td_3 = [x for x, _ in training_data[25000:25000+SIZE]]
test = [x for x, _ in training_data[37500:37500+SIZE]]

print("\nFinding first autoencoder")
ae_1 = Network([784, HIDDEN, 784])
ae_1.SGD(td_1, 4, 10, 0.01, 0.05)

print("\nFinding second autoencoder")
ae_2 = Network([784, HIDDEN, 784])
# NOTE: this used to train on td_1 again, leaving td_2 unused.
ae_2.SGD(td_2, 4, 10, 0.01, 0.05)

print("\nGenerating encoded training data")
encoded_td_1 = _encode(ae_1, td_3)
encoded_td_2 = _encode(ae_2, td_3)
# Materialise the pairs: under Python 3 ``zip`` is a one-shot iterator.
encoded_training_data = list(zip(encoded_td_1, encoded_td_2))

print("\nFinding mapping between theories")
net = Network([HIDDEN, HIDDEN])
net.SGD(encoded_training_data, 6, 10, 0.01, 0.05)

print("""\nBaseline for comparison: decompress with the first autoencoder""")
print("""and compress with the second autoencoder""")
encoded_test_1 = _encode(ae_1, test)
encoded_test_2 = _encode(ae_2, test)
# ``test_data`` is iterated three times below, so it must be a list.
test_data = list(zip(encoded_test_1, encoded_test_2))
# The baseline decodes with the first autoencoder's output layer and
# re-encodes with the second autoencoder's input layer.
net_baseline = Network([HIDDEN, 784, HIDDEN])
net_baseline.biases[0] = ae_1.biases[1]
net_baseline.weights[0] = ae_1.weights[1]
net_baseline.biases[1] = ae_2.biases[0]
net_baseline.weights[1] = ae_2.weights[0]
error_baseline = sum(np.linalg.norm(net_baseline.feedforward(x)-y, 1)
                     for (x, y) in test_data)
print("Baseline average l1 error per training image: %s" % (
    error_baseline / SIZE,))

print("\nComparing theories with a simple interconversion")
print("Mean desired output activation: %s" % (
    sum(y.mean() for _, y in test_data) / SIZE,))
error = sum(np.linalg.norm(net.feedforward(x)-y, 1) for (x, y) in test_data)
print("Average l1 error per training image: %s" % (error / SIZE,))

print("\nComputing fiducial image inputs")
# Each fiducial image is one row of an autoencoder's input weights,
# scaled by that same row's norm.  The earlier code divided by the norm
# of a row of ``net.weights[0]``, which belongs to the HIDDEN x HIDDEN
# mapping network rather than to the image being shown.
fiducial_images_1 = [
    ae_1.weights[0][j, :].reshape(28, 28)
    / np.linalg.norm(ae_1.weights[0][j, :])
    for j in range(HIDDEN)]
fiducial_images_2 = [
    ae_2.weights[0][j, :].reshape(28, 28)
    / np.linalg.norm(ae_2.weights[0][j, :])
    for j in range(HIDDEN)]
# Top row of the figure: first autoencoder; bottom row: second.
image = np.concatenate([np.concatenate(fiducial_images_1, axis=1),
                        np.concatenate(fiducial_images_2, axis=1)])
fig = plt.figure()
ax = fig.add_subplot(111)
ax.matshow(image, cmap=matplotlib.cm.binary)
plt.xticks(np.array([]))
plt.yticks(np.array([]))
plt.show()
def plot_helper(x):
    """Show ``x`` as an image, reshaped into rows of 28 values."""
    import matplotlib
    import matplotlib.pyplot as plt
    x = np.reshape(x, (-1, 28))
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.matshow(x, cmap=matplotlib.cm.binary)
    plt.xticks(np.array([]))
    plt.yticks(np.array([]))
    plt.show()


class DeepAutoencoder(Network):

    def __init__(self, layers):
        """
        The list ``layers`` specifies the sizes of the nested
        autoencoders.  For example, if ``layers`` is [50, 20, 10] then
        the deep autoencoder will be a neural network with layers of
        size [50, 20, 10, 20, 50]."""
        self.layers = layers
        # Mirror every layer except the innermost to build the decoder.
        Network.__init__(self, layers+layers[-2::-1])

    def train(self, training_data, epochs, mini_batch_size, eta,
              lmbda):
        """
        Train the DeepAutoencoder.  The ``training_data`` is a list of
        training inputs, ``x``, ``mini_batch_size`` is a single
        positive integer, and ``epochs``, ``eta``, ``lmbda`` are lists
        of parameters, with the different list members corresponding
        to the different stages of training.  For example, ``eta[0]``
        is the learning rate used for the first nested autoencoder,
        ``eta[1]`` is the learning rate for the second nested
        autoencoder, and so on.  ``eta[-1]`` is the learning rate used
        for the final stage of fine-tuning.
        """
        # NOTE(review): ``self.sizes`` is presumably set by
        # ``Network.__init__`` -- confirm against backprop2.
        print("\nTraining a %s deep autoencoder" % (
            "-".join([str(j) for j in self.sizes]),))
        training_data = double(training_data)
        cur_training_data = training_data[::]
        for j in range(len(self.layers)-1):
            print("\nTraining the %s-%s-%s nested autoencoder" % (
                self.layers[j], self.layers[j+1], self.layers[j]))
            print("%s epochs, mini-batch size %s, eta = %s, lambda = %s" % (
                epochs[j], mini_batch_size, eta[j], lmbda[j]))
            self.train_nested_autoencoder(
                j, cur_training_data, epochs[j], mini_batch_size, eta[j],
                lmbda[j])
            # Encode the data with the layer just trained, to feed the
            # next stage.  ``train_nested_autoencoder`` stores its
            # encoder in ``self.weights[j]`` / ``self.biases[j]``; the
            # earlier code read an undefined local ``net`` here.
            cur_training_data = [
                (sigmoid_vec(np.dot(self.weights[j], x)+self.biases[j]),)*2
                for (x, _) in cur_training_data]
        print("\nFine-tuning network weights with backpropagation")
        print("%s epochs, mini-batch size %s, eta = %s, lambda = %s" % (
            epochs[-1], mini_batch_size, eta[-1], lmbda[-1]))
        self.SGD(training_data, epochs[-1], mini_batch_size, eta[-1],
                 lmbda[-1])

    def train_nested_autoencoder(
            self, j, encoded_training_data, epochs, mini_batch_size, eta,
            lmbda):
        """
        Train the nested autoencoder that starts at layer ``j`` in the
        deep autoencoder.  Note that ``encoded_training_data`` is a
        list with entries of the form ``(x, x)``, where the ``x`` are
        encoded training inputs for layer ``j``."""
        net = Network([self.layers[j], self.layers[j+1], self.layers[j]])
        # Start from the deep network's current encoder/decoder pair...
        net.biases[0] = self.biases[j]
        net.biases[1] = self.biases[-j-1]
        net.weights[0] = self.weights[j]
        net.weights[1] = self.weights[-j-1]
        net.SGD(encoded_training_data, epochs, mini_batch_size, eta, lmbda)
        # ...and write the trained parameters back into it.
        self.biases[j] = net.biases[0]
        self.biases[-j-1] = net.biases[1]
        self.weights[j] = net.weights[0]
        self.weights[-j-1] = net.weights[1]

    def train_nested_autoencoder_repl(
            self, j, training_data, epochs, mini_batch_size, eta, lmbda):
        """
        This is a convenience method that can be used from the REPL to
        train the nested autoencoder that starts at level ``j`` in the
        deep autoencoder.  Note that ``training_data`` is the input
        data for the first layer of the network, and is a list of
        entries ``x``."""
        self.train_nested_autoencoder(
            j,
            double(
                [self.feedforward(x, start=0, end=j) for x in training_data]),
            epochs, mini_batch_size, eta, lmbda)

    def feature(self, j, k):
        """
        Return the output if neuron number ``k`` in layer ``j`` is
        activated, and all others are not active.  """
        a = np.zeros((self.sizes[j], 1))
        a[k] = 1.0
        return self.feedforward(a, start=j, end=self.num_layers)


def double(l):
    """Return a list holding the pair ``(x, x)`` for every ``x`` in ``l``."""
    return [(x, x) for x in l]
def unroll(deep_autoencoder):
    """
    Return a Network that contains the compression stage of the
    ``deep_autoencoder``, i.e. its first ``len(layers) - 1`` weight
    matrices and bias vectors."""
    encoder_depth = len(deep_autoencoder.layers) - 1
    encoder = Network(deep_autoencoder.layers)
    encoder.weights = deep_autoencoder.weights[:encoder_depth]
    encoder.biases = deep_autoencoder.biases[:encoder_depth]
    return encoder


def add_classifier_layer(net, num_outputs):
    """
    Return a new Network with the layer sizes of ``net`` plus one extra
    layer of ``num_outputs`` neurons.  All but the last layer's
    parameters are taken from ``net``."""
    extended_sizes = net.sizes + [num_outputs]
    net_classifier = Network(extended_sizes)
    net_classifier.weights[:-1] = net.weights
    net_classifier.biases[:-1] = net.biases
    return net_classifier


def SGD_final_layer(
        self, training_data, epochs, mini_batch_size, eta, lmbda):
    """
    Run SGD on the final layer of the Network ``self``.  Note that
    ``training_data`` is the input to the whole Network, not the
    encoded training data input to the final layer.
    """
    penultimate = self.num_layers - 2
    # Push every input through all layers but the last.
    encoded_training_data = []
    for x, y in training_data:
        encoded = self.feedforward(x, start=0, end=penultimate)
        encoded_training_data.append((encoded, y))
    # Train a two-layer network that shares the final layer's parameters,
    # then copy the result back.
    final_layer = Network(self.sizes[-2:])
    final_layer.biases[0] = self.biases[-1]
    final_layer.weights[0] = self.weights[-1]
    final_layer.SGD(
        encoded_training_data, epochs, mini_batch_size, eta, lmbda)
    self.biases[-1] = final_layer.biases[0]
    self.weights[-1] = final_layer.weights[0]


# Add the SGD_final_layer method to the Network class
Network.SGD_final_layer = SGD_final_layer
def sigmoid(z):
    """Return the logistic function ``1 / (1 + exp(-z))`` of ``z``."""
    denominator = 1.0 + np.exp(-z)
    return 1.0 / denominator


def neuron(w, x):
    """ Return the output from the sigmoid neuron with weights ``w``
    and inputs ``x``.  ``w[0]`` is the bias and ``w[1:]`` are the input
    weights, which are combined with ``x`` by an inner product."""
    bias, input_weights = w[0], w[1:]
    return sigmoid(bias + np.inner(input_weights, x))


def h(w, x):
    """ Return the output from the three-neuron network with weights
    ``w`` and inputs ``x``.  ``w[0:3]`` and ``w[3:6]`` parameterise the
    two neurons that read ``x``; ``w[6:9]`` parameterises the neuron
    that reads their two outputs."""
    hidden_outputs = np.array([
        neuron(w[0:3], x),  # top left neuron
        neuron(w[3:6], x),  # bottom left neuron
    ])
    return neuron(w[6:9], hidden_outputs)
# inputs and corresponding outputs for the function we're computing (XOR)
INPUTS = [[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]]
OUTPUTS = [0.0, 1.0, 1.0, 0.0]


def cost(w):
    """ Return the cost when the neural network has weights ``w``.
    The cost is half the summed squared error of ``h`` over the XOR
    truth table in ``INPUTS`` / ``OUTPUTS``."""
    return 0.5 * sum((y-h(w, np.array(x)))**2 for x, y in zip(INPUTS, OUTPUTS))


def partial(f, k, w, eps=0.01):
    """ Return the partial derivative of the function ``f`` with
    respect to the ``k``th variable, at location ``w``.

    The derivative is estimated with a central difference of half-width
    ``eps``.  ``f`` must take a numpy array as input, and ``w`` is an
    array (or sequence) which can be used as input to ``f``.  ``w``
    itself is never modified."""
    # Work on float copies: perturbing an integer array in place would
    # silently truncate the +/- eps step.
    w_plus = np.array(w, dtype=float)
    w_minus = w_plus.copy()
    w_plus[k] += eps
    w_minus[k] -= eps
    return (f(w_plus)-f(w_minus))/(2*eps)


def gradient_descent(cost, eta, n):
    """ Perform ``n`` iterations of the gradient descent algorithm to
    minimize the ``cost`` function, with a learning rate ``eta``.
    Return a tuple whose first entry is an array containing the final
    weights, and whose second entry is a list of the values the
    ``cost`` function took at different iterations.

    The nine weights start uniformly at random in [-1, 1), and the cost
    is printed at every iteration."""
    w = np.random.uniform(-1, 1, 9)  # initialize weights randomly
    costs = []
    for _ in range(n):
        c = cost(w)
        print("Current cost: {0:.3f}".format(c))
        costs.append(c)
        gradient = np.array([partial(cost, k, w) for k in range(9)])
        w = w - eta*gradient
    return w, costs


def main():
    """ Perform gradient descent to find weights for a sigmoid neural
    network to compute XOR.  10,000 iterations are used.  Outputs the
    final value of the cost function, the final weights, and plots a
    graph of cost as a function of iteration."""
    w, costs = gradient_descent(cost, 0.1, 10000)
    print("\nFinal cost: {0:.3f}".format(cost(w)))
    print("\nFinal weights: %s" % (w,))
    plt.plot(np.array(costs))
    plt.xlabel('iteration')
    plt.ylabel('cost')
    plt.title('How cost decreases with the number of iterations')
    plt.show()


if __name__ == "__main__":
    main()
/src/old/mnist_10_unit_autoencoder.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/src/old/mnist_10_unit_autoencoder.png -------------------------------------------------------------------------------- /src/old/mnist_30_component_pca.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/src/old/mnist_30_component_pca.png -------------------------------------------------------------------------------- /src/old/mnist_30_unit_autoencoder.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/src/old/mnist_30_unit_autoencoder.png -------------------------------------------------------------------------------- /src/old/mnist_autoencoder.py: -------------------------------------------------------------------------------- 1 | """ 2 | mnist_autoencoder 3 | ~~~~~~~~~~~~~~~~~ 4 | 5 | Implements an autoencoder for the MNIST data. The program can do two 6 | things: (1) plot the autoencoder's output for the first ten images in 7 | the MNIST test set; and (2) use the autoencoder to build a classifier. 8 | The program is a quick-and-dirty hack --- we'll do things in a more 9 | systematic way in the module ``deep_autoencoder``. 
def autoencoder_results(hidden_units):
    """
    Train an autoencoder using the MNIST training data and plot the
    results when the first ten MNIST test images are passed through
    the autoencoder.
    """
    training_data, test_inputs, actual_test_results = \
        mnist_loader.load_data_nn()
    net = train_autoencoder(hidden_units, training_data)
    plot_test_results(net, test_inputs)


def train_autoencoder(hidden_units, training_data):
    """Return a trained 784-``hidden_units``-784 autoencoder.

    Only the inputs of ``training_data`` are used; each input is its
    own target."""
    autoencoder_training_data = [(x, x) for x, _ in training_data]
    net = Network([784, hidden_units, 784])
    net.SGD(autoencoder_training_data, 6, 10, 0.01, 0.05)
    return net


def plot_test_results(net, test_inputs):
    """
    Plot the results after passing the first ten test MNIST digits through
    the autoencoder ``net``.  The inputs form the top row of the figure
    and the corresponding outputs the bottom row."""
    fig = plt.figure()
    ax = fig.add_subplot(111)
    images_in = [test_inputs[j].reshape(-1, 28) for j in range(10)]
    images_out = [net.feedforward(test_inputs[j]).reshape(-1, 28)
                  for j in range(10)]
    image_in = np.concatenate(images_in, axis=1)
    image_out = np.concatenate(images_out, axis=1)
    image = np.concatenate([image_in, image_out])
    ax.matshow(image, cmap=matplotlib.cm.binary)
    plt.xticks(np.array([]))
    plt.yticks(np.array([]))
    plt.show()


def classifier(hidden_units, n_unlabeled_inputs, n_labeled_inputs):
    """
    Train a semi-supervised classifier.  We begin with pretraining,
    creating an autoencoder which uses ``n_unlabeled_inputs`` from the
    MNIST training data.  This is then converted into a classifier
    which is fine-tuned using the ``n_labeled_inputs``.

    For comparison a classifier is also created which does not make
    use of the unlabeled data.

    Returns the pretrained-and-fine-tuned classifier.
    """
    training_data, test_inputs, actual_test_results = \
        mnist_loader.load_data_nn()
    print("\nUsing pretraining and %s items of unlabeled data" %
          n_unlabeled_inputs)
    net_ae = train_autoencoder(hidden_units, training_data[:n_unlabeled_inputs])
    # Keep the autoencoder's encoding layer and attach a freshly
    # initialised 10-way output layer.
    net_c = Network([784, hidden_units, 10])
    net_c.biases = net_ae.biases[:1]+[np.random.randn(10, 1)/np.sqrt(10)]
    net_c.weights = net_ae.weights[:1]+\
        [np.random.randn(10, hidden_units)/np.sqrt(10)]
    # The labeled examples are taken from the end of the training data.
    net_c.SGD(training_data[-n_labeled_inputs:], 300, 10, 0.01, 0.05)
    print("Result on test data: %s / %s" % (
        net_c.evaluate(test_inputs, actual_test_results), len(test_inputs)))
    print("Training a network with %s items of training data" %
          n_labeled_inputs)
    net = Network([784, hidden_units, 10])
    net.SGD(training_data[-n_labeled_inputs:], 300, 10, 0.01, 0.05)
    print("Result on test data: %s / %s" % (
        net.evaluate(test_inputs, actual_test_results), len(test_inputs)))
    return net_c
class Perceptron(object):
    """ A Perceptron instance can take a function and attempt to
    ``learn`` a bias and set of weights that compute that function,
    using the perceptron learning algorithm."""

    def __init__(self, num_inputs=2):
        """ Initialize the perceptron with the bias and all weights
        set to 0.0.  ``num_inputs`` is the number of input bits to the
        perceptron."""
        self.num_inputs = num_inputs
        self.bias = 0.0
        self.weights = np.zeros(num_inputs)
        # self.inputs is a convenience attribute.  It's a list containing
        # all possible binary inputs to the perceptron.  E.g., for three
        # inputs it is: [np.array([0, 0, 0]), np.array([0, 0, 1]), ...]
        # The bits of each counter value are read most significant
        # first, so the list is in ascending binary order.
        self.inputs = [
            np.array([(x >> shift) & 1
                      for shift in reversed(range(num_inputs))])
            for x in range(2**num_inputs)]

    def output(self, x):
        """ Return the output (0 or 1) from the perceptron, with input
        ``x``."""
        return 1 if np.inner(self.weights, x)+self.bias > 0 else 0

    def learn(self, f, eta=0.1):
        """ Find a bias and a set of weights for a perceptron that
        computes the function ``f``.  ``eta`` is the learning rate, and
        should be a small positive number.  Does not terminate when
        the function cannot be computed using a perceptron.

        The bias and weights are printed at the start of every pass
        over ``self.inputs`` and the error count at the end of it."""
        # initialize the bias and weights with random values
        self.bias = np.random.normal()
        self.weights = np.random.randn(self.num_inputs)
        number_of_errors = -1  # sentinel so the loop runs at least once
        while number_of_errors != 0:
            number_of_errors = 0
            print("Beginning iteration")
            print("Bias: {:.3f}".format(self.bias))
            print("Weights:", ", ".join(
                "{:.3f}".format(wt) for wt in self.weights))
            for x in self.inputs:
                error = f(x)-self.output(x)
                if error:
                    # Nudge the parameters towards the desired output.
                    number_of_errors += 1
                    self.bias = self.bias+eta*error
                    self.weights = self.weights+eta*error*x
            print("Number of errors:", number_of_errors, "\n")


def f(x):
    """ Target function for the perceptron learning algorithm.  I've
    chosen the NAND gate, but any function is okay, with the caveat
    that the algorithm won't terminate if ``f`` cannot be computed by
    a perceptron."""
    return int(not (x[0] and x[1]))


if __name__ == "__main__":
    Perceptron(2).learn(f, 0.1)
import network
import mnist_loader

# Load the three MNIST splits in the form the network module consumes.
training_data, validation_data, test_data = mnist_loader.load_data_wrapper()

# A 784-100-10 network trained for 30 epochs with mini-batches of 10 and
# learning rate 0.1; ``test_data`` is passed to ``SGD`` alongside it.
layer_sizes = [784, 100, 10]
net = network.Network(layer_sizes)
net.SGD(training_data, 30, 10, 0.1, test_data=test_data)