├── .gitignore
├── .idea
├── misc.xml
├── modules.xml
├── neural-networks-and-deep-learning.iml
└── vcs.xml
├── README.md
├── data
└── mnist.pkl.gz
├── fig
├── backprop_magnitude_nabla.png
├── backprop_magnitude_nabla.py
├── data_1000.json
├── digits.png
├── digits_separate.png
├── false_minima.png
├── false_minima.py
├── generate_gradient.py
├── initial_gradient.json
├── misleading_gradient.png
├── misleading_gradient.py
├── misleading_gradient_contours.png
├── misleading_gradient_contours.py
├── mnist.py
├── mnist_100_digits.png
├── mnist_2_and_1.png
├── mnist_complete_zero.png
├── mnist_first_digit.png
├── mnist_other_features.png
├── mnist_really_bad_images.png
├── mnist_top_left_feature.png
├── more_data.json
├── more_data.png
├── more_data.py
├── more_data_5.png
├── more_data_comparison.png
├── more_data_log.png
├── more_data_rotated_5.png
├── more_data_svm.json
├── multiple_eta.json
├── multiple_eta.png
├── multiple_eta.py
├── norms_during_training_2_layers.json
├── norms_during_training_3_layers.json
├── norms_during_training_4_layers.json
├── overfitting.json
├── overfitting.py
├── overfitting1.png
├── overfitting2.png
├── overfitting3.png
├── overfitting4.png
├── overfitting_full.json
├── overfitting_full.png
├── pca_hard_data.png
├── pca_hard_data_fit.png
├── pca_limitations.py
├── regularized.json
├── regularized1.png
├── regularized2.png
├── regularized_full.json
├── regularized_full.png
├── replaced_by_d3
│ ├── README.md
│ ├── relu.png
│ ├── relu.py
│ ├── sigmoid.png
│ ├── sigmoid.py
│ ├── step.png
│ ├── step.py
│ ├── tanh.png
│ └── tanh.py
├── serialize_images_to_json.py
├── test.png
├── training_speed_2_layers.png
├── training_speed_3_layers.png
├── training_speed_4_layers.png
├── valley.png
├── valley.py
├── valley2.png
├── valley2.py
├── weight_initialization.py
├── weight_initialization_100.json
├── weight_initialization_100.png
├── weight_initialization_30.json
└── weight_initialization_30.png
├── requirements.txt
└── src
├── conv.py
├── expand_mnist.py
├── mnist_average_darkness.py
├── mnist_loader.py
├── mnist_svm.py
├── network.py
├── network2.py
├── network3.py
├── old
├── blog
│ ├── __init__.py
│ └── common_knowledge.py
├── cost_vs_iterations.png
├── cost_vs_iterations_trapped.png
├── deep_autoencoder.py
├── deep_learning.py
├── gradient_descent_hack.py
├── mnist_100_30_deep_autoencoder.png
├── mnist_100_unit_autoencoder.png
├── mnist_10_unit_autoencoder.png
├── mnist_30_component_pca.png
├── mnist_30_unit_autoencoder.png
├── mnist_autoencoder.py
├── mnist_pca.py
└── perceptron_learning.py
└── test_mnist.py
/.gitignore:
--------------------------------------------------------------------------------
1 | *~
2 | *.org
3 | *.pem
4 | *.pkl
5 | *.pyc
6 | .DS_Store
7 | loc.py
8 | src/ec2
9 |
--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/.idea/neural-networks-and-deep-learning.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Code samples for "Neural Networks and Deep Learning" (Python 3.x version)
2 |
3 | This repository contains code samples for my (forthcoming) book on
4 | "Neural Networks and Deep Learning".
5 |
6 | As the code is written to accompany the book, I don't intend to add
7 | new features. However, bug reports are welcome, and you should feel
8 | free to fork and modify the code.
9 |
10 | ## Changes
11 | This is the code for the online book "Neural Networks and Deep Learning", modified to run on Python 3.x.
12 |
13 | If you are interested in the book but prefer to work in Python 3, you can use this version.
14 |
15 | My homepage : http://www.liuxiao.org
16 |
17 | ## License
18 |
19 | MIT License
20 |
21 | Copyright (c) 2012-2015 Michael Nielsen
22 |
23 | Permission is hereby granted, free of charge, to any person obtaining
24 | a copy of this software and associated documentation files (the
25 | "Software"), to deal in the Software without restriction, including
26 | without limitation the rights to use, copy, modify, merge, publish,
27 | distribute, sublicense, and/or sell copies of the Software, and to
28 | permit persons to whom the Software is furnished to do so, subject to
29 | the following conditions:
30 |
31 | The above copyright notice and this permission notice shall be
32 | included in all copies or substantial portions of the Software.
33 |
34 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
35 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
36 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
37 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
38 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
39 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
40 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
41 |
--------------------------------------------------------------------------------
/data/mnist.pkl.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/data/mnist.pkl.gz
--------------------------------------------------------------------------------
/fig/backprop_magnitude_nabla.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/backprop_magnitude_nabla.png
--------------------------------------------------------------------------------
/fig/backprop_magnitude_nabla.py:
--------------------------------------------------------------------------------
1 | """
2 | backprop_magnitude_nabla
3 | ~~~~~~~~~~~~~~~~~~~~~~~~
4 |
5 | Using backprop2 I constructed a 784-30-30-30-30-30-10 network to classify
6 | MNIST data. I ran ten mini-batches of size 100, with eta = 0.01 and
7 | lambda = 0.05, using:
8 |
9 | net.SGD(otd[:1000], 1, 100, 0.01, 0.05,
10 |
11 | I obtained the following norms for the (unregularized) nabla_w for the
12 | respective mini-batches:
13 |
14 | [0.90845722175923671, 2.8852730656073566, 10.696793986223632, 37.75701921183488, 157.7365422527995, 304.43990075227839]
15 | [0.22493835119537842, 0.6555126517964851, 2.6036801277234076, 11.408825365731225, 46.882319190445472, 70.499637502698221]
16 | [0.11935180022357521, 0.19756069137133489, 0.8152794148335869, 3.4590802543293977, 15.470507965493903, 31.032396017142556]
17 | [0.15130005837653659, 0.39687135985664701, 1.4810006139254532, 4.392519005642268, 16.831939776937311, 34.082104455938733]
18 | [0.11594085276308999, 0.17177668061395848, 0.72204558746599512, 3.05062409378366, 14.133001132214286, 29.776204839994385]
19 | [0.10790389807606221, 0.20707152756018626, 0.96348134037828603, 3.9043824079499561, 15.986873430586924, 39.195258080490895]
20 | [0.088613291101645356, 0.129173436407863, 0.4242933114455002, 1.6154682713449411, 7.5451567587160069, 20.180545544006566]
21 | [0.086175380639289575, 0.12571016850457151, 0.44231149185805047, 1.8435833504677326, 7.61973813981073, 19.474539356281781]
22 | [0.095372080184163904, 0.15854489503205446, 0.70244235144444678, 2.6294803575724157, 10.427062019753425, 24.309420272033819]
23 | [0.096453131000155692, 0.13574642196947601, 0.53551377709415471, 2.0247466793066895, 9.4503978546018068, 21.73772148470092]
24 |
25 | Note that results are listed in order of layer. They clearly show how
26 | the magnitude of nabla_w decreases as we go back through layers.
27 |
28 | In this program I take mini-batches 7, 8, 9 as representative and plot
29 | them. I omit the results from the first and final layers since they
30 | correspond to 784 input neurons and 10 output neurons, not 30 as in
31 | the other layers, making it difficult to compare results.
32 |
33 | Note that I haven't attempted to preserve the whole workflow here. It
34 | involved some minor hacking around with backprop2, which messed up
35 | that code. That's why I've simply put the results in by hand below.
36 | """
37 |
38 | # Third-party libraries
39 | import matplotlib.pyplot as plt
40 |
# Hand-copied norms of nabla_w for mini-batches 7, 8 and 9 (see the
# module docstring), with the first and final layers left out.
nw1 = [
    0.129173436407863,
    0.4242933114455002,
    1.6154682713449411,
    7.5451567587160069,
]
nw2 = [
    0.12571016850457151,
    0.44231149185805047,
    1.8435833504677326,
    7.61973813981073,
]
nw3 = [
    0.15854489503205446,
    0.70244235144444678,
    2.6294803575724157,
    10.427062019753425,
]

# One curve per mini-batch, all sharing the same layer axis.
layers = range(1, 5)
plt.plot(layers, nw1, "ro-", layers, nw2, "go-", layers, nw3, "bo-")
plt.xlabel('Layer $l$')
plt.ylabel(r"$\Vert\nabla C^l_w\Vert$")
plt.xticks([1, 2, 3, 4])
plt.show()
53 |
--------------------------------------------------------------------------------
/fig/digits.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/digits.png
--------------------------------------------------------------------------------
/fig/digits_separate.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/digits_separate.png
--------------------------------------------------------------------------------
/fig/false_minima.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/false_minima.png
--------------------------------------------------------------------------------
/fig/false_minima.py:
--------------------------------------------------------------------------------
1 | """
2 | false_minimum
3 | ~~~~~~~~~~~~~
4 |
5 | Plots a function of two variables with many false minima."""
6 |
7 | #### Libraries
8 | # Third party libraries
9 | from matplotlib.ticker import LinearLocator
10 | # Note that axes3d is not explicitly used in the code, but is needed
11 | # to register the 3d plot type correctly
12 | from mpl_toolkits.mplot3d import axes3d
13 | import matplotlib.pyplot as plt
14 | import numpy
15 |
# Build the 3-d axes with ``add_subplot``: recent Matplotlib releases no
# longer accept ``projection`` as a keyword to ``fig.gca()``.
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')

# Sample the surface on a regular grid over [-5, 5) x [-5, 5).
X = numpy.arange(-5, 5, 0.1)
Y = numpy.arange(-5, 5, 0.1)
X, Y = numpy.meshgrid(X, Y)
Z = numpy.sin(X)*numpy.sin(Y)+0.2*X

# Colour the facets in a white/blue checkerboard so the shape of the
# surface is easy to read.
colortuple = ('w', 'b')
colors = numpy.empty(X.shape, dtype=str)
rows, cols = X.shape
for x in range(rows):  # ``xrange`` does not exist in Python 3
    for y in range(cols):
        colors[x, y] = colortuple[(x + y) % 2]

surf = ax.plot_surface(X, Y, Z, rstride=1, cstride=1, facecolors=colors,
                       linewidth=0)

ax.set_xlim3d(-5, 5)
ax.set_ylim3d(-5, 5)
ax.set_zlim3d(-2, 2)
# Three major ticks per axis. ``xaxis``/``yaxis``/``zaxis`` are used
# instead of the ``w_xaxis``-style aliases, which newer Matplotlib
# releases have removed.
ax.xaxis.set_major_locator(LinearLocator(3))
ax.yaxis.set_major_locator(LinearLocator(3))
ax.zaxis.set_major_locator(LinearLocator(3))

plt.show()
40 |
41 |
--------------------------------------------------------------------------------
/fig/generate_gradient.py:
--------------------------------------------------------------------------------
1 | """generate_gradient.py
2 | ~~~~~~~~~~~~~~~~~~~~~~~
3 |
4 | Use network2 to figure out the average starting values of the gradient
5 | error terms \delta^l_j = \partial C / \partial z^l_j = \partial C /
6 | \partial b^l_j.
7 |
8 | """
9 |
10 | #### Libraries
11 | # Standard library
12 | import json
13 | import math
14 | import random
15 | import shutil
16 | import sys
17 | sys.path.append("../src/")
18 |
19 | # My library
20 | import mnist_loader
21 | import network2
22 |
23 | # Third-party libraries
24 | import matplotlib.pyplot as plt
25 | import numpy as np
26 |
def main():
    """Run the gradient experiments for 2, 3 and 4 hidden layers.

    For each architecture this prints the initial gradient norms, trains
    the network while recording the norms to a JSON file, and plots the
    recorded norms.  For the two-hidden-layer network it also saves the
    averaged gradient of the first six neurons of each layer (all but
    the last) to ``initial_gradient.json`` and copies that file to
    ``../../js/``.
    """
    # Load the data
    full_td, _, _ = mnist_loader.load_data_wrapper()
    # Just use the first 1000 items of training data.  ``list()`` keeps
    # the slice working if the loader hands back an iterator such as a
    # zip object -- NOTE(review): confirm what load_data_wrapper returns.
    td = list(full_td)[:1000]
    epochs = 500 # Number of epochs to train for

    print("\nTwo hidden layers:")
    net = network2.Network([784, 30, 30, 10])
    initial_norms(td, net)
    abbreviated_gradient = [
        ag[:6] for ag in get_average_gradient(net, td)[:-1]]
    print("Saving the averaged gradient for the top six neurons in each "+
          "layer.\nWARNING: This will affect the look of the book, so be "+
          "sure to check the\nrelevant material (early chapter 5).")
    # ``with`` guarantees the file is closed even if json.dump raises.
    with open("initial_gradient.json", "w") as f:
        json.dump(abbreviated_gradient, f)
    shutil.copy("initial_gradient.json", "../../js/initial_gradient.json")
    training(td, net, epochs, "norms_during_training_2_layers.json")
    plot_training(
        epochs, "norms_during_training_2_layers.json", 2)

    print("\nThree hidden layers:")
    net = network2.Network([784, 30, 30, 30, 10])
    initial_norms(td, net)
    training(td, net, epochs, "norms_during_training_3_layers.json")
    plot_training(
        epochs, "norms_during_training_3_layers.json", 3)

    print("\nFour hidden layers:")
    net = network2.Network([784, 30, 30, 30, 30, 10])
    initial_norms(td, net)
    training(td, net, epochs,
             "norms_during_training_4_layers.json")
    plot_training(
        epochs, "norms_during_training_4_layers.json", 4)
63 |
def initial_norms(training_data, net):
    """Print the norm of the average gradient for each hidden layer.

    The average gradient comes from ``get_average_gradient``; its last
    entry is dropped before the norms are taken.
    """
    average_gradient = get_average_gradient(net, training_data)
    norms = [list_norm(avg) for avg in average_gradient[:-1]]
    # print() is a function in Python 3
    print("Average gradient for the hidden layers: "+str(norms))
68 |
def training(training_data, net, epochs, filename):
    """Train ``net`` for ``epochs`` epochs, recording gradient norms.

    Before each epoch the per-layer norms of the average gradient (all
    layers but the last) are appended to a list; the network is then
    trained for one epoch with ``net.SGD``.  After the final epoch the
    list of norms is written to ``filename`` as JSON.
    """
    norms = []
    for j in range(epochs):
        average_gradient = get_average_gradient(net, training_data)
        norms.append([list_norm(avg) for avg in average_gradient[:-1]])
        print("Epoch: %s" % j)
        net.SGD(training_data, 1, 1000, 0.1, lmbda=5.0)
    # ``with`` guarantees the file is closed even if json.dump raises.
    with open(filename, "w") as f:
        json.dump(norms, f)
79 |
def plot_training(epochs, filename, num_layers):
    """Plot the gradient norms stored in ``filename`` against the epoch.

    ``filename`` is read as JSON: a list with one entry per epoch, each
    entry holding one norm per layer.  One curve is drawn for each of
    the first ``num_layers`` layers on a logarithmic y axis.  The figure
    is saved as ``training_speed_<num_layers>_layers.png``, copied to
    ``../../images/`` and then shown.
    """
    # ``with`` closes the file even if the JSON cannot be parsed.
    with open(filename, "r") as f:
        norms = json.load(f)
    fig = plt.figure()
    ax = fig.add_subplot(111)
    # One colour per layer; only five are defined, so num_layers must
    # not exceed five.
    colors = ["#2A6EA6", "#FFA933", "#FF5555", "#55FF55", "#5555FF"]
    for j in range(num_layers):
        ax.plot(np.arange(epochs),
                [n[j] for n in norms],
                color=colors[j],
                label="Hidden layer %s" % (j+1,))
    ax.set_xlim([0, epochs])
    ax.grid(True)
    ax.set_xlabel('Number of epochs of training')
    ax.set_title('Speed of learning: %s hidden layers' % num_layers)
    ax.set_yscale('log')
    plt.legend(loc="upper right")
    fig_filename = "training_speed_%s_layers.png" % num_layers
    plt.savefig(fig_filename)
    shutil.copy(fig_filename, "../../images/"+fig_filename)
    plt.show()
102 |
def get_average_gradient(net, training_data):
    """Return the gradient averaged over ``training_data``.

    The first element of ``net.backprop(x, y)`` is collected for every
    ``(x, y)`` pair, the per-example results are summed layer by layer
    with ``list_sum``, and each layer's sum is flattened, divided by the
    number of examples and converted to a plain Python list.
    """
    per_example = [net.backprop(x, y)[0] for x, y in training_data]
    summed = list_sum(per_example)
    averaged = []
    for layer_total in summed:
        flat = np.reshape(layer_total, len(layer_total))
        averaged.append((flat/len(training_data)).tolist())
    return averaged
108 |
def zip_sum(a, b):
    """Return a list of the pairwise sums of ``a`` and ``b``.

    Pairing stops at the end of the shorter input.
    """
    totals = []
    for left, right in zip(a, b):
        totals.append(left+right)
    return totals
111 |
def list_sum(l):
    """Return the element-wise sum of the sequences in ``l``.

    The sequences are folded together pairwise with ``zip_sum``.  An
    empty ``l`` raises ``TypeError``, as ``reduce`` has no initial value.
    """
    # ``reduce`` is no longer a builtin in Python 3; import it locally
    # so the module's import block does not need to change.
    from functools import reduce
    return reduce(zip_sum, l)
114 |
def list_norm(l):
    """Return the Euclidean norm of the numbers in ``l``."""
    sum_of_squares = 0
    for x in l:
        sum_of_squares += x*x
    return math.sqrt(sum_of_squares)
117 |
118 | if __name__ == "__main__":
119 | main()
120 |
--------------------------------------------------------------------------------
/fig/initial_gradient.json:
--------------------------------------------------------------------------------
1 | [[-0.003970677333144113, -0.0031684316985881185, 0.008103235909196014, 0.012598010584130365, -0.026465907331998335, 0.0017583319323150341], [0.04152906589960523, 0.044025552524932406, -0.009669682279354514, 0.046736871369353235, 0.03877302528270452, 0.012336459551975156]]
--------------------------------------------------------------------------------
/fig/misleading_gradient.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/misleading_gradient.png
--------------------------------------------------------------------------------
/fig/misleading_gradient.py:
--------------------------------------------------------------------------------
1 | """
2 | misleading_gradient
3 | ~~~~~~~~~~~~~~~~~~~
4 |
5 | Plots a function which misleads the gradient descent algorithm."""
6 |
7 | #### Libraries
8 | # Third party libraries
9 | from matplotlib.ticker import LinearLocator
10 | # Note that axes3d is not explicitly used in the code, but is needed
11 | # to register the 3d plot type correctly
12 | from mpl_toolkits.mplot3d import axes3d
13 | import matplotlib.pyplot as plt
14 | import numpy
15 |
# Build the 3-d axes with ``add_subplot``: recent Matplotlib releases no
# longer accept ``projection`` as a keyword to ``fig.gca()``.
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')

# Sample the cost surface C = w_1^2 + 10 w_2^2 on a regular grid.
X = numpy.arange(-1, 1, 0.025)
Y = numpy.arange(-1, 1, 0.025)
X, Y = numpy.meshgrid(X, Y)
Z = X**2 + 10*Y**2

# Colour the facets in a white/blue checkerboard so the shape of the
# surface is easy to read.
colortuple = ('w', 'b')
colors = numpy.empty(X.shape, dtype=str)
rows, cols = X.shape
for x in range(rows):  # ``xrange`` does not exist in Python 3
    for y in range(cols):
        colors[x, y] = colortuple[(x + y) % 2]

surf = ax.plot_surface(X, Y, Z, rstride=1, cstride=1, facecolors=colors,
                       linewidth=0)

ax.set_xlim3d(-1, 1)
ax.set_ylim3d(-1, 1)
ax.set_zlim3d(0, 12)
# Three major ticks per axis. ``xaxis``/``yaxis``/``zaxis`` are used
# instead of the ``w_xaxis``-style aliases, which newer Matplotlib
# releases have removed.
ax.xaxis.set_major_locator(LinearLocator(3))
ax.yaxis.set_major_locator(LinearLocator(3))
ax.zaxis.set_major_locator(LinearLocator(3))
# Axis labels, placed by hand in data co-ordinates.
ax.text(0.05, -1.8, 0, "$w_1$", fontsize=20)
ax.text(1.5, -0.25, 0, "$w_2$", fontsize=20)
ax.text(1.79, 0, 9.62, "$C$", fontsize=20)

plt.show()
43 |
44 |
--------------------------------------------------------------------------------
/fig/misleading_gradient_contours.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/misleading_gradient_contours.png
--------------------------------------------------------------------------------
/fig/misleading_gradient_contours.py:
--------------------------------------------------------------------------------
1 | """
2 | misleading_gradient_contours
3 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
4 |
5 | Plots the contours of the function from misleading_gradient.py"""
6 |
7 | #### Libraries
8 | # Third party libraries
9 | import matplotlib.pyplot as plt
10 | import numpy
11 |
# Evaluate Z = X^2 + 10 Y^2 on a square grid over [-1, 1) x [-1, 1).
X, Y = numpy.meshgrid(numpy.arange(-1, 1, 0.02),
                      numpy.arange(-1, 1, 0.02))
Z = X**2 + 10*Y**2

# Draw the contour lines at hand-picked heights and label the axes.
contour_levels = [0.5, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]
plt.figure()
CS = plt.contour(X, Y, Z, levels=contour_levels)
plt.xlabel("$w_1$", fontsize=16)
plt.ylabel("$w_2$", fontsize=16)
plt.show()
22 |
--------------------------------------------------------------------------------
/fig/mnist.py:
--------------------------------------------------------------------------------
1 | """
2 | mnist
3 | ~~~~~
4 |
5 | Draws images based on the MNIST data."""
6 |
7 | #### Libraries
8 | # Standard library
try:
    import cPickle
except ImportError:  # Python 3 has no cPickle module; pickle replaces it
    import pickle as cPickle
import sys
11 |
12 | # My library
13 | sys.path.append('../src/')
14 | import mnist_loader
15 |
16 | # Third-party libraries
17 | import matplotlib
18 | import matplotlib.pyplot as plt
19 | import numpy as np
20 |
def main():
    """Load the MNIST data and plot the first training image, followed
    by a rotated copy of it."""
    data_sets = mnist_loader.load_data()
    training_set, validation_set, test_set = data_sets
    first_image = get_images(training_set)[0]
    plot_rotated_image(first_image)
25 |
26 | #### Plotting
def plot_images_together(images):
    """ Plot all of ``images`` side by side as one single image.  The
    sides of each image are cropped (columns 3 to 24 are kept) so that
    the digits appear reasonably close together."""
    cropped = [picture[:, 3:25] for picture in images]
    strip = np.concatenate(cropped, axis=1)
    figure = plt.figure()
    axes = figure.add_subplot(1, 1, 1)
    axes.matshow(strip, cmap = matplotlib.cm.binary)
    # Hide the tick marks on both axes
    plt.xticks(np.array([]))
    plt.yticks(np.array([]))
    plt.show()
39 |
def plot_10_by_10_images(images):
    """ Plot 100 MNIST images in a 10 by 10 table. Note that we crop
    the images so that they appear reasonably close together.  The
    image is post-processed to give the appearance of being continued.

    ``images`` must hold at least 100 images; only the first 100 are
    drawn."""
    fig = plt.figure()
    images = [image[3:25, 3:25] for image in images]
    for x in range(10):
        for y in range(10):
            index = 10*y+x
            # Subplot positions are 1-based, so the image at list
            # position ``index`` goes into subplot ``index+1``.  Using
            # ``index`` directly asks for subplot 0 on the first
            # iteration, which Matplotlib rejects.
            ax = fig.add_subplot(10, 10, index+1)
            ax.matshow(images[index], cmap = matplotlib.cm.binary)
            plt.xticks(np.array([]))
            plt.yticks(np.array([]))
    plt.show()
54 |
def plot_images_separately(images):
    """Plot the first six images of ``images`` in separate subplots,
    arranged in a single row."""
    fig = plt.figure()
    # ``xrange`` does not exist in Python 3; ``range`` is the lazy
    # equivalent.  Subplot positions are 1-based, list indices 0-based.
    for j in range(1, 7):
        ax = fig.add_subplot(1, 6, j)
        ax.matshow(images[j-1], cmap = matplotlib.cm.binary)
        plt.xticks(np.array([]))
        plt.yticks(np.array([]))
    plt.show()
64 |
def plot_mnist_digit(image):
    """ Show ``image`` on its own in a new figure, using the binary
    colour map and no axis ticks."""
    figure = plt.figure()
    axes = figure.add_subplot(1, 1, 1)
    axes.matshow(image, cmap = matplotlib.cm.binary)
    # Hide the tick marks on both axes
    plt.xticks(np.array([]))
    plt.yticks(np.array([]))
    plt.show()
73 |
def plot_2_and_1(images):
    """Plot ``images[5]`` and ``images[3]`` side by side (a 2 and a 1
    from the MNIST set, according to the original author)."""
    figure = plt.figure()
    # (subplot position, index into ``images``)
    for position, image_index in ((1, 5), (2, 3)):
        axes = figure.add_subplot(1, 2, position)
        axes.matshow(images[image_index], cmap = matplotlib.cm.binary)
        plt.xticks(np.array([]))
        plt.yticks(np.array([]))
    plt.show()
86 |
def plot_top_left(image):
    """Plot the top left of ``image``.

    ``image`` is expected to be a 28 by 28 array.  Everything outside
    the top-left 14 by 14 quadrant is blanked out before plotting."""
    # Work on a copy so the caller's array is left untouched, as
    # ``plot_features`` does.
    image = np.copy(image)
    image[14:,:] = np.zeros((14,28))
    image[:,14:] = np.zeros((28,14))
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.matshow(image, cmap = matplotlib.cm.binary)
    plt.xticks(np.array([]))
    plt.yticks(np.array([]))
    plt.show()
97 |
def plot_bad_images(images):
    """This takes a list of images misclassified by a pretty good
    neural network --- one achieving over 93 percent accuracy --- and
    turns them into a figure.

    ``images`` is indexed with the hard-coded positions below, so it
    must contain at least 9983 images.  Each subplot is titled with the
    index of the image it shows."""
    bad_image_indices = [8, 18, 33, 92, 119, 124, 149, 151, 193, 233, 241, 247, 259, 300, 313, 321, 324, 341, 349, 352, 359, 362, 381, 412, 435, 445, 449, 478, 479, 495, 502, 511, 528, 531, 547, 571, 578, 582, 597, 610, 619, 628, 629, 659, 667, 691, 707, 717, 726, 740, 791, 810, 844, 846, 898, 938, 939, 947, 956, 959, 965, 982, 1014, 1033, 1039, 1044, 1050, 1055, 1107, 1112, 1124, 1147, 1181, 1191, 1192, 1198, 1202, 1204, 1206, 1224, 1226, 1232, 1242, 1243, 1247, 1256, 1260, 1263, 1283, 1289, 1299, 1310, 1319, 1326, 1328, 1357, 1378, 1393, 1413, 1422, 1435, 1467, 1469, 1494, 1500, 1522, 1523, 1525, 1527, 1530, 1549, 1553, 1609, 1611, 1634, 1641, 1676, 1678, 1681, 1709, 1717, 1722, 1730, 1732, 1737, 1741, 1754, 1759, 1772, 1773, 1790, 1808, 1813, 1823, 1843, 1850, 1857, 1868, 1878, 1880, 1883, 1901, 1913, 1930, 1938, 1940, 1952, 1969, 1970, 1984, 2001, 2009, 2016, 2018, 2035, 2040, 2043, 2044, 2053, 2063, 2098, 2105, 2109, 2118, 2129, 2130, 2135, 2148, 2161, 2168, 2174, 2182, 2185, 2186, 2189, 2224, 2229, 2237, 2266, 2272, 2293, 2299, 2319, 2325, 2326, 2334, 2369, 2371, 2380, 2381, 2387, 2393, 2395, 2406, 2408, 2414, 2422, 2433, 2450, 2488, 2514, 2526, 2548, 2574, 2589, 2598, 2607, 2610, 2631, 2648, 2654, 2695, 2713, 2720, 2721, 2730, 2770, 2771, 2780, 2863, 2866, 2896, 2907, 2925, 2927, 2939, 2995, 3005, 3023, 3030, 3060, 3073, 3102, 3108, 3110, 3114, 3115, 3117, 3130, 3132, 3157, 3160, 3167, 3183, 3189, 3206, 3240, 3254, 3260, 3280, 3329, 3330, 3333, 3383, 3384, 3475, 3490, 3503, 3520, 3525, 3559, 3567, 3573, 3597, 3598, 3604, 3629, 3664, 3702, 3716, 3718, 3725, 3726, 3727, 3751, 3752, 3757, 3763, 3766, 3767, 3769, 3776, 3780, 3798, 3806, 3808, 3811, 3817, 3821, 3838, 3848, 3853, 3855, 3869, 3876, 3902, 3906, 3926, 3941, 3943, 3951, 3954, 3962, 3976, 3985, 3995, 4000, 4002, 4007, 4017, 4018, 4065, 4075, 4078, 4093, 4102, 4139, 4140, 4152, 4154, 4163, 4165, 4176, 4199, 4201, 4205, 4207, 4212, 4224, 4238, 4248, 4256, 4284, 4289, 4297, 4300, 4306, 4344, 4355, 4356,
        4359, 4360, 4369, 4405, 4425, 4433, 4435, 4449, 4487, 4497, 4498, 4500, 4521, 4536, 4548, 4563, 4571, 4575, 4601, 4615, 4620, 4633, 4639, 4662, 4690, 4722, 4731, 4735, 4737, 4739, 4740, 4761, 4798, 4807, 4814, 4823, 4833, 4837, 4874, 4876, 4879, 4880, 4886, 4890, 4910, 4950, 4951, 4952, 4956, 4963, 4966, 4968, 4978, 4990, 5001, 5020, 5054, 5067, 5068, 5078, 5135, 5140, 5143, 5176, 5183, 5201, 5210, 5331, 5409, 5457, 5495, 5600, 5601, 5617, 5623, 5634, 5642, 5677, 5678, 5718, 5734, 5735, 5749, 5752, 5771, 5787, 5835, 5842, 5845, 5858, 5887, 5888, 5891, 5906, 5913, 5936, 5937, 5945, 5955, 5957, 5972, 5973, 5985, 5987, 5997, 6035, 6042, 6043, 6045, 6053, 6059, 6065, 6071, 6081, 6091, 6112, 6124, 6157, 6166, 6168, 6172, 6173, 6347, 6370, 6386, 6390, 6391, 6392, 6421, 6426, 6428, 6505, 6542, 6555, 6556, 6560, 6564, 6568, 6571, 6572, 6597, 6598, 6603, 6608, 6625, 6651, 6694, 6706, 6721, 6725, 6740, 6746, 6768, 6783, 6785, 6796, 6817, 6827, 6847, 6870, 6872, 6926, 6945, 7002, 7035, 7043, 7089, 7121, 7130, 7198, 7216, 7233, 7248, 7265, 7426, 7432, 7434, 7494, 7498, 7691, 7777, 7779, 7797, 7800, 7809, 7812, 7821, 7849, 7876, 7886, 7897, 7902, 7905, 7917, 7921, 7945, 7999, 8020, 8059, 8081, 8094, 8095, 8115, 8246, 8256, 8262, 8272, 8273, 8278, 8279, 8293, 8322, 8339, 8353, 8408, 8453, 8456, 8502, 8520, 8522, 8607, 9009, 9010, 9013, 9015, 9019, 9022, 9024, 9026, 9036, 9045, 9046, 9128, 9214, 9280, 9316, 9342, 9382, 9433, 9446, 9506, 9540, 9544, 9587, 9614, 9634, 9642, 9645, 9700, 9716, 9719, 9729, 9732, 9738, 9740, 9741, 9742, 9744, 9745, 9749, 9752, 9768, 9770, 9777, 9779, 9792, 9808, 9831, 9839, 9856, 9858, 9867, 9879, 9883, 9888, 9890, 9893, 9905, 9944, 9970, 9982]
    n = len(bad_image_indices)
    bad_images = [images[j] for j in bad_image_indices]
    fig = plt.figure(figsize=(10, 15))
    # ``xrange`` does not exist in Python 3; ``range`` is the lazy
    # equivalent.  Subplot positions are 1-based, list indices 0-based.
    for j in range(1, n+1):
        ax = fig.add_subplot(25, 125, j)
        ax.matshow(bad_images[j-1], cmap = matplotlib.cm.binary)
        ax.set_title(str(bad_image_indices[j-1]))
        plt.xticks(np.array([]))
        plt.yticks(np.array([]))
    plt.subplots_adjust(hspace = 1.2)
    plt.show()
114 |
def plot_really_bad_images(images):
    """This takes a list of the worst images from plot_bad_images and
    turns them into a figure.

    The 18 hard-coded indices below are looked up in ``images`` and the
    matching images are drawn in a grid of 2 rows by 9 columns."""
    really_bad_image_indices = [
        324, 582, 659, 726, 846, 956, 1124, 1393,
        1773, 1868, 2018, 2109, 2654, 4199, 4201, 4620, 5457, 5642]
    n = len(really_bad_image_indices)
    really_bad_images = [images[j] for j in really_bad_image_indices]
    fig = plt.figure(figsize=(10, 2))
    # ``xrange`` does not exist in Python 3; ``range`` is the lazy
    # equivalent.  Subplot positions are 1-based, list indices 0-based.
    for j in range(1, n+1):
        ax = fig.add_subplot(2, 9, j)
        ax.matshow(really_bad_images[j-1], cmap = matplotlib.cm.binary)
        plt.xticks(np.array([]))
        plt.yticks(np.array([]))
    plt.show()
131 |
def plot_features(image):
    """Plot three copies of ``image`` side by side, showing only its top
    right, bottom left and bottom right quadrant respectively.  The
    image passed in is not modified."""
    # Top right: blank the left half, then the bottom half
    top_right = np.copy(image)
    top_right[:,:14] = np.zeros((28,14))
    top_right[14:,:] = np.zeros((14,28))
    # Bottom left: blank the right half, then the top half
    bottom_left = np.copy(image)
    bottom_left[:,14:] = np.zeros((28,14))
    bottom_left[:14,:] = np.zeros((14,28))
    # Bottom right: blank the top half, then the left half
    bottom_right = np.copy(image)
    bottom_right[:14,:] = np.zeros((14,28))
    bottom_right[:,:14] = np.zeros((28,14))
    figure = plt.figure()
    quadrants = (top_right, bottom_left, bottom_right)
    for position, quadrant in enumerate(quadrants, 1):
        axes = figure.add_subplot(1, 3, position)
        axes.matshow(quadrant, cmap = matplotlib.cm.binary)
        plt.xticks(np.array([]))
        plt.yticks(np.array([]))
    plt.show()
155 |
def plot_rotated_image(image):
    """ Plot an MNIST digit and a version rotated by 15 degrees.

    ``image`` is expected to be a 28 by 28 array.  The original is
    shown first; the rotated copy is then built pixel by pixel and shown
    with ``plot_mnist_digit``."""
    # Do the initial plot
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.matshow(image, cmap = matplotlib.cm.binary)
    plt.xticks(np.array([]))
    plt.yticks(np.array([]))
    plt.show()
    # Set up the rotated image.  There are fast matrix techniques
    # for doing this, but we'll do a pedestrian approach
    rot_image = np.zeros((28,28))
    theta = 15*np.pi/180 # 15 degrees
    def to_xy(j, k):
        # Converts from matrix indices to x, y co-ords, using the
        # 13, 14 matrix entry as the origin
        return (k-13, -j+14) # x range: -13..14, y range: -13..14
    def to_jk(x, y):
        # Converts from x, y co-ords to matrix indices
        return (-y+14, x+13)
    def image_value(x, y):
        # Returns the value of the image at co-ordinate x, y.  This is
        # a closure over ``image``, so it needn't be passed in.
        j, k = to_jk(x, y)
        return image[j, k]
    # Element by element, figure out what should be in the rotated
    # image.  We simply take each matrix entry, figure out the
    # corresponding x, y co-ordinates, rotate backward, and then
    # average the nearby matrix elements.  It's not perfect, and it's
    # not fast, but it works okay.
    for j in range(28):
        for k in range(28):
            x, y = to_xy(j, k)
            # rotate by -theta
            x1 = np.cos(theta)*x + np.sin(theta)*y
            y1 = -np.sin(theta)*x + np.cos(theta)*y
            # Nearest integer x entries are x2 and x2+1. delta_x
            # measures how to interpolate.  np.floor returns a float,
            # which NumPy will not accept as an array index, so
            # convert to int here.
            x2 = int(np.floor(x1))
            delta_x = x1-x2
            # Similarly for y
            y2 = int(np.floor(y1))
            delta_y = y1-y2
            # Check if we're out of bounds, and if so continue to next entry
            # This will miss a boundary row and layer, but that's okay,
            # MNIST digits usually don't go that near the boundary.
            if x2 < -13 or x2 > 13 or y2 < -13 or y2 > 13: continue
            # If we're in bounds, average the nearby entries.
            value = (
                (1-delta_x)*(1-delta_y)*image_value(x2, y2)+
                (1-delta_x)*delta_y*image_value(x2, y2+1)+
                delta_x*(1-delta_y)*image_value(x2+1, y2)+
                delta_x*delta_y*image_value(x2+1, y2+1))
            # Rescale the value by a hand-set fudge factor.  This
            # seems to be necessary because the averaging doesn't
            # quite work right.  The fudge-factor should probably be
            # theta-dependent, but I've set it by hand.
            rot_image[j, k] = 1.3*value
    plot_mnist_digit(rot_image)
216 |
217 | #### Miscellanea
def load_data():
    """ Return the MNIST data as a tuple containing the training data,
    the validation data, and the test data.

    The data is unpickled from ``../data/mnist.pkl``, relative to the
    current working directory.
    NOTE(review): the repository tree lists ``data/mnist.pkl.gz``, so
    this presumably needs an uncompressed copy -- confirm."""
    # Imported locally: Python 3 has no cPickle module.
    import pickle
    # ``with`` closes the file even if unpickling fails.  Unpickling
    # can execute arbitrary code, so only load files you trust.
    with open('../data/mnist.pkl', 'rb') as f:
        # encoding='latin1' is what the pickle documentation prescribes
        # for NumPy arrays pickled by Python 2.
        # NOTE(review): assumes the file was written by Python 2.
        training_set, validation_set, test_set = pickle.load(
            f, encoding='latin1')
    return (training_set, validation_set, test_set)
225 |
def get_images(training_set):
    """ Return the images in ``training_set[0]`` as a list of 2-d numpy
    arrays, each flattened image being reshaped to rows of 28 entries."""
    images = []
    for flat in training_set[0]:
        images.append(np.reshape(flat, (-1, 28)))
    return images
231 |
232 | #### Main
# Run main() only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
235 |
--------------------------------------------------------------------------------
/fig/mnist_100_digits.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/mnist_100_digits.png
--------------------------------------------------------------------------------
/fig/mnist_2_and_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/mnist_2_and_1.png
--------------------------------------------------------------------------------
/fig/mnist_complete_zero.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/mnist_complete_zero.png
--------------------------------------------------------------------------------
/fig/mnist_first_digit.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/mnist_first_digit.png
--------------------------------------------------------------------------------
/fig/mnist_other_features.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/mnist_other_features.png
--------------------------------------------------------------------------------
/fig/mnist_really_bad_images.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/mnist_really_bad_images.png
--------------------------------------------------------------------------------
/fig/mnist_top_left_feature.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/mnist_top_left_feature.png
--------------------------------------------------------------------------------
/fig/more_data.json:
--------------------------------------------------------------------------------
1 | [69.09, 76.37, 85.29, 88.85, 91.27, 93.24, 94.89, 95.85, 95.97]
--------------------------------------------------------------------------------
/fig/more_data.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/more_data.png
--------------------------------------------------------------------------------
/fig/more_data.py:
--------------------------------------------------------------------------------
1 | """more_data
2 | ~~~~~~~~~~~~
3 |
4 | Plot graphs to illustrate the performance of MNIST when different size
5 | training sets are used.
6 |
7 | """
8 |
9 | # Standard library
10 | import json
11 | import random
12 | import sys
13 |
14 | # My library
15 | sys.path.append('../src/')
16 | import mnist_loader
17 | import network2
18 |
19 | # Third-party libraries
20 | import matplotlib.pyplot as plt
21 | import numpy as np
22 | from sklearn import svm
23 |
24 | # The sizes to use for the different training sets
25 | SIZES = [100, 200, 500, 1000, 2000, 5000, 10000, 20000, 50000]
26 |
def main():
    """Train the networks, train the SVMs, and then draw the plots."""
    for stage in (run_networks, run_svms, make_plots):
        stage()
31 |
def run_networks():
    """Train a [784, 30, 10] network on each training-set size in
    ``SIZES`` and store the accuracies in ``more_data.json``.

    The same network object is reused; its weights are re-initialized
    with ``large_weight_initializer`` before each run.  The number of
    epochs is scaled inversely with the training-set size, and the
    regularization parameter is scaled proportionally to it.  The list
    of accuracies (one per size, in the order of ``SIZES``) is written
    as JSON, where it can later be read by ``make_plots``.

    """
    # Make results more easily reproducible
    random.seed(12345678)
    np.random.seed(12345678)
    training_data, validation_data, test_data = mnist_loader.load_data_wrapper()
    net = network2.Network([784, 30, 10], cost=network2.CrossEntropyCost())
    accuracies = []
    for size in SIZES:
        # print(...) with a single pre-formatted string behaves the
        # same under Python 2 and Python 3.
        print("\n\nTraining network with data set size %s" % size)
        net.large_weight_initializer()
        # Floor division keeps the epoch count an integer regardless
        # of whether ``/`` means integer or true division.
        num_epochs = 1500000 // size
        net.SGD(training_data[:size], num_epochs, 10, 0.5, lmbda=size*0.0001)
        # NOTE(review): dividing the count by 100 presumably yields a
        # percentage because the validation set has 10,000 examples --
        # confirm against mnist_loader.
        accuracy = net.accuracy(validation_data) / 100.0
        print("Accuracy was %s percent" % accuracy)
        accuracies.append(accuracy)
    # The ``with`` block closes the file even if serialization fails.
    with open("more_data.json", "w") as f:
        json.dump(accuracies, f)
50 |
def run_svms():
    """Train an SVM on each training-set size in ``SIZES`` and store
    the accuracies in ``more_data_svm.json``.

    A fresh ``svm.SVC()`` with default parameters is fitted on the
    first ``size`` training examples, and its predictions on the
    validation inputs are compared with the validation labels.  The
    list of accuracies (one per size, in the order of ``SIZES``) is
    written as JSON, where it can later be read by ``make_plots``.

    """
    svm_training_data, svm_validation_data, svm_test_data \
        = mnist_loader.load_data()
    accuracies = []
    for size in SIZES:
        # print(...) with a single pre-formatted string behaves the
        # same under Python 2 and Python 3.
        print("\n\nTraining SVM with data set size %s" % size)
        clf = svm.SVC()
        clf.fit(svm_training_data[0][:size], svm_training_data[1][:size])
        predictions = [int(a) for a in clf.predict(svm_validation_data[0])]
        # NOTE(review): dividing the count of correct predictions by
        # 100 presumably yields a percentage because the validation
        # set has 10,000 examples -- confirm against mnist_loader.
        accuracy = sum(int(a == y) for a, y in
                       zip(predictions, svm_validation_data[1])) / 100.0
        print("Accuracy was %s percent" % accuracy)
        accuracies.append(accuracy)
    # The ``with`` block closes the file even if serialization fails.
    with open("more_data_svm.json", "w") as f:
        json.dump(accuracies, f)
67 |
def make_plots():
    """Load the saved accuracies and draw all three plots.

    Reads the network accuracies from ``more_data.json`` and the SVM
    accuracies from ``more_data_svm.json``, then draws the linear and
    log plots of the network accuracies followed by the combined
    network/SVM comparison plot.

    """
    # ``with`` closes each file even if the JSON fails to parse.
    with open("more_data.json", "r") as f:
        accuracies = json.load(f)
    with open("more_data_svm.json", "r") as f:
        svm_accuracies = json.load(f)
    make_linear_plot(accuracies)
    make_log_plot(accuracies)
    make_combined_plot(accuracies, svm_accuracies)
78 |
def make_linear_plot(accuracies):
    """Show ``accuracies`` plotted against ``SIZES`` on a linear x axis.

    The data is drawn twice on the same axes: once as a connecting
    line and once as round markers in a second color.

    """
    line_color, marker_color = '#2A6EA6', '#FFA933'
    axes = plt.figure().add_subplot(111)
    axes.plot(SIZES, accuracies, color=line_color)
    axes.plot(SIZES, accuracies, "o", color=marker_color)
    axes.set_xlim(0, 50000)
    axes.set_ylim(60, 100)
    axes.grid(True)
    axes.set_xlabel('Training set size')
    axes.set_title('Accuracy (%) on the validation data')
    plt.show()
90 |
def make_log_plot(accuracies):
    """Show ``accuracies`` plotted against ``SIZES`` on a log x axis.

    The data is drawn twice on the same axes: once as a connecting
    line and once as round markers in a second color.

    """
    line_color, marker_color = '#2A6EA6', '#FFA933'
    axes = plt.figure().add_subplot(111)
    axes.plot(SIZES, accuracies, color=line_color)
    axes.plot(SIZES, accuracies, "o", color=marker_color)
    # The limits are set before the scale is switched to logarithmic,
    # in the same order as before.
    axes.set_xlim(100, 50000)
    axes.set_ylim(60, 100)
    axes.set_xscale('log')
    axes.grid(True)
    axes.set_xlabel('Training set size')
    axes.set_title('Accuracy (%) on the validation data')
    plt.show()
103 |
def make_combined_plot(accuracies, svm_accuracies):
    """Show network and SVM accuracies together on a log x axis.

    Both series are plotted against ``SIZES``.  Each is drawn as a
    line plus round markers in a single color, and only the marker
    series carries a legend label.

    """
    network_color, svm_color = '#2A6EA6', '#FFA933'
    axes = plt.figure().add_subplot(111)
    axes.plot(SIZES, accuracies, color=network_color)
    axes.plot(SIZES, accuracies, "o", color=network_color,
              label='Neural network accuracy (%)')
    axes.plot(SIZES, svm_accuracies, color=svm_color)
    axes.plot(SIZES, svm_accuracies, "o", color=svm_color,
              label='SVM accuracy (%)')
    # The limits are set before the scale is switched to logarithmic,
    # in the same order as before.
    axes.set_xlim(100, 50000)
    axes.set_ylim(25, 100)
    axes.set_xscale('log')
    axes.grid(True)
    axes.set_xlabel('Training set size')
    plt.legend(loc="lower right")
    plt.show()
120 |
# Run main() only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
123 |
--------------------------------------------------------------------------------
/fig/more_data_5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/more_data_5.png
--------------------------------------------------------------------------------
/fig/more_data_comparison.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/more_data_comparison.png
--------------------------------------------------------------------------------
/fig/more_data_log.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/more_data_log.png
--------------------------------------------------------------------------------
/fig/more_data_rotated_5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/more_data_rotated_5.png
--------------------------------------------------------------------------------
/fig/more_data_svm.json:
--------------------------------------------------------------------------------
1 | [25.07, 48.93, 75.13, 83.87, 88.49, 91.46, 92.45, 93.47, 94.48]
--------------------------------------------------------------------------------
/fig/multiple_eta.json:
--------------------------------------------------------------------------------
1 | [[[], [], [0.87809508908377998, 0.67406552530098141, 0.59798920430275404, 0.55533015743656189, 0.51751101003208144, 0.4942033354556824, 0.47255041042913526, 0.46069879353359433, 0.44304475294352064, 0.43099562372228112, 0.42310993427766375, 0.41408265298981006, 0.40573464183982105, 0.40110722961828227, 0.39162028064538967, 0.38705015774740958, 0.38116357043417587, 0.37603986695304614, 0.37297012040237154, 0.37057334627661631, 0.36551756338853658, 0.36335674264586654, 0.35745296185579917, 0.35535960956849127, 0.35365591135061097, 0.35011353300568238, 0.34946519495897871, 0.34604661988238178, 0.34386077098862522, 0.33919980880230349], []], [[], [], [0.49501954654296704, 0.4063145129425576, 0.40482383242804637, 0.37156577828840276, 0.37380111172151681, 0.37152751786000143, 0.35371985224004426, 0.3557161388797867, 0.34323780090168027, 0.3433514311156789, 0.3367645441708797, 0.34532085892085329, 0.33506383267050244, 0.34760988079085842, 0.34921493732996928, 0.33853424834583179, 0.32837282561262077, 0.33175599401109612, 0.33132920379429243, 0.33024353325326034, 0.32736756892399654, 0.3259638557593546, 0.32004264784244907, 0.33424319076405928, 0.33878125802305081, 0.32521839878261177, 0.32679267619514646, 0.32488571435373748, 0.33056367198473002, 0.33879633130932685], []], [[], [], [0.92489293305102116, 0.83919130289246469, 0.88748421594232696, 0.79625231780396133, 0.78117959228699174, 1.1365919079387048, 0.78787239608336346, 0.76778614131217449, 0.73689525303227721, 0.80127437393519696, 0.74433665287336681, 0.73725544607013882, 0.80249602203179993, 0.85190338199210014, 0.79872168623645712, 0.80243104440756152, 0.80649160680410659, 0.81467254023600921, 0.82526467696100858, 0.75042379852601759, 0.93658673378777402, 0.88236662906752283, 0.86121396033520892, 0.72492681699401829, 0.80405009868466648, 0.83959963179208197, 0.83387510808276821, 0.88282498566307899, 0.88583473645177979, 0.86068501713490919], []]]
--------------------------------------------------------------------------------
/fig/multiple_eta.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/multiple_eta.png
--------------------------------------------------------------------------------
/fig/multiple_eta.py:
--------------------------------------------------------------------------------
1 | """multiple_eta
2 | ~~~~~~~~~~~~~~~
3 |
4 | This program shows how different values for the learning rate affect
5 | training. In particular, we'll plot out how the cost changes using
6 | three different values for eta.
7 |
8 | """
9 |
10 | # Standard library
11 | import json
12 | import random
13 | import sys
14 |
15 | # My library
16 | sys.path.append('../src/')
17 | import mnist_loader
18 | import network2
19 |
20 | # Third-party libraries
21 | import matplotlib.pyplot as plt
22 | import numpy as np
23 |
24 | # Constants
25 | LEARNING_RATES = [0.025, 0.25, 2.5]
26 | COLORS = ['#2A6EA6', '#FFCD33', '#FF7033']
27 | NUM_EPOCHS = 30
28 |
def main():
    """Train the networks and then plot the resulting cost curves."""
    for stage in (run_networks, make_plot):
        stage()
32 |
def run_networks():
    """Train networks using three different values for the learning rate,
    and store the cost curves in the file ``multiple_eta.json``, where
    they can later be used by ``make_plot``.

    A new [784, 30, 10] network is created for every entry in
    ``LEARNING_RATES`` and trained for ``NUM_EPOCHS`` epochs with
    training-cost monitoring switched on.  The value returned by each
    ``SGD`` call is collected, and the collected list is written as
    JSON.

    """
    # Make results more easily reproducible
    random.seed(12345678)
    np.random.seed(12345678)
    training_data, validation_data, test_data = mnist_loader.load_data_wrapper()
    results = []
    for eta in LEARNING_RATES:
        # print(...) with a single pre-built string behaves the same
        # under Python 2 and Python 3.
        print("\nTrain a network using eta = "+str(eta))
        net = network2.Network([784, 30, 10])
        results.append(
            net.SGD(training_data, NUM_EPOCHS, 10, eta, lmbda=5.0,
                    evaluation_data=validation_data,
                    monitor_training_cost=True))
    # The ``with`` block closes the file even if serialization fails.
    with open("multiple_eta.json", "w") as f:
        json.dump(results, f)
54 |
def make_plot():
    """Plot the training-cost curves stored in ``multiple_eta.json``.

    One curve is drawn per learning rate, pairing ``LEARNING_RATES``,
    the stored results and ``COLORS`` in order.  Each stored result is
    unpacked as four items, of which only the third (the training
    cost per epoch) is plotted.

    """
    # ``with`` closes the file even if the JSON fails to parse.
    with open("multiple_eta.json", "r") as f:
        results = json.load(f)
    fig = plt.figure()
    ax = fig.add_subplot(111)
    for eta, result, color in zip(LEARNING_RATES, results, COLORS):
        _, _, training_cost, _ = result
        # The backslash is escaped explicitly: "\e" is not a valid
        # escape sequence, and the label text is unchanged.
        ax.plot(np.arange(NUM_EPOCHS), training_cost, "o-",
                label="$\\eta$ = "+str(eta),
                color=color)
    ax.set_xlim([0, NUM_EPOCHS])
    ax.set_xlabel('Epoch')
    ax.set_ylabel('Cost')
    plt.legend(loc='upper right')
    plt.show()
71 |
# Run main() only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
74 |
--------------------------------------------------------------------------------
/fig/norms_during_training_2_layers.json:
--------------------------------------------------------------------------------
1 | [[0.06574134326503182, 0.3092184942703712], [0.042965950225229275, 0.21697470825384765], [0.03285743853560062, 0.1661475616404329], [0.02683217541545865, 0.13284259725100744], [0.022444963188347787, 0.10869569875668701], [0.01898702292286868, 0.09026907616072426], [0.016176320606231937, 0.0757817449583434], [0.013856769047630028, 0.06415813857855861], [0.011923755327842544, 0.05468929179740756], [0.010300852608835414, 0.04688240462739417], [0.008929980735740226, 0.04038231936325889], [0.007766037293739668, 0.03492602136501374], [0.0067734782810398185, 0.0303144328294002], [0.005923963369769822, 0.02639409406963878], [0.0051946687903396006, 0.02304484718674352], [0.004567050423835018, 0.02017132438924394], [0.004025922146371357, 0.017696929143574278], [0.003558759740350878, 0.015559495043546933], [0.003155168513245846, 0.01370809963803681], [0.0028064709792152993, 0.012100689100100913], [0.0025053833194008273, 0.010702282128687351], [0.0022457579431536864, 0.009483594139974067], [0.0020223756268692833, 0.008419970767157997], [0.0018307752663026288, 0.007490551952524308], [0.0016671128100467073, 0.006677609993253912], [0.0015280437877996404, 0.00596602024352487], [0.001410626139902204, 0.005342833987422243], [0.0013122417372158585, 0.004796930719247236], [0.001230535877050377, 0.0043187326553456445], [0.0011633740094011671, 0.003899968399564661], [0.0011088140690242762, 0.003533475735331852], [0.0010650914693569541, 0.003213035828461012], [0.0010306126820953377, 0.002933232910393291], [0.0010039529391577253, 0.0026893349185429013], [0.0009838541507884253, 0.002477191693569685], [0.0009692204018936975, 0.0022931482255388073], [0.0009591098842916142, 0.0021339711220004043], [0.0009527233875527112, 0.001996786940464415], [0.000949390258845377, 0.001879031281725876], [0.0009485530386537979, 0.0017784075917766545], [0.0009497519150621335, 0.0016928545134905027], [0.0009526098833938276, 0.00162052044418621], [0.0009568191883996328, 0.0015597437895911916], 
[0.0009621293468216554, 0.0015090373462108967], [0.0009683368349615602, 0.0014670753405878112], [0.0009752763835047726, 0.0014326818995832435], [0.0009828137394213656, 0.0014048200688868667], [0.0009908397165437146, 0.001382580864127233], [0.0009992653475108188, 0.0013651721629557073], [0.0010080179583493688, 0.0013519074848511415], [0.0010170380046287963, 0.0013421948454926998], [0.0010262765293966452, 0.0013355259254374328], [0.0010356931246733264, 0.0013314657823426134], [0.0010452542983669293, 0.001329643288455763], [0.0010549321662308622, 0.0013297424128337033], [0.0010647034036817776, 0.0013314944062056567], [0.0010745484049984948, 0.001334670894282615], [0.0010844506078711124, 0.001339077846101557], [0.0010943959497621623, 0.00134455035772627], [0.0011043724293854176, 0.001350948176527696], [0.0011143697520880728, 0.0013581518848623535], [0.0011243790422903717, 0.0013660596617759066], [0.0011343926096075954, 0.001374584545237105], [0.0011444037580310062, 0.0013836521236696693], [0.0011544066297232028, 0.0013931985929649267], [0.001164396076707179, 0.0014031691228828121], [0.0011743675550923705, 0.0014135164842454385], [0.001184317037560411, 0.001424199895282673], [0.0011942409406884003, 0.0014351840517464725], [0.0012041360643654548, 0.001446438310918101], [0.00121399954109664, 0.0014579360044009737], [0.0012238287934166479, 0.001469653858672124], [0.001233621497976716, 0.0014815715058239837], [0.0012433755551407887, 0.0014936710698406808], [0.0012530890631449795, 0.00150593681619279], [0.0012627602960492853, 0.0015183548545708195], [0.0012723876848512645, 0.0015309128862728684], [0.0012819698012447925, 0.0015435999891708265], [0.0012915053435987739, 0.0015564064343490218], [0.0013009931248051125, 0.0015693235294801865], [0.0013104320617056788, 0.0015823434848093638], [0.0013198211658574318, 0.0015954592982856857], [0.001329159535435146, 0.0016086646569381795], [0.0013384463481043436, 0.0016219538520547598], [0.0013476808547242748, 0.0016353217061091565], 
[0.0013568623737632026, 0.0016487635097024642], [0.0013659902863269694, 0.0016622749670549338], [0.0013750640317171125, 0.0016758521488088555], [0.0013840831034477462, 0.001689491451092161], [0.0013930470456610636, 0.0017031895599511186], [0.0014019554498903382, 0.0017169434203938478], [0.001410807952126751, 0.001730750209399147], [0.0014196042301527312, 0.0017446073123398938], [0.001428344001109794, 0.0017585123023509564], [0.0014370270192733724, 0.0017724629222394034], [0.0014456530740109097, 0.0017864570685928126], [0.0014542219879027463, 0.0018004927777905224], [0.0014627336150080232, 0.001814568213664344], [0.0014711878392602328, 0.00182868165659123], [0.0014795845729789635, 0.001842831493830357], [0.0014879237554861209, 0.001857016210943517], [0.001496205351816403, 0.0018712343841595458], [0.001504429351512978, 0.0018854846735628997], [0.001512595767500488, 0.0018997658170025018], [0.0015207046350283473, 0.001914076624631347], [0.0015287560106781513, 0.0019284159739990133], [0.0015367499714297151, 0.0019427828056299383], [0.001544686613780801, 0.0019571761190289314], [0.0015525660529162172, 0.00197159496906334], [0.0015603884219223176, 0.0019860384626777115], [0.0015681538710434048, 0.002000505755902608], [0.0015758625669768783, 0.0020149960511241243], [0.0015835146922042135, 0.0020295085945849795], [0.0015911104443552297, 0.002044042674091702], [0.001598650035603237, 0.0020585976169056313], [0.0016061336920889376, 0.0020731727877982367], [0.001613561653371099, 0.002087767587253655], [0.0016209341719021985, 0.0021023814498034505], [0.0016282515125274055, 0.0021170138424803724], [0.001635513952005339, 0.0021316642633795106], [0.0016427217785492264, 0.0021463322403165704], [0.0016498752913871664, 0.002161017329574194], [0.0016569748003402733, 0.0021757191147283752], [0.0016640206254176105, 0.00219043720554777], [0.0016710130964268687, 0.002205171236959631], [0.0016779525525998347, 0.0022199208680767388], [0.001684839342231744, 0.002234685781280274], 
[0.0016916738223337158, 0.0022494656813542074], [0.0016984563582974415, 0.002264260294667135], [0.0017051873235714623, 0.002279069368397964], [0.0017118670993483138, 0.0022938926698022047], [0.0017184960742619357, 0.002308729985515969], [0.001725074644094738, 0.0023235811208949593], [0.0017316032114938027, 0.0023384458993861265], [0.001738082185695673, 0.0023533241619297417], [0.0017445119822592711, 0.0023682157663899766], [0.0017508930228065083, 0.0023831205870120933], [0.001757225734770155, 0.0023980385139046865], [0.0017635105511485838, 0.002412969452545351], [0.0017697479102670496, 0.0024279133233084706], [0.0017759382555451366, 0.002442870061013807], [0.0017820820352700965, 0.002457839614494661], [0.0017881797023757589, 0.002472821946184576], [0.0017942317142267865, 0.002487817031721519], [0.0018002385324079868, 0.0025028248595686305], [0.0018062006225184927, 0.002517845430650605], [0.001812118453970572, 0.002532878758004949], [0.0018179924997929005, 0.0025479248664473006], [0.0018238232364381085, 0.0025629837922500893], [0.0018296111435944535, 0.0025780555828339136], [0.0018353567040014507, 0.0025931402964709826], [0.0018410604032693685, 0.002608238001999926], [0.0018467227297024314, 0.002623348778551656], [0.001852344174125658, 0.0026384727152854465], [0.0018579252297151955, 0.0026536099111349864], [0.0018634663918321254, 0.0026687604745637963], [0.0018689681578596077, 0.002683924523329614], [0.0018744310270433402, 0.0026991021842573027], [0.0018798555003352415, 0.0027142935930199196], [0.001885242080240342, 0.0027294988939275022], [0.0018905912706668024, 0.002744718239723253], [0.001895903576779071, 0.002759951791386862], [0.0019011795048541078, 0.0027751997179443952], [0.0019064195621406662, 0.002790462196284726], [0.001911624256721632, 0.0028057394109820286], [0.0019167940973793586, 0.0028210315541241206], [0.0019219295934640498, 0.0028363388251463613], [0.0019270312547651242, 0.0028516614306708394], [0.001932099591385598, 0.002866999584350647], 
[0.0019371351136194554, 0.002882353506718987], [0.0019421383318320318, 0.0028977234250428295], [0.0019471097563433955, 0.002913109573180972], [0.0019520498973147331, 0.0029285121914463023], [0.001956959264637746, 0.002943931526472021], [0.0019618383678270644, 0.0029593678310816003], [0.0019666877159156756, 0.002974821364162418], [0.0019715078173533898, 0.0029902923905428024], [0.001976299179908307, 0.0030057811808722997], [0.0019810623105713573, 0.0030212880115050643], [0.001985797715463823, 0.0030368131643862127], [0.0019905058997479567, 0.0030523569269409264], [0.0019951873675405837, 0.0030679195919662114], [0.0019998426218297647, 0.003083501457525218], [0.0020044721643944826, 0.0030991028268438415], [0.0020090764957273553, 0.003114724008209653], [0.0020136561149603598, 0.0031303653148729107], [0.002018211519793561, 0.003146027064949554], [0.0020227432064268357, 0.0031617095813261313], [0.0020272516694945634, 0.003177413191566421], [0.0020317374020032944, 0.0031931382278198197], [0.0020362008952723117, 0.0032088850267311727], [0.0020406426388771466, 0.0032246539293521105], [0.002045063120595933, 0.0032404452810537725], [0.002049462826358647, 0.0032562594314406923], [0.00205384224019913, 0.0032720967342659444], [0.002058201844209906, 0.003287957547347296], [0.002062542118499728, 0.0033038422324843707], [0.002066863541153808, 0.0033197511553766943], [0.0020711665881966946, 0.0033356846855425656], [0.0020754517335577396, 0.003351643196238663], [0.0020797194490391057, 0.0033676270643802518], [0.002083970204286235, 0.0033836366704620515], [0.002088204466760761, 0.003399672398479515], [0.0020924227017157433, 0.0034157346358505767], [0.0020966253721732215, 0.003431823773337752], [0.0021008129389039577, 0.0034479402049704716], [0.0021049858604093342, 0.0034640843279676843], [0.0021091445929053232, 0.0034802565426605417], [0.0021132895903084527, 0.003496457252415226], [0.0021174213042236528, 0.0035126868635556737], [0.0021215401839339712, 0.003528945785286369], 
[0.002125646676391995, 0.00354523442961492], [0.0021297412262129456, 0.0035615532112745186], [0.002133824275669347, 0.00357790254764607], [0.0021378962646871474, 0.003594282858680137], [0.0021419576308432365, 0.003610694566818363], [0.0021460088093642558, 0.003627138096914653], [0.002150050233126582, 0.003643613876155697], [0.002154082332657443, 0.0036601223339810694], [0.0021581055361369977, 0.0036766639020027005], [0.0021621202694013285, 0.0036932390139236514], [0.0021661269559462466, 0.0037098481054562895], [0.0021701260169317894, 0.003726491614239563], [0.002174117871187324, 0.0037431699797555932], [0.0021781029352171515, 0.0037598836432453132], [0.0021820816232065265, 0.0037766330476232263], [0.0021860543470279595, 0.0037934186373911826], [0.002190021516247749, 0.0038102408585511584], [0.002193983538132606, 0.0038271001585168936], [0.0021979408176562884, 0.003843996986024524], [0.0022018937575061484, 0.0038609317910419383], [0.0022058427580895048, 0.0038779050246769663], [0.002209788217539725, 0.0038949171390842974], [0.0022137305317219465, 0.003911968587371091], [0.002217670094238327, 0.0039290598235012155], [0.0022216072964327377, 0.003946191302198092], [0.002225542527394841, 0.0039633634788460706], [0.0022294761739634085, 0.003980576809390355], [0.0022334086207288447, 0.0039978317502353386], [0.0022373402500348184, 0.004015128758141401], [0.002241271441978918, 0.004032468290120044], [0.0022452025744122447, 0.004049850803327413], [0.0022491340229379045, 0.004067276754956062], [0.002253066160908286, 0.0040847466021249985], [0.0022569993594210693, 0.004102260801767939], [0.00226093398731391, 0.004119819810519758], [0.0022648704111577263, 0.004137424084601017], [0.0022688089952485196, 0.004155074079700713], [0.002272750101597681, 0.004172770250856938], [0.0022766940899207363, 0.00419051305233571], [0.002280641317624457, 0.004208302937507712], [0.0022845921397922925, 0.004226140358723033], [0.002288546909168105, 0.004244025767183831], [0.0022925059761381293, 
0.004261959612814915], [0.0022964696887111365, 0.004279942344132131], [0.0023004383924967553, 0.004297974408108706], [0.0023044124306819376, 0.004316056250039291], [0.0023083921440055144, 0.004334188313401842], [0.0023123778707308316, 0.0043523710397172805], [0.0023163699466164206, 0.0043706048684068475], [0.0023203687048847367, 0.004388890236647192], [0.002324374476188852, 0.0044072275792231734], [0.002328387588577205, 0.004425617328378256], [0.002332408367456302, 0.00444405991366268], [0.002336437135551399, 0.004462555761779136], [0.002340474212865165, 0.004481105296426132], [0.002344519916634313, 0.004499708938138957], [0.0023485745612841953, 0.004518367104128216], [0.0023526384583813687, 0.004537080208115914], [0.002356711916584148, 0.0045558486601691706], [0.0023607952415911305, 0.004574672866531417], [0.0023648887360877228, 0.004593553229451196], [0.0023689926996906716, 0.004612490147008489], [0.002373107428890626, 0.0046314840129385105], [0.0023772332169927127, 0.004650535216453183], [0.002381370354055213, 0.00466964414206], [0.0023855191268262756, 0.00468881116937846], [0.002389679818678771, 0.004708036672954117], [0.0023938527095432355, 0.004727321022070043], [0.00239803807583901, 0.004746664580555933], [0.0024022361904035426, 0.004766067706594688], [0.002406447322419903, 0.00478553075252663], [0.00241067173734256, 0.004805054064651225], [0.0024149096968214294, 0.0048246379830264126], [0.002419161458624243, 0.004844282841265573], [0.002423427276557266, 0.004863988966332031], [0.0024277074003844113, 0.004883756678331269], [0.002432002075744773, 0.004903586290300732], [0.0024363115440686556, 0.004923478107997383], [0.002440636042492077, 0.004943432429682883], [0.0024449758037698543, 0.00496344954590655], [0.0024493310561872718, 0.004983529739286014], [0.0024537020234704065, 0.005003673284285789], [0.0024580889246951163, 0.005023880446993503], [0.002462491974194787, 0.005044151484894066], [0.002466911381466829, 0.0050644866466417445], [0.002471347351078043, 
0.005084886171830108], [0.002475800082568811, 0.005105350290759991], [0.002480269770356263, 0.005125879224205454], [0.0024847566036363747, 0.005146473183177764], [0.0024892607662851274, 0.005167132368687558], [0.0024937824367587164, 0.005187856971505075], [0.0024983217879929066, 0.005208647171918635], [0.002502878987301572, 0.005229503139491423], [0.00250745419627445, 0.0052504250328164375], [0.002512047570674216, 0.005271412999270007], [0.0025166592603328656, 0.005292467174763562], [0.002521289409047543, 0.005313587683494025], [0.0025259381544757834, 0.005334774637692681], [0.002530605628030294, 0.005356028137372697], [0.0025352919547732976, 0.005377348270075373], [0.0025399972533105206, 0.005398735110615056], [0.002544721635684845, 0.005420188720823024], [0.0025494652072697325, 0.005441709149290172], [0.002554228066662461, 0.005463296431108753], [0.002559010305577223, 0.005484950587613131], [0.002563812008738162, 0.005506671626119766], [0.0025686332537724317, 0.005528459539666374], [0.0025734741111032786, 0.005550314306750434], [0.0025783346438433175, 0.005572235891067092], [0.0025832149076879425, 0.005594224241246602], [0.002588114950809057, 0.005616279290591347], [0.0025930348137491147, 0.0056384009568125615], [0.0025979745293155743, 0.005660589141766809], [0.002602934122475831, 0.005682843731192474], [0.002607913610252721, 0.005705164594446077], [0.002612913001620622, 0.005727551584238853], [0.0026179322974022668, 0.005750004536373475], [0.0026229714901663545, 0.005772523269481117], [0.002628030564125997, 0.005795107584758912], [0.0026331094950380945, 0.00581775726570805], [0.002638208250103759, 0.0058404720778724864], [0.002643326787869787, 0.005863251768578484], [0.002648465058131364, 0.0058860960666750175], [0.0026536230018359866, 0.00590900468227531], [0.002658800550988757, 0.005931977306499468], [0.0026639976285591067, 0.005955013611218456], [0.0026692141483890316, 0.005978113248799521], [0.0026744500151029385, 0.006001275851853173], 
[0.0026797051240192004, 0.0060245010329819], [0.0026849793610634813, 0.006047788384530743], [0.0026902726026839423, 0.006071137478339861], [0.0026955847157684207, 0.006094547865499277], [0.0027009155575636696, 0.006118019076105893], [0.0027062649755967313, 0.006141550619023013], [0.0027116328075985773, 0.006165141981642434], [0.0027170188814300816, 0.0061887926296493095], [0.0027224230150104177, 0.006212502006790031], [0.0027278450162479717, 0.0062362695346431015], [0.002733284682973905, 0.0062600946123933425], [0.0027387418028783873, 0.006283976616609525], [0.002744216153449669, 0.006307914901025608], [0.002749707501916029, 0.006331908796325816], [0.0027552156051907084, 0.006355957609933584], [0.002760740209819949, 0.0063800606258047595], [0.0027662810519341746, 0.0064042171042251045], [0.002771837857202434, 0.006428426281612287], [0.0027774103407902243, 0.006452687370322627], [0.0027829982073206906, 0.006476999558462748], [0.002788601150839403, 0.006501362009706332], [0.0027942188547827, 0.006525773863116187], [0.002799850991949728, 0.006550234232971763], [0.0028054972244782395, 0.006574742208602484], [0.0028111572038242425, 0.006599296854226912], [0.002816830570745557, 0.006623897208798031], [0.0028225169552893617, 0.006648542285854994], [0.002828215976783803, 0.00667323107338124], [0.002833927243833722, 0.006697962533669548], [0.0028396503543205768, 0.006722735603193962], [0.002845384895406613, 0.006747549192489009], [0.002851130443543345, 0.006772402186036285], [0.002856886564484383, 0.006797293442158781], [0.0028626528133026993, 0.006822221792922986], [0.002868428734412329, 0.00684718604404922], [0.002874213861594579, 0.006872184974830269], [0.0028800077180287957, 0.006897217338058531], [0.0028858098163276804, 0.006922281859962103], [0.0028916196585772355, 0.006947377240149799], [0.002897436736381345, 0.006972502151565546], [0.0029032605309109905, 0.0069976552404521905], [0.0029090905129581864, 0.007022835126325175], [0.0029149261429945754, 
0.007048040401956119], [0.0029207668712347323, 0.0070732696333666465], [0.0029266121377042017, 0.007098521359832755], [0.0029324613723122235, 0.007123794093899752], [0.0029383139949291925, 0.007149086321408257], [0.0029441694154688112, 0.0071743965015313126], [0.0029500270339749505, 0.007199723066822926], [0.002955886240713191, 0.007225064423278256], [0.002961746416267046, 0.007250418950405634], [0.0029676069316388162, 0.007275785001310789], [0.002973467148355095, 0.007301160902793266], [0.00297932641857685, 0.007326544955455544], [0.002985184085214092, 0.0073519354338248254], [0.0029910394820450784, 0.007377330586487867], [0.0029968919338400238, 0.007402728636239055], [0.0030027407564892613, 0.0074281277802418205], [0.0030085852571358606, 0.007453526190203774], [0.003014424734312599, 0.007478922012565629], [0.0030202584780832896, 0.007504313368704147], [0.003026085770188423, 0.007529698355149343], [0.003031905884195024, 0.007555075043816072], [0.0030377180856507547, 0.007580441482250205], [0.0030435216322421445, 0.007605795693889583], [0.0030493157739569453, 0.007631135678339849], [0.0030550997532505586, 0.007656459411665481], [0.0030608728052164543, 0.0076817648466958955], [0.0030666341577606053, 0.007707049913347047], [0.003072383031779785, 0.007732312518958536], [0.0030781186413438083, 0.007757550548646268], [0.0030838401938815666, 0.007782761865670929], [0.003089546890370893, 0.007807944311822333], [0.003095237925532164, 0.007833095707819617], [0.0031009124880256394, 0.007858213853727558], [0.0031065697606524763, 0.007883296529388973], [0.003112208920559412, 0.007908341494873227], [0.003117829139447088, 0.007933346490940978], [0.0031234295837819443, 0.007958309239525193], [0.0031290094150117314, 0.007983227444228353], [0.0031345677897845654, 0.008008098790835989], [0.0031401038601715564, 0.008032920947846427], [0.003145616773892945, 0.008057691567016832], [0.003151105674547794, 0.008082408283925395], [0.0031565697018472124, 0.008107068718549694], 
[0.0031620079918510805, 0.008131670475861174], [0.0031674196772083587, 0.008156211146435586], [0.0031728038874008703, 0.00818068830707935], [0.0031781597489907083, 0.008205099521471734], [0.0031834863858711568, 0.00822944234082266], [0.003188782919521227, 0.008253714304546031], [0.0031940484692637937, 0.008277912940948463], [0.003199282152527358, 0.008302035767933084], [0.003204483085111488, 0.008326080293718377], [0.0032096503814559156, 0.008350044017571764], [0.00321478315491339, 0.00837392443055768], [0.0032198805180262396, 0.008397719016299926], [0.003224941582806784, 0.008421425251757972], [0.0032299654610214766, 0.008445040608017042], [0.003234951264478985, 0.00846856255109149], [0.003239898105322112, 0.008491988542741325], [0.0032448050963236478, 0.008515316041301367], [0.0032496713511861865, 0.008538542502522846], [0.003254495984845906, 0.008561665380426933], [0.003259278113780387, 0.008584682128169829], [0.0032640168563204284, 0.008607590198918996], [0.0032687113329659496, 0.008630387046740198], [0.003273360666705946, 0.00865307012749472], [0.0032779639833425303, 0.008675636899746382], [0.0032825204118190417, 0.008698084825677976], [0.003287029084552262, 0.008720411372016288], [0.0032914891377686735, 0.0087426140109657], [0.003295899711844795, 0.008764690221149226], [0.0033002599516515174, 0.008786637488556974], [0.003304569006902456, 0.008808453307501136], [0.003308826032506215, 0.008830135181577006], [0.0033130301889225614, 0.008851680624629514], [0.0033171806425224127, 0.008873087161724488], [0.0033212765659515527, 0.00889435233012424], [0.003325317138497986, 0.008915473680266628], [0.0033293015464628613, 0.008936448776747088], [0.0033332289835347776, 0.008957275199302915], [0.003337098651167397, 0.008977950543799138], [0.00334090975896019, 0.0089984724232153], [0.003344661525042164, 0.009018838468632452], [0.0033483531764583814, 0.00903904633021964], [0.0033519839495590852, 0.009059093678219316], [0.0033555530903912312, 0.00907897820393063], 
[0.003359059855092182, 0.009098697620690337], [0.0033625035102853467, 0.009118249664850217], [0.003365883333477502, 0.009137632096750422], [0.0033691986134575026, 0.009156842701688046], [0.0033724486506961386, 0.009175879290880175], [0.0033756327577467954, 0.009194739702420565], [0.003378750259646627, 0.009213421802229387], [0.0033818004943178886, 0.009231923484995163], [0.003384782812969105, 0.009250242675108208], [0.0033876965804956857, 0.009268377327584836], [0.003390541175879653, 0.009286325428981633], [0.0033933159925880257, 0.00930408499829904], [0.0033960204389695586, 0.009321654087873438], [0.0033986539386493208, 0.009339030784257189], [0.0034012159309207645, 0.009356213209085801], [0.0034037058711348213, 0.009373199519931572], [0.0034061232310855644, 0.009389987911142941], [0.003408467499392047, 0.009406576614668954], [0.003410738181875776, 0.009422963900868135], [0.003412934801933416, 0.009439148079301062], [0.0034150569009042436, 0.00945512749950608], [0.00341710403843184, 0.009470900551757401], [0.00341907579281959, 0.009486465667805097], [0.003420971761379469, 0.009501821321596259], [0.0034227915607736494, 0.009516966029976797], [0.003424534827348476, 0.00953189835337327], [0.0034262012174602332, 0.00954661689645427], [0.0034277904077923576, 0.009561120308770606], [0.0034293020956635053, 0.009575407285374095], [0.0034307359993260692, 0.009589476567414058], [0.003432091858254652, 0.009603326942711399], [0.0034333694334240427, 0.00961695724630957], [0.003434568507576292, 0.009630366361002016]]
--------------------------------------------------------------------------------
/fig/overfitting.json:
--------------------------------------------------------------------------------
1 | [[2.0762772323329082, 1.8232334122685845, 1.6640751933146665, 1.4913409287162824, 1.4626645665352562, 1.5608730982986192, 1.3270934349008427, 1.3031689400520545, 1.2737198013316875, 1.2353298430277617, 1.2781249875365142, 1.2587094591590358, 1.2236320447498565, 1.2049258878595992, 1.202838510821453, 1.2175903284804579, 1.2166038163981336, 1.2302002518540471, 1.2284292747614989, 1.2284082512336671, 1.2206853894877705, 1.21982789310683, 1.2416121174277031, 1.2386867792565612, 1.2590040086618466, 1.2442300811597213, 1.256214615756384, 1.2688359032682412, 1.262880085921275, 1.2580241299014177, 1.2715941639378459, 1.2704272355044199, 1.2713173651241083, 1.2883075453227311, 1.309478393757302, 1.2884464353157816, 1.2992864292684581, 1.2995723853510095, 1.3057065520137037, 1.2996067919082652, 1.3113694262185569, 1.3180980499575814, 1.3224531791316712, 1.3288895530170624, 1.333896011747612, 1.3342655386450013, 1.3507230411896862, 1.349523874760193, 1.3486221834113297, 1.3492461107256304, 1.371103940789913, 1.363742107503537, 1.3591970586051429, 1.3628600849625045, 1.3715455620910941, 1.3762306334690999, 1.374571326678441, 1.3797013054519787, 1.3852744476067012, 1.3910542555139365, 1.3898246384066992, 1.3990962884350675, 1.4024643563307768, 1.4090064214871054, 1.4084778553386963, 1.4088023031568424, 1.4191800292184851, 1.4231642276020737, 1.4249505527888344, 1.4243906063296523, 1.4329048311102033, 1.4324957023669891, 1.4393786314154042, 1.4422194928893282, 1.4464979740530604, 1.4456638780161708, 1.4510268869602028, 1.4569921654272227, 1.4501932854980157, 1.4586286646321187, 1.4641932180653843, 1.4627134909477864, 1.4695342388383457, 1.4677910970476582, 1.4741781001557179, 1.4716445971833882, 1.4801857605543194, 1.4824209608683785, 1.4850739585015795, 1.4862526009107158, 1.4891994206257972, 1.4911449111215642, 1.4895703546607124, 1.4965483049082178, 1.498475574872792, 1.4983442080951213, 1.5006331103383848, 1.5026762910773346, 1.5056581200232744, 1.5052999301275902, 
1.5101971277214103, 1.5052100436149518, 1.5122319812343581, 1.5140032252405993, 1.5124829079283069, 1.5155523493173086, 1.5173126087446009, 1.5182757080707934, 1.5226359219963441, 1.5219631288706079, 1.5280382373274002, 1.5305864446018889, 1.5308400057540297, 1.5298912015444006, 1.5299337876516403, 1.5359447484558009, 1.5373272763323333, 1.5357721830268081, 1.5411296193695012, 1.5408039410712637, 1.5435982660843079, 1.5448396233143082, 1.54640053335383, 1.5497924569986792, 1.5505113782392206, 1.5510873904758762, 1.5522372758736236, 1.5551534205865707, 1.5579037629164372, 1.5562529728398988, 1.557078203694122, 1.5605381965411875, 1.5615119183693009, 1.5620335052988363, 1.5636399289779603, 1.5666556667329066, 1.565967137736715, 1.5692899251189862, 1.569455366800145, 1.570201924893627, 1.5724338032777558, 1.5743182022210231, 1.5779052215415215, 1.5765908208317501, 1.5777906843645095, 1.5807065832710021, 1.5826042798570108, 1.5834282733757874, 1.5828727942783614, 1.5856236197268949, 1.5865253156344346, 1.5870711209440933, 1.5901102159916298, 1.589831374782438, 1.5903365893797863, 1.5922289915737446, 1.5937242093943276, 1.5957483404630928, 1.5961125976163055, 1.5970150572826043, 1.5978109963232512, 1.5992551729862305, 1.6002494658191888, 1.6011359241282295, 1.6026033918901981, 1.6036086580112265, 1.6048749030222984, 1.6074694961111247, 1.606561408369042, 1.6080270858679979, 1.6070173269387908, 1.6073846856343872, 1.6108499262729024, 1.6142347957554661, 1.6123239687358804, 1.6161556699815407, 1.6165460098238449, 1.6175561633900144, 1.6189398493899978, 1.6196154264250775, 1.6203787165288632, 1.621001492936847, 1.6202434861259687, 1.6245124200435654, 1.6239635265755512, 1.6241046070201524, 1.6252458899999425, 1.6270324298456853, 1.6251838072169713, 1.6275992851310326, 1.6284252702633855, 1.63017431200309, 1.6310654169645247, 1.6317526370318307, 1.6338220017737439, 1.6337496047773168, 1.6341251344927106, 1.6352945051866614, 1.6350194166439092, 1.6370499538617151, 
1.6398726421890257, 1.6392502241532569, 1.6407454538992294, 1.6403690089576106, 1.6407818413256172, 1.6422394672335026, 1.6440166122111106, 1.6445019840290256, 1.6454238395620735, 1.6446012999992357, 1.646388125891822, 1.647477767725033, 1.6489903681956257, 1.6495670111667955, 1.6518317266949349, 1.6521411302385651, 1.6516324900159436, 1.6519268509016449, 1.6536769664130893, 1.6557231011758236, 1.6556135178242031, 1.6563575299485291, 1.6585034371713305, 1.6590068125468771, 1.6594819047820268, 1.6602894618927027, 1.6610971710252704, 1.6614742412028516, 1.662062758010538, 1.6618888908042855, 1.664003604166757, 1.6650773987115881, 1.6655027680401031, 1.6669239090330996, 1.6670229476327978, 1.6673755478034697, 1.668631108043269, 1.6691187845751598, 1.6698574553969809, 1.6699987129628646, 1.6718587417171702, 1.6728186780767957, 1.6735770871095164, 1.6751317162887345, 1.6754619738035605, 1.6757497694666139, 1.6767453551142881, 1.6773935555529487, 1.6790329446692798, 1.6796379042981611, 1.6798597028202431, 1.6814275113531045, 1.681540584668908, 1.6825376023031897, 1.6838370704483998, 1.6839490568545408, 1.6849619150773361, 1.6856698652111073, 1.6863000466757747, 1.6871126637371965, 1.6880316020404877, 1.6879506729766618, 1.6894802944100824, 1.6905853787788423, 1.6917216848211414, 1.6926191212187904, 1.6929352076880684, 1.6942338256895795, 1.6948350819305742, 1.6947248010331575, 1.6956565470999065, 1.6966226855434137, 1.6977135512214465, 1.6976628333622414, 1.6992728088551838, 1.6995188736719555, 1.7004758991163513, 1.7003390074918037, 1.701757593590616, 1.7030610036769165, 1.7032572445905845, 1.70424379410192, 1.7050439989771855, 1.7050460437739656, 1.705340680240933, 1.7063172030736129, 1.7074694863569662, 1.7081060241444701, 1.7083152915972599, 1.7100598915164169, 1.7098580352207235, 1.7110452055463516, 1.7118315825579393, 1.7119458704259569, 1.7128193121191575, 1.714088357669219, 1.7143137710846792, 1.7151310218423155, 1.7158160290566882, 1.7164824543799349, 
1.7170992986317428, 1.7176374854282062, 1.7180675777622618, 1.7183529546663991, 1.7183913489385712, 1.7200745183529782, 1.7201860622786533, 1.7209378765189278, 1.7211755130028632, 1.7227001954459273, 1.7233033322709161, 1.724388593301251, 1.7249286817001739, 1.7257212014285681, 1.7264759882752161, 1.7263145509431113, 1.727169750737453, 1.7274545949025009, 1.728138187570482, 1.7284329943827041, 1.7291647307556921, 1.7297691124388797, 1.7303999198392592, 1.7309397188198092, 1.731884218614588, 1.732752372009305, 1.7337895563134313, 1.7338046236202502, 1.7343991319697829, 1.7354045120011685, 1.7359328966782865, 1.7363186919712537, 1.7368728425169133, 1.7376556264901872, 1.738218355695242, 1.7389320558428096, 1.7392586695357521, 1.7395726489260961, 1.7403457853492119, 1.7411144686251934, 1.7418114756639416, 1.7423787115928511, 1.7429156859372819, 1.7433516620794796, 1.7445995405595869, 1.7449650517928348, 1.7454219936222521, 1.7454049499805062, 1.7461754045631253, 1.747238770079671, 1.7480364894800848, 1.7481891743633657, 1.7484612615979531, 1.7492265370334927, 1.7499314246477431, 1.7503425435026281, 1.7509597451899421, 1.7513546402678131, 1.7521155770124217, 1.7527284609234106, 1.7529769148484364, 1.7538672981186787, 1.7544452588346211, 1.7549061780496615, 1.7553447817113197, 1.755809458463981, 1.7558520671233728, 1.7568602710475358, 1.7568136319142174, 1.7575653625849685, 1.758205144851257, 1.7587151972026469, 1.7591737337097375, 1.7594811212041248, 1.7599224471680641, 1.7604958546917258, 1.7614357150479159, 1.7620773477904375, 1.7629340224321914, 1.7634360517269456, 1.7634779671556928, 1.7642836857118194, 1.7646825015144432, 1.7652084365396346, 1.765550476840142, 1.766323427364384, 1.7671269295963092, 1.7674831990461801, 1.7679902398030436, 1.7688556765701444, 1.7693755350034828, 1.7691087563919485, 1.7699593793502248, 1.7702219339149627, 1.7709157134395872, 1.7709574156060244, 1.7720375325001132, 1.7722910641140253, 1.7728105919575348, 1.7731493757222807], [5887, 
6505, 6970, 7271, 7433, 7198, 7710, 7747, 7850, 7899, 7853, 7848, 7986, 8020, 8046, 8039, 8056, 8090, 8090, 8107, 8086, 8126, 8104, 8107, 8116, 8121, 8128, 8121, 8135, 8126, 8137, 8149, 8146, 8124, 8118, 8146, 8134, 8156, 8148, 8165, 8170, 8146, 8157, 8157, 8156, 8161, 8134, 8156, 8166, 8145, 8140, 8154, 8156, 8147, 8144, 8147, 8144, 8156, 8154, 8157, 8148, 8137, 8144, 8145, 8148, 8149, 8147, 8152, 8154, 8152, 8136, 8151, 8145, 8152, 8150, 8155, 8152, 8147, 8159, 8148, 8164, 8160, 8153, 8149, 8153, 8158, 8153, 8160, 8154, 8165, 8158, 8155, 8159, 8164, 8170, 8176, 8174, 8176, 8165, 8173, 8163, 8189, 8181, 8175, 8180, 8185, 8177, 8179, 8184, 8178, 8185, 8177, 8182, 8194, 8189, 8174, 8184, 8188, 8180, 8192, 8180, 8181, 8197, 8183, 8184, 8194, 8186, 8188, 8189, 8195, 8192, 8184, 8194, 8197, 8197, 8185, 8196, 8192, 8200, 8200, 8199, 8197, 8191, 8196, 8193, 8193, 8192, 8194, 8201, 8195, 8196, 8195, 8196, 8197, 8190, 8196, 8197, 8197, 8193, 8190, 8195, 8196, 8199, 8195, 8191, 8192, 8187, 8189, 8192, 8193, 8189, 8192, 8194, 8187, 8189, 8193, 8197, 8193, 8194, 8195, 8197, 8191, 8198, 8196, 8196, 8196, 8194, 8192, 8196, 8192, 8193, 8192, 8197, 8195, 8191, 8192, 8191, 8194, 8191, 8190, 8192, 8191, 8195, 8190, 8197, 8193, 8194, 8191, 8196, 8192, 8192, 8198, 8199, 8199, 8200, 8199, 8195, 8196, 8199, 8191, 8194, 8199, 8205, 8195, 8204, 8206, 8207, 8208, 8204, 8203, 8203, 8207, 8207, 8200, 8206, 8206, 8203, 8203, 8206, 8210, 8208, 8207, 8208, 8208, 8209, 8209, 8208, 8211, 8210, 8209, 8208, 8213, 8205, 8207, 8211, 8210, 8213, 8212, 8213, 8211, 8216, 8213, 8214, 8214, 8216, 8214, 8218, 8218, 8218, 8217, 8220, 8217, 8221, 8217, 8217, 8218, 8217, 8217, 8218, 8223, 8221, 8223, 8222, 8226, 8218, 8221, 8217, 8219, 8218, 8221, 8219, 8218, 8221, 8220, 8220, 8214, 8220, 8218, 8218, 8220, 8221, 8220, 8222, 8220, 8223, 8220, 8221, 8223, 8219, 8217, 8221, 8217, 8218, 8220, 8221, 8220, 8219, 8221, 8219, 8219, 8223, 8223, 8222, 8221, 8221, 8221, 8219, 8220, 8220, 8221, 8222, 8222, 8221, 8222, 
8222, 8223, 8222, 8222, 8222, 8220, 8221, 8219, 8220, 8219, 8219, 8219, 8220, 8222, 8220, 8218, 8218, 8220, 8222, 8221, 8220, 8221, 8219, 8222, 8220, 8218, 8221, 8220, 8221, 8223, 8224, 8223, 8225, 8224, 8223, 8223, 8224, 8226, 8223, 8226, 8227, 8223, 8222, 8222, 8223, 8222, 8222, 8223, 8221, 8222, 8221, 8222, 8223, 8222, 8223, 8221, 8221, 8222, 8221, 8220, 8223, 8223, 8221, 8220, 8220, 8223], [1.8433647860328504, 1.4777434227600235, 1.2072861295975754, 1.0133122929166287, 0.90463155805724549, 0.88914085245876628, 0.69510736399672024, 0.60063282069902524, 0.54091603110413877, 0.47790622727830795, 0.45205834534806816, 0.41823557259928568, 0.36353886658117263, 0.34097725702984655, 0.30704391068438625, 0.28505238206157008, 0.27569888987164376, 0.24768957063883623, 0.23176895744869463, 0.21675013886039948, 0.20851633183590543, 0.19581288303288452, 0.1883450862028723, 0.17598212888313519, 0.17148361118265443, 0.1579893219777749, 0.15232727768047913, 0.14908652369052086, 0.13959561541748497, 0.13575056473237712, 0.13038402753584682, 0.12633393160050335, 0.12301377811155474, 0.11760787528185435, 0.11768063532050554, 0.1116090975652263, 0.10594482960527374, 0.10146350864471576, 0.098950522532661414, 0.097470601335692755, 0.092587258119849816, 0.08890875709297294, 0.087334066191656873, 0.083968079007007201, 0.080425567454585997, 0.077388473929440482, 0.075732073394689639, 0.073264736306067349, 0.071041388148701201, 0.069457129692052144, 0.068501170653270813, 0.066032566370952353, 0.063988473365526183, 0.062795927261155279, 0.060741640047227284, 0.059231989932508271, 0.05768849915339639, 0.05665547621362698, 0.054958317927951077, 0.053790448746231824, 0.052736286230260894, 0.051837510403181575, 0.050246365862518169, 0.049157161612436856, 0.048399496486416158, 0.047223244213956135, 0.0462029122464475, 0.045362346946900868, 0.044991942955692463, 0.043548301006926415, 0.042619315582575576, 0.041872679888486676, 0.041353003773040818, 0.040343023072558423, 0.039610640543463034, 
0.038827894599382441, 0.038165321713376478, 0.037555764670128743, 0.036994307367393418, 0.036209725814626531, 0.035681364944148621, 0.035011174581168515, 0.034291592495846304, 0.033547709156373326, 0.032718263569078379, 0.032023904582162074, 0.031191659853705877, 0.030626708962014425, 0.029958425643681459, 0.029353265126344338, 0.028797964387740602, 0.028422012718391305, 0.027812191605286883, 0.02735501112071697, 0.026993564391853634, 0.026569113029157501, 0.026092064055110505, 0.025674781762346367, 0.025305894007476161, 0.024889083317963079, 0.024558131841685489, 0.024199329722505512, 0.023811240945016011, 0.02349334595548817, 0.023144448162318861, 0.022855591970993343, 0.022543488743196019, 0.022249708906225588, 0.021965529914739868, 0.021698026254864673, 0.021378123124622207, 0.021088144829246657, 0.020886774671724655, 0.020580654801762799, 0.020340928401971565, 0.020106832060966386, 0.019866100581400464, 0.019595427663293804, 0.019357863817740469, 0.019161344170354123, 0.018904348055376753, 0.018717446712462619, 0.018465695920465829, 0.018272597088603904, 0.018074038569034587, 0.017902496111286439, 0.017688582191512987, 0.017482922233189643, 0.017310611695394759, 0.017115500371685799, 0.016952389472265048, 0.016780096680463093, 0.016597920851012388, 0.016464051880113292, 0.016282898613349604, 0.016136333558525773, 0.015962599748449359, 0.015804226601263859, 0.01564233479142425, 0.015498832345390763, 0.015356876162963705, 0.01521854563645951, 0.015073417138070878, 0.014934450228727312, 0.014825699216383425, 0.014665671908121175, 0.014541603837007016, 0.014406468550218309, 0.014283810530787627, 0.014161122679261126, 0.014032574364961678, 0.013910840101390157, 0.013798885792050663, 0.013687896782864042, 0.013560391660895389, 0.013438577986090204, 0.013334228955099942, 0.013211877601969076, 0.013097885924483273, 0.012989409068889089, 0.012879749073942919, 0.012764357708175841, 0.012655707745460402, 0.012552080081974131, 0.012437201020647024, 0.012337374519593609, 
0.012235799102028641, 0.012145033948497589, 0.012041173192129051, 0.011950396057231671, 0.011858981944298361, 0.011775080348835348, 0.011680489496468773, 0.011599470483943787, 0.011508039313397542, 0.011428340295649663, 0.011349428130039004, 0.011268838312831554, 0.011189912165110935, 0.011109723837424776, 0.011034560780687212, 0.010957160107627907, 0.010882577878407575, 0.010811098528082833, 0.010735442936960134, 0.010665821648065388, 0.010595437817822002, 0.010527971065975287, 0.010460804790718484, 0.010390628071234642, 0.010324849341451231, 0.010260426245422116, 0.01019672721940704, 0.010130572405193659, 0.010072420053693198, 0.010006628636997746, 0.0099438229901934062, 0.0098844582257840465, 0.0098246362024759087, 0.0097663800115457729, 0.0097127946154311525, 0.0096488779061362981, 0.0095934368030428964, 0.0095357715395345787, 0.0094828105181004665, 0.0094248109827411788, 0.0093706712239598373, 0.0093193323548417979, 0.0092640297508175557, 0.0092140979255875208, 0.0091599683295718951, 0.0091085532532171488, 0.0090566144373361267, 0.0090078660753137273, 0.0089574895292539591, 0.0089102414421480258, 0.0088602907747430508, 0.0088130951045928482, 0.0087627473899933809, 0.0087169057599027176, 0.008669602206522074, 0.0086245638096965112, 0.008578829590782322, 0.0085337682055181534, 0.0084904627352943417, 0.0084437487803313786, 0.0084005496486743488, 0.0083564344772356029, 0.0083141617581171147, 0.0082711428471498073, 0.0082280554148385712, 0.0081861058590978232, 0.0081453445960328139, 0.0081050739787153214, 0.0080629964697627073, 0.0080227532574564074, 0.0079824692727665826, 0.0079430728092899933, 0.0079037010818164485, 0.007866500840720405, 0.0078262183123015989, 0.0077880045569414854, 0.0077501375585374184, 0.0077130927824958879, 0.0076749447769945724, 0.0076383206771078876, 0.0076037234598087793, 0.0075656978732744681, 0.0075306312245213185, 0.0074933959545258758, 0.007457863479961753, 0.0074224044906936416, 0.0073875427008978636, 0.0073533040575986478, 
0.0073188636048639828, 0.0072851975374219257, 0.007249551514341885, 0.0072155976990766037, 0.0071837716588880659, 0.0071490490354368446, 0.0071149465640242852, 0.0070813394799004378, 0.007047562934784808, 0.0070148336131211995, 0.0069818923170584749, 0.0069501733149712605, 0.0069160116872818119, 0.0068840401735022448, 0.0068514561156714704, 0.0068196243684198622, 0.0067863217789309715, 0.0067534474287550073, 0.0067200631321686754, 0.0066885181859545314, 0.0066568666894365278, 0.0066240210906374454, 0.0065926113197570003, 0.0065626818950437473, 0.0065315628190666742, 0.0065011591385262619, 0.0064707220819024717, 0.0064414830189593493, 0.006412153696041689, 0.0063833172680227371, 0.0063550710682834894, 0.0063263283024680974, 0.006298472160087395, 0.0062705864326000225, 0.0062432859687904735, 0.0062170035873869919, 0.0061896468954568112, 0.0061633789845077242, 0.0061370029472117751, 0.0061113940805202389, 0.006084934659958091, 0.0060594838733285623, 0.0060344559813401231, 0.0060094203492223955, 0.0059846013866920586, 0.0059595844553596081, 0.0059354063125516329, 0.0059106299326143925, 0.0058865500049247376, 0.0058629576783263884, 0.0058398259656488255, 0.0058157681197817893, 0.0057925894220656601, 0.005769719671300088, 0.0057465133779847615, 0.005723885452264303, 0.0057011088426493122, 0.0056788750789765873, 0.0056565840835991754, 0.0056345091063197805, 0.0056126545497789098, 0.0055907478836642096, 0.0055695879730558121, 0.0055480499521874576, 0.0055269393518896613, 0.0055055760390316528, 0.0054846407395392127, 0.0054639492319568522, 0.0054430681469272582, 0.0054228564773156733, 0.0054023245806928445, 0.0053821589279841198, 0.0053622701718764919, 0.0053422962061517205, 0.0053225545608313832, 0.0053025727602387341, 0.0052830117459178542, 0.0052637900268743395, 0.0052445911331395157, 0.005225301628550106, 0.0052063734092377542, 0.0051873981768633098, 0.0051686550874710333, 0.0051501308303700411, 0.0051317159223978588, 0.0051132407038391343, 0.0050949646868208237, 
0.0050768163679191418, 0.0050588920830417876, 0.0050410603343375689, 0.0050234460109072039, 0.0050058479424628429, 0.0049883551396970116, 0.0049709905024850117, 0.0049534380633703065, 0.0049362622084869086, 0.0049191825923840336, 0.0049019339002642093, 0.0048852638935989387, 0.0048685355903025033, 0.0048516783369022421, 0.004835176074289591, 0.0048187414702752064, 0.0048024676248850815, 0.004786205475115702, 0.0047700818966571806, 0.0047540229511325634, 0.0047381312460395321, 0.0047222819541272538, 0.004706582163886538, 0.0046908199271972883, 0.004675294500871013, 0.0046600271163220889, 0.004644560115930613, 0.0046292967748754361, 0.0046142583741101018, 0.0045990679936898007, 0.0045841369248796251, 0.0045692503625969364, 0.0045545855753471856, 0.0045398450796813317, 0.0045253266069212915, 0.0045109305522560966, 0.0044964925773122444, 0.0044821543135014023, 0.0044678460394820168, 0.0044536907146381774, 0.0044396992221706283, 0.0044257701614943187, 0.0044119841879101604, 0.0043980561122206668, 0.0043843449981956339, 0.0043706979320343045, 0.004357190360999688, 0.0043441270550987543, 0.0043307263717703463, 0.0043174162674548587, 0.004304214971579858, 0.0042909772791592301, 0.004277917818300953, 0.0042651151252712065, 0.0042522204909684596, 0.0042395626122975951, 0.0042267964052173168, 0.0042141543979121142, 0.0042016444290295845], [690, 749, 812, 848, 872, 865, 914, 932, 948, 949, 957, 966, 970, 973, 979, 977, 981, 982, 982, 985, 984, 988, 989, 990, 988, 990, 991, 991, 992, 992, 993, 993, 992, 992, 993, 994, 994, 995, 996, 995, 995, 996, 996, 996, 997, 997, 998, 998, 998, 998, 998, 998, 997, 997, 998, 998, 998, 998, 998, 998, 998, 998, 998, 998, 998, 999, 999, 1000, 1000, 1000, 1000, 1000, 999, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 
1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]]
--------------------------------------------------------------------------------
/fig/overfitting.py:
--------------------------------------------------------------------------------
1 | """
2 | overfitting
3 | ~~~~~~~~~~~
4 |
5 | Plot graphs to illustrate the problem of overfitting.
6 | """
7 |
8 | # Standard library
9 | import json
10 | import random
11 | import sys
12 |
13 | # My library
14 | sys.path.append('../src/')
15 | import mnist_loader
16 | import network2
17 |
18 | # Third-party libraries
19 | import matplotlib.pyplot as plt
20 | import numpy as np
21 |
22 |
def main(filename, num_epochs,
         training_cost_xmin=200,
         test_accuracy_xmin=200,
         test_cost_xmin=0,
         training_accuracy_xmin=0,
         training_set_size=1000,
         lmbda=0.0):
    """Train the network, store the results, and then plot them.

    ``filename`` is the name of the file where the results will be
    stored.  ``num_epochs`` is the number of epochs to train for.
    ``training_set_size`` is the number of images to train on.
    ``lmbda`` is the regularization parameter.  The other parameters
    set the epochs at which to start plotting on the x axis.
    """
    run_network(filename, num_epochs, training_set_size, lmbda)
    # The starting epochs are passed by keyword so that each one reaches
    # the ``make_plots`` parameter of the same name.  Passing them by
    # position makes it easy to swap two of them or to drop
    # ``test_cost_xmin`` without noticing.
    make_plots(filename, num_epochs,
               training_cost_xmin=training_cost_xmin,
               test_accuracy_xmin=test_accuracy_xmin,
               test_cost_xmin=test_cost_xmin,
               training_accuracy_xmin=training_accuracy_xmin,
               training_set_size=training_set_size)
43 |
def run_network(filename, num_epochs, training_set_size=1000, lmbda=0.0):
    """Train the network for ``num_epochs`` on ``training_set_size``
    images, and store the results in ``filename``.  Those results can
    later be used by ``make_plots``.  Note that the results are stored
    to disk in large part because it's convenient not to have to
    ``run_network`` each time we want to make a plot (it's slow).

    The file holds a JSON list of the four values returned by
    ``net.SGD``, in the order ``[test_cost, test_accuracy,
    training_cost, training_accuracy]``.

    """
    # Make results more easily reproducible
    random.seed(12345678)
    np.random.seed(12345678)
    training_data, validation_data, test_data = mnist_loader.load_data_wrapper()
    net = network2.Network([784, 30, 10], cost=network2.CrossEntropyCost())
    net.large_weight_initializer()
    # NOTE(review): the positional 10 and 0.5 are presumably the mini-batch
    # size and the learning rate -- confirm against network2.Network.SGD.
    test_cost, test_accuracy, training_cost, training_accuracy \
        = net.SGD(training_data[:training_set_size], num_epochs, 10, 0.5,
                  evaluation_data=test_data, lmbda=lmbda,
                  monitor_evaluation_cost=True,
                  monitor_evaluation_accuracy=True,
                  monitor_training_cost=True,
                  monitor_training_accuracy=True)
    # The ``with`` block closes the file even if serialization fails.
    with open(filename, "w") as f:
        json.dump([test_cost, test_accuracy, training_cost, training_accuracy], f)
68 |
def make_plots(filename, num_epochs,
               training_cost_xmin=200,
               test_accuracy_xmin=200,
               test_cost_xmin=0,
               training_accuracy_xmin=0,
               training_set_size=1000):
    """Load the results from ``filename``, and generate the corresponding
    plots.

    ``filename`` must contain a JSON list of four sequences, in the
    order ``[test_cost, test_accuracy, training_cost,
    training_accuracy]``.  Each ``*_xmin`` parameter is the first epoch
    shown on the matching plot, and ``num_epochs`` is the upper limit
    of every x axis.  ``training_set_size`` is used to turn the
    training accuracy counts into percentages.
    """
    # The ``with`` block closes the file even if the JSON is malformed.
    with open(filename, "r") as f:
        test_cost, test_accuracy, training_cost, training_accuracy \
            = json.load(f)
    plot_training_cost(training_cost, num_epochs, training_cost_xmin)
    plot_test_accuracy(test_accuracy, num_epochs, test_accuracy_xmin)
    plot_test_cost(test_cost, num_epochs, test_cost_xmin)
    plot_training_accuracy(training_accuracy, num_epochs,
                           training_accuracy_xmin, training_set_size)
    # The overlay starts at the earlier of the two accuracy starting
    # epochs, so that neither curve is cut short.
    plot_overlay(test_accuracy, training_accuracy, num_epochs,
                 min(test_accuracy_xmin, training_accuracy_xmin),
                 training_set_size)
89 |
def plot_training_cost(training_cost, num_epochs, training_cost_xmin):
    """Show the training cost for the epochs from ``training_cost_xmin``
    up to, but not including, ``num_epochs``."""
    figure = plt.figure()
    axes = figure.add_subplot(111)
    epochs = np.arange(training_cost_xmin, num_epochs)
    costs = training_cost[training_cost_xmin:num_epochs]
    axes.plot(epochs, costs, color='#2A6EA6')
    axes.set_xlim([training_cost_xmin, num_epochs])
    axes.grid(True)
    axes.set_xlabel('Epoch')
    axes.set_title('Cost on the training data')
    plt.show()
101 |
def plot_test_accuracy(test_accuracy, num_epochs, test_accuracy_xmin):
    """Show the test accuracy for the epochs from ``test_accuracy_xmin``
    up to, but not including, ``num_epochs``.

    Each entry of ``test_accuracy`` is divided by 100.0 before plotting.
    NOTE(review): that is a percentage only if the entries are counts
    out of 10,000 test images -- confirm against the evaluation data.
    """
    figure = plt.figure()
    axes = figure.add_subplot(111)
    epochs = np.arange(test_accuracy_xmin, num_epochs)
    shown = test_accuracy[test_accuracy_xmin:num_epochs]
    percentages = [count/100.0 for count in shown]
    axes.plot(epochs, percentages, color='#2A6EA6')
    axes.set_xlim([test_accuracy_xmin, num_epochs])
    axes.grid(True)
    axes.set_xlabel('Epoch')
    axes.set_title('Accuracy (%) on the test data')
    plt.show()
114 |
def plot_test_cost(test_cost, num_epochs, test_cost_xmin):
    """Show the test cost for the epochs from ``test_cost_xmin`` up to,
    but not including, ``num_epochs``."""
    figure = plt.figure()
    axes = figure.add_subplot(111)
    epochs = np.arange(test_cost_xmin, num_epochs)
    costs = test_cost[test_cost_xmin:num_epochs]
    axes.plot(epochs, costs, color='#2A6EA6')
    axes.set_xlim([test_cost_xmin, num_epochs])
    axes.grid(True)
    axes.set_xlabel('Epoch')
    axes.set_title('Cost on the test data')
    plt.show()
126 |
def plot_training_accuracy(training_accuracy, num_epochs,
                           training_accuracy_xmin, training_set_size):
    """Show the training accuracy for the epochs from
    ``training_accuracy_xmin`` up to, but not including, ``num_epochs``.

    Each entry of ``training_accuracy`` is scaled by
    ``100.0/training_set_size`` before plotting, so a count of correct
    results out of ``training_set_size`` images becomes a percentage.
    """
    figure = plt.figure()
    axes = figure.add_subplot(111)
    epochs = np.arange(training_accuracy_xmin, num_epochs)
    shown = training_accuracy[training_accuracy_xmin:num_epochs]
    percentages = [count*100.0/training_set_size for count in shown]
    axes.plot(epochs, percentages, color='#2A6EA6')
    axes.set_xlim([training_accuracy_xmin, num_epochs])
    axes.grid(True)
    axes.set_xlabel('Epoch')
    axes.set_title('Accuracy (%) on the training data')
    plt.show()
140 |
def plot_overlay(test_accuracy, training_accuracy, num_epochs, xmin,
                 training_set_size):
    """Show the test and training accuracy together on one set of axes,
    for the epochs from ``xmin`` up to, but not including,
    ``num_epochs``.

    Test accuracy entries are divided by 100.0, and training accuracy
    entries are scaled by ``100.0/training_set_size``.
    NOTE(review): the test scaling is a percentage only if the entries
    are counts out of 10,000 test images -- confirm against the
    evaluation data.  The y axis is fixed to the range 90 to 100.
    """
    fig = plt.figure()
    ax = fig.add_subplot(111)
    epochs = np.arange(xmin, num_epochs)
    # Both series are restricted to the plotted epochs so that they have
    # the same length as ``epochs``.  Plotting the whole lists fails as
    # soon as ``xmin`` is not 0 or the lists are longer than ``num_epochs``.
    ax.plot(epochs,
            [accuracy/100.0
             for accuracy in test_accuracy[xmin:num_epochs]],
            color='#2A6EA6',
            label="Accuracy on the test data")
    ax.plot(epochs,
            [accuracy*100.0/training_set_size
             for accuracy in training_accuracy[xmin:num_epochs]],
            color='#FFA933',
            label="Accuracy on the training data")
    ax.grid(True)
    ax.set_xlim([xmin, num_epochs])
    ax.set_xlabel('Epoch')
    ax.set_ylim([90, 100])
    plt.legend(loc="lower right")
    plt.show()
160 |
if __name__ == "__main__":
    # Ask for every setting in turn, then train the network and plot.
    filename = raw_input("Enter a file name: ")
    # The next six answers are all parsed as integers, in prompt order.
    (num_epochs,
     training_cost_xmin,
     test_accuracy_xmin,
     test_cost_xmin,
     training_accuracy_xmin,
     training_set_size) = [
        int(raw_input(prompt)) for prompt in (
            "Enter the number of epochs to run for: ",
            "training_cost_xmin (suggest 200): ",
            "test_accuracy_xmin (suggest 200): ",
            "test_cost_xmin (suggest 0): ",
            "training_accuracy_xmin (suggest 0): ",
            "Training set size (suggest 1000): ",
        )]
    lmbda = float(raw_input(
        "Enter the regularization parameter, lambda (suggest: 5.0): "))
    main(filename, num_epochs, training_cost_xmin,
         test_accuracy_xmin, test_cost_xmin, training_accuracy_xmin,
         training_set_size, lmbda)
180 |
--------------------------------------------------------------------------------
/fig/overfitting1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/overfitting1.png
--------------------------------------------------------------------------------
/fig/overfitting2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/overfitting2.png
--------------------------------------------------------------------------------
/fig/overfitting3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/overfitting3.png
--------------------------------------------------------------------------------
/fig/overfitting4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/overfitting4.png
--------------------------------------------------------------------------------
/fig/overfitting_full.json:
--------------------------------------------------------------------------------
1 | [[0.56135590058630858, 0.47806921271034553, 0.457510836259925, 0.42504920544144992, 0.39449553344420019, 0.39810448800345, 0.37017079712250733, 0.37403997639944547, 0.36290253019659285, 0.4006868170859208, 0.36817548958488616, 0.37299310675826219, 0.36871967242261605, 0.37146610246666006, 0.35704621996697938, 0.35821464151288968, 0.38622103466509744, 0.37010939716781127, 0.36539832104327125, 0.35511546847032671, 0.3828088676932585, 0.36160025922354638, 0.37028708356461698, 0.37605182846277163, 0.36634313696187393, 0.36129044456360238, 0.37531885586439506, 0.36415225595876555, 0.35707895858237054, 0.36631987373588193], [9136, 9275, 9307, 9377, 9450, 9429, 9468, 9488, 9494, 9424, 9483, 9483, 9505, 9499, 9508, 9508, 9445, 9524, 9524, 9524, 9494, 9527, 9518, 9505, 9533, 9529, 9512, 9530, 9532, 9531], [0.55994588582554705, 0.44664870303435988, 0.42455329174078477, 0.38578320429266705, 0.33992291017592285, 0.33162477096795895, 0.3137480626518645, 0.30028971890544093, 0.27353890048167528, 0.30236927117202678, 0.26487026303889277, 0.2661714884193439, 0.24734280015146709, 0.26355551438395558, 0.23088530423416964, 0.22618350577327287, 0.25137541006767478, 0.23085585354651994, 0.21417931191800957, 0.20049587923059808, 0.23713128948069295, 0.20327728799861464, 0.21953883029836488, 0.20264436321820509, 0.19643949703516961, 0.18467980669870671, 0.18788606162530633, 0.18535916502880764, 0.18466759834259142, 0.17218286758911475], [45708, 46605, 46797, 47190, 47543, 47570, 47638, 47838, 48061, 47825, 48160, 48195, 48265, 48156, 48439, 48449, 48267, 48433, 48598, 48697, 48380, 48648, 48500, 48669, 48734, 48796, 48802, 48837, 48810, 48932]]
--------------------------------------------------------------------------------
/fig/overfitting_full.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/overfitting_full.png
--------------------------------------------------------------------------------
/fig/pca_hard_data.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/pca_hard_data.png
--------------------------------------------------------------------------------
/fig/pca_hard_data_fit.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/pca_hard_data_fit.png
--------------------------------------------------------------------------------
/fig/pca_limitations.py:
--------------------------------------------------------------------------------
1 | """
2 | pca_limitations
3 | ~~~~~~~~~~~~~~~
4 |
5 | Plot graphs to illustrate the limitations of PCA.
6 | """
7 |
8 | # Third-party libraries
9 | from mpl_toolkits.mplot3d import Axes3D
10 | import matplotlib.pyplot as plt
11 | import numpy as np
12 |
# Plot just the data: 20 points spaced evenly along a helix around the
# z axis, with a small amount of normally distributed noise added to the
# x and y coordinates.
fig = plt.figure()
# Use add_subplot rather than fig.gca(projection='3d'): newer matplotlib
# releases no longer accept keyword arguments in gca(), while add_subplot
# creates the same 3D axes on both old and new versions.
ax = fig.add_subplot(111, projection='3d')
z = np.linspace(-2, 2, 20)
theta = np.linspace(-4 * np.pi, 4 * np.pi, 20)
x = np.sin(theta)+0.03*np.random.randn(20)
y = np.cos(theta)+0.03*np.random.randn(20)
ax.plot(x, y, z, 'ro')
plt.show()

# Plot the data and the helix together: the same noisy points, plus the
# noise-free helix sampled at 100 points so that it draws as a smooth curve.
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
z_helix = np.linspace(-2, 2, 100)
theta_helix = np.linspace(-4 * np.pi, 4 * np.pi, 100)
x_helix = np.sin(theta_helix)
y_helix = np.cos(theta_helix)
ax.plot(x, y, z, 'ro')
ax.plot(x_helix, y_helix, z_helix, '')
plt.show()
33 |
--------------------------------------------------------------------------------
/fig/regularized.json:
--------------------------------------------------------------------------------
1 | [[2.1903999605374445, 1.935511698609431, 1.7732802465315387, 1.5964599634182588, 1.5652172568011837, 1.6556596783771818, 1.4247190111080004, 1.3955109870673246, 1.361995997263147, 1.3212968996719621, 1.3618994944086014, 1.3366270986282185, 1.2984174484948621, 1.2752432242493146, 1.2700513563686027, 1.2799551345628921, 1.2749818189596829, 1.2871333718872076, 1.2828951499601804, 1.2735961691755695, 1.2647655170041852, 1.2594502355807271, 1.2795772447252642, 1.2727190387701224, 1.2831666300389069, 1.2672449017721463, 1.2776841316470489, 1.2805727395923963, 1.2722125928642627, 1.2638997694524687, 1.269084743039151, 1.2659230794396108, 1.2641539416626584, 1.2752029232558733, 1.298792042906463, 1.2702727329507408, 1.2711688340896683, 1.2666235616415085, 1.2674371283336907, 1.2595334250676924, 1.2648151045633766, 1.2671334304994974, 1.266707046535049, 1.2656886025740639, 1.2691346809011976, 1.2584242684159623, 1.2740111893572812, 1.2663064744488597, 1.2615652820429444, 1.2582241448899749, 1.2739320485437491, 1.2712439351275546, 1.2545617120441739, 1.2508444846333084, 1.2591492899022321, 1.2536296905827162, 1.2468994556717075, 1.2484355031942465, 1.2471530150490704, 1.2505186156687369, 1.2388584688991651, 1.2430898462249378, 1.2398539196469682, 1.242251547050703, 1.2367804592809712, 1.2298331549219981, 1.2395688633999551, 1.2354039648955897, 1.2344606308529216, 1.2266843921216057, 1.2265644119519759, 1.2245581771451541, 1.2248119226544247, 1.2243084822111481, 1.2212309349638009, 1.2161717307338038, 1.2225008376402358, 1.2193471795614443, 1.2066653202233959, 1.2123964143141386, 1.2122530086322325, 1.2045089672810716, 1.2071180830593222, 1.1989333713336954, 1.1986255015016054, 1.1956459321325097, 1.2033704463208892, 1.2021008432740117, 1.1955283612344483, 1.1952024132825856, 1.1911013449298988, 1.1876590163918093, 1.1799304033665541, 1.1872817515808496, 1.1853155824780264, 1.1801518934970823, 1.1775421394848582, 1.1789945000698789, 1.1796464383843646, 1.1712541492322308, 
1.1690597759769423, 1.1620993711661565, 1.169520268455509, 1.1706984323357481, 1.16142157165315, 1.1594443722282681, 1.1551307036588288, 1.1560927746464871, 1.1558458569577748, 1.1495662430692957, 1.1484728277323, 1.1500520790631064, 1.1505873482743494, 1.1434314002731534, 1.1399993109386326, 1.1374729793443425, 1.1387912606672026, 1.1355838671499126, 1.1407036755338451, 1.1300401447223136, 1.1322867822750577, 1.1292371015493867, 1.128702809951617, 1.1294136504785579, 1.1283079440929984, 1.1220916685170936, 1.1201001413539298, 1.115187080898234, 1.1209986528548488, 1.1105355050898427, 1.1038970751483002, 1.1107296608773507, 1.1081484849851853, 1.1024815938527315, 1.1049008621060641, 1.1049124053518378, 1.0973347585372921, 1.0989702833881896, 1.0982371727299989, 1.093818667730126, 1.0928775595886391, 1.0896907351586045, 1.0924381381269741, 1.08208790851767, 1.0886157209258644, 1.0871294743556819, 1.0874176569695346, 1.0810207377200443, 1.0741795043940798, 1.0758584646112539, 1.0751401687109641, 1.0742516957490034, 1.0782702961220325, 1.0722235429082911, 1.0676018687662321, 1.0681955051837146, 1.0659656804470576, 1.065476631432785, 1.0568244036884478, 1.0563633108751844, 1.0531856804602515, 1.0538622665809867, 1.0529003762157403, 1.0518237229645757, 1.0532673191813249, 1.0470748079542624, 1.0473355198727492, 1.0478358455529726, 1.0465867871958874, 1.0427114422019927, 1.0327604365967915, 1.0302075306662013, 1.0421326895298604, 1.0461320167330266, 1.0327502589606541, 1.0460621333327711, 1.0316861390185799, 1.0312932803708881, 1.0354453434095872, 1.0288234156512026, 1.0220427243291068, 1.0236529852899188, 1.022506499230259, 1.0327268485891929, 1.0183557109961541, 1.0166409797026996, 1.0201653625824105, 1.0181942185781696, 1.0059281072990294, 1.0151120898053012, 1.0158007330308185, 1.0076227765153161, 1.0139342289012658, 1.0108626749865222, 1.0133465258769703, 1.0093790455178744, 1.0032704939881629, 1.0065849479396327, 0.99239921410409282, 1.0078593577776735, 
1.0056533242624528, 0.99382804473952768, 1.0000313327944634, 0.99267979287687835, 0.98996333995056751, 0.98771450417037443, 0.98835879860887599, 0.98707998292775845, 0.99162222526658916, 0.98196648125555774, 0.98830619571563316, 0.98659881401490612, 0.99273488773050456, 0.98124338898029595, 0.98701911078915805, 0.97950085339921733, 0.97754982301824889, 0.97393880231450858, 0.97728343447809918, 0.9822446382489719, 0.97264123514085032, 0.97552252049617039, 0.97787781129407614, 0.97911317538881426, 0.9685897325254843, 0.97031228863494989, 0.97280520294417017, 0.96809562498274027, 0.96329522921298438, 0.96499204553248841, 0.96872124082325861, 0.97279943472003694, 0.96380722647531536, 0.9681111619139029, 0.95255860371790635, 0.95685889512103384, 0.95490880778293086, 0.96165060698066118, 0.95429007422149059, 0.9485254903573368, 0.95964565458504292, 0.9463922864368649, 0.95301119244822674, 0.96000201462323909, 0.9533654233889125, 0.94897620981608699, 0.95449157578848032, 0.95096516926024288, 0.9540996353802198, 0.94724765894116203, 0.94778948958239595, 0.94591749106378031, 0.94877930991320225, 0.94627724201870156, 0.94527196978974903, 0.93990172135237238, 0.93976092189395699, 0.94174955547339068, 0.94168247039593567, 0.94014241402470755, 0.9407513673596033, 0.9404657385931684, 0.93660325056571514, 0.93963125525499047, 0.94507818868542304, 0.93515283399520432, 0.93426655514408952, 0.94036084535630249, 0.94278800241797989, 0.9303464277379202, 0.93946074265767165, 0.94011896731994538, 0.93219548844993971, 0.92901047418070248, 0.9413051979588738, 0.9262554563794031, 0.93609567274753203, 0.93033762483130578, 0.92717362481531107, 0.92953512970255081, 0.92597921058408705, 0.92944035872481756, 0.92961945644634048, 0.92707208850656797, 0.92662815136112076, 0.92539822828480234, 0.92880970077986702, 0.92243586760987628, 0.9222750133422748, 0.92256361764295314, 0.92112697126740262, 0.92800921486682297, 0.92926106782568352, 0.9153004667869965, 0.91094443103923883, 0.93496572342069606, 
0.91974312407475323, 0.91821212221209769, 0.91941566704066269, 0.92743446987850353, 0.91503638999300818, 0.9165744471959929, 0.9173574311901096, 0.91424899909475399, 0.9105910983179335, 0.91705676725576613, 0.9153198466244068, 0.90864738371647913, 0.91433392116041334, 0.91730855979513537, 0.91943446809871454, 0.91559373840877156, 0.9117409480398676, 0.91824229192475937, 0.91601442202251138, 0.90785487652541763, 0.91036044345459455, 0.90611631831996731, 0.90776253175168242, 0.91042057926704512, 0.90874629789034811, 0.9149280544309798, 0.90782476705166226, 0.9033169594105388, 0.91714376925946128, 0.91281316274517155, 0.90717852083845951, 0.90094537088101878, 0.92254053072003039, 0.90983334489593448, 0.90708146627494168, 0.90476333243913687, 0.9103459736117403, 0.90187539543204431, 0.9131362289361491, 0.90586625644843533, 0.90304462354844051, 0.90181187136526952, 0.91019459725955099, 0.90469807709268235, 0.90432952876471406, 0.90262631470543575, 0.90744672937259074, 0.90508781907365621, 0.91257822910795028, 0.90056483900994111, 0.91093355429606471, 0.90310791000879842, 0.90693900875758127, 0.90544454108106043, 0.90810441816334841, 0.89820458428510686, 0.90551853969183604, 0.89597859386528622, 0.90023866788648133, 0.89957093023412082, 0.89800265641361166, 0.89390398408977567, 0.90802340168738838, 0.89924965362547771, 0.89428545643713697, 0.90096102794054678, 0.89998055268679522, 0.90162280275845208, 0.90437904400674751, 0.89893950123068544, 0.89953917921889903, 0.89979142882290919, 0.89255702183771879, 0.90218120233559551, 0.90059584479083477, 0.90604923024967621, 0.89808476276490912, 0.9035111823998403, 0.88812459864856652, 0.88732828138284314, 0.8990176842324713, 0.89889007193703918, 0.90199559435268073, 0.89687789130803197, 0.89551173581908239, 0.90166333865434023, 0.89319951769196892, 0.89123147694826832, 0.8999938689076068, 0.89347788349594881, 0.90133726872596209, 0.88893467356964939, 0.89150929244536248, 0.89309087673528165, 0.8964350026673984, 
0.88372758347780378, 0.89708257630045563, 0.8937592373076666, 0.89910169411630581, 0.89041595406974294, 0.90399067374724229, 0.8891242242836932, 0.893623077380578, 0.89104131698590328], [5898, 6512, 6986, 7289, 7455, 7227, 7723, 7767, 7890, 7942, 7879, 7893, 8036, 8052, 8089, 8079, 8132, 8148, 8138, 8163, 8152, 8199, 8160, 8160, 8152, 8176, 8201, 8184, 8203, 8207, 8232, 8219, 8240, 8221, 8177, 8250, 8250, 8240, 8250, 8271, 8269, 8254, 8256, 8268, 8248, 8283, 8275, 8278, 8302, 8278, 8287, 8293, 8302, 8304, 8305, 8317, 8329, 8316, 8316, 8324, 8323, 8332, 8336, 8319, 8336, 8346, 8324, 8344, 8337, 8352, 8353, 8351, 8349, 8360, 8364, 8359, 8363, 8371, 8371, 8360, 8350, 8369, 8355, 8368, 8375, 8393, 8366, 8385, 8374, 8393, 8383, 8381, 8392, 8387, 8379, 8395, 8389, 8382, 8395, 8409, 8412, 8415, 8404, 8389, 8414, 8405, 8417, 8417, 8416, 8415, 8416, 8418, 8414, 8425, 8437, 8445, 8429, 8454, 8442, 8441, 8456, 8452, 8446, 8458, 8452, 8465, 8451, 8470, 8455, 8467, 8476, 8464, 8473, 8500, 8485, 8480, 8479, 8485, 8484, 8491, 8486, 8494, 8481, 8515, 8476, 8495, 8485, 8498, 8518, 8504, 8510, 8508, 8505, 8502, 8514, 8520, 8500, 8519, 8525, 8512, 8532, 8531, 8534, 8533, 8536, 8536, 8530, 8526, 8521, 8530, 8544, 8572, 8536, 8543, 8545, 8542, 8559, 8558, 8548, 8551, 8567, 8557, 8554, 8535, 8566, 8573, 8554, 8554, 8589, 8571, 8554, 8582, 8572, 8571, 8569, 8574, 8573, 8580, 8603, 8558, 8590, 8595, 8599, 8594, 8594, 8599, 8594, 8594, 8588, 8607, 8594, 8606, 8594, 8609, 8606, 8597, 8612, 8618, 8617, 8602, 8624, 8625, 8601, 8605, 8620, 8622, 8613, 8622, 8621, 8612, 8613, 8609, 8614, 8613, 8627, 8626, 8627, 8622, 8636, 8637, 8624, 8632, 8616, 8610, 8632, 8613, 8617, 8626, 8610, 8626, 8620, 8630, 8619, 8629, 8633, 8637, 8631, 8627, 8638, 8634, 8636, 8633, 8633, 8620, 8630, 8637, 8638, 8630, 8626, 8647, 8630, 8625, 8646, 8637, 8623, 8645, 8625, 8631, 8649, 8644, 8645, 8642, 8632, 8644, 8638, 8641, 8636, 8642, 8643, 8646, 8638, 8635, 8637, 8650, 8671, 8632, 8655, 8657, 8651, 8632, 8649, 8655, 
8653, 8659, 8654, 8657, 8655, 8652, 8651, 8666, 8648, 8647, 8663, 8644, 8651, 8678, 8672, 8668, 8663, 8654, 8665, 8655, 8668, 8676, 8660, 8662, 8674, 8674, 8648, 8666, 8673, 8679, 8658, 8680, 8666, 8674, 8674, 8675, 8673, 8678, 8676, 8671, 8668, 8673, 8647, 8676, 8656, 8673, 8675, 8669, 8658, 8689, 8675, 8691, 8683, 8691, 8684, 8695, 8683, 8689, 8693, 8691, 8687, 8680, 8674, 8692, 8685, 8688, 8698, 8685, 8688, 8663, 8685, 8691, 8694, 8701, 8678, 8679, 8686, 8694, 8690, 8685, 8689, 8710, 8677, 8699, 8681, 8694, 8703, 8691, 8690, 8703, 8699, 8703, 8686, 8692, 8678, 8707, 8692, 8695], [3.0054668602173189, 2.630540182112914, 2.3510873884747334, 2.1474705267659759, 2.0293529993091846, 2.0032380926060593, 1.8042553867762137, 1.7003373477735675, 1.6321855821328803, 1.5610897547444651, 1.5281925730670125, 1.4856976518536389, 1.4217730925966259, 1.3909369689936708, 1.3493713532353451, 1.3200187301521982, 1.301800278663491, 1.2676513467349682, 1.2429301255926062, 1.2185580400821583, 1.2050358080246586, 1.1832267788099187, 1.1685930523656132, 1.1484535916807292, 1.1359697154554902, 1.114320381224261, 1.1014631295982018, 1.089492003214271, 1.0727926078697159, 1.0613717416584318, 1.0469730951133949, 1.0327418635716941, 1.021579545763369, 1.007466532996552, 1.0019118318591766, 0.98744520456502871, 0.97474616908673617, 0.96296146445044695, 0.95371837313859686, 0.94627032091708685, 0.93498894772954433, 0.92371303592517773, 0.91598769285379622, 0.90572616038790177, 0.89742104654365473, 0.88871852051305211, 0.88017134729428881, 0.87050654917655768, 0.86212122128894098, 0.85338982152113285, 0.84517324140670691, 0.83628724752074646, 0.82688131184478686, 0.81882973565637929, 0.81057777549741772, 0.80277250205231088, 0.79480020668910989, 0.78780832187387972, 0.78001287137204445, 0.77319156351717266, 0.76596973399820345, 0.75937155578658067, 0.75198198829961649, 0.74542036500766451, 0.73896269018433358, 0.73189672823515506, 0.72511198497639617, 0.71858339754179046, 0.71283671972744378, 
0.70572537262239332, 0.69919661822074053, 0.6932705965074516, 0.68720037824228397, 0.68083868446687623, 0.67490287294715556, 0.6688525492359253, 0.66319307295604624, 0.65751891741935764, 0.65160400038475963, 0.64606741109074506, 0.64065241850535526, 0.63470824020889005, 0.62942800255997755, 0.6238131388272512, 0.61821534837327441, 0.6131143735593132, 0.60801454092208473, 0.60307257257391134, 0.597472701650819, 0.5922276823975956, 0.58718276628487431, 0.58219953746425901, 0.57721941418480194, 0.57251776210593941, 0.56770248252692224, 0.56293171594753588, 0.5583703980639495, 0.55356242427910862, 0.54911205675728603, 0.54434678517468216, 0.53983964079181412, 0.53560546501131556, 0.53104610602400237, 0.52690257757896453, 0.52244249181950919, 0.51837040316250427, 0.51406707535766105, 0.50997890157700754, 0.50580316759379418, 0.50197927328326941, 0.49765475959114985, 0.49363114894282656, 0.49013948810957153, 0.48574563160725903, 0.481996433456091, 0.47836254150930335, 0.47438049006936467, 0.47063209526452893, 0.46700010263520492, 0.46326294173821808, 0.45941178891664192, 0.45579944000986117, 0.45222495753341951, 0.44874146785608826, 0.44545391416308638, 0.44181467115194983, 0.43828437942217396, 0.43497413417170505, 0.4314933212391851, 0.42806639128733814, 0.4249830118133005, 0.4217067271918078, 0.41822647856750578, 0.41518744647089212, 0.41184644098125406, 0.40883008626966744, 0.40553823067182421, 0.40244746846822144, 0.39928585652511611, 0.39626977647492767, 0.39331263919868908, 0.39019492871393685, 0.38732420260579697, 0.38435150189678424, 0.38177223040643438, 0.37853457408790142, 0.37569449510498465, 0.37295407280298576, 0.37017541614667188, 0.36735907932787892, 0.36474742614606914, 0.36195502625628007, 0.35947915235047068, 0.35675421962663562, 0.35401004282520687, 0.35144133746409423, 0.34889387731183558, 0.34641383158012634, 0.34392904811658986, 0.34130203990500291, 0.33888366962785094, 0.33638822418723674, 0.33396129048481904, 0.33175452278050255, 0.32933814039393, 
0.32694391946859563, 0.32460086680314448, 0.32231863682007983, 0.32010480841447037, 0.31790147822028658, 0.31571847553356108, 0.31361078068871434, 0.31122285863559135, 0.30943747241499886, 0.30710922580369193, 0.30523966755164406, 0.30284937095608283, 0.3007194800484288, 0.29894516963898177, 0.29668334988284817, 0.29487017447397706, 0.29263559416080354, 0.29070159725793254, 0.28909427829661305, 0.28678884984900743, 0.28490027458015588, 0.2830863666836243, 0.28117503586861597, 0.27940625519366591, 0.27761490071595862, 0.27571735158201321, 0.27400939202985924, 0.2720994498803232, 0.27041895644243802, 0.26870944341767583, 0.26692503810639295, 0.26514216579715211, 0.26350425688621143, 0.26194775244796215, 0.26031808133537176, 0.25867644728947803, 0.25701548157578263, 0.2553925131758501, 0.2537805110290578, 0.25233205572342077, 0.25059011155916222, 0.24900995023173669, 0.24750970679163162, 0.24603574581720411, 0.24453517928837162, 0.24297118556626207, 0.24163254815954366, 0.24010060406903927, 0.23858447318485032, 0.23709355381025712, 0.23575693213477292, 0.23430242910697505, 0.23300424062786362, 0.23146492791787876, 0.23028619269651887, 0.22872263832614514, 0.22757051865720562, 0.22612070285691649, 0.22486518055185545, 0.22356486396463154, 0.22215084040399144, 0.22116285480573897, 0.21954904035047973, 0.21853554445024348, 0.21730688614947213, 0.21600390968478739, 0.21473510420394584, 0.21359511875939363, 0.21250417810571734, 0.21119670161592852, 0.20998559164773262, 0.208837489317242, 0.20768479087034092, 0.20659155500937021, 0.20558367284244244, 0.20435335237628477, 0.2032464875641013, 0.20219802987124882, 0.20127901932926417, 0.20007579622717186, 0.19898737190593724, 0.19804803849997596, 0.19689315580825217, 0.19610885980109602, 0.19479852999276509, 0.1940612380895031, 0.19292786347284827, 0.19185809503845005, 0.19096893321163924, 0.19003230799124482, 0.18913859695629501, 0.1881291695604892, 0.18718973645719555, 0.18627742977977121, 0.18541142438497082, 
0.18448857944953381, 0.18359351692378717, 0.18261368242334564, 0.18180375921357417, 0.18093506139675003, 0.17998546923834557, 0.17915124316640865, 0.17852974081084139, 0.17753670237988683, 0.17675502721943334, 0.17596267360975426, 0.17532135165792365, 0.1742351851435536, 0.17358483897004762, 0.1729350931986928, 0.1720805519195413, 0.17109064918417963, 0.17027331927806791, 0.16963634485184267, 0.16895358154624776, 0.1682839465581161, 0.16747701705369528, 0.16667755231392004, 0.16591178893673617, 0.16538283429287487, 0.16444040613503746, 0.16373895724486862, 0.16308076465326077, 0.16233620430635892, 0.16176781013586375, 0.16104183013834106, 0.16046925595808373, 0.15987647357985665, 0.15926248578219754, 0.15861934606502726, 0.15813634134480264, 0.15728566055545845, 0.15654044850087195, 0.15576910232092384, 0.15542825510458746, 0.15471291341581561, 0.15393039632238567, 0.15338574586063461, 0.15289796647946724, 0.15237239387638404, 0.15167106792711721, 0.15117419365146861, 0.15053294284800808, 0.14984558772800877, 0.14939067983754478, 0.14873871322311416, 0.14816358361629323, 0.14764993080499633, 0.147081011815695, 0.14665877802009278, 0.146330915059212, 0.14558165958404565, 0.14485851618233073, 0.14468885149823432, 0.1438703517822042, 0.14344305879631206, 0.14286592428666159, 0.14233498842740988, 0.14196829563110053, 0.14137537114428306, 0.14093879724708491, 0.14032845116761775, 0.13999255024009272, 0.13970462455415528, 0.13891699772284177, 0.13845977036853441, 0.13800758209633504, 0.13755412286780858, 0.13710288863433284, 0.1366941098396158, 0.13620566631276054, 0.13574747139784185, 0.1354018600833839, 0.13509926299887218, 0.13449907813149148, 0.13414481985203083, 0.13360822141476442, 0.13319767720007691, 0.13296506419947443, 0.13254819602035237, 0.13205345942670241, 0.13167698206691389, 0.13129565313440172, 0.13088461421366027, 0.13059579877002456, 0.13013895016529287, 0.12974813771716298, 0.12942570940664525, 0.12894055787725606, 0.12857812443876793, 
0.12831920733468941, 0.12787017352245639, 0.12757677057644992, 0.12716368111937662, 0.12684805248186204, 0.12642447555752939, 0.12619325054698116, 0.12568699211395123, 0.1255825925227049, 0.12531188331355458, 0.12477983663711714, 0.12448255939185263, 0.12410353700139506, 0.12374131975287532, 0.12346266645310616, 0.12310787618062269, 0.12306935609380343, 0.12260837265741077, 0.12213474763430163, 0.12209935233252732, 0.12180246601285891, 0.1212539118889692, 0.12108968375061299, 0.12068240811734519, 0.12041266804252408, 0.12016957520782154, 0.11993799531844312, 0.1196710285606524, 0.11935732791087648, 0.11929515289226478, 0.11889389635415669, 0.11862785428698526, 0.11817071954732133, 0.11814865141172132, 0.11763340847971795, 0.11749514446259617, 0.11725747872816564, 0.11695850963543949, 0.1166363293817957, 0.11644033569813024, 0.11615572825807459, 0.11608765530562379, 0.11580834205760465, 0.1154832427731588, 0.1153905733766095], [690, 749, 810, 849, 874, 869, 912, 932, 948, 950, 958, 964, 971, 975, 980, 979, 980, 981, 983, 986, 982, 986, 987, 990, 990, 990, 992, 991, 992, 991, 993, 992, 993, 994, 994, 995, 995, 995, 996, 994, 997, 997, 996, 998, 998, 997, 998, 998, 998, 998, 998, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 1000, 999, 1000, 999, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 
1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]]
--------------------------------------------------------------------------------
/fig/regularized1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/regularized1.png
--------------------------------------------------------------------------------
/fig/regularized2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/regularized2.png
--------------------------------------------------------------------------------
/fig/regularized_full.json:
--------------------------------------------------------------------------------
1 | [[4.3072791918656037, 2.9331304641086344, 2.1348073553576041, 1.6588303607817259, 1.330889938797851, 1.1963223601928472, 1.1170765304219505, 1.0170754480838433, 0.99110935015398149, 1.0071179800661803, 0.96280080386971378, 0.99226609521675169, 0.96023984363523895, 0.97253784945751276, 0.93966545596520334, 0.95330563342376551, 0.96378529404233837, 0.97367336858037301, 0.94435985290781166, 0.94622931411839994, 0.98392022263201184, 0.94091005661041272, 0.9496551347987412, 0.94714964684453073, 0.95026655456196552, 0.92915894672179755, 0.95831053042987979, 1.0153994919718721, 0.92940339906358749, 0.97682851862658082], [9212, 9341, 9375, 9424, 9532, 9537, 9504, 9541, 9578, 9538, 9579, 9530, 9590, 9543, 9607, 9597, 9576, 9546, 9600, 9634, 9544, 9606, 9614, 9607, 9621, 9637, 9620, 9511, 9649, 9561], [1.2925405259017666, 0.92479539229795305, 0.72611252037165497, 0.61618944188425839, 0.49142410439713557, 0.46552608507795468, 0.46074829841290343, 0.40775149802551902, 0.39671750686791218, 0.42031570708192345, 0.38057096091326847, 0.40768033915334978, 0.3895210257834103, 0.40585871820346864, 0.36003072887701948, 0.37700037701783806, 0.39300003862768451, 0.40774598935627593, 0.37194215157507704, 0.3662415845761452, 0.40722309031673021, 0.36476961463606117, 0.36988528906574514, 0.36112644707329011, 0.380710641602238, 0.35700998663848571, 0.37724740623797381, 0.44991741876110503, 0.35820321110078079, 0.39226034353556583], [45919, 46835, 47204, 47434, 47989, 47930, 47839, 48157, 48218, 48105, 48313, 48089, 48282, 48111, 48463, 48362, 48243, 48123, 48416, 48533, 48123, 48483, 48435, 48548, 48434, 48524, 48417, 47797, 48561, 48235]]
--------------------------------------------------------------------------------
/fig/regularized_full.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/regularized_full.png
--------------------------------------------------------------------------------
/fig/replaced_by_d3/README.md:
--------------------------------------------------------------------------------
1 | # Replaced by d3 directory
2 |
This directory contains the Python code that generated PNG figures which
were later replaced by D3 visualizations in the live version of the site.
The scripts and images have been preserved here on the off chance that
they may be of use at some point in the future.
7 |
--------------------------------------------------------------------------------
/fig/replaced_by_d3/relu.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/replaced_by_d3/relu.png
--------------------------------------------------------------------------------
/fig/replaced_by_d3/relu.py:
--------------------------------------------------------------------------------
1 | """
2 | relu
3 | ~~~~
4 |
5 | Plots a graph of the squashing function used by a rectified linear
6 | unit."""
7 |
8 | import numpy as np
9 | import matplotlib.pyplot as plt
10 |
# Sample z on [-2, 2) in steps of 0.1 and take, element by element, the
# larger of z and zero.
z = np.arange(-2, 2, .1)
y = np.max([np.zeros(len(z)), z], axis=0)

figure = plt.figure()
axes = figure.add_subplot(111)
axes.plot(z, y)
# Same range on both axes.
axes.set_xlim([-2.0, 2.0])
axes.set_ylim([-2.0, 2.0])
axes.grid(True)
axes.set_xlabel('z')
axes.set_title('Rectified linear unit')

plt.show()
25 |
--------------------------------------------------------------------------------
/fig/replaced_by_d3/sigmoid.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/replaced_by_d3/sigmoid.png
--------------------------------------------------------------------------------
/fig/replaced_by_d3/sigmoid.py:
--------------------------------------------------------------------------------
1 | """
2 | sigmoid
3 | ~~~~~~~
4 |
5 | Plots a graph of the sigmoid function."""
6 |
7 | import numpy
8 | import matplotlib.pyplot as plt
9 |
# Sample z on [-5, 5) in steps of 0.1.
z = numpy.arange(-5, 5, .1)
# numpy.exp already operates elementwise on arrays, so the sigmoid can be
# evaluated on the whole array at once; wrapping it in numpy.vectorize only
# adds a Python-level loop.
sigma = 1.0/(1.0+numpy.exp(-z))

fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(z, sigma)
# Leave a margin above 1 and below 0 so the curve's flat ends stay visible.
ax.set_ylim([-0.5, 1.5])
ax.set_xlim([-5,5])
ax.grid(True)
ax.set_xlabel('z')
ax.set_title('sigmoid function')

plt.show()
24 |
--------------------------------------------------------------------------------
/fig/replaced_by_d3/step.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/replaced_by_d3/step.png
--------------------------------------------------------------------------------
/fig/replaced_by_d3/step.py:
--------------------------------------------------------------------------------
1 | """
2 | step
3 | ~~~~~~~
4 |
5 | Plots a graph of a step function."""
6 |
7 | import numpy
8 | import matplotlib.pyplot as plt
9 |
def _unit_step(z):
    """Return 1.0 for non-negative ``z`` and 0.0 otherwise."""
    if z >= 0.0:
        return 1.0
    return 0.0


# Evaluate the step function point by point over the plotted range.
z = numpy.arange(-5, 5, .02)
step_fn = numpy.vectorize(_unit_step)
step = step_fn(z)

# Draw the curve on a single set of axes with a fixed viewing window.
fig, ax = plt.subplots()
ax.plot(z, step)
ax.set_xlim([-5,5])
ax.set_ylim([-0.5, 1.5])
ax.set_xlabel('z')
ax.set_title('step function')
ax.grid(True)

plt.show()
24 |
--------------------------------------------------------------------------------
/fig/replaced_by_d3/tanh.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/replaced_by_d3/tanh.png
--------------------------------------------------------------------------------
/fig/replaced_by_d3/tanh.py:
--------------------------------------------------------------------------------
1 | """
2 | tanh
3 | ~~~~
4 |
5 | Plots a graph of the tanh function."""
6 |
7 | import numpy as np
8 | import matplotlib.pyplot as plt
9 |
# Evaluate tanh over the plotted range.
z = np.arange(-5, 5, .1)
t = np.tanh(z)

# Draw the curve on a single set of axes with a fixed viewing window.
fig, ax = plt.subplots()
ax.plot(z, t)
ax.set_xlim([-5,5])
ax.set_ylim([-1.0, 1.0])
ax.set_xlabel('z')
ax.set_title('tanh function')
ax.grid(True)

plt.show()
23 |
--------------------------------------------------------------------------------
/fig/serialize_images_to_json.py:
--------------------------------------------------------------------------------
1 | """
2 | serialize_images_to_json
3 | ~~~~~~~~~~~~~~~~~~~~~~~~
4 |
5 | Utility to serialize parts of the training and validation data to JSON,
6 | for use with Javascript. """
7 |
8 | #### Libraries
9 | # Standard library
10 | import json
11 | import sys
12 |
13 | # My library
14 | sys.path.append('../src/')
15 | import mnist_loader
16 |
17 | # Third-party libraries
18 | import numpy as np
19 |
20 |
21 | # Number of training and validation data images to serialize
# Number of training (NTD) and validation (NVD) entries that are written
# to the JSON file further down in this script.
NTD = 1000
NVD = 100

# Load all three data sets through the project's loader.  test_data is
# unpacked here but is not serialized by this script.
training_data, validation_data, test_data = mnist_loader.load_data_wrapper()
26 |
def make_data_integer(td):
    """Return the 784 entries of ``td``, multiplied by 256, as a list of
    standard Python ints (each value is truncated by ``int``).

    """
    # The conversion runs in a Python-level loop, which is slow.  It
    # would be nicer to let numpy do it, but numpy.rint followed by
    # tolist() does not produce standard Python ints.
    scaled = (td * 256).reshape(784)
    return list(map(int, scaled.tolist()))
32 |
# Build the JSON payload from the first NTD training entries and the
# first NVD validation entries.  Each "x" is flattened by taking element
# 0 of every row of the array's nested-list form; training "y" values are
# flattened the same way, while validation "y" values are stored as-is.
# range() is used instead of xrange() so the script also runs on Python 3,
# which the imported mnist_loader already requires.
# NOTE(review): validation_data[j][1] is presumably a plain digit label;
# confirm it is JSON-serializable (e.g. not a numpy integer) on Python 3.
data = {"training": [
            {"x": [x[0] for x in training_data[j][0].tolist()],
             "y": [y[0] for y in training_data[j][1].tolist()]}
            for j in range(NTD)],
        "validation": [
            {"x": [x[0] for x in validation_data[j][0].tolist()],
             "y": validation_data[j][1]}
            for j in range(NVD)]}

# The context manager closes the file even if json.dump raises.
with open("data_1000.json", "w") as f:
    json.dump(data, f)
45 |
46 |
47 |
--------------------------------------------------------------------------------
/fig/test.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/test.png
--------------------------------------------------------------------------------
/fig/training_speed_2_layers.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/training_speed_2_layers.png
--------------------------------------------------------------------------------
/fig/training_speed_3_layers.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/training_speed_3_layers.png
--------------------------------------------------------------------------------
/fig/training_speed_4_layers.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/training_speed_4_layers.png
--------------------------------------------------------------------------------
/fig/valley.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/valley.png
--------------------------------------------------------------------------------
/fig/valley.py:
--------------------------------------------------------------------------------
1 | """
2 | valley
3 | ~~~~~~
4 |
5 | Plots a function of two variables to minimize. The function is a
6 | fairly generic valley function."""
7 |
8 | #### Libraries
9 | # Third party libraries
10 | from matplotlib.ticker import LinearLocator
11 | # Note that axes3d is not explicitly used in the code, but is needed
12 | # to register the 3d plot type correctly
13 | from mpl_toolkits.mplot3d import axes3d
14 | import matplotlib.pyplot as plt
15 | import numpy
16 |
fig = plt.figure()
# Create the 3d axes with add_subplot: passing ``projection`` to
# fig.gca() is no longer accepted by current matplotlib releases.
ax = fig.add_subplot(111, projection='3d')
X = numpy.arange(-1, 1, 0.1)
Y = numpy.arange(-1, 1, 0.1)
X, Y = numpy.meshgrid(X, Y)
Z = X**2 + Y**2

# Colour the surface facets in a white/blue checkerboard pattern.
# range() (rather than xrange()) works on both Python 2 and Python 3.
colortuple = ('w', 'b')
colors = numpy.empty(X.shape, dtype=str)
for x in range(len(X)):
    for y in range(len(Y)):
        colors[x, y] = colortuple[(x + y) % 2]

surf = ax.plot_surface(X, Y, Z, rstride=1, cstride=1, facecolors=colors,
        linewidth=0)

ax.set_xlim3d(-1, 1)
ax.set_ylim3d(-1, 1)
ax.set_zlim3d(0, 2)
# Three major ticks per axis.  ax.xaxis/yaxis/zaxis are used instead of
# the w_xaxis/w_yaxis/w_zaxis aliases, which newer matplotlib has dropped.
ax.xaxis.set_major_locator(LinearLocator(3))
ax.yaxis.set_major_locator(LinearLocator(3))
ax.zaxis.set_major_locator(LinearLocator(3))
# Hand-placed axis labels.
ax.text(1.79, 0, 1.62, "$C$", fontsize=20)
ax.text(0.05, -1.8, 0, "$v_1$", fontsize=20)
ax.text(1.5, -0.25, 0, "$v_2$", fontsize=20)

plt.show()
44 |
--------------------------------------------------------------------------------
/fig/valley2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/valley2.png
--------------------------------------------------------------------------------
/fig/valley2.py:
--------------------------------------------------------------------------------
1 | """valley2.py
2 | ~~~~~~~~~~~~~
3 |
4 | Plots a function of two variables to minimize. The function is a
5 | fairly generic valley function.
6 |
7 | Note that this is a duplicate of valley.py, but omits labels on the
8 | axis. It's bad practice to duplicate in this way, but I had
9 | considerable trouble getting matplotlib to update a graph in the way I
10 | needed (adding or removing labels), so finally fell back on this as a
11 | kludge solution.
12 |
13 | """
14 |
15 | #### Libraries
16 | # Third party libraries
17 | from matplotlib.ticker import LinearLocator
18 | # Note that axes3d is not explicitly used in the code, but is needed
19 | # to register the 3d plot type correctly
20 | from mpl_toolkits.mplot3d import axes3d
21 | import matplotlib.pyplot as plt
22 | import numpy
23 |
fig = plt.figure()
# Create the 3d axes with add_subplot: passing ``projection`` to
# fig.gca() is no longer accepted by current matplotlib releases.
ax = fig.add_subplot(111, projection='3d')
X = numpy.arange(-1, 1, 0.1)
Y = numpy.arange(-1, 1, 0.1)
X, Y = numpy.meshgrid(X, Y)
Z = X**2 + Y**2

# Colour the surface facets in a white/blue checkerboard pattern.
# range() (rather than xrange()) works on both Python 2 and Python 3.
colortuple = ('w', 'b')
colors = numpy.empty(X.shape, dtype=str)
for x in range(len(X)):
    for y in range(len(Y)):
        colors[x, y] = colortuple[(x + y) % 2]

surf = ax.plot_surface(X, Y, Z, rstride=1, cstride=1, facecolors=colors,
        linewidth=0)

ax.set_xlim3d(-1, 1)
ax.set_ylim3d(-1, 1)
ax.set_zlim3d(0, 2)
# Three major ticks per axis.  ax.xaxis/yaxis/zaxis are used instead of
# the w_xaxis/w_yaxis/w_zaxis aliases, which newer matplotlib has dropped.
ax.xaxis.set_major_locator(LinearLocator(3))
ax.yaxis.set_major_locator(LinearLocator(3))
ax.zaxis.set_major_locator(LinearLocator(3))
# Only the cost label is drawn in this variant of the figure.
ax.text(1.79, 0, 1.62, "$C$", fontsize=20)

plt.show()
49 |
--------------------------------------------------------------------------------
/fig/weight_initialization.py:
--------------------------------------------------------------------------------
1 | """weight_initialization
2 | ~~~~~~~~~~~~~~~~~~~~~~~~
3 |
4 | This program shows how weight initialization affects training. In
5 | particular, we'll plot out how the classification accuracies improve
6 | using either large starting weights, whose standard deviation is 1, or
7 | the default starting weights, whose standard deviation is 1 over the
8 | square root of the number of input neurons.
9 |
10 | """
11 |
12 | # Standard library
13 | import json
14 | import random
15 | import sys
16 |
17 | # My library
18 | sys.path.append('../src/')
19 | import mnist_loader
20 | import network2
21 |
22 | # Third-party libraries
23 | import matplotlib.pyplot as plt
24 | import numpy as np
25 |
def main(filename, n, eta):
    """Train the networks, saving the results to ``filename``, and then
    plot those saved results.  ``n`` and ``eta`` are passed straight
    through to ``run_network``.

    """
    run_network(filename, n, eta)
    make_plot(filename)
29 |
def run_network(filename, n, eta):
    """Train the network using both the default and the large starting
    weights.  Store the results in the file with name ``filename``,
    where they can later be used by ``make_plot``.

    ``n`` is the size of the middle layer of the ``[784, n, 10]``
    network.  ``eta`` is forwarded unchanged to ``net.SGD`` (presumably
    the learning rate -- confirm against network2).  Both training runs
    use 30 epochs, a mini-batch size of 10 and ``lmbda=5.0``.

    """
    # Make results more easily reproducible
    random.seed(12345678)
    np.random.seed(12345678)
    training_data, validation_data, test_data = mnist_loader.load_data_wrapper()
    net = network2.Network([784, n, 10], cost=network2.CrossEntropyCost)
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print("Train the network using the default starting weights.")
    default_vc, default_va, default_tc, default_ta \
        = net.SGD(training_data, 30, 10, eta, lmbda=5.0,
                  evaluation_data=validation_data,
                  monitor_evaluation_accuracy=True)
    print("Train the network using the large starting weights.")
    # Re-initialize the same network object before the second run.
    net.large_weight_initializer()
    large_vc, large_va, large_tc, large_ta \
        = net.SGD(training_data, 30, 10, eta, lmbda=5.0,
                  evaluation_data=validation_data,
                  monitor_evaluation_accuracy=True)
    results = {"default_weight_initialization":
               [default_vc, default_va, default_tc, default_ta],
               "large_weight_initialization":
               [large_vc, large_va, large_tc, large_ta]}
    # The context manager closes the file even if serialization fails.
    with open(filename, "w") as f:
        json.dump(results, f)
59 |
def make_plot(filename):
    """Load the results from the file ``filename``, and generate the
    corresponding plot.

    The file is expected to hold the JSON structure written by
    ``run_network``: for each of the two initialization schemes, a list
    whose second entry is the per-epoch evaluation accuracy.  The number
    of epochs plotted is taken from the length of those lists.

    """
    # The context manager closes the file even if parsing fails.
    with open(filename, "r") as f:
        results = json.load(f)
    default_va = results["default_weight_initialization"][1]
    large_va = results["large_weight_initialization"][1]
    # Convert raw classification numbers to percentages, for plotting
    # (dividing by 100 assumes 10,000 evaluation images).
    default_va = [x/100.0 for x in default_va]
    large_va = [x/100.0 for x in large_va]
    num_epochs = max(len(default_va), len(large_va))
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.plot(np.arange(len(large_va)), large_va, color='#2A6EA6',
            label="Old approach to weight initialization")
    ax.plot(np.arange(len(default_va)), default_va, color='#FFA933',
            label="New approach to weight initialization")
    ax.set_xlim([0, num_epochs])
    ax.set_xlabel('Epoch')
    ax.set_ylim([85, 100])
    ax.set_title('Classification accuracy')
    plt.legend(loc="lower right")
    plt.show()
87 |
if __name__ == "__main__":
    # main() needs a results filename, a hidden-layer size and eta;
    # calling it with no arguments raised a TypeError, so the three
    # values are read from the command line instead.
    if len(sys.argv) != 4:
        sys.exit("usage: python weight_initialization.py FILENAME N ETA")
    main(sys.argv[1], int(sys.argv[2]), float(sys.argv[3]))
90 |
--------------------------------------------------------------------------------
/fig/weight_initialization_100.json:
--------------------------------------------------------------------------------
1 | {"default_weight_initialization": [[], [9295, 9481, 9547, 9592, 9664, 9673, 9702, 9719, 9726, 9726, 9732, 9732, 9730, 9734, 9745, 9751, 9757, 9761, 9764, 9766, 9758, 9767, 9756, 9752, 9777, 9775, 9770, 9770, 9771, 9781], [], []], "large_weight_initialization": [[], [8994, 9181, 9260, 9364, 9427, 9449, 9497, 9512, 9560, 9578, 9603, 9616, 9626, 9629, 9644, 9671, 9674, 9679, 9700, 9708, 9707, 9717, 9729, 9720, 9719, 9745, 9751, 9754, 9755, 9742], [], []]}
--------------------------------------------------------------------------------
/fig/weight_initialization_100.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/weight_initialization_100.png
--------------------------------------------------------------------------------
/fig/weight_initialization_30.json:
--------------------------------------------------------------------------------
1 | {"default_weight_initialization": [[], [9270, 9414, 9470, 9504, 9537, 9550, 9587, 9594, 9596, 9594, 9616, 9595, 9622, 9630, 9636, 9641, 9625, 9652, 9637, 9634, 9642, 9639, 9649, 9646, 9646, 9653, 9646, 9653, 9640, 9650], [], []], "large_weight_initialization": [[], [8643, 9044, 9141, 9231, 9299, 9327, 9385, 9416, 9433, 9449, 9476, 9489, 9500, 9535, 9521, 9548, 9564, 9573, 9585, 9592, 9596, 9615, 9607, 9605, 9606, 9622, 9637, 9648, 9635, 9637], [], []]}
--------------------------------------------------------------------------------
/fig/weight_initialization_30.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/weight_initialization_30.png
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy
2 | scikit-learn
3 | scipy
4 | Theano
5 |
--------------------------------------------------------------------------------
/src/conv.py:
--------------------------------------------------------------------------------
1 | """conv.py
2 | ~~~~~~~~~~
3 |
4 | Code for many of the experiments involving convolutional networks in
5 | Chapter 6 of the book 'Neural Networks and Deep Learning', by Michael
6 | Nielsen. The code essentially duplicates (and parallels) what is in
7 | the text, so this is simply a convenience, and has not been commented
8 | in detail. Consult the original text for more details.
9 |
10 | """
11 |
12 | from collections import Counter
13 |
14 | import matplotlib
15 | matplotlib.use('Agg')
16 | import matplotlib.pyplot as plt
17 | import numpy as np
18 | import theano
19 | import theano.tensor as T
20 |
21 | import network3
22 | from network3 import sigmoid, tanh, ReLU, Network
23 | from network3 import ConvPoolLayer, FullyConnectedLayer, SoftmaxLayer
24 |
# Load the three data sets once, at import time; the experiment
# functions in this module read these module-level names.
training_data, validation_data, test_data = network3.load_data_shared()
# Mini-batch size shared by every Network, ConvPoolLayer image_shape and
# SGD call in this module.
mini_batch_size = 10
27 |
def shallow(n=3, epochs=60):
    """Train ``n`` separate shallow networks (one fully-connected layer
    of 100 neurons followed by a softmax layer), each for ``epochs``
    epochs, and return all of them in a list.

    """
    nets = []
    for _ in range(n):
        print("A shallow net with 100 hidden neurons")
        hidden = FullyConnectedLayer(n_in=784, n_out=100)
        output = SoftmaxLayer(n_in=100, n_out=10)
        net = Network([hidden, output], mini_batch_size)
        # 0.1 is the fourth positional SGD argument (presumably the
        # learning rate -- confirm against network3).
        net.SGD(training_data, epochs, mini_batch_size, 0.1,
                validation_data, test_data)
        nets.append(net)
    return nets
40 |
def basic_conv(n=3, epochs=60):
    """Build and train a conv-pool + fully-connected + softmax network
    ``n`` times, for ``epochs`` epochs each.  Only the network from the
    last repetition is returned.

    """
    for _ in range(n):
        print("Conv + FC architecture")
        conv_pool = ConvPoolLayer(image_shape=(mini_batch_size, 1, 28, 28),
                                  filter_shape=(20, 1, 5, 5),
                                  poolsize=(2, 2))
        hidden = FullyConnectedLayer(n_in=20*12*12, n_out=100)
        output = SoftmaxLayer(n_in=100, n_out=10)
        net = Network([conv_pool, hidden, output], mini_batch_size)
        net.SGD(training_data, epochs, mini_batch_size, 0.1,
                validation_data, test_data)
    return net
53 |
def omit_FC():
    """Build and train, three times over, a network whose conv-pool
    layer feeds the softmax layer directly (no fully-connected layer).
    Only the network from the last repetition is returned.

    """
    for _ in range(3):
        print("Conv only, no FC")
        conv_pool = ConvPoolLayer(image_shape=(mini_batch_size, 1, 28, 28),
                                  filter_shape=(20, 1, 5, 5),
                                  poolsize=(2, 2))
        output = SoftmaxLayer(n_in=20*12*12, n_out=10)
        net = Network([conv_pool, output], mini_batch_size)
        net.SGD(training_data, 60, mini_batch_size, 0.1,
                validation_data, test_data)
    return net
64 |
def dbl_conv(activation_fn=sigmoid):
    """Build and train, three times over, a network with two conv-pool
    layers, a fully-connected layer and a softmax layer.  The two
    conv-pool layers and the fully-connected layer all use
    ``activation_fn``.  Only the network from the last repetition is
    returned.

    """
    for _ in range(3):
        print("Conv + Conv + FC architecture")
        first_conv = ConvPoolLayer(image_shape=(mini_batch_size, 1, 28, 28),
                                   filter_shape=(20, 1, 5, 5),
                                   poolsize=(2, 2),
                                   activation_fn=activation_fn)
        second_conv = ConvPoolLayer(image_shape=(mini_batch_size, 20, 12, 12),
                                    filter_shape=(40, 20, 5, 5),
                                    poolsize=(2, 2),
                                    activation_fn=activation_fn)
        hidden = FullyConnectedLayer(
            n_in=40*4*4, n_out=100, activation_fn=activation_fn)
        output = SoftmaxLayer(n_in=100, n_out=10)
        net = Network([first_conv, second_conv, hidden, output],
                      mini_batch_size)
        net.SGD(training_data, 60, mini_batch_size, 0.1,
                validation_data, test_data)
    return net
82 |
83 | # The following experiment was eventually omitted from the chapter,
84 | # but I've left it in here, since it's an important negative result:
85 | # basic l2 regularization didn't help much. The reason (I believe) is
86 | # that using convolutional-pooling layers is already a pretty strong
87 | # regularizer.
def regularized_dbl_conv():
    """For each of eight ``lmbda`` values (1e-5 up to 100, in powers of
    ten), build and train the conv + conv + fully-connected + softmax
    network three times, passing ``lmbda`` to SGD.  Nothing is returned.

    """
    lmbdas = [0.00001, 0.0001, 0.001, 0.01, 0.1, 1.0, 10.0, 100.0]
    for lmbda in lmbdas:
        for j in range(3):
            print("Conv + Conv + FC num %s, with regularization %s" % (j, lmbda))
            first_conv = ConvPoolLayer(
                image_shape=(mini_batch_size, 1, 28, 28),
                filter_shape=(20, 1, 5, 5),
                poolsize=(2, 2))
            second_conv = ConvPoolLayer(
                image_shape=(mini_batch_size, 20, 12, 12),
                filter_shape=(40, 20, 5, 5),
                poolsize=(2, 2))
            hidden = FullyConnectedLayer(n_in=40*4*4, n_out=100)
            output = SoftmaxLayer(n_in=100, n_out=10)
            net = Network([first_conv, second_conv, hidden, output],
                          mini_batch_size)
            net.SGD(training_data, 60, mini_batch_size, 0.1,
                    validation_data, test_data, lmbda=lmbda)
102 |
def dbl_conv_relu():
    """For each of nine ``lmbda`` values (0.0, then 1e-5 up to 100 in
    powers of ten), build and train the ReLU conv + conv +
    fully-connected + softmax network three times, passing ``lmbda`` to
    SGD.  Nothing is returned.

    """
    lmbdas = [0.0, 0.00001, 0.0001, 0.001, 0.01, 0.1, 1.0, 10.0, 100.0]
    for lmbda in lmbdas:
        for j in range(3):
            print("Conv + Conv + FC num %s, relu, with regularization %s" % (j, lmbda))
            first_conv = ConvPoolLayer(
                image_shape=(mini_batch_size, 1, 28, 28),
                filter_shape=(20, 1, 5, 5),
                poolsize=(2, 2),
                activation_fn=ReLU)
            second_conv = ConvPoolLayer(
                image_shape=(mini_batch_size, 20, 12, 12),
                filter_shape=(40, 20, 5, 5),
                poolsize=(2, 2),
                activation_fn=ReLU)
            hidden = FullyConnectedLayer(
                n_in=40*4*4, n_out=100, activation_fn=ReLU)
            output = SoftmaxLayer(n_in=100, n_out=10)
            net = Network([first_conv, second_conv, hidden, output],
                          mini_batch_size)
            net.SGD(training_data, 60, mini_batch_size, 0.03,
                    validation_data, test_data, lmbda=lmbda)
119 |
120 | #### Some subsequent functions may make use of the expanded MNIST
121 | #### data. That can be generated by running expand_mnist.py.
122 |
def expanded_data(n=100):
    """Train on the expanded data set, three times over, a ReLU network
    with two conv-pool layers, one fully-connected layer of ``n``
    neurons and a softmax layer.  We'll try n=100, 300, and 1000.  Only
    the network from the last repetition is returned.

    """
    expanded_training_data, _, _ = network3.load_data_shared(
        "../data/mnist_expanded.pkl.gz")
    for j in range(3):
        print("Training with expanded data, %s neurons in the FC layer, run num %s" % (n, j))
        first_conv = ConvPoolLayer(image_shape=(mini_batch_size, 1, 28, 28),
                                   filter_shape=(20, 1, 5, 5),
                                   poolsize=(2, 2),
                                   activation_fn=ReLU)
        second_conv = ConvPoolLayer(image_shape=(mini_batch_size, 20, 12, 12),
                                    filter_shape=(40, 20, 5, 5),
                                    poolsize=(2, 2),
                                    activation_fn=ReLU)
        hidden = FullyConnectedLayer(n_in=40*4*4, n_out=n, activation_fn=ReLU)
        output = SoftmaxLayer(n_in=n, n_out=10)
        net = Network([first_conv, second_conv, hidden, output],
                      mini_batch_size)
        # Validation and test data are the module-level (unexpanded) sets.
        net.SGD(expanded_training_data, 60, mini_batch_size, 0.03,
                validation_data, test_data, lmbda=0.1)
    return net
146 |
def expanded_data_double_fc(n=100):
    """Train on the expanded data set, three times over, a ReLU network
    with two conv-pool layers, two fully-connected layers of ``n``
    neurons each and a softmax layer.  We'll try n=100, 300, and 1000.
    Nothing is returned.

    """
    expanded_training_data, _, _ = network3.load_data_shared(
        "../data/mnist_expanded.pkl.gz")
    for j in range(3):
        print("Training with expanded data, %s neurons in two FC layers, run num %s" % (n, j))
        first_conv = ConvPoolLayer(image_shape=(mini_batch_size, 1, 28, 28),
                                   filter_shape=(20, 1, 5, 5),
                                   poolsize=(2, 2),
                                   activation_fn=ReLU)
        second_conv = ConvPoolLayer(image_shape=(mini_batch_size, 20, 12, 12),
                                    filter_shape=(40, 20, 5, 5),
                                    poolsize=(2, 2),
                                    activation_fn=ReLU)
        first_fc = FullyConnectedLayer(
            n_in=40*4*4, n_out=n, activation_fn=ReLU)
        second_fc = FullyConnectedLayer(n_in=n, n_out=n, activation_fn=ReLU)
        output = SoftmaxLayer(n_in=n, n_out=10)
        net = Network([first_conv, second_conv, first_fc, second_fc, output],
                      mini_batch_size)
        # Validation and test data are the module-level (unexpanded) sets.
        net.SGD(expanded_training_data, 60, mini_batch_size, 0.03,
                validation_data, test_data, lmbda=0.1)
170 |
def double_fc_dropout(p0, p1, p2, repetitions):
    """Train ``repetitions`` dropout networks on the expanded data set
    and return them in a list.

    Each network has two ReLU conv-pool layers, two ReLU fully-connected
    layers of 1000 neurons and a softmax layer.  ``p0``, ``p1`` and
    ``p2`` are the ``p_dropout`` values given to the first
    fully-connected layer, the second fully-connected layer and the
    softmax layer respectively.

    """
    expanded_training_data, _, _ = network3.load_data_shared(
        "../data/mnist_expanded.pkl.gz")
    nets = []
    for j in range(repetitions):
        # Joining the str() of each item with single spaces reproduces
        # the output of a multi-item Python 2 print statement.
        header = ("\n\nTraining using a dropout network with parameters ",
                  p0, p1, p2)
        print(" ".join(str(item) for item in header))
        print("Training with expanded data, run num %s" % j)
        first_conv = ConvPoolLayer(image_shape=(mini_batch_size, 1, 28, 28),
                                   filter_shape=(20, 1, 5, 5),
                                   poolsize=(2, 2),
                                   activation_fn=ReLU)
        second_conv = ConvPoolLayer(image_shape=(mini_batch_size, 20, 12, 12),
                                    filter_shape=(40, 20, 5, 5),
                                    poolsize=(2, 2),
                                    activation_fn=ReLU)
        first_fc = FullyConnectedLayer(
            n_in=40*4*4, n_out=1000, activation_fn=ReLU, p_dropout=p0)
        second_fc = FullyConnectedLayer(
            n_in=1000, n_out=1000, activation_fn=ReLU, p_dropout=p1)
        output = SoftmaxLayer(n_in=1000, n_out=10, p_dropout=p2)
        net = Network([first_conv, second_conv, first_fc, second_fc, output],
                      mini_batch_size)
        net.SGD(expanded_training_data, 40, mini_batch_size, 0.03,
                validation_data, test_data)
        nets.append(net)
    return nets
196 |
def ensemble(nets):
    """Takes as input a list of nets, and then computes the accuracy on
    the test data when classifications are computed by taking a vote
    amongst the nets.  Returns a tuple containing a list of indices
    for test data which is erroneously classified, and a list of the
    corresponding erroneous predictions.

    The number of test items and of mini-batches is derived from the
    test data and from each net's ``mini_batch_size`` rather than being
    fixed at 10,000 and 1,000.  As a side effect, each net gains the
    attributes ``test_mb_predictions`` and ``test_predictions``.

    Raises ``ValueError`` if ``nets`` is empty.

    Note that this is a quick-and-dirty kluge: it'd be more reusable
    (and faster) to define a Theano function taking the vote.  But
    this works.

    """
    if not nets:
        raise ValueError("ensemble() needs at least one net")
    test_x, test_y = test_data
    test_y_eval = test_y.eval()
    num_test = len(test_y_eval)
    for net in nets:
        i = T.lscalar() # mini-batch index
        net.test_mb_predictions = theano.function(
            [i], net.layers[-1].y_out,
            givens={
                net.x:
                test_x[i*net.mini_batch_size: (i+1)*net.mini_batch_size]
            })
        # Only whole mini-batches are evaluated.
        num_batches = num_test // net.mini_batch_size
        net.test_predictions = list(np.concatenate(
            [net.test_mb_predictions(b) for b in range(num_batches)]))
    # list(...) so the per-item votes can be indexed on Python 3 as well.
    all_test_predictions = list(
        zip(*[net.test_predictions for net in nets]))
    def plurality(p): return Counter(p).most_common(1)[0][0]
    plurality_test_predictions = [plurality(p)
                                  for p in all_test_predictions]
    # zip() stops at the shortest prediction list, so score only the
    # items that every net actually predicted.
    num_predicted = len(plurality_test_predictions)
    error_locations = [j for j in range(num_predicted)
                       if plurality_test_predictions[j] != test_y_eval[j]]
    erroneous_predictions = [plurality_test_predictions[j]
                             for j in error_locations]
    print("Accuracy is {:.2%}".format(
        1 - len(error_locations) / float(num_predicted)))
    return error_locations, erroneous_predictions
232 |
def plot_errors(error_locations, erroneous_predictions=None):
    """Draw up to 40 of the test images whose indices are listed in
    ``error_locations`` on a 5 by 8 grid, and return the ``plt`` module
    so the caller can save or show the figure.

    Each image is annotated with the entry of the test labels at that
    index and, when ``erroneous_predictions`` is given (and non-empty),
    with the corresponding entry of that list.

    """
    test_x, test_y = test_data[0].eval(), test_data[1].eval()
    plt.figure()
    error_images = [np.array(test_x[i]).reshape(28, -1) for i in error_locations]
    n = min(40, len(error_locations))
    for j in range(n):
        # Floor division keeps the grid row an integer on Python 3 too.
        ax = plt.subplot2grid((5, 8), (j // 8, j % 8))
        ax.matshow(error_images[j], cmap = matplotlib.cm.binary)
        ax.text(24, 5, test_y[error_locations[j]])
        if erroneous_predictions:
            ax.text(24, 24, erroneous_predictions[j])
        # Hide the tick marks of the subplot just drawn.
        plt.xticks(np.array([]))
        plt.yticks(np.array([]))
    plt.tight_layout()
    return plt
248 |
def plot_filters(net, layer, x, y):

    """Plot the filters for net after the (convolutional) layer number
    layer.  They are plotted in x by y format.  So, for example, if we
    have 20 filters after layer 0, then we can call plot_filters(net, 0, 5, 4) to
    get a 5 by 4 plot of all filters.  Only element 0 of each filter
    (``filters[j][0]``) is drawn.  Returns the ``plt`` module so the
    caller can save or show the figure."""
    filters = net.layers[layer].w.eval()
    fig = plt.figure()
    for j in range(len(filters)):
        # Subplot positions are numbered from 1, not 0.
        ax = fig.add_subplot(y, x, j + 1)
        ax.matshow(filters[j][0], cmap = matplotlib.cm.binary)
        # Hide the tick marks of the subplot just drawn.
        plt.xticks(np.array([]))
        plt.yticks(np.array([]))
    plt.tight_layout()
    return plt
264 |
265 |
266 | #### Helper method to run all experiments in the book
267 |
def run_experiments():

    """Run every experiment in this module in sequence, then save three
    figures for the final dropout ensemble.  The later experiments need
    the expanded training data, which can be generated by running
    expand_mnist.py.

    """
    shallow()
    basic_conv()
    omit_FC()
    dbl_conv(activation_fn=sigmoid)
    # omitted, but still interesting: regularized_dbl_conv()
    dbl_conv_relu()
    for fc_size in (100, 300, 1000):
        expanded_data(n=fc_size)
    for fc_size in (100, 300, 1000):
        expanded_data_double_fc(n=fc_size)
    nets = double_fc_dropout(0.5, 0.5, 0.5, 5)
    # plot the erroneous digits in the ensemble of nets just trained
    error_locations, erroneous_predictions = ensemble(nets)
    plot_errors(error_locations, erroneous_predictions).savefig(
        "ensemble_errors.png")
    # plot the filters learned by the first of the nets just trained
    first_net = nets[0]
    plot_filters(first_net, 0, 5, 4).savefig("net_full_layer_0.png")
    plot_filters(first_net, 1, 8, 5).savefig("net_full_layer_1.png")
297 |
298 |
--------------------------------------------------------------------------------
/src/expand_mnist.py:
--------------------------------------------------------------------------------
1 | """expand_mnist.py
2 | ~~~~~~~~~~~~~~~~~~
3 |
4 | Take the 50,000 MNIST training images, and create an expanded set of
5 | 250,000 images, by displacing each training image up, down, left and
6 | right, by one pixel. Save the resulting file to
7 | ../data/mnist_expanded.pkl.gz.
8 |
9 | Note that this program is memory intensive, and may not run on small
10 | systems.
11 |
12 | """
13 |
14 | from __future__ import print_function
15 |
16 | #### Libraries
17 |
18 | # Standard library
19 | import cPickle
20 | import gzip
21 | import os.path
22 | import random
23 |
24 | # Third-party libraries
25 | import numpy as np
26 |
print("Expanding the MNIST training set")

if os.path.exists("../data/mnist_expanded.pkl.gz"):
    print("The expanded training set already exists. Exiting.")
else:
    # NOTE: unpickling can execute arbitrary code; only run this on a
    # trusted copy of the data file.  The context manager closes the
    # file even if loading fails.
    with gzip.open("../data/mnist.pkl.gz", 'rb') as f:
        training_data, validation_data, test_data = cPickle.load(f)
    expanded_training_pairs = []
    for j, (x, y) in enumerate(zip(training_data[0], training_data[1]), 1):
        # Keep the original image, then add its four shifted copies.
        expanded_training_pairs.append((x, y))
        image = np.reshape(x, (-1, 28))
        if j % 1000 == 0: print("Expanding image number", j)
        # iterate over data telling us the details of how to
        # do the displacement: roll the image by d along axis, then
        # blank the row ("first") or column ("last") numbered index,
        # which holds the pixels that wrapped around
        for d, axis, index_position, index in [
                (1,  0, "first", 0),
                (-1, 0, "first", 27),
                (1,  1, "last",  0),
                (-1, 1, "last",  27)]:
            new_img = np.roll(image, d, axis)
            if index_position == "first":
                new_img[index, :] = np.zeros(28)
            else:
                new_img[:, index] = np.zeros(28)
            expanded_training_pairs.append((np.reshape(new_img, 784), y))
    random.shuffle(expanded_training_pairs)
    # Turn the list of (image, label) pairs back into [images, labels].
    expanded_training_data = [list(d) for d in zip(*expanded_training_pairs)]
    print("Saving expanded data. This may take a few minutes.")
    with gzip.open("../data/mnist_expanded.pkl.gz", "w") as f:
        cPickle.dump((expanded_training_data, validation_data, test_data), f)
61 |
--------------------------------------------------------------------------------
/src/mnist_average_darkness.py:
--------------------------------------------------------------------------------
1 | """
2 | mnist_average_darkness
3 | ~~~~~~~~~~~~~~~~~~~~~~
4 |
5 | A naive classifier for recognizing handwritten digits from the MNIST
6 | data set. The program classifies digits based on how dark they are
7 | --- the idea is that digits like "1" tend to be less dark than digits
8 | like "8", simply because the latter has a more complex shape. When
9 | shown an image the classifier returns whichever digit in the training
10 | data had the closest average darkness.
11 |
12 | The program works in two steps: first it trains the classifier, and
13 | then it applies the classifier to the MNIST test data to see how many
14 | digits are correctly classified.
15 |
16 | Needless to say, this isn't a very good way of recognizing handwritten
17 | digits! Still, it's useful to show what sort of performance we get
18 | from naive ideas."""
19 |
20 | #### Libraries
21 | # Standard library
22 | from collections import defaultdict
23 |
24 | # My libraries
25 | import mnist_loader
26 |
def main():
    """Train the average-darkness classifier on the training data and
    print how many of the test images it classifies correctly."""
    training_data, validation_data, test_data = mnist_loader.load_data()
    # training phase: compute the average darknesses for each digit,
    # based on the training data
    avgs = avg_darknesses(training_data)
    # testing phase: see how many of the test images are classified
    # correctly
    num_correct = sum(int(guess_digit(image, avgs) == digit)
                      for image, digit in zip(test_data[0], test_data[1]))
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print("Baseline classifier using average darkness of image.")
    print("%s of %s values correct." % (num_correct, len(test_data[1])))
38 |
def avg_darknesses(training_data):
    """ Return a defaultdict whose keys are the digits that occur in
    ``training_data[1]`` (0 through 9 for MNIST).  For each digit we
    compute a value which is the average darkness of training images
    containing that digit.  The darkness for any particular image is
    just the sum of the darknesses for each pixel.

    ``training_data`` is a pair ``(images, digits)`` of equal-length
    sequences; an empty pair gives an empty result."""
    digit_counts = defaultdict(int)
    darknesses = defaultdict(float)
    for image, digit in zip(training_data[0], training_data[1]):
        digit_counts[digit] += 1
        darknesses[digit] += sum(image)
    avgs = defaultdict(float)
    # items() (not iteritems()) so this also runs on Python 3.
    for digit, n in digit_counts.items():
        avgs[digit] = darknesses[digit] / n
    return avgs
53 |
def guess_digit(image, avgs):
    """Return the digit whose average darkness in the training data is
    closest to the darkness of ``image``.  Note that ``avgs`` is
    assumed to be a defaultdict whose keys are 0...9, and whose values
    are the corresponding average darknesses across the training data.
    Raises ``ValueError`` if ``avgs`` is empty."""
    darkness = sum(image)
    # items() (not iteritems()) so this also runs on Python 3.
    distances = {k: abs(v-darkness) for k, v in avgs.items()}
    return min(distances, key=distances.get)
62 |
# Run the baseline classifier only when this file is executed as a
# script, not when it is imported.
if __name__ == "__main__":
    main()
65 |
--------------------------------------------------------------------------------
/src/mnist_loader.py:
--------------------------------------------------------------------------------
1 | """
2 | mnist_loader
3 | ~~~~~~~~~~~~
4 |
5 | A library to load the MNIST image data. For details of the data
6 | structures that are returned, see the doc strings for ``load_data``
7 | and ``load_data_wrapper``. In practice, ``load_data_wrapper`` is the
8 | function usually called by our neural network code.
9 | """
10 |
11 | #### Libraries
12 | # Standard library
13 | import pickle
14 | import gzip
15 |
16 | # Third-party libraries
17 | import numpy as np
18 |
def load_data(filename='../data/mnist.pkl.gz'):
    """Return the MNIST data as a tuple containing the training data,
    the validation data, and the test data.

    ``filename`` is the path of the gzipped pickle to read.  The
    default is a path relative to the current working directory.

    The ``training_data`` is returned as a tuple with two entries.
    The first entry contains the actual training images.  This is a
    numpy ndarray with 50,000 entries.  Each entry is, in turn, a
    numpy ndarray with 784 values, representing the 28 * 28 = 784
    pixels in a single MNIST image.

    The second entry in the ``training_data`` tuple is a numpy ndarray
    containing 50,000 entries.  Those entries are just the digit
    values (0...9) for the corresponding images contained in the first
    entry of the tuple.

    The ``validation_data`` and ``test_data`` are similar, except
    each contains only 10,000 images.

    This is a nice data format, but for use in neural networks it's
    helpful to modify the format of the ``training_data`` a little.
    That's done in the wrapper function ``load_data_wrapper()``, see
    below.
    """
    # NOTE(security): unpickling can execute arbitrary code, so only
    # load files from a trusted source.
    # The ``with`` block closes the file even if unpickling raises.
    with gzip.open(filename, 'rb') as f:
        training_data, validation_data, test_data = pickle.load(
            f, encoding='bytes')
    return (training_data, validation_data, test_data)
46 |
def load_data_wrapper():
    """Return a tuple containing ``(training_data, validation_data,
    test_data)``. Based on ``load_data``, but the format is more
    convenient for use in our implementation of neural networks.

    In particular, ``training_data`` is a list containing 50,000
    2-tuples ``(x, y)``.  ``x`` is a numpy.ndarray of shape (784, 1)
    containing the input image.  ``y`` is a numpy.ndarray of shape
    (10, 1) representing the unit vector corresponding to the
    correct digit for ``x``.

    ``validation_data`` and ``test_data`` are lists containing 10,000
    2-tuples ``(x, y)``.  In each case, ``x`` is a numpy.ndarray of
    shape (784, 1) containing the input image, and ``y`` is the
    corresponding classification, i.e., the digit values (integers)
    corresponding to ``x``.

    Obviously, this means we're using slightly different formats for
    the training data and the validation / test data.  These formats
    turn out to be the most convenient for use in our neural network
    code."""
    tr_d, va_d, te_d = load_data()
    training_inputs = [np.reshape(x, (784, 1)) for x in tr_d[0]]
    training_results = [vectorized_result(y) for y in tr_d[1]]
    # ``zip`` returns a one-shot iterator under Python 3.  Wrapping it in
    # ``list`` gives callers the lists promised above, which support
    # ``len``, shuffling and repeated passes over the data.
    training_data = list(zip(training_inputs, training_results))
    validation_inputs = [np.reshape(x, (784, 1)) for x in va_d[0]]
    validation_data = list(zip(validation_inputs, va_d[1]))
    test_inputs = [np.reshape(x, (784, 1)) for x in te_d[0]]
    test_data = list(zip(test_inputs, te_d[1]))
    return (training_data, validation_data, test_data)
77 |
def vectorized_result(j, num_classes=10):
    """Return a ``num_classes``-dimensional unit vector (a numpy array
    of shape ``(num_classes, 1)``) with a 1.0 in the jth position and
    zeroes elsewhere.  This is used to convert a digit (0...9) into a
    corresponding desired output from the neural network.

    ``num_classes`` defaults to 10, the number of MNIST digit classes,
    so existing one-argument calls behave as before."""
    e = np.zeros((num_classes, 1))
    e[j] = 1.0
    return e
86 |
--------------------------------------------------------------------------------
/src/mnist_svm.py:
--------------------------------------------------------------------------------
1 | """
2 | mnist_svm
3 | ~~~~~~~~~
4 |
5 | A classifier program for recognizing handwritten digits from the MNIST
6 | data set, using an SVM classifier."""
7 |
8 | #### Libraries
9 | # My libraries
10 | import mnist_loader
11 |
12 | # Third-party libraries
13 | from sklearn import svm
14 |
def svm_baseline():
    """Fit a default ``svm.SVC`` on the training data returned by
    ``mnist_loader.load_data`` and print how many of the test images
    it classifies correctly."""
    # The validation split is not used by this baseline.
    training_data, _, test_data = mnist_loader.load_data()
    # train
    clf = svm.SVC()
    clf.fit(training_data[0], training_data[1])
    # test
    predictions = [int(a) for a in clf.predict(test_data[0])]
    num_correct = sum(int(a == y) for a, y in zip(predictions, test_data[1]))
    # Single-argument ``print(...)`` calls work under Python 2 and 3.
    print("Baseline classifier using an SVM.")
    print("%s of %s values correct." % (num_correct, len(test_data[1])))
25 |
# Call svm_baseline() only when this file is executed as a script, not
# when it is imported as a module.
if __name__ == "__main__":
    svm_baseline()
28 |
29 |
--------------------------------------------------------------------------------
/src/network.py:
--------------------------------------------------------------------------------
1 | """
2 | network.py
3 | ~~~~~~~~~~
4 |
5 | A module to implement the stochastic gradient descent learning
6 | algorithm for a feedforward neural network. Gradients are calculated
7 | using backpropagation. Note that I have focused on making the code
8 | simple, easily readable, and easily modifiable. It is not optimized,
9 | and omits many desirable features.
10 | """
11 |
12 | #### Libraries
13 | # Standard library
14 | import random
15 |
16 | # Third-party libraries
17 | import numpy as np
18 |
class Network(object):
    """A feedforward network of sigmoid neurons, trained with mini-batch
    stochastic gradient descent using gradients from backpropagation."""

    def __init__(self, sizes):
        """The list ``sizes`` contains the number of neurons in the
        respective layers of the network.  For example, if the list
        was [2, 3, 1] then it would be a three-layer network, with the
        first layer containing 2 neurons, the second layer 3 neurons,
        and the third layer 1 neuron.  The biases and weights for the
        network are initialized randomly, using a Gaussian
        distribution with mean 0, and variance 1.  Note that the first
        layer is assumed to be an input layer, and by convention we
        won't set any biases for those neurons, since biases are only
        ever used in computing the outputs from later layers."""
        self.num_layers = len(sizes)
        self.sizes = sizes
        self.biases = [np.random.randn(y, 1) for y in sizes[1:]]
        self.weights = [np.random.randn(y, x)
                        for x, y in zip(sizes[:-1], sizes[1:])]

    def feedforward(self, a):
        """Return the output of the network if ``a`` is input."""
        for b, w in zip(self.biases, self.weights):
            a = sigmoid(np.dot(w, a)+b)
        return a

    def SGD(self, training_data, epochs, mini_batch_size, eta,
            test_data=None):
        """Train the neural network using mini-batch stochastic
        gradient descent.  The ``training_data`` is an iterable of
        tuples ``(x, y)`` representing the training inputs and the
        desired outputs.  The other non-optional parameters are
        self-explanatory.  If ``test_data`` is provided then the
        network will be evaluated against the test data after each
        epoch, and partial progress printed out.  This is useful for
        tracking progress, but slows things down substantially.

        Both data arguments are copied into lists, so the caller's
        ``training_data`` is not shuffled in place."""
        # Materialize the inputs: under Python 3 ``zip`` yields a one-shot
        # iterator that supports neither ``len`` nor repeated passes.
        training_data = list(training_data)
        n = len(training_data)
        # Only touch ``test_data`` when it was supplied; calling
        # ``list(None)`` would raise TypeError for the default argument.
        if test_data is not None:
            test_data = list(test_data)
            n_test = len(test_data)
        for j in range(epochs):
            random.shuffle(training_data)
            mini_batches = [
                training_data[k:k+mini_batch_size]
                for k in range(0, n, mini_batch_size)]
            for mini_batch in mini_batches:
                self.update_mini_batch(mini_batch, eta)
            if test_data:
                print("Epoch {0}: {1} / {2}".format(
                    j, self.evaluate(test_data), n_test))
            else:
                print("Epoch {0} complete".format(j))

    def update_mini_batch(self, mini_batch, eta):
        """Update the network's weights and biases by applying
        gradient descent using backpropagation to a single mini batch.
        The ``mini_batch`` is a list of tuples ``(x, y)``, and ``eta``
        is the learning rate."""
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        # Accumulate the per-example gradients over the mini batch.
        for x, y in mini_batch:
            delta_nabla_b, delta_nabla_w = self.backprop(x, y)
            nabla_b = [nb+dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
            nabla_w = [nw+dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
        # Step against the averaged gradient.
        self.weights = [w-(eta/len(mini_batch))*nw
                        for w, nw in zip(self.weights, nabla_w)]
        self.biases = [b-(eta/len(mini_batch))*nb
                       for b, nb in zip(self.biases, nabla_b)]

    def backprop(self, x, y):
        """Return a tuple ``(nabla_b, nabla_w)`` representing the
        gradient for the cost function C_x.  ``nabla_b`` and
        ``nabla_w`` are layer-by-layer lists of numpy arrays, similar
        to ``self.biases`` and ``self.weights``."""
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        # feedforward
        activation = x
        activations = [x] # list to store all the activations, layer by layer
        zs = [] # list to store all the z vectors, layer by layer
        for b, w in zip(self.biases, self.weights):
            z = np.dot(w, activation)+b
            zs.append(z)
            activation = sigmoid(z)
            activations.append(activation)
        # backward pass
        delta = self.cost_derivative(activations[-1], y) * \
            sigmoid_prime(zs[-1])
        nabla_b[-1] = delta
        nabla_w[-1] = np.dot(delta, activations[-2].transpose())
        # Note that the variable l in the loop below is used a little
        # differently to the notation in Chapter 2 of the book.  Here,
        # l = 1 means the last layer of neurons, l = 2 is the
        # second-last layer, and so on.  It's a renumbering of the
        # scheme in the book, used here to take advantage of the fact
        # that Python can use negative indices in lists.
        for l in range(2, self.num_layers):
            z = zs[-l]
            sp = sigmoid_prime(z)
            delta = np.dot(self.weights[-l+1].transpose(), delta) * sp
            nabla_b[-l] = delta
            nabla_w[-l] = np.dot(delta, activations[-l-1].transpose())
        return (nabla_b, nabla_w)

    def evaluate(self, test_data):
        """Return the number of test inputs for which the neural
        network outputs the correct result. Note that the neural
        network's output is assumed to be the index of whichever
        neuron in the final layer has the highest activation."""
        test_results = [(np.argmax(self.feedforward(x)), y)
                        for (x, y) in test_data]
        return sum(int(x == y) for (x, y) in test_results)

    def cost_derivative(self, output_activations, y):
        # Raw docstring: ``\p`` is not a valid escape sequence in a
        # normal string literal.
        r"""Return the vector of partial derivatives \partial C_x /
        \partial a for the output activations."""
        return (output_activations-y)
135 |
136 | #### Miscellaneous functions
def sigmoid(z):
    """Return the logistic function 1 / (1 + exp(-z)) of ``z``."""
    denominator = 1.0 + np.exp(-z)
    return 1.0 / denominator
140 |
def sigmoid_prime(z):
    """Return the derivative of the sigmoid function at ``z``,
    computed as s * (1 - s) where s = sigmoid(z)."""
    s = sigmoid(z)
    return s * (1 - s)
144 |
--------------------------------------------------------------------------------
/src/network2.py:
--------------------------------------------------------------------------------
1 | """network2.py
2 | ~~~~~~~~~~~~~~
3 |
4 | An improved version of network.py, implementing the stochastic
5 | gradient descent learning algorithm for a feedforward neural network.
6 | Improvements include the addition of the cross-entropy cost function,
7 | regularization, and better initialization of network weights. Note
8 | that I have focused on making the code simple, easily readable, and
9 | easily modifiable. It is not optimized, and omits many desirable
10 | features.
11 |
12 | """
13 |
14 | #### Libraries
15 | # Standard library
16 | import json
17 | import random
18 | import sys
19 |
20 | # Third-party libraries
21 | import numpy as np
22 |
23 |
24 | #### Define the quadratic and cross-entropy cost functions
25 |
class QuadraticCost(object):
    """Quadratic cost, exposed as static ``fn`` and ``delta`` methods."""

    @staticmethod
    def fn(a, y):
        """Return the cost for an output ``a`` against the desired
        output ``y``: half the squared norm of ``a - y``.

        """
        residual = a - y
        return 0.5 * np.linalg.norm(residual) ** 2

    @staticmethod
    def delta(z, a, y):
        """Return the error delta from the output layer: ``a - y``
        multiplied by ``sigmoid_prime(z)``."""
        residual = a - y
        return residual * sigmoid_prime(z)
40 |
41 |
class CrossEntropyCost(object):
    """Cross-entropy cost, exposed as static ``fn`` and ``delta`` methods."""

    @staticmethod
    def fn(a, y):
        """Return the cross-entropy cost for an output ``a`` against
        the desired output ``y``, summed over all entries.

        ``np.nan_to_num`` is applied before summing for numerical
        stability.  When ``a`` and ``y`` both hold 1.0 in the same
        slot, ``(1-y)*np.log(1-a)`` evaluates to nan, and
        ``np.nan_to_num`` turns that into the correct value (0.0).

        """
        target_term = -y * np.log(a)
        complement_term = (1 - y) * np.log(1 - a)
        return np.sum(np.nan_to_num(target_term - complement_term))

    @staticmethod
    def delta(z, a, y):
        """Return the error delta from the output layer, ``a - y``.

        ``z`` is ignored.  It is accepted only so that the signature
        matches the ``delta`` method of the other cost classes.

        """
        return a - y
65 |
66 |
67 | #### Main Network class
class Network(object):
    """A feedforward network of sigmoid neurons with a pluggable cost
    function, trained by mini-batch stochastic gradient descent with
    optional L2 regularization."""

    def __init__(self, sizes, cost=CrossEntropyCost):
        """The list ``sizes`` contains the number of neurons in the respective
        layers of the network.  For example, if the list was [2, 3, 1]
        then it would be a three-layer network, with the first layer
        containing 2 neurons, the second layer 3 neurons, and the
        third layer 1 neuron.  The biases and weights for the network
        are initialized randomly, using
        ``self.default_weight_initializer`` (see docstring for that
        method).

        ``cost`` is a class providing static ``fn(a, y)`` and
        ``delta(z, a, y)`` methods.

        """
        self.num_layers = len(sizes)
        self.sizes = sizes
        self.default_weight_initializer()
        self.cost=cost

    def default_weight_initializer(self):
        """Initialize each weight using a Gaussian distribution with mean 0
        and standard deviation 1 over the square root of the number of
        weights connecting to the same neuron.  Initialize the biases
        using a Gaussian distribution with mean 0 and standard
        deviation 1.

        Note that the first layer is assumed to be an input layer, and
        by convention we won't set any biases for those neurons, since
        biases are only ever used in computing the outputs from later
        layers.

        """
        self.biases = [np.random.randn(y, 1) for y in self.sizes[1:]]
        self.weights = [np.random.randn(y, x)/np.sqrt(x)
                        for x, y in zip(self.sizes[:-1], self.sizes[1:])]

    def large_weight_initializer(self):
        """Initialize the weights using a Gaussian distribution with mean 0
        and standard deviation 1.  Initialize the biases using a
        Gaussian distribution with mean 0 and standard deviation 1.

        Note that the first layer is assumed to be an input layer, and
        by convention we won't set any biases for those neurons, since
        biases are only ever used in computing the outputs from later
        layers.

        This weight and bias initializer uses the same approach as in
        Chapter 1, and is included for purposes of comparison.  It
        will usually be better to use the default weight initializer
        instead.

        """
        self.biases = [np.random.randn(y, 1) for y in self.sizes[1:]]
        self.weights = [np.random.randn(y, x)
                        for x, y in zip(self.sizes[:-1], self.sizes[1:])]

    def feedforward(self, a):
        """Return the output of the network if ``a`` is input."""
        for b, w in zip(self.biases, self.weights):
            a = sigmoid(np.dot(w, a)+b)
        return a

    def SGD(self, training_data, epochs, mini_batch_size, eta,
            lmbda = 0.0,
            evaluation_data=None,
            monitor_evaluation_cost=False,
            monitor_evaluation_accuracy=False,
            monitor_training_cost=False,
            monitor_training_accuracy=False):
        """Train the neural network using mini-batch stochastic gradient
        descent.  The ``training_data`` is a list of tuples ``(x, y)``
        representing the training inputs and the desired outputs.  The
        other non-optional parameters are self-explanatory, as is the
        regularization parameter ``lmbda``.  The method also accepts
        ``evaluation_data``, usually either the validation or test
        data.  We can monitor the cost and accuracy on either the
        evaluation data or the training data, by setting the
        appropriate flags.  The method returns a tuple containing four
        lists: the (per-epoch) costs on the evaluation data, the
        accuracies on the evaluation data, the costs on the training
        data, and the accuracies on the training data.  All values are
        evaluated at the end of each training epoch.  So, for example,
        if we train for 30 epochs, then the first element of the tuple
        will be a 30-element list containing the cost on the
        evaluation data at the end of each epoch. Note that the lists
        are empty if the corresponding flag is not set.

        A ``training_data`` list is shuffled in place.  Any other
        iterable (for example a ``zip`` object) is first copied into a
        list, and the same is done for ``evaluation_data``.

        """
        # Under Python 3 ``zip`` yields a one-shot iterator, which supports
        # neither ``len`` nor ``random.shuffle`` nor repeated passes.
        # Lists supplied by the caller are used as-is.
        if not isinstance(training_data, list):
            training_data = list(training_data)
        if evaluation_data is not None and \
           not isinstance(evaluation_data, list):
            evaluation_data = list(evaluation_data)
        if evaluation_data: n_data = len(evaluation_data)
        n = len(training_data)
        evaluation_cost, evaluation_accuracy = [], []
        training_cost, training_accuracy = [], []
        for j in range(epochs):
            random.shuffle(training_data)
            mini_batches = [
                training_data[k:k+mini_batch_size]
                for k in range(0, n, mini_batch_size)]
            for mini_batch in mini_batches:
                self.update_mini_batch(
                    mini_batch, eta, lmbda, len(training_data))
            print("Epoch %s training complete" % j)
            if monitor_training_cost:
                cost = self.total_cost(training_data, lmbda)
                training_cost.append(cost)
                print("Cost on training data: {}".format(cost))
            if monitor_training_accuracy:
                accuracy = self.accuracy(training_data, convert=True)
                training_accuracy.append(accuracy)
                print("Accuracy on training data: {} / {}".format(
                    accuracy, n))
            if monitor_evaluation_cost:
                cost = self.total_cost(evaluation_data, lmbda, convert=True)
                evaluation_cost.append(cost)
                print("Cost on evaluation data: {}".format(cost))
            if monitor_evaluation_accuracy:
                accuracy = self.accuracy(evaluation_data)
                evaluation_accuracy.append(accuracy)
                # Reuse ``accuracy`` rather than running a second full
                # pass over the evaluation data just for the message.
                print("Accuracy on evaluation data: {} / {}".format(
                    accuracy, n_data))
            # Blank separator line between epochs.
            print("")
        return evaluation_cost, evaluation_accuracy, \
            training_cost, training_accuracy

    def update_mini_batch(self, mini_batch, eta, lmbda, n):
        """Update the network's weights and biases by applying gradient
        descent using backpropagation to a single mini batch.  The
        ``mini_batch`` is a list of tuples ``(x, y)``, ``eta`` is the
        learning rate, ``lmbda`` is the regularization parameter, and
        ``n`` is the total size of the training data set.

        """
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        # Accumulate the per-example gradients over the mini batch.
        for x, y in mini_batch:
            delta_nabla_b, delta_nabla_w = self.backprop(x, y)
            nabla_b = [nb+dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
            nabla_w = [nw+dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
        # The (1-eta*(lmbda/n)) factor is the weight decay from the
        # regularization term.  The biases are not regularized.
        self.weights = [(1-eta*(lmbda/n))*w-(eta/len(mini_batch))*nw
                        for w, nw in zip(self.weights, nabla_w)]
        self.biases = [b-(eta/len(mini_batch))*nb
                       for b, nb in zip(self.biases, nabla_b)]

    def backprop(self, x, y):
        """Return a tuple ``(nabla_b, nabla_w)`` representing the
        gradient for the cost function C_x.  ``nabla_b`` and
        ``nabla_w`` are layer-by-layer lists of numpy arrays, similar
        to ``self.biases`` and ``self.weights``."""
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        # feedforward
        activation = x
        activations = [x] # list to store all the activations, layer by layer
        zs = [] # list to store all the z vectors, layer by layer
        for b, w in zip(self.biases, self.weights):
            z = np.dot(w, activation)+b
            zs.append(z)
            activation = sigmoid(z)
            activations.append(activation)
        # backward pass
        delta = (self.cost).delta(zs[-1], activations[-1], y)
        nabla_b[-1] = delta
        nabla_w[-1] = np.dot(delta, activations[-2].transpose())
        # Note that the variable l in the loop below is used a little
        # differently to the notation in Chapter 2 of the book.  Here,
        # l = 1 means the last layer of neurons, l = 2 is the
        # second-last layer, and so on.  It's a renumbering of the
        # scheme in the book, used here to take advantage of the fact
        # that Python can use negative indices in lists.
        for l in range(2, self.num_layers):
            z = zs[-l]
            sp = sigmoid_prime(z)
            delta = np.dot(self.weights[-l+1].transpose(), delta) * sp
            nabla_b[-l] = delta
            nabla_w[-l] = np.dot(delta, activations[-l-1].transpose())
        return (nabla_b, nabla_w)

    def accuracy(self, data, convert=False):
        """Return the number of inputs in ``data`` for which the neural
        network outputs the correct result. The neural network's
        output is assumed to be the index of whichever neuron in the
        final layer has the highest activation.

        The flag ``convert`` should be set to False if the data set is
        validation or test data (the usual case), and to True if the
        data set is the training data. The need for this flag arises
        due to differences in the way the results ``y`` are
        represented in the different data sets.  In particular, it
        flags whether we need to convert between the different
        representations.  It may seem strange to use different
        representations for the different data sets.  Why not use the
        same representation for all three data sets?  It's done for
        efficiency reasons -- the program usually evaluates the cost
        on the training data and the accuracy on other data sets.
        These are different types of computations, and using different
        representations speeds things up.  More details on the
        representations can be found in
        mnist_loader.load_data_wrapper.

        """
        if convert:
            results = [(np.argmax(self.feedforward(x)), np.argmax(y))
                       for (x, y) in data]
        else:
            results = [(np.argmax(self.feedforward(x)), y)
                       for (x, y) in data]
        return sum(int(x == y) for (x, y) in results)

    def total_cost(self, data, lmbda, convert=False):
        """Return the total cost for the data set ``data``.  The flag
        ``convert`` should be set to False if the data set is the
        training data (the usual case), and to True if the data set is
        the validation or test data.  See comments on the similar (but
        reversed) convention for the ``accuracy`` method, above.

        ``data`` must support ``len``.
        """
        cost = 0.0
        for x, y in data:
            a = self.feedforward(x)
            if convert: y = vectorized_result(y)
            cost += self.cost.fn(a, y)/len(data)
        # L2 regularization term over all weight matrices.
        cost += 0.5*(lmbda/len(data))*sum(
            np.linalg.norm(w)**2 for w in self.weights)
        return cost

    def save(self, filename):
        """Save the neural network to the file ``filename`` as JSON
        holding the layer sizes, weights, biases and the name of the
        cost class."""
        data = {"sizes": self.sizes,
                "weights": [w.tolist() for w in self.weights],
                "biases": [b.tolist() for b in self.biases],
                "cost": str(self.cost.__name__)}
        # The ``with`` block closes the file even if serialization fails.
        with open(filename, "w") as f:
            json.dump(data, f)
299 |
300 | #### Loading a Network
def load(filename):
    """Load a neural network from the file ``filename``.  Returns an
    instance of Network.

    The file is read as JSON with the keys ``"sizes"``, ``"weights"``,
    ``"biases"`` and ``"cost"``.  The ``"cost"`` value is looked up by
    name as an attribute of this module.

    """
    # The ``with`` block closes the file even if parsing raises.
    with open(filename, "r") as f:
        data = json.load(f)
    cost = getattr(sys.modules[__name__], data["cost"])
    net = Network(data["sizes"], cost=cost)
    net.weights = [np.array(w) for w in data["weights"]]
    net.biases = [np.array(b) for b in data["biases"]]
    return net
314 |
315 | #### Miscellaneous functions
def vectorized_result(j, num_classes=10):
    """Return a ``num_classes``-dimensional unit vector (a numpy array
    of shape ``(num_classes, 1)``) with a 1.0 in the j'th position and
    zeroes elsewhere.  This is used to convert a digit (0...9) into a
    corresponding desired output from the neural network.

    ``num_classes`` defaults to 10, so existing one-argument calls
    behave as before.

    """
    e = np.zeros((num_classes, 1))
    e[j] = 1.0
    return e
325 |
def sigmoid(z):
    """Return the logistic function 1 / (1 + exp(-z)) of ``z``."""
    exp_neg_z = np.exp(-z)
    return 1.0 / (1.0 + exp_neg_z)
329 |
def sigmoid_prime(z):
    """Return the derivative of the sigmoid function at ``z``,
    computed as s * (1 - s) where s = sigmoid(z)."""
    activation = sigmoid(z)
    return activation * (1 - activation)
333 |
--------------------------------------------------------------------------------
/src/network3.py:
--------------------------------------------------------------------------------
1 | """network3.py
2 | ~~~~~~~~~~~~~~
3 |
4 | A Theano-based program for training and running simple neural
5 | networks.
6 |
7 | Supports several layer types (fully connected, convolutional, max
8 | pooling, softmax), and activation functions (sigmoid, tanh, and
9 | rectified linear units, with more easily added).
10 |
11 | When run on a CPU, this program is much faster than network.py and
12 | network2.py. However, unlike network.py and network2.py it can also
13 | be run on a GPU, which makes it faster still.
14 |
15 | Because the code is based on Theano, the code is different in many
16 | ways from network.py and network2.py. However, where possible I have
17 | tried to maintain consistency with the earlier programs. In
18 | particular, the API is similar to network2.py. Note that I have
19 | focused on making the code simple, easily readable, and easily
20 | modifiable. It is not optimized, and omits many desirable features.
21 |
22 | This program incorporates ideas from the Theano documentation on
23 | convolutional neural nets (notably,
24 | http://deeplearning.net/tutorial/lenet.html ), from Misha Denil's
25 | implementation of dropout (https://github.com/mdenil/dropout ), and
26 | from Chris Olah (http://colah.github.io ).
27 |
28 | """
29 |
30 | #### Libraries
31 | # Standard library
32 | import cPickle
33 | import gzip
34 |
35 | # Third-party libraries
36 | import numpy as np
37 | import theano
38 | import theano.tensor as T
39 | from theano.tensor.nnet import conv
40 | from theano.tensor.nnet import softmax
41 | from theano.tensor import shared_randomstreams
42 | from theano.tensor.signal import downsample
43 |
44 | # Activation functions for neurons
def linear(z):
    """Identity activation: return ``z`` unchanged."""
    return z
def ReLU(z):
    """Rectified linear activation: ``T.maximum`` of 0.0 and ``z``."""
    rectified = T.maximum(0.0, z)
    return rectified
47 | from theano.tensor.nnet import sigmoid
48 | from theano.tensor import tanh
49 |
50 |
51 | #### Constants
# Set ``GPU`` to False to skip the Theano GPU configuration below.
GPU = True
if GPU:
    # Single-argument ``print(...)`` calls work under Python 2 and 3.
    print("Trying to run under a GPU. If this is not desired, then modify "+\
        "network3.py\nto set the GPU flag to False.")
    # ``except Exception`` rather than a bare ``except`` so that
    # KeyboardInterrupt and SystemExit are not swallowed.
    try: theano.config.device = 'gpu'
    except Exception: pass # it's already set
    theano.config.floatX = 'float32'
else:
    print("Running with a CPU. If this is not desired, then the modify "+\
        "network3.py to set\nthe GPU flag to True.")
62 |
63 | #### Load the MNIST data
def load_data_shared(filename="../data/mnist.pkl.gz"):
    """Load the gzipped pickle ``filename`` and return a list
    ``[training, validation, test]``.  Each entry is the pair
    ``(shared_x, shared_y)`` built by the nested ``shared`` helper.

    """
    # NOTE(security): unpickling can execute arbitrary code, so only
    # load files from a trusted source.
    # The ``with`` block closes the file even if unpickling raises.
    with gzip.open(filename, 'rb') as f:
        training_data, validation_data, test_data = cPickle.load(f)
    def shared(data):
        """Place the data into shared variables.  This allows Theano to copy
        the data to the GPU, if one is available.

        """
        shared_x = theano.shared(
            np.asarray(data[0], dtype=theano.config.floatX), borrow=True)
        shared_y = theano.shared(
            np.asarray(data[1], dtype=theano.config.floatX), borrow=True)
        # The labels are stored as floatX and cast to int32 on return.
        return shared_x, T.cast(shared_y, "int32")
    return [shared(training_data), shared(validation_data), shared(test_data)]
79 |
80 | #### Main class used to construct and train networks
class Network(object):
    """Chains a list of layer objects into one symbolic Theano graph
    and trains it with mini-batch stochastic gradient descent."""

    def __init__(self, layers, mini_batch_size):
        """Takes a list of `layers`, describing the network architecture, and
        a value for the `mini_batch_size` to be used during training
        by stochastic gradient descent.

        """
        self.layers = layers
        self.mini_batch_size = mini_batch_size
        self.params = [param for layer in self.layers for param in layer.params]
        self.x = T.matrix("x")
        self.y = T.ivector("y")
        # The first layer receives the symbolic input as both its plain
        # and its dropout input.
        init_layer = self.layers[0]
        init_layer.set_inpt(self.x, self.x, self.mini_batch_size)
        # Every later layer is fed by the outputs of the layer before it.
        for j in range(1, len(self.layers)):
            prev_layer, layer  = self.layers[j-1], self.layers[j]
            layer.set_inpt(
                prev_layer.output, prev_layer.output_dropout, self.mini_batch_size)
        self.output = self.layers[-1].output
        self.output_dropout = self.layers[-1].output_dropout

    def SGD(self, training_data, epochs, mini_batch_size, eta,
            validation_data, test_data, lmbda=0.0):
        """Train the network using mini-batch stochastic gradient descent.

        ``training_data``, ``validation_data`` and ``test_data`` are
        each unpacked as an ``(x, y)`` pair.  ``eta`` is the learning
        rate and ``lmbda`` the L2 regularization parameter.  Progress
        is printed.  The compiled prediction function is stored as
        ``self.test_mb_predictions``.

        NOTE(review): the number of batches is computed from the
        ``mini_batch_size`` argument, while the batches are sliced with
        ``self.mini_batch_size``.  The two are presumably meant to be
        equal -- confirm against callers.

        """
        training_x, training_y = training_data
        validation_x, validation_y = validation_data
        test_x, test_y = test_data

        # compute number of minibatches for training, validation and testing
        # ``size`` is a helper defined elsewhere in this module.  Floor
        # division keeps the counts integral under Python 3, where ``/``
        # would yield a float that ``range`` rejects.
        num_training_batches = size(training_data)//mini_batch_size
        num_validation_batches = size(validation_data)//mini_batch_size
        num_test_batches = size(test_data)//mini_batch_size

        # define the (regularized) cost function, symbolic gradients, and updates
        l2_norm_squared = sum([(layer.w**2).sum() for layer in self.layers])
        cost = self.layers[-1].cost(self)+\
               0.5*lmbda*l2_norm_squared/num_training_batches
        grads = T.grad(cost, self.params)
        updates = [(param, param-eta*grad)
                   for param, grad in zip(self.params, grads)]

        # define functions to train a mini-batch, and to compute the
        # accuracy in validation and test mini-batches.
        i = T.lscalar() # mini-batch index
        train_mb = theano.function(
            [i], cost, updates=updates,
            givens={
                self.x:
                training_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size],
                self.y:
                training_y[i*self.mini_batch_size: (i+1)*self.mini_batch_size]
            })
        validate_mb_accuracy = theano.function(
            [i], self.layers[-1].accuracy(self.y),
            givens={
                self.x:
                validation_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size],
                self.y:
                validation_y[i*self.mini_batch_size: (i+1)*self.mini_batch_size]
            })
        test_mb_accuracy = theano.function(
            [i], self.layers[-1].accuracy(self.y),
            givens={
                self.x:
                test_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size],
                self.y:
                test_y[i*self.mini_batch_size: (i+1)*self.mini_batch_size]
            })
        self.test_mb_predictions = theano.function(
            [i], self.layers[-1].y_out,
            givens={
                self.x:
                test_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size]
            })
        # Do the actual training
        best_validation_accuracy = 0.0
        # Initialized up front so the summary below cannot hit an unbound
        # name when no epoch completes (for example ``epochs == 0``).
        best_iteration = None
        test_accuracy = None
        for epoch in range(epochs):
            for minibatch_index in range(num_training_batches):
                iteration = num_training_batches*epoch+minibatch_index
                if iteration % 1000 == 0:
                    print("Training mini-batch number {0}".format(iteration))
                # Called for its side effect: applies the parameter updates.
                train_mb(minibatch_index)
                # True on the last mini-batch of each epoch.
                if (iteration+1) % num_training_batches == 0:
                    validation_accuracy = np.mean(
                        [validate_mb_accuracy(j) for j in range(num_validation_batches)])
                    print("Epoch {0}: validation accuracy {1:.2%}".format(
                        epoch, validation_accuracy))
                    if validation_accuracy >= best_validation_accuracy:
                        print("This is the best validation accuracy to date.")
                        best_validation_accuracy = validation_accuracy
                        best_iteration = iteration
                        if test_data:
                            test_accuracy = np.mean(
                                [test_mb_accuracy(j) for j in range(num_test_batches)])
                            print('The corresponding test accuracy is {0:.2%}'.format(
                                test_accuracy))
        print("Finished training network.")
        if best_iteration is not None:
            print("Best validation accuracy of {0:.2%} obtained at iteration {1}".format(
                best_validation_accuracy, best_iteration))
        if test_accuracy is not None:
            print("Corresponding test accuracy of {0:.2%}".format(test_accuracy))
182 |
183 | #### Define layer types
184 |
class ConvPoolLayer(object):
    """A convolutional layer and a max-pooling layer rolled into one.
    A more sophisticated implementation would keep the two separate,
    but here they are always used together, and combining them keeps
    the code simpler.

    """

    def __init__(self, filter_shape, image_shape, poolsize=(2, 2),
                 activation_fn=sigmoid):
        """`filter_shape` is a 4-tuple: the number of filters, the
        number of input feature maps, the filter height, and the
        filter width.

        `image_shape` is a 4-tuple: the mini-batch size, the number of
        input feature maps, the image height, and the image width.

        `poolsize` is a 2-tuple holding the y and x pooling sizes.

        """
        self.filter_shape = filter_shape
        self.image_shape = image_shape
        self.poolsize = poolsize
        self.activation_fn=activation_fn
        # initialize weights and biases
        n_out = (filter_shape[0]*np.prod(filter_shape[2:])/np.prod(poolsize))
        # The weights are drawn before the biases, which fixes the order
        # in which numpy's global random state is consumed.
        initial_w = np.random.normal(
            loc=0, scale=np.sqrt(1.0/n_out), size=filter_shape)
        self.w = theano.shared(
            np.asarray(initial_w, dtype=theano.config.floatX),
            borrow=True)
        initial_b = np.random.normal(
            loc=0, scale=1.0, size=(filter_shape[0],))
        self.b = theano.shared(
            np.asarray(initial_b, dtype=theano.config.floatX),
            borrow=True)
        self.params = [self.w, self.b]

    def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
        """Reshape ``inpt`` to ``self.image_shape``, then convolve,
        max-pool, add the biases and apply the activation function.
        The result is stored in ``self.output`` and
        ``self.output_dropout``.  ``inpt_dropout`` and
        ``mini_batch_size`` are not used by this layer."""
        self.inpt = inpt.reshape(self.image_shape)
        convolved = conv.conv2d(
            input=self.inpt, filters=self.w, filter_shape=self.filter_shape,
            image_shape=self.image_shape)
        pooled = downsample.max_pool_2d(
            input=convolved, ds=self.poolsize, ignore_border=True)
        biased = pooled + self.b.dimshuffle('x', 0, 'x', 'x')
        self.output = self.activation_fn(biased)
        self.output_dropout = self.output # no dropout in the convolutional layers
235 |
class FullyConnectedLayer(object):
    """A fully-connected layer with optional dropout on its input."""

    def __init__(self, n_in, n_out, activation_fn=sigmoid, p_dropout=0.0):
        """Create the shared weight matrix and bias vector.

        `n_in` and `n_out` are the numbers of input and output neurons,
        `activation_fn` is applied to the weighted input, and
        `p_dropout` is the dropout probability passed to `dropout_layer`.

        """
        self.n_in = n_in
        self.n_out = n_out
        self.activation_fn = activation_fn
        self.p_dropout = p_dropout

        # Weights are drawn before biases; keep this order so a seeded
        # run produces the same initial parameters.
        initial_w = np.random.normal(
            loc=0.0, scale=np.sqrt(1.0/n_out), size=(n_in, n_out))
        self.w = theano.shared(
            np.asarray(initial_w, dtype=theano.config.floatX),
            name='w', borrow=True)

        initial_b = np.random.normal(loc=0.0, scale=1.0, size=(n_out,))
        self.b = theano.shared(
            np.asarray(initial_b, dtype=theano.config.floatX),
            name='b', borrow=True)

        self.params = [self.w, self.b]

    def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
        """Build the symbolic outputs of the layer.

        `self.output` is computed from `inpt` with the weighted input
        scaled by `1-p_dropout`; `self.output_dropout` is computed from
        `inpt_dropout` after passing it through `dropout_layer`.  Both
        inputs are reshaped to `(mini_batch_size, n_in)`.

        """
        flat_shape = (mini_batch_size, self.n_in)

        self.inpt = inpt.reshape(flat_shape)
        scaled_z = (1-self.p_dropout)*T.dot(self.inpt, self.w) + self.b
        self.output = self.activation_fn(scaled_z)
        self.y_out = T.argmax(self.output, axis=1)

        self.inpt_dropout = dropout_layer(
            inpt_dropout.reshape(flat_shape), self.p_dropout)
        dropout_z = T.dot(self.inpt_dropout, self.w) + self.b
        self.output_dropout = self.activation_fn(dropout_z)

    def accuracy(self, y):
        "Return the fraction of the mini-batch whose prediction equals `y`."
        matches = T.eq(y, self.y_out)
        return T.mean(matches)
269 |
class SoftmaxLayer(object):
    """A softmax output layer with optional dropout on its input."""

    def __init__(self, n_in, n_out, p_dropout=0.0):
        """Create zero-initialized shared weights and biases.

        `n_in` and `n_out` are the numbers of input and output neurons
        and `p_dropout` is the dropout probability passed to
        `dropout_layer`.

        """
        self.n_in = n_in
        self.n_out = n_out
        self.p_dropout = p_dropout

        zero_w = np.zeros((n_in, n_out), dtype=theano.config.floatX)
        self.w = theano.shared(zero_w, name='w', borrow=True)

        zero_b = np.zeros((n_out,), dtype=theano.config.floatX)
        self.b = theano.shared(zero_b, name='b', borrow=True)

        self.params = [self.w, self.b]

    def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
        """Build the symbolic outputs of the layer.

        `self.output` is computed from `inpt` with the weighted input
        scaled by `1-p_dropout`; `self.output_dropout` is computed from
        `inpt_dropout` after passing it through `dropout_layer`.  Both
        inputs are reshaped to `(mini_batch_size, n_in)`.

        """
        flat_shape = (mini_batch_size, self.n_in)

        self.inpt = inpt.reshape(flat_shape)
        scaled_z = (1-self.p_dropout)*T.dot(self.inpt, self.w) + self.b
        self.output = softmax(scaled_z)
        self.y_out = T.argmax(self.output, axis=1)

        self.inpt_dropout = dropout_layer(
            inpt_dropout.reshape(flat_shape), self.p_dropout)
        dropout_z = T.dot(self.inpt_dropout, self.w) + self.b
        self.output_dropout = softmax(dropout_z)

    def cost(self, net):
        "Return the log-likelihood cost, using the dropout output and `net.y`."
        log_probs = T.log(self.output_dropout)
        rows = T.arange(net.y.shape[0])
        # Pick, for each row, the log-probability of the entry indexed by net.y.
        return -T.mean(log_probs[rows, net.y])

    def accuracy(self, y):
        "Return the fraction of the mini-batch whose prediction equals `y`."
        matches = T.eq(y, self.y_out)
        return T.mean(matches)
300 |
301 |
302 | #### Miscellanea
def size(data):
    "Return the number of rows in the first element of the dataset `data`."
    first = data[0]
    stored = first.get_value(borrow=True)
    return stored.shape[0]
306 |
def dropout_layer(layer, p_dropout):
    """Return `layer` multiplied elementwise by a random 0/1 mask.

    Each mask entry is drawn from a binomial with n=1 and success
    probability `1-p_dropout`, then cast to `theano.config.floatX`.

    """
    # The seed is derived from a RandomState that is itself seeded with 0.
    seed = np.random.RandomState(0).randint(999999)
    srng = shared_randomstreams.RandomStreams(seed)
    keep_probability = 1-p_dropout
    mask = srng.binomial(n=1, p=keep_probability, size=layer.shape)
    return layer*T.cast(mask, theano.config.floatX)
312 |
--------------------------------------------------------------------------------
/src/old/blog/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/src/old/blog/__init__.py
--------------------------------------------------------------------------------
/src/old/blog/common_knowledge.py:
--------------------------------------------------------------------------------
1 | """
2 | common_knowledge
3 | ~~~~~~~~~~~~~~~~
4 |
5 | Try to determine whether or not it's possible to relate the
6 | descriptions given by two different autoencoders.
7 |
8 | """
9 |
10 | #### Libraries
11 | # My libraries
12 | from backprop2 import Network, sigmoid_vec
13 | import mnist_loader
14 |
15 | # Third-party libraries
16 | import matplotlib
17 | import matplotlib.pyplot as plt
18 | import numpy as np
19 |
20 |
#### Parameters
# Size of the training sets.  May range from 1000 to 12,500.  Lower
# will be faster, higher will give more accuracy.
SIZE = 5000
# Number of hidden units in the autoencoder
HIDDEN = 30


def _encode(autoencoder, inputs):
    """Return the hidden-layer activations of ``autoencoder`` for each
    entry of ``inputs``, using its first weight matrix and bias."""
    return [sigmoid_vec(np.dot(autoencoder.weights[0], x) +
                        autoencoder.biases[0])
            for x in inputs]


print("\nGenerating training data")
training_data, _, _ = mnist_loader.load_data_nn()
# Four disjoint slices of the training data: one to train each
# autoencoder, one to learn the mapping between them, one to test it.
td_1 = [(x, x) for x, _ in training_data[0:SIZE]]
td_2 = [(x, x) for x, _ in training_data[12500:12500+SIZE]]
td_3 = [x for x, _ in training_data[25000:25000+SIZE]]
test = [x for x, _ in training_data[37500:37500+SIZE]]

print("\nFinding first autoencoder")
ae_1 = Network([784, HIDDEN, 784])
ae_1.SGD(td_1, 4, 10, 0.01, 0.05)

print("\nFinding second autoencoder")
ae_2 = Network([784, HIDDEN, 784])
# Train on td_2: it was prepared in (x, x) form for exactly this purpose
# but the script previously trained both autoencoders on td_1.
ae_2.SGD(td_2, 4, 10, 0.01, 0.05)

print("\nGenerating encoded training data")
encoded_td_1 = _encode(ae_1, td_3)
encoded_td_2 = _encode(ae_2, td_3)
# Materialize the pairs as a list so they can be iterated more than once.
encoded_training_data = list(zip(encoded_td_1, encoded_td_2))

print("\nFinding mapping between theories")
net = Network([HIDDEN, HIDDEN])
net.SGD(encoded_training_data, 6, 10, 0.01, 0.05)

print("\nBaseline for comparison: decompress with the first autoencoder")
print("and compress with the second autoencoder")
encoded_test_1 = _encode(ae_1, test)
encoded_test_2 = _encode(ae_2, test)
# test_data is traversed three times below, so it must be a list.
test_data = list(zip(encoded_test_1, encoded_test_2))
# The baseline network decodes with ae_1's second layer and re-encodes
# with ae_2's first layer.
net_baseline = Network([HIDDEN, 784, HIDDEN])
net_baseline.biases[0] = ae_1.biases[1]
net_baseline.weights[0] = ae_1.weights[1]
net_baseline.biases[1] = ae_2.biases[0]
net_baseline.weights[1] = ae_2.weights[0]
error_baseline = sum(np.linalg.norm(net_baseline.feedforward(x)-y, 1)
                     for (x, y) in test_data)
print("Baseline average l1 error per training image: %s" % (
    error_baseline / SIZE,))

print("\nComparing theories with a simple interconversion")
print("Mean desired output activation: %s" % (
    sum(y.mean() for _, y in test_data) / SIZE,))
error = sum(np.linalg.norm(net.feedforward(x)-y, 1) for (x, y) in test_data)
print("Average l1 error per training image: %s" % (error / SIZE,))

print("\nComputing fiducial image inputs")
# Each hidden unit's incoming weights, reshaped to an image and
# normalized by that same weight row's norm (previously the norm of the
# unrelated mapping network ``net`` was used).
fiducial_images_1 = [
    ae_1.weights[0][j,:].reshape(28,28)/np.linalg.norm(ae_1.weights[0][j,:])
    for j in range(HIDDEN)]
fiducial_images_2 = [
    ae_2.weights[0][j,:].reshape(28,28)/np.linalg.norm(ae_2.weights[0][j,:])
    for j in range(HIDDEN)]
# Top row: first autoencoder's features; bottom row: second's.
image = np.concatenate([np.concatenate(fiducial_images_1, axis=1),
                        np.concatenate(fiducial_images_2, axis=1)])
fig = plt.figure()
ax = fig.add_subplot(111)
ax.matshow(image, cmap = matplotlib.cm.binary)
plt.xticks(np.array([]))
plt.yticks(np.array([]))
plt.show()
91 |
--------------------------------------------------------------------------------
/src/old/cost_vs_iterations.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/src/old/cost_vs_iterations.png
--------------------------------------------------------------------------------
/src/old/cost_vs_iterations_trapped.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/src/old/cost_vs_iterations_trapped.png
--------------------------------------------------------------------------------
/src/old/deep_autoencoder.py:
--------------------------------------------------------------------------------
1 | """
2 | deep_autoencoder
3 | ~~~~~~~~~~~~~~~~
4 |
5 | A module which implements deep autoencoders.
6 | """
7 |
8 | #### Libraries
9 | # Standard library
10 | import random
11 |
12 | # My libraries
13 | from backprop2 import Network, sigmoid_vec
14 |
15 | # Third-party libraries
16 | import numpy as np
17 |
18 |
def plot_helper(x):
    """Display ``x`` as a binary-colormap image, 28 pixels wide, with no
    axis ticks.  Blocks in ``plt.show()``."""
    import matplotlib
    import matplotlib.pyplot as plt
    image = np.reshape(x, (-1, 28))
    axes = plt.figure().add_subplot(1, 1, 1)
    axes.matshow(image, cmap=matplotlib.cm.binary)
    # Empty tick arrays hide the tick marks on both axes.
    plt.xticks(np.array([]))
    plt.yticks(np.array([]))
    plt.show()
29 |
30 |
class DeepAutoencoder(Network):
    """A deep autoencoder built from a stack of nested autoencoders."""

    def __init__(self, layers):
        """
        The list ``layers`` specifies the sizes of the nested
        autoencoders.  For example, if ``layers`` is [50, 20, 10] then
        the deep autoencoder will be a neural network with layers of
        size [50, 20, 10, 20, 50]."""
        self.layers = layers
        # Mirror the encoder sizes (excluding the innermost layer) to
        # obtain the decoder sizes.
        Network.__init__(self, layers+layers[-2::-1])

    def train(self, training_data, epochs, mini_batch_size, eta,
              lmbda):
        """
        Train the DeepAutoencoder.  The ``training_data`` is a list of
        training inputs, ``x``, ``mini_batch_size`` is a single
        positive integer, and ``epochs``, ``eta``, ``lmbda`` are lists
        of parameters, with the different list members corresponding
        to the different stages of training.  For example, ``eta[0]``
        is the learning rate used for the first nested autoencoder,
        ``eta[1]`` is the learning rate for the second nested
        autoencoder, and so on.  ``eta[-1]`` is the learning rate used
        for the final stage of fine-tuning.
        """
        print("\nTraining a %s deep autoencoder" % (
            "-".join([str(j) for j in self.sizes]),))
        training_data = double(training_data)
        cur_training_data = training_data[::]
        for j in range(len(self.layers)-1):
            print("\nTraining the %s-%s-%s nested autoencoder" % (
                self.layers[j], self.layers[j+1], self.layers[j]))
            print("%s epochs, mini-batch size %s, eta = %s, lambda = %s" % (
                epochs[j], mini_batch_size, eta[j], lmbda[j]))
            self.train_nested_autoencoder(
                j, cur_training_data, epochs[j], mini_batch_size, eta[j],
                lmbda[j])
            # Encode the data with the layer that was just trained, so
            # the next nested autoencoder is trained on its activations.
            # train_nested_autoencoder has stored the trained parameters
            # in self.weights[j] and self.biases[j].  (The previous code
            # referred to an undefined name ``net`` here.)
            cur_training_data = [
                (sigmoid_vec(np.dot(self.weights[j], x)+self.biases[j]),)*2
                for (x, _) in cur_training_data]
        print("\nFine-tuning network weights with backpropagation")
        print("%s epochs, mini-batch size %s, eta = %s, lambda = %s" % (
            epochs[-1], mini_batch_size, eta[-1], lmbda[-1]))
        self.SGD(training_data, epochs[-1], mini_batch_size, eta[-1],
                 lmbda[-1])

    def train_nested_autoencoder(
        self, j, encoded_training_data, epochs, mini_batch_size, eta, lmbda):
        """
        Train the nested autoencoder that starts at layer ``j`` in the
        deep autoencoder.  Note that ``encoded_training_data`` is a
        list with entries of the form ``(x, x)``, where the ``x`` are
        encoded training inputs for layer ``j``."""
        net = Network([self.layers[j], self.layers[j+1], self.layers[j]])
        # Start the three-layer network from this network's current
        # encoder (index j) and matching decoder (index -j-1) parameters.
        net.biases[0] = self.biases[j]
        net.biases[1] = self.biases[-j-1]
        net.weights[0] = self.weights[j]
        net.weights[1] = self.weights[-j-1]
        net.SGD(encoded_training_data, epochs, mini_batch_size, eta, lmbda)
        # Copy the trained parameters back into the deep autoencoder.
        self.biases[j] = net.biases[0]
        self.biases[-j-1] = net.biases[1]
        self.weights[j] = net.weights[0]
        self.weights[-j-1] = net.weights[1]

    def train_nested_autoencoder_repl(
        self, j, training_data, epochs, mini_batch_size, eta, lmbda):
        """
        This is a convenience method that can be used from the REPL to
        train the nested autoencoder that starts at level ``j`` in the
        deep autoencoder.  Note that ``training_data`` is the input
        data for the first layer of the network, and is a list of
        entries ``x``."""
        # NOTE(review): relies on Network.feedforward accepting ``start``
        # and ``end`` keywords; that method is defined in backprop2.
        self.train_nested_autoencoder(
            j,
            double(
                [self.feedforward(x, start=0, end=j) for x in training_data]),
            epochs, mini_batch_size, eta, lmbda)

    def feature(self, j, k):
        """
        Return the output if neuron number ``k`` in layer ``j`` is
        activated, and all others are not active. """
        a = np.zeros((self.sizes[j], 1))
        a[k] = 1.0
        return self.feedforward(a, start=j, end=self.num_layers)
115 |
def double(l):
    """Return a list containing the pair ``(x, x)`` for each ``x`` in ``l``."""
    pairs = []
    for item in l:
        pairs.append((item, item))
    return pairs
118 |
119 |
--------------------------------------------------------------------------------
/src/old/deep_learning.py:
--------------------------------------------------------------------------------
1 | """
2 | deep_learning
3 | ~~~~~~~~~~~~~
4 |
5 | Module to do deep learning. Most of the functionality needed is
6 | already in the ``backprop2`` and ``deep_autoencoder`` modules, but
7 | this adds convenience functions to help in doing things like unrolling
8 | deep autoencoders, and adding and training a classifier layer."""
9 |
10 | # My Libraries
11 | from backprop2 import Network
12 | from deep_autoencoder import DeepAutoencoder
13 |
def unroll(deep_autoencoder):
    """
    Return a Network built from ``deep_autoencoder.layers`` whose weights
    and biases are the leading (compression-stage) weights and biases of
    ``deep_autoencoder``."""
    encoder_sizes = deep_autoencoder.layers
    net = Network(encoder_sizes)
    # A network with n layers has n-1 weight matrices and bias vectors.
    n_stages = len(deep_autoencoder.layers)-1
    net.weights = deep_autoencoder.weights[:n_stages]
    net.biases = deep_autoencoder.biases[:n_stages]
    return net
22 |
def add_classifier_layer(net, num_outputs):
    """
    Return a new Network with the same parameters as ``net`` plus an
    extra, freshly initialized final layer of ``num_outputs`` neurons."""
    extended_sizes = net.sizes+[num_outputs]
    net_classifier = Network(extended_sizes)
    # Overwrite everything except the new last layer's parameters.
    net_classifier.weights[:-1] = net.weights
    net_classifier.biases[:-1] = net.biases
    return net_classifier
31 |
def SGD_final_layer(
    self, training_data, epochs, mini_batch_size, eta, lmbda):
    """
    Run SGD on only the final layer of the Network ``self``.

    ``training_data`` holds ``(x, y)`` pairs whose ``x`` is the input to
    the whole Network; each ``x`` is first fed forward up to the final
    layer, and a two-layer Network seeded with the final layer's current
    parameters is trained on the result.  The trained parameters are
    then written back into ``self``.
    """
    last_hidden = self.num_layers-2
    encoded_training_data = []
    for x, y in training_data:
        encoded = self.feedforward(x, start=0, end=last_hidden)
        encoded_training_data.append((encoded, y))
    final_layer = Network(self.sizes[-2:])
    final_layer.biases[0] = self.biases[-1]
    final_layer.weights[0] = self.weights[-1]
    final_layer.SGD(
        encoded_training_data, epochs, mini_batch_size, eta, lmbda)
    self.biases[-1] = final_layer.biases[0]
    self.weights[-1] = final_layer.weights[0]
48 |
49 |
# Add the SGD_final_layer method to the Network class, so that it can be
# called as ``net.SGD_final_layer(...)`` on any Network instance once
# this module has been imported.
Network.SGD_final_layer = SGD_final_layer
52 |
--------------------------------------------------------------------------------
/src/old/gradient_descent_hack.py:
--------------------------------------------------------------------------------
1 | """
2 | gradient_descent_hack
3 | ~~~~~~~~~~~~~~~~~~~~~
4 |
5 | This program uses gradient descent to learn weights and biases for a
6 | three-neuron network to compute the XOR function. The program is a
7 | quick-and-dirty hack meant to illustrate the basic ideas of gradient
8 | descent, not a cleanly-designed and generalizable implementation."""
9 |
10 | #### Libraries
11 | # Third-party libraries
12 | import matplotlib.pyplot as plt
13 | import numpy as np
14 |
def sigmoid(z):
    """Return the logistic function 1/(1+exp(-z)) of ``z``."""
    denominator = 1.0+np.exp(-z)
    return 1.0/denominator
17 |
def neuron(w, x):
    """ Return the output of a sigmoid neuron with weights ``w`` and
    inputs ``x``.  ``w[0]`` is the bias and ``w[1:]`` are the input
    weights, so ``w`` has one more element than ``x``."""
    bias, input_weights = w[0], w[1:]
    weighted_input = bias+np.inner(input_weights, x)
    return sigmoid(weighted_input)
23 |
def h(w, x):
    """ Return the output of the three-neuron network with weights ``w``
    and inputs ``x``.  ``w`` is a numpy array of nine elements: three
    per neuron (the bias followed by two input weights).  ``x`` is a
    numpy array with two elements."""
    # The two hidden neurons (top left, bottom left) both see ``x``.
    hidden_out = np.array([neuron(w[0:3], x), neuron(w[3:6], x)])
    # The output neuron sees the two hidden activations.
    return neuron(w[6:9], hidden_out)
33 |
# Truth table of the function we're computing (XOR): INPUTS[i] is a pair
# of input bits and OUTPUTS[i] is the desired output for that pair.
INPUTS = [[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]]
OUTPUTS = [0.0, 1.0, 1.0, 0.0]
37 |
def cost(w):
    """ Return half the sum of squared errors of the network with
    weights ``w`` over the four ``INPUTS``/``OUTPUTS`` pairs (XOR)."""
    squared_error = 0
    for x, y in zip(INPUTS, OUTPUTS):
        squared_error = squared_error+(y-h(w, np.array(x)))**2
    return 0.5 * squared_error
42 |
def partial(f, k, w, epsilon=0.01):
    """ Return a central-difference estimate of the partial derivative
    of the function ``f`` with respect to the ``k``th variable, at
    location ``w``.

    ``f`` must take a numpy array as input.  ``w`` is an array (or any
    sequence of numbers) that can be used as input to ``f``; it is not
    modified.  ``epsilon`` is the step taken on either side of ``w[k]``
    and must be non-zero.

    ``w`` is copied as floating point before being perturbed, so that an
    integer-valued ``w`` does not silently truncate the perturbation.
    """
    if epsilon == 0:
        raise ValueError("epsilon must be non-zero")
    # np.array copies by default, so the caller's ``w`` is left untouched.
    w_plus = np.array(w, dtype=float)
    w_minus = w_plus.copy()
    w_plus[k] += epsilon
    w_minus[k] -= epsilon
    return (f(w_plus)-f(w_minus))/(2*epsilon)
54 |
def gradient_descent(cost, eta, n):
    """ Run ``n`` iterations of gradient descent on the ``cost``
    function with learning rate ``eta``, starting from nine weights
    drawn uniformly from [-1, 1).  Return a tuple ``(w, costs)``: the
    final weight array and the list of cost values seen at the start of
    each iteration.  The current cost is printed on every iteration."""
    w = np.random.uniform(-1, 1, 9)  # initialize weights randomly
    costs = []
    for _ in xrange(n):
        current_cost = cost(w)
        print("Current cost: {0:.3f}".format(current_cost))
        costs.append(current_cost)
        # Numerical gradient, one partial derivative per weight.
        gradient = np.array([partial(cost, k, w) for k in xrange(9)])
        w = w-eta*gradient
    return w, costs
70 |
def main():
    """ Use gradient descent (learning rate 0.1, 10,000 iterations) to
    find weights for a sigmoid neural network computing XOR.  Print the
    final cost and weights, then plot the cost against the iteration
    number."""
    final_weights, cost_history = gradient_descent(cost, 0.1, 10000)
    print("\nFinal cost: {0:.3f}".format(cost(final_weights)))
    print("\nFinal weights: %s" % final_weights)
    plt.plot(np.array(cost_history))
    plt.xlabel('iteration')
    plt.ylabel('cost')
    plt.title('How cost decreases with the number of iterations')
    plt.show()
84 |
# Run the demonstration only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
87 |
--------------------------------------------------------------------------------
/src/old/mnist_100_30_deep_autoencoder.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/src/old/mnist_100_30_deep_autoencoder.png
--------------------------------------------------------------------------------
/src/old/mnist_100_unit_autoencoder.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/src/old/mnist_100_unit_autoencoder.png
--------------------------------------------------------------------------------
/src/old/mnist_10_unit_autoencoder.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/src/old/mnist_10_unit_autoencoder.png
--------------------------------------------------------------------------------
/src/old/mnist_30_component_pca.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/src/old/mnist_30_component_pca.png
--------------------------------------------------------------------------------
/src/old/mnist_30_unit_autoencoder.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/src/old/mnist_30_unit_autoencoder.png
--------------------------------------------------------------------------------
/src/old/mnist_autoencoder.py:
--------------------------------------------------------------------------------
1 | """
2 | mnist_autoencoder
3 | ~~~~~~~~~~~~~~~~~
4 |
5 | Implements an autoencoder for the MNIST data. The program can do two
6 | things: (1) plot the autoencoder's output for the first ten images in
7 | the MNIST test set; and (2) use the autoencoder to build a classifier.
8 | The program is a quick-and-dirty hack --- we'll do things in a more
9 | systematic way in the module ``deep_autoencoder``.
10 | """
11 |
12 | # My Libraries
13 | from backprop2 import Network
14 | import mnist_loader
15 |
16 | # Third-party libraries
17 | import matplotlib
18 | import matplotlib.pyplot as plt
19 | import numpy as np
20 |
def autoencoder_results(hidden_units):
    """
    Load the MNIST data, train an autoencoder with ``hidden_units``
    hidden neurons on the training data, and plot what it produces for
    the first ten test images.
    """
    loaded = mnist_loader.load_data_nn()
    training_data, test_inputs, actual_test_results = loaded
    trained = train_autoencoder(hidden_units, training_data)
    plot_test_results(trained, test_inputs)
31 |
def train_autoencoder(hidden_units, training_data):
    """Return a 784-``hidden_units``-784 Network trained to reproduce
    the inputs of ``training_data``; the labels are ignored."""
    autoencoder_training_data = []
    for x, _ in training_data:
        # The target of an autoencoder is its own input.
        autoencoder_training_data.append((x, x))
    net = Network([784, hidden_units, 784])
    net.SGD(autoencoder_training_data, 6, 10, 0.01, 0.05)
    return net
38 |
def plot_test_results(net, test_inputs):
    """
    Plot the first ten entries of ``test_inputs`` in a row, with the
    output of the autoencoder ``net`` for each of them in a row
    underneath."""
    fig = plt.figure()
    ax = fig.add_subplot(111)
    first_ten = range(10)
    originals = [test_inputs[j].reshape(-1, 28) for j in first_ten]
    reconstructions = [net.feedforward(test_inputs[j]).reshape(-1, 28)
                       for j in first_ten]
    # Lay the digits side by side, then stack the two rows vertically.
    top_row = np.concatenate(originals, axis=1)
    bottom_row = np.concatenate(reconstructions, axis=1)
    image = np.concatenate([top_row, bottom_row])
    ax.matshow(image, cmap=matplotlib.cm.binary)
    plt.xticks(np.array([]))
    plt.yticks(np.array([]))
    plt.show()
55 |
def classifier(hidden_units, n_unlabeled_inputs, n_labeled_inputs):
    """
    Train a semi-supervised classifier and return it.

    An autoencoder is first pretrained on the first
    ``n_unlabeled_inputs`` items of the MNIST training data.  Its first
    layer is reused in a 784-``hidden_units``-10 classifier, which is
    then fine-tuned on the last ``n_labeled_inputs`` items.

    For comparison, a second classifier of the same shape is trained on
    the same labeled items without any pretraining.  The test-set
    results of both are printed.
    """
    training_data, test_inputs, actual_test_results = \
        mnist_loader.load_data_nn()
    n_test = len(test_inputs)

    print("\nUsing pretraining and %s items of unlabeled data" %
          n_unlabeled_inputs)
    net_ae = train_autoencoder(hidden_units, training_data[:n_unlabeled_inputs])
    net_c = Network([784, hidden_units, 10])
    # Keep the pretrained first layer; draw a fresh output layer.  The
    # bias draw comes before the weight draw.
    output_biases = np.random.randn(10, 1)/np.sqrt(10)
    net_c.biases = net_ae.biases[:1]+[output_biases]
    output_weights = np.random.randn(10, hidden_units)/np.sqrt(10)
    net_c.weights = net_ae.weights[:1]+[output_weights]
    # A fresh slice is taken for each SGD call so neither call can see
    # changes the other might make to its list.
    net_c.SGD(training_data[-n_labeled_inputs:], 300, 10, 0.01, 0.05)
    print("Result on test data: %s / %s" % (
        net_c.evaluate(test_inputs, actual_test_results), n_test))

    print("Training a network with %s items of training data" %
          n_labeled_inputs)
    net = Network([784, hidden_units, 10])
    net.SGD(training_data[-n_labeled_inputs:], 300, 10, 0.01, 0.05)
    print("Result on test data: %s / %s" % (
        net.evaluate(test_inputs, actual_test_results), n_test))
    return net_c
84 |
--------------------------------------------------------------------------------
/src/old/mnist_pca.py:
--------------------------------------------------------------------------------
1 | """
2 | mnist_pca
3 | ~~~~~~~~~
4 |
5 | Use PCA to reconstruct some of the MNIST test digits.
6 | """
7 |
8 | # My libraries
9 | import mnist_loader
10 |
11 | # Third-party libraries
12 | import matplotlib
13 | import matplotlib.pyplot as plt
14 | import numpy as np
15 | from sklearn.decomposition import RandomizedPCA
16 |
17 |
# Training: fit a 30-component PCA to the MNIST training images.
# NOTE(review): RandomizedPCA was removed from later scikit-learn
# releases -- confirm the installed version still provides it.
training_data, test_inputs, actual_test_results = mnist_loader.load_data_nn()
pca = RandomizedPCA(n_components=30)
nn_images = [x for (x, y) in training_data]
# Join the images along axis 1, then transpose so each row is one image.
pca_images = np.concatenate(nn_images, axis=1).T
pca_r = pca.fit(pca_images)

# Project the first ten test images onto the components and back again
test_images = np.array(test_inputs[:10]).reshape(10, 784)
test_outputs = pca_r.inverse_transform(pca_r.transform(test_images))

# Plot the first ten test images (top row) and the corresponding
# reconstructions (bottom row)
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
images_in = [test_inputs[j].reshape(-1, 28) for j in range(10)]
images_out = [test_outputs[j].reshape(-1, 28) for j in range(10)]
image_in = np.concatenate(images_in, axis=1)
image_out = np.concatenate(images_out, axis=1)
image = np.concatenate([image_in, image_out], axis=0)
ax.matshow(image, cmap=matplotlib.cm.binary)
plt.xticks(np.array([]))
plt.yticks(np.array([]))
plt.show()
41 |
--------------------------------------------------------------------------------
/src/old/perceptron_learning.py:
--------------------------------------------------------------------------------
1 | """
2 | perceptron_learning
3 | ~~~~~~~~~~~~~~~~~~~
4 |
5 | Demonstrates how a perceptron can learn the NAND gate, using the
6 | perceptron learning algorithm."""
7 |
8 | #### Libraries
9 | # Third-party library
10 | import numpy as np
11 |
class Perceptron(object):
    """ A Perceptron instance can take a function and attempt to
    ``learn`` a bias and set of weights that compute that function,
    using the perceptron learning algorithm."""

    def __init__(self, num_inputs=2):
        """ Initialize the perceptron with the bias and all weights
        set to 0.0.  ``num_inputs`` is the number of input bits to the
        perceptron."""
        self.num_inputs = num_inputs
        self.bias = 0.0
        self.weights = np.zeros(num_inputs)
        # self.inputs is a convenience attribute.  It's a list containing
        # all possible binary inputs to the perceptron.  E.g., for three
        # inputs it is: [np.array([0, 0, 0]), np.array([0, 0, 1]), ...]
        # The bits of each integer x are extracted directly, most
        # significant first, rather than by parsing the text of bin(x).
        self.inputs = [
            np.array([(x >> shift) & 1
                      for shift in range(num_inputs-1, -1, -1)])
            for x in range(2**num_inputs)]

    def output(self, x):
        """ Return the output (0 or 1) from the perceptron, with input
        ``x``."""
        return 1 if np.inner(self.weights, x)+self.bias > 0 else 0

    def learn(self, f, eta=0.1):
        """ Find a bias and a set of weights for a perceptron that
        computes the function ``f``.  ``eta`` is the learning rate, and
        should be a small positive number.  Does not terminate when
        the function cannot be computed using a perceptron.

        ``f`` is called with each array in ``self.inputs`` and should
        return 0 or 1.  The bias, weights and error count are printed
        on every pass over the inputs."""
        # initialize the bias and weights with random values
        self.bias = np.random.normal()
        self.weights = np.random.randn(self.num_inputs)
        number_of_errors = -1  # any non-zero value, to enter the loop
        while number_of_errors != 0:
            number_of_errors = 0
            print("Beginning iteration")
            print("Bias: {:.3f}".format(self.bias))
            print("Weights: " + ", ".join(
                "{:.3f}".format(wt) for wt in self.weights))
            for x in self.inputs:
                # error is +1 or -1 on a misclassified input, 0 otherwise
                error = f(x)-self.output(x)
                if error:
                    number_of_errors += 1
                    # Move the decision boundary toward classifying x
                    # correctly.
                    self.bias = self.bias+eta*error
                    self.weights = self.weights+eta*error*x
            print("Number of errors: {0} \n".format(number_of_errors))
58 |
def f(x):
    """ Target function for the perceptron learning algorithm: the NAND
    of ``x[0]`` and ``x[1]``, returned as 0 or 1.  Any function may be
    substituted, with the caveat that the learning algorithm won't
    terminate if the function cannot be computed by a perceptron."""
    both_on = x[0] and x[1]
    return 0 if both_on else 1
65 |
# When run as a script, learn the two-input target function ``f`` with
# a learning rate of 0.1.
if __name__ == "__main__":
    Perceptron(2).learn(f, 0.1)
68 |
--------------------------------------------------------------------------------
/src/test_mnist.py:
--------------------------------------------------------------------------------
1 | import network
2 | import mnist_loader
3 |
# Load MNIST in the format expected by network.Network.
training_data, validation_data, test_data = mnist_loader.load_data_wrapper()

# Train a 784-100-10 network with SGD, passing the test data along.
net = network.Network([784, 100, 10])
net.SGD(training_data, 30, 10, 0.1, test_data=test_data)
--------------------------------------------------------------------------------