├── .gitignore
├── .idea
    ├── misc.xml
    ├── modules.xml
    ├── neural-networks-and-deep-learning.iml
    └── vcs.xml
├── README.md
├── data
    └── mnist.pkl.gz
├── fig
    ├── backprop_magnitude_nabla.png
    ├── backprop_magnitude_nabla.py
    ├── data_1000.json
    ├── digits.png
    ├── digits_separate.png
    ├── false_minima.png
    ├── false_minima.py
    ├── generate_gradient.py
    ├── initial_gradient.json
    ├── misleading_gradient.png
    ├── misleading_gradient.py
    ├── misleading_gradient_contours.png
    ├── misleading_gradient_contours.py
    ├── mnist.py
    ├── mnist_100_digits.png
    ├── mnist_2_and_1.png
    ├── mnist_complete_zero.png
    ├── mnist_first_digit.png
    ├── mnist_other_features.png
    ├── mnist_really_bad_images.png
    ├── mnist_top_left_feature.png
    ├── more_data.json
    ├── more_data.png
    ├── more_data.py
    ├── more_data_5.png
    ├── more_data_comparison.png
    ├── more_data_log.png
    ├── more_data_rotated_5.png
    ├── more_data_svm.json
    ├── multiple_eta.json
    ├── multiple_eta.png
    ├── multiple_eta.py
    ├── norms_during_training_2_layers.json
    ├── norms_during_training_3_layers.json
    ├── norms_during_training_4_layers.json
    ├── overfitting.json
    ├── overfitting.py
    ├── overfitting1.png
    ├── overfitting2.png
    ├── overfitting3.png
    ├── overfitting4.png
    ├── overfitting_full.json
    ├── overfitting_full.png
    ├── pca_hard_data.png
    ├── pca_hard_data_fit.png
    ├── pca_limitations.py
    ├── regularized.json
    ├── regularized1.png
    ├── regularized2.png
    ├── regularized_full.json
    ├── regularized_full.png
    ├── replaced_by_d3
    │   ├── README.md
    │   ├── relu.png
    │   ├── relu.py
    │   ├── sigmoid.png
    │   ├── sigmoid.py
    │   ├── step.png
    │   ├── step.py
    │   ├── tanh.png
    │   └── tanh.py
    ├── serialize_images_to_json.py
    ├── test.png
    ├── training_speed_2_layers.png
    ├── training_speed_3_layers.png
    ├── training_speed_4_layers.png
    ├── valley.png
    ├── valley.py
    ├── valley2.png
    ├── valley2.py
    ├── weight_initialization.py
    ├── weight_initialization_100.json
    ├── weight_initialization_100.png
    ├── weight_initialization_30.json
    └── weight_initialization_30.png
├── requirements.txt
└── src
    ├── conv.py
    ├── expand_mnist.py
    ├── mnist_average_darkness.py
    ├── mnist_loader.py
    ├── mnist_svm.py
    ├── network.py
    ├── network2.py
    ├── network3.py
    ├── old
        ├── blog
        │   ├── __init__.py
        │   └── common_knowledge.py
        ├── cost_vs_iterations.png
        ├── cost_vs_iterations_trapped.png
        ├── deep_autoencoder.py
        ├── deep_learning.py
        ├── gradient_descent_hack.py
        ├── mnist_100_30_deep_autoencoder.png
        ├── mnist_100_unit_autoencoder.png
        ├── mnist_10_unit_autoencoder.png
        ├── mnist_30_component_pca.png
        ├── mnist_30_unit_autoencoder.png
        ├── mnist_autoencoder.py
        ├── mnist_pca.py
        └── perceptron_learning.py
    └── test_mnist.py


/.gitignore:
--------------------------------------------------------------------------------
1 | *~
2 | *.org
3 | *.pem
4 | *.pkl
5 | *.pyc
6 | .DS_Store
7 | loc.py
8 | src/ec2
9 | 


--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <project version="4">
 3 |   <component name="ProjectLevelVcsManager" settingsEditedManually="false">
 4 |     <OptionsSetting value="true" id="Add" />
 5 |     <OptionsSetting value="true" id="Remove" />
 6 |     <OptionsSetting value="true" id="Checkout" />
 7 |     <OptionsSetting value="true" id="Update" />
 8 |     <OptionsSetting value="true" id="Status" />
 9 |     <OptionsSetting value="true" id="Edit" />
10 |     <ConfirmationsSetting value="0" id="Add" />
11 |     <ConfirmationsSetting value="0" id="Remove" />
12 |   </component>
13 |   <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.5.2 (/Library/Frameworks/Python.framework/Versions/3.5/bin/python3.5)" project-jdk-type="Python SDK" />
14 | </project>


--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project version="4">
3 |   <component name="ProjectModuleManager">
4 |     <modules>
5 |       <module fileurl="file://$PROJECT_DIR$/.idea/neural-networks-and-deep-learning.iml" filepath="$PROJECT_DIR$/.idea/neural-networks-and-deep-learning.iml" />
6 |     </modules>
7 |   </component>
8 | </project>


--------------------------------------------------------------------------------
/.idea/neural-networks-and-deep-learning.iml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <module type="PYTHON_MODULE" version="4">
 3 |   <component name="NewModuleRootManager">
 4 |     <content url="file://$MODULE_DIR$" />
 5 |     <orderEntry type="inheritedJdk" />
 6 |     <orderEntry type="sourceFolder" forTests="false" />
 7 |   </component>
 8 |   <component name="TestRunnerService">
 9 |     <option name="PROJECT_TEST_RUNNER" value="Unittests" />
10 |   </component>
11 | </module>


--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project version="4">
3 |   <component name="VcsDirectoryMappings">
4 |     <mapping directory="$PROJECT_DIR$" vcs="Git" />
5 |   </component>
6 | </project>


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Code samples for "Neural Networks and Deep Learning" (Python 3.x version)
 2 | 
 3 | This repository contains code samples for my (forthcoming) book on
 4 | "Neural Networks and Deep Learning".
 5 | 
 6 | As the code is written to accompany the book, I don't intend to add
 7 | new features.  However, bug reports are welcome, and you should feel
 8 | free to fork and modify the code.
 9 | 
10 | ## Changes
11 | This is the code for the online book "Neural Networks and Deep Learning", modified to run on Python 3.x.
12 | 
13 | If you are interested in the book but prefer to use Python 3, you can use this version.
14 | 
15 | My homepage : http://www.liuxiao.org
16 | 
17 | ## License
18 | 
19 | MIT License
20 | 
21 | Copyright (c) 2012-2015 Michael Nielsen
22 | 
23 | Permission is hereby granted, free of charge, to any person obtaining
24 | a copy of this software and associated documentation files (the
25 | "Software"), to deal in the Software without restriction, including
26 | without limitation the rights to use, copy, modify, merge, publish,
27 | distribute, sublicense, and/or sell copies of the Software, and to
28 | permit persons to whom the Software is furnished to do so, subject to
29 | the following conditions:
30 | 
31 | The above copyright notice and this permission notice shall be
32 | included in all copies or substantial portions of the Software.
33 | 
34 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
35 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
36 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
37 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
38 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
39 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
40 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
41 | 


--------------------------------------------------------------------------------
/data/mnist.pkl.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/data/mnist.pkl.gz


--------------------------------------------------------------------------------
/fig/backprop_magnitude_nabla.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/backprop_magnitude_nabla.png


--------------------------------------------------------------------------------
/fig/backprop_magnitude_nabla.py:
--------------------------------------------------------------------------------
 1 | """
 2 | backprop_magnitude_nabla
 3 | ~~~~~~~~~~~~~~~~~~~~~~~~
 4 | 
 5 | Using backprop2 I constructed a 784-30-30-30-30-30-10 network to classify
 6 | MNIST data.  I ran ten mini-batches of size 100, with eta = 0.01 and
 7 | lambda = 0.05, using:
 8 | 
 9 | net.SGD(otd[:1000], 1, 100, 0.01, 0.05,
10 | 
11 | I obtained the following norms for the (unregularized) nabla_w for the
12 | respective mini-batches:
13 | 
14 | [0.90845722175923671, 2.8852730656073566, 10.696793986223632, 37.75701921183488, 157.7365422527995, 304.43990075227839]
15 | [0.22493835119537842, 0.6555126517964851, 2.6036801277234076, 11.408825365731225, 46.882319190445472, 70.499637502698221]
16 | [0.11935180022357521, 0.19756069137133489, 0.8152794148335869, 3.4590802543293977, 15.470507965493903, 31.032396017142556]
17 | [0.15130005837653659, 0.39687135985664701, 1.4810006139254532, 4.392519005642268, 16.831939776937311, 34.082104455938733]
18 | [0.11594085276308999, 0.17177668061395848, 0.72204558746599512, 3.05062409378366, 14.133001132214286, 29.776204839994385]
19 | [0.10790389807606221, 0.20707152756018626, 0.96348134037828603, 3.9043824079499561, 15.986873430586924, 39.195258080490895]
20 | [0.088613291101645356, 0.129173436407863, 0.4242933114455002, 1.6154682713449411, 7.5451567587160069, 20.180545544006566]
21 | [0.086175380639289575, 0.12571016850457151, 0.44231149185805047, 1.8435833504677326, 7.61973813981073, 19.474539356281781]
22 | [0.095372080184163904, 0.15854489503205446, 0.70244235144444678, 2.6294803575724157, 10.427062019753425, 24.309420272033819]
23 | [0.096453131000155692, 0.13574642196947601, 0.53551377709415471, 2.0247466793066895, 9.4503978546018068, 21.73772148470092]
24 | 
25 | Note that results are listed in order of layer.  They clearly show how
26 | the magnitude of nabla_w decreases as we go back through layers.
27 | 
 28 | In this program I take mini-batches 7, 8, 9 as representative and plot
29 | them.  I omit the results from the first and final layers since they
30 | correspond to 784 input neurons and 10 output neurons, not 30 as in
31 | the other layers, making it difficult to compare results.
32 | 
33 | Note that I haven't attempted to preserve the whole workflow here. It
34 | involved some minor hacking around with backprop2, which messed up
35 | that code.  That's why I've simply put the results in by hand below.
36 | """
37 | 
38 | # Third-party libraries
39 | import matplotlib.pyplot as plt
40 | 
# Gradient norms for the four middle layers, entered by hand from the
# three mini-batches the module docstring picks as representative.
nw1 = [0.129173436407863, 0.4242933114455002,
       1.6154682713449411, 7.5451567587160069]
nw2 = [0.12571016850457151, 0.44231149185805047,
       1.8435833504677326, 7.61973813981073]
nw3 = [0.15854489503205446, 0.70244235144444678,
       2.6294803575724157, 10.427062019753425]

# One line per mini-batch, all against the same layer numbers 1..4.
layer_numbers = range(1, 5)
for layer_norms, line_format in ((nw1, "ro-"), (nw2, "go-"), (nw3, "bo-")):
    plt.plot(layer_numbers, layer_norms, line_format)

plt.xlabel('Layer $l$')
plt.ylabel(r"$\Vert\nabla C^l_w\Vert$")
plt.xticks([1, 2, 3, 4])
plt.show()
53 | 


--------------------------------------------------------------------------------
/fig/digits.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/digits.png


--------------------------------------------------------------------------------
/fig/digits_separate.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/digits_separate.png


--------------------------------------------------------------------------------
/fig/false_minima.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/false_minima.png


--------------------------------------------------------------------------------
/fig/false_minima.py:
--------------------------------------------------------------------------------
 1 | """
 2 | false_minimum
 3 | ~~~~~~~~~~~~~
 4 | 
 5 | Plots a function of two variables with many false minima."""
 6 | 
 7 | #### Libraries
 8 | # Third party libraries
 9 | from matplotlib.ticker import LinearLocator
10 | # Note that axes3d is not explicitly used in the code, but is needed
11 | # to register the 3d plot type correctly
12 | from mpl_toolkits.mplot3d import axes3d 
13 | import matplotlib.pyplot as plt
14 | import numpy
15 | 
fig = plt.figure()
# Figure.gca() no longer accepts keyword arguments in current matplotlib;
# add_subplot creates the same single 3d axes on an empty figure.
ax = fig.add_subplot(111, projection='3d')
X = numpy.arange(-5, 5, 0.1)
Y = numpy.arange(-5, 5, 0.1)
X, Y = numpy.meshgrid(X, Y)
# Product of sines plus a linear tilt in X.
Z = numpy.sin(X)*numpy.sin(Y)+0.2*X

# Checkerboard of white and blue facets: the colour alternates with the
# parity of (row + column).
colortuple = ('w', 'b')
colors = numpy.empty(X.shape, dtype=str)
rows, cols = X.shape
# range instead of xrange, which does not exist on Python 3.
for x in range(rows):
    for y in range(cols):
        colors[x, y] = colortuple[(x + y) % 2]

surf = ax.plot_surface(X, Y, Z, rstride=1, cstride=1, facecolors=colors,
        linewidth=0)

ax.set_xlim3d(-5, 5)
ax.set_ylim3d(-5, 5)
ax.set_zlim3d(-2, 2)
# Three major ticks per axis.  The w_xaxis/w_yaxis/w_zaxis aliases were
# removed from recent matplotlib; xaxis/yaxis/zaxis are the replacements.
ax.xaxis.set_major_locator(LinearLocator(3))
ax.yaxis.set_major_locator(LinearLocator(3))
ax.zaxis.set_major_locator(LinearLocator(3))

plt.show()
40 | 
41 | 


--------------------------------------------------------------------------------
/fig/generate_gradient.py:
--------------------------------------------------------------------------------
  1 | """generate_gradient.py
  2 | ~~~~~~~~~~~~~~~~~~~~~~~
  3 | 
  4 | Use network2 to figure out the average starting values of the gradient
  5 | error terms \delta^l_j = \partial C / \partial z^l_j = \partial C /
  6 | \partial b^l_j.
  7 | 
  8 | """
  9 | 
 10 | #### Libraries
 11 | # Standard library
 12 | import json
 13 | import math
 14 | import random
 15 | import shutil
 16 | import sys
 17 | sys.path.append("../src/")
 18 | 
 19 | # My library
 20 | import mnist_loader
 21 | import network2
 22 | 
 23 | # Third-party libraries
 24 | import matplotlib.pyplot as plt
 25 | import numpy as np
 26 | 
 27 | def main():
 28 |     # Load the data
 29 |     full_td, _, _ = mnist_loader.load_data_wrapper()
 30 |     td = full_td[:1000] # Just use the first 1000 items of training data
 31 |     epochs = 500 # Number of epochs to train for
 32 | 
 33 |     print "\nTwo hidden layers:"
 34 |     net = network2.Network([784, 30, 30, 10])
 35 |     initial_norms(td, net)
 36 |     abbreviated_gradient = [
 37 |         ag[:6] for ag in get_average_gradient(net, td)[:-1]] 
 38 |     print "Saving the averaged gradient for the top six neurons in each "+\
 39 |         "layer.\nWARNING: This will affect the look of the book, so be "+\
 40 |         "sure to check the\nrelevant material (early chapter 5)."
 41 |     f = open("initial_gradient.json", "w")
 42 |     json.dump(abbreviated_gradient, f)
 43 |     f.close()
 44 |     shutil.copy("initial_gradient.json", "../../js/initial_gradient.json")
 45 |     training(td, net, epochs, "norms_during_training_2_layers.json")
 46 |     plot_training(
 47 |         epochs, "norms_during_training_2_layers.json", 2)
 48 | 
 49 |     print "\nThree hidden layers:"
 50 |     net = network2.Network([784, 30, 30, 30, 10])
 51 |     initial_norms(td, net)
 52 |     training(td, net, epochs, "norms_during_training_3_layers.json")
 53 |     plot_training(
 54 |         epochs, "norms_during_training_3_layers.json", 3)
 55 | 
 56 |     print "\nFour hidden layers:"
 57 |     net = network2.Network([784, 30, 30, 30, 30, 10])
 58 |     initial_norms(td, net)
 59 |     training(td, net, epochs, 
 60 |              "norms_during_training_4_layers.json")
 61 |     plot_training(
 62 |         epochs, "norms_during_training_4_layers.json", 4)
 63 | 
 64 | def initial_norms(training_data, net):
 65 |     average_gradient = get_average_gradient(net, training_data)
 66 |     norms = [list_norm(avg) for avg in average_gradient[:-1]]
 67 |     print "Average gradient for the hidden layers: "+str(norms)
 68 |     
 69 | def training(training_data, net, epochs, filename):
 70 |     norms = []
 71 |     for j in range(epochs):
 72 |         average_gradient = get_average_gradient(net, training_data)
 73 |         norms.append([list_norm(avg) for avg in average_gradient[:-1]])
 74 |         print "Epoch: %s" % j
 75 |         net.SGD(training_data, 1, 1000, 0.1, lmbda=5.0)
 76 |     f = open(filename, "w")
 77 |     json.dump(norms, f)
 78 |     f.close()
 79 | 
 80 | def plot_training(epochs, filename, num_layers):
 81 |     f = open(filename, "r")
 82 |     norms = json.load(f)
 83 |     f.close()
 84 |     fig = plt.figure()
 85 |     ax = fig.add_subplot(111)
 86 |     colors = ["#2A6EA6", "#FFA933", "#FF5555", "#55FF55", "#5555FF"]
 87 |     for j in range(num_layers):
 88 |         ax.plot(np.arange(epochs), 
 89 |                 [n[j] for n in norms], 
 90 |                 color=colors[j],
 91 |                 label="Hidden layer %s" % (j+1,))
 92 |     ax.set_xlim([0, epochs])
 93 |     ax.grid(True)
 94 |     ax.set_xlabel('Number of epochs of training')
 95 |     ax.set_title('Speed of learning: %s hidden layers' % num_layers)
 96 |     ax.set_yscale('log')
 97 |     plt.legend(loc="upper right")
 98 |     fig_filename = "training_speed_%s_layers.png" % num_layers
 99 |     plt.savefig(fig_filename)
100 |     shutil.copy(fig_filename, "../../images/"+fig_filename)
101 |     plt.show()
102 | 
def get_average_gradient(net, training_data):
    """Average the first component of ``net.backprop`` over the data set.

    The first element returned by ``net.backprop(x, y)`` is collected
    for every ``(x, y)`` pair (presumably the bias gradients, going by
    the module docstring -- confirm against network2), summed layer by
    layer, divided by the number of examples, and returned as a list of
    flat Python lists, one per layer.
    """
    per_example = []
    for x, y in training_data:
        per_example.append(net.backprop(x, y)[0])
    summed_layers = list_sum(per_example)
    averaged = []
    for layer_total in summed_layers:
        # Flatten to one dimension before converting to a plain list.
        flat = np.reshape(layer_total, len(layer_total))
        averaged.append((flat/len(training_data)).tolist())
    return averaged
108 | 
def zip_sum(a, b):
    """Return a list of pairwise sums of ``a`` and ``b``.

    Pairing stops at the end of the shorter input.
    """
    totals = []
    for left, right in zip(a, b):
        totals.append(left+right)
    return totals
111 | 
def list_sum(l):
    """Fold ``zip_sum`` over ``l``, giving the element-wise sum of its items.

    ``l`` is a non-empty sequence of equal-length sequences; an empty
    ``l`` raises ``TypeError`` (from ``reduce`` with no initial value).
    """
    # reduce is not a builtin on Python 3; functools.reduce is the same
    # function and also exists on Python 2.6+.
    from functools import reduce
    return reduce(zip_sum, l)
114 | 
def list_norm(l):
    """Return the Euclidean norm of the numbers in ``l``."""
    squares = (value*value for value in l)
    return math.sqrt(sum(squares))
117 | 
# Run the experiments only when this file is executed as a script, not
# when it is imported.
if __name__ == "__main__":
    main()
120 | 


--------------------------------------------------------------------------------
/fig/initial_gradient.json:
--------------------------------------------------------------------------------
1 | [[-0.003970677333144113, -0.0031684316985881185, 0.008103235909196014, 0.012598010584130365, -0.026465907331998335, 0.0017583319323150341], [0.04152906589960523, 0.044025552524932406, -0.009669682279354514, 0.046736871369353235, 0.03877302528270452, 0.012336459551975156]]


--------------------------------------------------------------------------------
/fig/misleading_gradient.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/misleading_gradient.png


--------------------------------------------------------------------------------
/fig/misleading_gradient.py:
--------------------------------------------------------------------------------
 1 | """
 2 | misleading_gradient
 3 | ~~~~~~~~~~~~~~~~~~~
 4 | 
 5 | Plots a function which misleads the gradient descent algorithm."""
 6 | 
 7 | #### Libraries
 8 | # Third party libraries
 9 | from matplotlib.ticker import LinearLocator
10 | # Note that axes3d is not explicitly used in the code, but is needed
11 | # to register the 3d plot type correctly
12 | from mpl_toolkits.mplot3d import axes3d 
13 | import matplotlib.pyplot as plt
14 | import numpy
15 | 
fig = plt.figure()
# Figure.gca() no longer accepts keyword arguments in current matplotlib;
# add_subplot creates the same single 3d axes on an empty figure.
ax = fig.add_subplot(111, projection='3d')
X = numpy.arange(-1, 1, 0.025)
Y = numpy.arange(-1, 1, 0.025)
X, Y = numpy.meshgrid(X, Y)
# Quadratic bowl that is ten times steeper in Y than in X.
Z = X**2 + 10*Y**2

# Checkerboard of white and blue facets: the colour alternates with the
# parity of (row + column).
colortuple = ('w', 'b')
colors = numpy.empty(X.shape, dtype=str)
rows, cols = X.shape
# range instead of xrange, which does not exist on Python 3.
for x in range(rows):
    for y in range(cols):
        colors[x, y] = colortuple[(x + y) % 2]

surf = ax.plot_surface(X, Y, Z, rstride=1, cstride=1, facecolors=colors,
        linewidth=0)

ax.set_xlim3d(-1, 1)
ax.set_ylim3d(-1, 1)
ax.set_zlim3d(0, 12)
# Three major ticks per axis.  The w_xaxis/w_yaxis/w_zaxis aliases were
# removed from recent matplotlib; xaxis/yaxis/zaxis are the replacements.
ax.xaxis.set_major_locator(LinearLocator(3))
ax.yaxis.set_major_locator(LinearLocator(3))
ax.zaxis.set_major_locator(LinearLocator(3))
# Hand-placed axis labels, positioned in data coordinates.
ax.text(0.05, -1.8, 0, "$w_1$", fontsize=20)
ax.text(1.5, -0.25, 0, "$w_2$", fontsize=20)
ax.text(1.79, 0, 9.62, "$C$", fontsize=20)

plt.show()
43 | 
44 | 


--------------------------------------------------------------------------------
/fig/misleading_gradient_contours.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/misleading_gradient_contours.png


--------------------------------------------------------------------------------
/fig/misleading_gradient_contours.py:
--------------------------------------------------------------------------------
 1 | """
 2 | misleading_gradient_contours
 3 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 4 | 
 5 | Plots the contours of the function from misleading_gradient.py"""
 6 | 
 7 | #### Libraries
 8 | # Third party libraries
 9 | import matplotlib.pyplot as plt
10 | import numpy
11 | 
# Evaluate Z = X^2 + 10 Y^2 on a square grid over [-1, 1) in both
# directions, with a step of 0.02.
axis_samples = numpy.arange(-1, 1, 0.02)
X, Y = numpy.meshgrid(axis_samples, numpy.arange(-1, 1, 0.02))
Z = X**2 + 10*Y**2

# Contour heights to draw.
contour_levels = [0.5, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]

plt.figure()
CS = plt.contour(X, Y, Z, levels=contour_levels)
plt.xlabel("$w_1$", fontsize=16)
plt.ylabel("$w_2$", fontsize=16)
plt.show()
22 | 


--------------------------------------------------------------------------------
/fig/mnist.py:
--------------------------------------------------------------------------------
  1 | """
  2 | mnist
  3 | ~~~~~
  4 | 
  5 | Draws images based on the MNIST data."""
  6 | 
  7 | #### Libraries
  8 | # Standard library
  9 | import cPickle
 10 | import sys
 11 | 
 12 | # My library
 13 | sys.path.append('../src/')
 14 | import mnist_loader
 15 | 
 16 | # Third-party libraries
 17 | import matplotlib
 18 | import matplotlib.pyplot as plt
 19 | import numpy as np
 20 | 
 21 | def main():
 22 |     training_set, validation_set, test_set = mnist_loader.load_data()
 23 |     images = get_images(training_set)
 24 |     plot_rotated_image(images[0])
 25 | 
 26 | #### Plotting
 27 | def plot_images_together(images):
 28 |     """ Plot a single image containing all six MNIST images, one after
 29 |     the other.  Note that we crop the sides of the images so that they
 30 |     appear reasonably close together."""
 31 |     fig = plt.figure()
 32 |     images = [image[:, 3:25] for image in images]
 33 |     image = np.concatenate(images, axis=1)
 34 |     ax = fig.add_subplot(1, 1, 1)
 35 |     ax.matshow(image, cmap = matplotlib.cm.binary)
 36 |     plt.xticks(np.array([]))
 37 |     plt.yticks(np.array([]))
 38 |     plt.show()
 39 | 
 40 | def plot_10_by_10_images(images):
 41 |     """ Plot 100 MNIST images in a 10 by 10 table. Note that we crop
 42 |     the images so that they appear reasonably close together.  The
 43 |     image is post-processed to give the appearance of being continued."""
 44 |     fig = plt.figure()
 45 |     images = [image[3:25, 3:25] for image in images]
 46 |     #image = np.concatenate(images, axis=1)
 47 |     for x in range(10):
 48 |         for y in range(10):
 49 |             ax = fig.add_subplot(10, 10, 10*y+x)
 50 |             ax.matshow(images[10*y+x], cmap = matplotlib.cm.binary)
 51 |             plt.xticks(np.array([]))
 52 |             plt.yticks(np.array([]))
 53 |     plt.show()
 54 | 
 55 | def plot_images_separately(images):
 56 |     "Plot the six MNIST images separately."
 57 |     fig = plt.figure()
 58 |     for j in xrange(1, 7):
 59 |         ax = fig.add_subplot(1, 6, j)
 60 |         ax.matshow(images[j-1], cmap = matplotlib.cm.binary)
 61 |         plt.xticks(np.array([]))
 62 |         plt.yticks(np.array([]))
 63 |     plt.show()
 64 | 
 65 | def plot_mnist_digit(image):
 66 |     """ Plot a single MNIST image."""
 67 |     fig = plt.figure()
 68 |     ax = fig.add_subplot(1, 1, 1)
 69 |     ax.matshow(image, cmap = matplotlib.cm.binary)
 70 |     plt.xticks(np.array([]))
 71 |     plt.yticks(np.array([]))
 72 |     plt.show()
 73 | 
 74 | def plot_2_and_1(images):
 75 |     "Plot a 2 and a 1 image from the MNIST set."
 76 |     fig = plt.figure()
 77 |     ax = fig.add_subplot(1, 2, 1)
 78 |     ax.matshow(images[5], cmap = matplotlib.cm.binary)
 79 |     plt.xticks(np.array([]))
 80 |     plt.yticks(np.array([]))
 81 |     ax = fig.add_subplot(1, 2, 2)
 82 |     ax.matshow(images[3], cmap = matplotlib.cm.binary)
 83 |     plt.xticks(np.array([]))
 84 |     plt.yticks(np.array([]))
 85 |     plt.show()
 86 | 
 87 | def plot_top_left(image):
 88 |     "Plot the top left of ``image``."
 89 |     image[14:,:] = np.zeros((14,28))
 90 |     image[:,14:] = np.zeros((28,14))
 91 |     fig = plt.figure()
 92 |     ax = fig.add_subplot(1, 1, 1)
 93 |     ax.matshow(image, cmap = matplotlib.cm.binary)
 94 |     plt.xticks(np.array([]))
 95 |     plt.yticks(np.array([]))
 96 |     plt.show()
 97 | 
 98 | def plot_bad_images(images):
 99 |     """This takes a list of images misclassified by a pretty good
100 |     neural network --- one achieving over 93 percent accuracy --- and
101 |     turns them into a figure."""
102 |     bad_image_indices = [8, 18, 33, 92, 119, 124, 149, 151, 193, 233, 241, 247, 259, 300, 313, 321, 324, 341, 349, 352, 359, 362, 381, 412, 435, 445, 449, 478, 479, 495, 502, 511, 528, 531, 547, 571, 578, 582, 597, 610, 619, 628, 629, 659, 667, 691, 707, 717, 726, 740, 791, 810, 844, 846, 898, 938, 939, 947, 956, 959, 965, 982, 1014, 1033, 1039, 1044, 1050, 1055, 1107, 1112, 1124, 1147, 1181, 1191, 1192, 1198, 1202, 1204, 1206, 1224, 1226, 1232, 1242, 1243, 1247, 1256, 1260, 1263, 1283, 1289, 1299, 1310, 1319, 1326, 1328, 1357, 1378, 1393, 1413, 1422, 1435, 1467, 1469, 1494, 1500, 1522, 1523, 1525, 1527, 1530, 1549, 1553, 1609, 1611, 1634, 1641, 1676, 1678, 1681, 1709, 1717, 1722, 1730, 1732, 1737, 1741, 1754, 1759, 1772, 1773, 1790, 1808, 1813, 1823, 1843, 1850, 1857, 1868, 1878, 1880, 1883, 1901, 1913, 1930, 1938, 1940, 1952, 1969, 1970, 1984, 2001, 2009, 2016, 2018, 2035, 2040, 2043, 2044, 2053, 2063, 2098, 2105, 2109, 2118, 2129, 2130, 2135, 2148, 2161, 2168, 2174, 2182, 2185, 2186, 2189, 2224, 2229, 2237, 2266, 2272, 2293, 2299, 2319, 2325, 2326, 2334, 2369, 2371, 2380, 2381, 2387, 2393, 2395, 2406, 2408, 2414, 2422, 2433, 2450, 2488, 2514, 2526, 2548, 2574, 2589, 2598, 2607, 2610, 2631, 2648, 2654, 2695, 2713, 2720, 2721, 2730, 2770, 2771, 2780, 2863, 2866, 2896, 2907, 2925, 2927, 2939, 2995, 3005, 3023, 3030, 3060, 3073, 3102, 3108, 3110, 3114, 3115, 3117, 3130, 3132, 3157, 3160, 3167, 3183, 3189, 3206, 3240, 3254, 3260, 3280, 3329, 3330, 3333, 3383, 3384, 3475, 3490, 3503, 3520, 3525, 3559, 3567, 3573, 3597, 3598, 3604, 3629, 3664, 3702, 3716, 3718, 3725, 3726, 3727, 3751, 3752, 3757, 3763, 3766, 3767, 3769, 3776, 3780, 3798, 3806, 3808, 3811, 3817, 3821, 3838, 3848, 3853, 3855, 3869, 3876, 3902, 3906, 3926, 3941, 3943, 3951, 3954, 3962, 3976, 3985, 3995, 4000, 4002, 4007, 4017, 4018, 4065, 4075, 4078, 4093, 4102, 4139, 4140, 4152, 4154, 4163, 4165, 4176, 4199, 4201, 4205, 4207, 4212, 4224, 4238, 4248, 4256, 4284, 4289, 4297, 4300, 4306, 4344, 4355, 
4356, 4359, 4360, 4369, 4405, 4425, 4433, 4435, 4449, 4487, 4497, 4498, 4500, 4521, 4536, 4548, 4563, 4571, 4575, 4601, 4615, 4620, 4633, 4639, 4662, 4690, 4722, 4731, 4735, 4737, 4739, 4740, 4761, 4798, 4807, 4814, 4823, 4833, 4837, 4874, 4876, 4879, 4880, 4886, 4890, 4910, 4950, 4951, 4952, 4956, 4963, 4966, 4968, 4978, 4990, 5001, 5020, 5054, 5067, 5068, 5078, 5135, 5140, 5143, 5176, 5183, 5201, 5210, 5331, 5409, 5457, 5495, 5600, 5601, 5617, 5623, 5634, 5642, 5677, 5678, 5718, 5734, 5735, 5749, 5752, 5771, 5787, 5835, 5842, 5845, 5858, 5887, 5888, 5891, 5906, 5913, 5936, 5937, 5945, 5955, 5957, 5972, 5973, 5985, 5987, 5997, 6035, 6042, 6043, 6045, 6053, 6059, 6065, 6071, 6081, 6091, 6112, 6124, 6157, 6166, 6168, 6172, 6173, 6347, 6370, 6386, 6390, 6391, 6392, 6421, 6426, 6428, 6505, 6542, 6555, 6556, 6560, 6564, 6568, 6571, 6572, 6597, 6598, 6603, 6608, 6625, 6651, 6694, 6706, 6721, 6725, 6740, 6746, 6768, 6783, 6785, 6796, 6817, 6827, 6847, 6870, 6872, 6926, 6945, 7002, 7035, 7043, 7089, 7121, 7130, 7198, 7216, 7233, 7248, 7265, 7426, 7432, 7434, 7494, 7498, 7691, 7777, 7779, 7797, 7800, 7809, 7812, 7821, 7849, 7876, 7886, 7897, 7902, 7905, 7917, 7921, 7945, 7999, 8020, 8059, 8081, 8094, 8095, 8115, 8246, 8256, 8262, 8272, 8273, 8278, 8279, 8293, 8322, 8339, 8353, 8408, 8453, 8456, 8502, 8520, 8522, 8607, 9009, 9010, 9013, 9015, 9019, 9022, 9024, 9026, 9036, 9045, 9046, 9128, 9214, 9280, 9316, 9342, 9382, 9433, 9446, 9506, 9540, 9544, 9587, 9614, 9634, 9642, 9645, 9700, 9716, 9719, 9729, 9732, 9738, 9740, 9741, 9742, 9744, 9745, 9749, 9752, 9768, 9770, 9777, 9779, 9792, 9808, 9831, 9839, 9856, 9858, 9867, 9879, 9883, 9888, 9890, 9893, 9905, 9944, 9970, 9982]
103 |     n = len(bad_image_indices)
104 |     bad_images = [images[j] for j in bad_image_indices]
105 |     fig = plt.figure(figsize=(10, 15))
106 |     for j in xrange(1, n+1):
107 |         ax = fig.add_subplot(25, 125, j)
108 |         ax.matshow(bad_images[j-1], cmap = matplotlib.cm.binary)
109 |         ax.set_title(str(bad_image_indices[j-1]))
110 |         plt.xticks(np.array([]))
111 |         plt.yticks(np.array([]))
112 |     plt.subplots_adjust(hspace = 1.2)
113 |     plt.show()
114 | 
def plot_really_bad_images(images):
    """This takes a list of the worst images from plot_bad_images and
    turns them into a figure.

    ``images`` is indexed with the 18 hard-coded positions below, which
    are laid out in a grid of 2 rows by 9 columns."""
    really_bad_image_indices = [
        324, 582, 659, 726, 846, 956, 1124, 1393,
        1773, 1868, 2018, 2109, 2654, 4199, 4201, 4620, 5457, 5642]
    n = len(really_bad_image_indices)
    really_bad_images = [images[j] for j in really_bad_image_indices]
    fig = plt.figure(figsize=(10, 2))
    # range instead of xrange, which does not exist on Python 3.  The
    # loop variable is the 1-based subplot index.
    for j in range(1, n+1):
        ax = fig.add_subplot(2, 9, j)
        ax.matshow(really_bad_images[j-1], cmap = matplotlib.cm.binary)
        #ax.set_title(str(really_bad_image_indices[j-1]))
        plt.xticks(np.array([]))
        plt.yticks(np.array([]))
    plt.show()
131 | 
def plot_features(image):
    """Plot three copies of ``image`` side by side, keeping only the top
    right, bottom left and bottom right quadrant respectively.

    The input is copied three times, so the caller's array is not
    modified.  The blanking blocks are sized for a 28 by 28 image.
    """
    top_right = np.copy(image)
    bottom_left = np.copy(image)
    bottom_right = np.copy(image)
    # Zero out everything outside the quadrant each copy should retain.
    top_right[:,:14] = np.zeros((28,14))
    top_right[14:,:] = np.zeros((14,28))
    bottom_left[:,14:] = np.zeros((28,14))
    bottom_left[:14,:] = np.zeros((14,28))
    bottom_right[:14,:] = np.zeros((14,28))
    bottom_right[:,:14] = np.zeros((28,14))
    figure = plt.figure()
    quadrants = (top_right, bottom_left, bottom_right)
    for position, quadrant in enumerate(quadrants, 1):
        panel = figure.add_subplot(1, 3, position)
        panel.matshow(quadrant, cmap = matplotlib.cm.binary)
        plt.xticks(np.array([]))
        plt.yticks(np.array([]))
    plt.show()
155 | 
def plot_rotated_image(image):
    """ Plot an MNIST digit and a version rotated by 15 degrees.

    ``image`` is indexed as a 28 x 28 array.  Two figures are shown
    in turn: first the original digit, then the rotated digit (drawn
    with ``plot_mnist_digit``)."""
    # Do the initial plot
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.matshow(image, cmap = matplotlib.cm.binary)
    plt.xticks(np.array([]))
    plt.yticks(np.array([]))
    plt.show()
    # Set up the rotated image.  There are fast matrix techniques
    # for doing this, but we'll do a pedestrian approach
    rot_image = np.zeros((28,28))
    theta = 15*np.pi/180 # 15 degrees
    # The rotation coefficients are the same for every pixel, so
    # compute them once rather than inside the loops.
    cos_theta, sin_theta = np.cos(theta), np.sin(theta)
    def to_xy(j, k):
        # Converts from matrix indices to x, y co-ords, using the
        # matrix entry in row 14, column 13 as the origin
        return (k-13, -j+14) # x range: -13..14, y range: -13..14
    def to_jk(x, y):
        # Converts from x, y co-ords to matrix indices
        return (-y+14, x+13)
    def image_value(x, y):
        # Returns the value of the image at co-ordinate x, y.  This is
        # a closure over ``image``, so it doesn't need to be passed in.
        j, k = to_jk(x, y)
        return image[j, k]
    # Element by element, figure out what should be in the rotated
    # image.  We simply take each matrix entry, figure out the
    # corresponding x, y co-ordinates, rotate backward, and then
    # average the nearby matrix elements.  It's not perfect, and it's
    # not fast, but it works okay.
    for j in range(28):
        for k in range(28):
            x, y = to_xy(j, k)
            # rotate by -theta
            x1 = cos_theta*x + sin_theta*y
            y1 = -sin_theta*x + cos_theta*y
            # Nearest integer x entries are x2 and x2+1. delta_x
            # measures how to interpolate.  np.floor returns a float,
            # and NumPy only accepts integers as array indices, so
            # convert explicitly.
            x2 = int(np.floor(x1))
            delta_x = x1-x2
            # Similarly for y
            y2 = int(np.floor(y1))
            delta_y = y1-y2
            # Check if we're out of bounds, and if so continue to next entry
            # This will miss a boundary row and column, but that's okay,
            # MNIST digits usually don't go that near the boundary.
            if x2 < -13 or x2 > 13 or y2 < -13 or y2 > 13: continue
            # If we're in bounds, average the nearby entries.
            value \
                = (1-delta_x)*(1-delta_y)*image_value(x2, y2)+\
                (1-delta_x)*delta_y*image_value(x2, y2+1)+\
                delta_x*(1-delta_y)*image_value(x2+1, y2)+\
                delta_x*delta_y*image_value(x2+1, y2+1)
            # Rescale the value by a hand-set fudge factor.  This
            # seems to be necessary because the averaging doesn't
            # quite work right.  The fudge-factor should probably be
            # theta-dependent, but I've set it by hand.
            rot_image[j, k] = 1.3*value
    plot_mnist_digit(rot_image)
216 | 
217 | #### Miscellanea
def load_data():
    """ Return the MNIST data as a tuple containing the training data,
    the validation data, and the test data.

    The data is read from ``../data/mnist.pkl`` when that file exists.
    Otherwise the gzip-compressed ``../data/mnist.pkl.gz`` is read
    instead.  Both paths are relative to the current working
    directory."""
    # Imported locally so this function does not depend on the
    # module's import block.
    import gzip
    import os.path
    uncompressed_path = '../data/mnist.pkl'
    if os.path.exists(uncompressed_path):
        f = open(uncompressed_path, 'rb')
    else:
        f = gzip.open(uncompressed_path + '.gz', 'rb')
    try:
        # NOTE(review): unpickling can execute arbitrary code, so this
        # must only ever be pointed at a trusted data file.
        training_set, validation_set, test_set = cPickle.load(f)
    finally:
        # Close the file even if unpickling fails.
        f.close()
    return (training_set, validation_set, test_set)
225 | 
def get_images(training_set):
    """ Return a list containing the images from the MNIST data
    set.  The flattened images are taken from ``training_set[0]``, and
    each one is reshaped into a 2-d numpy array with 28 columns."""
    images = []
    for flat_pixels in training_set[0]:
        # -1 lets numpy work out the number of rows from the length.
        images.append(np.reshape(flat_pixels, (-1, 28)))
    return images
231 | 
232 | #### Main
# Call main() only when this file is run as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()
235 | 


--------------------------------------------------------------------------------
/fig/mnist_100_digits.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/mnist_100_digits.png


--------------------------------------------------------------------------------
/fig/mnist_2_and_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/mnist_2_and_1.png


--------------------------------------------------------------------------------
/fig/mnist_complete_zero.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/mnist_complete_zero.png


--------------------------------------------------------------------------------
/fig/mnist_first_digit.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/mnist_first_digit.png


--------------------------------------------------------------------------------
/fig/mnist_other_features.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/mnist_other_features.png


--------------------------------------------------------------------------------
/fig/mnist_really_bad_images.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/mnist_really_bad_images.png


--------------------------------------------------------------------------------
/fig/mnist_top_left_feature.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/mnist_top_left_feature.png


--------------------------------------------------------------------------------
/fig/more_data.json:
--------------------------------------------------------------------------------
1 | [69.09, 76.37, 85.29, 88.85, 91.27, 93.24, 94.89, 95.85, 95.97]


--------------------------------------------------------------------------------
/fig/more_data.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/more_data.png


--------------------------------------------------------------------------------
/fig/more_data.py:
--------------------------------------------------------------------------------
  1 | """more_data
  2 | ~~~~~~~~~~~~
  3 | 
  4 | Plot graphs to illustrate the performance of MNIST when different size
  5 | training sets are used.
  6 | 
  7 | """
  8 | 
  9 | # Standard library
 10 | import json
 11 | import random
 12 | import sys
 13 | 
 14 | # My library
 15 | sys.path.append('../src/')
 16 | import mnist_loader
 17 | import network2
 18 | 
 19 | # Third-party libraries
 20 | import matplotlib.pyplot as plt
 21 | import numpy as np
 22 | from sklearn import svm
 23 | 
# The sizes to use for the different training sets.  run_networks and
# run_svms train on the first ``size`` training examples for each entry,
# and the plotting functions use these values as the x co-ordinates.
SIZES = [100, 200, 500, 1000, 2000, 5000, 10000, 20000, 50000]
 26 | 
def main():
    """Train the networks and the SVMs, which save their accuracies to
    JSON files, and then plot the saved results."""
    run_networks()
    run_svms()
    make_plots()
 31 |                        
 32 | def run_networks():
 33 |     # Make results more easily reproducible
 34 |     random.seed(12345678)
 35 |     np.random.seed(12345678)
 36 |     training_data, validation_data, test_data = mnist_loader.load_data_wrapper()
 37 |     net = network2.Network([784, 30, 10], cost=network2.CrossEntropyCost())
 38 |     accuracies = []
 39 |     for size in SIZES:
 40 |         print "\n\nTraining network with data set size %s" % size
 41 |         net.large_weight_initializer()
 42 |         num_epochs = 1500000 / size 
 43 |         net.SGD(training_data[:size], num_epochs, 10, 0.5, lmbda = size*0.0001)
 44 |         accuracy = net.accuracy(validation_data) / 100.0
 45 |         print "Accuracy was %s percent" % accuracy
 46 |         accuracies.append(accuracy)
 47 |     f = open("more_data.json", "w")
 48 |     json.dump(accuracies, f)
 49 |     f.close()
 50 | 
 51 | def run_svms():
 52 |     svm_training_data, svm_validation_data, svm_test_data \
 53 |         = mnist_loader.load_data()
 54 |     accuracies = []
 55 |     for size in SIZES:
 56 |         print "\n\nTraining SVM with data set size %s" % size
 57 |         clf = svm.SVC()
 58 |         clf.fit(svm_training_data[0][:size], svm_training_data[1][:size])
 59 |         predictions = [int(a) for a in clf.predict(svm_validation_data[0])]
 60 |         accuracy = sum(int(a == y) for a, y in 
 61 |                        zip(predictions, svm_validation_data[1])) / 100.0
 62 |         print "Accuracy was %s percent" % accuracy
 63 |         accuracies.append(accuracy)
 64 |     f = open("more_data_svm.json", "w")
 65 |     json.dump(accuracies, f)
 66 |     f.close()
 67 | 
 68 | def make_plots():
 69 |     f = open("more_data.json", "r")
 70 |     accuracies = json.load(f)
 71 |     f.close()
 72 |     f = open("more_data_svm.json", "r")
 73 |     svm_accuracies = json.load(f)
 74 |     f.close()
 75 |     make_linear_plot(accuracies)
 76 |     make_log_plot(accuracies)
 77 |     make_combined_plot(accuracies, svm_accuracies)
 78 | 
 79 | def make_linear_plot(accuracies):
 80 |     fig = plt.figure()
 81 |     ax = fig.add_subplot(111)
 82 |     ax.plot(SIZES, accuracies, color='#2A6EA6')
 83 |     ax.plot(SIZES, accuracies, "o", color='#FFA933')
 84 |     ax.set_xlim(0, 50000)
 85 |     ax.set_ylim(60, 100)
 86 |     ax.grid(True)
 87 |     ax.set_xlabel('Training set size')
 88 |     ax.set_title('Accuracy (%) on the validation data')
 89 |     plt.show()
 90 | 
 91 | def make_log_plot(accuracies):
 92 |     fig = plt.figure()
 93 |     ax = fig.add_subplot(111)
 94 |     ax.plot(SIZES, accuracies, color='#2A6EA6')
 95 |     ax.plot(SIZES, accuracies, "o", color='#FFA933')
 96 |     ax.set_xlim(100, 50000)
 97 |     ax.set_ylim(60, 100)
 98 |     ax.set_xscale('log')
 99 |     ax.grid(True)
100 |     ax.set_xlabel('Training set size')
101 |     ax.set_title('Accuracy (%) on the validation data')
102 |     plt.show()
103 | 
def make_combined_plot(accuracies, svm_accuracies):
    """Plot the neural network and SVM accuracies together against the
    training set sizes in ``SIZES``, using a logarithmic scale for the
    x axis and a legend to tell the two curves apart."""
    figure = plt.figure()
    axes = figure.add_subplot(1, 1, 1)
    curves = (
        (accuracies, '#2A6EA6', 'Neural network accuracy (%)'),
        (svm_accuracies, '#FFA933', 'SVM accuracy (%)'))
    for values, colour, legend_text in curves:
        # The connecting line carries no label, so only the markers
        # are given an entry in the legend.
        axes.plot(SIZES, values, color=colour)
        axes.plot(SIZES, values, "o", color=colour,
                  label=legend_text)
    axes.set_xlim(100, 50000)
    axes.set_ylim(25, 100)
    axes.set_xscale('log')
    axes.grid(True)
    axes.set_xlabel('Training set size')
    plt.legend(loc="lower right")
    plt.show()
120 | 
# Call main() only when this file is run as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()
123 | 


--------------------------------------------------------------------------------
/fig/more_data_5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/more_data_5.png


--------------------------------------------------------------------------------
/fig/more_data_comparison.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/more_data_comparison.png


--------------------------------------------------------------------------------
/fig/more_data_log.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/more_data_log.png


--------------------------------------------------------------------------------
/fig/more_data_rotated_5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/more_data_rotated_5.png


--------------------------------------------------------------------------------
/fig/more_data_svm.json:
--------------------------------------------------------------------------------
1 | [25.07, 48.93, 75.13, 83.87, 88.49, 91.46, 92.45, 93.47, 94.48]


--------------------------------------------------------------------------------
/fig/multiple_eta.json:
--------------------------------------------------------------------------------
1 | [[[], [], [0.87809508908377998, 0.67406552530098141, 0.59798920430275404, 0.55533015743656189, 0.51751101003208144, 0.4942033354556824, 0.47255041042913526, 0.46069879353359433, 0.44304475294352064, 0.43099562372228112, 0.42310993427766375, 0.41408265298981006, 0.40573464183982105, 0.40110722961828227, 0.39162028064538967, 0.38705015774740958, 0.38116357043417587, 0.37603986695304614, 0.37297012040237154, 0.37057334627661631, 0.36551756338853658, 0.36335674264586654, 0.35745296185579917, 0.35535960956849127, 0.35365591135061097, 0.35011353300568238, 0.34946519495897871, 0.34604661988238178, 0.34386077098862522, 0.33919980880230349], []], [[], [], [0.49501954654296704, 0.4063145129425576, 0.40482383242804637, 0.37156577828840276, 0.37380111172151681, 0.37152751786000143, 0.35371985224004426, 0.3557161388797867, 0.34323780090168027, 0.3433514311156789, 0.3367645441708797, 0.34532085892085329, 0.33506383267050244, 0.34760988079085842, 0.34921493732996928, 0.33853424834583179, 0.32837282561262077, 0.33175599401109612, 0.33132920379429243, 0.33024353325326034, 0.32736756892399654, 0.3259638557593546, 0.32004264784244907, 0.33424319076405928, 0.33878125802305081, 0.32521839878261177, 0.32679267619514646, 0.32488571435373748, 0.33056367198473002, 0.33879633130932685], []], [[], [], [0.92489293305102116, 0.83919130289246469, 0.88748421594232696, 0.79625231780396133, 0.78117959228699174, 1.1365919079387048, 0.78787239608336346, 0.76778614131217449, 0.73689525303227721, 0.80127437393519696, 0.74433665287336681, 0.73725544607013882, 0.80249602203179993, 0.85190338199210014, 0.79872168623645712, 0.80243104440756152, 0.80649160680410659, 0.81467254023600921, 0.82526467696100858, 0.75042379852601759, 0.93658673378777402, 0.88236662906752283, 0.86121396033520892, 0.72492681699401829, 0.80405009868466648, 0.83959963179208197, 0.83387510808276821, 0.88282498566307899, 0.88583473645177979, 0.86068501713490919], []]]


--------------------------------------------------------------------------------
/fig/multiple_eta.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/multiple_eta.png


--------------------------------------------------------------------------------
/fig/multiple_eta.py:
--------------------------------------------------------------------------------
 1 | """multiple_eta
 2 | ~~~~~~~~~~~~~~~
 3 | 
 4 | This program shows how different values for the learning rate affect
 5 | training.  In particular, we'll plot out how the cost changes using
 6 | three different values for eta.
 7 | 
 8 | """
 9 | 
10 | # Standard library
11 | import json
12 | import random
13 | import sys
14 | 
15 | # My library
16 | sys.path.append('../src/')
17 | import mnist_loader
18 | import network2
19 | 
20 | # Third-party libraries
21 | import matplotlib.pyplot as plt
22 | import numpy as np
23 | 
# Constants
# The learning rates (eta) to compare; run_networks trains one network
# for each value.
LEARNING_RATES = [0.025, 0.25, 2.5]
# Plot colors; make_plot pairs these up, in order, with LEARNING_RATES.
COLORS = ['#2A6EA6', '#FFCD33', '#FF7033']
# Number of epochs passed to SGD for each network.  make_plot also uses
# it for the x co-ordinates and the x axis limit.
NUM_EPOCHS = 30
28 | 
def main():
    """Train the networks, which saves their results to a JSON file,
    and then plot the saved results."""
    run_networks()
    make_plot()
32 | 
def run_networks():
    """Train networks using three different values for the learning rate,
    and store the cost curves in the file ``multiple_eta.json``, where
    they can later be used by ``make_plot``.

    """
    # Make results more easily reproducible
    random.seed(12345678)
    np.random.seed(12345678)
    training_data, validation_data, test_data = mnist_loader.load_data_wrapper()
    results = []
    for eta in LEARNING_RATES:
        print("\nTrain a network using eta = "+str(eta))
        # A new network is created for every learning rate.
        net = network2.Network([784, 30, 10])
        # Whatever SGD returns is stored as-is; make_plot unpacks each
        # entry into four items and plots the third as the training cost.
        results.append(
            net.SGD(training_data, NUM_EPOCHS, 10, eta, lmbda=5.0,
                    evaluation_data=validation_data,
                    monitor_training_cost=True))
    # The with-block closes the file even if the dump fails.
    with open("multiple_eta.json", "w") as f:
        json.dump(results, f)
54 | 
def make_plot():
    """Load the results saved in ``multiple_eta.json`` and plot the
    training cost against the epoch number, with one curve for each
    value in ``LEARNING_RATES``."""
    # The with-block closes the file even if parsing fails.
    with open("multiple_eta.json", "r") as f:
        results = json.load(f)
    fig = plt.figure()
    ax = fig.add_subplot(111)
    for eta, result, color in zip(LEARNING_RATES, results, COLORS):
        # Each result holds four items; only the third, the training
        # cost, is plotted.
        _, _, training_cost, _ = result
        # A raw string keeps the backslash in the label literally,
        # without relying on "\e" being an unrecognized escape sequence.
        ax.plot(np.arange(NUM_EPOCHS), training_cost, "o-",
                label=r"$\eta$ = "+str(eta),
                color=color)
    ax.set_xlim([0, NUM_EPOCHS])
    ax.set_xlabel('Epoch')
    ax.set_ylabel('Cost')
    plt.legend(loc='upper right')
    plt.show()
71 | 
# Call main() only when this file is run as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()
74 | 


--------------------------------------------------------------------------------
/fig/norms_during_training_2_layers.json:
--------------------------------------------------------------------------------
1 | [[0.06574134326503182, 0.3092184942703712], [0.042965950225229275, 0.21697470825384765], [0.03285743853560062, 0.1661475616404329], [0.02683217541545865, 0.13284259725100744], [0.022444963188347787, 0.10869569875668701], [0.01898702292286868, 0.09026907616072426], [0.016176320606231937, 0.0757817449583434], [0.013856769047630028, 0.06415813857855861], [0.011923755327842544, 0.05468929179740756], [0.010300852608835414, 0.04688240462739417], [0.008929980735740226, 0.04038231936325889], [0.007766037293739668, 0.03492602136501374], [0.0067734782810398185, 0.0303144328294002], [0.005923963369769822, 0.02639409406963878], [0.0051946687903396006, 0.02304484718674352], [0.004567050423835018, 0.02017132438924394], [0.004025922146371357, 0.017696929143574278], [0.003558759740350878, 0.015559495043546933], [0.003155168513245846, 0.01370809963803681], [0.0028064709792152993, 0.012100689100100913], [0.0025053833194008273, 0.010702282128687351], [0.0022457579431536864, 0.009483594139974067], [0.0020223756268692833, 0.008419970767157997], [0.0018307752663026288, 0.007490551952524308], [0.0016671128100467073, 0.006677609993253912], [0.0015280437877996404, 0.00596602024352487], [0.001410626139902204, 0.005342833987422243], [0.0013122417372158585, 0.004796930719247236], [0.001230535877050377, 0.0043187326553456445], [0.0011633740094011671, 0.003899968399564661], [0.0011088140690242762, 0.003533475735331852], [0.0010650914693569541, 0.003213035828461012], [0.0010306126820953377, 0.002933232910393291], [0.0010039529391577253, 0.0026893349185429013], [0.0009838541507884253, 0.002477191693569685], [0.0009692204018936975, 0.0022931482255388073], [0.0009591098842916142, 0.0021339711220004043], [0.0009527233875527112, 0.001996786940464415], [0.000949390258845377, 0.001879031281725876], [0.0009485530386537979, 0.0017784075917766545], [0.0009497519150621335, 0.0016928545134905027], [0.0009526098833938276, 0.00162052044418621], [0.0009568191883996328, 0.0015597437895911916], 
[0.0009621293468216554, 0.0015090373462108967], [0.0009683368349615602, 0.0014670753405878112], [0.0009752763835047726, 0.0014326818995832435], [0.0009828137394213656, 0.0014048200688868667], [0.0009908397165437146, 0.001382580864127233], [0.0009992653475108188, 0.0013651721629557073], [0.0010080179583493688, 0.0013519074848511415], [0.0010170380046287963, 0.0013421948454926998], [0.0010262765293966452, 0.0013355259254374328], [0.0010356931246733264, 0.0013314657823426134], [0.0010452542983669293, 0.001329643288455763], [0.0010549321662308622, 0.0013297424128337033], [0.0010647034036817776, 0.0013314944062056567], [0.0010745484049984948, 0.001334670894282615], [0.0010844506078711124, 0.001339077846101557], [0.0010943959497621623, 0.00134455035772627], [0.0011043724293854176, 0.001350948176527696], [0.0011143697520880728, 0.0013581518848623535], [0.0011243790422903717, 0.0013660596617759066], [0.0011343926096075954, 0.001374584545237105], [0.0011444037580310062, 0.0013836521236696693], [0.0011544066297232028, 0.0013931985929649267], [0.001164396076707179, 0.0014031691228828121], [0.0011743675550923705, 0.0014135164842454385], [0.001184317037560411, 0.001424199895282673], [0.0011942409406884003, 0.0014351840517464725], [0.0012041360643654548, 0.001446438310918101], [0.00121399954109664, 0.0014579360044009737], [0.0012238287934166479, 0.001469653858672124], [0.001233621497976716, 0.0014815715058239837], [0.0012433755551407887, 0.0014936710698406808], [0.0012530890631449795, 0.00150593681619279], [0.0012627602960492853, 0.0015183548545708195], [0.0012723876848512645, 0.0015309128862728684], [0.0012819698012447925, 0.0015435999891708265], [0.0012915053435987739, 0.0015564064343490218], [0.0013009931248051125, 0.0015693235294801865], [0.0013104320617056788, 0.0015823434848093638], [0.0013198211658574318, 0.0015954592982856857], [0.001329159535435146, 0.0016086646569381795], [0.0013384463481043436, 0.0016219538520547598], [0.0013476808547242748, 0.0016353217061091565], 
[0.0013568623737632026, 0.0016487635097024642], [0.0013659902863269694, 0.0016622749670549338], [0.0013750640317171125, 0.0016758521488088555], [0.0013840831034477462, 0.001689491451092161], [0.0013930470456610636, 0.0017031895599511186], [0.0014019554498903382, 0.0017169434203938478], [0.001410807952126751, 0.001730750209399147], [0.0014196042301527312, 0.0017446073123398938], [0.001428344001109794, 0.0017585123023509564], [0.0014370270192733724, 0.0017724629222394034], [0.0014456530740109097, 0.0017864570685928126], [0.0014542219879027463, 0.0018004927777905224], [0.0014627336150080232, 0.001814568213664344], [0.0014711878392602328, 0.00182868165659123], [0.0014795845729789635, 0.001842831493830357], [0.0014879237554861209, 0.001857016210943517], [0.001496205351816403, 0.0018712343841595458], [0.001504429351512978, 0.0018854846735628997], [0.001512595767500488, 0.0018997658170025018], [0.0015207046350283473, 0.001914076624631347], [0.0015287560106781513, 0.0019284159739990133], [0.0015367499714297151, 0.0019427828056299383], [0.001544686613780801, 0.0019571761190289314], [0.0015525660529162172, 0.00197159496906334], [0.0015603884219223176, 0.0019860384626777115], [0.0015681538710434048, 0.002000505755902608], [0.0015758625669768783, 0.0020149960511241243], [0.0015835146922042135, 0.0020295085945849795], [0.0015911104443552297, 0.002044042674091702], [0.001598650035603237, 0.0020585976169056313], [0.0016061336920889376, 0.0020731727877982367], [0.001613561653371099, 0.002087767587253655], [0.0016209341719021985, 0.0021023814498034505], [0.0016282515125274055, 0.0021170138424803724], [0.001635513952005339, 0.0021316642633795106], [0.0016427217785492264, 0.0021463322403165704], [0.0016498752913871664, 0.002161017329574194], [0.0016569748003402733, 0.0021757191147283752], [0.0016640206254176105, 0.00219043720554777], [0.0016710130964268687, 0.002205171236959631], [0.0016779525525998347, 0.0022199208680767388], [0.001684839342231744, 0.002234685781280274], 
[0.0016916738223337158, 0.0022494656813542074], [0.0016984563582974415, 0.002264260294667135], [0.0017051873235714623, 0.002279069368397964], [0.0017118670993483138, 0.0022938926698022047], [0.0017184960742619357, 0.002308729985515969], [0.001725074644094738, 0.0023235811208949593], [0.0017316032114938027, 0.0023384458993861265], [0.001738082185695673, 0.0023533241619297417], [0.0017445119822592711, 0.0023682157663899766], [0.0017508930228065083, 0.0023831205870120933], [0.001757225734770155, 0.0023980385139046865], [0.0017635105511485838, 0.002412969452545351], [0.0017697479102670496, 0.0024279133233084706], [0.0017759382555451366, 0.002442870061013807], [0.0017820820352700965, 0.002457839614494661], [0.0017881797023757589, 0.002472821946184576], [0.0017942317142267865, 0.002487817031721519], [0.0018002385324079868, 0.0025028248595686305], [0.0018062006225184927, 0.002517845430650605], [0.001812118453970572, 0.002532878758004949], [0.0018179924997929005, 0.0025479248664473006], [0.0018238232364381085, 0.0025629837922500893], [0.0018296111435944535, 0.0025780555828339136], [0.0018353567040014507, 0.0025931402964709826], [0.0018410604032693685, 0.002608238001999926], [0.0018467227297024314, 0.002623348778551656], [0.001852344174125658, 0.0026384727152854465], [0.0018579252297151955, 0.0026536099111349864], [0.0018634663918321254, 0.0026687604745637963], [0.0018689681578596077, 0.002683924523329614], [0.0018744310270433402, 0.0026991021842573027], [0.0018798555003352415, 0.0027142935930199196], [0.001885242080240342, 0.0027294988939275022], [0.0018905912706668024, 0.002744718239723253], [0.001895903576779071, 0.002759951791386862], [0.0019011795048541078, 0.0027751997179443952], [0.0019064195621406662, 0.002790462196284726], [0.001911624256721632, 0.0028057394109820286], [0.0019167940973793586, 0.0028210315541241206], [0.0019219295934640498, 0.0028363388251463613], [0.0019270312547651242, 0.0028516614306708394], [0.001932099591385598, 0.002866999584350647], 
[0.0019371351136194554, 0.002882353506718987], [0.0019421383318320318, 0.0028977234250428295], [0.0019471097563433955, 0.002913109573180972], [0.0019520498973147331, 0.0029285121914463023], [0.001956959264637746, 0.002943931526472021], [0.0019618383678270644, 0.0029593678310816003], [0.0019666877159156756, 0.002974821364162418], [0.0019715078173533898, 0.0029902923905428024], [0.001976299179908307, 0.0030057811808722997], [0.0019810623105713573, 0.0030212880115050643], [0.001985797715463823, 0.0030368131643862127], [0.0019905058997479567, 0.0030523569269409264], [0.0019951873675405837, 0.0030679195919662114], [0.0019998426218297647, 0.003083501457525218], [0.0020044721643944826, 0.0030991028268438415], [0.0020090764957273553, 0.003114724008209653], [0.0020136561149603598, 0.0031303653148729107], [0.002018211519793561, 0.003146027064949554], [0.0020227432064268357, 0.0031617095813261313], [0.0020272516694945634, 0.003177413191566421], [0.0020317374020032944, 0.0031931382278198197], [0.0020362008952723117, 0.0032088850267311727], [0.0020406426388771466, 0.0032246539293521105], [0.002045063120595933, 0.0032404452810537725], [0.002049462826358647, 0.0032562594314406923], [0.00205384224019913, 0.0032720967342659444], [0.002058201844209906, 0.003287957547347296], [0.002062542118499728, 0.0033038422324843707], [0.002066863541153808, 0.0033197511553766943], [0.0020711665881966946, 0.0033356846855425656], [0.0020754517335577396, 0.003351643196238663], [0.0020797194490391057, 0.0033676270643802518], [0.002083970204286235, 0.0033836366704620515], [0.002088204466760761, 0.003399672398479515], [0.0020924227017157433, 0.0034157346358505767], [0.0020966253721732215, 0.003431823773337752], [0.0021008129389039577, 0.0034479402049704716], [0.0021049858604093342, 0.0034640843279676843], [0.0021091445929053232, 0.0034802565426605417], [0.0021132895903084527, 0.003496457252415226], [0.0021174213042236528, 0.0035126868635556737], [0.0021215401839339712, 0.003528945785286369], 
[0.002125646676391995, 0.00354523442961492], [0.0021297412262129456, 0.0035615532112745186], [0.002133824275669347, 0.00357790254764607], [0.0021378962646871474, 0.003594282858680137], [0.0021419576308432365, 0.003610694566818363], [0.0021460088093642558, 0.003627138096914653], [0.002150050233126582, 0.003643613876155697], [0.002154082332657443, 0.0036601223339810694], [0.0021581055361369977, 0.0036766639020027005], [0.0021621202694013285, 0.0036932390139236514], [0.0021661269559462466, 0.0037098481054562895], [0.0021701260169317894, 0.003726491614239563], [0.002174117871187324, 0.0037431699797555932], [0.0021781029352171515, 0.0037598836432453132], [0.0021820816232065265, 0.0037766330476232263], [0.0021860543470279595, 0.0037934186373911826], [0.002190021516247749, 0.0038102408585511584], [0.002193983538132606, 0.0038271001585168936], [0.0021979408176562884, 0.003843996986024524], [0.0022018937575061484, 0.0038609317910419383], [0.0022058427580895048, 0.0038779050246769663], [0.002209788217539725, 0.0038949171390842974], [0.0022137305317219465, 0.003911968587371091], [0.002217670094238327, 0.0039290598235012155], [0.0022216072964327377, 0.003946191302198092], [0.002225542527394841, 0.0039633634788460706], [0.0022294761739634085, 0.003980576809390355], [0.0022334086207288447, 0.0039978317502353386], [0.0022373402500348184, 0.004015128758141401], [0.002241271441978918, 0.004032468290120044], [0.0022452025744122447, 0.004049850803327413], [0.0022491340229379045, 0.004067276754956062], [0.002253066160908286, 0.0040847466021249985], [0.0022569993594210693, 0.004102260801767939], [0.00226093398731391, 0.004119819810519758], [0.0022648704111577263, 0.004137424084601017], [0.0022688089952485196, 0.004155074079700713], [0.002272750101597681, 0.004172770250856938], [0.0022766940899207363, 0.00419051305233571], [0.002280641317624457, 0.004208302937507712], [0.0022845921397922925, 0.004226140358723033], [0.002288546909168105, 0.004244025767183831], [0.0022925059761381293, 
0.004261959612814915], [0.0022964696887111365, 0.004279942344132131], [0.0023004383924967553, 0.004297974408108706], [0.0023044124306819376, 0.004316056250039291], [0.0023083921440055144, 0.004334188313401842], [0.0023123778707308316, 0.0043523710397172805], [0.0023163699466164206, 0.0043706048684068475], [0.0023203687048847367, 0.004388890236647192], [0.002324374476188852, 0.0044072275792231734], [0.002328387588577205, 0.004425617328378256], [0.002332408367456302, 0.00444405991366268], [0.002336437135551399, 0.004462555761779136], [0.002340474212865165, 0.004481105296426132], [0.002344519916634313, 0.004499708938138957], [0.0023485745612841953, 0.004518367104128216], [0.0023526384583813687, 0.004537080208115914], [0.002356711916584148, 0.0045558486601691706], [0.0023607952415911305, 0.004574672866531417], [0.0023648887360877228, 0.004593553229451196], [0.0023689926996906716, 0.004612490147008489], [0.002373107428890626, 0.0046314840129385105], [0.0023772332169927127, 0.004650535216453183], [0.002381370354055213, 0.00466964414206], [0.0023855191268262756, 0.00468881116937846], [0.002389679818678771, 0.004708036672954117], [0.0023938527095432355, 0.004727321022070043], [0.00239803807583901, 0.004746664580555933], [0.0024022361904035426, 0.004766067706594688], [0.002406447322419903, 0.00478553075252663], [0.00241067173734256, 0.004805054064651225], [0.0024149096968214294, 0.0048246379830264126], [0.002419161458624243, 0.004844282841265573], [0.002423427276557266, 0.004863988966332031], [0.0024277074003844113, 0.004883756678331269], [0.002432002075744773, 0.004903586290300732], [0.0024363115440686556, 0.004923478107997383], [0.002440636042492077, 0.004943432429682883], [0.0024449758037698543, 0.00496344954590655], [0.0024493310561872718, 0.004983529739286014], [0.0024537020234704065, 0.005003673284285789], [0.0024580889246951163, 0.005023880446993503], [0.002462491974194787, 0.005044151484894066], [0.002466911381466829, 0.0050644866466417445], [0.002471347351078043, 
0.005084886171830108], [0.002475800082568811, 0.005105350290759991], [0.002480269770356263, 0.005125879224205454], [0.0024847566036363747, 0.005146473183177764], [0.0024892607662851274, 0.005167132368687558], [0.0024937824367587164, 0.005187856971505075], [0.0024983217879929066, 0.005208647171918635], [0.002502878987301572, 0.005229503139491423], [0.00250745419627445, 0.0052504250328164375], [0.002512047570674216, 0.005271412999270007], [0.0025166592603328656, 0.005292467174763562], [0.002521289409047543, 0.005313587683494025], [0.0025259381544757834, 0.005334774637692681], [0.002530605628030294, 0.005356028137372697], [0.0025352919547732976, 0.005377348270075373], [0.0025399972533105206, 0.005398735110615056], [0.002544721635684845, 0.005420188720823024], [0.0025494652072697325, 0.005441709149290172], [0.002554228066662461, 0.005463296431108753], [0.002559010305577223, 0.005484950587613131], [0.002563812008738162, 0.005506671626119766], [0.0025686332537724317, 0.005528459539666374], [0.0025734741111032786, 0.005550314306750434], [0.0025783346438433175, 0.005572235891067092], [0.0025832149076879425, 0.005594224241246602], [0.002588114950809057, 0.005616279290591347], [0.0025930348137491147, 0.0056384009568125615], [0.0025979745293155743, 0.005660589141766809], [0.002602934122475831, 0.005682843731192474], [0.002607913610252721, 0.005705164594446077], [0.002612913001620622, 0.005727551584238853], [0.0026179322974022668, 0.005750004536373475], [0.0026229714901663545, 0.005772523269481117], [0.002628030564125997, 0.005795107584758912], [0.0026331094950380945, 0.00581775726570805], [0.002638208250103759, 0.0058404720778724864], [0.002643326787869787, 0.005863251768578484], [0.002648465058131364, 0.0058860960666750175], [0.0026536230018359866, 0.00590900468227531], [0.002658800550988757, 0.005931977306499468], [0.0026639976285591067, 0.005955013611218456], [0.0026692141483890316, 0.005978113248799521], [0.0026744500151029385, 0.006001275851853173], 
[0.0026797051240192004, 0.0060245010329819], [0.0026849793610634813, 0.006047788384530743], [0.0026902726026839423, 0.006071137478339861], [0.0026955847157684207, 0.006094547865499277], [0.0027009155575636696, 0.006118019076105893], [0.0027062649755967313, 0.006141550619023013], [0.0027116328075985773, 0.006165141981642434], [0.0027170188814300816, 0.0061887926296493095], [0.0027224230150104177, 0.006212502006790031], [0.0027278450162479717, 0.0062362695346431015], [0.002733284682973905, 0.0062600946123933425], [0.0027387418028783873, 0.006283976616609525], [0.002744216153449669, 0.006307914901025608], [0.002749707501916029, 0.006331908796325816], [0.0027552156051907084, 0.006355957609933584], [0.002760740209819949, 0.0063800606258047595], [0.0027662810519341746, 0.0064042171042251045], [0.002771837857202434, 0.006428426281612287], [0.0027774103407902243, 0.006452687370322627], [0.0027829982073206906, 0.006476999558462748], [0.002788601150839403, 0.006501362009706332], [0.0027942188547827, 0.006525773863116187], [0.002799850991949728, 0.006550234232971763], [0.0028054972244782395, 0.006574742208602484], [0.0028111572038242425, 0.006599296854226912], [0.002816830570745557, 0.006623897208798031], [0.0028225169552893617, 0.006648542285854994], [0.002828215976783803, 0.00667323107338124], [0.002833927243833722, 0.006697962533669548], [0.0028396503543205768, 0.006722735603193962], [0.002845384895406613, 0.006747549192489009], [0.002851130443543345, 0.006772402186036285], [0.002856886564484383, 0.006797293442158781], [0.0028626528133026993, 0.006822221792922986], [0.002868428734412329, 0.00684718604404922], [0.002874213861594579, 0.006872184974830269], [0.0028800077180287957, 0.006897217338058531], [0.0028858098163276804, 0.006922281859962103], [0.0028916196585772355, 0.006947377240149799], [0.002897436736381345, 0.006972502151565546], [0.0029032605309109905, 0.0069976552404521905], [0.0029090905129581864, 0.007022835126325175], [0.0029149261429945754, 
0.007048040401956119], [0.0029207668712347323, 0.0070732696333666465], [0.0029266121377042017, 0.007098521359832755], [0.0029324613723122235, 0.007123794093899752], [0.0029383139949291925, 0.007149086321408257], [0.0029441694154688112, 0.0071743965015313126], [0.0029500270339749505, 0.007199723066822926], [0.002955886240713191, 0.007225064423278256], [0.002961746416267046, 0.007250418950405634], [0.0029676069316388162, 0.007275785001310789], [0.002973467148355095, 0.007301160902793266], [0.00297932641857685, 0.007326544955455544], [0.002985184085214092, 0.0073519354338248254], [0.0029910394820450784, 0.007377330586487867], [0.0029968919338400238, 0.007402728636239055], [0.0030027407564892613, 0.0074281277802418205], [0.0030085852571358606, 0.007453526190203774], [0.003014424734312599, 0.007478922012565629], [0.0030202584780832896, 0.007504313368704147], [0.003026085770188423, 0.007529698355149343], [0.003031905884195024, 0.007555075043816072], [0.0030377180856507547, 0.007580441482250205], [0.0030435216322421445, 0.007605795693889583], [0.0030493157739569453, 0.007631135678339849], [0.0030550997532505586, 0.007656459411665481], [0.0030608728052164543, 0.0076817648466958955], [0.0030666341577606053, 0.007707049913347047], [0.003072383031779785, 0.007732312518958536], [0.0030781186413438083, 0.007757550548646268], [0.0030838401938815666, 0.007782761865670929], [0.003089546890370893, 0.007807944311822333], [0.003095237925532164, 0.007833095707819617], [0.0031009124880256394, 0.007858213853727558], [0.0031065697606524763, 0.007883296529388973], [0.003112208920559412, 0.007908341494873227], [0.003117829139447088, 0.007933346490940978], [0.0031234295837819443, 0.007958309239525193], [0.0031290094150117314, 0.007983227444228353], [0.0031345677897845654, 0.008008098790835989], [0.0031401038601715564, 0.008032920947846427], [0.003145616773892945, 0.008057691567016832], [0.003151105674547794, 0.008082408283925395], [0.0031565697018472124, 0.008107068718549694], 
[0.0031620079918510805, 0.008131670475861174], [0.0031674196772083587, 0.008156211146435586], [0.0031728038874008703, 0.00818068830707935], [0.0031781597489907083, 0.008205099521471734], [0.0031834863858711568, 0.00822944234082266], [0.003188782919521227, 0.008253714304546031], [0.0031940484692637937, 0.008277912940948463], [0.003199282152527358, 0.008302035767933084], [0.003204483085111488, 0.008326080293718377], [0.0032096503814559156, 0.008350044017571764], [0.00321478315491339, 0.00837392443055768], [0.0032198805180262396, 0.008397719016299926], [0.003224941582806784, 0.008421425251757972], [0.0032299654610214766, 0.008445040608017042], [0.003234951264478985, 0.00846856255109149], [0.003239898105322112, 0.008491988542741325], [0.0032448050963236478, 0.008515316041301367], [0.0032496713511861865, 0.008538542502522846], [0.003254495984845906, 0.008561665380426933], [0.003259278113780387, 0.008584682128169829], [0.0032640168563204284, 0.008607590198918996], [0.0032687113329659496, 0.008630387046740198], [0.003273360666705946, 0.00865307012749472], [0.0032779639833425303, 0.008675636899746382], [0.0032825204118190417, 0.008698084825677976], [0.003287029084552262, 0.008720411372016288], [0.0032914891377686735, 0.0087426140109657], [0.003295899711844795, 0.008764690221149226], [0.0033002599516515174, 0.008786637488556974], [0.003304569006902456, 0.008808453307501136], [0.003308826032506215, 0.008830135181577006], [0.0033130301889225614, 0.008851680624629514], [0.0033171806425224127, 0.008873087161724488], [0.0033212765659515527, 0.00889435233012424], [0.003325317138497986, 0.008915473680266628], [0.0033293015464628613, 0.008936448776747088], [0.0033332289835347776, 0.008957275199302915], [0.003337098651167397, 0.008977950543799138], [0.00334090975896019, 0.0089984724232153], [0.003344661525042164, 0.009018838468632452], [0.0033483531764583814, 0.00903904633021964], [0.0033519839495590852, 0.009059093678219316], [0.0033555530903912312, 0.00907897820393063], 
[0.003359059855092182, 0.009098697620690337], [0.0033625035102853467, 0.009118249664850217], [0.003365883333477502, 0.009137632096750422], [0.0033691986134575026, 0.009156842701688046], [0.0033724486506961386, 0.009175879290880175], [0.0033756327577467954, 0.009194739702420565], [0.003378750259646627, 0.009213421802229387], [0.0033818004943178886, 0.009231923484995163], [0.003384782812969105, 0.009250242675108208], [0.0033876965804956857, 0.009268377327584836], [0.003390541175879653, 0.009286325428981633], [0.0033933159925880257, 0.00930408499829904], [0.0033960204389695586, 0.009321654087873438], [0.0033986539386493208, 0.009339030784257189], [0.0034012159309207645, 0.009356213209085801], [0.0034037058711348213, 0.009373199519931572], [0.0034061232310855644, 0.009389987911142941], [0.003408467499392047, 0.009406576614668954], [0.003410738181875776, 0.009422963900868135], [0.003412934801933416, 0.009439148079301062], [0.0034150569009042436, 0.00945512749950608], [0.00341710403843184, 0.009470900551757401], [0.00341907579281959, 0.009486465667805097], [0.003420971761379469, 0.009501821321596259], [0.0034227915607736494, 0.009516966029976797], [0.003424534827348476, 0.00953189835337327], [0.0034262012174602332, 0.00954661689645427], [0.0034277904077923576, 0.009561120308770606], [0.0034293020956635053, 0.009575407285374095], [0.0034307359993260692, 0.009589476567414058], [0.003432091858254652, 0.009603326942711399], [0.0034333694334240427, 0.00961695724630957], [0.003434568507576292, 0.009630366361002016]]


--------------------------------------------------------------------------------
/fig/overfitting.json:
--------------------------------------------------------------------------------
1 | [[2.0762772323329082, 1.8232334122685845, 1.6640751933146665, 1.4913409287162824, 1.4626645665352562, 1.5608730982986192, 1.3270934349008427, 1.3031689400520545, 1.2737198013316875, 1.2353298430277617, 1.2781249875365142, 1.2587094591590358, 1.2236320447498565, 1.2049258878595992, 1.202838510821453, 1.2175903284804579, 1.2166038163981336, 1.2302002518540471, 1.2284292747614989, 1.2284082512336671, 1.2206853894877705, 1.21982789310683, 1.2416121174277031, 1.2386867792565612, 1.2590040086618466, 1.2442300811597213, 1.256214615756384, 1.2688359032682412, 1.262880085921275, 1.2580241299014177, 1.2715941639378459, 1.2704272355044199, 1.2713173651241083, 1.2883075453227311, 1.309478393757302, 1.2884464353157816, 1.2992864292684581, 1.2995723853510095, 1.3057065520137037, 1.2996067919082652, 1.3113694262185569, 1.3180980499575814, 1.3224531791316712, 1.3288895530170624, 1.333896011747612, 1.3342655386450013, 1.3507230411896862, 1.349523874760193, 1.3486221834113297, 1.3492461107256304, 1.371103940789913, 1.363742107503537, 1.3591970586051429, 1.3628600849625045, 1.3715455620910941, 1.3762306334690999, 1.374571326678441, 1.3797013054519787, 1.3852744476067012, 1.3910542555139365, 1.3898246384066992, 1.3990962884350675, 1.4024643563307768, 1.4090064214871054, 1.4084778553386963, 1.4088023031568424, 1.4191800292184851, 1.4231642276020737, 1.4249505527888344, 1.4243906063296523, 1.4329048311102033, 1.4324957023669891, 1.4393786314154042, 1.4422194928893282, 1.4464979740530604, 1.4456638780161708, 1.4510268869602028, 1.4569921654272227, 1.4501932854980157, 1.4586286646321187, 1.4641932180653843, 1.4627134909477864, 1.4695342388383457, 1.4677910970476582, 1.4741781001557179, 1.4716445971833882, 1.4801857605543194, 1.4824209608683785, 1.4850739585015795, 1.4862526009107158, 1.4891994206257972, 1.4911449111215642, 1.4895703546607124, 1.4965483049082178, 1.498475574872792, 1.4983442080951213, 1.5006331103383848, 1.5026762910773346, 1.5056581200232744, 1.5052999301275902, 
1.5101971277214103, 1.5052100436149518, 1.5122319812343581, 1.5140032252405993, 1.5124829079283069, 1.5155523493173086, 1.5173126087446009, 1.5182757080707934, 1.5226359219963441, 1.5219631288706079, 1.5280382373274002, 1.5305864446018889, 1.5308400057540297, 1.5298912015444006, 1.5299337876516403, 1.5359447484558009, 1.5373272763323333, 1.5357721830268081, 1.5411296193695012, 1.5408039410712637, 1.5435982660843079, 1.5448396233143082, 1.54640053335383, 1.5497924569986792, 1.5505113782392206, 1.5510873904758762, 1.5522372758736236, 1.5551534205865707, 1.5579037629164372, 1.5562529728398988, 1.557078203694122, 1.5605381965411875, 1.5615119183693009, 1.5620335052988363, 1.5636399289779603, 1.5666556667329066, 1.565967137736715, 1.5692899251189862, 1.569455366800145, 1.570201924893627, 1.5724338032777558, 1.5743182022210231, 1.5779052215415215, 1.5765908208317501, 1.5777906843645095, 1.5807065832710021, 1.5826042798570108, 1.5834282733757874, 1.5828727942783614, 1.5856236197268949, 1.5865253156344346, 1.5870711209440933, 1.5901102159916298, 1.589831374782438, 1.5903365893797863, 1.5922289915737446, 1.5937242093943276, 1.5957483404630928, 1.5961125976163055, 1.5970150572826043, 1.5978109963232512, 1.5992551729862305, 1.6002494658191888, 1.6011359241282295, 1.6026033918901981, 1.6036086580112265, 1.6048749030222984, 1.6074694961111247, 1.606561408369042, 1.6080270858679979, 1.6070173269387908, 1.6073846856343872, 1.6108499262729024, 1.6142347957554661, 1.6123239687358804, 1.6161556699815407, 1.6165460098238449, 1.6175561633900144, 1.6189398493899978, 1.6196154264250775, 1.6203787165288632, 1.621001492936847, 1.6202434861259687, 1.6245124200435654, 1.6239635265755512, 1.6241046070201524, 1.6252458899999425, 1.6270324298456853, 1.6251838072169713, 1.6275992851310326, 1.6284252702633855, 1.63017431200309, 1.6310654169645247, 1.6317526370318307, 1.6338220017737439, 1.6337496047773168, 1.6341251344927106, 1.6352945051866614, 1.6350194166439092, 1.6370499538617151, 
1.6398726421890257, 1.6392502241532569, 1.6407454538992294, 1.6403690089576106, 1.6407818413256172, 1.6422394672335026, 1.6440166122111106, 1.6445019840290256, 1.6454238395620735, 1.6446012999992357, 1.646388125891822, 1.647477767725033, 1.6489903681956257, 1.6495670111667955, 1.6518317266949349, 1.6521411302385651, 1.6516324900159436, 1.6519268509016449, 1.6536769664130893, 1.6557231011758236, 1.6556135178242031, 1.6563575299485291, 1.6585034371713305, 1.6590068125468771, 1.6594819047820268, 1.6602894618927027, 1.6610971710252704, 1.6614742412028516, 1.662062758010538, 1.6618888908042855, 1.664003604166757, 1.6650773987115881, 1.6655027680401031, 1.6669239090330996, 1.6670229476327978, 1.6673755478034697, 1.668631108043269, 1.6691187845751598, 1.6698574553969809, 1.6699987129628646, 1.6718587417171702, 1.6728186780767957, 1.6735770871095164, 1.6751317162887345, 1.6754619738035605, 1.6757497694666139, 1.6767453551142881, 1.6773935555529487, 1.6790329446692798, 1.6796379042981611, 1.6798597028202431, 1.6814275113531045, 1.681540584668908, 1.6825376023031897, 1.6838370704483998, 1.6839490568545408, 1.6849619150773361, 1.6856698652111073, 1.6863000466757747, 1.6871126637371965, 1.6880316020404877, 1.6879506729766618, 1.6894802944100824, 1.6905853787788423, 1.6917216848211414, 1.6926191212187904, 1.6929352076880684, 1.6942338256895795, 1.6948350819305742, 1.6947248010331575, 1.6956565470999065, 1.6966226855434137, 1.6977135512214465, 1.6976628333622414, 1.6992728088551838, 1.6995188736719555, 1.7004758991163513, 1.7003390074918037, 1.701757593590616, 1.7030610036769165, 1.7032572445905845, 1.70424379410192, 1.7050439989771855, 1.7050460437739656, 1.705340680240933, 1.7063172030736129, 1.7074694863569662, 1.7081060241444701, 1.7083152915972599, 1.7100598915164169, 1.7098580352207235, 1.7110452055463516, 1.7118315825579393, 1.7119458704259569, 1.7128193121191575, 1.714088357669219, 1.7143137710846792, 1.7151310218423155, 1.7158160290566882, 1.7164824543799349, 
1.7170992986317428, 1.7176374854282062, 1.7180675777622618, 1.7183529546663991, 1.7183913489385712, 1.7200745183529782, 1.7201860622786533, 1.7209378765189278, 1.7211755130028632, 1.7227001954459273, 1.7233033322709161, 1.724388593301251, 1.7249286817001739, 1.7257212014285681, 1.7264759882752161, 1.7263145509431113, 1.727169750737453, 1.7274545949025009, 1.728138187570482, 1.7284329943827041, 1.7291647307556921, 1.7297691124388797, 1.7303999198392592, 1.7309397188198092, 1.731884218614588, 1.732752372009305, 1.7337895563134313, 1.7338046236202502, 1.7343991319697829, 1.7354045120011685, 1.7359328966782865, 1.7363186919712537, 1.7368728425169133, 1.7376556264901872, 1.738218355695242, 1.7389320558428096, 1.7392586695357521, 1.7395726489260961, 1.7403457853492119, 1.7411144686251934, 1.7418114756639416, 1.7423787115928511, 1.7429156859372819, 1.7433516620794796, 1.7445995405595869, 1.7449650517928348, 1.7454219936222521, 1.7454049499805062, 1.7461754045631253, 1.747238770079671, 1.7480364894800848, 1.7481891743633657, 1.7484612615979531, 1.7492265370334927, 1.7499314246477431, 1.7503425435026281, 1.7509597451899421, 1.7513546402678131, 1.7521155770124217, 1.7527284609234106, 1.7529769148484364, 1.7538672981186787, 1.7544452588346211, 1.7549061780496615, 1.7553447817113197, 1.755809458463981, 1.7558520671233728, 1.7568602710475358, 1.7568136319142174, 1.7575653625849685, 1.758205144851257, 1.7587151972026469, 1.7591737337097375, 1.7594811212041248, 1.7599224471680641, 1.7604958546917258, 1.7614357150479159, 1.7620773477904375, 1.7629340224321914, 1.7634360517269456, 1.7634779671556928, 1.7642836857118194, 1.7646825015144432, 1.7652084365396346, 1.765550476840142, 1.766323427364384, 1.7671269295963092, 1.7674831990461801, 1.7679902398030436, 1.7688556765701444, 1.7693755350034828, 1.7691087563919485, 1.7699593793502248, 1.7702219339149627, 1.7709157134395872, 1.7709574156060244, 1.7720375325001132, 1.7722910641140253, 1.7728105919575348, 1.7731493757222807], [5887, 
6505, 6970, 7271, 7433, 7198, 7710, 7747, 7850, 7899, 7853, 7848, 7986, 8020, 8046, 8039, 8056, 8090, 8090, 8107, 8086, 8126, 8104, 8107, 8116, 8121, 8128, 8121, 8135, 8126, 8137, 8149, 8146, 8124, 8118, 8146, 8134, 8156, 8148, 8165, 8170, 8146, 8157, 8157, 8156, 8161, 8134, 8156, 8166, 8145, 8140, 8154, 8156, 8147, 8144, 8147, 8144, 8156, 8154, 8157, 8148, 8137, 8144, 8145, 8148, 8149, 8147, 8152, 8154, 8152, 8136, 8151, 8145, 8152, 8150, 8155, 8152, 8147, 8159, 8148, 8164, 8160, 8153, 8149, 8153, 8158, 8153, 8160, 8154, 8165, 8158, 8155, 8159, 8164, 8170, 8176, 8174, 8176, 8165, 8173, 8163, 8189, 8181, 8175, 8180, 8185, 8177, 8179, 8184, 8178, 8185, 8177, 8182, 8194, 8189, 8174, 8184, 8188, 8180, 8192, 8180, 8181, 8197, 8183, 8184, 8194, 8186, 8188, 8189, 8195, 8192, 8184, 8194, 8197, 8197, 8185, 8196, 8192, 8200, 8200, 8199, 8197, 8191, 8196, 8193, 8193, 8192, 8194, 8201, 8195, 8196, 8195, 8196, 8197, 8190, 8196, 8197, 8197, 8193, 8190, 8195, 8196, 8199, 8195, 8191, 8192, 8187, 8189, 8192, 8193, 8189, 8192, 8194, 8187, 8189, 8193, 8197, 8193, 8194, 8195, 8197, 8191, 8198, 8196, 8196, 8196, 8194, 8192, 8196, 8192, 8193, 8192, 8197, 8195, 8191, 8192, 8191, 8194, 8191, 8190, 8192, 8191, 8195, 8190, 8197, 8193, 8194, 8191, 8196, 8192, 8192, 8198, 8199, 8199, 8200, 8199, 8195, 8196, 8199, 8191, 8194, 8199, 8205, 8195, 8204, 8206, 8207, 8208, 8204, 8203, 8203, 8207, 8207, 8200, 8206, 8206, 8203, 8203, 8206, 8210, 8208, 8207, 8208, 8208, 8209, 8209, 8208, 8211, 8210, 8209, 8208, 8213, 8205, 8207, 8211, 8210, 8213, 8212, 8213, 8211, 8216, 8213, 8214, 8214, 8216, 8214, 8218, 8218, 8218, 8217, 8220, 8217, 8221, 8217, 8217, 8218, 8217, 8217, 8218, 8223, 8221, 8223, 8222, 8226, 8218, 8221, 8217, 8219, 8218, 8221, 8219, 8218, 8221, 8220, 8220, 8214, 8220, 8218, 8218, 8220, 8221, 8220, 8222, 8220, 8223, 8220, 8221, 8223, 8219, 8217, 8221, 8217, 8218, 8220, 8221, 8220, 8219, 8221, 8219, 8219, 8223, 8223, 8222, 8221, 8221, 8221, 8219, 8220, 8220, 8221, 8222, 8222, 8221, 8222, 
8222, 8223, 8222, 8222, 8222, 8220, 8221, 8219, 8220, 8219, 8219, 8219, 8220, 8222, 8220, 8218, 8218, 8220, 8222, 8221, 8220, 8221, 8219, 8222, 8220, 8218, 8221, 8220, 8221, 8223, 8224, 8223, 8225, 8224, 8223, 8223, 8224, 8226, 8223, 8226, 8227, 8223, 8222, 8222, 8223, 8222, 8222, 8223, 8221, 8222, 8221, 8222, 8223, 8222, 8223, 8221, 8221, 8222, 8221, 8220, 8223, 8223, 8221, 8220, 8220, 8223], [1.8433647860328504, 1.4777434227600235, 1.2072861295975754, 1.0133122929166287, 0.90463155805724549, 0.88914085245876628, 0.69510736399672024, 0.60063282069902524, 0.54091603110413877, 0.47790622727830795, 0.45205834534806816, 0.41823557259928568, 0.36353886658117263, 0.34097725702984655, 0.30704391068438625, 0.28505238206157008, 0.27569888987164376, 0.24768957063883623, 0.23176895744869463, 0.21675013886039948, 0.20851633183590543, 0.19581288303288452, 0.1883450862028723, 0.17598212888313519, 0.17148361118265443, 0.1579893219777749, 0.15232727768047913, 0.14908652369052086, 0.13959561541748497, 0.13575056473237712, 0.13038402753584682, 0.12633393160050335, 0.12301377811155474, 0.11760787528185435, 0.11768063532050554, 0.1116090975652263, 0.10594482960527374, 0.10146350864471576, 0.098950522532661414, 0.097470601335692755, 0.092587258119849816, 0.08890875709297294, 0.087334066191656873, 0.083968079007007201, 0.080425567454585997, 0.077388473929440482, 0.075732073394689639, 0.073264736306067349, 0.071041388148701201, 0.069457129692052144, 0.068501170653270813, 0.066032566370952353, 0.063988473365526183, 0.062795927261155279, 0.060741640047227284, 0.059231989932508271, 0.05768849915339639, 0.05665547621362698, 0.054958317927951077, 0.053790448746231824, 0.052736286230260894, 0.051837510403181575, 0.050246365862518169, 0.049157161612436856, 0.048399496486416158, 0.047223244213956135, 0.0462029122464475, 0.045362346946900868, 0.044991942955692463, 0.043548301006926415, 0.042619315582575576, 0.041872679888486676, 0.041353003773040818, 0.040343023072558423, 0.039610640543463034, 
0.038827894599382441, 0.038165321713376478, 0.037555764670128743, 0.036994307367393418, 0.036209725814626531, 0.035681364944148621, 0.035011174581168515, 0.034291592495846304, 0.033547709156373326, 0.032718263569078379, 0.032023904582162074, 0.031191659853705877, 0.030626708962014425, 0.029958425643681459, 0.029353265126344338, 0.028797964387740602, 0.028422012718391305, 0.027812191605286883, 0.02735501112071697, 0.026993564391853634, 0.026569113029157501, 0.026092064055110505, 0.025674781762346367, 0.025305894007476161, 0.024889083317963079, 0.024558131841685489, 0.024199329722505512, 0.023811240945016011, 0.02349334595548817, 0.023144448162318861, 0.022855591970993343, 0.022543488743196019, 0.022249708906225588, 0.021965529914739868, 0.021698026254864673, 0.021378123124622207, 0.021088144829246657, 0.020886774671724655, 0.020580654801762799, 0.020340928401971565, 0.020106832060966386, 0.019866100581400464, 0.019595427663293804, 0.019357863817740469, 0.019161344170354123, 0.018904348055376753, 0.018717446712462619, 0.018465695920465829, 0.018272597088603904, 0.018074038569034587, 0.017902496111286439, 0.017688582191512987, 0.017482922233189643, 0.017310611695394759, 0.017115500371685799, 0.016952389472265048, 0.016780096680463093, 0.016597920851012388, 0.016464051880113292, 0.016282898613349604, 0.016136333558525773, 0.015962599748449359, 0.015804226601263859, 0.01564233479142425, 0.015498832345390763, 0.015356876162963705, 0.01521854563645951, 0.015073417138070878, 0.014934450228727312, 0.014825699216383425, 0.014665671908121175, 0.014541603837007016, 0.014406468550218309, 0.014283810530787627, 0.014161122679261126, 0.014032574364961678, 0.013910840101390157, 0.013798885792050663, 0.013687896782864042, 0.013560391660895389, 0.013438577986090204, 0.013334228955099942, 0.013211877601969076, 0.013097885924483273, 0.012989409068889089, 0.012879749073942919, 0.012764357708175841, 0.012655707745460402, 0.012552080081974131, 0.012437201020647024, 0.012337374519593609, 
0.012235799102028641, 0.012145033948497589, 0.012041173192129051, 0.011950396057231671, 0.011858981944298361, 0.011775080348835348, 0.011680489496468773, 0.011599470483943787, 0.011508039313397542, 0.011428340295649663, 0.011349428130039004, 0.011268838312831554, 0.011189912165110935, 0.011109723837424776, 0.011034560780687212, 0.010957160107627907, 0.010882577878407575, 0.010811098528082833, 0.010735442936960134, 0.010665821648065388, 0.010595437817822002, 0.010527971065975287, 0.010460804790718484, 0.010390628071234642, 0.010324849341451231, 0.010260426245422116, 0.01019672721940704, 0.010130572405193659, 0.010072420053693198, 0.010006628636997746, 0.0099438229901934062, 0.0098844582257840465, 0.0098246362024759087, 0.0097663800115457729, 0.0097127946154311525, 0.0096488779061362981, 0.0095934368030428964, 0.0095357715395345787, 0.0094828105181004665, 0.0094248109827411788, 0.0093706712239598373, 0.0093193323548417979, 0.0092640297508175557, 0.0092140979255875208, 0.0091599683295718951, 0.0091085532532171488, 0.0090566144373361267, 0.0090078660753137273, 0.0089574895292539591, 0.0089102414421480258, 0.0088602907747430508, 0.0088130951045928482, 0.0087627473899933809, 0.0087169057599027176, 0.008669602206522074, 0.0086245638096965112, 0.008578829590782322, 0.0085337682055181534, 0.0084904627352943417, 0.0084437487803313786, 0.0084005496486743488, 0.0083564344772356029, 0.0083141617581171147, 0.0082711428471498073, 0.0082280554148385712, 0.0081861058590978232, 0.0081453445960328139, 0.0081050739787153214, 0.0080629964697627073, 0.0080227532574564074, 0.0079824692727665826, 0.0079430728092899933, 0.0079037010818164485, 0.007866500840720405, 0.0078262183123015989, 0.0077880045569414854, 0.0077501375585374184, 0.0077130927824958879, 0.0076749447769945724, 0.0076383206771078876, 0.0076037234598087793, 0.0075656978732744681, 0.0075306312245213185, 0.0074933959545258758, 0.007457863479961753, 0.0074224044906936416, 0.0073875427008978636, 0.0073533040575986478, 
0.0073188636048639828, 0.0072851975374219257, 0.007249551514341885, 0.0072155976990766037, 0.0071837716588880659, 0.0071490490354368446, 0.0071149465640242852, 0.0070813394799004378, 0.007047562934784808, 0.0070148336131211995, 0.0069818923170584749, 0.0069501733149712605, 0.0069160116872818119, 0.0068840401735022448, 0.0068514561156714704, 0.0068196243684198622, 0.0067863217789309715, 0.0067534474287550073, 0.0067200631321686754, 0.0066885181859545314, 0.0066568666894365278, 0.0066240210906374454, 0.0065926113197570003, 0.0065626818950437473, 0.0065315628190666742, 0.0065011591385262619, 0.0064707220819024717, 0.0064414830189593493, 0.006412153696041689, 0.0063833172680227371, 0.0063550710682834894, 0.0063263283024680974, 0.006298472160087395, 0.0062705864326000225, 0.0062432859687904735, 0.0062170035873869919, 0.0061896468954568112, 0.0061633789845077242, 0.0061370029472117751, 0.0061113940805202389, 0.006084934659958091, 0.0060594838733285623, 0.0060344559813401231, 0.0060094203492223955, 0.0059846013866920586, 0.0059595844553596081, 0.0059354063125516329, 0.0059106299326143925, 0.0058865500049247376, 0.0058629576783263884, 0.0058398259656488255, 0.0058157681197817893, 0.0057925894220656601, 0.005769719671300088, 0.0057465133779847615, 0.005723885452264303, 0.0057011088426493122, 0.0056788750789765873, 0.0056565840835991754, 0.0056345091063197805, 0.0056126545497789098, 0.0055907478836642096, 0.0055695879730558121, 0.0055480499521874576, 0.0055269393518896613, 0.0055055760390316528, 0.0054846407395392127, 0.0054639492319568522, 0.0054430681469272582, 0.0054228564773156733, 0.0054023245806928445, 0.0053821589279841198, 0.0053622701718764919, 0.0053422962061517205, 0.0053225545608313832, 0.0053025727602387341, 0.0052830117459178542, 0.0052637900268743395, 0.0052445911331395157, 0.005225301628550106, 0.0052063734092377542, 0.0051873981768633098, 0.0051686550874710333, 0.0051501308303700411, 0.0051317159223978588, 0.0051132407038391343, 0.0050949646868208237, 
0.0050768163679191418, 0.0050588920830417876, 0.0050410603343375689, 0.0050234460109072039, 0.0050058479424628429, 0.0049883551396970116, 0.0049709905024850117, 0.0049534380633703065, 0.0049362622084869086, 0.0049191825923840336, 0.0049019339002642093, 0.0048852638935989387, 0.0048685355903025033, 0.0048516783369022421, 0.004835176074289591, 0.0048187414702752064, 0.0048024676248850815, 0.004786205475115702, 0.0047700818966571806, 0.0047540229511325634, 0.0047381312460395321, 0.0047222819541272538, 0.004706582163886538, 0.0046908199271972883, 0.004675294500871013, 0.0046600271163220889, 0.004644560115930613, 0.0046292967748754361, 0.0046142583741101018, 0.0045990679936898007, 0.0045841369248796251, 0.0045692503625969364, 0.0045545855753471856, 0.0045398450796813317, 0.0045253266069212915, 0.0045109305522560966, 0.0044964925773122444, 0.0044821543135014023, 0.0044678460394820168, 0.0044536907146381774, 0.0044396992221706283, 0.0044257701614943187, 0.0044119841879101604, 0.0043980561122206668, 0.0043843449981956339, 0.0043706979320343045, 0.004357190360999688, 0.0043441270550987543, 0.0043307263717703463, 0.0043174162674548587, 0.004304214971579858, 0.0042909772791592301, 0.004277917818300953, 0.0042651151252712065, 0.0042522204909684596, 0.0042395626122975951, 0.0042267964052173168, 0.0042141543979121142, 0.0042016444290295845], [690, 749, 812, 848, 872, 865, 914, 932, 948, 949, 957, 966, 970, 973, 979, 977, 981, 982, 982, 985, 984, 988, 989, 990, 988, 990, 991, 991, 992, 992, 993, 993, 992, 992, 993, 994, 994, 995, 996, 995, 995, 996, 996, 996, 997, 997, 998, 998, 998, 998, 998, 998, 997, 997, 998, 998, 998, 998, 998, 998, 998, 998, 998, 998, 998, 999, 999, 1000, 1000, 1000, 1000, 1000, 999, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 
1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]]


--------------------------------------------------------------------------------
/fig/overfitting.py:
--------------------------------------------------------------------------------
  1 | """
  2 | overfitting
  3 | ~~~~~~~~~~~
  4 | 
  5 | Plot graphs to illustrate the problem of overfitting.  
  6 | """
  7 | 
  8 | # Standard library
  9 | import json
 10 | import random
 11 | import sys
 12 | 
 13 | # My library
 14 | sys.path.append('../src/')
 15 | import mnist_loader
 16 | import network2
 17 | 
 18 | # Third-party libraries
 19 | import matplotlib.pyplot as plt
 20 | import numpy as np
 21 | 
 22 | 
 23 | def main(filename, num_epochs,
 24 |          training_cost_xmin=200, 
 25 |          test_accuracy_xmin=200, 
 26 |          test_cost_xmin=0, 
 27 |          training_accuracy_xmin=0,
 28 |          training_set_size=1000, 
 29 |          lmbda=0.0):
 30 |     """``filename`` is the name of the file where the results will be
 31 |     stored.  ``num_epochs`` is the number of epochs to train for.
 32 |     ``training_set_size`` is the number of images to train on.
 33 |     ``lmbda`` is the regularization parameter.  The other parameters
 34 |     set the epochs at which to start plotting on the x axis.
 35 |     """
 36 |     run_network(filename, num_epochs, training_set_size, lmbda)
 37 |     make_plots(filename, num_epochs, 
 38 |                test_accuracy_xmin,
 39 |                training_cost_xmin,
 40 |                test_accuracy_xmin, 
 41 |                training_accuracy_xmin,
 42 |                training_set_size)
 43 |                        
 44 | def run_network(filename, num_epochs, training_set_size=1000, lmbda=0.0):
 45 |     """Train the network for ``num_epochs`` on ``training_set_size``
 46 |     images, and store the results in ``filename``.  Those results can
 47 |     later be used by ``make_plots``.  Note that the results are stored
 48 |     to disk in large part because it's convenient not to have to
 49 |     ``run_network`` each time we want to make a plot (it's slow).
 50 | 
 51 |     """
 52 |     # Make results more easily reproducible
 53 |     random.seed(12345678)
 54 |     np.random.seed(12345678)
 55 |     training_data, validation_data, test_data = mnist_loader.load_data_wrapper()
 56 |     net = network2.Network([784, 30, 10], cost=network2.CrossEntropyCost())
 57 |     net.large_weight_initializer()
 58 |     test_cost, test_accuracy, training_cost, training_accuracy \
 59 |         = net.SGD(training_data[:training_set_size], num_epochs, 10, 0.5,
 60 |                   evaluation_data=test_data, lmbda = lmbda,
 61 |                   monitor_evaluation_cost=True, 
 62 |                   monitor_evaluation_accuracy=True, 
 63 |                   monitor_training_cost=True, 
 64 |                   monitor_training_accuracy=True)
 65 |     f = open(filename, "w")
 66 |     json.dump([test_cost, test_accuracy, training_cost, training_accuracy], f)
 67 |     f.close()
 68 | 
 69 | def make_plots(filename, num_epochs, 
 70 |                training_cost_xmin=200, 
 71 |                test_accuracy_xmin=200, 
 72 |                test_cost_xmin=0, 
 73 |                training_accuracy_xmin=0,
 74 |                training_set_size=1000):
 75 |     """Load the results from ``filename``, and generate the corresponding
 76 |     plots. """
 77 |     f = open(filename, "r")
 78 |     test_cost, test_accuracy, training_cost, training_accuracy \
 79 |         = json.load(f)
 80 |     f.close()
 81 |     plot_training_cost(training_cost, num_epochs, training_cost_xmin)
 82 |     plot_test_accuracy(test_accuracy, num_epochs, test_accuracy_xmin)
 83 |     plot_test_cost(test_cost, num_epochs, test_cost_xmin)
 84 |     plot_training_accuracy(training_accuracy, num_epochs, 
 85 |                            training_accuracy_xmin, training_set_size)
 86 |     plot_overlay(test_accuracy, training_accuracy, num_epochs,
 87 |                  min(test_accuracy_xmin, training_accuracy_xmin),
 88 |                  training_set_size)
 89 | 
 90 | def plot_training_cost(training_cost, num_epochs, training_cost_xmin):
 91 |     fig = plt.figure()
 92 |     ax = fig.add_subplot(111)
 93 |     ax.plot(np.arange(training_cost_xmin, num_epochs), 
 94 |             training_cost[training_cost_xmin:num_epochs],
 95 |             color='#2A6EA6')
 96 |     ax.set_xlim([training_cost_xmin, num_epochs])
 97 |     ax.grid(True)
 98 |     ax.set_xlabel('Epoch')
 99 |     ax.set_title('Cost on the training data')
100 |     plt.show()
101 | 
102 | def plot_test_accuracy(test_accuracy, num_epochs, test_accuracy_xmin):
103 |     fig = plt.figure()
104 |     ax = fig.add_subplot(111)
105 |     ax.plot(np.arange(test_accuracy_xmin, num_epochs), 
106 |             [accuracy/100.0 
107 |              for accuracy in test_accuracy[test_accuracy_xmin:num_epochs]],
108 |             color='#2A6EA6')
109 |     ax.set_xlim([test_accuracy_xmin, num_epochs])
110 |     ax.grid(True)
111 |     ax.set_xlabel('Epoch')
112 |     ax.set_title('Accuracy (%) on the test data')
113 |     plt.show()
114 | 
115 | def plot_test_cost(test_cost, num_epochs, test_cost_xmin):
116 |     fig = plt.figure()
117 |     ax = fig.add_subplot(111)
118 |     ax.plot(np.arange(test_cost_xmin, num_epochs), 
119 |             test_cost[test_cost_xmin:num_epochs],
120 |             color='#2A6EA6')
121 |     ax.set_xlim([test_cost_xmin, num_epochs])
122 |     ax.grid(True)
123 |     ax.set_xlabel('Epoch')
124 |     ax.set_title('Cost on the test data')
125 |     plt.show()
126 | 
127 | def plot_training_accuracy(training_accuracy, num_epochs, 
128 |                            training_accuracy_xmin, training_set_size):
129 |     fig = plt.figure()
130 |     ax = fig.add_subplot(111)
131 |     ax.plot(np.arange(training_accuracy_xmin, num_epochs), 
132 |             [accuracy*100.0/training_set_size 
133 |              for accuracy in training_accuracy[training_accuracy_xmin:num_epochs]],
134 |             color='#2A6EA6')
135 |     ax.set_xlim([training_accuracy_xmin, num_epochs])
136 |     ax.grid(True)
137 |     ax.set_xlabel('Epoch')
138 |     ax.set_title('Accuracy (%) on the training data')
139 |     plt.show()
140 | 
141 | def plot_overlay(test_accuracy, training_accuracy, num_epochs, xmin,
142 |                  training_set_size):
143 |     fig = plt.figure()
144 |     ax = fig.add_subplot(111)
145 |     ax.plot(np.arange(xmin, num_epochs), 
146 |             [accuracy/100.0 for accuracy in test_accuracy], 
147 |             color='#2A6EA6',
148 |             label="Accuracy on the test data")
149 |     ax.plot(np.arange(xmin, num_epochs), 
150 |             [accuracy*100.0/training_set_size 
151 |              for accuracy in training_accuracy], 
152 |             color='#FFA933',
153 |             label="Accuracy on the training data")
154 |     ax.grid(True)
155 |     ax.set_xlim([xmin, num_epochs])
156 |     ax.set_xlabel('Epoch')
157 |     ax.set_ylim([90, 100])
158 |     plt.legend(loc="lower right")
159 |     plt.show()
160 | 
161 | if __name__ == "__main__":
162 |     filename = raw_input("Enter a file name: ")
163 |     num_epochs = int(raw_input(
164 |         "Enter the number of epochs to run for: "))
165 |     training_cost_xmin = int(raw_input(
166 |         "training_cost_xmin (suggest 200): "))
167 |     test_accuracy_xmin = int(raw_input(
168 |         "test_accuracy_xmin (suggest 200): "))
169 |     test_cost_xmin = int(raw_input(
170 |         "test_cost_xmin (suggest 0): "))
171 |     training_accuracy_xmin = int(raw_input(
172 |         "training_accuracy_xmin (suggest 0): "))
173 |     training_set_size = int(raw_input(
174 |         "Training set size (suggest 1000): "))
175 |     lmbda = float(raw_input(
176 |         "Enter the regularization parameter, lambda (suggest: 5.0): "))
177 |     main(filename, num_epochs, training_cost_xmin, 
178 |          test_accuracy_xmin, test_cost_xmin, training_accuracy_xmin,
179 |          training_set_size, lmbda)
180 | 


--------------------------------------------------------------------------------
/fig/overfitting1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/overfitting1.png


--------------------------------------------------------------------------------
/fig/overfitting2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/overfitting2.png


--------------------------------------------------------------------------------
/fig/overfitting3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/overfitting3.png


--------------------------------------------------------------------------------
/fig/overfitting4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/overfitting4.png


--------------------------------------------------------------------------------
/fig/overfitting_full.json:
--------------------------------------------------------------------------------
1 | [[0.56135590058630858, 0.47806921271034553, 0.457510836259925, 0.42504920544144992, 0.39449553344420019, 0.39810448800345, 0.37017079712250733, 0.37403997639944547, 0.36290253019659285, 0.4006868170859208, 0.36817548958488616, 0.37299310675826219, 0.36871967242261605, 0.37146610246666006, 0.35704621996697938, 0.35821464151288968, 0.38622103466509744, 0.37010939716781127, 0.36539832104327125, 0.35511546847032671, 0.3828088676932585, 0.36160025922354638, 0.37028708356461698, 0.37605182846277163, 0.36634313696187393, 0.36129044456360238, 0.37531885586439506, 0.36415225595876555, 0.35707895858237054, 0.36631987373588193], [9136, 9275, 9307, 9377, 9450, 9429, 9468, 9488, 9494, 9424, 9483, 9483, 9505, 9499, 9508, 9508, 9445, 9524, 9524, 9524, 9494, 9527, 9518, 9505, 9533, 9529, 9512, 9530, 9532, 9531], [0.55994588582554705, 0.44664870303435988, 0.42455329174078477, 0.38578320429266705, 0.33992291017592285, 0.33162477096795895, 0.3137480626518645, 0.30028971890544093, 0.27353890048167528, 0.30236927117202678, 0.26487026303889277, 0.2661714884193439, 0.24734280015146709, 0.26355551438395558, 0.23088530423416964, 0.22618350577327287, 0.25137541006767478, 0.23085585354651994, 0.21417931191800957, 0.20049587923059808, 0.23713128948069295, 0.20327728799861464, 0.21953883029836488, 0.20264436321820509, 0.19643949703516961, 0.18467980669870671, 0.18788606162530633, 0.18535916502880764, 0.18466759834259142, 0.17218286758911475], [45708, 46605, 46797, 47190, 47543, 47570, 47638, 47838, 48061, 47825, 48160, 48195, 48265, 48156, 48439, 48449, 48267, 48433, 48598, 48697, 48380, 48648, 48500, 48669, 48734, 48796, 48802, 48837, 48810, 48932]]


--------------------------------------------------------------------------------
/fig/overfitting_full.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/overfitting_full.png


--------------------------------------------------------------------------------
/fig/pca_hard_data.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/pca_hard_data.png


--------------------------------------------------------------------------------
/fig/pca_hard_data_fit.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/pca_hard_data_fit.png


--------------------------------------------------------------------------------
/fig/pca_limitations.py:
--------------------------------------------------------------------------------
 1 | """
 2 | pca_limitations
 3 | ~~~~~~~~~~~~~~~
 4 | 
 5 | Plot graphs to illustrate the limitations of PCA.
 6 | """
 7 | 
 8 | # Third-party libraries
 9 | from mpl_toolkits.mplot3d import Axes3D
10 | import matplotlib.pyplot as plt
11 | import numpy as np
12 | 
13 | # Plot just the data
14 | fig = plt.figure()
15 | ax = fig.gca(projection='3d')
16 | z = np.linspace(-2, 2, 20)
17 | theta = np.linspace(-4 * np.pi, 4 * np.pi, 20)
18 | x = np.sin(theta)+0.03*np.random.randn(20)
19 | y = np.cos(theta)+0.03*np.random.randn(20)
20 | ax.plot(x, y, z, 'ro')
21 | plt.show()
22 | 
23 | # Plot the data and the helix together
24 | fig = plt.figure()
25 | ax = fig.gca(projection='3d')
26 | z_helix = np.linspace(-2, 2, 100)
27 | theta_helix = np.linspace(-4 * np.pi, 4 * np.pi, 100)
28 | x_helix = np.sin(theta_helix)
29 | y_helix = np.cos(theta_helix)
30 | ax.plot(x, y, z, 'ro')
31 | ax.plot(x_helix, y_helix, z_helix, '')
32 | plt.show()
33 | 


--------------------------------------------------------------------------------
/fig/regularized.json:
--------------------------------------------------------------------------------
1 | [[2.1903999605374445, 1.935511698609431, 1.7732802465315387, 1.5964599634182588, 1.5652172568011837, 1.6556596783771818, 1.4247190111080004, 1.3955109870673246, 1.361995997263147, 1.3212968996719621, 1.3618994944086014, 1.3366270986282185, 1.2984174484948621, 1.2752432242493146, 1.2700513563686027, 1.2799551345628921, 1.2749818189596829, 1.2871333718872076, 1.2828951499601804, 1.2735961691755695, 1.2647655170041852, 1.2594502355807271, 1.2795772447252642, 1.2727190387701224, 1.2831666300389069, 1.2672449017721463, 1.2776841316470489, 1.2805727395923963, 1.2722125928642627, 1.2638997694524687, 1.269084743039151, 1.2659230794396108, 1.2641539416626584, 1.2752029232558733, 1.298792042906463, 1.2702727329507408, 1.2711688340896683, 1.2666235616415085, 1.2674371283336907, 1.2595334250676924, 1.2648151045633766, 1.2671334304994974, 1.266707046535049, 1.2656886025740639, 1.2691346809011976, 1.2584242684159623, 1.2740111893572812, 1.2663064744488597, 1.2615652820429444, 1.2582241448899749, 1.2739320485437491, 1.2712439351275546, 1.2545617120441739, 1.2508444846333084, 1.2591492899022321, 1.2536296905827162, 1.2468994556717075, 1.2484355031942465, 1.2471530150490704, 1.2505186156687369, 1.2388584688991651, 1.2430898462249378, 1.2398539196469682, 1.242251547050703, 1.2367804592809712, 1.2298331549219981, 1.2395688633999551, 1.2354039648955897, 1.2344606308529216, 1.2266843921216057, 1.2265644119519759, 1.2245581771451541, 1.2248119226544247, 1.2243084822111481, 1.2212309349638009, 1.2161717307338038, 1.2225008376402358, 1.2193471795614443, 1.2066653202233959, 1.2123964143141386, 1.2122530086322325, 1.2045089672810716, 1.2071180830593222, 1.1989333713336954, 1.1986255015016054, 1.1956459321325097, 1.2033704463208892, 1.2021008432740117, 1.1955283612344483, 1.1952024132825856, 1.1911013449298988, 1.1876590163918093, 1.1799304033665541, 1.1872817515808496, 1.1853155824780264, 1.1801518934970823, 1.1775421394848582, 1.1789945000698789, 1.1796464383843646, 1.1712541492322308, 
1.1690597759769423, 1.1620993711661565, 1.169520268455509, 1.1706984323357481, 1.16142157165315, 1.1594443722282681, 1.1551307036588288, 1.1560927746464871, 1.1558458569577748, 1.1495662430692957, 1.1484728277323, 1.1500520790631064, 1.1505873482743494, 1.1434314002731534, 1.1399993109386326, 1.1374729793443425, 1.1387912606672026, 1.1355838671499126, 1.1407036755338451, 1.1300401447223136, 1.1322867822750577, 1.1292371015493867, 1.128702809951617, 1.1294136504785579, 1.1283079440929984, 1.1220916685170936, 1.1201001413539298, 1.115187080898234, 1.1209986528548488, 1.1105355050898427, 1.1038970751483002, 1.1107296608773507, 1.1081484849851853, 1.1024815938527315, 1.1049008621060641, 1.1049124053518378, 1.0973347585372921, 1.0989702833881896, 1.0982371727299989, 1.093818667730126, 1.0928775595886391, 1.0896907351586045, 1.0924381381269741, 1.08208790851767, 1.0886157209258644, 1.0871294743556819, 1.0874176569695346, 1.0810207377200443, 1.0741795043940798, 1.0758584646112539, 1.0751401687109641, 1.0742516957490034, 1.0782702961220325, 1.0722235429082911, 1.0676018687662321, 1.0681955051837146, 1.0659656804470576, 1.065476631432785, 1.0568244036884478, 1.0563633108751844, 1.0531856804602515, 1.0538622665809867, 1.0529003762157403, 1.0518237229645757, 1.0532673191813249, 1.0470748079542624, 1.0473355198727492, 1.0478358455529726, 1.0465867871958874, 1.0427114422019927, 1.0327604365967915, 1.0302075306662013, 1.0421326895298604, 1.0461320167330266, 1.0327502589606541, 1.0460621333327711, 1.0316861390185799, 1.0312932803708881, 1.0354453434095872, 1.0288234156512026, 1.0220427243291068, 1.0236529852899188, 1.022506499230259, 1.0327268485891929, 1.0183557109961541, 1.0166409797026996, 1.0201653625824105, 1.0181942185781696, 1.0059281072990294, 1.0151120898053012, 1.0158007330308185, 1.0076227765153161, 1.0139342289012658, 1.0108626749865222, 1.0133465258769703, 1.0093790455178744, 1.0032704939881629, 1.0065849479396327, 0.99239921410409282, 1.0078593577776735, 
1.0056533242624528, 0.99382804473952768, 1.0000313327944634, 0.99267979287687835, 0.98996333995056751, 0.98771450417037443, 0.98835879860887599, 0.98707998292775845, 0.99162222526658916, 0.98196648125555774, 0.98830619571563316, 0.98659881401490612, 0.99273488773050456, 0.98124338898029595, 0.98701911078915805, 0.97950085339921733, 0.97754982301824889, 0.97393880231450858, 0.97728343447809918, 0.9822446382489719, 0.97264123514085032, 0.97552252049617039, 0.97787781129407614, 0.97911317538881426, 0.9685897325254843, 0.97031228863494989, 0.97280520294417017, 0.96809562498274027, 0.96329522921298438, 0.96499204553248841, 0.96872124082325861, 0.97279943472003694, 0.96380722647531536, 0.9681111619139029, 0.95255860371790635, 0.95685889512103384, 0.95490880778293086, 0.96165060698066118, 0.95429007422149059, 0.9485254903573368, 0.95964565458504292, 0.9463922864368649, 0.95301119244822674, 0.96000201462323909, 0.9533654233889125, 0.94897620981608699, 0.95449157578848032, 0.95096516926024288, 0.9540996353802198, 0.94724765894116203, 0.94778948958239595, 0.94591749106378031, 0.94877930991320225, 0.94627724201870156, 0.94527196978974903, 0.93990172135237238, 0.93976092189395699, 0.94174955547339068, 0.94168247039593567, 0.94014241402470755, 0.9407513673596033, 0.9404657385931684, 0.93660325056571514, 0.93963125525499047, 0.94507818868542304, 0.93515283399520432, 0.93426655514408952, 0.94036084535630249, 0.94278800241797989, 0.9303464277379202, 0.93946074265767165, 0.94011896731994538, 0.93219548844993971, 0.92901047418070248, 0.9413051979588738, 0.9262554563794031, 0.93609567274753203, 0.93033762483130578, 0.92717362481531107, 0.92953512970255081, 0.92597921058408705, 0.92944035872481756, 0.92961945644634048, 0.92707208850656797, 0.92662815136112076, 0.92539822828480234, 0.92880970077986702, 0.92243586760987628, 0.9222750133422748, 0.92256361764295314, 0.92112697126740262, 0.92800921486682297, 0.92926106782568352, 0.9153004667869965, 0.91094443103923883, 0.93496572342069606, 
0.91974312407475323, 0.91821212221209769, 0.91941566704066269, 0.92743446987850353, 0.91503638999300818, 0.9165744471959929, 0.9173574311901096, 0.91424899909475399, 0.9105910983179335, 0.91705676725576613, 0.9153198466244068, 0.90864738371647913, 0.91433392116041334, 0.91730855979513537, 0.91943446809871454, 0.91559373840877156, 0.9117409480398676, 0.91824229192475937, 0.91601442202251138, 0.90785487652541763, 0.91036044345459455, 0.90611631831996731, 0.90776253175168242, 0.91042057926704512, 0.90874629789034811, 0.9149280544309798, 0.90782476705166226, 0.9033169594105388, 0.91714376925946128, 0.91281316274517155, 0.90717852083845951, 0.90094537088101878, 0.92254053072003039, 0.90983334489593448, 0.90708146627494168, 0.90476333243913687, 0.9103459736117403, 0.90187539543204431, 0.9131362289361491, 0.90586625644843533, 0.90304462354844051, 0.90181187136526952, 0.91019459725955099, 0.90469807709268235, 0.90432952876471406, 0.90262631470543575, 0.90744672937259074, 0.90508781907365621, 0.91257822910795028, 0.90056483900994111, 0.91093355429606471, 0.90310791000879842, 0.90693900875758127, 0.90544454108106043, 0.90810441816334841, 0.89820458428510686, 0.90551853969183604, 0.89597859386528622, 0.90023866788648133, 0.89957093023412082, 0.89800265641361166, 0.89390398408977567, 0.90802340168738838, 0.89924965362547771, 0.89428545643713697, 0.90096102794054678, 0.89998055268679522, 0.90162280275845208, 0.90437904400674751, 0.89893950123068544, 0.89953917921889903, 0.89979142882290919, 0.89255702183771879, 0.90218120233559551, 0.90059584479083477, 0.90604923024967621, 0.89808476276490912, 0.9035111823998403, 0.88812459864856652, 0.88732828138284314, 0.8990176842324713, 0.89889007193703918, 0.90199559435268073, 0.89687789130803197, 0.89551173581908239, 0.90166333865434023, 0.89319951769196892, 0.89123147694826832, 0.8999938689076068, 0.89347788349594881, 0.90133726872596209, 0.88893467356964939, 0.89150929244536248, 0.89309087673528165, 0.8964350026673984, 
0.88372758347780378, 0.89708257630045563, 0.8937592373076666, 0.89910169411630581, 0.89041595406974294, 0.90399067374724229, 0.8891242242836932, 0.893623077380578, 0.89104131698590328], [5898, 6512, 6986, 7289, 7455, 7227, 7723, 7767, 7890, 7942, 7879, 7893, 8036, 8052, 8089, 8079, 8132, 8148, 8138, 8163, 8152, 8199, 8160, 8160, 8152, 8176, 8201, 8184, 8203, 8207, 8232, 8219, 8240, 8221, 8177, 8250, 8250, 8240, 8250, 8271, 8269, 8254, 8256, 8268, 8248, 8283, 8275, 8278, 8302, 8278, 8287, 8293, 8302, 8304, 8305, 8317, 8329, 8316, 8316, 8324, 8323, 8332, 8336, 8319, 8336, 8346, 8324, 8344, 8337, 8352, 8353, 8351, 8349, 8360, 8364, 8359, 8363, 8371, 8371, 8360, 8350, 8369, 8355, 8368, 8375, 8393, 8366, 8385, 8374, 8393, 8383, 8381, 8392, 8387, 8379, 8395, 8389, 8382, 8395, 8409, 8412, 8415, 8404, 8389, 8414, 8405, 8417, 8417, 8416, 8415, 8416, 8418, 8414, 8425, 8437, 8445, 8429, 8454, 8442, 8441, 8456, 8452, 8446, 8458, 8452, 8465, 8451, 8470, 8455, 8467, 8476, 8464, 8473, 8500, 8485, 8480, 8479, 8485, 8484, 8491, 8486, 8494, 8481, 8515, 8476, 8495, 8485, 8498, 8518, 8504, 8510, 8508, 8505, 8502, 8514, 8520, 8500, 8519, 8525, 8512, 8532, 8531, 8534, 8533, 8536, 8536, 8530, 8526, 8521, 8530, 8544, 8572, 8536, 8543, 8545, 8542, 8559, 8558, 8548, 8551, 8567, 8557, 8554, 8535, 8566, 8573, 8554, 8554, 8589, 8571, 8554, 8582, 8572, 8571, 8569, 8574, 8573, 8580, 8603, 8558, 8590, 8595, 8599, 8594, 8594, 8599, 8594, 8594, 8588, 8607, 8594, 8606, 8594, 8609, 8606, 8597, 8612, 8618, 8617, 8602, 8624, 8625, 8601, 8605, 8620, 8622, 8613, 8622, 8621, 8612, 8613, 8609, 8614, 8613, 8627, 8626, 8627, 8622, 8636, 8637, 8624, 8632, 8616, 8610, 8632, 8613, 8617, 8626, 8610, 8626, 8620, 8630, 8619, 8629, 8633, 8637, 8631, 8627, 8638, 8634, 8636, 8633, 8633, 8620, 8630, 8637, 8638, 8630, 8626, 8647, 8630, 8625, 8646, 8637, 8623, 8645, 8625, 8631, 8649, 8644, 8645, 8642, 8632, 8644, 8638, 8641, 8636, 8642, 8643, 8646, 8638, 8635, 8637, 8650, 8671, 8632, 8655, 8657, 8651, 8632, 8649, 8655, 
8653, 8659, 8654, 8657, 8655, 8652, 8651, 8666, 8648, 8647, 8663, 8644, 8651, 8678, 8672, 8668, 8663, 8654, 8665, 8655, 8668, 8676, 8660, 8662, 8674, 8674, 8648, 8666, 8673, 8679, 8658, 8680, 8666, 8674, 8674, 8675, 8673, 8678, 8676, 8671, 8668, 8673, 8647, 8676, 8656, 8673, 8675, 8669, 8658, 8689, 8675, 8691, 8683, 8691, 8684, 8695, 8683, 8689, 8693, 8691, 8687, 8680, 8674, 8692, 8685, 8688, 8698, 8685, 8688, 8663, 8685, 8691, 8694, 8701, 8678, 8679, 8686, 8694, 8690, 8685, 8689, 8710, 8677, 8699, 8681, 8694, 8703, 8691, 8690, 8703, 8699, 8703, 8686, 8692, 8678, 8707, 8692, 8695], [3.0054668602173189, 2.630540182112914, 2.3510873884747334, 2.1474705267659759, 2.0293529993091846, 2.0032380926060593, 1.8042553867762137, 1.7003373477735675, 1.6321855821328803, 1.5610897547444651, 1.5281925730670125, 1.4856976518536389, 1.4217730925966259, 1.3909369689936708, 1.3493713532353451, 1.3200187301521982, 1.301800278663491, 1.2676513467349682, 1.2429301255926062, 1.2185580400821583, 1.2050358080246586, 1.1832267788099187, 1.1685930523656132, 1.1484535916807292, 1.1359697154554902, 1.114320381224261, 1.1014631295982018, 1.089492003214271, 1.0727926078697159, 1.0613717416584318, 1.0469730951133949, 1.0327418635716941, 1.021579545763369, 1.007466532996552, 1.0019118318591766, 0.98744520456502871, 0.97474616908673617, 0.96296146445044695, 0.95371837313859686, 0.94627032091708685, 0.93498894772954433, 0.92371303592517773, 0.91598769285379622, 0.90572616038790177, 0.89742104654365473, 0.88871852051305211, 0.88017134729428881, 0.87050654917655768, 0.86212122128894098, 0.85338982152113285, 0.84517324140670691, 0.83628724752074646, 0.82688131184478686, 0.81882973565637929, 0.81057777549741772, 0.80277250205231088, 0.79480020668910989, 0.78780832187387972, 0.78001287137204445, 0.77319156351717266, 0.76596973399820345, 0.75937155578658067, 0.75198198829961649, 0.74542036500766451, 0.73896269018433358, 0.73189672823515506, 0.72511198497639617, 0.71858339754179046, 0.71283671972744378, 
0.70572537262239332, 0.69919661822074053, 0.6932705965074516, 0.68720037824228397, 0.68083868446687623, 0.67490287294715556, 0.6688525492359253, 0.66319307295604624, 0.65751891741935764, 0.65160400038475963, 0.64606741109074506, 0.64065241850535526, 0.63470824020889005, 0.62942800255997755, 0.6238131388272512, 0.61821534837327441, 0.6131143735593132, 0.60801454092208473, 0.60307257257391134, 0.597472701650819, 0.5922276823975956, 0.58718276628487431, 0.58219953746425901, 0.57721941418480194, 0.57251776210593941, 0.56770248252692224, 0.56293171594753588, 0.5583703980639495, 0.55356242427910862, 0.54911205675728603, 0.54434678517468216, 0.53983964079181412, 0.53560546501131556, 0.53104610602400237, 0.52690257757896453, 0.52244249181950919, 0.51837040316250427, 0.51406707535766105, 0.50997890157700754, 0.50580316759379418, 0.50197927328326941, 0.49765475959114985, 0.49363114894282656, 0.49013948810957153, 0.48574563160725903, 0.481996433456091, 0.47836254150930335, 0.47438049006936467, 0.47063209526452893, 0.46700010263520492, 0.46326294173821808, 0.45941178891664192, 0.45579944000986117, 0.45222495753341951, 0.44874146785608826, 0.44545391416308638, 0.44181467115194983, 0.43828437942217396, 0.43497413417170505, 0.4314933212391851, 0.42806639128733814, 0.4249830118133005, 0.4217067271918078, 0.41822647856750578, 0.41518744647089212, 0.41184644098125406, 0.40883008626966744, 0.40553823067182421, 0.40244746846822144, 0.39928585652511611, 0.39626977647492767, 0.39331263919868908, 0.39019492871393685, 0.38732420260579697, 0.38435150189678424, 0.38177223040643438, 0.37853457408790142, 0.37569449510498465, 0.37295407280298576, 0.37017541614667188, 0.36735907932787892, 0.36474742614606914, 0.36195502625628007, 0.35947915235047068, 0.35675421962663562, 0.35401004282520687, 0.35144133746409423, 0.34889387731183558, 0.34641383158012634, 0.34392904811658986, 0.34130203990500291, 0.33888366962785094, 0.33638822418723674, 0.33396129048481904, 0.33175452278050255, 0.32933814039393, 
0.32694391946859563, 0.32460086680314448, 0.32231863682007983, 0.32010480841447037, 0.31790147822028658, 0.31571847553356108, 0.31361078068871434, 0.31122285863559135, 0.30943747241499886, 0.30710922580369193, 0.30523966755164406, 0.30284937095608283, 0.3007194800484288, 0.29894516963898177, 0.29668334988284817, 0.29487017447397706, 0.29263559416080354, 0.29070159725793254, 0.28909427829661305, 0.28678884984900743, 0.28490027458015588, 0.2830863666836243, 0.28117503586861597, 0.27940625519366591, 0.27761490071595862, 0.27571735158201321, 0.27400939202985924, 0.2720994498803232, 0.27041895644243802, 0.26870944341767583, 0.26692503810639295, 0.26514216579715211, 0.26350425688621143, 0.26194775244796215, 0.26031808133537176, 0.25867644728947803, 0.25701548157578263, 0.2553925131758501, 0.2537805110290578, 0.25233205572342077, 0.25059011155916222, 0.24900995023173669, 0.24750970679163162, 0.24603574581720411, 0.24453517928837162, 0.24297118556626207, 0.24163254815954366, 0.24010060406903927, 0.23858447318485032, 0.23709355381025712, 0.23575693213477292, 0.23430242910697505, 0.23300424062786362, 0.23146492791787876, 0.23028619269651887, 0.22872263832614514, 0.22757051865720562, 0.22612070285691649, 0.22486518055185545, 0.22356486396463154, 0.22215084040399144, 0.22116285480573897, 0.21954904035047973, 0.21853554445024348, 0.21730688614947213, 0.21600390968478739, 0.21473510420394584, 0.21359511875939363, 0.21250417810571734, 0.21119670161592852, 0.20998559164773262, 0.208837489317242, 0.20768479087034092, 0.20659155500937021, 0.20558367284244244, 0.20435335237628477, 0.2032464875641013, 0.20219802987124882, 0.20127901932926417, 0.20007579622717186, 0.19898737190593724, 0.19804803849997596, 0.19689315580825217, 0.19610885980109602, 0.19479852999276509, 0.1940612380895031, 0.19292786347284827, 0.19185809503845005, 0.19096893321163924, 0.19003230799124482, 0.18913859695629501, 0.1881291695604892, 0.18718973645719555, 0.18627742977977121, 0.18541142438497082, 
0.18448857944953381, 0.18359351692378717, 0.18261368242334564, 0.18180375921357417, 0.18093506139675003, 0.17998546923834557, 0.17915124316640865, 0.17852974081084139, 0.17753670237988683, 0.17675502721943334, 0.17596267360975426, 0.17532135165792365, 0.1742351851435536, 0.17358483897004762, 0.1729350931986928, 0.1720805519195413, 0.17109064918417963, 0.17027331927806791, 0.16963634485184267, 0.16895358154624776, 0.1682839465581161, 0.16747701705369528, 0.16667755231392004, 0.16591178893673617, 0.16538283429287487, 0.16444040613503746, 0.16373895724486862, 0.16308076465326077, 0.16233620430635892, 0.16176781013586375, 0.16104183013834106, 0.16046925595808373, 0.15987647357985665, 0.15926248578219754, 0.15861934606502726, 0.15813634134480264, 0.15728566055545845, 0.15654044850087195, 0.15576910232092384, 0.15542825510458746, 0.15471291341581561, 0.15393039632238567, 0.15338574586063461, 0.15289796647946724, 0.15237239387638404, 0.15167106792711721, 0.15117419365146861, 0.15053294284800808, 0.14984558772800877, 0.14939067983754478, 0.14873871322311416, 0.14816358361629323, 0.14764993080499633, 0.147081011815695, 0.14665877802009278, 0.146330915059212, 0.14558165958404565, 0.14485851618233073, 0.14468885149823432, 0.1438703517822042, 0.14344305879631206, 0.14286592428666159, 0.14233498842740988, 0.14196829563110053, 0.14137537114428306, 0.14093879724708491, 0.14032845116761775, 0.13999255024009272, 0.13970462455415528, 0.13891699772284177, 0.13845977036853441, 0.13800758209633504, 0.13755412286780858, 0.13710288863433284, 0.1366941098396158, 0.13620566631276054, 0.13574747139784185, 0.1354018600833839, 0.13509926299887218, 0.13449907813149148, 0.13414481985203083, 0.13360822141476442, 0.13319767720007691, 0.13296506419947443, 0.13254819602035237, 0.13205345942670241, 0.13167698206691389, 0.13129565313440172, 0.13088461421366027, 0.13059579877002456, 0.13013895016529287, 0.12974813771716298, 0.12942570940664525, 0.12894055787725606, 0.12857812443876793, 
0.12831920733468941, 0.12787017352245639, 0.12757677057644992, 0.12716368111937662, 0.12684805248186204, 0.12642447555752939, 0.12619325054698116, 0.12568699211395123, 0.1255825925227049, 0.12531188331355458, 0.12477983663711714, 0.12448255939185263, 0.12410353700139506, 0.12374131975287532, 0.12346266645310616, 0.12310787618062269, 0.12306935609380343, 0.12260837265741077, 0.12213474763430163, 0.12209935233252732, 0.12180246601285891, 0.1212539118889692, 0.12108968375061299, 0.12068240811734519, 0.12041266804252408, 0.12016957520782154, 0.11993799531844312, 0.1196710285606524, 0.11935732791087648, 0.11929515289226478, 0.11889389635415669, 0.11862785428698526, 0.11817071954732133, 0.11814865141172132, 0.11763340847971795, 0.11749514446259617, 0.11725747872816564, 0.11695850963543949, 0.1166363293817957, 0.11644033569813024, 0.11615572825807459, 0.11608765530562379, 0.11580834205760465, 0.1154832427731588, 0.1153905733766095], [690, 749, 810, 849, 874, 869, 912, 932, 948, 950, 958, 964, 971, 975, 980, 979, 980, 981, 983, 986, 982, 986, 987, 990, 990, 990, 992, 991, 992, 991, 993, 992, 993, 994, 994, 995, 995, 995, 996, 994, 997, 997, 996, 998, 998, 997, 998, 998, 998, 998, 998, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 1000, 999, 1000, 999, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 
1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]]


--------------------------------------------------------------------------------
/fig/regularized1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/regularized1.png


--------------------------------------------------------------------------------
/fig/regularized2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/regularized2.png


--------------------------------------------------------------------------------
/fig/regularized_full.json:
--------------------------------------------------------------------------------
1 | [[4.3072791918656037, 2.9331304641086344, 2.1348073553576041, 1.6588303607817259, 1.330889938797851, 1.1963223601928472, 1.1170765304219505, 1.0170754480838433, 0.99110935015398149, 1.0071179800661803, 0.96280080386971378, 0.99226609521675169, 0.96023984363523895, 0.97253784945751276, 0.93966545596520334, 0.95330563342376551, 0.96378529404233837, 0.97367336858037301, 0.94435985290781166, 0.94622931411839994, 0.98392022263201184, 0.94091005661041272, 0.9496551347987412, 0.94714964684453073, 0.95026655456196552, 0.92915894672179755, 0.95831053042987979, 1.0153994919718721, 0.92940339906358749, 0.97682851862658082], [9212, 9341, 9375, 9424, 9532, 9537, 9504, 9541, 9578, 9538, 9579, 9530, 9590, 9543, 9607, 9597, 9576, 9546, 9600, 9634, 9544, 9606, 9614, 9607, 9621, 9637, 9620, 9511, 9649, 9561], [1.2925405259017666, 0.92479539229795305, 0.72611252037165497, 0.61618944188425839, 0.49142410439713557, 0.46552608507795468, 0.46074829841290343, 0.40775149802551902, 0.39671750686791218, 0.42031570708192345, 0.38057096091326847, 0.40768033915334978, 0.3895210257834103, 0.40585871820346864, 0.36003072887701948, 0.37700037701783806, 0.39300003862768451, 0.40774598935627593, 0.37194215157507704, 0.3662415845761452, 0.40722309031673021, 0.36476961463606117, 0.36988528906574514, 0.36112644707329011, 0.380710641602238, 0.35700998663848571, 0.37724740623797381, 0.44991741876110503, 0.35820321110078079, 0.39226034353556583], [45919, 46835, 47204, 47434, 47989, 47930, 47839, 48157, 48218, 48105, 48313, 48089, 48282, 48111, 48463, 48362, 48243, 48123, 48416, 48533, 48123, 48483, 48435, 48548, 48434, 48524, 48417, 47797, 48561, 48235]]


--------------------------------------------------------------------------------
/fig/regularized_full.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/regularized_full.png


--------------------------------------------------------------------------------
/fig/replaced_by_d3/README.md:
--------------------------------------------------------------------------------
1 | # Replaced by d3 directory
2 | 
3 | This directory contains the Python scripts, and the PNG figures they
4 | generated, that were later replaced by d3 in the live version of the
5 | site.  Both have been preserved here on the off chance that they may
6 | be of use at some point in the future.
7 | 


--------------------------------------------------------------------------------
/fig/replaced_by_d3/relu.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/replaced_by_d3/relu.png


--------------------------------------------------------------------------------
/fig/replaced_by_d3/relu.py:
--------------------------------------------------------------------------------
 1 | """
 2 | relu
 3 | ~~~~
 4 | 
 5 | Plots a graph of the squashing function used by a rectified linear
 6 | unit."""
 7 | 
 8 | import numpy as np
 9 | import matplotlib.pyplot as plt
10 | 
11 | z = np.arange(-2, 2, .1)
12 | zero = np.zeros(len(z))
13 | y = np.max([zero, z], axis=0)
14 | 
15 | fig = plt.figure()
16 | ax = fig.add_subplot(111)
17 | ax.plot(z, y)
18 | ax.set_ylim([-2.0, 2.0])
19 | ax.set_xlim([-2.0, 2.0])
20 | ax.grid(True)
21 | ax.set_xlabel('z')
22 | ax.set_title('Rectified linear unit')
23 | 
24 | plt.show()
25 | 


--------------------------------------------------------------------------------
/fig/replaced_by_d3/sigmoid.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/replaced_by_d3/sigmoid.png


--------------------------------------------------------------------------------
/fig/replaced_by_d3/sigmoid.py:
--------------------------------------------------------------------------------
 1 | """
 2 | sigmoid
 3 | ~~~~~~~
 4 | 
 5 | Plots a graph of the sigmoid function."""
 6 | 
 7 | import numpy
 8 | import matplotlib.pyplot as plt
 9 | 
10 | z = numpy.arange(-5, 5, .1)
11 | sigma_fn = numpy.vectorize(lambda z: 1/(1+numpy.exp(-z)))
12 | sigma = sigma_fn(z)
13 | 
14 | fig = plt.figure()
15 | ax = fig.add_subplot(111)
16 | ax.plot(z, sigma)
17 | ax.set_ylim([-0.5, 1.5])
18 | ax.set_xlim([-5,5])
19 | ax.grid(True)
20 | ax.set_xlabel('z')
21 | ax.set_title('sigmoid function')
22 | 
23 | plt.show()
24 | 


--------------------------------------------------------------------------------
/fig/replaced_by_d3/step.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/replaced_by_d3/step.png


--------------------------------------------------------------------------------
/fig/replaced_by_d3/step.py:
--------------------------------------------------------------------------------
 1 | """
 2 | step
 3 | ~~~~~~~
 4 | 
 5 | Plots a graph of a step function."""
 6 | 
 7 | import numpy
 8 | import matplotlib.pyplot as plt
 9 | 
10 | z = numpy.arange(-5, 5, .02)
11 | step_fn = numpy.vectorize(lambda z: 1.0 if z >= 0.0 else 0.0)
12 | step = step_fn(z)
13 | 
14 | fig = plt.figure()
15 | ax = fig.add_subplot(111)
16 | ax.plot(z, step)
17 | ax.set_ylim([-0.5, 1.5])
18 | ax.set_xlim([-5,5])
19 | ax.grid(True)
20 | ax.set_xlabel('z')
21 | ax.set_title('step function')
22 | 
23 | plt.show()
24 | 


--------------------------------------------------------------------------------
/fig/replaced_by_d3/tanh.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/replaced_by_d3/tanh.png


--------------------------------------------------------------------------------
/fig/replaced_by_d3/tanh.py:
--------------------------------------------------------------------------------
 1 | """
 2 | tanh
 3 | ~~~~
 4 | 
 5 | Plots a graph of the tanh function."""
 6 | 
 7 | import numpy as np
 8 | import matplotlib.pyplot as plt
 9 | 
10 | z = np.arange(-5, 5, .1)
11 | t = np.tanh(z)
12 | 
13 | fig = plt.figure()
14 | ax = fig.add_subplot(111)
15 | ax.plot(z, t)
16 | ax.set_ylim([-1.0, 1.0])
17 | ax.set_xlim([-5,5])
18 | ax.grid(True)
19 | ax.set_xlabel('z')
20 | ax.set_title('tanh function')
21 | 
22 | plt.show()
23 | 


--------------------------------------------------------------------------------
/fig/serialize_images_to_json.py:
--------------------------------------------------------------------------------
 1 | """
 2 | serialize_images_to_json
 3 | ~~~~~~~~~~~~~~~~~~~~~~~~
 4 | 
 5 | Utility to serialize parts of the training and validation data to JSON, 
 6 | for use with Javascript.  """
 7 | 
 8 | #### Libraries
 9 | # Standard library
10 | import json 
11 | import sys
12 | 
13 | # My library
14 | sys.path.append('../src/')
15 | import mnist_loader
16 | 
17 | # Third-party libraries
18 | import numpy as np
19 | 
20 | 
21 | # Number of training and validation data images to serialize
22 | NTD = 1000
23 | NVD = 100
24 | 
25 | training_data, validation_data, test_data = mnist_loader.load_data_wrapper()
26 | 
27 | def make_data_integer(td):
28 |     # This will be slow, due to the loop.  It'd be better if numpy did
29 |     # this directly.  But numpy.rint followed by tolist() doesn't
30 |     # convert to a standard Python int.
31 |     return [int(x) for x in (td*256).reshape(784).tolist()]
32 | 
33 | data = {"training": [
34 |     {"x": [x[0] for x in training_data[j][0].tolist()],
35 |      "y": [y[0] for y in training_data[j][1].tolist()]}
36 |     for j in xrange(NTD)],
37 |         "validation": [
38 |     {"x": [x[0] for x in validation_data[j][0].tolist()],
39 |      "y": validation_data[j][1]}
40 |             for j in xrange(NVD)]}
41 | 
42 | f = open("data_1000.json", "w")
43 | json.dump(data, f)
44 | f.close()
45 | 
46 | 
47 | 


--------------------------------------------------------------------------------
/fig/test.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/test.png


--------------------------------------------------------------------------------
/fig/training_speed_2_layers.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/training_speed_2_layers.png


--------------------------------------------------------------------------------
/fig/training_speed_3_layers.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/training_speed_3_layers.png


--------------------------------------------------------------------------------
/fig/training_speed_4_layers.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/training_speed_4_layers.png


--------------------------------------------------------------------------------
/fig/valley.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/valley.png


--------------------------------------------------------------------------------
/fig/valley.py:
--------------------------------------------------------------------------------
 1 | """
 2 | valley
 3 | ~~~~~~
 4 | 
 5 | Plots a function of two variables to minimize.  The function is a
 6 | fairly generic valley function."""
 7 | 
 8 | #### Libraries
 9 | # Third party libraries
10 | from matplotlib.ticker import LinearLocator
11 | # Note that axes3d is not explicitly used in the code, but is needed
12 | # to register the 3d plot type correctly
13 | from mpl_toolkits.mplot3d import axes3d 
14 | import matplotlib.pyplot as plt
15 | import numpy
16 | 
17 | fig = plt.figure()
18 | ax = fig.gca(projection='3d')
19 | X = numpy.arange(-1, 1, 0.1)
20 | Y = numpy.arange(-1, 1, 0.1)
21 | X, Y = numpy.meshgrid(X, Y)
22 | Z = X**2 + Y**2
23 | 
24 | colortuple = ('w', 'b')
25 | colors = numpy.empty(X.shape, dtype=str)
26 | for x in xrange(len(X)):
27 |     for y in xrange(len(Y)):
28 |         colors[x, y] = colortuple[(x + y) % 2]
29 | 
30 | surf = ax.plot_surface(X, Y, Z, rstride=1, cstride=1, facecolors=colors,
31 |         linewidth=0)
32 | 
33 | ax.set_xlim3d(-1, 1)
34 | ax.set_ylim3d(-1, 1)
35 | ax.set_zlim3d(0, 2)
36 | ax.w_xaxis.set_major_locator(LinearLocator(3))
37 | ax.w_yaxis.set_major_locator(LinearLocator(3))
38 | ax.w_zaxis.set_major_locator(LinearLocator(3))
39 | ax.text(1.79, 0, 1.62, "$C$", fontsize=20)
40 | ax.text(0.05, -1.8, 0, "$v_1$", fontsize=20)
41 | ax.text(1.5, -0.25, 0, "$v_2$", fontsize=20)
42 | 
43 | plt.show()
44 | 


--------------------------------------------------------------------------------
/fig/valley2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/valley2.png


--------------------------------------------------------------------------------
/fig/valley2.py:
--------------------------------------------------------------------------------
 1 | """valley2.py
 2 | ~~~~~~~~~~~~~
 3 | 
 4 | Plots a function of two variables to minimize.  The function is a
 5 | fairly generic valley function.
 6 | 
 7 | Note that this is a duplicate of valley.py, but omits labels on the
 8 | axis.  It's bad practice to duplicate in this way, but I had
 9 | considerable trouble getting matplotlib to update a graph in the way I
10 | needed (adding or removing labels), so finally fell back on this as a
11 | kludge solution.
12 | 
13 | """
14 | 
15 | #### Libraries
16 | # Third party libraries
17 | from matplotlib.ticker import LinearLocator
18 | # Note that axes3d is not explicitly used in the code, but is needed
19 | # to register the 3d plot type correctly
20 | from mpl_toolkits.mplot3d import axes3d 
21 | import matplotlib.pyplot as plt
22 | import numpy
23 | 
24 | fig = plt.figure()
25 | ax = fig.gca(projection='3d')
26 | X = numpy.arange(-1, 1, 0.1)
27 | Y = numpy.arange(-1, 1, 0.1)
28 | X, Y = numpy.meshgrid(X, Y)
29 | Z = X**2 + Y**2
30 | 
31 | colortuple = ('w', 'b')
32 | colors = numpy.empty(X.shape, dtype=str)
33 | for x in xrange(len(X)):
34 |     for y in xrange(len(Y)):
35 |         colors[x, y] = colortuple[(x + y) % 2]
36 | 
37 | surf = ax.plot_surface(X, Y, Z, rstride=1, cstride=1, facecolors=colors,
38 |         linewidth=0)
39 | 
40 | ax.set_xlim3d(-1, 1)
41 | ax.set_ylim3d(-1, 1)
42 | ax.set_zlim3d(0, 2)
43 | ax.w_xaxis.set_major_locator(LinearLocator(3))
44 | ax.w_yaxis.set_major_locator(LinearLocator(3))
45 | ax.w_zaxis.set_major_locator(LinearLocator(3))
46 | ax.text(1.79, 0, 1.62, "$C$", fontsize=20)
47 | 
48 | plt.show()
49 | 


--------------------------------------------------------------------------------
/fig/weight_initialization.py:
--------------------------------------------------------------------------------
 1 | """weight_initialization 
 2 | ~~~~~~~~~~~~~~~~~~~~~~~~
 3 | 
 4 | This program shows how weight initialization affects training.  In
 5 | particular, we'll plot out how the classification accuracies improve
 6 | using either large starting weights, whose standard deviation is 1, or
 7 | the default starting weights, whose standard deviation is 1 over the
 8 | square root of the number of input neurons.
 9 | 
10 | """
11 | 
12 | # Standard library
13 | import json
14 | import random
15 | import sys
16 | 
17 | # My library
18 | sys.path.append('../src/')
19 | import mnist_loader
20 | import network2
21 | 
22 | # Third-party libraries
23 | import matplotlib.pyplot as plt
24 | import numpy as np
25 | 
26 | def main(filename, n, eta):
27 |     run_network(filename, n, eta)
28 |     make_plot(filename)
29 |                        
30 | def run_network(filename, n, eta):
31 |     """Train the network using both the default and the large starting
32 |     weights.  Store the results in the file with name ``filename``,
33 |     where they can later be used by ``make_plots``.
34 | 
35 |     """
36 |     # Make results more easily reproducible
37 |     random.seed(12345678)
38 |     np.random.seed(12345678)
39 |     training_data, validation_data, test_data = mnist_loader.load_data_wrapper()
40 |     net = network2.Network([784, n, 10], cost=network2.CrossEntropyCost)
41 |     print "Train the network using the default starting weights."
42 |     default_vc, default_va, default_tc, default_ta \
43 |         = net.SGD(training_data, 30, 10, eta, lmbda=5.0,
44 |                   evaluation_data=validation_data, 
45 |                   monitor_evaluation_accuracy=True)
46 |     print "Train the network using the large starting weights."
47 |     net.large_weight_initializer()
48 |     large_vc, large_va, large_tc, large_ta \
49 |         = net.SGD(training_data, 30, 10, eta, lmbda=5.0,
50 |                   evaluation_data=validation_data, 
51 |                   monitor_evaluation_accuracy=True)
52 |     f = open(filename, "w")
53 |     json.dump({"default_weight_initialization":
54 |                [default_vc, default_va, default_tc, default_ta],
55 |                "large_weight_initialization":
56 |                [large_vc, large_va, large_tc, large_ta]}, 
57 |               f)
58 |     f.close()
59 | 
60 | def make_plot(filename):
61 |     """Load the results from the file ``filename``, and generate the
62 |     corresponding plot.
63 | 
64 |     """
65 |     f = open(filename, "r")
66 |     results = json.load(f)
67 |     f.close()
68 |     default_vc, default_va, default_tc, default_ta = results[
69 |         "default_weight_initialization"]
70 |     large_vc, large_va, large_tc, large_ta = results[
71 |         "large_weight_initialization"]
72 |     # Convert raw classification numbers to percentages, for plotting
73 |     default_va = [x/100.0 for x in default_va]
74 |     large_va = [x/100.0 for x in large_va]
75 |     fig = plt.figure()
76 |     ax = fig.add_subplot(111)
77 |     ax.plot(np.arange(0, 30, 1), large_va, color='#2A6EA6',
78 |             label="Old approach to weight initialization")
79 |     ax.plot(np.arange(0, 30, 1), default_va, color='#FFA933', 
80 |             label="New approach to weight initialization")
81 |     ax.set_xlim([0, 30])
82 |     ax.set_xlabel('Epoch')
83 |     ax.set_ylim([85, 100])
84 |     ax.set_title('Classification accuracy')
85 |     plt.legend(loc="lower right")
86 |     plt.show()
87 | 
88 | if __name__ == "__main__":
89 |     main()
90 | 


--------------------------------------------------------------------------------
/fig/weight_initialization_100.json:
--------------------------------------------------------------------------------
1 | {"default_weight_initialization": [[], [9295, 9481, 9547, 9592, 9664, 9673, 9702, 9719, 9726, 9726, 9732, 9732, 9730, 9734, 9745, 9751, 9757, 9761, 9764, 9766, 9758, 9767, 9756, 9752, 9777, 9775, 9770, 9770, 9771, 9781], [], []], "large_weight_initialization": [[], [8994, 9181, 9260, 9364, 9427, 9449, 9497, 9512, 9560, 9578, 9603, 9616, 9626, 9629, 9644, 9671, 9674, 9679, 9700, 9708, 9707, 9717, 9729, 9720, 9719, 9745, 9751, 9754, 9755, 9742], [], []]}


--------------------------------------------------------------------------------
/fig/weight_initialization_100.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/weight_initialization_100.png


--------------------------------------------------------------------------------
/fig/weight_initialization_30.json:
--------------------------------------------------------------------------------
1 | {"default_weight_initialization": [[], [9270, 9414, 9470, 9504, 9537, 9550, 9587, 9594, 9596, 9594, 9616, 9595, 9622, 9630, 9636, 9641, 9625, 9652, 9637, 9634, 9642, 9639, 9649, 9646, 9646, 9653, 9646, 9653, 9640, 9650], [], []], "large_weight_initialization": [[], [8643, 9044, 9141, 9231, 9299, 9327, 9385, 9416, 9433, 9449, 9476, 9489, 9500, 9535, 9521, 9548, 9564, 9573, 9585, 9592, 9596, 9615, 9607, 9605, 9606, 9622, 9637, 9648, 9635, 9637], [], []]}


--------------------------------------------------------------------------------
/fig/weight_initialization_30.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/fig/weight_initialization_30.png


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy
2 | scikit-learn
3 | scipy
4 | Theano
5 | 


--------------------------------------------------------------------------------
/src/conv.py:
--------------------------------------------------------------------------------
  1 | """conv.py
  2 | ~~~~~~~~~~
  3 | 
  4 | Code for many of the experiments involving convolutional networks in
  5 | Chapter 6 of the book 'Neural Networks and Deep Learning', by Michael
  6 | Nielsen.  The code essentially duplicates (and parallels) what is in
  7 | the text, so this is simply a convenience, and has not been commented
  8 | in detail.  Consult the original text for more details.
  9 | 
 10 | """
 11 | 
 12 | from collections import Counter
 13 | 
 14 | import matplotlib
 15 | matplotlib.use('Agg')
 16 | import matplotlib.pyplot as plt
 17 | import numpy as np
 18 | import theano
 19 | import theano.tensor as T
 20 | 
 21 | import network3
 22 | from network3 import sigmoid, tanh, ReLU, Network
 23 | from network3 import ConvPoolLayer, FullyConnectedLayer, SoftmaxLayer
 24 | 
 25 | training_data, validation_data, test_data = network3.load_data_shared()
 26 | mini_batch_size = 10
 27 | 
 28 | def shallow(n=3, epochs=60):
 29 |     nets = []
 30 |     for j in range(n):
 31 |         print "A shallow net with 100 hidden neurons"
 32 |         net = Network([
 33 |             FullyConnectedLayer(n_in=784, n_out=100),
 34 |             SoftmaxLayer(n_in=100, n_out=10)], mini_batch_size)
 35 |         net.SGD(
 36 |             training_data, epochs, mini_batch_size, 0.1, 
 37 |             validation_data, test_data)
 38 |         nets.append(net)
 39 |     return nets 
 40 | 
 41 | def basic_conv(n=3, epochs=60):
 42 |     for j in range(n):
 43 |         print "Conv + FC architecture"
 44 |         net = Network([
 45 |             ConvPoolLayer(image_shape=(mini_batch_size, 1, 28, 28), 
 46 |                           filter_shape=(20, 1, 5, 5), 
 47 |                           poolsize=(2, 2)),
 48 |             FullyConnectedLayer(n_in=20*12*12, n_out=100),
 49 |             SoftmaxLayer(n_in=100, n_out=10)], mini_batch_size)
 50 |         net.SGD(
 51 |             training_data, epochs, mini_batch_size, 0.1, validation_data, test_data)
 52 |     return net 
 53 | 
 54 | def omit_FC():
 55 |     for j in range(3):
 56 |         print "Conv only, no FC"
 57 |         net = Network([
 58 |             ConvPoolLayer(image_shape=(mini_batch_size, 1, 28, 28), 
 59 |                           filter_shape=(20, 1, 5, 5), 
 60 |                           poolsize=(2, 2)),
 61 |             SoftmaxLayer(n_in=20*12*12, n_out=10)], mini_batch_size)
 62 |         net.SGD(training_data, 60, mini_batch_size, 0.1, validation_data, test_data)
 63 |     return net 
 64 | 
 65 | def dbl_conv(activation_fn=sigmoid):
 66 |     for j in range(3):
 67 |         print "Conv + Conv + FC architecture"
 68 |         net = Network([
 69 |             ConvPoolLayer(image_shape=(mini_batch_size, 1, 28, 28), 
 70 |                           filter_shape=(20, 1, 5, 5), 
 71 |                           poolsize=(2, 2),
 72 |                           activation_fn=activation_fn),
 73 |             ConvPoolLayer(image_shape=(mini_batch_size, 20, 12, 12), 
 74 |                           filter_shape=(40, 20, 5, 5), 
 75 |                           poolsize=(2, 2),
 76 |                           activation_fn=activation_fn),
 77 |             FullyConnectedLayer(
 78 |                 n_in=40*4*4, n_out=100, activation_fn=activation_fn),
 79 |             SoftmaxLayer(n_in=100, n_out=10)], mini_batch_size)
 80 |         net.SGD(training_data, 60, mini_batch_size, 0.1, validation_data, test_data)
 81 |     return net 
 82 | 
 83 | # The following experiment was eventually omitted from the chapter,
 84 | # but I've left it in here, since it's an important negative result:
 85 | # basic l2 regularization didn't help much.  The reason (I believe) is
 86 | # that using convolutional-pooling layers is already a pretty strong
 87 | # regularizer.
 88 | def regularized_dbl_conv():
 89 |     for lmbda in [0.00001, 0.0001, 0.001, 0.01, 0.1, 1.0, 10.0, 100.0]:
 90 |         for j in range(3):
 91 |             print "Conv + Conv + FC num %s, with regularization %s" % (j, lmbda)
 92 |             net = Network([
 93 |                 ConvPoolLayer(image_shape=(mini_batch_size, 1, 28, 28), 
 94 |                               filter_shape=(20, 1, 5, 5), 
 95 |                               poolsize=(2, 2)),
 96 |                 ConvPoolLayer(image_shape=(mini_batch_size, 20, 12, 12), 
 97 |                               filter_shape=(40, 20, 5, 5), 
 98 |                               poolsize=(2, 2)),
 99 |                 FullyConnectedLayer(n_in=40*4*4, n_out=100),
100 |                 SoftmaxLayer(n_in=100, n_out=10)], mini_batch_size)
101 |             net.SGD(training_data, 60, mini_batch_size, 0.1, validation_data, test_data, lmbda=lmbda)
102 | 
103 | def dbl_conv_relu():
104 |     for lmbda in [0.0, 0.00001, 0.0001, 0.001, 0.01, 0.1, 1.0, 10.0, 100.0]:
105 |         for j in range(3):
106 |             print "Conv + Conv + FC num %s, relu, with regularization %s" % (j, lmbda)
107 |             net = Network([
108 |                 ConvPoolLayer(image_shape=(mini_batch_size, 1, 28, 28), 
109 |                               filter_shape=(20, 1, 5, 5), 
110 |                               poolsize=(2, 2), 
111 |                               activation_fn=ReLU),
112 |                 ConvPoolLayer(image_shape=(mini_batch_size, 20, 12, 12), 
113 |                               filter_shape=(40, 20, 5, 5), 
114 |                               poolsize=(2, 2), 
115 |                               activation_fn=ReLU),
116 |                 FullyConnectedLayer(n_in=40*4*4, n_out=100, activation_fn=ReLU),
117 |                 SoftmaxLayer(n_in=100, n_out=10)], mini_batch_size)
118 |             net.SGD(training_data, 60, mini_batch_size, 0.03, validation_data, test_data, lmbda=lmbda)
119 | 
120 | #### Some subsequent functions may make use of the expanded MNIST
121 | #### data.  That can be generated by running expand_mnist.py.
122 | 
123 | def expanded_data(n=100):
124 |     """n is the number of neurons in the fully-connected layer.  We'll try
125 |     n=100, 300, and 1000.
126 | 
127 |     """
128 |     expanded_training_data, _, _ = network3.load_data_shared(
129 |         "../data/mnist_expanded.pkl.gz")
130 |     for j in range(3):
131 |         print "Training with expanded data, %s neurons in the FC layer, run num %s" % (n, j)
132 |         net = Network([
133 |             ConvPoolLayer(image_shape=(mini_batch_size, 1, 28, 28), 
134 |                           filter_shape=(20, 1, 5, 5), 
135 |                           poolsize=(2, 2), 
136 |                           activation_fn=ReLU),
137 |             ConvPoolLayer(image_shape=(mini_batch_size, 20, 12, 12), 
138 |                           filter_shape=(40, 20, 5, 5), 
139 |                           poolsize=(2, 2), 
140 |                           activation_fn=ReLU),
141 |             FullyConnectedLayer(n_in=40*4*4, n_out=n, activation_fn=ReLU),
142 |             SoftmaxLayer(n_in=n, n_out=10)], mini_batch_size)
143 |         net.SGD(expanded_training_data, 60, mini_batch_size, 0.03, 
144 |                 validation_data, test_data, lmbda=0.1)
145 |     return net 
146 | 
147 | def expanded_data_double_fc(n=100):
148 |     """n is the number of neurons in both fully-connected layers.  We'll
149 |     try n=100, 300, and 1000.
150 | 
151 |     """
152 |     expanded_training_data, _, _ = network3.load_data_shared(
153 |         "../data/mnist_expanded.pkl.gz")
154 |     for j in range(3):
155 |         print "Training with expanded data, %s neurons in two FC layers, run num %s" % (n, j)
156 |         net = Network([
157 |             ConvPoolLayer(image_shape=(mini_batch_size, 1, 28, 28), 
158 |                           filter_shape=(20, 1, 5, 5), 
159 |                           poolsize=(2, 2), 
160 |                           activation_fn=ReLU),
161 |             ConvPoolLayer(image_shape=(mini_batch_size, 20, 12, 12), 
162 |                           filter_shape=(40, 20, 5, 5), 
163 |                           poolsize=(2, 2), 
164 |                           activation_fn=ReLU),
165 |             FullyConnectedLayer(n_in=40*4*4, n_out=n, activation_fn=ReLU),
166 |             FullyConnectedLayer(n_in=n, n_out=n, activation_fn=ReLU),
167 |             SoftmaxLayer(n_in=n, n_out=10)], mini_batch_size)
168 |         net.SGD(expanded_training_data, 60, mini_batch_size, 0.03, 
169 |                 validation_data, test_data, lmbda=0.1)
170 | 
171 | def double_fc_dropout(p0, p1, p2, repetitions):
172 |     expanded_training_data, _, _ = network3.load_data_shared(
173 |         "../data/mnist_expanded.pkl.gz")
174 |     nets = []
175 |     for j in range(repetitions):
176 |         print "\n\nTraining using a dropout network with parameters ",p0,p1,p2
177 |         print "Training with expanded data, run num %s" % j
178 |         net = Network([
179 |             ConvPoolLayer(image_shape=(mini_batch_size, 1, 28, 28), 
180 |                           filter_shape=(20, 1, 5, 5), 
181 |                           poolsize=(2, 2), 
182 |                           activation_fn=ReLU),
183 |             ConvPoolLayer(image_shape=(mini_batch_size, 20, 12, 12), 
184 |                           filter_shape=(40, 20, 5, 5), 
185 |                           poolsize=(2, 2), 
186 |                           activation_fn=ReLU),
187 |             FullyConnectedLayer(
188 |                 n_in=40*4*4, n_out=1000, activation_fn=ReLU, p_dropout=p0),
189 |             FullyConnectedLayer(
190 |                 n_in=1000, n_out=1000, activation_fn=ReLU, p_dropout=p1),
191 |             SoftmaxLayer(n_in=1000, n_out=10, p_dropout=p2)], mini_batch_size)
192 |         net.SGD(expanded_training_data, 40, mini_batch_size, 0.03, 
193 |                 validation_data, test_data)
194 |         nets.append(net)
195 |     return nets
196 | 
197 | def ensemble(nets): 
198 |     """Takes as input a list of nets, and then computes the accuracy on
199 |     the test data when classifications are computed by taking a vote
200 |     amongst the nets.  Returns a tuple containing a list of indices
201 |     for test data which is erroneously classified, and a list of the
202 |     corresponding erroneous predictions.
203 | 
204 |     Note that this is a quick-and-dirty kluge: it'd be more reusable
205 |     (and faster) to define a Theano function taking the vote.  But
206 |     this works.
207 | 
208 |     """
209 |     
210 |     test_x, test_y = test_data
211 |     for net in nets:
212 |         i = T.lscalar() # mini-batch index
213 |         net.test_mb_predictions = theano.function(
214 |             [i], net.layers[-1].y_out,
215 |             givens={
216 |                 net.x: 
217 |                 test_x[i*net.mini_batch_size: (i+1)*net.mini_batch_size]
218 |             })
219 |         net.test_predictions = list(np.concatenate(
220 |             [net.test_mb_predictions(i) for i in xrange(1000)]))
221 |     all_test_predictions = zip(*[net.test_predictions for net in nets])
222 |     def plurality(p): return Counter(p).most_common(1)[0][0]
223 |     plurality_test_predictions = [plurality(p) 
224 |                                   for p in all_test_predictions]
225 |     test_y_eval = test_y.eval()
226 |     error_locations = [j for j in xrange(10000) 
227 |                        if plurality_test_predictions[j] != test_y_eval[j]]
228 |     erroneous_predictions = [plurality(all_test_predictions[j])
229 |                              for j in error_locations]
230 |     print "Accuracy is {:.2%}".format((1-len(error_locations)/10000.0))
231 |     return error_locations, erroneous_predictions
232 | 
233 | def plot_errors(error_locations, erroneous_predictions=None):
234 |     test_x, test_y = test_data[0].eval(), test_data[1].eval()
235 |     fig = plt.figure()
236 |     error_images = [np.array(test_x[i]).reshape(28, -1) for i in error_locations]
237 |     n = min(40, len(error_locations))
238 |     for j in range(n):
239 |         ax = plt.subplot2grid((5, 8), (j/8, j % 8))
240 |         ax.matshow(error_images[j], cmap = matplotlib.cm.binary)
241 |         ax.text(24, 5, test_y[error_locations[j]])
242 |         if erroneous_predictions:
243 |             ax.text(24, 24, erroneous_predictions[j])
244 |         plt.xticks(np.array([]))
245 |         plt.yticks(np.array([]))
246 |     plt.tight_layout()
247 |     return plt
248 |     
249 | def plot_filters(net, layer, x, y):
250 | 
251 |     """Plot the filters for net after the (convolutional) layer number
252 |     layer.  They are plotted in x by y format.  So, for example, if we
253 |     have 20 filters after layer 0, then we can call show_filters(net, 0, 5, 4) to
254 |     get a 5 by 4 plot of all filters."""
255 |     filters = net.layers[layer].w.eval()
256 |     fig = plt.figure()
257 |     for j in range(len(filters)):
258 |         ax = fig.add_subplot(y, x, j)
259 |         ax.matshow(filters[j][0], cmap = matplotlib.cm.binary)
260 |         plt.xticks(np.array([]))
261 |         plt.yticks(np.array([]))
262 |     plt.tight_layout()
263 |     return plt
264 | 
265 | 
266 | #### Helper method to run all experiments in the book
267 | 
268 | def run_experiments():
269 | 
270 |     """Run the experiments described in the book.  Note that the later
271 |     experiments require access to the expanded training data, which
272 |     can be generated by running expand_mnist.py.
273 | 
274 |     """
275 |     shallow()
276 |     basic_conv()
277 |     omit_FC()
278 |     dbl_conv(activation_fn=sigmoid)
279 |     # omitted, but still interesting: regularized_dbl_conv()
280 |     dbl_conv_relu()
281 |     expanded_data(n=100)
282 |     expanded_data(n=300)
283 |     expanded_data(n=1000)
284 |     expanded_data_double_fc(n=100)    
285 |     expanded_data_double_fc(n=300)
286 |     expanded_data_double_fc(n=1000)
287 |     nets = double_fc_dropout(0.5, 0.5, 0.5, 5)
288 |     # plot the erroneous digits in the ensemble of nets just trained
289 |     error_locations, erroneous_predictions = ensemble(nets)
290 |     plt = plot_errors(error_locations, erroneous_predictions)
291 |     plt.savefig("ensemble_errors.png")
292 |     # plot the filters learned by the first of the nets just trained
293 |     plt = plot_filters(nets[0], 0, 5, 4)
294 |     plt.savefig("net_full_layer_0.png")
295 |     plt = plot_filters(nets[0], 1, 8, 5)
296 |     plt.savefig("net_full_layer_1.png")
297 | 
298 | 


--------------------------------------------------------------------------------
/src/expand_mnist.py:
--------------------------------------------------------------------------------
 1 | """expand_mnist.py
 2 | ~~~~~~~~~~~~~~~~~~
 3 | 
 4 | Take the 50,000 MNIST training images, and create an expanded set of
 5 | 250,000 images, by displacing each training image up, down, left and
 6 | right, by one pixel.  Save the resulting file to
 7 | ../data/mnist_expanded.pkl.gz.
 8 | 
 9 | Note that this program is memory intensive, and may not run on small
10 | systems.
11 | 
12 | """
13 | 
14 | from __future__ import print_function
15 | 
16 | #### Libraries
17 | 
18 | # Standard library
19 | import cPickle
20 | import gzip
21 | import os.path
22 | import random
23 | 
24 | # Third-party libraries
25 | import numpy as np
26 | 
27 | print("Expanding the MNIST training set")
28 | 
29 | if os.path.exists("../data/mnist_expanded.pkl.gz"):
30 |     print("The expanded training set already exists.  Exiting.")
31 | else:
32 |     f = gzip.open("../data/mnist.pkl.gz", 'rb')
33 |     training_data, validation_data, test_data = cPickle.load(f)
34 |     f.close()
35 |     expanded_training_pairs = []
36 |     j = 0 # counter
37 |     for x, y in zip(training_data[0], training_data[1]):
38 |         expanded_training_pairs.append((x, y))
39 |         image = np.reshape(x, (-1, 28))
40 |         j += 1
41 |         if j % 1000 == 0: print("Expanding image number", j)
42 |         # iterate over data telling us the details of how to
43 |         # do the displacement
44 |         for d, axis, index_position, index in [
45 |                 (1,  0, "first", 0),
46 |                 (-1, 0, "first", 27),
47 |                 (1,  1, "last",  0),
48 |                 (-1, 1, "last",  27)]:
49 |             new_img = np.roll(image, d, axis)
50 |             if index_position == "first": 
51 |                 new_img[index, :] = np.zeros(28)
52 |             else: 
53 |                 new_img[:, index] = np.zeros(28)
54 |             expanded_training_pairs.append((np.reshape(new_img, 784), y))
55 |     random.shuffle(expanded_training_pairs)
56 |     expanded_training_data = [list(d) for d in zip(*expanded_training_pairs)]
57 |     print("Saving expanded data. This may take a few minutes.")
58 |     f = gzip.open("../data/mnist_expanded.pkl.gz", "w")
59 |     cPickle.dump((expanded_training_data, validation_data, test_data), f)
60 |     f.close()
61 | 


--------------------------------------------------------------------------------
/src/mnist_average_darkness.py:
--------------------------------------------------------------------------------
 1 | """
 2 | mnist_average_darkness
 3 | ~~~~~~~~~~~~~~~~~~~~~~
 4 | 
 5 | A naive classifier for recognizing handwritten digits from the MNIST
 6 | data set.  The program classifies digits based on how dark they are
 7 | --- the idea is that digits like "1" tend to be less dark than digits
 8 | like "8", simply because the latter has a more complex shape.  When
 9 | shown an image the classifier returns whichever digit in the training
10 | data had the closest average darkness.
11 | 
12 | The program works in two steps: first it trains the classifier, and
13 | then it applies the classifier to the MNIST test data to see how many
14 | digits are correctly classified.
15 | 
16 | Needless to say, this isn't a very good way of recognizing handwritten
17 | digits!  Still, it's useful to show what sort of performance we get
18 | from naive ideas."""
19 | 
20 | #### Libraries
21 | # Standard library
22 | from collections import defaultdict
23 | 
24 | # My libraries
25 | import mnist_loader
26 | 
27 | def main():
28 |     training_data, validation_data, test_data = mnist_loader.load_data()
29 |     # training phase: compute the average darknesses for each digit,
30 |     # based on the training data
31 |     avgs = avg_darknesses(training_data)
32 |     # testing phase: see how many of the test images are classified
33 |     # correctly
34 |     num_correct = sum(int(guess_digit(image, avgs) == digit)
35 |                       for image, digit in zip(test_data[0], test_data[1]))
36 |     print "Baseline classifier using average darkness of image."
37 |     print "%s of %s values correct." % (num_correct, len(test_data[1]))
38 | 
39 | def avg_darknesses(training_data):
40 |     """ Return a defaultdict whose keys are the digits 0 through 9.
41 |     For each digit we compute a value which is the average darkness of
42 |     training images containing that digit.  The darkness for any
43 |     particular image is just the sum of the darknesses for each pixel."""
44 |     digit_counts = defaultdict(int)
45 |     darknesses = defaultdict(float)
46 |     for image, digit in zip(training_data[0], training_data[1]):
47 |         digit_counts[digit] += 1
48 |         darknesses[digit] += sum(image)
49 |     avgs = defaultdict(float)
50 |     for digit, n in digit_counts.iteritems():
51 |         avgs[digit] = darknesses[digit] / n
52 |     return avgs
53 | 
54 | def guess_digit(image, avgs):
55 |     """Return the digit whose average darkness in the training data is
56 |     closest to the darkness of ``image``.  Note that ``avgs`` is
57 |     assumed to be a defaultdict whose keys are 0...9, and whose values
58 |     are the corresponding average darknesses across the training data."""
59 |     darkness = sum(image)
60 |     distances = {k: abs(v-darkness) for k, v in avgs.iteritems()}
61 |     return min(distances, key=distances.get)
62 | 
63 | if __name__ == "__main__":  # run the baseline only when executed as a script
64 |     main()
65 | 


--------------------------------------------------------------------------------
/src/mnist_loader.py:
--------------------------------------------------------------------------------
 1 | """
 2 | mnist_loader
 3 | ~~~~~~~~~~~~
 4 | 
 5 | A library to load the MNIST image data.  For details of the data
 6 | structures that are returned, see the doc strings for ``load_data``
 7 | and ``load_data_wrapper``.  In practice, ``load_data_wrapper`` is the
 8 | function usually called by our neural network code.
 9 | """
10 | 
11 | #### Libraries
12 | # Standard library
13 | import pickle
14 | import gzip
15 | 
16 | # Third-party libraries
17 | import numpy as np
18 | 
19 | def load_data():
20 |     """Return the MNIST data as a tuple containing the training data,
21 |     the validation data, and the test data.
22 | 
23 |     The ``training_data`` is returned as a tuple with two entries.
24 |     The first entry contains the actual training images.  This is a
25 |     numpy ndarray with 50,000 entries.  Each entry is, in turn, a
26 |     numpy ndarray with 784 values, representing the 28 * 28 = 784
27 |     pixels in a single MNIST image.
28 | 
29 |     The second entry in the ``training_data`` tuple is a numpy ndarray
30 |     containing 50,000 entries.  Those entries are just the digit
31 |     values (0...9) for the corresponding images contained in the first
32 |     entry of the tuple.
33 | 
34 |     The ``validation_data`` and ``test_data`` are similar, except
35 |     each contains only 10,000 images.
36 | 
37 |     This is a nice data format, but for use in neural networks it's
38 |     helpful to modify the format of the ``training_data`` a little.
39 |     That's done in the wrapper function ``load_data_wrapper()``, see
40 |     below.
41 |     """
42 |     f = gzip.open('../data/mnist.pkl.gz', 'rb')
43 |     training_data, validation_data, test_data = pickle.load(f, encoding='bytes')
44 |     f.close()
45 |     return (training_data, validation_data, test_data)
46 | 
47 | def load_data_wrapper():
48 |     """Return a tuple containing ``(training_data, validation_data,
49 |     test_data)``. Based on ``load_data``, but the format is more
50 |     convenient for use in our implementation of neural networks.
51 | 
52 |     In particular, ``training_data`` is a list containing 50,000
53 |     2-tuples ``(x, y)``.  ``x`` is a 784-dimensional numpy.ndarray
54 |     containing the input image.  ``y`` is a 10-dimensional
55 |     numpy.ndarray representing the unit vector corresponding to the
56 |     correct digit for ``x``.
57 | 
58 |     ``validation_data`` and ``test_data`` are lists containing 10,000
59 |     2-tuples ``(x, y)``.  In each case, ``x`` is a 784-dimensional
60 |     numpy.ndarry containing the input image, and ``y`` is the
61 |     corresponding classification, i.e., the digit values (integers)
62 |     corresponding to ``x``.
63 | 
64 |     Obviously, this means we're using slightly different formats for
65 |     the training data and the validation / test data.  These formats
66 |     turn out to be the most convenient for use in our neural network
67 |     code."""
68 |     tr_d, va_d, te_d = load_data()
69 |     training_inputs = [np.reshape(x, (784, 1)) for x in tr_d[0]]
70 |     training_results = [vectorized_result(y) for y in tr_d[1]]
71 |     training_data = zip(training_inputs, training_results)
72 |     validation_inputs = [np.reshape(x, (784, 1)) for x in va_d[0]]
73 |     validation_data = zip(validation_inputs, va_d[1])
74 |     test_inputs = [np.reshape(x, (784, 1)) for x in te_d[0]]
75 |     test_data = zip(test_inputs, te_d[1])
76 |     return (training_data, validation_data, test_data)
77 | 
78 | def vectorized_result(j):
79 |     """Return a 10-dimensional unit vector with a 1.0 in the jth
80 |     position and zeroes elsewhere.  This is used to convert a digit
81 |     (0...9) into a corresponding desired output from the neural
82 |     network."""
83 |     e = np.zeros((10, 1))
84 |     e[j] = 1.0
85 |     return e
86 | 


--------------------------------------------------------------------------------
/src/mnist_svm.py:
--------------------------------------------------------------------------------
 1 | """
 2 | mnist_svm
 3 | ~~~~~~~~~
 4 | 
 5 | A classifier program for recognizing handwritten digits from the MNIST
 6 | data set, using an SVM classifier."""
 7 | 
 8 | #### Libraries
 9 | # My libraries
10 | import mnist_loader 
11 | 
12 | # Third-party libraries
13 | from sklearn import svm
14 | 
15 | def svm_baseline():
16 |     training_data, validation_data, test_data = mnist_loader.load_data()
17 |     # train
18 |     clf = svm.SVC()
19 |     clf.fit(training_data[0], training_data[1])
20 |     # test
21 |     predictions = [int(a) for a in clf.predict(test_data[0])]
22 |     num_correct = sum(int(a == y) for a, y in zip(predictions, test_data[1]))
23 |     print "Baseline classifier using an SVM."
24 |     print "%s of %s values correct." % (num_correct, len(test_data[1]))
25 | 
26 | if __name__ == "__main__":  # run the baseline only when executed as a script
27 |     svm_baseline()
28 |     
29 | 


--------------------------------------------------------------------------------
/src/network.py:
--------------------------------------------------------------------------------
  1 | """
  2 | network.py
  3 | ~~~~~~~~~~
  4 | 
  5 | A module to implement the stochastic gradient descent learning
  6 | algorithm for a feedforward neural network.  Gradients are calculated
  7 | using backpropagation.  Note that I have focused on making the code
  8 | simple, easily readable, and easily modifiable.  It is not optimized,
  9 | and omits many desirable features.
 10 | """
 11 | 
 12 | #### Libraries
 13 | # Standard library
 14 | import random
 15 | 
 16 | # Third-party libraries
 17 | import numpy as np
 18 | 
 19 | class Network(object):
 20 | 
 21 |     def __init__(self, sizes):
 22 |         """The list ``sizes`` contains the number of neurons in the
 23 |         respective layers of the network.  For example, if the list
 24 |         was [2, 3, 1] then it would be a three-layer network, with the
 25 |         first layer containing 2 neurons, the second layer 3 neurons,
 26 |         and the third layer 1 neuron.  The biases and weights for the
 27 |         network are initialized randomly, using a Gaussian
 28 |         distribution with mean 0, and variance 1.  Note that the first
 29 |         layer is assumed to be an input layer, and by convention we
 30 |         won't set any biases for those neurons, since biases are only
 31 |         ever used in computing the outputs from later layers."""
 32 |         self.num_layers = len(sizes)
 33 |         self.sizes = sizes
 34 |         self.biases = [np.random.randn(y, 1) for y in sizes[1:]]
 35 |         self.weights = [np.random.randn(y, x)
 36 |                         for x, y in zip(sizes[:-1], sizes[1:])]
 37 | 
 38 |     def feedforward(self, a):
 39 |         """Return the output of the network if ``a`` is input."""
 40 |         for b, w in zip(self.biases, self.weights):
 41 |             a = sigmoid(np.dot(w, a)+b)
 42 |         return a
 43 | 
 44 |     def SGD(self, training_data, epochs, mini_batch_size, eta,
 45 |             test_data=None):
 46 |         """Train the neural network using mini-batch stochastic
 47 |         gradient descent.  The ``training_data`` is a list of tuples
 48 |         ``(x, y)`` representing the training inputs and the desired
 49 |         outputs.  The other non-optional parameters are
 50 |         self-explanatory.  If ``test_data`` is provided then the
 51 |         network will be evaluated against the test data after each
 52 |         epoch, and partial progress printed out.  This is useful for
 53 |         tracking progress, but slows things down substantially."""
 54 |         test_data = list(test_data)
 55 |         training_data = list(training_data)
 56 |         if test_data: n_test = len(test_data)
 57 |         n = len(training_data)
 58 |         for j in range(epochs):
 59 |             random.shuffle(training_data)
 60 |             mini_batches = [
 61 |                 training_data[k:k+mini_batch_size]
 62 |                 for k in range(0, n, mini_batch_size)]
 63 |             for mini_batch in mini_batches:
 64 |                 self.update_mini_batch(mini_batch, eta)
 65 |             if test_data:
 66 |                 print("Epoch {0}: {1} / {2}".format(
 67 |                     j, self.evaluate(test_data), n_test))
 68 |             else:
 69 |                 print("Epoch {0} complete".format(j))
 70 | 
 71 |     def update_mini_batch(self, mini_batch, eta):
 72 |         """Update the network's weights and biases by applying
 73 |         gradient descent using backpropagation to a single mini batch.
 74 |         The ``mini_batch`` is a list of tuples ``(x, y)``, and ``eta``
 75 |         is the learning rate."""
 76 |         nabla_b = [np.zeros(b.shape) for b in self.biases]
 77 |         nabla_w = [np.zeros(w.shape) for w in self.weights]
 78 |         for x, y in mini_batch:
 79 |             delta_nabla_b, delta_nabla_w = self.backprop(x, y)
 80 |             nabla_b = [nb+dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
 81 |             nabla_w = [nw+dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
 82 |         self.weights = [w-(eta/len(mini_batch))*nw
 83 |                         for w, nw in zip(self.weights, nabla_w)]
 84 |         self.biases = [b-(eta/len(mini_batch))*nb
 85 |                        for b, nb in zip(self.biases, nabla_b)]
 86 | 
 87 |     def backprop(self, x, y):
 88 |         """Return a tuple ``(nabla_b, nabla_w)`` representing the
 89 |         gradient for the cost function C_x.  ``nabla_b`` and
 90 |         ``nabla_w`` are layer-by-layer lists of numpy arrays, similar
 91 |         to ``self.biases`` and ``self.weights``."""
 92 |         nabla_b = [np.zeros(b.shape) for b in self.biases]
 93 |         nabla_w = [np.zeros(w.shape) for w in self.weights]
 94 |         # feedforwar
 95 |         activation = x
 96 |         activations = [x] # list to store all the activations, layer by layer
 97 |         zs = [] # list to store all the z vectors, layer by layer
 98 |         for b, w in zip(self.biases, self.weights):
 99 |             z = np.dot(w, activation)+b
100 |             zs.append(z)
101 |             activation = sigmoid(z)
102 |             activations.append(activation)
103 |         # backward pass
104 |         delta = self.cost_derivative(activations[-1], y) * \
105 |             sigmoid_prime(zs[-1])
106 |         nabla_b[-1] = delta
107 |         nabla_w[-1] = np.dot(delta, activations[-2].transpose())
108 |         # Note that the variable l in the loop below is used a little
109 |         # differently to the notation in Chapter 2 of the book.  Here,
110 |         # l = 1 means the last layer of neurons, l = 2 is the
111 |         # second-last layer, and so on.  It's a renumbering of the
112 |         # scheme in the book, used here to take advantage of the fact
113 |         # that Python can use negative indices in lists.
114 |         for l in range(2, self.num_layers):
115 |             z = zs[-l]
116 |             sp = sigmoid_prime(z)
117 |             delta = np.dot(self.weights[-l+1].transpose(), delta) * sp
118 |             nabla_b[-l] = delta
119 |             nabla_w[-l] = np.dot(delta, activations[-l-1].transpose())
120 |         return (nabla_b, nabla_w)
121 | 
122 |     def evaluate(self, test_data):
123 |         """Return the number of test inputs for which the neural
124 |         network outputs the correct result. Note that the neural
125 |         network's output is assumed to be the index of whichever
126 |         neuron in the final layer has the highest activation."""
127 |         test_results = [(np.argmax(self.feedforward(x)), y)
128 |                         for (x, y) in test_data]
129 |         return sum(int(x == y) for (x, y) in test_results)
130 | 
131 |     def cost_derivative(self, output_activations, y):
132 |         """Return the vector of partial derivatives \partial C_x /
133 |         \partial a for the output activations."""
134 |         return (output_activations-y)
135 | 
136 | #### Miscellaneous functions
137 | def sigmoid(z):
138 |     """The sigmoid function."""
139 |     return 1.0/(1.0+np.exp(-z))
140 | 
141 | def sigmoid_prime(z):
142 |     """Derivative of the sigmoid function."""
143 |     return sigmoid(z)*(1-sigmoid(z))
144 | 


--------------------------------------------------------------------------------
/src/network2.py:
--------------------------------------------------------------------------------
  1 | """network2.py
  2 | ~~~~~~~~~~~~~~
  3 | 
  4 | An improved version of network.py, implementing the stochastic
  5 | gradient descent learning algorithm for a feedforward neural network.
  6 | Improvements include the addition of the cross-entropy cost function,
  7 | regularization, and better initialization of network weights.  Note
  8 | that I have focused on making the code simple, easily readable, and
  9 | easily modifiable.  It is not optimized, and omits many desirable
 10 | features.
 11 | 
 12 | """
 13 | 
 14 | #### Libraries
 15 | # Standard library
 16 | import json
 17 | import random
 18 | import sys
 19 | 
 20 | # Third-party libraries
 21 | import numpy as np
 22 | 
 23 | 
 24 | #### Define the quadratic and cross-entropy cost functions
 25 | 
 26 | class QuadraticCost(object):
 27 | 
 28 |     @staticmethod
 29 |     def fn(a, y):
 30 |         """Return the cost associated with an output ``a`` and desired output
 31 |         ``y``.
 32 | 
 33 |         """
 34 |         return 0.5*np.linalg.norm(a-y)**2
 35 | 
 36 |     @staticmethod
 37 |     def delta(z, a, y):
 38 |         """Return the error delta from the output layer."""
 39 |         return (a-y) * sigmoid_prime(z)
 40 | 
 41 | 
 42 | class CrossEntropyCost(object):
 43 | 
 44 |     @staticmethod
 45 |     def fn(a, y):
 46 |         """Return the cost associated with an output ``a`` and desired output
 47 |         ``y``.  Note that np.nan_to_num is used to ensure numerical
 48 |         stability.  In particular, if both ``a`` and ``y`` have a 1.0
 49 |         in the same slot, then the expression (1-y)*np.log(1-a)
 50 |         returns nan.  The np.nan_to_num ensures that that is converted
 51 |         to the correct value (0.0).
 52 | 
 53 |         """
 54 |         return np.sum(np.nan_to_num(-y*np.log(a)-(1-y)*np.log(1-a)))
 55 | 
 56 |     @staticmethod
 57 |     def delta(z, a, y):
 58 |         """Return the error delta from the output layer.  Note that the
 59 |         parameter ``z`` is not used by the method.  It is included in
 60 |         the method's parameters in order to make the interface
 61 |         consistent with the delta method for other cost classes.
 62 | 
 63 |         """
 64 |         return (a-y)
 65 | 
 66 | 
 67 | #### Main Network class
 68 | class Network(object):
 69 | 
 70 |     def __init__(self, sizes, cost=CrossEntropyCost):
 71 |         """The list ``sizes`` contains the number of neurons in the respective
 72 |         layers of the network.  For example, if the list was [2, 3, 1]
 73 |         then it would be a three-layer network, with the first layer
 74 |         containing 2 neurons, the second layer 3 neurons, and the
 75 |         third layer 1 neuron.  The biases and weights for the network
 76 |         are initialized randomly, using
 77 |         ``self.default_weight_initializer`` (see docstring for that
 78 |         method).
 79 | 
 80 |         """
 81 |         self.num_layers = len(sizes)
 82 |         self.sizes = sizes
 83 |         self.default_weight_initializer()
 84 |         self.cost=cost
 85 | 
 86 |     def default_weight_initializer(self):
 87 |         """Initialize each weight using a Gaussian distribution with mean 0
 88 |         and standard deviation 1 over the square root of the number of
 89 |         weights connecting to the same neuron.  Initialize the biases
 90 |         using a Gaussian distribution with mean 0 and standard
 91 |         deviation 1.
 92 | 
 93 |         Note that the first layer is assumed to be an input layer, and
 94 |         by convention we won't set any biases for those neurons, since
 95 |         biases are only ever used in computing the outputs from later
 96 |         layers.
 97 | 
 98 |         """
 99 |         self.biases = [np.random.randn(y, 1) for y in self.sizes[1:]]
100 |         self.weights = [np.random.randn(y, x)/np.sqrt(x)
101 |                         for x, y in zip(self.sizes[:-1], self.sizes[1:])]
102 | 
103 |     def large_weight_initializer(self):
104 |         """Initialize the weights using a Gaussian distribution with mean 0
105 |         and standard deviation 1.  Initialize the biases using a
106 |         Gaussian distribution with mean 0 and standard deviation 1.
107 | 
108 |         Note that the first layer is assumed to be an input layer, and
109 |         by convention we won't set any biases for those neurons, since
110 |         biases are only ever used in computing the outputs from later
111 |         layers.
112 | 
113 |         This weight and bias initializer uses the same approach as in
114 |         Chapter 1, and is included for purposes of comparison.  It
115 |         will usually be better to use the default weight initializer
116 |         instead.
117 | 
118 |         """
119 |         self.biases = [np.random.randn(y, 1) for y in self.sizes[1:]]
120 |         self.weights = [np.random.randn(y, x)
121 |                         for x, y in zip(self.sizes[:-1], self.sizes[1:])]
122 | 
123 |     def feedforward(self, a):
124 |         """Return the output of the network if ``a`` is input."""
125 |         for b, w in zip(self.biases, self.weights):
126 |             a = sigmoid(np.dot(w, a)+b)
127 |         return a
128 | 
129 |     def SGD(self, training_data, epochs, mini_batch_size, eta,
130 |             lmbda = 0.0,
131 |             evaluation_data=None,
132 |             monitor_evaluation_cost=False,
133 |             monitor_evaluation_accuracy=False,
134 |             monitor_training_cost=False,
135 |             monitor_training_accuracy=False):
136 |         """Train the neural network using mini-batch stochastic gradient
137 |         descent.  The ``training_data`` is a list of tuples ``(x, y)``
138 |         representing the training inputs and the desired outputs.  The
139 |         other non-optional parameters are self-explanatory, as is the
140 |         regularization parameter ``lmbda``.  The method also accepts
141 |         ``evaluation_data``, usually either the validation or test
142 |         data.  We can monitor the cost and accuracy on either the
143 |         evaluation data or the training data, by setting the
144 |         appropriate flags.  The method returns a tuple containing four
145 |         lists: the (per-epoch) costs on the evaluation data, the
146 |         accuracies on the evaluation data, the costs on the training
147 |         data, and the accuracies on the training data.  All values are
148 |         evaluated at the end of each training epoch.  So, for example,
149 |         if we train for 30 epochs, then the first element of the tuple
150 |         will be a 30-element list containing the cost on the
151 |         evaluation data at the end of each epoch. Note that the lists
152 |         are empty if the corresponding flag is not set.
153 | 
154 |         """
155 |         if evaluation_data: n_data = len(evaluation_data)
156 |         n = len(training_data)
157 |         evaluation_cost, evaluation_accuracy = [], []
158 |         training_cost, training_accuracy = [], []
159 |         for j in xrange(epochs):
160 |             random.shuffle(training_data)
161 |             mini_batches = [
162 |                 training_data[k:k+mini_batch_size]
163 |                 for k in xrange(0, n, mini_batch_size)]
164 |             for mini_batch in mini_batches:
165 |                 self.update_mini_batch(
166 |                     mini_batch, eta, lmbda, len(training_data))
167 |             print "Epoch %s training complete" % j
168 |             if monitor_training_cost:
169 |                 cost = self.total_cost(training_data, lmbda)
170 |                 training_cost.append(cost)
171 |                 print "Cost on training data: {}".format(cost)
172 |             if monitor_training_accuracy:
173 |                 accuracy = self.accuracy(training_data, convert=True)
174 |                 training_accuracy.append(accuracy)
175 |                 print "Accuracy on training data: {} / {}".format(
176 |                     accuracy, n)
177 |             if monitor_evaluation_cost:
178 |                 cost = self.total_cost(evaluation_data, lmbda, convert=True)
179 |                 evaluation_cost.append(cost)
180 |                 print "Cost on evaluation data: {}".format(cost)
181 |             if monitor_evaluation_accuracy:
182 |                 accuracy = self.accuracy(evaluation_data)
183 |                 evaluation_accuracy.append(accuracy)
184 |                 print "Accuracy on evaluation data: {} / {}".format(
185 |                     self.accuracy(evaluation_data), n_data)
186 |             print
187 |         return evaluation_cost, evaluation_accuracy, \
188 |             training_cost, training_accuracy
189 | 
190 |     def update_mini_batch(self, mini_batch, eta, lmbda, n):
191 |         """Update the network's weights and biases by applying gradient
192 |         descent using backpropagation to a single mini batch.  The
193 |         ``mini_batch`` is a list of tuples ``(x, y)``, ``eta`` is the
194 |         learning rate, ``lmbda`` is the regularization parameter, and
195 |         ``n`` is the total size of the training data set.
196 | 
197 |         """
198 |         nabla_b = [np.zeros(b.shape) for b in self.biases]
199 |         nabla_w = [np.zeros(w.shape) for w in self.weights]
200 |         for x, y in mini_batch:
201 |             delta_nabla_b, delta_nabla_w = self.backprop(x, y)
202 |             nabla_b = [nb+dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
203 |             nabla_w = [nw+dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
204 |         self.weights = [(1-eta*(lmbda/n))*w-(eta/len(mini_batch))*nw
205 |                         for w, nw in zip(self.weights, nabla_w)]
206 |         self.biases = [b-(eta/len(mini_batch))*nb
207 |                        for b, nb in zip(self.biases, nabla_b)]
208 | 
209 |     def backprop(self, x, y):
210 |         """Return a tuple ``(nabla_b, nabla_w)`` representing the
211 |         gradient for the cost function C_x.  ``nabla_b`` and
212 |         ``nabla_w`` are layer-by-layer lists of numpy arrays, similar
213 |         to ``self.biases`` and ``self.weights``."""
214 |         nabla_b = [np.zeros(b.shape) for b in self.biases]
215 |         nabla_w = [np.zeros(w.shape) for w in self.weights]
216 |         # feedforward
217 |         activation = x
218 |         activations = [x] # list to store all the activations, layer by layer
219 |         zs = [] # list to store all the z vectors, layer by layer
220 |         for b, w in zip(self.biases, self.weights):
221 |             z = np.dot(w, activation)+b
222 |             zs.append(z)
223 |             activation = sigmoid(z)
224 |             activations.append(activation)
225 |         # backward pass
226 |         delta = (self.cost).delta(zs[-1], activations[-1], y)
227 |         nabla_b[-1] = delta
228 |         nabla_w[-1] = np.dot(delta, activations[-2].transpose())
229 |         # Note that the variable l in the loop below is used a little
230 |         # differently to the notation in Chapter 2 of the book.  Here,
231 |         # l = 1 means the last layer of neurons, l = 2 is the
232 |         # second-last layer, and so on.  It's a renumbering of the
233 |         # scheme in the book, used here to take advantage of the fact
234 |         # that Python can use negative indices in lists.
235 |         for l in xrange(2, self.num_layers):
236 |             z = zs[-l]
237 |             sp = sigmoid_prime(z)
238 |             delta = np.dot(self.weights[-l+1].transpose(), delta) * sp
239 |             nabla_b[-l] = delta
240 |             nabla_w[-l] = np.dot(delta, activations[-l-1].transpose())
241 |         return (nabla_b, nabla_w)
242 | 
243 |     def accuracy(self, data, convert=False):
244 |         """Return the number of inputs in ``data`` for which the neural
245 |         network outputs the correct result. The neural network's
246 |         output is assumed to be the index of whichever neuron in the
247 |         final layer has the highest activation.
248 | 
249 |         The flag ``convert`` should be set to False if the data set is
250 |         validation or test data (the usual case), and to True if the
251 |         data set is the training data. The need for this flag arises
252 |         due to differences in the way the results ``y`` are
253 |         represented in the different data sets.  In particular, it
254 |         flags whether we need to convert between the different
255 |         representations.  It may seem strange to use different
256 |         representations for the different data sets.  Why not use the
257 |         same representation for all three data sets?  It's done for
258 |         efficiency reasons -- the program usually evaluates the cost
259 |         on the training data and the accuracy on other data sets.
260 |         These are different types of computations, and using different
261 |         representations speeds things up.  More details on the
262 |         representations can be found in
263 |         mnist_loader.load_data_wrapper.
264 | 
265 |         """
266 |         if convert:
267 |             results = [(np.argmax(self.feedforward(x)), np.argmax(y))
268 |                        for (x, y) in data]
269 |         else:
270 |             results = [(np.argmax(self.feedforward(x)), y)
271 |                         for (x, y) in data]
272 |         return sum(int(x == y) for (x, y) in results)
273 | 
274 |     def total_cost(self, data, lmbda, convert=False):
275 |         """Return the total cost for the data set ``data``.  The flag
276 |         ``convert`` should be set to False if the data set is the
277 |         training data (the usual case), and to True if the data set is
278 |         the validation or test data.  See comments on the similar (but
279 |         reversed) convention for the ``accuracy`` method, above.
280 |         """
281 |         cost = 0.0
282 |         for x, y in data:
283 |             a = self.feedforward(x)
284 |             if convert: y = vectorized_result(y)
285 |             cost += self.cost.fn(a, y)/len(data)
286 |         cost += 0.5*(lmbda/len(data))*sum(
287 |             np.linalg.norm(w)**2 for w in self.weights)
288 |         return cost
289 | 
290 |     def save(self, filename):
291 |         """Save the neural network to the file ``filename``."""
292 |         data = {"sizes": self.sizes,
293 |                 "weights": [w.tolist() for w in self.weights],
294 |                 "biases": [b.tolist() for b in self.biases],
295 |                 "cost": str(self.cost.__name__)}
296 |         f = open(filename, "w")
297 |         json.dump(data, f)
298 |         f.close()
299 | 
300 | #### Loading a Network
def load(filename):
    """Load a neural network from the JSON file ``filename``.  Returns
    an instance of Network.

    The file is expected to contain the keys ``sizes``, ``weights``,
    ``biases`` and ``cost``.

    """
    # ``with`` closes the file even if the JSON cannot be parsed.
    with open(filename, "r") as f:
        data = json.load(f)
    # The cost class is looked up by name among this module's
    # attributes.  NOTE(review): the name comes straight from the file,
    # so only load files from a trusted source.
    cost = getattr(sys.modules[__name__], data["cost"])
    net = Network(data["sizes"], cost=cost)
    net.weights = [np.array(w) for w in data["weights"]]
    net.biases = [np.array(b) for b in data["biases"]]
    return net
314 | 
315 | #### Miscellaneous functions
def vectorized_result(j, num_classes=10):
    """Return a ``num_classes``-dimensional unit column vector with a
    1.0 in the j'th position and zeroes elsewhere.  With the default of
    10 classes this converts a digit (0...9) into the corresponding
    desired output from the neural network.

    """
    e = np.zeros((num_classes, 1))
    e[j] = 1.0
    return e
325 | 
def sigmoid(z):
    """Logistic sigmoid, 1/(1+exp(-z))."""
    denominator = 1.0+np.exp(-z)
    return 1.0/denominator
329 | 
def sigmoid_prime(z):
    """Derivative of the sigmoid function, sigmoid(z)*(1-sigmoid(z))."""
    # Evaluate the sigmoid once and reuse it instead of computing the
    # exponential twice.
    s = sigmoid(z)
    return s*(1-s)
333 | 


--------------------------------------------------------------------------------
/src/network3.py:
--------------------------------------------------------------------------------
  1 | """network3.py
  2 | ~~~~~~~~~~~~~~
  3 | 
  4 | A Theano-based program for training and running simple neural
  5 | networks.
  6 | 
  7 | Supports several layer types (fully connected, convolutional, max
  8 | pooling, softmax), and activation functions (sigmoid, tanh, and
  9 | rectified linear units, with more easily added).
 10 | 
 11 | When run on a CPU, this program is much faster than network.py and
 12 | network2.py.  However, unlike network.py and network2.py it can also
 13 | be run on a GPU, which makes it faster still.
 14 | 
 15 | Because the code is based on Theano, the code is different in many
 16 | ways from network.py and network2.py.  However, where possible I have
 17 | tried to maintain consistency with the earlier programs.  In
 18 | particular, the API is similar to network2.py.  Note that I have
 19 | focused on making the code simple, easily readable, and easily
 20 | modifiable.  It is not optimized, and omits many desirable features.
 21 | 
 22 | This program incorporates ideas from the Theano documentation on
 23 | convolutional neural nets (notably,
 24 | http://deeplearning.net/tutorial/lenet.html ), from Misha Denil's
 25 | implementation of dropout (https://github.com/mdenil/dropout ), and
 26 | from Chris Olah (http://colah.github.io ).
 27 | 
 28 | """
 29 | 
 30 | #### Libraries
 31 | # Standard library
 32 | import cPickle
 33 | import gzip
 34 | 
 35 | # Third-party libraries
 36 | import numpy as np
 37 | import theano
 38 | import theano.tensor as T
 39 | from theano.tensor.nnet import conv
 40 | from theano.tensor.nnet import softmax
 41 | from theano.tensor import shared_randomstreams
 42 | from theano.tensor.signal import downsample
 43 | 
 44 | # Activation functions for neurons
 45 | def linear(z): return z
 46 | def ReLU(z): return T.maximum(0.0, z)
 47 | from theano.tensor.nnet import sigmoid
 48 | from theano.tensor import tanh
 49 | 
 50 | 
 51 | #### Constants
 52 | GPU = True
 53 | if GPU:
 54 |     print "Trying to run under a GPU.  If this is not desired, then modify "+\
 55 |         "network3.py\nto set the GPU flag to False."
 56 |     try: theano.config.device = 'gpu'
 57 |     except: pass # it's already set
 58 |     theano.config.floatX = 'float32'
 59 | else:
 60 |     print "Running with a CPU.  If this is not desired, then the modify "+\
 61 |         "network3.py to set\nthe GPU flag to True."
 62 | 
 63 | #### Load the MNIST data
 64 | def load_data_shared(filename="../data/mnist.pkl.gz"):
 65 |     f = gzip.open(filename, 'rb')
 66 |     training_data, validation_data, test_data = cPickle.load(f)
 67 |     f.close()
 68 |     def shared(data):
 69 |         """Place the data into shared variables.  This allows Theano to copy
 70 |         the data to the GPU, if one is available.
 71 | 
 72 |         """
 73 |         shared_x = theano.shared(
 74 |             np.asarray(data[0], dtype=theano.config.floatX), borrow=True)
 75 |         shared_y = theano.shared(
 76 |             np.asarray(data[1], dtype=theano.config.floatX), borrow=True)
 77 |         return shared_x, T.cast(shared_y, "int32")
 78 |     return [shared(training_data), shared(validation_data), shared(test_data)]
 79 | 
 80 | #### Main class used to construct and train networks
 81 | class Network(object):
 82 | 
 83 |     def __init__(self, layers, mini_batch_size):
 84 |         """Takes a list of `layers`, describing the network architecture, and
 85 |         a value for the `mini_batch_size` to be used during training
 86 |         by stochastic gradient descent.
 87 | 
 88 |         """
 89 |         self.layers = layers
 90 |         self.mini_batch_size = mini_batch_size
 91 |         self.params = [param for layer in self.layers for param in layer.params]
 92 |         self.x = T.matrix("x")
 93 |         self.y = T.ivector("y")
 94 |         init_layer = self.layers[0]
 95 |         init_layer.set_inpt(self.x, self.x, self.mini_batch_size)
 96 |         for j in xrange(1, len(self.layers)):
 97 |             prev_layer, layer  = self.layers[j-1], self.layers[j]
 98 |             layer.set_inpt(
 99 |                 prev_layer.output, prev_layer.output_dropout, self.mini_batch_size)
100 |         self.output = self.layers[-1].output
101 |         self.output_dropout = self.layers[-1].output_dropout
102 | 
103 |     def SGD(self, training_data, epochs, mini_batch_size, eta,
104 |             validation_data, test_data, lmbda=0.0):
105 |         """Train the network using mini-batch stochastic gradient descent."""
106 |         training_x, training_y = training_data
107 |         validation_x, validation_y = validation_data
108 |         test_x, test_y = test_data
109 | 
110 |         # compute number of minibatches for training, validation and testing
111 |         num_training_batches = size(training_data)/mini_batch_size
112 |         num_validation_batches = size(validation_data)/mini_batch_size
113 |         num_test_batches = size(test_data)/mini_batch_size
114 | 
115 |         # define the (regularized) cost function, symbolic gradients, and updates
116 |         l2_norm_squared = sum([(layer.w**2).sum() for layer in self.layers])
117 |         cost = self.layers[-1].cost(self)+\
118 |                0.5*lmbda*l2_norm_squared/num_training_batches
119 |         grads = T.grad(cost, self.params)
120 |         updates = [(param, param-eta*grad)
121 |                    for param, grad in zip(self.params, grads)]
122 | 
123 |         # define functions to train a mini-batch, and to compute the
124 |         # accuracy in validation and test mini-batches.
125 |         i = T.lscalar() # mini-batch index
126 |         train_mb = theano.function(
127 |             [i], cost, updates=updates,
128 |             givens={
129 |                 self.x:
130 |                 training_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size],
131 |                 self.y:
132 |                 training_y[i*self.mini_batch_size: (i+1)*self.mini_batch_size]
133 |             })
134 |         validate_mb_accuracy = theano.function(
135 |             [i], self.layers[-1].accuracy(self.y),
136 |             givens={
137 |                 self.x:
138 |                 validation_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size],
139 |                 self.y:
140 |                 validation_y[i*self.mini_batch_size: (i+1)*self.mini_batch_size]
141 |             })
142 |         test_mb_accuracy = theano.function(
143 |             [i], self.layers[-1].accuracy(self.y),
144 |             givens={
145 |                 self.x:
146 |                 test_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size],
147 |                 self.y:
148 |                 test_y[i*self.mini_batch_size: (i+1)*self.mini_batch_size]
149 |             })
150 |         self.test_mb_predictions = theano.function(
151 |             [i], self.layers[-1].y_out,
152 |             givens={
153 |                 self.x:
154 |                 test_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size]
155 |             })
156 |         # Do the actual training
157 |         best_validation_accuracy = 0.0
158 |         for epoch in xrange(epochs):
159 |             for minibatch_index in xrange(num_training_batches):
160 |                 iteration = num_training_batches*epoch+minibatch_index
161 |                 if iteration % 1000 == 0:
162 |                     print("Training mini-batch number {0}".format(iteration))
163 |                 cost_ij = train_mb(minibatch_index)
164 |                 if (iteration+1) % num_training_batches == 0:
165 |                     validation_accuracy = np.mean(
166 |                         [validate_mb_accuracy(j) for j in xrange(num_validation_batches)])
167 |                     print("Epoch {0}: validation accuracy {1:.2%}".format(
168 |                         epoch, validation_accuracy))
169 |                     if validation_accuracy >= best_validation_accuracy:
170 |                         print("This is the best validation accuracy to date.")
171 |                         best_validation_accuracy = validation_accuracy
172 |                         best_iteration = iteration
173 |                         if test_data:
174 |                             test_accuracy = np.mean(
175 |                                 [test_mb_accuracy(j) for j in xrange(num_test_batches)])
176 |                             print('The corresponding test accuracy is {0:.2%}'.format(
177 |                                 test_accuracy))
178 |         print("Finished training network.")
179 |         print("Best validation accuracy of {0:.2%} obtained at iteration {1}".format(
180 |             best_validation_accuracy, best_iteration))
181 |         print("Corresponding test accuracy of {0:.2%}".format(test_accuracy))
182 | 
183 | #### Define layer types
184 | 
class ConvPoolLayer(object):
    """A convolutional layer fused with a max-pooling layer.  A more
    sophisticated implementation would keep the two apart, but here
    they are always used together, so combining them keeps the code
    simple.

    """

    def __init__(self, filter_shape, image_shape, poolsize=(2, 2),
                 activation_fn=sigmoid):
        """`filter_shape` is a tuple of length 4: the number of filters,
        the number of input feature maps, the filter height, and the
        filter width.

        `image_shape` is a tuple of length 4: the mini-batch size, the
        number of input feature maps, the image height, and the image
        width.

        `poolsize` is a tuple of length 2: the y and x pooling sizes.

        """
        self.filter_shape = filter_shape
        self.image_shape = image_shape
        self.poolsize = poolsize
        self.activation_fn = activation_fn
        # Weights are drawn first, then biases.  The weight scale comes
        # from n_out = (number of filters * filter area) / pool area.
        n_out = (filter_shape[0]*np.prod(filter_shape[2:])/np.prod(poolsize))
        initial_w = np.random.normal(
            loc=0, scale=np.sqrt(1.0/n_out), size=filter_shape)
        self.w = theano.shared(
            np.asarray(initial_w, dtype=theano.config.floatX),
            borrow=True)
        initial_b = np.random.normal(
            loc=0, scale=1.0, size=(filter_shape[0],))
        self.b = theano.shared(
            np.asarray(initial_b, dtype=theano.config.floatX),
            borrow=True)
        self.params = [self.w, self.b]

    def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
        """Build the symbolic output: convolve, max-pool, add the
        per-filter bias and apply the activation.  ``inpt_dropout`` and
        ``mini_batch_size`` are accepted but not used here."""
        self.inpt = inpt.reshape(self.image_shape)
        feature_maps = conv.conv2d(
            input=self.inpt, filters=self.w, filter_shape=self.filter_shape,
            image_shape=self.image_shape)
        pooled = downsample.max_pool_2d(
            input=feature_maps, ds=self.poolsize, ignore_border=True)
        # Broadcast the bias vector along every axis except the second.
        bias = self.b.dimshuffle('x', 0, 'x', 'x')
        self.output = self.activation_fn(pooled + bias)
        # no dropout in the convolutional layers
        self.output_dropout = self.output
235 | 
class FullyConnectedLayer(object):
    """A fully connected layer with an optional dropout probability
    ``p_dropout`` applied to its input."""

    def __init__(self, n_in, n_out, activation_fn=sigmoid, p_dropout=0.0):
        """Store the layer dimensions and create the shared weight
        matrix of shape ``(n_in, n_out)`` and bias vector of shape
        ``(n_out,)``, both drawn from normal distributions."""
        self.n_in = n_in
        self.n_out = n_out
        self.activation_fn = activation_fn
        self.p_dropout = p_dropout
        # Weights are drawn first, then biases.
        initial_w = np.random.normal(
            loc=0.0, scale=np.sqrt(1.0/n_out), size=(n_in, n_out))
        self.w = theano.shared(
            np.asarray(initial_w, dtype=theano.config.floatX),
            name='w', borrow=True)
        initial_b = np.random.normal(loc=0.0, scale=1.0, size=(n_out,))
        self.b = theano.shared(
            np.asarray(initial_b, dtype=theano.config.floatX),
            name='b', borrow=True)
        self.params = [self.w, self.b]

    def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
        """Build two symbolic outputs: ``output``, computed from
        ``inpt`` with the weighted sum scaled by ``1-p_dropout``, and
        ``output_dropout``, computed from a dropout-masked copy of
        ``inpt_dropout``."""
        flat_shape = (mini_batch_size, self.n_in)
        keep_fraction = 1-self.p_dropout
        self.inpt = inpt.reshape(flat_shape)
        self.output = self.activation_fn(
            keep_fraction*T.dot(self.inpt, self.w) + self.b)
        self.y_out = T.argmax(self.output, axis=1)
        self.inpt_dropout = dropout_layer(
            inpt_dropout.reshape(flat_shape), self.p_dropout)
        self.output_dropout = self.activation_fn(
            T.dot(self.inpt_dropout, self.w) + self.b)

    def accuracy(self, y):
        "Return the accuracy for the mini-batch."
        matches = T.eq(y, self.y_out)
        return T.mean(matches)
269 | 
class SoftmaxLayer(object):
    """A softmax output layer with an optional dropout probability
    ``p_dropout`` applied to its input."""

    def __init__(self, n_in, n_out, p_dropout=0.0):
        """Store the layer dimensions and create zero-initialized shared
        weights of shape ``(n_in, n_out)`` and biases of shape
        ``(n_out,)``."""
        self.n_in = n_in
        self.n_out = n_out
        self.p_dropout = p_dropout
        # Weights and biases both start at zero.
        zero_w = np.zeros((n_in, n_out), dtype=theano.config.floatX)
        self.w = theano.shared(zero_w, name='w', borrow=True)
        zero_b = np.zeros((n_out,), dtype=theano.config.floatX)
        self.b = theano.shared(zero_b, name='b', borrow=True)
        self.params = [self.w, self.b]

    def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
        """Build two symbolic outputs: ``output``, computed from
        ``inpt`` with the weighted sum scaled by ``1-p_dropout``, and
        ``output_dropout``, computed from a dropout-masked copy of
        ``inpt_dropout``."""
        flat_shape = (mini_batch_size, self.n_in)
        keep_fraction = 1-self.p_dropout
        self.inpt = inpt.reshape(flat_shape)
        self.output = softmax(keep_fraction*T.dot(self.inpt, self.w) + self.b)
        self.y_out = T.argmax(self.output, axis=1)
        self.inpt_dropout = dropout_layer(
            inpt_dropout.reshape(flat_shape), self.p_dropout)
        self.output_dropout = softmax(T.dot(self.inpt_dropout, self.w) + self.b)

    def cost(self, net):
        "Return the log-likelihood cost."
        # For each row, pick the log-probability of the correct class.
        log_probs = T.log(self.output_dropout)
        row_index = T.arange(net.y.shape[0])
        return -T.mean(log_probs[row_index, net.y])

    def accuracy(self, y):
        "Return the accuracy for the mini-batch."
        matches = T.eq(y, self.y_out)
        return T.mean(matches)
300 | 
301 | 
302 | #### Miscellanea
def size(data):
    "Return the number of rows in the first shared variable of `data`."
    inputs = data[0]
    values = inputs.get_value(borrow=True)
    return values.shape[0]
306 | 
def dropout_layer(layer, p_dropout):
    """Return ``layer`` multiplied elementwise by a random 0/1 mask in
    which each entry is 1 with probability ``1-p_dropout``.

    The random stream is seeded from ``np.random.RandomState(0)``, so
    the seed is the same on every call.
    """
    seed = np.random.RandomState(0).randint(999999)
    srng = shared_randomstreams.RandomStreams(seed)
    keep_mask = srng.binomial(n=1, p=1-p_dropout, size=layer.shape)
    return layer*T.cast(keep_mask, theano.config.floatX)
312 | 


--------------------------------------------------------------------------------
/src/old/blog/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/src/old/blog/__init__.py


--------------------------------------------------------------------------------
/src/old/blog/common_knowledge.py:
--------------------------------------------------------------------------------
 1 | """
 2 | common_knowledge
 3 | ~~~~~~~~~~~~~~~~
 4 | 
 5 | Try to determine whether or not it's possible to relate the
 6 | descriptions given by two different autoencoders.
 7 | 
 8 | """
 9 | 
10 | #### Libraries
11 | # My libraries
12 | from backprop2 import Network, sigmoid_vec
13 | import mnist_loader
14 | 
15 | # Third-party libraries
16 | import matplotlib
17 | import matplotlib.pyplot as plt
18 | import numpy as np
19 | 
20 | 
21 | #### Parameters
22 | # Size of the training sets.  May range from 1000 to 12,500.  Lower
23 | # will be faster, higher will give more accuracy.
24 | SIZE = 5000 
25 | # Number of hidden units in the autoencoder
26 | HIDDEN = 30
27 | 
28 | print "\nGenerating training data"
29 | training_data, _, _ = mnist_loader.load_data_nn()
30 | td_1 = [(x, x) for x, _ in training_data[0:SIZE]]
31 | td_2 = [(x, x) for x, _ in training_data[12500:12500+SIZE]]
32 | td_3 = [x for x, _ in training_data[25000:25000+SIZE]]
33 | test = [x for x, _ in training_data[37500:37500+SIZE]]
34 | 
35 | print "\nFinding first autoencoder"
36 | ae_1 = Network([784, HIDDEN, 784])
37 | ae_1.SGD(td_1, 4, 10, 0.01, 0.05)
38 | 
39 | print "\nFinding second autoencoder"
40 | ae_2 = Network([784, HIDDEN, 784])
41 | ae_2.SGD(td_1, 4, 10, 0.01, 0.05)
42 | 
43 | print "\nGenerating encoded training data"
44 | encoded_td_1 = [sigmoid_vec(np.dot(ae_1.weights[0], x)+ae_1.biases[0])
45 |                 for x in td_3]
46 | encoded_td_2 = [sigmoid_vec(np.dot(ae_2.weights[0], x)+ae_2.biases[0])
47 |                 for x in td_3]
48 | encoded_training_data = zip(encoded_td_1, encoded_td_2)
49 | 
50 | print "\nFinding mapping between theories"
51 | net = Network([HIDDEN, HIDDEN])
52 | net.SGD(encoded_training_data, 6, 10, 0.01, 0.05)
53 | 
54 | print """\nBaseline for comparison: decompress with the first autoencoder"""
55 | print """and compress with the second autoencoder"""
56 | encoded_test_1 = [sigmoid_vec(np.dot(ae_1.weights[0], x)+ae_1.biases[0])
57 |                   for x in test]
58 | encoded_test_2 = [sigmoid_vec(np.dot(ae_2.weights[0], x)+ae_2.biases[0])
59 |                   for x in test]
60 | test_data = zip(encoded_test_1, encoded_test_2)
61 | net_baseline = Network([HIDDEN, 784, HIDDEN])
62 | net_baseline.biases[0] = ae_1.biases[1]
63 | net_baseline.weights[0] = ae_1.weights[1]
64 | net_baseline.biases[1] = ae_2.biases[0]
65 | net_baseline.weights[1] = ae_2.weights[0]
66 | error_baseline = sum(np.linalg.norm(net_baseline.feedforward(x)-y, 1) 
67 |                      for (x, y) in test_data)
68 | print "Baseline average l1 error per training image: %s" % (error_baseline / SIZE,)
69 | 
70 | print "\nComparing theories with a simple interconversion"
71 | print "Mean desired output activation: %s" % (
72 |     sum(y.mean() for _, y in test_data) / SIZE,)
73 | error = sum(np.linalg.norm(net.feedforward(x)-y, 1) for (x, y) in test_data)
74 | print "Average l1 error per training image: %s" % (error / SIZE,)
75 | 
76 | print "\nComputing fiducial image inputs"
77 | fiducial_images_1 = [
78 |     ae_1.weights[0][j,:].reshape(28,28)/np.linalg.norm(net.weights[0][j,:])
79 |     for j in range(HIDDEN)]
80 | fiducial_images_2 = [
81 |     ae_2.weights[0][j,:].reshape(28,28)/np.linalg.norm(net.weights[0][j,:])
82 |     for j in range(HIDDEN)]
83 | image = np.concatenate([np.concatenate(fiducial_images_1, axis=1), 
84 |                         np.concatenate(fiducial_images_2, axis=1)])
85 | fig = plt.figure()
86 | ax = fig.add_subplot(111)
87 | ax.matshow(image, cmap = matplotlib.cm.binary)
88 | plt.xticks(np.array([]))
89 | plt.yticks(np.array([]))
90 | plt.show()
91 | 


--------------------------------------------------------------------------------
/src/old/cost_vs_iterations.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/src/old/cost_vs_iterations.png


--------------------------------------------------------------------------------
/src/old/cost_vs_iterations_trapped.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/src/old/cost_vs_iterations_trapped.png


--------------------------------------------------------------------------------
/src/old/deep_autoencoder.py:
--------------------------------------------------------------------------------
  1 | """
  2 | deep_autoencoder
  3 | ~~~~~~~~~~~~~~~~
  4 | 
  5 | A module which implements deep autoencoders.  
  6 | """
  7 | 
  8 | #### Libraries
  9 | # Standard library
 10 | import random
 11 | 
 12 | # My libraries
 13 | from backprop2 import Network, sigmoid_vec
 14 | 
 15 | # Third-party libraries
 16 | import numpy as np
 17 | 
 18 | 
 19 | def plot_helper(x):
 20 |     import matplotlib
 21 |     import matplotlib.pyplot as plt
 22 |     x = np.reshape(x, (-1, 28))
 23 |     fig = plt.figure()
 24 |     ax = fig.add_subplot(1, 1, 1)
 25 |     ax.matshow(x, cmap = matplotlib.cm.binary)
 26 |     plt.xticks(np.array([]))
 27 |     plt.yticks(np.array([]))
 28 |     plt.show()
 29 | 
 30 | 
 31 | class DeepAutoencoder(Network):
 32 | 
 33 |     def __init__(self, layers):
 34 |         """
 35 |         The list ``layers`` specifies the sizes of the nested
 36 |         autoencoders.  For example, if ``layers`` is [50, 20, 10] then
 37 |         the deep autoencoder will be a neural network with layers of
 38 |         size [50, 20, 10, 20, 50]."""
 39 |         self.layers = layers
 40 |         Network.__init__(self, layers+layers[-2::-1])
 41 | 
 42 |     def train(self, training_data, epochs, mini_batch_size, eta,
 43 |               lmbda):
 44 |         """
 45 |         Train the DeepAutoencoder.  The ``training_data`` is a list of
 46 |         training inputs, ``x``, ``mini_batch_size`` is a single
 47 |         positive integer, and ``epochs``, ``eta``, ``lmbda`` are lists
 48 |         of parameters, with the different list members corresponding
 49 |         to the different stages of training.  For example, ``eta[0]``
 50 |         is the learning rate used for the first nested autoencoder,
 51 |         ``eta[1]`` is the learning rate for the second nested
 52 |         autoencoder, and so on.  ``eta[-1]`` is the learning rate used
 53 |         for the final stage of fine-tuning.
 54 |         """
 55 |         print "\nTraining a %s deep autoencoder" % (
 56 |             "-".join([str(j) for j in self.sizes]),)
 57 |         training_data = double(training_data)
 58 |         cur_training_data = training_data[::]
 59 |         for j in range(len(self.layers)-1):
 60 |             print "\nTraining the %s-%s-%s nested autoencoder" % (
 61 |                 self.layers[j], self.layers[j+1], self.layers[j])
 62 |             print "%s epochs, mini-batch size %s, eta = %s, lambda = %s" % (
 63 |                 epochs[j], mini_batch_size, eta[j], lmbda[j])
 64 |             self.train_nested_autoencoder(
 65 |                 j, cur_training_data, epochs[j], mini_batch_size, eta[j],
 66 |                 lmbda[j])
 67 |             cur_training_data = [
 68 |                 (sigmoid_vec(np.dot(net.weights[0], x)+net.biases[0]),)*2
 69 |                 for (x, _) in cur_training_data]
 70 |         print "\nFine-tuning network weights with backpropagation"
 71 |         print "%s epochs, mini-batch size %s, eta = %s, lambda = %s" % (
 72 |                 epochs[-1], mini_batch_size, eta[-1], lmbda[-1])
 73 |         self.SGD(training_data, epochs[-1], mini_batch_size, eta[-1],
 74 |                  lmbda[-1])
 75 | 
 76 |     def train_nested_autoencoder(
 77 |         self, j, encoded_training_data, epochs, mini_batch_size, eta, lmbda):
 78 |         """
 79 |         Train the nested autoencoder that starts at layer ``j`` in the
 80 |         deep autoencoder.  Note that ``encoded_training_data`` is a
 81 |         list with entries of the form ``(x, x)``, where the ``x`` are
 82 |         encoded training inputs for layer ``j``."""
 83 |         net = Network([self.layers[j], self.layers[j+1], self.layers[j]])
 84 |         net.biases[0] = self.biases[j]
 85 |         net.biases[1] = self.biases[-j-1]
 86 |         net.weights[0] = self.weights[j]
 87 |         net.weights[1] = self.weights[-j-1]
 88 |         net.SGD(encoded_training_data, epochs, mini_batch_size, eta, lmbda)
 89 |         self.biases[j] = net.biases[0]
 90 |         self.biases[-j-1] = net.biases[1]
 91 |         self.weights[j] = net.weights[0]
 92 |         self.weights[-j-1] = net.weights[1]
 93 | 
 94 |     def train_nested_autoencoder_repl(
 95 |         self, j, training_data, epochs, mini_batch_size, eta, lmbda):
 96 |         """
 97 |         This is a convenience method that can be used from the REPL to
 98 |         train the nested autoencoder that starts at level ``j`` in the
 99 |         deep autoencoder.  Note that ``training_data`` is the input
100 |         data for the first layer of the network, and is a list of
101 |         entries ``x``."""
102 |         self.train_nested_autoencoder(
103 |             j, 
104 |             double(
105 |                 [self.feedforward(x, start=0, end=j) for x in training_data]),
106 |             epochs, mini_batch_size, eta, lmbda)
107 | 
108 |     def feature(self, j, k):
109 |         """
110 |         Return the output if neuron number ``k`` in layer ``j`` is
111 |         activated, and all others are not active.  """
112 |         a = np.zeros((self.sizes[j], 1))
113 |         a[k] = 1.0
114 |         return self.feedforward(a, start=j, end=self.num_layers)
115 | 
def double(l):
    """Return a list that pairs every element of ``l`` with itself,
    as ``(x, x)`` tuples."""
    pairs = []
    for item in l:
        pairs.append((item, item))
    return pairs
118 | 
119 | 


--------------------------------------------------------------------------------
/src/old/deep_learning.py:
--------------------------------------------------------------------------------
 1 | """
 2 | deep_learning
 3 | ~~~~~~~~~~~~~
 4 | 
 5 | Module to do deep learning.  Most of the functionality needed is
 6 | already in the ``backprop2`` and ``deep_autoencoder`` modules, but
 7 | this adds convenience functions to help in doing things like unrolling
 8 | deep autoencoders, and adding and training a classifier layer."""
 9 | 
10 | # My Libraries
11 | from backprop2 import Network
12 | from deep_autoencoder import DeepAutoencoder
13 | 
14 | def unroll(deep_autoencoder):
15 |     """
16 |     Return a Network that contains the compression stage of the
17 |     ``deep_autoencoder``."""
18 |     net = Network(deep_autoencoder.layers)
19 |     net.weights = deep_autoencoder.weights[:len(deep_autoencoder.layers)-1]
20 |     net.biases = deep_autoencoder.biases[:len(deep_autoencoder.layers)-1]
21 |     return net
22 | 
23 | def add_classifier_layer(net, num_outputs):
24 |     """
25 |     Return the Network ``net``, but with an extra layer containing
26 |     ``num_outputs`` neurons appended."""
27 |     net_classifier = Network(net.sizes+[num_outputs])
28 |     net_classifier.weights[:-1] = net.weights
29 |     net_classifier.biases[:-1] = net.biases
30 |     return net_classifier
31 | 
32 | def SGD_final_layer(
33 |     self, training_data, epochs, mini_batch_size, eta, lmbda):
34 |     """
35 |     Run SGD on the final layer of the Network ``self``.  Note that
36 |     ``training_data`` is the input to the whole Network, not the
37 |     encoded training data input to the final layer. 
38 |     """
39 |     encoded_training_data = [
40 |         (self.feedforward(x, start=0, end=self.num_layers-2), y) 
41 |         for x, y in training_data]
42 |     net = Network(self.sizes[-2:])
43 |     net.biases[0] = self.biases[-1]
44 |     net.weights[0] = self.weights[-1]
45 |     net.SGD(encoded_training_data, epochs, mini_batch_size, eta, lmbda)
46 |     self.biases[-1] = net.biases[0]
47 |     self.weights[-1] = net.weights[0]
48 | 
49 | 
50 | # Attach SGD_final_layer to the Network class so it can be called as a method
51 | Network.SGD_final_layer = SGD_final_layer
52 | 


--------------------------------------------------------------------------------
/src/old/gradient_descent_hack.py:
--------------------------------------------------------------------------------
 1 | """
 2 | gradient_descent_hack
 3 | ~~~~~~~~~~~~~~~~~~~~~
 4 | 
 5 | This program uses gradient descent to learn weights and biases for a
 6 | three-neuron network to compute the XOR function.  The program is a
 7 | quick-and-dirty hack meant to illustrate the basic ideas of gradient
 8 | descent, not a cleanly-designed and generalizable implementation."""
 9 | 
10 | #### Libraries
11 | # Third-party libraries
12 | import matplotlib.pyplot as plt
13 | import numpy as np
14 | 
15 | def sigmoid(z):
16 |     return 1.0/(1.0+np.exp(-z))
17 | 
18 | def neuron(w, x):
19 |     """ Return the output from the sigmoid neuron with weights ``w``
20 |     and inputs ``x``.  Both are numpy arrays, with three and two
21 |     elements, respectively.  The first input weight is the bias."""
22 |     return sigmoid(w[0]+np.inner(w[1:], x))
23 | 
24 | def h(w, x):
25 |     """ Return the output from the three-neuron network with weights
26 |     ``w`` and inputs ``x``.  Note that ``w`` is a numpy array with
27 |     nine elements, consisting of three weights for each neuron (the
28 |     bias plus two input weights).  ``x`` is a numpy array with just
29 |     two elements."""
30 |     neuron1_out = neuron(w[0:3], x) # top left neuron
31 |     neuron2_out = neuron(w[3:6], x) # bottom left neuron
32 |     return neuron(w[6:9], np.array([neuron1_out, neuron2_out]))
33 | 
34 | # XOR truth table: each entry of INPUTS pairs with the same-index entry of OUTPUTS
35 | INPUTS = [[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]] 
36 | OUTPUTS = [0.0, 1.0, 1.0, 0.0]
37 | 
38 | def cost(w):
39 |     """ Return the cost when the neural network has weights ``w``.
40 |     The cost is computed with respect to the XOR function."""
41 |     return 0.5 * sum((y-h(w, np.array(x)))**2 for x, y in zip(INPUTS, OUTPUTS))
42 | 
43 | def partial(f, k, w):
44 |     """ Return the partial derivative of the function ``f`` with
45 |     respect to the ``k``th variable, at location ``w``.  Note that
46 |     ``f`` must take a numpy array as input, and the partial derivative
47 |     is evaluated with respect to the ``k``th element in that array.
48 |     Similarly, ``w`` is a numpy array which can be used as input to
49 |     ``f``."""
50 |     w_plus, w_minus = w.copy(), w.copy()
51 |     w_plus[k] += 0.01 # using epsilon = 0.01
52 |     w_minus[k] += -0.01
53 |     return (f(w_plus)-f(w_minus))/0.02
54 |     
55 | def gradient_descent(cost, eta, n):
56 |     """ Perform ``n`` iterations of the gradient descent algorithm to
57 |     minimize the ``cost`` function, with a learning rate ``eta``.
58 |     Return a tuple whose first entry is an array containing the final
59 |     weights, and whose second entry is a list of the values the
60 |     ``cost`` function took at different iterations."""
61 |     w = np.random.uniform(-1, 1, 9) # initialize weights randomly
62 |     costs = []
63 |     for j in xrange(n):
64 |         c = cost(w)
65 |         print "Current cost: {0:.3f}".format(c)
66 |         costs.append(c)
67 |         gradient = [partial(cost, k, w) for k in xrange(9)]
68 |         w = np.array([wt-eta*d for wt, d in zip(w, gradient)])
69 |     return w, costs
70 | 
71 | def main():
72 |     """ Perform gradient descent to find weights for a sigmoid neural
73 |     network to compute XOR.  10,000 iterations are used.  Outputs the
74 |     final value of the cost function, the final weights, and plots a
75 |     graph of cost as a function of iteration."""
76 |     w, costs = gradient_descent(cost, 0.1, 10000)
77 |     print "\nFinal cost: {0:.3f}".format(cost(w))
78 |     print "\nFinal weights: %s" % w
79 |     plt.plot(np.array(costs))
80 |     plt.xlabel('iteration')
81 |     plt.ylabel('cost')
82 |     plt.title('How cost decreases with the number of iterations')
83 |     plt.show()
84 | 
85 | if __name__ == "__main__":
86 |     main()
87 | 


--------------------------------------------------------------------------------
/src/old/mnist_100_30_deep_autoencoder.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/src/old/mnist_100_30_deep_autoencoder.png


--------------------------------------------------------------------------------
/src/old/mnist_100_unit_autoencoder.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/src/old/mnist_100_unit_autoencoder.png


--------------------------------------------------------------------------------
/src/old/mnist_10_unit_autoencoder.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/src/old/mnist_10_unit_autoencoder.png


--------------------------------------------------------------------------------
/src/old/mnist_30_component_pca.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/src/old/mnist_30_component_pca.png


--------------------------------------------------------------------------------
/src/old/mnist_30_unit_autoencoder.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skylook/neural-networks-and-deep-learning/2e354a529f45bfaf757cd6eed3832de65d50a897/src/old/mnist_30_unit_autoencoder.png


--------------------------------------------------------------------------------
/src/old/mnist_autoencoder.py:
--------------------------------------------------------------------------------
 1 | """
 2 | mnist_autoencoder
 3 | ~~~~~~~~~~~~~~~~~
 4 | 
 5 | Implements an autoencoder for the MNIST data.  The program can do two
 6 | things: (1) plot the autoencoder's output for the first ten images in
 7 | the MNIST test set; and (2) use the autoencoder to build a classifier.
 8 | The program is a quick-and-dirty hack --- we'll do things in a more
 9 | systematic way in the module ``deep_autoencoder``.
10 | """
11 | 
12 | # My Libraries
13 | from backprop2 import Network
14 | import mnist_loader 
15 | 
16 | # Third-party libraries
17 | import matplotlib
18 | import matplotlib.pyplot as plt
19 | import numpy as np
20 | 
21 | def autoencoder_results(hidden_units):
22 |     """
23 |     Train an autoencoder using the MNIST training data and plot the
24 |     results when the first ten MNIST test images are passed through
25 |     the autoencoder.
26 |     """
27 |     training_data, test_inputs, actual_test_results = \
28 |         mnist_loader.load_data_nn()
29 |     net = train_autoencoder(hidden_units, training_data)
30 |     plot_test_results(net, test_inputs)
31 | 
32 | def train_autoencoder(hidden_units, training_data):
33 |     "Return a trained autoencoder."
34 |     autoencoder_training_data = [(x, x) for x, _ in training_data]
35 |     net = Network([784, hidden_units, 784])
36 |     net.SGD(autoencoder_training_data, 6, 10, 0.01, 0.05)
37 |     return net
38 | 
39 | def plot_test_results(net, test_inputs):
40 |     """
41 |     Plot the results after passing the first ten test MNIST digits through
42 |     the autoencoder ``net``."""
43 |     fig = plt.figure()
44 |     ax = fig.add_subplot(111)
45 |     images_in = [test_inputs[j].reshape(-1, 28) for j in range(10)]
46 |     images_out = [net.feedforward(test_inputs[j]).reshape(-1, 28) 
47 |                   for j in range(10)]
48 |     image_in = np.concatenate(images_in, axis=1)
49 |     image_out = np.concatenate(images_out, axis=1)
50 |     image = np.concatenate([image_in, image_out])
51 |     ax.matshow(image, cmap = matplotlib.cm.binary)
52 |     plt.xticks(np.array([]))
53 |     plt.yticks(np.array([]))
54 |     plt.show()
55 | 
56 | def classifier(hidden_units, n_unlabeled_inputs, n_labeled_inputs):
57 |     """
58 |     Train a semi-supervised classifier.  Pretraining builds an
59 |     autoencoder from the first ``n_unlabeled_inputs`` MNIST training
60 |     items.  Its first layer seeds a classifier, which is fine-tuned on
61 |     the last ``n_labeled_inputs`` items and returned.
62 | 
63 |     For comparison a classifier is also trained from scratch on the
64 |     same labeled items; test results are printed for both.
65 |     """
66 |     training_data, test_inputs, actual_test_results = \
67 |         mnist_loader.load_data_nn()
68 |     print "\nUsing pretraining and %s items of unlabeled data" %\
69 |         n_unlabeled_inputs
70 |     net_ae = train_autoencoder(hidden_units, training_data[:n_unlabeled_inputs])
71 |     net_c = Network([784, hidden_units, 10])  # parameters replaced just below
72 |     net_c.biases = net_ae.biases[:1]+[np.random.randn(10, 1)/np.sqrt(10)]
73 |     net_c.weights = net_ae.weights[:1]+\
74 |         [np.random.randn(10, hidden_units)/np.sqrt(10)]
75 |     net_c.SGD(training_data[-n_labeled_inputs:], 300, 10, 0.01, 0.05)
76 |     print "Result on test data: %s / %s" % (
77 |         net_c.evaluate(test_inputs, actual_test_results), len(test_inputs))
78 |     print "Training a network with %s items of training data" % n_labeled_inputs
79 |     net = Network([784, hidden_units, 10])  # baseline without pretraining
80 |     net.SGD(training_data[-n_labeled_inputs:], 300, 10, 0.01, 0.05)
81 |     print "Result on test data: %s / %s" % (
82 |         net.evaluate(test_inputs, actual_test_results), len(test_inputs))
83 |     return net_c
84 | 


--------------------------------------------------------------------------------
/src/old/mnist_pca.py:
--------------------------------------------------------------------------------
 1 | """
 2 | mnist_pca
 3 | ~~~~~~~~~
 4 | 
 5 | Use PCA to reconstruct some of the MNIST test digits.
 6 | """
 7 | 
 8 | # My libraries
 9 | import mnist_loader
10 | 
11 | # Third-party libraries
12 | import matplotlib
13 | import matplotlib.pyplot as plt
14 | import numpy as np
15 | from sklearn.decomposition import RandomizedPCA
16 | 
17 | 
18 | # Training: fit a 30-component RandomizedPCA to the MNIST training inputs
19 | training_data, test_inputs, actual_test_results = mnist_loader.load_data_nn()
20 | pca = RandomizedPCA(n_components=30)
21 | nn_images = [x for (x, y) in training_data]
22 | pca_images = np.concatenate(nn_images, axis=1).transpose()  # presumably one image per row -- confirm
23 | pca_r = pca.fit(pca_images)
24 | 
25 | # Transform the first ten test images with the PCA and map them back again
26 | test_images = np.array(test_inputs[:10]).reshape((10,784))
27 | test_outputs = pca_r.inverse_transform(pca_r.transform(test_images))
28 | 
29 | # Plot the first ten test images (top row) above the corresponding outputs
30 | fig = plt.figure()
31 | ax = fig.add_subplot(111)
32 | images_in = [test_inputs[j].reshape(-1, 28) for j in range(10)]
33 | images_out = [test_outputs[j].reshape(-1, 28) for j in range(10)]
34 | image_in = np.concatenate(images_in, axis=1)
35 | image_out = np.concatenate(images_out, axis=1)
36 | image = np.concatenate([image_in, image_out])
37 | ax.matshow(image, cmap = matplotlib.cm.binary)
38 | plt.xticks(np.array([]))
39 | plt.yticks(np.array([]))
40 | plt.show()
41 | 


--------------------------------------------------------------------------------
/src/old/perceptron_learning.py:
--------------------------------------------------------------------------------
 1 | """
 2 | perceptron_learning
 3 | ~~~~~~~~~~~~~~~~~~~
 4 | 
 5 | Demonstrates how a perceptron can learn the NAND gate, using the
 6 | perceptron learning algorithm."""
 7 | 
 8 | #### Libraries
 9 | # Third-party library
10 | import numpy as np
11 | 
12 | class Perceptron(object):
13 |     """ A Perceptron instance can take a function and attempt to
14 |     ``learn`` a bias and set of weights that compute that function,
15 |     using the perceptron learning algorithm."""
16 | 
17 |     def __init__(self, num_inputs=2):
18 |         """ Initialize the perceptron with the bias and all weights
19 |         set to 0.0. ``num_inputs`` is the number of input bits to the
20 |         perceptron."""
21 |         self.num_inputs = num_inputs
22 |         self.bias = 0.0
23 |         self.weights = np.zeros(num_inputs)
24 |         # self.inputs is a convenience attribute.  It's a list containing
25 |         # all possible binary inputs to the perceptron.  E.g., for three
26 |         # inputs it is: [np.array([0, 0, 0]), np.array([0, 0, 1]), ...]
27 |         self.inputs = [np.array([int(y)
28 |                         for y in bin(x).lstrip("0b").zfill(num_inputs)])
29 |                        for x in xrange(2**num_inputs)]
30 | 
31 |     def output(self, x):
32 |         """ Return the output (0 or 1) from the perceptron, with input
33 |         ``x``."""
34 |         return 1 if np.inner(self.weights, x)+self.bias > 0 else 0
35 | 
36 |     def learn(self, f, eta=0.1):
37 |         """ Find a bias and a set of weights for a perceptron that
38 |         computes the function ``f``. ``eta`` is the learning rate, and
39 |         should be a small positive number.  Does not terminate when
40 |         the function cannot be computed using a perceptron."""
41 |         # initialize the bias and weights with random values
42 |         self.bias = np.random.normal()
43 |         self.weights = np.random.randn(self.num_inputs)
44 |         number_of_errors = -1
45 |         while number_of_errors != 0:
46 |             number_of_errors = 0
47 |             print "Beginning iteration"
48 |             print "Bias: {:.3f}".format(self.bias)
49 |             print "Weights:", ", ".join(
50 |                 "{:.3f}".format(wt) for wt in self.weights)
51 |             for x in self.inputs:
52 |                 error = f(x)-self.output(x)
53 |                 if error:
54 |                     number_of_errors += 1
55 |                     self.bias = self.bias+eta*error
56 |                     self.weights = self.weights+eta*error*x
57 |             print "Number of errors:", number_of_errors, "\n"
58 | 
59 | def f(x):
60 |     """ Target function for the perceptron learning algorithm.  I've
61 |     chosen the NAND gate, but any function is okay, with the caveat
62 |     that the algorithm won't terminate if ``f`` cannot be computed by
63 |     a perceptron."""
64 |     return int(not (x[0] and x[1]))
65 | 
66 | if __name__ == "__main__":
67 |     Perceptron(2).learn(f, 0.1)
68 | 


--------------------------------------------------------------------------------
/src/test_mnist.py:
--------------------------------------------------------------------------------
1 | import network
2 | import mnist_loader
3 | 
4 | training_data, validation_data, test_data = mnist_loader.load_data_wrapper();
5 | 
6 | net = network.Network([784, 100, 10]);
7 | net.SGD(training_data, 30, 10, 0.1, test_data=test_data);


--------------------------------------------------------------------------------