├── .gitignore
├── LICENSE
├── README.md
├── demos-for-talks
    ├── AlexNet.ipynb
    ├── Keras_MNIST_ConvNet.ipynb
    ├── VGGNet.ipynb
    ├── imdb_lstm.ipynb
    └── simple_dnn.ipynb
├── neural-networks-and-deep-learning
    ├── .gitignore
    ├── README.md
    ├── data
    │   └── mnist.pkl.gz
    ├── fig
    │   ├── backprop_magnitude_nabla.png
    │   ├── backprop_magnitude_nabla.py
    │   ├── data_1000.json
    │   ├── digits.png
    │   ├── digits_separate.png
    │   ├── false_minima.png
    │   ├── false_minima.py
    │   ├── generate_gradient.py
    │   ├── initial_gradient.json
    │   ├── misleading_gradient.png
    │   ├── misleading_gradient.py
    │   ├── misleading_gradient_contours.png
    │   ├── misleading_gradient_contours.py
    │   ├── mnist.py
    │   ├── mnist_100_digits.png
    │   ├── mnist_2_and_1.png
    │   ├── mnist_complete_zero.png
    │   ├── mnist_first_digit.png
    │   ├── mnist_other_features.png
    │   ├── mnist_really_bad_images.png
    │   ├── mnist_top_left_feature.png
    │   ├── more_data.json
    │   ├── more_data.png
    │   ├── more_data.py
    │   ├── more_data_5.png
    │   ├── more_data_comparison.png
    │   ├── more_data_log.png
    │   ├── more_data_rotated_5.png
    │   ├── more_data_svm.json
    │   ├── multiple_eta.json
    │   ├── multiple_eta.png
    │   ├── multiple_eta.py
    │   ├── norms_during_training_2_layers.json
    │   ├── norms_during_training_3_layers.json
    │   ├── norms_during_training_4_layers.json
    │   ├── overfitting.json
    │   ├── overfitting.py
    │   ├── overfitting1.png
    │   ├── overfitting2.png
    │   ├── overfitting3.png
    │   ├── overfitting4.png
    │   ├── overfitting_full.json
    │   ├── overfitting_full.png
    │   ├── pca_hard_data.png
    │   ├── pca_hard_data_fit.png
    │   ├── pca_limitations.py
    │   ├── regularized.json
    │   ├── regularized1.png
    │   ├── regularized2.png
    │   ├── regularized_full.json
    │   ├── regularized_full.png
    │   ├── replaced_by_d3
    │   │   ├── README.md
    │   │   ├── relu.png
    │   │   ├── relu.py
    │   │   ├── sigmoid.png
    │   │   ├── sigmoid.py
    │   │   ├── step.png
    │   │   ├── step.py
    │   │   ├── tanh.png
    │   │   └── tanh.py
    │   ├── serialize_images_to_json.py
    │   ├── test.png
    │   ├── training_speed_2_layers.png
    │   ├── training_speed_3_layers.png
    │   ├── training_speed_4_layers.png
    │   ├── valley.png
    │   ├── valley.py
    │   ├── valley2.png
    │   ├── valley2.py
    │   ├── weight_initialization.py
    │   ├── weight_initialization_100.json
    │   ├── weight_initialization_100.png
    │   ├── weight_initialization_30.json
    │   └── weight_initialization_30.png
    ├── requirements.txt
    └── src
    │   ├── conv.py
    │   ├── expand_mnist.py
    │   ├── mnist_average_darkness.py
    │   ├── mnist_loader.py
    │   ├── mnist_svm.py
    │   ├── network.py
    │   ├── network2.py
    │   ├── network3.py
    │   ├── old
    │       ├── blog
    │       │   ├── __init__.py
    │       │   └── common_knowledge.py
    │       ├── cost_vs_iterations.png
    │       ├── cost_vs_iterations_trapped.png
    │       ├── deep_autoencoder.py
    │       ├── deep_learning.py
    │       ├── gradient_descent_hack.py
    │       ├── mnist_100_30_deep_autoencoder.png
    │       ├── mnist_100_unit_autoencoder.png
    │       ├── mnist_10_unit_autoencoder.png
    │       ├── mnist_30_component_pca.png
    │       ├── mnist_30_unit_autoencoder.png
    │       ├── mnist_autoencoder.py
    │       ├── mnist_pca.py
    │       └── perceptron_learning.py
    │   ├── run_network.ipynb
    │   └── run_network.py
├── nn-from-scratch
    ├── MNIST-loader.ipynb
    ├── MNIST-nn-SGD-flex_arch.ipynb
    ├── MNIST-nn-SGD.ipynb
    ├── MNIST-nn-scipy.ipynb
    ├── README.md
    └── data
    │   ├── gzips
    │       ├── t10k-images-idx3-ubyte.gz
    │       ├── t10k-labels-idx1-ubyte.gz
    │       ├── train-images-idx3-ubyte.gz
    │       └── train-labels-idx1-ubyte.gz
    │   └── pickled
    │       ├── xtest.pickle
    │       ├── xtrain.pickle
    │       ├── xval.pickle
    │       ├── ytest.pickle
    │       ├── ytrain.pickle
    │       └── yval.pickle
├── slides
    ├── 2017-02-07__katya_vasilaky__ridge_regression.pdf
    ├── 2017-02-07__raphaela_sapire__billion_dollar_AI.pdf
    ├── 2017-03-06__grant_beyleveld__u_net.pdf
    ├── 2017-03-27__karl_habermas__CS224d_assignment1.pdf
    ├── 2017-04-19__claudia_perlich__predictability.pdf
    ├── 2017-10-17__thomas_balestri__reinforcement_learning.pdf
    ├── 2017-12-09__keng_laura__RL.pdf
    ├── 2019-10-16_dmitri_nesterenko__Capsule_Nets.pdf
    └── 2019-10-16_grant_beyleveld__BERT.pdf
├── weekly-work
    ├── week1
    │   ├── MNIST_for_beginners.ipynb
    │   ├── MNIST_for_beginners.py
    │   ├── README.md
    │   ├── basic_usage.ipynb
    │   ├── basic_usage.py
    │   ├── deep_MNIST.ipynb
    │   ├── deep_MNIST.py
    │   ├── exercise3.py
    │   ├── get_started.ipynb
    │   ├── get_started.py
    │   └── softmax_vs_convolutional_nn.py
    ├── week10
    │   └── README.md
    ├── week11
    │   ├── README.md
    │   └── sutskever_et_al_2014__PCA.png
    ├── week12
    │   ├── README.md
    │   └── img
    │   │   ├── CNN_feature_map.png
    │   │   ├── CTC_peaks.png
    │   │   ├── GRU_gates.png
    │   │   ├── GRU_shortcut.png
    │   │   ├── GRU_visualisation.png
    │   │   ├── LSTM_secret.png
    │   │   ├── RNN_visualisation.png
    │   │   ├── RNNs_vs_CNNs.png
    │   │   ├── are_languages_recursive.png
    │   │   ├── attention_for_long_sentences_plot.png
    │   │   ├── attn_hidden_state.png
    │   │   ├── bilinear_form.png
    │   │   ├── bldg_on_WVSMs.png
    │   │   ├── choosing_better_targets.png
    │   │   ├── choosing_output_targets.png
    │   │   ├── decoding.png
    │   │   ├── doubly_attention.png
    │   │   ├── end_to_end_ASR_as_model.png
    │   │   ├── global_vs_local.png
    │   │   ├── heres_the-church_here_are_the_people.png
    │   │   ├── learned_tree_structure.png
    │   │   ├── lstm_vs_rnn_127.png
    │   │   ├── lstm_vs_rnn_32.png
    │   │   ├── nn_ASR.png
    │   │   ├── octopus-gan.gif
    │   │   ├── phrases_in_vector_space.png
    │   │   ├── recursive_vs_recurrent_NN.png
    │   │   ├── scoring_attention.png
    │   │   ├── sentiment_distributions.png
    │   │   ├── seq2seq_ASR.png
    │   │   ├── seq2seq_ASR_attn.png
    │   │   ├── single_layer_CNN.png
    │   │   ├── traditional_ASR.png
    │   │   └── what_is_a_convolution.png
    ├── week13
    │   ├── README.md
    │   └── img
    │   │   ├── QA_independence.png
    │   │   ├── SNLI_results.png
    │   │   ├── SPINN.png
    │   │   ├── arch_search_2.png
    │   │   ├── arch_search_3.png
    │   │   ├── arch_search_4.png
    │   │   ├── architecture_search.png
    │   │   ├── b_cubed.png
    │   │   ├── chunking_training.png
    │   │   ├── diff_inputs.png
    │   │   ├── dynamic_memory_network.png
    │   │   ├── episodic_module.png
    │   │   ├── harder_questions.png
    │   │   ├── inference_corpus.png
    │   │   ├── input_module.png
    │   │   ├── more_qa_examples.png
    │   │   ├── obstacle_1.png
    │   │   ├── obstacle_2.png
    │   │   ├── pointer_mixture.png
    │   │   ├── qa_examples.png
    │   │   ├── question_module.png
    │   │   ├── semantic_relatedness.png
    │   │   ├── sharper_attn.png
    │   │   ├── state_of_the_art.png
    │   │   ├── tackling_joint_training.png
    │   │   ├── tennis_Qs.png
    │   │   ├── touch.txt
    │   │   ├── tying_word_vectors.png
    │   │   ├── visual_attn.png
    │   │   ├── visual_attn_2.png
    │   │   ├── visual_attn_3.png
    │   │   ├── where_SPINN_is_better.png
    │   │   ├── writing_systems.png
    │   │   ├── ws_2.png
    │   │   └── ws_3.png
    ├── week14
    │   ├── README.md
    │   └── img
    │   │   ├── WnT1.png
    │   │   ├── WnT2.png
    │   │   ├── WnT3.png
    │   │   ├── emmaRL.png
    │   │   ├── finn1.png
    │   │   ├── finn1617.png
    │   │   ├── finn2.png
    │   │   ├── markovDP.png
    │   │   ├── oh15.png
    │   │   ├── silverVenn.png
    │   │   └── tan14.png
    ├── week15
    │   ├── README.md
    │   └── img
    │   │   ├── Q-star.png
    │   │   ├── Qvalue-fxn.png
    │   │   ├── atari-case-study.png
    │   │   ├── atari-case-study2.png
    │   │   ├── bellman-exn.png
    │   │   ├── dnn-for-q-learning.png
    │   │   ├── grid-world-1.png
    │   │   ├── grid-world-2.png
    │   │   ├── mdp-defn.png
    │   │   ├── mdp-process.png
    │   │   ├── policy-grad-defn.png
    │   │   ├── q-learning-fxn.png
    │   │   ├── reinforce-in-axn.png
    │   │   ├── reinforce-in-axn2.png
    │   │   ├── value-fxn.png
    │   │   └── value-itn-algo.png
    ├── week16
    │   └── README.md
    ├── week17
    │   └── README.md
    ├── week2
    │   ├── MNIST_hierarchical_rnn.ipynb
    │   ├── README.md
    │   ├── getting_started_with_keras.ipynb
    │   └── reuters_mlp.ipynb
    ├── week3
    │   └── README.md
    ├── week4
    │   └── README.md
    ├── week5
    │   ├── README.md
    │   └── network3.ipynb
    ├── week6
    │   └── README.md
    ├── week7
    │   └── README.md
    ├── week8
    │   └── README.md
    └── week9
    │   ├── 02_pros_and_cons_of_counting_vs_w2v.png
    │   ├── 03_05_GloVe_visualizations_gender.png
    │   ├── 03_06_GloVe_visualizations_CEO.png
    │   ├── 03_07_GloVe_visualizations_superlatives.png
    │   ├── 2017_02_skipgram_diagram.png
    │   ├── 2017_02_softmax.png
    │   ├── 2017_02_w2v_dot_products.png
    │   ├── 2017_02_word2vec_definition.png
    │   ├── README.md
    │   ├── fun_glove_expressions.png
    │   └── w2v_objective_fxn.png
└── wiki-resources
    ├── 3.jpg
    ├── 5_cropped.jpg
    ├── IMG_2697.jpeg
    ├── IMG_5959.JPG
    ├── IMG_5974.JPG
    ├── IMG_7624.JPG
    ├── IMG_7641.JPG
    ├── IMG_9147.JPG
    ├── dlsg-dli-authors.jpg
    ├── dlsg-xvii.jpg
    ├── gitflow.png
    ├── jk-at-dlsg-xviii.jpg
    └── session_XI_crew.JPG


/.gitignore:
--------------------------------------------------------------------------------
 1 | MNIST_data/
 2 | demos-for-talks/mnist/
 3 | 
 4 | # Byte-compiled / optimized / DLL files
 5 | __pycache__/
 6 | *.py[cod]
 7 | *$py.class
 8 | 
 9 | # C extensions
10 | *.so
11 | 
12 | # Distribution / packaging
13 | .Python
14 | env/
15 | build/
16 | develop-eggs/
17 | dist/
18 | downloads/
19 | eggs/
20 | .eggs/
21 | lib/
22 | lib64/
23 | parts/
24 | sdist/
25 | var/
26 | *.egg-info/
27 | .installed.cfg
28 | *.egg
29 | 
30 | # PyInstaller
31 | #  Usually these files are written by a python script from a template
32 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 | 
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 | 
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *,cover
49 | .hypothesis/
50 | 
51 | # Translations
52 | *.mo
53 | *.pot
54 | 
55 | # Django stuff:
56 | *.log
57 | local_settings.py
58 | 
59 | # Flask stuff:
60 | instance/
61 | .webassets-cache
62 | 
63 | # Scrapy stuff:
64 | .scrapy
65 | 
66 | # Sphinx documentation
67 | docs/_build/
68 | 
69 | # PyBuilder
70 | target/
71 | 
72 | # IPython Notebook
73 | .ipynb_checkpoints
74 | 
75 | # pyenv
76 | .python-version
77 | 
78 | # celery beat schedule file
79 | celerybeat-schedule
80 | 
81 | # dotenv
82 | .env
83 | 
84 | # virtualenv
85 | venv/
86 | ENV/
87 | 
88 | # Spyder project settings
89 | .spyderproject
90 | 
91 | # Rope project settings
92 | .ropeproject
93 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2016 jonkrohn
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/.gitignore:
--------------------------------------------------------------------------------
1 | *~
2 | *.org
3 | *.pem
4 | *.pkl
5 | *.pyc
6 | .DS_Store
7 | loc.py
8 | src/ec2
9 | 


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/README.md:
--------------------------------------------------------------------------------
 1 | # Code samples for "Neural Networks and Deep Learning"
 2 | 
 3 | This repository contains code samples for my (forthcoming) book on
 4 | "Neural Networks and Deep Learning".
 5 | 
 6 | As the code is written to accompany the book, I don't intend to add
 7 | new features.  However, bug reports are welcome, and you should feel
 8 | free to fork and modify the code.
 9 | 
10 | ## License
11 | 
12 | MIT License
13 | 
14 | Copyright (c) 2012-2015 Michael Nielsen
15 | 
16 | Permission is hereby granted, free of charge, to any person obtaining
17 | a copy of this software and associated documentation files (the
18 | "Software"), to deal in the Software without restriction, including
19 | without limitation the rights to use, copy, modify, merge, publish,
20 | distribute, sublicense, and/or sell copies of the Software, and to
21 | permit persons to whom the Software is furnished to do so, subject to
22 | the following conditions:
23 | 
24 | The above copyright notice and this permission notice shall be
25 | included in all copies or substantial portions of the Software.
26 | 
27 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
28 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
29 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
30 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
31 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
32 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
33 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
34 | 


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/data/mnist.pkl.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/data/mnist.pkl.gz


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/fig/backprop_magnitude_nabla.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/backprop_magnitude_nabla.png


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/fig/backprop_magnitude_nabla.py:
--------------------------------------------------------------------------------
 1 | """
 2 | backprop_magnitude_nabla
 3 | ~~~~~~~~~~~~~~~~~~~~~~~~
 4 | 
 5 | Using backprop2 I constructed a 784-30-30-30-30-30-10 network to classify
 6 | MNIST data.  I ran ten mini-batches of size 100, with eta = 0.01 and
 7 | lambda = 0.05, using:
 8 | 
 9 | net.SGD(otd[:1000], 1, 100, 0.01, 0.05,
10 | 
11 | I obtained the following norms for the (unregularized) nabla_w for the
12 | respective mini-batches:
13 | 
14 | [0.90845722175923671, 2.8852730656073566, 10.696793986223632, 37.75701921183488, 157.7365422527995, 304.43990075227839]
15 | [0.22493835119537842, 0.6555126517964851, 2.6036801277234076, 11.408825365731225, 46.882319190445472, 70.499637502698221]
16 | [0.11935180022357521, 0.19756069137133489, 0.8152794148335869, 3.4590802543293977, 15.470507965493903, 31.032396017142556]
17 | [0.15130005837653659, 0.39687135985664701, 1.4810006139254532, 4.392519005642268, 16.831939776937311, 34.082104455938733]
18 | [0.11594085276308999, 0.17177668061395848, 0.72204558746599512, 3.05062409378366, 14.133001132214286, 29.776204839994385]
19 | [0.10790389807606221, 0.20707152756018626, 0.96348134037828603, 3.9043824079499561, 15.986873430586924, 39.195258080490895]
20 | [0.088613291101645356, 0.129173436407863, 0.4242933114455002, 1.6154682713449411, 7.5451567587160069, 20.180545544006566]
21 | [0.086175380639289575, 0.12571016850457151, 0.44231149185805047, 1.8435833504677326, 7.61973813981073, 19.474539356281781]
22 | [0.095372080184163904, 0.15854489503205446, 0.70244235144444678, 2.6294803575724157, 10.427062019753425, 24.309420272033819]
23 | [0.096453131000155692, 0.13574642196947601, 0.53551377709415471, 2.0247466793066895, 9.4503978546018068, 21.73772148470092]
24 | 
25 | Note that results are listed in order of layer.  They clearly show how
26 | the magnitude of nabla_w decreases as we go back through layers.
27 | 
28 | In this program I take min-batches 7, 8, 9 as representative and plot
29 | them.  I omit the results from the first and final layers since they
30 | correspond to 784 input neurons and 10 output neurons, not 30 as in
31 | the other layers, making it difficult to compare results.
32 | 
33 | Note that I haven't attempted to preserve the whole workflow here. It
34 | involved some minor hacking around with backprop2, which messed up
35 | that code.  That's why I've simply put the results in by hand below.
36 | """
37 | 
38 | # Third-party libraries
39 | import matplotlib.pyplot as plt
40 | 
41 | nw1 = [0.129173436407863, 0.4242933114455002, 
42 |        1.6154682713449411, 7.5451567587160069]
43 | nw2 = [0.12571016850457151, 0.44231149185805047, 
44 |        1.8435833504677326, 7.61973813981073]
45 | nw3 = [0.15854489503205446, 0.70244235144444678, 
46 |        2.6294803575724157, 10.427062019753425]
47 | plt.plot(range(1, 5), nw1, "ro-", range(1, 5), nw2, "go-", 
48 |          range(1, 5), nw3, "bo-")
49 | plt.xlabel('Layer $l$')
50 | plt.ylabel(r"$\Vert\nabla C^l_w\Vert$")
51 | plt.xticks([1, 2, 3, 4])
52 | plt.show()
53 | 


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/fig/digits.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/digits.png


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/fig/digits_separate.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/digits_separate.png


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/fig/false_minima.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/false_minima.png


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/fig/false_minima.py:
--------------------------------------------------------------------------------
 1 | """
 2 | false_minimum
 3 | ~~~~~~~~~~~~~
 4 | 
 5 | Plots a function of two variables with many false minima."""
 6 | 
 7 | #### Libraries
 8 | # Third party libraries
 9 | from matplotlib.ticker import LinearLocator
10 | # Note that axes3d is not explicitly used in the code, but is needed
11 | # to register the 3d plot type correctly
12 | from mpl_toolkits.mplot3d import axes3d 
13 | import matplotlib.pyplot as plt
14 | import numpy
15 | 
16 | fig = plt.figure()
17 | ax = fig.gca(projection='3d')
18 | X = numpy.arange(-5, 5, 0.1)
19 | Y = numpy.arange(-5, 5, 0.1)
20 | X, Y = numpy.meshgrid(X, Y)
21 | Z = numpy.sin(X)*numpy.sin(Y)+0.2*X
22 | 
23 | colortuple = ('w', 'b')
24 | colors = numpy.empty(X.shape, dtype=str)
25 | for x in xrange(len(X)):
26 |     for y in xrange(len(Y)):
27 |         colors[x, y] = colortuple[(x + y) % 2]
28 | 
29 | surf = ax.plot_surface(X, Y, Z, rstride=1, cstride=1, facecolors=colors,
30 |         linewidth=0)
31 | 
32 | ax.set_xlim3d(-5, 5)
33 | ax.set_ylim3d(-5, 5)
34 | ax.set_zlim3d(-2, 2)
35 | ax.w_xaxis.set_major_locator(LinearLocator(3))
36 | ax.w_yaxis.set_major_locator(LinearLocator(3))
37 | ax.w_zaxis.set_major_locator(LinearLocator(3))
38 | 
39 | plt.show()
40 | 
41 | 


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/fig/generate_gradient.py:
--------------------------------------------------------------------------------
  1 | """generate_gradient.py
  2 | ~~~~~~~~~~~~~~~~~~~~~~~
  3 | 
  4 | Use network2 to figure out the average starting values of the gradient
  5 | error terms \delta^l_j = \partial C / \partial z^l_j = \partial C /
  6 | \partial b^l_j.
  7 | 
  8 | """
  9 | 
 10 | #### Libraries
 11 | # Standard library
 12 | import json
 13 | import math
 14 | import random
 15 | import shutil
 16 | import sys
 17 | sys.path.append("../src/")
 18 | 
 19 | # My library
 20 | import mnist_loader
 21 | import network2
 22 | 
 23 | # Third-party libraries
 24 | import matplotlib.pyplot as plt
 25 | import numpy as np
 26 | 
 27 | def main():
 28 |     # Load the data
 29 |     full_td, _, _ = mnist_loader.load_data_wrapper()
 30 |     td = full_td[:1000] # Just use the first 1000 items of training data
 31 |     epochs = 500 # Number of epochs to train for
 32 | 
 33 |     print "\nTwo hidden layers:"
 34 |     net = network2.Network([784, 30, 30, 10])
 35 |     initial_norms(td, net)
 36 |     abbreviated_gradient = [
 37 |         ag[:6] for ag in get_average_gradient(net, td)[:-1]] 
 38 |     print "Saving the averaged gradient for the top six neurons in each "+\
 39 |         "layer.\nWARNING: This will affect the look of the book, so be "+\
 40 |         "sure to check the\nrelevant material (early chapter 5)."
 41 |     f = open("initial_gradient.json", "w")
 42 |     json.dump(abbreviated_gradient, f)
 43 |     f.close()
 44 |     shutil.copy("initial_gradient.json", "../../js/initial_gradient.json")
 45 |     training(td, net, epochs, "norms_during_training_2_layers.json")
 46 |     plot_training(
 47 |         epochs, "norms_during_training_2_layers.json", 2)
 48 | 
 49 |     print "\nThree hidden layers:"
 50 |     net = network2.Network([784, 30, 30, 30, 10])
 51 |     initial_norms(td, net)
 52 |     training(td, net, epochs, "norms_during_training_3_layers.json")
 53 |     plot_training(
 54 |         epochs, "norms_during_training_3_layers.json", 3)
 55 | 
 56 |     print "\nFour hidden layers:"
 57 |     net = network2.Network([784, 30, 30, 30, 30, 10])
 58 |     initial_norms(td, net)
 59 |     training(td, net, epochs, 
 60 |              "norms_during_training_4_layers.json")
 61 |     plot_training(
 62 |         epochs, "norms_during_training_4_layers.json", 4)
 63 | 
 64 | def initial_norms(training_data, net):
 65 |     average_gradient = get_average_gradient(net, training_data)
 66 |     norms = [list_norm(avg) for avg in average_gradient[:-1]]
 67 |     print "Average gradient for the hidden layers: "+str(norms)
 68 |     
 69 | def training(training_data, net, epochs, filename):
 70 |     norms = []
 71 |     for j in range(epochs):
 72 |         average_gradient = get_average_gradient(net, training_data)
 73 |         norms.append([list_norm(avg) for avg in average_gradient[:-1]])
 74 |         print "Epoch: %s" % j
 75 |         net.SGD(training_data, 1, 1000, 0.1, lmbda=5.0)
 76 |     f = open(filename, "w")
 77 |     json.dump(norms, f)
 78 |     f.close()
 79 | 
 80 | def plot_training(epochs, filename, num_layers):
 81 |     f = open(filename, "r")
 82 |     norms = json.load(f)
 83 |     f.close()
 84 |     fig = plt.figure()
 85 |     ax = fig.add_subplot(111)
 86 |     colors = ["#2A6EA6", "#FFA933", "#FF5555", "#55FF55", "#5555FF"]
 87 |     for j in range(num_layers):
 88 |         ax.plot(np.arange(epochs), 
 89 |                 [n[j] for n in norms], 
 90 |                 color=colors[j],
 91 |                 label="Hidden layer %s" % (j+1,))
 92 |     ax.set_xlim([0, epochs])
 93 |     ax.grid(True)
 94 |     ax.set_xlabel('Number of epochs of training')
 95 |     ax.set_title('Speed of learning: %s hidden layers' % num_layers)
 96 |     ax.set_yscale('log')
 97 |     plt.legend(loc="upper right")
 98 |     fig_filename = "training_speed_%s_layers.png" % num_layers
 99 |     plt.savefig(fig_filename)
100 |     shutil.copy(fig_filename, "../../images/"+fig_filename)
101 |     plt.show()
102 | 
103 | def get_average_gradient(net, training_data):
104 |     nabla_b_results = [net.backprop(x, y)[0] for x, y in training_data]
105 |     gradient = list_sum(nabla_b_results)
106 |     return [(np.reshape(g, len(g))/len(training_data)).tolist() 
107 |             for g in gradient]
108 | 
109 | def zip_sum(a, b): 
110 |     return [x+y for (x, y) in zip(a, b)]
111 | 
112 | def list_sum(l):
113 |     return reduce(zip_sum, l)
114 | 
115 | def list_norm(l):
116 |     return math.sqrt(sum([x*x for x in l]))
117 | 
118 | if __name__ == "__main__":
119 |     main()
120 | 


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/fig/initial_gradient.json:
--------------------------------------------------------------------------------
1 | [[-0.003970677333144113, -0.0031684316985881185, 0.008103235909196014, 0.012598010584130365, -0.026465907331998335, 0.0017583319323150341], [0.04152906589960523, 0.044025552524932406, -0.009669682279354514, 0.046736871369353235, 0.03877302528270452, 0.012336459551975156]]


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/fig/misleading_gradient.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/misleading_gradient.png


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/fig/misleading_gradient.py:
--------------------------------------------------------------------------------
 1 | """
 2 | misleading_gradient
 3 | ~~~~~~~~~~~~~~~~~~~
 4 | 
 5 | Plots a function which misleads the gradient descent algorithm."""
 6 | 
 7 | #### Libraries
 8 | # Third party libraries
 9 | from matplotlib.ticker import LinearLocator
10 | # Note that axes3d is not explicitly used in the code, but is needed
11 | # to register the 3d plot type correctly
12 | from mpl_toolkits.mplot3d import axes3d 
13 | import matplotlib.pyplot as plt
14 | import numpy
15 | 
16 | fig = plt.figure()
17 | ax = fig.gca(projection='3d')
18 | X = numpy.arange(-1, 1, 0.025)
19 | Y = numpy.arange(-1, 1, 0.025)
20 | X, Y = numpy.meshgrid(X, Y)
21 | Z = X**2 + 10*Y**2
22 | 
23 | colortuple = ('w', 'b')
24 | colors = numpy.empty(X.shape, dtype=str)
25 | for x in xrange(len(X)):
26 |     for y in xrange(len(Y)):
27 |         colors[x, y] = colortuple[(x + y) % 2]
28 | 
29 | surf = ax.plot_surface(X, Y, Z, rstride=1, cstride=1, facecolors=colors,
30 |         linewidth=0)
31 | 
32 | ax.set_xlim3d(-1, 1)
33 | ax.set_ylim3d(-1, 1)
34 | ax.set_zlim3d(0, 12)
35 | ax.w_xaxis.set_major_locator(LinearLocator(3))
36 | ax.w_yaxis.set_major_locator(LinearLocator(3))
37 | ax.w_zaxis.set_major_locator(LinearLocator(3))
38 | ax.text(0.05, -1.8, 0, "$w_1$", fontsize=20)
39 | ax.text(1.5, -0.25, 0, "$w_2$", fontsize=20)
40 | ax.text(1.79, 0, 9.62, "$C$", fontsize=20)
41 | 
42 | plt.show()
43 | 
44 | 


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/fig/misleading_gradient_contours.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/misleading_gradient_contours.png


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/fig/misleading_gradient_contours.py:
--------------------------------------------------------------------------------
 1 | """
 2 | misleading_gradient_contours
 3 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 4 | 
 5 | Plots the contours of the function from misleading_gradient.py"""
 6 | 
 7 | #### Libraries
 8 | # Third party libraries
 9 | import matplotlib.pyplot as plt
10 | import numpy
11 | 
12 | X = numpy.arange(-1, 1, 0.02)
13 | Y = numpy.arange(-1, 1, 0.02)
14 | X, Y = numpy.meshgrid(X, Y)
15 | Z = X**2 + 10*Y**2
16 | 
17 | plt.figure()
18 | CS = plt.contour(X, Y, Z, levels=[0.5, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0])
19 | plt.xlabel("$w_1$", fontsize=16)
20 | plt.ylabel("$w_2$", fontsize=16)
21 | plt.show()
22 | 


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/fig/mnist_100_digits.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/mnist_100_digits.png


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/fig/mnist_2_and_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/mnist_2_and_1.png


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/fig/mnist_complete_zero.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/mnist_complete_zero.png


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/fig/mnist_first_digit.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/mnist_first_digit.png


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/fig/mnist_other_features.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/mnist_other_features.png


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/fig/mnist_really_bad_images.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/mnist_really_bad_images.png


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/fig/mnist_top_left_feature.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/mnist_top_left_feature.png


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/fig/more_data.json:
--------------------------------------------------------------------------------
1 | [69.09, 76.37, 85.29, 88.85, 91.27, 93.24, 94.89, 95.85, 95.97]


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/fig/more_data.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/more_data.png


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/fig/more_data.py:
--------------------------------------------------------------------------------
  1 | """more_data
  2 | ~~~~~~~~~~~~
  3 | 
  4 | Plot graphs to illustrate the performance of MNIST when different size
  5 | training sets are used.
  6 | 
  7 | """
  8 | 
  9 | # Standard library
 10 | import json
 11 | import random
 12 | import sys
 13 | 
 14 | # My library
 15 | sys.path.append('../src/')
 16 | import mnist_loader
 17 | import network2
 18 | 
 19 | # Third-party libraries
 20 | import matplotlib.pyplot as plt
 21 | import numpy as np
 22 | from sklearn import svm
 23 | 
 24 | # The sizes to use for the different training sets
 25 | SIZES = [100, 200, 500, 1000, 2000, 5000, 10000, 20000, 50000] 
 26 | 
 27 | def main():
 28 |     run_networks()
 29 |     run_svms()
 30 |     make_plots()
 31 |                        
 32 | def run_networks():
 33 |     # Make results more easily reproducible
 34 |     random.seed(12345678)
 35 |     np.random.seed(12345678)
 36 |     training_data, validation_data, test_data = mnist_loader.load_data_wrapper()
 37 |     net = network2.Network([784, 30, 10], cost=network2.CrossEntropyCost())
 38 |     accuracies = []
 39 |     for size in SIZES:
 40 |         print "\n\nTraining network with data set size %s" % size
 41 |         net.large_weight_initializer()
 42 |         num_epochs = 1500000 / size 
 43 |         net.SGD(training_data[:size], num_epochs, 10, 0.5, lmbda = size*0.0001)
 44 |         accuracy = net.accuracy(validation_data) / 100.0
 45 |         print "Accuracy was %s percent" % accuracy
 46 |         accuracies.append(accuracy)
 47 |     f = open("more_data.json", "w")
 48 |     json.dump(accuracies, f)
 49 |     f.close()
 50 | 
 51 | def run_svms():
 52 |     svm_training_data, svm_validation_data, svm_test_data \
 53 |         = mnist_loader.load_data()
 54 |     accuracies = []
 55 |     for size in SIZES:
 56 |         print "\n\nTraining SVM with data set size %s" % size
 57 |         clf = svm.SVC()
 58 |         clf.fit(svm_training_data[0][:size], svm_training_data[1][:size])
 59 |         predictions = [int(a) for a in clf.predict(svm_validation_data[0])]
 60 |         accuracy = sum(int(a == y) for a, y in 
 61 |                        zip(predictions, svm_validation_data[1])) / 100.0
 62 |         print "Accuracy was %s percent" % accuracy
 63 |         accuracies.append(accuracy)
 64 |     f = open("more_data_svm.json", "w")
 65 |     json.dump(accuracies, f)
 66 |     f.close()
 67 | 
 68 | def make_plots():
 69 |     f = open("more_data.json", "r")
 70 |     accuracies = json.load(f)
 71 |     f.close()
 72 |     f = open("more_data_svm.json", "r")
 73 |     svm_accuracies = json.load(f)
 74 |     f.close()
 75 |     make_linear_plot(accuracies)
 76 |     make_log_plot(accuracies)
 77 |     make_combined_plot(accuracies, svm_accuracies)
 78 | 
 79 | def make_linear_plot(accuracies):
 80 |     fig = plt.figure()
 81 |     ax = fig.add_subplot(111)
 82 |     ax.plot(SIZES, accuracies, color='#2A6EA6')
 83 |     ax.plot(SIZES, accuracies, "o", color='#FFA933')
 84 |     ax.set_xlim(0, 50000)
 85 |     ax.set_ylim(60, 100)
 86 |     ax.grid(True)
 87 |     ax.set_xlabel('Training set size')
 88 |     ax.set_title('Accuracy (%) on the validation data')
 89 |     plt.show()
 90 | 
 91 | def make_log_plot(accuracies):
 92 |     fig = plt.figure()
 93 |     ax = fig.add_subplot(111)
 94 |     ax.plot(SIZES, accuracies, color='#2A6EA6')
 95 |     ax.plot(SIZES, accuracies, "o", color='#FFA933')
 96 |     ax.set_xlim(100, 50000)
 97 |     ax.set_ylim(60, 100)
 98 |     ax.set_xscale('log')
 99 |     ax.grid(True)
100 |     ax.set_xlabel('Training set size')
101 |     ax.set_title('Accuracy (%) on the validation data')
102 |     plt.show()
103 | 
104 | def make_combined_plot(accuracies, svm_accuracies):
105 |     fig = plt.figure()
106 |     ax = fig.add_subplot(111)
107 |     ax.plot(SIZES, accuracies, color='#2A6EA6')
108 |     ax.plot(SIZES, accuracies, "o", color='#2A6EA6', 
109 |             label='Neural network accuracy (%)')
110 |     ax.plot(SIZES, svm_accuracies, color='#FFA933')
111 |     ax.plot(SIZES, svm_accuracies, "o", color='#FFA933',
112 |             label='SVM accuracy (%)')
113 |     ax.set_xlim(100, 50000)
114 |     ax.set_ylim(25, 100)
115 |     ax.set_xscale('log')
116 |     ax.grid(True)
117 |     ax.set_xlabel('Training set size')
118 |     plt.legend(loc="lower right")
119 |     plt.show()
120 | 
121 | if __name__ == "__main__":
122 |     main()
123 | 


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/fig/more_data_5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/more_data_5.png


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/fig/more_data_comparison.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/more_data_comparison.png


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/fig/more_data_log.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/more_data_log.png


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/fig/more_data_rotated_5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/more_data_rotated_5.png


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/fig/more_data_svm.json:
--------------------------------------------------------------------------------
1 | [25.07, 48.93, 75.13, 83.87, 88.49, 91.46, 92.45, 93.47, 94.48]


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/fig/multiple_eta.json:
--------------------------------------------------------------------------------
1 | [[[], [], [0.87809508908377998, 0.67406552530098141, 0.59798920430275404, 0.55533015743656189, 0.51751101003208144, 0.4942033354556824, 0.47255041042913526, 0.46069879353359433, 0.44304475294352064, 0.43099562372228112, 0.42310993427766375, 0.41408265298981006, 0.40573464183982105, 0.40110722961828227, 0.39162028064538967, 0.38705015774740958, 0.38116357043417587, 0.37603986695304614, 0.37297012040237154, 0.37057334627661631, 0.36551756338853658, 0.36335674264586654, 0.35745296185579917, 0.35535960956849127, 0.35365591135061097, 0.35011353300568238, 0.34946519495897871, 0.34604661988238178, 0.34386077098862522, 0.33919980880230349], []], [[], [], [0.49501954654296704, 0.4063145129425576, 0.40482383242804637, 0.37156577828840276, 0.37380111172151681, 0.37152751786000143, 0.35371985224004426, 0.3557161388797867, 0.34323780090168027, 0.3433514311156789, 0.3367645441708797, 0.34532085892085329, 0.33506383267050244, 0.34760988079085842, 0.34921493732996928, 0.33853424834583179, 0.32837282561262077, 0.33175599401109612, 0.33132920379429243, 0.33024353325326034, 0.32736756892399654, 0.3259638557593546, 0.32004264784244907, 0.33424319076405928, 0.33878125802305081, 0.32521839878261177, 0.32679267619514646, 0.32488571435373748, 0.33056367198473002, 0.33879633130932685], []], [[], [], [0.92489293305102116, 0.83919130289246469, 0.88748421594232696, 0.79625231780396133, 0.78117959228699174, 1.1365919079387048, 0.78787239608336346, 0.76778614131217449, 0.73689525303227721, 0.80127437393519696, 0.74433665287336681, 0.73725544607013882, 0.80249602203179993, 0.85190338199210014, 0.79872168623645712, 0.80243104440756152, 0.80649160680410659, 0.81467254023600921, 0.82526467696100858, 0.75042379852601759, 0.93658673378777402, 0.88236662906752283, 0.86121396033520892, 0.72492681699401829, 0.80405009868466648, 0.83959963179208197, 0.83387510808276821, 0.88282498566307899, 0.88583473645177979, 0.86068501713490919], []]]


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/fig/multiple_eta.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/multiple_eta.png


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/fig/multiple_eta.py:
--------------------------------------------------------------------------------
 1 | """multiple_eta
 2 | ~~~~~~~~~~~~~~~
 3 | 
 4 | This program shows how different values for the learning rate affect
 5 | training.  In particular, we'll plot out how the cost changes using
 6 | three different values for eta.
 7 | 
 8 | """
 9 | 
10 | # Standard library
11 | import json
12 | import random
13 | import sys
14 | 
15 | # My library
16 | sys.path.append('../src/')
17 | import mnist_loader
18 | import network2
19 | 
20 | # Third-party libraries
21 | import matplotlib.pyplot as plt
22 | import numpy as np
23 | 
24 | # Constants
25 | LEARNING_RATES = [0.025, 0.25, 2.5]
26 | COLORS = ['#2A6EA6', '#FFCD33', '#FF7033']
27 | NUM_EPOCHS = 30
28 | 
29 | def main():
30 |     run_networks()
31 |     make_plot()
32 | 
33 | def run_networks():
34 |     """Train networks using three different values for the learning rate,
35 |     and store the cost curves in the file ``multiple_eta.json``, where
36 |     they can later be used by ``make_plot``.
37 | 
38 |     """
39 |     # Make results more easily reproducible
40 |     random.seed(12345678)
41 |     np.random.seed(12345678)
42 |     training_data, validation_data, test_data = mnist_loader.load_data_wrapper()
43 |     results = []
44 |     for eta in LEARNING_RATES:
45 |         print "\nTrain a network using eta = "+str(eta)
46 |         net = network2.Network([784, 30, 10])
47 |         results.append(
48 |             net.SGD(training_data, NUM_EPOCHS, 10, eta, lmbda=5.0,
49 |                     evaluation_data=validation_data, 
50 |                     monitor_training_cost=True))
51 |     f = open("multiple_eta.json", "w")
52 |     json.dump(results, f)
53 |     f.close()
54 | 
55 | def make_plot():
56 |     f = open("multiple_eta.json", "r")
57 |     results = json.load(f)
58 |     f.close()
59 |     fig = plt.figure()
60 |     ax = fig.add_subplot(111)
61 |     for eta, result, color in zip(LEARNING_RATES, results, COLORS):
62 |         _, _, training_cost, _ = result
63 |         ax.plot(np.arange(NUM_EPOCHS), training_cost, "o-",
64 |                 label="$\eta$ = "+str(eta),
65 |                 color=color)
66 |     ax.set_xlim([0, NUM_EPOCHS])
67 |     ax.set_xlabel('Epoch')
68 |     ax.set_ylabel('Cost')
69 |     plt.legend(loc='upper right')
70 |     plt.show()
71 | 
72 | if __name__ == "__main__":
73 |     main()
74 | 


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/fig/overfitting1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/overfitting1.png


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/fig/overfitting2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/overfitting2.png


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/fig/overfitting3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/overfitting3.png


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/fig/overfitting4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/overfitting4.png


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/fig/overfitting_full.json:
--------------------------------------------------------------------------------
1 | [[0.56135590058630858, 0.47806921271034553, 0.457510836259925, 0.42504920544144992, 0.39449553344420019, 0.39810448800345, 0.37017079712250733, 0.37403997639944547, 0.36290253019659285, 0.4006868170859208, 0.36817548958488616, 0.37299310675826219, 0.36871967242261605, 0.37146610246666006, 0.35704621996697938, 0.35821464151288968, 0.38622103466509744, 0.37010939716781127, 0.36539832104327125, 0.35511546847032671, 0.3828088676932585, 0.36160025922354638, 0.37028708356461698, 0.37605182846277163, 0.36634313696187393, 0.36129044456360238, 0.37531885586439506, 0.36415225595876555, 0.35707895858237054, 0.36631987373588193], [9136, 9275, 9307, 9377, 9450, 9429, 9468, 9488, 9494, 9424, 9483, 9483, 9505, 9499, 9508, 9508, 9445, 9524, 9524, 9524, 9494, 9527, 9518, 9505, 9533, 9529, 9512, 9530, 9532, 9531], [0.55994588582554705, 0.44664870303435988, 0.42455329174078477, 0.38578320429266705, 0.33992291017592285, 0.33162477096795895, 0.3137480626518645, 0.30028971890544093, 0.27353890048167528, 0.30236927117202678, 0.26487026303889277, 0.2661714884193439, 0.24734280015146709, 0.26355551438395558, 0.23088530423416964, 0.22618350577327287, 0.25137541006767478, 0.23085585354651994, 0.21417931191800957, 0.20049587923059808, 0.23713128948069295, 0.20327728799861464, 0.21953883029836488, 0.20264436321820509, 0.19643949703516961, 0.18467980669870671, 0.18788606162530633, 0.18535916502880764, 0.18466759834259142, 0.17218286758911475], [45708, 46605, 46797, 47190, 47543, 47570, 47638, 47838, 48061, 47825, 48160, 48195, 48265, 48156, 48439, 48449, 48267, 48433, 48598, 48697, 48380, 48648, 48500, 48669, 48734, 48796, 48802, 48837, 48810, 48932]]


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/fig/overfitting_full.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/overfitting_full.png


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/fig/pca_hard_data.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/pca_hard_data.png


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/fig/pca_hard_data_fit.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/pca_hard_data_fit.png


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/fig/pca_limitations.py:
--------------------------------------------------------------------------------
 1 | """
 2 | pca_limitations
 3 | ~~~~~~~~~~~~~~~
 4 | 
 5 | Plot graphs to illustrate the limitations of PCA.
 6 | """
 7 | 
 8 | # Third-party libraries
 9 | from mpl_toolkits.mplot3d import Axes3D
10 | import matplotlib.pyplot as plt
11 | import numpy as np
12 | 
13 | # Plot just the data
14 | fig = plt.figure()
15 | ax = fig.gca(projection='3d')
16 | z = np.linspace(-2, 2, 20)
17 | theta = np.linspace(-4 * np.pi, 4 * np.pi, 20)
18 | x = np.sin(theta)+0.03*np.random.randn(20)
19 | y = np.cos(theta)+0.03*np.random.randn(20)
20 | ax.plot(x, y, z, 'ro')
21 | plt.show()
22 | 
23 | # Plot the data and the helix together
24 | fig = plt.figure()
25 | ax = fig.gca(projection='3d')
26 | z_helix = np.linspace(-2, 2, 100)
27 | theta_helix = np.linspace(-4 * np.pi, 4 * np.pi, 100)
28 | x_helix = np.sin(theta_helix)
29 | y_helix = np.cos(theta_helix)
30 | ax.plot(x, y, z, 'ro')
31 | ax.plot(x_helix, y_helix, z_helix, '')
32 | plt.show()
33 | 


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/fig/regularized1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/regularized1.png


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/fig/regularized2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/regularized2.png


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/fig/regularized_full.json:
--------------------------------------------------------------------------------
1 | [[4.3072791918656037, 2.9331304641086344, 2.1348073553576041, 1.6588303607817259, 1.330889938797851, 1.1963223601928472, 1.1170765304219505, 1.0170754480838433, 0.99110935015398149, 1.0071179800661803, 0.96280080386971378, 0.99226609521675169, 0.96023984363523895, 0.97253784945751276, 0.93966545596520334, 0.95330563342376551, 0.96378529404233837, 0.97367336858037301, 0.94435985290781166, 0.94622931411839994, 0.98392022263201184, 0.94091005661041272, 0.9496551347987412, 0.94714964684453073, 0.95026655456196552, 0.92915894672179755, 0.95831053042987979, 1.0153994919718721, 0.92940339906358749, 0.97682851862658082], [9212, 9341, 9375, 9424, 9532, 9537, 9504, 9541, 9578, 9538, 9579, 9530, 9590, 9543, 9607, 9597, 9576, 9546, 9600, 9634, 9544, 9606, 9614, 9607, 9621, 9637, 9620, 9511, 9649, 9561], [1.2925405259017666, 0.92479539229795305, 0.72611252037165497, 0.61618944188425839, 0.49142410439713557, 0.46552608507795468, 0.46074829841290343, 0.40775149802551902, 0.39671750686791218, 0.42031570708192345, 0.38057096091326847, 0.40768033915334978, 0.3895210257834103, 0.40585871820346864, 0.36003072887701948, 0.37700037701783806, 0.39300003862768451, 0.40774598935627593, 0.37194215157507704, 0.3662415845761452, 0.40722309031673021, 0.36476961463606117, 0.36988528906574514, 0.36112644707329011, 0.380710641602238, 0.35700998663848571, 0.37724740623797381, 0.44991741876110503, 0.35820321110078079, 0.39226034353556583], [45919, 46835, 47204, 47434, 47989, 47930, 47839, 48157, 48218, 48105, 48313, 48089, 48282, 48111, 48463, 48362, 48243, 48123, 48416, 48533, 48123, 48483, 48435, 48548, 48434, 48524, 48417, 47797, 48561, 48235]]


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/fig/regularized_full.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/regularized_full.png


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/fig/replaced_by_d3/README.md:
--------------------------------------------------------------------------------
1 | # Replaced by d3 directory
2 | 
3 | This directory contains python code which generated png figures which
4 | were later replaced by d3 in the live version of the site.  They've
5 | been preserved here on the off chance that they may be of use at some
6 | point in the future.
7 | 


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/fig/replaced_by_d3/relu.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/replaced_by_d3/relu.png


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/fig/replaced_by_d3/relu.py:
--------------------------------------------------------------------------------
 1 | """
 2 | relu
 3 | ~~~~
 4 | 
 5 | Plots a graph of the squashing function used by a rectified linear
 6 | unit."""
 7 | 
 8 | import numpy as np
 9 | import matplotlib.pyplot as plt
10 | 
11 | z = np.arange(-2, 2, .1)
12 | zero = np.zeros(len(z))
13 | y = np.max([zero, z], axis=0)
14 | 
15 | fig = plt.figure()
16 | ax = fig.add_subplot(111)
17 | ax.plot(z, y)
18 | ax.set_ylim([-2.0, 2.0])
19 | ax.set_xlim([-2.0, 2.0])
20 | ax.grid(True)
21 | ax.set_xlabel('z')
22 | ax.set_title('Rectified linear unit')
23 | 
24 | plt.show()
25 | 


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/fig/replaced_by_d3/sigmoid.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/replaced_by_d3/sigmoid.png


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/fig/replaced_by_d3/sigmoid.py:
--------------------------------------------------------------------------------
 1 | """
 2 | sigmoid
 3 | ~~~~~~~
 4 | 
 5 | Plots a graph of the sigmoid function."""
 6 | 
 7 | import numpy
 8 | import matplotlib.pyplot as plt
 9 | 
10 | z = numpy.arange(-5, 5, .1)
11 | sigma_fn = numpy.vectorize(lambda z: 1/(1+numpy.exp(-z)))
12 | sigma = sigma_fn(z)
13 | 
14 | fig = plt.figure()
15 | ax = fig.add_subplot(111)
16 | ax.plot(z, sigma)
17 | ax.set_ylim([-0.5, 1.5])
18 | ax.set_xlim([-5,5])
19 | ax.grid(True)
20 | ax.set_xlabel('z')
21 | ax.set_title('sigmoid function')
22 | 
23 | plt.show()
24 | 


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/fig/replaced_by_d3/step.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/replaced_by_d3/step.png


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/fig/replaced_by_d3/step.py:
--------------------------------------------------------------------------------
 1 | """
 2 | step
 3 | ~~~~~~~
 4 | 
 5 | Plots a graph of a step function."""
 6 | 
 7 | import numpy
 8 | import matplotlib.pyplot as plt
 9 | 
10 | z = numpy.arange(-5, 5, .02)
11 | step_fn = numpy.vectorize(lambda z: 1.0 if z >= 0.0 else 0.0)
12 | step = step_fn(z)
13 | 
14 | fig = plt.figure()
15 | ax = fig.add_subplot(111)
16 | ax.plot(z, step)
17 | ax.set_ylim([-0.5, 1.5])
18 | ax.set_xlim([-5,5])
19 | ax.grid(True)
20 | ax.set_xlabel('z')
21 | ax.set_title('step function')
22 | 
23 | plt.show()
24 | 


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/fig/replaced_by_d3/tanh.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/replaced_by_d3/tanh.png


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/fig/replaced_by_d3/tanh.py:
--------------------------------------------------------------------------------
 1 | """
 2 | tanh
 3 | ~~~~
 4 | 
 5 | Plots a graph of the tanh function."""
 6 | 
 7 | import numpy as np
 8 | import matplotlib.pyplot as plt
 9 | 
10 | z = np.arange(-5, 5, .1)
11 | t = np.tanh(z)
12 | 
13 | fig = plt.figure()
14 | ax = fig.add_subplot(111)
15 | ax.plot(z, t)
16 | ax.set_ylim([-1.0, 1.0])
17 | ax.set_xlim([-5,5])
18 | ax.grid(True)
19 | ax.set_xlabel('z')
20 | ax.set_title('tanh function')
21 | 
22 | plt.show()
23 | 


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/fig/serialize_images_to_json.py:
--------------------------------------------------------------------------------
 1 | """
 2 | serialize_images_to_json
 3 | ~~~~~~~~~~~~~~~~~~~~~~~~
 4 | 
 5 | Utility to serialize parts of the training and validation data to JSON, 
 6 | for use with Javascript.  """
 7 | 
 8 | #### Libraries
 9 | # Standard library
10 | import json 
11 | import sys
12 | 
13 | # My library
14 | sys.path.append('../src/')
15 | import mnist_loader
16 | 
17 | # Third-party libraries
18 | import numpy as np
19 | 
20 | 
21 | # Number of training and validation data images to serialize
22 | NTD = 1000
23 | NVD = 100
24 | 
25 | training_data, validation_data, test_data = mnist_loader.load_data_wrapper()
26 | 
27 | def make_data_integer(td):
28 |     # This will be slow, due to the loop.  It'd be better if numpy did
29 |     # this directly.  But numpy.rint followed by tolist() doesn't
30 |     # convert to a standard Python int.
31 |     return [int(x) for x in (td*256).reshape(784).tolist()]
32 | 
33 | data = {"training": [
34 |     {"x": [x[0] for x in training_data[j][0].tolist()],
35 |      "y": [y[0] for y in training_data[j][1].tolist()]}
36 |     for j in xrange(NTD)],
37 |         "validation": [
38 |     {"x": [x[0] for x in validation_data[j][0].tolist()],
39 |      "y": validation_data[j][1]}
40 |             for j in xrange(NVD)]}
41 | 
42 | f = open("data_1000.json", "w")
43 | json.dump(data, f)
44 | f.close()
45 | 
46 | 
47 | 


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/fig/test.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/test.png


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/fig/training_speed_2_layers.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/training_speed_2_layers.png


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/fig/training_speed_3_layers.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/training_speed_3_layers.png


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/fig/training_speed_4_layers.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/training_speed_4_layers.png


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/fig/valley.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/valley.png


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/fig/valley.py:
--------------------------------------------------------------------------------
 1 | """
 2 | valley
 3 | ~~~~~~
 4 | 
 5 | Plots a function of two variables to minimize.  The function is a
 6 | fairly generic valley function."""
 7 | 
 8 | #### Libraries
 9 | # Third party libraries
10 | from matplotlib.ticker import LinearLocator
11 | # Note that axes3d is not explicitly used in the code, but is needed
12 | # to register the 3d plot type correctly
13 | from mpl_toolkits.mplot3d import axes3d 
14 | import matplotlib.pyplot as plt
15 | import numpy
16 | 
17 | fig = plt.figure()
18 | ax = fig.gca(projection='3d')
19 | X = numpy.arange(-1, 1, 0.1)
20 | Y = numpy.arange(-1, 1, 0.1)
21 | X, Y = numpy.meshgrid(X, Y)
22 | Z = X**2 + Y**2
23 | 
24 | colortuple = ('w', 'b')
25 | colors = numpy.empty(X.shape, dtype=str)
26 | for x in xrange(len(X)):
27 |     for y in xrange(len(Y)):
28 |         colors[x, y] = colortuple[(x + y) % 2]
29 | 
30 | surf = ax.plot_surface(X, Y, Z, rstride=1, cstride=1, facecolors=colors,
31 |         linewidth=0)
32 | 
33 | ax.set_xlim3d(-1, 1)
34 | ax.set_ylim3d(-1, 1)
35 | ax.set_zlim3d(0, 2)
36 | ax.w_xaxis.set_major_locator(LinearLocator(3))
37 | ax.w_yaxis.set_major_locator(LinearLocator(3))
38 | ax.w_zaxis.set_major_locator(LinearLocator(3))
39 | ax.text(1.79, 0, 1.62, "$C$", fontsize=20)
40 | ax.text(0.05, -1.8, 0, "$v_1$", fontsize=20)
41 | ax.text(1.5, -0.25, 0, "$v_2$", fontsize=20)
42 | 
43 | plt.show()
44 | 


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/fig/valley2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/valley2.png


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/fig/valley2.py:
--------------------------------------------------------------------------------
 1 | """valley2.py
 2 | ~~~~~~~~~~~~~
 3 | 
 4 | Plots a function of two variables to minimize.  The function is a
 5 | fairly generic valley function.
 6 | 
 7 | Note that this is a duplicate of valley.py, but omits labels on the
 8 | axis.  It's bad practice to duplicate in this way, but I had
 9 | considerable trouble getting matplotlib to update a graph in the way I
10 | needed (adding or removing labels), so finally fell back on this as a
11 | kludge solution.
12 | 
13 | """
14 | 
15 | #### Libraries
16 | # Third party libraries
17 | from matplotlib.ticker import LinearLocator
18 | # Note that axes3d is not explicitly used in the code, but is needed
19 | # to register the 3d plot type correctly
20 | from mpl_toolkits.mplot3d import axes3d 
21 | import matplotlib.pyplot as plt
22 | import numpy
23 | 
24 | fig = plt.figure()
25 | ax = fig.gca(projection='3d')
26 | X = numpy.arange(-1, 1, 0.1)
27 | Y = numpy.arange(-1, 1, 0.1)
28 | X, Y = numpy.meshgrid(X, Y)
29 | Z = X**2 + Y**2
30 | 
31 | colortuple = ('w', 'b')
32 | colors = numpy.empty(X.shape, dtype=str)
33 | for x in xrange(len(X)):
34 |     for y in xrange(len(Y)):
35 |         colors[x, y] = colortuple[(x + y) % 2]
36 | 
37 | surf = ax.plot_surface(X, Y, Z, rstride=1, cstride=1, facecolors=colors,
38 |         linewidth=0)
39 | 
40 | ax.set_xlim3d(-1, 1)
41 | ax.set_ylim3d(-1, 1)
42 | ax.set_zlim3d(0, 2)
43 | ax.w_xaxis.set_major_locator(LinearLocator(3))
44 | ax.w_yaxis.set_major_locator(LinearLocator(3))
45 | ax.w_zaxis.set_major_locator(LinearLocator(3))
46 | ax.text(1.79, 0, 1.62, "$C$", fontsize=20)
47 | 
48 | plt.show()
49 | 


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/fig/weight_initialization.py:
--------------------------------------------------------------------------------
 1 | """weight_initialization 
 2 | ~~~~~~~~~~~~~~~~~~~~~~~~
 3 | 
 4 | This program shows how weight initialization affects training.  In
 5 | particular, we'll plot out how the classification accuracies improve
 6 | using either large starting weights, whose standard deviation is 1, or
 7 | the default starting weights, whose standard deviation is 1 over the
 8 | square root of the number of input neurons.
 9 | 
10 | """
11 | 
12 | # Standard library
13 | import json
14 | import random
15 | import sys
16 | 
17 | # My library
18 | sys.path.append('../src/')
19 | import mnist_loader
20 | import network2
21 | 
22 | # Third-party libraries
23 | import matplotlib.pyplot as plt
24 | import numpy as np
25 | 
26 | def main(filename, n, eta):
27 |     run_network(filename, n, eta)
28 |     make_plot(filename)
29 |                        
30 | def run_network(filename, n, eta):
31 |     """Train the network using both the default and the large starting
32 |     weights.  Store the results in the file with name ``filename``,
33 |     where they can later be used by ``make_plots``.
34 | 
35 |     """
36 |     # Make results more easily reproducible
37 |     random.seed(12345678)
38 |     np.random.seed(12345678)
39 |     training_data, validation_data, test_data = mnist_loader.load_data_wrapper()
40 |     net = network2.Network([784, n, 10], cost=network2.CrossEntropyCost)
41 |     print "Train the network using the default starting weights."
42 |     default_vc, default_va, default_tc, default_ta \
43 |         = net.SGD(training_data, 30, 10, eta, lmbda=5.0,
44 |                   evaluation_data=validation_data, 
45 |                   monitor_evaluation_accuracy=True)
46 |     print "Train the network using the large starting weights."
47 |     net.large_weight_initializer()
48 |     large_vc, large_va, large_tc, large_ta \
49 |         = net.SGD(training_data, 30, 10, eta, lmbda=5.0,
50 |                   evaluation_data=validation_data, 
51 |                   monitor_evaluation_accuracy=True)
52 |     f = open(filename, "w")
53 |     json.dump({"default_weight_initialization":
54 |                [default_vc, default_va, default_tc, default_ta],
55 |                "large_weight_initialization":
56 |                [large_vc, large_va, large_tc, large_ta]}, 
57 |               f)
58 |     f.close()
59 | 
60 | def make_plot(filename):
61 |     """Load the results from the file ``filename``, and generate the
62 |     corresponding plot.
63 | 
64 |     """
65 |     f = open(filename, "r")
66 |     results = json.load(f)
67 |     f.close()
68 |     default_vc, default_va, default_tc, default_ta = results[
69 |         "default_weight_initialization"]
70 |     large_vc, large_va, large_tc, large_ta = results[
71 |         "large_weight_initialization"]
72 |     # Convert raw classification numbers to percentages, for plotting
73 |     default_va = [x/100.0 for x in default_va]
74 |     large_va = [x/100.0 for x in large_va]
75 |     fig = plt.figure()
76 |     ax = fig.add_subplot(111)
77 |     ax.plot(np.arange(0, 30, 1), large_va, color='#2A6EA6',
78 |             label="Old approach to weight initialization")
79 |     ax.plot(np.arange(0, 30, 1), default_va, color='#FFA933', 
80 |             label="New approach to weight initialization")
81 |     ax.set_xlim([0, 30])
82 |     ax.set_xlabel('Epoch')
83 |     ax.set_ylim([85, 100])
84 |     ax.set_title('Classification accuracy')
85 |     plt.legend(loc="lower right")
86 |     plt.show()
87 | 
88 | if __name__ == "__main__":
89 |     main()
90 | 


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/fig/weight_initialization_100.json:
--------------------------------------------------------------------------------
1 | {"default_weight_initialization": [[], [9295, 9481, 9547, 9592, 9664, 9673, 9702, 9719, 9726, 9726, 9732, 9732, 9730, 9734, 9745, 9751, 9757, 9761, 9764, 9766, 9758, 9767, 9756, 9752, 9777, 9775, 9770, 9770, 9771, 9781], [], []], "large_weight_initialization": [[], [8994, 9181, 9260, 9364, 9427, 9449, 9497, 9512, 9560, 9578, 9603, 9616, 9626, 9629, 9644, 9671, 9674, 9679, 9700, 9708, 9707, 9717, 9729, 9720, 9719, 9745, 9751, 9754, 9755, 9742], [], []]}


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/fig/weight_initialization_100.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/weight_initialization_100.png


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/fig/weight_initialization_30.json:
--------------------------------------------------------------------------------
1 | {"default_weight_initialization": [[], [9270, 9414, 9470, 9504, 9537, 9550, 9587, 9594, 9596, 9594, 9616, 9595, 9622, 9630, 9636, 9641, 9625, 9652, 9637, 9634, 9642, 9639, 9649, 9646, 9646, 9653, 9646, 9653, 9640, 9650], [], []], "large_weight_initialization": [[], [8643, 9044, 9141, 9231, 9299, 9327, 9385, 9416, 9433, 9449, 9476, 9489, 9500, 9535, 9521, 9548, 9564, 9573, 9585, 9592, 9596, 9615, 9607, 9605, 9606, 9622, 9637, 9648, 9635, 9637], [], []]}


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/fig/weight_initialization_30.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/weight_initialization_30.png


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy
2 | scikit-learn
3 | scipy
4 | Theano
5 | 


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/src/expand_mnist.py:
--------------------------------------------------------------------------------
 1 | """expand_mnist.py
 2 | ~~~~~~~~~~~~~~~~~~
 3 | 
 4 | Take the 50,000 MNIST training images, and create an expanded set of
 5 | 250,000 images, by displacing each training image up, down, left and
 6 | right, by one pixel.  Save the resulting file to
 7 | ../data/mnist_expanded.pkl.gz.
 8 | 
 9 | Note that this program is memory intensive, and may not run on small
10 | systems.
11 | 
12 | """
13 | 
14 | from __future__ import print_function
15 | 
16 | #### Libraries
17 | 
18 | # Standard library
19 | import cPickle
20 | import gzip
21 | import os.path
22 | import random
23 | 
24 | # Third-party libraries
25 | import numpy as np
26 | 
27 | print("Expanding the MNIST training set")
28 | 
29 | if os.path.exists("../data/mnist_expanded.pkl.gz"):
30 |     print("The expanded training set already exists.  Exiting.")
31 | else:
32 |     f = gzip.open("../data/mnist.pkl.gz", 'rb')
33 |     training_data, validation_data, test_data = cPickle.load(f)
34 |     f.close()
35 |     expanded_training_pairs = []
36 |     j = 0 # counter
37 |     for x, y in zip(training_data[0], training_data[1]):
38 |         expanded_training_pairs.append((x, y))
39 |         image = np.reshape(x, (-1, 28))
40 |         j += 1
41 |         if j % 1000 == 0: print("Expanding image number", j)
42 |         # iterate over data telling us the details of how to
43 |         # do the displacement
44 |         for d, axis, index_position, index in [
45 |                 (1,  0, "first", 0),
46 |                 (-1, 0, "first", 27),
47 |                 (1,  1, "last",  0),
48 |                 (-1, 1, "last",  27)]:
49 |             new_img = np.roll(image, d, axis)
50 |             if index_position == "first": 
51 |                 new_img[index, :] = np.zeros(28)
52 |             else: 
53 |                 new_img[:, index] = np.zeros(28)
54 |             expanded_training_pairs.append((np.reshape(new_img, 784), y))
55 |     random.shuffle(expanded_training_pairs)
56 |     expanded_training_data = [list(d) for d in zip(*expanded_training_pairs)]
57 |     print("Saving expanded data. This may take a few minutes.")
58 |     f = gzip.open("../data/mnist_expanded.pkl.gz", "w")
59 |     cPickle.dump((expanded_training_data, validation_data, test_data), f)
60 |     f.close()
61 | 


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/src/mnist_average_darkness.py:
--------------------------------------------------------------------------------
 1 | """
 2 | mnist_average_darkness
 3 | ~~~~~~~~~~~~~~~~~~~~~~
 4 | 
 5 | A naive classifier for recognizing handwritten digits from the MNIST
 6 | data set.  The program classifies digits based on how dark they are
 7 | --- the idea is that digits like "1" tend to be less dark than digits
 8 | like "8", simply because the latter has a more complex shape.  When
 9 | shown an image the classifier returns whichever digit in the training
10 | data had the closest average darkness.
11 | 
12 | The program works in two steps: first it trains the classifier, and
13 | then it applies the classifier to the MNIST test data to see how many
14 | digits are correctly classified.
15 | 
16 | Needless to say, this isn't a very good way of recognizing handwritten
17 | digits!  Still, it's useful to show what sort of performance we get
18 | from naive ideas."""
19 | 
20 | #### Libraries
21 | # Standard library
22 | from collections import defaultdict
23 | 
24 | # My libraries
25 | import mnist_loader
26 | 
27 | def main():
28 |     training_data, validation_data, test_data = mnist_loader.load_data()
29 |     # training phase: compute the average darknesses for each digit,
30 |     # based on the training data
31 |     avgs = avg_darknesses(training_data)
32 |     # testing phase: see how many of the test images are classified
33 |     # correctly
34 |     num_correct = sum(int(guess_digit(image, avgs) == digit)
35 |                       for image, digit in zip(test_data[0], test_data[1]))
36 |     print "Baseline classifier using average darkness of image."
37 |     print "%s of %s values correct." % (num_correct, len(test_data[1]))
38 | 
39 | def avg_darknesses(training_data):
40 |     """ Return a defaultdict whose keys are the digits 0 through 9.
41 |     For each digit we compute a value which is the average darkness of
42 |     training images containing that digit.  The darkness for any
43 |     particular image is just the sum of the darknesses for each pixel."""
44 |     digit_counts = defaultdict(int)
45 |     darknesses = defaultdict(float)
46 |     for image, digit in zip(training_data[0], training_data[1]):
47 |         digit_counts[digit] += 1
48 |         darknesses[digit] += sum(image)
49 |     avgs = defaultdict(float)
50 |     for digit, n in digit_counts.iteritems():
51 |         avgs[digit] = darknesses[digit] / n
52 |     return avgs
53 | 
54 | def guess_digit(image, avgs):
55 |     """Return the digit whose average darkness in the training data is
56 |     closest to the darkness of ``image``.  Note that ``avgs`` is
57 |     assumed to be a defaultdict whose keys are 0...9, and whose values
58 |     are the corresponding average darknesses across the training data."""
59 |     darkness = sum(image)
60 |     distances = {k: abs(v-darkness) for k, v in avgs.iteritems()}
61 |     return min(distances, key=distances.get)
62 | 
63 | if __name__ == "__main__":
64 |     main()
65 | 


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/src/mnist_loader.py:
--------------------------------------------------------------------------------
 1 | """
 2 | mnist_loader
 3 | ~~~~~~~~~~~~
 4 | 
 5 | A library to load the MNIST image data.  For details of the data
 6 | structures that are returned, see the doc strings for ``load_data``
 7 | and ``load_data_wrapper``.  In practice, ``load_data_wrapper`` is the
 8 | function usually called by our neural network code.
 9 | """
10 | 
11 | #### Libraries
12 | # Standard library
13 | import cPickle
14 | import gzip
15 | 
16 | # Third-party libraries
17 | import numpy as np
18 | 
19 | def load_data():
20 |     """Return the MNIST data as a tuple containing the training data,
21 |     the validation data, and the test data.
22 | 
23 |     The ``training_data`` is returned as a tuple with two entries.
24 |     The first entry contains the actual training images.  This is a
25 |     numpy ndarray with 50,000 entries.  Each entry is, in turn, a
26 |     numpy ndarray with 784 values, representing the 28 * 28 = 784
27 |     pixels in a single MNIST image.
28 | 
29 |     The second entry in the ``training_data`` tuple is a numpy ndarray
30 |     containing 50,000 entries.  Those entries are just the digit
31 |     values (0...9) for the corresponding images contained in the first
32 |     entry of the tuple.
33 | 
34 |     The ``validation_data`` and ``test_data`` are similar, except
35 |     each contains only 10,000 images.
36 | 
37 |     This is a nice data format, but for use in neural networks it's
38 |     helpful to modify the format of the ``training_data`` a little.
39 |     That's done in the wrapper function ``load_data_wrapper()``, see
40 |     below.
41 |     """
42 |     f = gzip.open('../data/mnist.pkl.gz', 'rb')
43 |     training_data, validation_data, test_data = cPickle.load(f)
44 |     f.close()
45 |     return (training_data, validation_data, test_data)
46 | 
47 | def load_data_wrapper():
48 |     """Return a tuple containing ``(training_data, validation_data,
49 |     test_data)``. Based on ``load_data``, but the format is more
50 |     convenient for use in our implementation of neural networks.
51 | 
52 |     In particular, ``training_data`` is a list containing 50,000
53 |     2-tuples ``(x, y)``.  ``x`` is a 784-dimensional numpy.ndarray
54 |     containing the input image.  ``y`` is a 10-dimensional
55 |     numpy.ndarray representing the unit vector corresponding to the
56 |     correct digit for ``x``.
57 | 
58 |     ``validation_data`` and ``test_data`` are lists containing 10,000
59 |     2-tuples ``(x, y)``.  In each case, ``x`` is a 784-dimensional
60 |     numpy.ndarry containing the input image, and ``y`` is the
61 |     corresponding classification, i.e., the digit values (integers)
62 |     corresponding to ``x``.
63 | 
64 |     Obviously, this means we're using slightly different formats for
65 |     the training data and the validation / test data.  These formats
66 |     turn out to be the most convenient for use in our neural network
67 |     code."""
68 |     tr_d, va_d, te_d = load_data()
69 |     training_inputs = [np.reshape(x, (784, 1)) for x in tr_d[0]]
70 |     training_results = [vectorized_result(y) for y in tr_d[1]]
71 |     training_data = zip(training_inputs, training_results)
72 |     validation_inputs = [np.reshape(x, (784, 1)) for x in va_d[0]]
73 |     validation_data = zip(validation_inputs, va_d[1])
74 |     test_inputs = [np.reshape(x, (784, 1)) for x in te_d[0]]
75 |     test_data = zip(test_inputs, te_d[1])
76 |     return (training_data, validation_data, test_data)
77 | 
78 | def vectorized_result(j):
79 |     """Return a 10-dimensional unit vector with a 1.0 in the jth
80 |     position and zeroes elsewhere.  This is used to convert a digit
81 |     (0...9) into a corresponding desired output from the neural
82 |     network."""
83 |     e = np.zeros((10, 1))
84 |     e[j] = 1.0
85 |     return e
86 | 


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/src/mnist_svm.py:
--------------------------------------------------------------------------------
 1 | """
 2 | mnist_svm
 3 | ~~~~~~~~~
 4 | 
 5 | A classifier program for recognizing handwritten digits from the MNIST
 6 | data set, using an SVM classifier."""
 7 | 
 8 | #### Libraries
 9 | # My libraries
10 | import mnist_loader 
11 | 
12 | # Third-party libraries
13 | from sklearn import svm
14 | 
15 | def svm_baseline():
16 |     training_data, validation_data, test_data = mnist_loader.load_data()
17 |     # train
18 |     clf = svm.SVC()
19 |     clf.fit(training_data[0], training_data[1])
20 |     # test
21 |     predictions = [int(a) for a in clf.predict(test_data[0])]
22 |     num_correct = sum(int(a == y) for a, y in zip(predictions, test_data[1]))
23 |     print "Baseline classifier using an SVM."
24 |     print "%s of %s values correct." % (num_correct, len(test_data[1]))
25 | 
26 | if __name__ == "__main__":
27 |     svm_baseline()
28 |     
29 | 


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/src/network.py:
--------------------------------------------------------------------------------
  1 | """
  2 | network.py
  3 | ~~~~~~~~~
  4 | 
  5 | A module to implement the stochastic gradient descent learning
  6 | algorithm for a feedforward neural network.  Gradients are calculated
  7 | using backpropagation.  Note that I have focused on making the code
  8 | simple, easily readable, and easily modifiable.  It is not optimized,
  9 | and omits many desirable features.
 10 | """
 11 | 
 12 | #### Libraries
 13 | # Standard library
 14 | import random
 15 | 
 16 | # Third-party libraries
 17 | import numpy as np
 18 | 
 19 | class Network(object):
 20 | 
 21 |     def __init__(self, sizes):
 22 |         """The list ``sizes`` contains the number of neurons in the
 23 |         respective layers of the network.  For example, if the list
 24 |         was [2, 3, 1] then it would be a three-layer network, with the
 25 |         first layer containing 2 neurons, the second layer 3 neurons,
 26 |         and the third layer 1 neuron.  The biases and weights for the
 27 |         network are initialized randomly, using a Gaussian
 28 |         distribution with mean 0, and variance 1.  Note that the first
 29 |         layer is assumed to be an input layer, and by convention we
 30 |         won't set any biases for those neurons, since biases are only
 31 |         ever used in computing the outputs from later layers."""
 32 |         self.num_layers = len(sizes)
 33 |         self.sizes = sizes
 34 |         self.biases = [np.random.randn(y, 1) for y in sizes[1:]]
 35 |         self.weights = [np.random.randn(y, x)
 36 |                         for x, y in zip(sizes[:-1], sizes[1:])]
 37 | 
 38 |     def feedforward(self, a):
 39 |         """Return the output of the network if ``a`` is input."""
 40 |         for b, w in zip(self.biases, self.weights):
 41 |             a = sigmoid(np.dot(w, a)+b)
 42 |         return a
 43 | 
 44 |     def SGD(self, training_data, epochs, mini_batch_size, eta,
 45 |             test_data=None):
 46 |         """Train the neural network using mini-batch stochastic
 47 |         gradient descent.  The ``training_data`` is a list of tuples
 48 |         ``(x, y)`` representing the training inputs and the desired
 49 |         outputs.  The other non-optional parameters are
 50 |         self-explanatory.  If ``test_data`` is provided then the
 51 |         network will be evaluated against the test data after each
 52 |         epoch, and partial progress printed out.  This is useful for
 53 |         tracking progress, but slows things down substantially."""
 54 |         if test_data: n_test = len(test_data)
 55 |         n = len(training_data)
 56 |         for j in xrange(epochs):
 57 |             random.shuffle(training_data)
 58 |             mini_batches = [
 59 |                 training_data[k:k+mini_batch_size]
 60 |                 for k in xrange(0, n, mini_batch_size)]
 61 |             for mini_batch in mini_batches:
 62 |                 self.update_mini_batch(mini_batch, eta)
 63 |             if test_data:
 64 |                 print "Epoch {0}: {1} / {2}".format(
 65 |                     j, self.evaluate(test_data), n_test)
 66 |             else:
 67 |                 print "Epoch {0} complete".format(j)
 68 | 
 69 |     def update_mini_batch(self, mini_batch, eta):
 70 |         """Update the network's weights and biases by applying
 71 |         gradient descent using backpropagation to a single mini batch.
 72 |         The ``mini_batch`` is a list of tuples ``(x, y)``, and ``eta``
 73 |         is the learning rate."""
 74 |         nabla_b = [np.zeros(b.shape) for b in self.biases]
 75 |         nabla_w = [np.zeros(w.shape) for w in self.weights]
 76 |         for x, y in mini_batch:
 77 |             delta_nabla_b, delta_nabla_w = self.backprop(x, y)
 78 |             nabla_b = [nb+dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
 79 |             nabla_w = [nw+dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
 80 |         self.weights = [w-(eta/len(mini_batch))*nw
 81 |                         for w, nw in zip(self.weights, nabla_w)]
 82 |         self.biases = [b-(eta/len(mini_batch))*nb
 83 |                        for b, nb in zip(self.biases, nabla_b)]
 84 | 
 85 |     def backprop(self, x, y):
 86 |         """Return a tuple ``(nabla_b, nabla_w)`` representing the
 87 |         gradient for the cost function C_x.  ``nabla_b`` and
 88 |         ``nabla_w`` are layer-by-layer lists of numpy arrays, similar
 89 |         to ``self.biases`` and ``self.weights``."""
 90 |         nabla_b = [np.zeros(b.shape) for b in self.biases]
 91 |         nabla_w = [np.zeros(w.shape) for w in self.weights]
 92 |         # feedforward
 93 |         activation = x
 94 |         activations = [x] # list to store all the activations, layer by layer
 95 |         zs = [] # list to store all the z vectors, layer by layer
 96 |         for b, w in zip(self.biases, self.weights):
 97 |             z = np.dot(w, activation)+b
 98 |             zs.append(z)
 99 |             activation = sigmoid(z)
100 |             activations.append(activation)
101 |         # backward pass
102 |         delta = self.cost_derivative(activations[-1], y) * \
103 |             sigmoid_prime(zs[-1])
104 |         nabla_b[-1] = delta
105 |         nabla_w[-1] = np.dot(delta, activations[-2].transpose())
106 |         # Note that the variable l in the loop below is used a little
107 |         # differently to the notation in Chapter 2 of the book.  Here,
108 |         # l = 1 means the last layer of neurons, l = 2 is the
109 |         # second-last layer, and so on.  It's a renumbering of the
110 |         # scheme in the book, used here to take advantage of the fact
111 |         # that Python can use negative indices in lists.
112 |         for l in xrange(2, self.num_layers):
113 |             z = zs[-l]
114 |             sp = sigmoid_prime(z)
115 |             delta = np.dot(self.weights[-l+1].transpose(), delta) * sp
116 |             nabla_b[-l] = delta
117 |             nabla_w[-l] = np.dot(delta, activations[-l-1].transpose())
118 |         return (nabla_b, nabla_w)
119 | 
120 |     def evaluate(self, test_data):
121 |         """Return the number of test inputs for which the neural
122 |         network outputs the correct result. Note that the neural
123 |         network's output is assumed to be the index of whichever
124 |         neuron in the final layer has the highest activation."""
125 |         test_results = [(np.argmax(self.feedforward(x)), y)
126 |                         for (x, y) in test_data]
127 |         return sum(int(x == y) for (x, y) in test_results)
128 | 
129 |     def cost_derivative(self, output_activations, y):
130 |         """Return the vector of partial derivatives \partial C_x /
131 |         \partial a for the output activations."""
132 |         return (output_activations-y)
133 | 
134 | #### Miscellaneous functions
135 | def sigmoid(z):
136 |     """The sigmoid function."""
137 |     return 1.0/(1.0+np.exp(-z))
138 | 
139 | def sigmoid_prime(z):
140 |     """Derivative of the sigmoid function."""
141 |     return sigmoid(z)*(1-sigmoid(z))
142 | 


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/src/old/blog/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/src/old/blog/__init__.py


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/src/old/blog/common_knowledge.py:
--------------------------------------------------------------------------------
 1 | """
 2 | common_knowledge
 3 | ~~~~~~~~~~~~~~~~
 4 | 
 5 | Try to determine whether or not it's possible to relate the
 6 | descriptions given by two different autoencoders.
 7 | 
 8 | """
 9 | 
10 | #### Libraries
11 | # My libraries
12 | from backprop2 import Network, sigmoid_vec
13 | import mnist_loader
14 | 
15 | # Third-party libraries
16 | import matplotlib
17 | import matplotlib.pyplot as plt
18 | import numpy as np
19 | 
20 | 
21 | #### Parameters
22 | # Size of the training sets.  May range from 1000 to 12,500.  Lower
23 | # will be faster, higher will give more accuracy.
24 | SIZE = 5000 
25 | # Number of hidden units in the autoencoder
26 | HIDDEN = 30
27 | 
28 | print "\nGenerating training data"
29 | training_data, _, _ = mnist_loader.load_data_nn()
30 | td_1 = [(x, x) for x, _ in training_data[0:SIZE]]
31 | td_2 = [(x, x) for x, _ in training_data[12500:12500+SIZE]]
32 | td_3 = [x for x, _ in training_data[25000:25000+SIZE]]
33 | test = [x for x, _ in training_data[37500:37500+SIZE]]
34 | 
35 | print "\nFinding first autoencoder"
36 | ae_1 = Network([784, HIDDEN, 784])
37 | ae_1.SGD(td_1, 4, 10, 0.01, 0.05)
38 | 
39 | print "\nFinding second autoencoder"
40 | ae_2 = Network([784, HIDDEN, 784])
41 | ae_2.SGD(td_1, 4, 10, 0.01, 0.05)
42 | 
43 | print "\nGenerating encoded training data"
44 | encoded_td_1 = [sigmoid_vec(np.dot(ae_1.weights[0], x)+ae_1.biases[0])
45 |                 for x in td_3]
46 | encoded_td_2 = [sigmoid_vec(np.dot(ae_2.weights[0], x)+ae_2.biases[0])
47 |                 for x in td_3]
48 | encoded_training_data = zip(encoded_td_1, encoded_td_2)
49 | 
50 | print "\nFinding mapping between theories"
51 | net = Network([HIDDEN, HIDDEN])
52 | net.SGD(encoded_training_data, 6, 10, 0.01, 0.05)
53 | 
54 | print """\nBaseline for comparison: decompress with the first autoencoder"""
55 | print """and compress with the second autoencoder"""
56 | encoded_test_1 = [sigmoid_vec(np.dot(ae_1.weights[0], x)+ae_1.biases[0])
57 |                   for x in test]
58 | encoded_test_2 = [sigmoid_vec(np.dot(ae_2.weights[0], x)+ae_2.biases[0])
59 |                   for x in test]
60 | test_data = zip(encoded_test_1, encoded_test_2)
61 | net_baseline = Network([HIDDEN, 784, HIDDEN])
62 | net_baseline.biases[0] = ae_1.biases[1]
63 | net_baseline.weights[0] = ae_1.weights[1]
64 | net_baseline.biases[1] = ae_2.biases[0]
65 | net_baseline.weights[1] = ae_2.weights[0]
66 | error_baseline = sum(np.linalg.norm(net_baseline.feedforward(x)-y, 1) 
67 |                      for (x, y) in test_data)
68 | print "Baseline average l1 error per training image: %s" % (error_baseline / SIZE,)
69 | 
70 | print "\nComparing theories with a simple interconversion"
71 | print "Mean desired output activation: %s" % (
72 |     sum(y.mean() for _, y in test_data) / SIZE,)
73 | error = sum(np.linalg.norm(net.feedforward(x)-y, 1) for (x, y) in test_data)
74 | print "Average l1 error per training image: %s" % (error / SIZE,)
75 | 
76 | print "\nComputing fiducial image inputs"
77 | fiducial_images_1 = [
78 |     ae_1.weights[0][j,:].reshape(28,28)/np.linalg.norm(net.weights[0][j,:])
79 |     for j in range(HIDDEN)]
80 | fiducial_images_2 = [
81 |     ae_2.weights[0][j,:].reshape(28,28)/np.linalg.norm(net.weights[0][j,:])
82 |     for j in range(HIDDEN)]
83 | image = np.concatenate([np.concatenate(fiducial_images_1, axis=1), 
84 |                         np.concatenate(fiducial_images_2, axis=1)])
85 | fig = plt.figure()
86 | ax = fig.add_subplot(111)
87 | ax.matshow(image, cmap = matplotlib.cm.binary)
88 | plt.xticks(np.array([]))
89 | plt.yticks(np.array([]))
90 | plt.show()
91 | 


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/src/old/cost_vs_iterations.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/src/old/cost_vs_iterations.png


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/src/old/cost_vs_iterations_trapped.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/src/old/cost_vs_iterations_trapped.png


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/src/old/deep_autoencoder.py:
--------------------------------------------------------------------------------
  1 | """
  2 | deep_autoencoder
  3 | ~~~~~~~~~~~~~~~~
  4 | 
  5 | A module which implements deep autoencoders.  
  6 | """
  7 | 
  8 | #### Libraries
  9 | # Standard library
 10 | import random
 11 | 
 12 | # My libraries
 13 | from backprop2 import Network, sigmoid_vec
 14 | 
 15 | # Third-party libraries
 16 | import numpy as np
 17 | 
 18 | 
 19 | def plot_helper(x):
 20 |     import matplotlib
 21 |     import matplotlib.pyplot as plt
 22 |     x = np.reshape(x, (-1, 28))
 23 |     fig = plt.figure()
 24 |     ax = fig.add_subplot(1, 1, 1)
 25 |     ax.matshow(x, cmap = matplotlib.cm.binary)
 26 |     plt.xticks(np.array([]))
 27 |     plt.yticks(np.array([]))
 28 |     plt.show()
 29 | 
 30 | 
 31 | class DeepAutoencoder(Network):
 32 | 
 33 |     def __init__(self, layers):
 34 |         """
 35 |         The list ``layers`` specifies the sizes of the nested
 36 |         autoencoders.  For example, if ``layers`` is [50, 20, 10] then
 37 |         the deep autoencoder will be a neural network with layers of
 38 |         size [50, 20, 10, 20, 50]."""
 39 |         self.layers = layers
 40 |         Network.__init__(self, layers+layers[-2::-1])
 41 | 
 42 |     def train(self, training_data, epochs, mini_batch_size, eta,
 43 |               lmbda):
 44 |         """
 45 |         Train the DeepAutoencoder.  The ``training_data`` is a list of
 46 |         training inputs, ``x``, ``mini_batch_size`` is a single
 47 |         positive integer, and ``epochs``, ``eta``, ``lmbda`` are lists
 48 |         of parameters, with the different list members corresponding
 49 |         to the different stages of training.  For example, ``eta[0]``
 50 |         is the learning rate used for the first nested autoencoder,
 51 |         ``eta[1]`` is the learning rate for the second nested
 52 |         autoencoder, and so on.  ``eta[-1]`` is the learning rate used
 53 |         for the final stage of fine-tuning.
 54 |         """
 55 |         print "\nTraining a %s deep autoencoder" % (
 56 |             "-".join([str(j) for j in self.sizes]),)
 57 |         training_data = double(training_data)
 58 |         cur_training_data = training_data[::]
 59 |         for j in range(len(self.layers)-1):
 60 |             print "\nTraining the %s-%s-%s nested autoencoder" % (
 61 |                 self.layers[j], self.layers[j+1], self.layers[j])
 62 |             print "%s epochs, mini-batch size %s, eta = %s, lambda = %s" % (
 63 |                 epochs[j], mini_batch_size, eta[j], lmbda[j])
 64 |             self.train_nested_autoencoder(
 65 |                 j, cur_training_data, epochs[j], mini_batch_size, eta[j],
 66 |                 lmbda[j])
 67 |             cur_training_data = [
 68 |                 (sigmoid_vec(np.dot(net.weights[0], x)+net.biases[0]),)*2
 69 |                 for (x, _) in cur_training_data]
 70 |         print "\nFine-tuning network weights with backpropagation"
 71 |         print "%s epochs, mini-batch size %s, eta = %s, lambda = %s" % (
 72 |                 epochs[-1], mini_batch_size, eta[-1], lmbda[-1])
 73 |         self.SGD(training_data, epochs[-1], mini_batch_size, eta[-1],
 74 |                  lmbda[-1])
 75 | 
 76 |     def train_nested_autoencoder(
 77 |         self, j, encoded_training_data, epochs, mini_batch_size, eta, lmbda):
 78 |         """
 79 |         Train the nested autoencoder that starts at layer ``j`` in the
 80 |         deep autoencoder.  Note that ``encoded_training_data`` is a
 81 |         list with entries of the form ``(x, x)``, where the ``x`` are
 82 |         encoded training inputs for layer ``j``."""
 83 |         net = Network([self.layers[j], self.layers[j+1], self.layers[j]])
 84 |         net.biases[0] = self.biases[j]
 85 |         net.biases[1] = self.biases[-j-1]
 86 |         net.weights[0] = self.weights[j]
 87 |         net.weights[1] = self.weights[-j-1]
 88 |         net.SGD(encoded_training_data, epochs, mini_batch_size, eta, lmbda)
 89 |         self.biases[j] = net.biases[0]
 90 |         self.biases[-j-1] = net.biases[1]
 91 |         self.weights[j] = net.weights[0]
 92 |         self.weights[-j-1] = net.weights[1]
 93 | 
 94 |     def train_nested_autoencoder_repl(
 95 |         self, j, training_data, epochs, mini_batch_size, eta, lmbda):
 96 |         """
 97 |         This is a convenience method that can be used from the REPL to
 98 |         train the nested autoencoder that starts at level ``j`` in the
 99 |         deep autoencoder.  Note that ``training_data`` is the input
100 |         data for the first layer of the network, and is a list of
101 |         entries ``x``."""
102 |         self.train_nested_autoencoder(
103 |             j, 
104 |             double(
105 |                 [self.feedforward(x, start=0, end=j) for x in training_data]),
106 |             epochs, mini_batch_size, eta, lmbda)
107 | 
108 |     def feature(self, j, k):
109 |         """
110 |         Return the output if neuron number ``k`` in layer ``j`` is
111 |         activated, and all others are not active.  """
112 |         a = np.zeros((self.sizes[j], 1))
113 |         a[k] = 1.0
114 |         return self.feedforward(a, start=j, end=self.num_layers)
115 | 
116 | def double(l):
117 |     return [(x, x) for x in l]
118 | 
119 | 


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/src/old/deep_learning.py:
--------------------------------------------------------------------------------
 1 | """
 2 | deep_learning
 3 | ~~~~~~~~~~~~~
 4 | 
 5 | Module to do deep learning.  Most of the functionality needed is
 6 | already in the ``backprop2`` and ``deep_autoencoder`` modules, but
 7 | this adds convenience functions to help in doing things like unrolling
 8 | deep autoencoders, and adding and training a classifier layer."""
 9 | 
10 | # My Libraries
11 | from backprop2 import Network
12 | from deep_autoencoder import DeepAutoencoder
13 | 
14 | def unroll(deep_autoencoder):
15 |     """
16 |     Return a Network that contains the compression stage of the
17 |     ``deep_autoencoder``."""
18 |     net = Network(deep_autoencoder.layers)
19 |     net.weights = deep_autoencoder.weights[:len(deep_autoencoder.layers)-1]
20 |     net.biases = deep_autoencoder.biases[:len(deep_autoencoder.layers)-1]
21 |     return net
22 | 
23 | def add_classifier_layer(net, num_outputs):
24 |     """
25 |     Return the Network ``net``, but with an extra layer containing
26 |     ``num_outputs`` neurons appended."""
27 |     net_classifier = Network(net.sizes+[num_outputs])
28 |     net_classifier.weights[:-1] = net.weights
29 |     net_classifier.biases[:-1] = net.biases
30 |     return net_classifier
31 | 
32 | def SGD_final_layer(
33 |     self, training_data, epochs, mini_batch_size, eta, lmbda):
34 |     """
35 |     Run SGD on the final layer of the Network ``self``.  Note that
36 |     ``training_data`` is the input to the whole Network, not the
37 |     encoded training data input to the final layer. 
38 |     """
39 |     encoded_training_data = [
40 |         (self.feedforward(x, start=0, end=self.num_layers-2), y) 
41 |         for x, y in training_data]
42 |     net = Network(self.sizes[-2:])
43 |     net.biases[0] = self.biases[-1]
44 |     net.weights[0] = self.weights[-1]
45 |     net.SGD(encoded_training_data, epochs, mini_batch_size, eta, lmbda)
46 |     self.biases[-1] = net.biases[0]
47 |     self.weights[-1] = net.weights[0]
48 | 
49 | 
50 | # Add the SGD_final_layer method to the Network class
51 | Network.SGD_final_layer = SGD_final_layer
52 | 


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/src/old/gradient_descent_hack.py:
--------------------------------------------------------------------------------
 1 | """
 2 | gradient_descent_hack
 3 | ~~~~~~~~~~~~~~~~~~~~~
 4 | 
 5 | This program uses gradient descent to learn weights and biases for a
 6 | three-neuron network to compute the XOR function.  The program is a
 7 | quick-and-dirty hack meant to illustrate the basic ideas of gradient
 8 | descent, not a cleanly-designed and generalizable implementation."""
 9 | 
10 | #### Libraries
11 | # Third-party libraries
12 | import matplotlib.pyplot as plt
13 | import numpy as np
14 | 
15 | def sigmoid(z):
16 |     return 1.0/(1.0+np.exp(-z))
17 | 
18 | def neuron(w, x):
19 |     """ Return the output from the sigmoid neuron with weights ``w``
20 |     and inputs ``x``.  Both are numpy arrays, with three and two
21 |     elements, respectively.  The first input weight is the bias."""
22 |     return sigmoid(w[0]+np.inner(w[1:], x))
23 | 
24 | def h(w, x):
25 |     """ Return the output from the three-neuron network with weights
26 |     ``w`` and inputs ``x``.  Note that ``w`` is a numpy array with
27 |     nine elements, consisting of three weights for each neuron (the
28 |     bias plus two input weights).  ``x`` is a numpy array with just
29 |     two elements."""
30 |     neuron1_out = neuron(w[0:3], x) # top left neuron
31 |     neuron2_out = neuron(w[3:6], x) # bottom left neuron
32 |     return neuron(w[6:9], np.array([neuron1_out, neuron2_out]))
33 | 
34 | # inputs and corresponding outputs for the function we're computing (XOR)
35 | INPUTS = [[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]] 
36 | OUTPUTS = [0.0, 1.0, 1.0, 0.0]
37 | 
38 | def cost(w):
39 |     """ Return the cost when the neural network has weights ``w``.
40 |     The cost is computed with respect to the XOR function."""
41 |     return 0.5 * sum((y-h(w, np.array(x)))**2 for x, y in zip(INPUTS, OUTPUTS))
42 | 
43 | def partial(f, k, w):
44 |     """ Return the partial derivative of the function ``f`` with
45 |     respect to the ``k``th variable, at location ``w``.  Note that
46 |     ``f`` must take a numpy array as input, and the partial derivative
47 |     is evaluated with respect to the ``k``th element in that array.
48 |     Similarly, ``w`` is a numpy array which can be used as input to
49 |     ``f``."""
50 |     w_plus, w_minus = w.copy(), w.copy()
51 |     w_plus[k] += 0.01 # using epsilon = 0.01
52 |     w_minus[k] += -0.01
53 |     return (f(w_plus)-f(w_minus))/0.02
54 |     
55 | def gradient_descent(cost, eta, n):
56 |     """ Perform ``n`` iterations of the gradient descent algorithm to
57 |     minimize the ``cost`` function, with a learning rate ``eta``.
58 |     Return a tuple whose first entry is an array containing the final
59 |     weights, and whose second entry is a list of the values the
60 |     ``cost`` function took at different iterations."""
61 |     w = np.random.uniform(-1, 1, 9) # initialize weights randomly
62 |     costs = []
63 |     for j in xrange(n):
64 |         c = cost(w)
65 |         print "Current cost: {0:.3f}".format(c)
66 |         costs.append(c)
67 |         gradient = [partial(cost, k, w) for k in xrange(9)]
68 |         w = np.array([wt-eta*d for wt, d in zip(w, gradient)])
69 |     return w, costs
70 | 
71 | def main():
72 |     """ Perform gradient descent to find weights for a sigmoid neural
73 |     network to compute XOR.  10,000 iterations are used.  Outputs the
74 |     final value of the cost function, the final weights, and plots a
75 |     graph of cost as a function of iteration."""
76 |     w, costs = gradient_descent(cost, 0.1, 10000)
77 |     print "\nFinal cost: {0:.3f}".format(cost(w))
78 |     print "\nFinal weights: %s" % w
79 |     plt.plot(np.array(costs))
80 |     plt.xlabel('iteration')
81 |     plt.ylabel('cost')
82 |     plt.title('How cost decreases with the number of iterations')
83 |     plt.show()
84 | 
85 | if __name__ == "__main__":
86 |     main()
87 | 


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/src/old/mnist_100_30_deep_autoencoder.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/src/old/mnist_100_30_deep_autoencoder.png


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/src/old/mnist_100_unit_autoencoder.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/src/old/mnist_100_unit_autoencoder.png


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/src/old/mnist_10_unit_autoencoder.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/src/old/mnist_10_unit_autoencoder.png


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/src/old/mnist_30_component_pca.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/src/old/mnist_30_component_pca.png


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/src/old/mnist_30_unit_autoencoder.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/src/old/mnist_30_unit_autoencoder.png


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/src/old/mnist_autoencoder.py:
--------------------------------------------------------------------------------
 1 | """
 2 | mnist_autoencoder
 3 | ~~~~~~~~~~~~~~~~~
 4 | 
 5 | Implements an autoencoder for the MNIST data.  The program can do two
 6 | things: (1) plot the autoencoder's output for the first ten images in
 7 | the MNIST test set; and (2) use the autoencoder to build a classifier.
 8 | The program is a quick-and-dirty hack --- we'll do things in a more
 9 | systematic way in the module ``deep_autoencoder``.
10 | """
11 | 
12 | # My Libraries
13 | from backprop2 import Network
14 | import mnist_loader 
15 | 
16 | # Third-party libraries
17 | import matplotlib
18 | import matplotlib.pyplot as plt
19 | import numpy as np
20 | 
21 | def autoencoder_results(hidden_units):
22 |     """
23 |     Train an autoencoder using the MNIST training data and plot the
24 |     results when the first ten MNIST test images are passed through
25 |     the autoencoder.
26 |     """
27 |     training_data, test_inputs, actual_test_results = \
28 |         mnist_loader.load_data_nn()
29 |     net = train_autoencoder(hidden_units, training_data)
30 |     plot_test_results(net, test_inputs)
31 | 
32 | def train_autoencoder(hidden_units, training_data):
33 |     "Return a trained autoencoder."
34 |     autoencoder_training_data = [(x, x) for x, _ in training_data]
35 |     net = Network([784, hidden_units, 784])
36 |     net.SGD(autoencoder_training_data, 6, 10, 0.01, 0.05)
37 |     return net
38 | 
39 | def plot_test_results(net, test_inputs):
40 |     """
41 |     Plot the results after passing the first ten test MNIST digits through
42 |     the autoencoder ``net``."""
43 |     fig = plt.figure()
44 |     ax = fig.add_subplot(111)
45 |     images_in = [test_inputs[j].reshape(-1, 28) for j in range(10)]
46 |     images_out = [net.feedforward(test_inputs[j]).reshape(-1, 28) 
47 |                   for j in range(10)]
48 |     image_in = np.concatenate(images_in, axis=1)
49 |     image_out = np.concatenate(images_out, axis=1)
50 |     image = np.concatenate([image_in, image_out])
51 |     ax.matshow(image, cmap = matplotlib.cm.binary)
52 |     plt.xticks(np.array([]))
53 |     plt.yticks(np.array([]))
54 |     plt.show()
55 | 
56 | def classifier(hidden_units, n_unlabeled_inputs, n_labeled_inputs):
57 |     """
58 |     Train a semi-supervised classifier.  We begin with pretraining,
59 |     creating an autoencoder which uses ``n_unlabeled_inputs`` from the
60 |     MNIST training data.  This is then converted into a classifier
61 |     which is fine-tuned using the ``n_labeled_inputs``.
62 | 
63 |     For comparison a classifier is also created which does not make
64 |     use of the unlabeled data.
65 |     """
66 |     training_data, test_inputs, actual_test_results = \
67 |         mnist_loader.load_data_nn()
68 |     print "\nUsing pretraining and %s items of unlabeled data" %\
69 |         n_unlabeled_inputs
70 |     net_ae = train_autoencoder(hidden_units, training_data[:n_unlabeled_inputs])
71 |     net_c = Network([784, hidden_units, 10])
72 |     net_c.biases = net_ae.biases[:1]+[np.random.randn(10, 1)/np.sqrt(10)]
73 |     net_c.weights = net_ae.weights[:1]+\
74 |         [np.random.randn(10, hidden_units)/np.sqrt(10)]
75 |     net_c.SGD(training_data[-n_labeled_inputs:], 300, 10, 0.01, 0.05)
76 |     print "Result on test data: %s / %s" % (
77 |         net_c.evaluate(test_inputs, actual_test_results), len(test_inputs))
78 |     print "Training a network with %s items of training data" % n_labeled_inputs
79 |     net = Network([784, hidden_units, 10])
80 |     net.SGD(training_data[-n_labeled_inputs:], 300, 10, 0.01, 0.05)
81 |     print "Result on test data: %s / %s" % (
82 |         net.evaluate(test_inputs, actual_test_results), len(test_inputs))
83 |     return net_c
84 | 


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/src/old/mnist_pca.py:
--------------------------------------------------------------------------------
 1 | """
 2 | mnist_pca
 3 | ~~~~~~~~~
 4 | 
 5 | Use PCA to reconstruct some of the MNIST test digits.
 6 | """
 7 | 
 8 | # My libraries
 9 | import mnist_loader
10 | 
11 | # Third-party libraries
12 | import matplotlib
13 | import matplotlib.pyplot as plt
14 | import numpy as np
15 | from sklearn.decomposition import RandomizedPCA
16 | 
17 | 
18 | # Training
19 | training_data, test_inputs, actual_test_results = mnist_loader.load_data_nn()
20 | pca = RandomizedPCA(n_components=30)
21 | nn_images = [x for (x, y) in training_data]
22 | pca_images = np.concatenate(nn_images, axis=1).transpose()
23 | pca_r = pca.fit(pca_images)
24 | 
25 | # Try PCA on first ten test images
26 | test_images = np.array(test_inputs[:10]).reshape((10,784))
27 | test_outputs = pca_r.inverse_transform(pca_r.transform(test_images))
28 | 
29 | # Plot the first ten test images and the corresponding outputs
30 | fig = plt.figure()
31 | ax = fig.add_subplot(111)
32 | images_in = [test_inputs[j].reshape(-1, 28) for j in range(10)]
33 | images_out = [test_outputs[j].reshape(-1, 28) for j in range(10)]
34 | image_in = np.concatenate(images_in, axis=1)
35 | image_out = np.concatenate(images_out, axis=1)
36 | image = np.concatenate([image_in, image_out])
37 | ax.matshow(image, cmap = matplotlib.cm.binary)
38 | plt.xticks(np.array([]))
39 | plt.yticks(np.array([]))
40 | plt.show()
41 | 


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/src/old/perceptron_learning.py:
--------------------------------------------------------------------------------
 1 | """
 2 | perceptron_learning
 3 | ~~~~~~~~~~~~~~~~~~~
 4 | 
 5 | Demonstrates how a perceptron can learn the NAND gate, using the
 6 | perceptron learning algorithm."""
 7 | 
 8 | #### Libraries
 9 | # Third-party library
10 | import numpy as np
11 | 
12 | class Perceptron(object):
13 |     """ A Perceptron instance can take a function and attempt to
14 |     ``learn`` a bias and set of weights that compute that function,
15 |     using the perceptron learning algorithm."""
16 | 
17 |     def __init__(self, num_inputs=2):
18 |         """ Initialize the perceptron with the bias and all weights
19 |         set to 0.0. ``num_inputs`` is the number of input bits to the
20 |         perceptron."""
21 |         self.num_inputs = num_inputs
22 |         self.bias = 0.0
23 |         self.weights = np.zeros(num_inputs)
24 |         # self.inputs is a convenience attribute.  It's a list containing
25 |         # all possible binary inputs to the perceptron.  E.g., for three
26 |         # inputs it is: [np.array([0, 0, 0]), np.array([0, 0, 1]), ...]
27 |         self.inputs = [np.array([int(y)
28 |                         for y in bin(x).lstrip("0b").zfill(num_inputs)])
29 |                        for x in xrange(2**num_inputs)]
30 | 
31 |     def output(self, x):
32 |         """ Return the output (0 or 1) from the perceptron, with input
33 |         ``x``."""
34 |         return 1 if np.inner(self.weights, x)+self.bias > 0 else 0
35 | 
36 |     def learn(self, f, eta=0.1):
37 |         """ Find a bias and a set of weights for a perceptron that
38 |         computes the function ``f``. ``eta`` is the learning rate, and
39 |         should be a small positive number.  Does not terminate when
40 |         the function cannot be computed using a perceptron."""
41 |         # initialize the bias and weights with random values
42 |         self.bias = np.random.normal()
43 |         self.weights = np.random.randn(self.num_inputs)
44 |         number_of_errors = -1
45 |         while number_of_errors != 0:
46 |             number_of_errors = 0
47 |             print "Beginning iteration"
48 |             print "Bias: {:.3f}".format(self.bias)
49 |             print "Weights:", ", ".join(
50 |                 "{:.3f}".format(wt) for wt in self.weights)
51 |             for x in self.inputs:
52 |                 error = f(x)-self.output(x)
53 |                 if error:
54 |                     number_of_errors += 1
55 |                     self.bias = self.bias+eta*error
56 |                     self.weights = self.weights+eta*error*x
57 |             print "Number of errors:", number_of_errors, "\n"
58 | 
59 | def f(x):
60 |     """ Target function for the perceptron learning algorithm.  I've
61 |     chosen the NAND gate, but any function is okay, with the caveat
62 |     that the algorithm won't terminate if ``f`` cannot be computed by
63 |     a perceptron."""
64 |     return int(not (x[0] and x[1]))
65 | 
66 | if __name__ == "__main__":
67 |     Perceptron(2).learn(f, 0.1)
68 | 


--------------------------------------------------------------------------------
/neural-networks-and-deep-learning/src/run_network.py:
--------------------------------------------------------------------------------
 1 | 
 2 | # coding: utf-8
 3 | 
 4 | # # Network from Nielsen's Chapter 1
 5 | # http://neuralnetworksanddeeplearning.com/chap1.html#implementing_our_network_to_classify_digits
 6 | 
 7 | # ## Load MNIST Data
 8 | 
 9 | # In[5]:
10 | 
11 | import mnist_loader
12 | 
13 | 
14 | # In[6]:
15 | 
16 | training_data, validation_data, test_data = mnist_loader.load_data_wrapper()
17 | 
18 | 
19 | # ## Set up Network
20 | 
21 | # In[9]:
22 | 
23 | import network
24 | 
25 | 
26 | # In[10]:
27 | 
28 | # 784 (28 x 28 pixel images) input neurons; 30 hidden neurons; 10 output neurons
29 | net = network.Network([784, 30, 10])
30 | 
31 | 
32 | # ## Train Network
33 | 
34 | # In[12]:
35 | 
36 | # Use stochastic gradient descent over 30 epochs, with mini-batch size of 10, learning rate of 3.0
37 | net.SGD(training_data, 30, 10, 3.0, test_data=test_data)
38 | 
39 | 
40 | # ## Exercise: Create network with just two layers
41 | 
42 | # In[13]:
43 | 
44 | two_layer_net = network.Network([784, 10])
45 | 
46 | 
47 | # In[14]:
48 | 
49 | two_layer_net.SGD(training_data, 10, 10, 1.0, test_data=test_data)
50 | 
51 | 
52 | # In[15]:
53 | 
54 | two_layer_net.SGD(training_data, 10, 10, 2.0, test_data=test_data)
55 | 
56 | 
57 | # In[16]:
58 | 
59 | two_layer_net.SGD(training_data, 10, 10, 3.0, test_data=test_data)
60 | 
61 | 
62 | # In[17]:
63 | 
64 | two_layer_net.SGD(training_data, 10, 10, 4.0, test_data=test_data)
65 | 
66 | 
67 | # In[18]:
68 | 
69 | two_layer_net.SGD(training_data, 20, 10, 3.0, test_data=test_data)
70 | 
71 | 
72 | # In[ ]:
73 | 
74 | 
75 | 
76 | 


--------------------------------------------------------------------------------
/nn-from-scratch/MNIST-loader.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Loading the MNIST data\n",
  8 |     "\n",
  9 |     "This is the MNIST data obtainable at http://yann.lecun.com/exdb/mnist/\n",
 10 |     "\n",
 11 |     "The data is supplied as IDX files compressed in gzip format. The code below unzips the data, converts the IDX file to an ndarray, reshapes and one-hot encodes as necessary, scales the data and finally pickles the data for easy loading into the main script.\n",
 12 |     "\n",
 13 |     "It's worth noting that the pickled data files are not backward compatible with Python 2.X, so if you haven't yet started using Python 3.X then you should download the gzips yourself and run this script locally to generate Python 2.X compatible pickle files. YMMV.\n",
 14 |     "\n",
 15 |     "Finally, the details of the data are available on the website above. But in a nutshell, the training data contains 60 000 images, and the testing data contains 10 000 images. I randomly removed 10 000 of the training data points to set aside as a validation set."
 16 |    ]
 17 |   },
 18 |   {
 19 |    "cell_type": "code",
 20 |    "execution_count": 93,
 21 |    "metadata": {
 22 |     "collapsed": false
 23 |    },
 24 |    "outputs": [],
 25 |    "source": [
 26 |     "import pickle\n",
 27 |     "import gzip\n",
 28 |     "import idx2numpy\n",
 29 |     "import numpy as np\n",
 30 |     "from sklearn.cross_validation import train_test_split\n",
 31 |     "from sklearn.preprocessing import MinMaxScaler"
 32 |    ]
 33 |   },
 34 |   {
 35 |    "cell_type": "code",
 36 |    "execution_count": 94,
 37 |    "metadata": {
 38 |     "collapsed": false
 39 |    },
 40 |    "outputs": [
 41 |     {
 42 |      "name": "stderr",
 43 |      "output_type": "stream",
 44 |      "text": [
 45 |       "/home/nobody/anaconda3/lib/python3.5/site-packages/sklearn/utils/validation.py:420: DataConversionWarning: Data with input dtype uint8 was converted to float64 by MinMaxScaler.\n",
 46 |       "  warnings.warn(msg, DataConversionWarning)\n"
 47 |      ]
 48 |     }
 49 |    ],
 50 |    "source": [
 51 |     "# Uncompress the gzips and convert the IDX files to ndarray\n",
 52 |     "with gzip.open('data/gzips/train-images-idx3-ubyte.gz', 'rb') as f:\n",
 53 |     "    xtrain = idx2numpy.convert_from_file(f)\n",
 54 |     "\n",
 55 |     "with gzip.open('data/gzips/train-labels-idx1-ubyte.gz', 'rb') as f:\n",
 56 |     "    ytrain = idx2numpy.convert_from_file(f)\n",
 57 |     "\n",
 58 |     "# Reshape the images to an [nXm] array\n",
 59 |     "xtrain = xtrain.reshape(len(xtrain),-1)\n",
 60 |     "xtrain = MinMaxScaler().fit_transform(xtrain)\n",
 61 |     "# One-hot encode the y values\n",
 62 |     "ytrain = np.eye(10)[ytrain].reshape(len(ytrain),10)\n",
 63 |     "# Seperate out the validation set. Note: the random_state parameter will ensure you get the same results as me.\n",
 64 |     "xtrain, xval, ytrain, yval = train_test_split(xtrain, ytrain, test_size=10000, random_state=0)\n",
 65 |     "\n",
 66 |     "# Write the pickled files for importing easily into other scripts\n",
 67 |     "with open('data/pickled/xtrain.pickle', 'wb') as f:\n",
 68 |     "    pickle.dump(xtrain, f, pickle.HIGHEST_PROTOCOL)\n",
 69 |     "    \n",
 70 |     "with open('data/pickled/xval.pickle', 'wb') as f:\n",
 71 |     "    pickle.dump(xval, f, pickle.HIGHEST_PROTOCOL)\n",
 72 |     "\n",
 73 |     "with open('data/pickled/ytrain.pickle', 'wb') as f:\n",
 74 |     "    pickle.dump(ytrain, f, pickle.HIGHEST_PROTOCOL)\n",
 75 |     "    \n",
 76 |     "with open('data/pickled/yval.pickle', 'wb') as f:\n",
 77 |     "    pickle.dump(yval, f, pickle.HIGHEST_PROTOCOL)"
 78 |    ]
 79 |   },
 80 |   {
 81 |    "cell_type": "code",
 82 |    "execution_count": 95,
 83 |    "metadata": {
 84 |     "collapsed": false
 85 |    },
 86 |    "outputs": [
 87 |     {
 88 |      "name": "stderr",
 89 |      "output_type": "stream",
 90 |      "text": [
 91 |       "/home/nobody/anaconda3/lib/python3.5/site-packages/sklearn/utils/validation.py:420: DataConversionWarning: Data with input dtype uint8 was converted to float64 by MinMaxScaler.\n",
 92 |       "  warnings.warn(msg, DataConversionWarning)\n"
 93 |      ]
 94 |     }
 95 |    ],
 96 |    "source": [
 97 |     "# As above, but for the test set\n",
 98 |     "with gzip.open('data/gzips/t10k-images-idx3-ubyte.gz', 'rb') as f:\n",
 99 |     "    xtest = idx2numpy.convert_from_file(f)\n",
100 |     "    \n",
101 |     "with gzip.open('data/gzips/t10k-labels-idx1-ubyte.gz', 'rb') as f:\n",
102 |     "    ytest = idx2numpy.convert_from_file(f)\n",
103 |     "\n",
104 |     "xtest = xtest.reshape(len(xtest),-1)\n",
105 |     "xtest = MinMaxScaler().fit_transform(xtest)\n",
106 |     "ytest = np.eye(10)[ytest].reshape(len(ytest),10)\n",
107 |     "\n",
108 |     "with open('data/pickled/xtest.pickle', 'wb') as f:\n",
109 |     "    pickle.dump(xtest, f, pickle.HIGHEST_PROTOCOL)\n",
110 |     "    \n",
111 |     "with open('data/pickled/ytest.pickle', 'wb') as f:\n",
112 |     "    pickle.dump(ytest, f, pickle.HIGHEST_PROTOCOL)"
113 |    ]
114 |   }
115 |  ],
116 |  "metadata": {
117 |   "kernelspec": {
118 |    "display_name": "Python 3",
119 |    "language": "python",
120 |    "name": "python3"
121 |   },
122 |   "language_info": {
123 |    "codemirror_mode": {
124 |     "name": "ipython",
125 |     "version": 3
126 |    },
127 |    "file_extension": ".py",
128 |    "mimetype": "text/x-python",
129 |    "name": "python",
130 |    "nbconvert_exporter": "python",
131 |    "pygments_lexer": "ipython3",
132 |    "version": "3.5.2"
133 |   }
134 |  },
135 |  "nbformat": 4,
136 |  "nbformat_minor": 0
137 | }
138 | 


--------------------------------------------------------------------------------
/nn-from-scratch/README.md:
--------------------------------------------------------------------------------
 1 | # NN from scratch
 2 | 
 3 | Update: I wrote a simple SGD version of the original scipy.optimize script, and then I re-wrote that to incoporate a flexible architecture. Also, I found an error in the weigh update part of the code. It is fixed here.
 4 | 
 5 | The purpose here was to  write a neural network "from scratch", which is to say without using any of the available libraries. The advantage being deeper understanding of the principles and how they work, the disadvantages being performance, versatility and effort.
 6 | 
 7 | This nn incorporates most of the features we've dealt with so far in the course (that is, up to somewhere in week 3): cross entropy, L2 regularization, and improved weight initialization.
 8 | 
 9 | Note: everything is done in Python 3.X so if you ahven't updated yet, expect some things to break (most obviously, print()). Also, if you're on Python 2.X you'll likely want to look at MNIST-loader.ipynb and pickle your own data.
10 | 
11 | MNIST-nn-scipy.ipynb uses the scipy.optimize L_BFGS optimizer to minimize the cost. This is the kind of method that was deployed in the Coursera course I referenced in the top of the file.
12 | 
13 | MNIST-nn-SGD.ipynb removes the optimizer in exchange for standard stochastic gradient descent. This more closely matches what we have been studying thus far in the Nielsen textbook and as such it will be where I develop this script further.
14 | 
15 | MNIST-nn-flex_arch.ipynb is the above SGD-based algorithm but with modifications for a more flexible architecture. This makes the individual steps of forward and backpropogation slightly more opaque, so if you're looking for ease-of-understanding, look elsewhere.
16 | 
17 | Lastly, the MNIST-loader notebook throws warnings about converting uint8 data into float64 during the scaling process. This didn't seem unusual to me. I'm sure I could suppress the warnings, or do the conversion in the array before passing to the scaler.
18 | 
19 | The to do list:
20 | - <del>Incoporate gradient descent</del>
21 | - <del>Create more versatility in terms of number of layers, number of neurons per layer</del>
22 | - Incoporate early stopping
23 | - Incoporate a learning rate schedule
24 | - Make use of the validation data (it's sort of ignored in these notebooks for now)


--------------------------------------------------------------------------------
/nn-from-scratch/data/gzips/t10k-images-idx3-ubyte.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/nn-from-scratch/data/gzips/t10k-images-idx3-ubyte.gz


--------------------------------------------------------------------------------
/nn-from-scratch/data/gzips/t10k-labels-idx1-ubyte.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/nn-from-scratch/data/gzips/t10k-labels-idx1-ubyte.gz


--------------------------------------------------------------------------------
/nn-from-scratch/data/gzips/train-images-idx3-ubyte.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/nn-from-scratch/data/gzips/train-images-idx3-ubyte.gz


--------------------------------------------------------------------------------
/nn-from-scratch/data/gzips/train-labels-idx1-ubyte.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/nn-from-scratch/data/gzips/train-labels-idx1-ubyte.gz


--------------------------------------------------------------------------------
/slides/2017-02-07__katya_vasilaky__ridge_regression.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/slides/2017-02-07__katya_vasilaky__ridge_regression.pdf


--------------------------------------------------------------------------------
/slides/2017-02-07__raphaela_sapire__billion_dollar_AI.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/slides/2017-02-07__raphaela_sapire__billion_dollar_AI.pdf


--------------------------------------------------------------------------------
/slides/2017-03-06__grant_beyleveld__u_net.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/slides/2017-03-06__grant_beyleveld__u_net.pdf


--------------------------------------------------------------------------------
/slides/2017-03-27__karl_habermas__CS224d_assignment1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/slides/2017-03-27__karl_habermas__CS224d_assignment1.pdf


--------------------------------------------------------------------------------
/slides/2017-04-19__claudia_perlich__predictability.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/slides/2017-04-19__claudia_perlich__predictability.pdf


--------------------------------------------------------------------------------
/slides/2017-10-17__thomas_balestri__reinforcement_learning.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/slides/2017-10-17__thomas_balestri__reinforcement_learning.pdf


--------------------------------------------------------------------------------
/slides/2017-12-09__keng_laura__RL.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/slides/2017-12-09__keng_laura__RL.pdf


--------------------------------------------------------------------------------
/slides/2019-10-16_dmitri_nesterenko__Capsule_Nets.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/slides/2019-10-16_dmitri_nesterenko__Capsule_Nets.pdf


--------------------------------------------------------------------------------
/slides/2019-10-16_grant_beyleveld__BERT.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/slides/2019-10-16_grant_beyleveld__BERT.pdf


--------------------------------------------------------------------------------
/weekly-work/week1/MNIST_for_beginners.py:
--------------------------------------------------------------------------------
  1 | 
  2 | # coding: utf-8
  3 | 
  4 | # ### MNIST for Beginners
  5 | # ### from https://www.tensorflow.org/versions/r0.9/tutorials/mnist/beginners/index.html
  6 | 
  7 | # ### The MNIST Data
  8 | 
  9 | # In[1]:
 10 | 
 11 | # The MNIST Data are hosted on Yann LeCun's website, but made available directly by the TensorFlow team.
 12 | from tensorflow.examples.tutorials.mnist import input_data
 13 | mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
 14 | 
 15 | 
 16 | # ### Implementing Softmax Regression
 17 | 
 18 | # In[2]:
 19 | 
 20 | import tensorflow as tf
 21 | 
 22 | 
 23 | # In[3]:
 24 | 
 25 | # Assign placeholder to x that will be filled during computation. 
 26 | # We'll be flattening MNIST images into a 784-dimensional vector, 
 27 | # represented as a 2-D tensor of floating-point numbers. 
 28 | x = tf.placeholder(tf.float32, [None, 784])
 29 | 
 30 | 
 31 | # In[4]:
 32 | 
 33 | # Assign the model parameters to Variables, which are modifiable tensors
 34 | # within a graph of interacting operations. 
 35 | # Initialize as zeros. 
 36 | W = tf.Variable(tf.zeros([784, 10]))
 37 | b = tf.Variable(tf.zeros([10]))
 38 | 
 39 | 
 40 | # In[5]:
 41 | 
 42 | # Implementation proper takes only one line. 
 43 | y = tf.nn.softmax(tf.matmul(x, W) + b)
 44 | 
 45 | 
 46 | # ### Training
 47 | 
 48 | # In[6]:
 49 | 
 50 | # Assign a placeholder into which we'll be inputting correct answers:
 51 | y_ = tf.placeholder(tf.float32, [None, 10])
 52 | 
 53 | 
 54 | # In[7]:
 55 | 
 56 | # Implement cross-entropy, which we'll use as the cost function: 
 57 | cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1]))
 58 | 
 59 | 
 60 | # In[8]:
 61 | 
 62 | # Use gradient descent to minimize cost with learning rate of 0.5. 
 63 | # The beauty of TensorFlow is that we're effortlessly using backpropagation. 
 64 | train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)
 65 | 
 66 | 
 67 | # In[11]:
 68 | 
 69 | # Initialize all variables: 
 70 | init = tf.initialize_all_variables()
 71 | 
 72 | 
 73 | # In[12]:
 74 | 
 75 | # Launch the model within a session: 
 76 | sess = tf.Session()
 77 | sess.run(init)
 78 | 
 79 | 
 80 | # In[15]:
 81 | 
 82 | # Train with one thousand iterations. 
 83 | # Batches of one hundred random data points are used for stochastic training (i.e., SGD)
 84 | for i in range(1000):
 85 |     batch_xs, batch_ys = mnist.train.next_batch(100)
 86 |     sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})
 87 | 
 88 | 
 89 | # 
 90 | # ### Model Evaluation
 91 | 
 92 | # In[16]:
 93 | 
 94 | # Use argmax to examine whether the most likely predicted label matches reality: 
 95 | correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1))
 96 | 
 97 | 
 98 | # In[17]:
 99 | 
100 | # Cast Booleans to floating point numbers and take mean to assess overall accuracy: 
101 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
102 | 
103 | 
104 | # In[18]:
105 | 
106 | # Run and output to screen: 
107 | print(sess.run(accuracy, feed_dict={x: mnist.test.images, y_: mnist.test.labels}))
108 | 
109 | 
110 | # In[ ]:
111 | 
112 | 
113 | 
114 | 


--------------------------------------------------------------------------------
/weekly-work/week1/README.md:
--------------------------------------------------------------------------------
 1 | # Week 1
 2 | meeting date: *08-17-2016*
 3 | 
 4 | ### Covered
 5 | - [Chapter 1](http://neuralnetworksanddeeplearning.com/chap1.html) from Nielsen's Ebook
 6 | - [Tensorflow Setup](https://www.tensorflow.org/versions/r0.9/get_started/os_setup.html)
 7 | - [Tensoflow intro tutorial](https://www.tensorflow.org/versions/r0.9/tutorials/mnist/beginners/index.html)
 8 | 
 9 | 
10 | ### Nielsen Chapter 1
11 | - 2 important artificial neuron
12 | 	- perceptron
13 | 	- sigmoid
14 | - Standard learning algo for neural networks:  Stochastic Gradient Descent (SGD)
15 | 
16 | #### Perceptrons
17 | 
18 | - developed in 1950s and 60s
19 | - takes N binary inputs and produces a single binary output
20 | 	- output is 1 if weighted sum of inputs is > some threshold
21 | 	- output = `{ 0 if w⋅x + b    0; 1 if w⋅b  > 0`
22 | - `bias (b)` is a measure of how easily the neuron "fires"
23 | - Proof
24 | 	- Perceptrons can simulate circuit of many NAND gates
25 | 	- NAND gates are universal for computation (we can build any computation out of them)
26 | 	- => perceptions are universial for computation
27 | - We can devise learning algos that automatically tune the weights and biases of a network of artificial neurons
28 | 	- this tuning happens in response to external stimuli
29 | - Instead of laying NAND gates out explicitly, neural networks can simply learn to solve problems
30 | 
31 | #### Sigmoid neurons
32 | 
33 | - Perceptrons are touchy. A small change in weight/bias can lead to drastic changes in outputs
34 | - Segmoid's small changes in weight/bias lead to small output changes
35 | - Inputs are not binary (any real between 0 and 1)
36 | - `w*x + b` is now input into the Sigmoid/Logistic function to get the final output
37 | - Sigmoid is a smoothed out step function (step function correlates to perceptron)
38 |   
39 | #### The architecture of neural networks
40 | 
41 | - input and output layers with hidden layers in between
42 | - multilayer networks can be called multilayer perceptrons (MLPs), despite containing Sigmoid neurons
43 | - Feedfoward
44 |  	- output from one layer is input for next (no loops)
45 | - Recurrent
46 |  	- can have loops
47 |  	- neurons fire for a limited duration
48 |  	- less influential so far
49 |  	- closer in spirit to how the human brain works
50 |   
51 | #### Simple network to classify digits
52 | 
53 | - 2 parts: digit segmentation and individual digit recognition
54 | - Having good idividual digit recognition allows you to validate segmentation algo, so we'll focus on digit recogintion first
55 | - Digit recognition
56 |  	- input neurons 784 = 28 x 28 grayscale image pixels
57 |  	- output neurons 10 and highest activation value corresponds to the digit estimate
58 |   
59 | #### Learning with Gradient Descent
60 | 
61 | - MNIST dataset
62 |  	- training: 60,000 handwritten 28 X 28 images from 250 people
63 |  	- test: 10,000 handwritten 28 X 28 images from 250 other people
64 |  	- `y = y(x) = (0,0,0,0,0,0,1,0,0,0)T`
65 |    	- x: 28 X 28 = 784-dime vector of pixel greyvalues
66 |    	- y: 10-dim vector of digit estimates
67 | - Cost function
68 |  	- measures network accurracy
69 |  	- `C(w,b)` closer to 0 => better
70 | - \#images classified correctly is not a smooth function of the weights and biases in the network
71 | - Smooth funciton like quadratic cost is smoother and hence easier to detect improvement of small changes
72 | - We could use calculus to minimize cost function, but that doesn't scale well (could have billions of weights and biases in a NN)
73 | - `Δv=−η∇C`
74 |  	- η is the learning rate or increment SGD algorithm uses to "descend" and minimize C
75 |  	- η too big => approximation could not hold and lead to increase in C
76 |  	- η too small => algorithm is slow
77 | - In practice computing the gradient requires computing individual gradients for each training input
78 |  	- this is very slow for many inputs
79 |  	- SGD approximates gradient by averaging indvidual gradients for a samll sample of inputs
80 |    		- this is called a mini-batch
81 | 
82 | #### Implementing our Network
83 | - backpropagation
84 |  	- fast way of computing gradient of cost function
85 | 


--------------------------------------------------------------------------------
/weekly-work/week1/basic_usage.py:
--------------------------------------------------------------------------------
  1 | 
  2 | # coding: utf-8
  3 | 
  4 | # ### from https://www.tensorflow.org/versions/r0.9/get_started/basic_usage.html
  5 | 
  6 | # ## Building the graph
  7 | 
  8 | # In[1]:
  9 | 
 10 | import tensorflow as tf
 11 | 
 12 | 
 13 | # In[2]:
 14 | 
 15 | # Create a Constant op that produces a 1x2 matrix.  The op is
 16 | # added as a node to the default graph.
 17 | #
 18 | # The value returned by the constructor represents the output
 19 | # of the Constant op.
 20 | matrix1 = tf.constant([[3., 3.]])
 21 | 
 22 | 
 23 | # In[3]:
 24 | 
 25 | # Create another Constant that produces a 2x1 matrix.
 26 | matrix2 = tf.constant([[2.],[2.]])
 27 | 
 28 | 
 29 | # In[4]:
 30 | 
 31 | # Create a Matmul op that takes 'matrix1' and 'matrix2' as inputs.
 32 | # The returned value, 'product', represents the result of the matrix
 33 | # multiplication.
 34 | product = tf.matmul(matrix1, matrix2)
 35 | 
 36 | 
 37 | # ## Launching the graph in a session
 38 | 
 39 | # In[5]:
 40 | 
 41 | # Launch the default graph.
 42 | sess = tf.Session()
 43 | 
 44 | 
 45 | # In[6]:
 46 | 
 47 | # To run the matmul op we call the session 'run()' method, passing 'product'
 48 | # which represents the output of the matmul op.  This indicates to the call
 49 | # that we want to get the output of the matmul op back.
 50 | #
 51 | # All inputs needed by the op are run automatically by the session.  They
 52 | # typically are run in parallel.
 53 | #
 54 | # The call 'run(product)' thus causes the execution of three ops in the
 55 | # graph: the two constants and matmul.
 56 | #
 57 | # The output of the op is returned in 'result' as a numpy `ndarray` object.
 58 | result = sess.run(product)
 59 | print(result)
 60 | 
 61 | 
 62 | # In[7]:
 63 | 
 64 | # Close the Session when we're done to release resources. 
 65 | sess.close()
 66 | 
 67 | 
 68 | # ## Alternative session launch with "with"
 69 | 
 70 | # In[8]:
 71 | 
 72 | with tf.Session() as sess:
 73 |     result = sess.run([product])
 74 |     print(result)
 75 | 
 76 | 
 77 | # In[9]:
 78 | 
 79 | # If you want to use more than GPU, you need to specify this explicitly,
 80 | # for which "with" comes in handy: 
 81 | #with tf.Session() as sess:
 82 | #    with tf.device("/gpu:1"): # zero-indexed, so this is the second GPU
 83 | #        matrix1 = tf.constant([[3., 3.]])
 84 | #        matrix2 = tf.constant([[2.],[2.]])
 85 | #        product = tf.matmul(matrix1, matrix2)
 86 | #        #etc.
 87 | 
 88 | 
 89 | # In[10]:
 90 | 
 91 | # "with" also comes in handy for launching the graph in a distributed session, e.g.:
 92 | #with tf.Session("http://example.org:2222") as sess:
 93 | 
 94 | 
 95 | # ## Interactive Usage
 96 | 
 97 | # In[11]:
 98 | 
 99 | # Great for use within IPython notebooks like this one :)
100 | import tensorflow as tf
101 | sess = tf.InteractiveSession()
102 | 
103 | 
104 | # In[12]:
105 | 
106 | x = tf.Variable([1., 2.])
107 | a = tf.constant([3., 3.])
108 | 
109 | 
110 | # In[13]:
111 | 
112 | # Initialize x with run() method of initializer op. 
113 | x.initializer.run()
114 | 
115 | 
116 | # In[14]:
117 | 
118 | # Add an op to subtract 'a' from 'x'. 
119 | sub = tf.sub(x, a)
120 | 
121 | 
122 | # In[15]:
123 | 
124 | # Print result.
125 | print(sub.eval())
126 | 
127 | 
128 | # In[16]:
129 | 
130 | sess.close()
131 | 
132 | 
133 | # ## Variables
134 | 
135 | # In[18]:
136 | 
137 | # Create a Variable, which will be initialized to the scalar zero.
138 | state = tf.Variable(0, name="counter")
139 | 
140 | 
141 | # In[20]:
142 | 
143 | # Create an Op to add one to 'state'.
144 | one = tf.constant(1)
145 | new_value = tf.add(state, one)
146 | update = tf.assign(state, new_value)
147 | 
148 | 
149 | # In[21]:
150 | 
151 | # Initialize variables. 
152 | init_op = tf.initialize_all_variables()
153 | 
154 | 
155 | # In[22]:
156 | 
157 | # Launch the grph and run the ops. 
158 | with tf.Session() as sess:
159 |     # Run the 'init' op.
160 |     sess.run(init_op)
161 |     # Print the initial value of 'state'.
162 |     print(sess.run(state))
163 |     # Run the op that updates 'state' and print 'state'.
164 |     for _ in range(3):
165 |         sess.run(update)
166 |         print(sess.run(state))
167 | 
168 | 
169 | # ## Fetches
170 | 
171 | # In[23]:
172 | 
173 | # To fetch op outputs, execute the graph with a run() call on the Session object
174 | # and pass in the tensors to retrieve. 
175 | input1 = tf.constant([3.])
176 | input2 = tf.constant([2.])
177 | input3 = tf.constant([5.])
178 | intermed = tf.add(input2, input3)
179 | mul = tf.mul(input1, intermed)
180 | 
181 | with tf.Session() as sess:
182 |     result = sess.run([mul, intermed])
183 |     print(result)
184 | 
185 | 
186 | # ## Feeds
187 | 
188 | # In[24]:
189 | 
190 | # TensorFlow provides a feed mechanism for patching a tensor directly 
191 | # into any operation in the graph. 
192 | input1 = tf.placeholder(tf.float32)
193 | input2 = tf.placeholder(tf.float32)
194 | output = tf.mul(input1, input2)
195 | 
196 | with tf.Session() as sess:
197 |     print(sess.run([output], feed_dict={input1:[7.], input2:[2.]}))
198 | 
199 | 
200 | # In[ ]:
201 | 
202 | 
203 | 
204 | 


--------------------------------------------------------------------------------
/weekly-work/week1/deep_MNIST.py:
--------------------------------------------------------------------------------
  1 | 
  2 | # coding: utf-8
  3 | 
  4 | # # Deep MNIST
  5 | 
  6 | # #### Construct a deep convolutional MNIST classifier
  7 | 
  8 | # #### from https://www.tensorflow.org/versions/r0.9/tutorials/mnist/pros/index.html
  9 | 
 10 | # ## Load MNIST Data
 11 | 
 12 | # In[2]:
 13 | 
 14 | from tensorflow.examples.tutorials.mnist import input_data
 15 | 
 16 | 
 17 | # In[3]:
 18 | 
 19 | # Load training, validation, and testing sets as NumPy arrays. 
 20 | mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
 21 | 
 22 | 
 23 | # ## Start TensorFlow InteractiveSession
 24 | 
 25 | # In[4]:
 26 | 
 27 | # The InteractiveSession class is ideal for IPython notebooks like this one. 
 28 | # It facilitates flexibility in how you structure your code, 
 29 | # and you can alternate between operations that build the computation graph
 30 | # with those that run that graph. 
 31 | import tensorflow as tf
 32 | sess = tf.InteractiveSession()
 33 | 
 34 | 
 35 | # ## Build a Softmax Regression Model
 36 | 
 37 | # In[ ]:
 38 | 
 39 | # Build a softmax regression model with a single linear layer. 
 40 | 
 41 | 
 42 | # In[5]:
 43 | 
 44 | # Create placeholder nodes for the input images and target output classes. 
 45 | x = tf.placeholder(tf.float32, shape=[None, 784])
 46 | y_ = tf.placeholder(tf.float32, shape=[None, 10])
 47 | 
 48 | 
 49 | # In[6]:
 50 | 
 51 | # Define the weights and biases for the model as Variables.
 52 | W = tf.Variable(tf.zeros([784,10]))
 53 | b = tf.Variable(tf.zeros([10]))
 54 | 
 55 | 
 56 | # In[8]:
 57 | 
 58 | # Initialize variables for use in session.
 59 | sess.run(tf.initialize_all_variables())
 60 | 
 61 | 
 62 | # In[9]:
 63 | 
 64 | # Implement as a softmax regression model. 
 65 | y = tf.nn.softmax(tf.matmul(x,W) + b)
 66 | 
 67 | 
 68 | # In[10]:
 69 | 
 70 | # Specify the model's cost function as cross-entropy. 
 71 | # Use reduce_sum to sum across all classes; reduce_mean to take sum of averages. 
 72 | cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1]))
 73 | 
 74 | 
 75 | # #### Train the Model
 76 | 
 77 | # In[11]:
 78 | 
 79 | # Select steepest gradient descent, with step length of 0.5, to descend the cross entropy. 
 80 | # TensorFlow automatically adds operations to: 
 81 | # - compute gradients
 82 | # - compute parameter update steps
 83 | # - apply update steps to the parameters
 84 | train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)
 85 | 
 86 | 
 87 | # In[12]:
 88 | 
 89 | # Run train_step to repeatedly apply gradient descent updates to the parameters. 
 90 | # Each training iteration (batch) loads fifty training examples, 
 91 | # which feed_dict replaces placeholder tensors x and y_ with. 
 92 | for i in range(1000):
 93 |     batch = mnist.train.next_batch(50)
 94 |     train_step.run(feed_dict={x: batch[0], y_: batch[1]})
 95 | 
 96 | 
 97 | # #### Evaluate the Model
 98 | 
 99 | # In[13]:
100 | 
101 | # Use arg_max to identify the label that the model thinks is most likely for each input. 
102 | correct_prediction = tf.equal(tf.arg_max(y,1), tf.arg_max(y_,1))
103 | 
104 | 
105 | # In[14]:
106 | 
107 | # Convert booleans to floating point numbers. 
108 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
109 | 
110 | 
111 | # In[15]:
112 | 
113 | # Evaluate and print to screen. 
114 | print(accuracy.eval(feed_dict={x: mnist.test.images, y_: mnist.test.labels}))
115 | 
116 | 
117 | # In[ ]:
118 | 
119 | # 90.92% classification accuracy. We can do better. 
120 | 
121 | 
122 | # # Build a Multilayer Convolutional Network
123 | 
124 | # #### Weight Initialization
125 | 
126 | # In[16]:
127 | 
128 | def weight_variable(shape):
129 |     initial = tf.truncated_normal(shape, stddev=0.1)
130 |     return tf.Variable(initial)
131 | 
132 | 
133 | # In[17]:
134 | 
135 | def bias_variable(shape):
136 |     initial = tf.constant(0.1, shape=shape)
137 |     return tf.Variable(initial)
138 | 
139 | 
140 | # #### Convolution and Pooling
141 | 
142 | # In[18]:
143 | 
144 | def conv2d(x, W):
145 |     return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
146 | 
147 | 
148 | # In[19]:
149 | 
150 | def max_pool_2x2(x):
151 |     return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
152 | 
153 | 
154 | # #### First Convolutional Layer
155 | 
156 | # In[20]:
157 | 
158 | W_conv1 = weight_variable([5, 5, 1, 32])
159 | b_conv1 = bias_variable([32])
160 | 
161 | 
162 | # In[21]:
163 | 
164 | x_image = tf.reshape(x, [-1,28,28,1])
165 | 
166 | 
167 | # In[22]:
168 | 
169 | h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
170 | h_pool1 = max_pool_2x2(h_conv1)
171 | 
172 | 
173 | # #### Second Convolutional Layer
174 | 
175 | # In[23]:
176 | 
177 | W_conv2 = weight_variable([5, 5, 32, 64])
178 | b_conv2 = bias_variable([64])
179 | 
180 | 
181 | # In[24]:
182 | 
183 | h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
184 | h_pool2 = max_pool_2x2(h_conv2)
185 | 
186 | 
187 | # #### Densely Connected Layer
188 | 
189 | # In[25]:
190 | 
191 | W_fc1 = weight_variable([7 * 7 * 64, 1024])
192 | b_fc1 = bias_variable([1024])
193 | 
194 | 
195 | # In[26]:
196 | 
197 | h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64])
198 | h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
199 | 
200 | 
201 | # In[28]:
202 | 
203 | # Apply dropout before readout layer to reduce overfitting. 
204 | keep_prob = tf.placeholder(tf.float32)
205 | h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
206 | 
207 | 
208 | # #### Readout Layer
209 | 
210 | # In[29]:
211 | 
212 | W_fc2 = weight_variable([1024, 10])
213 | b_fc2 = bias_variable([10])
214 | 
215 | 
216 | # In[30]:
217 | 
218 | y_conv = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)
219 | 
220 | 
221 | # #### Train and Evaluate the Model
222 | 
223 | # In[34]:
224 | 
225 | # Use ADAM optimizer instead of steepest gradient descent. 
226 | cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y_conv), reduction_indices=[1]))
227 | train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
228 | correct_prediction = tf.equal(tf.argmax(y_conv,1), tf.argmax(y_,1))
229 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
230 | sess.run(tf.initialize_all_variables())
231 | for i in range(20000):
232 |     batch = mnist.train.next_batch(50)
233 |     if i%100 == 0:
234 |         train_accuracy = accuracy.eval(feed_dict={x:batch[0], y_: batch[1], keep_prob: 1.0})
235 |         print("step %d, training accuracy %g"%(i, train_accuracy))
236 |     train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})
237 | 
238 | 
239 | # In[35]:
240 | 
241 | print("test accuracy %g"%accuracy.eval(feed_dict={x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}))
242 | 
243 | 
244 | # In[ ]:
245 | 
246 | 
247 | 
248 | 


--------------------------------------------------------------------------------
/weekly-work/week1/exercise3.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import math
 3 | 
 4 | inputs = {
 5 |     0: np.array([0.990, 0.009, 0.009, 0.009, 0.009, 0.009, 0.009, 0.009, 0.009, 0.009]),
 6 |     1: np.array([0.009, 0.990, 0.009, 0.009, 0.009, 0.009, 0.009, 0.009, 0.009, 0.009]),
 7 |     2: np.array([0.009, 0.009, 0.990, 0.009, 0.009, 0.009, 0.009, 0.009, 0.009, 0.009]),
 8 |     3: np.array([0.009, 0.009, 0.009, 0.990, 0.009, 0.009, 0.009, 0.009, 0.009, 0.009]),
 9 |     4: np.array([0.009, 0.009, 0.009, 0.009, 0.990, 0.009, 0.009, 0.009, 0.009, 0.009]),
10 |     5: np.array([0.009, 0.009, 0.009, 0.009, 0.009, 0.990, 0.009, 0.009, 0.009, 0.009]),
11 |     6: np.array([0.009, 0.009, 0.009, 0.009, 0.009, 0.009, 0.990, 0.009, 0.009, 0.009]),
12 |     7: np.array([0.009, 0.009, 0.009, 0.009, 0.009, 0.009, 0.009, 0.990, 0.009, 0.009]),
13 |     8: np.array([0.009, 0.009, 0.009, 0.009, 0.009, 0.009, 0.009, 0.009, 0.990, 0.009]),
14 |     9: np.array([0.009, 0.009, 0.009, 0.009, 0.009, 0.009, 0.009, 0.009, 0.009, 0.990])
15 | }
16 | 
17 | expected_outputs = {
18 |     0: "0000",
19 |     1: "0001",
20 |     2: "0010",
21 |     3: "0011",
22 |     4: "0100",
23 |     5: "0101",
24 |     6: "0110",
25 |     7: "0111",
26 |     8: "1000",
27 |     9: "1001"
28 | }
29 | 
30 | 
31 | def sigmoid(x):
32 |     return 1 / (1 + math.exp(-x))
33 | 
34 | 
35 | def evaluate(W, B):
36 |     """
37 |     Evalute whether Neuron weights and biases generate
38 |     expected output for each digit from 0 -> 9
39 |     """
40 |     for n in xrange(10):
41 |         print "Determining if neurons produce proper bitwise representntation for: {}".format(n)
42 | 
43 |         neuron_zs = [np.dot(inputs[n], w) - b for w, b in zip(W, B)]
44 |         neuron_outputs = [sigmoid(z) for z in neuron_zs]
45 |         bitwise_string = ''.join([str(int(round(x))) for x in neuron_outputs])
46 |         expected = expected_outputs[n]
47 | 
48 |         print "  neuron output: {}".format(bitwise_string)
49 |         print "  expected output: {}".format(expected)
50 |         assert(bitwise_string == expected)
51 |         print "  correct!"
52 | 
53 | 
54 | def main():
55 |     # Neuron weights and biases that should be tuned to generate expected output
56 |     w0, b0 = [-10, -10, -10, -10, -10, -10, -10, -10,  10,  10], 0
57 |     w1, b1 = [-10, -10, -10, -10,  10,  10,  10,  10, -10, -10], 0
58 |     w2, b2 = [-10, -10,  10,  10, -10, -10,  10,  10, -10, -10], 0
59 |     w3, b3 = [-10,  10, -10,  10, -10,  10, -10,  10, -10,  10], 0
60 | 
61 |     W = np.array([w0, w1, w2, w3])
62 |     B = np.array([b0, b1, b2, b3])
63 | 
64 |     evaluate(W, B)
65 | 
66 | 
67 | if __name__ == "__main__":
68 |     main()
69 | 


--------------------------------------------------------------------------------
/weekly-work/week1/get_started.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# TensorFlow Getting Started Tutorial"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "#### from https://www.tensorflow.org/versions/r0.10/get_started/basic_usage.html#interactive-usage"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "code",
 19 |    "execution_count": 1,
 20 |    "metadata": {
 21 |     "collapsed": false
 22 |    },
 23 |    "outputs": [],
 24 |    "source": [
 25 |     "import tensorflow as tf"
 26 |    ]
 27 |   },
 28 |   {
 29 |    "cell_type": "code",
 30 |    "execution_count": 2,
 31 |    "metadata": {
 32 |     "collapsed": true
 33 |    },
 34 |    "outputs": [],
 35 |    "source": [
 36 |     "sess = tf.InteractiveSession()"
 37 |    ]
 38 |   },
 39 |   {
 40 |    "cell_type": "code",
 41 |    "execution_count": 3,
 42 |    "metadata": {
 43 |     "collapsed": true
 44 |    },
 45 |    "outputs": [],
 46 |    "source": [
 47 |     "x = tf.Variable([1.0, 2.0])\n",
 48 |     "a = tf.constant([3.0, 3.0])"
 49 |    ]
 50 |   },
 51 |   {
 52 |    "cell_type": "code",
 53 |    "execution_count": 4,
 54 |    "metadata": {
 55 |     "collapsed": true
 56 |    },
 57 |    "outputs": [],
 58 |    "source": [
 59 |     "# Initialize 'x' using the run() method of its initializer op.\n",
 60 |     "x.initializer.run()"
 61 |    ]
 62 |   },
 63 |   {
 64 |    "cell_type": "code",
 65 |    "execution_count": 5,
 66 |    "metadata": {
 67 |     "collapsed": false
 68 |    },
 69 |    "outputs": [
 70 |     {
 71 |      "name": "stdout",
 72 |      "output_type": "stream",
 73 |      "text": [
 74 |       "[-2. -1.]\n"
 75 |      ]
 76 |     }
 77 |    ],
 78 |    "source": [
 79 |     "# Add an op to subtract 'a' from 'x'.  Run it and print the result\n",
 80 |     "sub = tf.sub(x, a)\n",
 81 |     "print(sub.eval())"
 82 |    ]
 83 |   },
 84 |   {
 85 |    "cell_type": "code",
 86 |    "execution_count": null,
 87 |    "metadata": {
 88 |     "collapsed": true
 89 |    },
 90 |    "outputs": [],
 91 |    "source": []
 92 |   },
 93 |   {
 94 |    "cell_type": "markdown",
 95 |    "metadata": {},
 96 |    "source": [
 97 |     "#### from https://www.tensorflow.org/versions/r0.9/get_started/index.html"
 98 |    ]
 99 |   },
100 |   {
101 |    "cell_type": "code",
102 |    "execution_count": 6,
103 |    "metadata": {
104 |     "collapsed": true
105 |    },
106 |    "outputs": [],
107 |    "source": [
108 |     "import numpy as np"
109 |    ]
110 |   },
111 |   {
112 |    "cell_type": "code",
113 |    "execution_count": 7,
114 |    "metadata": {
115 |     "collapsed": true
116 |    },
117 |    "outputs": [],
118 |    "source": [
119 |     "# Create 100 phony x, y data points in NumPy, y = x * 0.1 + 0.3\n",
120 |     "x_data = np.random.rand(100).astype(np.float32)\n",
121 |     "y_data = x_data * 0.1 + 0.3"
122 |    ]
123 |   },
124 |   {
125 |    "cell_type": "code",
126 |    "execution_count": 8,
127 |    "metadata": {
128 |     "collapsed": true
129 |    },
130 |    "outputs": [],
131 |    "source": [
132 |     "# Try to find values for W and b that compute y_data = W * x_data + b\n",
133 |     "# (We know that W should be 0.1 and b 0.3, but Tensorflow will\n",
134 |     "# figure that out for us.)\n",
135 |     "W = tf.Variable(tf.random_uniform([1], -1.0, 1.0))\n",
136 |     "b = tf.Variable(tf.zeros([1]))\n",
137 |     "y = W * x_data + b"
138 |    ]
139 |   },
140 |   {
141 |    "cell_type": "code",
142 |    "execution_count": 9,
143 |    "metadata": {
144 |     "collapsed": true
145 |    },
146 |    "outputs": [],
147 |    "source": [
148 |     "# Minimize the mean squared errors\n",
149 |     "loss = tf.reduce_mean(tf.square(y - y_data))\n",
150 |     "optimizer = tf.train.GradientDescentOptimizer(0.5)\n",
151 |     "train = optimizer.minimize(loss)"
152 |    ]
153 |   },
154 |   {
155 |    "cell_type": "code",
156 |    "execution_count": 10,
157 |    "metadata": {
158 |     "collapsed": true
159 |    },
160 |    "outputs": [],
161 |    "source": [
162 |     "# Before starting, initialize the variables. We will 'run' this first. \n",
163 |     "init = tf.initialize_all_variables()"
164 |    ]
165 |   },
166 |   {
167 |    "cell_type": "code",
168 |    "execution_count": 11,
169 |    "metadata": {
170 |     "collapsed": true
171 |    },
172 |    "outputs": [],
173 |    "source": [
174 |     "# Launch the graph.\n",
175 |     "sess = tf.Session()\n",
176 |     "sess.run(init)"
177 |    ]
178 |   },
179 |   {
180 |    "cell_type": "code",
181 |    "execution_count": 12,
182 |    "metadata": {
183 |     "collapsed": false
184 |    },
185 |    "outputs": [
186 |     {
187 |      "name": "stdout",
188 |      "output_type": "stream",
189 |      "text": [
190 |       "(0, array([ 0.56993598], dtype=float32), array([ 0.07380156], dtype=float32))\n",
191 |       "(20, array([ 0.25244093], dtype=float32), array([ 0.22169706], dtype=float32))\n",
192 |       "(40, array([ 0.15010804], dtype=float32), array([ 0.27426147], dtype=float32))\n",
193 |       "(60, array([ 0.11647075], dtype=float32), array([ 0.29153964], dtype=float32))\n",
194 |       "(80, array([ 0.10541401], dtype=float32), array([ 0.29721904], dtype=float32))\n",
195 |       "(100, array([ 0.1017796], dtype=float32), array([ 0.29908589], dtype=float32))\n",
196 |       "(120, array([ 0.10058497], dtype=float32), array([ 0.29969954], dtype=float32))\n",
197 |       "(140, array([ 0.10019229], dtype=float32), array([ 0.29990125], dtype=float32))\n",
198 |       "(160, array([ 0.10006322], dtype=float32), array([ 0.29996753], dtype=float32))\n",
199 |       "(180, array([ 0.1000208], dtype=float32), array([ 0.29998934], dtype=float32))\n",
200 |       "(200, array([ 0.10000685], dtype=float32), array([ 0.2999965], dtype=float32))\n"
201 |      ]
202 |     }
203 |    ],
204 |    "source": [
205 |     "# Fit the line.\n",
206 |     "for step in range(201):\n",
207 |     "    sess.run(train)\n",
208 |     "    if step % 20 == 0:\n",
209 |     "        print(step, sess.run(W), sess.run(b))"
210 |    ]
211 |   },
212 |   {
213 |    "cell_type": "code",
214 |    "execution_count": null,
215 |    "metadata": {
216 |     "collapsed": true
217 |    },
218 |    "outputs": [],
219 |    "source": []
220 |   }
221 |  ],
222 |  "metadata": {
223 |   "kernelspec": {
224 |    "display_name": "Python 2",
225 |    "language": "python",
226 |    "name": "python2"
227 |   },
228 |   "language_info": {
229 |    "codemirror_mode": {
230 |     "name": "ipython",
231 |     "version": 2
232 |    },
233 |    "file_extension": ".py",
234 |    "mimetype": "text/x-python",
235 |    "name": "python",
236 |    "nbconvert_exporter": "python",
237 |    "pygments_lexer": "ipython2",
238 |    "version": "2.7.11"
239 |   }
240 |  },
241 |  "nbformat": 4,
242 |  "nbformat_minor": 0
243 | }
244 | 


--------------------------------------------------------------------------------
/weekly-work/week1/get_started.py:
--------------------------------------------------------------------------------
  1 | 
  2 | # coding: utf-8
  3 | 
  4 | # # TensorFlow Getting Started Tutorial
  5 | 
  6 | # #### from https://www.tensorflow.org/versions/r0.10/get_started/basic_usage.html#interactive-usage
  7 | 
  8 | # In[1]:
  9 | 
 10 | import tensorflow as tf
 11 | 
 12 | 
 13 | # In[2]:
 14 | 
 15 | sess = tf.InteractiveSession()
 16 | 
 17 | 
 18 | # In[3]:
 19 | 
 20 | x = tf.Variable([1.0, 2.0])
 21 | a = tf.constant([3.0, 3.0])
 22 | 
 23 | 
 24 | # In[4]:
 25 | 
 26 | # Initialize 'x' using the run() method of its initializer op.
 27 | x.initializer.run()
 28 | 
 29 | 
 30 | # In[5]:
 31 | 
 32 | # Add an op to subtract 'a' from 'x'.  Run it and print the result
 33 | sub = tf.sub(x, a)
 34 | print(sub.eval())
 35 | 
 36 | 
 37 | # In[ ]:
 38 | 
 39 | 
 40 | 
 41 | 
 42 | # #### from https://www.tensorflow.org/versions/r0.9/get_started/index.html
 43 | 
 44 | # In[6]:
 45 | 
 46 | import numpy as np
 47 | 
 48 | 
 49 | # In[7]:
 50 | 
 51 | # Create 100 phony x, y data points in NumPy, y = x * 0.1 + 0.3
 52 | x_data = np.random.rand(100).astype(np.float32)
 53 | y_data = x_data * 0.1 + 0.3
 54 | 
 55 | 
 56 | # In[8]:
 57 | 
 58 | # Try to find values for W and b that compute y_data = W * x_data + b
 59 | # (We know that W should be 0.1 and b 0.3, but Tensorflow will
 60 | # figure that out for us.)
 61 | W = tf.Variable(tf.random_uniform([1], -1.0, 1.0))
 62 | b = tf.Variable(tf.zeros([1]))
 63 | y = W * x_data + b
 64 | 
 65 | 
 66 | # In[9]:
 67 | 
 68 | # Minimize the mean squared errors
 69 | loss = tf.reduce_mean(tf.square(y - y_data))
 70 | optimizer = tf.train.GradientDescentOptimizer(0.5)
 71 | train = optimizer.minimize(loss)
 72 | 
 73 | 
 74 | # In[10]:
 75 | 
 76 | # Before starting, initialize the variables. We will 'run' this first. 
 77 | init = tf.initialize_all_variables()
 78 | 
 79 | 
 80 | # In[11]:
 81 | 
 82 | # Launch the graph.
 83 | sess = tf.Session()
 84 | sess.run(init)
 85 | 
 86 | 
 87 | # In[12]:
 88 | 
 89 | # Fit the line.
 90 | for step in range(201):
 91 |     sess.run(train)
 92 |     if step % 20 == 0:
 93 |         print(step, sess.run(W), sess.run(b))
 94 | 
 95 | 
 96 | # In[ ]:
 97 | 
 98 | 
 99 | 
100 | 


--------------------------------------------------------------------------------
/weekly-work/week1/softmax_vs_convolutional_nn.py:
--------------------------------------------------------------------------------
  1 | import tensorflow as tf
  2 | from tensorflow.examples.tutorials.mnist import input_data
  3 | 
  4 | mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
  5 | 
  6 | 
  7 | def softmax():
  8 |     # Model variables
  9 |     x = tf.placeholder(tf.float32, shape=[None, 784])
 10 |     y_ = tf.placeholder(tf.float32, shape=[None, 10])
 11 |     W = tf.Variable(tf.zeros([784, 10]))
 12 |     b = tf.Variable(tf.zeros([10]))
 13 |     y = tf.nn.softmax(tf.matmul(x, W) + b)
 14 | 
 15 |     # Setup cost function and Gradient Descent Optimizer
 16 |     cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1]))
 17 |     train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)
 18 | 
 19 |     # Setup input variables and session
 20 |     sess = tf.InteractiveSession()
 21 |     sess.run(tf.initialize_all_variables())
 22 | 
 23 |     for i in xrange(1000):
 24 |         if (i + 1) % 100 == 0:
 25 |             print "training step {}".format(i + 1)
 26 |         batch_xs, batch_ys = mnist.train.next_batch(50)
 27 |         train_step.run(feed_dict={x: batch_xs, y_: batch_ys})
 28 | 
 29 |     # Evaluate model
 30 |     correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
 31 |     accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
 32 |     print "Test accuracy: {}".format(accuracy.eval(feed_dict={x: mnist.test.images, y_: mnist.test.labels}))
 33 | 
 34 | 
 35 | def weight_variable(shape):
 36 |     initial = tf.truncated_normal(shape, stddev=0.1)
 37 |     return tf.Variable(initial)
 38 | 
 39 | 
 40 | def bias_variable(shape):
 41 |     initial = tf.constant(0.1, shape=shape)
 42 |     return tf.Variable(initial)
 43 | 
 44 | 
 45 | def conv2d(x, W):
 46 |     return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
 47 | 
 48 | 
 49 | def max_pool_2x2(x):
 50 |     return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
 51 | 
 52 | 
 53 | def convolutional_network():
 54 |     # Model variables
 55 |     x = tf.placeholder(tf.float32, shape=[None, 784])
 56 |     y_ = tf.placeholder(tf.float32, shape=[None, 10])
 57 | 
 58 |     # Layer 1
 59 |     W_conv1 = weight_variable([5, 5, 1, 32])
 60 |     b_conv1 = bias_variable([32])
 61 | 
 62 |     x_image = tf.reshape(x, [-1, 28, 28, 1])
 63 | 
 64 |     h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
 65 |     h_pool1 = max_pool_2x2(h_conv1)
 66 | 
 67 |     # Layer 2
 68 |     W_conv2 = weight_variable([5, 5, 32, 64])
 69 |     b_conv2 = bias_variable([64])
 70 | 
 71 |     h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
 72 |     h_pool2 = max_pool_2x2(h_conv2)
 73 | 
 74 |     # Layer 3
 75 |     W_fc1 = weight_variable([7 * 7 * 64, 1024])
 76 |     b_fc1 = bias_variable([1024])
 77 | 
 78 |     h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
 79 |     h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
 80 | 
 81 |     # Dropout
 82 |     keep_prob = tf.placeholder(tf.float32)
 83 |     h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
 84 | 
 85 |     # Softmax readout layer
 86 |     W_fc2 = weight_variable([1024, 10])
 87 |     b_fc2 = bias_variable([10])
 88 | 
 89 |     y_conv = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)
 90 | 
 91 |     # Evaluate model
 92 |     cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y_conv), reduction_indices=[1]))
 93 |     train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
 94 |     correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
 95 |     accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
 96 | 
 97 |     # Setup input variables and session
 98 |     sess = tf.InteractiveSession()
 99 |     sess.run(tf.initialize_all_variables())
100 | 
101 |     for i in xrange(20000):
102 |         batch = mnist.train.next_batch(50)
103 |         if i % 100 == 0:
104 |             train_accuracy = accuracy.eval(feed_dict={x: batch[0], y_: batch[1], keep_prob: 1.0})
105 |             print("step %d, training accuracy %g" % (i, train_accuracy))
106 |         train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})
107 | 
108 |     print("Test accuracy: %g" % accuracy.eval(feed_dict={x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}))
109 | 
110 | 
111 | def main():
112 |     print "\nRunning softmax model..."
113 |     softmax()
114 | 
115 |     print "\nRunning convolutional neural network..."
116 |     convolutional_network()
117 | 
118 | if __name__ == '__main__':
119 |     main()
120 | 


--------------------------------------------------------------------------------
/weekly-work/week11/sutskever_et_al_2014__PCA.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week11/sutskever_et_al_2014__PCA.png


--------------------------------------------------------------------------------
/weekly-work/week12/img/CNN_feature_map.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week12/img/CNN_feature_map.png


--------------------------------------------------------------------------------
/weekly-work/week12/img/CTC_peaks.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week12/img/CTC_peaks.png


--------------------------------------------------------------------------------
/weekly-work/week12/img/GRU_gates.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week12/img/GRU_gates.png


--------------------------------------------------------------------------------
/weekly-work/week12/img/GRU_shortcut.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week12/img/GRU_shortcut.png


--------------------------------------------------------------------------------
/weekly-work/week12/img/GRU_visualisation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week12/img/GRU_visualisation.png


--------------------------------------------------------------------------------
/weekly-work/week12/img/LSTM_secret.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week12/img/LSTM_secret.png


--------------------------------------------------------------------------------
/weekly-work/week12/img/RNN_visualisation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week12/img/RNN_visualisation.png


--------------------------------------------------------------------------------
/weekly-work/week12/img/RNNs_vs_CNNs.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week12/img/RNNs_vs_CNNs.png


--------------------------------------------------------------------------------
/weekly-work/week12/img/are_languages_recursive.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week12/img/are_languages_recursive.png


--------------------------------------------------------------------------------
/weekly-work/week12/img/attention_for_long_sentences_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week12/img/attention_for_long_sentences_plot.png


--------------------------------------------------------------------------------
/weekly-work/week12/img/attn_hidden_state.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week12/img/attn_hidden_state.png


--------------------------------------------------------------------------------
/weekly-work/week12/img/bilinear_form.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week12/img/bilinear_form.png


--------------------------------------------------------------------------------
/weekly-work/week12/img/bldg_on_WVSMs.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week12/img/bldg_on_WVSMs.png


--------------------------------------------------------------------------------
/weekly-work/week12/img/choosing_better_targets.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week12/img/choosing_better_targets.png


--------------------------------------------------------------------------------
/weekly-work/week12/img/choosing_output_targets.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week12/img/choosing_output_targets.png


--------------------------------------------------------------------------------
/weekly-work/week12/img/decoding.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week12/img/decoding.png


--------------------------------------------------------------------------------
/weekly-work/week12/img/doubly_attention.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week12/img/doubly_attention.png


--------------------------------------------------------------------------------
/weekly-work/week12/img/end_to_end_ASR_as_model.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week12/img/end_to_end_ASR_as_model.png


--------------------------------------------------------------------------------
/weekly-work/week12/img/global_vs_local.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week12/img/global_vs_local.png


--------------------------------------------------------------------------------
/weekly-work/week12/img/heres_the-church_here_are_the_people.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week12/img/heres_the-church_here_are_the_people.png


--------------------------------------------------------------------------------
/weekly-work/week12/img/learned_tree_structure.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week12/img/learned_tree_structure.png


--------------------------------------------------------------------------------
/weekly-work/week12/img/lstm_vs_rnn_127.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week12/img/lstm_vs_rnn_127.png


--------------------------------------------------------------------------------
/weekly-work/week12/img/lstm_vs_rnn_32.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week12/img/lstm_vs_rnn_32.png


--------------------------------------------------------------------------------
/weekly-work/week12/img/nn_ASR.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week12/img/nn_ASR.png


--------------------------------------------------------------------------------
/weekly-work/week12/img/octopus-gan.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week12/img/octopus-gan.gif


--------------------------------------------------------------------------------
/weekly-work/week12/img/phrases_in_vector_space.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week12/img/phrases_in_vector_space.png


--------------------------------------------------------------------------------
/weekly-work/week12/img/recursive_vs_recurrent_NN.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week12/img/recursive_vs_recurrent_NN.png


--------------------------------------------------------------------------------
/weekly-work/week12/img/scoring_attention.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week12/img/scoring_attention.png


--------------------------------------------------------------------------------
/weekly-work/week12/img/sentiment_distributions.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week12/img/sentiment_distributions.png


--------------------------------------------------------------------------------
/weekly-work/week12/img/seq2seq_ASR.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week12/img/seq2seq_ASR.png


--------------------------------------------------------------------------------
/weekly-work/week12/img/seq2seq_ASR_attn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week12/img/seq2seq_ASR_attn.png


--------------------------------------------------------------------------------
/weekly-work/week12/img/single_layer_CNN.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week12/img/single_layer_CNN.png


--------------------------------------------------------------------------------
/weekly-work/week12/img/traditional_ASR.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week12/img/traditional_ASR.png


--------------------------------------------------------------------------------
/weekly-work/week12/img/what_is_a_convolution.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week12/img/what_is_a_convolution.png


--------------------------------------------------------------------------------
/weekly-work/week13/img/QA_independence.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week13/img/QA_independence.png


--------------------------------------------------------------------------------
/weekly-work/week13/img/SNLI_results.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week13/img/SNLI_results.png


--------------------------------------------------------------------------------
/weekly-work/week13/img/SPINN.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week13/img/SPINN.png


--------------------------------------------------------------------------------
/weekly-work/week13/img/arch_search_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week13/img/arch_search_2.png


--------------------------------------------------------------------------------
/weekly-work/week13/img/arch_search_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week13/img/arch_search_3.png


--------------------------------------------------------------------------------
/weekly-work/week13/img/arch_search_4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week13/img/arch_search_4.png


--------------------------------------------------------------------------------
/weekly-work/week13/img/architecture_search.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week13/img/architecture_search.png


--------------------------------------------------------------------------------
/weekly-work/week13/img/b_cubed.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week13/img/b_cubed.png


--------------------------------------------------------------------------------
/weekly-work/week13/img/chunking_training.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week13/img/chunking_training.png


--------------------------------------------------------------------------------
/weekly-work/week13/img/diff_inputs.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week13/img/diff_inputs.png


--------------------------------------------------------------------------------
/weekly-work/week13/img/dynamic_memory_network.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week13/img/dynamic_memory_network.png


--------------------------------------------------------------------------------
/weekly-work/week13/img/episodic_module.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week13/img/episodic_module.png


--------------------------------------------------------------------------------
/weekly-work/week13/img/harder_questions.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week13/img/harder_questions.png


--------------------------------------------------------------------------------
/weekly-work/week13/img/inference_corpus.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week13/img/inference_corpus.png


--------------------------------------------------------------------------------
/weekly-work/week13/img/input_module.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week13/img/input_module.png


--------------------------------------------------------------------------------
/weekly-work/week13/img/more_qa_examples.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week13/img/more_qa_examples.png


--------------------------------------------------------------------------------
/weekly-work/week13/img/obstacle_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week13/img/obstacle_1.png


--------------------------------------------------------------------------------
/weekly-work/week13/img/obstacle_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week13/img/obstacle_2.png


--------------------------------------------------------------------------------
/weekly-work/week13/img/pointer_mixture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week13/img/pointer_mixture.png


--------------------------------------------------------------------------------
/weekly-work/week13/img/qa_examples.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week13/img/qa_examples.png


--------------------------------------------------------------------------------
/weekly-work/week13/img/question_module.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week13/img/question_module.png


--------------------------------------------------------------------------------
/weekly-work/week13/img/semantic_relatedness.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week13/img/semantic_relatedness.png


--------------------------------------------------------------------------------
/weekly-work/week13/img/sharper_attn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week13/img/sharper_attn.png


--------------------------------------------------------------------------------
/weekly-work/week13/img/state_of_the_art.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week13/img/state_of_the_art.png


--------------------------------------------------------------------------------
/weekly-work/week13/img/tackling_joint_training.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week13/img/tackling_joint_training.png


--------------------------------------------------------------------------------
/weekly-work/week13/img/tennis_Qs.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week13/img/tennis_Qs.png


--------------------------------------------------------------------------------
/weekly-work/week13/img/touch.txt:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/weekly-work/week13/img/tying_word_vectors.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week13/img/tying_word_vectors.png


--------------------------------------------------------------------------------
/weekly-work/week13/img/visual_attn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week13/img/visual_attn.png


--------------------------------------------------------------------------------
/weekly-work/week13/img/visual_attn_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week13/img/visual_attn_2.png


--------------------------------------------------------------------------------
/weekly-work/week13/img/visual_attn_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week13/img/visual_attn_3.png


--------------------------------------------------------------------------------
/weekly-work/week13/img/where_SPINN_is_better.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week13/img/where_SPINN_is_better.png


--------------------------------------------------------------------------------
/weekly-work/week13/img/writing_systems.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week13/img/writing_systems.png


--------------------------------------------------------------------------------
/weekly-work/week13/img/ws_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week13/img/ws_2.png


--------------------------------------------------------------------------------
/weekly-work/week13/img/ws_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week13/img/ws_3.png


--------------------------------------------------------------------------------
/weekly-work/week14/img/WnT1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week14/img/WnT1.png


--------------------------------------------------------------------------------
/weekly-work/week14/img/WnT2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week14/img/WnT2.png


--------------------------------------------------------------------------------
/weekly-work/week14/img/WnT3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week14/img/WnT3.png


--------------------------------------------------------------------------------
/weekly-work/week14/img/emmaRL.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week14/img/emmaRL.png


--------------------------------------------------------------------------------
/weekly-work/week14/img/finn1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week14/img/finn1.png


--------------------------------------------------------------------------------
/weekly-work/week14/img/finn1617.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week14/img/finn1617.png


--------------------------------------------------------------------------------
/weekly-work/week14/img/finn2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week14/img/finn2.png


--------------------------------------------------------------------------------
/weekly-work/week14/img/markovDP.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week14/img/markovDP.png


--------------------------------------------------------------------------------
/weekly-work/week14/img/oh15.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week14/img/oh15.png


--------------------------------------------------------------------------------
/weekly-work/week14/img/silverVenn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week14/img/silverVenn.png


--------------------------------------------------------------------------------
/weekly-work/week14/img/tan14.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week14/img/tan14.png


--------------------------------------------------------------------------------
/weekly-work/week15/img/Q-star.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week15/img/Q-star.png


--------------------------------------------------------------------------------
/weekly-work/week15/img/Qvalue-fxn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week15/img/Qvalue-fxn.png


--------------------------------------------------------------------------------
/weekly-work/week15/img/atari-case-study.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week15/img/atari-case-study.png


--------------------------------------------------------------------------------
/weekly-work/week15/img/atari-case-study2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week15/img/atari-case-study2.png


--------------------------------------------------------------------------------
/weekly-work/week15/img/bellman-exn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week15/img/bellman-exn.png


--------------------------------------------------------------------------------
/weekly-work/week15/img/dnn-for-q-learning.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week15/img/dnn-for-q-learning.png


--------------------------------------------------------------------------------
/weekly-work/week15/img/grid-world-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week15/img/grid-world-1.png


--------------------------------------------------------------------------------
/weekly-work/week15/img/grid-world-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week15/img/grid-world-2.png


--------------------------------------------------------------------------------
/weekly-work/week15/img/mdp-defn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week15/img/mdp-defn.png


--------------------------------------------------------------------------------
/weekly-work/week15/img/mdp-process.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week15/img/mdp-process.png


--------------------------------------------------------------------------------
/weekly-work/week15/img/policy-grad-defn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week15/img/policy-grad-defn.png


--------------------------------------------------------------------------------
/weekly-work/week15/img/q-learning-fxn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week15/img/q-learning-fxn.png


--------------------------------------------------------------------------------
/weekly-work/week15/img/reinforce-in-axn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week15/img/reinforce-in-axn.png


--------------------------------------------------------------------------------
/weekly-work/week15/img/reinforce-in-axn2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week15/img/reinforce-in-axn2.png


--------------------------------------------------------------------------------
/weekly-work/week15/img/value-fxn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week15/img/value-fxn.png


--------------------------------------------------------------------------------
/weekly-work/week15/img/value-itn-algo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week15/img/value-itn-algo.png


--------------------------------------------------------------------------------
/weekly-work/week17/README.md:
--------------------------------------------------------------------------------
 1 | # Session XVII: Capsule Nets, BERT & Book Launch
 2 | 
 3 | *Meeting date: October 16th, 2019* 
 4 | 
 5 | For this session, [Dmitri Nesterenko](https://www.linkedin.com/in/dmitri-nesterenko/) and [Grant Beyleveld](https://www.linkedin.com/in/grantbey/) presented on Capsule Networks (slides [here](https://github.com/the-deep-learners/study-group/blob/master/slides/2019-10-16_dmitri_nesterenko__Capsule_Nets.pdf); code demo [here](https://colab.research.google.com/drive/1MOoxhzAZnkqQyCozVg0-_OM6gtJY9K6r#scrollTo=TL4jNoGqmKMB)) and "BERT & Friends" (slides [here](https://github.com/the-deep-learners/study-group/blob/master/slides/2019-10-16_grant_beyleveld__BERT.pdf)), respectively. 
 6 | 
 7 | This session also served as a book launch for Jon Krohn, Grant Beyleveld, and Aglae Bassens' book, [Deep Learning Illustrated](https://www.deeplearningillustrated.com), the content of which was influenced in large part by the previous sessions of the Deep Learning Study Group. The first photo of the three authors together with their book is provided here: 
 8 | 
 9 | ![dli-authors](https://github.com/the-deep-learners/study-group/blob/master/wiki-resources/dlsg-dli-authors.jpg)
10 | 
11 | ---
12 | ## Recommended Preparatory Work
13 | 
14 | 1. Capsule Networks: [here's](https://arxiv.org/abs/1710.09829) the original paper, and there are many Medium posts and YouTube videos providing a higher-level summary of the topic
15 | 2. [BERT](https://arxiv.org/abs/1810.04805)/[RoBERTa](https://arxiv.org/abs/1907.11692): those are the original papers; [here's](https://venturebeat.com/2019/07/29/facebook-ais-roberta-improves-googles-bert-pretraining-methods/) a news piece summarizing their significance
16 | 
17 | ![dli-launch](https://github.com/the-deep-learners/study-group/blob/master/wiki-resources/dlsg-xvii.jpg)
18 | 
19 | ---
20 | 
21 | 
22 | ## Up Next
23 | 
24 | Topics that the Group suggested could be worth studying for our next meeting: 
25 | 
26 | * Dmitri providing three minutes' worth of Capsule Net applications
27 | * NLP:
28 | 	* model distillation / other distilled BERT derivatives
29 | 	* multi-task learning, e.g., [decaNLP](https://decanlp.com/)
30 | * curiosity-driven Reinforcement Learning
31 | * [Rubix cube-solving robot](https://www.youtube.com/watch?v=OZu9gjQJUQs)
32 | 
33 | 
34 | ![dmitri-on-caps](https://github.com/the-deep-learners/study-group/blob/master/wiki-resources/IMG_2697.jpeg)
35 | 


--------------------------------------------------------------------------------
/weekly-work/week2/README.md:
--------------------------------------------------------------------------------
  1 | # Week 2
  2 | meeting date: *09-06-2016*
  3 | 
  4 | ### Covered
  5 | - [Chapter 2](http://neuralnetworksanddeeplearning.com/chap2.html) from Nielsen's Ebook
  6 | - Setup and get comfortable with [Keras](https://keras.io/)
  7 | 	- use [backend for Tensorflow](https://keras.io/backend/)
  8 | 	- [Getting Started](https://keras.io/getting-started/sequential-model-guide/) (only skim examples at the bottom)
  9 | 	- explore the [Keras GitHub](https://github.com/fchollet/keras/tree/master/examples) for interesting models
 10 | 
 11 | 
 12 | ### Nielsen Chapter 2
 13 | 
 14 | - *backpropagation*
 15 | 	- fast alogrithm for computing gradients
 16 | 	- introduced in 1970s
 17 | 	- [This 1986 paper](http://www.nature.com/nature/journal/v323/n6088/pdf/323533a0.pdf) recognized the usefulness of its application in neural nets
 18 | 
 19 | #### Warm up: a fast matrix-based approach to computing output of neural net
 20 | 
 21 | - 3 neuron components
 22 | 	- `w`: weight
 23 | 	- `b`: bias
 24 | 	- `a`: activation
 25 | - `a_l_j = σ(∑_k w_l_jk * a_l-1_k + b_l_j)`
 26 | - Weight matrix entry W_l_jk
 27 | 	- `lth` layer
 28 | 	- jth neruon in layer l
 29 | 	- kth neuron in layer l - 1
 30 | - `a_l = σ(w_l*a_l-1 + b_l)`
 31 | - `z_l = w_l*a_l-1 + b_l`
 32 | 	- Weighted input of the neurons in layer l
 33 | 
 34 | #### The two assumptions we need about the cost function
 35 | 
 36 | - goal of backpropagation is to compute the partial derivatives of the cost function C with respect to any weight w aor bias b
 37 | - 2 assumptions about cost function
 38 | 	1. can be written as an average C = (1/n)∑_x C_x
 39 | 		- allows us to get partial derivatives by averaging partial derivatives of individual training samples
 40 | 	2. can be written as a function of the outputs from the neural network
 41 | 
 42 | #### The Hadamard product
 43 | 
 44 | - `s⊙t` denotes *elementwise* product of two vectors s and t
 45 | 	- `(s⊙t)_j = s_j*t_j`
 46 | 	- called *Hadamard* or *Schur* product
 47 | 
 48 | #### The four fundamental equations behind backpropagation
 49 | - backpropagation is about understanding how changing the weights and biases in a network changes the cost function.
 50 | - `δ_l_j` represents the *error* in the jth neuron in the lth layer.
 51 | 	- with backpropagation we compute this error and then relate it to the partial derivatives
 52 | 	- `δ_l_j ≡ ∂C/∂z_l_j`
 53 | 		- error for jth neuron layer l
 54 | - **4 fundamental equations of backpropagation**
 55 | 	1. `δ_L_j = ∂C/∂a_L_j * σ′(z_L_j)`
 56 | 		- `δ_L = ∇_aC⊙σ′(z_L)` in matrix form
 57 | 	2. `δ_l=((w_l+1)^T*δ_l+1) ⊙ σ′(z_l)`
 58 | 		- expresses the errors in layer *l* in terms of error in the next layer *l+1*
 59 | 		- combining equation 1 and 2 allows us to compute the error for any layer in the net
 60 | 	3. `∂C/∂b_l_j = δ_l_j`
 61 | 		- rate of change of the cost with respect to any bias in the network
 62 | 	4. `∂C/∂w_l_jk = a_l−1_k*δ_l_j`
 63 | 		- rate of change of the cost with respect to any wight in the network
 64 | 		- when activation is small, the gradient term with respect to w will tend to be small
 65 | 		- weights output from low-activation neurons learn slowly
 66 | 
 67 | - sigmoid function is very flat around 0 or 1, so `σ′(z_L_j) ≈ 0`
 68 | 	- From equation 1 output neuron in final layer will learn slowly if the output neuron is either low or high activation (0 or 1).
 69 | 	- From equation 2, error is likely to get small if neuron is near saturation
 70 | - These 4 equations hold for any activation function, not just the sigmoid function
 71 | 	- proofs don't use an special properties of σ
 72 | 	- so we could pick an activation function whose derivative that is never close to 0 to prevent the slow-down of learning that occurs with saturated Sigmoid functions
 73 | 	
 74 | #### Proof of the four fundamental equations (optional)
 75 | - All four equations are consequences of the chain rule from multivariable calculus
 76 | - because `a_L_j = σ(z_L_j)`, `∂a_L_j/∂z_L_j = σ′(z_L_j)`
 77 | - We can think of backpropagation as a way of computing the gradient of the cost function by systematically applying chain rule from multi-variable calculus
 78 |   
 79 | #### The backpropagation algorithm
 80 | - High-level steps
 81 | 	1. **Input** x: set corresponding activation a_1 for the input layer
 82 | 	2. **Feedforward**: for each l = 2, 3, ..., L compute `z_l = w_l*a_l-1 + b_l` and `a_l = σ(z_l)`
 83 | 	3. **Output error** δ_L: compute the vector `δ_L = ∇_a*C ⊙ σ′(z_L)`
 84 | 	4. **Backpropagate the error:** for each l = L-1, L-2, ..., 2 compute `δ_l = ((w_l+1)T*δ_l+1) ⊙ σ′(z_l)`
 85 | 	5. **Output:** The gradient of the cost function is given by `∂C/∂w_l_jk = a_l−1_k * δl_j` and `∂C/∂b_l_j = δ_l_j`
 86 | 
 87 | - Error vectors are computed backward starting with final layer.
 88 | 	- cost is a function of outputs from the network
 89 | 	- to understand how cost varies with earlier weights and biases, we need to apply the chain rule backwards through layers
 90 | - Backpropagation algo computs gradient of cost function for a single training sample
 91 | 	- C = C_x
 92 | - Common to combine backpropagation with a learning algo such as stochastic gradient descent (SGD) to compute gradient for many training samples
 93 | - Example learning step of gradient descent with mini-batch of m training samples
 94 | 	1. **Input a set of training examples**
 95 | 	2. **For each training example** x: set the corresponding input activation a_x,1 and perform the following steps
 96 | 		a. **Feedforward**
 97 | 		b. **Output error**
 98 | 		c. **Backpropagate the error**
 99 | 	3. **Gradient descent**: Fore each l = L, L-1, ..., 2 update the weights and biases based on learning rules for mini-batch
100 |   		
101 | #### The code for backpropagation
102 | - refers mostly to Nielsen's GitHub project [neural-networks-and-deep-learning](https://github.com/mnielsen/neural-networks-and-deep-learning)
103 |   	
104 | #### In what sense is backpropagation a fast algorithm?
105 | - Example calculation without backpropagation:
106 | 	- approximate deriviative of cost: `∂C/∂wj ≈ (C(w+ϵej)−C(w))/ϵ`
107 | 	- this is easy but very slow
108 | 		- if we have 1M weights in a network, to compute gradient we must compute the cost function 1M times, each requiring an forward pass through the network per training sample.
109 | - backpropagation allows us to simultaneously comput *all* partial derivatives using just one forward pass through the network, followed by a backward pass per training sample.
110 | 	- this is MUCH faster
111 |   
112 | #### Backpropagation: the big picture
113 | - 2 mysteries
114 | 	1. Building deeper intuition around what's going on during all these matrix and vector multiplications
115 | 	2. How could someone ever discover backpropagation in the first place?
116 | - Tracking how a change in weight or bias at a particular layer propagates through he network and results in a change in Cost leads to a complex sum over a product of partial derivatives of activations between layers
117 | 	- this expression expressed and manipulated with some calculus and linear algebra will lead to the 4 equations of backpropagation
118 | 	
119 | 


--------------------------------------------------------------------------------
/weekly-work/week3/README.md:
--------------------------------------------------------------------------------
 1 | # Week 3
 2 | meeting date: *9-28-2016*
 3 | 
 4 | ## Covered
 5 | - [Chapter 3](http://neuralnetworksanddeeplearning.com/chap3.html) from Nielsen's Ebook
 6 | - "Part I: Introduction" of Peleg and Maggio's [Keras tutorial](https://github.com/leriomaggio/deep-learning-keras-euroscipy2016) from EuroSciPy in August
 7 | 
 8 | ## Nielsen Chapter 3: Improving the way Neural Networks Learn
 9 | 
10 | #### to avoid learning slowdown
11 | 
12 | - choose cost functions that learn more quickly when the predicted output is far from the desired one, e.g.:
13 | 	- if you’d like to consider outputs independently, select sigmoid neurons paired with cross-entropy cost
14 | 	- if you’d like to consider outputs simultaneously and as probability distributions, select a softmax layer of neurons with log-likelihood cost
15 | 	
16 | #### to avoid overfitting 
17 | 
18 | - **stop training early**, i.e., when classification accuracy on test data flattens
19 | - use the popular [dropout](https://www.cs.toronto.edu/~hinton/absps/JMLRdropout.pdf) methodology
20 | - artificially expand your data set, e.g., by rotating MNIST digits slightly or adding noise to audio recordings
21 | - regularize: we covered [L1 and L2 regularization](https://www.quora.com/What-is-the-difference-between-L1-and-L2-regularization) in detail, with nuclear physicist [Thomas Balestri](https://www.linkedin.com/in/thomasbalestri) leading elucidation 
22 | 
23 | #### to initialize weights and biases
24 | 
25 | - to avoid initial saturation of neurons, sample randomly from a normal         distribution with mean of zero and a standard deviation of 1/√(n inputs)
26 | 
27 | #### Nielsen’s suggested sequence for choosing hyper-parameters
28 | 
29 | 1. Broad Strategy 
30 | 	- first, achieve any level of learning that is better than chance
31 | 	- this may require simplifying the problem the network is trying to solve (e.g., distinguishing the digits 0 and 1 instead of attempting to classify all ten digits)
32 | 	- this may require simplifying the network architecture or reducing the size of the training data by orders of magnitude
33 | 	- speed up experimentation by maximizing the frequency with which you can monitor your network, thereby getting instantaneous feedback on performance (and, in my opinion, reducing the opportunity to be distracted by other tasks)
34 | 2. Learning Rate 𝜼
35 | 	- monitor cost to tune 𝜼 but monitor accuracy for the other hyper-parameters covered here
36 | 	- initially adjust 𝜼 by orders of magnitude to find a relatively smooth cost curve, i.e., with minimal oscillation
37 | 	- fine-tune 𝜼 to the smooth cost further
38 | 	- last, consider a variable learning rate schedule that begins fast (large 𝜼) and slows down (smaller 𝜼), perhaps repeatedly
39 | 3. Number of Epochs
40 | 	- as mentioned above, early stopping (when classification accuracy on test data flattens out) prevents overfitting
41 | 	- having a no-accuracy-improvement-in-n rule (e.g., n = 10 epochs) introduces another hyper-parameter that you could potentially fit as networks can plateau for a while before improving again, but try not to obsess over it
42 | 4. Regularization Parameter ƛ
43 | 	- initially start with no regularization (i.e., ƛ = 0) while determining the above hyper-parameters
44 | 	- use the validation data to select a better ƛ starting with ƛ = 1.0
45 | 	- increase or decrease ƛ by orders of magnitude, then fine tune
46 | 	- re-visit and re-optimize 𝜼
47 | 5. Mini-Batch Size
48 | 	- optimal mini-batch size varies as a function of the available memory on your machine, the dimensionality of your data, and the complexity of your neural network architecture
49 | 	- if too large, model weights aren’t updated enough; if too small, hardware and software resources are wasted
50 | 	- after tuning 𝜼 and ƛ, plot validation accuracy versus real elapsed time to close in on a mini-batch size that maximizes training speed
51 | 	- re-visit and re-optimize both 𝜼 and ƛ
52 | 6. Automated Techniques
53 | 	- you can use a grid search, including open-source software, to optimize hyper-parameters automatically (e.g., [Spearmint](https://github.com/JasperSnoek/spearmint))
54 | 
55 | #### Variations on Stochastic Gradient Descent
56 | 
57 | - **Hessian optimization**
58 | 	- incorporates the gradient descent analogue of momentum (second-order changes) into weight and bias optimization
59 | 	- demonstrably converges on a minimum in fewer steps than standard gradient descent
60 | 	- requires considerably more memory than standard gradient descent because of the enormity of the Hessian matrix
61 | - **Momentum-based gradient descent**
62 | 	- inspired by Hessian optimization but avoids excessively large matrices
63 | 	- to balance between speed and avoiding overshooting a minimum, involves tuning the momentum coefficient μ between zero and one on validation data
64 | - BFGS, limited-memory BFGS, Nesterov’s accelerated gradient
65 | 	- these are further popular alternative methods, but we didn’t cover them in any detail
66 | 
67 | #### Alternative Artificial Neurons
68 | 
69 | - **tanh**
70 | 	- bizarrely, apparently pronounced *tanch*
71 | 	- shape approximates the sigmoid function, but ranges from -1 to 1 instead of zero to one, thereby facilitating both positive and negative activations
72 | 	- [some evidence](http://jmlr.org/proceedings/papers/v9/glorot10a/glorot10a.pdf) suggests it outperforms sigmoid neurons
73 | - **ReLU**
74 | 	- rectified linear unit or rectified linear neuron
75 | 	- linear, so computationally simpler relative to sigmoid or tanh, but in a network can approximate their performance and nevertheless compute any function
76 | 
77 | 
78 | ## Applications
79 | 
80 | In addition to the theoretical work above, we applied our knowledge to software applications:
81 | 
82 | - untapt’s lead engineer Gabe Rives-Corbett demonstrated the high-level deep-learning library Keras with some of our in-house models as well as Peleg and Maggio’s (above-mentioned) tutorial
83 | - virologist [Grant Beyleveld](https://grantbeyleveld.wordpress.com/) unveiled the neural network he built from scratch in Python and committed into the study group repo [here](https://github.com/the-deep-learners/study-group/tree/master/nn-from-scratch)
84 | 


--------------------------------------------------------------------------------
/weekly-work/week4/README.md:
--------------------------------------------------------------------------------
 1 | # Week 4
 2 | 
 3 | meeting date: 10-20-2016
 4 | 
 5 | ## Covered
 6 | 
 7 | * Recommended reading from Nielsen's electronic text: 
 8 | 	* [Chapter Four](http://neuralnetworksanddeeplearning.com/chap4.html) 
 9 | 	* [Chapter Five](http://neuralnetworksanddeeplearning.com/chap5.html) 
10 | 	
11 | ### Proof Neural Nets can Compute any Function
12 | 
13 | * Neural nets can compute any function (i.e., they are *universal*), assuming that:
14 | 	1. we accept they are an *approximation* (that can be improved by the inclusion of additional hidden neurons), as opposed to an *exact* solution
15 | 	2. the function they are explaining is *continuous* (e.g., no sharp jumps)
16 | 	
17 | * For the first time in our study session, we moved from whiteboarding to a projector to cover this content
18 | * In his fourth chapter, Michael Nielsen did a tremendous job of developing thematically-coherent, interactive Java applets that facilitate a clear visual understanding of this proof; try it for yourself!
19 | * A fair bit of our discussion centered on the practicalities of expanding the proof beyond two inputs features into *n*-dimensional space
20 | 
21 | ### Factors making Deep Neural Networks Difficult to Train
22 | 
23 | * We primarily discussed the causes of, implications of, and methods to mitigate *unstable* gradients, which in deep neural nets tend to *vanish* but under certain circumstances can instead *explode*
24 | * We also touched on other factors that can make deep nets difficult to train, e.g., the propensity for sigmoid neurons to saturate in later layers, the perils of fully-random weight initialization 
25 | 
26 | ### Visualizing the Function of Particular Hidden Layers
27 | 
28 | * [Thomas Balestri](https://www.linkedin.com/in/thomasbalestri) introduced us to Jason Yosinski's breathtaking [Deep Visualization Toolbox](https://www.youtube.com/watch?v=AgkfIQ4IGaM) for developing an understanding of how individual layers contribute to  a convolutional NN
29 | 
30 | ## Applications
31 | 
32 | * We took a break from applications for this session to focus on finishing shortly Nielsen's text, but we'll return to practical work for the next session
33 | 


--------------------------------------------------------------------------------
/weekly-work/week5/README.md:
--------------------------------------------------------------------------------
 1 | # Session 5: Deep (Conv)Nets
 2 | 
 3 | Meeting date: November 10th, 2016
 4 | 
 5 | ## Recommended Preparatory Work
 6 | 
 7 | * [Ch. 6 of Michael Nielsen's text (the final chapter)](http://neuralnetworksanddeeplearning.com/chap6.html)
 8 | * [TensorFlow for Poets](https://codelabs.developers.google.com/codelabs/tensorflow-for-poets/index.html)
 9 | 
10 | ## Summary
11 | 
12 | #### Three Key Properties of Convolutional Neural Networks
13 | 
14 | 1. local receptive fields
15 | 2. shared weights and biases (within a given _kernel_ or _filter_)
16 | 3. pooling layers
17 | 
18 | #### Architecture Changes That Can Improve Classification Accuracy
19 | 
20 | See [this Jupyter notebook](https://github.com/the-deep-learners/study-group/blob/master/weekly-work/week5/network3.ipynb) for a Theano-focused script (based on Nielsen's code and text) that incrementally improves MNIST digit classification accuracy by: 
21 | 
22 | 1. increasing the number of convolutional-pooling layers
23 | 2. using ReLU units in place of the sigmoid or _tanh_ variety
24 | 3. algorithmically expanding the training data
25 | 4. adding fully-connected layers (modest improvement)
26 | 5. using an ensemble of networks
27 | 
28 | #### Why Does ConvNet Training Work (Despite Unstable, e.g., Vanishing, Gradients)?
29 | 
30 | 1. convolutional layers have fewer parameters because of weight- and bias-sharing
31 | 2. "powerful" regularization techniques (e.g., dropout) to reduce overfitting
32 | 3. ReLU units (quicker training relative to sigmoid/_tanh_)
33 | 4. using GPUs if we're training for many epochs
34 | 5. sufficiently large set of training data (including algorithmic expansion if possible)
35 | 6. appropriate cost function choice
36 | 7. sensible weight initialization
37 | 
38 | #### Other Classes of Deep Neural Nets We Touched on Briefly
39 | 
40 | 1. _recurrent neural networks_ (RNNs), with special discussion of _long short-term memory units_ (LSTMs)
41 | 2. _deep belief networks_ (DBNs)
42 | 
43 | #### TensorFlow for Poets
44 | 
45 | * makes it trivial to leverage the powerful neural net image-classification architecture of _Inception v3_
46 | * study group member Thomas Balestri quickly trained it into an impressive image-classification tool for consumer products
47 | 
48 | ## Up Next
49 | 
50 | [CS231n Convolutional Neural Networks for Visual Recognition](http://cs231n.github.io/) notes and lectures
51 | 


--------------------------------------------------------------------------------
/weekly-work/week6/README.md:
--------------------------------------------------------------------------------
 1 | # Session 6: Convolutional Neural Networks for Visual Recognition
 2 | 
 3 | Meeting date: November 30th, 2016
 4 | 
 5 | This was our first session since completing Michael Nielsen's [Neural Networks and Deep Learning](http://neuralnetworksanddeeplearning.com) text. 
 6 | 
 7 | ## Recommended Preparatory Work
 8 | 
 9 | 1. the [first six lectures](https://www.youtube.com/watch?v=g-PvXUjD6qg&list=PLlJy-eBtNFt6EuMxFYRiNRS07MCWN5UIA) of Stanford's Winter 2016 CS231n course
10 | 1. the first four sets of [course notes](http://cs231n.github.io/), which cover:
11 |   * [classification](http://cs231n.github.io/classification/)
12 |   * [linear classification](http://cs231n.github.io/linear-classify/)
13 |   * [optimization](http://cs231n.github.io/optimization-1/)
14 |   * [more optimization](http://cs231n.github.io/optimization-2/)
15 | 1. optionally, [module 0](http://cs231n.github.io/) in the course notes, which provide an introduction to Python, NumPy, Jupyter Notebooks, the Unix command line, and Amazon Web Services
16 | 
17 | ## Summary
18 | 
19 | The course notes linked to above provide excellent summaries of the material covered in the second lecture onward. For the first lecture, given by the illustrious Fei-Fei Li, here are my (i.e., Jon Krohn) own notes: 
20 | 
21 | #### Context
22 | 
23 | * Cisco: 85% of data on Internet is in the form of pixels ("dark matter")
24 | * more video sensors on earth than people
25 | * every minute, 150 hours of video are uploaded to YouTube
26 | 
27 | #### A History of Vision and Vision Research
28 | 
29 | * 543m years ago, explosion in speciation; Andrew Parker theorises this is due to the evolution of eyes (a simple pinhole light sensor in Trilobites)
30 | * first well-documented effort to duplicate the visual world: da Vinci's Camera Obscura (15th century)
31 | * Hubel & Wiesel (1959) Harvard postdocs 1981 Nobel Prize-winning work
32 | 	* vision starts with simple structures (edges) not fish
33 | 
34 | #### A History of Computer Vision
35 | 
36 | * Larry Roberts (1963) "Block World"
37 | 	* theorised that edge detection enables recognition of blocks from many angles
38 | * first two AI labs:
39 | 	1. Marvin Minsky at MIT
40 | 	2. John McCarthy at Stanford: coined "Artificial Intelligence" term
41 | * David Marr (1970s): 
42 | 	* "Stages of Visual Representation" (it is hierarchical)
43 | 	* first stage is "edge image" akin to H&W
44 | 	* second stage is 2-D sketch
45 | 	* final, third stage is 3D representation; enables guidance and manipulation in the real world
46 | * David Lowe (1987): use edges to distinguish monochromatic razors
47 | * Shi & Malik (1997): "Normalized Cut" was first stages of distinguishing objects in image (segmentation)
48 | * Viola & Jones (2001): face detection within image
49 | 	* used in FujiFilm digital cameras in 2006; the first with face detection
50 | 	* first algorithm fast enough to be used for instantaneous machine vision
51 | * David Lowe (1999): "SIFT" Object Recognition via (a handful of key) features, as opposed to full figure
52 | 	* this was the basis of machine vision for a decade -- until age of Deep Learning
53 | 	* features that Deep Learning networks learn are similar to features programmed by engineers
54 | * prior to Deep Learning approaches, primary techniques were graphical models and SVMs
55 | 	* e.g., "Deformable Part Model", which used "something like" SVM
56 | * PASCAL Video Object Challenge (2006-12) demonstrated improved classification performance on twenty object categories
57 | 	* IMAGENET (image-net.org) built in response by Fei-Fei Li and her colleagues (2009) with 22k categories and 14M images
58 | 	* IMAGENET Large Scale Visual Recognition Challenge: uses 1000 of IMAGENET object classes and 1.4M images
59 | 		* error rate decreases year-over-year, but in 2012 error rate was cut in half by ConvNet (*SuperVision* by Krizhevsky & Hinton; seven-layer)
60 | 		* CONVNET invented in '70s but confluence of techniques enabled it to be transformative in that year
61 | 		* in 2014, best architectures were GoogLeNet and VGG
62 | 		* winning architecture in 2015 is MRSA (Microsoft Asia Researchers), which has 100 layers
63 | 		
64 | #### CS231n course overview
65 | 
66 | * focus on the visual recognition problem, specifically image classification, within IMAGENET
67 | * also covers *object detection*, *image captioning*, and *action classification*
68 | * CNNs were "not invented overnight"
69 | 	* major contributions were:
70 | 		* 1980s: LeCun and Hinton worked out backpropagation mathematics
71 | 		* LeCun et al. (1998): MNIST digit classification, eventually sold to U.S. Mail and banks (for cheques)
72 | 		* Krizhevsky et al. (2012): similar architecture to 1998, but able to leverage GPUs with three orders of magnitude more transistors, and able to train on IMAGENET, which has seven orders of magnitude more pixels than MNIST data; additional, but less important changes, include the use of ReLU in place of sigmoid neurons
73 | * problems that still need to be solved in machine vision
74 | 	* classification of *all* objects in image
75 | 	* recognition within three dimensions, e.g., for use in robotics
76 | 	* anything related to motion
77 | 	* "understanding" the relationship between objects, as opposed to just labelling objects (e.g., Justin Jacobs' *Visual Genome* Project)
78 | 	* the "holy grail" is to be able to narrate a scene; people can write an essay after seeing a scene for just 500ms (Fei-Fei et al., 2007)
79 | * machines vision facilitates better robots and will save lives
80 | 
81 | ## Application
82 | 
83 | Our colourful study group member [Dmitri Nesterenko](https://www.linkedin.com/in/dmitri-nesterenko-7ba4484), who is Director of Software Engineering at the **XO Group** downtown, went into considerable, helpful detail describing his adventures writing a *k*-Nearest Neighbours algorithm [from scratch](https://www.linkedin.com/in/dmitri-nesterenko-7ba4484). 
84 | 
85 | ## Up Next
86 | 
87 | 1. the remaining lectures and notes of CS231n, split over two sessions, in January and February
88 | 1. RNN/LSTM courses and materials from Richard Socher and Chris Olah
89 | 1. a hands-on TensorFlow tutorial with engineers from the Google office in New York
90 | 1. the [Deep Learning Papers Reading Roadmap](https://github.com/songrotek/Deep-Learning-Papers-Reading-Roadmap)
91 | 


--------------------------------------------------------------------------------
/weekly-work/week7/README.md:
--------------------------------------------------------------------------------
  1 | # Session VII: Implementing Convolutional Nets
  2 | 
  3 | *Meeting date: January 12th, 2017*
  4 | 
  5 | By continuing to make our way through the material from Andrej Karpathy and Justin Johnson's CS231n (Stanford) lectures, we covered a broad range of practicalities and best practices for implementing convolutional neural nets. 
  6 | 
  7 | ## Recommended Preparatory Work
  8 | 
  9 | 1. The final thirteen minutes of the [sixth CS231n lecture](https://www.youtube.com/watch?v=KaR4lIdI1MQ&index=1&list=LLup-fnSNRaByeuXOWqfnykw) (i.e., starting from the 57:30 mark)
 10 | 2. Lectures [seven](https://www.youtube.com/watch?v=AQirPKrAyDg) through [twelve](https://www.youtube.com/watch?v=XgFlBsl0Lq4) of CS231n 
 11 | 3. CS231n lecture notes [five](http://cs231n.github.io/neural-networks-1/), [six](http://cs231n.github.io/neural-networks-2/), and [seven](http://cs231n.github.io/neural-networks-3/)
 12 | 
 13 | ## Summary
 14 | 
 15 | Topic highlights of the session included: 
 16 | 
 17 | #### From Lecture 7
 18 | 
 19 | * common settings for the four hyperparameters of a convolutional layer, working through examples as the numbers must work out:
 20 | 	* **K**: the number of filters (typically in powers of two -- some libraries optimise calculations to these levels)
 21 | 	* **F**: spatial extent of the filters
 22 | 	* **S**: stride length
 23 | 	* **P**: the amount of zero padding
 24 | * famous convolutional net architectures, while focusing on their changes (associated with classification accuracy improvements in ILSVRC) over time: 
 25 | 	* LeNet-5 (LeCun et al., 1998)
 26 | 	* SuperVision / "AlexNet" (Krizhevsky et al., 2012)
 27 | 	* ZFNet (Zeiler & Fergus, 2013)
 28 | 	* VGGNet (Simonyan & Zisserman, 2014)
 29 | 	* GoogLeNet (Szegedy et al., 2014)
 30 | 	* ResNet (He et al., 2015)
 31 | * network depth versus ILSVRC classification accuracy over time
 32 | * ResNet network depth versus CIFAR-10 classification accuracy
 33 | 
 34 | #### From Lecture 8
 35 | 
 36 | * comparing computer vision tasks, e.g.: 
 37 | 	* single object
 38 | 		* classification
 39 | 		* classification + localisation
 40 | 	* multiple object
 41 | 		* object detection
 42 | 		* instance segmentation
 43 | * the ILSVRC localisation error of famous ConvNet architectures: 
 44 | 	* AlexNet (2012)
 45 | 	* Overfeat (2013)
 46 | 	* VGG (2014)
 47 | 	* ResNet (2015)
 48 | * object detection data sets: 
 49 | 	* PASCAL VOC (2010): classic
 50 | 	* ILSVRC *Detection* (2014): most classes and images per class
 51 | 	* MS-COCO (2014): most objects per image
 52 | * as with image recognition, *R-CNN* greatly outperforms pre-ConvNet methods
 53 | * *Fast R-CNN* and the subsequent *Faster R-CNN* maintain classification accuracy but are 25 and 250 times faster than R-CNN, respectively
 54 | 	* code for all three networks are available in the Caffe Zoo
 55 | 	
 56 | #### From Lecture 9
 57 | 
 58 | * "deconvolutional" approaches for visualising and understanding individual neurons within convolutional neural networks: 
 59 | 	1. feed an image into the net
 60 | 	2. pick a layer, set the gradient there to be all zero except for one
 61 | 	3. for some neuron of interest, backprop to image
 62 | * NeuralStyle (Gatys et al., 2015): set an image to any style
 63 | * intuitive explanations for fooling ConvNets (e.g., Nguyen, Yosinski & Clune, 2014; Szegedy et al., 2013):
 64 | 	* visually: cases with parameters cleverly outside of the training set (Goodfellow, Shlens & Szegedy, 2014)
 65 | 	* manually working through the arithmetic of fooling a binary linear classifier
 66 | 	
 67 | #### From Lecture 10
 68 | 
 69 | * interpretable RNN neurons, as identified manually within text by [Karpathy, Johnson and Li (2015)](https://arxiv.org/abs/1506.02078):
 70 | 	* quote detection 
 71 | 	* line length 
 72 | 	* if statements 
 73 | 	* quotes or comments
 74 | 	* code indent depth
 75 | * image captioning becomes possible by supplementing ConvNets with LSTMs (five key papers are provided on slide 51)
 76 | 	* this requires image-sentence datasets, e.g.:
 77 | 		* MS-COCO (2014), again (120k images, 5 sentences each)
 78 | 	* ResNet is to vanilla ConvNet ~as LSTM is to RNN
 79 | 	* GRUs (Cho et al., 2014) are the key alternative to LSTMs
 80 | 	* Jozefowicz et al. (2015) provides a helpful, empirical comparison of RNN architectures
 81 | 	
 82 | #### From Lecture 11
 83 | 
 84 | * NVIDIA chips are much more common than AMD for deep learning
 85 | * GPUs greatly outperform CPUs
 86 | * SSDs greatly outperform classic hard disks
 87 | * disk size can become a limiting factor
 88 | * floating point precision can go very low:
 89 | 	* Courbariaux and Bengio (2016) train with single-bit activations and weights, so they are all simply either +1 or -1, though gradients require greater precision
 90 | 	
 91 | #### From Lecture 12
 92 | 
 93 | * see [blog post](https://insights.untapt.com/fundamental-deep-learning-code-in-tflearn-keras-theano-and-tensorflow-66be10a03227) I (Jon Krohn) published that summarises the pros and cons of the four primary deep learning libraries (TensorFlow, Theano, Torch, and Caffe) as covered by Justin Johnson in this lecture
 94 | * in addition, here are Justin's broad recommendations: 
 95 | 	* for feature extraction or fine-tuning existing models: use Caffe
 96 | 	* for complex uses of pretrained models: use Lasagne or Torch
 97 | 	* for writing your own layers: use Torch
 98 | 	* for "crazy" RNNs: use Theano or TensorFlow
 99 | 	* for a very large model that requires parallelism: use TensorFlow
100 | 
101 | ## Up Next
102 | 
103 | 1. the remaining lectures and notes of CS231n, in February
104 | 1. Richard Socher's CS224d (also out of Stanford) on Deep Learning for Natural Language Processing, in early March
105 | 


--------------------------------------------------------------------------------
/weekly-work/week8/README.md:
--------------------------------------------------------------------------------
 1 | # Session VIII: Unsupervised Learning, Regularisation, and Venture Capital
 2 | 
 3 | *Meeting date: February 7th, 2017*
 4 | 
 5 | With this session, we wrapped up our coverage of [CS231n](http://cs231n.github.io/) (Stanford) lectures, which were delivered by now-familiar faces Andrej Karpathy and Justin Johnson as well as guest lecturer, Google Senior Fellow Jeff Dean. 
 6 | 
 7 | In addition, we were delighted to hear from guest speakers of our own: 
 8 | 
 9 | 1. **[Raphaela Sapire](https://angel.co/raphaela-sapire)** on her experience as a venture capitalist at Blue Seed Capital, particularly her insight into the machine- and deep-learning start-up market (slides [here](https://github.com/the-deep-learners/study-group/blob/master/slides/2017-02-07__raphaela_sapire__billion_dollar_AI.pdf))
10 | 2. **[Katya Vasilaky](https://kathrynthegreat.github.io/)** on her research into L2 Regularization, the popular method to avoid overfitting in a wide range of models, including the deep-learning variety (slides [here]())
11 | 
12 | A summary blog post, replete with photos of the session, can be found [here](https://medium.com/@jjpkrohn/deep-learning-study-group-viii-unsupervised-learning-regularisation-and-venture-capital-9aba67fc931c). 
13 | 
14 | 
15 | ## Recommended Preparatory Work
16 | 
17 | 1. The final three lectures from CS231n ([13](https://www.youtube.com/watch?v=UFnO-ADC-k0&list=PLlJy-eBtNFt6EuMxFYRiNRS07MCWN5UIA&index=13), [14](https://www.youtube.com/watch?v=I-i1KBuShCc&list=PLlJy-eBtNFt6EuMxFYRiNRS07MCWN5UIA&index=14), and [15](https://www.youtube.com/watch?v=s63vOy1kvsU&list=PLlJy-eBtNFt6EuMxFYRiNRS07MCWN5UIA&index=15))
18 | 2. as well as the final four sets of notes ([one](http://cs231n.github.io/neural-networks-case-study/), [two](http://cs231n.github.io/convolutional-networks/), [three](http://cs231n.github.io/understanding-cnn/), and [four](http://cs231n.github.io/transfer-learning/))
19 | 
20 | 
21 | ## Summary
22 | 
23 | 
24 | Topic highlights of the session included: 
25 | 
26 | 
27 | #### From Lecture 14
28 | 
29 | ##### Karpathy on ConvNets applied to motion (videos)
30 | 
31 | * "fancy" spatio-temporal video ConvNets:
32 | 	* for detecting global motion: provide limited or no benefit over LSTM applied to individual video frames 
33 | 	* for detecting local motion, use a 3D ConvNet
34 | 	* try using Optical Flow in a second stream or GRU-RCN (the latter being Karpathy's favourite)
35 | 
36 | ##### Johnson on unsupervised learning:
37 | 
38 | * autoencoder overview
39 | 	* traditional: 
40 | 		* try to reconstruct input
41 | 		* used to learn features, initialise supervised model
42 | 		* no longer predominant 
43 | 	* variational: 
44 | 		* Bayesian statistics crossed with Deep Learning (<3)
45 | 		* generate samples, e.g., images by sampling
46 | 	* Generative Adversarial Networks: Generate samples
47 | * autoencoders in practice
48 | 	* input data (x) --> [encoder] --> features (z) --> [decoder] --> reconstructed input data (x)
49 | 	* the [encoder] and [decoder] often share weights
50 | 	* decoders evolved via this sequence: 
51 | 		1. linear + sigmoid neurons
52 | 		2. deep, fully-connected
53 | 		3. ReLU ConvNet ("upconv")
54 | * Greedy Training of Autoencoders:
55 | 	* "Restricted Boltzmann Machines" (RBMs) were common in mid-2000s
56 | 		* train one layer at a time
57 | 		* start with first layer, freeze it, move to second layer, etc.
58 | * Varational Autoencoders:
59 | 	* generate data by using Bayesian statistics within an autoencoder framework to sample from prior and posterior distributions
60 | 	* can, e.g., output smooth interpolations of input data
61 | * Generative Adversarial Networks:
62 | 	* seminal paper is Goodfellow et al. (NIPS 2014)
63 | 	* random noise --> [generator] --> fake images (plus, separately, real images from a data set) --> [discriminator] --> trained to distinguish real images from fake
64 | 	* image generation with "less math"
65 | 	* train generator and discriminator jointly; after training, image generation is straightforward
66 | 	* Denton et al. (NIPS 2015): expanded work by enabling discriminators to work at every scale (applied to single classes of CIFAR-10 dataset)
67 | 	* Radford et al. (ICLR 2016): 
68 | 		* create realistic, latent space-interpolatable images of bedrooms
69 | 		* their generator: upsampling network with fractionally-strided convolutions
70 | 		* their discriminator: a ConvNet
71 | 		* "Architecture guidlines for stable Deep Conv GANs" (from Johnson):
72 | 			* replace any pooling layers with strided convolutions (discriminator) and fractional-strided convolutions (generator)
73 | 			* user batch normalisation in both the generator and discriminator
74 | 			* remove fully-connected hidden layers for deeper architectures
75 | 			* use ReLU activation in generator for all layers except for output (Tanh)
76 | 			* use Leaky ReLU activation in all layers of discriminator
77 | 		* vector math: 
78 | 			* [smiling woman] - [neutral woman] + [neutral man] = [smiling man]
79 | 			* [man with glasses] - [man without glasses] + [woman without glasses] = [woman with glasses]
80 | 	* Dosovitskiy & Brox (arXiv 2016):
81 | 		* creates convincing new ImageNet samples
82 | 		* trained on all ImageNet classes together
83 | 		* broadly a Variational Autoencoder fed into both (1.) a Discriminator network and (2.) a pretrained AlexNet (see slide 128 "Putting everything together" for diagram)
84 | 
85 | 
86 | ## Up Next
87 | 
88 | 1. Richard Socher's [CS224d](https://cs224d.stanford.edu/) (also out of Stanford) on Deep Learning for Natural Language Processing, in early March
89 | 


--------------------------------------------------------------------------------
/weekly-work/week9/02_pros_and_cons_of_counting_vs_w2v.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week9/02_pros_and_cons_of_counting_vs_w2v.png


--------------------------------------------------------------------------------
/weekly-work/week9/03_05_GloVe_visualizations_gender.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week9/03_05_GloVe_visualizations_gender.png


--------------------------------------------------------------------------------
/weekly-work/week9/03_06_GloVe_visualizations_CEO.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week9/03_06_GloVe_visualizations_CEO.png


--------------------------------------------------------------------------------
/weekly-work/week9/03_07_GloVe_visualizations_superlatives.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week9/03_07_GloVe_visualizations_superlatives.png


--------------------------------------------------------------------------------
/weekly-work/week9/2017_02_skipgram_diagram.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week9/2017_02_skipgram_diagram.png


--------------------------------------------------------------------------------
/weekly-work/week9/2017_02_softmax.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week9/2017_02_softmax.png


--------------------------------------------------------------------------------
/weekly-work/week9/2017_02_w2v_dot_products.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week9/2017_02_w2v_dot_products.png


--------------------------------------------------------------------------------
/weekly-work/week9/2017_02_word2vec_definition.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week9/2017_02_word2vec_definition.png


--------------------------------------------------------------------------------
/weekly-work/week9/fun_glove_expressions.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week9/fun_glove_expressions.png


--------------------------------------------------------------------------------
/weekly-work/week9/w2v_objective_fxn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week9/w2v_objective_fxn.png


--------------------------------------------------------------------------------
/wiki-resources/3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/wiki-resources/3.jpg


--------------------------------------------------------------------------------
/wiki-resources/5_cropped.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/wiki-resources/5_cropped.jpg


--------------------------------------------------------------------------------
/wiki-resources/IMG_2697.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/wiki-resources/IMG_2697.jpeg


--------------------------------------------------------------------------------
/wiki-resources/IMG_5959.JPG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/wiki-resources/IMG_5959.JPG


--------------------------------------------------------------------------------
/wiki-resources/IMG_5974.JPG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/wiki-resources/IMG_5974.JPG


--------------------------------------------------------------------------------
/wiki-resources/IMG_7624.JPG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/wiki-resources/IMG_7624.JPG


--------------------------------------------------------------------------------
/wiki-resources/IMG_7641.JPG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/wiki-resources/IMG_7641.JPG


--------------------------------------------------------------------------------
/wiki-resources/IMG_9147.JPG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/wiki-resources/IMG_9147.JPG


--------------------------------------------------------------------------------
/wiki-resources/dlsg-dli-authors.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/wiki-resources/dlsg-dli-authors.jpg


--------------------------------------------------------------------------------
/wiki-resources/dlsg-xvii.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/wiki-resources/dlsg-xvii.jpg


--------------------------------------------------------------------------------
/wiki-resources/gitflow.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/wiki-resources/gitflow.png


--------------------------------------------------------------------------------
/wiki-resources/jk-at-dlsg-xviii.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/wiki-resources/jk-at-dlsg-xviii.jpg


--------------------------------------------------------------------------------
/wiki-resources/session_XI_crew.JPG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/wiki-resources/session_XI_crew.JPG


--------------------------------------------------------------------------------