├── .gitignore ├── LICENSE ├── README.md ├── demos-for-talks ├── AlexNet.ipynb ├── Keras_MNIST_ConvNet.ipynb ├── VGGNet.ipynb ├── imdb_lstm.ipynb └── simple_dnn.ipynb ├── neural-networks-and-deep-learning ├── .gitignore ├── README.md ├── data │ └── mnist.pkl.gz ├── fig │ ├── backprop_magnitude_nabla.png │ ├── backprop_magnitude_nabla.py │ ├── data_1000.json │ ├── digits.png │ ├── digits_separate.png │ ├── false_minima.png │ ├── false_minima.py │ ├── generate_gradient.py │ ├── initial_gradient.json │ ├── misleading_gradient.png │ ├── misleading_gradient.py │ ├── misleading_gradient_contours.png │ ├── misleading_gradient_contours.py │ ├── mnist.py │ ├── mnist_100_digits.png │ ├── mnist_2_and_1.png │ ├── mnist_complete_zero.png │ ├── mnist_first_digit.png │ ├── mnist_other_features.png │ ├── mnist_really_bad_images.png │ ├── mnist_top_left_feature.png │ ├── more_data.json │ ├── more_data.png │ ├── more_data.py │ ├── more_data_5.png │ ├── more_data_comparison.png │ ├── more_data_log.png │ ├── more_data_rotated_5.png │ ├── more_data_svm.json │ ├── multiple_eta.json │ ├── multiple_eta.png │ ├── multiple_eta.py │ ├── norms_during_training_2_layers.json │ ├── norms_during_training_3_layers.json │ ├── norms_during_training_4_layers.json │ ├── overfitting.json │ ├── overfitting.py │ ├── overfitting1.png │ ├── overfitting2.png │ ├── overfitting3.png │ ├── overfitting4.png │ ├── overfitting_full.json │ ├── overfitting_full.png │ ├── pca_hard_data.png │ ├── pca_hard_data_fit.png │ ├── pca_limitations.py │ ├── regularized.json │ ├── regularized1.png │ ├── regularized2.png │ ├── regularized_full.json │ ├── regularized_full.png │ ├── replaced_by_d3 │ │ ├── README.md │ │ ├── relu.png │ │ ├── relu.py │ │ ├── sigmoid.png │ │ ├── sigmoid.py │ │ ├── step.png │ │ ├── step.py │ │ ├── tanh.png │ │ └── tanh.py │ ├── serialize_images_to_json.py │ ├── test.png │ ├── training_speed_2_layers.png │ ├── training_speed_3_layers.png │ ├── training_speed_4_layers.png │ ├── valley.png │ ├── valley.py │ ├── valley2.png │ ├── valley2.py │ ├── weight_initialization.py │ ├── weight_initialization_100.json │ ├── weight_initialization_100.png │ ├── weight_initialization_30.json │ └── weight_initialization_30.png ├── requirements.txt └── src │ ├── conv.py │ ├── expand_mnist.py │ ├── mnist_average_darkness.py │ ├── mnist_loader.py │ ├── mnist_svm.py │ ├── network.py │ ├── network2.py │ ├── network3.py │ ├── old │ ├── blog │ │ ├── __init__.py │ │ └── common_knowledge.py │ ├── cost_vs_iterations.png │ ├── cost_vs_iterations_trapped.png │ ├── deep_autoencoder.py │ ├── deep_learning.py │ ├── gradient_descent_hack.py │ ├── mnist_100_30_deep_autoencoder.png │ ├── mnist_100_unit_autoencoder.png │ ├── mnist_10_unit_autoencoder.png │ ├── mnist_30_component_pca.png │ ├── mnist_30_unit_autoencoder.png │ ├── mnist_autoencoder.py │ ├── mnist_pca.py │ └── perceptron_learning.py │ ├── run_network.ipynb │ └── run_network.py ├── nn-from-scratch ├── MNIST-loader.ipynb ├── MNIST-nn-SGD-flex_arch.ipynb ├── MNIST-nn-SGD.ipynb ├── MNIST-nn-scipy.ipynb ├── README.md └── data │ ├── gzips │ ├── t10k-images-idx3-ubyte.gz │ ├── t10k-labels-idx1-ubyte.gz │ ├── train-images-idx3-ubyte.gz │ └── train-labels-idx1-ubyte.gz │ └── pickled │ ├── xtest.pickle │ ├── xtrain.pickle │ ├── xval.pickle │ ├── ytest.pickle │ ├── ytrain.pickle │ └── yval.pickle ├── slides ├── 2017-02-07__katya_vasilaky__ridge_regression.pdf ├── 2017-02-07__raphaela_sapire__billion_dollar_AI.pdf ├── 2017-03-06__grant_beyleveld__u_net.pdf ├── 2017-03-27__karl_habermas__CS224d_assignment1.pdf 
├── 2017-04-19__claudia_perlich__predictability.pdf ├── 2017-10-17__thomas_balestri__reinforcement_learning.pdf ├── 2017-12-09__keng_laura__RL.pdf ├── 2019-10-16_dmitri_nesterenko__Capsule_Nets.pdf └── 2019-10-16_grant_beyleveld__BERT.pdf ├── weekly-work ├── week1 │ ├── MNIST_for_beginners.ipynb │ ├── MNIST_for_beginners.py │ ├── README.md │ ├── basic_usage.ipynb │ ├── basic_usage.py │ ├── deep_MNIST.ipynb │ ├── deep_MNIST.py │ ├── exercise3.py │ ├── get_started.ipynb │ ├── get_started.py │ └── softmax_vs_convolutional_nn.py ├── week10 │ └── README.md ├── week11 │ ├── README.md │ └── sutskever_et_al_2014__PCA.png ├── week12 │ ├── README.md │ └── img │ │ ├── CNN_feature_map.png │ │ ├── CTC_peaks.png │ │ ├── GRU_gates.png │ │ ├── GRU_shortcut.png │ │ ├── GRU_visualisation.png │ │ ├── LSTM_secret.png │ │ ├── RNN_visualisation.png │ │ ├── RNNs_vs_CNNs.png │ │ ├── are_languages_recursive.png │ │ ├── attention_for_long_sentences_plot.png │ │ ├── attn_hidden_state.png │ │ ├── bilinear_form.png │ │ ├── bldg_on_WVSMs.png │ │ ├── choosing_better_targets.png │ │ ├── choosing_output_targets.png │ │ ├── decoding.png │ │ ├── doubly_attention.png │ │ ├── end_to_end_ASR_as_model.png │ │ ├── global_vs_local.png │ │ ├── heres_the-church_here_are_the_people.png │ │ ├── learned_tree_structure.png │ │ ├── lstm_vs_rnn_127.png │ │ ├── lstm_vs_rnn_32.png │ │ ├── nn_ASR.png │ │ ├── octopus-gan.gif │ │ ├── phrases_in_vector_space.png │ │ ├── recursive_vs_recurrent_NN.png │ │ ├── scoring_attention.png │ │ ├── sentiment_distributions.png │ │ ├── seq2seq_ASR.png │ │ ├── seq2seq_ASR_attn.png │ │ ├── single_layer_CNN.png │ │ ├── traditional_ASR.png │ │ └── what_is_a_convolution.png ├── week13 │ ├── README.md │ └── img │ │ ├── QA_independence.png │ │ ├── SNLI_results.png │ │ ├── SPINN.png │ │ ├── arch_search_2.png │ │ ├── arch_search_3.png │ │ ├── arch_search_4.png │ │ ├── architecture_search.png │ │ ├── b_cubed.png │ │ ├── chunking_training.png │ │ ├── diff_inputs.png │ │ ├── dynamic_memory_network.png │ │ ├── episodic_module.png │ │ ├── harder_questions.png │ │ ├── inference_corpus.png │ │ ├── input_module.png │ │ ├── more_qa_examples.png │ │ ├── obstacle_1.png │ │ ├── obstacle_2.png │ │ ├── pointer_mixture.png │ │ ├── qa_examples.png │ │ ├── question_module.png │ │ ├── semantic_relatedness.png │ │ ├── sharper_attn.png │ │ ├── state_of_the_art.png │ │ ├── tackling_joint_training.png │ │ ├── tennis_Qs.png │ │ ├── touch.txt │ │ ├── tying_word_vectors.png │ │ ├── visual_attn.png │ │ ├── visual_attn_2.png │ │ ├── visual_attn_3.png │ │ ├── where_SPINN_is_better.png │ │ ├── writing_systems.png │ │ ├── ws_2.png │ │ └── ws_3.png ├── week14 │ ├── README.md │ └── img │ │ ├── WnT1.png │ │ ├── WnT2.png │ │ ├── WnT3.png │ │ ├── emmaRL.png │ │ ├── finn1.png │ │ ├── finn1617.png │ │ ├── finn2.png │ │ ├── markovDP.png │ │ ├── oh15.png │ │ ├── silverVenn.png │ │ └── tan14.png ├── week15 │ ├── README.md │ └── img │ │ ├── Q-star.png │ │ ├── Qvalue-fxn.png │ │ ├── atari-case-study.png │ │ ├── atari-case-study2.png │ │ ├── bellman-exn.png │ │ ├── dnn-for-q-learning.png │ │ ├── grid-world-1.png │ │ ├── grid-world-2.png │ │ ├── mdp-defn.png │ │ ├── mdp-process.png │ │ ├── policy-grad-defn.png │ │ ├── q-learning-fxn.png │ │ ├── reinforce-in-axn.png │ │ ├── reinforce-in-axn2.png │ │ ├── value-fxn.png │ │ └── value-itn-algo.png ├── week16 │ └── README.md ├── week17 │ └── README.md ├── week2 │ ├── MNIST_hierarchical_rnn.ipynb │ ├── README.md │ ├── getting_started_with_keras.ipynb │ └── reuters_mlp.ipynb ├── week3 │ └── README.md ├── week4 │ └── 
README.md ├── week5 │ ├── README.md │ └── network3.ipynb ├── week6 │ └── README.md ├── week7 │ └── README.md ├── week8 │ └── README.md └── week9 │ ├── 02_pros_and_cons_of_counting_vs_w2v.png │ ├── 03_05_GloVe_visualizations_gender.png │ ├── 03_06_GloVe_visualizations_CEO.png │ ├── 03_07_GloVe_visualizations_superlatives.png │ ├── 2017_02_skipgram_diagram.png │ ├── 2017_02_softmax.png │ ├── 2017_02_w2v_dot_products.png │ ├── 2017_02_word2vec_definition.png │ ├── README.md │ ├── fun_glove_expressions.png │ └── w2v_objective_fxn.png └── wiki-resources ├── 3.jpg ├── 5_cropped.jpg ├── IMG_2697.jpeg ├── IMG_5959.JPG ├── IMG_5974.JPG ├── IMG_7624.JPG ├── IMG_7641.JPG ├── IMG_9147.JPG ├── dlsg-dli-authors.jpg ├── dlsg-xvii.jpg ├── gitflow.png ├── jk-at-dlsg-xviii.jpg └── session_XI_crew.JPG /.gitignore: -------------------------------------------------------------------------------- 1 | MNIST_data/ 2 | demos-for-talks/mnist/ 3 | 4 | # Byte-compiled / optimized / DLL files 5 | __pycache__/ 6 | *.py[cod] 7 | *$py.class 8 | 9 | # C extensions 10 | *.so 11 | 12 | # Distribution / packaging 13 | .Python 14 | env/ 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *,cover 49 | .hypothesis/ 50 | 51 | # Translations 52 | *.mo 53 | *.pot 54 | 55 | # Django stuff: 56 | *.log 57 | local_settings.py 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # IPython Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # dotenv 82 | .env 83 | 84 | # virtualenv 85 | venv/ 86 | ENV/ 87 | 88 | # Spyder project settings 89 | .spyderproject 90 | 91 | # Rope project settings 92 | .ropeproject 93 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2016 jonkrohn 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | *.org 3 | *.pem 4 | *.pkl 5 | *.pyc 6 | .DS_Store 7 | loc.py 8 | src/ec2 9 | -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/README.md: -------------------------------------------------------------------------------- 1 | # Code samples for "Neural Networks and Deep Learning" 2 | 3 | This repository contains code samples for my (forthcoming) book on 4 | "Neural Networks and Deep Learning". 5 | 6 | As the code is written to accompany the book, I don't intend to add 7 | new features. However, bug reports are welcome, and you should feel 8 | free to fork and modify the code. 9 | 10 | ## License 11 | 12 | MIT License 13 | 14 | Copyright (c) 2012-2015 Michael Nielsen 15 | 16 | Permission is hereby granted, free of charge, to any person obtaining 17 | a copy of this software and associated documentation files (the 18 | "Software"), to deal in the Software without restriction, including 19 | without limitation the rights to use, copy, modify, merge, publish, 20 | distribute, sublicense, and/or sell copies of the Software, and to 21 | permit persons to whom the Software is furnished to do so, subject to 22 | the following conditions: 23 | 24 | The above copyright notice and this permission notice shall be 25 | included in all copies or substantial portions of the Software. 26 | 27 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 28 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 29 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 30 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 31 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 32 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 33 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 34 | -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/data/mnist.pkl.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/data/mnist.pkl.gz -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/fig/backprop_magnitude_nabla.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/backprop_magnitude_nabla.png -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/fig/backprop_magnitude_nabla.py: -------------------------------------------------------------------------------- 1 | """ 2 | backprop_magnitude_nabla 3 | ~~~~~~~~~~~~~~~~~~~~~~~~ 4 | 5 | Using backprop2 I constructed a 784-30-30-30-30-30-10 network to classify 6 | MNIST data. 
I ran ten mini-batches of size 100, with eta = 0.01 and 7 | lambda = 0.05, using: 8 | 9 | net.SGD(otd[:1000], 1, 100, 0.01, 0.05, 10 | 11 | I obtained the following norms for the (unregularized) nabla_w for the 12 | respective mini-batches: 13 | 14 | [0.90845722175923671, 2.8852730656073566, 10.696793986223632, 37.75701921183488, 157.7365422527995, 304.43990075227839] 15 | [0.22493835119537842, 0.6555126517964851, 2.6036801277234076, 11.408825365731225, 46.882319190445472, 70.499637502698221] 16 | [0.11935180022357521, 0.19756069137133489, 0.8152794148335869, 3.4590802543293977, 15.470507965493903, 31.032396017142556] 17 | [0.15130005837653659, 0.39687135985664701, 1.4810006139254532, 4.392519005642268, 16.831939776937311, 34.082104455938733] 18 | [0.11594085276308999, 0.17177668061395848, 0.72204558746599512, 3.05062409378366, 14.133001132214286, 29.776204839994385] 19 | [0.10790389807606221, 0.20707152756018626, 0.96348134037828603, 3.9043824079499561, 15.986873430586924, 39.195258080490895] 20 | [0.088613291101645356, 0.129173436407863, 0.4242933114455002, 1.6154682713449411, 7.5451567587160069, 20.180545544006566] 21 | [0.086175380639289575, 0.12571016850457151, 0.44231149185805047, 1.8435833504677326, 7.61973813981073, 19.474539356281781] 22 | [0.095372080184163904, 0.15854489503205446, 0.70244235144444678, 2.6294803575724157, 10.427062019753425, 24.309420272033819] 23 | [0.096453131000155692, 0.13574642196947601, 0.53551377709415471, 2.0247466793066895, 9.4503978546018068, 21.73772148470092] 24 | 25 | Note that results are listed in order of layer. They clearly show how 26 | the magnitude of nabla_w decreases as we go back through layers. 27 | 28 | In this program I take mini-batches 7, 8, 9 as representative and plot 29 | them. I omit the results from the first and final layers since they 30 | correspond to 784 input neurons and 10 output neurons, not 30 as in 31 | the other layers, making it difficult to compare results. 32 | 33 | Note that I haven't attempted to preserve the whole workflow here. It 34 | involved some minor hacking around with backprop2, which messed up 35 | that code. That's why I've simply put the results in by hand below. 
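Each bracketed list holds six norms, one per weight matrix of the 784-30-30-30-30-30-10 network, ordered from first layer to last; the nw1, nw2 and nw3 arrays below take the four middle entries (the 30-neuron hidden layers) of mini-batches 7, 8 and 9. 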
36 | """ 37 | 38 | # Third-party libraries 39 | import matplotlib.pyplot as plt 40 | 41 | nw1 = [0.129173436407863, 0.4242933114455002, 42 | 1.6154682713449411, 7.5451567587160069] 43 | nw2 = [0.12571016850457151, 0.44231149185805047, 44 | 1.8435833504677326, 7.61973813981073] 45 | nw3 = [0.15854489503205446, 0.70244235144444678, 46 | 2.6294803575724157, 10.427062019753425] 47 | plt.plot(range(1, 5), nw1, "ro-", range(1, 5), nw2, "go-", 48 | range(1, 5), nw3, "bo-") 49 | plt.xlabel('Layer $l$') 50 | plt.ylabel(r"$\Vert\nabla C^l_w\Vert$") 51 | plt.xticks([1, 2, 3, 4]) 52 | plt.show() 53 | -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/fig/digits.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/digits.png -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/fig/digits_separate.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/digits_separate.png -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/fig/false_minima.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/false_minima.png -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/fig/false_minima.py: -------------------------------------------------------------------------------- 1 | """ 2 | false_minimum 3 | ~~~~~~~~~~~~~ 4 | 5 | Plots a function of two variables with many false minima.""" 6 | 7 | #### Libraries 8 | # Third party libraries 9 | from matplotlib.ticker import LinearLocator 10 | # Note that axes3d is not explicitly used in the code, but is needed 11 | # to register the 3d plot type correctly 12 | from mpl_toolkits.mplot3d import axes3d 13 | import matplotlib.pyplot as plt 14 | import numpy 15 | 16 | fig = plt.figure() 17 | ax = fig.gca(projection='3d') 18 | X = numpy.arange(-5, 5, 0.1) 19 | Y = numpy.arange(-5, 5, 0.1) 20 | X, Y = numpy.meshgrid(X, Y) 21 | Z = numpy.sin(X)*numpy.sin(Y)+0.2*X 22 | 23 | colortuple = ('w', 'b') 24 | colors = numpy.empty(X.shape, dtype=str) 25 | for x in xrange(len(X)): 26 | for y in xrange(len(Y)): 27 | colors[x, y] = colortuple[(x + y) % 2] 28 | 29 | surf = ax.plot_surface(X, Y, Z, rstride=1, cstride=1, facecolors=colors, 30 | linewidth=0) 31 | 32 | ax.set_xlim3d(-5, 5) 33 | ax.set_ylim3d(-5, 5) 34 | ax.set_zlim3d(-2, 2) 35 | ax.w_xaxis.set_major_locator(LinearLocator(3)) 36 | ax.w_yaxis.set_major_locator(LinearLocator(3)) 37 | ax.w_zaxis.set_major_locator(LinearLocator(3)) 38 | 39 | plt.show() 40 | 41 | -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/fig/generate_gradient.py: -------------------------------------------------------------------------------- 1 | """generate_gradient.py 2 | ~~~~~~~~~~~~~~~~~~~~~~~ 3 | 4 | Use network2 to figure out the average starting values of the gradient 5 | error terms \delta^l_j = \partial C / \partial z^l_j = 
\partial C / 6 | \partial b^l_j. 7 | 8 | """ 9 | 10 | #### Libraries 11 | # Standard library 12 | import json 13 | import math 14 | import random 15 | import shutil 16 | import sys 17 | sys.path.append("../src/") 18 | 19 | # My library 20 | import mnist_loader 21 | import network2 22 | 23 | # Third-party libraries 24 | import matplotlib.pyplot as plt 25 | import numpy as np 26 | 27 | def main(): 28 | # Load the data 29 | full_td, _, _ = mnist_loader.load_data_wrapper() 30 | td = full_td[:1000] # Just use the first 1000 items of training data 31 | epochs = 500 # Number of epochs to train for 32 | 33 | print "\nTwo hidden layers:" 34 | net = network2.Network([784, 30, 30, 10]) 35 | initial_norms(td, net) 36 | abbreviated_gradient = [ 37 | ag[:6] for ag in get_average_gradient(net, td)[:-1]] 38 | print "Saving the averaged gradient for the top six neurons in each "+\ 39 | "layer.\nWARNING: This will affect the look of the book, so be "+\ 40 | "sure to check the\nrelevant material (early chapter 5)." 41 | f = open("initial_gradient.json", "w") 42 | json.dump(abbreviated_gradient, f) 43 | f.close() 44 | shutil.copy("initial_gradient.json", "../../js/initial_gradient.json") 45 | training(td, net, epochs, "norms_during_training_2_layers.json") 46 | plot_training( 47 | epochs, "norms_during_training_2_layers.json", 2) 48 | 49 | print "\nThree hidden layers:" 50 | net = network2.Network([784, 30, 30, 30, 10]) 51 | initial_norms(td, net) 52 | training(td, net, epochs, "norms_during_training_3_layers.json") 53 | plot_training( 54 | epochs, "norms_during_training_3_layers.json", 3) 55 | 56 | print "\nFour hidden layers:" 57 | net = network2.Network([784, 30, 30, 30, 30, 10]) 58 | initial_norms(td, net) 59 | training(td, net, epochs, 60 | "norms_during_training_4_layers.json") 61 | plot_training( 62 | epochs, "norms_during_training_4_layers.json", 4) 63 | 64 | def initial_norms(training_data, net): 65 | average_gradient = get_average_gradient(net, training_data) 66 | norms = [list_norm(avg) for avg in average_gradient[:-1]] 67 | print "Average gradient for the hidden layers: "+str(norms) 68 | 69 | def training(training_data, net, epochs, filename): 70 | norms = [] 71 | for j in range(epochs): 72 | average_gradient = get_average_gradient(net, training_data) 73 | norms.append([list_norm(avg) for avg in average_gradient[:-1]]) 74 | print "Epoch: %s" % j 75 | net.SGD(training_data, 1, 1000, 0.1, lmbda=5.0) 76 | f = open(filename, "w") 77 | json.dump(norms, f) 78 | f.close() 79 | 80 | def plot_training(epochs, filename, num_layers): 81 | f = open(filename, "r") 82 | norms = json.load(f) 83 | f.close() 84 | fig = plt.figure() 85 | ax = fig.add_subplot(111) 86 | colors = ["#2A6EA6", "#FFA933", "#FF5555", "#55FF55", "#5555FF"] 87 | for j in range(num_layers): 88 | ax.plot(np.arange(epochs), 89 | [n[j] for n in norms], 90 | color=colors[j], 91 | label="Hidden layer %s" % (j+1,)) 92 | ax.set_xlim([0, epochs]) 93 | ax.grid(True) 94 | ax.set_xlabel('Number of epochs of training') 95 | ax.set_title('Speed of learning: %s hidden layers' % num_layers) 96 | ax.set_yscale('log') 97 | plt.legend(loc="upper right") 98 | fig_filename = "training_speed_%s_layers.png" % num_layers 99 | plt.savefig(fig_filename) 100 | shutil.copy(fig_filename, "../../images/"+fig_filename) 101 | plt.show() 102 | 103 | def get_average_gradient(net, training_data): 104 | nabla_b_results = [net.backprop(x, y)[0] for x, y in training_data] 105 | gradient = list_sum(nabla_b_results) 106 | return [(np.reshape(g, 
len(g))/len(training_data)).tolist() 107 | for g in gradient] 108 | 109 | def zip_sum(a, b): 110 | return [x+y for (x, y) in zip(a, b)] 111 | 112 | def list_sum(l): 113 | return reduce(zip_sum, l) 114 | 115 | def list_norm(l): 116 | return math.sqrt(sum([x*x for x in l])) 117 | 118 | if __name__ == "__main__": 119 | main() 120 | -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/fig/initial_gradient.json: -------------------------------------------------------------------------------- 1 | [[-0.003970677333144113, -0.0031684316985881185, 0.008103235909196014, 0.012598010584130365, -0.026465907331998335, 0.0017583319323150341], [0.04152906589960523, 0.044025552524932406, -0.009669682279354514, 0.046736871369353235, 0.03877302528270452, 0.012336459551975156]] -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/fig/misleading_gradient.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/misleading_gradient.png -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/fig/misleading_gradient.py: -------------------------------------------------------------------------------- 1 | """ 2 | misleading_gradient 3 | ~~~~~~~~~~~~~~~~~~~ 4 | 5 | Plots a function which misleads the gradient descent algorithm.""" 6 | 7 | #### Libraries 8 | # Third party libraries 9 | from matplotlib.ticker import LinearLocator 10 | # Note that axes3d is not explicitly used in the code, but is needed 11 | # to register the 3d plot type correctly 12 | from mpl_toolkits.mplot3d import axes3d 13 | import matplotlib.pyplot as plt 14 | import numpy 15 | 16 | fig = plt.figure() 17 | ax = fig.gca(projection='3d') 18 | X = numpy.arange(-1, 1, 0.025) 19 | Y = numpy.arange(-1, 1, 0.025) 20 | X, Y = numpy.meshgrid(X, Y) 21 | Z = X**2 + 10*Y**2 22 | 23 | colortuple = ('w', 'b') 24 | colors = numpy.empty(X.shape, dtype=str) 25 | for x in xrange(len(X)): 26 | for y in xrange(len(Y)): 27 | colors[x, y] = colortuple[(x + y) % 2] 28 | 29 | surf = ax.plot_surface(X, Y, Z, rstride=1, cstride=1, facecolors=colors, 30 | linewidth=0) 31 | 32 | ax.set_xlim3d(-1, 1) 33 | ax.set_ylim3d(-1, 1) 34 | ax.set_zlim3d(0, 12) 35 | ax.w_xaxis.set_major_locator(LinearLocator(3)) 36 | ax.w_yaxis.set_major_locator(LinearLocator(3)) 37 | ax.w_zaxis.set_major_locator(LinearLocator(3)) 38 | ax.text(0.05, -1.8, 0, "$w_1$", fontsize=20) 39 | ax.text(1.5, -0.25, 0, "$w_2$", fontsize=20) 40 | ax.text(1.79, 0, 9.62, "$C$", fontsize=20) 41 | 42 | plt.show() 43 | 44 | -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/fig/misleading_gradient_contours.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/misleading_gradient_contours.png -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/fig/misleading_gradient_contours.py: -------------------------------------------------------------------------------- 1 | """ 2 | misleading_gradient_contours 3 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 4 | 5 | Plots the contours of the 
function from misleading_gradient.py""" 6 | 7 | #### Libraries 8 | # Third party libraries 9 | import matplotlib.pyplot as plt 10 | import numpy 11 | 12 | X = numpy.arange(-1, 1, 0.02) 13 | Y = numpy.arange(-1, 1, 0.02) 14 | X, Y = numpy.meshgrid(X, Y) 15 | Z = X**2 + 10*Y**2 16 | 17 | plt.figure() 18 | CS = plt.contour(X, Y, Z, levels=[0.5, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]) 19 | plt.xlabel("$w_1$", fontsize=16) 20 | plt.ylabel("$w_2$", fontsize=16) 21 | plt.show() 22 | -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/fig/mnist_100_digits.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/mnist_100_digits.png -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/fig/mnist_2_and_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/mnist_2_and_1.png -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/fig/mnist_complete_zero.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/mnist_complete_zero.png -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/fig/mnist_first_digit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/mnist_first_digit.png -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/fig/mnist_other_features.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/mnist_other_features.png -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/fig/mnist_really_bad_images.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/mnist_really_bad_images.png -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/fig/mnist_top_left_feature.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/mnist_top_left_feature.png -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/fig/more_data.json: -------------------------------------------------------------------------------- 1 | [69.09, 76.37, 85.29, 88.85, 91.27, 93.24, 94.89, 95.85, 95.97] 
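The nine values above are the network's validation accuracies, in percent, written out by more_data.py (shown below), one per entry in that script's SIZES list. A minimal sketch of reading them back, assuming the script has already been run in this directory:

import json

SIZES = [100, 200, 500, 1000, 2000, 5000, 10000, 20000, 50000]
f = open("more_data.json", "r")
accuracies = json.load(f)
f.close()
for size, accuracy in zip(SIZES, accuracies):
    # Prints one line per training-set size, e.g. "100 training examples: 69.09% validation accuracy"
    print "%s training examples: %s%% validation accuracy" % (size, accuracy)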
-------------------------------------------------------------------------------- /neural-networks-and-deep-learning/fig/more_data.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/more_data.png -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/fig/more_data.py: -------------------------------------------------------------------------------- 1 | """more_data 2 | ~~~~~~~~~~~~ 3 | 4 | Plot graphs to illustrate the performance of MNIST when different size 5 | training sets are used. 6 | 7 | """ 8 | 9 | # Standard library 10 | import json 11 | import random 12 | import sys 13 | 14 | # My library 15 | sys.path.append('../src/') 16 | import mnist_loader 17 | import network2 18 | 19 | # Third-party libraries 20 | import matplotlib.pyplot as plt 21 | import numpy as np 22 | from sklearn import svm 23 | 24 | # The sizes to use for the different training sets 25 | SIZES = [100, 200, 500, 1000, 2000, 5000, 10000, 20000, 50000] 26 | 27 | def main(): 28 | run_networks() 29 | run_svms() 30 | make_plots() 31 | 32 | def run_networks(): 33 | # Make results more easily reproducible 34 | random.seed(12345678) 35 | np.random.seed(12345678) 36 | training_data, validation_data, test_data = mnist_loader.load_data_wrapper() 37 | net = network2.Network([784, 30, 10], cost=network2.CrossEntropyCost()) 38 | accuracies = [] 39 | for size in SIZES: 40 | print "\n\nTraining network with data set size %s" % size 41 | net.large_weight_initializer() 42 | num_epochs = 1500000 / size 43 | net.SGD(training_data[:size], num_epochs, 10, 0.5, lmbda = size*0.0001) 44 | accuracy = net.accuracy(validation_data) / 100.0 45 | print "Accuracy was %s percent" % accuracy 46 | accuracies.append(accuracy) 47 | f = open("more_data.json", "w") 48 | json.dump(accuracies, f) 49 | f.close() 50 | 51 | def run_svms(): 52 | svm_training_data, svm_validation_data, svm_test_data \ 53 | = mnist_loader.load_data() 54 | accuracies = [] 55 | for size in SIZES: 56 | print "\n\nTraining SVM with data set size %s" % size 57 | clf = svm.SVC() 58 | clf.fit(svm_training_data[0][:size], svm_training_data[1][:size]) 59 | predictions = [int(a) for a in clf.predict(svm_validation_data[0])] 60 | accuracy = sum(int(a == y) for a, y in 61 | zip(predictions, svm_validation_data[1])) / 100.0 62 | print "Accuracy was %s percent" % accuracy 63 | accuracies.append(accuracy) 64 | f = open("more_data_svm.json", "w") 65 | json.dump(accuracies, f) 66 | f.close() 67 | 68 | def make_plots(): 69 | f = open("more_data.json", "r") 70 | accuracies = json.load(f) 71 | f.close() 72 | f = open("more_data_svm.json", "r") 73 | svm_accuracies = json.load(f) 74 | f.close() 75 | make_linear_plot(accuracies) 76 | make_log_plot(accuracies) 77 | make_combined_plot(accuracies, svm_accuracies) 78 | 79 | def make_linear_plot(accuracies): 80 | fig = plt.figure() 81 | ax = fig.add_subplot(111) 82 | ax.plot(SIZES, accuracies, color='#2A6EA6') 83 | ax.plot(SIZES, accuracies, "o", color='#FFA933') 84 | ax.set_xlim(0, 50000) 85 | ax.set_ylim(60, 100) 86 | ax.grid(True) 87 | ax.set_xlabel('Training set size') 88 | ax.set_title('Accuracy (%) on the validation data') 89 | plt.show() 90 | 91 | def make_log_plot(accuracies): 92 | fig = plt.figure() 93 | ax = fig.add_subplot(111) 94 | ax.plot(SIZES, accuracies, color='#2A6EA6') 95 | ax.plot(SIZES, accuracies, "o", 
color='#FFA933') 96 | ax.set_xlim(100, 50000) 97 | ax.set_ylim(60, 100) 98 | ax.set_xscale('log') 99 | ax.grid(True) 100 | ax.set_xlabel('Training set size') 101 | ax.set_title('Accuracy (%) on the validation data') 102 | plt.show() 103 | 104 | def make_combined_plot(accuracies, svm_accuracies): 105 | fig = plt.figure() 106 | ax = fig.add_subplot(111) 107 | ax.plot(SIZES, accuracies, color='#2A6EA6') 108 | ax.plot(SIZES, accuracies, "o", color='#2A6EA6', 109 | label='Neural network accuracy (%)') 110 | ax.plot(SIZES, svm_accuracies, color='#FFA933') 111 | ax.plot(SIZES, svm_accuracies, "o", color='#FFA933', 112 | label='SVM accuracy (%)') 113 | ax.set_xlim(100, 50000) 114 | ax.set_ylim(25, 100) 115 | ax.set_xscale('log') 116 | ax.grid(True) 117 | ax.set_xlabel('Training set size') 118 | plt.legend(loc="lower right") 119 | plt.show() 120 | 121 | if __name__ == "__main__": 122 | main() 123 | -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/fig/more_data_5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/more_data_5.png -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/fig/more_data_comparison.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/more_data_comparison.png -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/fig/more_data_log.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/more_data_log.png -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/fig/more_data_rotated_5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/more_data_rotated_5.png -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/fig/more_data_svm.json: -------------------------------------------------------------------------------- 1 | [25.07, 48.93, 75.13, 83.87, 88.49, 91.46, 92.45, 93.47, 94.48] -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/fig/multiple_eta.json: -------------------------------------------------------------------------------- 1 | [[[], [], [0.87809508908377998, 0.67406552530098141, 0.59798920430275404, 0.55533015743656189, 0.51751101003208144, 0.4942033354556824, 0.47255041042913526, 0.46069879353359433, 0.44304475294352064, 0.43099562372228112, 0.42310993427766375, 0.41408265298981006, 0.40573464183982105, 0.40110722961828227, 0.39162028064538967, 0.38705015774740958, 0.38116357043417587, 0.37603986695304614, 0.37297012040237154, 0.37057334627661631, 0.36551756338853658, 0.36335674264586654, 0.35745296185579917, 0.35535960956849127, 0.35365591135061097, 0.35011353300568238, 0.34946519495897871, 
0.34604661988238178, 0.34386077098862522, 0.33919980880230349], []], [[], [], [0.49501954654296704, 0.4063145129425576, 0.40482383242804637, 0.37156577828840276, 0.37380111172151681, 0.37152751786000143, 0.35371985224004426, 0.3557161388797867, 0.34323780090168027, 0.3433514311156789, 0.3367645441708797, 0.34532085892085329, 0.33506383267050244, 0.34760988079085842, 0.34921493732996928, 0.33853424834583179, 0.32837282561262077, 0.33175599401109612, 0.33132920379429243, 0.33024353325326034, 0.32736756892399654, 0.3259638557593546, 0.32004264784244907, 0.33424319076405928, 0.33878125802305081, 0.32521839878261177, 0.32679267619514646, 0.32488571435373748, 0.33056367198473002, 0.33879633130932685], []], [[], [], [0.92489293305102116, 0.83919130289246469, 0.88748421594232696, 0.79625231780396133, 0.78117959228699174, 1.1365919079387048, 0.78787239608336346, 0.76778614131217449, 0.73689525303227721, 0.80127437393519696, 0.74433665287336681, 0.73725544607013882, 0.80249602203179993, 0.85190338199210014, 0.79872168623645712, 0.80243104440756152, 0.80649160680410659, 0.81467254023600921, 0.82526467696100858, 0.75042379852601759, 0.93658673378777402, 0.88236662906752283, 0.86121396033520892, 0.72492681699401829, 0.80405009868466648, 0.83959963179208197, 0.83387510808276821, 0.88282498566307899, 0.88583473645177979, 0.86068501713490919], []]] -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/fig/multiple_eta.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/multiple_eta.png -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/fig/multiple_eta.py: -------------------------------------------------------------------------------- 1 | """multiple_eta 2 | ~~~~~~~~~~~~~~~ 3 | 4 | This program shows how different values for the learning rate affect 5 | training. In particular, we'll plot out how the cost changes using 6 | three different values for eta. 7 | 8 | """ 9 | 10 | # Standard library 11 | import json 12 | import random 13 | import sys 14 | 15 | # My library 16 | sys.path.append('../src/') 17 | import mnist_loader 18 | import network2 19 | 20 | # Third-party libraries 21 | import matplotlib.pyplot as plt 22 | import numpy as np 23 | 24 | # Constants 25 | LEARNING_RATES = [0.025, 0.25, 2.5] 26 | COLORS = ['#2A6EA6', '#FFCD33', '#FF7033'] 27 | NUM_EPOCHS = 30 28 | 29 | def main(): 30 | run_networks() 31 | make_plot() 32 | 33 | def run_networks(): 34 | """Train networks using three different values for the learning rate, 35 | and store the cost curves in the file ``multiple_eta.json``, where 36 | they can later be used by ``make_plot``. 
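Each stored result is the four-list value returned by network2's SGD (evaluation cost, evaluation accuracy, training cost, training accuracy); only training cost is monitored here, which is why the other three slots in ``multiple_eta.json`` are empty, and only the training-cost curves are plotted by ``make_plot``. 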
37 | 38 | """ 39 | # Make results more easily reproducible 40 | random.seed(12345678) 41 | np.random.seed(12345678) 42 | training_data, validation_data, test_data = mnist_loader.load_data_wrapper() 43 | results = [] 44 | for eta in LEARNING_RATES: 45 | print "\nTrain a network using eta = "+str(eta) 46 | net = network2.Network([784, 30, 10]) 47 | results.append( 48 | net.SGD(training_data, NUM_EPOCHS, 10, eta, lmbda=5.0, 49 | evaluation_data=validation_data, 50 | monitor_training_cost=True)) 51 | f = open("multiple_eta.json", "w") 52 | json.dump(results, f) 53 | f.close() 54 | 55 | def make_plot(): 56 | f = open("multiple_eta.json", "r") 57 | results = json.load(f) 58 | f.close() 59 | fig = plt.figure() 60 | ax = fig.add_subplot(111) 61 | for eta, result, color in zip(LEARNING_RATES, results, COLORS): 62 | _, _, training_cost, _ = result 63 | ax.plot(np.arange(NUM_EPOCHS), training_cost, "o-", 64 | label="$\eta$ = "+str(eta), 65 | color=color) 66 | ax.set_xlim([0, NUM_EPOCHS]) 67 | ax.set_xlabel('Epoch') 68 | ax.set_ylabel('Cost') 69 | plt.legend(loc='upper right') 70 | plt.show() 71 | 72 | if __name__ == "__main__": 73 | main() 74 | -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/fig/overfitting1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/overfitting1.png -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/fig/overfitting2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/overfitting2.png -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/fig/overfitting3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/overfitting3.png -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/fig/overfitting4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/overfitting4.png -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/fig/overfitting_full.json: -------------------------------------------------------------------------------- 1 | [[0.56135590058630858, 0.47806921271034553, 0.457510836259925, 0.42504920544144992, 0.39449553344420019, 0.39810448800345, 0.37017079712250733, 0.37403997639944547, 0.36290253019659285, 0.4006868170859208, 0.36817548958488616, 0.37299310675826219, 0.36871967242261605, 0.37146610246666006, 0.35704621996697938, 0.35821464151288968, 0.38622103466509744, 0.37010939716781127, 0.36539832104327125, 0.35511546847032671, 0.3828088676932585, 0.36160025922354638, 0.37028708356461698, 0.37605182846277163, 0.36634313696187393, 0.36129044456360238, 0.37531885586439506, 0.36415225595876555, 0.35707895858237054, 0.36631987373588193], [9136, 9275, 9307, 9377, 9450, 
9429, 9468, 9488, 9494, 9424, 9483, 9483, 9505, 9499, 9508, 9508, 9445, 9524, 9524, 9524, 9494, 9527, 9518, 9505, 9533, 9529, 9512, 9530, 9532, 9531], [0.55994588582554705, 0.44664870303435988, 0.42455329174078477, 0.38578320429266705, 0.33992291017592285, 0.33162477096795895, 0.3137480626518645, 0.30028971890544093, 0.27353890048167528, 0.30236927117202678, 0.26487026303889277, 0.2661714884193439, 0.24734280015146709, 0.26355551438395558, 0.23088530423416964, 0.22618350577327287, 0.25137541006767478, 0.23085585354651994, 0.21417931191800957, 0.20049587923059808, 0.23713128948069295, 0.20327728799861464, 0.21953883029836488, 0.20264436321820509, 0.19643949703516961, 0.18467980669870671, 0.18788606162530633, 0.18535916502880764, 0.18466759834259142, 0.17218286758911475], [45708, 46605, 46797, 47190, 47543, 47570, 47638, 47838, 48061, 47825, 48160, 48195, 48265, 48156, 48439, 48449, 48267, 48433, 48598, 48697, 48380, 48648, 48500, 48669, 48734, 48796, 48802, 48837, 48810, 48932]] -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/fig/overfitting_full.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/overfitting_full.png -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/fig/pca_hard_data.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/pca_hard_data.png -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/fig/pca_hard_data_fit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/pca_hard_data_fit.png -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/fig/pca_limitations.py: -------------------------------------------------------------------------------- 1 | """ 2 | pca_limitations 3 | ~~~~~~~~~~~~~~~ 4 | 5 | Plot graphs to illustrate the limitations of PCA. 
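The data are noisy samples from a helix, a curve that is intrinsically one-dimensional yet cannot be recovered by any linear projection of the kind PCA produces. 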
6 | """ 7 | 8 | # Third-party libraries 9 | from mpl_toolkits.mplot3d import Axes3D 10 | import matplotlib.pyplot as plt 11 | import numpy as np 12 | 13 | # Plot just the data 14 | fig = plt.figure() 15 | ax = fig.gca(projection='3d') 16 | z = np.linspace(-2, 2, 20) 17 | theta = np.linspace(-4 * np.pi, 4 * np.pi, 20) 18 | x = np.sin(theta)+0.03*np.random.randn(20) 19 | y = np.cos(theta)+0.03*np.random.randn(20) 20 | ax.plot(x, y, z, 'ro') 21 | plt.show() 22 | 23 | # Plot the data and the helix together 24 | fig = plt.figure() 25 | ax = fig.gca(projection='3d') 26 | z_helix = np.linspace(-2, 2, 100) 27 | theta_helix = np.linspace(-4 * np.pi, 4 * np.pi, 100) 28 | x_helix = np.sin(theta_helix) 29 | y_helix = np.cos(theta_helix) 30 | ax.plot(x, y, z, 'ro') 31 | ax.plot(x_helix, y_helix, z_helix, '') 32 | plt.show() 33 | -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/fig/regularized1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/regularized1.png -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/fig/regularized2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/regularized2.png -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/fig/regularized_full.json: -------------------------------------------------------------------------------- 1 | [[4.3072791918656037, 2.9331304641086344, 2.1348073553576041, 1.6588303607817259, 1.330889938797851, 1.1963223601928472, 1.1170765304219505, 1.0170754480838433, 0.99110935015398149, 1.0071179800661803, 0.96280080386971378, 0.99226609521675169, 0.96023984363523895, 0.97253784945751276, 0.93966545596520334, 0.95330563342376551, 0.96378529404233837, 0.97367336858037301, 0.94435985290781166, 0.94622931411839994, 0.98392022263201184, 0.94091005661041272, 0.9496551347987412, 0.94714964684453073, 0.95026655456196552, 0.92915894672179755, 0.95831053042987979, 1.0153994919718721, 0.92940339906358749, 0.97682851862658082], [9212, 9341, 9375, 9424, 9532, 9537, 9504, 9541, 9578, 9538, 9579, 9530, 9590, 9543, 9607, 9597, 9576, 9546, 9600, 9634, 9544, 9606, 9614, 9607, 9621, 9637, 9620, 9511, 9649, 9561], [1.2925405259017666, 0.92479539229795305, 0.72611252037165497, 0.61618944188425839, 0.49142410439713557, 0.46552608507795468, 0.46074829841290343, 0.40775149802551902, 0.39671750686791218, 0.42031570708192345, 0.38057096091326847, 0.40768033915334978, 0.3895210257834103, 0.40585871820346864, 0.36003072887701948, 0.37700037701783806, 0.39300003862768451, 0.40774598935627593, 0.37194215157507704, 0.3662415845761452, 0.40722309031673021, 0.36476961463606117, 0.36988528906574514, 0.36112644707329011, 0.380710641602238, 0.35700998663848571, 0.37724740623797381, 0.44991741876110503, 0.35820321110078079, 0.39226034353556583], [45919, 46835, 47204, 47434, 47989, 47930, 47839, 48157, 48218, 48105, 48313, 48089, 48282, 48111, 48463, 48362, 48243, 48123, 48416, 48533, 48123, 48483, 48435, 48548, 48434, 48524, 48417, 47797, 48561, 48235]] -------------------------------------------------------------------------------- 
/neural-networks-and-deep-learning/fig/regularized_full.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/regularized_full.png -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/fig/replaced_by_d3/README.md: -------------------------------------------------------------------------------- 1 | # Replaced by d3 directory 2 | 3 | This directory contains python code which generated png figures which 4 | were later replaced by d3 in the live version of the site. They've 5 | been preserved here on the off chance that they may be of use at some 6 | point in the future. 7 | -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/fig/replaced_by_d3/relu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/replaced_by_d3/relu.png -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/fig/replaced_by_d3/relu.py: -------------------------------------------------------------------------------- 1 | """ 2 | relu 3 | ~~~~ 4 | 5 | Plots a graph of the squashing function used by a rectified linear 6 | unit.""" 7 | 8 | import numpy as np 9 | import matplotlib.pyplot as plt 10 | 11 | z = np.arange(-2, 2, .1) 12 | zero = np.zeros(len(z)) 13 | y = np.max([zero, z], axis=0) 14 | 15 | fig = plt.figure() 16 | ax = fig.add_subplot(111) 17 | ax.plot(z, y) 18 | ax.set_ylim([-2.0, 2.0]) 19 | ax.set_xlim([-2.0, 2.0]) 20 | ax.grid(True) 21 | ax.set_xlabel('z') 22 | ax.set_title('Rectified linear unit') 23 | 24 | plt.show() 25 | -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/fig/replaced_by_d3/sigmoid.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/replaced_by_d3/sigmoid.png -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/fig/replaced_by_d3/sigmoid.py: -------------------------------------------------------------------------------- 1 | """ 2 | sigmoid 3 | ~~~~~~~ 4 | 5 | Plots a graph of the sigmoid function.""" 6 | 7 | import numpy 8 | import matplotlib.pyplot as plt 9 | 10 | z = numpy.arange(-5, 5, .1) 11 | sigma_fn = numpy.vectorize(lambda z: 1/(1+numpy.exp(-z))) 12 | sigma = sigma_fn(z) 13 | 14 | fig = plt.figure() 15 | ax = fig.add_subplot(111) 16 | ax.plot(z, sigma) 17 | ax.set_ylim([-0.5, 1.5]) 18 | ax.set_xlim([-5,5]) 19 | ax.grid(True) 20 | ax.set_xlabel('z') 21 | ax.set_title('sigmoid function') 22 | 23 | plt.show() 24 | -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/fig/replaced_by_d3/step.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/replaced_by_d3/step.png 
-------------------------------------------------------------------------------- /neural-networks-and-deep-learning/fig/replaced_by_d3/step.py: -------------------------------------------------------------------------------- 1 | """ 2 | step 3 | ~~~~~~~ 4 | 5 | Plots a graph of a step function.""" 6 | 7 | import numpy 8 | import matplotlib.pyplot as plt 9 | 10 | z = numpy.arange(-5, 5, .02) 11 | step_fn = numpy.vectorize(lambda z: 1.0 if z >= 0.0 else 0.0) 12 | step = step_fn(z) 13 | 14 | fig = plt.figure() 15 | ax = fig.add_subplot(111) 16 | ax.plot(z, step) 17 | ax.set_ylim([-0.5, 1.5]) 18 | ax.set_xlim([-5,5]) 19 | ax.grid(True) 20 | ax.set_xlabel('z') 21 | ax.set_title('step function') 22 | 23 | plt.show() 24 | -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/fig/replaced_by_d3/tanh.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/replaced_by_d3/tanh.png -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/fig/replaced_by_d3/tanh.py: -------------------------------------------------------------------------------- 1 | """ 2 | tanh 3 | ~~~~ 4 | 5 | Plots a graph of the tanh function.""" 6 | 7 | import numpy as np 8 | import matplotlib.pyplot as plt 9 | 10 | z = np.arange(-5, 5, .1) 11 | t = np.tanh(z) 12 | 13 | fig = plt.figure() 14 | ax = fig.add_subplot(111) 15 | ax.plot(z, t) 16 | ax.set_ylim([-1.0, 1.0]) 17 | ax.set_xlim([-5,5]) 18 | ax.grid(True) 19 | ax.set_xlabel('z') 20 | ax.set_title('tanh function') 21 | 22 | plt.show() 23 | -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/fig/serialize_images_to_json.py: -------------------------------------------------------------------------------- 1 | """ 2 | serialize_images_to_json 3 | ~~~~~~~~~~~~~~~~~~~~~~~~ 4 | 5 | Utility to serialize parts of the training and validation data to JSON, 6 | for use with Javascript. """ 7 | 8 | #### Libraries 9 | # Standard library 10 | import json 11 | import sys 12 | 13 | # My library 14 | sys.path.append('../src/') 15 | import mnist_loader 16 | 17 | # Third-party libraries 18 | import numpy as np 19 | 20 | 21 | # Number of training and validation data images to serialize 22 | NTD = 1000 23 | NVD = 100 24 | 25 | training_data, validation_data, test_data = mnist_loader.load_data_wrapper() 26 | 27 | def make_data_integer(td): 28 | # This will be slow, due to the loop. It'd be better if numpy did 29 | # this directly. But numpy.rint followed by tolist() doesn't 30 | # convert to a standard Python int. 
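 # Scaling by 256 and truncating with int() yields plain Python ints in [0, 256].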
31 | return [int(x) for x in (td*256).reshape(784).tolist()] 32 | 33 | data = {"training": [ 34 | {"x": [x[0] for x in training_data[j][0].tolist()], 35 | "y": [y[0] for y in training_data[j][1].tolist()]} 36 | for j in xrange(NTD)], 37 | "validation": [ 38 | {"x": [x[0] for x in validation_data[j][0].tolist()], 39 | "y": validation_data[j][1]} 40 | for j in xrange(NVD)]} 41 | 42 | f = open("data_1000.json", "w") 43 | json.dump(data, f) 44 | f.close() 45 | 46 | 47 | -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/fig/test.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/test.png -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/fig/training_speed_2_layers.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/training_speed_2_layers.png -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/fig/training_speed_3_layers.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/training_speed_3_layers.png -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/fig/training_speed_4_layers.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/training_speed_4_layers.png -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/fig/valley.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/valley.png -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/fig/valley.py: -------------------------------------------------------------------------------- 1 | """ 2 | valley 3 | ~~~~~~ 4 | 5 | Plots a function of two variables to minimize. 
The function is a 6 | fairly generic valley function.""" 7 | 8 | #### Libraries 9 | # Third party libraries 10 | from matplotlib.ticker import LinearLocator 11 | # Note that axes3d is not explicitly used in the code, but is needed 12 | # to register the 3d plot type correctly 13 | from mpl_toolkits.mplot3d import axes3d 14 | import matplotlib.pyplot as plt 15 | import numpy 16 | 17 | fig = plt.figure() 18 | ax = fig.gca(projection='3d') 19 | X = numpy.arange(-1, 1, 0.1) 20 | Y = numpy.arange(-1, 1, 0.1) 21 | X, Y = numpy.meshgrid(X, Y) 22 | Z = X**2 + Y**2 23 | 24 | colortuple = ('w', 'b') 25 | colors = numpy.empty(X.shape, dtype=str) 26 | for x in xrange(len(X)): 27 | for y in xrange(len(Y)): 28 | colors[x, y] = colortuple[(x + y) % 2] 29 | 30 | surf = ax.plot_surface(X, Y, Z, rstride=1, cstride=1, facecolors=colors, 31 | linewidth=0) 32 | 33 | ax.set_xlim3d(-1, 1) 34 | ax.set_ylim3d(-1, 1) 35 | ax.set_zlim3d(0, 2) 36 | ax.w_xaxis.set_major_locator(LinearLocator(3)) 37 | ax.w_yaxis.set_major_locator(LinearLocator(3)) 38 | ax.w_zaxis.set_major_locator(LinearLocator(3)) 39 | ax.text(1.79, 0, 1.62, "$C$", fontsize=20) 40 | ax.text(0.05, -1.8, 0, "$v_1$", fontsize=20) 41 | ax.text(1.5, -0.25, 0, "$v_2$", fontsize=20) 42 | 43 | plt.show() 44 | -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/fig/valley2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/valley2.png -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/fig/valley2.py: -------------------------------------------------------------------------------- 1 | """valley2.py 2 | ~~~~~~~~~~~~~ 3 | 4 | Plots a function of two variables to minimize. The function is a 5 | fairly generic valley function. 6 | 7 | Note that this is a duplicate of valley.py, but omits labels on the 8 | axis. It's bad practice to duplicate in this way, but I had 9 | considerable trouble getting matplotlib to update a graph in the way I 10 | needed (adding or removing labels), so finally fell back on this as a 11 | kludge solution. 
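(A cleaner alternative would be to factor the shared surface-plotting code into a single helper taking a boolean flag that controls whether the axis labels are drawn.)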
12 | 13 | """ 14 | 15 | #### Libraries 16 | # Third party libraries 17 | from matplotlib.ticker import LinearLocator 18 | # Note that axes3d is not explicitly used in the code, but is needed 19 | # to register the 3d plot type correctly 20 | from mpl_toolkits.mplot3d import axes3d 21 | import matplotlib.pyplot as plt 22 | import numpy 23 | 24 | fig = plt.figure() 25 | ax = fig.gca(projection='3d') 26 | X = numpy.arange(-1, 1, 0.1) 27 | Y = numpy.arange(-1, 1, 0.1) 28 | X, Y = numpy.meshgrid(X, Y) 29 | Z = X**2 + Y**2 30 | 31 | colortuple = ('w', 'b') 32 | colors = numpy.empty(X.shape, dtype=str) 33 | for x in xrange(len(X)): 34 | for y in xrange(len(Y)): 35 | colors[x, y] = colortuple[(x + y) % 2] 36 | 37 | surf = ax.plot_surface(X, Y, Z, rstride=1, cstride=1, facecolors=colors, 38 | linewidth=0) 39 | 40 | ax.set_xlim3d(-1, 1) 41 | ax.set_ylim3d(-1, 1) 42 | ax.set_zlim3d(0, 2) 43 | ax.w_xaxis.set_major_locator(LinearLocator(3)) 44 | ax.w_yaxis.set_major_locator(LinearLocator(3)) 45 | ax.w_zaxis.set_major_locator(LinearLocator(3)) 46 | ax.text(1.79, 0, 1.62, "$C$", fontsize=20) 47 | 48 | plt.show() 49 | -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/fig/weight_initialization.py: -------------------------------------------------------------------------------- 1 | """weight_initialization 2 | ~~~~~~~~~~~~~~~~~~~~~~~~ 3 | 4 | This program shows how weight initialization affects training. In 5 | particular, we'll plot out how the classification accuracies improve 6 | using either large starting weights, whose standard deviation is 1, or 7 | the default starting weights, whose standard deviation is 1 over the 8 | square root of the number of input neurons. 9 | 10 | """ 11 | 12 | # Standard library 13 | import json 14 | import random 15 | import sys 16 | 17 | # My library 18 | sys.path.append('../src/') 19 | import mnist_loader 20 | import network2 21 | 22 | # Third-party libraries 23 | import matplotlib.pyplot as plt 24 | import numpy as np 25 | 26 | def main(filename, n, eta): 27 | run_network(filename, n, eta) 28 | make_plot(filename) 29 | 30 | def run_network(filename, n, eta): 31 | """Train the network using both the default and the large starting 32 | weights. Store the results in the file with name ``filename``, 33 | where they can later be used by ``make_plots``. 34 | 35 | """ 36 | # Make results more easily reproducible 37 | random.seed(12345678) 38 | np.random.seed(12345678) 39 | training_data, validation_data, test_data = mnist_loader.load_data_wrapper() 40 | net = network2.Network([784, n, 10], cost=network2.CrossEntropyCost) 41 | print "Train the network using the default starting weights." 42 | default_vc, default_va, default_tc, default_ta \ 43 | = net.SGD(training_data, 30, 10, eta, lmbda=5.0, 44 | evaluation_data=validation_data, 45 | monitor_evaluation_accuracy=True) 46 | print "Train the network using the large starting weights." 47 | net.large_weight_initializer() 48 | large_vc, large_va, large_tc, large_ta \ 49 | = net.SGD(training_data, 30, 10, eta, lmbda=5.0, 50 | evaluation_data=validation_data, 51 | monitor_evaluation_accuracy=True) 52 | f = open(filename, "w") 53 | json.dump({"default_weight_initialization": 54 | [default_vc, default_va, default_tc, default_ta], 55 | "large_weight_initialization": 56 | [large_vc, large_va, large_tc, large_ta]}, 57 | f) 58 | f.close() 59 | 60 | def make_plot(filename): 61 | """Load the results from the file ``filename``, and generate the 62 | corresponding plot. 
63 | 64 | """ 65 | f = open(filename, "r") 66 | results = json.load(f) 67 | f.close() 68 | default_vc, default_va, default_tc, default_ta = results[ 69 | "default_weight_initialization"] 70 | large_vc, large_va, large_tc, large_ta = results[ 71 | "large_weight_initialization"] 72 | # Convert raw classification numbers to percentages, for plotting 73 | default_va = [x/100.0 for x in default_va] 74 | large_va = [x/100.0 for x in large_va] 75 | fig = plt.figure() 76 | ax = fig.add_subplot(111) 77 | ax.plot(np.arange(0, 30, 1), large_va, color='#2A6EA6', 78 | label="Old approach to weight initialization") 79 | ax.plot(np.arange(0, 30, 1), default_va, color='#FFA933', 80 | label="New approach to weight initialization") 81 | ax.set_xlim([0, 30]) 82 | ax.set_xlabel('Epoch') 83 | ax.set_ylim([85, 100]) 84 | ax.set_title('Classification accuracy') 85 | plt.legend(loc="lower right") 86 | plt.show() 87 | 88 | if __name__ == "__main__": 89 | main() 90 | -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/fig/weight_initialization_100.json: -------------------------------------------------------------------------------- 1 | {"default_weight_initialization": [[], [9295, 9481, 9547, 9592, 9664, 9673, 9702, 9719, 9726, 9726, 9732, 9732, 9730, 9734, 9745, 9751, 9757, 9761, 9764, 9766, 9758, 9767, 9756, 9752, 9777, 9775, 9770, 9770, 9771, 9781], [], []], "large_weight_initialization": [[], [8994, 9181, 9260, 9364, 9427, 9449, 9497, 9512, 9560, 9578, 9603, 9616, 9626, 9629, 9644, 9671, 9674, 9679, 9700, 9708, 9707, 9717, 9729, 9720, 9719, 9745, 9751, 9754, 9755, 9742], [], []]} -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/fig/weight_initialization_100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/weight_initialization_100.png -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/fig/weight_initialization_30.json: -------------------------------------------------------------------------------- 1 | {"default_weight_initialization": [[], [9270, 9414, 9470, 9504, 9537, 9550, 9587, 9594, 9596, 9594, 9616, 9595, 9622, 9630, 9636, 9641, 9625, 9652, 9637, 9634, 9642, 9639, 9649, 9646, 9646, 9653, 9646, 9653, 9640, 9650], [], []], "large_weight_initialization": [[], [8643, 9044, 9141, 9231, 9299, 9327, 9385, 9416, 9433, 9449, 9476, 9489, 9500, 9535, 9521, 9548, 9564, 9573, 9585, 9592, 9596, 9615, 9607, 9605, 9606, 9622, 9637, 9648, 9635, 9637], [], []]} -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/fig/weight_initialization_30.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/fig/weight_initialization_30.png -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | scikit-learn 3 | scipy 4 | Theano 5 | -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/src/expand_mnist.py: 
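Note: as transcribed, weight_initialization.py ends by calling ``main()`` with no arguments even though ``main(filename, n, eta)`` requires three, so running the script directly raises a TypeError. A minimal command-line wrapper (a sketch only; ``sys`` is already imported at the top of that file) would be:

    if __name__ == "__main__":
        # e.g. python weight_initialization.py weight_initialization_30.json 30 0.1
        main(sys.argv[1], int(sys.argv[2]), float(sys.argv[3]))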
-------------------------------------------------------------------------------- 1 | """expand_mnist.py 2 | ~~~~~~~~~~~~~~~~~~ 3 | 4 | Take the 50,000 MNIST training images, and create an expanded set of 5 | 250,000 images, by displacing each training image up, down, left and 6 | right, by one pixel. Save the resulting file to 7 | ../data/mnist_expanded.pkl.gz. 8 | 9 | Note that this program is memory intensive, and may not run on small 10 | systems. 11 | 12 | """ 13 | 14 | from __future__ import print_function 15 | 16 | #### Libraries 17 | 18 | # Standard library 19 | import cPickle 20 | import gzip 21 | import os.path 22 | import random 23 | 24 | # Third-party libraries 25 | import numpy as np 26 | 27 | print("Expanding the MNIST training set") 28 | 29 | if os.path.exists("../data/mnist_expanded.pkl.gz"): 30 | print("The expanded training set already exists. Exiting.") 31 | else: 32 | f = gzip.open("../data/mnist.pkl.gz", 'rb') 33 | training_data, validation_data, test_data = cPickle.load(f) 34 | f.close() 35 | expanded_training_pairs = [] 36 | j = 0 # counter 37 | for x, y in zip(training_data[0], training_data[1]): 38 | expanded_training_pairs.append((x, y)) 39 | image = np.reshape(x, (-1, 28)) 40 | j += 1 41 | if j % 1000 == 0: print("Expanding image number", j) 42 | # iterate over data telling us the details of how to 43 | # do the displacement 44 | for d, axis, index_position, index in [ 45 | (1, 0, "first", 0), 46 | (-1, 0, "first", 27), 47 | (1, 1, "last", 0), 48 | (-1, 1, "last", 27)]: 49 | new_img = np.roll(image, d, axis) 50 | if index_position == "first": 51 | new_img[index, :] = np.zeros(28) 52 | else: 53 | new_img[:, index] = np.zeros(28) 54 | expanded_training_pairs.append((np.reshape(new_img, 784), y)) 55 | random.shuffle(expanded_training_pairs) 56 | expanded_training_data = [list(d) for d in zip(*expanded_training_pairs)] 57 | print("Saving expanded data. This may take a few minutes.") 58 | f = gzip.open("../data/mnist_expanded.pkl.gz", "w") 59 | cPickle.dump((expanded_training_data, validation_data, test_data), f) 60 | f.close() 61 | -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/src/mnist_average_darkness.py: -------------------------------------------------------------------------------- 1 | """ 2 | mnist_average_darkness 3 | ~~~~~~~~~~~~~~~~~~~~~~ 4 | 5 | A naive classifier for recognizing handwritten digits from the MNIST 6 | data set. The program classifies digits based on how dark they are 7 | --- the idea is that digits like "1" tend to be less dark than digits 8 | like "8", simply because the latter has a more complex shape. When 9 | shown an image the classifier returns whichever digit in the training 10 | data had the closest average darkness. 11 | 12 | The program works in two steps: first it trains the classifier, and 13 | then it applies the classifier to the MNIST test data to see how many 14 | digits are correctly classified. 15 | 16 | Needless to say, this isn't a very good way of recognizing handwritten 17 | digits! 
Still, it's useful to show what sort of performance we get 18 | from naive ideas.""" 19 | 20 | #### Libraries 21 | # Standard library 22 | from collections import defaultdict 23 | 24 | # My libraries 25 | import mnist_loader 26 | 27 | def main(): 28 | training_data, validation_data, test_data = mnist_loader.load_data() 29 | # training phase: compute the average darknesses for each digit, 30 | # based on the training data 31 | avgs = avg_darknesses(training_data) 32 | # testing phase: see how many of the test images are classified 33 | # correctly 34 | num_correct = sum(int(guess_digit(image, avgs) == digit) 35 | for image, digit in zip(test_data[0], test_data[1])) 36 | print "Baseline classifier using average darkness of image." 37 | print "%s of %s values correct." % (num_correct, len(test_data[1])) 38 | 39 | def avg_darknesses(training_data): 40 | """ Return a defaultdict whose keys are the digits 0 through 9. 41 | For each digit we compute a value which is the average darkness of 42 | training images containing that digit. The darkness for any 43 | particular image is just the sum of the darknesses for each pixel.""" 44 | digit_counts = defaultdict(int) 45 | darknesses = defaultdict(float) 46 | for image, digit in zip(training_data[0], training_data[1]): 47 | digit_counts[digit] += 1 48 | darknesses[digit] += sum(image) 49 | avgs = defaultdict(float) 50 | for digit, n in digit_counts.iteritems(): 51 | avgs[digit] = darknesses[digit] / n 52 | return avgs 53 | 54 | def guess_digit(image, avgs): 55 | """Return the digit whose average darkness in the training data is 56 | closest to the darkness of ``image``. Note that ``avgs`` is 57 | assumed to be a defaultdict whose keys are 0...9, and whose values 58 | are the corresponding average darknesses across the training data.""" 59 | darkness = sum(image) 60 | distances = {k: abs(v-darkness) for k, v in avgs.iteritems()} 61 | return min(distances, key=distances.get) 62 | 63 | if __name__ == "__main__": 64 | main() 65 | -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/src/mnist_loader.py: -------------------------------------------------------------------------------- 1 | """ 2 | mnist_loader 3 | ~~~~~~~~~~~~ 4 | 5 | A library to load the MNIST image data. For details of the data 6 | structures that are returned, see the doc strings for ``load_data`` 7 | and ``load_data_wrapper``. In practice, ``load_data_wrapper`` is the 8 | function usually called by our neural network code. 9 | """ 10 | 11 | #### Libraries 12 | # Standard library 13 | import cPickle 14 | import gzip 15 | 16 | # Third-party libraries 17 | import numpy as np 18 | 19 | def load_data(): 20 | """Return the MNIST data as a tuple containing the training data, 21 | the validation data, and the test data. 22 | 23 | The ``training_data`` is returned as a tuple with two entries. 24 | The first entry contains the actual training images. This is a 25 | numpy ndarray with 50,000 entries. Each entry is, in turn, a 26 | numpy ndarray with 784 values, representing the 28 * 28 = 784 27 | pixels in a single MNIST image. 28 | 29 | The second entry in the ``training_data`` tuple is a numpy ndarray 30 | containing 50,000 entries. Those entries are just the digit 31 | values (0...9) for the corresponding images contained in the first 32 | entry of the tuple. 33 | 34 | The ``validation_data`` and ``test_data`` are similar, except 35 | each contains only 10,000 images. 
36 | 37 | This is a nice data format, but for use in neural networks it's 38 | helpful to modify the format of the ``training_data`` a little. 39 | That's done in the wrapper function ``load_data_wrapper()``, see 40 | below. 41 | """ 42 | f = gzip.open('../data/mnist.pkl.gz', 'rb') 43 | training_data, validation_data, test_data = cPickle.load(f) 44 | f.close() 45 | return (training_data, validation_data, test_data) 46 | 47 | def load_data_wrapper(): 48 | """Return a tuple containing ``(training_data, validation_data, 49 | test_data)``. Based on ``load_data``, but the format is more 50 | convenient for use in our implementation of neural networks. 51 | 52 | In particular, ``training_data`` is a list containing 50,000 53 | 2-tuples ``(x, y)``. ``x`` is a 784-dimensional numpy.ndarray 54 | containing the input image. ``y`` is a 10-dimensional 55 | numpy.ndarray representing the unit vector corresponding to the 56 | correct digit for ``x``. 57 | 58 | ``validation_data`` and ``test_data`` are lists containing 10,000 59 | 2-tuples ``(x, y)``. In each case, ``x`` is a 784-dimensional 60 | numpy.ndarray containing the input image, and ``y`` is the 61 | corresponding classification, i.e., the digit values (integers) 62 | corresponding to ``x``. 63 | 64 | Obviously, this means we're using slightly different formats for 65 | the training data and the validation / test data. These formats 66 | turn out to be the most convenient for use in our neural network 67 | code.""" 68 | tr_d, va_d, te_d = load_data() 69 | training_inputs = [np.reshape(x, (784, 1)) for x in tr_d[0]] 70 | training_results = [vectorized_result(y) for y in tr_d[1]] 71 | training_data = zip(training_inputs, training_results) 72 | validation_inputs = [np.reshape(x, (784, 1)) for x in va_d[0]] 73 | validation_data = zip(validation_inputs, va_d[1]) 74 | test_inputs = [np.reshape(x, (784, 1)) for x in te_d[0]] 75 | test_data = zip(test_inputs, te_d[1]) 76 | return (training_data, validation_data, test_data) 77 | 78 | def vectorized_result(j): 79 | """Return a 10-dimensional unit vector with a 1.0 in the jth 80 | position and zeroes elsewhere. This is used to convert a digit 81 | (0...9) into a corresponding desired output from the neural 82 | network.""" 83 | e = np.zeros((10, 1)) 84 | e[j] = 1.0 85 | return e 86 | -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/src/mnist_svm.py: -------------------------------------------------------------------------------- 1 | """ 2 | mnist_svm 3 | ~~~~~~~~~ 4 | 5 | A classifier program for recognizing handwritten digits from the MNIST 6 | data set, using an SVM classifier.""" 7 | 8 | #### Libraries 9 | # My libraries 10 | import mnist_loader 11 | 12 | # Third-party libraries 13 | from sklearn import svm 14 | 15 | def svm_baseline(): 16 | training_data, validation_data, test_data = mnist_loader.load_data() 17 | # train 18 | clf = svm.SVC() 19 | clf.fit(training_data[0], training_data[1]) 20 | # test 21 | predictions = [int(a) for a in clf.predict(test_data[0])] 22 | num_correct = sum(int(a == y) for a, y in zip(predictions, test_data[1])) 23 | print "Baseline classifier using an SVM." 24 | print "%s of %s values correct."
% (num_correct, len(test_data[1])) 25 | 26 | if __name__ == "__main__": 27 | svm_baseline() 28 | 29 | -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/src/network.py: -------------------------------------------------------------------------------- 1 | """ 2 | network.py 3 | ~~~~~~~~~ 4 | 5 | A module to implement the stochastic gradient descent learning 6 | algorithm for a feedforward neural network. Gradients are calculated 7 | using backpropagation. Note that I have focused on making the code 8 | simple, easily readable, and easily modifiable. It is not optimized, 9 | and omits many desirable features. 10 | """ 11 | 12 | #### Libraries 13 | # Standard library 14 | import random 15 | 16 | # Third-party libraries 17 | import numpy as np 18 | 19 | class Network(object): 20 | 21 | def __init__(self, sizes): 22 | """The list ``sizes`` contains the number of neurons in the 23 | respective layers of the network. For example, if the list 24 | was [2, 3, 1] then it would be a three-layer network, with the 25 | first layer containing 2 neurons, the second layer 3 neurons, 26 | and the third layer 1 neuron. The biases and weights for the 27 | network are initialized randomly, using a Gaussian 28 | distribution with mean 0, and variance 1. Note that the first 29 | layer is assumed to be an input layer, and by convention we 30 | won't set any biases for those neurons, since biases are only 31 | ever used in computing the outputs from later layers.""" 32 | self.num_layers = len(sizes) 33 | self.sizes = sizes 34 | self.biases = [np.random.randn(y, 1) for y in sizes[1:]] 35 | self.weights = [np.random.randn(y, x) 36 | for x, y in zip(sizes[:-1], sizes[1:])] 37 | 38 | def feedforward(self, a): 39 | """Return the output of the network if ``a`` is input.""" 40 | for b, w in zip(self.biases, self.weights): 41 | a = sigmoid(np.dot(w, a)+b) 42 | return a 43 | 44 | def SGD(self, training_data, epochs, mini_batch_size, eta, 45 | test_data=None): 46 | """Train the neural network using mini-batch stochastic 47 | gradient descent. The ``training_data`` is a list of tuples 48 | ``(x, y)`` representing the training inputs and the desired 49 | outputs. The other non-optional parameters are 50 | self-explanatory. If ``test_data`` is provided then the 51 | network will be evaluated against the test data after each 52 | epoch, and partial progress printed out. This is useful for 53 | tracking progress, but slows things down substantially.""" 54 | if test_data: n_test = len(test_data) 55 | n = len(training_data) 56 | for j in xrange(epochs): 57 | random.shuffle(training_data) 58 | mini_batches = [ 59 | training_data[k:k+mini_batch_size] 60 | for k in xrange(0, n, mini_batch_size)] 61 | for mini_batch in mini_batches: 62 | self.update_mini_batch(mini_batch, eta) 63 | if test_data: 64 | print "Epoch {0}: {1} / {2}".format( 65 | j, self.evaluate(test_data), n_test) 66 | else: 67 | print "Epoch {0} complete".format(j) 68 | 69 | def update_mini_batch(self, mini_batch, eta): 70 | """Update the network's weights and biases by applying 71 | gradient descent using backpropagation to a single mini batch. 
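Concretely, each weight is updated as ``w -> w - (eta/m)*nabla_w`` and each bias as ``b -> b - (eta/m)*nabla_b``, where ``m`` is the number of training examples in the mini batch and the nablas are the backpropagated gradients summed over it.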
72 | The ``mini_batch`` is a list of tuples ``(x, y)``, and ``eta`` 73 | is the learning rate.""" 74 | nabla_b = [np.zeros(b.shape) for b in self.biases] 75 | nabla_w = [np.zeros(w.shape) for w in self.weights] 76 | for x, y in mini_batch: 77 | delta_nabla_b, delta_nabla_w = self.backprop(x, y) 78 | nabla_b = [nb+dnb for nb, dnb in zip(nabla_b, delta_nabla_b)] 79 | nabla_w = [nw+dnw for nw, dnw in zip(nabla_w, delta_nabla_w)] 80 | self.weights = [w-(eta/len(mini_batch))*nw 81 | for w, nw in zip(self.weights, nabla_w)] 82 | self.biases = [b-(eta/len(mini_batch))*nb 83 | for b, nb in zip(self.biases, nabla_b)] 84 | 85 | def backprop(self, x, y): 86 | """Return a tuple ``(nabla_b, nabla_w)`` representing the 87 | gradient for the cost function C_x. ``nabla_b`` and 88 | ``nabla_w`` are layer-by-layer lists of numpy arrays, similar 89 | to ``self.biases`` and ``self.weights``.""" 90 | nabla_b = [np.zeros(b.shape) for b in self.biases] 91 | nabla_w = [np.zeros(w.shape) for w in self.weights] 92 | # feedforward 93 | activation = x 94 | activations = [x] # list to store all the activations, layer by layer 95 | zs = [] # list to store all the z vectors, layer by layer 96 | for b, w in zip(self.biases, self.weights): 97 | z = np.dot(w, activation)+b 98 | zs.append(z) 99 | activation = sigmoid(z) 100 | activations.append(activation) 101 | # backward pass 102 | delta = self.cost_derivative(activations[-1], y) * \ 103 | sigmoid_prime(zs[-1]) 104 | nabla_b[-1] = delta 105 | nabla_w[-1] = np.dot(delta, activations[-2].transpose()) 106 | # Note that the variable l in the loop below is used a little 107 | # differently to the notation in Chapter 2 of the book. Here, 108 | # l = 1 means the last layer of neurons, l = 2 is the 109 | # second-last layer, and so on. It's a renumbering of the 110 | # scheme in the book, used here to take advantage of the fact 111 | # that Python can use negative indices in lists. 112 | for l in xrange(2, self.num_layers): 113 | z = zs[-l] 114 | sp = sigmoid_prime(z) 115 | delta = np.dot(self.weights[-l+1].transpose(), delta) * sp 116 | nabla_b[-l] = delta 117 | nabla_w[-l] = np.dot(delta, activations[-l-1].transpose()) 118 | return (nabla_b, nabla_w) 119 | 120 | def evaluate(self, test_data): 121 | """Return the number of test inputs for which the neural 122 | network outputs the correct result. 
Note that the neural 123 | network's output is assumed to be the index of whichever 124 | neuron in the final layer has the highest activation.""" 125 | test_results = [(np.argmax(self.feedforward(x)), y) 126 | for (x, y) in test_data] 127 | return sum(int(x == y) for (x, y) in test_results) 128 | 129 | def cost_derivative(self, output_activations, y): 130 | """Return the vector of partial derivatives \partial C_x / 131 | \partial a for the output activations.""" 132 | return (output_activations-y) 133 | 134 | #### Miscellaneous functions 135 | def sigmoid(z): 136 | """The sigmoid function.""" 137 | return 1.0/(1.0+np.exp(-z)) 138 | 139 | def sigmoid_prime(z): 140 | """Derivative of the sigmoid function.""" 141 | return sigmoid(z)*(1-sigmoid(z)) 142 | -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/src/old/blog/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/src/old/blog/__init__.py -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/src/old/blog/common_knowledge.py: -------------------------------------------------------------------------------- 1 | """ 2 | common_knowledge 3 | ~~~~~~~~~~~~~~~~ 4 | 5 | Try to determine whether or not it's possible to relate the 6 | descriptions given by two different autoencoders. 7 | 8 | """ 9 | 10 | #### Libraries 11 | # My libraries 12 | from backprop2 import Network, sigmoid_vec 13 | import mnist_loader 14 | 15 | # Third-party libraries 16 | import matplotlib 17 | import matplotlib.pyplot as plt 18 | import numpy as np 19 | 20 | 21 | #### Parameters 22 | # Size of the training sets. May range from 1000 to 12,500. Lower 23 | # will be faster, higher will give more accuracy. 
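# (Note: td_2 below is defined but never used -- the second autoencoder is
# also trained on td_1, which looks like a slip; training ae_2 on td_2 was
# presumably intended, so that the two autoencoders learn from disjoint
# slices of the training data.)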
24 | SIZE = 5000 25 | # Number of hidden units in the autoencoder 26 | HIDDEN = 30 27 | 28 | print "\nGenerating training data" 29 | training_data, _, _ = mnist_loader.load_data_nn() 30 | td_1 = [(x, x) for x, _ in training_data[0:SIZE]] 31 | td_2 = [(x, x) for x, _ in training_data[12500:12500+SIZE]] 32 | td_3 = [x for x, _ in training_data[25000:25000+SIZE]] 33 | test = [x for x, _ in training_data[37500:37500+SIZE]] 34 | 35 | print "\nFinding first autoencoder" 36 | ae_1 = Network([784, HIDDEN, 784]) 37 | ae_1.SGD(td_1, 4, 10, 0.01, 0.05) 38 | 39 | print "\nFinding second autoencoder" 40 | ae_2 = Network([784, HIDDEN, 784]) 41 | ae_2.SGD(td_1, 4, 10, 0.01, 0.05) 42 | 43 | print "\nGenerating encoded training data" 44 | encoded_td_1 = [sigmoid_vec(np.dot(ae_1.weights[0], x)+ae_1.biases[0]) 45 | for x in td_3] 46 | encoded_td_2 = [sigmoid_vec(np.dot(ae_2.weights[0], x)+ae_2.biases[0]) 47 | for x in td_3] 48 | encoded_training_data = zip(encoded_td_1, encoded_td_2) 49 | 50 | print "\nFinding mapping between theories" 51 | net = Network([HIDDEN, HIDDEN]) 52 | net.SGD(encoded_training_data, 6, 10, 0.01, 0.05) 53 | 54 | print """\nBaseline for comparison: decompress with the first autoencoder""" 55 | print """and compress with the second autoencoder""" 56 | encoded_test_1 = [sigmoid_vec(np.dot(ae_1.weights[0], x)+ae_1.biases[0]) 57 | for x in test] 58 | encoded_test_2 = [sigmoid_vec(np.dot(ae_2.weights[0], x)+ae_2.biases[0]) 59 | for x in test] 60 | test_data = zip(encoded_test_1, encoded_test_2) 61 | net_baseline = Network([HIDDEN, 784, HIDDEN]) 62 | net_baseline.biases[0] = ae_1.biases[1] 63 | net_baseline.weights[0] = ae_1.weights[1] 64 | net_baseline.biases[1] = ae_2.biases[0] 65 | net_baseline.weights[1] = ae_2.weights[0] 66 | error_baseline = sum(np.linalg.norm(net_baseline.feedforward(x)-y, 1) 67 | for (x, y) in test_data) 68 | print "Baseline average l1 error per training image: %s" % (error_baseline / SIZE,) 69 | 70 | print "\nComparing theories with a simple interconversion" 71 | print "Mean desired output activation: %s" % ( 72 | sum(y.mean() for _, y in test_data) / SIZE,) 73 | error = sum(np.linalg.norm(net.feedforward(x)-y, 1) for (x, y) in test_data) 74 | print "Average l1 error per training image: %s" % (error / SIZE,) 75 | 76 | print "\nComputing fiducial image inputs" 77 | fiducial_images_1 = [ 78 | ae_1.weights[0][j,:].reshape(28,28)/np.linalg.norm(net.weights[0][j,:]) 79 | for j in range(HIDDEN)] 80 | fiducial_images_2 = [ 81 | ae_2.weights[0][j,:].reshape(28,28)/np.linalg.norm(net.weights[0][j,:]) 82 | for j in range(HIDDEN)] 83 | image = np.concatenate([np.concatenate(fiducial_images_1, axis=1), 84 | np.concatenate(fiducial_images_2, axis=1)]) 85 | fig = plt.figure() 86 | ax = fig.add_subplot(111) 87 | ax.matshow(image, cmap = matplotlib.cm.binary) 88 | plt.xticks(np.array([])) 89 | plt.yticks(np.array([])) 90 | plt.show() 91 | -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/src/old/cost_vs_iterations.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/src/old/cost_vs_iterations.png -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/src/old/cost_vs_iterations_trapped.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/src/old/cost_vs_iterations_trapped.png -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/src/old/deep_autoencoder.py: -------------------------------------------------------------------------------- 1 | """ 2 | deep_autoencoder 3 | ~~~~~~~~~~~~~~~~ 4 | 5 | A module which implements deep autoencoders. 6 | """ 7 | 8 | #### Libraries 9 | # Standard library 10 | import random 11 | 12 | # My libraries 13 | from backprop2 import Network, sigmoid_vec 14 | 15 | # Third-party libraries 16 | import numpy as np 17 | 18 | 19 | def plot_helper(x): 20 | import matplotlib 21 | import matplotlib.pyplot as plt 22 | x = np.reshape(x, (-1, 28)) 23 | fig = plt.figure() 24 | ax = fig.add_subplot(1, 1, 1) 25 | ax.matshow(x, cmap = matplotlib.cm.binary) 26 | plt.xticks(np.array([])) 27 | plt.yticks(np.array([])) 28 | plt.show() 29 | 30 | 31 | class DeepAutoencoder(Network): 32 | 33 | def __init__(self, layers): 34 | """ 35 | The list ``layers`` specifies the sizes of the nested 36 | autoencoders. For example, if ``layers`` is [50, 20, 10] then 37 | the deep autoencoder will be a neural network with layers of 38 | size [50, 20, 10, 20, 50].""" 39 | self.layers = layers 40 | Network.__init__(self, layers+layers[-2::-1]) 41 | 42 | def train(self, training_data, epochs, mini_batch_size, eta, 43 | lmbda): 44 | """ 45 | Train the DeepAutoencoder. The ``training_data`` is a list of 46 | training inputs, ``x``, ``mini_batch_size`` is a single 47 | positive integer, and ``epochs``, ``eta``, ``lmbda`` are lists 48 | of parameters, with the different list members corresponding 49 | to the different stages of training. For example, ``eta[0]`` 50 | is the learning rate used for the first nested autoencoder, 51 | ``eta[1]`` is the learning rate for the second nested 52 | autoencoder, and so on. ``eta[-1]`` is the learning rate used 53 | for the final stage of fine-tuning. 54 | """ 55 | print "\nTraining a %s deep autoencoder" % ( 56 | "-".join([str(j) for j in self.sizes]),) 57 | training_data = double(training_data) 58 | cur_training_data = training_data[::] 59 | for j in range(len(self.layers)-1): 60 | print "\nTraining the %s-%s-%s nested autoencoder" % ( 61 | self.layers[j], self.layers[j+1], self.layers[j]) 62 | print "%s epochs, mini-batch size %s, eta = %s, lambda = %s" % ( 63 | epochs[j], mini_batch_size, eta[j], lmbda[j]) 64 | self.train_nested_autoencoder( 65 | j, cur_training_data, epochs[j], mini_batch_size, eta[j], 66 | lmbda[j]) 67 | cur_training_data = [ 68 | (sigmoid_vec(np.dot(net.weights[0], x)+net.biases[0]),)*2 69 | for (x, _) in cur_training_data] 70 | print "\nFine-tuning network weights with backpropagation" 71 | print "%s epochs, mini-batch size %s, eta = %s, lambda = %s" % ( 72 | epochs[-1], mini_batch_size, eta[-1], lmbda[-1]) 73 | self.SGD(training_data, epochs[-1], mini_batch_size, eta[-1], 74 | lmbda[-1]) 75 | 76 | def train_nested_autoencoder( 77 | self, j, encoded_training_data, epochs, mini_batch_size, eta, lmbda): 78 | """ 79 | Train the nested autoencoder that starts at layer ``j`` in the 80 | deep autoencoder. 
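The nested autoencoder shares its parameters with the deep autoencoder: the weights and biases for layers ``j`` and ``-j-1`` are copied in before training and copied back out afterwards.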
Note that ``encoded_training_data`` is a 81 | list with entries of the form ``(x, x)``, where the ``x`` are 82 | encoded training inputs for layer ``j``.""" 83 | net = Network([self.layers[j], self.layers[j+1], self.layers[j]]) 84 | net.biases[0] = self.biases[j] 85 | net.biases[1] = self.biases[-j-1] 86 | net.weights[0] = self.weights[j] 87 | net.weights[1] = self.weights[-j-1] 88 | net.SGD(encoded_training_data, epochs, mini_batch_size, eta, lmbda) 89 | self.biases[j] = net.biases[0] 90 | self.biases[-j-1] = net.biases[1] 91 | self.weights[j] = net.weights[0] 92 | self.weights[-j-1] = net.weights[1] 93 | 94 | def train_nested_autoencoder_repl( 95 | self, j, training_data, epochs, mini_batch_size, eta, lmbda): 96 | """ 97 | This is a convenience method that can be used from the REPL to 98 | train the nested autoencoder that starts at level ``j`` in the 99 | deep autoencoder. Note that ``training_data`` is the input 100 | data for the first layer of the network, and is a list of 101 | entries ``x``.""" 102 | self.train_nested_autoencoder( 103 | j, 104 | double( 105 | [self.feedforward(x, start=0, end=j) for x in training_data]), 106 | epochs, mini_batch_size, eta, lmbda) 107 | 108 | def feature(self, j, k): 109 | """ 110 | Return the output if neuron number ``k`` in layer ``j`` is 111 | activated, and all others are not active. """ 112 | a = np.zeros((self.sizes[j], 1)) 113 | a[k] = 1.0 114 | return self.feedforward(a, start=j, end=self.num_layers) 115 | 116 | def double(l): 117 | return [(x, x) for x in l] 118 | 119 | -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/src/old/deep_learning.py: -------------------------------------------------------------------------------- 1 | """ 2 | deep_learning 3 | ~~~~~~~~~~~~~ 4 | 5 | Module to do deep learning. Most of the functionality needed is 6 | already in the ``backprop2`` and ``deep_autoencoder`` modules, but 7 | this adds convenience functions to help in doing things like unrolling 8 | deep autoencoders, and adding and training a classifier layer.""" 9 | 10 | # My Libraries 11 | from backprop2 import Network 12 | from deep_autoencoder import DeepAutoencoder 13 | 14 | def unroll(deep_autoencoder): 15 | """ 16 | Return a Network that contains the compression stage of the 17 | ``deep_autoencoder``.""" 18 | net = Network(deep_autoencoder.layers) 19 | net.weights = deep_autoencoder.weights[:len(deep_autoencoder.layers)-1] 20 | net.biases = deep_autoencoder.biases[:len(deep_autoencoder.layers)-1] 21 | return net 22 | 23 | def add_classifier_layer(net, num_outputs): 24 | """ 25 | Return the Network ``net``, but with an extra layer containing 26 | ``num_outputs`` neurons appended.""" 27 | net_classifier = Network(net.sizes+[num_outputs]) 28 | net_classifier.weights[:-1] = net.weights 29 | net_classifier.biases[:-1] = net.biases 30 | return net_classifier 31 | 32 | def SGD_final_layer( 33 | self, training_data, epochs, mini_batch_size, eta, lmbda): 34 | """ 35 | Run SGD on the final layer of the Network ``self``. Note that 36 | ``training_data`` is the input to the whole Network, not the 37 | encoded training data input to the final layer. 
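Internally, the training inputs are first fed forward through all but the final layer, a one-layer Network is trained on those encoded activations, and the trained weights and biases are then copied back into ``self``.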
38 | """ 39 | encoded_training_data = [ 40 | (self.feedforward(x, start=0, end=self.num_layers-2), y) 41 | for x, y in training_data] 42 | net = Network(self.sizes[-2:]) 43 | net.biases[0] = self.biases[-1] 44 | net.weights[0] = self.weights[-1] 45 | net.SGD(encoded_training_data, epochs, mini_batch_size, eta, lmbda) 46 | self.biases[-1] = net.biases[0] 47 | self.weights[-1] = net.weights[0] 48 | 49 | 50 | # Add the SGD_final_layer method to the Network class 51 | Network.SGD_final_layer = SGD_final_layer 52 | -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/src/old/gradient_descent_hack.py: -------------------------------------------------------------------------------- 1 | """ 2 | gradient_descent_hack 3 | ~~~~~~~~~~~~~~~~~~~~~ 4 | 5 | This program uses gradient descent to learn weights and biases for a 6 | three-neuron network to compute the XOR function. The program is a 7 | quick-and-dirty hack meant to illustrate the basic ideas of gradient 8 | descent, not a cleanly-designed and generalizable implementation.""" 9 | 10 | #### Libraries 11 | # Third-party libraries 12 | import matplotlib.pyplot as plt 13 | import numpy as np 14 | 15 | def sigmoid(z): 16 | return 1.0/(1.0+np.exp(-z)) 17 | 18 | def neuron(w, x): 19 | """ Return the output from the sigmoid neuron with weights ``w`` 20 | and inputs ``x``. Both are numpy arrays, with three and two 21 | elements, respectively. The first input weight is the bias.""" 22 | return sigmoid(w[0]+np.inner(w[1:], x)) 23 | 24 | def h(w, x): 25 | """ Return the output from the three-neuron network with weights 26 | ``w`` and inputs ``x``. Note that ``w`` is a numpy array with 27 | nine elements, consisting of three weights for each neuron (the 28 | bias plus two input weights). ``x`` is a numpy array with just 29 | two elements.""" 30 | neuron1_out = neuron(w[0:3], x) # top left neuron 31 | neuron2_out = neuron(w[3:6], x) # bottom left neuron 32 | return neuron(w[6:9], np.array([neuron1_out, neuron2_out])) 33 | 34 | # inputs and corresponding outputs for the function we're computing (XOR) 35 | INPUTS = [[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]] 36 | OUTPUTS = [0.0, 1.0, 1.0, 0.0] 37 | 38 | def cost(w): 39 | """ Return the cost when the neural network has weights ``w``. 40 | The cost is computed with respect to the XOR function.""" 41 | return 0.5 * sum((y-h(w, np.array(x)))**2 for x, y in zip(INPUTS, OUTPUTS)) 42 | 43 | def partial(f, k, w): 44 | """ Return the partial derivative of the function ``f`` with 45 | respect to the ``k``th variable, at location ``w``. Note that 46 | ``f`` must take a numpy array as input, and the partial derivative 47 | is evaluated with respect to the ``k``th element in that array. 48 | Similarly, ``w`` is a numpy array which can be used as input to 49 | ``f``.""" 50 | w_plus, w_minus = w.copy(), w.copy() 51 | w_plus[k] += 0.01 # using epsilon = 0.01 52 | w_minus[k] += -0.01 53 | return (f(w_plus)-f(w_minus))/0.02 54 | 55 | def gradient_descent(cost, eta, n): 56 | """ Perform ``n`` iterations of the gradient descent algorithm to 57 | minimize the ``cost`` function, with a learning rate ``eta``. 
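Gradients are estimated numerically with the central-difference ``partial`` function above, so each iteration costs 2*9 = 18 evaluations of ``cost`` for the gradient alone.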
58 | Return a tuple whose first entry is an array containing the final 59 | weights, and whose second entry is a list of the values the 60 | ``cost`` function took at different iterations.""" 61 | w = np.random.uniform(-1, 1, 9) # initialize weights randomly 62 | costs = [] 63 | for j in xrange(n): 64 | c = cost(w) 65 | print "Current cost: {0:.3f}".format(c) 66 | costs.append(c) 67 | gradient = [partial(cost, k, w) for k in xrange(9)] 68 | w = np.array([wt-eta*d for wt, d in zip(w, gradient)]) 69 | return w, costs 70 | 71 | def main(): 72 | """ Perform gradient descent to find weights for a sigmoid neural 73 | network to compute XOR. 10,000 iterations are used. Outputs the 74 | final value of the cost function, the final weights, and plots a 75 | graph of cost as a function of iteration.""" 76 | w, costs = gradient_descent(cost, 0.1, 10000) 77 | print "\nFinal cost: {0:.3f}".format(cost(w)) 78 | print "\nFinal weights: %s" % w 79 | plt.plot(np.array(costs)) 80 | plt.xlabel('iteration') 81 | plt.ylabel('cost') 82 | plt.title('How cost decreases with the number of iterations') 83 | plt.show() 84 | 85 | if __name__ == "__main__": 86 | main() 87 | -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/src/old/mnist_100_30_deep_autoencoder.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/src/old/mnist_100_30_deep_autoencoder.png -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/src/old/mnist_100_unit_autoencoder.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/src/old/mnist_100_unit_autoencoder.png -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/src/old/mnist_10_unit_autoencoder.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/src/old/mnist_10_unit_autoencoder.png -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/src/old/mnist_30_component_pca.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/src/old/mnist_30_component_pca.png -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/src/old/mnist_30_unit_autoencoder.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/neural-networks-and-deep-learning/src/old/mnist_30_unit_autoencoder.png -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/src/old/mnist_autoencoder.py: -------------------------------------------------------------------------------- 1 | """ 2 | mnist_autoencoder 3 | ~~~~~~~~~~~~~~~~~ 4 | 5 | Implements an autoencoder for the 
MNIST data. The program can do two 6 | things: (1) plot the autoencoder's output for the first ten images in 7 | the MNIST test set; and (2) use the autoencoder to build a classifier. 8 | The program is a quick-and-dirty hack --- we'll do things in a more 9 | systematic way in the module ``deep_autoencoder``. 10 | """ 11 | 12 | # My Libraries 13 | from backprop2 import Network 14 | import mnist_loader 15 | 16 | # Third-party libraries 17 | import matplotlib 18 | import matplotlib.pyplot as plt 19 | import numpy as np 20 | 21 | def autoencoder_results(hidden_units): 22 | """ 23 | Train an autoencoder using the MNIST training data and plot the 24 | results when the first ten MNIST test images are passed through 25 | the autoencoder. 26 | """ 27 | training_data, test_inputs, actual_test_results = \ 28 | mnist_loader.load_data_nn() 29 | net = train_autoencoder(hidden_units, training_data) 30 | plot_test_results(net, test_inputs) 31 | 32 | def train_autoencoder(hidden_units, training_data): 33 | "Return a trained autoencoder." 34 | autoencoder_training_data = [(x, x) for x, _ in training_data] 35 | net = Network([784, hidden_units, 784]) 36 | net.SGD(autoencoder_training_data, 6, 10, 0.01, 0.05) 37 | return net 38 | 39 | def plot_test_results(net, test_inputs): 40 | """ 41 | Plot the results after passing the first ten test MNIST digits through 42 | the autoencoder ``net``.""" 43 | fig = plt.figure() 44 | ax = fig.add_subplot(111) 45 | images_in = [test_inputs[j].reshape(-1, 28) for j in range(10)] 46 | images_out = [net.feedforward(test_inputs[j]).reshape(-1, 28) 47 | for j in range(10)] 48 | image_in = np.concatenate(images_in, axis=1) 49 | image_out = np.concatenate(images_out, axis=1) 50 | image = np.concatenate([image_in, image_out]) 51 | ax.matshow(image, cmap = matplotlib.cm.binary) 52 | plt.xticks(np.array([])) 53 | plt.yticks(np.array([])) 54 | plt.show() 55 | 56 | def classifier(hidden_units, n_unlabeled_inputs, n_labeled_inputs): 57 | """ 58 | Train a semi-supervised classifier. We begin with pretraining, 59 | creating an autoencoder which uses ``n_unlabeled_inputs`` from the 60 | MNIST training data. This is then converted into a classifier 61 | which is fine-tuned using the ``n_labeled_inputs``. 62 | 63 | For comparison a classifier is also created which does not make 64 | use of the unlabeled data. 
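For example, ``classifier(30, 20000, 100)`` would pretrain a 30-hidden-unit autoencoder on 20,000 unlabeled images and then fine-tune the resulting classifier on just 100 labeled images.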
65 | """ 66 | training_data, test_inputs, actual_test_results = \ 67 | mnist_loader.load_data_nn() 68 | print "\nUsing pretraining and %s items of unlabeled data" %\ 69 | n_unlabeled_inputs 70 | net_ae = train_autoencoder(hidden_units, training_data[:n_unlabeled_inputs]) 71 | net_c = Network([784, hidden_units, 10]) 72 | net_c.biases = net_ae.biases[:1]+[np.random.randn(10, 1)/np.sqrt(10)] 73 | net_c.weights = net_ae.weights[:1]+\ 74 | [np.random.randn(10, hidden_units)/np.sqrt(10)] 75 | net_c.SGD(training_data[-n_labeled_inputs:], 300, 10, 0.01, 0.05) 76 | print "Result on test data: %s / %s" % ( 77 | net_c.evaluate(test_inputs, actual_test_results), len(test_inputs)) 78 | print "Training a network with %s items of training data" % n_labeled_inputs 79 | net = Network([784, hidden_units, 10]) 80 | net.SGD(training_data[-n_labeled_inputs:], 300, 10, 0.01, 0.05) 81 | print "Result on test data: %s / %s" % ( 82 | net.evaluate(test_inputs, actual_test_results), len(test_inputs)) 83 | return net_c 84 | -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/src/old/mnist_pca.py: -------------------------------------------------------------------------------- 1 | """ 2 | mnist_pca 3 | ~~~~~~~~~ 4 | 5 | Use PCA to reconstruct some of the MNIST test digits. 6 | """ 7 | 8 | # My libraries 9 | import mnist_loader 10 | 11 | # Third-party libraries 12 | import matplotlib 13 | import matplotlib.pyplot as plt 14 | import numpy as np 15 | from sklearn.decomposition import RandomizedPCA 16 | 17 | 18 | # Training 19 | training_data, test_inputs, actual_test_results = mnist_loader.load_data_nn() 20 | pca = RandomizedPCA(n_components=30) 21 | nn_images = [x for (x, y) in training_data] 22 | pca_images = np.concatenate(nn_images, axis=1).transpose() 23 | pca_r = pca.fit(pca_images) 24 | 25 | # Try PCA on first ten test images 26 | test_images = np.array(test_inputs[:10]).reshape((10,784)) 27 | test_outputs = pca_r.inverse_transform(pca_r.transform(test_images)) 28 | 29 | # Plot the first ten test images and the corresponding outputs 30 | fig = plt.figure() 31 | ax = fig.add_subplot(111) 32 | images_in = [test_inputs[j].reshape(-1, 28) for j in range(10)] 33 | images_out = [test_outputs[j].reshape(-1, 28) for j in range(10)] 34 | image_in = np.concatenate(images_in, axis=1) 35 | image_out = np.concatenate(images_out, axis=1) 36 | image = np.concatenate([image_in, image_out]) 37 | ax.matshow(image, cmap = matplotlib.cm.binary) 38 | plt.xticks(np.array([])) 39 | plt.yticks(np.array([])) 40 | plt.show() 41 | -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/src/old/perceptron_learning.py: -------------------------------------------------------------------------------- 1 | """ 2 | perceptron_learning 3 | ~~~~~~~~~~~~~~~~~~~ 4 | 5 | Demonstrates how a perceptron can learn the NAND gate, using the 6 | perceptron learning algorithm.""" 7 | 8 | #### Libraries 9 | # Third-party library 10 | import numpy as np 11 | 12 | class Perceptron(object): 13 | """ A Perceptron instance can take a function and attempt to 14 | ``learn`` a bias and set of weights that compute that function, 15 | using the perceptron learning algorithm.""" 16 | 17 | def __init__(self, num_inputs=2): 18 | """ Initialize the perceptron with the bias and all weights 19 | set to 0.0. 
``num_inputs`` is the number of input bits to the 20 | perceptron.""" 21 | self.num_inputs = num_inputs 22 | self.bias = 0.0 23 | self.weights = np.zeros(num_inputs) 24 | # self.inputs is a convenience attribute. It's a list containing 25 | # all possible binary inputs to the perceptron. E.g., for three 26 | # inputs it is: [np.array([0, 0, 0]), np.array([0, 0, 1]), ...] 27 | self.inputs = [np.array([int(y) 28 | for y in bin(x).lstrip("0b").zfill(num_inputs)]) 29 | for x in xrange(2**num_inputs)] 30 | 31 | def output(self, x): 32 | """ Return the output (0 or 1) from the perceptron, with input 33 | ``x``.""" 34 | return 1 if np.inner(self.weights, x)+self.bias > 0 else 0 35 | 36 | def learn(self, f, eta=0.1): 37 | """ Find a bias and a set of weights for a perceptron that 38 | computes the function ``f``. ``eta`` is the learning rate, and 39 | should be a small positive number. Does not terminate when 40 | the function cannot be computed using a perceptron.""" 41 | # initialize the bias and weights with random values 42 | self.bias = np.random.normal() 43 | self.weights = np.random.randn(self.num_inputs) 44 | number_of_errors = -1 45 | while number_of_errors != 0: 46 | number_of_errors = 0 47 | print "Beginning iteration" 48 | print "Bias: {:.3f}".format(self.bias) 49 | print "Weights:", ", ".join( 50 | "{:.3f}".format(wt) for wt in self.weights) 51 | for x in self.inputs: 52 | error = f(x)-self.output(x) 53 | if error: 54 | number_of_errors += 1 55 | self.bias = self.bias+eta*error 56 | self.weights = self.weights+eta*error*x 57 | print "Number of errors:", number_of_errors, "\n" 58 | 59 | def f(x): 60 | """ Target function for the perceptron learning algorithm. I've 61 | chosen the NAND gate, but any function is okay, with the caveat 62 | that the algorithm won't terminate if ``f`` cannot be computed by 63 | a perceptron.""" 64 | return int(not (x[0] and x[1])) 65 | 66 | if __name__ == "__main__": 67 | Perceptron(2).learn(f, 0.1) 68 | -------------------------------------------------------------------------------- /neural-networks-and-deep-learning/src/run_network.py: -------------------------------------------------------------------------------- 1 | 2 | # coding: utf-8 3 | 4 | # # Network from Nielsen's Chapter 1 5 | # http://neuralnetworksanddeeplearning.com/chap1.html#implementing_our_network_to_classify_digits 6 | 7 | # ## Load MNIST Data 8 | 9 | # In[5]: 10 | 11 | import mnist_loader 12 | 13 | 14 | # In[6]: 15 | 16 | training_data, validation_data, test_data = mnist_loader.load_data_wrapper() 17 | 18 | 19 | # ## Set up Network 20 | 21 | # In[9]: 22 | 23 | import network 24 | 25 | 26 | # In[10]: 27 | 28 | # 784 (28 x 28 pixel images) input neurons; 30 hidden neurons; 10 output neurons 29 | net = network.Network([784, 30, 10]) 30 | 31 | 32 | # ## Train Network 33 | 34 | # In[12]: 35 | 36 | # Use stochastic gradient descent over 30 epochs, with mini-batch size of 10, learning rate of 3.0 37 | net.SGD(training_data, 30, 10, 3.0, test_data=test_data) 38 | 39 | 40 | # ## Exercise: Create network with just two layers 41 | 42 | # In[13]: 43 | 44 | two_layer_net = network.Network([784, 10]) 45 | 46 | 47 | # In[14]: 48 | 49 | two_layer_net.SGD(training_data, 10, 10, 1.0, test_data=test_data) 50 | 51 | 52 | # In[15]: 53 | 54 | two_layer_net.SGD(training_data, 10, 10, 2.0, test_data=test_data) 55 | 56 | 57 | # In[16]: 58 | 59 | two_layer_net.SGD(training_data, 10, 10, 3.0, test_data=test_data) 60 | 61 | 62 | # In[17]: 63 | 64 | two_layer_net.SGD(training_data, 10, 10, 4.0, test_data=test_data) 
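# (The four runs above keep the 784-10 architecture fixed and sweep the
# learning rate over 1.0, 2.0, 3.0 and 4.0; the run below returns to a
# learning rate of 3.0 but doubles the number of epochs to 20.)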
65 | 66 | 67 | # In[18]: 68 | 69 | two_layer_net.SGD(training_data, 20, 10, 3.0, test_data=test_data) 70 | 71 | 72 | # In[ ]: 73 | 74 | 75 | 76 | -------------------------------------------------------------------------------- /nn-from-scratch/MNIST-loader.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Loading the MNIST data\n", 8 | "\n", 9 | "This is the MNIST data obtainable at http://yann.lecun.com/exdb/mnist/\n", 10 | "\n", 11 | "The data is supplied as IDX files compressed in gzip format. The code below unzips the data, converts the IDX file to an ndarray, reshapes and one-hot encodes as necessary, scales the data and finally pickles the data for easy loading into the main script.\n", 12 | "\n", 13 | "It's worth noting that the pickled data files are not backward compatible with Python 2.X, so if you haven't yet started using Python 3.X then you should download the gzips yourself and run this script locally to generate Python 2.X compatible pickle files. YMMV.\n", 14 | "\n", 15 | "Finally, the details of the data are available on the website above. But in a nutshell, the training data contains 60 000 images, and the testing data contains 10 000 images. I randomly removed 10 000 of the training data points to set aside as a validation set." 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": 93, 21 | "metadata": { 22 | "collapsed": false 23 | }, 24 | "outputs": [], 25 | "source": [ 26 | "import pickle\n", 27 | "import gzip\n", 28 | "import idx2numpy\n", 29 | "import numpy as np\n", 30 | "from sklearn.cross_validation import train_test_split\n", 31 | "from sklearn.preprocessing import MinMaxScaler" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 94, 37 | "metadata": { 38 | "collapsed": false 39 | }, 40 | "outputs": [ 41 | { 42 | "name": "stderr", 43 | "output_type": "stream", 44 | "text": [ 45 | "/home/nobody/anaconda3/lib/python3.5/site-packages/sklearn/utils/validation.py:420: DataConversionWarning: Data with input dtype uint8 was converted to float64 by MinMaxScaler.\n", 46 | " warnings.warn(msg, DataConversionWarning)\n" 47 | ] 48 | } 49 | ], 50 | "source": [ 51 | "# Uncompress the gzips and convert the IDX files to ndarray\n", 52 | "with gzip.open('data/gzips/train-images-idx3-ubyte.gz', 'rb') as f:\n", 53 | " xtrain = idx2numpy.convert_from_file(f)\n", 54 | "\n", 55 | "with gzip.open('data/gzips/train-labels-idx1-ubyte.gz', 'rb') as f:\n", 56 | " ytrain = idx2numpy.convert_from_file(f)\n", 57 | "\n", 58 | "# Reshape the images to an [nXm] array\n", 59 | "xtrain = xtrain.reshape(len(xtrain),-1)\n", 60 | "xtrain = MinMaxScaler().fit_transform(xtrain)\n", 61 | "# One-hot encode the y values\n", 62 | "ytrain = np.eye(10)[ytrain].reshape(len(ytrain),10)\n", 63 | "# Seperate out the validation set. 
Note: the random_state parameter will ensure you get the same results as me.\n", 64 | "xtrain, xval, ytrain, yval = train_test_split(xtrain, ytrain, test_size=10000, random_state=0)\n", 65 | "\n", 66 | "# Write the pickled files for importing easily into other scripts\n", 67 | "with open('data/pickled/xtrain.pickle', 'wb') as f:\n", 68 | " pickle.dump(xtrain, f, pickle.HIGHEST_PROTOCOL)\n", 69 | " \n", 70 | "with open('data/pickled/xval.pickle', 'wb') as f:\n", 71 | " pickle.dump(xval, f, pickle.HIGHEST_PROTOCOL)\n", 72 | "\n", 73 | "with open('data/pickled/ytrain.pickle', 'wb') as f:\n", 74 | " pickle.dump(ytrain, f, pickle.HIGHEST_PROTOCOL)\n", 75 | " \n", 76 | "with open('data/pickled/yval.pickle', 'wb') as f:\n", 77 | " pickle.dump(yval, f, pickle.HIGHEST_PROTOCOL)" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": 95, 83 | "metadata": { 84 | "collapsed": false 85 | }, 86 | "outputs": [ 87 | { 88 | "name": "stderr", 89 | "output_type": "stream", 90 | "text": [ 91 | "/home/nobody/anaconda3/lib/python3.5/site-packages/sklearn/utils/validation.py:420: DataConversionWarning: Data with input dtype uint8 was converted to float64 by MinMaxScaler.\n", 92 | " warnings.warn(msg, DataConversionWarning)\n" 93 | ] 94 | } 95 | ], 96 | "source": [ 97 | "# As above, but for the test set\n", 98 | "with gzip.open('data/gzips/t10k-images-idx3-ubyte.gz', 'rb') as f:\n", 99 | " xtest = idx2numpy.convert_from_file(f)\n", 100 | " \n", 101 | "with gzip.open('data/gzips/t10k-labels-idx1-ubyte.gz', 'rb') as f:\n", 102 | " ytest = idx2numpy.convert_from_file(f)\n", 103 | "\n", 104 | "xtest = xtest.reshape(len(xtest),-1)\n", 105 | "xtest = MinMaxScaler().fit_transform(xtest)\n", 106 | "ytest = np.eye(10)[ytest].reshape(len(ytest),10)\n", 107 | "\n", 108 | "with open('data/pickled/xtest.pickle', 'wb') as f:\n", 109 | " pickle.dump(xtest, f, pickle.HIGHEST_PROTOCOL)\n", 110 | " \n", 111 | "with open('data/pickled/ytest.pickle', 'wb') as f:\n", 112 | " pickle.dump(ytest, f, pickle.HIGHEST_PROTOCOL)" 113 | ] 114 | } 115 | ], 116 | "metadata": { 117 | "kernelspec": { 118 | "display_name": "Python 3", 119 | "language": "python", 120 | "name": "python3" 121 | }, 122 | "language_info": { 123 | "codemirror_mode": { 124 | "name": "ipython", 125 | "version": 3 126 | }, 127 | "file_extension": ".py", 128 | "mimetype": "text/x-python", 129 | "name": "python", 130 | "nbconvert_exporter": "python", 131 | "pygments_lexer": "ipython3", 132 | "version": "3.5.2" 133 | } 134 | }, 135 | "nbformat": 4, 136 | "nbformat_minor": 0 137 | } 138 | -------------------------------------------------------------------------------- /nn-from-scratch/README.md: -------------------------------------------------------------------------------- 1 | # NN from scratch 2 | 3 | Update: I wrote a simple SGD version of the original scipy.optimize script, and then I re-wrote that to incoporate a flexible architecture. Also, I found an error in the weigh update part of the code. It is fixed here. 4 | 5 | The purpose here was to write a neural network "from scratch", which is to say without using any of the available libraries. The advantage being deeper understanding of the principles and how they work, the disadvantages being performance, versatility and effort. 6 | 7 | This nn incorporates most of the features we've dealt with so far in the course (that is, up to somewhere in week 3): cross entropy, L2 regularization, and improved weight initialization. 
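For concreteness, the improved weight initialization referred to above draws each weight from a Gaussian scaled down by the square root of the layer's fan-in. A minimal numpy sketch (the function name is illustrative, not taken from the notebooks):

    import numpy as np

    def init_params(sizes):
        # e.g. sizes = [784, 30, 10] for a single-hidden-layer MNIST net
        weights = [np.random.randn(y, x) / np.sqrt(x)
                   for x, y in zip(sizes[:-1], sizes[1:])]
        biases = [np.random.randn(y, 1) for y in sizes[1:]]
        return weights, biases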
8 | 
9 | Note: everything is done in Python 3.X, so if you haven't updated yet, expect some things to break (most obviously, print()). Also, if you're on Python 2.X you'll likely want to look at MNIST-loader.ipynb and pickle your own data.
10 | 
11 | MNIST-nn-scipy.ipynb uses the scipy.optimize L-BFGS optimizer to minimize the cost. This is the kind of method that was deployed in the Coursera course I referenced at the top of the file.
12 | 
13 | MNIST-nn-SGD.ipynb removes the optimizer in exchange for standard stochastic gradient descent. This more closely matches what we have been studying thus far in the Nielsen textbook, and as such it is where I will develop this script further.
14 | 
15 | MNIST-nn-SGD-flex_arch.ipynb is the above SGD-based algorithm, but with modifications for a more flexible architecture. This makes the individual steps of forward and backpropagation slightly more opaque, so if you're looking for ease of understanding, look elsewhere.
16 | 
17 | Lastly, the MNIST-loader notebook throws warnings about converting uint8 data into float64 during the scaling process. This didn't seem unusual to me. I'm sure I could suppress the warnings, or do the conversion in the array before passing it to the scaler.
18 | 
19 | The to-do list:
20 | - Incorporate gradient descent
21 | - Create more versatility in terms of number of layers and number of neurons per layer
22 | - Incorporate early stopping
23 | - Incorporate a learning rate schedule
24 | - Make use of the validation data (it's sort of ignored in these notebooks for now)
--------------------------------------------------------------------------------
/nn-from-scratch/data/gzips/t10k-images-idx3-ubyte.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/nn-from-scratch/data/gzips/t10k-images-idx3-ubyte.gz
--------------------------------------------------------------------------------
/nn-from-scratch/data/gzips/t10k-labels-idx1-ubyte.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/nn-from-scratch/data/gzips/t10k-labels-idx1-ubyte.gz
--------------------------------------------------------------------------------
/nn-from-scratch/data/gzips/train-images-idx3-ubyte.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/nn-from-scratch/data/gzips/train-images-idx3-ubyte.gz
--------------------------------------------------------------------------------
/nn-from-scratch/data/gzips/train-labels-idx1-ubyte.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/nn-from-scratch/data/gzips/train-labels-idx1-ubyte.gz
--------------------------------------------------------------------------------
/slides/2017-02-07__katya_vasilaky__ridge_regression.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/slides/2017-02-07__katya_vasilaky__ridge_regression.pdf
--------------------------------------------------------------------------------
/slides/2017-02-07__raphaela_sapire__billion_dollar_AI.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/slides/2017-02-07__raphaela_sapire__billion_dollar_AI.pdf -------------------------------------------------------------------------------- /slides/2017-03-06__grant_beyleveld__u_net.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/slides/2017-03-06__grant_beyleveld__u_net.pdf -------------------------------------------------------------------------------- /slides/2017-03-27__karl_habermas__CS224d_assignment1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/slides/2017-03-27__karl_habermas__CS224d_assignment1.pdf -------------------------------------------------------------------------------- /slides/2017-04-19__claudia_perlich__predictability.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/slides/2017-04-19__claudia_perlich__predictability.pdf -------------------------------------------------------------------------------- /slides/2017-10-17__thomas_balestri__reinforcement_learning.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/slides/2017-10-17__thomas_balestri__reinforcement_learning.pdf -------------------------------------------------------------------------------- /slides/2017-12-09__keng_laura__RL.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/slides/2017-12-09__keng_laura__RL.pdf -------------------------------------------------------------------------------- /slides/2019-10-16_dmitri_nesterenko__Capsule_Nets.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/slides/2019-10-16_dmitri_nesterenko__Capsule_Nets.pdf -------------------------------------------------------------------------------- /slides/2019-10-16_grant_beyleveld__BERT.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/slides/2019-10-16_grant_beyleveld__BERT.pdf -------------------------------------------------------------------------------- /weekly-work/week1/MNIST_for_beginners.py: -------------------------------------------------------------------------------- 1 | 2 | # coding: utf-8 3 | 4 | # ### MNIST for Beginners 5 | # ### from https://www.tensorflow.org/versions/r0.9/tutorials/mnist/beginners/index.html 6 | 7 | # ### The MNIST Data 8 | 9 | # In[1]: 10 | 11 | # The MNIST Data are hosted on Yann LeCun's website, but made available directly by the TensorFlow team. 
12 | from tensorflow.examples.tutorials.mnist import input_data 13 | mnist = input_data.read_data_sets("MNIST_data/", one_hot=True) 14 | 15 | 16 | # ### Implementing Softmax Regression 17 | 18 | # In[2]: 19 | 20 | import tensorflow as tf 21 | 22 | 23 | # In[3]: 24 | 25 | # Assign placeholder to x that will be filled during computation. 26 | # We'll be flattening MNIST images into a 784-dimensional vector, 27 | # represented as a 2-D tensor of floating-point numbers. 28 | x = tf.placeholder(tf.float32, [None, 784]) 29 | 30 | 31 | # In[4]: 32 | 33 | # Assign the model parameters to Variables, which are modifiable tensors 34 | # within a graph of interacting operations. 35 | # Initialize as zeros. 36 | W = tf.Variable(tf.zeros([784, 10])) 37 | b = tf.Variable(tf.zeros([10])) 38 | 39 | 40 | # In[5]: 41 | 42 | # Implementation proper takes only one line. 43 | y = tf.nn.softmax(tf.matmul(x, W) + b) 44 | 45 | 46 | # ### Training 47 | 48 | # In[6]: 49 | 50 | # Assign a placeholder into which we'll be inputting correct answers: 51 | y_ = tf.placeholder(tf.float32, [None, 10]) 52 | 53 | 54 | # In[7]: 55 | 56 | # Implement cross-entropy, which we'll use as the cost function: 57 | cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1])) 58 | 59 | 60 | # In[8]: 61 | 62 | # Use gradient descent to minimize cost with learning rate of 0.5. 63 | # The beauty of TensorFlow is that we're effortlessly using backpropagation. 64 | train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy) 65 | 66 | 67 | # In[11]: 68 | 69 | # Initialize all variables: 70 | init = tf.initialize_all_variables() 71 | 72 | 73 | # In[12]: 74 | 75 | # Launch the model within a session: 76 | sess = tf.Session() 77 | sess.run(init) 78 | 79 | 80 | # In[15]: 81 | 82 | # Train with one thousand iterations. 
83 | # Batches of one hundred random data points are used for stochastic training (i.e., SGD)
84 | for i in range(1000):
85 |     batch_xs, batch_ys = mnist.train.next_batch(100)
86 |     sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})
87 | 
88 | 
89 | # 
90 | # ### Model Evaluation
91 | 
92 | # In[16]:
93 | 
94 | # Use argmax to examine whether the most likely predicted label matches reality:
95 | correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1))
96 | 
97 | 
98 | # In[17]:
99 | 
100 | # Cast Booleans to floating point numbers and take mean to assess overall accuracy:
101 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
102 | 
103 | 
104 | # In[18]:
105 | 
106 | # Run and output to screen:
107 | print(sess.run(accuracy, feed_dict={x: mnist.test.images, y_: mnist.test.labels}))
108 | 
109 | 
110 | # In[ ]:
111 | 
112 | 
113 | 
114 | 
--------------------------------------------------------------------------------
/weekly-work/week1/README.md:
--------------------------------------------------------------------------------
1 | # Week 1
2 | meeting date: *08-17-2016*
3 | 
4 | ### Covered
5 | - [Chapter 1](http://neuralnetworksanddeeplearning.com/chap1.html) from Nielsen's Ebook
6 | - [TensorFlow Setup](https://www.tensorflow.org/versions/r0.9/get_started/os_setup.html)
7 | - [TensorFlow intro tutorial](https://www.tensorflow.org/versions/r0.9/tutorials/mnist/beginners/index.html)
8 | 
9 | 
10 | ### Nielsen Chapter 1
11 | - 2 important artificial neurons
12 |   - perceptron
13 |   - sigmoid
14 | - Standard learning algo for neural networks: Stochastic Gradient Descent (SGD)
15 | 
16 | #### Perceptrons
17 | 
18 | - developed in the 1950s and 60s
19 | - takes N binary inputs and produces a single binary output
20 | - output is 1 if the weighted sum of inputs is > some threshold
21 | - output = `{ 0 if w⋅x + b ≤ 0; 1 if w⋅x + b > 0 }`
22 | - `bias (b)` is a measure of how easily the neuron "fires"
23 | - Proof
24 |   - Perceptrons can simulate a circuit of many NAND gates (see the sketch below)
25 |   - NAND gates are universal for computation (we can build any computation out of them)
26 |   - => perceptrons are universal for computation
27 | - We can devise learning algos that automatically tune the weights and biases of a network of artificial neurons
28 |   - this tuning happens in response to external stimuli
29 | - Instead of laying NAND gates out explicitly, neural networks can simply learn to solve problems
30 | 
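As a concrete illustration of the NAND bullet above, here is a minimal sketch; the weights and bias are the textbook NAND example from Nielsen's chapter 1, not anything taken from this repo's code.

```python
import numpy as np

def perceptron(w, b, x):
    # Perceptron rule: output 1 if w.x + b > 0, else 0
    return 1 if np.dot(w, x) + b > 0 else 0

# Weights of -2, -2 and a bias of 3 implement NAND (Nielsen, chapter 1)
w, b = np.array([-2, -2]), 3
for x in [(0, 0), (0, 1), (1, 0), (1, 1)]:
    print(x, perceptron(w, b, np.array(x)))  # prints 1, 1, 1, 0
```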
31 | #### Sigmoid neurons
32 | 
33 | - Perceptrons are touchy. A small change in weight/bias can lead to drastic changes in output
34 | - With sigmoid neurons, small changes in weight/bias lead to small changes in output
35 | - Inputs are not binary (any real between 0 and 1)
36 | - `w*x + b` is now the input to the sigmoid/logistic function, which gives the final output
37 | - The sigmoid is a smoothed-out step function (the step function corresponds to the perceptron)
38 | 
39 | #### The architecture of neural networks
40 | 
41 | - input and output layers with hidden layers in between
42 | - multilayer networks can be called multilayer perceptrons (MLPs), despite containing sigmoid neurons
43 | - Feedforward
44 |   - output from one layer is input for the next (no loops)
45 | - Recurrent
46 |   - can have loops
47 |   - neurons fire for a limited duration
48 |   - less influential so far
49 |   - closer in spirit to how the human brain works
50 | 
51 | #### Simple network to classify digits
52 | 
53 | - 2 parts: digit segmentation and individual digit recognition
54 | - Having good individual digit recognition lets you validate the segmentation algo, so we'll focus on digit recognition first
55 | - Digit recognition
56 |   - 784 input neurons = 28 x 28 grayscale image pixels
57 |   - 10 output neurons; the highest activation value corresponds to the digit estimate
58 | 
59 | #### Learning with Gradient Descent
60 | 
61 | - MNIST dataset
62 |   - training: 60,000 handwritten 28 x 28 images from 250 people
63 |   - test: 10,000 handwritten 28 x 28 images from 250 other people
64 | - `y = y(x) = (0,0,0,0,0,0,1,0,0,0)^T`
65 |   - x: 28 x 28 = 784-dim vector of pixel grey values
66 |   - y: 10-dim vector of digit estimates
67 | - Cost function
68 |   - measures network accuracy
69 |   - `C(w,b)` closer to 0 => better
70 |   - \#images classified correctly is not a smooth function of the weights and biases in the network
71 |   - a smooth function like the quadratic cost makes it easier to detect the improvement from small changes in the weights and biases
72 | - We could use calculus to minimize the cost function, but that doesn't scale well (a NN could have billions of weights and biases)
73 | - `Δv = −η∇C`
74 |   - η is the learning rate, the increment the SGD algorithm uses to "descend" and minimize C
75 |   - η too big => the approximation may not hold, and C can actually increase
76 |   - η too small => the algorithm is slow
77 | - In practice, computing the gradient requires computing individual gradients for each training input
78 |   - this is very slow for many inputs
79 | - SGD approximates the gradient by averaging the individual gradients for a small sample of inputs
80 |   - this sample is called a mini-batch (see the sketch below)
81 | 
82 | #### Implementing our Network
83 | - backpropagation
84 |   - a fast way of computing the gradient of the cost function
85 | 
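To make the mini-batch update rule concrete, here is a minimal sketch of the `Δv = −η∇C` step with the gradient estimated from one mini-batch. The function and variable names are illustrative, not taken from this repo's code.

```python
import numpy as np

def sgd_step(v, batch_grads, eta):
    # Approximate the true gradient ∇C by the mean of the per-example
    # gradients in the mini-batch, then apply Δv = -η∇C.
    grad_estimate = np.mean(batch_grads, axis=0)
    return v - eta * grad_estimate

# Toy usage: a mini-batch of 10 per-example gradients for 5 parameters
v = np.zeros(5)
batch_grads = np.random.randn(10, 5)
v = sgd_step(v, batch_grads, eta=3.0)  # η = 3.0, as in Nielsen's chapter 1 runs
```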
--------------------------------------------------------------------------------
/weekly-work/week1/basic_usage.py:
--------------------------------------------------------------------------------
1 | 
2 | # coding: utf-8
3 | 
4 | # ### from https://www.tensorflow.org/versions/r0.9/get_started/basic_usage.html
5 | 
6 | # ## Building the graph
7 | 
8 | # In[1]:
9 | 
10 | import tensorflow as tf
11 | 
12 | 
13 | # In[2]:
14 | 
15 | # Create a Constant op that produces a 1x2 matrix. The op is
16 | # added as a node to the default graph.
17 | #
18 | # The value returned by the constructor represents the output
19 | # of the Constant op.
20 | matrix1 = tf.constant([[3., 3.]])
21 | 
22 | 
23 | # In[3]:
24 | 
25 | # Create another Constant that produces a 2x1 matrix.
26 | matrix2 = tf.constant([[2.],[2.]])
27 | 
28 | 
29 | # In[4]:
30 | 
31 | # Create a Matmul op that takes 'matrix1' and 'matrix2' as inputs.
32 | # The returned value, 'product', represents the result of the matrix
33 | # multiplication.
34 | product = tf.matmul(matrix1, matrix2)
35 | 
36 | 
37 | # ## Launching the graph in a session
38 | 
39 | # In[5]:
40 | 
41 | # Launch the default graph.
42 | sess = tf.Session()
43 | 
44 | 
45 | # In[6]:
46 | 
47 | # To run the matmul op we call the session 'run()' method, passing 'product'
48 | # which represents the output of the matmul op. This indicates to the call
49 | # that we want to get the output of the matmul op back.
50 | #
51 | # All inputs needed by the op are run automatically by the session. They
52 | # typically are run in parallel.
53 | #
54 | # The call 'run(product)' thus causes the execution of three ops in the
55 | # graph: the two constants and matmul.
56 | #
57 | # The output of the op is returned in 'result' as a numpy `ndarray` object.
58 | result = sess.run(product)
59 | print(result)
60 | 
61 | 
62 | # In[7]:
63 | 
64 | # Close the Session when we're done to release resources.
65 | sess.close()
66 | 
67 | 
68 | # ## Alternative session launch with "with"
69 | 
70 | # In[8]:
71 | 
72 | with tf.Session() as sess:
73 |     result = sess.run([product])
74 |     print(result)
75 | 
76 | 
77 | # In[9]:
78 | 
79 | # If you want to use more than one GPU, you need to specify this explicitly,
80 | # for which "with" comes in handy:
81 | #with tf.Session() as sess:
82 | #    with tf.device("/gpu:1"):  # zero-indexed, so this is the second GPU
83 | #        matrix1 = tf.constant([[3., 3.]])
84 | #        matrix2 = tf.constant([[2.],[2.]])
85 | #        product = tf.matmul(matrix1, matrix2)
86 | #        #etc.
87 | 
88 | 
89 | # In[10]:
90 | 
91 | # "with" also comes in handy for launching the graph in a distributed session, e.g.:
92 | #with tf.Session("http://example.org:2222") as sess:
93 | 
94 | 
95 | # ## Interactive Usage
96 | 
97 | # In[11]:
98 | 
99 | # Great for use within IPython notebooks like this one :)
100 | import tensorflow as tf
101 | sess = tf.InteractiveSession()
102 | 
103 | 
104 | # In[12]:
105 | 
106 | x = tf.Variable([1., 2.])
107 | a = tf.constant([3., 3.])
108 | 
109 | 
110 | # In[13]:
111 | 
112 | # Initialize x with run() method of initializer op.
113 | x.initializer.run()
114 | 
115 | 
116 | # In[14]:
117 | 
118 | # Add an op to subtract 'a' from 'x'.
119 | sub = tf.sub(x, a)
120 | 
121 | 
122 | # In[15]:
123 | 
124 | # Print result.
125 | print(sub.eval())
126 | 
127 | 
128 | # In[16]:
129 | 
130 | sess.close()
131 | 
132 | 
133 | # ## Variables
134 | 
135 | # In[18]:
136 | 
137 | # Create a Variable, which will be initialized to the scalar zero.
138 | state = tf.Variable(0, name="counter")
139 | 
140 | 
141 | # In[20]:
142 | 
143 | # Create an Op to add one to 'state'.
144 | one = tf.constant(1)
145 | new_value = tf.add(state, one)
146 | update = tf.assign(state, new_value)
147 | 
148 | 
149 | # In[21]:
150 | 
151 | # Initialize variables.
152 | init_op = tf.initialize_all_variables()
153 | 
154 | 
155 | # In[22]:
156 | 
157 | # Launch the graph and run the ops.
158 | with tf.Session() as sess:
159 |     # Run the 'init' op.
160 |     sess.run(init_op)
161 |     # Print the initial value of 'state'.
162 |     print(sess.run(state))
163 |     # Run the op that updates 'state' and print 'state'.
164 |     for _ in range(3):
165 |         sess.run(update)
166 |         print(sess.run(state))
167 | 
168 | 
169 | # ## Fetches
170 | 
171 | # In[23]:
172 | 
173 | # To fetch op outputs, execute the graph with a run() call on the Session object
174 | # and pass in the tensors to retrieve.
175 | input1 = tf.constant([3.])
176 | input2 = tf.constant([2.])
177 | input3 = tf.constant([5.])
178 | intermed = tf.add(input2, input3)
179 | mul = tf.mul(input1, intermed)
180 | 
181 | with tf.Session() as sess:
182 |     result = sess.run([mul, intermed])
183 |     print(result)
184 | 
185 | 
186 | # ## Feeds
187 | 
188 | # In[24]:
189 | 
190 | # TensorFlow provides a feed mechanism for patching a tensor directly
191 | # into any operation in the graph.
192 | input1 = tf.placeholder(tf.float32)
193 | input2 = tf.placeholder(tf.float32)
194 | output = tf.mul(input1, input2)
195 | 
196 | with tf.Session() as sess:
197 |     print(sess.run([output], feed_dict={input1:[7.], input2:[2.]}))
198 | 
199 | 
200 | # In[ ]:
201 | 
202 | 
203 | 
204 | 
--------------------------------------------------------------------------------
/weekly-work/week1/deep_MNIST.py:
--------------------------------------------------------------------------------
1 | 
2 | # coding: utf-8
3 | 
4 | # # Deep MNIST
5 | 
6 | # #### Construct a deep convolutional MNIST classifier
7 | 
8 | # #### from https://www.tensorflow.org/versions/r0.9/tutorials/mnist/pros/index.html
9 | 
10 | # ## Load MNIST Data
11 | 
12 | # In[2]:
13 | 
14 | from tensorflow.examples.tutorials.mnist import input_data
15 | 
16 | 
17 | # In[3]:
18 | 
19 | # Load training, validation, and testing sets as NumPy arrays.
20 | mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
21 | 
22 | 
23 | # ## Start TensorFlow InteractiveSession
24 | 
25 | # In[4]:
26 | 
27 | # The InteractiveSession class is ideal for IPython notebooks like this one.
28 | # It facilitates flexibility in how you structure your code,
29 | # letting you alternate between operations that build the computation graph
30 | # and those that run the graph.
31 | import tensorflow as tf
32 | sess = tf.InteractiveSession()
33 | 
34 | 
35 | # ## Build a Softmax Regression Model
36 | 
37 | # In[ ]:
38 | 
39 | # Build a softmax regression model with a single linear layer.
40 | 
41 | 
42 | # In[5]:
43 | 
44 | # Create placeholder nodes for the input images and target output classes.
45 | x = tf.placeholder(tf.float32, shape=[None, 784])
46 | y_ = tf.placeholder(tf.float32, shape=[None, 10])
47 | 
48 | 
49 | # In[6]:
50 | 
51 | # Define the weights and biases for the model as Variables.
52 | W = tf.Variable(tf.zeros([784,10]))
53 | b = tf.Variable(tf.zeros([10]))
54 | 
55 | 
56 | # In[8]:
57 | 
58 | # Initialize variables for use in session.
59 | sess.run(tf.initialize_all_variables())
60 | 
61 | 
62 | # In[9]:
63 | 
64 | # Implement as a softmax regression model.
65 | y = tf.nn.softmax(tf.matmul(x,W) + b)
66 | 
67 | 
68 | # In[10]:
69 | 
70 | # Specify the model's cost function as cross-entropy.
71 | # Use reduce_sum to sum across all classes; reduce_mean to take the mean over the batch.
72 | cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1]))
73 | 
74 | 
75 | # #### Train the Model
76 | 
77 | # In[11]:
78 | 
79 | # Select steepest gradient descent, with step length of 0.5, to descend the cross entropy.
80 | # TensorFlow automatically adds operations to: 81 | # - compute gradients 82 | # - compute parameter update steps 83 | # - apply update steps to the parameters 84 | train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy) 85 | 86 | 87 | # In[12]: 88 | 89 | # Run train_step to repeatedly apply gradient descent updates to the parameters. 90 | # Each training iteration (batch) loads fifty training examples, 91 | # which feed_dict replaces placeholder tensors x and y_ with. 92 | for i in range(1000): 93 | batch = mnist.train.next_batch(50) 94 | train_step.run(feed_dict={x: batch[0], y_: batch[1]}) 95 | 96 | 97 | # #### Evaluate the Model 98 | 99 | # In[13]: 100 | 101 | # Use arg_max to identify the label that the model thinks is most likely for each input. 102 | correct_prediction = tf.equal(tf.arg_max(y,1), tf.arg_max(y_,1)) 103 | 104 | 105 | # In[14]: 106 | 107 | # Convert booleans to floating point numbers. 108 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 109 | 110 | 111 | # In[15]: 112 | 113 | # Evaluate and print to screen. 114 | print(accuracy.eval(feed_dict={x: mnist.test.images, y_: mnist.test.labels})) 115 | 116 | 117 | # In[ ]: 118 | 119 | # 90.92% classification accuracy. We can do better. 120 | 121 | 122 | # # Build a Multilayer Convolutional Network 123 | 124 | # #### Weight Initialization 125 | 126 | # In[16]: 127 | 128 | def weight_variable(shape): 129 | initial = tf.truncated_normal(shape, stddev=0.1) 130 | return tf.Variable(initial) 131 | 132 | 133 | # In[17]: 134 | 135 | def bias_variable(shape): 136 | initial = tf.constant(0.1, shape=shape) 137 | return tf.Variable(initial) 138 | 139 | 140 | # #### Convolution and Pooling 141 | 142 | # In[18]: 143 | 144 | def conv2d(x, W): 145 | return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME') 146 | 147 | 148 | # In[19]: 149 | 150 | def max_pool_2x2(x): 151 | return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME') 152 | 153 | 154 | # #### First Convolutional Layer 155 | 156 | # In[20]: 157 | 158 | W_conv1 = weight_variable([5, 5, 1, 32]) 159 | b_conv1 = bias_variable([32]) 160 | 161 | 162 | # In[21]: 163 | 164 | x_image = tf.reshape(x, [-1,28,28,1]) 165 | 166 | 167 | # In[22]: 168 | 169 | h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1) 170 | h_pool1 = max_pool_2x2(h_conv1) 171 | 172 | 173 | # #### Second Convolutional Layer 174 | 175 | # In[23]: 176 | 177 | W_conv2 = weight_variable([5, 5, 32, 64]) 178 | b_conv2 = bias_variable([64]) 179 | 180 | 181 | # In[24]: 182 | 183 | h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2) 184 | h_pool2 = max_pool_2x2(h_conv2) 185 | 186 | 187 | # #### Densely Connected Layer 188 | 189 | # In[25]: 190 | 191 | W_fc1 = weight_variable([7 * 7 * 64, 1024]) 192 | b_fc1 = bias_variable([1024]) 193 | 194 | 195 | # In[26]: 196 | 197 | h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64]) 198 | h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1) 199 | 200 | 201 | # In[28]: 202 | 203 | # Apply dropout before readout layer to reduce overfitting. 
204 | keep_prob = tf.placeholder(tf.float32)
205 | h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
206 | 
207 | 
208 | # #### Readout Layer
209 | 
210 | # In[29]:
211 | 
212 | W_fc2 = weight_variable([1024, 10])
213 | b_fc2 = bias_variable([10])
214 | 
215 | 
216 | # In[30]:
217 | 
218 | y_conv = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)
219 | 
220 | 
221 | # #### Train and Evaluate the Model
222 | 
223 | # In[34]:
224 | 
225 | # Use ADAM optimizer instead of steepest gradient descent.
226 | cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y_conv), reduction_indices=[1]))
227 | train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
228 | correct_prediction = tf.equal(tf.argmax(y_conv,1), tf.argmax(y_,1))
229 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
230 | sess.run(tf.initialize_all_variables())
231 | for i in range(20000):
232 |     batch = mnist.train.next_batch(50)
233 |     if i%100 == 0:
234 |         train_accuracy = accuracy.eval(feed_dict={x:batch[0], y_: batch[1], keep_prob: 1.0})
235 |         print("step %d, training accuracy %g"%(i, train_accuracy))
236 |     train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})
237 | 
238 | 
239 | # In[35]:
240 | 
241 | print("test accuracy %g"%accuracy.eval(feed_dict={x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}))
242 | 
243 | 
244 | # In[ ]:
245 | 
246 | 
247 | 
248 | 
--------------------------------------------------------------------------------
/weekly-work/week1/exercise3.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import math
3 | 
4 | inputs = {
5 |     0: np.array([0.990, 0.009, 0.009, 0.009, 0.009, 0.009, 0.009, 0.009, 0.009, 0.009]),
6 |     1: np.array([0.009, 0.990, 0.009, 0.009, 0.009, 0.009, 0.009, 0.009, 0.009, 0.009]),
7 |     2: np.array([0.009, 0.009, 0.990, 0.009, 0.009, 0.009, 0.009, 0.009, 0.009, 0.009]),
8 |     3: np.array([0.009, 0.009, 0.009, 0.990, 0.009, 0.009, 0.009, 0.009, 0.009, 0.009]),
9 |     4: np.array([0.009, 0.009, 0.009, 0.009, 0.990, 0.009, 0.009, 0.009, 0.009, 0.009]),
10 |     5: np.array([0.009, 0.009, 0.009, 0.009, 0.009, 0.990, 0.009, 0.009, 0.009, 0.009]),
11 |     6: np.array([0.009, 0.009, 0.009, 0.009, 0.009, 0.009, 0.990, 0.009, 0.009, 0.009]),
12 |     7: np.array([0.009, 0.009, 0.009, 0.009, 0.009, 0.009, 0.009, 0.990, 0.009, 0.009]),
13 |     8: np.array([0.009, 0.009, 0.009, 0.009, 0.009, 0.009, 0.009, 0.009, 0.990, 0.009]),
14 |     9: np.array([0.009, 0.009, 0.009, 0.009, 0.009, 0.009, 0.009, 0.009, 0.009, 0.990])
15 | }
16 | 
17 | expected_outputs = {
18 |     0: "0000",
19 |     1: "0001",
20 |     2: "0010",
21 |     3: "0011",
22 |     4: "0100",
23 |     5: "0101",
24 |     6: "0110",
25 |     7: "0111",
26 |     8: "1000",
27 |     9: "1001"
28 | }
29 | 
30 | 
31 | def sigmoid(x):
32 |     return 1 / (1 + math.exp(-x))
33 | 
34 | 
35 | def evaluate(W, B):
36 |     """
37 |     Evaluate whether neuron weights and biases generate
38 |     expected output for each digit from 0 -> 9
39 |     """
40 |     for n in xrange(10):
41 |         print "Determining if neurons produce proper bitwise representation for: {}".format(n)
42 | 
43 |         neuron_zs = [np.dot(inputs[n], w) - b for w, b in zip(W, B)]
44 |         neuron_outputs = [sigmoid(z) for z in neuron_zs]
45 |         bitwise_string = ''.join([str(int(round(x))) for x in neuron_outputs])
46 |         expected = expected_outputs[n]
47 | 
48 |         print "    neuron output: {}".format(bitwise_string)
49 |         print "    expected output: {}".format(expected)
50 |         assert(bitwise_string == expected)
51 |         print "    correct!"
52 | 53 | 54 | def main(): 55 | # Neuron weights and biases that should be tuned to generate expected output 56 | w0, b0 = [-10, -10, -10, -10, -10, -10, -10, -10, 10, 10], 0 57 | w1, b1 = [-10, -10, -10, -10, 10, 10, 10, 10, -10, -10], 0 58 | w2, b2 = [-10, -10, 10, 10, -10, -10, 10, 10, -10, -10], 0 59 | w3, b3 = [-10, 10, -10, 10, -10, 10, -10, 10, -10, 10], 0 60 | 61 | W = np.array([w0, w1, w2, w3]) 62 | B = np.array([b0, b1, b2, b3]) 63 | 64 | evaluate(W, B) 65 | 66 | 67 | if __name__ == "__main__": 68 | main() 69 | -------------------------------------------------------------------------------- /weekly-work/week1/get_started.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# TensorFlow Getting Started Tutorial" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "#### from https://www.tensorflow.org/versions/r0.10/get_started/basic_usage.html#interactive-usage" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 1, 20 | "metadata": { 21 | "collapsed": false 22 | }, 23 | "outputs": [], 24 | "source": [ 25 | "import tensorflow as tf" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 2, 31 | "metadata": { 32 | "collapsed": true 33 | }, 34 | "outputs": [], 35 | "source": [ 36 | "sess = tf.InteractiveSession()" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": 3, 42 | "metadata": { 43 | "collapsed": true 44 | }, 45 | "outputs": [], 46 | "source": [ 47 | "x = tf.Variable([1.0, 2.0])\n", 48 | "a = tf.constant([3.0, 3.0])" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 4, 54 | "metadata": { 55 | "collapsed": true 56 | }, 57 | "outputs": [], 58 | "source": [ 59 | "# Initialize 'x' using the run() method of its initializer op.\n", 60 | "x.initializer.run()" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 5, 66 | "metadata": { 67 | "collapsed": false 68 | }, 69 | "outputs": [ 70 | { 71 | "name": "stdout", 72 | "output_type": "stream", 73 | "text": [ 74 | "[-2. -1.]\n" 75 | ] 76 | } 77 | ], 78 | "source": [ 79 | "# Add an op to subtract 'a' from 'x'. 
Run it and print the result\n", 80 | "sub = tf.sub(x, a)\n", 81 | "print(sub.eval())" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": null, 87 | "metadata": { 88 | "collapsed": true 89 | }, 90 | "outputs": [], 91 | "source": [] 92 | }, 93 | { 94 | "cell_type": "markdown", 95 | "metadata": {}, 96 | "source": [ 97 | "#### from https://www.tensorflow.org/versions/r0.9/get_started/index.html" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": 6, 103 | "metadata": { 104 | "collapsed": true 105 | }, 106 | "outputs": [], 107 | "source": [ 108 | "import numpy as np" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": 7, 114 | "metadata": { 115 | "collapsed": true 116 | }, 117 | "outputs": [], 118 | "source": [ 119 | "# Create 100 phony x, y data points in NumPy, y = x * 0.1 + 0.3\n", 120 | "x_data = np.random.rand(100).astype(np.float32)\n", 121 | "y_data = x_data * 0.1 + 0.3" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 8, 127 | "metadata": { 128 | "collapsed": true 129 | }, 130 | "outputs": [], 131 | "source": [ 132 | "# Try to find values for W and b that compute y_data = W * x_data + b\n", 133 | "# (We know that W should be 0.1 and b 0.3, but Tensorflow will\n", 134 | "# figure that out for us.)\n", 135 | "W = tf.Variable(tf.random_uniform([1], -1.0, 1.0))\n", 136 | "b = tf.Variable(tf.zeros([1]))\n", 137 | "y = W * x_data + b" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": 9, 143 | "metadata": { 144 | "collapsed": true 145 | }, 146 | "outputs": [], 147 | "source": [ 148 | "# Minimize the mean squared errors\n", 149 | "loss = tf.reduce_mean(tf.square(y - y_data))\n", 150 | "optimizer = tf.train.GradientDescentOptimizer(0.5)\n", 151 | "train = optimizer.minimize(loss)" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": 10, 157 | "metadata": { 158 | "collapsed": true 159 | }, 160 | "outputs": [], 161 | "source": [ 162 | "# Before starting, initialize the variables. We will 'run' this first. 
\n", 163 | "init = tf.initialize_all_variables()" 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": 11, 169 | "metadata": { 170 | "collapsed": true 171 | }, 172 | "outputs": [], 173 | "source": [ 174 | "# Launch the graph.\n", 175 | "sess = tf.Session()\n", 176 | "sess.run(init)" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": 12, 182 | "metadata": { 183 | "collapsed": false 184 | }, 185 | "outputs": [ 186 | { 187 | "name": "stdout", 188 | "output_type": "stream", 189 | "text": [ 190 | "(0, array([ 0.56993598], dtype=float32), array([ 0.07380156], dtype=float32))\n", 191 | "(20, array([ 0.25244093], dtype=float32), array([ 0.22169706], dtype=float32))\n", 192 | "(40, array([ 0.15010804], dtype=float32), array([ 0.27426147], dtype=float32))\n", 193 | "(60, array([ 0.11647075], dtype=float32), array([ 0.29153964], dtype=float32))\n", 194 | "(80, array([ 0.10541401], dtype=float32), array([ 0.29721904], dtype=float32))\n", 195 | "(100, array([ 0.1017796], dtype=float32), array([ 0.29908589], dtype=float32))\n", 196 | "(120, array([ 0.10058497], dtype=float32), array([ 0.29969954], dtype=float32))\n", 197 | "(140, array([ 0.10019229], dtype=float32), array([ 0.29990125], dtype=float32))\n", 198 | "(160, array([ 0.10006322], dtype=float32), array([ 0.29996753], dtype=float32))\n", 199 | "(180, array([ 0.1000208], dtype=float32), array([ 0.29998934], dtype=float32))\n", 200 | "(200, array([ 0.10000685], dtype=float32), array([ 0.2999965], dtype=float32))\n" 201 | ] 202 | } 203 | ], 204 | "source": [ 205 | "# Fit the line.\n", 206 | "for step in range(201):\n", 207 | " sess.run(train)\n", 208 | " if step % 20 == 0:\n", 209 | " print(step, sess.run(W), sess.run(b))" 210 | ] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "execution_count": null, 215 | "metadata": { 216 | "collapsed": true 217 | }, 218 | "outputs": [], 219 | "source": [] 220 | } 221 | ], 222 | "metadata": { 223 | "kernelspec": { 224 | "display_name": "Python 2", 225 | "language": "python", 226 | "name": "python2" 227 | }, 228 | "language_info": { 229 | "codemirror_mode": { 230 | "name": "ipython", 231 | "version": 2 232 | }, 233 | "file_extension": ".py", 234 | "mimetype": "text/x-python", 235 | "name": "python", 236 | "nbconvert_exporter": "python", 237 | "pygments_lexer": "ipython2", 238 | "version": "2.7.11" 239 | } 240 | }, 241 | "nbformat": 4, 242 | "nbformat_minor": 0 243 | } 244 | -------------------------------------------------------------------------------- /weekly-work/week1/get_started.py: -------------------------------------------------------------------------------- 1 | 2 | # coding: utf-8 3 | 4 | # # TensorFlow Getting Started Tutorial 5 | 6 | # #### from https://www.tensorflow.org/versions/r0.10/get_started/basic_usage.html#interactive-usage 7 | 8 | # In[1]: 9 | 10 | import tensorflow as tf 11 | 12 | 13 | # In[2]: 14 | 15 | sess = tf.InteractiveSession() 16 | 17 | 18 | # In[3]: 19 | 20 | x = tf.Variable([1.0, 2.0]) 21 | a = tf.constant([3.0, 3.0]) 22 | 23 | 24 | # In[4]: 25 | 26 | # Initialize 'x' using the run() method of its initializer op. 27 | x.initializer.run() 28 | 29 | 30 | # In[5]: 31 | 32 | # Add an op to subtract 'a' from 'x'. 
Run it and print the result 33 | sub = tf.sub(x, a) 34 | print(sub.eval()) 35 | 36 | 37 | # In[ ]: 38 | 39 | 40 | 41 | 42 | # #### from https://www.tensorflow.org/versions/r0.9/get_started/index.html 43 | 44 | # In[6]: 45 | 46 | import numpy as np 47 | 48 | 49 | # In[7]: 50 | 51 | # Create 100 phony x, y data points in NumPy, y = x * 0.1 + 0.3 52 | x_data = np.random.rand(100).astype(np.float32) 53 | y_data = x_data * 0.1 + 0.3 54 | 55 | 56 | # In[8]: 57 | 58 | # Try to find values for W and b that compute y_data = W * x_data + b 59 | # (We know that W should be 0.1 and b 0.3, but Tensorflow will 60 | # figure that out for us.) 61 | W = tf.Variable(tf.random_uniform([1], -1.0, 1.0)) 62 | b = tf.Variable(tf.zeros([1])) 63 | y = W * x_data + b 64 | 65 | 66 | # In[9]: 67 | 68 | # Minimize the mean squared errors 69 | loss = tf.reduce_mean(tf.square(y - y_data)) 70 | optimizer = tf.train.GradientDescentOptimizer(0.5) 71 | train = optimizer.minimize(loss) 72 | 73 | 74 | # In[10]: 75 | 76 | # Before starting, initialize the variables. We will 'run' this first. 77 | init = tf.initialize_all_variables() 78 | 79 | 80 | # In[11]: 81 | 82 | # Launch the graph. 83 | sess = tf.Session() 84 | sess.run(init) 85 | 86 | 87 | # In[12]: 88 | 89 | # Fit the line. 90 | for step in range(201): 91 | sess.run(train) 92 | if step % 20 == 0: 93 | print(step, sess.run(W), sess.run(b)) 94 | 95 | 96 | # In[ ]: 97 | 98 | 99 | 100 | -------------------------------------------------------------------------------- /weekly-work/week1/softmax_vs_convolutional_nn.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.examples.tutorials.mnist import input_data 3 | 4 | mnist = input_data.read_data_sets("MNIST_data/", one_hot=True) 5 | 6 | 7 | def softmax(): 8 | # Model variables 9 | x = tf.placeholder(tf.float32, shape=[None, 784]) 10 | y_ = tf.placeholder(tf.float32, shape=[None, 10]) 11 | W = tf.Variable(tf.zeros([784, 10])) 12 | b = tf.Variable(tf.zeros([10])) 13 | y = tf.nn.softmax(tf.matmul(x, W) + b) 14 | 15 | # Setup cost function and Gradient Descent Optimizer 16 | cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1])) 17 | train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy) 18 | 19 | # Setup input variables and session 20 | sess = tf.InteractiveSession() 21 | sess.run(tf.initialize_all_variables()) 22 | 23 | for i in xrange(1000): 24 | if (i + 1) % 100 == 0: 25 | print "training step {}".format(i + 1) 26 | batch_xs, batch_ys = mnist.train.next_batch(50) 27 | train_step.run(feed_dict={x: batch_xs, y_: batch_ys}) 28 | 29 | # Evaluate model 30 | correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1)) 31 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 32 | print "Test accuracy: {}".format(accuracy.eval(feed_dict={x: mnist.test.images, y_: mnist.test.labels})) 33 | 34 | 35 | def weight_variable(shape): 36 | initial = tf.truncated_normal(shape, stddev=0.1) 37 | return tf.Variable(initial) 38 | 39 | 40 | def bias_variable(shape): 41 | initial = tf.constant(0.1, shape=shape) 42 | return tf.Variable(initial) 43 | 44 | 45 | def conv2d(x, W): 46 | return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME') 47 | 48 | 49 | def max_pool_2x2(x): 50 | return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME') 51 | 52 | 53 | def convolutional_network(): 54 | # Model variables 55 | x = tf.placeholder(tf.float32, shape=[None, 784]) 56 | y_ = 
tf.placeholder(tf.float32, shape=[None, 10]) 57 | 58 | # Layer 1 59 | W_conv1 = weight_variable([5, 5, 1, 32]) 60 | b_conv1 = bias_variable([32]) 61 | 62 | x_image = tf.reshape(x, [-1, 28, 28, 1]) 63 | 64 | h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1) 65 | h_pool1 = max_pool_2x2(h_conv1) 66 | 67 | # Layer 2 68 | W_conv2 = weight_variable([5, 5, 32, 64]) 69 | b_conv2 = bias_variable([64]) 70 | 71 | h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2) 72 | h_pool2 = max_pool_2x2(h_conv2) 73 | 74 | # Layer 3 75 | W_fc1 = weight_variable([7 * 7 * 64, 1024]) 76 | b_fc1 = bias_variable([1024]) 77 | 78 | h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64]) 79 | h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1) 80 | 81 | # Dropout 82 | keep_prob = tf.placeholder(tf.float32) 83 | h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob) 84 | 85 | # Softmax readout layer 86 | W_fc2 = weight_variable([1024, 10]) 87 | b_fc2 = bias_variable([10]) 88 | 89 | y_conv = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2) 90 | 91 | # Evaluate model 92 | cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y_conv), reduction_indices=[1])) 93 | train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy) 94 | correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1)) 95 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 96 | 97 | # Setup input variables and session 98 | sess = tf.InteractiveSession() 99 | sess.run(tf.initialize_all_variables()) 100 | 101 | for i in xrange(20000): 102 | batch = mnist.train.next_batch(50) 103 | if i % 100 == 0: 104 | train_accuracy = accuracy.eval(feed_dict={x: batch[0], y_: batch[1], keep_prob: 1.0}) 105 | print("step %d, training accuracy %g" % (i, train_accuracy)) 106 | train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5}) 107 | 108 | print("Test accuracy: %g" % accuracy.eval(feed_dict={x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0})) 109 | 110 | 111 | def main(): 112 | print "\nRunning softmax model..." 113 | softmax() 114 | 115 | print "\nRunning convolutional neural network..." 
116 | convolutional_network() 117 | 118 | if __name__ == '__main__': 119 | main() 120 | -------------------------------------------------------------------------------- /weekly-work/week11/sutskever_et_al_2014__PCA.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week11/sutskever_et_al_2014__PCA.png -------------------------------------------------------------------------------- /weekly-work/week12/img/CNN_feature_map.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week12/img/CNN_feature_map.png -------------------------------------------------------------------------------- /weekly-work/week12/img/CTC_peaks.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week12/img/CTC_peaks.png -------------------------------------------------------------------------------- /weekly-work/week12/img/GRU_gates.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week12/img/GRU_gates.png -------------------------------------------------------------------------------- /weekly-work/week12/img/GRU_shortcut.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week12/img/GRU_shortcut.png -------------------------------------------------------------------------------- /weekly-work/week12/img/GRU_visualisation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week12/img/GRU_visualisation.png -------------------------------------------------------------------------------- /weekly-work/week12/img/LSTM_secret.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week12/img/LSTM_secret.png -------------------------------------------------------------------------------- /weekly-work/week12/img/RNN_visualisation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week12/img/RNN_visualisation.png -------------------------------------------------------------------------------- /weekly-work/week12/img/RNNs_vs_CNNs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week12/img/RNNs_vs_CNNs.png -------------------------------------------------------------------------------- /weekly-work/week12/img/are_languages_recursive.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week12/img/are_languages_recursive.png -------------------------------------------------------------------------------- /weekly-work/week12/img/attention_for_long_sentences_plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week12/img/attention_for_long_sentences_plot.png -------------------------------------------------------------------------------- /weekly-work/week12/img/attn_hidden_state.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week12/img/attn_hidden_state.png -------------------------------------------------------------------------------- /weekly-work/week12/img/bilinear_form.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week12/img/bilinear_form.png -------------------------------------------------------------------------------- /weekly-work/week12/img/bldg_on_WVSMs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week12/img/bldg_on_WVSMs.png -------------------------------------------------------------------------------- /weekly-work/week12/img/choosing_better_targets.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week12/img/choosing_better_targets.png -------------------------------------------------------------------------------- /weekly-work/week12/img/choosing_output_targets.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week12/img/choosing_output_targets.png -------------------------------------------------------------------------------- /weekly-work/week12/img/decoding.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week12/img/decoding.png -------------------------------------------------------------------------------- /weekly-work/week12/img/doubly_attention.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week12/img/doubly_attention.png -------------------------------------------------------------------------------- /weekly-work/week12/img/end_to_end_ASR_as_model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week12/img/end_to_end_ASR_as_model.png -------------------------------------------------------------------------------- /weekly-work/week12/img/global_vs_local.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week12/img/global_vs_local.png -------------------------------------------------------------------------------- /weekly-work/week12/img/heres_the-church_here_are_the_people.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week12/img/heres_the-church_here_are_the_people.png -------------------------------------------------------------------------------- /weekly-work/week12/img/learned_tree_structure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week12/img/learned_tree_structure.png -------------------------------------------------------------------------------- /weekly-work/week12/img/lstm_vs_rnn_127.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week12/img/lstm_vs_rnn_127.png -------------------------------------------------------------------------------- /weekly-work/week12/img/lstm_vs_rnn_32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week12/img/lstm_vs_rnn_32.png -------------------------------------------------------------------------------- /weekly-work/week12/img/nn_ASR.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week12/img/nn_ASR.png -------------------------------------------------------------------------------- /weekly-work/week12/img/octopus-gan.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week12/img/octopus-gan.gif -------------------------------------------------------------------------------- /weekly-work/week12/img/phrases_in_vector_space.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week12/img/phrases_in_vector_space.png -------------------------------------------------------------------------------- /weekly-work/week12/img/recursive_vs_recurrent_NN.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week12/img/recursive_vs_recurrent_NN.png -------------------------------------------------------------------------------- /weekly-work/week12/img/scoring_attention.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week12/img/scoring_attention.png 
-------------------------------------------------------------------------------- /weekly-work/week12/img/sentiment_distributions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week12/img/sentiment_distributions.png -------------------------------------------------------------------------------- /weekly-work/week12/img/seq2seq_ASR.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week12/img/seq2seq_ASR.png -------------------------------------------------------------------------------- /weekly-work/week12/img/seq2seq_ASR_attn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week12/img/seq2seq_ASR_attn.png -------------------------------------------------------------------------------- /weekly-work/week12/img/single_layer_CNN.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week12/img/single_layer_CNN.png -------------------------------------------------------------------------------- /weekly-work/week12/img/traditional_ASR.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week12/img/traditional_ASR.png -------------------------------------------------------------------------------- /weekly-work/week12/img/what_is_a_convolution.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week12/img/what_is_a_convolution.png -------------------------------------------------------------------------------- /weekly-work/week13/img/QA_independence.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week13/img/QA_independence.png -------------------------------------------------------------------------------- /weekly-work/week13/img/SNLI_results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week13/img/SNLI_results.png -------------------------------------------------------------------------------- /weekly-work/week13/img/SPINN.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week13/img/SPINN.png -------------------------------------------------------------------------------- /weekly-work/week13/img/arch_search_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week13/img/arch_search_2.png 
-------------------------------------------------------------------------------- /weekly-work/week13/img/arch_search_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week13/img/arch_search_3.png -------------------------------------------------------------------------------- /weekly-work/week13/img/arch_search_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week13/img/arch_search_4.png -------------------------------------------------------------------------------- /weekly-work/week13/img/architecture_search.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week13/img/architecture_search.png -------------------------------------------------------------------------------- /weekly-work/week13/img/b_cubed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week13/img/b_cubed.png -------------------------------------------------------------------------------- /weekly-work/week13/img/chunking_training.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week13/img/chunking_training.png -------------------------------------------------------------------------------- /weekly-work/week13/img/diff_inputs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week13/img/diff_inputs.png -------------------------------------------------------------------------------- /weekly-work/week13/img/dynamic_memory_network.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week13/img/dynamic_memory_network.png -------------------------------------------------------------------------------- /weekly-work/week13/img/episodic_module.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week13/img/episodic_module.png -------------------------------------------------------------------------------- /weekly-work/week13/img/harder_questions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week13/img/harder_questions.png -------------------------------------------------------------------------------- /weekly-work/week13/img/inference_corpus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week13/img/inference_corpus.png 
-------------------------------------------------------------------------------- /weekly-work/week13/img/input_module.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week13/img/input_module.png -------------------------------------------------------------------------------- /weekly-work/week13/img/more_qa_examples.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week13/img/more_qa_examples.png -------------------------------------------------------------------------------- /weekly-work/week13/img/obstacle_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week13/img/obstacle_1.png -------------------------------------------------------------------------------- /weekly-work/week13/img/obstacle_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week13/img/obstacle_2.png -------------------------------------------------------------------------------- /weekly-work/week13/img/pointer_mixture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week13/img/pointer_mixture.png -------------------------------------------------------------------------------- /weekly-work/week13/img/qa_examples.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week13/img/qa_examples.png -------------------------------------------------------------------------------- /weekly-work/week13/img/question_module.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week13/img/question_module.png -------------------------------------------------------------------------------- /weekly-work/week13/img/semantic_relatedness.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week13/img/semantic_relatedness.png -------------------------------------------------------------------------------- /weekly-work/week13/img/sharper_attn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week13/img/sharper_attn.png -------------------------------------------------------------------------------- /weekly-work/week13/img/state_of_the_art.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week13/img/state_of_the_art.png 
-------------------------------------------------------------------------------- /weekly-work/week13/img/tackling_joint_training.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week13/img/tackling_joint_training.png -------------------------------------------------------------------------------- /weekly-work/week13/img/tennis_Qs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week13/img/tennis_Qs.png -------------------------------------------------------------------------------- /weekly-work/week13/img/touch.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /weekly-work/week13/img/tying_word_vectors.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week13/img/tying_word_vectors.png -------------------------------------------------------------------------------- /weekly-work/week13/img/visual_attn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week13/img/visual_attn.png -------------------------------------------------------------------------------- /weekly-work/week13/img/visual_attn_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week13/img/visual_attn_2.png -------------------------------------------------------------------------------- /weekly-work/week13/img/visual_attn_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week13/img/visual_attn_3.png -------------------------------------------------------------------------------- /weekly-work/week13/img/where_SPINN_is_better.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week13/img/where_SPINN_is_better.png -------------------------------------------------------------------------------- /weekly-work/week13/img/writing_systems.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week13/img/writing_systems.png -------------------------------------------------------------------------------- /weekly-work/week13/img/ws_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week13/img/ws_2.png -------------------------------------------------------------------------------- /weekly-work/week13/img/ws_3.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week13/img/ws_3.png -------------------------------------------------------------------------------- /weekly-work/week14/img/WnT1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week14/img/WnT1.png -------------------------------------------------------------------------------- /weekly-work/week14/img/WnT2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week14/img/WnT2.png -------------------------------------------------------------------------------- /weekly-work/week14/img/WnT3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week14/img/WnT3.png -------------------------------------------------------------------------------- /weekly-work/week14/img/emmaRL.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week14/img/emmaRL.png -------------------------------------------------------------------------------- /weekly-work/week14/img/finn1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week14/img/finn1.png -------------------------------------------------------------------------------- /weekly-work/week14/img/finn1617.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week14/img/finn1617.png -------------------------------------------------------------------------------- /weekly-work/week14/img/finn2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week14/img/finn2.png -------------------------------------------------------------------------------- /weekly-work/week14/img/markovDP.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week14/img/markovDP.png -------------------------------------------------------------------------------- /weekly-work/week14/img/oh15.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week14/img/oh15.png -------------------------------------------------------------------------------- /weekly-work/week14/img/silverVenn.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week14/img/silverVenn.png -------------------------------------------------------------------------------- /weekly-work/week14/img/tan14.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week14/img/tan14.png -------------------------------------------------------------------------------- /weekly-work/week15/img/Q-star.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week15/img/Q-star.png -------------------------------------------------------------------------------- /weekly-work/week15/img/Qvalue-fxn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week15/img/Qvalue-fxn.png -------------------------------------------------------------------------------- /weekly-work/week15/img/atari-case-study.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week15/img/atari-case-study.png -------------------------------------------------------------------------------- /weekly-work/week15/img/atari-case-study2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week15/img/atari-case-study2.png -------------------------------------------------------------------------------- /weekly-work/week15/img/bellman-exn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week15/img/bellman-exn.png -------------------------------------------------------------------------------- /weekly-work/week15/img/dnn-for-q-learning.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week15/img/dnn-for-q-learning.png -------------------------------------------------------------------------------- /weekly-work/week15/img/grid-world-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week15/img/grid-world-1.png -------------------------------------------------------------------------------- /weekly-work/week15/img/grid-world-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week15/img/grid-world-2.png -------------------------------------------------------------------------------- /weekly-work/week15/img/mdp-defn.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week15/img/mdp-defn.png -------------------------------------------------------------------------------- /weekly-work/week15/img/mdp-process.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week15/img/mdp-process.png -------------------------------------------------------------------------------- /weekly-work/week15/img/policy-grad-defn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week15/img/policy-grad-defn.png -------------------------------------------------------------------------------- /weekly-work/week15/img/q-learning-fxn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week15/img/q-learning-fxn.png -------------------------------------------------------------------------------- /weekly-work/week15/img/reinforce-in-axn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week15/img/reinforce-in-axn.png -------------------------------------------------------------------------------- /weekly-work/week15/img/reinforce-in-axn2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week15/img/reinforce-in-axn2.png -------------------------------------------------------------------------------- /weekly-work/week15/img/value-fxn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week15/img/value-fxn.png -------------------------------------------------------------------------------- /weekly-work/week15/img/value-itn-algo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week15/img/value-itn-algo.png -------------------------------------------------------------------------------- /weekly-work/week17/README.md: -------------------------------------------------------------------------------- 1 | # Session XVII: Capsule Nets, BERT & Book Launch 2 | 3 | *Meeting date: October 16th, 2019* 4 | 5 | For this session, [Dmitri Nesterenko](https://www.linkedin.com/in/dmitri-nesterenko/) and [Grant Beyleveld](https://www.linkedin.com/in/grantbey/) presented on Capsule Networks (slides [here](https://github.com/the-deep-learners/study-group/blob/master/slides/2019-10-16_dmitri_nesterenko__Capsule_Nets.pdf); code demo [here](https://colab.research.google.com/drive/1MOoxhzAZnkqQyCozVg0-_OM6gtJY9K6r#scrollTo=TL4jNoGqmKMB)) and "BERT & Friends" (slides [here](https://github.com/the-deep-learners/study-group/blob/master/slides/2019-10-16_grant_beyleveld__BERT.pdf)), respectively. 
6 | 
7 | This session also served as a book launch for Jon Krohn, Grant Beyleveld, and Aglae Bassens' book, [Deep Learning Illustrated](https://www.deeplearningillustrated.com), the content of which was influenced in large part by the previous sessions of the Deep Learning Study Group. The first photo of the three authors together with their book is provided here:
8 | 
9 | ![dli-authors](https://github.com/the-deep-learners/study-group/blob/master/wiki-resources/dlsg-dli-authors.jpg)
10 | 
11 | ---
12 | ## Recommended Preparatory Work
13 | 
14 | 1. Capsule Networks: [here's](https://arxiv.org/abs/1710.09829) the original paper, and there are many Medium posts and YouTube videos providing a higher-level summary of the topic
15 | 2. [BERT](https://arxiv.org/abs/1810.04805)/[RoBERTa](https://arxiv.org/abs/1907.11692): those are the original papers; [here's](https://venturebeat.com/2019/07/29/facebook-ais-roberta-improves-googles-bert-pretraining-methods/) a news piece summarizing their significance
16 | 
17 | ![dli-launch](https://github.com/the-deep-learners/study-group/blob/master/wiki-resources/dlsg-xvii.jpg)
18 | 
19 | ---
20 | 
21 | 
22 | ## Up Next
23 | 
24 | Topics that the Group suggested could be worth studying for our next meeting:
25 | 
26 | * Dmitri providing three minutes' worth of Capsule Net applications
27 | * NLP:
28 | * model distillation / other distilled BERT derivatives
29 | * multi-task learning, e.g., [decaNLP](https://decanlp.com/)
30 | * curiosity-driven Reinforcement Learning
31 | * [Rubik's Cube-solving robot](https://www.youtube.com/watch?v=OZu9gjQJUQs)
32 | 
33 | 
34 | ![dmitri-on-caps](https://github.com/the-deep-learners/study-group/blob/master/wiki-resources/IMG_2697.jpeg)
35 | 
--------------------------------------------------------------------------------
/weekly-work/week2/README.md:
--------------------------------------------------------------------------------
1 | # Week 2
2 | meeting date: *09-06-2016*
3 | 
4 | ### Covered
5 | - [Chapter 2](http://neuralnetworksanddeeplearning.com/chap2.html) from Nielsen's Ebook
6 | - Setup and get comfortable with [Keras](https://keras.io/)
7 | - use [backend for Tensorflow](https://keras.io/backend/)
8 | - [Getting Started](https://keras.io/getting-started/sequential-model-guide/) (only skim examples at the bottom)
9 | - explore the [Keras GitHub](https://github.com/fchollet/keras/tree/master/examples) for interesting models
10 | 
11 | 
12 | ### Nielsen Chapter 2
13 | 
14 | - *backpropagation*
15 | - fast algorithm for computing gradients
16 | - introduced in 1970s
17 | - [This 1986 paper](http://www.nature.com/nature/journal/v323/n6088/pdf/323533a0.pdf) recognized the usefulness of its application in neural nets
18 | 
19 | #### Warm up: a fast matrix-based approach to computing output of neural net
20 | 
21 | - 3 neuron components
22 | - `w`: weight
23 | - `b`: bias
24 | - `a`: activation
25 | - `a_l_j = σ(∑_k w_l_jk * a_l-1_k + b_l_j)`
26 | - Weight matrix entry `w_l_jk`
27 | - `lth` layer
28 | - jth neuron in layer l
29 | - kth neuron in layer l - 1
30 | - `a_l = σ(w_l*a_l-1 + b_l)`
31 | - `z_l = w_l*a_l-1 + b_l`
32 | - Weighted input of the neurons in layer l
33 | 
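The matrix-based pass above drops straight into NumPy. A minimal sketch (the toy 2-3-1 network and the `sizes`/`weights`/`biases` names are illustrative, not Nielsen's code):

```python
import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def feedforward(a, weights, biases):
    """Compute the network's output for input activation column vector `a`."""
    for w, b in zip(weights, biases):
        z = w @ a + b      # weighted input z_l = w_l*a_(l-1) + b_l
        a = sigmoid(z)     # activation a_l = σ(z_l)
    return a

# toy 2-3-1 network with random parameters
rng = np.random.default_rng(0)
sizes = [2, 3, 1]
weights = [rng.standard_normal((y, x)) for x, y in zip(sizes[:-1], sizes[1:])]
biases = [rng.standard_normal((y, 1)) for y in sizes[1:]]
print(feedforward(np.array([[0.5], [-0.2]]), weights, biases))
```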
34 | #### The two assumptions we need about the cost function
35 | 
36 | - goal of backpropagation is to compute the partial derivatives of the cost function C with respect to any weight w or bias b
37 | - 2 assumptions about cost function
38 | 1. can be written as an average C = (1/n)∑_x C_x
39 | - allows us to get partial derivatives by averaging partial derivatives of individual training samples
40 | 2. can be written as a function of the outputs from the neural network
41 | 
42 | #### The Hadamard product
43 | 
44 | - `s⊙t` denotes *elementwise* product of two vectors s and t
45 | - `(s⊙t)_j = s_j*t_j`
46 | - called *Hadamard* or *Schur* product
47 | 
48 | #### The four fundamental equations behind backpropagation
49 | - backpropagation is about understanding how changing the weights and biases in a network changes the cost function.
50 | - `δ_l_j` represents the *error* in the jth neuron in the lth layer.
51 | - with backpropagation we compute this error and then relate it to the partial derivatives
52 | - `δ_l_j ≡ ∂C/∂z_l_j`
53 | - error for jth neuron in layer l
54 | - **4 fundamental equations of backpropagation**
55 | 1. `δ_L_j = ∂C/∂a_L_j * σ′(z_L_j)`
56 | - `δ_L = ∇_aC⊙σ′(z_L)` in matrix form
57 | 2. `δ_l=((w_l+1)^T*δ_l+1) ⊙ σ′(z_l)`
58 | - expresses the error in layer *l* in terms of the error in the next layer *l+1*
59 | - combining equations 1 and 2 allows us to compute the error for any layer in the net
60 | 3. `∂C/∂b_l_j = δ_l_j`
61 | - rate of change of the cost with respect to any bias in the network
62 | 4. `∂C/∂w_l_jk = a_l−1_k*δ_l_j`
63 | - rate of change of the cost with respect to any weight in the network
64 | - when activation is small, the gradient term with respect to w will tend to be small
65 | - weights output from low-activation neurons learn slowly
66 | 
67 | - sigmoid function is very flat around 0 or 1, so `σ′(z_L_j) ≈ 0`
68 | - From equation 1, an output neuron in the final layer will learn slowly if its activation is either low or high (near 0 or 1)
69 | - From equation 2, error is likely to get small if neuron is near saturation
70 | - These 4 equations hold for any activation function, not just the sigmoid function
71 | - proofs don't use any special properties of σ
72 | - so we could pick an activation function whose derivative is never close to 0 to prevent the slow-down of learning that occurs with saturated sigmoid neurons
73 | 
74 | #### Proof of the four fundamental equations (optional)
75 | - All four equations are consequences of the chain rule from multivariable calculus
76 | - because `a_L_j = σ(z_L_j)`, `∂a_L_j/∂z_L_j = σ′(z_L_j)`
77 | - We can think of backpropagation as a way of computing the gradient of the cost function by systematically applying the chain rule from multi-variable calculus
78 | 
79 | #### The backpropagation algorithm
80 | - High-level steps
81 | 1. **Input** x: set corresponding activation a_1 for the input layer
82 | 2. **Feedforward**: for each l = 2, 3, ..., L compute `z_l = w_l*a_l-1 + b_l` and `a_l = σ(z_l)`
83 | 3. **Output error** δ_L: compute the vector `δ_L = ∇_aC ⊙ σ′(z_L)`
84 | 4. **Backpropagate the error:** for each l = L-1, L-2, ..., 2 compute `δ_l = ((w_l+1)^T*δ_l+1) ⊙ σ′(z_l)`
85 | 5. **Output:** The gradient of the cost function is given by `∂C/∂w_l_jk = a_l−1_k * δ_l_j` and `∂C/∂b_l_j = δ_l_j`
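These five steps map almost line-for-line onto NumPy. A minimal sketch for the quadratic cost C = ½‖y − a_L‖², reusing `sigmoid`, `weights`, and `biases` from the sketch above (names illustrative); note the Hadamard product ⊙ is simply NumPy's elementwise `*`:

```python
import numpy as np

def sigmoid_prime(z):
    s = 1.0 / (1.0 + np.exp(-z))
    return s * (1.0 - s)

def backprop(x, y, weights, biases):
    """Gradients of C = 0.5*||y - a_L||^2 for a single training sample."""
    # 1. input + 2. feedforward, storing every z and activation
    activation, activations, zs = x, [x], []
    for w, b in zip(weights, biases):
        z = w @ activation + b
        zs.append(z)
        activation = sigmoid(z)
        activations.append(activation)
    # 3. output error (equation 1): δ_L = ∇_aC ⊙ σ′(z_L); here ∇_aC = a_L − y
    delta = (activations[-1] - y) * sigmoid_prime(zs[-1])
    nabla_w = [delta @ activations[-2].T]   # equation 4
    nabla_b = [delta]                       # equation 3
    # 4. backpropagate (equation 2): δ_l = ((w_l+1)^T δ_l+1) ⊙ σ′(z_l)
    for l in range(2, len(weights) + 1):
        delta = (weights[-l + 1].T @ delta) * sigmoid_prime(zs[-l])
        nabla_w.insert(0, delta @ activations[-l - 1].T)
        nabla_b.insert(0, delta)
    return nabla_w, nabla_b                 # 5. output: ∂C/∂w and ∂C/∂b
```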
86 | 
87 | - Error vectors are computed backward starting with final layer.
88 | - cost is a function of outputs from the network
89 | - to understand how cost varies with earlier weights and biases, we need to apply the chain rule backwards through layers
90 | - Backpropagation algo computes the gradient of the cost function for a single training sample
91 | - C = C_x
92 | - Common to combine backpropagation with a learning algo such as stochastic gradient descent (SGD) to compute the gradient for many training samples
93 | - Example learning step of gradient descent with mini-batch of m training samples
94 | 1. **Input a set of training examples**
95 | 2. **For each training example** x: set the corresponding input activation a_x,1 and perform the following steps
96 | a. **Feedforward**
97 | b. **Output error**
98 | c. **Backpropagate the error**
99 | 3. **Gradient descent**: For each l = L, L-1, ..., 2 update the weights and biases based on learning rules for the mini-batch
100 | 
101 | #### The code for backpropagation
102 | - refers mostly to Nielsen's GitHub project [neural-networks-and-deep-learning](https://github.com/mnielsen/neural-networks-and-deep-learning)
103 | 
104 | #### In what sense is backpropagation a fast algorithm?
105 | - Example calculation without backpropagation:
106 | - approximate derivative of cost: `∂C/∂w_j ≈ (C(w+ϵe_j)−C(w))/ϵ`
107 | - this is easy but very slow
108 | - if we have 1M weights in a network, to compute the gradient we must compute the cost function 1M times, each requiring a forward pass through the network per training sample
109 | - backpropagation allows us to simultaneously compute *all* partial derivatives using just one forward pass through the network, followed by one backward pass per training sample
110 | - this is MUCH faster (see the gradient-check sketch at the end of these notes)
111 | 
112 | #### Backpropagation: the big picture
113 | - 2 mysteries
114 | 1. Building deeper intuition around what's going on during all these matrix and vector multiplications
115 | 2. How could someone ever discover backpropagation in the first place?
116 | - Tracking how a change in weight or bias at a particular layer propagates through the network and results in a change in the cost leads to a complex sum over products of partial derivatives of activations between layers
117 | - this expression, manipulated with some calculus and linear algebra, leads to the 4 equations of backpropagation
118 | 
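As flagged in the fast-algorithm section above, the two approaches are easy to compare with a gradient check. A sketch, assuming `feedforward`, `backprop`, `weights`, and `biases` from the sketches above are in scope; the printed discrepancy should be tiny (on the order of ϵ):

```python
import numpy as np

x, y = np.array([[0.5], [-0.2]]), np.array([[1.0]])

def cost():
    return 0.5 * np.sum((feedforward(x, weights, biases) - y) ** 2)

eps = 1e-6
slow_grads = []
for w in weights:                        # one forward pass per weight...
    g = np.zeros_like(w)
    for idx in np.ndindex(*w.shape):
        old = w[idx]
        w[idx] = old + eps
        c_plus = cost()
        w[idx] = old                     # restore before the next weight
        g[idx] = (c_plus - cost()) / eps
    slow_grads.append(g)

fast_grads, _ = backprop(x, y, weights, biases)   # ...versus one backward pass
print(max(np.abs(s - f).max() for s, f in zip(slow_grads, fast_grads)))
```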
119 | 
--------------------------------------------------------------------------------
/weekly-work/week3/README.md:
--------------------------------------------------------------------------------
1 | # Week 3
2 | meeting date: *9-28-2016*
3 | 
4 | ## Covered
5 | - [Chapter 3](http://neuralnetworksanddeeplearning.com/chap3.html) from Nielsen's Ebook
6 | - "Part I: Introduction" of Peleg and Maggio's [Keras tutorial](https://github.com/leriomaggio/deep-learning-keras-euroscipy2016) from EuroSciPy in August
7 | 
8 | ## Nielsen Chapter 3: Improving the way Neural Networks Learn
9 | 
10 | #### to avoid learning slowdown
11 | 
12 | - choose cost functions that learn more quickly when the predicted output is far from the desired one, e.g.:
13 | - if you’d like to consider outputs independently, select sigmoid neurons paired with cross-entropy cost
14 | - if you’d like to consider outputs simultaneously and as probability distributions, select a softmax layer of neurons with log-likelihood cost
15 | 
16 | #### to avoid overfitting
17 | 
18 | - **stop training early**, i.e., when classification accuracy on test data flattens
19 | - use the popular [dropout](https://www.cs.toronto.edu/~hinton/absps/JMLRdropout.pdf) methodology
20 | - artificially expand your data set, e.g., by rotating MNIST digits slightly or adding noise to audio recordings
21 | - regularize: we covered [L1 and L2 regularization](https://www.quora.com/What-is-the-difference-between-L1-and-L2-regularization) in detail, with nuclear physicist [Thomas Balestri](https://www.linkedin.com/in/thomasbalestri) leading the elucidation
22 | 
23 | #### to initialize weights and biases
24 | 
25 | - to avoid initial saturation of neurons, sample randomly from a normal distribution with mean of zero and a standard deviation of 1/√(n inputs)
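A minimal NumPy sketch of this initialization, for a layer with `n_in` inputs and `n_out` neurons (sizes illustrative):

```python
import numpy as np

rng = np.random.default_rng(0)
n_in, n_out = 784, 30
# weights scaled by 1/sqrt(n_in); biases can stay plain standard normal
w = rng.normal(loc=0.0, scale=1.0 / np.sqrt(n_in), size=(n_out, n_in))
b = rng.normal(size=(n_out, 1))
# weighted inputs z = w·x + b now have modest variance, so sigmoid
# neurons start in their sensitive region rather than saturated
```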
26 | 
27 | #### Nielsen’s suggested sequence for choosing hyper-parameters
28 | 
29 | 1. Broad Strategy
30 | - first, achieve any level of learning that is better than chance
31 | - this may require simplifying the problem the network is trying to solve (e.g., distinguishing the digits 0 and 1 instead of attempting to classify all ten digits)
32 | - this may require simplifying the network architecture or reducing the size of the training data by orders of magnitude
33 | - speed up experimentation by maximizing the frequency with which you can monitor your network, thereby getting instantaneous feedback on performance (and, in my opinion, reducing the opportunity to be distracted by other tasks)
34 | 2. Learning Rate 𝜼
35 | - monitor cost to tune 𝜼 but monitor accuracy for the other hyper-parameters covered here
36 | - initially adjust 𝜼 by orders of magnitude to find a relatively smooth cost curve, i.e., with minimal oscillation
37 | - fine-tune 𝜼 to smooth the cost curve further
38 | - last, consider a variable learning rate schedule that begins fast (large 𝜼) and slows down (smaller 𝜼), perhaps repeatedly
39 | 3. Number of Epochs
40 | - as mentioned above, early stopping (when classification accuracy on test data flattens out) prevents overfitting
41 | - a no-accuracy-improvement-in-n rule (e.g., n = 10 epochs) introduces another hyper-parameter that you could potentially tune, as networks can plateau for a while before improving again, but try not to obsess over it
42 | 4. Regularization Parameter ƛ
43 | - initially start with no regularization (i.e., ƛ = 0) while determining the above hyper-parameters
44 | - use the validation data to select a better ƛ, starting with ƛ = 1.0
45 | - increase or decrease ƛ by orders of magnitude, then fine-tune
46 | - re-visit and re-optimize 𝜼
47 | 5. Mini-Batch Size
48 | - optimal mini-batch size varies as a function of the available memory on your machine, the dimensionality of your data, and the complexity of your neural network architecture
49 | - if too large, model weights aren’t updated frequently enough; if too small, hardware and software resources are wasted
50 | - after tuning 𝜼 and ƛ, plot validation accuracy versus real elapsed time to close in on a mini-batch size that maximizes training speed
51 | - re-visit and re-optimize both 𝜼 and ƛ
52 | 6. Automated Techniques
53 | - you can use a grid search, including via open-source software (e.g., [Spearmint](https://github.com/JasperSnoek/spearmint)), to optimize hyper-parameters automatically
54 | 
55 | #### Variations on Stochastic Gradient Descent
56 | 
57 | - **Hessian optimization**
58 | - incorporates second-order (curvature) information into weight and bias optimization
59 | - demonstrably converges on a minimum in fewer steps than standard gradient descent
60 | - requires considerably more memory than standard gradient descent because of the enormity of the Hessian matrix
61 | - **Momentum-based gradient descent**
62 | - inspired by Hessian optimization but avoids excessively large matrices
63 | - to balance between speed and avoiding overshooting a minimum, involves tuning the momentum coefficient μ between zero and one on validation data; see the sketch at the end of these notes
64 | - BFGS, limited-memory BFGS, Nesterov’s accelerated gradient
65 | - these are further popular alternative methods, but we didn’t cover them in any detail
66 | 
67 | #### Alternative Artificial Neurons
68 | 
69 | - **tanh**
70 | - bizarrely, apparently pronounced *tanch*
71 | - shape approximates the sigmoid function, but ranges from -1 to 1 instead of zero to one, thereby facilitating both positive and negative activations
72 | - [some evidence](http://jmlr.org/proceedings/papers/v9/glorot10a/glorot10a.pdf) suggests it outperforms sigmoid neurons
73 | - **ReLU**
74 | - rectified linear unit or rectified linear neuron
75 | - piecewise linear, so computationally simpler relative to sigmoid or tanh, yet a network of ReLUs can approximate their performance and nevertheless compute any function
76 | 
77 | 
78 | ## Applications
79 | 
80 | In addition to the theoretical work above, we applied our knowledge to software applications:
81 | 
82 | - untapt’s lead engineer Gabe Rives-Corbett demonstrated the high-level deep-learning library Keras with some of our in-house models as well as Peleg and Maggio’s (above-mentioned) tutorial
83 | - virologist [Grant Beyleveld](https://grantbeyleveld.wordpress.com/) unveiled the neural network he built from scratch in Python and committed into the study group repo [here](https://github.com/the-deep-learners/study-group/tree/master/nn-from-scratch)
84 | 
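As flagged above, a minimal sketch of momentum-based gradient descent on a toy quadratic cost (the values of η and μ are illustrative):

```python
import numpy as np

def momentum_step(w, v, grad, eta=0.1, mu=0.9):
    """One update; eta is the learning rate, mu the momentum coefficient."""
    v = mu * v - eta * grad      # velocity accumulates (damped) gradients
    return w + v, v              # mu balances speed against overshooting

# toy quadratic bowl C(w) = w^2, with gradient 2w
w, v = np.array(5.0), np.array(0.0)
for _ in range(200):
    w, v = momentum_step(w, v, grad=2 * w)
print(w)   # oscillates but approaches the minimum at 0
```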
--------------------------------------------------------------------------------
/weekly-work/week4/README.md:
--------------------------------------------------------------------------------
1 | # Week 4
2 | 
3 | meeting date: 10-20-2016
4 | 
5 | ## Covered
6 | 
7 | * Recommended reading from Nielsen's electronic text:
8 | * [Chapter Four](http://neuralnetworksanddeeplearning.com/chap4.html)
9 | * [Chapter Five](http://neuralnetworksanddeeplearning.com/chap5.html)
10 | 
11 | ### Proof Neural Nets can Compute any Function
12 | 
13 | * Neural nets can compute any function (i.e., they are *universal*), assuming that:
14 | 1. we accept they are an *approximation* (that can be improved by the inclusion of additional hidden neurons), as opposed to an *exact* solution
15 | 2. the function they are approximating is *continuous* (e.g., no sharp jumps)
16 | 
17 | * For the first time in our study session, we moved from whiteboarding to a projector to cover this content
18 | * In his fourth chapter, Michael Nielsen did a tremendous job of developing thematically-coherent, interactive JavaScript visualisations that facilitate a clear visual understanding of this proof; try it for yourself!
19 | * A fair bit of our discussion centered on the practicalities of expanding the proof beyond two input features into *n*-dimensional space
20 | 
21 | ### Factors making Deep Neural Networks Difficult to Train
22 | 
23 | * We primarily discussed the causes of, implications of, and methods to mitigate *unstable* gradients, which in deep neural nets tend to *vanish* but under certain circumstances can instead *explode* (see the sketch below)
24 | * We also touched on other factors that can make deep nets difficult to train, e.g., the propensity for sigmoid neurons to saturate in later layers, and the perils of fully-random weight initialization
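A toy numerical illustration of the vanishing-gradient effect, assuming sigmoid neurons: backprop multiplies one weight and one σ′(z) term (at most 0.25) per layer, so the gradient signal reaching early layers tends to shrink geometrically with depth:

```python
import numpy as np

def sigmoid_prime(z):
    s = 1.0 / (1.0 + np.exp(-z))
    return s * (1.0 - s)

rng = np.random.default_rng(0)
depth = 20
grad = 1.0
for _ in range(depth):
    w = rng.standard_normal()                 # typical unit-variance weight
    grad *= w * sigmoid_prime(rng.standard_normal())
print(grad)   # many orders of magnitude below 1: early layers barely learn
```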
25 | 
26 | ### Visualizing the Function of Particular Hidden Layers
27 | 
28 | * [Thomas Balestri](https://www.linkedin.com/in/thomasbalestri) introduced us to Jason Yosinski's breathtaking [Deep Visualization Toolbox](https://www.youtube.com/watch?v=AgkfIQ4IGaM) for developing an understanding of how individual layers contribute to a convolutional NN
29 | 
30 | ## Applications
31 | 
32 | * We took a break from applications for this session to focus on finishing Nielsen's text shortly, but we'll return to practical work for the next session
33 | 
--------------------------------------------------------------------------------
/weekly-work/week5/README.md:
--------------------------------------------------------------------------------
1 | # Session 5: Deep (Conv)Nets
2 | 
3 | Meeting date: November 10th, 2016
4 | 
5 | ## Recommended Preparatory Work
6 | 
7 | * [Ch. 6 of Michael Nielsen's text (the final chapter)](http://neuralnetworksanddeeplearning.com/chap6.html)
8 | * [TensorFlow for Poets](https://codelabs.developers.google.com/codelabs/tensorflow-for-poets/index.html)
9 | 
10 | ## Summary
11 | 
12 | #### Three Key Properties of Convolutional Neural Networks
13 | 
14 | 1. local receptive fields
15 | 2. shared weights and biases (within a given _kernel_ or _filter_)
16 | 3. pooling layers
17 | 
18 | #### Architecture Changes That Can Improve Classification Accuracy
19 | 
20 | See [this Jupyter notebook](https://github.com/the-deep-learners/study-group/blob/master/weekly-work/week5/network3.ipynb) for a Theano-focused script (based on Nielsen's code and text) that incrementally improves MNIST digit classification accuracy by:
21 | 
22 | 1. increasing the number of convolutional-pooling layers
23 | 2. using ReLU units in place of the sigmoid or _tanh_ variety
24 | 3. algorithmically expanding the training data
25 | 4. adding fully-connected layers (modest improvement)
26 | 5. using an ensemble of networks
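In the same spirit, a minimal Keras sketch combining improvements 1, 2, and 4 (two convolutional-pooling pairs with ReLUs, plus a fully-connected layer); the layer sizes are illustrative and this is not Nielsen's exact Theano architecture:

```python
from tensorflow.keras.datasets import mnist
from tensorflow.keras.layers import Conv2D, Dense, Flatten, MaxPooling2D
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical

(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.reshape(-1, 28, 28, 1) / 255.0
x_test = x_test.reshape(-1, 28, 28, 1) / 255.0

model = Sequential([
    Conv2D(20, kernel_size=5, activation="relu", input_shape=(28, 28, 1)),
    MaxPooling2D(pool_size=2),
    Conv2D(40, kernel_size=5, activation="relu"),   # second conv-pool pair
    MaxPooling2D(pool_size=2),
    Flatten(),
    Dense(100, activation="relu"),                  # fully-connected layer
    Dense(10, activation="softmax"),
])
model.compile(optimizer="sgd", loss="categorical_crossentropy",
              metrics=["accuracy"])
model.fit(x_train, to_categorical(y_train), epochs=3, batch_size=32,
          validation_data=(x_test, to_categorical(y_test)))
```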
27 | 
28 | #### Why Does ConvNet Training Work (Despite Unstable, e.g., Vanishing, Gradients)?
29 | 
30 | 1. convolutional layers have fewer parameters because of weight- and bias-sharing
31 | 2. "powerful" regularization techniques (e.g., dropout) to reduce overfitting
32 | 3. ReLU units (quicker training relative to sigmoid/_tanh_)
33 | 4. using GPUs if we're training for many epochs
34 | 5. sufficiently large set of training data (including algorithmic expansion if possible)
35 | 6. appropriate cost function choice
36 | 7. sensible weight initialization
37 | 
38 | #### Other Classes of Deep Neural Nets We Touched on Briefly
39 | 
40 | 1. _recurrent neural networks_ (RNNs), with special discussion of _long short-term memory units_ (LSTMs)
41 | 2. _deep belief networks_ (DBNs)
42 | 
43 | #### TensorFlow for Poets
44 | 
45 | * makes it trivial to leverage the powerful neural net image-classification architecture of _Inception v3_
46 | * study group member Thomas Balestri quickly trained it into an impressive image-classification tool for consumer products
47 | 
48 | ## Up Next
49 | 
50 | [CS231n Convolutional Neural Networks for Visual Recognition](http://cs231n.github.io/) notes and lectures
51 | 
--------------------------------------------------------------------------------
/weekly-work/week6/README.md:
--------------------------------------------------------------------------------
1 | # Session 6: Convolutional Neural Networks for Visual Recognition
2 | 
3 | Meeting date: November 30th, 2016
4 | 
5 | This was our first session since completing Michael Nielsen's [Neural Networks and Deep Learning](http://neuralnetworksanddeeplearning.com) text.
6 | 
7 | ## Recommended Preparatory Work
8 | 
9 | 1. the [first six lectures](https://www.youtube.com/watch?v=g-PvXUjD6qg&list=PLlJy-eBtNFt6EuMxFYRiNRS07MCWN5UIA) of Stanford's Winter 2016 CS231n course
10 | 1. the first four sets of [course notes](http://cs231n.github.io/), which cover:
11 | * [classification](http://cs231n.github.io/classification/)
12 | * [linear classification](http://cs231n.github.io/linear-classify/)
13 | * [optimization](http://cs231n.github.io/optimization-1/)
14 | * [more optimization](http://cs231n.github.io/optimization-2/)
15 | 1. optionally, [module 0](http://cs231n.github.io/) in the course notes, which provides an introduction to Python, NumPy, Jupyter Notebooks, the Unix command line, and Amazon Web Services
16 | 
17 | ## Summary
18 | 
19 | The course notes linked to above provide excellent summaries of the material covered in the second lecture onward. For the first lecture, given by the illustrious Fei-Fei Li, here are my (i.e., Jon Krohn) own notes:
20 | 
21 | #### Context
22 | 
23 | * Cisco: 85% of data on Internet is in the form of pixels ("dark matter")
24 | * more video sensors on earth than people
25 | * every minute, 150 hours of video are uploaded to YouTube
26 | 
27 | #### A History of Vision and Vision Research
28 | 
29 | * 543m years ago, explosion in speciation; Andrew Parker theorises this is due to the evolution of eyes (a simple pinhole light sensor in Trilobites)
30 | * first well-documented effort to duplicate the visual world: da Vinci's Camera Obscura (15th century)
31 | * Hubel & Wiesel (1959): Harvard postdocs whose work later won the 1981 Nobel Prize
32 | * vision starts with simple structures (edges), not whole objects (fish)
33 | 
34 | #### A History of Computer Vision
35 | 
36 | * Larry Roberts (1963) "Block World"
37 | * theorised that edge detection enables recognition of blocks from many angles
38 | * first two AI labs:
39 | 1. Marvin Minsky at MIT
40 | 2. John McCarthy at Stanford: coined "Artificial Intelligence" term
41 | * David Marr (1970s):
42 | * "Stages of Visual Representation" (it is hierarchical)
43 | * first stage is "edge image" akin to H&W
44 | * second stage is 2-D sketch
45 | * final, third stage is 3D representation; enables guidance and manipulation in the real world
46 | * David Lowe (1987): use edges to distinguish monochromatic razors
47 | * Shi & Malik (1997): "Normalized Cut" was an early stage of distinguishing objects within an image (segmentation)
48 | * Viola & Jones (2001): face detection within image
49 | * used in FujiFilm digital cameras in 2006; the first with face detection
50 | * first algorithm fast enough to be used for instantaneous machine vision
51 | * David Lowe (1999): "SIFT" Object Recognition via (a handful of key) features, as opposed to full figure
52 | * this was the basis of machine vision for a decade -- until the age of Deep Learning
53 | * features that Deep Learning networks learn are similar to features programmed by engineers
54 | * prior to Deep Learning approaches, primary techniques were graphical models and SVMs
55 | * e.g., "Deformable Part Model", which used "something like" SVM
56 | * PASCAL Video Object Challenge (2006-12) demonstrated improved classification performance on twenty object categories
57 | * IMAGENET (image-net.org) built in response by Fei-Fei Li and her colleagues (2009) with 22k categories and 14M images
58 | * IMAGENET Large Scale Visual Recognition Challenge: uses 1000 of IMAGENET object classes and 1.4M images
59 | * error rate decreases year-over-year, but in 2012 error rate was cut in half by a ConvNet (*SuperVision* by Krizhevsky & Hinton; seven-layer)
60 | * ConvNets were not invented in 2012, but a confluence of techniques enabled them to be transformative in that year
61 | * in 2014, best architectures were GoogLeNet and VGG
62 | * winning architecture in 2015 was MSRA's (Microsoft Research Asia) ResNet, which has 152 layers
63 | 
64 | #### CS231n course overview
65 | 
66 | * focus on the visual recognition problem, specifically image classification, within IMAGENET
67 | * also covers *object detection*, *image captioning*, and *action classification*
68 | * CNNs were "not invented overnight"
69 | * major contributions were:
70 | * 1980s: LeCun and Hinton worked out backpropagation mathematics
71 | * LeCun et al. (1998): MNIST digit classification, eventually sold to the U.S. Postal Service and banks (for cheques)
72 | * Krizhevsky et al. (2012): similar architecture to 1998, but able to leverage GPUs with three orders of magnitude more transistors, and able to train on IMAGENET, which has seven orders of magnitude more pixels than MNIST data; additional, but less important, changes include the use of ReLU in place of sigmoid neurons
73 | * problems that still need to be solved in machine vision
74 | * classification of *all* objects in image
75 | * recognition within three dimensions, e.g., for use in robotics
76 | * anything related to motion
77 | * "understanding" the relationship between objects, as opposed to just labelling objects (e.g., Justin Johnson's *Visual Genome* project)
78 | * the "holy grail" is to be able to narrate a scene; people can write an essay after seeing a scene for just 500ms (Fei-Fei et al., 2007)
79 | * machine vision facilitates better robots and will save lives
80 | 
81 | ## Application
82 | 
83 | Our colourful study group member [Dmitri Nesterenko](https://www.linkedin.com/in/dmitri-nesterenko-7ba4484), who is Director of Software Engineering at the **XO Group** downtown, went into considerable, helpful detail describing his adventures writing a *k*-Nearest Neighbours algorithm [from scratch](https://www.linkedin.com/in/dmitri-nesterenko-7ba4484).
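For reference, a minimal NumPy sketch of the k-NN idea (not Dmitri's implementation): compute L2 distances to every training point and take a majority vote among the k closest labels:

```python
import numpy as np

def knn_predict(x_train, y_train, x, k=5):
    dists = np.sqrt(((x_train - x) ** 2).sum(axis=1))   # L2 to every sample
    nearest = y_train[np.argsort(dists)[:k]]            # k closest labels
    return np.bincount(nearest).argmax()                # majority vote

# toy data: two Gaussian blobs with labels 0 and 1
rng = np.random.default_rng(0)
x_train = np.vstack([rng.normal(0, 1, (50, 2)), rng.normal(4, 1, (50, 2))])
y_train = np.array([0] * 50 + [1] * 50)
print(knn_predict(x_train, y_train, np.array([3.5, 4.2])))   # -> 1
```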
84 | 
85 | ## Up Next
86 | 
87 | 1. the remaining lectures and notes of CS231n, split over two sessions, in January and February
88 | 1. RNN/LSTM courses and materials from Richard Socher and Chris Olah
89 | 1. a hands-on TensorFlow tutorial with engineers from the Google office in New York
90 | 1. the [Deep Learning Papers Reading Roadmap](https://github.com/songrotek/Deep-Learning-Papers-Reading-Roadmap)
91 | 
--------------------------------------------------------------------------------
/weekly-work/week7/README.md:
--------------------------------------------------------------------------------
1 | # Session VII: Implementing Convolutional Nets
2 | 
3 | *Meeting date: January 12th, 2017*
4 | 
5 | By continuing to make our way through the material from Andrej Karpathy and Justin Johnson's CS231n (Stanford) lectures, we covered a broad range of practicalities and best practices for implementing convolutional neural nets.
6 | 
7 | ## Recommended Preparatory Work
8 | 
9 | 1. The final thirteen minutes of the [sixth CS231n lecture](https://www.youtube.com/watch?v=KaR4lIdI1MQ&index=1&list=LLup-fnSNRaByeuXOWqfnykw) (i.e., starting from the 57:30 mark)
10 | 2. Lectures [seven](https://www.youtube.com/watch?v=AQirPKrAyDg) through [twelve](https://www.youtube.com/watch?v=XgFlBsl0Lq4) of CS231n
11 | 3. CS231n lecture notes [five](http://cs231n.github.io/neural-networks-1/), [six](http://cs231n.github.io/neural-networks-2/), and [seven](http://cs231n.github.io/neural-networks-3/)
12 | 
13 | ## Summary
14 | 
15 | Topic highlights of the session included:
16 | 
17 | #### From Lecture 7
18 | 
19 | * common settings for the four hyperparameters of a convolutional layer, working through examples, as the numbers must work out (see the sketch below):
20 | * **K**: the number of filters (typically in powers of two -- some libraries optimise calculations to these levels)
21 | * **F**: spatial extent of the filters
22 | * **S**: stride length
23 | * **P**: the amount of zero padding
24 | * famous convolutional net architectures, with a focus on their changes (associated with classification accuracy improvements in ILSVRC) over time:
25 | * LeNet-5 (LeCun et al., 1998)
26 | * SuperVision / "AlexNet" (Krizhevsky et al., 2012)
27 | * ZFNet (Zeiler & Fergus, 2013)
28 | * VGGNet (Simonyan & Zisserman, 2014)
29 | * GoogLeNet (Szegedy et al., 2014)
30 | * ResNet (He et al., 2015)
31 | * network depth versus ILSVRC classification accuracy over time
32 | * ResNet network depth versus CIFAR-10 classification accuracy
33 | 
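As noted in the first bullet above, the numbers must work out: for input width W, filter extent F, stride S, and zero padding P, the CS231n output-size formula is (W - F + 2P)/S + 1, which must come out to an integer. A small sketch:

```python
def conv_output_size(W, F, S, P):
    """Spatial output size of a conv layer; raises if the numbers don't fit."""
    out = (W - F + 2 * P) / S + 1
    assert out.is_integer(), "hyperparameters don't fit the input!"
    return int(out)

print(conv_output_size(W=227, F=11, S=4, P=0))  # AlexNet's first layer -> 55
print(conv_output_size(W=32, F=5, S=1, P=2))    # padding preserves width -> 32
```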
34 | #### From Lecture 8
35 | 
36 | * comparing computer vision tasks, e.g.:
37 | * single object
38 | * classification
39 | * classification + localisation
40 | * multiple object
41 | * object detection
42 | * instance segmentation
43 | * the ILSVRC localisation error of famous ConvNet architectures:
44 | * AlexNet (2012)
45 | * Overfeat (2013)
46 | * VGG (2014)
47 | * ResNet (2015)
48 | * object detection data sets:
49 | * PASCAL VOC (2010): classic
50 | * ILSVRC *Detection* (2014): most classes and images per class
51 | * MS-COCO (2014): most objects per image
52 | * as with image recognition, *R-CNN* greatly outperforms pre-ConvNet methods
53 | * *Fast R-CNN* and the subsequent *Faster R-CNN* maintain classification accuracy but are 25 and 250 times faster than R-CNN, respectively
54 | * code for all three networks is available in the Caffe Zoo
55 | 
56 | #### From Lecture 9
57 | 
58 | * "deconvolutional" approaches for visualising and understanding individual neurons within convolutional neural networks:
59 | 1. feed an image into the net
60 | 2. pick a layer, set the gradient there to be all zero except for one
61 | 3. for some neuron of interest, backprop to image
62 | * NeuralStyle (Gatys et al., 2015): set an image to any style
63 | * intuitive explanations for fooling ConvNets (e.g., Nguyen, Yosinski & Clune, 2014; Szegedy et al., 2013):
64 | * visually: cases with parameters cleverly outside of the training set (Goodfellow, Shlens & Szegedy, 2014)
65 | * manually working through the arithmetic of fooling a binary linear classifier
66 | 
67 | #### From Lecture 10
68 | 
69 | * interpretable RNN neurons, as identified manually within text by [Karpathy, Johnson and Li (2015)](https://arxiv.org/abs/1506.02078):
70 | * quote detection
71 | * line length
72 | * if statements
73 | * quotes or comments
74 | * code indent depth
75 | * image captioning becomes possible by supplementing ConvNets with LSTMs (five key papers are provided on slide 51)
76 | * this requires image-sentence datasets, e.g.:
77 | * MS-COCO (2014), again (120k images, 5 sentences each)
78 | * ResNet is to vanilla ConvNet ~as LSTM is to RNN
79 | * GRUs (Cho et al., 2014) are the key alternative to LSTMs
80 | * Jozefowicz et al. (2015) provides a helpful, empirical comparison of RNN architectures
81 | 
82 | #### From Lecture 11
83 | 
84 | * NVIDIA chips are much more common than AMD for deep learning
85 | * GPUs greatly outperform CPUs
86 | * SSDs greatly outperform classic hard disks
87 | * disk size can become a limiting factor
88 | * floating point precision can go very low:
89 | * Courbariaux and Bengio (2016) train with single-bit activations and weights, so they are all simply either +1 or -1, though gradients require greater precision
90 | 
91 | #### From Lecture 12
92 | 
93 | * see [the blog post](https://insights.untapt.com/fundamental-deep-learning-code-in-tflearn-keras-theano-and-tensorflow-66be10a03227) I (Jon Krohn) published that summarises the pros and cons of the four primary deep learning libraries (TensorFlow, Theano, Torch, and Caffe) as covered by Justin Johnson in this lecture
94 | * in addition, here are Justin's broad recommendations:
95 | * for feature extraction or fine-tuning existing models: use Caffe
96 | * for complex uses of pretrained models: use Lasagne or Torch
97 | * for writing your own layers: use Torch
98 | * for "crazy" RNNs: use Theano or TensorFlow
99 | * for a very large model that requires parallelism: use TensorFlow
100 | 
101 | ## Up Next
102 | 
103 | 1. the remaining lectures and notes of CS231n, in February
104 | 1. Richard Socher's CS224d (also out of Stanford) on Deep Learning for Natural Language Processing, in early March
105 | 
--------------------------------------------------------------------------------
/weekly-work/week8/README.md:
--------------------------------------------------------------------------------
1 | # Session VIII: Unsupervised Learning, Regularisation, and Venture Capital
2 | 
3 | *Meeting date: February 7th, 2017*
4 | 
5 | With this session, we wrapped up our coverage of [CS231n](http://cs231n.github.io/) (Stanford) lectures, which were delivered by now-familiar faces Andrej Karpathy and Justin Johnson as well as guest lecturer, Google Senior Fellow Jeff Dean.
6 | 
7 | In addition, we were delighted to hear from guest speakers of our own:
8 | 
9 | 1. **[Raphaela Sapire](https://angel.co/raphaela-sapire)** on her experience as a venture capitalist at Blue Seed Capital, particularly her insight into the machine- and deep-learning start-up market (slides [here](https://github.com/the-deep-learners/study-group/blob/master/slides/2017-02-07__raphaela_sapire__billion_dollar_AI.pdf))
10 | 2. **[Katya Vasilaky](https://kathrynthegreat.github.io/)** on her research into L2 Regularization, the popular method to avoid overfitting in a wide range of models, including the deep-learning variety (slides [here]())
11 | 
12 | A summary blog post, replete with photos of the session, can be found [here](https://medium.com/@jjpkrohn/deep-learning-study-group-viii-unsupervised-learning-regularisation-and-venture-capital-9aba67fc931c).
13 | 
14 | 
15 | ## Recommended Preparatory Work
16 | 
17 | 1. The final three lectures from CS231n ([13](https://www.youtube.com/watch?v=UFnO-ADC-k0&list=PLlJy-eBtNFt6EuMxFYRiNRS07MCWN5UIA&index=13), [14](https://www.youtube.com/watch?v=I-i1KBuShCc&list=PLlJy-eBtNFt6EuMxFYRiNRS07MCWN5UIA&index=14), and [15](https://www.youtube.com/watch?v=s63vOy1kvsU&list=PLlJy-eBtNFt6EuMxFYRiNRS07MCWN5UIA&index=15))
18 | 2. as well as the final four sets of notes ([one](http://cs231n.github.io/neural-networks-case-study/), [two](http://cs231n.github.io/convolutional-networks/), [three](http://cs231n.github.io/understanding-cnn/), and [four](http://cs231n.github.io/transfer-learning/))
19 | 
20 | 
21 | ## Summary
22 | 
23 | 
24 | Topic highlights of the session included:
25 | 
26 | 
27 | #### From Lecture 14
28 | 
29 | ##### Karpathy on ConvNets applied to motion (videos)
30 | 
31 | * "fancy" spatio-temporal video ConvNets:
32 | * for detecting global motion: provide limited or no benefit over LSTMs applied to individual video frames
33 | * for detecting local motion, use a 3D ConvNet
34 | * try using Optical Flow in a second stream or GRU-RCN (the latter being Karpathy's favourite)
35 | 
36 | ##### Johnson on unsupervised learning:
37 | 
38 | * autoencoder overview
39 | * traditional:
40 | * try to reconstruct input
41 | * used to learn features, initialise supervised model
42 | * no longer predominant
43 | * variational:
44 | * Bayesian statistics crossed with Deep Learning (<3)
45 | * generate samples, e.g., images by sampling
46 | * Generative Adversarial Networks: generate samples
47 | * autoencoders in practice
48 | * input data (x) --> [encoder] --> features (z) --> [decoder] --> reconstructed input data (x)
49 | * the [encoder] and [decoder] often share weights
50 | * decoders evolved via this sequence:
51 | 1. linear + sigmoid neurons
52 | 2. deep, fully-connected
53 | 3. ReLU ConvNet ("upconv")
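To make the x --> [encoder] --> z --> [decoder] --> x pipeline concrete, a minimal fully-connected Keras sketch (the dimensions are illustrative; this is a toy, not any architecture from the lecture):

```python
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.models import Model

inputs = Input(shape=(784,))                        # e.g. flattened MNIST x
z = Dense(32, activation="relu")(inputs)            # encoder -> features z
outputs = Dense(784, activation="sigmoid")(z)       # decoder -> reconstruction
autoencoder = Model(inputs, outputs)
autoencoder.compile(optimizer="adam", loss="binary_crossentropy")
# trained to reproduce its own input: autoencoder.fit(x, x, epochs=10)
```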
54 | * Greedy Training of Autoencoders:
55 | * "Restricted Boltzmann Machines" (RBMs) were common in the mid-2000s
56 | * train one layer at a time
57 | * start with first layer, freeze it, move to second layer, etc.
58 | * Variational Autoencoders:
59 | * generate data by using Bayesian statistics within an autoencoder framework to sample from prior and posterior distributions
60 | * can, e.g., output smooth interpolations of input data
61 | * Generative Adversarial Networks:
62 | * seminal paper is Goodfellow et al. (NIPS 2014)
63 | * random noise --> [generator] --> fake images (plus, separately, real images from a data set) --> [discriminator] --> trained to distinguish real images from fake
64 | * image generation with "less math"
65 | * train generator and discriminator jointly; after training, image generation is straightforward
66 | * Denton et al. (NIPS 2015): expanded work by enabling discriminators to work at every scale (applied to single classes of CIFAR-10 dataset)
67 | * Radford et al. (ICLR 2016):
68 | * create realistic, latent space-interpolatable images of bedrooms
69 | * their generator: upsampling network with fractionally-strided convolutions
70 | * their discriminator: a ConvNet
71 | * "Architecture guidelines for stable Deep Conv GANs" (from Johnson):
72 | * replace any pooling layers with strided convolutions (discriminator) and fractionally-strided convolutions (generator)
73 | * use batch normalisation in both the generator and discriminator
74 | * remove fully-connected hidden layers for deeper architectures
75 | * use ReLU activation in generator for all layers except for output (Tanh)
76 | * use Leaky ReLU activation in all layers of discriminator
77 | * vector math:
78 | * [smiling woman] - [neutral woman] + [neutral man] = [smiling man]
79 | * [man with glasses] - [man without glasses] + [woman without glasses] = [woman with glasses]
80 | * Dosovitskiy & Brox (arXiv 2016):
81 | * creates convincing new ImageNet samples
82 | * trained on all ImageNet classes together
83 | * broadly a Variational Autoencoder fed into both (1.) a Discriminator network and (2.) a pretrained AlexNet (see slide 128 "Putting everything together" for diagram)
84 | 
85 | 
86 | ## Up Next
87 | 
88 | 1. Richard Socher's [CS224d](https://cs224d.stanford.edu/) (also out of Stanford) on Deep Learning for Natural Language Processing, in early March
89 | 
--------------------------------------------------------------------------------
/weekly-work/week9/02_pros_and_cons_of_counting_vs_w2v.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week9/02_pros_and_cons_of_counting_vs_w2v.png
--------------------------------------------------------------------------------
/weekly-work/week9/03_05_GloVe_visualizations_gender.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week9/03_05_GloVe_visualizations_gender.png
--------------------------------------------------------------------------------
/weekly-work/week9/03_06_GloVe_visualizations_CEO.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week9/03_06_GloVe_visualizations_CEO.png
--------------------------------------------------------------------------------
/weekly-work/week9/03_07_GloVe_visualizations_superlatives.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week9/03_07_GloVe_visualizations_superlatives.png
--------------------------------------------------------------------------------
/weekly-work/week9/2017_02_skipgram_diagram.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/weekly-work/week9/2017_02_skipgram_diagram.png
--------------------------------------------------------------------------------
/weekly-work/week9/2017_02_softmax.png:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------

Binary image assets (each served from https://raw.githubusercontent.com/the-deep-learners/study-group/c4b41c0f92b45da5c5d3767bc7bdbefeb5f54091/ followed by the path given):

* /weekly-work/week9/02_pros_and_cons_of_counting_vs_w2v.png
* /weekly-work/week9/03_05_GloVe_visualizations_gender.png
* /weekly-work/week9/03_06_GloVe_visualizations_CEO.png
* /weekly-work/week9/03_07_GloVe_visualizations_superlatives.png
* /weekly-work/week9/2017_02_skipgram_diagram.png
* /weekly-work/week9/2017_02_softmax.png
* /weekly-work/week9/2017_02_w2v_dot_products.png
* /weekly-work/week9/2017_02_word2vec_definition.png
* /weekly-work/week9/fun_glove_expressions.png
* /weekly-work/week9/w2v_objective_fxn.png
* /wiki-resources/3.jpg
* /wiki-resources/5_cropped.jpg
* /wiki-resources/IMG_2697.jpeg
* /wiki-resources/IMG_5959.JPG
* /wiki-resources/IMG_5974.JPG
* /wiki-resources/IMG_7624.JPG
* /wiki-resources/IMG_7641.JPG
* /wiki-resources/IMG_9147.JPG
* /wiki-resources/dlsg-dli-authors.jpg
* /wiki-resources/dlsg-xvii.jpg
* /wiki-resources/gitflow.png
* /wiki-resources/jk-at-dlsg-xviii.jpg
* /wiki-resources/session_XI_crew.JPG