├── prdg ├── BA │ └── ae_theory.tex ├── BW │ └── temp ├── GS │ └── temp ├── KF │ └── temp ├── MG │ └── temp ├── YC │ └── temp └── QC │ └── 13 │ └── theory.tex ├── arch ├── BA │ ├── temp │ └── gnn_theory.tex ├── BW │ └── temp ├── GS │ ├── temp │ ├── convolutions.tex │ └── convolutions_practice.tex ├── MG │ └── temp ├── RM │ └── temp ├── YC │ ├── temp │ └── 05-lab │ │ └── theory.tex ├── .DS_Store ├── IC │ ├── .DS_Store │ └── graph-cnn.tex ├── PK │ └── digression_fourier_transform.tex └── EN │ ├── hierarchical_representation.tex │ └── nonlinear_dim_expansion.tex ├── code ├── BA │ ├── temp │ └── gnn_coding.tex ├── BW │ └── temp ├── GS │ └── temp ├── MG │ └── temp ├── PK │ └── temp ├── RM │ └── temp ├── YC │ └── temp ├── .DS_Store ├── IC │ └── Multimodule_Systems_coding.tex └── KF │ └── bayesian-nn-coding.tex ├── prct ├── BA │ └── temp ├── BW │ ├── temp │ └── 09-lab │ │ └── practice.tex ├── GS │ └── temp ├── MG │ ├── temp │ └── truck-backer-upper.tex ├── YC │ ├── temp │ ├── .DS_Store │ └── 01-b │ │ ├── practice.tex │ │ └── coding.tex ├── .DS_Store ├── RM │ ├── .DS_Store │ └── 12-a_sparse-coding │ │ └── references.bib └── KF │ ├── convolution_demonstration.tex │ ├── visualizing_2D_interpolations.tex │ ├── loss_functions_non-convex.tex │ └── automatic_differentiation.tex ├── .DS_Store ├── figs ├── 1.png ├── 2.png ├── 3.png ├── 4.png ├── 1-1.png ├── 1-2.png ├── 1-3.png ├── 1-4.png ├── 1-5.png ├── EBM3.png ├── EBM4.png ├── EBM5.png ├── GRU.png ├── L1.PNG ├── L2.PNG ├── NLP.png ├── PIC5.PNG ├── PIC6.PNG ├── PSD.png ├── Pen.PNG ├── SDNN.png ├── baby.png ├── bptt.png ├── cake.png ├── conv.png ├── dim3.png ├── dim5.png ├── loss.png ├── lstm.png ├── nn.png ├── od.png ├── od1.png ├── pic1.png ├── pic2.png ├── pic3.png ├── pic4.png ├── pic7.png ├── pic8.png ├── pic9.png ├── rnn.png ├── sp_0.jpg ├── sp_1.jpg ├── sp_2.jpg ├── sp_3.jpg ├── sp_4.jpg ├── su.png ├── tanh.png ├── unet.png ├── vae.png ├── yolo.png ├── 1-1-1.png ├── 3-cog.png ├── 3DConv.png ├── EBM6-2.jpg ├── EBM7-2.jpg 
├── Hinge.png ├── Model.PNG ├── VOC07.png ├── agent1.PNG ├── basis.png ├── code1.png ├── code2.png ├── exp_1.png ├── exp_2.png ├── exp_3.png ├── exp_4.png ├── gibson.png ├── heidi.jpg ├── ic_01.png ├── ic_02.png ├── ic_03.png ├── ic_04.png ├── jigsaw.png ├── kernel.png ├── linear.png ├── loss.jpeg ├── lvm_2.png ├── r-cnn.png ├── shear.png ├── sparse.png ├── state.png ├── state2.png ├── views.png ├── 1-Lorentz.png ├── Biology.png ├── Dropout.png ├── EBM1-2.jpeg ├── EBM2-2.jpeg ├── Identity.PNG ├── Log_Loss.png ├── Rotation.png ├── Scaling.png ├── SuperVote.png ├── cnn_asr.png ├── combined.png ├── conv_net.png ├── decoder.png ├── deepmsk.png ├── ebm_train.png ├── enco_deco.png ├── encoder.png ├── ernergy.png ├── feature.png ├── figure3.png ├── figure4.png ├── jigsaw1.png ├── jigsaw2.png ├── locality.png ├── loss_zoo.png ├── maskrcnn.png ├── mixture.png ├── natural.png ├── natural_0.png ├── pic9old.jpg ├── places205.png ├── pointnet.jpg ├── slides2.PNG ├── sparsity.png ├── spiral1.png ├── spiral2.png ├── ssl_types.png ├── td_inputs.png ├── vae_expl.png ├── variable.png ├── wn-nouns2.jpg ├── 2-Davidson.png ├── AE_kernels.png ├── Autoencoder.png ├── ConvnetArch.png ├── FISTALISTA.jpg ├── MaxPooling.png ├── PATCHvCONV.jpg ├── PyTorchCNN.png ├── Reflection.png ├── SlowPenalty.jpg ├── Stochastic.png ├── VAE and GAN.png ├── audio-video.png ├── bubbles_kl.png ├── bubbles_rec.png ├── complexity.png ├── evaluation.png ├── fast_r-cnn.png ├── generative.png ├── lin_nonlin.png ├── merged_acc.png ├── reduce_code.png ├── retina_net.png ├── rnn_easy_10.png ├── seq_to_seq.png ├── seq_to_vec.png ├── supervised.png ├── translation.png ├── tree_graphs.PNG ├── word-level.png ├── 05-first_reg.png ├── Data_manifold.png ├── Denoising_AE.png ├── EBM_function.png ├── Early_Stopping.png ├── FISTAFlowGraph.jpg ├── Good_Bad_Loss.png ├── Learning rate.png ├── ReLU_function.jpg ├── SceneParsing.png ├── Square_Square.png ├── StanfordBGDS8.png ├── Why_normalize.png ├── architecture.png ├── 
bptt_formula.png ├── checkerboard0.png ├── checkerboard1.png ├── checkerboard2.png ├── cnn_hierarchy.png ├── colorization1.png ├── colorization2.png ├── covers_theorem.png ├── energy-based-1.png ├── energy-based-2.png ├── interpolation.png ├── keyword model.png ├── lstm_easy_10.png ├── lstm_easy_100.png ├── non-max-supp.png ├── parameterize.png ├── random+decoder.png ├── rnn-example-1.gif ├── rnn_easy_100.png ├── shuffle-learn.png ├── standardnormal.png ├── stationarity.png ├── step_function.png ├── tanh_function.png ├── tanh_sandwich.png ├── weight_sharing.png ├── weights_dist.png ├── 05-piecewise_reg.png ├── 3dunderstanding.png ├── Computer_Vision.png ├── ConvolutionATrou.png ├── Embedding_Graph.png ├── GAN illustration.png ├── Learning rate_1.png ├── Learning rate_3.png ├── MultipleFilters.jpg ├── RelativePosition.jpg ├── Self_supervised.png ├── conditional_EBM.png ├── gradient_descent.jpg ├── image_captioning.png ├── rnn_moderate_100.png ├── sigmoid_function.png ├── skip_connection.png ├── 05-random_at_first.png ├── CO2 concentration.png ├── Denoising_kernels.png ├── Embedding_ConvNet.png ├── FeatureExtraction.png ├── details-of-emulator.png ├── learning_to_execute.png ├── lstm_moderate_100.png ├── merged_loss_graph.png ├── merged_weights_hist.png ├── multimodule_cascade.jpg ├── nonlinear_expansion.png ├── normal_distribution.png ├── odd-one-out-network.png ├── reflection_example.png ├── seq_to_vec_to_seq.png ├── the-two-part-model.png ├── unconditional_EBM.png ├── Classification_binary.png ├── Classification_input.png ├── Classification_multi.png ├── GNN Papers Published.png ├── Regression using TanH.png ├── phrase_representation.png ├── semantic-segmentation.png ├── with_and_without_skip.png ├── Benefit of normalization.png ├── Contractive_AutoEncoder.png ├── Corrpution_and_Denoising.png ├── FISTAFlowGraphTimeUnfold.jpg ├── latent_EBM_architecture.png ├── poincareball_geodesics.PNG ├── poincareball_semantics.PNG ├── regularisation_dropout.png ├── 
training-the-controller.png ├── tree_graphs_limitation.PNG ├── VOC2007_SVM_classification.png ├── Regression with uncertainty.png ├── Classical Graph LEarning Tasks.png ├── Places205_linear_classification.png ├── Regression without uncertainty.png ├── quadratic_function_to_optimise.png ├── regularisation_and_overfitting.PNG ├── truck-trailer-and-loading-dock.png ├── Regression using Gaussian Process.png ├── Under_(over)_complete_Autoencoder.png ├── NonLinearlySeparableParametricCurves.png ├── Screen Shot 2019-05-03 at 2.03.51 PM.png ├── deep-learning-hierarchical-features.png ├── python code of getting mean and var.PNG ├── relational_learning_graphical_model.png ├── Schematic_Illustration_of_Autoencoder.png └── ArchitectureForClassificationAndVisualizationInTheInputSpace.png ├── labs ├── 13 │ ├── images │ │ ├── nn.png │ │ ├── loss.jpeg │ │ ├── state.png │ │ ├── state2.png │ │ ├── figure1.png │ │ ├── figure2.png │ │ ├── figure3.png │ │ ├── figure4.png │ │ ├── figure5.png │ │ ├── figure6.png │ │ ├── variable.png │ │ └── Screen Shot 2019-05-03 at 2.03.51 PM.png │ └── theory.tex ├── .DS_Store ├── 01 │ └── images │ │ ├── tanh.png │ │ ├── shear.png │ │ ├── Rotation.png │ │ ├── Scaling.png │ │ ├── Reflection.png │ │ ├── translation.png │ │ ├── standardnormal.png │ │ ├── tanh_sandwich.png │ │ └── reflection_example.png ├── 05 │ ├── images │ │ └── loss.png │ └── theory.tex ├── 09 │ ├── images │ │ ├── L1.PNG │ │ ├── L2.PNG │ │ ├── dropout.png │ │ ├── hist_l1.png │ │ ├── hist_l2.png │ │ ├── loss_l1.png │ │ ├── loss_l2.png │ │ ├── merged_acc.png │ │ ├── val_acc_l1.png │ │ ├── val_acc_l2.png │ │ ├── Early_Stopping.png │ │ ├── hist_dropout.png │ │ ├── hist_nothing.png │ │ ├── loss_dropout.png │ │ ├── loss_nothing.png │ │ ├── weights_dist.png │ │ ├── val_acc_dropout.png │ │ ├── val_acc_nothing.png │ │ ├── merged_loss_graph.png │ │ ├── merged_weights_hist.png │ │ ├── normal_distribution.png │ │ └── regularisation_and_overfitting.PNG │ └── practice.tex └── 03 │ └── images │ ├── 
05-first_reg.png │ ├── 05-piecewise_reg.png │ └── 05-random_at_first.png ├── lectures ├── .DS_Store ├── 08-b │ ├── 1.png │ ├── 2.png │ ├── 4.png │ ├── Hinge.png │ ├── Log_Loss.png │ ├── Good_Bad_Loss.png │ └── Square_Square.png ├── 02-b │ ├── Dropout.png │ ├── Identity.PNG │ ├── Stochastic.png │ ├── Learning rate.png │ ├── Learning rate_1.png │ ├── Learning rate_3.png │ ├── ReLU_function.jpg │ ├── Why_normalize.png │ ├── step_function.png │ ├── tanh_function.png │ ├── sigmoid_function.png │ └── Benefit of normalization.png ├── 07-b │ └── images │ │ ├── 1.png │ │ ├── 2.png │ │ ├── 3.png │ │ ├── 4.png │ │ ├── 5.png │ │ ├── 6-2.jpg │ │ ├── 6.png │ │ ├── 7-2.jpg │ │ ├── 7.png │ │ ├── 1-2.jpeg │ │ └── 2-2.jpeg ├── 01-a │ └── dl_features.png ├── 08-a │ └── images │ │ ├── 1-2.jpeg │ │ ├── 2-2.jpeg │ │ ├── loss_zoo.png │ │ ├── ebm_train.png │ │ ├── EBM_function.png │ │ ├── keyword model.png │ │ ├── obj_detection.png │ │ ├── conditional_EBM.png │ │ ├── unconditional_EBM.png │ │ └── latent_EBM_architecture.png └── 06-a │ └── 03-a │ └── images │ ├── relu.png │ ├── linear.png │ ├── not_wide.png │ ├── squash.png │ ├── stretch.png │ ├── data_noise.png │ ├── data_no_noise.png │ └── top_view_boundary.png ├── Figures └── quadratic_function_to_optimise.jpg ├── LICENSE.md ├── preface.tex ├── preamble.tex ├── instructions.tex └── main.tex /prdg/BA/ae_theory.tex: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /arch/BA/temp: -------------------------------------------------------------------------------- 1 | placeholder 2 | -------------------------------------------------------------------------------- /arch/BW/temp: -------------------------------------------------------------------------------- 1 | placeholder 2 | -------------------------------------------------------------------------------- /arch/GS/temp: 
-------------------------------------------------------------------------------- 1 | placeholder 2 | -------------------------------------------------------------------------------- /arch/MG/temp: -------------------------------------------------------------------------------- 1 | placeholder 2 | -------------------------------------------------------------------------------- /arch/RM/temp: -------------------------------------------------------------------------------- 1 | placeholder 2 | -------------------------------------------------------------------------------- /arch/YC/temp: -------------------------------------------------------------------------------- 1 | placeholder 2 | -------------------------------------------------------------------------------- /code/BA/temp: -------------------------------------------------------------------------------- 1 | placeholder 2 | -------------------------------------------------------------------------------- /code/BW/temp: -------------------------------------------------------------------------------- 1 | placeholder 2 | -------------------------------------------------------------------------------- /code/GS/temp: -------------------------------------------------------------------------------- 1 | placeholder 2 | -------------------------------------------------------------------------------- /code/MG/temp: -------------------------------------------------------------------------------- 1 | placeholder 2 | -------------------------------------------------------------------------------- /code/PK/temp: -------------------------------------------------------------------------------- 1 | placeholder 2 | -------------------------------------------------------------------------------- /code/RM/temp: -------------------------------------------------------------------------------- 1 | placeholder 2 | -------------------------------------------------------------------------------- /code/YC/temp: 
-------------------------------------------------------------------------------- 1 | placeholder 2 | -------------------------------------------------------------------------------- /prct/BA/temp: -------------------------------------------------------------------------------- 1 | placeholder 2 | -------------------------------------------------------------------------------- /prct/BW/temp: -------------------------------------------------------------------------------- 1 | placeholder 2 | -------------------------------------------------------------------------------- /prct/GS/temp: -------------------------------------------------------------------------------- 1 | placeholder 2 | -------------------------------------------------------------------------------- /prct/MG/temp: -------------------------------------------------------------------------------- 1 | placeholder 2 | -------------------------------------------------------------------------------- /prct/YC/temp: -------------------------------------------------------------------------------- 1 | placeholder 2 | -------------------------------------------------------------------------------- /prdg/BW/temp: -------------------------------------------------------------------------------- 1 | placeholder 2 | -------------------------------------------------------------------------------- /prdg/GS/temp: -------------------------------------------------------------------------------- 1 | placeholder 2 | -------------------------------------------------------------------------------- /prdg/KF/temp: -------------------------------------------------------------------------------- 1 | placeholder 2 | -------------------------------------------------------------------------------- /prdg/MG/temp: -------------------------------------------------------------------------------- 1 | placeholder 2 | -------------------------------------------------------------------------------- /prdg/YC/temp: 
-------------------------------------------------------------------------------- 1 | placeholder 2 | -------------------------------------------------------------------------------- /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/.DS_Store -------------------------------------------------------------------------------- /figs/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/1.png -------------------------------------------------------------------------------- /figs/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/2.png -------------------------------------------------------------------------------- /figs/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/3.png -------------------------------------------------------------------------------- /figs/4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/4.png -------------------------------------------------------------------------------- /figs/1-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/1-1.png -------------------------------------------------------------------------------- /figs/1-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/1-2.png 
-------------------------------------------------------------------------------- /figs/1-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/1-3.png -------------------------------------------------------------------------------- /figs/1-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/1-4.png -------------------------------------------------------------------------------- /figs/1-5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/1-5.png -------------------------------------------------------------------------------- /figs/EBM3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/EBM3.png -------------------------------------------------------------------------------- /figs/EBM4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/EBM4.png -------------------------------------------------------------------------------- /figs/EBM5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/EBM5.png -------------------------------------------------------------------------------- /figs/GRU.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/GRU.png -------------------------------------------------------------------------------- /figs/L1.PNG: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/L1.PNG -------------------------------------------------------------------------------- /figs/L2.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/L2.PNG -------------------------------------------------------------------------------- /figs/NLP.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/NLP.png -------------------------------------------------------------------------------- /figs/PIC5.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/PIC5.PNG -------------------------------------------------------------------------------- /figs/PIC6.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/PIC6.PNG -------------------------------------------------------------------------------- /figs/PSD.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/PSD.png -------------------------------------------------------------------------------- /figs/Pen.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/Pen.PNG -------------------------------------------------------------------------------- /figs/SDNN.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/SDNN.png -------------------------------------------------------------------------------- /figs/baby.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/baby.png -------------------------------------------------------------------------------- /figs/bptt.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/bptt.png -------------------------------------------------------------------------------- /figs/cake.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/cake.png -------------------------------------------------------------------------------- /figs/conv.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/conv.png -------------------------------------------------------------------------------- /figs/dim3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/dim3.png -------------------------------------------------------------------------------- /figs/dim5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/dim5.png -------------------------------------------------------------------------------- /figs/loss.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/loss.png 
-------------------------------------------------------------------------------- /figs/lstm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/lstm.png -------------------------------------------------------------------------------- /figs/nn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/nn.png -------------------------------------------------------------------------------- /figs/od.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/od.png -------------------------------------------------------------------------------- /figs/od1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/od1.png -------------------------------------------------------------------------------- /figs/pic1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/pic1.png -------------------------------------------------------------------------------- /figs/pic2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/pic2.png -------------------------------------------------------------------------------- /figs/pic3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/pic3.png -------------------------------------------------------------------------------- /figs/pic4.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/pic4.png -------------------------------------------------------------------------------- /figs/pic7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/pic7.png -------------------------------------------------------------------------------- /figs/pic8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/pic8.png -------------------------------------------------------------------------------- /figs/pic9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/pic9.png -------------------------------------------------------------------------------- /figs/rnn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/rnn.png -------------------------------------------------------------------------------- /figs/sp_0.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/sp_0.jpg -------------------------------------------------------------------------------- /figs/sp_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/sp_1.jpg -------------------------------------------------------------------------------- /figs/sp_2.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/sp_2.jpg -------------------------------------------------------------------------------- /figs/sp_3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/sp_3.jpg -------------------------------------------------------------------------------- /figs/sp_4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/sp_4.jpg -------------------------------------------------------------------------------- /figs/su.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/su.png -------------------------------------------------------------------------------- /figs/tanh.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/tanh.png -------------------------------------------------------------------------------- /figs/unet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/unet.png -------------------------------------------------------------------------------- /figs/vae.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/vae.png -------------------------------------------------------------------------------- /figs/yolo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/yolo.png 
-------------------------------------------------------------------------------- /arch/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/arch/.DS_Store -------------------------------------------------------------------------------- /code/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/code/.DS_Store -------------------------------------------------------------------------------- /figs/1-1-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/1-1-1.png -------------------------------------------------------------------------------- /figs/3-cog.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/3-cog.png -------------------------------------------------------------------------------- /figs/3DConv.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/3DConv.png -------------------------------------------------------------------------------- /figs/EBM6-2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/EBM6-2.jpg -------------------------------------------------------------------------------- /figs/EBM7-2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/EBM7-2.jpg -------------------------------------------------------------------------------- /figs/Hinge.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/Hinge.png -------------------------------------------------------------------------------- /figs/Model.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/Model.PNG -------------------------------------------------------------------------------- /figs/VOC07.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/VOC07.png -------------------------------------------------------------------------------- /figs/agent1.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/agent1.PNG -------------------------------------------------------------------------------- /figs/basis.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/basis.png -------------------------------------------------------------------------------- /figs/code1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/code1.png -------------------------------------------------------------------------------- /figs/code2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/code2.png -------------------------------------------------------------------------------- /figs/exp_1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/exp_1.png -------------------------------------------------------------------------------- /figs/exp_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/exp_2.png -------------------------------------------------------------------------------- /figs/exp_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/exp_3.png -------------------------------------------------------------------------------- /figs/exp_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/exp_4.png -------------------------------------------------------------------------------- /figs/gibson.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/gibson.png -------------------------------------------------------------------------------- /figs/heidi.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/heidi.jpg -------------------------------------------------------------------------------- /figs/ic_01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/ic_01.png -------------------------------------------------------------------------------- /figs/ic_02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/ic_02.png 
-------------------------------------------------------------------------------- /figs/ic_03.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/ic_03.png -------------------------------------------------------------------------------- /figs/ic_04.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/ic_04.png -------------------------------------------------------------------------------- /figs/jigsaw.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/jigsaw.png -------------------------------------------------------------------------------- /figs/kernel.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/kernel.png -------------------------------------------------------------------------------- /figs/linear.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/linear.png -------------------------------------------------------------------------------- /figs/loss.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/loss.jpeg -------------------------------------------------------------------------------- /figs/lvm_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/lvm_2.png -------------------------------------------------------------------------------- /figs/r-cnn.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/r-cnn.png -------------------------------------------------------------------------------- /figs/shear.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/shear.png -------------------------------------------------------------------------------- /figs/sparse.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/sparse.png -------------------------------------------------------------------------------- /figs/state.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/state.png -------------------------------------------------------------------------------- /figs/state2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/state2.png -------------------------------------------------------------------------------- /figs/views.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/views.png -------------------------------------------------------------------------------- /labs/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/labs/.DS_Store -------------------------------------------------------------------------------- /prct/.DS_Store: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/prct/.DS_Store -------------------------------------------------------------------------------- /arch/IC/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/arch/IC/.DS_Store -------------------------------------------------------------------------------- /figs/1-Lorentz.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/1-Lorentz.png -------------------------------------------------------------------------------- /figs/Biology.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/Biology.png -------------------------------------------------------------------------------- /figs/Dropout.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/Dropout.png -------------------------------------------------------------------------------- /figs/EBM1-2.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/EBM1-2.jpeg -------------------------------------------------------------------------------- /figs/EBM2-2.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/EBM2-2.jpeg -------------------------------------------------------------------------------- /figs/Identity.PNG: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/Identity.PNG -------------------------------------------------------------------------------- /figs/Log_Loss.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/Log_Loss.png -------------------------------------------------------------------------------- /figs/Rotation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/Rotation.png -------------------------------------------------------------------------------- /figs/Scaling.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/Scaling.png -------------------------------------------------------------------------------- /figs/SuperVote.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/SuperVote.png -------------------------------------------------------------------------------- /figs/cnn_asr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/cnn_asr.png -------------------------------------------------------------------------------- /figs/combined.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/combined.png -------------------------------------------------------------------------------- /figs/conv_net.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/conv_net.png -------------------------------------------------------------------------------- /figs/decoder.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/decoder.png -------------------------------------------------------------------------------- /figs/deepmsk.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/deepmsk.png -------------------------------------------------------------------------------- /figs/ebm_train.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/ebm_train.png -------------------------------------------------------------------------------- /figs/enco_deco.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/enco_deco.png -------------------------------------------------------------------------------- /figs/encoder.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/encoder.png -------------------------------------------------------------------------------- /figs/ernergy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/ernergy.png -------------------------------------------------------------------------------- /figs/feature.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/feature.png -------------------------------------------------------------------------------- /figs/figure3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/figure3.png -------------------------------------------------------------------------------- /figs/figure4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/figure4.png -------------------------------------------------------------------------------- /figs/jigsaw1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/jigsaw1.png -------------------------------------------------------------------------------- /figs/jigsaw2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/jigsaw2.png -------------------------------------------------------------------------------- /figs/locality.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/locality.png -------------------------------------------------------------------------------- /figs/loss_zoo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/loss_zoo.png -------------------------------------------------------------------------------- /figs/maskrcnn.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/maskrcnn.png -------------------------------------------------------------------------------- /figs/mixture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/mixture.png -------------------------------------------------------------------------------- /figs/natural.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/natural.png -------------------------------------------------------------------------------- /figs/natural_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/natural_0.png -------------------------------------------------------------------------------- /figs/pic9old.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/pic9old.jpg -------------------------------------------------------------------------------- /figs/places205.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/places205.png -------------------------------------------------------------------------------- /figs/pointnet.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/pointnet.jpg -------------------------------------------------------------------------------- /figs/slides2.PNG: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/slides2.PNG -------------------------------------------------------------------------------- /figs/sparsity.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/sparsity.png -------------------------------------------------------------------------------- /figs/spiral1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/spiral1.png -------------------------------------------------------------------------------- /figs/spiral2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/spiral2.png -------------------------------------------------------------------------------- /figs/ssl_types.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/ssl_types.png -------------------------------------------------------------------------------- /figs/td_inputs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/td_inputs.png -------------------------------------------------------------------------------- /figs/vae_expl.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/vae_expl.png -------------------------------------------------------------------------------- /figs/variable.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/variable.png -------------------------------------------------------------------------------- /figs/wn-nouns2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/wn-nouns2.jpg -------------------------------------------------------------------------------- /lectures/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/lectures/.DS_Store -------------------------------------------------------------------------------- /prct/RM/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/prct/RM/.DS_Store -------------------------------------------------------------------------------- /prct/YC/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/prct/YC/.DS_Store -------------------------------------------------------------------------------- /figs/2-Davidson.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/2-Davidson.png -------------------------------------------------------------------------------- /figs/AE_kernels.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/AE_kernels.png -------------------------------------------------------------------------------- /figs/Autoencoder.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/Autoencoder.png -------------------------------------------------------------------------------- /figs/ConvnetArch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/ConvnetArch.png -------------------------------------------------------------------------------- /figs/FISTALISTA.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/FISTALISTA.jpg -------------------------------------------------------------------------------- /figs/MaxPooling.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/MaxPooling.png -------------------------------------------------------------------------------- /figs/PATCHvCONV.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/PATCHvCONV.jpg -------------------------------------------------------------------------------- /figs/PyTorchCNN.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/PyTorchCNN.png -------------------------------------------------------------------------------- /figs/Reflection.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/Reflection.png -------------------------------------------------------------------------------- /figs/SlowPenalty.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/SlowPenalty.jpg -------------------------------------------------------------------------------- /figs/Stochastic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/Stochastic.png -------------------------------------------------------------------------------- /figs/VAE and GAN.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/VAE and GAN.png -------------------------------------------------------------------------------- /figs/audio-video.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/audio-video.png -------------------------------------------------------------------------------- /figs/bubbles_kl.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/bubbles_kl.png -------------------------------------------------------------------------------- /figs/bubbles_rec.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/bubbles_rec.png -------------------------------------------------------------------------------- /figs/complexity.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/complexity.png -------------------------------------------------------------------------------- /figs/evaluation.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/evaluation.png -------------------------------------------------------------------------------- /figs/fast_r-cnn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/fast_r-cnn.png -------------------------------------------------------------------------------- /figs/generative.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/generative.png -------------------------------------------------------------------------------- /figs/lin_nonlin.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/lin_nonlin.png -------------------------------------------------------------------------------- /figs/merged_acc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/merged_acc.png -------------------------------------------------------------------------------- /figs/reduce_code.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/reduce_code.png -------------------------------------------------------------------------------- /figs/retina_net.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/retina_net.png -------------------------------------------------------------------------------- /figs/rnn_easy_10.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/rnn_easy_10.png -------------------------------------------------------------------------------- /figs/seq_to_seq.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/seq_to_seq.png -------------------------------------------------------------------------------- /figs/seq_to_vec.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/seq_to_vec.png -------------------------------------------------------------------------------- /figs/supervised.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/supervised.png -------------------------------------------------------------------------------- /figs/translation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/translation.png -------------------------------------------------------------------------------- /figs/tree_graphs.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/tree_graphs.PNG -------------------------------------------------------------------------------- /figs/word-level.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/word-level.png -------------------------------------------------------------------------------- /lectures/08-b/1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/lectures/08-b/1.png -------------------------------------------------------------------------------- /lectures/08-b/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/lectures/08-b/2.png -------------------------------------------------------------------------------- /lectures/08-b/4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/lectures/08-b/4.png -------------------------------------------------------------------------------- /figs/05-first_reg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/05-first_reg.png -------------------------------------------------------------------------------- /figs/Data_manifold.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/Data_manifold.png -------------------------------------------------------------------------------- /figs/Denoising_AE.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/Denoising_AE.png -------------------------------------------------------------------------------- /figs/EBM_function.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/EBM_function.png -------------------------------------------------------------------------------- /figs/Early_Stopping.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/Early_Stopping.png -------------------------------------------------------------------------------- /figs/FISTAFlowGraph.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/FISTAFlowGraph.jpg -------------------------------------------------------------------------------- /figs/Good_Bad_Loss.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/Good_Bad_Loss.png -------------------------------------------------------------------------------- /figs/Learning rate.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/Learning rate.png -------------------------------------------------------------------------------- /figs/ReLU_function.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/ReLU_function.jpg -------------------------------------------------------------------------------- /figs/SceneParsing.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/SceneParsing.png -------------------------------------------------------------------------------- /figs/Square_Square.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/Square_Square.png -------------------------------------------------------------------------------- /figs/StanfordBGDS8.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/StanfordBGDS8.png -------------------------------------------------------------------------------- /figs/Why_normalize.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/Why_normalize.png -------------------------------------------------------------------------------- /figs/architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/architecture.png -------------------------------------------------------------------------------- /figs/bptt_formula.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/bptt_formula.png -------------------------------------------------------------------------------- /figs/checkerboard0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/checkerboard0.png -------------------------------------------------------------------------------- /figs/checkerboard1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/checkerboard1.png -------------------------------------------------------------------------------- /figs/checkerboard2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/checkerboard2.png -------------------------------------------------------------------------------- /figs/cnn_hierarchy.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/cnn_hierarchy.png -------------------------------------------------------------------------------- /figs/colorization1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/colorization1.png -------------------------------------------------------------------------------- /figs/colorization2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/colorization2.png -------------------------------------------------------------------------------- /figs/covers_theorem.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/covers_theorem.png -------------------------------------------------------------------------------- /figs/energy-based-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/energy-based-1.png -------------------------------------------------------------------------------- /figs/energy-based-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/energy-based-2.png -------------------------------------------------------------------------------- /figs/interpolation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/interpolation.png -------------------------------------------------------------------------------- /figs/keyword 
model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/keyword model.png -------------------------------------------------------------------------------- /figs/lstm_easy_10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/lstm_easy_10.png -------------------------------------------------------------------------------- /figs/lstm_easy_100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/lstm_easy_100.png -------------------------------------------------------------------------------- /figs/non-max-supp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/non-max-supp.png -------------------------------------------------------------------------------- /figs/parameterize.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/parameterize.png -------------------------------------------------------------------------------- /figs/random+decoder.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/random+decoder.png -------------------------------------------------------------------------------- /figs/rnn-example-1.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/rnn-example-1.gif -------------------------------------------------------------------------------- 
/figs/rnn_easy_100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/rnn_easy_100.png -------------------------------------------------------------------------------- /figs/shuffle-learn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/shuffle-learn.png -------------------------------------------------------------------------------- /figs/standardnormal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/standardnormal.png -------------------------------------------------------------------------------- /figs/stationarity.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/stationarity.png -------------------------------------------------------------------------------- /figs/step_function.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/step_function.png -------------------------------------------------------------------------------- /figs/tanh_function.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/tanh_function.png -------------------------------------------------------------------------------- /figs/tanh_sandwich.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/tanh_sandwich.png -------------------------------------------------------------------------------- 
/figs/weight_sharing.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/weight_sharing.png -------------------------------------------------------------------------------- /figs/weights_dist.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/weights_dist.png -------------------------------------------------------------------------------- /labs/01/images/tanh.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/labs/01/images/tanh.png -------------------------------------------------------------------------------- /labs/05/images/loss.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/labs/05/images/loss.png -------------------------------------------------------------------------------- /labs/09/images/L1.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/labs/09/images/L1.PNG -------------------------------------------------------------------------------- /labs/09/images/L2.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/labs/09/images/L2.PNG -------------------------------------------------------------------------------- /labs/13/images/nn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/labs/13/images/nn.png -------------------------------------------------------------------------------- 
/lectures/08-b/Hinge.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/lectures/08-b/Hinge.png -------------------------------------------------------------------------------- /figs/05-piecewise_reg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/05-piecewise_reg.png -------------------------------------------------------------------------------- /figs/3dunderstanding.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/3dunderstanding.png -------------------------------------------------------------------------------- /figs/Computer_Vision.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/Computer_Vision.png -------------------------------------------------------------------------------- /figs/ConvolutionATrou.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/ConvolutionATrou.png -------------------------------------------------------------------------------- /figs/Embedding_Graph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/Embedding_Graph.png -------------------------------------------------------------------------------- /figs/GAN illustration.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/GAN illustration.png 
-------------------------------------------------------------------------------- /figs/Learning rate_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/Learning rate_1.png -------------------------------------------------------------------------------- /figs/Learning rate_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/Learning rate_3.png -------------------------------------------------------------------------------- /figs/MultipleFilters.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/MultipleFilters.jpg -------------------------------------------------------------------------------- /figs/RelativePosition.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/RelativePosition.jpg -------------------------------------------------------------------------------- /figs/Self_supervised.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/Self_supervised.png -------------------------------------------------------------------------------- /figs/conditional_EBM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/conditional_EBM.png -------------------------------------------------------------------------------- /figs/gradient_descent.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/gradient_descent.jpg -------------------------------------------------------------------------------- /figs/image_captioning.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/image_captioning.png -------------------------------------------------------------------------------- /figs/rnn_moderate_100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/rnn_moderate_100.png -------------------------------------------------------------------------------- /figs/sigmoid_function.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/sigmoid_function.png -------------------------------------------------------------------------------- /figs/skip_connection.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/skip_connection.png -------------------------------------------------------------------------------- /labs/01/images/shear.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/labs/01/images/shear.png -------------------------------------------------------------------------------- /labs/13/images/loss.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/labs/13/images/loss.jpeg -------------------------------------------------------------------------------- /labs/13/images/state.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/labs/13/images/state.png -------------------------------------------------------------------------------- /labs/13/images/state2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/labs/13/images/state2.png -------------------------------------------------------------------------------- /lectures/02-b/Dropout.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/lectures/02-b/Dropout.png -------------------------------------------------------------------------------- /figs/05-random_at_first.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/05-random_at_first.png -------------------------------------------------------------------------------- /figs/CO2 concentration.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/CO2 concentration.png -------------------------------------------------------------------------------- /figs/Denoising_kernels.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/Denoising_kernels.png -------------------------------------------------------------------------------- /figs/Embedding_ConvNet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/Embedding_ConvNet.png 
-------------------------------------------------------------------------------- /figs/FeatureExtraction.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/FeatureExtraction.png -------------------------------------------------------------------------------- /figs/details-of-emulator.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/details-of-emulator.png -------------------------------------------------------------------------------- /figs/learning_to_execute.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/learning_to_execute.png -------------------------------------------------------------------------------- /figs/lstm_moderate_100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/lstm_moderate_100.png -------------------------------------------------------------------------------- /figs/merged_loss_graph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/merged_loss_graph.png -------------------------------------------------------------------------------- /figs/merged_weights_hist.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/merged_weights_hist.png -------------------------------------------------------------------------------- /figs/multimodule_cascade.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/multimodule_cascade.jpg -------------------------------------------------------------------------------- /figs/nonlinear_expansion.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/nonlinear_expansion.png -------------------------------------------------------------------------------- /figs/normal_distribution.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/normal_distribution.png -------------------------------------------------------------------------------- /figs/odd-one-out-network.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/odd-one-out-network.png -------------------------------------------------------------------------------- /figs/reflection_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/reflection_example.png -------------------------------------------------------------------------------- /figs/seq_to_vec_to_seq.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/seq_to_vec_to_seq.png -------------------------------------------------------------------------------- /figs/the-two-part-model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/the-two-part-model.png -------------------------------------------------------------------------------- /figs/unconditional_EBM.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/unconditional_EBM.png -------------------------------------------------------------------------------- /labs/01/images/Rotation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/labs/01/images/Rotation.png -------------------------------------------------------------------------------- /labs/01/images/Scaling.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/labs/01/images/Scaling.png -------------------------------------------------------------------------------- /labs/09/images/dropout.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/labs/09/images/dropout.png -------------------------------------------------------------------------------- /labs/09/images/hist_l1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/labs/09/images/hist_l1.png -------------------------------------------------------------------------------- /labs/09/images/hist_l2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/labs/09/images/hist_l2.png -------------------------------------------------------------------------------- /labs/09/images/loss_l1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/labs/09/images/loss_l1.png 
-------------------------------------------------------------------------------- /labs/09/images/loss_l2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/labs/09/images/loss_l2.png -------------------------------------------------------------------------------- /labs/13/images/figure1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/labs/13/images/figure1.png -------------------------------------------------------------------------------- /labs/13/images/figure2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/labs/13/images/figure2.png -------------------------------------------------------------------------------- /labs/13/images/figure3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/labs/13/images/figure3.png -------------------------------------------------------------------------------- /labs/13/images/figure4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/labs/13/images/figure4.png -------------------------------------------------------------------------------- /labs/13/images/figure5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/labs/13/images/figure5.png -------------------------------------------------------------------------------- /labs/13/images/figure6.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/labs/13/images/figure6.png -------------------------------------------------------------------------------- /labs/13/images/variable.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/labs/13/images/variable.png -------------------------------------------------------------------------------- /lectures/02-b/Identity.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/lectures/02-b/Identity.PNG -------------------------------------------------------------------------------- /lectures/02-b/Stochastic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/lectures/02-b/Stochastic.png -------------------------------------------------------------------------------- /lectures/07-b/images/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/lectures/07-b/images/1.png -------------------------------------------------------------------------------- /lectures/07-b/images/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/lectures/07-b/images/2.png -------------------------------------------------------------------------------- /lectures/07-b/images/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/lectures/07-b/images/3.png -------------------------------------------------------------------------------- /lectures/07-b/images/4.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/lectures/07-b/images/4.png -------------------------------------------------------------------------------- /lectures/07-b/images/5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/lectures/07-b/images/5.png -------------------------------------------------------------------------------- /lectures/07-b/images/6-2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/lectures/07-b/images/6-2.jpg -------------------------------------------------------------------------------- /lectures/07-b/images/6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/lectures/07-b/images/6.png -------------------------------------------------------------------------------- /lectures/07-b/images/7-2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/lectures/07-b/images/7-2.jpg -------------------------------------------------------------------------------- /lectures/07-b/images/7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/lectures/07-b/images/7.png -------------------------------------------------------------------------------- /lectures/08-b/Log_Loss.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/lectures/08-b/Log_Loss.png 
-------------------------------------------------------------------------------- /figs/Classification_binary.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/Classification_binary.png -------------------------------------------------------------------------------- /figs/Classification_input.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/Classification_input.png -------------------------------------------------------------------------------- /figs/Classification_multi.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/Classification_multi.png -------------------------------------------------------------------------------- /figs/GNN Papers Published.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/GNN Papers Published.png -------------------------------------------------------------------------------- /figs/Regression using TanH.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/Regression using TanH.png -------------------------------------------------------------------------------- /figs/phrase_representation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/phrase_representation.png -------------------------------------------------------------------------------- /figs/semantic-segmentation.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/semantic-segmentation.png -------------------------------------------------------------------------------- /figs/with_and_without_skip.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/with_and_without_skip.png -------------------------------------------------------------------------------- /labs/01/images/Reflection.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/labs/01/images/Reflection.png -------------------------------------------------------------------------------- /labs/01/images/translation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/labs/01/images/translation.png -------------------------------------------------------------------------------- /labs/09/images/merged_acc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/labs/09/images/merged_acc.png -------------------------------------------------------------------------------- /labs/09/images/val_acc_l1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/labs/09/images/val_acc_l1.png -------------------------------------------------------------------------------- /labs/09/images/val_acc_l2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/labs/09/images/val_acc_l2.png 
-------------------------------------------------------------------------------- /lectures/01-a/dl_features.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/lectures/01-a/dl_features.png -------------------------------------------------------------------------------- /lectures/07-b/images/1-2.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/lectures/07-b/images/1-2.jpeg -------------------------------------------------------------------------------- /lectures/07-b/images/2-2.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/lectures/07-b/images/2-2.jpeg -------------------------------------------------------------------------------- /lectures/08-a/images/1-2.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/lectures/08-a/images/1-2.jpeg -------------------------------------------------------------------------------- /lectures/08-a/images/2-2.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/lectures/08-a/images/2-2.jpeg -------------------------------------------------------------------------------- /figs/Benefit of normalization.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/Benefit of normalization.png -------------------------------------------------------------------------------- /figs/Contractive_AutoEncoder.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/Contractive_AutoEncoder.png -------------------------------------------------------------------------------- /figs/Corrpution_and_Denoising.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/Corrpution_and_Denoising.png -------------------------------------------------------------------------------- /figs/FISTAFlowGraphTimeUnfold.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/FISTAFlowGraphTimeUnfold.jpg -------------------------------------------------------------------------------- /figs/latent_EBM_architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/latent_EBM_architecture.png -------------------------------------------------------------------------------- /figs/poincareball_geodesics.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/poincareball_geodesics.PNG -------------------------------------------------------------------------------- /figs/poincareball_semantics.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/poincareball_semantics.PNG -------------------------------------------------------------------------------- /figs/regularisation_dropout.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/regularisation_dropout.png -------------------------------------------------------------------------------- /figs/training-the-controller.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/training-the-controller.png -------------------------------------------------------------------------------- /figs/tree_graphs_limitation.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/tree_graphs_limitation.PNG -------------------------------------------------------------------------------- /labs/01/images/standardnormal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/labs/01/images/standardnormal.png -------------------------------------------------------------------------------- /labs/01/images/tanh_sandwich.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/labs/01/images/tanh_sandwich.png -------------------------------------------------------------------------------- /labs/03/images/05-first_reg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/labs/03/images/05-first_reg.png -------------------------------------------------------------------------------- /labs/09/images/Early_Stopping.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/labs/09/images/Early_Stopping.png 
-------------------------------------------------------------------------------- /labs/09/images/hist_dropout.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/labs/09/images/hist_dropout.png -------------------------------------------------------------------------------- /labs/09/images/hist_nothing.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/labs/09/images/hist_nothing.png -------------------------------------------------------------------------------- /labs/09/images/loss_dropout.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/labs/09/images/loss_dropout.png -------------------------------------------------------------------------------- /labs/09/images/loss_nothing.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/labs/09/images/loss_nothing.png -------------------------------------------------------------------------------- /labs/09/images/weights_dist.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/labs/09/images/weights_dist.png -------------------------------------------------------------------------------- /lectures/02-b/Learning rate.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/lectures/02-b/Learning rate.png -------------------------------------------------------------------------------- /lectures/02-b/Learning rate_1.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/lectures/02-b/Learning rate_1.png -------------------------------------------------------------------------------- /lectures/02-b/Learning rate_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/lectures/02-b/Learning rate_3.png -------------------------------------------------------------------------------- /lectures/02-b/ReLU_function.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/lectures/02-b/ReLU_function.jpg -------------------------------------------------------------------------------- /lectures/02-b/Why_normalize.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/lectures/02-b/Why_normalize.png -------------------------------------------------------------------------------- /lectures/02-b/step_function.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/lectures/02-b/step_function.png -------------------------------------------------------------------------------- /lectures/02-b/tanh_function.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/lectures/02-b/tanh_function.png -------------------------------------------------------------------------------- /lectures/08-a/images/loss_zoo.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/lectures/08-a/images/loss_zoo.png -------------------------------------------------------------------------------- /lectures/08-b/Good_Bad_Loss.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/lectures/08-b/Good_Bad_Loss.png -------------------------------------------------------------------------------- /lectures/08-b/Square_Square.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/lectures/08-b/Square_Square.png -------------------------------------------------------------------------------- /figs/VOC2007_SVM_classification.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/VOC2007_SVM_classification.png -------------------------------------------------------------------------------- /labs/03/images/05-piecewise_reg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/labs/03/images/05-piecewise_reg.png -------------------------------------------------------------------------------- /labs/09/images/val_acc_dropout.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/labs/09/images/val_acc_dropout.png -------------------------------------------------------------------------------- /labs/09/images/val_acc_nothing.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/labs/09/images/val_acc_nothing.png 
-------------------------------------------------------------------------------- /lectures/02-b/sigmoid_function.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/lectures/02-b/sigmoid_function.png -------------------------------------------------------------------------------- /lectures/06-a/03-a/images/relu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/lectures/06-a/03-a/images/relu.png -------------------------------------------------------------------------------- /lectures/08-a/images/ebm_train.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/lectures/08-a/images/ebm_train.png -------------------------------------------------------------------------------- /figs/Regression with uncertainty.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/Regression with uncertainty.png -------------------------------------------------------------------------------- /labs/01/images/reflection_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/labs/01/images/reflection_example.png -------------------------------------------------------------------------------- /labs/03/images/05-random_at_first.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/labs/03/images/05-random_at_first.png -------------------------------------------------------------------------------- /labs/09/images/merged_loss_graph.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/labs/09/images/merged_loss_graph.png -------------------------------------------------------------------------------- /labs/09/images/merged_weights_hist.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/labs/09/images/merged_weights_hist.png -------------------------------------------------------------------------------- /labs/09/images/normal_distribution.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/labs/09/images/normal_distribution.png -------------------------------------------------------------------------------- /lectures/06-a/03-a/images/linear.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/lectures/06-a/03-a/images/linear.png -------------------------------------------------------------------------------- /lectures/06-a/03-a/images/not_wide.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/lectures/06-a/03-a/images/not_wide.png -------------------------------------------------------------------------------- /lectures/06-a/03-a/images/squash.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/lectures/06-a/03-a/images/squash.png -------------------------------------------------------------------------------- /lectures/06-a/03-a/images/stretch.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/lectures/06-a/03-a/images/stretch.png -------------------------------------------------------------------------------- /lectures/08-a/images/EBM_function.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/lectures/08-a/images/EBM_function.png -------------------------------------------------------------------------------- /lectures/08-a/images/keyword model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/lectures/08-a/images/keyword model.png -------------------------------------------------------------------------------- /lectures/08-a/images/obj_detection.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/lectures/08-a/images/obj_detection.png -------------------------------------------------------------------------------- /figs/Classical Graph LEarning Tasks.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/Classical Graph LEarning Tasks.png -------------------------------------------------------------------------------- /figs/Places205_linear_classification.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/Places205_linear_classification.png -------------------------------------------------------------------------------- /figs/Regression without uncertainty.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/Regression without uncertainty.png -------------------------------------------------------------------------------- /figs/quadratic_function_to_optimise.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/quadratic_function_to_optimise.png -------------------------------------------------------------------------------- /figs/regularisation_and_overfitting.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/regularisation_and_overfitting.PNG -------------------------------------------------------------------------------- /figs/truck-trailer-and-loading-dock.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/truck-trailer-and-loading-dock.png -------------------------------------------------------------------------------- /lectures/06-a/03-a/images/data_noise.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/lectures/06-a/03-a/images/data_noise.png -------------------------------------------------------------------------------- /lectures/08-a/images/conditional_EBM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/lectures/08-a/images/conditional_EBM.png -------------------------------------------------------------------------------- /Figures/quadratic_function_to_optimise.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/Figures/quadratic_function_to_optimise.jpg -------------------------------------------------------------------------------- /figs/Regression using Gaussian Process.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/Regression using Gaussian Process.png -------------------------------------------------------------------------------- /figs/Under_(over)_complete_Autoencoder.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/Under_(over)_complete_Autoencoder.png -------------------------------------------------------------------------------- /lectures/02-b/Benefit of normalization.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/lectures/02-b/Benefit of normalization.png -------------------------------------------------------------------------------- /lectures/06-a/03-a/images/data_no_noise.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/lectures/06-a/03-a/images/data_no_noise.png -------------------------------------------------------------------------------- /lectures/08-a/images/unconditional_EBM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/lectures/08-a/images/unconditional_EBM.png -------------------------------------------------------------------------------- /figs/NonLinearlySeparableParametricCurves.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/NonLinearlySeparableParametricCurves.png -------------------------------------------------------------------------------- /figs/Screen Shot 2019-05-03 at 2.03.51 PM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/Screen Shot 2019-05-03 at 2.03.51 PM.png -------------------------------------------------------------------------------- /figs/deep-learning-hierarchical-features.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/deep-learning-hierarchical-features.png -------------------------------------------------------------------------------- /figs/python code of getting mean and var.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/python code of getting mean and var.PNG -------------------------------------------------------------------------------- /figs/relational_learning_graphical_model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/relational_learning_graphical_model.png -------------------------------------------------------------------------------- /figs/Schematic_Illustration_of_Autoencoder.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/Schematic_Illustration_of_Autoencoder.png -------------------------------------------------------------------------------- /lectures/06-a/03-a/images/top_view_boundary.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/lectures/06-a/03-a/images/top_view_boundary.png -------------------------------------------------------------------------------- /lectures/08-a/images/latent_EBM_architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/lectures/08-a/images/latent_EBM_architecture.png -------------------------------------------------------------------------------- /labs/09/images/regularisation_and_overfitting.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/labs/09/images/regularisation_and_overfitting.PNG -------------------------------------------------------------------------------- /labs/13/images/Screen Shot 2019-05-03 at 2.03.51 PM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/labs/13/images/Screen Shot 2019-05-03 at 2.03.51 PM.png -------------------------------------------------------------------------------- /figs/ArchitectureForClassificationAndVisualizationInTheInputSpace.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/SP19-DL-collaborative-notes/master/figs/ArchitectureForClassificationAndVisualizationInTheInputSpace.png -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | This work is licensed under the Creative Commons 2 | Attribution-NonCommercial-ShareAlike 4.0 International License. 
3 | To view a copy of this license, visit 4 | http://creativecommons.org/licenses/by-nc-sa/4.0/ or send a letter to 5 | Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. -------------------------------------------------------------------------------- /preface.tex: -------------------------------------------------------------------------------- 1 | \chapter*{Preface} \label{chp:preface} 2 | 3 | This document aims to be a collection of lecture and laboratory notes, in an attempt to unify the mathematical notation and gather all resources related to understanding and mastering \emph{deep learning} in one single location. 4 | 5 | These notes are divided into five parts, which will constantly link to each other. 6 | These are \fullref{prt:theory}, where the main topics will be introduced, \fullref{prt:practice}, where an intuition will be built about abstract concepts, \fullref{prt:coding}, where we'll see how to get our hands dirty with actual neural nets, on a computer, \fullref{prt:apps}, where we'll encounter real-life examples and applications, and \fullref{prt:papers}, where short summaries of the papers we've discussed will be nicely collected.
-------------------------------------------------------------------------------- /prct/RM/12-a_sparse-coding/references.bib: -------------------------------------------------------------------------------- 1 | @article{kavukcuoglu2010fast, 2 | title={Fast inference in sparse coding algorithms with applications to object recognition}, 3 | author={Kavukcuoglu, Koray and Ranzato, Marc'Aurelio and LeCun, Yann}, 4 | journal={arXiv preprint arXiv:1010.3467}, 5 | year={2010} 6 | } 7 | @article{olshausen1997sparse, 8 | title={Sparse coding with an overcomplete basis set: A strategy employed by V1?}, 9 | author={Olshausen, Bruno A and Field, David J}, 10 | journal={Vision research}, 11 | volume={37}, 12 | number={23}, 13 | pages={3311--3325}, 14 | year={1997}, 15 | publisher={Elsevier} 16 | } 17 | @inproceedings{gregor2010learning, 18 | title={Learning fast approximations of sparse coding}, 19 | author={Gregor, Karol and LeCun, Yann}, 20 | booktitle={Proceedings of the 27th International Conference on Machine Learning}, 21 | pages={399--406}, 22 | year={2010}, 23 | organization={Omnipress} 24 | } -------------------------------------------------------------------------------- /prct/YC/01-b/practice.tex: -------------------------------------------------------------------------------- 1 | \chapter{The Manifold Hypothesis}\label{chp: manifold_hypothesis} 2 | % Authors: Hongyu (Florence) Lu, Michael Gold, Erica Dominic. 3 | % Lecture date: 1.28.19 4 | 5 | \section{Facial Expressions Thought Experiment} 6 | % Authors: Hongyu (Florence) Lu, Michael Gold, Erica Dominic. 7 | % Lecture date: 1.28.19 8 | 9 | Say we have infinitely many pictures of a person making all possible facial expressions. 10 | Each image is $2000 \times 1000$ pixels and has $3$ color channels, so each image is a $6,000,000$-dimensional vector. 11 | 12 | The set of all images is a small subset of $6,000,000$-dimensional space. 13 | What does that subset look like?
14 | What is the dimension of that surface? 15 | On a patch of that surface, how many dimensions can you move and still stay on that surface? 16 | 17 | That surface is a manifold (roughly speaking, a continuous surface). 18 | Moreover, it is limited by the number of degrees of freedom in the human face, which is bounded above by the number of muscle groups a person can control in his face. 19 | Ergo that subset of $\mathbb{R}^{6000000}$ is relatively low-dimensional. 20 | 21 | This thought experiment illustrates the manifold hypothesis, which postulates that natural data in high-dimensional space generally has a low-dimensional structure. -------------------------------------------------------------------------------- /arch/PK/digression_fourier_transform.tex: -------------------------------------------------------------------------------- 1 | \chapter{A digression on the Fourier Transform} 2 | % \author{David Brandfonbrener, Min Jae Song} 3 | % \date{3/11/19} 4 | 5 | \section{Definition} 6 | 7 | The Fourier transform takes a function of time (sometimes called a signal) and decomposes it into its frequencies. The Fourier transform refers to both the operation that decomposes the function and the resulting decomposition. The domain of the Fourier transform $\hat{f}(\xi)$ is called the ``frequency domain''. 8 | 9 | The Fourier transform maps a function $ f: \mathbb{R} \to \mathbb{C}$ into a function $\hat f$ as follows: 10 | \begin{align} 11 | \hat f(\xi) = \int_{-\infty}^\infty f(x) e^{-2\pi i \xi x}dx 12 | \end{align} 13 | Note that this can be inverted so that given the Fourier transform $ \hat f $ in the frequency domain we can recover the original function $ f$. 14 | 15 | In machine learning, we discretize the data and use the discrete Fourier transform.
Given a sequence $x_0,\dots, x_{n-1}$ we can map this to a sequence of frequencies $ \hat x_0,\dots, \hat x_{n-1}$: 16 | \begin{align} 17 | \hat x_k = \sum_{j=0}^{n-1} x_j e^{-2\pi i kj/n} = \sum_{j=0}^{n-1} x_j (\cos(2\pi kj/n) - i\sin(2\pi kj/n)) 18 | \end{align} 19 | The discrete Fourier transform can be calculated quickly by the Fast Fourier Transform (FFT) algorithm. There is also an analog of the Fourier transform for graphs, which is closely related to the graph Laplacian. 20 | 21 | \section{Connection to convolution} 22 | One nice property of the Fourier transform, called the convolution theorem, says that the Fourier transform of the convolution of two functions is equal to the product of the Fourier transforms of the functions in the frequency domain. 23 | A similar theorem applies to cross-correlations (which is what Convnets are doing). 24 | One nice application of this is that, according to Yann, cuDNN sometimes calculates convolutions by computing the FFT, performing the multiplication in the frequency domain, and then using the inverse Fourier transform to recover the result. -------------------------------------------------------------------------------- /prct/KF/convolution_demonstration.tex: -------------------------------------------------------------------------------- 1 | \chapter{Convolution Demonstration} 2 | % Authors: Mimee Xu , Sai Anirudh Kondaveeti, Rui Jiang(rj1407), 2/20/18. 3 | \section{Natural Signal Patterns} 4 | Neural networks can be used to model audio, image, text, or other signals. The signals are represented as sequences of scalars. Audio is often represented as waveform heights, images are often represented as pixel values, and text is often represented as one-hot vectors. 5 | 6 | Natural signals (not artificial or synthetic) tend to exhibit two important features, which are both crucial for convolutional neural networks. 7 | 8 | 1.
Stationarity - The waveform heights of audio signals form a sinusoidal pattern with similar sub-segments of peaks and valleys occurring repeatedly throughout the signal. 9 | 10 | 2. Locality - The correlation is high between two peaks in the waveform at nearby points in time, but the correlation is low between distant peaks. Sounds have ``local'' properties such as being more transient or smoother in the time-frequency domain. If an audio signal was shuffled, so that the index of the data no longer represented a position in time, the audio signal would no longer adhere to the stationarity and locality assumptions. 11 | 12 | 13 | 14 | \section{Audio Example} 15 | Here, we use the Python library ``librosa'', which is for audio and music analysis. After loading the audio file, we can display a waveform: 16 | 17 | %% Insert wave form here 18 | \begin{figure}[H] 19 | \centering 20 | \includegraphics[width=220pt]{figs/1.png} 21 | \label{fig:waveform} 22 | \end{figure} 23 | 24 | The waveform consists of the amplitude in the time domain. To analyze it in the frequency domain we use the discrete Fourier transform. As we want to know the frequencies over time, we apply the Fourier transform to the windowed signal. The resulting graph is called a spectrogram, which represents the signal in the frequency domain. 25 | 26 | %% Insert two spectrograms here 27 | \begin{figure}[ht] 28 | \centering 29 | \includegraphics[width=300pt]{figs/2.png} 30 | \label{fig:spectrogram} 31 | \end{figure} 32 | 33 | The challenge is to figure out which corresponding piano keys we should play to reproduce the audio. We can either pick the frequencies from the spectrogram or we can use all frequencies of piano keys. When we do a convolution of the audio sample with this reconstructed sample we will observe that the frequencies of the audio sample are separated.
34 | 35 | %% Add link to the video 36 | In the lab we did convolution with a reconstructed sample which consisted of the frequencies we guessed from the spectrogram. The spectrogram of the natural signal and the one we generated can be seen below. 37 | %% insert two spectrograms of original and reconstruction 38 | \begin{figure}[ht] 39 | \centering 40 | \includegraphics[width=300pt]{figs/3.png} 41 | \label{fig:spectrograms of original and reconstruction} 42 | \end{figure} 43 | -------------------------------------------------------------------------------- /arch/IC/graph-cnn.tex: -------------------------------------------------------------------------------- 1 | \chapter{Graph CNN and spectral networks} 2 | % \author{Yuqiong Li} 3 | % \date{March 2019} 4 | 5 | \section{Why do CNNs work so well} 6 | % Authors: Yuqiong Li 7 | % Lecture date: 3/11/2019 8 | CNNs have worked well on Euclidean structures (e.g. a regular grid) because of the following properties: 9 | \begin{enumerate} 10 | \item Images, videos and speech have translation-invariant properties. For example, one can consider images as sampled instances from distributions on the Euclidean space. They are stationary in that they are shift-invariant. 11 | \item Images exhibit scale separation 12 | \item Features are usually localized, meaning they are much smaller than the input image 13 | \item CNNs have a fixed number of parameters 14 | \item With advances in graphics cards, CNNs can be computed efficiently 15 | \end{enumerate} 16 | However, it is not immediately obvious how to extend CNNs to graphs because they do not have these properties as in Euclidean space. 17 | 18 | \section{Extend CNN on Graphs} 19 | % Authors: Yuqiong Li 20 | % Lecture date: 3/11/2019 21 | \subsection{A bit of graph theory} 22 | 23 | \begin{enumerate} 24 | \item A weighted undirected graph $G$ with 25 | vertices $V = \{1, \dots, n\}$, edges $E \subseteq V \times V$ 26 | and edge weights $w_{ij} \geq 0$ for $(i, j) \in E$.
27 | \item We can define functions over the vertices $L^2(V) = \{f : V \to \mathbb{R}\}$, each of which can be represented as a vector $f = (f_1, \ldots, f_n)$. 28 | \item We can define the unnormalized Laplacian for $f$ as $(\bigtriangleup f)_i = \sum_{j: (i, j)\in E} w_{ij} (f_i - f_j) $ which is the difference of $f$ and its local average. 29 | \end{enumerate} 30 | 31 | By doing eigendecomposition of a graph Laplacian, we can obtain its orthogonal eigenvectors as well as the corresponding non-negative eigenvalues. 32 | 33 | \subsection{Fourier analysis on Euclidean spaces} 34 | A function $f : [-\pi, \pi] \to \mathbb{R}$ can be written as a Fourier series as the following formula: 35 | \begin{align} 36 | f(x) = \sum_{k \geq 0} \langle f, e^{ikx} \rangle_{L^2([-\pi, \pi])} e^{ikx} 37 | \end{align} 38 | and the corresponding Fourier basis functions are the Laplacian eigenfunctions: $-\frac{d^2}{dx^2} e^{ikx} = k^2 e^{ikx}$. 39 | 40 | 41 | \subsection{Fourier analysis on graphs } 42 | A function $f : V \to \mathbb{R}$ can be written as a Fourier series as the following formula: 43 | \begin{align} 44 | f(x) = \sum_{k=1}^{n} \langle f, \phi_k \rangle_{L^2(V)} \phi_k 45 | \end{align} 46 | and the corresponding Fourier basis functions are the Laplacian eigenfunctions: $\bigtriangleup \phi_k = \lambda_k \phi_k$ with $\lambda_k$ being the frequency. 47 | 48 | \subsection{Convolution on Euclidean space } 49 | Given two functions $f, g : [-\pi, \pi] \to \mathbb{R}$, their convolution can be written as 50 | \begin{align} 51 | (f \star g) (x) = \int_{-\pi}^{\pi} f(x') g(x-x') \, dx' 52 | \end{align} 53 | 54 | \subsection{Spectral convolution} 55 | Finally, spectral convolution can be defined by analogy to convolution on Euclidean space as 56 | \begin{align} 57 | (f \star g) (x) = \sum_{k\geq 1} \langle f, \phi_k \rangle_{L^2(V)} \langle g, \phi_k \rangle_{L^2(V)} \phi_k 58 | \end{align} 59 | which is the inverse Fourier transform of the product of the Fourier coefficients of $f$ and $g$. 
-------------------------------------------------------------------------------- /arch/EN/hierarchical_representation.tex: -------------------------------------------------------------------------------- 1 | \chapter{Hierarchical Representation}\label{chp:Hierarchical Representation} 2 | % Authors: Hongyu (Florence) Lu, Michael Gold, Erica Dominic. 3 | % Lecture date: 1.28.19 4 | 5 | \section{The World as a Hierarchy} 6 | % Authors: Hongyu (Florence) Lu, Michael Gold, Erica Dominic. 7 | % Lecture date: 1.28.19 8 | 9 | The world is inherently compositional and hierarchical: smaller pieces combine to form larger objects. 10 | Humans interpret the world as a hierarchy; even the visual cortex in mammals is hierarchical in nature. 11 | 12 | The goal of deep learning is to have a machine correctly extract hierarchical representations. 13 | Ideally, a deep neural network should detect features at one level, then detect combinations of those features at the next level. 14 | It is important to note that not every combination of features at one level exists in the next. 15 | 16 | \subsection{Images} 17 | % Authors: Hongyu (Florence) Lu, Michael Gold, Erica Dominic. 18 | % Lecture date: 1.28.19 19 | 20 | For example, from a patch of pixels, we want to detect edges (usually by an abrupt change in color of adjacent pixels). 21 | From edges we can discern textons (e.g. corners, crosses). 22 | From textons we can detect motifs, then parts of objects, and finally those parts can be pieced together to detect objects within the image. 23 | 24 | Geometrically, if we take all $5\times5$ patches of pixels in an image, we will get a collection of $25$-dimensional vectors. 25 | These vectors, however, would likely comprise a small (low-dimensional) part of $\mathbb{R}^{25}$. 
26 | 27 | \begin{figure}[ht] 28 | \centering 29 | \includegraphics[width=100mm]{figs/cnn_hierarchy.png} 30 | \caption{Hierarchy from a convolutional neural network} 31 | \label{fig:cnn_hierarchy} 32 | \end{figure} 33 | 34 | \Cref{fig:cnn_hierarchy} shows an example from a convolutional neural network. 35 | The left pane shows detected edges, color patches, and gradients. 36 | The middle pane has pieced those attributes together to detect textures and shapes, such as round shapes and corners. 37 | Finally, the right pane contains discernible parts of objects. 38 | 39 | \subsection{Text} 40 | % Authors: Hongyu (Florence) Lu, Michael Gold, Erica Dominic. 41 | % Lecture date: 1.28.19 42 | 43 | The same idea can be applied to textual analysis: combinations of characters become words, combinations of words make word groups, which assemble to make clauses, which can be grouped to make sentences, and finally a collection of sentences create a story. 44 | 45 | Again, not every combination of features at one level becomes significant in the next, e.g. not every combination of words forms a valid sentence. 46 | 47 | \subsection{Speech} 48 | % Authors: Hongyu (Florence) Lu, Michael Gold, Erica Dominic. 49 | % Lecture date: 1.28.19 50 | 51 | An audio sample is just a single number, but the frequency content of a waveform can be represented by a feature vector, and those waveforms can be pieced together to form sounds, which can then be pieced together to form syllables, etc. 52 | Ultimately phones and phonemes are formed and pieced into words. 
53 | -------------------------------------------------------------------------------- /prct/KF/visualizing_2D_interpolations.tex: -------------------------------------------------------------------------------- 1 | \chapter{Visualizing 2D interpolation} 2 | 3 | % Authors: Pedro Manuel Herrero Vidal, Doruk Kilitcioglu 4 | % Lecture date: 2/27/2019 5 | 6 | We take the data simulated in the 04-spiral\_classification notebook (\href{https://github.com/Atcold/pytorch-Deep-Learning-Minicourse/blob/master/04-spiral\_classification.ipynb} {"Spiral Classification"}), 7 | consisting of 3,000 samples of dimension 2 (Fig. ~\ref{fig:NonLinearlySeparableParametricCurves2}). 8 | The data is generated from the following expression: 9 | 10 | \[ 11 | X_c(t) = t 12 | \begin{bmatrix} 13 | \sin{\frac{2\pi}{C} (2t+c+1) + \mathcal{N} (0, \sigma^2)} \\ 14 | \cos{\frac{2\pi}{C} (2t+c+1) + \mathcal{N} (0, \sigma^2)} 15 | \end{bmatrix} 16 | \] 17 | %\noindent 18 | where $0 \leq t \leq 1$ and classes $c=1, ..., C$. 19 | 20 | 21 | \begin{figure}[ht] 22 | \centering 23 | \includegraphics[width=0.5\textwidth]{figs/NonLinearlySeparableParametricCurves.png} 24 | \caption{3 non linearly separable curves consisting in 3,000 samples from $X \in {{\rm I\!R}}^2$.} 25 | \label{fig:NonLinearlySeparableParametricCurves2} 26 | \end{figure} 27 | 28 | \noindent 29 | To classify the data into the three categories, we are going to use a three layer net with: 30 | \begin{itemize} 31 | \item[(1)] 2 input units (dimensionality of the data). 32 | \item[(2)] A 100-units hidden layer (used to increase the space and extract features). 33 | \item[(3)] Followed by a non-linear ReLU activation function. 34 | \item[(4)] We then go back down to 2D again which is useful for visualization. 35 | \item[(5)] Lastly we can bring it back up to 3D with a final linear transformation to a 3-unit output layer with softmax activation function for classification 36 | (Fig. 
~\ref{fig:ArchitectureForClassificationAndVisualizationInTheInputSpace}). 37 | The last linear transformation involves the multiplication of $\matr{A^{(2)}}$ and $\matr{W^{(2)}}$, 38 | where the different rows of $\matr{W^{(2)}}$ are the 2D vectors that will point at the different classes in the 2D space. 39 | In this representation, the different classes are linearly separable. 40 | \end{itemize} 41 | 42 | \begin{figure}[!h] 43 | \centering 44 | \includegraphics[width=170mm]{figs/ArchitectureForClassificationAndVisualizationInTheInputSpace.png} 45 | \caption{Neural network architecture for classification and visualization in the input space. (a) Architecture of the neural network, which takes 2D inputs and visualizes in the same dimensional space before classification. (b) Linear transformation that takes $\matr{A^{(2)}}$ and returns $\matr{A^{(3)}}$ via multiplication with $\matr{W^{(2)}}$. Rectangular color boxes in $\matr{W^{(2)}}$ represent the three 2D vectors that will define each class. Calculation of $\matr{A^{(3)}}$ is followed by a softmax activation function for classification. The bottom half of (b) shows the transformed data and the population vectors (plot credit: Alfredo Canziani).} 46 | \label{fig:ArchitectureForClassificationAndVisualizationInTheInputSpace} 47 | \end{figure} 48 | 49 | \noindent 50 | 51 | In order to visualize each transformation, we do a linear interpolation from the input to the output at the 2D hidden layer using: 52 | 53 | \[ 54 | (1-\alpha) x^{(i)} + \alpha \phi (x^{(i)}) ~~\text{ where } 0 \leq \alpha \leq 1 55 | \] 56 | -------------------------------------------------------------------------------- /prct/BW/09-lab/practice.tex: -------------------------------------------------------------------------------- 1 | \chapter{Regularization} 2 | % Authors: Liangzhi Li (editor), Lekha Iyengar, Subhadarshi Panda, 04/02/19. 
3 | \section{Problem} 4 | We attempt to study the effect of L1 regularization, L2 regularization and Dropout in sentiment analysis of the IMDB dataset. We split our data into training (17500 samples), validation (7500 samples) and test (2500 samples) sets. 5 | 6 | \section{Data processing} 7 | We follow these steps while processing the IMDB dataset: 8 | \begin{itemize} 9 | \item[1)] Tokenization: break sentence into individual words 10 | \begin{itemize} 11 | \item Before: "PyTorch seems really easy to use!" 12 | \item After: ["PyTorch", "seems", "really", "easy", "to", "use", "!"] 13 | \end{itemize} 14 | \item[2)] Building vocabulary: build an index of words associated with unique numbers 15 | \begin{itemize} 16 | \item Before: ["PyTorch", "seems", "really", "easy", "to", "use", "!"] 17 | \item After: \{"PyTorch": 0, "seems": 1, "really": 2, ...\} 18 | \end{itemize} 19 | We use a vocabulary of 1000 words in this example 20 | \item[3)] Convert to numerals: map words to unique numbers (indices) 21 | \begin{itemize} 22 | \item Before: \{"PyTorch": 0, "seems": 1, "really": 2, ...\} 23 | \item After: [0, 1, 2, ...] 24 | \end{itemize} 25 | \item[4)] Embedding look-up: map sentences (indices now) to fixed matrices 26 | \begin{itemize} 27 | \item {[[0.1, 0.4, 0.3], [0.8, 0.1, 0.5], ...]} 28 | \end{itemize} 29 | \end{itemize} 30 | 31 | \section{Architecture} 32 | We use a feed-forward neural network (FNN) for our task (not an RNN/LSTM/GRU). An FNN does not contain any cycles or loops in the network. There are no feedback connections in which outputs of the model are fed back. It cannot handle flexible sequence lengths so we have to fix the length of the input. We use a 3-layer FNN where the first layer is the embedding layer. This is followed by a hidden layer with ReLU activation. We have a third output layer. We apply a sigmoid function to our output so that our loss doesn't have to go through sigmoid again. 33 | There are two sentiments for a review - negative or positive. 
This is a binary classification problem so Binary Cross Entropy loss (BCE loss) is used. 34 | 35 | \section{Experiments} 36 | We experiment with the following regularizations in the classification problem: 37 | \begin{itemize} 38 | \item No regularization 39 | \item Add L1 regularization 40 | \item Add L2 regularization 41 | \item Add Dropout 42 | \end{itemize} 43 | When no regularization is applied, the network overfits the training data and the validation loss is much larger than the training loss. When we apply L1, L2 or Dropout regularization, the validation loss is comparable to the training loss and the network does not overfit anymore. 44 | 45 | \begin{figure} 46 | \centering 47 | \includegraphics[scale=0.25]{figs/merged_loss_graph.png} 48 | \caption{Loss Graph} 49 | \label{fig:Loss Graph} 50 | \end{figure} 51 | 52 | \begin{figure} 53 | \centering 54 | \includegraphics[scale=0.25]{figs/merged_acc.png} 55 | \caption{Validation accuracy} 56 | \label{fig:Validation accuracy} 57 | \end{figure} 58 | 59 | We see that when we apply dropout or no regularization to the network, the weights are more distributed. For L1 and L2 regularization though, most of the weights are close to 0. The L2 weights distribution is more spread out than L1 as it is like a Gaussian distribution, while L1 weights are like Laplacian distribution. 
60 | 61 | \begin{figure} 62 | \centering 63 | \includegraphics[scale=0.25]{figs/merged_weights_hist.png} 64 | \caption{Weights distribution for different regularizations } 65 | \label{fig:Weights distribution for different regularizations} 66 | \end{figure} 67 | 68 | \begin{figure} 69 | \centering 70 | \includegraphics[scale=0.3]{figs/weights_dist.png} 71 | \caption{Weights distribution comparison} 72 | \label{fig:Weights distribution comparison} 73 | \end{figure} -------------------------------------------------------------------------------- /preamble.tex: -------------------------------------------------------------------------------- 1 | \usepackage[UKenglish]{babel} 2 | \usepackage{graphicx} 3 | \usepackage{natbib} 4 | \usepackage[colorlinks]{hyperref} 5 | \usepackage[dvipsnames]{xcolor} 6 | \usepackage{ amssymb, dsfont } 7 | \usepackage{tikz} 8 | \usepackage{dsfont} 9 | \usepackage{mathrsfs} 10 | 11 | 12 | \hypersetup{ 13 | linkcolor = Magenta, 14 | citecolor = Aquamarine, 15 | urlcolor = Periwinkle, 16 | linktoc = page 17 | } 18 | \usepackage{algorithm} 19 | \usepackage[noend]{algpseudocode} 20 | \usepackage{amsmath,amssymb,bm} 21 | 22 | \usepackage{enumitem} 23 | \usepackage{subcaption} 24 | \usepackage{wrapfig} 25 | \usepackage{minted} % Code embedding in document 26 | \usepackage[none]{hyphenat} 27 | 28 | \usepackage{mathtools} 29 | \usepackage{verbatim} 30 | \usepackage[export]{adjustbox} 31 | 32 | 33 | \makeatletter 34 | \newcommand\RedeclareMathOperator{% 35 | \@ifstar{\def\rmo@s{m}\rmo@redeclare}{\def\rmo@s{o}\rmo@redeclare}% 36 | } 37 | % this is taken from \renew@command 38 | \newcommand\rmo@redeclare[2]{% 39 | \begingroup \escapechar\m@ne\xdef\@gtempa{{\string#1}}\endgroup 40 | \expandafter\@ifundefined\@gtempa 41 | {\@latex@error{\noexpand#1undefined}\@ehc}% 42 | \relax 43 | \expandafter\rmo@declmathop\rmo@s{#1}{#2}} 44 | % This is just \@declmathop without \@ifdefinable 45 | \newcommand\rmo@declmathop[3]{% 46 | 
\DeclareRobustCommand{#2}{\qopname\newmcodes@#1{#3}}% 47 | } 48 | \@onlypreamble\RedeclareMathOperator 49 | \makeatother 50 | 51 | 52 | 53 | 54 | \DeclarePairedDelimiter\ceil{\lceil}{\rceil} 55 | \DeclarePairedDelimiter\floor{\lfloor}{\rfloor} 56 | 57 | \DeclareMathOperator*{\argmax}{arg\,max} % in your preamble 58 | \DeclareMathOperator*{\argmin}{arg\,min} % in your preamble 59 | 60 | \usepackage[nameinlink,noabbrev]{cleveref} 61 | \newcommand*{\fullref}[1]{\hyperref[{#1}]{\Cref*{#1} -- \nameref*{#1}}} 62 | 63 | 64 | 65 | 66 | % multiline comment 67 | \renewcommand{\comment}[1]{} 68 | 69 | % Number sets 70 | \newcommand{\N}{\mathbb{N}} % Natural Numbers 71 | \newcommand{\Z}{\mathbb{Z}} % Integers 72 | \newcommand{\Q}{\mathbb{Q}} % Quotient 73 | \newcommand{\R}{\mathbb{R}} % Real Numbers 74 | \newcommand{\C}{\mathbb{C}} % Complex Numbers 75 | 76 | % Probability 77 | \newcommand{\freq}[1]{\text{Freq}\left(#1\right)} % Frequency 78 | \newcommand{\pr}[1]{\mathbb{P}\left(#1\right)} % Probability 79 | \newcommand{\expectation}[1]{\mathbb{E}_{#1}} 80 | \newcommand{\E}[2][]{\expectation{#1}\left[#2\right]} % Expectation 81 | \newcommand{\var}[1]{\mathbb{V}{\left(#1\right)}} % Variance 82 | \newcommand{\cov}[1]{\text{Cov}{\left(#1\right)}} % Covariance 83 | \newcommand{\corr}[1]{\rho{\left(#1\right)}} % Correlation 84 | 85 | % Probability Distributions 86 | \newcommand{\dber}[1]{\mathrm{Bern}(#1)} %Bernoulli 87 | \newcommand{\dbeta}[1]{\mathrm{Beta}(#1)} %Beta 88 | \newcommand{\dbin}[1]{\mathrm{Bin}(#1)} %Binomial 89 | \newcommand{\dpoi}[1]{\mathrm{Poi}(#1)} %Poisson 90 | \newcommand{\ddir}[1]{\mathrm{Dir}(#1)} %Dirichlet 91 | \newcommand{\dgamma}[1]{\mathrm{Gam}(#1)} %Gamma 92 | \newcommand{\dnorm}[1]{\mathcal{N}(#1)} %Normal/Gaussian 93 | \newcommand{\dst}[1]{\mathrm{St}(#1)} %Student t 94 | \newcommand{\duni}[1]{\mathrm{U}(#1)} %Uniform 95 | \newcommand{\dwis}[1]{\mathcal{W}(#1)} %Wishart 96 | 97 | % Linear Algebra 98 | \newcommand{\vect}[1]{\bm{#1}} % vectors 99 | 
\newcommand{\matr}[1]{\bm{#1}} % matrices and tensors 100 | \newcommand{\tr}[1]{#1^{\top}} % transpose 101 | \renewcommand{\sp}[2]{#1^{\top}#2} %scalar product 102 | 103 | % Analysis 104 | \let\dbar\d \renewcommand{\d}{\mathrm{d}} 105 | \newcommand{\D}{\,\mathrm{d}} % differential 106 | \newcommand{\deriv}[2]{\frac{\d #1}{\d #2}} % derivative 107 | \newcommand{\pd}[2]{\frac{\partial #1}{\partial #2}} % partial derivative 108 | \newcommand{\set}[1]{\left\lbrace #1 \right\rbrace} % set 109 | \newcommand{\where}{\ \middle\vert\;} % where 110 | \newcommand{\abs}[1]{\left\vert #1 \right\vert} % absolute value 111 | \newcommand{\norm}[2][]{{\left\Vert #2 \right\Vert}_{#1}} % norm -------------------------------------------------------------------------------- /labs/09/practice.tex: -------------------------------------------------------------------------------- 1 | \chapter{Regularization} 2 | % Authors: Liangzhi Li (editor), Lekha Iyengar, Subhadarshi Panda, 04/02/19. 3 | \section{Problem} 4 | We attempt to study the effect of L1 regularization, L2 regularization and Dropout in sentiment analysis of the IMDB dataset. We split our data into training (17500 samples), validation (7500 samples) and test (2500 samples) sets. 5 | 6 | \section{Data processing} 7 | We follow these steps while processing the IMDB dataset: 8 | \begin{itemize} 9 | \item[1)] Tokenization: break sentence into individual words 10 | \begin{itemize} 11 | \item Before: "PyTorch seems really easy to use!" 
12 | \item After: ["PyTorch", "seems", "really", "easy", "to", "use", "!"] 13 | \end{itemize} 14 | \item[2)] Building vocabulary: build an index of words associated with unique numbers 15 | \begin{itemize} 16 | \item Before: ["PyTorch", "seems", "really", "easy", "to", "use", "!"] 17 | \item After: \{"PyTorch": 0, "seems": 1, "really": 2, ...\} 18 | \end{itemize} 19 | We use a vocabulary of 1000 words in this example 20 | \item[3)] Convert to numerals: map words to unique numbers (indices) 21 | \begin{itemize} 22 | \item Before: \{"PyTorch": 0, "seems": 1, "really": 2, ...\} 23 | \item After: [0, 1, 2, ...] 24 | \end{itemize} 25 | \item[4)] Embedding look-up: map sentences (indices now) to fixed matrices 26 | \begin{itemize} 27 | \item {[[0.1, 0.4, 0.3], [0.8, 0.1, 0.5], ...]} 28 | \end{itemize} 29 | \end{itemize} 30 | 31 | \section{Architecture} 32 | We use a feed-forward neural network (FNN) for our task (not an RNN/LSTM/GRU). An FNN does not contain any cycles or loops in the network. There are no feedback connections in which outputs of the model are fed back. It cannot handle flexible sequence lengths so we have to fix the length of the input. We use a 3-layer FNN where the first layer is the embedding layer. This is followed by a hidden layer with ReLU activation. We have a third output layer. We apply a sigmoid function to our output so that our loss doesn't have to go through sigmoid again. 33 | There are two sentiments for a review - negative or positive. This is a binary classification problem so Binary Cross Entropy loss (BCE loss) is used. 34 | 35 | \section{Experiments} 36 | We experiment with the following regularizations in the classification problem: 37 | \begin{itemize} 38 | \item No regularization 39 | \item Add L1 regularization 40 | \item Add L2 regularization 41 | \item Add Dropout 42 | \end{itemize} 43 | When no regularization is applied, the network overfits the training data and the validation loss is much larger than the training loss. 
When we apply L1, L2 or Dropout regularization, the validation loss is comparable to the training loss and the network does not overfit anymore. 44 | 45 | \begin{figure} 46 | \centering 47 | \includegraphics[scale=0.25]{labs/09/images/merged_loss_graph.png} 48 | \caption{Loss Graph} 49 | \label{fig:Loss Graph} 50 | \end{figure} 51 | 52 | \begin{figure} 53 | \centering 54 | \includegraphics[scale=0.25]{labs/09/images/merged_acc.png} 55 | \caption{Validation accuracy} 56 | \label{fig:Validation accuracy} 57 | \end{figure} 58 | 59 | We see that when we apply dropout or no regularization to the network, the weights are more distributed. For L1 and L2 regularization though, most of the weights are close to 0. The L2 weights distribution is more spread out than L1 as it is like a Gaussian distribution, while L1 weights are like Laplacian distribution. 60 | 61 | \begin{figure} 62 | \centering 63 | \includegraphics[scale=0.25]{labs/09/images/merged_weights_hist.png} 64 | \caption{Weights distribution for different regularizations } 65 | \label{fig:Weights distribution for different regularizations} 66 | \end{figure} 67 | 68 | \begin{figure} 69 | \centering 70 | \includegraphics[scale=0.3]{labs/09/images/weights_dist.png} 71 | \caption{Weights distribution comparison} 72 | \label{fig:Weights distribution comparison} 73 | \end{figure} -------------------------------------------------------------------------------- /prct/KF/loss_functions_non-convex.tex: -------------------------------------------------------------------------------- 1 | \chapter{Loss Functions in Deep Learning are non-convex}\label{chp:Loss Functions in Deep Learning are non-convex} 2 | % Authors: tn1050@nyu.edu, vy404@nyu.edu, sk7685@nyu.edu. 3 | % Lecture date: 2.4.19 4 | 5 | Things like regression for a single layer are convex and the function you are optimizing is quadratic in x. However 6 | loss functions of pretty much every multi-layer network are non-convex and have multiple local minima. 
7 | Luckily, these local minima are all more or less equivalent. 8 | When we train a neural net with different initial conditions, then the solutions we get at the end are very different from each other. 9 | However, performances of all the solutions are more or less the same. 10 | 11 | \section{Example: Identity Function} 12 | % Authors: tn1050@nyu.edu, vy404@nyu.edu, sk7685@nyu.edu. 13 | % Lecture date: 2.4.19 14 | \Cref{fig:idfn} shows the loss surface of a two-layer neural net with one input, one hidden unit and one output, where the network tries to approximate the identity function. 15 | The input and the output are both 0.5. The cost function is squared error 16 | \begin{equation} 17 | L = (0.5 - \tanh (W_1 \tanh (W_0 \cdot 0.5)))^2 18 | \end{equation} 19 | % L = (0.5 - tanh(W_1 tanh (W_0*0.5)))^2 20 | The objective function looks as follows in the space of $W_0$, $W_1$. 21 | We get a saddle point at (0,0) where the curvature is positive in two directions and negative along the other two directions.\\ 22 | 23 | \begin{figure}[ht] 24 | \centering 25 | \includegraphics[width=100mm]{figs/Identity.PNG} 26 | \caption{Loss Surface of Identity Function} 27 | \label{fig:idfn} 28 | \end{figure} 29 | 30 | Neural networks with at least one non-linear activation function in their hidden layer(s) have non-convex loss surfaces. 31 | Convex optimization methods do not work if there is more than one global minimum. 32 | In this instance, we get two solutions symmetrical to both sides of the saddle point. 33 | Those solutions are essentially hyperbolas. If we didn't have the hyperbolic tangents and the activation functions were linear, then we could choose any value for the weight $W_0$ and its inverse for the weight $W_1$ and get the identity function. 
34 | For instance, if we set $W_0$ to 2 and $W_1$ to $\frac{1}{2}$ and forget about the hyperbolic tangents then we get an identity function.\\ 35 | 36 | We can choose both positive and negative weights and still get a solution as we have the other solution space on the other side. 37 | This network has a highly non-convex objective function. If we start at points that are a small distance apart from each other, we can end into different local minima. 38 | Actually, those two minima are equivalent, so it does not matter where we start. 39 | There is empirical evidence that in neural networks, it is very often the case, that there are lots of different solutions, but they are basically equivalent. \\ 40 | 41 | Even if we have a simpler version without hyperbolic tangent and completely linear functions, still we see that the solution is similar. 42 | One other explanation for why regularizers are bad at the beginning of the training is that when we initialize the weights, we have to make sure the weights are initialized to non-zero values since otherwise the learning never takes off and it stays at a saddle point where there is no gradient. 43 | Similarly, the backpropagation with zero weights does not update anything and the neural net never takes off. 44 | The magnitude of the weights is very important and we must pay special attention that we initialize the weights appropriately.\\ 45 | 46 | The intuition behind this is that if you have a unit with many inputs that are normalized, then the weighted sum of this unit is the weighted sum of random variables with standard deviation/variance one. 47 | The variance of the output will thus be the weighted sum of the variance of the inputs. 48 | The variance of the weighted sum is the sum of the variances of the inputs multiplied by the square of the weights. 
49 | If we want our output to have variance one to preserve the variance, then we need to set the weights accordingly - to smaller values if we have many inputs and to bigger values if we have fewer inputs. 50 | Scaling the weights with a factor that is $\frac{1}{\sqrt{N}}$ would preserve variance of inputs in different layers. 51 | -------------------------------------------------------------------------------- /instructions.tex: -------------------------------------------------------------------------------- 1 | \chapter*{Instructions} 2 | 3 | There are overall 42 hours of lectures. 4 | For each hour there is a group of people assigned to summarise what happened in class \textbf{in roughly three pages}. 5 | Each group is made of three students: two writers and a reviewer. 6 | 7 | \section*{Writing directions} 8 | 9 | Split your writing across the five parts of this document according to where it seems fit (see \nameref{chp:preface}). 10 | Be consistent with the notation here specified. 11 | \begin{itemize}[noitemsep,nolistsep] 12 | \item Use \verb|\vect{}| and \verb|\matr{}| to decorate vectors and matrices respectively. 13 | \item Start a new line \textbf{only and every time} you end a sentence with a period `.'; the \LaTeX\ engine will ignore this, but \verb|git| will love you. 14 | \item Leave an empty line to start a new paragraph, and don't use the `\verb|\\|' break line (see \url{tex.stackexchange.com/a/225925/33287}). 15 | \item Add date and group's authors name \textbf{as a comment}, after every \verb|\chapter|, \verb|\section|, and \verb|\subsection|. 16 | \item The transposition symbol is obtained with \verb|^\top|. For example $(AB)^\top = B^\top A^\top$. 17 | \end{itemize} 18 | 19 | Feel free to create new chapters, sections, and subsections with corresponding labels \verb|\label{chp:}|, \verb|\label{sct:}|, and \verb|\label{ssc:}|. 
20 | 21 | \section*{Peer reviewing within groups} 22 | 23 | Check for notation consistency, correctness, grammar, figure captioning, usage of \verb|\cref{}| instead of \verb|\ref{}|, \verb|\vect{}|, and \verb|\matr{}| to decorate vectors and matrices respectively, unnecessary use of bullet points or itemisations, missing references and use of links to papers PDF instead, usage of $\pr{}$, $\E{}$, and $\var{}$ for probability, expectation, and variance respectively using \verb|\pr{}|, \verb|\E{}|, and \verb|\var{}|, \verb|\mid| for the conditional vertical bar, missing backslashes for $\log, \exp, \max$ and any badly formatted functions, $\ast$ for convolutions, $\odot$ for elementwise multiplication, usage of \verb|\caption[Short caption]{Full caption}|, use of the correct transposition symbol obtained with \verb|\tr{}|, just to name a few. 24 | 25 | \section*{Taking inspiration} 26 | 27 | You can take inspiration from the work done by the students at NYU, who collectively wrote up the lecture notes in \href{https://www.overleaf.com/read/pchjywcxjkxn 28 | }{this document} last year. 
29 | For example, you may reuse the following constructs, and others, at your convenience: 30 | \[ 31 | \matr{X} = \begin{bmatrix} 32 | \rule[0.5mm]{0.8cm}{0.1mm} \; \vect{x}^{(1)} \; \rule[0.5mm]{0.8cm}{0.1mm} \\ 33 | \rule[0.5mm]{0.8cm}{0.1mm} \; \vect{x}^{(2)} \; \rule[0.5mm]{0.8cm}{0.1mm} \\ 34 | \vdots \\ 35 | \rule[0.5mm]{0.8cm}{0.1mm} \; \vect{x}^{(m)} \; \rule[0.5mm]{0.8cm}{0.1mm} \\ 36 | \end{bmatrix}_{m \times n} 37 | \matr{Y} = \begin{bmatrix} 38 | \rule[0.5mm]{0.8cm}{0.1mm} \; \vect{y}^{(1)} \; \rule[0.5mm]{0.8cm}{0.1mm} \\ 39 | \rule[0.5mm]{0.8cm}{0.1mm} \; \vect{y}^{(2)} \; \rule[0.5mm]{0.8cm}{0.1mm} \\ 40 | \vdots \\ 41 | \rule[0.5mm]{0.8cm}{0.1mm} \; \vect{y}^{(m)} \; \rule[0.5mm]{0.8cm}{0.1mm} \\ 42 | \end{bmatrix}_{m \times K} 43 | \] 44 | \[ 45 | \hat{\matr{A}}\vect{x} = 46 | \begin{bmatrix} 47 | \rule[0.5mm]{0.8cm}{0.1mm} \; \hat{\vect{a}}^{(1)} \; \rule[0.5mm]{0.8cm}{0.1mm} \\ 48 | \rule[0.5mm]{0.8cm}{0.1mm} \; \hat{\vect{a}}^{(2)} \; \rule[0.5mm]{0.8cm}{0.1mm} \\ 49 | \vdots \\ 50 | \rule[0.5mm]{0.8cm}{0.1mm} \; \hat{\vect{a}}^{(m)} \; \rule[0.5mm]{0.8cm}{0.1mm} \\ 51 | \end{bmatrix} 52 | \begin{pmatrix} 53 | \vrule height 0.6cm \\ \vect{x} \\ \vrule height 0.6cm 54 | \end{pmatrix} = 55 | \begin{pmatrix} 56 | \hat{\vect{a}}^{(1)} \vect{x} \\ \hat{\vect{a}}^{(2)} \vect{x} \\ \vdots \\ \hat{\vect{a}}^{(m)} \vect{x} 57 | \end{pmatrix}_{m \times 1} 58 | \] 59 | \[ 60 | \matr{T}^{(1)}\vect{x} = 61 | \begin{bmatrix} 62 | a_{1,1} & a_{1,2} & \dotsc & a_{1,k} & 0 & 0 & \dotsc & 0 \\ 63 | 0 & a_{1,1} & a_{1,2} & \dotsc & a_{1,k} & 0 & \dotsc & 0 \\ 64 | 0 & 0 & a_{1,1} & a_{1,2} & \dotsc & a_{1,k} & \dotsc & 0 \\ 65 | \vdots & \vdots & \vdots & \ddots & \ddots & \ddots & \ddots & \vdots \\ 66 | 0 & \dotsc & 0 & 0 & a_{1,1} & a_{1,2} & \dotsc & a_{1,k} 67 | \end{bmatrix}_{(n-k+1) \times n} = 68 | \begin{pmatrix} 69 | \vect{a}^{(1)} \vect{x}_{1:1+k-1} \\ \vect{a}^{(1)} \vect{x}_{2:2+k-1} \\ \vdots \\ \vect{a}^{(1)} \vect{x}_{n-k+1:n} 70 | 
\end{pmatrix}_{(n-k+1) \times 1} 71 | \] 72 | 73 | Have fun! 74 | -------------------------------------------------------------------------------- /arch/BA/gnn_theory.tex: -------------------------------------------------------------------------------- 1 | \chapter{Graph Neural Networks} 2 | % Authors: Yu Cao 5/7/19. 3 | % Editor: Ben Ahlbrand 4 | \section{Graph Neural Network} 5 | A graph consists of vertices and edges together with the messages (information) associated with them, and can be used to formalize texts and images. Graph neural networks (GNNs) bring deep learning to graph structures. These GNNs learn embeddings on graphs. 6 | 7 | \begin{figure}[ht] 8 | \begin{center} 9 | % \fbox{\rule{0pt}{2in} \rule{0.9\linewidth}{0pt}} 10 | % \includegraphics[width=0.9\linewidth]{imgs/PnL_Problem} 11 | \includegraphics[width=3.38in]{figs/Embedding_ConvNet.png} 12 | \end{center} 13 | \caption{Convolutional Neural Networks: Learn embedding of an image} 14 | \label{fig:embedding_convnet} 15 | \end{figure} 16 | 17 | \begin{figure}[ht] 18 | \begin{center} 19 | % \fbox{\rule{0pt}{2in} \rule{0.9\linewidth}{0pt}} 20 | % \includegraphics[width=0.9\linewidth]{imgs/PnL_Problem} 21 | \includegraphics[width=3.38in]{figs/Embedding_Graph.png} 22 | \end{center} 23 | \caption{Graph Neural Network: Learn embedding on graphs} 24 | \label{fig:embedding_graph} 25 | \end{figure} 26 | 27 | GNNs are widely applied in Natural Language Processing, Medical Computation, as well as Computer Vision, as shown in figures 3, 4 and 5 below. 
28 | \begin{figure}[ht] 29 | \begin{center} 30 | % \fbox{\rule{0pt}{2in} \rule{0.9\linewidth}{0pt}} 31 | % \includegraphics[width=0.9\linewidth]{imgs/PnL_Problem} 32 | \includegraphics[width=3.38in]{figs/NLP.png} 33 | \end{center} 34 | \caption{Application in NLP} 35 | \label{fig:NLP} 36 | \end{figure} 37 | 38 | \begin{figure}[ht] 39 | \begin{center} 40 | % \fbox{\rule{0pt}{2in} \rule{0.9\linewidth}{0pt}} 41 | % \includegraphics[width=0.9\linewidth]{imgs/PnL_Problem} 42 | \includegraphics[width=3.38in]{figs/Biology.png} 43 | \end{center} 44 | \caption{Application in Biology} 45 | \label{fig:biology} 46 | \end{figure} 47 | 48 | \begin{figure}[ht] 49 | \begin{center} 50 | % \fbox{\rule{0pt}{2in} \rule{0.9\linewidth}{0pt}} 51 | % \includegraphics[width=0.9\linewidth]{imgs/PnL_Problem} 52 | \includegraphics[width=3.38in]{figs/Computer_Vision.png} 53 | \end{center} 54 | \caption{Application in Computer Vision} 55 | \label{fig:CV} 56 | \end{figure} 57 | 58 | Graph neural networks have been an emerging topic of research, as shown in the graph below: 59 | 60 | \begin{figure}[ht] 61 | \begin{center} 62 | % \fbox{\rule{0pt}{2in} \rule{0.9\linewidth}{0pt}} 63 | % \includegraphics[width=0.9\linewidth]{imgs/PnL_Problem} 64 | \includegraphics[width=3.38in]{figs/GNN Papers Published.png} 65 | \end{center} 66 | \caption{GNN papers published} 67 | \label{fig:gnn_papers} 68 | \end{figure} 69 | 70 | \section{Message Passing on Graphs} 71 | In this section, we will focus on how to perform computation on graph structures following the message passing paradigm. Many graph neural networks follow the \textit{message passing} computation model \href{https://arxiv.org/abs/1704.01212}{Gilmer et al, 2017}: 72 | \begin{itemize} 73 | \item Each node receives and aggregates messages from its neighbors 74 | \begin{gather} 75 | m_v^{t+1} = \sum\limits_{w\in \mathcal{N}(v)}M_t(h_v^t, h_w^t, e_{vw}^t) 76 | \end{gather} 77 | where $\mathcal{N}(v)$ is the neighbour set of node $v$. 
78 | 79 | \item Each node updates its own embedding using the aggregated messages: $h_v^{t+1} = U_t(h_v^t, m_v^{t+1})$, where $U_t$ is the node update function. 80 | \end{itemize} 81 | % [(a)] 82 | % \item 83 | % Each node receives and aggregates messages from its neighbors 84 | % % \begin{gather} 85 | % $$m_v^{t+1} = \sum\limits_{w\in \mathcal{N}(v)}M_t(h_v^t, h_w^t, e_{vw}^t)$$ 86 | % % \end{gather} 87 | % where $\mathcal{N}(v)$ is the neighbor set of node $v$. 88 | 89 | % \item 90 | % Each node update its own embedding using aggregated messages 91 | % $$h_v^{t+1} = U_t(h_v^t, m_v^{t+1})$$ 92 | 93 | % \end{enumerate} 94 | 95 | 96 | \section{Graph Convolutional Network} 97 | 98 | The graph convolutional network (GCN) is a popular model proposed by \href{https://arxiv.org/abs/1609.02907}{Kipf \& Welling} to encode graph structure by message passing. The high-level idea is similar to our toy task: node features are updated by aggregating the messages from the neighbors. Here is the message passing equation: 99 | 100 | $$ 101 | h_{v_i}^{(l+1)} = \sigma \left(\sum_{j\in\mathcal{N}(i)}\frac{1}{c_{ij}}h_{v_j}^{(l)}W^{(l)} \right), 102 | $$ 103 | 104 | where $v_i$ is any node in the graph; $h_{v_i}^{(l)}$ is the feature of node $v_i$ at layer $l$; $\mathcal{N}(i)$ denotes the neighborhood of $v_i$; $c_{ij}$ is the normalization constant related to node degrees; $W^{(l)}$ is the learnable weight matrix of layer $l$; and $\sigma$ is a non-linear activation function. 105 | -------------------------------------------------------------------------------- /prct/KF/automatic_differentiation.tex: -------------------------------------------------------------------------------- 1 | \chapter{Automatic Differentiation} 2 | % Authors: Joanna Bitton, Divyansh Khanna, Lind Xiao, 2/26/19. 3 | Automatic differentiation is a hybrid of symbolic differentiation and numerical differentiation. 
4 | It is extremely efficient when it is desired to differentiate functions of the form 5 | $$y_n=f_n(w_{n-1}, f_{n-1}(w_{n-2},\cdots f_0(w_0, x_0)\cdots)) $$ 6 | When back-propagation is performed, we would like $\partial y_{k}/\partial w_{\ell}$ to be expressed explicitly in terms of $y_k$. 7 | Thankfully, there are packages that handle auto-differentiation such that it does not have to be done manually. 8 | 9 | In \texttt{pytorch}, each tensor has an attribute called \texttt{grad\_fn} which handles the process of calculating $\partial y/\partial x$ with a given $y$. 10 | Note that \texttt{grad\_fn=None} for tensors that were not produced by a differentiable operation, such as tensors created directly by the user. 11 | For each derived tensor, \texttt{pytorch} internally builds a computation graph. 12 | Moreover, gradients can be constructed implicitly with the \texttt{backward()} method. 13 | 14 | For both memory and efficiency reasons, the computation graph is discarded once \texttt{backward()} has been called on it. 15 | By default, \texttt{backward()} cannot be called twice on the same computation graph. 16 | Once the parameters are updated, the scalar must be recomputed so that the computation graph is rebuilt at the new position before \texttt{backward()} is called again. 17 | 18 | In general, the \texttt{backward()} method requires an input of the same size as the tensor it is called on. 19 | \begin{minted}{python} 20 | y.backward(h) 21 | # h * J(x) = x.grad, where J(x) is the Jacobian calculated from y 22 | # though pytorch does not calculate and store Jacobian internally 23 | \end{minted} 24 | 25 | By default, an explicitly constructed tensor \texttt{x} 26 | has \texttt{x.requires\_grad=False}. 27 | The \texttt{requires\_grad} attribute can be manually turned on or off for leaf nodes on the computation graph (they do not depend on another tensor). 28 | If it is desired to turn off \texttt{requires\_grad} for an intermediate result, the result must be copied into a new tensor that no longer references the computation graph. 29 | The \texttt{.detach()} method is handy for this purpose. 
30 | \begin{minted}{python} 31 | n = 3 32 | x = torch.randn(n, requires_grad=True) 33 | w = torch.ones(n, requires_grad=True) 34 | 35 | z = w @ x 36 | 37 | z1 = z.detach() 38 | 39 | z2 = w * z1 @ x; 40 | z2.backward() 41 | print(w.grad, z1.grad, x.grad, sep='\n') 42 | \end{minted} 43 | The output would be: 44 | \begin{minted}{python} 45 | tensor([-0.2010, -0.0613, 0.5340]) 46 | None 47 | tensor([0.5213, 0.5213, 0.5213]) 48 | \end{minted} 49 | 50 | \chapter{Random Projections} 51 | % Authors: Joanna Bitton, Divyansh Khanna, Lind Xiao, 2/26/19. 52 | The dimensionality of data significantly impacts how well a machine learning model can generalize. 53 | Consider unit orthogonal vectors in space. 54 | With increasing dimensions, the number of orthogonal vectors increases exponentially (\href{https://www.cs.princeton.edu/courses/archive/fall15/cos521/lecnotes/lec12.pdf}{source}). 55 | To generalize well, the model needs at least \texttt{exp(d)} points, where \texttt{d} is the data dimension. 56 | This is because, for a new data point, the models should find another point in the dataset, which is reasonably close to it. 57 | 58 | The \href{https://github.com/Atcold/pytorch-Deep-Learning-Minicourse/blob/master/extra/a-projections.ipynb}{Jupyter notebook} illustrates the orthogonality of random projections using a simple example. 59 | It plots the projection \texttt{p} of a random matrix \texttt{A} with one of its randomly chosen rows \texttt{a}. 60 | Since \texttt{A} is a random matrix of unit vectors (normalized with unitary L-2 norm), \texttt{p} will have a value of \texttt{1} for one row. 61 | 62 | Figure 2 shows the matrix plots for \texttt{d=3}. 63 | There are at least three rows in the projections which have a value higher than \texttt{0.800}. 64 | Figure 3 shows the matrix plots for \texttt{d=5}. 65 | There are no rows with a value close to \texttt{1}, except for the one corresponding to \texttt{a}. 
66 | 67 | The intuition is that in low dimensions random vectors point roughly in same directions. 68 | However, in high dimensions, they point orthogonal to each other. 69 | As the dimensionality of the data increases, more points become further apart and there are fewer matches. 70 | 71 | \begin{figure} 72 | \centering 73 | \includegraphics[width=0.85\textwidth]{figs/dim3.png} 74 | \caption{Projecting p for A with dimensions (10, 3)} 75 | \label{fig:my_label1} 76 | \vspace{5mm} 77 | \includegraphics[width=0.85\textwidth]{figs/dim5.png} 78 | \caption{Projecting p for A with dimensions (10, 5)} 79 | \label{fig:my_label2} 80 | \end{figure} -------------------------------------------------------------------------------- /prct/YC/01-b/coding.tex: -------------------------------------------------------------------------------- 1 | \chapter{Multimodule Systems} 2 | % Authors: Hongyu (Florence) Lu, Michael Gold, Erica Dominic. 3 | % Lecture date: 1.28.19 4 | 5 | \section{Multi-layer Neural Network}\label{sec: Multilayer Neural Network} 6 | % Authors: Hongyu (Florence) Lu, Michael Gold, Erica Dominic. 7 | % Lecture date: 1.28.19 8 | 9 | In \cref{sec: Systems with Multiple Modules}, we discussed a forward multimodule system. 10 | The following code includes three different ways of building a two-layer neural network. 
11 | 12 | \begin{minted}{python} 13 | import torch 14 | from torch import nn 15 | 16 | image = torch.randn(3, 10, 20) 17 | in_size = image.nelement() 18 | h_size = 60 19 | out_size = 6 20 | 21 | #### Functional paradigm 22 | m1 = nn.Linear(in_size, h_size) 23 | m2 = nn.Linear(h_size, out_size) 24 | # forward prop 25 | hid = torch.relu(m1(image.view(-1))) 26 | out = m2(hid) 27 | 28 | #### Using containers 29 | model = nn.Sequential(m1, nn.ReLU(), m2) 30 | # forward prop 31 | out = model(image.view(-1)) 32 | 33 | #### Using object oriented programming 34 | class Net(nn.Module): 35 | def __init__(self, in_s, h_s, out_s): 36 | super().__init__() 37 | self.m1 = nn.Linear(in_s, h_s) 38 | self.m2 = nn.Linear(h_s, out_s) 39 | 40 | def forward(self, x): 41 | x = torch.relu(self.m1(x.view(-1))) 42 | x = self.m2(x) 43 | return x 44 | 45 | model = Net(in_size, h_size, out_size) 46 | out = model(image) 47 | \end{minted} 48 | 49 | First, we import torch and the \texttt{nn} module from torch in Python. 50 | The \texttt{nn} module has several predefined modules. 51 | The input is a random tensor of size $3 \times 10 \times 20$. 52 | We can think of this tensor as an image with $3$ RGB channels, $10$ rows, and $20$ columns. 53 | The total size is obtained by \texttt{image.nelement()}. 54 | Next, we build a neural network with two linear layers, which are multiplications by a matrix. 55 | 56 | Using the functional paradigm, we create the first module \texttt{m1} using \texttt{nn.Linear()} and give the sizes of the input \texttt{in\_size} and output \texttt{h\_size}. 57 | Similarly, the second module \texttt{m2}, which is also a linear module, is created with \texttt{nn.Linear()}. 58 | It takes a vector of the same size \texttt{h\_size} and produces a vector of the output size \texttt{out\_size}, which is $6$ (a $6$-way classification). 59 | Then, the forward propagation calls functions \texttt{m1} and \texttt{m2}. 
60 | The variable \texttt{hid} first applies the module \texttt{m1} to \texttt{image.view($-1$)}, that is, it takes the \texttt{image} and uses \texttt{view($-1$)} to turn a $3$-dimensional tensor into a single vector. 61 | Then, \texttt{hid} applies \texttt{ReLU} (Rectified Linear Unit) to the resulting vector. 62 | Each component of the vector is passed through a half-wave rectifier. 63 | Recall that the output of the \texttt{ReLU} function is the argument itself if the argument is positive and $0$ if the argument is negative. 64 | Lastly, we obtain the result \texttt{out} by taking the result \texttt{hid} and applying the second module \texttt{m2} to it. 65 | 66 | Another way of building a neural network is to use containers that define certain predefined structures. 67 | Instead of writing functions individually for each module, variable \texttt{model} uses a container, \texttt{nn.Sequential()}, to build a graph and pass the signal in the order of the input modules \texttt{m1}, \texttt{nn.ReLU()}, and \texttt{m2}. 68 | Note that the list of modules is called a sequence. 69 | Then, the forward propagation obtains the result \texttt{out} by applying \texttt{model} to \texttt{image.view($-1$)}. 70 | 71 | Lastly, we can define a class using object oriented programming for this particular two-layer neural network. 72 | First, we initiate parameters \texttt{in\_s}, \texttt{h\_s}, and \texttt{out\_s} for the input, hidden, and output sizes. 73 | Then, we create two linear modules \texttt{m1} and \texttt{m2} using \texttt{nn.Linear()}. 74 | Next, the forward function first takes the input \texttt{x} and turns it into a single vector. 75 | Then, the function applies the \texttt{m1} module to the vector and applies \texttt{ReLU} to the result. 76 | The module \texttt{m2} is then applied to the result \texttt{x}. 77 | The variable \texttt{model} then instantiates the class \texttt{Net}, and the variable \texttt{out} is obtained by applying \texttt{model} to the \texttt{image}. 
78 | Note that this method is already being implemented by PyTorch. 79 | 80 | When we run a feedforward neural network, PyTorch automatically calculates the gradient of the weight using backpropagation. -------------------------------------------------------------------------------- /arch/EN/nonlinear_dim_expansion.tex: -------------------------------------------------------------------------------- 1 | \chapter{Nonlinear Dimensionality Expansion} 2 | % Authors: Hongyu (Florence) Lu, Michael Gold, Erica Dominic. 3 | % Lecture date: 1.28.19 4 | 5 | \section{Motivation} 6 | % Authors: Hongyu (Florence) Lu, Michael Gold, Erica Dominic. 7 | % Lecture date: 1.28.19 8 | 9 | A network is ``deep'' if it has more than one stage of non-linear feature transformation. A natural question arises: why are deep networks necessary? 10 | Theoretically, kernel machines can approximate any function with as much precision as desired. 11 | However, that might be computationally expensive---too expensive to achieve anything in practice. 12 | 13 | \subsection{Cover's Theorem} 14 | % Authors: Hongyu (Florence) Lu, Michael Gold, Erica Dominic. 15 | % Lecture date: 1.28.19 16 | 17 | One way to rectify this problem is to bring the sample data into a higher dimension. 18 | Thomas Cover's theorem (1965) formalized this argument. 19 | 20 | \textbf{Cover's Theorem:} say you have a linear classifier in $N$ dimensional space with $P$ sample points, each randomly labeled with one of two class labels. 21 | Then \cref{fig:covers_theorem} roughly illustrates the probability that these points are linearly separable. 22 | 23 | \begin{figure}[ht] 24 | \centering 25 | \includegraphics[width=40mm]{figs/covers_theorem.png} 26 | \caption{Cover's Theorem: probability of being separable vs. dimension} 27 | \label{fig:covers_theorem} 28 | \end{figure} 29 | 30 | Ergo it makes sense to expand dimensionality of a representation because the data is more likely to be separable in a higher dimensional space. 
31 | One caveat: the expansion must be \emph{nonlinear}. 32 | That brings us to deep networks, which by definition are networks with more than one nonlinear stage. 33 | 34 | \subsection{The Manifold Hypothesis} 35 | % Authors: Hongyu (Florence) Lu, Michael Gold, Erica Dominic. 36 | % Lecture date: 1.28.19 37 | 38 | Before we discuss how to expand dimensionality in a nonlinear way, we should address one concern: will working in a higher dimension introduce an intractability problem? 39 | The manifold hypothesis suggests that it will not pose a problem. 40 | The manifold hypothesis postulates that natural data in high-dimensional space generally has a low-dimensional structure (see \cref{chp: manifold_hypothesis} for further discussion). 41 | The shape of that low-dimensional space is dictated by the intrinsic factors of variation in the data, and our ideal feature extractor would extract those factors of variation. 42 | 43 | \section{How to Expand Dimensionality Nonlinearly}\label{sec: expand_dim} 44 | % Authors: Hongyu (Florence) Lu, Michael Gold, Erica Dominic. 45 | % Lecture date: 1.28.19 46 | 47 | As described in the previous section, Cover's theorem and the manifold hypothesis urge us to expand the dimension of the representation (nonlinearly) and explore it in a higher-dimensional space. 48 | This is the pipeline for nonlinear dimensionality expansion: 49 | \begin{enumerate} 50 | \item Linearly expand the dimension (this can be done by multiplying by a matrix with more rows than columns) 51 | \item Apply a nonlinear transformation to each component of the vector 52 | \item Compress the data back into a smaller dimension (linearly or with pooling) 53 | \end{enumerate} 54 | 55 | \Cref{fig:nonlinear_expansion} illustrates the process of nonlinear dimensionality expansion. 56 | Between the first and second pane, the data is projected into a higher-dimensional space linearly (observe the three-dimensional axes) and transformed nonlinearly. 
57 | Between the second and third pane, the data is brought back down to a smaller dimension (observe the two-dimensional axes) via pooling/aggregation. 58 | 59 | \begin{figure}[ht] 60 | \centering 61 | \includegraphics[width=90mm]{figs/nonlinear_expansion.png} 62 | \caption{Nonlinear dimensionality expansion} 63 | \label{fig:nonlinear_expansion} 64 | \end{figure} 65 | 66 | \section{In the Context of the Deep Learning System Architecture} 67 | % Authors: Hongyu (Florence) Lu, Michael Gold, Erica Dominic. 68 | % Lecture date: 1.28.19 69 | 70 | \Cref{sec: expand_dim} outlined the process of expanding dimensionality in a nonlinear fashion. 71 | Here are those same steps again, this time in the context of the overall architecture for a deep learning system: 72 | \begin{enumerate} 73 | \item Begin with a representation of input data 74 | \item Normalize the input (mean $= 0$, standard deviation $= 1$) 75 | \item Linearly expand the dimension (this can be done by multiplying by matrix with more rows than columns) 76 | \item Apply a nonlinear transformation to each component of the vector 77 | \item Compress the data back into a smaller dimension (linearly or with pooling) 78 | \end{enumerate} 79 | This process can be repeated multiple times. 80 | -------------------------------------------------------------------------------- /code/IC/Multimodule_Systems_coding.tex: -------------------------------------------------------------------------------- 1 | \chapter{Multimodule Systems} 2 | % Authors: Hongyu (Florence) Lu, Michael Gold, Erica Dominic. 3 | % Lecture date: 1.28.19 4 | 5 | \section{Multi-layer Neural Network}\label{sec: Multilayer Neural Network} 6 | % Authors: Hongyu (Florence) Lu, Michael Gold, Erica Dominic. 7 | % Lecture date: 1.28.19 8 | 9 | In \cref{sec: Systems with Multiple Modules}, we discussed a forward multimodule system. 10 | The following code includes three different ways of building a two-layer neural network. 
11 | 12 | \begin{minted}{python} 13 | import torch 14 | from torch import nn 15 | 16 | image = torch.randn(3, 10, 20) 17 | in_size = image.nelement() 18 | h_size = 60 19 | out_size = 6 20 | 21 | #### Functional paradigm 22 | m1 = nn.Linear(in_size, h_size) 23 | m2 = nn.Linear(h_size, out_size) 24 | # forward prop 25 | hid = torch.relu(m1(image.view(-1))) 26 | out = m2(hid) 27 | 28 | #### Using containers 29 | model = nn.Sequential(m1, nn.ReLU(), m2) 30 | # forward prop 31 | out = model(image.view(-1)) 32 | 33 | #### Using object oriented programming 34 | class Net(nn.Module): 35 | def __init__(self, in_s, h_s, out_s): 36 | super().__init__() 37 | self.m1 = nn.Linear(in_s, h_s) 38 | self.m2 = nn.Linear(h_s, out_s) 39 | 40 | def forward(self, x): 41 | x = torch.relu(self.m1(x.view(-1))) 42 | x = self.m2(x) 43 | return x 44 | 45 | model = Net(in_size, h_size, out_size) 46 | out = model(image) 47 | \end{minted} 48 | 49 | First, we import torch and the \texttt{nn} module from torch in Python. 50 | The \texttt{nn} module has several predefined modules. 51 | The input is a random tensor of size $3 \times 10 \times 20$. 52 | We can think of this tensor as an image with $3$ RGB channels, $10$ rows, and $20$ columns. 53 | The total size is obtained by \texttt{image.nelement()}. 54 | Next, we build a neural network with two linear layers, which are multiplications by a matrix. 55 | 56 | Using the functional paradigm, we create the first module \texttt{m1} using \texttt{nn.Linear()} and give the sizes of the input \texttt{in\_size} and output \texttt{h\_size}. 57 | Similarly, the second module \texttt{m2}, which is also a linear module, is created with \texttt{nn.Linear()}. 58 | It takes a vector of the same size \texttt{h\_size} and produces a vector of the output size \texttt{out\_size}, which is $6$ (a $6$-way classification). 59 | Then, the forward propagation calls functions \texttt{m1} and \texttt{m2}. 
60 | The variable \texttt{hid} first applies the module \texttt{m1} to \texttt{image.view($-1$)}, that is, it takes the \texttt{image} and uses \texttt{view($-1$)} to turn a $3$-dimensional tensor into a single vector. 61 | Then, \texttt{hid} applies \texttt{ReLU} (Rectified Linear Unit) to the resulting vector. 62 | Each component of the vector is passed through a half-wave rectifier. 63 | Recall that the output of the \texttt{ReLU} function is the argument itself if the argument is positive and $0$ if the argument is negative. 64 | Lastly, we obtain the result \texttt{out} by taking the result \texttt{hid} and applying the second module \texttt{m2} to it. 65 | 66 | Another way of building a neural network is to use containers that define certain predefined structures. 67 | Instead of writing functions individually for each module, variable \texttt{model} uses a container, \texttt{nn.Sequential()}, to build a graph and pass the signal in the order of the input modules \texttt{m1}, \texttt{nn.ReLU()}, and \texttt{m2}. 68 | Note that the list of modules is called a sequence. 69 | Then, the forward propagation obtains the result \texttt{out} by applying \texttt{model} to \texttt{image.view($-1$)}. 70 | 71 | Lastly, we can define a class using object oriented programming for this particular two-layer neural network. 72 | First, we initiate parameters \texttt{in\_s}, \texttt{h\_s}, and \texttt{out\_s} for the input, hidden, and output sizes. 73 | Then, we create two linear modules \texttt{m1} and \texttt{m2} using \texttt{nn.Linear()}. 74 | Next, the forward function first takes the input \texttt{x} and turns it into a single vector. 75 | Then, the function applies the \texttt{m1} module to the vector and applies \texttt{ReLU} to the result. 76 | The module \texttt{m2} is then applied to the result \texttt{x}. 77 | The variable \texttt{model} then instantiates the class \texttt{Net}, and the variable \texttt{out} is obtained by applying \texttt{model} to the \texttt{image}. 
78 | Note that this method is already being implemented by PyTorch. 79 | 80 | When we run a feedforward neural network, PyTorch automatically calculates the gradient of the weight using backpropagation. -------------------------------------------------------------------------------- /arch/GS/convolutions.tex: -------------------------------------------------------------------------------- 1 | \chapter{Convolutions Theory} 2 | % Authors: Mimee Xu , Sai Anirudh Kondaveeti, Rui Jiang(rj1407), 2/20/18. 3 | The following chapter clarifies working with convolutions. 4 | Neural networks can be used to model audio, image, text, or other signals. The signals are represented as sequences of scalars. Audio is often represented as waveform heights. 5 | \section{Matrix Multiplication Review} 6 | Let's review multiplication between a matrix and a vector. 7 | \[ 8 | \vect{z} = {\matr{A}}\vect{x} = 9 | \begin{bmatrix} 10 | \rule[0.5mm]{0.8cm}{0.1mm} \; {\vect{a}}^{(1)} \; \rule[0.5mm]{0.8cm}{0.1mm} \\ 11 | \rule[0.5mm]{0.8cm}{0.1mm} \; {\vect{a}}^{(2)} \; \rule[0.5mm]{0.8cm}{0.1mm} \\ 12 | \vdots \\ 13 | \rule[0.5mm]{0.8cm}{0.1mm} \; {\vect{a}}^{(m)} \; \rule[0.5mm]{0.8cm}{0.1mm} \\ 14 | \end{bmatrix} 15 | \begin{pmatrix} 16 | \vrule height 0.6cm \\ \vect{x} \\ \vrule height 0.6cm 17 | \end{pmatrix} = 18 | \begin{pmatrix} 19 | {\vect{a}}^{(1)} \vect{x} \\ {\vect{a}}^{(2)} \vect{x} \\ \vdots \\ {\vect{a}}^{(m)} \vect{x} 20 | \end{pmatrix}_{m \times 1} 21 | \] 22 | Alternatively, we could write $\vect{z}$ as 23 | \[z = \begin{bmatrix} z_1 \\ z_2 \\ \cdots \\z_m\end{bmatrix} = \begin{bmatrix} \matr{a}^{(1)}\vect{x} \\ \matr{a}^{(2)}\vect{x} \\\cdots \\\matr{a}^{(m)}\vect{x}\end{bmatrix} 24 | = \begin{pmatrix} 25 | \vrule height 0.6cm \\ \vect{a}^{(1)} \\ \vrule height 0.6cm 26 | \end{pmatrix} \vect{x_1} + \begin{pmatrix} 27 | \vrule height 0.6cm \\ \vect{a}^{(2)} \\ \vrule height 0.6cm 28 | \end{pmatrix} \vect{x_2}+\cdots+\begin{pmatrix} 29 | \vrule height 0.6cm \\ \vect{a}^{(n)} \\ 
\vrule height 0.6cm 30 | \end{pmatrix} \vect{x_n}\] 31 | 32 | What does this look like in practice? 33 | Suppose $n=2$ and let $\vect{a}$ denote the $i$-th row of $\matr{A}$, with $\alpha$ and $\xi$ the angles that $\vect{a}$ and $\vect{x}$ make with the first axis; then the $i$-th component of $\vect{z}$ is 34 | \begin{align} 35 | z_i & = \vect{a}^T\vect{x} \\ 36 | & = a_1x_1 + a_2x_2 \\ 37 | &= \Vert\vect{a}\Vert \cos \alpha \Vert\vect{x}\Vert \cos\xi + \Vert\vect{a}\Vert \sin \alpha \Vert\vect{x}\Vert \sin\xi \\ 38 | &= \Vert\vect{a}\Vert\Vert\vect{x}\Vert (\cos\alpha \cos\xi + \sin\alpha \sin\xi) \\ 39 | &=\Vert\vect{a}\Vert \Vert\vect{x}\Vert\cos(\alpha-\xi) 40 | \end{align} 41 | 42 | 43 | This means $\vect{z}$ represents the \emph{alignment} between each row of $\matr{A}$ and $\vect{x}$. 44 | If the rows of $\matr{A}$ and $\vect{x}$ are unitary, all the norms are $1$, and $\vect{z}$ has just cosine values as entries. 45 | Here, aligning perfectly gives us the maximum value of positive $1$, while going the other direction achieves the most negative value $-1$. 46 | 47 | The next section talks about how this relates to convolutions. 48 | \section{Convolutions} 49 | Let $\matr{A}$ of dimension $3\times 4$ be our kernel. Consider the mapping that transforms $x_{11}, x_{12}, x_{13}:\, \to \vect{a}^{(1)} x[1:3]$, and fills in the values as the top left $1\times 3$ block of a new matrix. Completing this mapping, we construct the following transformation $\matr{T}$: every component $\matr{T}^{(i)}$ is multiplied by $\vect{x}$ to obtain a convolution. 
More generally, for width $k$, we have 50 | \[ 51 | \matr{T}^{(1)}\vect{x} = 52 | \begin{bmatrix} 53 | a_{1,1} & a_{1,2} & \dotsc & a_{1,k} & 0 & 0 & \dotsc & 0 \\ 54 | 0 & a_{1,1} & a_{1,2} & \dotsc & a_{1,k} & 0 & \dotsc & 0 \\ 55 | 0 & 0 & a_{1,1} & a_{1,2} & \dotsc & a_{1,k} & \dotsc & 0 \\ 56 | \vdots & \vdots & \vdots & \ddots & \ddots & \ddots & \ddots & \vdots \\ 57 | 0 & \dotsc & 0 & 0 & a_{1,1} & a_{1,2} & \dotsc & a_{1,k} 58 | \end{bmatrix}_{(n-k+1) \times n} \vect{x}= 59 | \begin{pmatrix} 60 | \vect{a}^{(1)} \vect{x}_{1:1+k-1} \\ \vect{a}^{(1)} \vect{x}_{2:2+k-1} \\ \vdots \\ \vect{a}^{(1)} \vect{x}_{n-k+1:n} 61 | \end{pmatrix}_{(n-k+1) \times 1} 62 | \] 63 | $\matr{T}^{(i)}$ here is a \textit{Toeplitz matrix}. What is the reason that $\matr{T}^{(i)}$ has $0$s padded in on the top right and lower left corners? 64 | 65 | Natural signals (not artificial or synthetic) tend to exhibit two important patterns, which are both crucial for convolutional neural networks: 66 | 67 | \subsubsection{Stationarity} 68 | 1. Stationarity - Patterns within the signal repeat themselves in multiple places throughout the signal. 69 | The types of features encountered in the signal don't depend on the location within the signal. The statistics of one part of the signal are the same as those of any other part. Some sources also note that stationarity implies that ``features that are useful in one region are also likely to be useful for other regions.'' We exploit stationarity by utilizing parameter-sharing, so $\vect{a}^{(1)}$ is used throughout all rows of $\matr{T}^{(1)}$. 70 | 71 | \subsubsection{Locality} 72 | 2. Locality - The correlation is high between nearby data points, but lower and lower between data points that are farther and farther away. 73 | So we don't care about the points that are far away, since they tend to be less related in natural signals. 
74 | 75 | 76 | -------------------------------------------------------------------------------- /prdg/QC/13/theory.tex: -------------------------------------------------------------------------------- 1 | \chapter{The Truck Backer-Upper} 2 | % Authors: Xiao Jing, Arnav Kansal, Changgeng Zhao, 4/30/19. 3 | 4 | \section{Set up} 5 | We try to design a self-learning nonlinear controller to control the steering of a trailer truck while backing up a loading dock from an arbitrary initial position. Only backing up is allowed to park the truck parallel to the dock and trying to match ($x_{trailer}$, $y_{trailer}$) with ($x_{dock}$, $y_{dock}$) as closely as possible. In the lecture, some students tried to play this game and we found it's very hard even for humans to accomplish the task. 6 | \\ 7 | \begin{figure}[H] 8 | \centering 9 | \includegraphics[width=0.7\textwidth]{figs/truck-trailer-and-loading-dock.png} 10 | \caption{The Truck, trailer and loading dock} 11 | \label{fig:general} 12 | \end{figure} 13 | State variables representing the position of the truck: $\theta_{cab}$, the angle of the truck, $x_{cab}$ and $y_{cab}$, the cartesian position of the yoke, $x_{trailer}$ and $y_{trailer}$, the cartesian position of the rear of the center of the trailer, $\theta_{trailer}$, the angle of the trailer. 14 | 15 | \section{Model and two stage learning} 16 | \subsection{Model} 17 | \begin{figure}[H] 18 | \centering 19 | \includegraphics[width=0.6\textwidth]{figs/the-two-part-model.png} 20 | \caption{The two part model} 21 | \label{fig:learn} 22 | \end{figure} 23 | The proposed model has two elements: the \textbf{Kinematics Component} (emulator) and the \textbf{Neural Net Controller}. The trailer truck Kinematics Component of the model tries to learn the next state of the truck given the current state and the steering action. The Neural Net Controller learns the steering action given the current state of the truck. 
We train the two models separately: first train the Kinematics Component and then based on this model we train the Controller by back-prop through Kinematics model. 24 | 25 | 26 | 27 | \subsection{Variable Input} 28 | In the paper, there are 6 parameters. However, we just need 4 variables in state $k$, which are shown in the next slide, since cab coordinate can be calculated according to the four values we have. 29 | \begin{figure}[H] 30 | \centering 31 | \includegraphics[width=0.6\textwidth]{figs/variable.png} 32 | \caption{Variable} 33 | \label{fig:variable} 34 | \end{figure} 35 | 36 | \subsection{Flow chart} 37 | The block labeled $C$ represent the controller and $T$ represent the emulator. The initial position of truck is chosen at random. The final error is used by back-propagation to adapt to each state Controller/Trailer. 38 | \begin{figure}[H] 39 | \centering 40 | \includegraphics[width=0.8\textwidth]{figs/training-the-controller.png} 41 | \label{fig:loss} 42 | \end{figure} 43 | Each time, we have 6 inputs for controller at state $k$, and we add bias and forward the inputs to 25 hidden units, and give 1 output unit. So there are 3 layers in the controller. 44 | \\ 45 | Then we put the controller's output to the Emulator, which has 45 hidden units and 6 output for the next state. It is analogous to a neural network having a number of layers equal to four times the number of backing up steps. 
46 | \begin{figure}[H] 47 | \centering 48 | \includegraphics[width=0.8\textwidth]{figs/details-of-emulator.png} 49 | \label{fig:nn} 50 | \end{figure} 51 | 52 | \subsection{Loss Function} 53 | Here $\theta_c$ (the cab angle) is $\theta_0$ and $\theta_t$ (the trailer angle) is $\theta_1$. 54 | 55 | \begin{equation*} 56 | \lVert\theta_{t}\rVert^2 + \lVert(x_{\text{trailer}}, y_{\text{trailer}}) - (x_{\text{dock}}, y_{\text{dock}})\rVert^2 57 | \end{equation*} 58 | 59 | % \begin{figure}[H] 60 | % \centering 61 | % \includegraphics[width=0.7\textwidth]{labs/13/images/loss.jpeg} 62 | % \caption{loss} 63 | % \label{fig:loss} 64 | % \end{figure} 65 | 66 | 67 | 68 | \subsection{Two stage Learning}\label{sec:truck-model} 69 | In the first stage of learning, the emulator is fed the states of the truck along with random steering actions as input, and the actual next state of the truck (according to the physics of the system) as the target output to learn. The emulator can then predict the next state of the truck given an initial state and the steering action. 70 | 71 | The second stage of learning involves unrolling the controller for the length of the actions of the episode and recursively applying the controller to the states produced by the action the controller takes. This produces a final state of the truck which is used to calculate the loss and to update the parameters of the controller via back-propagation through time. 72 | 73 | \begin{itemize} 74 | \item Q: Why don't we use the kinematics equations directly, as we already know how a truck moves? 75 | -- A: Because we want to show that the model can learn the kinematics facts automatically, which is useful in other complicated scenarios where we cannot know how the system works. 76 | \end{itemize} 77 | 78 | \subsection{Initial to final state} 79 | Traces of trajectory guided by the learned system in action. 
80 | \begin{figure}[H] 81 | \centering 82 | \includegraphics[width=0.45\textwidth]{figs/state.png} 83 | \label{fig:state} 84 | \includegraphics[width=0.45\textwidth]{figs/state2.png} 85 | \label{fig:state2} 86 | \end{figure} -------------------------------------------------------------------------------- /labs/13/theory.tex: -------------------------------------------------------------------------------- 1 | \chapter{The Truck Backer-Upper} 2 | % Authors: Xiao Jing, Arnav Kansal, Changgeng Zhao, 4/30/19. 3 | 4 | \section{Set up} 5 | We try to design a self-learning nonlinear controller to control the steering of a trailer truck while backing up a loading dock from an arbitrary initial position. Only backing up is allowed to park the truck parallel to the dock and trying to match ($x_{trailer}$, $y_{trailer}$) with ($x_{dock}$, $y_{dock}$) as closely as possible. In the lecture, some students tried to play this game and we found it's very hard even for humans to accomplish the task. 6 | \\ 7 | \begin{figure}[H] 8 | \centering 9 | \includegraphics[width=0.7\textwidth]{labs/13/images/figure1.png} 10 | \caption{The Truck, trailer and loading dock} 11 | \label{fig:general} 12 | \end{figure} 13 | State variables representing the position of the truck: $\theta_{cab}$, the angle of the truck, $x_{cab}$ and $y_{cab}$, the cartesian position of the yoke, $x_{trailer}$ and $y_{trailer}$, the cartesian position of the rear of the center of the trailer, $\theta_{trailer}$, the angle of the trailer. 14 | 15 | \section{Model and two stage learning} 16 | \subsection{Model} 17 | \begin{figure}[H] 18 | \centering 19 | \includegraphics[width=0.6\textwidth]{labs/13/images/figure2.png} 20 | \caption{The two part model} 21 | \label{fig:learn} 22 | \end{figure} 23 | The proposed model has two elements: the \textbf{Kinematics Component} (emulator) and the \textbf{Neural Net Controller}. 
The trailer truck Kinematics Component of the model tries to learn the next state of the truck given the current state and the steering action. The Neural Net Controller learns the steering action given the current state of the truck. We train the two models separately: we first train the Kinematics Component and then, based on this model, we train the Controller by back-propagating through the Kinematics model. 24 | 25 | 26 | 27 | \subsection{Variable Input} 28 | In the paper, there are 6 parameters. However, we just need 4 variables in state $k$, which are shown in Figure~\ref{fig:variable}, since the cab coordinates can be calculated from the four values we have. 29 | \begin{figure}[H] 30 | \centering 31 | \includegraphics[width=0.6\textwidth]{labs/13/images/variable.png} 32 | \caption{Variable} 33 | \label{fig:variable} 34 | \end{figure} 35 | 36 | \subsection{Flow chart} 37 | The block labeled $C$ represents the controller and $T$ represents the emulator. The initial position of the truck is chosen at random. The final error is back-propagated through each Controller/Trailer stage. 38 | \begin{figure}[H] 39 | \centering 40 | \includegraphics[width=0.8\textwidth]{labs/13/images/figure5.png} 41 | \label{fig:loss} 42 | \end{figure} 43 | Each time, we have 6 inputs for the controller at state $k$; we add a bias and forward the inputs to 25 hidden units, which feed 1 output unit. So there are 3 layers in the controller. 44 | \\ 45 | Then we pass the controller's output to the Emulator, which has 45 hidden units and 6 outputs for the next state. It is analogous to a neural network having a number of layers equal to four times the number of backing up steps. 
46 | \begin{figure}[H] 47 | \centering 48 | \includegraphics[width=0.8\textwidth]{labs/13/images/figure6.png} 49 | \label{fig:nn} 50 | \end{figure} 51 | 52 | \subsection{Loss Function} 53 | Here $\theta_c$ is $\theta_0$ and $\theta_t$ is $\theta_1$. 54 | 55 | \begin{equation*} 56 | \|\theta_{t}\|^2 + \|(x_{\text{trailer}}, y_{\text{trailer}}) - (x_{\text{dock}}, y_{\text{dock}})\|^2 57 | \end{equation*} 58 | 59 | % \begin{figure}[H] 60 | % \centering 61 | % \includegraphics[width=0.7\textwidth]{labs/13/images/loss.jpeg} 62 | % \caption{loss} 63 | % \label{fig:loss} 64 | % \end{figure} 65 | 66 | 67 | 68 | \subsection{Two stage Learning}\label{sec:truck-model} 69 | In the first stage of learning, the emulator is fed states of the truck together with random steering actions as input, and the actual next state of the truck (according to the physics of the system) as the target output to learn. The emulator can then predict the next state of the truck given an initial state and the steering action. 70 | 71 | The second stage of learning involves unrolling the controller for the length of the actions of the episode and recursively applying the controller to the states produced by the action the controller takes. This produces a final state of the truck, which is used to calculate the loss and to update the parameters of the controller via back-propagation through time. 72 | 73 | \begin{itemize} 74 | \item Q: Why don't we use the kinematics equations directly, as we already know how a truck moves? 75 | -- A: Because we want to show that the model can learn the kinematics automatically, which is useful in other, more complicated scenarios where we cannot know how the system works. 76 | \end{itemize} 77 | 78 | \subsection{Initial to final state} 79 | Traces of trajectory guided by the learned system in action. 
80 | \begin{figure}[H] 81 | \centering 82 | \includegraphics[width=0.45\textwidth]{labs/13/images/state.png} 83 | \label{fig:state} 84 | \includegraphics[width=0.45\textwidth]{labs/13/images/state2.png} 85 | \label{fig:state2} 86 | \end{figure} -------------------------------------------------------------------------------- /code/KF/bayesian-nn-coding.tex: -------------------------------------------------------------------------------- 1 | \chapter{Bayesian Neural Networks} 2 | % Authors: Jong Yeob Kim, Zilin Bian, Di Sha, 4/23/19. 3 | 4 | In this section, we perform experiments using Bayesian neural networks. We show that Bayesian neural networks capture model uncertainty, which cannot be captured by standard neural networks or deep learning tools. 5 | 6 | \section{Libraries} 7 | 8 | \begin{minted}{python} 9 | import torch 10 | from torch import nn, optim 11 | from matplotlib import pyplot as plt 12 | from plot_lib import set_default 13 | \end{minted} 14 | 15 | \section{Creating the data} 16 | \begin{minted}{python} 17 | # Set style (needs to be in a new cell) 18 | set_default(figsize=(16, 8)) 19 | 20 | # Training set 21 | m = 20 # nb of training pairs 22 | x = (torch.rand(m) - 0.5) * 10 # inputs, sampled from -5 to +5 23 | y = x * torch.sin(x) # targets 24 | 25 | # View training points 26 | plt.plot(x.numpy(), y.numpy(), 'o') 27 | plt.axis('equal') 28 | plt.ylim([-10, 5]) 29 | \end{minted} 30 | 31 | \begin{figure}[H] 32 | \centering 33 | \includegraphics[width=\textwidth]{figs/exp_1.png} 34 | \caption{View training data} 35 | \label{fig:training_data} 36 | \end{figure} 37 | 38 | We have defined 20 training data points from -5 to +5. Now, we are going to fit a network over the data points. 
39 | 40 | \section{Experiments using Bayesian neural networks} 41 | \begin{minted}{python} 42 | # Define network architecture (try different non-linearities) 43 | 44 | non_linear = nn.Tanh 45 | # non_linear = nn.ReLU 46 | 47 | net = nn.Sequential( 48 | nn.Dropout(p=0.05), 49 | nn.Linear(1, 20), 50 | non_linear(), 51 | nn.Dropout(p=0.05), 52 | nn.Linear(20, 20), 53 | non_linear(), 54 | nn.Linear(20, 1) 55 | ) 56 | \end{minted} 57 | In this neural network, the dropout acts directly on our inputs. We use tanh function for non-linearity in our first experiment. 58 | 59 | \begin{minted}{python} 60 | # Training objective and optimiser 61 | criterion = nn.MSELoss() 62 | optimiser = optim.SGD(net.parameters(), lr=0.01, weight_decay=0.00001) 63 | 64 | # Training loop 65 | for epoch in range(1000): 66 | y_hat = net(x.view(-1, 1)) 67 | loss = criterion(y_hat, y.view(-1, 1)) 68 | optimiser.zero_grad() 69 | loss.backward() 70 | optimiser.step() 71 | # print(loss.item()) 72 | 73 | # Define a denser input range 74 | xx = torch.linspace(-15, 15, 1000) 75 | 76 | # Evaluate net over denser input (try both eval() and train() modes) 77 | 78 | net.eval() 79 | # net.train() 80 | 81 | with torch.no_grad(): 82 | plt.plot(xx.numpy(), net(xx.view(-1, 1)).squeeze().numpy(), 'C1') 83 | plt.plot(x.numpy(), y.numpy(), 'oC0') 84 | plt.axis('equal') 85 | plt.ylim([-10, 5]) 86 | \end{minted} 87 | 88 | \begin{figure}[H] 89 | \centering 90 | \includegraphics[width=\textwidth]{figs/exp_2.png} 91 | \caption{Fit standard neural network (with Tanh non-linearity)} 92 | \label{fig:fit_standardNN} 93 | \end{figure} 94 | 95 | We set the model to evaluation mode to turn-off the dropout. As we can see in Fig. \ref{fig:fit_standardNN}, it gives us a fine approximation. However, this standard neural network does not show any uncertainty. Standard neural networks only provide point estimates of $y$ values in different locations corresponding to $x$ values. Then, how do we extract uncertainty from the network? 
First, we start with setting the network to train mode. 96 | 97 | \begin{minted}{python} 98 | # Multiple (100) runs for denser input 99 | net.train() 100 | y_hat = list() 101 | with torch.no_grad(): 102 | for t in range(100): 103 | y_hat.append(net(xx.view(-1, 1)).squeeze()) 104 | 105 | # Evaluate mean and std over denser input 106 | y_hat = torch.stack(y_hat) 107 | mean = y_hat.mean(0) 108 | std = y_hat.std(0) 109 | 110 | # Visualise mean and mean ± std -> confidence range 111 | plt.plot(xx.numpy(), mean.numpy(), 'C1') 112 | plt.fill_between(xx.numpy(), (mean + std).numpy(), (mean - std).numpy(), color='C2') 113 | plt.plot(x.numpy(), y.numpy(), 'oC0') 114 | plt.axis('equal') 115 | plt.ylim([-10, 5]) 116 | \end{minted} 117 | 118 | \begin{figure}[H] 119 | \centering 120 | \includegraphics[width=\textwidth]{figs/exp_3.png} 121 | \caption{Fit Bayesian neural network (with Tanh non-linearity)} 122 | \label{fig:fit_BayesianNN} 123 | \end{figure} 124 | 125 | Fig. \ref{fig:fit_BayesianNN} provides so much more information compared to the results in Fig. \ref{fig:fit_standardNN}. The red curve shows an average of 100 evaluations, and the purple curve represents the mean $\pm$ one standard deviation of the predictions. Thus, this plot not only presents mean estimates but also shows variances that provide uncertainty of the results. 126 | 127 | \begin{figure}[H] 128 | \centering 129 | \includegraphics[width=\textwidth]{figs/exp_4.png} 130 | \caption{Fit Bayesian neural network (with ReLU non-linearity)} 131 | \label{fig:fit_BayesianNN_ReLU} 132 | \end{figure} 133 | 134 | Fig. \ref{fig:fit_BayesianNN_ReLU} presents estimate results using ReLU function. We note that if we use different non-linearities in our network, it provides different uncertainty estimates. It is similar to changing kernels in Gaussian process. Furthermore, the uncertainty (or variance) increases as we go farther from the training points. 
135 | -------------------------------------------------------------------------------- /prct/MG/truck-backer-upper.tex: -------------------------------------------------------------------------------- 1 | \chapter{The Truck Backer-Upper} 2 | % Authors: Xiao Jing, Changgeng Zhao, 4/30/2019 3 | 4 | In this chapter, we present the process of collecting the data, training an emulator, training the control neural net and setting the loss function. 5 | \section{Setup} 6 | 7 | First we build a model of a truck based on the following kinematics: 8 | \begin{align} 9 | \dot x &= s \cos \theta_0 \\ 10 | \dot y &= s \sin \theta_0 \\ 11 | \dot \theta_0 &= \frac{s}{L} \tan \phi \\ 12 | \dot \theta_1 &= \frac{s}{d_1} \sin(\theta_1 - \theta_0) 13 | \end{align} 14 | 15 | where $s$: signed speed, $\phi$: negative steering angle, $\theta_0$: cab angle, $\theta_1$: trailer angle, $(x, y)$: position of cab. Dotted symbols denote time derivatives, which determine the next state given the current steering angle. 16 | 17 | Please refer to the \href{https://github.com/Atcold/pytorch-Deep-Learning-Minicourse/blob/master/14-truck_backer-upper.ipynb}{notebook} for code details for implementing the Truck class. The basic functions of the class are: 18 | \begin{itemize} 19 | \item reset: Randomly reset the initial position of the truck. 20 | \item step: Check whether the truck is in a valid condition (neither jackknifed nor off screen), then move the truck backwards based on the current position and a given steering angle, according to the above equations. 21 | \item draw: Draw the parking lot and the ``cab-trailer'' to give an interactive interface of the game. The interface is shown in Figure~\ref{fig:truck-interface}. 22 | \end{itemize} 23 | 24 | \begin{figure}[H] 25 | \centering 26 | \includegraphics[width=0.7\textwidth]{labs/13/images/Screen Shot 2019-05-03 at 2.03.51 PM.png} 27 | \caption{The interactive interface of the truck game. 
A player can set the angle of the front wheels 28 | \label{fig:truck-interface} 29 | of the truck at each step.} 30 | \end{figure} 31 | 32 | \section{Collect Data} 33 | In this section of the code, we generate training data for \textbf{Emulator} by randomly initializing the game and randomly steering the truck until it reaches an invalid state(jackknifes or hits the wall). We hope that this random dataset can help the \textbf{Emulator} learn the kinematics of the car. 34 | 35 | \begin{minted}{python} 36 | episodes = 10 37 | data_set = list() 38 | truck = Truck(); 39 | 40 | for episode in tqdm(range(episodes)): 41 | 42 | truck.reset() 43 | states = list() 44 | 45 | while truck.valid(): 46 | ϕ = (random() - 0.5) * π / 2 47 | states.append((ϕ, *truck.step(ϕ))) 48 | truck.draw() 49 | 50 | data_set.append(states) 51 | \end{minted} 52 | 53 | \section{Emulator Network} 54 | The \textbf{Emulator} is simply a 2-layer fully connected network. The input is the 6-tuple state position as well as the steering angle, and the output is the 6-tuple position of the next state. We use 45 hidden units in this particular case. 55 | 56 | \begin{minted}{python} 57 | state_size = 6 58 | steering_size = 1 59 | hidden_units_e = 45 60 | 61 | emulator = nn.Sequential( 62 | nn.Linear(steering_size + state_size, hidden_units_e), 63 | nn.ReLU(), 64 | nn.Linear(hidden_units_e, state_size) 65 | ) 66 | \end{minted} 67 | 68 | \section{Train the emulator} 69 | 70 | We use MSE loss and train the model state by state. Each sample in the training set is composed of the current state(input) and the next state(output). 
71 | 72 | \begin{minted}{python} 73 | i = 0 74 | for episode in range(len(train_set)): 75 | episode_loss = 0 76 | for _ in range(len(train_set[episode]) - 1): 77 | ϕ_state = train_data[i] 78 | next_state_prediction = emulator(ϕ_state) 79 | 80 | next_state = train_data[i + 1, 1:] 81 | loss = criterion(next_state_prediction, next_state) 82 | episode_loss += loss.item() 83 | 84 | optimiser_e.zero_grad() 85 | loss.backward() 86 | optimiser_e.step() 87 | i += 1 88 | 89 | # Skip end, because there is no next_frame 90 | i += 1 91 | 92 | if (episode + 1) % 1000 == 0 or episode == 0: 93 | print(f'{episode + 1:4d} / {len(train_set)}, {episode_loss:.10f}') 94 | 95 | 96 | \end{minted} 97 | 98 | \section{Test the emulator} 99 | After training the emulator, we can do evaluation on the test set, which is generated by the same method as training set. 100 | 101 | \begin{minted}{python} 102 | i = 0 103 | total_loss = 0 104 | with torch.no_grad(): 105 | for episode in range(len(test_set)): 106 | for _ in range(len(test_set[episode]) - 1): 107 | ϕ_state = test_data[i] 108 | next_state_prediction = emulator(ϕ_state) 109 | 110 | next_state = train_data[i + 1, 1:] 111 | total_loss += criterion(next_state_prediction, next_state).item() 112 | 113 | i += 1 114 | 115 | # Skip end, because there is no next_frame 116 | i += 1 117 | 118 | print(f'Test loss: {loss.item():.10f}') 119 | \end{minted} 120 | 121 | \section{Controller Net} 122 | The Controller Net is defined in Section~\ref{sec:truck-model} of this book and you can find it in the original paper ~\cite{nguyen1990truck}. If you can finish the remaining part of the controller network and make it work, you can \href{https://github.com/Atcold/pytorch-Deep-Learning-Minicourse/blob/master/14-truck_backer-upper.ipynb}{submit a pull request}! 
-------------------------------------------------------------------------------- /main.tex: -------------------------------------------------------------------------------- 1 | \documentclass[oneside]{book} 2 | \usepackage{float} 3 | \input{preamble} 4 | \input{jupyter} 5 | 6 | \title{SP19 DL collaborative notes} 7 | \author{ 8 | The students of SP19 Deep Learning\\ 9 | Editor: Alfredo Canziani\\ 10 | NYU 11 | } 12 | \date{\today} 13 | 14 | \begin{document} 15 | 16 | \maketitle 17 | 18 | \input{preface} 19 | \input{instructions} 20 | 21 | \tableofcontents 22 | 23 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 24 | % NEW PARTS 25 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 26 | 27 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 28 | \part{Paradigms}\label{prt:prdg} 29 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 30 | \input{prdg/QC/01-a/theory} 31 | \input{prdg/YC/02-a/theory} %backprogation ch 5 32 | \input{prdg/RM/10-a_unsup_learning/unsupervised-learning.tex} 33 | \input{prdg/RM/10-b_unsup_energy/Energy_based_unsupervised_learning.tex} 34 | \input{prdg/QC/13/theory} 35 | \input{prdg/BW/07-b/energy_based_models} % Energy based models ch 16 36 | %\input{prdg/BW/08-a/theory} % Energy based models ch 17 37 | \input{prdg/BA/self-supervised-learning} 38 | %\input{labs/12/theory} 39 | \input{prdg/BA/self_supervised_intro_theory} 40 | \input{prdg/BA/self_supervised_theory} 41 | \input{arch/BA/gnn_theory} 42 | \input{prdg/BA/vae_gan} 43 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 44 | \part{Architectures}\label{prt:arch} 45 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 46 | \input{arch/KF/convolutional-nets} % ch 7 47 | \input{arch/YC/05-lab/theory} %components of cnn 48 | \input{arch/MG/rnn.tex} 49 | 50 | \input{arch/BW/08-b/Latent_Variable_Models} %Latent variable models ch 18 51 | \input{arch/IC/graph-cnn.tex} 52 | \input{arch/GS/convolutions.tex} 53 | \input{arch/GS/convolutions_practice.tex} 54 | \input{arch/GS/striding.tex} 55 | 
\input{arch/IC/Bayesian_Neural_Networks.tex} 56 | \input{arch/IC//optimization.tex} 57 | 58 | 59 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 60 | \part{Practical \& applications}\label{prt:prct} 61 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 62 | \input{prct/YC/02-b/theory} %backprogation in practice ch 6 63 | \input{prct/YC/01-b/practice} %manifold hypo ch 31 64 | \input{prct/YC/01-lab/practice} % tensor transform ch 32 65 | \input{prct/KF/supervised_classification} % ch 33 66 | \input{prct/KF/loss_functions_non-convex} % ch 34 67 | \input{prct/KF/visualizing_2D_interpolations} % ch 36 68 | \input{prct/KF/convolution_demonstration} % ch 37 69 | \input{prct/KF/automatic_differentiation} % ch 38, 39 + random projections 70 | \input{prct/KF/img_classification_comparison} 71 | \input{prct/BW/09-lab/theory} % Regularisation ch 19 72 | \input{prct/BW/09-lab/practice} % Regularisation ch 42 73 | \input{prct/RM/11-a_repr_learning/representation_learning.tex} 74 | \input{prct/RM/11-b_repr_poincarre/Poincarre_embeddings.tex} 75 | \input{prct/RM/12-a_sparse-coding/sparse_coding.tex} 76 | \input{prct/MG/truck-backer-upper.tex} % lab 13 77 | \input{prct/GS/CNN_applications.tex} 78 | \input{prct/GS/CNN_applications_2.tex} 79 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 80 | \part{Coding}\label{prt:code} 81 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 82 | \input{code/MG/pytorch-and-tensors.tex} % lab 01 83 | \input{code/BW/03-lab/coding} % regression coding ch 48 84 | \input{code/IC/Multimodule_Systems_coding.tex} 85 | \input{code/IC/Sequence_Modeling.tex} 86 | \input{code/BA/ae_coding} 87 | \input{code/BA/vae_coding} 88 | \input{code/BA/gnn_coding} 89 | \input{code/KF/bayesian-nn-coding} 90 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 91 | % OLD PARTS 92 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 93 | 94 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 95 | \part{Theory}\label{prt:theory} 96 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 97 | % \input{lectures/01-a/theory} 98 | 
% \input{lectures/01-b/theory} 99 | \input{arch/EN/hierarchical_representation.tex} 100 | \input{arch/EN/nonlinear_dim_expansion.tex} 101 | \input{arch/EN/modular_approach.tex} 102 | %\input{lectures/02-a/theory} 103 | %\input{lectures/02-b/theory} 104 | \input{arch/KF/convolutional-nets} 105 | \input{arch/PK/convolutional_neural_nets.tex} 106 | %\input{lectures/04-a/theory} 107 | %\input{labs/04/theory} 108 | %\input{labs/05/theory} 109 | \input{arch/PK/digression_fourier_transform.tex} 110 | % \input{lectures/06-b/graph-cnn} 111 | %\input{lectures/06-b/theory-rnn} 112 | %\input{labs/07/theory} 113 | 114 | %\input{lectures/07-a/optimization} 115 | %\input{lectures/07-b/energy_based_models} 116 | %\input{lectures/08-a/theory} 117 | %\input{lectures/08-b/Latent_Variable_Models} 118 | %\input{labs/09/theory} 119 | 120 | 121 | % \input{lectures/10-a/unsupervised-learning} 122 | % \input{lectures/10-b/Energy_based_unsupervised_learning} 123 | % \input{lectures/11-a/representation_learning} 124 | % \input{lectures/12-a/sparse_coding} 125 | 126 | % \input{labs/13/theory} 127 | 128 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 129 | \part{Practice}\label{prt:practice} 130 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 131 | %\input{lectures/01-b/practice} 132 | %\input{labs/01/practice} 133 | %\input{prct/KF/supervised_classification} 134 | %\input{prct/KF/loss_functions_non-convex} 135 | %\input{labs/03/practice} 136 | %\input{prct/KF/visualizing_2D_interpolations} 137 | %\input{prct/KF/convolution_demonstration} 138 | %\input{prct/KF/automatic_differentiation} 139 | %\input{prct/KF/img_classification_comparison} 140 | %\input{labs/08/practice} 141 | %\input{labs/09/practice} 142 | %\input{labs/13/practice} 143 | 144 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 145 | \part{Coding}\label{prt:coding} 146 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 147 | % \input{lectures/01-b/coding} 148 | %\input{labs/01/coding} 149 | %\input{labs/03/coding} 150 | %\input{labs/08/coding} 
151 | 152 | 153 | 154 | 155 | 156 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 157 | \part{Applications}\label{prt:apps} 158 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 159 | %\input{lectures/04-b/applications} 160 | %\input{lectures/06-a/applications} 161 | 162 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 163 | \part{Papers summary}\label{prt:papers} 164 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 165 | 166 | The end. % \part needs to be followed by something. 167 | 168 | \bibliographystyle{plainnat} 169 | \clearpage\bibliography{references} 170 | 171 | \end{document} 172 | 173 | -------------------------------------------------------------------------------- /code/BA/gnn_coding.tex: -------------------------------------------------------------------------------- 1 | \chapter{Graph Neural Networks} 2 | % Authors: Yu Cao, 5/7/19. 3 | \section{DGL} 4 | In this section, we try to utilize Deep Graph Library to implement graph neural networks. 5 | 6 | \subsection{Basics} 7 | We start by creating the well-known \textbf{Zachary's karate club} social network. The network captures 34 members of a karate club, documenting pairwise links between members who interacted outside the club. The club later splits into two communities led by the instructor (node 0) and club president (node 33). You could read more about the story in the \href{https://en.wikipedia.org/wiki/Zachary\%27s_karate_club}{wiki page}. 8 | 9 | \begin{minted}{python} 10 | import dgl 11 | G = dgl.DGLGraph() 12 | G.add_nodes(34) 13 | G.add_edge(1, 0) 14 | print('Now we have %d edges!' % G.number_of_edges()) 15 | # add two edges 2->0 and 2->1 using list 16 | G.add_edges([2, 2], [0, 1]) 17 | 18 | # add three edges 3->0, 3->1 and 3->2 using torch tensor 19 | src = torch.tensor([3, 3, 3]) 20 | dst = torch.tensor([0, 1, 2]) 21 | G.add_edges(src, dst) 22 | 23 | print('Now we have %d edges!' 
% G.number_of_edges()) 24 | 25 | # add two edges 4->0, 5->0 using list 26 | G.add_edges([4, 5], 0) 27 | 28 | # add three edges 6->0 6->4 6->5 using torch tensor 29 | G.add_edges(6, torch.tensor([0, 4, 5])) 30 | 31 | print('Now we have %d edges!' % G.number_of_edges()) 32 | 33 | edge_list = [(7, 0), (7, 1), (7, 2), (7, 3), (8, 0), (8, 2), (9, 2), (10, 0), (10, 4), (10, 5), 34 | (11, 0), (12, 0), (12, 3), (13, 0), (13, 1), (13, 2), (13, 3), (16, 5), (16, 6), 35 | (17, 0), (17, 1), (19, 0), (19, 1), (21, 0), (21, 1), (25, 23), (25, 24), (27, 2), 36 | (27, 23), (27, 24), (28, 2), (29, 23), (29, 26), (30, 1), (30, 8), (31, 0), (31, 24), 37 | (31, 25), (31, 28), (32, 2), (32, 8), (32, 14), (32, 15), (32, 18), (32, 20), (32, 22), 38 | (32, 23), (32, 29), (32, 30), (32, 31), (33, 8), (33, 9), (33, 13), (33, 14), (33, 15), 39 | (33, 18), (33, 19), (33, 20), (33, 22), (33, 23), (33, 26), (33, 27), (33, 28), 40 | (33, 29), (33, 30), (33, 31), (33, 32)] 41 | 42 | src, dst = zip(*edge_list) 43 | G.add_edges(src, dst) 44 | 45 | # We should have 78 edges now! 46 | print('Now we have %d edges!' % G.number_of_edges()) 47 | \end{minted} 48 | 49 | \subsection{Message passing on graph} 50 | Suppose the karate club president (node 33) is sending out an invitation of their annual karate match. The president also asks the club members to broadcast the news to, of course, their friends in the club. We use a scalar to represent whether the member has received the invitation or not (1 for invited, 0 for not invited). Initially, everyone is 0 except node 33. 51 | \begin{minted}{python} 52 | # We first convert the uni-directional edges to bi-directional so messages can 53 | # be sent in both direction. 54 | src, dst = G.edges() 55 | G.add_edges(dst, src) 56 | # add self loop for each nodes for convenience 57 | v = G.nodes() 58 | G.add_edges(v, v) 59 | print('We now have %d edges!' 
% G.number_of_edges()) 60 | 61 | # init the state 62 | G.ndata['invited'] = torch.zeros((34,)) 63 | G.nodes[33].data['invited'] = torch.tensor([1.]) 64 | print(G.ndata['invited']) 65 | \end{minted} 66 | 67 | We then define the function that computes the messages. In DGL, the message function is an \textbf{Edge UDF} that takes in a single argument `edges`. It has three members `src`, `dst`, and `data` for accessing source node features, destination node features, and edge features respectively. 68 | \begin{minted}{python} 69 | def message_func(edges): 70 | # The message is simply the 'invited' state of the source nodes. 71 | return {'msg' : edges.src['invited']} 72 | \end{minted} 73 | 74 | 75 | Next, we define the reduce function which accumulates and consume the messages to update the node features. In DGL, the reduce function is a \textbf{Node UDF} that takes in a single argument `nodes`, which has two members `data` and `mailbox`. `data` contains the node features while `mailbox` contains all incoming message features, stacked along the second dimension (hence the `dim=1` argument). 76 | 77 | \begin{minted}{python} 78 | def reduce_func(nodes): 79 | # The reduce function sets the 'invited' state to be one if the node has already 80 | # been invited or any of the received messages contains an invitation (is one). 81 | # This can be done using sum and clamp operations as follows. 82 | accum = nodes.mailbox['msg'].sum(dim=1) # note that messages are stacked on dim=1 83 | return {'invited' : accum.clamp(max=1)} 84 | \end{minted} 85 | 86 | To trigger the message and reduce function, one can use the `send` and `recv` APIs. Following codes send out the messages from node 33. We then call `recv` on the receiver nodes to trigger the reduce function. 
87 | 88 | \begin{minted}{python} 89 | G.send((33, G.successors(33)), message_func) 90 | G.recv(G.successors(33), reduce_func) 91 | \end{minted} 92 | 93 | \subsection{Graph Convolutional Network} 94 | The steps to implement GCN in DGL is also similar to the toy task (\verb|2_MessagePassing.ipynb|): 95 | 96 | \begin{itemize} 97 | \item Define the message function. 98 | \item Define the reduce function. 99 | \item Define how they are triggered using `send` and `recv`. 100 | \end{itemize} 101 | 102 | \begin{minted}{python} 103 | # A bit of setup, just ignore this cell 104 | import matplotlib.pyplot as plt 105 | 106 | # for auto-reloading external modules 107 | %load_ext autoreload 108 | %autoreload 2 109 | 110 | %matplotlib inline 111 | plt.rcParams['figure.figsize'] = (8.0, 6.0) # set default size of plots 112 | plt.rcParams['image.interpolation'] = 'nearest' 113 | plt.rcParams['image.cmap'] = 'gray' 114 | plt.rcParams['animation.html'] = 'html5' 115 | import torch.nn as nn 116 | import torch.nn.functional as F 117 | 118 | # Define the GCN module 119 | class GCN(nn.Module): 120 | def __init__(self, in_feats, out_feats): 121 | super(GCN, self).__init__() 122 | self.linear = nn.Linear(in_feats, out_feats) 123 | 124 | def forward(self, g, inputs): 125 | # g is the graph and the inputs is the input node features 126 | # first perform linear transformation 127 | h = self.linear(inputs) 128 | # set the node features 129 | g.ndata['h'] = h 130 | # trigger message passing, gcn_message and gcn_reduce will be defined later 131 | g.send(g.edges(), gcn_message) 132 | g.recv(g.nodes(), gcn_reduce) 133 | # get the result node features 134 | h = g.ndata.pop('h') 135 | return h 136 | \end{minted} -------------------------------------------------------------------------------- /arch/YC/05-lab/theory.tex: -------------------------------------------------------------------------------- 1 | \chapter{Components of a CNN} 2 | % Authors: Joanna Bitton, Divyansh Khanna, Lind Xiao, 2/26/19. 
3 | 4 | \section{Variety of layers} 5 | % Authors: Joanna Bitton, Divyansh Khanna, Lind Xiao, 2/26/19. 6 | A convolutional neural network may consist of various kinds of layers. 7 | Each layer is responsible for extracting relevant information and creating higher-level interpretations of the input. 8 | In a standard CNN, an input will go through a series of layers such as convolution, non-linearity, pooling, and batch normalization layers. 9 | 10 | \subsection{Convolution} 11 | % Authors: Joanna Bitton, Divyansh Khanna, Lind Xiao, 2/26/19. 12 | A convolution layer is the backbone of all convolutional neural networks. 13 | The layer contains a set of learnable filters that will produce an activation map displaying how the input has responded to each filter. 14 | For instance, a filter could attempt to detect a certain kind of edge present in the input, or in higher levels of the network detect a shape. 15 | This is achieved by using small spatial filters that convolve over the height and width of the image, computing the dot product at every position. 16 | Each filter produces a two-dimensional activation map, and these maps are stacked along the depth dimension to create the output of the layer. 17 | 18 | \subsection{Non-linearity} 19 | % Authors: Joanna Bitton, Divyansh Khanna, Lind Xiao, 2/26/19. 20 | Without a non-linearity module present in the network, no matter how many layers are defined, the neural network would behave like a single-layered model. 21 | This is because a composition of linear layers is itself just another linear function. 22 | With non-linearity modules included, the model is no longer restricted to linear functions and can thus learn more complex concepts. 23 | The ReLU activation function is typically used as opposed to hyperbolic tangent as it is faster to train. 
24 | Despite the theoretical need for nonlinearities to be added, there is a paper that was able to train a deep model without nonlinearity layers. 25 | Interestingly enough, the floating point approximations from the calculations were enough of a nonlinearity to train the model. 26 | 27 | \subsection{Pooling} 28 | % Authors: Joanna Bitton, Divyansh Khanna, Lind Xiao, 2/26/19. 29 | Downsampling the input is an important component of learning representations with a CNN. 30 | A pooling layer after a convolution layer is used to progressively downsample the spatial size of the representation. 31 | Thus, the number of parameters and the amount of computation in the network are reduced. 32 | This helps in preventing overfitting. 33 | Additionally, a pooling layer is essential in establishing translation invariance. 34 | An edge detected at one corner of the image will still be an edge when picked up at a different part of the image (and possibly rotated as well). 35 | Max-pooling will let the network preserve this edge despite losing its location. 36 | 37 | In practice, using $2\times 2$ pooling with a stride of 2 for instance, 75 percent of the information will be lost. 38 | Since much of the information is lost, it is generally a rule of thumb that the depth of the feature map is doubled using a convolutional layer beforehand. 39 | Then, the pooling is performed with only a 50 percent information loss instead. 40 | 41 | \subsection{Batch normalization} 42 | % Authors: Joanna Bitton, Divyansh Khanna, Lind Xiao, 2/26/19. 43 | The batch normalization layer is typically invoked after a non-linearity. 44 | $$ 45 | \begin{aligned} 46 | \mu_B & =\frac{1}{m} \sum_{i=1}^m x_i\\ 47 | \sigma_B^2 & = \frac{1}{m}\sum_{i=1}^m (x_i-\mu_B)^2\\ 48 | \hat{x}_i & = \frac{x_i -\mu_B}{\sqrt{\sigma_B^2 +\epsilon}} 49 | \end{aligned} 50 | $$ 51 | 52 | Batch normalization is a commonly used trick to speed up training in convolutional neural networks, while providing robustness to the network. 
53 | A batch normalization layer normalizes the data in each batch to have zero mean and unit variance. 54 | It provides the ability to decouple subsequent layers such that the current layer does not have to keep track of the moving statistics of the previous layers. 55 | If batch normalization is not applied, every time a weight is changed in the lower layers everything propagates up and there is too much interaction between the upper and lower layers. 56 | It also provides some consistency between layers by reducing internal covariate shift (\href{https://arxiv.org/abs/1502.03167}{paper}). 57 | 58 | \section{Residual Connections} 59 | % Authors: Joanna Bitton, Divyansh Khanna, Lind Xiao, 2/26/19. 60 | An important problem with training deeper neural networks has been the loss of gradients as deeper parts of the network are approached. 61 | This is commonly referred to as the \textbf{vanishing gradient problem}. 62 | One way the deep learning community has been able to overcome this issue is by using residual connections in the neural network (\href{https://arxiv.org/abs/1512.03385}{paper}). 63 | A residual connection (or skip connection) connects an output of a layer to another layer by jumping over layers. 64 | The weights of the skip connections are learned as the network learns. 65 | By skipping over layers, the network is able to propagate gradients and ensure better representation learning. 66 | The general construction looks like: 67 | $$ 68 | \begin{aligned} 69 | y_k & = h(x_k) +\mathcal{F}(x_k, \mathcal{W}_k)\\ 70 | x_{k+1} & = f(y_k), 71 | \end{aligned} 72 | $$ 73 | where $h(x_k)$ is the bypass shortcut and $h(\cdot)$ is commonly taken to be the identity map. 74 | 75 | A number of variants based on skip connections have been proposed in the research community. 76 | DenseNets (\href{https://arxiv.org/abs/1608.06993}{paper}) have several parallel skip connections within each block. 
77 | 78 | Figure~\ref{fig:my_label0} (\href{https://arxiv.org/pdf/1712.09913.pdf}{original paper}) depicts how the use of residual connections makes the loss landscape smooth, which allows the network to converge. 79 | 80 | \begin{figure} 81 | \centering 82 | \includegraphics[width=\textwidth]{figs/loss.png} 83 | \caption{ The loss surfaces of ResNet-56 with/without skip connections.} 84 | \label{fig:my_label0} 85 | \end{figure} 86 | 87 | \section{Information Bottleneck} 88 | % Authors: Joanna Bitton, Divyansh Khanna, Lind Xiao, 2/26/19. 89 | As information moves up the layers, it passes through different layers which extract relevant features. 90 | This commonly requires altering the dimensions of the information. 91 | For example, recall that one reason pooling is used is to reduce the size of the feature maps. 92 | This dimensionality reduction plays an important role in how the network learns. 93 | The method of extracting relevant information and forgetting the rest is known as the ``Information Bottleneck''. 94 | There has been an increasing interest in the information theory aspect of how deep neural networks work, and how the architectures help them generalize well (\href{https://openreview.net/forum?id=ry_WPG-A-}{paper}). -------------------------------------------------------------------------------- /labs/05/theory.tex: -------------------------------------------------------------------------------- 1 | \chapter{Components of a CNN} 2 | % Authors: Joanna Bitton, Divyansh Khanna, Lind Xiao, 2/26/19. 3 | 4 | \section{Variety of layers} 5 | % Authors: Joanna Bitton, Divyansh Khanna, Lind Xiao, 2/26/19. 6 | A convolutional neural network may consist of various kinds of layers. 7 | Each layer is responsible for extracting relevant information and creating higher-level interpretations of the input. 8 | In a standard CNN, an input will go through a series of layers such as convolution, non-linearity, pooling, and batch normalization layers. 
9 | 10 | \subsection{Convolution} 11 | % Authors: Joanna Bitton, Divyansh Khanna, Lind Xiao, 2/26/19. 12 | A convolution layer is the backbone of all convolutional neural networks. 13 | The layer contains a set of learnable filters that will produce an activation map displaying how the input has responded to each filter. 14 | For instance, a filter could attempt to detect a certain kind of edge present in the input, or in higher levels of the network detect a shape. 15 | This is achieved by using small spatial filters that convolve over the height and width of the image, computing the dot product at every position. 16 | The output of each filter will produce a two-dimensional activation map which will be stacked along the depth-dimension to create the output for the layer. 17 | 18 | \subsection{Non-linearity} 19 | % Authors: Joanna Bitton, Divyansh Khanna, Lind Xiao, 2/26/19. 20 | Without a non-linearity module present in the network, no matter how many layers are defined, the neural network would behave like a single-layered model. 21 | This is due to the fact that composing these linear layers would simply output another linear function. 22 | Additionally, with the inclusion of non-linearity modules, non-linearity is introduced into the model and thus more complex concepts can be learned. 23 | The ReLU activation function is typically used as opposed to hyperbolic tangent as it is faster to train. 24 | Despite the theoretical need for nonlinearities to be added, there is a paper that was able to train a deep model without nonlinearity layers. 25 | Interestingly enough, the floating point approximations from the calculations were enough of a nonlinearity to train the model. 26 | 27 | \subsection{Pooling} 28 | % Authors: Joanna Bitton, Divyansh Khanna, Lind Xiao, 2/26/19. 29 | Downsampling the input is an important component of learning representations with a CNN. 
30 | A pooling layer after a convolution layer is used to progressively downsample the spatial size of the representation. 31 | Thus, the number of parameters and the amount of computation in the network are reduced. 32 | This helps in preventing overfitting. 33 | Additionally, a pooling layer is essential in establishing translation invariance. 34 | An edge detected at one corner of the image will still be an edge when picked up at a different part of the image (and possibly rotated as well). 35 | Max-pooling will let the network preserve this edge despite losing its location. 36 | 37 | In practice, using $2\times 2$ pooling with a stride of 2 for instance, 75 percent of the information will be lost. 38 | Since much of the information is lost, it is generally a rule of thumb that the depth of the feature map is doubled using a convolutional layer beforehand. 39 | Then, the pooling is performed with only a 50 percent information loss instead. 40 | 41 | \subsection{Batch normalization} 42 | % Authors: Joanna Bitton, Divyansh Khanna, Lind Xiao, 2/26/19. 43 | The batch normalization layer is typically invoked after a non-linearity. 44 | $$ 45 | \begin{aligned} 46 | \mu_B & =\frac{1}{m} \sum_{i=1}^m x_i\\ 47 | \sigma_B^2 & = \frac{1}{m}\sum_{i=1}^m (x_i-\mu_B)^2\\ 48 | \hat{x}_i & = \frac{x_i -\mu_B}{\sqrt{\sigma_B^2 +\epsilon}} 49 | \end{aligned} 50 | $$ 51 | 52 | Batch normalization is a commonly used trick to speed up training performance in convolutional neural networks, while providing robustness to the network. 53 | A batch normalization layer normalizes the data in each batch to have zero mean and unit variance. 54 | It provides the ability to decouple subsequent layers such that the current layer does not have to keep track of the moving statistics of the previous layers. 55 | If batch normalization is not applied, every time a weight is changed in the lower layers everything propagates up and there is too much interaction between the upper and lower layers. 
56 | It also provides some consistency between layers by reducing internal covariate shift (\href{https://arxiv.org/abs/1502.03167}{paper}). 57 | 58 | \section{Residual Connections} 59 | % Authors: Joanna Bitton, Divyansh Khanna, Lind Xiao, 2/26/19. 60 | An important problem with training deeper neural networks has been the loss of gradients as deeper parts of the network are approached. 61 | This is commonly referred to as the \textbf{vanishing gradient problem}. 62 | One way the deep learning community has been able to overcome this issue is by using residual connections in the neural network (\href{https://arxiv.org/abs/1512.03385}{paper}). 63 | A residual connection (or skip connection) connects an output of a layer to another layer by jumping over layers. 64 | The weights of the skip connections are learned as the network learns. 65 | By skipping over layers, the network is able to propagate gradients and ensure better representation learning. 66 | The general construction looks like: 67 | $$ 68 | \begin{aligned} 69 | y_k & = h(x_k) +\mathcal{F}(x_k, \mathcal{W}_k)\\ 70 | x_{k+1} & = f(y_k), 71 | \end{aligned} 72 | $$ 73 | where $h(x_k)$ is the bypass shortcut and $h(\cdot)$ is commonly taken to be the identity map. 74 | 75 | There are a number of variants proposed in the research community which are based on skip connections. 76 | DenseNets (\href{https://arxiv.org/abs/1608.06993}{paper}) have several parallel skip connections within each block. 77 | 78 | Figure~\ref{fig:my_label0} (\href{https://arxiv.org/pdf/1712.09913.pdf}{original paper}) depicts how the use of residual connections makes the loss landscape smooth, which allows the network to converge. 
79 | 80 | \begin{figure} 81 | \centering 82 | \includegraphics[width=\textwidth]{labs/05/images/loss.png} 83 | \caption{ The loss surfaces of ResNet-56 with/without skip connections.} 84 | \label{fig:my_label0} 85 | \end{figure} 86 | 87 | \section{Information Bottleneck} 88 | % Authors: Joanna Bitton, Divyansh Khanna, Lind Xiao, 2/26/19. 89 | As information moves up the layers, it passes through different layers which extract relevant features. 90 | This commonly requires altering the dimensions of the information. 91 | For example, recall that one reason pooling is used is to reduce the size of the feature maps. 92 | This dimensionality reduction plays an important role in how the network learns. 93 | The method of extracting relevant information and forgetting the rest is known as the ``Information Bottleneck''. 94 | There has been an increasing interest in the information theory aspect of how deep neural networks work, and how the architectures help them generalize well (\href{https://openreview.net/forum?id=ry_WPG-A-}{paper}). -------------------------------------------------------------------------------- /arch/GS/convolutions_practice.tex: -------------------------------------------------------------------------------- 1 | \chapter{Convolutions in Practice} 2 | % Authors: Rafael Moraes, Jiachen Zhu, Kabir Singh; 2019-02-19 3 | \section{Natural Data Properties} 4 | % Authors: Rafael Moraes, Jiachen Zhu, Kabir Singh; 2019-02-19 5 | Natural signals (e.g. audio, images, text) have 3 properties that make convolutions a good choice to process them: 6 | \begin{enumerate} 7 | \item \textbf{Stationarity}: similar patterns repeat in the data. 8 | For example, in an image, similar patches appear in different locations and not all patches are equally frequent. Formally we define this as ``a process or data where the mean, variance and autocorrelation structure does not change over time.'' 
More simply put, in data, we can observe stationarity when a pattern repeats over degrees of freedom. 9 | 10 | \begin{figure}[H] 11 | \begin{center} 12 | \includegraphics[width=200pt]{figs/stationarity.png} 13 | \end{center} 14 | \captionsetup{justification=centering, margin=2cm} 15 | \caption{Example of stationarity - we can observe similar waveforms over different parts of one data set.} 16 | \end{figure} 17 | 18 | \item \textbf{Locality}: points close to one another have more information about each other than points far apart. 19 | In other words, two points far apart are likely to be less correlated than two points closer to each other. 20 | For example, audio signals or pixels in an image. 21 | 22 | \begin{figure}[H] 23 | \begin{center} 24 | \includegraphics[width=200pt]{figs/locality.png} 25 | \end{center} 26 | \captionsetup{justification=centering, margin=2cm} 27 | \caption{Data points are more ``relevant'' to other data points nearby, less relevant as you move further away.} 28 | \end{figure} 29 | 30 | \item \textbf{Compositionality}: the world we live in is formed from a hierarchy of structure. Each level is composed from a group of structures from the lower levels. Complex expressions are formed by a combination of its simpler constituent expressions. For example, images are composed of pixels, pixels together form edges and color patterns, these together form motifs, which then form shapes, objects, scenes and so forth. 31 | This implies that a good way to identify a scene, for example, is to first understand the edges and color patterns, then the motifs and so on, which translates into the convolutional layers successfully used in this task. 32 | This compositionality characteristic of images was first introduced in biology, by analyzing how the human brain processes visual signals at each different stage of the visual cortex. 
33 | \end{enumerate} 34 | 35 | \section{Exploiting The Properties} 36 | % Authors: Rafael Moraes, Jiachen Zhu, Kabir Singh; 2019-02-19 37 | By making use of these properties, some simplifications can be introduced to our models: 38 | 39 | \begin{enumerate} 40 | \item \textbf{Locality \(\Rightarrow\) Sparsity}: 41 | 42 | Since the information that is most relevant to identify a particular region of the signal (e.g. image) is close to that region, our models do not need to analyze regions far away from each region of interest. 43 | Thus, in a neural network, a single unit does not need to be directly connected to a large portion of the input signal. 44 | In other words, the connections can be sparse: mostly zero, except in the areas surrounding the region of interest. 45 | In biology, this phenomenon is called the Receptive Field: ``an individual neuron relates to a specific sensory space (e.g., the body surface, or the visual field) in which a stimulus will modify the firing of that particular neuron''; that is, neurons and their analogous 46 | receptive fields are highly localized. 47 | 48 | \begin{figure}[H] 49 | \begin{center} 50 | \includegraphics[width=200pt]{figs/sparsity.png} 51 | \end{center} 52 | \captionsetup{justification=centering, margin=2cm} 53 | \caption{Like receptive fields, we can restrict input connections from incoming layers, instead of being fully connected.} 54 | \end{figure} 55 | 56 | 57 | \item \textbf{Stationarity \(\Rightarrow\) Parameter Sharing}: 58 | 59 | Since we know only a portion of the input needs to be connected to each unit, we then need to determine which parameters to use to connect the adjacent layers. 60 | We have previously explained that similar patterns repeat over and over in the data, so it becomes clear that sharing parameters across the input space is a good practice. 61 | We can have multiple sets of parameters (i.e. kernels), each that focus on identifying a specific pattern, and use each of these sets across the whole input data. 
62 | 63 | \begin{figure}[H] 64 | \begin{center} 65 | \includegraphics[width=200pt]{figs/kernel.png} 66 | \end{center} 67 | \captionsetup{justification=centering, margin=2cm} 68 | \caption{Example of parameter sharing using kernels. Colored connections refer to kernel space.} 69 | \end{figure} 70 | 71 | \end{enumerate} 72 | 73 | \section{Resulting Improvements} 74 | % Authors: Rafael Moraes, Jiachen Zhu, Kabir Singh; 2019-02-19 75 | The use of Sparsity and Parameter Sharing leads to: 76 | \begin{enumerate} 77 | \item \textbf{Faster convergence} \(\rightarrow\) Fewer weights to tune and ability to optimize the same parameters using multiple parts of the network/data. 78 | \item \textbf{Better model generalization} \(\rightarrow\) Fewer parameters lead to less overfitting. 79 | \item \textbf{Models not constrained to input size} \(\rightarrow\) Can keep applying same sets of parameters to small regions of the input independent of its size. 80 | \item \textbf{Kernel independence} \(\rightarrow\) leads to higher parallelization capabilities. 81 | \item \textbf{Reduced amount of computation} \(\rightarrow\) efficiency! 82 | \end{enumerate} 83 | 84 | \section{Notes} 85 | % Authors: Rafael Moraes, Jiachen Zhu, Kabir Singh; 2019-02-19 86 | Two important aspects to keep in mind: 87 | \begin{itemize} 88 | \item \textbf{Kernel Format}: In PyTorch, the order that the kernels are stored in the tensor is: 89 | \begin{equation} 90 | \underset{\# kernels}{\mathrm{N}} \times 91 | \underset{\# channels}{\mathrm{C}} \times 92 | \underset{h \times w \text{ of kernel}}{K} 93 | \end{equation} 94 | 95 | \item \textbf{Zero-Padding}: Operation of introducing zeros to the borders of the input. 96 | It is commonly used in order to maintain the size of the input in the output after a convolutional transformation. 97 | \end{itemize} 98 | 99 | --------------------------------------------------------------------------------