├── docs ├── 404.html ├── static ├── _layouts │ └── default.html ├── serve.sh ├── _config.yml ├── en │ └── faq.md └── index.md ├── .gitmodules ├── .gitignore ├── README.md └── Backprop.ipynb /docs/404.html: -------------------------------------------------------------------------------- 1 | jekyllbook/404.html -------------------------------------------------------------------------------- /docs/static: -------------------------------------------------------------------------------- 1 | jekyllbook/static/ -------------------------------------------------------------------------------- /docs/_layouts/default.html: -------------------------------------------------------------------------------- 1 | ../jekyllbook/_layouts/default.html -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "docs/jekyllbook"] 2 | path = docs/jekyllbook 3 | url = https://github.com/ebetica/jekyllbook 4 | -------------------------------------------------------------------------------- /docs/serve.sh: -------------------------------------------------------------------------------- 1 | # /usr/local/lib/ruby/gems/3.1.0/bin/jekyll serve --trace --baseurl '/NYU-DLFL22' 2 | /opt/homebrew/lib/ruby/gems/3.2.0/bin/jekyll serve --trace --baseurl '/NYU-DLFL22' 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Remove Jupyter caching 2 | __pycache__ 3 | .ipynb_checkpoints 4 | 5 | # Remove Mac shit 6 | .DS_Store 7 | 8 | # Remove Vim temp files 9 | *sw* 10 | 11 | # Ignore Data files 12 | .jekyll-cache 13 | _site 14 | .vscode -------------------------------------------------------------------------------- /docs/_config.yml: -------------------------------------------------------------------------------- 1 | permalink: pretty 2 | 3 | # 
Setup 4 | title: 'Deep Learning' 5 | url: https://atcold.github.io/NYU-DLFL22/ 6 | baseurl: '/NYU-DLFL22' 7 | homepage_title: Home 8 | default_lang: 'en' 9 | 10 | # About/contact 11 | author: 12 | name: atcold 13 | url: https://twitter.com/alfcnz 14 | github: 15 | repo: https://github.com/atcold/NYU-DLFL22 16 | 17 | # Custom vars 18 | version: dlfl22 19 | 20 | src: "." 21 | default_theme: "ayu" 22 | 23 | defaults: 24 | - scope: 25 | path: "" # an empty string here means all files in the project 26 | values: 27 | layout: "default" 28 | 29 | # For Maths 30 | markdown: kramdown 31 | 32 | # To use hljs, disable the default highlighter 33 | kramdown: 34 | syntax_highlighter_opts: 35 | disable: true 36 | math_engine: null 37 | 38 | exclude: 39 | - jekyllbook 40 | - en/index.md 41 | - vendor 42 | 43 | 44 | ################################### English #################################### 45 | prologues: 46 | - path: en/faq.md 47 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # NYU Deep Learning Fall 2022 (NYU-DLFL22) 2 | 3 | Differential edition based on [NYU-DLSP21](https://github.com/Atcold/NYU-DLSP21/blob/master/README.md). 4 | 5 | 6 | ## New content 7 | 8 | After the SP21 there have been three more editions (FL21, SP22, and FL22). 9 | Some of the existing content has been updated and new lectures have been crafted. 10 | So, I felt it was about time for releasing the goodies online. 11 | 12 | Now, editing *all* the lectures while keeping my *current quality* is rather onerous. 13 | Moreover, lectures that have not been changed substantially wouldn't bring much value *in addition to* those that have already been pushed online in the previous edition. 14 | Therefore, this FL22 edition is going to be a *differential* one. 15 | Meaning, it's going to be an ‘expansion’ based on the SP21 version. 
16 | 17 | You can choose to watch only the new videos (if you've already watched SP21) or you can watch videos from the two editions, back to back. 18 | Check out the table of contents on the [class website](https://atcold.github.io/NYU-DLFL22/) to create your own roadmap. 19 | 20 | 21 | ## Previous releases 22 | 23 | Before NYU-DLFL22 there were… 24 | 25 | - [NYU-DLSP21](https://github.com/Atcold/NYU-DLSP21/) (best video quality, full course) 26 | - [NYU-DLSP20](https://github.com/Atcold/NYU-DLSP20) (major release, 16 translations) 27 | - [NYU-DLSP19](https://github.com/Atcold/NYU-DLSP20/releases/tag/dlsp19) 28 | - [AIMS-DLFL19](https://github.com/Atcold/NYU-DLSP20/releases/tag/aims-fl18) 29 | - [CoDaS-HEP18](https://github.com/Atcold/NYU-DLSP20/releases/tag/v1.0.0) 30 | - [NYU-DLSP18](https://docs.google.com/document/d/1_p1Mw-NtMGN_vpas_pchLsQC2u0NM5mTnRapBrQ2ivk/) 31 | - [Purdue-DLFL16](https://docs.google.com/document/d/1ugJRMqQ_cCUQC1B8mSE0iro7sKrDT8-BnppTZv0rA08/) 32 | - [torch-Video-Tutorials](https://github.com/Atcold/torch-Video-Tutorials) 33 | 34 | ## More info 35 | 36 | Keep reading on the [class website](https://atcold.github.io/NYU-DLFL22/). 37 | -------------------------------------------------------------------------------- /docs/en/faq.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Foreword, FAQ and disclaimer 3 | author: Alfredo Canziani 4 | date: 17 Dec 2022 5 | lang-ref: faq 6 | --- 7 | 8 | 9 | # Foreword 10 | 11 | This course concerns the latest techniques in deep learning and representation learning, focusing on supervised and unsupervised deep learning, embedding methods, metric learning, convolutional and recurrent nets, with applications to computer vision, natural language understanding, and speech recognition. 12 | The prerequisites include: [DS-GA 1001 Intro to Data Science](https://cds.nyu.edu/academics/ms-curriculum/) or a graduate-level machine learning course.
13 | 14 | 15 | # FAQ 16 | 17 | Here are some answers to frequently asked questions: 18 | - **Does taking this course lead to certification?** 19 | > No, it does not. In order to offer a certification, we would have to be able to evaluate you, but the content has not been designed for this (unlike a MOOC for example). As this is a frequent request, we are thinking about proposing a certification for future editions of the course. 20 | - **How much time should I spend on this course?** 21 | > For each week, there is approximately 2h30/3h of video content. With the time dedicated to note taking and playing with the notebooks, a total estimate of 5 hours per week seems reasonable. For the rest, it depends on the level of immersion you want to achieve in a given topic (reading the referenced articles, applying what was seen in class to your own projects, etc.). 22 | - **Where to ask a question after watching a video?** 23 | > You can ask it directly in the comments section under the YouTube video in question, and Alfredo will be happy to answer it. If the question is about a specific point in the video, please include the time stamp. 24 | > You can also do this on the class [Discord](https://discord.gg/CthuqsX8Pb) specifically for students. It is also used to coordinate viewing groups, discuss assignments, suggest improvements, or generally discuss any topic related to the course. 25 | - **Can I use this course?** 26 | > Of course, the course is under the [Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License](http://creativecommons.org/licenses/by-nc-sa/4.0/). 27 | > This means that: 28 | > - You may not use the material for commercial purposes. 29 | > - You must give appropriate credit, provide a link to the license, and indicate if changes were made. You may do so in any reasonable manner, but not in any way that suggests the licensor endorses you or your use. 
30 | > - If you remix, transform, or build upon the material, you must distribute your contributions under the same license as the original. 31 | > 32 | > For credit, you can use the following BibTeX: 33 | > ```bibtex 34 | > @misc{canziani2022nyudlfl22, 35 | > author = {Canziani, Alfredo and LeCun, Yann}, 36 | > title = {{NYU Deep Learning, Fall 2022}}, 37 | > howpublished = "\url{https://atcold.github.io/NYU-DLFL22}", 38 | > year = {2022}, 39 | > note = "[Online; accessed ]" 40 | > } 41 | > ``` -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: DEEP LEARNING 4 | author: Alfredo Canziani 5 | lang-ref: home 6 | --- 7 | 8 | **DS-GA 1008 · FALL 2022 · [NYU CENTER FOR DATA SCIENCE](http://cds.nyu.edu/)** 9 | 10 | | INSTRUCTOR | Alfredo Canziani, Yann LeCun | 11 | | LECTURES | Wednesdays 16:55 – 18:55, Zoom | 12 | | PRACTICA | Tuesdays 16:55 – 17:55, Zoom | 13 | | FORUM | [r/NYU_DeepLearning](https://www.reddit.com/r/NYU_DeepLearning/) | 14 | | DISCORD | [NYU DL](https://discord.gg/CthuqsX8Pb) | 15 | | MATERIAL | [2022 repo](https://github.com/Atcold/NYU-DLFL22) | 16 | 17 | 18 | ## 2022 edition disclaimer 19 | 20 | Check the repo's [`README.md`](https://github.com/Atcold/NYU-DLFL22/blob/master/README.md) and learn about: 21 | 22 | - New content and presentation 23 | - This semester's repository 24 | - Previous releases 25 | 26 | 27 | ## Lectures 28 | 29 | Only the new lessons (either material or presentation) will come online. 30 | Content similar to the [SP21 edition](../NYU-DLSP21/), semitransparent and shown in italic, is not going to be edited and/or pushed online. 31 | 32 | **Legend**: 🖥 slides, 📝 notes, 📓 Jupyter notebook, 🎥 YouTube video.
33 | 34 | 44 | 45 | ### Theme 1: Introduction 46 | 47 | - 00 – Introduction to NYU-DLFL22 [🎥](https://youtu.be/00s9ireCnCw) 48 | - 01 – History (see [🎥](https://youtu.be/mTtDfKgLm54)) 49 | - 02 – Gradient descent and the backpropagation algorithm (see [🎥](https://youtu.be/nTlCqaL7fCY)) 50 | - 03 – Resources and neural nets inference [🎥](https://youtu.be/QwZQrxIk6Dg) 51 | 52 | 53 | ### Theme 2: Classification, an energy perspective 54 | 55 | - 05 – Notation and introduction [🎥](https://youtu.be/9cpBu8yt9B8) [🖥](https://drive.google.com/file/d/1c0aElks9f9A2PWRNDJO1P_5_n9ODA--o/) 56 | - 06 – Backprop and contrastive learning [🎥](https://youtu.be/SC6ljsFFVcY) [🖥](https://drive.google.com/file/d/1c0aElks9f9A2PWRNDJO1P_5_n9ODA--o/) 57 | - 07 – PyTorch 5-step training code [🎥](https://youtu.be/PXXE7aJ_siw) [🖥](https://drive.google.com/file/d/1c0aElks9f9A2PWRNDJO1P_5_n9ODA--o/) 58 | 59 | 60 | ### Theme 3: Parameter sharing 61 | 62 | - 04 – Recurrent and convolutional nets (see [🎥](https://youtu.be/7dU3TFBJl-0) [🖥](https://drive.google.com/file/d/1GtI4ywzI84oamyr_W5k_wzgfRN139aFD/) [📝 ](https://drive.google.com/file/d/12jP4ssUIoGURAU8jGj6QwKXyZVdXW0o6/)) 63 | - 08 – Natural signals, ConvNets kernels and sizes, comparison with fully-connected architecture (see [🎥](https://youtu.be/KvvNkE2vQVk) [🖥](https://github.com/Atcold/NYU-DLSP20/blob/master/slides/02%20-%20CNN.pdf) [📓](https://github.com/Atcold/NYU-DLSP20/blob/master/06-convnet.ipynb) and [🎥](https://youtu.be/d2GixptaHjk?t=2211)) 64 | - 09 – Recurrent neural nets, vanilla and gated (LSTM) [🎥](https://youtu.be/5KSGNomPJTE) [🖥](https://github.com/Atcold/NYU-DLSP20/blob/master/slides/04%20-%20RNN.pdf) [📓](https://github.com/Atcold/NYU-DLSP20/blob/master/08-seq_classification.ipynb)[📓](https://github.com/Atcold/NYU-DLSP20/blob/master/09-echo_data.ipynb) ① 65 | 66 | 67 | ### Theme 4: Energy-based models, a compendium 68 | 69 | - 11 – Inference for latent variable energy-based models (LV-EBMs) 
[🎥](https://youtu.be/xA_OPjRby5g) [🖥](https://github.com/Atcold/NYU-DLSP20/blob/master/slides/12%20-%20EBM.pdf) 70 | - 13 – Training LV-EBMs [🎥](https://youtu.be/XIMaWj5YjOQ) [🖥](https://github.com/Atcold/NYU-DLSP20/blob/master/slides/12%20-%20EBM.pdf) 71 | - 14 – From latent-variable EBMs (K-means, sparse coding), to target propagation to autoencoders [🎥](https://youtu.be/oo9Z9jKJ9iM) [🖥](https://drive.google.com/file/d/1eAFH58VazIdpEPfkD_xDNadQe7Jss_uY/) 72 | 73 | --- 74 | 75 | ① I did create some new RNN diagrams (see [tweet](https://twitter.com/alfcnz/status/1448005146684928005) and quoted one), so this lesson may get published, at some time. For now I'm focussing on the energy lessons first. 76 | -------------------------------------------------------------------------------- /Backprop.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "7b058d8d-d3c2-442b-a97c-d8de34395908", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "import torch\n", 11 | "from torch import nn" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "id": "5842a614-724a-4e1c-9e61-8acffc09dacd", 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "# Fixing nomenclature\n", 22 | "nn.LogSoftArgMax = nn.LogSoftmax\n", 23 | "torch.softargmax = torch.softmax" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": null, 29 | "id": "6b1111f4-f324-4002-bfb8-2dfd4383dfaf", 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "# Get a random input\n", 34 | "torch.manual_seed(0)\n", 35 | "x = torch.randn(1, 2)\n", 36 | "print(f'{x = }')" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": null, 42 | "id": "5c7ed57a-0daf-4ab6-ab1e-d74b3f80a118", 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "# Generate random target\n", 47 | "torch.manual_seed(2)\n", 48 | "y = 
torch.zeros(5, dtype=torch.long)\n", 49 | "c = torch.randint(5, (1,))\n", 50 | "y[c] = 1\n", 51 | "print(f'{c = }, {y = }')" 52 | ] 53 | }, 54 | { 55 | "cell_type": "markdown", 56 | "id": "2b163485-82d5-4db7-831b-766dcce0e6f9", 57 | "metadata": {}, 58 | "source": [ 59 | "Model definition:\n", 60 | "\n", 61 | "$$\\begin{eqnarray*}\n", 62 | "\\boldsymbol{h} &=&\n", 63 | "f(\\boldsymbol{W_h x} + \\boldsymbol{b_h}) \\\\\n", 64 | "\\boldsymbol{s} &=&\n", 65 | "a(\\boldsymbol{h}) = \\boldsymbol{W_y h} + \\boldsymbol{b_y} \\\\\n", 66 | "\\boldsymbol{o} &=&\n", 67 | "g(\\boldsymbol{s}) \\\\\n", 68 | "f &=&\n", 69 | "(\\cdot)^+ \\\\\n", 70 | "g &=&\n", 71 | "\\operatorname{logsoftargmax} \\\\\n", 72 | "D(\\boldsymbol{y}, \\boldsymbol{o}) &=&\n", 73 | "- \\boldsymbol{y}^\\top \\boldsymbol{o}\n", 74 | "\\end{eqnarray*}$$" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": null, 80 | "id": "cca6b27d-dbc4-4543-b67a-3d4b6deca1e0", 81 | "metadata": {}, 82 | "outputs": [], 83 | "source": [ 84 | "# Define model\n", 85 | "torch.manual_seed(1)\n", 86 | "predictor = nn.Sequential(\n", 87 | " nn.Linear(2, 7),\n", 88 | " nn.ReLU(),\n", 89 | ")\n", 90 | "a = nn.Linear(7, 5)\n", 91 | "g = nn.LogSoftArgMax(dim=-1)\n", 92 | "\n", 93 | "D = nn.NLLLoss()" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": null, 99 | "id": "5e68dc73-f9c2-41f8-bd9e-04a15a8ac631", 100 | "metadata": {}, 101 | "outputs": [], 102 | "source": [ 103 | "# Generate intermediate and final output\n", 104 | "h = predictor(x)\n", 105 | "s = a(h)\n", 106 | "o = g(s)\n", 107 | "\n", 108 | "s.retain_grad()\n", 109 | "o.retain_grad()\n", 110 | "\n", 111 | "print(f'{s = },\\n{s.retains_grad = },\\n{s.grad = }\\n')\n", 112 | "print(f'{o = },\\n{o.retains_grad = },\\n{o.grad = }')" 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": null, 118 | "id": "6a23082a-4adb-4828-b5cc-fdf167f7d7da", 119 | "metadata": {}, 120 | "outputs": [], 121 | "source": [ 122 | "# Compute cost,
energy, and loss\n", 123 | "L = F = D(o, c)\n", 124 | "print(f'{L = }')" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": null, 130 | "id": "20be1174-622c-4987-a9cb-dc83b09793aa", 131 | "metadata": {}, 132 | "outputs": [], 133 | "source": [ 134 | "# Run back-propagation & grad accumulation\n", 135 | "L.backward()" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": null, 141 | "id": "be3fd9df-3997-4261-9ff4-6e8a4f9a503c", 142 | "metadata": {}, 143 | "outputs": [], 144 | "source": [ 145 | "# Show computed gradients\n", 146 | "print(f'{o.grad = }\\n{s.grad = }')" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": null, 152 | "id": "896627c1-5103-4baf-a54c-fd76cff87271", 153 | "metadata": {}, 154 | "outputs": [], 155 | "source": [ 156 | "# Check for correctness\n", 157 | "torch.softargmax(s.detach(), dim=-1) - y" 158 | ] 159 | }, 160 | { 161 | "cell_type": "markdown", 162 | "id": "09f53f75-7d40-4d17-a683-bed5bc97414d", 163 | "metadata": {}, 164 | "source": [ 165 | "What about the affine transformation?\n", 166 | "\n", 167 | "$$\n", 168 | "\\begin{gather}\n", 169 | "a: \\mathbb{R}^d \\to \\mathbb{R}^K, \\quad \\boldsymbol{h} \\mapsto a(\\boldsymbol{h}) = \\boldsymbol{s}\\\\\n", 170 | "\\boldsymbol{s} = \\boldsymbol{W_y h} + \\boldsymbol{b_y} =\n", 171 | "\\boldsymbol{w}_1 h_1 +\n", 172 | "\\boldsymbol{w}_2 h_2 + \\cdots +\n", 173 | "\\boldsymbol{w}_d h_d + \n", 174 | "\\boldsymbol{b_y} \\\\\n", 175 | "\\boldsymbol{W_y} =\n", 176 | "[\\boldsymbol{w}_1\\; \\boldsymbol{w}_2\\; \\cdots\\; \\boldsymbol{w}_d]\n", 177 | "\\in \\mathbb{R}^{K \\times d}, \\quad \\boldsymbol{b_y} \\in \\mathbb{R}^K\\\\\n", 178 | "\\Rightarrow\n", 179 | "{\\partial \\mathcal{L} \\over \\partial \\boldsymbol{b_y}} = \\cdots, \\quad\n", 180 | "{\\partial \\mathcal{L} \\over \\partial \\boldsymbol{W_y}} = \\cdots\n", 181 | "\\end{gather}\n", 182 | "$$" 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | 
"execution_count": null, 188 | "id": "42a14504-de64-4095-a66c-34110adcfd1a", 189 | "metadata": {}, 190 | "outputs": [], 191 | "source": [ 192 | "# Check gradBias\n", 193 | "print(f'{a.bias.grad = }')" 194 | ] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "execution_count": null, 199 | "id": "2e8b975f-50f3-4728-ab6f-d681c13f9fb4", 200 | "metadata": {}, 201 | "outputs": [], 202 | "source": [ 203 | "# Check sizes\n", 204 | "print(f'''{h.size() = }\\n{s.grad.size() = }\n", 205 | "{a.weight.size() = }\\n{a.weight.grad.size() = }''')" 206 | ] 207 | }, 208 | { 209 | "cell_type": "code", 210 | "execution_count": null, 211 | "id": "9fc0abe1-325b-4832-ac4c-097e8ee7f705", 212 | "metadata": {}, 213 | "outputs": [], 214 | "source": [ 215 | "# Compute gradWeight by hand\n", 216 | "s.grad.t() @ h.detach()" 217 | ] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": null, 222 | "id": "4fdafdd9-7767-4221-89fb-a613815c0cd2", 223 | "metadata": {}, 224 | "outputs": [], 225 | "source": [ 226 | "# Verify what PyTorch computed\n", 227 | "print(f'a.weight.grad =\\n{a.weight.grad}')" 228 | ] 229 | } 230 | ], 231 | "metadata": { 232 | "kernelspec": { 233 | "display_name": "Python 3 (ipykernel)", 234 | "language": "python", 235 | "name": "python3" 236 | }, 237 | "language_info": { 238 | "codemirror_mode": { 239 | "name": "ipython", 240 | "version": 3 241 | }, 242 | "file_extension": ".py", 243 | "mimetype": "text/x-python", 244 | "name": "python", 245 | "nbconvert_exporter": "python", 246 | "pygments_lexer": "ipython3", 247 | "version": "3.10.0" 248 | } 249 | }, 250 | "nbformat": 4, 251 | "nbformat_minor": 5 252 | } 253 | --------------------------------------------------------------------------------