├── .github └── workflows │ └── deploy.yml ├── .gitignore ├── 404.md ├── LICENSE ├── Makefile ├── README.md ├── _assets ├── CAM_bm.jpg ├── ENS_logo.jpg ├── X_logo.png ├── dataflowr_logo.png ├── dataflowr_violet_plain_square.png ├── favicon.png ├── hamburger.svg ├── mnist_disentangled.gif ├── rndimg.jpg └── scripts │ ├── generate_results.jl │ ├── output │ ├── script1.out │ └── script2.svg │ ├── script1.jl │ └── script2.jl ├── _css ├── custom.css ├── franklin.css └── poole_hyde.css ├── _layout ├── foot.html ├── foot_highlight.html ├── foot_katex.html ├── head.html ├── head_highlight.html ├── head_katex.html ├── page_foot.html ├── sidebar.html ├── style.html └── tag.html ├── _libs ├── highlight │ ├── github.min.css │ └── highlight.pack.js └── katex │ ├── auto-render.min.js │ ├── fonts │ ├── KaTeX_AMS-Regular.ttf │ ├── KaTeX_AMS-Regular.woff │ ├── KaTeX_AMS-Regular.woff2 │ ├── KaTeX_Caligraphic-Bold.ttf │ ├── KaTeX_Caligraphic-Bold.woff │ ├── KaTeX_Caligraphic-Bold.woff2 │ ├── KaTeX_Caligraphic-Regular.ttf │ ├── KaTeX_Caligraphic-Regular.woff │ ├── KaTeX_Caligraphic-Regular.woff2 │ ├── KaTeX_Fraktur-Bold.ttf │ ├── KaTeX_Fraktur-Bold.woff │ ├── KaTeX_Fraktur-Bold.woff2 │ ├── KaTeX_Fraktur-Regular.ttf │ ├── KaTeX_Fraktur-Regular.woff │ ├── KaTeX_Fraktur-Regular.woff2 │ ├── KaTeX_Main-Bold.ttf │ ├── KaTeX_Main-Bold.woff │ ├── KaTeX_Main-Bold.woff2 │ ├── KaTeX_Main-BoldItalic.ttf │ ├── KaTeX_Main-BoldItalic.woff │ ├── KaTeX_Main-BoldItalic.woff2 │ ├── KaTeX_Main-Italic.ttf │ ├── KaTeX_Main-Italic.woff │ ├── KaTeX_Main-Italic.woff2 │ ├── KaTeX_Main-Regular.ttf │ ├── KaTeX_Main-Regular.woff │ ├── KaTeX_Main-Regular.woff2 │ ├── KaTeX_Math-BoldItalic.ttf │ ├── KaTeX_Math-BoldItalic.woff │ ├── KaTeX_Math-BoldItalic.woff2 │ ├── KaTeX_Math-Italic.ttf │ ├── KaTeX_Math-Italic.woff │ ├── KaTeX_Math-Italic.woff2 │ ├── KaTeX_SansSerif-Bold.ttf │ ├── KaTeX_SansSerif-Bold.woff │ ├── KaTeX_SansSerif-Bold.woff2 │ ├── KaTeX_SansSerif-Italic.ttf │ ├── KaTeX_SansSerif-Italic.woff │ ├── KaTeX_SansSerif-Italic.woff2 │ ├── KaTeX_SansSerif-Regular.ttf │ ├── KaTeX_SansSerif-Regular.woff │ ├── KaTeX_SansSerif-Regular.woff2 │ ├── KaTeX_Script-Regular.ttf │ ├── KaTeX_Script-Regular.woff │ ├── KaTeX_Script-Regular.woff2 │ ├── KaTeX_Size1-Regular.ttf │ ├── KaTeX_Size1-Regular.woff │ ├── KaTeX_Size1-Regular.woff2 │ ├── KaTeX_Size2-Regular.ttf │ ├── KaTeX_Size2-Regular.woff │ ├── KaTeX_Size2-Regular.woff2 │ ├── KaTeX_Size3-Regular.ttf │ ├── KaTeX_Size3-Regular.woff │ ├── KaTeX_Size3-Regular.woff2 │ ├── KaTeX_Size4-Regular.ttf │ ├── KaTeX_Size4-Regular.woff │ ├── KaTeX_Size4-Regular.woff2 │ ├── KaTeX_Typewriter-Regular.ttf │ ├── KaTeX_Typewriter-Regular.woff │ └── KaTeX_Typewriter-Regular.woff2 │ ├── katex.min.css │ └── katex.min.js ├── config.md ├── homework ├── 1-mlp-from-scratch.md ├── 2-CAM-adversarial.md └── 3-VAE.md ├── index.md ├── modules ├── 0-julia-setup.md ├── 0-sotfware-installation.md ├── 1-intro-general-overview.md ├── 10-generative-adversarial-networks.md ├── 11a-recurrent-neural-networks-theory.md ├── 11b-recurrent-neural-networks-practice.md ├── 11c-batches-with-sequences.md ├── 12-attention.md ├── 12-intro-julia.md ├── 13-siamese.md ├── 14a-depth.md ├── 14b-depth.md ├── 15-dropout.md ├── 16-batchnorm.md ├── 17-resnets.md ├── 18a-energy.md ├── 18b-diffusion.md ├── 19-clip.md ├── 2a-pytorch-tensors.md ├── 2b-automatic-differentiation.md ├── 2c-jax.md ├── 3-loss-functions-for-classification.md ├── 4-optimization-for-deep-learning.md ├── 5-stacking-layers.md ├── 6-convolutional-neural-network.md ├── 
7-dataloading.md ├── 8a-embedding-layers.md ├── 8b-collaborative-filtering.md ├── 8c-word2vec.md ├── 9a-autoencoders.md ├── 9b-unet.md ├── 9c-flows.md ├── extras │ ├── Convolutions_first.md │ ├── GCN_inductivebias_spectral.md │ ├── GCN_inductivebias_spectral_files │ │ ├── GCN_inductivebias_spectral_10_0.png │ │ ├── GCN_inductivebias_spectral_13_0.png │ │ ├── GCN_inductivebias_spectral_17_0.png │ │ ├── GCN_inductivebias_spectral_19_0.png │ │ ├── GCN_inductivebias_spectral_23_0.png │ │ ├── GCN_inductivebias_spectral_27_0.png │ │ ├── GCN_inductivebias_spectral_29_0.png │ │ ├── GCN_inductivebias_spectral_31_0.png │ │ ├── GCN_inductivebias_spectral_33_0.png │ │ ├── GCN_inductivebias_spectral_35_0.png │ │ ├── GCN_inductivebias_spectral_37_1.png │ │ ├── GCN_inductivebias_spectral_39_0.png │ │ └── GCN_inductivebias_spectral_7_0.png │ ├── ODIN │ │ └── original_optimal_shade.png │ ├── attention │ │ ├── attention_bahdanau.png │ │ ├── attention_matrix.png │ │ ├── attention_matrix2.png │ │ ├── attention_translate.jpeg │ │ ├── block_transformer.png │ │ ├── transformer_block_nocode.png │ │ └── transformer_vizu.gif │ ├── clip │ │ └── diagram.png │ ├── conv_files │ │ ├── deeplabcityscape.gif │ │ ├── jl_conv.gif │ │ ├── jl_grad.gif │ │ ├── target_plot.png │ │ └── training_plot.png │ ├── diffusions │ │ ├── ddpm.png │ │ ├── diffusion_finetuning.png │ │ ├── energy.png │ │ ├── horses.png │ │ ├── mnist_diffusion.gif │ │ ├── mnist_result.png │ │ ├── ships.png │ │ └── trucks.png │ ├── flows │ │ ├── Real_NVP.png │ │ └── three-generative-models.png │ ├── graph_invariant.md │ ├── graph_invariant │ │ └── Prairie.pdf │ ├── invariant_equivariant.md │ ├── jupyterlab.md │ ├── nerf │ │ └── pipeline.jpg │ └── unet │ │ └── unet.png ├── graph0.md ├── graph1.md ├── graph2.md ├── graph3.md └── privacy-preserving-ML.md ├── notebooks_md ├── 01_intro.md └── 02a_basics.md ├── utils.jl └── youtube_videos.jl /.github/workflows/deploy.yml: -------------------------------------------------------------------------------- 1 | name: Build and Deploy 2 | on: 3 | push: 4 | # NOTE: For a **project** site (username.github.io/project/), push things 5 | # to the **master** branch and make sure to set the line below to 6 | # `- master`; also, at the end of the file, change to `BRANCH: gh-pages` 7 | # For a **personal** site (username.github.io/), push things to a **dev** 8 | # branch and make sure to set the line below to `- dev` this is because 9 | # for user pages GitHub pages **requires** the deployment to be on the 10 | # master branch; also, at the end of the file, change to `BRANCH: master` 11 | branches: 12 | - master 13 | jobs: 14 | build-and-deploy: 15 | runs-on: ubuntu-latest 16 | steps: 17 | - name: Set Actions Allow Unsecure Commands 18 | run: | 19 | echo "ACTIONS_ALLOW_UNSECURE_COMMANDS=true" >> $GITHUB_ENV 20 | - name: Checkout 21 | uses: actions/checkout@v2 22 | with: 23 | persist-credentials: false 24 | - name: Install SSH Client 25 | uses: webfactory/ssh-agent@v0.2.0 26 | with: 27 | ssh-private-key: ${{ secrets.FRANKLIN_PRIV }} 28 | # Python is necessary for pre-rendering steps as well as to install 29 | # matplotlib which is necessary if you intend to use PyPlot. If you do 30 | # not, then you can remove the `run: pip install matplotlib` line. 
31 | #- name: Install python 32 | # uses: actions/setup-python@v1 33 | # with: 34 | # python-version: '3.x' 35 | #- run: pip install matplotlib # if you use PyPlot this is needed 36 | - name: Install Julia 37 | uses: julia-actions/setup-julia@v1 38 | with: 39 | version: 1.7 40 | # This ensures that NodeJS and Franklin are loaded then it installs 41 | # highlight.js which is needed for the prerendering step. 42 | # Then the environment is activated and instantiated to install all 43 | # Julia packages which may be required to successfully build your site. 44 | # NOTE: the last line should be `optimize()`, you may want to give it 45 | # specific arguments, see the documentation or ?optimize in the REPL. 46 | - run: julia -e ' 47 | using Pkg; Pkg.add(["NodeJS", "JSON", "Franklin"]); 48 | using NodeJS; run(`$(npm_cmd()) install highlight.js`); 49 | using Franklin; 50 | Pkg.activate("."); Pkg.instantiate(); 51 | optimize()' 52 | - name: Build and Deploy 53 | uses: JamesIves/github-pages-deploy-action@releases/v3 54 | with: 55 | SSH: true 56 | # Set this to `BRANCH: gh-pages` for a **project** page and to 57 | # `BRANCH: master` for a **personal** page 58 | BRANCH: gh-pages 59 | FOLDER: __site 60 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __site/ 2 | -------------------------------------------------------------------------------- /404.md: -------------------------------------------------------------------------------- 1 | # 404: File not found 2 | 3 | The requested file was not found. 4 | 5 | Please [click here](/) to go to the home page, or have a look at the 6 | website modules below. 7 | 8 | ## Modules 9 | 10 | - [Module 1: Introduction & General Overview](./modules/1-intro-general-overview) 11 | - [Module 2a: PyTorch Tensors](./modules/2a-pytorch-tensors) 12 | - [Module 2b: Automatic Differentiation](./modules/2b-automatic-differentiation) 13 | - [Module 3: Loss functions for classification](./modules/3-loss-functions-for-classification) 14 | - [Module 4: Optimization for Deep Learning](./modules/4-optimization-for-deep-learning) 15 | - [Module 5: Stacking layers](./modules/5-stacking-layers) 16 | - [Module 6: Convolutional Neural Network](./modules/6-convolutional-neural-network) 17 | - [Module 7: Dataloading](./modules/7-dataloading) 18 | - [Module 8a: Embedding layers](./modules/8a-embedding-layers) 19 | - [Module 8b: Collaborative Filtering](./modules/8b-collaborative-filtering) 20 | - [Module 9a: Autoencoders](./modules/9a-autoencoders) 21 | - [Module 10: Generative Adversarial Networks](./modules/10-generative-adversarial-networks) 22 | - [Module 11a: Recurrent Neural Networks theory](./modules/11a-recurrent-neural-networks-theory) 23 | - [Module 11b: Recurrent Neural Networks practice](./modules/11b-recurrent-neural-networks-practice) 24 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document.
11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 
134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 
193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | install: 2 | julia -O0 -e 'import Pkg; Pkg.add("Franklin"); Pkg.add("JSON")' 3 | 4 | serve: 5 | julia -O0 -e 'using Franklin; serve(clear=true)' 6 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Deep Learning Do It Yourself 2 | 3 | This site collects resources to learn Deep Learning in the form of Modules 4 | available through the sidebar on the left. As a student, you can walk 5 | through the modules at your own pace and interact with others thanks 6 | to the associated digital platforms. Then we hope you'll become a 7 | contributor by adding modules to this site! 8 | 9 | ## Setup to run the website locally 10 | 11 | 1. Install [Julia](https://julialang.org/downloads/) (make sure to 'add to PATH' so you can use step 2 without having to type the full path) 12 | 2. Launch Julia from the command line: 13 | 14 | ``` text 15 | julia 16 | ``` 17 | 18 | 3. Install the required packages: 19 | 20 | ``` julia 21 | using Pkg 22 | Pkg.add("Franklin") 23 | Pkg.add("JSON") 24 | ``` 25 | 26 | 4. Serve the website (on [localhost:8000](http://localhost:8000)): 27 | 28 | ``` julia 29 | using Franklin 30 | serve() 31 | ``` 32 | 33 | Note: steps 3 and 4 are automated in the [Makefile](./Makefile), 34 | so you can just run `make install` and `make serve`. 
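To check the full optimized build locally (this mirrors what the CI deploy step does before pushing to GitHub, see `.github/workflows/deploy.yml` above), you can call Franklin's `optimize()` instead of `serve()`. A minimal sketch, assuming the packages from step 3 are already installed:

``` julia
using Franklin
# Full build, as in the last line of the CI script; the generated site
# lands in the (git-ignored) __site/ folder.
# Note: the pre-rendering done by optimize() needs NodeJS and highlight.js,
# as set up in deploy.yml.
optimize()
```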
35 | -------------------------------------------------------------------------------- /_assets/CAM_bm.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dataflowr/website/5bd8ed8dc714e9ea7f926604beb4c9b83975100d/_assets/CAM_bm.jpg -------------------------------------------------------------------------------- /_assets/ENS_logo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dataflowr/website/5bd8ed8dc714e9ea7f926604beb4c9b83975100d/_assets/ENS_logo.jpg -------------------------------------------------------------------------------- /_assets/X_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dataflowr/website/5bd8ed8dc714e9ea7f926604beb4c9b83975100d/_assets/X_logo.png -------------------------------------------------------------------------------- /_assets/dataflowr_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dataflowr/website/5bd8ed8dc714e9ea7f926604beb4c9b83975100d/_assets/dataflowr_logo.png -------------------------------------------------------------------------------- /_assets/dataflowr_violet_plain_square.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dataflowr/website/5bd8ed8dc714e9ea7f926604beb4c9b83975100d/_assets/dataflowr_violet_plain_square.png -------------------------------------------------------------------------------- /_assets/favicon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dataflowr/website/5bd8ed8dc714e9ea7f926604beb4c9b83975100d/_assets/favicon.png -------------------------------------------------------------------------------- /_assets/hamburger.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /_assets/mnist_disentangled.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dataflowr/website/5bd8ed8dc714e9ea7f926604beb4c9b83975100d/_assets/mnist_disentangled.gif -------------------------------------------------------------------------------- /_assets/rndimg.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dataflowr/website/5bd8ed8dc714e9ea7f926604beb4c9b83975100d/_assets/rndimg.jpg -------------------------------------------------------------------------------- /_assets/scripts/generate_results.jl: -------------------------------------------------------------------------------- 1 | # Parent file to run all scripts which may generate 2 | # some output that you want to display on the website. 3 | # this can be used as a tester to check that all the code 4 | # on your website runs properly. 5 | 6 | dir = @__DIR__ 7 | 8 | """ 9 | genplain(s) 10 | 11 | Small helper function to run some code and redirect the output (stdout) to a file. 
12 | """ 13 | function genplain(s::String) 14 | open(joinpath(dir, "output", "$(splitext(s)[1]).out"), "w") do outf 15 | redirect_stdout(outf) do 16 | include(joinpath(dir, s)) 17 | end 18 | end 19 | end 20 | 21 | # output 22 | 23 | genplain("script1.jl") 24 | 25 | # plots 26 | 27 | include("script2.jl") 28 | -------------------------------------------------------------------------------- /_assets/scripts/output/script1.out: -------------------------------------------------------------------------------- 1 | *---------1.3 2 | **--------1.3 3 | ***-------1.3 4 | ****------1.3 5 | *****-----1.3 6 | -------------------------------------------------------------------------------- /_assets/scripts/script1.jl: -------------------------------------------------------------------------------- 1 | using LinearAlgebra # HIDE 2 | using Random:seed! # HIDE 3 | seed!(0) # HIDE 4 | # HIDE 5 | x = randn(5) 6 | y = randn(5) 7 | 8 | for i in 1:5 9 | println(rpad("*"^i, 10, '-'), round(dot(x, y), digits=1)) 10 | end 11 | -------------------------------------------------------------------------------- /_assets/scripts/script2.jl: -------------------------------------------------------------------------------- 1 | using PyPlot 2 | x = range(0, stop=1, length=50) 3 | plot(x, sin.(2x).*exp.(-x/3)) 4 | savefig(joinpath(@__DIR__, "output", "script2.svg")) 5 | -------------------------------------------------------------------------------- /_css/custom.css: -------------------------------------------------------------------------------- 1 | @import url('https://fonts.googleapis.com/css2?family=PT+Sans+Caption:wght@700&family=PT+Sans:ital,wght@0,400;0,700;1,400;1,700&display=swap'); 2 | 3 | h1 { 4 | border-bottom: 1px solid #999 !important; 5 | margin-bottom: 1em !important; 6 | } 7 | 8 | h1, h2 { 9 | color: #555; 10 | font-family: 'PT Sans Caption'; 11 | font-weight: bold; 12 | } 13 | 14 | html, body { 15 | font-family: 'PT Sans', sans-serif; 16 | font-size: 18px; 17 | } 18 | 19 | 20 | .sidebar { 21 | z-index: 2; 22 | overflow-y: auto; 23 | overflow-x: hidden; 24 | } 25 | 26 | .content { 27 | padding-top: 0; 28 | } 29 | 30 | .franklin-content { 31 | margin-top: 2em; 32 | } 33 | .franklin-content pre > code { 34 | border: 1px solid #6B8E2340; 35 | padding: 1.5em; 36 | border-radius: 0; 37 | } 38 | pre.code-output { 39 | background: #6B8E2340; 40 | padding-left: 0.5em; 41 | } 42 | 43 | .code-output code { 44 | border-radius: 0; 45 | padding-left: 1em !important; 46 | background: white; 47 | } 48 | 49 | .docstring { 50 | border: 1px solid #f1f1f1; 51 | padding: 1em; 52 | margin-bottom: 2em; 53 | } 54 | 55 | .franklin-content .docstring h2.doc-header { 56 | font-size: 1.25em; 57 | color: #008080; 58 | margin: 16px 0; 59 | } 60 | 61 | .franklin-content .docstring h1 { 62 | font-size: 1.2em; 63 | } 64 | .franklin-content .docstring h2 { 65 | font-size: 1em; 66 | } 67 | 68 | .docstring .doc-type { 69 | font-size: 1em; 70 | color: #6B8E2350; 71 | float: right; 72 | } 73 | 74 | .franklin-content h1 { 75 | margin-bottom: 0; 76 | font-size: 1.66em; 77 | border: none; 78 | } 79 | .franklin-content h2 { 80 | border: none; 81 | font-size: 1.33em; 82 | font-style: normal; 83 | font-weight: bold; 84 | } 85 | 86 | .sidebar-about h1 { 87 | font-family: 'Recursive', sans-serif; 88 | border-bottom: none !important; 89 | } 90 | 91 | @media (min-width: 768px) { 92 | .sidebar-nav { 93 | } 94 | } 95 | .sidebar .container.sidebar-sticky { 96 | top: 1rem; 97 | } 98 | .sidebar-nav .sidebar-nav-item.active { 99 | box-sizing: border-box; 100 |
background: #f0f8ff40; 101 | margin-right: -1em; 102 | margin-left: -0.5em; 103 | padding-left: 0.5em; 104 | color: white; 105 | font-weight: normal !important; 106 | } 107 | .sidebar-nav .sidebar-nav-item { 108 | color: #cccccc; 109 | margin: 0.25em 0; 110 | } 111 | 112 | .sidebar-nav .week { 113 | font-style: italic; 114 | display: flex; 115 | align-items: center; 116 | color: #c08282; 117 | } 118 | .sidebar-nav .week::before, 119 | .sidebar-nav .week::after { 120 | content: " "; 121 | flex: 1; 122 | border-bottom: 1px dashed #c08282; 123 | } 124 | .sidebar-nav .week::before { 125 | margin-right: .25em; 126 | } 127 | .sidebar-nav .week::after { 128 | margin-left: .25em; 129 | } 130 | 131 | h1 { 132 | margin-top: 2em; 133 | } 134 | h2 { 135 | margin-top: 1em; 136 | } 137 | 138 | 139 | .page-foot img.github-logo { 140 | width: 1em; 141 | height: 1em; 142 | margin: 0; 143 | padding: 0; 144 | display: inline-block; 145 | margin-bottom: -2px; 146 | } 147 | 148 | .page-foot { 149 | margin-bottom: 0; 150 | } 151 | 152 | .colbox-blue{background-color:#EEF3F5;padding-top:5px;padding-right:10px;padding-left:10px;padding-bottom:5px;margin-left:5px;margin-top:5px;margin-bottom:5px;border-radius:0 10px 10px 0;border-left:5px solid #4C9CF1} -------------------------------------------------------------------------------- /_css/franklin.css: -------------------------------------------------------------------------------- 1 | /* ================================================================== 2 | DEFAULT FONT AND LAYOUT 3 | ================================================================== */ 4 | 5 | html { 6 | font-family: Helvetica, Arial, sans-serif; 7 | font-size : 17px; 8 | color: #1c1c1c; 9 | } 10 | 11 | /* ================================================================== 12 | BASIC GRID FOR PROFILE PIC 13 | ================================================================== */ 14 | 15 | .franklin-content .row { 16 | display: block; } 17 | 18 | .franklin-content .left { 19 | float: left; 20 | margin-right: 15px; } 21 | 22 | .franklin-content .right { 23 | float: right; } 24 | 25 | .franklin-content .container img { 26 | width: auto; 27 | padding-left: 0; 28 | border-radius: 10px; } 29 | 30 | .franklin-content .footnote { 31 | position: relative; 32 | top: -0.5em; 33 | font-size: 70%; 34 | } 35 | 36 | /* ================================================================== 37 | FOOT / COPYRIGHT 38 | ================================================================== */ 39 | 40 | .franklin-content .page-foot a { 41 | text-decoration: none; 42 | color: #a6a2a0; 43 | text-decoration: underline; } 44 | 45 | .page-foot { 46 | font-size: 80%; 47 | font-family: Arial, serif; 48 | color: #a6a2a0; 49 | text-align: center; 50 | margin-top: 6em; 51 | border-top: 1px solid lightgrey; 52 | padding-top: 2em; 53 | margin-bottom: 4em; } 54 | 55 | /* ================================================================== 56 | TEXT GEOMETRY 57 | ================================================================== */ 58 | 59 | .franklin-content { 60 | position: relative; 61 | padding-left: 12.5%; 62 | padding-right: 12.5%; 63 | line-height: 1.35em; } 64 | 65 | /* on wide screens, fix content width to a max value */ 66 | @media (min-width: 940px) { 67 | .franklin-content { 68 | width: 705px; 69 | margin-left: auto; 70 | margin-right: auto; } 71 | } 72 | 73 | /* on narrow device, reduce margins */ 74 | @media (max-width: 480px) { 75 | .franklin-content { 76 | padding-left: 6%; 77 | padding-right: 6%; } 78 | } 79 | 80 | /* 
================================================================== 81 | TITLES 82 | ================================================================== */ 83 | 84 | .franklin-content h1 { font-size: 24px; } 85 | .franklin-content h2 { font-size: 22px; } 86 | .franklin-content h3 { font-size: 20px; } 87 | 88 | .franklin-content h1, h2, h3, h4, h5, h6 { 89 | text-align: left; } 90 | 91 | .franklin-content h1 { 92 | padding-bottom: 0.5em; 93 | border-bottom: 3px double lightgrey; 94 | margin-top: 1.5em; 95 | margin-bottom: 1em; } 96 | 97 | .franklin-content h2 { 98 | padding-bottom: 0.3em; 99 | border-bottom: 1px solid lightgrey; 100 | margin-top: 2em; 101 | margin-bottom: 1em; } 102 | 103 | .franklin-content h1 a { color: inherit; } 104 | .franklin-content h1 a:hover {text-decoration: none;} 105 | .franklin-content h2 a { color: inherit; } 106 | .franklin-content h2 a:hover {text-decoration: none;} 107 | .franklin-content h3 a { color: inherit; } 108 | .franklin-content h3 a:hover {text-decoration: none;} 109 | .franklin-content h4 a { color: inherit; } 110 | .franklin-content h4 a:hover {text-decoration: none;} 111 | .franklin-content h5 a { color: inherit; } 112 | .franklin-content h5 a:hover {text-decoration: none;} 113 | .franklin-content h6 a { color: inherit; } 114 | .franklin-content h6 a:hover {text-decoration: none;} 115 | 116 | .franklin-content table { 117 | margin-left: auto; 118 | margin-right: auto; 119 | border-collapse: collapse; 120 | text-align: center;} 121 | .franklin-content th, td{ 122 | padding: 10px; 123 | border: 1px solid black;} 124 | 125 | .franklin-content blockquote { 126 | background: #eeeeee; 127 | border-left: 7px solid #a8a8a8; 128 | margin: 1.5em 10px; 129 | padding: 0.5em 10px; 130 | font-style: italic;} 131 | 132 | .franklin-content blockquote p { 133 | display: inline; } 134 | 135 | /* ================================================================== 136 | GENERAL FORMATTING 137 | ================================================================== */ 138 | 139 | /* spacing between bullet points */ 140 | .franklin-content li p { 141 | margin: 10px 0; } 142 | 143 | .franklin-content a { 144 | color: #004de6; 145 | text-decoration: none; } 146 | 147 | .franklin-content a:hover { 148 | text-decoration: underline; } 149 | 150 | /* ================================================================== 151 | HYPERREFS AND FOOTNOTES 152 | ================================================================== */ 153 | 154 | .franklin-content .eqref a { color: green; } 155 | .franklin-content .bibref a { color: green; } 156 | 157 | .franklin-content sup { 158 | font-size: 70%; 159 | vertical-align: super; 160 | line-height: 0; } 161 | 162 | .franklin-content table.fndef { 163 | margin: 0; 164 | margin-bottom: 10px;} 165 | .franklin-content .fndef tr, td { 166 | padding: 0; 167 | border: 0; 168 | text-align: left;} 169 | .franklin-content .fndef tr { 170 | border-left: 2px solid lightgray; 171 | } 172 | .franklin-content .fndef td.fndef-backref { 173 | vertical-align: top; 174 | font-size: 70%; 175 | padding-left: 5px;} 176 | .franklin-content .fndef td.fndef-content { 177 | font-size: 80%; 178 | padding-left: 10px;} 179 | 180 | /* ================================================================== 181 | IMAGES in CONTENT 182 | ================================================================== */ 183 | 184 | .franklin-content img { 185 | width: 70%; 186 | text-align: center; 187 | padding-left: 10%; } 188 | 189 | .franklin-content .img-small img { 190 | width: 50%; 191 | 
text-align: center; 192 | padding-left: 20%; } 193 | 194 | /* ================================================================== 195 | KATEX 196 | ================================================================== */ 197 | 198 | body { counter-reset: eqnum; } 199 | 200 | .katex { font-size: 1em !important; } 201 | 202 | .katex-display .katex { 203 | display: inline-block; } /* overwrite katex settings */ 204 | 205 | .katex-display::after { 206 | counter-increment: eqnum; 207 | content: "(" counter(eqnum) ")"; 208 | position: relative; 209 | float: right; 210 | padding-right: 5px; } 211 | 212 | /* ================================================================== 213 | CODE & HIGHLIGHT.JS 214 | ================================================================== */ 215 | 216 | code { 217 | background-color: rgba(27,31,35,0.05); 218 | padding: 0.1em 0.2em; 219 | border-radius: 2px; 220 | font-size: 90%; 221 | } 222 | 223 | /* .franklin-content code { */ 224 | /* background-color: rgba(27,31,35,0.05); */ 225 | /* padding: 0.1em 0.2em; */ 226 | /* border-radius: 2px; */ 227 | /* font-size: 90%; } */ 228 | 229 | .hljs { 230 | font-size: 90%; 231 | line-height: 1.35em; 232 | border-radius: 10px; } 233 | 234 | .hljs-meta, .hljs-metas, .hljs-metap { font-weight: bold; } 235 | 236 | .hljs-meta { color: rgb(25, 179, 51); } 237 | 238 | .hljs-metas { color: red; } 239 | 240 | .hljs-metap { color: rgb(51, 131, 231); } 241 | 242 | /* ================================================================== 243 | BOXES 244 | ================================================================== */ 245 | 246 | .franklin-content .colbox-blue { 247 | background-color: #EEF3F5; 248 | padding-top: 5px; 249 | padding-right: 10px; 250 | padding-left: 10px; 251 | padding-bottom: 5px; 252 | margin-left: 5px; 253 | margin-top: 5px; 254 | margin-bottom: 5px; 255 | border-radius: 0 10px 10px 0; 256 | border-left: 5px solid #4C9CF1; } 257 | -------------------------------------------------------------------------------- /_css/poole_hyde.css: -------------------------------------------------------------------------------- 1 | /* 2 | * ___ 3 | * /\_ \ 4 | * _____ ___ ___\//\ \ __ 5 | * /\ '__`\ / __`\ / __`\\ \ \ /'__`\ 6 | * \ \ \_\ \/\ \_\ \/\ \_\ \\_\ \_/\ __/ 7 | * \ \ ,__/\ \____/\ \____//\____\ \____\ 8 | * \ \ \/ \/___/ \/___/ \/____/\/____/ 9 | * \ \_\ 10 | * \/_/ 11 | * 12 | * Designed, built, and released under MIT license by @mdo. Learn more at 13 | * https://github.com/poole/poole. 14 | */ 15 | 16 | 17 | /* 18 | * Contents 19 | * 20 | * Body resets 21 | * Custom type 22 | * Messages 23 | * Container 24 | * Masthead 25 | * Posts and pages 26 | * Pagination 27 | * Reverse layout 28 | * Themes 29 | */ 30 | 31 | 32 | /* 33 | * Body resets 34 | * 35 | * Update the foundational and global aspects of the page. 
36 | */ 37 | 38 | * { 39 | -webkit-box-sizing: border-box; 40 | -moz-box-sizing: border-box; 41 | box-sizing: border-box; 42 | } 43 | 44 | html, 45 | body { 46 | margin: 0; 47 | padding: 0; 48 | } 49 | 50 | html { 51 | font-family: "Helvetica Neue", Helvetica, Arial, sans-serif; 52 | font-size: 16px; 53 | line-height: 1.5; 54 | } 55 | @media (min-width: 38em) { 56 | html { 57 | font-size: 20px; 58 | } 59 | } 60 | 61 | body { 62 | color: #515151; 63 | background-color: #fff; 64 | -webkit-text-size-adjust: 100%; 65 | -ms-text-size-adjust: 100%; 66 | } 67 | 68 | /* No `:visited` state is required by default (browsers will use `a`) */ 69 | a { 70 | color: #268bd2; 71 | text-decoration: none; 72 | } 73 | a strong { 74 | color: inherit; 75 | } 76 | /* `:focus` is linked to `:hover` for basic accessibility */ 77 | a:hover, 78 | a:focus { 79 | text-decoration: underline; 80 | } 81 | 82 | /* Headings */ 83 | h1, h2, h3, h4, h5, h6 { 84 | margin-bottom: .5rem; 85 | font-weight: bold; 86 | line-height: 1.25; 87 | color: #313131; 88 | text-rendering: optimizeLegibility; 89 | } 90 | h1 { 91 | font-size: 2rem; 92 | } 93 | h2 { 94 | margin-top: 1rem; 95 | font-size: 1.5rem; 96 | } 97 | h3 { 98 | margin-top: 1.5rem; 99 | font-size: 1.25rem; 100 | } 101 | h4, h5, h6 { 102 | margin-top: 1rem; 103 | font-size: 1rem; 104 | } 105 | 106 | /* Body text */ 107 | p { 108 | margin-top: 0; 109 | margin-bottom: 1rem; 110 | } 111 | 112 | strong { 113 | color: #303030; 114 | } 115 | 116 | 117 | /* Lists */ 118 | ul, ol, dl { 119 | margin-top: 0; 120 | margin-bottom: 1rem; 121 | } 122 | 123 | dt { 124 | font-weight: bold; 125 | } 126 | dd { 127 | margin-bottom: .5rem; 128 | } 129 | 130 | /* Misc */ 131 | hr { 132 | position: relative; 133 | margin: 1.5rem 0; 134 | border: 0; 135 | border-top: 1px solid #eee; 136 | border-bottom: 1px solid #fff; 137 | } 138 | 139 | abbr { 140 | font-size: 85%; 141 | font-weight: bold; 142 | color: #555; 143 | text-transform: uppercase; 144 | } 145 | abbr[title] { 146 | cursor: help; 147 | border-bottom: 1px dotted #e5e5e5; 148 | } 149 | 150 | /* Quotes */ 151 | blockquote { 152 | padding: .5rem 1rem; 153 | margin: .8rem 0; 154 | color: #7a7a7a; 155 | border-left: .25rem solid #e5e5e5; 156 | } 157 | blockquote p:last-child { 158 | margin-bottom: 0; 159 | } 160 | @media (min-width: 30em) { 161 | blockquote { 162 | padding-right: 5rem; 163 | padding-left: 1.25rem; 164 | } 165 | } 166 | 167 | img { 168 | display: block; 169 | max-width: 100%; 170 | margin: 0 0 1rem; 171 | border-radius: 5px; 172 | } 173 | 174 | /* Tables */ 175 | table { 176 | margin-bottom: 1rem; 177 | width: 100%; 178 | border: 1px solid #e5e5e5; 179 | border-collapse: collapse; 180 | } 181 | td, 182 | th { 183 | padding: .25rem .5rem; 184 | border: 1px solid #e5e5e5; 185 | } 186 | tbody tr:nth-child(odd) td, 187 | tbody tr:nth-child(odd) th { 188 | background-color: #f9f9f9; 189 | } 190 | 191 | /* 192 | * __ __ 193 | * /\ \ /\ \ 194 | * \ \ \___ __ __ \_\ \ __ 195 | * \ \ _ `\/\ \/\ \ /'_` \ /'__`\ 196 | * \ \ \ \ \ \ \_\ \/\ \_\ \/\ __/ 197 | * \ \_\ \_\/`____ \ \___,_\ \____\ 198 | * \/_/\/_/`/___/> \/__,_ /\/____/ 199 | * /\___/ 200 | * \/__/ 201 | * 202 | * Designed, built, and released under MIT license by @mdo. Learn more at 203 | * https://github.com/poole/hyde. 
204 | */ 205 | 206 | 207 | /* 208 | * Contents 209 | * 210 | * Global resets 211 | * Sidebar 212 | * Container 213 | * Reverse layout 214 | * Themes 215 | */ 216 | 217 | 218 | /* 219 | * Global resets 220 | * 221 | * Update the foundational and global aspects of the page. 222 | */ 223 | 224 | html { 225 | font-family: "PT Sans", Helvetica, Arial, sans-serif; 226 | } 227 | 228 | /* 229 | * Sidebar 230 | * 231 | * Flexible banner for housing site name, intro, and "footer" content. Starts 232 | * out above content in mobile and later moves to the side with wider viewports. 233 | */ 234 | 235 | .sidebar { 236 | text-align: center; 237 | padding: 2rem 1rem; 238 | color: rgba(255,255,255,.5); 239 | background-color: #202020; 240 | } 241 | @media (min-width: 768px) { 242 | .sidebar { 243 | position: fixed; 244 | top: 0; 245 | left: 0; 246 | bottom: 0; 247 | width: 15rem; 248 | text-align: left; 249 | } 250 | } 251 | 252 | /* Sidebar links */ 253 | .sidebar a { 254 | color: #fff; 255 | } 256 | 257 | /* About section */ 258 | .sidebar-about h1 { 259 | color: #fff; 260 | margin-top: 0; 261 | font-family: "Abril Fatface", serif; 262 | font-size: 3.25rem; 263 | } 264 | 265 | /* Sidebar nav */ 266 | .sidebar-nav { 267 | margin-bottom: 1rem; 268 | } 269 | .sidebar-nav-item { 270 | display: block; 271 | line-height: 1.75; 272 | } 273 | a.sidebar-nav-item:hover, 274 | a.sidebar-nav-item:focus { 275 | text-decoration: underline; 276 | } 277 | .sidebar-nav-item.active { 278 | font-weight: bold; 279 | } 280 | 281 | /* Sticky sidebar 282 | * 283 | * Add the `sidebar-sticky` class to the sidebar's container to affix it the 284 | * contents to the bottom of the sidebar in tablets and up. 285 | */ 286 | 287 | @media (min-width: 768px) { 288 | .sidebar-sticky { 289 | position: absolute; 290 | right: 1rem; 291 | bottom: 1rem; 292 | left: 1rem; 293 | } 294 | } 295 | 296 | 297 | /* Container 298 | * 299 | * Align the contents of the site above the proper threshold with some margin-fu 300 | * with a 25%-wide `.sidebar`. 
301 | */ 302 | 303 | .content { 304 | padding-top: 4rem; 305 | padding-bottom: 4rem; 306 | } 307 | 308 | @media (min-width: 768px) { 309 | .content { 310 | max-width: 45rem; 311 | margin-left: 15rem; 312 | margin-right: 2rem; 313 | } 314 | } 315 | -------------------------------------------------------------------------------- /_layout/foot.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | {{ if hasmath }} 4 | {{ insert foot_katex.html }} 5 | {{ end }} 6 | {{ if hascode }} 7 | {{ insert foot_highlight.html }} 8 | {{ end }} 9 | 10 | 11 | -------------------------------------------------------------------------------- /_layout/foot_highlight.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /_layout/foot_katex.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /_layout/head.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | {{if hasmath}} {{insert head_katex.html }} {{end}} 7 | {{if hascode}} {{insert head_highlight.html }} {{end}} 8 | {{insert style.html}} 9 | {{isdef title}} {{fill title}} {{end}} 10 | 11 | 12 | {{insert sidebar.html}} 13 | 14 | -------------------------------------------------------------------------------- /_layout/head_highlight.html: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /_layout/head_katex.html: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /_layout/page_foot.html: -------------------------------------------------------------------------------- 1 |
2 | 6 |
7 | -------------------------------------------------------------------------------- /_layout/sidebar.html: -------------------------------------------------------------------------------- 1 | 219 |
-------------------------------------------------------------------------------- /_layout/style.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 15 | 16 | -------------------------------------------------------------------------------- /_layout/tag.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | {{insert style.html}} 7 | Tag: {{fill fd_tag}} 8 | 9 | 10 | {{insert sidebar.html}} 11 |
12 |

Tag: {{fill fd_tag}}

13 | {{taglist}} 14 | {{insert page_foot.html}} 15 |
16 | 17 | 18 | -------------------------------------------------------------------------------- /_libs/highlight/github.min.css: -------------------------------------------------------------------------------- 1 | .hljs{display:block;overflow-x:auto;padding:0.5em;color:#333;background:#f8f8f8}.hljs-comment,.hljs-quote{color:#998;font-style:italic}.hljs-keyword,.hljs-selector-tag,.hljs-subst{color:#333;font-weight:bold}.hljs-literal,.hljs-number,.hljs-tag .hljs-attr,.hljs-template-variable,.hljs-variable{color:#008080}.hljs-doctag,.hljs-string{color:#d14}.hljs-section,.hljs-selector-id,.hljs-title{color:#900;font-weight:bold}.hljs-subst{font-weight:normal}.hljs-class .hljs-title,.hljs-type{color:#458;font-weight:bold}.hljs-attribute,.hljs-name,.hljs-tag{color:#000080;font-weight:normal}.hljs-link,.hljs-regexp{color:#009926}.hljs-bullet,.hljs-symbol{color:#990073}.hljs-built_in,.hljs-builtin-name{color:#0086b3}.hljs-meta{color:#999;font-weight:bold}.hljs-deletion{background:#fdd}.hljs-addition{background:#dfd}.hljs-emphasis{font-style:italic}.hljs-strong{font-weight:bold} 2 | -------------------------------------------------------------------------------- /_libs/katex/auto-render.min.js: -------------------------------------------------------------------------------- 1 | !function(e,t){"object"==typeof exports&&"object"==typeof module?module.exports=t(require("katex")):"function"==typeof define&&define.amd?define(["katex"],t):"object"==typeof exports?exports.renderMathInElement=t(require("katex")):e.renderMathInElement=t(e.katex)}("undefined"!=typeof self?self:this,function(e){return function(e){var t={};function r(n){if(t[n])return t[n].exports;var o=t[n]={i:n,l:!1,exports:{}};return e[n].call(o.exports,o,o.exports,r),o.l=!0,o.exports}return r.m=e,r.c=t,r.d=function(e,t,n){r.o(e,t)||Object.defineProperty(e,t,{enumerable:!0,get:n})},r.r=function(e){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},r.t=function(e,t){if(1&t&&(e=r(e)),8&t)return e;if(4&t&&"object"==typeof e&&e&&e.__esModule)return e;var n=Object.create(null);if(r.r(n),Object.defineProperty(n,"default",{enumerable:!0,value:e}),2&t&&"string"!=typeof e)for(var o in e)r.d(n,o,function(t){return e[t]}.bind(null,o));return n},r.n=function(e){var t=e&&e.__esModule?function(){return e.default}:function(){return e};return r.d(t,"a",t),t},r.o=function(e,t){return Object.prototype.hasOwnProperty.call(e,t)},r.p="",r(r.s=1)}([function(t,r){t.exports=e},function(e,t,r){"use strict";r.r(t);var n=r(0),o=r.n(n),a=function(e,t,r){for(var n=r,o=0,a=e.length;n 6 | @def title = "Dataflowr - Deep Learning DIY" 7 | @def website_descr = "Website for deep learning course" 8 | @def prepath = "website" 9 | @def website_url = "https://dataflowr.github.io/website/" 10 | 11 | @def author = "Marc Lelarge" 12 | 13 | 38 | @def use_header_img = true 39 | @def use_hero = false 40 | @def hero_width = "80%" 41 | @def hero_margin_top = "100px" 42 | 43 | @def add_github_view = true 44 | @def add_github_star = true 45 | @def github_repo = "dataflowr/website" 46 | 47 | 53 | @def section_width = 10 54 | 55 | 68 | @def header_color = "#3f6388" 69 | @def link_color = "#2669DD" 70 | @def link_hover_color = "teal" 71 | @def section_bg_color = "#f6f8fa" 72 | @def footer_link_color = "cornflowerblue" 73 | 74 | 85 | @def highlight_theme = "atom-one-dark" 86 | @def code_border_radius = "10px" 87 | @def code_output_indent = "15px" 88 | 89 | 90 | 94 | 
\newcommand{\note}[1]{@@note @@title ⚠ Note@@ @@content #1 @@ @@} 95 | \newcommand{\warn}[1]{@@warning @@title ⚠ Warning!@@ @@content #1 @@ @@} 96 | 97 | \newcommand{\E}{\mathbb E} 98 | \newcommand{\R}{\mathbb R} 99 | \newcommand{\Sc}{\mathcal S} 100 | \newcommand{\bx}{{\bf x}} 101 | \newcommand{\by}{{\bf y}} 102 | \newcommand{\be}{{\bf e}} 103 | \newcommand{\ba}{{\bf a}} 104 | \newcommand{\bb}{{\bf b}} 105 | \newcommand{\bv}{{\bf v}} 106 | \newcommand{\bw}{{\bf w}} 107 | 108 | 113 | @def sections = Pair{String,String}[] 114 | @def section_counter = 1 115 | @def showall = true 116 | 117 | 118 | @def mintoclevel = 2 119 | 120 | 125 | @def ignore = ["node_modules/", "franklin", "franklin.pub"] 126 | 127 | 133 | \newcommand{\R}{\mathbb R} 134 | \newcommand{\scal}[1]{\langle #1 \rangle} 135 | \newcommand{\blurb}[1]{~~~

~~~#1~~~

~~~} 136 | -------------------------------------------------------------------------------- /homework/1-mlp-from-scratch.md: -------------------------------------------------------------------------------- 1 | @def sequence = ["module0"] 2 | 3 | # Homework 1: MLP from scratch 4 | 5 | Homework 1 is in the form of a Jupyter notebook. You must complete it and submit it on Moodle (for students enrolled in this course). 6 | 7 | [The Jupyter notebook](https://github.com/dataflowr/notebooks/blob/master/HW1/hw1_mlp.ipynb) 8 | 9 | This homework will run fine on a regular CPU (no need for a GPU). If you want to run it locally (on your laptop), you can follow the procedure described in [Module 0](/module0/). Note that if you cloned the [GitHub repository](https://github.com/dataflowr/notebooks), the homework will be in the folder `/notebooks/HW1` 10 | -------------------------------------------------------------------------------- /homework/2-CAM-adversarial.md: -------------------------------------------------------------------------------- 1 | @def sequence = ["module0"] 2 | 3 | # Homework 2: Class Activation Map and adversarial examples 4 | 5 | ## Can you see the cat below? No? Have a look at the code ;-) 6 | 7 | ~~~ 9 | ~~~ 10 | 11 | Homework 2 is in the form of a Jupyter notebook. You must complete it and submit it on Moodle (for students enrolled in this course). 12 | 13 | [The Jupyter notebook](https://github.com/dataflowr/notebooks/blob/master/HW2/HW2_CAM_Adversarial.ipynb) 14 | 15 | This homework will run fine on a regular CPU (no need for a GPU). If you want to run it locally (on your laptop), you can follow the procedure described in [Module 0](/module0/). Note that if you cloned the [GitHub repository](https://github.com/dataflowr/notebooks), the homework will be in the folder `/notebooks/HW2` 16 | -------------------------------------------------------------------------------- /homework/3-VAE.md: -------------------------------------------------------------------------------- 1 | # Homework 3: VAE for MNIST clustering and generation 2 | 3 | ~~~ 5 | ~~~ 6 | 7 | [Image source](https://github.com/Schlumberger/joint-vae) 8 | 9 | Homework 3 is in the form of a Jupyter notebook. You must complete it and submit it on Moodle (for students enrolled in this course). 10 | 11 | [The Jupyter notebook](https://github.com/dataflowr/notebooks/blob/master/HW3/VAE_clustering_empty.ipynb) -------------------------------------------------------------------------------- /modules/0-julia-setup.md: -------------------------------------------------------------------------------- 1 | # Projects in Julia 2 | 3 | Even for a personal project, we recommend creating a simple Julia package (much like you would use a virtual environment in Python). This is a simple tutorial to help you code an app in Julia. 4 | 5 | ## Prerequisite 6 | 7 | You need to have [Julia](https://julialang.org/downloads/) installed and a [GitHub](https://docs.github.com/en/get-started/signing-up-for-github/signing-up-for-a-new-github-account) account. 8 | 9 | ## Creating the Julia Package 10 | 11 | We'll be using [PkgSkeleton.jl](https://github.com/tpapp/PkgSkeleton.jl), which simplifies the creation of packages.
First check your git configuration (as it will be used to create the package) with: 12 | ``` 13 | git config --list 14 | ``` 15 | You should see your `user.name`, your `user.email` and your `github.user`; if not, set them, for example: 16 | ``` 17 | git config --global user.name "firstname lastname" 18 | git config --global user.email "bla.bla@domain.ext" 19 | git config --global github.user "ghuser" 20 | ``` 21 | 22 | Now, I describe the steps used to make a package called [KalmanAD.jl](https://github.com/mlelarge/KalmanAD.jl); you need to replace `KalmanAD.jl` with the name of your package and `mlelarge` with your `github.user`! There is nothing special about this particular package, but since naming (the `.jl` extension, for example) can be a little tricky, if you have any doubt you can have a look at the package on GitHub to see how things are organized... 23 | 24 | Move to the folder where you want to create your package: we will use Julia to create a folder `KalmanAD.jl` with the right structure. For this, start Julia and enter the package installation mode by typing `]`, then run the following (to exit the `pkg>` mode, just press Backspace): 25 | ``` 26 | pkg> add PkgSkeleton 27 | julia> using PkgSkeleton 28 | julia> PkgSkeleton.generate("KalmanAD.jl") 29 | ``` 30 | Now, when you exit Julia, you should have a folder `KalmanAD.jl` with some files and folders in it. 31 | 32 | ## Connecting to GitHub 33 | 34 | Go to GitHub.com and create a repository with the same name, `KalmanAD.jl`, by following [these steps](https://docs.github.com/en/get-started/quickstart/create-a-repo) (make it public). 35 | 36 | On your computer, inside your `KalmanAD.jl` folder, run the following commands: 37 | ``` 38 | git add . && git commit -m "initial commit" 39 | git remote add origin git@github.com:mlelarge/KalmanAD.jl.git 40 | git branch -M main 41 | git push -u origin main 42 | ``` 43 | The first command adds the files created by `PkgSkeleton` and commits them. The last three commands connect your git repo to GitHub (of course, you need to replace `mlelarge` and `KalmanAD.jl` with the appropriate values). Now you should see on your GitHub account the repository you created on your computer. 44 | 45 | ## Start coding 46 | 47 | Your package has three sub-folders: `docs`, `src` and `test`. Your code should go in the `src` subfolder. In particular, the `src` folder should contain a file with the same name as the package (i.e. `KalmanAD.jl` here), and this file contains: 48 | ``` 49 | module KalmanAD 50 | end #module 51 | ``` 52 | This file will need to be modified, as it defines the `module` of your package. 53 | 54 | Now, when you start coding, you will use other Julia packages. For example, you can see in [KalmanAD.jl](https://github.com/mlelarge/KalmanAD.jl/blob/main/src/KalmanAD.jl) that I am using the package `LinearAlgebra`, so I need to add it as a dependency of my own package (a bit like a virtual env in Python). To do so, run Julia and activate the environment of your package with the command: 55 | `julia --project`. Now, if you type `]`, you should see: 56 | ``` 57 | (KalmanAD) pkg> 58 | ``` 59 | and you can now add the packages you need, for example: 60 | ``` 61 | (KalmanAD) pkg> add LinearAlgebra 62 | ``` 63 | As a result, this will automatically modify the files `Project.toml` and `Manifest.toml`. Do not forget to add these files the next time you commit. 64 | 65 | ## Start testing 66 | 67 | In the `test` folder, there is a `runtest.jl` file that should contain the tests for your package.
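As an illustration, here is a minimal sketch of what `runtest.jl` could contain (here `my_filter` is a hypothetical placeholder; replace it with the functions your package actually exports):
```
using Test
using KalmanAD   # replace with the name of your own package

@testset "KalmanAD.jl" begin
    @test 1 + 1 == 2                          # trivial sanity check
    # @test my_filter(zeros(3)) isa Vector    # hypothetical: test your own functions
end
```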
You need to use the macro `@test`, which raises an error if the expression it is given does not evaluate to `true`.
In order to run your tests, you can use:
```
julia --project test/runtests.jl
```

## Start documenting

TBD
--------------------------------------------------------------------------------
/modules/0-sotfware-installation.md:
--------------------------------------------------------------------------------
# Setup

## Running the notebooks locally

To run the [notebooks](https://github.com/dataflowr/notebooks) locally, we recommend the following procedure:

- First clone the GitHub repository containing the notebooks. The following command will create a directory `notebooks` with all the files from the repository inside:
```
$ git clone https://github.com/dataflowr/notebooks.git
```

- Then, create a [virtual environment](https://docs.python.org/3/tutorial/venv.html): the following command will create a directory `dldiy` and also create directories inside it (so you might want to create this directory inside `/notebooks`)
```
$ python3 -m venv dldiy
```
- Activate the virtual environment:
```
$ source dldiy/bin/activate
```
- In order to be able to [use this virtual environment with your Jupyter notebook](https://anbasile.github.io/posts/2017-06-25-jupyter-venv/), you need to add a kernel. Inside your environment, first install `ipykernel`:
```
(dldiy)$ pip install ipykernel
(dldiy)$ ipython kernel install --user --name=dldiy
```
- Now, install all the relevant packages in your virtual environment:
```
(dldiy)$ cd notebooks
(dldiy)/notebooks$ pip install -r requirements.txt
```
- You are all set! If you launch `jupyter notebook`, you should be able to change the kernel to `dldiy`.

### tl;dr

```
$ git clone https://github.com/dataflowr/notebooks.git
$ python3 -m venv dldiy
$ source dldiy/bin/activate
(dldiy)$ pip install ipykernel
(dldiy)$ ipython kernel install --user --name=dldiy
(dldiy)$ cd notebooks
(dldiy)/notebooks$ pip install -r requirements.txt
(dldiy)/notebooks$ jupyter notebook
```
--------------------------------------------------------------------------------
/modules/1-intro-general-overview.md:
--------------------------------------------------------------------------------
@def sequence = ["course-intro"]

# Module 1 - Introduction & General Overview

**Table of Contents**

\toc


## Introduction & General Overview

{{youtube_placeholder course-intro}}

{{yt_tsp 0 0 Intro}}
{{yt_tsp 31 0 Goal of this lecture}}
{{yt_tsp 128 0 What is deep learning?}}
{{yt_tsp 426 0 Why deep learning now?}}
{{yt_tsp 573 0 Deep learning pipeline}}
{{yt_tsp 737 0 General overview}}
{{yt_tsp 962 0 Organization of the course}}
{{yt_tsp 1104 0 A first example in Colab (setting)}}
{{yt_tsp 1175 0 Dogs vs cats (data wrangling)}}
{{yt_tsp 1550 0 Data processing (dataset and dataloader)}}
{{yt_tsp 2451 0 VGG model}}
{{yt_tsp 2755 0 Modifying the last layer}}
{{yt_tsp 2990 0 Choosing your loss and optimizer for training}}
{{yt_tsp 3460 0 Precomputing features}}
{{yt_tsp 3819 0 Qualitative analysis}}

## Slides and Notebook

- [slides](https://dataflowr.github.io/slides/module1.html)
- :warning: Dogs and Cats with VGG: [static notebook](/notebooks_md/01_intro), [code (GitHub)](https://github.com/dataflowr/notebooks/blob/master/Module1/01_intro.ipynb), or run it in [colab](https://colab.research.google.com/github/dataflowr/notebooks/blob/master/Module1/01_intro.ipynb); a GPU is required for this notebook :warning:

## Practicals

- :warning: [More dogs and cats with VGG and resnet](https://github.com/dataflowr/notebooks/blob/master/Module1/01_practical_empty.ipynb) in [colab](https://colab.research.google.com/github/dataflowr/notebooks/blob/master/Module1/01_practical_empty.ipynb); a GPU is required for this notebook :warning:

--------------------------------------------------------------------------------
/modules/10-generative-adversarial-networks.md:
--------------------------------------------------------------------------------
@def sequence = ["gan"]

# Module 10 - Generative Adversarial Networks


**Table of Contents**

\toc


## Generative Adversarial Networks

{{youtube_placeholder gan}}
{{yt_tsp 0 0 Recap}}
{{yt_tsp 15 0 Presentation of GANs}}
{{yt_tsp 109 0 GAN learning}}
{{yt_tsp 253 0 Learning the discriminator}}
{{yt_tsp 376 0 Learning the generator}}
{{yt_tsp 445 0 A trick for learning the generator}}
{{yt_tsp 600 0 GAN for 2d-point clouds}}
{{yt_tsp 711 0 Training loop in PyTorch}}
{{yt_tsp 908 0 Loss curves}}
{{yt_tsp 972 0 Generation with GANs}}
{{yt_tsp 1035 0 Mode collapse}}
{{yt_tsp 1200 0 Conditional GAN}}
{{yt_tsp 1275 0 InfoGAN}}
{{yt_tsp 1374 0 Deep convolutional GAN}}
{{yt_tsp 1545 0 Practicals}}
{{yt_tsp 1718 0 Non convergence for GANs}}
{{yt_tsp 1980 0 Coding a conditional GAN}}
{{yt_tsp 2353 0 Coding an InfoGAN}}
{{yt_tsp 2615 0 Examples of failures}}

## Slides
- [slides](https://dataflowr.github.io/slides/module10.html)

## Practicals

- [Conditional GAN and InfoGAN](https://github.com/dataflowr/notebooks/blob/master/Module10/10_GAN_double_moon.ipynb) in [colab](https://colab.research.google.com/github/dataflowr/notebooks/blob/master/Module10/10_GAN_double_moon.ipynb)
--------------------------------------------------------------------------------
/modules/11a-recurrent-neural-networks-theory.md:
--------------------------------------------------------------------------------
@def sequence = ["rnn-1"]

# Module 11a - Recurrent Neural Networks theory

**Table of Contents**

\toc


## Theory of RNNs

{{youtube_placeholder rnn-1}}
{{yt_tsp 0 0 Recap}}
{{yt_tsp 52 0 Introduction to RNNs}}
{{yt_tsp 77 0 1D convolutional networks for sequences}}
{{yt_tsp 136 0 Various tasks for RNNs}}
{{yt_tsp 315 0 Theory of RNN}}
{{yt_tsp 479 0 Backprop for RNN}}
{{yt_tsp 630 0 A binary classification problem for sequences}}
{{yt_tsp 1037 0 Elman network}}
{{yt_tsp 1262 0 Training RNN}}
{{yt_tsp 1371 0 Results for Elman network}}
{{yt_tsp 1462 0 Gating for RNN}}
{{yt_tsp 1690 0 Gated RNN in PyTorch}}
{{yt_tsp 1767 0 Results for gated RNN}}
{{yt_tsp 1812 0 LSTM and GRU}}
{{yt_tsp 2051 0 Equations for GRU}}
{{yt_tsp 2243 0 Equations for LSTM}}
{{yt_tsp 2431 0 LSTM in PyTorch}}
{{yt_tsp 2564 0 Results for LSTM}}
{{yt_tsp 2623 0 Empirical results for LSTM and GRU}}

## Slides

- [slides](https://dataflowr.github.io/slides/module11.html)

## References

- [Understanding LSTM Networks](http://colah.github.io/posts/2015-08-Understanding-LSTMs/) by Christopher Olah

--------------------------------------------------------------------------------
/modules/11b-recurrent-neural-networks-practice.md:
--------------------------------------------------------------------------------
@def sequence = ["rnn-2"]

# Module 11b - Recurrent Neural Networks practice


**Table of Contents**

\toc


## RNNs in practice

{{youtube_placeholder rnn-2}}
{{yt_tsp 0 0 Generating the dataset for binary classification of parentheses}}
{{yt_tsp 296 0 Elman network}}
{{yt_tsp 685 0 RNN with gating}}
{{yt_tsp 846 0 LSTM}}
{{yt_tsp 1113 0 Be careful with errors given on the training set!}}

## Notebook

- [notebook](https://github.com/dataflowr/notebooks/blob/master/Module11/11_RNN.ipynb) in [colab](https://colab.research.google.com/github/dataflowr/notebooks/blob/master/Module11/11_RNN.ipynb)

## Practicals

- [notebook](https://github.com/dataflowr/notebooks/blob/master/Module11/11_predictions_RNN_empty.ipynb) (or open it in [colab](https://colab.research.google.com/github/dataflowr/notebooks/blob/master/Module11/11_predictions_RNN_empty.ipynb)) for predicting engine failure with RNNs.

## References

> RNNs can generate bounded hierarchical languages with optimal memory (2020) John Hewitt, Michael Hahn, Surya Ganguli, Percy Liang, Christopher D.
Manning [arXiv:2010.07515](https://arxiv.org/abs/2010.07515)

> Self-Attention Networks Can Process Bounded Hierarchical Languages (2021) Shunyu Yao, Binghui Peng, Christos Papadimitriou, Karthik Narasimhan [arXiv:2105.11115](https://arxiv.org/abs/2105.11115)
--------------------------------------------------------------------------------
/modules/11c-batches-with-sequences.md:
--------------------------------------------------------------------------------
@def sequence = ["batches-seq"]

# Module 11c - Batches with sequences in PyTorch


**Table of Contents**

\toc


## PyTorch tutorial on batches for sequences

{{youtube_placeholder batches-seq}}
{{yt_tsp 0 0 Presentation}}
{{yt_tsp 135 0 Step 1: Construct Vocabulary}}
{{yt_tsp 170 0 Step 2: Load indexed data (list of instances, where each instance is a list of character indices)}}
{{yt_tsp 225 0 Step 3: Make Model}}
{{yt_tsp 290 0 Step 4: Pad instances with 0s till max length sequence}}
{{yt_tsp 347 0 Step 5: Sort instances by sequence length in descending order}}
{{yt_tsp 415 0 Step 6: Embed the instances}}
{{yt_tsp 550 0 Step 7: Call pack_padded_sequence with embedded instances and sequence lengths}}
{{yt_tsp 761 0 Step 8: Forward with LSTM}}
{{yt_tsp 878 0 Step 9: Call unpack_padded_sequences if required / or just pick last hidden vector}}

## Notebook

- [notebook](https://github.com/dataflowr/notebooks/blob/master/Module11/11_Tutorial_packing_sequences.ipynb)
--------------------------------------------------------------------------------
/modules/12-attention.md:
--------------------------------------------------------------------------------
@def sequence = ["attention"]

# Module 12 - Attention and Transformers

**Table of Contents**

\toc



## Attention with RNNs

The first attention mechanism was proposed in [Neural Machine Translation by Jointly Learning to Align and Translate](https://arxiv.org/abs/1409.0473) by Dzmitry Bahdanau, Kyunghyun Cho, Yoshua Bengio (presented at ICLR 2015).

The task considered is English-to-French translation, and the attention mechanism is proposed to extend a seq2seq architecture by adding a context vector $c_i$ in the RNN decoder, so that the hidden states of the decoder are computed recursively as $s_i = f(s_{i-1}, y_{i-1}, c_i)$ where $y_{i-1}$ is the previously predicted token, and predictions are made in a probabilistic manner as $y_i \sim g(y_{i-1},s_i,c_i)$ where $s_i$ and $c_i$ are the current hidden state and context of the decoder.

Now the main novelty is the introduction of the context $c_i$, which is a weighted average of all the hidden states of the encoder: $c_i = \sum_{j=1}^T \alpha_{i,j} h_j$ where $T$ is the length of the input sequence, $h_1,\dots, h_T$ are the corresponding hidden states of the encoder, and $\sum_j \alpha_{i,j}=1$. Hence the context allows passing direct information from the 'relevant' part of the input to the decoder.
The coefficients $(\alpha_{i,j})_{j=1}^T$ are computed from the current hidden state of the decoder $s_{i-1}$ and all the hidden states from the encoder $(h_1, \dots, h_T)$ as explained below (taken from the original paper):


~~~
~~~

## PyTorch implementation

In [Attention for seq2seq](https://github.com/dataflowr/notebooks/blob/master/Module12/12_seq2seq_attention.ipynb), you can play with a simple model and code the attention mechanism proposed in the paper. For the alignment network $a$ (used to define the coefficient $\alpha_{i,j} = softmax_{j}(a(s_{i-1},h_j))$), we take an MLP with $\tanh$ activations.

You will learn about seq2seq and teacher-forcing for RNNs, and build the attention mechanism. To simplify things, we do not deal with batches (see [Batches with sequences in PyTorch](/modules/11c-batches-with-sequences) for more on that). The solution for this practical is provided in [Attention for seq2seq - solution](https://github.com/dataflowr/notebooks/blob/master/Module12/12_seq2seq_attention_solution.ipynb).


Note that each $\alpha_{i,j}$ is a real number, so we can display the matrix of $\alpha_{i,j}$'s where $j$ ranges over the input tokens and $i$ over the output tokens; see below (taken from the paper):

~~~
~~~

## (Self-)Attention in Transformers

We now describe the attention mechanism proposed in [Attention Is All You Need](https://arxiv.org/abs/1706.03762) by Vaswani et al. First, we recall basic notions from retrieval systems: query/key/value, illustrated by an example: searching for videos on YouTube. In this example, the query is the text in the search bar, the keys are the metadata associated with the videos, and the videos themselves are the values. Hence a score can be computed from the query and all the keys. Finally, the matched video with the highest score is returned.

We see that we can formalize this process as follows: if $Q_s$ is the current query and $K_t$ and $V_t$ are all the keys and values in the database, we return $$
Y_s = \sum_{t=1}^T\text{softmax}_{t}(\text{score}(Q_s, K_t))V_t,
$$
where $\sum_{t=1}^T\text{softmax}_{t}(\text{score}(Q_s, K_t))=1$.

Note that this formalism allows us to recover the way contexts were computed above (where the score function was called the alignment network). Now, we will change the score function and consider dot-product attention:
$ \text{score}(Q_s, K_t) = \frac{Q_s^TK_t}{\sqrt{d}}$. Note that for this definition to make sense, both the query $Q_s$ and the key $K_t$ need to live in the same space, and $d$ is the dimension of this space.

Given $s$ inputs in $\mathbb{R}^{d_{\text{in}}}$ denoted by a matrix $X\in \mathbb{R}^{d_{\text{in}}\times s}$ and a database containing $t$ samples in $\mathbb{R}^{d'}$ denoted by a matrix $X'\in \mathbb{R}^{d'\times t}$, we define:
$$
\text{the queries: } Q = W_Q X, \text{ with, } W_Q\in \mathbb{R}^{k\times d_{\text{in}}}\\
\text{the keys: } K = W_K X', \text{ with, } W_K\in \mathbb{R}^{k\times d'}\\
\text{the values: } V = W_V X', \text{ with, } W_V\in \mathbb{R}^{d_{\text{out}}\times d'}
$$

Now self-attention is simply obtained with $X=X'$ (so that $d'=d_{\text{in}}$) and $d_{\text{in}} = d_{\text{out}} = d$.
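To make the shapes concrete, here is a minimal PyTorch sketch of a single self-attention head in the column convention used above (tokens are columns of $X$). This is an illustration with toy dimensions, not the reference implementation from the course notebooks:

```python
import torch

torch.manual_seed(0)
d, k, T = 8, 4, 5            # feature dim, query/key dim, sequence length

# parameters of the self-attention layer
W_Q, W_K = torch.randn(k, d), torch.randn(k, d)
W_V = torch.randn(d, d)

X = torch.randn(d, T)        # column X_t is the t-th input token

Q = W_Q @ X                  # (k, T) queries
K = W_K @ X                  # (k, T) keys
V = W_V @ X                  # (d, T) values

scores = Q.T @ K / k ** 0.5  # (T, T); scores[s, t] = X_s^T W_Q^T W_K X_t / sqrt(k)
A = scores.softmax(dim=-1)   # normalize over t: each row sums to 1
Y = V @ A.T                  # (d, T); column Y_s = sum_t A[s, t] W_V X_t
assert Y.shape == X.shape    # same d and T as the input
```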
In summary, a self-attention layer takes as input any tensor of the form $X \in \mathbb{R}^{d\times T}$ (for any $T$), has parameters:
$$
W_Q\in \mathbb{R}^{k\times d}, W_K\in \mathbb{R}^{k\times d}, W_V\in \mathbb{R}^{d\times d},
$$
and produces $Y \in \mathbb{R}^{d\times T}$ (with the same $d$ and $T$ as the input). $d$ is the dimension of the input and $k$ is a hyper-parameter of the self-attention layer:
$$
Y_s = \sum_{t=1}^T\text{softmax}_{t}\left(\frac{X_s^TW_Q^TW_KX_t}{\sqrt{k}}\right)W_VX_t,
$$
with the convention that $X_t\in \mathbb{R}^d$ (resp. $Y_s\in \mathbb{R}^d$) is the $t$-th column of $X$ (resp. the $s$-th column of $Y$). Note that the notation $\text{softmax}_{t}(.)$ might be a bit confusing. Recall that $\text{softmax}$ always takes as input a vector and returns a (normalized) vector. In practice, most of the time, we are dealing with batches, so the $\text{softmax}$ function takes as input a matrix (or tensor) and we need to normalize along the right axis! Named tensor notation (see [below](#transformers_using_named_tensor_notation)) deals with this notational issue. I also find the interpretation given below helpful:

**Mental model for self-attention:** self-attention interpreted as taking an expectation
$$
y_s = \sum_{t=1}^T p(x_t | x_s) v(x_t) = \mathbb{E}[v(x) | x_s],\\
\text{with, } p(x_t|x_s) = \frac{\exp(q(x_s)k(x_t))}{\sum_{r}\exp(q(x_s)k(x_r))},
$$
where the mappings $q(.), k(.)$ and $v(.)$ represent query, key and value.

Multi-head attention combines several such operations in parallel; $Y$ is the concatenation of the results along the feature dimension, to which one more linear transformation is applied.


## Transformer block


~~~
~~~

To finish the description of a transformer block, we need to define the two last layers: Layer Norm and Feed Forward Network.

The Layer Norm used in the transformer block is particularly simple, as it acts on vectors and standardizes them as follows: for $x\in \mathbb{R}^d$, we define
$$
\text{mean}(x) =\frac{1}{d}\sum_{i=1}^d x_i\in \mathbb{R}\\
\text{std}(x)^2 = \frac{1}{d}\sum_{i=1}^d(x_i-\text{mean}(x))^2\in \mathbb{R}
$$
and then the Layer Norm has two parameters $\gamma, \beta\in \mathbb{R}^d$ and
$$
LN(x) = \gamma \cdot \frac{x-\text{mean}(x)}{\text{std}(x)}+\beta,
$$
where we used the natural broadcasting rule for subtracting the mean and dividing by the std, and $\cdot$ is component-wise multiplication.

A Feed Forward Network is an MLP acting on vectors: for $x\in \mathbb{R}^d$, we define $$
FFN(x) = \max(0,xW_1+b_1)W_2+b_2,
$$
where $W_1\in \mathbb{R}^{d\times h}$, $b_1\in \mathbb{R}^h$, $W_2\in \mathbb{R}^{h\times d}$, $b_2\in \mathbb{R}^d$.

Each of these layers is applied to each of the inputs given to the transformer block, as depicted below:

![](/modules/extras/attention/transformer_block_nocode.png)

Note that this block is equivariant: if we permute the inputs, then the outputs will be permuted with the same permutation. As a result, the order of the inputs is irrelevant to the transformer block; in particular, the block by itself cannot exploit the order of the sequence.
The important notion of positional encoding allows us to take order into account. It is a deterministic, unique encoding for each time step that is added to the input tokens.

## LLM Visualization

Have a look at Brendan Bycroft's beautifully crafted interactive explanation of the transformer architecture:

[![gif](/modules/extras/attention/transformer_vizu.gif)](https://bbycroft.net/llm)

## Transformers using Named Tensor Notation

In [Transformers using Named Tensor Notation](https://hackmd.io/@mlelarge/HkVlvrc8j), we derive the formal equations for the Transformer block using named tensor notation.


## Hacking a simple Transformer block

Now is the time to have fun building a simple transformer block and to [think like transformers](https://github.com/dataflowr/notebooks/blob/master/Module12/GPT_hist.ipynb) (open in [colab](https://colab.research.google.com/github/dataflowr/notebooks/blob/master/Module12/GPT_hist.ipynb)).

~~~
~~~

~~~
~~~
--------------------------------------------------------------------------------
/modules/12-intro-julia.md:
--------------------------------------------------------------------------------
@def sequence = ["intro-julia"]

# Module - Introduction to Julia: Automatic differentiation with dual numbers


**Table of Contents**

\toc


## Introduction to Julia: Automatic differentiation with dual numbers

{{youtube_placeholder intro-julia}}
{{yt_tsp 0 0 Dual numbers in Julia}}
{{yt_tsp 527 0 Using conversion and promotion}}
{{yt_tsp 805 0 Automatic differentiation for polynomials}}
{{yt_tsp 1055 0 Using Babylonian algorithm for the square root}}
{{yt_tsp 1467 0 Checking the derivative by hand}}
{{yt_tsp 1537 0 Pkg}} [ForwardDiff.jl](https://github.com/JuliaDiff/ForwardDiff.jl)

## Notebook

- [notebook](https://github.com/dataflowr/julia-notebooks.jl/blob/main/AD_with_dual_numbers_Julia.ipynb) (you need to install Julia) or use:

[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/dataflowr/julia-notebooks.jl/HEAD)
--------------------------------------------------------------------------------
/modules/13-siamese.md:
--------------------------------------------------------------------------------
@def sequence = ["siamese"]

# Module 13 - Siamese Networks and Representation Learning


**Table of Contents**

\toc


## Siamese Networks and Representation Learning

{{youtube_placeholder siamese}}
{{yt_tsp 57 0 Siamese networks for face recognition}}
{{yt_tsp 261 0 Siamese architecture}}
{{yt_tsp 369 0 Contrastive loss}}
{{yt_tsp 683 0 Training siamese networks}}
{{yt_tsp 866 0 Triplet architecture}}
{{yt_tsp 900 0 Triplet loss}}
{{yt_tsp 1036 0 Training with triplet loss}}
{{yt_tsp 1065 0 Pytorch code}}
{{yt_tsp 1220 0 Hard negative sampling}}
{{yt_tsp 1375 0 Applications}}
{{yt_tsp 1860 0 N-pair loss}}
{{yt_tsp 1926 0 Histogram loss}}
{{yt_tsp 2015 0 Prototypical networks}}
{{yt_tsp 2170 0 Take-away}}

## Slides and Notebook

- [slides](https://abursuc.github.io//slides/polytechnique/13-siamese-networks.html#1)

- [notebook](https://github.com/dataflowr/notebooks/blob/master/Module13/13_siamese_triplet_mnist_empty.ipynb)

--------------------------------------------------------------------------------
/modules/14a-depth.md:
--------------------------------------------------------------------------------
@def sequence = ["ben-depth"]

# Module 14a - The
Benefits of Depth 4 | 5 | 6 | **Table of Contents** 7 | 8 | \toc 9 | 10 | 11 | ## Benefits of Depth 12 | 13 | {{youtube_placeholder ben-depth}} 14 | 15 | ## Slides 16 | 17 | - [slides](https://abursuc.github.io/slides/polytechnique/14-01-deeper.html#1) 18 | -------------------------------------------------------------------------------- /modules/14b-depth.md: -------------------------------------------------------------------------------- 1 | @def sequence = ["pbs-depth"] 2 | 3 | # Module 14b - The Problems with Depth 4 | 5 | 6 | **Table of Contents** 7 | 8 | \toc 9 | 10 | 11 | ## The Problems with Depth 12 | 13 | {{youtube_placeholder pbs-depth}} 14 | 15 | ## Slides 16 | 17 | - [slides](https://abursuc.github.io/slides/polytechnique/14-02-problems.html#1) 18 | -------------------------------------------------------------------------------- /modules/15-dropout.md: -------------------------------------------------------------------------------- 1 | @def sequence = ["dropout"] 2 | 3 | # Module 15 - Dropout 4 | 5 | 6 | **Table of Contents** 7 | 8 | \toc 9 | 10 | 11 | ## Dropout 12 | 13 | {{youtube_placeholder dropout}} 14 | 15 | ## Slides and Notebook 16 | 17 | - [Dropout](https://abursuc.github.io/slides/polytechnique/15-01-dropout.html#1) 18 | - [notebook 1](https://github.com/dataflowr/notebooks/blob/master/Module15/15a_dropout_intro.ipynb) 19 | - [notebook 2](https://github.com/dataflowr/notebooks/blob/master/Module15/15b_dropout_mnist.ipynb) 20 | - [Uncertainty estimation - MC Dropout](https://abursuc.github.io/slides/polytechnique/15-02-uncertainty-estimation-dropout.html#1) -------------------------------------------------------------------------------- /modules/16-batchnorm.md: -------------------------------------------------------------------------------- 1 | @def sequence = ["batchnorm"] 2 | 3 | # Module 16 - Batchnorm 4 | 5 | 6 | **Table of Contents** 7 | 8 | \toc 9 | 10 | 11 | ## Batchnorm 12 | 13 | {{youtube_placeholder batchnorm}} 14 | 15 | 16 | ## Slides and Notebook 17 | 18 | - [slides](https://abursuc.github.io/slides/polytechnique/14-04-batchnorm.html#1) 19 | - [notebook](https://github.com/dataflowr/notebooks/blob/master/Module16/16_batchnorm_simple.ipynb) 20 | -------------------------------------------------------------------------------- /modules/17-resnets.md: -------------------------------------------------------------------------------- 1 | @def sequence = ["resnets"] 2 | 3 | # Module 17 - Resnets 4 | 5 | 6 | **Table of Contents** 7 | 8 | \toc 9 | 10 | 11 | ## Resnets 12 | 13 | {{youtube_placeholder resnets}} 14 | 15 | 16 | ## Slides 17 | 18 | - [slides](https://abursuc.github.io/slides/polytechnique/14-05-resnets.html#1) 19 | 20 | ## Practicals 21 | 22 | - [notebook](https://github.com/dataflowr/notebooks/blob/master/Module17/ODIN_mobilenet_empty.ipynb) for transforming your classifier into an out-of-distribution detector with [ODIN](https://arxiv.org/abs/1706.02690) 23 | 24 | ![](../extras/ODIN/original_optimal_shade.png) -------------------------------------------------------------------------------- /modules/18a-energy.md: -------------------------------------------------------------------------------- 1 | @def title = "energy" 2 | @def hasmath = true 3 | 4 | 5 | # Module 18a - Denoising Score Matching for Energy Based Models 6 | 7 | This module is based on the work: [How to Train Your Energy-Based Models](https://arxiv.org/abs/2101.03288) by Yang Song and Diederik P. Kingma (2021). 

~~~
~~~

**Table of Contents**

\toc

## Theory for Energy-Based Models (EBM)

The density given by an EBM is:
\begin{eqnarray*}
p_{\theta}(x) = \frac{\exp(-E_\theta(x))}{Z_\theta},
\end{eqnarray*}
where $E_\theta:\mathbb{R}^d \to \mathbb{R}$ and $Z_\theta=\int \exp(-E_\theta(x)) dx$.

Given samples $x_1,\dots, x_N$ in $\mathbb{R}^d$, we want to find the parameter $\theta$ maximizing the log-likelihood: $\max_\theta \sum_{i=1}^N \log p_{\theta}(x_i)$. Since $Z_\theta$ is a function of $\theta$, evaluation and differentiation of $\log p_{\theta}(x)$ w.r.t. $\theta$ involves a typically intractable integral.

### Maximum Likelihood Training with MCMC

We can estimate the gradient of the log-likelihood with MCMC approaches:
\begin{eqnarray*}
\nabla_\theta \log p_\theta(x) = -\nabla_\theta E_\theta(x)-\nabla_\theta
\log Z_\theta.
\end{eqnarray*}
The first term is simple to compute (with automatic differentiation).

**Maths: computing $\nabla_\theta \log Z_\theta$**
We have:
\begin{eqnarray*}
\nabla_\theta \log Z_\theta = \mathbb{E}_{p_{\theta}(x)}\left[-\nabla_\theta E_\theta(x)\right] \left(= \int p_{\theta}(x) \left[-\nabla_\theta E_\theta(x)\right] dx \right).
\end{eqnarray*}
**Proof:**
\begin{eqnarray*}
\nabla_\theta \log Z_\theta &=& \frac{\nabla_\theta Z_\theta}{Z_\theta}\\
&=& \frac{1}{Z_\theta} \int \nabla_\theta \exp (-E_\theta(x))dx\\
&=& \frac{-1}{Z_\theta} \int \nabla_\theta E_\theta(x) \exp (-E_\theta(x))dx\\
&=& \mathbb{E}_{p_{\theta}(x)}\left[-\nabla_\theta E_\theta(x)\right]
\end{eqnarray*}

Thus, we can obtain an unbiased one-sample Monte Carlo estimate of the log-likelihood gradient by
\begin{eqnarray*}
\nabla_\theta \log Z_\theta \approx -\nabla_\theta E_\theta(\tilde{x}),
\end{eqnarray*}
with $\tilde{x}\sim p_\theta(x)$, i.e. a random sample from the distribution given by the EBM. Therefore, we need to draw random samples from the model. As explained during the course, this can be done using Langevin MCMC. First note that the gradient of the log-probability w.r.t. $x$ (which is the score) is easy to calculate:
\begin{eqnarray*}
\nabla_x \log p_\theta(x) = -\nabla_x E_\theta(x) \text{ since } \nabla_x \log Z_\theta = 0.
\end{eqnarray*}
Hence, in this case, Langevin MCMC is given by:
\begin{eqnarray*}
x_t = x_{t-1} - \epsilon \nabla_x E_\theta(x_{t-1}) +\sqrt{2\epsilon}z_t,
\end{eqnarray*}
where $z_t\sim \mathcal{N}(0,I)$. When $\epsilon\to 0$ and $t\to \infty$, $x_t$ will be distributed as $p_\theta(x)$ (under some regularity conditions).

In this homework, we will consider an alternative learning procedure.

### Score Matching

The score (which was used in Langevin MCMC above) is defined as $$ s_\theta(x) = \nabla_x\log p_\theta(x) = -\nabla_x E_\theta(x) = -\left( \frac{\partial E_\theta(x)}{\partial x_1},\dots, \frac{\partial E_\theta(x)}{\partial x_d}\right).$$

If $p(x)$ denotes the (unknown) data distribution, the basic score matching objective minimizes:
$$
\mathbb{E}_{p(x)} \|\nabla_x \log p(x) - s_\theta(x)\|^2.
$$


The problem with this objective is that we cannot compute $\nabla_x \log p(x)$, as $p(x)$ is unknown: we can only approximate averages with respect to $p(x)$ by empirical averages.
Fortunately, we can solve this issue as we have:
$$
\mathbb{E}_{p(x)} \|\nabla_x \log p(x) - s_\theta(x)\|^2 = c + \mathbb{E}_{p(x)}\left[ \sum_{i=1}^d\left ( \frac{\partial E_\theta(x)}{\partial x_i}\right)^2-2\frac{\partial^2 E_\theta(x)}{\partial x^2_i}\right],
$$
where $c$ is a constant (not depending on $\theta$).

**Proof:**
\begin{eqnarray*}
\mathbb{E}_{p(x)} \|\nabla_x \log p(x) - s_\theta(x)\|^2 &=&\mathbb{E}_{p(x)} \|\nabla_x \log p(x) \|^2 +\mathbb{E}_{p(x)} \| s_\theta(x)\|^2 - 2 \mathbb{E}_{p(x)} \langle \nabla_x \log p(x) , s_\theta(x)\rangle\\
&=& c + \mathbb{E}_{p(x)}\left[ \sum_{i=1}^d\left ( \frac{\partial E_\theta(x)}{\partial x_i}\right)^2\right] - 2 \int p(x) \langle \frac{\nabla_x p(x)}{p(x)} , s_\theta(x)\rangle dx\\
&=& c + \mathbb{E}_{p(x)}\left[ \sum_{i=1}^d\left ( \frac{\partial E_\theta(x)}{\partial x_i}\right)^2\right] + 2\int p(x) \nabla_x \cdot s_\theta(x) dx,
\end{eqnarray*}
by integration by parts, where for a vector-valued function $v(x_1,x_2,x_3)$, $\nabla_x \cdot v = \frac{\partial v_1}{\partial x_1} + \frac{\partial v_2}{\partial x_2}+ \frac{\partial v_3}{\partial x_3}$. The statement follows since $\nabla_x \cdot s_\theta(x) = -\sum_{i=1}^d \frac{\partial^2 E_\theta(x)}{\partial x_i^2}$.

### Denoising Score Matching

There are several drawbacks to the score matching approach: computing the trace of the Hessian is expensive, and scores will not be accurately estimated in low-density regions; see [Generative Modeling by Estimating Gradients of the Data Distribution](https://yang-song.net/blog/2021/score/#naive-score-based-generative-modeling-and-its-pitfalls).

Denoising score matching is an elegant and scalable solution. Consider the random variable $Y = X+\sigma Z$, where $X\sim p(x)$ and $Z\sim\mathcal{N}(0,I)$. We denote by $p^\sigma(y)$ the distribution of $Y$, so that we have:
$$
\nabla_y\log p^\sigma(y) = -\frac{1}{\sigma}\mathbb{E}\left[ Z |Y=y\right] = -\frac{1}{\sigma}\mathbb{E}\left[ Z |X+\sigma Z=y\right].
$$
**Proof:**
\begin{eqnarray*}
\nabla_y\log p^\sigma(y) = \frac{\nabla_y p^\sigma(y)}{p^\sigma(y)}
\end{eqnarray*}
We denote by $\varphi$ the density of $\mathcal{N}(0,\sigma^2 I)$. We have $p^\sigma(y) = \int p(x) \varphi(y-x) dx$, so that using the fact that $\nabla_z \varphi(z) = -\frac{z}{\sigma^2} \varphi(z)$, we get
\begin{eqnarray*}
\nabla_y p^\sigma(y) &=& \int p(x) \nabla_y \varphi(y-x) dx\\
&=& \int p(x) \frac{-(y-x)}{\sigma^2} \varphi(y-x) dx \\
&=& -\frac{p^\sigma(y)}{\sigma}\mathbb{E}\left[ \frac{Y-X}{\sigma} |Y=y\right]\\
&=& -\frac{p^\sigma(y)}{\sigma}\mathbb{E}\left[ Z |Y=y\right],
\end{eqnarray*}
and dividing by $p^\sigma(y)$ gives the result.

The denoising score matching objective is now
$$
\mathbb{E}_{p^\sigma(y)}\|\nabla_y \log p^\sigma(y) - s_\theta(y)\|^2,
$$
which we will minimize by gradient descent on the parameter $\theta$.

In practice, we use the following relation:
$$
\mathbb{E}_{p^\sigma(y)}\|\nabla_y \log p^\sigma(y) - s_\theta(y)\|^2 = \mathbb{E}\left\| \frac{Z}{\sigma}+s_\theta(X+\sigma Z)\right\|^2-C
$$
where $C$ does not depend on $\theta$ (made explicit below).

**Proof:**
We have
\begin{eqnarray*}
\mathbb{E}_{p^\sigma(y)}\|\nabla_y \log p^\sigma(y) - s_\theta(y)\|^2 &=& \mathbb{E} \left[\left\| \mathbb{E} \left[\frac{Z}{\sigma} | Y\right] +s_\theta(Y)\right\|^2\right]\\
&=& \mathbb{E} \left[\left\| \mathbb{E} \left[\frac{Z}{\sigma} | Y\right]\right\|^2 + \left\|s_\theta(Y)\right\|^2 + 2 \left\langle \mathbb{E} \left[\frac{Z}{\sigma} | Y\right], s_\theta(Y)\right\rangle \right]\\
&=& \mathbb{E} \left[\left\| \mathbb{E} \left[\frac{Z}{\sigma} | Y\right]\right\|^2 \right] + \mathbb{E} \left[ \mathbb{E} \left[ \left\|s_\theta(Y)\right\|^2 + 2 \left\langle \frac{Z}{\sigma}, s_\theta(Y)\right\rangle | Y \right]\right]\\
&=& \mathbb{E} \left[\left\| \mathbb{E} \left[\frac{Z}{\sigma} | Y\right]\right\|^2 \right] + \mathbb{E} \left[ \left\|s_\theta(Y)\right\|^2 + 2 \left\langle \frac{Z}{\sigma}, s_\theta(Y)\right\rangle \right]\\
&=& \mathbb{E} \left[\left\| \mathbb{E} \left[\frac{Z}{\sigma} | Y\right]\right\|^2 \right] + \mathbb{E} \left[ \left\|s_\theta(Y) + \frac{Z}{\sigma} \right\|^2 \right] - \mathbb{E} \left[ \left\|\frac{Z}{\sigma}\right\|^2\right]\\
&=& \mathbb{E}\left\| \frac{Z}{\sigma}+s_\theta(X+\sigma Z)\right\|^2 - \mathbb{E} \left[ \left\|\frac{Z}{\sigma}\right\|^2 - \left\| \mathbb{E} \left[\frac{Z}{\sigma} | Y\right]\right\|^2 \right].
\end{eqnarray*}

Hence, in practice, we will minimize the (random) loss:
$$
\ell(\theta; x_1,\dots, x_N) = \frac{1}{N} \sum_{i=1}^N \left\| \frac{z_i}{\sigma}+s_\theta(x_i+\sigma z_i)\right\|^2,
$$
where the $z_i$ are i.i.d. Gaussian. As the dataset is typically too large for full gradient computations, we run the SGD algorithm, i.e. we make batches and use automatic differentiation to get the gradient w.r.t. $\theta$ over each batch.

## Code for Energy Based Models

- Denoising Score Matching for Energy Based Models for a simple case: [Denoising\_Score\_Matching\_Energy\_Model.ipynb](https://github.com/dataflowr/notebooks/blob/master/Module18/Denoising_Score_Matching_Energy_Model_empty.ipynb). Here is the corresponding solution: [solution](https://github.com/dataflowr/notebooks/blob/master/Module18/Denoising_Score_Matching_Energy_Model_sol.ipynb)




--------------------------------------------------------------------------------
/modules/18b-diffusion.md:
--------------------------------------------------------------------------------
@def title = "ddpm"
@def hasmath = true


# Module 18b - Denoising Diffusion Probabilistic Models

This module presents the work [Denoising Diffusion Probabilistic Models](https://arxiv.org/abs/2006.11239) by Jonathan Ho, Ajay Jain, Pieter Abbeel (2020). It starts with a description of the algorithm, then provides some notebooks to implement it on MNIST and CIFAR10, and finishes with some technical details.

**Table of Contents**

\toc
## Algorithm

~~~
~~~

### Forward diffusion process

Given a schedule $\beta_1<\beta_2<\dots <\beta_T$,
\begin{align*}
q(x_t|x_{t-1}) &= \mathcal{N}(x_t; \sqrt{1-\beta_t}x_{t-1},\beta_t I)\\
q(x_{1:T}|x_0) &= \prod_{t=1}^T q(x_t|x_{t-1})
\end{align*}

We define $\alpha_t = 1-\beta_t$ and $\overline{\alpha}_t = \prod_{i=1}^t\alpha_i$; then we have
\begin{align*}
x_t &= \sqrt{\alpha_t} x_{t-1} + \sqrt{1-\alpha_t}\epsilon_{t-1},\text{ with }\epsilon_{t-1}\sim\mathcal{N}(0,I)\\
&= \sqrt{\alpha_t\alpha_{t-1}} x_{t-2} +\sqrt{\alpha_t(1-\alpha_{t-1})}\epsilon_{t-2}+\sqrt{1-\alpha_t}\epsilon_{t-1}\\
&= \sqrt{\alpha_t\alpha_{t-1}} x_{t-2} + \sqrt{1-\alpha_t\alpha_{t-1}}\tilde{\epsilon}_{t}
\end{align*}
Hence, we have
\begin{align*}
x_t = \sqrt{\overline{\alpha}_t}x_0 + \sqrt{1-\overline{\alpha}_t}\epsilon
\end{align*}
```python
class DDPM(nn.Module):
    def __init__(self, network, num_timesteps,
                 beta_start=0.0001, beta_end=0.02, device=device):
        super(DDPM, self).__init__()
        self.num_timesteps = num_timesteps
        self.betas = torch.linspace(beta_start, beta_end,
                                    num_timesteps, dtype=torch.float32).to(device)
        self.alphas = 1.0 - self.betas
        self.alphas_cumprod = torch.cumprod(self.alphas, axis=0)
        self.network = network
        self.device = device
        self.sqrt_alphas_cumprod = self.alphas_cumprod ** 0.5
        self.sqrt_one_minus_alphas_cumprod = (1 - self.alphas_cumprod) ** 0.5

    def add_noise(self, x_start, noise, timesteps):
        # The forward process
        # x_start and noise (bs, n_c, w, d)
        # timesteps (bs)
        s1 = self.sqrt_alphas_cumprod[timesteps]            # bs
        s2 = self.sqrt_one_minus_alphas_cumprod[timesteps]  # bs
        s1 = s1.reshape(-1, 1, 1, 1)  # (bs, 1, 1, 1)
        s2 = s2.reshape(-1, 1, 1, 1)  # (bs, 1, 1, 1)
        return s1 * x_start + s2 * noise

    def reverse(self, x, t):
        # The network estimates the noise added
        return self.network(x, t)
```
### Approximating the reversed diffusion

Note that the law $q(x_{t-1}|x_t,x_0)$ is explicit:
\begin{align*}
q(x_{t-1}|x_t,x_0) = \mathcal{N}(x_{t-1};\mu(x_t,x_0), \gamma_t I),
\end{align*}
with
\begin{align*}
\mu(x_t,x_0) &= \frac{\sqrt{\alpha_t}(1-\overline{\alpha}_{t-1})}{1-\overline{\alpha}_{t}}x_t + \frac{\beta_t\sqrt{\overline{\alpha}_{t-1}}}{1-\overline{\alpha}_{t}}x_0\\
\gamma_t &= \frac{1-\overline{\alpha}_{t-1}}{1-\overline{\alpha}_{t}}\beta_t
\end{align*}
but we know that $x_0 = 1/\sqrt{\overline{\alpha}_t}\left( x_t-\sqrt{1-\overline{\alpha}_t}\epsilon\right)$, hence we have
\begin{align*}
\mu(x_t,x_0) &= \frac{1}{\sqrt{\alpha_t}}\left( x_t-\frac{1-\alpha_t}{\sqrt{1-\overline{\alpha}_t}}\epsilon\right) = \mu(x_t,t),
\end{align*}
where we removed the dependence on $x_0$ and replaced it with a dependence on $t$.

The idea is to approximate $q(x_{t-1}|x_t)$ by a neural network according to:
\begin{align*}
p_{\theta}(x_{t-1}|x_t) = \mathcal{N}(x_{t-1}; \mu_{\theta}(x_t,t), \beta_t I)
\end{align*}
and we approximate $q(x_{0:T})$ by
\begin{align*}
p(x_{0:T}) = p(x_T)\prod_{t=1}^T p_{\theta}(x_{t-1}|x_t),
\end{align*}
where $p(x_T) \sim \mathcal{N}(0,I)$.
Note that the variance parameter is fixed to $\beta_t$, which is the forward variance (mainly for simplicity; variations have been proposed).

The neural network is trained by minimizing the usual variational bound (on the negative log-likelihood):
\begin{align*}
\mathbb{E}_{q(x_0)} \left[-\ln p_{\theta}(x_0)\right] &\leq \mathbb{E}_{q(x_{0:T})}\left[ \ln\frac{q(x_{1:T}|x_0)}{p_\theta(x_{0:T})}\right]\\
&=\mathbb{E}_q\left[ \text{KL}\left( q(x_T|x_0)\|p(x_T)\right)+\sum_{t=2}^T\text{KL}\left( q(x_{t-1}|x_t,x_0)\|p_{\theta}(x_{t-1}|x_t)\right)-\ln p_{\theta}(x_0|x_1)\right]\\
&= L_T +\sum_{t=2}^T L_{t-1}+L_0.
\end{align*}
Note that $L_T$ does not depend on $\theta$; the other terms are KL divergences between Gaussian distributions and have an explicit expression:
\begin{align*}
L_{t-1} = \mathbb{E}_q\left[ \frac{1}{2\beta_t}\|\mu_\theta(x_t,t) -\mu(x_t,t)\|^2\right]
\end{align*}
Now, we make the change of variable:
\begin{align*}
\mu_\theta(x_t,t) = \frac{1}{\sqrt{\alpha_t}}\left( x_t-\frac{1-\alpha_t}{\sqrt{1-\overline{\alpha}_t}}\epsilon_\theta(x_t,t)\right),
\end{align*}
so that we have
\begin{align*}
\|\mu_\theta(x_t,t) -\mu(x_t,t)\|^2 = \frac{(1-\alpha_t)^2}{\alpha_t(1-\overline{\alpha}_t)}\|\epsilon - \epsilon_\theta(\sqrt{\overline{\alpha}_t}x_0 + \sqrt{1-\overline{\alpha}_t}\epsilon, t)\|^2
\end{align*}
Empirically, the prefactor is removed in the loss, and instead of summing over all $t$, we average over a random $\tau\in [0,T-1]$, so that the loss is finally:
\begin{align*}
\ell(\theta) = \mathbb{E}_\tau\mathbb{E}_\epsilon \left[ \|\epsilon - \epsilon_\theta(\sqrt{\overline{\alpha}_\tau}x_0 + \sqrt{1-\overline{\alpha}_\tau}\epsilon, \tau)\|^2\right]
\end{align*}

```python
# inside the training loop
for step, batch in enumerate(dataloader):
    batch = batch[0].to(device)
    noise = torch.randn(batch.shape).to(device)
    timesteps = torch.randint(0, num_timesteps, (batch.shape[0],)).long().to(device)

    noisy = model.add_noise(batch, noise, timesteps)
    noise_pred = model.reverse(noisy, timesteps)
    loss = F.mse_loss(noise_pred, noise)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
```


### Sampling

For sampling, we need to simulate the reversed diffusion (Markov chain) starting from $x_T\sim \mathcal{N}(0,I)$ and then:
\begin{align*}
x_{t-1} = \frac{1}{\sqrt{\alpha_t}}\left( x_t-\frac{1-\alpha_t}{\sqrt{1-\overline{\alpha}_t}}\epsilon_\theta(x_t,t)\right)+\sqrt{\beta_t}\epsilon,\text{ with } \epsilon\sim\mathcal{N}(0,I).
\end{align*}
```python
# inside Module DDPM
def step(self, model_output, timestep, sample):
    # one step of sampling
    # timestep (1)
    t = timestep
    coef_epsilon = (1 - self.alphas) / self.sqrt_one_minus_alphas_cumprod
    coef_eps_t = coef_epsilon[t].reshape(-1, 1, 1, 1)
    coef_first = 1 / self.alphas ** 0.5
    coef_first_t = coef_first[t].reshape(-1, 1, 1, 1)
    pred_prev_sample = coef_first_t * (sample - coef_eps_t * model_output)

    variance = 0
    if t > 0:
        noise = torch.randn_like(model_output).to(self.device)
        variance = (self.betas[t] ** 0.5) * noise

    pred_prev_sample = pred_prev_sample + variance

    return pred_prev_sample
```
## Summary: [Denoising Diffusion Probabilistic Models](https://arxiv.org/abs/2006.11239)
(J. Ho, A. Jain, P.
Abbeel 2020)

~~~
~~~
@@colbox-blue
Given a schedule $\beta_1<\beta_2<\dots <\beta_T$, the **forward diffusion process** is defined by:
$q(x_t|x_{t-1}) = \mathcal{N}(x_t; \sqrt{1-\beta_t}x_{t-1},\beta_t I)$ and $q(x_{1:T}|x_0) = \prod_{t=1}^T q(x_t|x_{t-1})$.

With $\alpha_t = 1-\beta_t$ and $\overline{\alpha}_t = \prod_{i=1}^t\alpha_i$, we see that, with $\epsilon\sim\mathcal{N}(0,I)$:
\begin{align*}
x_t = \sqrt{\overline{\alpha}_t}x_0 + \sqrt{1-\overline{\alpha}_t}\epsilon.
\end{align*}
The law $q(x_{t-1}|x_t,\epsilon)$ is explicit: $q(x_{t-1}|x_t,\epsilon) = \mathcal{N}(x_{t-1};\mu(x_t,\epsilon,t), \gamma_t I)$ with,
\begin{align*}
\mu(x_t,\epsilon, t) = \frac{1}{\sqrt{\alpha_t}}\left( x_t-\frac{1-\alpha_t}{\sqrt{1-\overline{\alpha}_t}}\epsilon\right)\text{ and, }
\gamma_t = \frac{1-\overline{\alpha}_{t-1}}{1-\overline{\alpha}_{t}}\beta_t
\end{align*}
@@

@@colbox-blue
**Training**: to approximate **the reversed diffusion** $q(x_{t-1}|x_t)$ by a neural network given by $p_{\theta}(x_{t-1}|x_t) = \mathcal{N}(x_{t-1}; \mu_{\theta}(x_t,t), \beta_t I)$ and $p(x_T) \sim \mathcal{N}(0,I)$, we minimize the usual variational bound:
\begin{align*}
\mathbb{E}_{q(x_0)} \left[-\ln p_{\theta}(x_0)\right] &\leq L_T +\sum_{t=2}^T L_{t-1}+L_0 \text{ with, }L_{t-1} = \mathbb{E}_q\left[ \frac{1}{2\sigma_t^2}\|\mu_\theta(x_t,t) -\mu(x_t,\epsilon,t)\|^2\right].
\end{align*}
With the change of variable:
\begin{align*}
\mu_\theta(x_t,t) = \frac{1}{\sqrt{\alpha_t}}\left( x_t-\frac{1-\alpha_t}{\sqrt{1-\overline{\alpha}_t}}\epsilon_\theta(x_t,t)\right),
\end{align*}
ignoring the prefactor and sampling $\tau$ instead of summing over all $t$, the loss is finally:
\begin{align*}
\ell(\theta) = \mathbb{E}_\tau\mathbb{E}_\epsilon \left[ \|\epsilon - \epsilon_\theta(\sqrt{\overline{\alpha}_\tau}x_0 + \sqrt{1-\overline{\alpha}_\tau}\epsilon, \tau)\|^2\right]
\end{align*}
@@

@@colbox-blue
**Sampling**: to simulate the reversed diffusion with the learned $\epsilon_\theta(x_t,t)$ starting from $x_T\sim \mathcal{N}(0,I)$, iterate for $t=T,\dots, 1$:
\begin{align*}
x_{t-1} = \frac{1}{\sqrt{\alpha_t}}\left( x_t-\frac{1-\alpha_t}{\sqrt{1-\overline{\alpha}_t}}\epsilon_\theta(x_t,t)\right)+\sqrt{\beta_t}\epsilon,\text{ with } \epsilon\sim\mathcal{N}(0,I).
\end{align*}
@@

## Implementation

![](../extras/diffusions/mnist_diffusion.gif)
### MNIST

The training of this notebook on colab takes approximately 20 minutes.

- [ddpm\_nano\_empty.ipynb](https://github.com/dataflowr/notebooks/blob/master/Module18/ddpm_nano_empty.ipynb) is the notebook where you code the DDPM algorithm (a simple UNet is provided for the network $\epsilon_\theta(x,t)$), its training, and the sampling. You should get results like this:

![](../extras/diffusions/mnist_result.png)

- Here is the corresponding solution: [ddpm\_nano\_sol.ipynb](https://github.com/dataflowr/notebooks/blob/master/Module18/ddpm_nano_sol.ipynb)


### CIFAR10

The training of this notebook on colab takes approximately 20 minutes (so do not expect high-quality pictures!). Still, after finetuning on specific classes, we see that the model learns features of the class.

~~~
~~~

- [ddpm\_micro\_sol.ipynb](https://github.com/dataflowr/notebooks/blob/master/Module18/ddpm_micro_sol.ipynb)

With a bit more training (100 epochs), you can get results like this:

![](../extras/diffusions/ships.png)

![](../extras/diffusions/horses.png)

![](../extras/diffusions/trucks.png)


## Technical details

Note that the Denoising Diffusion Probabilistic Model is the same for MNIST and CIFAR10; we only change the UNet that learns to reverse the noise. For CIFAR10, we adapt the UNet provided in [Module 9b](../9b-unet). Indeed, you can still use the code provided here for DDPM with other architectures, such as more complex ones with self-attention like this [Unet](https://github.com/lucidrains/denoising-diffusion-pytorch/blob/main/denoising_diffusion_pytorch/denoising_diffusion_pytorch.py#L271) coded by [lucidrains](https://github.com/lucidrains), which is the one used in the original paper.

In the paper, the authors used an Exponential Moving Average (EMA) on the model parameters with a decay factor of $0.999$. This is not implemented here to keep the code as simple as possible.
--------------------------------------------------------------------------------
/modules/19-clip.md:
--------------------------------------------------------------------------------
@def sequence = ["clip"]

# Module 19 - Zero-shot classification with CLIP

~~~
~~~


## Notebook

- In [Zeroshot\_with\_CLIP.ipynb](https://github.com/dataflowr/notebooks/blob/master/Module19/Zeroshot_with_CLIP.ipynb) we build a zero-shot classifier using the pretrained CLIP network and improve its performance with descriptors generated with GPT.

## References

> [CLIP](https://github.com/openai/CLIP) Learning Transferable Visual Models From Natural Language Supervision (ICML 2021) Alec Radford et al.

> [Visual Classification via Description from Large Language Models](https://github.com/sachit-menon/classify_by_description_release) (ICLR 2023) Sachit Menon, Carl Vondrick
--------------------------------------------------------------------------------
/modules/2a-pytorch-tensors.md:
--------------------------------------------------------------------------------
@def sequence = ["pytorch-tensors"]

# Module 2a - PyTorch tensors

**Table of Contents**

\toc


## PyTorch tensors

{{youtube_placeholder pytorch-tensors}}

{{yt_tsp 0 0 Recap}}
{{yt_tsp 103 0 Introduction to tensors}}
{{yt_tsp 272 0 Sizes}}
{{yt_tsp 325 0 Bridge to numpy}}
{{yt_tsp 670 0 Broadcasting}}
{{yt_tsp 875 0 Inplace modification}}
{{yt_tsp 990 0 Shared memory}}
{{yt_tsp 1120 0 Cuda}}
{{yt_tsp 1354 0 CIFAR dataset}}

## Notebook

- [static notebook](/notebooks_md/02a_basics), [code (GitHub)](https://github.com/dataflowr/notebooks/blob/master/Module2/02a_basics.ipynb) in [colab](https://colab.research.google.com/github/dataflowr/notebooks/blob/master/Module2/02a_basics.ipynb)


## Quiz

To check your understanding of the material, you can do the [quizzes](https://dataflowr.github.io/quiz/module2.html)

--------------------------------------------------------------------------------
/modules/2b-automatic-differentiation.md:
--------------------------------------------------------------------------------
@def sequence = ["automatic-diff"]

# Module 2b - Automatic differentiation

**Table of Contents**

\toc


## Automatic differentiation

{{youtube_placeholder automatic-diff}}

{{yt_tsp 0 0 Recap}}
{{yt_tsp 40 0 A simple example (more in the practicals)}}
{{yt_tsp 224 0 Pytorch tensor: requires_grad field}}
{{yt_tsp 404 0 Pytorch backward function}}
{{yt_tsp 545 0 The chain rule on our example}}
{{yt_tsp 960 0 Linear regression}}
{{yt_tsp 1080 0 Gradient descent with numpy...}}
{{yt_tsp 1650 0 ... with pytorch tensors}}
{{yt_tsp 1890 0 Using autograd}}
{{yt_tsp 2075 0 Using a neural network (linear layer)}}
{{yt_tsp 2390 0 Using a pytorch optimizer}}
{{yt_tsp 2640 0 Backprop algorithm: how automatic differentiation works}}

## Slides and Notebook

- Automatic differentiation: a simple example [static notebook](/notebooks_md/02a_basics), [code (GitHub)](https://github.com/dataflowr/notebooks/blob/master/Module2/02a_basics.ipynb) in [colab](https://colab.research.google.com/github/dataflowr/notebooks/blob/master/Module2/02a_basics.ipynb)
- [notebook](https://github.com/dataflowr/notebooks/blob/master/Module2/02b_linear_reg.ipynb) used in the video for the linear regression.
If you want to open it in [colab](https://colab.research.google.com/github/dataflowr/notebooks/blob/master/Module2/02b_linear_reg.ipynb)
- [backprop slide](https://raw.githubusercontent.com/dataflowr/slides/master/backprop.pdf) (used for the practical below)

## Quiz

To check your understanding of automatic differentiation, you can do the [quizzes](https://dataflowr.github.io/quiz/module2.html)
## Practicals

![](https://dataflowr.github.io/notebooks/Module2/img/backprop3.png)

- [practicals](https://github.com/dataflowr/notebooks/blob/master/Module2/02_backprop.ipynb) in [colab](https://colab.research.google.com/github/dataflowr/notebooks/blob/master/Module2/02_backprop.ipynb): coding backprop.

## Challenge

Adapt your code to solve the following challenge:

![](https://dataflowr.github.io/notebooks/Module2/img/backprop4.png)

Some small modifications:
- First modification: we now generate points $(x_t,y_t)$ where $y_t= \exp(w^*\cos(x_t)+b^*)$, i.e. $y_t$ is obtained by applying a deterministic function to $x_t$ with parameters $w^*$ and $b^*$. Our goal is still to recover the parameters $w^*$ and $b^*$ from the observations $(x_t,y_t)$.

- Second modification: we now generate points $(x_t,y_t)$ where $y_t= \exp(w^*\cos(p^*x_t)+b^*)$, i.e. $y_t$ is obtained by applying a deterministic function to $x_t$ with parameters $p^*$, $w^*$ and $b^*$. Our goal is still to recover the parameters from the observations $(x_t,y_t)$.

## Bonus

- [JAX](https://jax.readthedocs.io/en/latest/index.html) implementation of the linear regression: [notebook](https://github.com/dataflowr/notebooks/blob/master/Module2/linear_regression_jax.ipynb) in [colab](https://colab.research.google.com/github/dataflowr/notebooks/blob/master/Module2/linear_regression_jax.ipynb); see [Module 2c](/modules/2c-jax) for more details.
--------------------------------------------------------------------------------
/modules/2c-jax.md:
--------------------------------------------------------------------------------
@def sequence = ["jax"]

# Module 2c - Automatic differentiation: VJP and intro to JAX

**Table of Contents**

\toc

# Autodiff and Backpropagation


## Jacobian

Let $\mathbf{f}:\mathbb{R}^n\to \mathbb{R}^m$; we define its Jacobian as:
\begin{align*}
\frac{\partial \mathbf{f}}{\partial \mathbf{x}} = J_{\mathbf{f}}(\mathbf{x}) &= \left( \begin{array}{ccc}
\frac{\partial f_1}{\partial x_1}&\dots& \frac{\partial f_1}{\partial x_n}\\
\vdots&&\vdots\\
\frac{\partial f_m}{\partial x_1}&\dots& \frac{\partial f_m}{\partial x_n}
\end{array}\right)\\
&=\left( \frac{\partial \mathbf{f}}{\partial x_1},\dots, \frac{\partial \mathbf{f}}{\partial x_n}\right)\\
&=\left(
\begin{array}{c}
\nabla f_1(\mathbf{x})^T\\
\vdots\\
\nabla f_m(\mathbf{x})^T
\end{array}\right)
\end{align*}

Hence the Jacobian $J_{\mathbf{f}}(\mathbf{x})\in \mathbb{R}^{m\times n}$ is a linear map from $\mathbb{R}^n$ to $\mathbb{R}^m$ such that for $\mathbf{x},\mathbf{v} \in \mathbb{R}^n$ and $h\in \mathbb{R}$:
\begin{align*}
\mathbf{f}(\mathbf{x}+h\mathbf{v}) = \mathbf{f}(\mathbf{x}) + h J_{\mathbf{f}}(\mathbf{x})\mathbf{v} +o(h).
\end{align*}
The term $J_{\mathbf{f}}(\mathbf{x})\mathbf{v}\in \mathbb{R}^m$ is a Jacobian Vector Product (**JVP**), corresponding to the interpretation where the Jacobian is the linear map: $J_{\mathbf{f}}(\mathbf{x}):\mathbb{R}^n \to \mathbb{R}^m$, where $J_{\mathbf{f}}(\mathbf{x})(\mathbf{v})=J_{\mathbf{f}}(\mathbf{x})\mathbf{v}$.

## Chain composition

In machine learning, we compute gradients of the loss function with respect to the parameters. In particular, even when the parameters are high-dimensional, the loss itself is a real number. Hence, consider a real-valued function $\mathbf{f}:\mathbb{R}^n\stackrel{\mathbf{g}_1}{\to}\mathbb{R}^m \stackrel{\mathbf{g}_2}{\to}\mathbb{R}^d\stackrel{h}{\to}\mathbb{R}$, so that $\mathbf{f}(\mathbf{x}) = h(\mathbf{g}_2(\mathbf{g}_1(\mathbf{x})))\in \mathbb{R}$. We have
\begin{align*}
\underbrace{\nabla\mathbf{f}(\mathbf{x})}_{n\times 1}=\underbrace{J_{\mathbf{g}_1}(\mathbf{x})^T}_{n\times m}\underbrace{J_{\mathbf{g}_2}(\mathbf{g}_1(\mathbf{x}))^T}_{m\times d}\underbrace{\nabla h(\mathbf{g}_2(\mathbf{g}_1(\mathbf{x})))}_{d\times 1}.
\end{align*}
To do this computation, we can start from the right: a matrix times a vector gives a vector (of size $m$), and we then need only one more matrix-vector product, resulting in $O(nm+md)$ operations. If we start from the left with the matrix-matrix multiplication, we get $O(nmd+nd)$ operations. Hence we see that as soon as $m\approx d$, starting from the right is much more efficient. Note however that doing the computation from the right to the left requires keeping in memory the values of $\mathbf{g}_1(\mathbf{x})\in\mathbb{R}^m$ and $\mathbf{x}\in \mathbb{R}^n$.

**Backpropagation** is an efficient algorithm computing the gradient "from the right to the left", i.e. backward. In particular, we will need to compute quantities of the form $J_{\mathbf{f}}(\mathbf{x})^T\mathbf{u} \in \mathbb{R}^n$ with $\mathbf{u} \in\mathbb{R}^m$, which can be rewritten as $\mathbf{u}^T J_{\mathbf{f}}(\mathbf{x})$, a Vector Jacobian Product (**VJP**), corresponding to the interpretation where the Jacobian is the linear map $J_{\mathbf{f}}(\mathbf{x}):\mathbb{R}^n \to \mathbb{R}^m$ composed with the linear map $\mathbf{u}:\mathbb{R}^m\to \mathbb{R}$, so that $\mathbf{u}^TJ_{\mathbf{f}}(\mathbf{x}) = \mathbf{u} \circ J_{\mathbf{f}}(\mathbf{x})$.

**Example:** let $\mathbf{f}(\mathbf{x}, W) = \mathbf{x} W\in \mathbb{R}^b$ where $W\in \mathbb{R}^{a\times b}$ and $\mathbf{x}\in \mathbb{R}^a$. We clearly have
$$
J_{\mathbf{f}}(\mathbf{x}) = W^T.
$$
Note that here, we are slightly abusing notations and considering the partial function $\mathbf{x}\mapsto \mathbf{f}(\mathbf{x}, W)$. To see this, we can write $f_j = \sum_{i}x_iW_{ij}$ so that
$$
\frac{\partial \mathbf{f}}{\partial x_i}= \left( W_{i1}\dots W_{ib}\right)^T
$$
Then recall from the definitions that
$$
J_{\mathbf{f}}(\mathbf{x}) = \left( \frac{\partial \mathbf{f}}{\partial x_1},\dots, \frac{\partial \mathbf{f}}{\partial x_n}\right)=W^T.
$$
Now we clearly have
$$
J_{\mathbf{f}}(W) = \mathbf{x} \text{ since } \mathbf{f}(\mathbf{x}, W+\Delta W) = \mathbf{f}(\mathbf{x}, W) + \mathbf{x} \Delta W.
$$
Note that multiplying by $\mathbf{x}$ on the left is actually convenient when using broadcasting, i.e. we can take a batch of input vectors of shape $\text{bs}\times a$ without modifying the math above.
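
To make this concrete, here is a minimal `numpy` sketch of the two VJP functions for this linear example, in the batched convention of the last remark. This is an illustration with made-up helper names, not the course's reference code:

```python
import numpy as np

def linear(x, W):
    # f(x, W) = x W, with x of shape (bs, a) and W of shape (a, b)
    return x @ W

def linear_vjps(x, W):
    # u has the shape of the output, (bs, b)
    vjp_x = lambda u: u @ W.T   # J_f(x)^T u, shape (bs, a)
    vjp_w = lambda u: x.T @ u   # J_f(W)^T u, shape (a, b) (summed over the batch)
    return vjp_x, vjp_w

# quick sanity check against finite differences in a random direction dW
rng = np.random.default_rng(0)
x, W = rng.normal(size=(3, 4)), rng.normal(size=(4, 2))
u = rng.normal(size=(3, 2))     # incoming gradient, same shape as the output
vjp_x, vjp_w = linear_vjps(x, W)
dW, eps = rng.normal(size=W.shape), 1e-6
num = ((linear(x, W + eps * dW) - linear(x, W)) * u).sum() / eps
print(np.allclose(num, (vjp_w(u) * dW).sum(), atol=1e-4))  # True
```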
63 | 64 | ## Implementation 65 | 66 | In PyTorch, `torch.autograd` provides classes and functions implementing automatic differentiation of arbitrary scalar-valued functions. To create a custom [autograd.Function](https://pytorch.org/docs/stable/autograd.html#torch.autograd.Function), subclass this class and implement the `forward()` and `backward()` static methods. Here is an example (with the imports needed to run it as a standalone snippet): 67 | ```python 68 | import torch
from torch.autograd import Function

class Exp(Function): 69 | @staticmethod 70 | def forward(ctx, i): 71 | result = i.exp() 72 | ctx.save_for_backward(result) 73 | return result 74 | @staticmethod 75 | def backward(ctx, grad_output): 76 | result, = ctx.saved_tensors 77 | return grad_output * result 78 | # Use it by calling the apply method: 79 | input = torch.randn(3, requires_grad=True)
output = Exp.apply(input) 80 | ``` 81 | You can have a look at [Module 2b](https://dataflowr.github.io/website/modules/2b-automatic-differentiation) to learn more about this approach as well as [MLP from scratch](https://dataflowr.github.io/website/homework/1-mlp-from-scratch/). 82 | 83 | ### Backprop the functional way 84 | 85 | Here we will implement in `numpy` a different approach mimicking the functional approach of [JAX](https://jax.readthedocs.io/en/latest/index.html); see [The Autodiff Cookbook](https://jax.readthedocs.io/en/latest/notebooks/autodiff_cookbook.html#). 86 | 87 | Each function takes two arguments: the input `x` and the parameters `w`. For each function, we build two **vjp** functions taking as argument a gradient $\mathbf{u}$ and corresponding to $J_{\mathbf{f}}(\mathbf{x})$ and $J_{\mathbf{f}}(\mathbf{w})$, so that these functions return $J_{\mathbf{f}}(\mathbf{x})^T \mathbf{u}$ and $J_{\mathbf{f}}(\mathbf{w})^T \mathbf{u}$ respectively. To summarize, for $\mathbf{x} \in \mathbb{R}^n$, $\mathbf{w} \in \mathbb{R}^d$, and $\mathbf{f}(\mathbf{x},\mathbf{w}) \in \mathbb{R}^m$, 88 | \begin{align*} 89 | {\bf vjp}_\mathbf{x}(\mathbf{u}) &= J_{\mathbf{f}}(\mathbf{x})^T \mathbf{u}, \text{ with } J_{\mathbf{f}}(\mathbf{x})\in\mathbb{R}^{m\times n}, \mathbf{u}\in \mathbb{R}^m\\ 90 | {\bf vjp}_\mathbf{w}(\mathbf{u}) &= J_{\mathbf{f}}(\mathbf{w})^T \mathbf{u}, \text{ with } J_{\mathbf{f}}(\mathbf{w})\in\mathbb{R}^{m\times d}, \mathbf{u}\in \mathbb{R}^m 91 | \end{align*} 92 | Then backpropagation is simply done by first computing the gradient of the loss and then composing the **vjp** functions in the right order.
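As a concrete illustration of this interface, here is a minimal `numpy` sketch; the names (`linear`, `mse`) and the exact interface are illustrative choices and not necessarily those of the notebooks. Each forward pass returns the output together with its two **vjp** closures, and the backward pass composes them from right to left:

```python
import numpy as np

def linear(x, w):
    """f(x, w) = x @ w, returned together with its two vjp closures."""
    out = x @ w
    vjp_x = lambda u: w @ u           # J_f(x)^T u
    vjp_w = lambda u: np.outer(x, u)  # J_f(w)^T u
    return out, vjp_x, vjp_w

def mse(y, target):
    """Scalar loss and its gradient with respect to y."""
    return 0.5 * np.sum((y - target) ** 2), y - target

rng = np.random.default_rng(0)
x = rng.standard_normal(3)
w1 = rng.standard_normal((3, 4))
w2 = rng.standard_normal((4, 2))
target = rng.standard_normal(2)

# forward pass: keep the vjp closures of each layer
h, vjp_x1, vjp_w1 = linear(x, w1)
y, vjp_h, vjp_w2 = linear(h, w2)
loss, u = mse(y, target)

# backward pass: compose the vjps from right to left
grad_w2 = vjp_w2(u)
u = vjp_h(u)           # gradient flowing back to h
grad_w1 = vjp_w1(u)
grad_x = vjp_x1(u)
```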
93 | 94 | ## Practice 95 | 96 | - Intro to JAX: autodiff the functional way [autodiff\_functional\_empty.ipynb](https://github.com/dataflowr/notebooks/blob/master/Module2/autodiff_functional_empty.ipynb) and its solution [autodiff\_functional\_sol.ipynb](https://github.com/dataflowr/notebooks/blob/master/Module2/autodiff_functional_sol.ipynb) 97 | - Linear regression in JAX [linear\_regression\_jax.ipynb](https://github.com/dataflowr/notebooks/blob/master/Module2/linear_regression_jax.ipynb) -------------------------------------------------------------------------------- /modules/3-loss-functions-for-classification.md: -------------------------------------------------------------------------------- 1 | @def sequence = ["loss-functions"] 2 | 3 | # Module 3 - Loss functions for classification 4 | 5 | **Table of Contents** 6 | 7 | \toc 8 | 9 | 10 | ## Loss functions for classification 11 | 12 | {{youtube_placeholder loss-functions}} 13 | 14 | {{yt_tsp 0 0 Recap}} 15 | {{yt_tsp 145 0 How to choose your loss?}} 16 | {{yt_tsp 198 0 A probabilistic model for linear regression}} 17 | {{yt_tsp 470 0 Gradient descent, learning rate, SGD}} 18 | {{yt_tsp 690 0 Pytorch code for gradient descent}} 19 | {{yt_tsp 915 0 A probabilistic model for logistic regression}} 20 | {{yt_tsp 1047 0 Notations (information theory)}} 21 | {{yt_tsp 1258 0 Likelihood for logistic regression}} 22 | {{yt_tsp 1363 0 BCELoss}} 23 | {{yt_tsp 1421 0 BCEWithLogitsLoss}} 24 | {{yt_tsp 1537 0 Beware of the reduction parameter}} 25 | {{yt_tsp 1647 0 Softmax regression}} 26 | {{yt_tsp 1852 0 NLLLoss}} 27 | {{yt_tsp 2088 0 Classification in pytorch}} 28 | {{yt_tsp 2196 0 Why is maximizing accuracy directly hard?}} 29 | {{yt_tsp 2304 0 Classification in deep learning}} 30 | {{yt_tsp 2450 0 Regression without knowing the underlying model}} 31 | {{yt_tsp 2578 0 Overfitting in polynomial regression}} 32 | {{yt_tsp 2720 0 Validation set}} 33 | {{yt_tsp 2935 0 Notion of risk and hypothesis space}} 34 | {{yt_tsp 3280 0 Estimation error and approximation error}} 35 | 36 | ## Slides and Notebook 37 | 38 | - [slides](https://dataflowr.github.io/slides/module3.html) 39 | - [notebook](https://github.com/dataflowr/notebooks/blob/master/Module3/03_polynomial_regression.ipynb) in [colab](https://colab.research.google.com/github/dataflowr/notebooks/blob/master/Module3/03_polynomial_regression.ipynb) An explanation of underfitting and overfitting with polynomial regression.
40 | 41 | ## Minimal working examples 42 | 43 | ### [`BCELoss`](https://pytorch.org/docs/stable/generated/torch.nn.BCELoss.html#torch.nn.BCELoss) 44 | ```python 45 | import torch
import torch.nn as nn 46 | m = nn.Sigmoid() 47 | loss = nn.BCELoss() 48 | input = torch.randn(3,4,5) 49 | target = torch.rand(3,4,5) # targets for BCELoss must lie in [0, 1] 50 | loss(m(input), target) 51 | ``` 52 | 53 | ### [`NLLLoss`](https://pytorch.org/docs/stable/generated/torch.nn.NLLLoss.html#torch.nn.NLLLoss) and [`CrossEntropyLoss`](https://pytorch.org/docs/stable/generated/torch.nn.CrossEntropyLoss.html#torch.nn.CrossEntropyLoss) 54 | ```python 55 | import torch
import torch.nn as nn 56 | m = nn.LogSoftmax(dim=1) 57 | loss1 = nn.NLLLoss() 58 | loss2 = nn.CrossEntropyLoss() 59 | C = 8 60 | input = torch.randn(3,C,4,5) 61 | target = torch.empty(3,4,5, dtype=torch.long).random_(0,C) 62 | assert torch.allclose(loss1(m(input), target), loss2(input, target)) 63 | ``` 64 | 65 | ## Quiz 66 | 67 | To check that you know your losses, you can do the [quizzes](https://dataflowr.github.io/quiz/module3.html) 68 | 69 | -------------------------------------------------------------------------------- /modules/4-optimization-for-deep-learning.md: -------------------------------------------------------------------------------- 1 | @def sequence = ["optim-basics"] 2 | 3 | # Module 4 - Optimization for deep learning 4 | 5 | 6 | **Table of Contents** 7 | 8 | \toc 9 | 10 | 11 | ## Optimization for deep learning 12 | 13 | {{youtube_placeholder optim-basics}} 14 | 15 | {{yt_tsp 0 0 Recap}} 16 | {{yt_tsp 31 0 Plan}} 17 | {{yt_tsp 74 0 Optimization in deep learning}} 18 | {{yt_tsp 224 0 Gradient descent variants}} 19 | {{yt_tsp 478 0 Setting for the jupyter notebook}} 20 | {{yt_tsp 589 0 Vanilla gradient descent}} 21 | {{yt_tsp 734 0 Momentum}} 22 | {{yt_tsp 938 0 Nesterov accelerated gradient descent}} 23 | {{yt_tsp 1080 0 Adagrad}} 24 | {{yt_tsp 1206 0 RMSProp}} 25 | {{yt_tsp 1331 0 Adam}} 26 | {{yt_tsp 1479 0 AMSGrad}} 27 | {{yt_tsp 1629 0 Pytorch optimizers}} 28 | 29 | ## Slides and Practicals 30 | 31 | - [slides](https://dataflowr.github.io/slides/module4.html) 32 | - [notebook](https://github.com/dataflowr/notebooks/blob/master/Module4/04_gradient_descent_optimization_algorithms_empty.ipynb) in [colab](https://colab.research.google.com/github/dataflowr/notebooks/blob/master/Module4/04_gradient_descent_optimization_algorithms_empty.ipynb) Code your optimizers. 33 | 34 | ## References 35 | 36 | - [An overview of gradient descent optimization algorithms](https://arxiv.org/abs/1609.04747) by Sebastian Ruder 37 | - [Gradient-based optimization](https://drive.google.com/file/d/1e_9W8q9PL20iqOR-pfK89eILc_VtYaw1/view) A short introduction to optimization in Deep Learning, by Christian S.
Perone 38 | -------------------------------------------------------------------------------- /modules/5-stacking-layers.md: -------------------------------------------------------------------------------- 1 | @def sequence = ["pytorch-module"] 2 | 3 | # Module 5 - Stacking layers 4 | 5 | **Table of Contents** 6 | 7 | \toc 8 | 9 | 10 | ## Stacking layers 11 | 12 | {{youtube_placeholder pytorch-module}} 13 | 14 | {{yt_tsp 0 0 Recap}} 15 | {{yt_tsp 95 0 Plan of the lesson: define a NN model}} 16 | {{yt_tsp 144 0 MLP with pytorch Sequential}} 17 | {{yt_tsp 401 0 Using torch.nn.Module}} 18 | {{yt_tsp 608 0 Writing a pytorch module}} 19 | 20 | ## Slides 21 | 22 | - [slides](https://dataflowr.github.io/slides/module5.html) 23 | 24 | ## Practicals 25 | 26 | - [notebook](https://github.com/dataflowr/notebooks/blob/master/Module5/Stacking_layers_MLP_CIFAR10.ipynb) in [colab](https://colab.research.google.com/github/dataflowr/notebooks/blob/master/Module5/Stacking_layers_MLP_CIFAR10.ipynb) -------------------------------------------------------------------------------- /modules/6-convolutional-neural-network.md: -------------------------------------------------------------------------------- 1 | @def sequence = ["conv"] 2 | 3 | # Module 6 - Convolutional neural network 4 | 5 | **Table of Contents** 6 | 7 | \toc 8 | 9 | 10 | ## Convolutional neural network 11 | 12 | {{youtube_placeholder conv}} 13 | 14 | {{yt_tsp 0 0 Recap}} 15 | {{yt_tsp 52 0 MNIST dataset}} 16 | {{yt_tsp 176 0 A simple binary classifier}} 17 | {{yt_tsp 381 0 Precision and recall}} 18 | {{yt_tsp 524 0 Filters and convolutions}} 19 | {{yt_tsp 1180 0 Max pooling}} 20 | 21 | 22 | ## Notebook 23 | 24 | - [notebook](https://github.com/dataflowr/notebooks/blob/master/Module6/06_convolution_digit_recognizer.ipynb) in [colab](https://colab.research.google.com/github/dataflowr/notebooks/blob/master/Module6/06_convolution_digit_recognizer.ipynb) Convolutions by examples.
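As a minimal illustration of the two operations discussed in the video, here is a short PyTorch sketch added for reference (the shapes are arbitrary choices for a single MNIST-like image):

```python
import torch
import torch.nn as nn

x = torch.randn(1, 1, 28, 28)   # a batch with one 28x28 grayscale image
conv = nn.Conv2d(in_channels=1, out_channels=8, kernel_size=3, padding=1)
pool = nn.MaxPool2d(kernel_size=2)

h = conv(x)     # (1, 8, 28, 28): 8 feature maps, padding=1 keeps the size
out = pool(h)   # (1, 8, 14, 14): max pooling halves the spatial dimensions
```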
25 | 26 | ## Practicals 27 | 28 | {{yt_tsp 1704 0 Practicals: your first CNN}} 29 | 30 | 31 | ## Post 32 | 33 | - [Convolutions (and Discrete Fourier Transform) from first principles](../extras/Convolutions_first/) -------------------------------------------------------------------------------- /modules/7-dataloading.md: -------------------------------------------------------------------------------- 1 | @def sequence = ["dataloading-emb"] 2 | 3 | # Module 7 - Dataloading 4 | 5 | **Table of Contents** 6 | 7 | \toc 8 | 9 | 10 | ## Dataloading 11 | 12 | {{youtube_placeholder dataloading-emb}} 13 | 14 | {{yt_tsp 0 0 Recap}} 15 | {{yt_tsp 69 0 Plan of the lesson}} 16 | {{yt_tsp 128 0 Dataloading}} 17 | {{yt_tsp 280 0 Example 1: torchvision.datasets.ImageFolder}} 18 | {{yt_tsp 585 0 Example 2: dataset from numpy arrays}} 19 | {{yt_tsp 887 0 Example 3: custom dataloader}} 20 | 21 | ## Slides 22 | 23 | - [slides](https://dataflowr.github.io/slides/module7.html) 24 | -------------------------------------------------------------------------------- /modules/8a-embedding-layers.md: -------------------------------------------------------------------------------- 1 | @def sequence = ["dataloading-emb"] 2 | 3 | # Module 8a - Embedding layers 4 | 5 | 6 | **Table of Contents** 7 | 8 | \toc 9 | 10 | 11 | ## Embedding layers 12 | 13 | {{youtube_placeholder dataloading-emb}} 14 | {{yt_tsp 1066 0 Dealing with symbolic data}} 15 | {{yt_tsp 1111 0 One-hot encoding}} 16 | {{yt_tsp 1366 0 Embeddings}} 17 | {{yt_tsp 1660 0 Pytorch sparse layer}} 18 | 19 | ## Slides 20 | 21 | - [slides](https://dataflowr.github.io/slides/module8a.html) 22 | -------------------------------------------------------------------------------- /modules/8b-collaborative-filtering.md: -------------------------------------------------------------------------------- 1 | @def sequence = ["collab-filt"] 2 | 3 | # Module 8b - Collaborative filtering 4 | 5 | 6 | **Table of Contents** 7 | 8 | \toc 9 | 10 | 11 | ## Collaborative filtering 12 | 13 | {{youtube_placeholder collab-filt}} 14 | {{yt_tsp 0 0 Collaborative filtering}} 15 | {{yt_tsp 410 0 MovieLens dataset: data wrangling with pandas}} 16 | {{yt_tsp 696 0 Test/train split with sklearn}} 17 | {{yt_tsp 831 0 The dot model neural network}} 18 | {{yt_tsp 1143 0 Checking your model}} 19 | {{yt_tsp 1279 0 Coding the training loop}} 20 | {{yt_tsp 1309 0 Checking your training loop}} 21 | {{yt_tsp 1407 0 FactorizationModel: a deep learning framework}} 22 | {{yt_tsp 1656 0 Checking your FactorizationModel}} 23 | {{yt_tsp 1855 0 Sorting the movies}} 24 | {{yt_tsp 1980 0 PCA of movie embeddings}} 25 | {{yt_tsp 2200 0 The SPOTLIGHT lib}} 26 | 27 | ## Notebook 28 | 29 | - [notebook](https://github.com/dataflowr/notebooks/blob/master/Module8/08_collaborative_filtering_empty.ipynb) in [colab](https://colab.research.google.com/github/dataflowr/notebooks/blob/master/Module8/08_collaborative_filtering_empty.ipynb) Collaborative filtering. 30 | 31 | ## Practicals 32 | 33 | {{yt_tsp 831 0 Start with your implementation of the dot model}} 34 | 35 | - [notebook](https://github.com/dataflowr/notebooks/blob/master/Module8/08_collaborative_filtering_1M.ipynb) in [colab](https://colab.research.google.com/github/dataflowr/notebooks/blob/master/Module8/08_collaborative_filtering_1M.ipynb) Refactoring the code.
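For reference, here is a minimal sketch of the dot model discussed in the video: two embedding tables whose dot product predicts a rating. The names and sizes are placeholders, and the notebook's actual implementation may differ:

```python
import torch
import torch.nn as nn

class DotModel(nn.Module):
    def __init__(self, n_users, n_items, emb_dim=50):
        super().__init__()
        self.user_emb = nn.Embedding(n_users, emb_dim)
        self.item_emb = nn.Embedding(n_items, emb_dim)

    def forward(self, user_ids, item_ids):
        u = self.user_emb(user_ids)   # (batch, emb_dim)
        v = self.item_emb(item_ids)   # (batch, emb_dim)
        return (u * v).sum(dim=1)     # predicted ratings, shape (batch,)

model = DotModel(n_users=1000, n_items=2000)
scores = model(torch.tensor([0, 1]), torch.tensor([10, 20]))
```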
36 | 37 | 38 | -------------------------------------------------------------------------------- /modules/8c-word2vec.md: -------------------------------------------------------------------------------- 1 | # Module 8c - Word2vec 2 | 3 | 4 | **Table of Contents** 5 | 6 | \toc 7 | 8 | ## Practicals 9 | 10 | - [Word Embedding with Word2vec](https://github.com/dataflowr/notebooks/blob/master/Module8/08_Word2vec_pytorch_empty.ipynb) 11 | 12 | - [Implementing word2vec as matrix factorization](https://github.com/dataflowr/notebooks/blob/master/Module8/08_word2vec.ipynb) 13 | 14 | ## References 15 | 16 | - [word2vec Explained: deriving Mikolov et al.'s negative-sampling word-embedding method](https://arxiv.org/abs/1402.3722) by Yoav Goldberg and Omer Levy -------------------------------------------------------------------------------- /modules/9a-autoencoders.md: -------------------------------------------------------------------------------- 1 | @def sequence = ["autoencoder"] 2 | 3 | # Module 9a - Autoencoders 4 | 5 | **Table of Contents** 6 | 7 | \toc 8 | 9 | 10 | ## Autoencoders 11 | 12 | {{youtube_placeholder autoencoder}} 13 | {{yt_tsp 0 0 Recap and unsupervised learning}} 14 | {{yt_tsp 139 0 Plan}} 15 | {{yt_tsp 189 0 Theory of autoencoders}} 16 | {{yt_tsp 449 0 Practice of autoencoders in PyTorch}} 17 | {{yt_tsp 679 0 Representation learning with autoencoders}} 18 | {{yt_tsp 955 0 Practicals}} 19 | {{yt_tsp 1009 0 A simple autoencoder}} 20 | {{yt_tsp 1210 0 Stacked autoencoders}} 21 | {{yt_tsp 1336 0 Interpolation}} 22 | {{yt_tsp 1349 0 Denoising autoencoder}} 23 | 24 | ## Slides 25 | 26 | - [slides](https://dataflowr.github.io/slides/module9.html) 27 | 28 | ## Practicals 29 | 30 | - [Autoencoders and Noisy Autoencoders](https://github.com/dataflowr/notebooks/blob/master/Module9/09_AE_NoisyAE.ipynb) in [colab](https://colab.research.google.com/github/dataflowr/notebooks/blob/master/Module9/09_AE_NoisyAE.ipynb) 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /modules/9b-unet.md: -------------------------------------------------------------------------------- 1 | @def sequence = ["UNet"] 2 | 3 | # Module 9b - UNets 4 | 5 | ![](../extras/unet/unet.png) 6 | 7 | - [UNet for image segmentation](https://github.com/dataflowr/notebooks/blob/master/Module9/UNet_image_seg.ipynb) 8 | 9 | 10 | -------------------------------------------------------------------------------- /modules/9c-flows.md: -------------------------------------------------------------------------------- 1 | @def sequence = ["flows"] 2 | 3 | # Module 9c - Flows 4 | 5 | ![](../extras/flows/Real_NVP.png) 6 | 7 | **Table of Contents** 8 | 9 | \toc 10 | 11 | 12 | # Normalizing flows 13 | 14 | The image below is taken from this very good blog post on normalizing flows: [blogpost](https://lilianweng.github.io/lil-log/2018/10/13/flow-based-deep-generative-models.html) 15 | 16 | ![](../extras/flows/three-generative-models.png) 17 | 18 | Here we only describe flow-based generative models; you can have a look at [VAE](/homework/3-VAE) and [GAN](/modules/10-generative-adversarial-networks). 19 | 20 | A **flow-based generative model** is constructed by a sequence of **invertible** transformations. The main advantage of flows is that the model explicitly learns the data distribution $p(\mathbf{x})$ and therefore the loss function is simply the negative log-likelihood.
21 | 22 | Given a sample $\mathbf{x}$ and a prior $p(\mathbf{z})$, we compute $f(\mathbf{x}) = \mathbf{z}$ with an invertible function $f$ that will be learned. Given $f$ and the prior $p(\mathbf{z})$, we can compute the evidence $p(\mathbf{x})$ thanks to the change of variables formula: 23 | \begin{align*} 24 | \mathbf{z} &\sim p(\mathbf{z}), \mathbf{z} = f(\mathbf{x}), \\ 25 | p(\mathbf{x}) 26 | &= p(\mathbf{z}) \left\vert \det \dfrac{d \mathbf{z}}{d \mathbf{x}} \right\vert 27 | = p(f(\mathbf{x})) \left\vert \det \dfrac{\partial f(\mathbf{x})}{\partial \mathbf{x}} \right\vert 28 | \end{align*} 29 | 30 | where $\dfrac{\partial f(\mathbf{x})}{\partial \mathbf{x}}$ is the Jacobian matrix of $f$. 31 | Recall that given a function mapping an $n$-dimensional input vector $\mathbf{x}$ to an $m$-dimensional output vector, $f: \mathbb{R}^n \to \mathbb{R}^m$, the matrix of all first-order partial derivatives of this function is called the **Jacobian matrix** $J_f$, whose entry on the i-th row and j-th column is $(J_f(\mathbf{x}))_{ij} = \frac{\partial f_i(\mathbf{x})}{\partial x_j}$: 32 | \begin{align*} 33 | {J_f(\mathbf{x})} = \begin{bmatrix} 34 | \frac{\partial f_1(\mathbf{x})}{\partial x_1} & \dots & \frac{\partial f_1(\mathbf{x})}{\partial x_n} \\[6pt] 35 | \vdots & \ddots & \vdots \\[6pt] 36 | \frac{\partial f_m(\mathbf{x})}{\partial x_1} & \dots & \frac{\partial f_m(\mathbf{x})}{\partial x_n} \\[6pt] 37 | \end{bmatrix} 38 | \end{align*} 39 | Below, we will parametrize $f$ with a neural network and learn $f$ by maximizing $\ln p(\mathbf{x})$. More precisely, given a dataset $(\mathbf{x}_1,\dots,\mathbf{x}_n)$ and a model provided by a prior $p(\mathbf{z})$ and a neural network $f$, we optimize the weights of $f$ by minimizing: 40 | \begin{align*} 41 | -\sum_{i}\ln p(\mathbf{x_i}) = \sum_i -\ln p(f(\mathbf{x}_i)) -\ln\left\vert \det \dfrac{\partial f(\mathbf{x}_i)}{\partial \mathbf{x}} \right\vert. 42 | \end{align*} 43 | 44 | **We need to ensure that $f$ is always invertible and that the determinant is simple to compute.** 45 | 46 | ## Density estimation using Real NVP 47 | 48 | 49 | [Real NVP](https://arxiv.org/abs/1605.08803) (introduced by Laurent Dinh, Jascha Sohl-Dickstein and Samy Bengio in 2016) uses a function $f$ obtained by stacking affine coupling layers which, for an input $\mathbf{x}\in \mathbb{R}^D$, produce the output $\mathbf{y}\in\mathbb{R}^D$ defined by (with $ d < D $ ): 50 | \begin{align} 51 | \label{eq:aff} 52 | \mathbf{y}_{1:d} &= \mathbf{x}_{1:d}\\ 53 | \mathbf{y}_{d+1:D} &= \mathbf{x}_{d+1:D} \odot \exp\left(s(\mathbf{x}_{1:d})\right) +t(\mathbf{x}_{1:d}) , 54 | \end{align} 55 | where $s$ (scale) and $t$ (translation) are neural networks mapping $\mathbb{R}^d$ to $\mathbb{R}^{D-d}$ and $\odot$ is the element-wise product.
56 | 57 | For any functions $s$ and $t$, the affine coupling layer is invertible: 58 | \begin{align*} 59 | \begin{cases} 60 | \mathbf{y}_{1:d} &= \mathbf{x}_{1:d} \\ 61 | \mathbf{y}_{d+1:D} &= \mathbf{x}_{d+1:D} \odot \exp({s(\mathbf{x}_{1:d})}) + t(\mathbf{x}_{1:d}) 62 | \end{cases} 63 | \Leftrightarrow 64 | \begin{cases} 65 | \mathbf{x}_{1:d} &= \mathbf{y}_{1:d} \\ 66 | \mathbf{x}_{d+1:D} &= (\mathbf{y}_{d+1:D} - t(\mathbf{y}_{1:d})) \odot \exp(-s(\mathbf{y}_{1:d})) 67 | \end{cases} 68 | \end{align*} 69 | 70 | The Jacobian of an affine coupling layer is a lower triangular matrix: 71 | \begin{align*} 72 | J(\mathbf{x}) = \frac{\partial \mathbf{y}}{\partial \mathbf{x}}= 73 | \begin{bmatrix} 74 | \mathbb{I}_d & \mathbf{0}_{d\times(D-d)} \\[5pt] 75 | \frac{\partial \mathbf{y}_{d+1:D}}{\partial \mathbf{x}_{1:d}} & \text{diag}(\exp(s(\mathbf{x}_{1:d}))) 76 | \end{bmatrix} 77 | \end{align*} 78 | Hence the determinant is simply the product of the terms on the diagonal: 79 | \begin{align*} 80 | \left\vert\det(J(\mathbf{x}))\right\vert 81 | = \prod_{j=1}^{D-d}\exp(s(\mathbf{x}_{1:d}))_j 82 | = \exp\left(\sum_{j=1}^{D-d} s(\mathbf{x}_{1:d})_j\right) 83 | \end{align*} 84 | Note that we do not need to compute the Jacobian of $s$ or $t$, and to compute $f^{-1}$ we do not need to compute the inverse of $s$ or $t$ (which might not exist!). In other words, we can take arbitrarily complex functions for $s$ and $t$. 85 | 86 | In one affine coupling layer, some dimensions (channels) remain unchanged. To make sure all the inputs have a chance to be altered, the model reverses the ordering in each layer so that different components are left unchanged. Following such an alternating pattern, the set of units which remain identical in one transformation layer is always modified in the next. 87 | 88 | This can be implemented with binary masks. First, we extend the scale and translation networks to mappings from $\mathbb{R}^D$ to $\mathbb{R}^D$. Then, taking a mask $\mathbf{b} = (1,\dots,1,0,\dots,0)$ with $d$ ones, the affine layer can be written: 89 | \begin{align*} 90 | \mathbf{y} = \mathbf{x} \odot \exp\big((1-\mathbf{b}) \odot s(\mathbf{b} \odot \mathbf{x})\big) + (1-\mathbf{b}) \odot t(\mathbf{b} \odot \mathbf{x}). 91 | \end{align*} 92 | Note that we have 93 | \begin{align*} 94 | \ln \left\vert\det(J(\mathbf{x}))\right\vert = \sum_{j=1}^{D} \Big((1-\mathbf{b})\odot s(\mathbf{b} \odot \mathbf{x})\Big)_j, 95 | \end{align*} 96 | and to invert the affine layer: 97 | \begin{align*} 98 | \mathbf{x} = \left( \mathbf{y} -(1-\mathbf{b}) \odot t(\mathbf{b} \odot \mathbf{y})\right)\odot \exp\left( -(1-\mathbf{b}) \odot s(\mathbf{b} \odot \mathbf{y})\right). 99 | \end{align*} 100 | We then alternate the binary mask $\mathbf{b}$ from one coupling layer to the next. 101 | 102 | Note that the formula given in the paper is slightly different: 103 | $$\mathbf{y} = \mathbf{b} \odot \mathbf{x} + (1 - \mathbf{b}) \odot \Big(\mathbf{x} \odot \exp\big(s(\mathbf{b} \odot \mathbf{x})\big) + t(\mathbf{b} \odot \mathbf{x})\Big),$$ 104 | but the two formulas give the same result!
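To connect these formulas to code, here is a minimal PyTorch sketch of one masked affine coupling layer; it is an illustrative sketch only (the notebooks below build the full model), and the final `Tanh` on $s$ is a common stabilization trick rather than something required by the math above:

```python
import torch
import torch.nn as nn

class AffineCoupling(nn.Module):
    def __init__(self, D, mask, hidden=64):
        super().__init__()
        self.register_buffer("b", mask)  # binary mask of shape (D,)
        self.s = nn.Sequential(nn.Linear(D, hidden), nn.ReLU(),
                               nn.Linear(hidden, D), nn.Tanh())
        self.t = nn.Sequential(nn.Linear(D, hidden), nn.ReLU(),
                               nn.Linear(hidden, D))

    def forward(self, x):
        bx = self.b * x                             # only b * x feeds s and t
        s, t = self.s(bx), self.t(bx)
        y = x * torch.exp((1 - self.b) * s) + (1 - self.b) * t
        log_det = ((1 - self.b) * s).sum(dim=1)     # ln |det J(x)|
        return y, log_det

    def inverse(self, y):
        by = self.b * y                             # b * y equals b * x
        s, t = self.s(by), self.t(by)
        return (y - (1 - self.b) * t) * torch.exp(-(1 - self.b) * s)

layer = AffineCoupling(D=4, mask=torch.tensor([1., 1., 0., 0.]))
y, log_det = layer(torch.randn(8, 4))
x_rec = layer.inverse(y)   # recovers the input up to numerical precision
```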
105 | 106 | ## Implementation of Real NVP 107 | 108 | - you can now implement your [own NVP](https://github.com/dataflowr/notebooks/blob/master/Module9/Normalizing_flows_empty.ipynb) 109 | - and here is the [solution](https://github.com/dataflowr/notebooks/blob/master/Module9/Normalizing_flows_sol.ipynb) -------------------------------------------------------------------------------- /modules/extras/GCN_inductivebias_spectral_files/GCN_inductivebias_spectral_10_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dataflowr/website/5bd8ed8dc714e9ea7f926604beb4c9b83975100d/modules/extras/GCN_inductivebias_spectral_files/GCN_inductivebias_spectral_10_0.png -------------------------------------------------------------------------------- /modules/extras/GCN_inductivebias_spectral_files/GCN_inductivebias_spectral_13_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dataflowr/website/5bd8ed8dc714e9ea7f926604beb4c9b83975100d/modules/extras/GCN_inductivebias_spectral_files/GCN_inductivebias_spectral_13_0.png -------------------------------------------------------------------------------- /modules/extras/GCN_inductivebias_spectral_files/GCN_inductivebias_spectral_17_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dataflowr/website/5bd8ed8dc714e9ea7f926604beb4c9b83975100d/modules/extras/GCN_inductivebias_spectral_files/GCN_inductivebias_spectral_17_0.png -------------------------------------------------------------------------------- /modules/extras/GCN_inductivebias_spectral_files/GCN_inductivebias_spectral_19_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dataflowr/website/5bd8ed8dc714e9ea7f926604beb4c9b83975100d/modules/extras/GCN_inductivebias_spectral_files/GCN_inductivebias_spectral_19_0.png -------------------------------------------------------------------------------- /modules/extras/GCN_inductivebias_spectral_files/GCN_inductivebias_spectral_23_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dataflowr/website/5bd8ed8dc714e9ea7f926604beb4c9b83975100d/modules/extras/GCN_inductivebias_spectral_files/GCN_inductivebias_spectral_23_0.png -------------------------------------------------------------------------------- /modules/extras/GCN_inductivebias_spectral_files/GCN_inductivebias_spectral_27_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dataflowr/website/5bd8ed8dc714e9ea7f926604beb4c9b83975100d/modules/extras/GCN_inductivebias_spectral_files/GCN_inductivebias_spectral_27_0.png -------------------------------------------------------------------------------- /modules/extras/GCN_inductivebias_spectral_files/GCN_inductivebias_spectral_29_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dataflowr/website/5bd8ed8dc714e9ea7f926604beb4c9b83975100d/modules/extras/GCN_inductivebias_spectral_files/GCN_inductivebias_spectral_29_0.png -------------------------------------------------------------------------------- /modules/extras/GCN_inductivebias_spectral_files/GCN_inductivebias_spectral_31_0.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/dataflowr/website/5bd8ed8dc714e9ea7f926604beb4c9b83975100d/modules/extras/GCN_inductivebias_spectral_files/GCN_inductivebias_spectral_31_0.png -------------------------------------------------------------------------------- /modules/extras/GCN_inductivebias_spectral_files/GCN_inductivebias_spectral_33_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dataflowr/website/5bd8ed8dc714e9ea7f926604beb4c9b83975100d/modules/extras/GCN_inductivebias_spectral_files/GCN_inductivebias_spectral_33_0.png -------------------------------------------------------------------------------- /modules/extras/GCN_inductivebias_spectral_files/GCN_inductivebias_spectral_35_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dataflowr/website/5bd8ed8dc714e9ea7f926604beb4c9b83975100d/modules/extras/GCN_inductivebias_spectral_files/GCN_inductivebias_spectral_35_0.png -------------------------------------------------------------------------------- /modules/extras/GCN_inductivebias_spectral_files/GCN_inductivebias_spectral_37_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dataflowr/website/5bd8ed8dc714e9ea7f926604beb4c9b83975100d/modules/extras/GCN_inductivebias_spectral_files/GCN_inductivebias_spectral_37_1.png -------------------------------------------------------------------------------- /modules/extras/GCN_inductivebias_spectral_files/GCN_inductivebias_spectral_39_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dataflowr/website/5bd8ed8dc714e9ea7f926604beb4c9b83975100d/modules/extras/GCN_inductivebias_spectral_files/GCN_inductivebias_spectral_39_0.png -------------------------------------------------------------------------------- /modules/extras/GCN_inductivebias_spectral_files/GCN_inductivebias_spectral_7_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dataflowr/website/5bd8ed8dc714e9ea7f926604beb4c9b83975100d/modules/extras/GCN_inductivebias_spectral_files/GCN_inductivebias_spectral_7_0.png -------------------------------------------------------------------------------- /modules/extras/ODIN/original_optimal_shade.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dataflowr/website/5bd8ed8dc714e9ea7f926604beb4c9b83975100d/modules/extras/ODIN/original_optimal_shade.png -------------------------------------------------------------------------------- /modules/extras/attention/attention_bahdanau.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dataflowr/website/5bd8ed8dc714e9ea7f926604beb4c9b83975100d/modules/extras/attention/attention_bahdanau.png -------------------------------------------------------------------------------- /modules/extras/attention/attention_matrix.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dataflowr/website/5bd8ed8dc714e9ea7f926604beb4c9b83975100d/modules/extras/attention/attention_matrix.png -------------------------------------------------------------------------------- /modules/extras/attention/attention_matrix2.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/dataflowr/website/5bd8ed8dc714e9ea7f926604beb4c9b83975100d/modules/extras/attention/attention_matrix2.png -------------------------------------------------------------------------------- /modules/extras/attention/attention_translate.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dataflowr/website/5bd8ed8dc714e9ea7f926604beb4c9b83975100d/modules/extras/attention/attention_translate.jpeg -------------------------------------------------------------------------------- /modules/extras/attention/block_transformer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dataflowr/website/5bd8ed8dc714e9ea7f926604beb4c9b83975100d/modules/extras/attention/block_transformer.png -------------------------------------------------------------------------------- /modules/extras/attention/transformer_block_nocode.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dataflowr/website/5bd8ed8dc714e9ea7f926604beb4c9b83975100d/modules/extras/attention/transformer_block_nocode.png -------------------------------------------------------------------------------- /modules/extras/attention/transformer_vizu.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dataflowr/website/5bd8ed8dc714e9ea7f926604beb4c9b83975100d/modules/extras/attention/transformer_vizu.gif -------------------------------------------------------------------------------- /modules/extras/clip/diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dataflowr/website/5bd8ed8dc714e9ea7f926604beb4c9b83975100d/modules/extras/clip/diagram.png -------------------------------------------------------------------------------- /modules/extras/conv_files/deeplabcityscape.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dataflowr/website/5bd8ed8dc714e9ea7f926604beb4c9b83975100d/modules/extras/conv_files/deeplabcityscape.gif -------------------------------------------------------------------------------- /modules/extras/conv_files/jl_conv.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dataflowr/website/5bd8ed8dc714e9ea7f926604beb4c9b83975100d/modules/extras/conv_files/jl_conv.gif -------------------------------------------------------------------------------- /modules/extras/conv_files/jl_grad.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dataflowr/website/5bd8ed8dc714e9ea7f926604beb4c9b83975100d/modules/extras/conv_files/jl_grad.gif -------------------------------------------------------------------------------- /modules/extras/conv_files/target_plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dataflowr/website/5bd8ed8dc714e9ea7f926604beb4c9b83975100d/modules/extras/conv_files/target_plot.png -------------------------------------------------------------------------------- /modules/extras/conv_files/training_plot.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/dataflowr/website/5bd8ed8dc714e9ea7f926604beb4c9b83975100d/modules/extras/conv_files/training_plot.png -------------------------------------------------------------------------------- /modules/extras/diffusions/ddpm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dataflowr/website/5bd8ed8dc714e9ea7f926604beb4c9b83975100d/modules/extras/diffusions/ddpm.png -------------------------------------------------------------------------------- /modules/extras/diffusions/diffusion_finetuning.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dataflowr/website/5bd8ed8dc714e9ea7f926604beb4c9b83975100d/modules/extras/diffusions/diffusion_finetuning.png -------------------------------------------------------------------------------- /modules/extras/diffusions/energy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dataflowr/website/5bd8ed8dc714e9ea7f926604beb4c9b83975100d/modules/extras/diffusions/energy.png -------------------------------------------------------------------------------- /modules/extras/diffusions/horses.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dataflowr/website/5bd8ed8dc714e9ea7f926604beb4c9b83975100d/modules/extras/diffusions/horses.png -------------------------------------------------------------------------------- /modules/extras/diffusions/mnist_diffusion.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dataflowr/website/5bd8ed8dc714e9ea7f926604beb4c9b83975100d/modules/extras/diffusions/mnist_diffusion.gif -------------------------------------------------------------------------------- /modules/extras/diffusions/mnist_result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dataflowr/website/5bd8ed8dc714e9ea7f926604beb4c9b83975100d/modules/extras/diffusions/mnist_result.png -------------------------------------------------------------------------------- /modules/extras/diffusions/ships.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dataflowr/website/5bd8ed8dc714e9ea7f926604beb4c9b83975100d/modules/extras/diffusions/ships.png -------------------------------------------------------------------------------- /modules/extras/diffusions/trucks.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dataflowr/website/5bd8ed8dc714e9ea7f926604beb4c9b83975100d/modules/extras/diffusions/trucks.png -------------------------------------------------------------------------------- /modules/extras/flows/Real_NVP.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dataflowr/website/5bd8ed8dc714e9ea7f926604beb4c9b83975100d/modules/extras/flows/Real_NVP.png -------------------------------------------------------------------------------- /modules/extras/flows/three-generative-models.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dataflowr/website/5bd8ed8dc714e9ea7f926604beb4c9b83975100d/modules/extras/flows/three-generative-models.png 
-------------------------------------------------------------------------------- /modules/extras/graph_invariant.md: -------------------------------------------------------------------------------- 1 | @def title="Exploiting Graph Invariants in Deep Learning" 2 | 3 | # Exploiting Graph Invariants in Deep Learning 4 | 5 | {{youtube_placeholder graphinv}} 6 | 7 | {{yt_tsp 48 0 Skip the french part!}} 8 | 9 | - [Slides](Prairie.pdf) 10 | - [code](https://github.com/mlelarge/graph_neural_net) 11 | - [Paper](https://openreview.net/forum?id=lxHgXYN4bwl) 12 | - Related post: [Invariant and equivariant layers with applications to GNN, PointNet and Transformers](https://dataflowr.github.io/website/modules/extras/invariant_equivariant/) -------------------------------------------------------------------------------- /modules/extras/graph_invariant/Prairie.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dataflowr/website/5bd8ed8dc714e9ea7f926604beb4c9b83975100d/modules/extras/graph_invariant/Prairie.pdf -------------------------------------------------------------------------------- /modules/extras/invariant_equivariant.md: -------------------------------------------------------------------------------- 1 | @def title = "Invariant and Equivariant layers" 2 | @def hasmath = true 3 | 4 | # Invariant and equivariant layers with applications to GNN, PointNet and Transformers 5 | 6 | __author: [Marc Lelarge](https://www.di.ens.fr/~lelarge/), course: [dataflowr](https://dataflowr.github.io/website/)__ 7 | 8 | date: April 23, 2021 9 | 10 | ## Invariant and equivariant functions 11 | 12 | As shown in the [module on GNN](https://dataflowr.github.io/website/modules/graph3/), invariant and equivariant functions are crucial for GNN. For example, the message passing GNN (MGNN) layer is defined by: 13 | $$ 14 | \label{eq:gnnlayer}h^{\ell+1}_i = f(h^\ell_i , \{\{ h^\ell_j\}\}_{j\sim i}), 15 | $$ 16 | where $i\sim j$ means that nodes $i$ and $j$ are neighbors and the function $f$ should not depend on the order of the elements in the multiset $\{\{ h^\ell_j\}\}_{j\sim i}$. This layer is applied in parallel to all nodes (with the same function $f$) producing a mapping from ${\bf h}^\ell = (h^\ell_1\dots, h^\ell_n)$ to $F({\bf h}^\ell) = {\bf h}^{\ell+1}$ with $F:\mathbb{R}^n \to \mathbb{R}^n$ where $n$ is the number of nodes in the graph (and only real hidden states are considered for simplicity). It is easy to see that $F$ is an equivariant function, i.e. permuting its input will permute its output. 17 | 18 | Another example of invariant and equivariant functions is given by the attention layer $\text{Attention}(Q,K,V) = Z$ defined for $Q$ a tensor of row queries, $K$ the keys and $V$ the values, $Q,K,V\in \mathbb{R}^{n\times d}$ by 19 | $$ 20 | Z_j = \sum_{i=1}^n \text{softmax}_i(Q_jK_i^T) V_i. 21 | $$ 22 | The queries are obtained from a tensor $X\in \mathbb{R}^{n\times c}$ by $Q= XW_Q^T$ and the keys and values are obtained from a tensor $X' \in \mathbb{R}^{n\times c'}$ by $K = X' W_K^T$ and $V = X' W_V^T$. 23 | We see that when the queries are fixed, the attention layer is invariant in the pair (keys, values): 24 | $$ 25 | Z_j = \sum_{i=1}^n \text{softmax}_{i}(Q_j K_{\sigma(i)}^T) V_{\sigma(i)}, 26 | $$ 27 | hence $\text{Attention}(X,X')$ is invariant in $X'$. 
Similarly, when the pair (keys, values) is fixed, the attention layer is equivariant in the queries: 28 | $$ 29 | Z_{\sigma(j)} = \sum_{i=1}^n \text{softmax}_{i}(Q_{\sigma(j)}K_{i}^T) V_{i}, 30 | $$ 31 | hence $\text{Attention}(X,X')$ is equivariant in $X$. 32 | If $X'=X$, we get the self-attention layer 33 | so that $\text{SelfAttention}(X) = \text{Attention}(X,X)$ is equivariant in $X$. 34 | 35 | In this post, we will **characterize invariant and equivariant functions** following the ideas given in the paper [Deep Sets](https://arxiv.org/abs/1703.06114). 36 | 37 | ## Representation of invariant and equivariant functions 38 | 39 | We start with some definitions. 40 | 41 | For a vector ${\bf x} = (x_1,\dots, x_n)\in \mathbb{R}^n$ and a permutation $\sigma \in \mathcal{S}_n$, we define 42 | $$ 43 | \sigma \star {\bf x} = (x_{\sigma^{-1}(1)},\dots, x_{\sigma^{-1}(n)}) 44 | $$ 45 | 46 | **Definitions:** 47 | - A function $f:\mathbb{R}^n\to \mathbb{R}$ is **invariant** if for all ${\bf x}$ and all $\sigma \in \mathcal{S}_n$, we have $f(\sigma \star {\bf x}) = f({\bf x})$. 48 | - A function $f:\mathbb{R}^n\to \mathbb{R}^n$ is **equivariant** if for all ${\bf x}$ and all $\sigma \in \mathcal{S}_n$, we have $f(\sigma \star {\bf x}) = \sigma \star f({\bf x})$. 49 | 50 | We can now state our main result: 51 | 52 | @@colbox-blue **Theorem** 53 | 54 | - **invariant case:** let $f:[0,1]^n \to \R$ be a continuous function. $f$ is invariant if and only if there are continuous functions $\phi: [0,1] \to \R^n$ and $\rho: \R^n\to \R$ such that 55 | $$ 56 | \label{eq:inv}f(\bx) = \rho\left( \sum_{i=1}^n \phi(x_i)\right) 57 | $$ 58 | 59 | - **equivariant case:** let $f:[0,1]^n \to \R^n$ be a continuous function. $f$ is equivariant if and only if there are continuous functions $\phi: [0,1] \to \R^n$ and $\rho: [0,1]\times \R^n\to \R$ such that 60 | $$ 61 | \label{eq:equiv}f_j(\bx) = \rho\left( x_j, \sum_{i=1}^n \phi(x_i)\right) 62 | $$ 63 | @@ 64 | 65 | We give some remarks before providing the proof below. For the sake of simplicity, we consider here a fixed number of points $n$ on the unit interval $[0,1]$. For results with a varying number of points, see [On the Limitations of Representing Functions on Sets](https://arxiv.org/abs/1901.09006), and for points in higher dimension $[0,1]^d$ with $d>1$, see [On Universal Equivariant Set Networks](https://arxiv.org/abs/1910.02421) and [Expressive Power of Invariant and Equivariant Graph Neural Networks](https://arxiv.org/abs/2006.15646). 66 | 67 | Our proof will make the mapping $\phi$ explicit and it will not depend on the function $f$. The mapping $\phi$ can be seen as an embedding of the points in $[0,1]$ into a high-dimensional space. Indeed, this embedding space has to be of dimension at least the number of points $n$ in order to ensure universality. This is an important remark: in a learning scenario, the size of the embedding is typically fixed and hence will limit the expressiveness of the algorithm. 68 | 69 | Coming back to the GNN layer \eqref{eq:gnnlayer}, our result on the invariant case tells us that we can always rewrite it as: 70 | $$ 71 | \label{eq:gnnlayer2}h^{\ell+1}_i =\rho\left( h_i^{\ell}, \sum_{j\sim i} \phi(h^\ell_j)\right), 72 | $$ 73 | and the dimension of the embedding $\phi(h)$ needs to be of the same order as the maximum degree in the graph. Note that \eqref{eq:gnnlayer2} is not of the form of \eqref{eq:equiv} as the sum inside the $\rho$ function is taken only over neighbors.
Indeed, we know that message passing GNNs are not universal (see [Expressive Power of Invariant and Equivariant Graph Neural Networks](https://arxiv.org/abs/2006.15646)). 74 | 75 | As a last remark, note that the original [PointNet](https://arxiv.org/abs/1612.00593) architecture $f$ is of the form $f_i(\bx) = \rho(x_i)$, which is not universal equivariant. Indeed, it is impossible to approximate the equivariant function $g_i(\bx) = \sum_j x_j$ as shown below (we denote $\be_1=(1,0,\dots,0)$): 76 | $$ 77 | \|f(0) - g(0)\|^2 = n \rho(0)^2\\ 78 | \|f(\be_1) -g(\be_1)\|^2 = (\rho(1)-1)^2 + (n-1)(\rho(0)-1)^2\geq (n-1)(\rho(0)-1)^2, 79 | $$ 80 | and these quantities cannot be small together. Hence PointNet is not universal equivariant, but as shown in [On Universal Equivariant Set Networks](https://arxiv.org/abs/1910.02421), modifying PointNet by adding the term $ \sum_{i=1}^n \phi(x_i)$ inside the $\rho$ function as in \eqref{eq:equiv} makes it universal equivariant. We refer to [Are Transformers universal approximators of sequence-to-sequence functions?](https://arxiv.org/abs/1912.10077) for similar results about transformers based on self-attention. 81 | 82 | 83 | ## Proof of the Theorem 84 | 85 | We first show that the equivariant case is not more difficult than the invariant case. Assume that we have proved the invariant case. Consider a permutation $\sigma\in \Sc_n$ such that $\sigma(1)=1$, so that $f(\sigma \star {\bf x}) = \sigma \star f({\bf x})$ gives for the first component: 86 | $$ 87 | f_1(x_1,x_{\sigma(2)},\dots, x_{\sigma(n)}) = f_1(x_1,x_2,\dots, x_n). 88 | $$ 89 | For any $x_1$, the mapping $(x_2,\dots, x_n) \mapsto f_1(x_1, x_2,\dots, x_n)$ is invariant. Hence by \eqref{eq:inv}, we have 90 | $$ 91 | f_1(x_1,x_2,\dots, x_n) = \rho\left(x_1, \sum_{i\neq 1}\phi(x_i) \right) 92 | $$ 93 | Now consider a permutation such that $\sigma(1)=k, \sigma(k)=1$ and $\sigma(i)=i$ for $i\neq 1,k$; then we have 94 | \begin{equation} 95 | f_k(x_1,x_2,\dots, x_n) = f_1(x_k,x_2,\dots, x_1,\dots, x_n), 96 | \end{equation} 97 | hence $f_k(x_1,x_2,\dots, x_n)=\rho\left(x_k, \sum_{i\neq k}\phi(x_i) \right)$ and \eqref{eq:equiv} follows. 98 | 99 | Hence, we only need to prove \eqref{eq:inv}, and we follow the proof given in [Deep Sets](https://arxiv.org/abs/1703.06114). We start with a crucial result stating that a set of $n$ real points is characterized by the first $n$ moments of its empirical measure. Let us see what it means for $n=2$: we can recover the values of $x_1$ and $x_2$ from the quantities $p_1=x_1+x_2$ and $p_2=x_1^2+x_2^2$. To see that this is correct, note that 100 | $$ 101 | p_1^2 = x_1^2+2x_1x_2+x_2^2 = p_2+2x_1x_2, 102 | $$ 103 | so that $x_1x_2 = \frac{p_1^2-p_2}{2}$. As a result, we have 104 | $$ 105 | (x-x_1)(x-x_2) = x^2-p_1x+\frac{p_1^2-p_2}{2}, 106 | $$ 107 | and clearly $x_1$ and $x_2$ can be recovered as the roots of this polynomial whose coefficients are functions of $p_1$ and $p_2$. The result below extends this argument to a general $n$: 108 | @@colbox-blue **Proposition** 109 | 110 | The mapping $\Phi:[0,1]_{\leq}^n \to \mathbb{R}^{n}$, where $[0,1]_{\leq}^n = \{ \bx\in [0,1]^n,\: x_1\leq x_2\leq \dots\leq x_n\}$, defined by 111 | $$ 112 | \Phi(x_1,x_2,\dots, x_n) = \left( \sum_i x_i, \sum_i x_i^2,\dots, \sum_i x_i^n\right) 113 | $$ 114 | is injective and has a continuous inverse mapping.@@ 115 | 116 | The proof follows from [Newton's identities](https://en.wikipedia.org/wiki/Newton%27s_identities).
For $k\leq n$, we denote by $p_k = \sum_{i=1}^n x_i^k$ the power sums and by $e_k$ the [elementary symmetric polynomials](https://en.wikipedia.org/wiki/Elementary_symmetric_polynomial) (note that all polynomials are functions of $x_1,\dots, x_n$): 117 | \begin{equation} 118 | e_0 = 1\\ 119 | e_1 = \sum_i x_i\\ 120 | e_2 = \sum_{i < j} x_i x_j\\ \dots 121 | \end{equation} 122 | From Newton's identities, we have for $k\leq n$, 123 | $$ 124 | k e_k = \sum_{i=1}^k (-1)^{i-1}e_{k-i}p_i, 125 | $$ 126 | so that we can express the elementary symmetric polynomials from the power sums: 127 | \begin{equation} 128 | e_1 = p_1\\ 129 | 2e_2 = e_1p_1-p_2=p_1^2-p_2\\ 130 | 3e_3 = e_2p_1-e_1p_2+p_3 = \frac{1}{2}p_1^3-\frac{3}{2}p_1p_2+p_3\\ 131 | \dots 132 | \end{equation} 133 | Note that $\Phi(x_1,x_2,\dots, x_n) = (p_1,\dots, p_n)$ and since 134 | $$ 135 | \prod_{i=1}^n (x-x_i) = x^n -e_1x^{n-1}+e_2x^{n-2}-\dots + (-1)^n e_n, 136 | $$ 137 | if $\Phi(\bx) = \Phi(\by)$ then $\prod_{i=1}^n (x-x_i)=\prod_{i=1}^n (x-y_i)$ so that $\{\{x_1,\dots, x_n\}\} = \{\{y_1,\dots, y_n\}\}$ and $\bx=\by \in [0,1]^n_{\leq}$, showing that $\Phi$ is injective. 138 | 139 | Hence we proved that $\Phi:[0,1]^n_{\leq} \to \text{Im}(\Phi)$, where $\text{Im}(\Phi)$ is the image of $\Phi$, is a bijection. We now need to prove that $\Phi^{-1}$ is continuous, and we'll prove it directly. Let $\by_k \to \by \in\text{Im}(\Phi)$; we need to show that $\Phi^{-1}(\by_k) \to \Phi^{-1}(\by)$. Now if $\Phi^{-1}(\by_k) \not\to \Phi^{-1}(\by)$, since $[0,1]^n_{\leq}$ is compact, this means that there exists a convergent subsequence of $\Phi^{-1}(\by_{k})$ with $\Phi^{-1}(\by_{m_k}) \to \bx\neq \Phi^{-1}(\by) $. But by continuity of $\Phi$, we have $\by_{m_k} \to \Phi(\bx) = \by$, so that we get a contradiction, proving the continuity of $\Phi^{-1}$ and finishing the proof of the proposition. 140 | 141 | We are now ready to prove \eqref{eq:inv}. Let $\phi:[0,1] \to \R^n$ be defined by $\phi(x) = (x,x^2,\dots, x^n)$ and $\rho = f\circ \Phi^{-1}$. Note that $\rho: \text{Im}(\Phi) \to \R$ and $\sum_{i}\phi(x_i) = \Phi(\bx_{\leq})$, where $\bx_{\leq}$ is the vector $\bx$ with components sorted in non-decreasing order. Hence, as soon as $f$ is invariant, we have $f(\bx) = f(\bx_{\leq})$ so that \eqref{eq:inv} is valid. We only need to extend the function $\rho$ from the domain $\text{Im}(\Phi)$ to $\R^n$ in a continuous way. This can be done by considering the projection $\pi$ on the compact $\text{Im}(\Phi)$ and defining $\rho(\bx) = f\circ \Phi^{-1}(\pi(\bx))$. 142 | 143 | Follow on [twitter](https://twitter.com/marc_lelarge)! 144 | 145 | ## Thanks for reading! -------------------------------------------------------------------------------- /modules/extras/jupyterlab.md: -------------------------------------------------------------------------------- 1 | # JupyterLab 2 | 3 | This post explains how to install and configure 4 | [JupyterLab](https://jupyterlab.readthedocs.io/en/stable/). 5 | 6 | ## Installation 7 | 8 | If you are using virtual environments, it's preferable to install JupyterLab 9 | outside any virtual environment and to add the environments later as kernels.
10 | 11 | JupyterLab can be installed from `pip`: 12 | 13 | ```bash 14 | pip3 install jupyterlab 15 | ``` 16 | 17 | Then launch it with the following command: 18 | 19 | ```bash 20 | jupyter-lab 21 | ``` 22 | 23 | If you are used to using tmux, you can run JupyterLab in the background with 24 | the following command: 25 | 26 | ```bash 27 | # launch tmux session in the background 28 | tmux new -d -s jupyter "jupyter-lab --no-browser" 29 | 30 | # attach to the session when you want to 31 | tmux attach-session -t jupyter 32 | ``` 33 | 34 | ## Attaching kernels 35 | 36 | ### Python virtual environment kernel 37 | 38 | Kernels in JupyterLab are installed exactly like in regular Jupyter Notebooks. 39 | 40 | We are going to create a first kernel based on Python 3 and we are going to 41 | name it "Python (data-science)". 42 | 43 | ```bash 44 | # create virtual env based on python3 45 | virtualenv data-science -p python3 46 | 47 | # activate it 48 | source data-science/bin/activate 49 | 50 | # install ipykernel within it 51 | pip install ipykernel 52 | 53 | # install the kernel into jupyter and give it a display name 54 | python -m ipykernel install --user --name data-science --display-name "Python (data-science)" 55 | ``` 56 | 57 | ### Julia kernel 58 | 59 | You can use JupyterLab with languages other than Python, such as Julia. 60 | 61 | ```julia 62 | # install the IJulia package (within the Julia interpreter) 63 | using Pkg
Pkg.add("IJulia") 64 | ``` 65 | 66 | Now reload JupyterLab and you should see that a new Julia kernel appeared. 67 | 68 | ## JupyterLab extensions 69 | 70 | We can add widgets to JupyterLab, for instance: 71 | 72 | - [Table of contents](https://github.com/jupyterlab/jupyterlab-toc): 73 | automatically generate a table of contents for your 74 | notebook, based on the markdown sections you wrote 75 | - [Git](https://github.com/jupyterlab/jupyterlab-git): 76 | If working within a git repository, directly add & commit from 77 | the Jupyter Lab interface 78 | - [Latex](https://github.com/jupyterlab/jupyterlab-latex): 79 | Write and compile Latex files from Jupyter Lab 80 | - [Templates](https://pypi.org/project/jupyterlab-templates/): 81 | Lets you start a notebook from a template 82 | 83 | Extensions can be installed from the command line by using the 84 | `jupyter-labextension` command. All these commands must be run 85 | outside of any virtual environment (use the same Python in which 86 | you installed JupyterLab). 87 | 88 | ### [Table of contents](https://github.com/jupyterlab/jupyterlab-toc) 89 | 90 | This widget generates a table of contents from your notebook. 91 | 92 | Install it with: 93 | 94 | ```bash 95 | jupyter-labextension install @jupyterlab/toc 96 | ``` 97 | 98 | You'll see a new tab appear; when you click on it, it will show 99 | the table of contents. The sections are automatically created based on the 100 | markdown sections. Note that you can either enable automatic numbering or not. 101 | 102 | ### [Git](https://github.com/jupyterlab/jupyterlab-git) 103 | 104 | If you're tired of running back and forth between your terminal and Jupyter 105 | Lab to commit your code, consider this extension that brings a Git interface 106 | to Jupyter Lab. 107 | 108 | Install it with: 109 | 110 | ```bash 111 | pip3 install jupyterlab-git 112 | jupyter-labextension install @jupyterlab/git 113 | ``` 114 | 115 | As usual, a new tab appears that lets you commit & push directly 116 | from JupyterLab.
117 | 118 | ### [Latex](https://github.com/jupyterlab/jupyterlab-latex) 119 | 120 | Use this plugin if you wish to use Jupyter Lab for compiling Latex documents. 121 | 122 | Install it with: 123 | 124 | ```bash 125 | pip3 install jupyterlab_latex 126 | jupyter-labextension install @jupyterlab/latex 127 | ``` 128 | 129 | Now when you are editing a Latex file, a right click on the document will 130 | show an option that lets you render the file to PDF. 131 | 132 | ### [Templates](https://pypi.org/project/jupyterlab-templates/) 133 | 134 | Sometimes you might feel like you always copy-paste the same lines of code 135 | at the beginning of a new notebook. If so, consider using notebook 136 | templates with this plugin. 137 | 138 | Install it with: 139 | 140 | ```bash 141 | pip3 install jupyterlab_templates 142 | jupyter labextension install jupyterlab_templates 143 | jupyter serverextension enable --py jupyterlab_templates 144 | ``` 145 | 146 | Then you will see a new "Template" icon next to your kernels. If you click on 147 | it, you will be asked which template you want to use, and it will create 148 | a notebook based on this template. You can create your own templates by 149 | saving notebooks in the template directory. You also get to choose which 150 | directory the plugin will pick the templates from. 151 | -------------------------------------------------------------------------------- /modules/extras/nerf/pipeline.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dataflowr/website/5bd8ed8dc714e9ea7f926604beb4c9b83975100d/modules/extras/nerf/pipeline.jpg -------------------------------------------------------------------------------- /modules/extras/unet/unet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dataflowr/website/5bd8ed8dc714e9ea7f926604beb4c9b83975100d/modules/extras/unet/unet.png -------------------------------------------------------------------------------- /modules/graph0.md: -------------------------------------------------------------------------------- 1 | @def sequence = ["graph0"] 2 | 3 | # Module - Deep Learning on graphs 4 | 5 | 6 | **Table of Contents** 7 | 8 | \toc 9 | 10 | ## Introduction 11 | 12 | - Slides for a short [overview](https://dataflowr.github.io/slides/deep_graph_0.html) 13 | 14 | ## Node embedding 15 | 16 | - Course: [Node embedding](https://dataflowr.github.io/slides/deep_graph_1.html) 17 | 18 | ## Signal processing on graphs 19 | 20 | - Course: [Signal processing on graphs](https://dataflowr.github.io/slides/deep_graph_2.html) 21 | - Related post: [Inductive bias in GCN: a spectral perspective](https://dataflowr.github.io/website/modules/extras/GCN_inductivebias_spectral/) 22 | 23 | ## Graph embedding 24 | 25 | - Course: [Graph embedding](https://dataflowr.github.io/slides/deep_graph_3.html) 26 | - Related post: [Invariant and equivariant layers with applications to GNN, PointNet and Transformers](https://dataflowr.github.io/website/modules/extras/invariant_equivariant/) 27 | 28 | ## More advanced material 29 | 30 | - [Exploiting Graph Invariants in Deep Learning](https://dataflowr.github.io/website/modules/extras/graph_invariant) 31 | 32 | -------------------------------------------------------------------------------- /modules/graph1.md: -------------------------------------------------------------------------------- 1 | @def sequence = ["graph1"] 2 | 3 | # Module - Deep Learning on graphs (1) 4 | 5
| 6 | **Table of Contents** 7 | 8 | \toc 9 | 10 | 11 | ## Node embedding 12 | 13 | {{youtube_placeholder graph1}} 14 | {{yt_tsp 0 0 Introduction}} 15 | {{yt_tsp 132 0 Language model}} 16 | {{yt_tsp 304 0 Skip-gram model}} 17 | {{yt_tsp 524 0 Hierarchical softmax}} 18 | {{yt_tsp 679 0 DeepWalk}} 19 | {{yt_tsp 866 0 Negative sampling}} 20 | {{yt_tsp 1150 0 node2vec}} 21 | {{yt_tsp 1348 0 results on les Misérables}} 22 | {{yt_tsp 1510 0 results for multi-label classification}} 23 | 24 | ## Slides 25 | 26 | - [slides](https://dataflowr.github.io/slides/deep_graph_1.html) -------------------------------------------------------------------------------- /modules/graph2.md: -------------------------------------------------------------------------------- 1 | @def sequence = ["graph2"] 2 | 3 | # Module - Deep Learning on graphs (2) 4 | 5 | 6 | **Table of Contents** 7 | 8 | \toc 9 | 10 | 11 | ## Signal processing on graphs 12 | 13 | {{youtube_placeholder graph2}} 14 | {{yt_tsp 0 0 Introduction}} 15 | {{yt_tsp 100 0 Signal processing on graphs}} 16 | {{yt_tsp 184 0 Recap on Fourier analysis}} 17 | {{yt_tsp 304 0 Spectral graph theory}} 18 | {{yt_tsp 824 0 Graph Fourier analysis}} 19 | {{yt_tsp 998 0 Filtering}} 20 | {{yt_tsp 1113 0 Filtering on graphs}} 21 | {{yt_tsp 1321 0 Learning a localized kernel}} 22 | {{yt_tsp 1503 0 Chebyshev polynomials}} 23 | {{yt_tsp 1828 0 Convolutional neural networks on graphs}} 24 | 25 | 26 | ## Slides 27 | 28 | - [slides](https://dataflowr.github.io/slides/deep_graph_2.html) 29 | 30 | ## Notebook 31 | 32 | - [notebook](https://github.com/dataflowr/notebooks/blob/master/graphs/spectral_gnn.ipynb) in [colab](https://colab.research.google.com/github/dataflowr/notebooks/blob/master/graphs/spectral_gnn.ipynb) 33 | 34 | ## Posts 35 | 36 | - [Inductive bias in GCN: a spectral perspective](../extras/GCN_inductivebias_spectral/) (run the [code](https://github.com/dataflowr/notebooks/blob/master/graphs/GCN_inductivebias_spectral.ipynb) or open it in [Colab](https://colab.research.google.com/github/dataflowr/notebooks/blob/master/graphs/GCN_inductivebias_spectral-colab.ipynb)) -------------------------------------------------------------------------------- /modules/graph3.md: -------------------------------------------------------------------------------- 1 | @def sequence = ["graph3"] 2 | 3 | # Module - Deep Learning on graphs (3) 4 | 5 | 6 | **Table of Contents** 7 | 8 | \toc 9 | 10 | 11 | ## Graph embedding 12 | 13 | {{youtube_placeholder graph3}} 14 | {{yt_tsp 0 0 Introduction}} 15 | {{yt_tsp 90 0 Graph embedding}} 16 | {{yt_tsp 163 0 How to represent graphs?}} 17 | {{yt_tsp 238 0 Why graph symmetries matter?}} 18 | {{yt_tsp 505 0 Invariant and equivariant functions}} 19 | {{yt_tsp 750 0 Message passing GNN}} 20 | {{yt_tsp 962 0 The many flavors of MGNN}} 21 | {{yt_tsp 1200 0 Separating power}} 22 | {{yt_tsp 1371 0 2-Weisfeiler-Lehman test}} 23 | {{yt_tsp 1619 0 How powerful are MGNN}} 24 | {{yt_tsp 1707 0 Empirical results}} 25 | {{yt_tsp 1750 0 Graphs as higher order tensors}} 26 | {{yt_tsp 1905 0 Invariant and equivariant linear operator}} 27 | {{yt_tsp 2147 0 Invariant linear GNN}} 28 | {{yt_tsp 2298 0 Folklore GNN}} 29 | 30 | ## Slides 31 | 32 | - [slides](https://dataflowr.github.io/slides/deep_graph_3.html) 33 | 34 | ## Post 35 | 36 | - [Invariant and equivariant layers with applications to GNN, PointNet and Transformers](../extras/invariant_equivariant) -------------------------------------------------------------------------------- /modules/privacy-preserving-ML.md: 
@def sequence = ["privacy-preserving"]

# Module - Privacy Preserving Machine Learning

by [Daniel Huynh](https://github.com/dhuynh95)

**Table of Contents**

\toc


## Privacy Preserving Machine Learning

{{youtube_placeholder privacy-preserving}}
{{yt_tsp 0 0 Presentation}}
{{yt_tsp 170 0 Context and cloud data threats}}
{{yt_tsp 315 0 Confidential Computing (CC)}}
{{yt_tsp 432 0 Intel SGX}}
{{yt_tsp 520 0 Enclave}}
{{yt_tsp 739 0 Azure Attestation Service}}
{{yt_tsp 805 0 Use cases}}
{{yt_tsp 890 0 Abstraction layers for enclaves}}
{{yt_tsp 957 0 Open Enclave SDK}}
{{yt_tsp 987 0 Lightweight OS + Demo (Graphene SGX)}}
{{yt_tsp 1424 0 Multi-party machine learning}}
{{yt_tsp 1610 0 Q&A}}
{{yt_tsp 2006 0 Homomorphic Encryption (HE)}}
{{yt_tsp 2240 0 CKKS encoder}}
{{yt_tsp 2489 0 Homomorphic Encryption high-level view}}
{{yt_tsp 2544 0 Homomorphic Encryption in practice}}
{{yt_tsp 2717 0 Demo with TenSEAL}}
{{yt_tsp 3025 0 Demo Homomorphic Random Forests}}
{{yt_tsp 3698 0 To go beyond}}
{{yt_tsp 3748 0 Secure Multi-Party Computing (MPC)}}
{{yt_tsp 4078 0 Conclusion}}

## Slides and code

- [slides](https://dataflowr.github.io/slides/privacy_preserving_ML_Daniel_Huynh.pdf)
- [Cryptotree: Homomorphic Random Forests](https://github.com/dhuynh95/cryptotree)

## To go beyond

- [Homomorphic Encryption](https://towardsdatascience.com/homomorphic-encryption-intro-part-1-overview-and-use-cases-a601adcff06c)
-------------------------------------------------------------------------------- /notebooks_md/02a_basics.md: --------------------------------------------------------------------------------
[![Dataflowr](https://raw.githubusercontent.com/dataflowr/website/master/_assets/dataflowr_logo.png)](https://dataflowr.github.io/website/)

You are viewing the static version of the notebook; you can get the [code (GitHub)](https://github.com/dataflowr/notebooks/blob/master/Module2/02a_basics.ipynb) or run it in [colab](https://colab.research.google.com/github/dataflowr/notebooks/blob/master/Module2/02a_basics.ipynb)

You can also do the [quizzes](https://dataflowr.github.io/quiz/module2a.html)

# Module 2: PyTorch tensors and automatic differentiation

[Video timestamp](https://youtu.be/BmAS8IH7n3c?t=103)

```python
import matplotlib.pyplot as plt
%matplotlib inline
import torch
import numpy as np
```

```python
torch.__version__
```

Tensors are used to encode the signal to process, but also the internal states and parameters of models.

**Manipulating data through this constrained structure allows using CPUs and GPUs at peak performance.**

Construct a 3x5 matrix, uninitialized:

```python
x = torch.empty(3,5)
print(x.dtype)
print(x)
```

If you got an error, this [stackoverflow link](https://stackoverflow.com/questions/50617917/overflow-when-unpacking-long-pytorch) might be useful...

```python
x = torch.randn(3,5)
print(x)
```

```python
print(x.size())
```

`torch.Size` is in fact a [tuple](https://docs.python.org/3/tutorial/datastructures.html#tuples-and-sequences), so it supports the same operations.
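For instance, you can unpack it or take its length, a small illustration reusing the `x` defined above (note that `.shape` is simply an alias for `.size()`):

```python
rows, cols = x.size()          # tuple-style unpacking
print(rows, cols)              # 3 5
print(len(x.size()))           # number of dimensions, here 2
print(x.shape == x.size())     # True: .shape is an alias for .size()
```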
[Video timestamp](https://youtu.be/BmAS8IH7n3c?t=272)

```python
x.size()[1]
```

```python
x.size() == (3,5)
```

### Bridge to numpy

[Video timestamp](https://youtu.be/BmAS8IH7n3c?t=325)

```python
y = x.numpy()
print(y)
```

```python
a = np.ones(5)
b = torch.from_numpy(a)
print(a.dtype)
print(b)
```

```python
c = b.long()
print(c.dtype, c)
print(b.dtype, b)
```

```python
xr = torch.randn(3, 5)
print(xr.dtype, xr)
```

```python
resb = xr + b
resb
```

```python
resc = xr + c
resc
```

Be careful with types!

```python
resb == resc
```

```python
torch.set_printoptions(precision=10)
```

```python
resb[0,1]
```

```python
resc[0,1]
```

```python
resc[0,1].dtype
```

```python
xr[0,1]
```

```python
torch.set_printoptions(precision=4)
```

### [Broadcasting](https://docs.scipy.org/doc/numpy-1.13.0/user/basics.broadcasting.html)

[Video timestamp](https://youtu.be/BmAS8IH7n3c?t=670)

Broadcasting automagically expands dimensions by replicating coefficients when necessary to perform operations.

1. If one of the tensors has fewer dimensions than the other, it is reshaped by adding as many dimensions of size 1 as necessary in the front; then
2. for every dimension mismatch, if one of the two tensors is of size one, it is expanded along this axis by replicating coefficients.

If there is a size mismatch for one of the dimensions and neither of them is one, the operation fails.

```python
A = torch.tensor([[1.], [2.], [3.], [4.]])
print(A.size())
B = torch.tensor([[5., -5., 5., -5., 5.]])
print(B.size())
C = A + B
```

```python
C
```

The original (column-)vector
\begin{eqnarray}
A = \left( \begin{array}{c}
1\\
2\\
3\\
4\\
\end{array}\right)
\end{eqnarray}
is transformed into the matrix
\begin{eqnarray}
A = \left( \begin{array}{ccccc}
1&1&1&1&1\\
2&2&2&2&2\\
3&3&3&3&3\\
4&4&4&4&4
\end{array}\right)
\end{eqnarray}
and the original (row-)vector
\begin{eqnarray}
B = (5,-5,5,-5,5)
\end{eqnarray}
is transformed into the matrix
\begin{eqnarray}
B = \left( \begin{array}{ccccc}
5&-5&5&-5&5\\
5&-5&5&-5&5\\
5&-5&5&-5&5\\
5&-5&5&-5&5
\end{array}\right)
\end{eqnarray}
so that summing these matrices gives:
\begin{eqnarray}
C = A+B = \left( \begin{array}{ccccc}
6&-4&6&-4&6\\
7&-3&7&-3&7\\
8&-2&8&-2&8\\
9&-1&9&-1&9
\end{array}\right)
\end{eqnarray}

### In-place modification

[Video timestamp](https://youtu.be/BmAS8IH7n3c?t=875)

```python
x
```

```python
xr
```

```python
print(x+xr)
```

```python
x.add_(xr)
print(x)
```

Any operation that mutates a tensor in-place is post-fixed with an `_`.

For example, `x.fill_(y)` and `x.t_()` will change `x`.
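A minimal sketch of this naming convention, with throwaway tensors (`u` and `v` are purely illustrative, not part of the notebook's running example):

```python
u = torch.ones(3)
v = torch.full((3,), 2.)
w = u.add(v)    # out-of-place: returns a new tensor, u is unchanged
u.add_(v)       # in-place: u itself now holds the sum (note the trailing underscore)
```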
```python
print(x.t())
```

```python
x.t_()
print(x)
```

### Shared memory

[Video timestamp](https://youtu.be/BmAS8IH7n3c?t=990)

Also be careful: changing the torch tensor modifies the numpy array and vice versa...

This is explained in the PyTorch documentation [here](https://pytorch.org/docs/stable/torch.html#torch.from_numpy):
the tensor returned by `torch.from_numpy` and the ndarray share the same memory. Modifications to the tensor will be reflected in the ndarray and vice versa.

```python
a = np.ones(5)
b = torch.from_numpy(a)
print(b)
```

```python
a[2] = 0
print(b)
```

```python
b[3] = 5
print(a)
```

### CUDA

[Video timestamp](https://youtu.be/BmAS8IH7n3c?t=1120)

```python
torch.cuda.is_available()
```

```python
#device = torch.device('cpu')
device = torch.device('cuda') # swap the comments on these two lines to run on CPU
```

```python
x.device
```

```python
# let us run this cell only if CUDA is available
# We will use ``torch.device`` objects to move tensors in and out of GPU
if torch.cuda.is_available():
    y = torch.ones_like(x, device=device)  # directly create a tensor on GPU
    x = x.to(device)                       # or just use strings ``.to("cuda")``
    z = x + y
    print(z, z.type())
    print(z.to("cpu", torch.double))       # ``.to`` can also change dtype together!
```

```python
x = torch.randn(1)
x = x.to(device)
```

```python
x.device
```

```python
# the following lines are only useful if CUDA is available
x = x.data
print(x)
print(x.item())
print(x.cpu().numpy())
```

# Simple interfaces to standard image databases

[Video timestamp](https://youtu.be/BmAS8IH7n3c?t=1354)

An example: the [CIFAR10](https://pytorch.org/docs/stable/torchvision/datasets.html#torchvision.datasets.CIFAR10) dataset.

```python
import torchvision

data_dir = 'content/data'

cifar = torchvision.datasets.CIFAR10(data_dir, train = True, download = True)
cifar.data.shape
```

Documentation about the [`permute`](https://pytorch.org/docs/stable/tensors.html#torch.Tensor.permute) operation.

```python
x = torch.from_numpy(cifar.data).permute(0,3,1,2).float()
x = x / 255
print(x.type(), x.size(), x.min().item(), x.max().item())
```

Documentation about the [`narrow(input, dim, start, length)`](https://pytorch.org/docs/stable/torch.html#torch.narrow) operation.
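As a quick illustration: `narrow` returns a view of `length` indices along dimension `dim`, starting at `start`, without copying memory (the tensor `t` below is just for illustration):

```python
t = torch.arange(10)
v = torch.narrow(t, 0, 2, 4)   # view of the entries t[2], ..., t[5]
v[0] = -1                      # writing through the view...
print(t)                       # ...also modifies t, since no copy was made
```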
```python
# Narrows to the first 48 images (the conversion to float was done above)
x = torch.narrow(x, 0, 0, 48)
```

```python
x.shape
```

```python
# Showing images
def show(img):
    npimg = img.numpy()
    plt.figure(figsize=(20,10))
    plt.imshow(np.transpose(npimg, (1,2,0)), interpolation='nearest')

show(torchvision.utils.make_grid(x, nrow = 12))
```

```python
# Kills the green and blue channels
x.narrow(1, 1, 2).fill_(0)
show(torchvision.utils.make_grid(x, nrow = 12))
```

# Autograd: automatic differentiation

[Video timestamp](https://youtu.be/Z6H3zakmn6E?t=40)

When executing tensor operations, PyTorch can automatically construct, on the fly, the graph of operations needed to compute the gradient of any quantity with respect to any tensor involved.

To be more concrete, we introduce the following example: we consider parameters $w\in \mathbb{R}$ and $b\in \mathbb{R}$ with the corresponding function:
\begin{eqnarray*}
\ell = \left(\exp(wx+b) - y^* \right)^2
\end{eqnarray*}

Our goal here will be to compute the following partial derivatives:
\begin{eqnarray*}
\frac{\partial \ell}{\partial w}\mbox{ and, }\frac{\partial \ell}{\partial b}.
\end{eqnarray*}

The reason for doing this will be clear when you solve the practicals for this lesson!

You can decompose this function as a composition of basic operations. This is called the forward pass on the graph of operations.
![backprop1](https://dataflowr.github.io/notebooks/Module2/img/backprop1.png)

Let's say we start with our model in `numpy`:

```python
w = np.array([0.5])
b = np.array([2])
xx = np.array([0.5])#np.arange(0,1.5,.5)
```

and transform these into `tensor`s:

```python
xx_t = torch.from_numpy(xx)
w_t = torch.from_numpy(w)
b_t = torch.from_numpy(b)
```

[Video timestamp](https://youtu.be/Z6H3zakmn6E?t=224)

A `tensor` has a Boolean field `requires_grad`, set to `False` by default, which states whether PyTorch should build the graph of operations so that gradients with respect to it can be computed.

```python
w_t.requires_grad
```

We want to take the derivative with respect to $w$, so we change this value:

```python
w_t.requires_grad_(True)
```

We want to do the same thing for $b$, but the following line will produce an error!

```python
b_t.requires_grad_(True)
```

Reading the error message should allow you to correct the mistake!

```python
dtype = torch.float64
```

```python
b_t = b_t.type(dtype)
```

```python
b_t.requires_grad_(True)
```

[Video timestamp](https://youtu.be/Z6H3zakmn6E?t=404)

We now compute the function:

```python
def fun(x,ystar):
    y = torch.exp(w_t*x+b_t)
    print(y)
    return torch.sum((y-ystar)**2)

ystar_t = torch.randn_like(xx_t)
l_t = fun(xx_t,ystar_t)
```

```python
l_t
```

```python
l_t.requires_grad
```

After the computation is finished, i.e. after the *forward pass*, you can call `.backward()` and have all the gradients computed automatically.
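Before running the backward pass, let us write down what we should expect. Since $\ell = \left(\exp(wx+b) - y^*\right)^2$, the chain rule gives:
\begin{eqnarray*}
\frac{\partial \ell}{\partial w} = 2\left(\exp(wx+b) - y^* \right)\exp(wx+b)\, x \mbox{ and, } \frac{\partial \ell}{\partial b} = 2\left(\exp(wx+b) - y^* \right)\exp(wx+b),
\end{eqnarray*}
summed over the entries of `xx` since `fun` returns a `torch.sum`; these are exactly the quantities checked by hand a few cells below.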
```python
print(w_t.grad)
```

```python
l_t.backward()
```

```python
print(w_t.grad)
print(b_t.grad)
```

[Video timestamp](https://youtu.be/Z6H3zakmn6E?t=545)

Let's try to understand these numbers...

![backprop2](https://dataflowr.github.io/notebooks/Module2/img/backprop2.png)

```python
yy_t = torch.exp(w_t*xx_t+b_t)
print(torch.sum(2*(yy_t-ystar_t)*yy_t*xx_t))
print(torch.sum(2*(yy_t-ystar_t)*yy_t))
```

`tensor.backward()` accumulates the gradients in the `grad` fields of tensors.

```python
l_t = fun(xx_t,ystar_t)
l_t.backward()
```

```python
print(w_t.grad)
print(b_t.grad)
```

By default, `backward` deletes the computational graph when it is used, so you will get an error below:

```python
l_t.backward()
```

```python
# Manually zero the gradients
w_t.grad.data.zero_()
b_t.grad.data.zero_()
l_t = fun(xx_t,ystar_t)
l_t.backward(retain_graph=True)
l_t.backward()
print(w_t.grad)
print(b_t.grad)
```

The gradients must be set to zero manually. Otherwise, they will accumulate across several `.backward()` calls.
This accumulating behavior is desirable in particular to compute the gradient of a loss summed over several “mini-batches,” or the gradient of a sum of losses.

[![Dataflowr](https://raw.githubusercontent.com/dataflowr/website/master/_assets/dataflowr_logo.png)](https://dataflowr.github.io/website/)
-------------------------------------------------------------------------------- /utils.jl: --------------------------------------------------------------------------------
function hfun_bar(vname)
    val = Meta.parse(vname[1])
    return round(sqrt(val), digits=2)
end

function hfun_m1fill(vname)
    var = vname[1]
    return pagevar("index", var)
end

function lx_baz(com, _)
    # keep this first line
    brace_content = Franklin.content(com.braces[1]) # input string
    # do whatever you want here
    return uppercase(brace_content)
end

# Thanks @tlienart

using Franklin, JSON

using Markdown, Dates

include("youtube_videos.jl")

const DATEFMT = dateformat"yyyy-mm-dd HH:MMp"
const TZ = "America/New_York"

function hfun_doc(params)
    fname = join(params[1:max(1, length(params)-2)], " ")
    head = params[end-1]
    type = params[end]
    doc = eval(Meta.parse("@doc $fname"))
    txt = Markdown.plain(doc)
    # possibly further processing here
    body = Franklin.fd2html(txt, internal=true)
    return """
    <div class="docstring">
        <h2 class="doc-header" id="$fname">
            <a href="#$fname">$head</a>
            <div class="doc-type">$type</div>
        </h2>
        <div class="doc-content">$body</div>
    </div>
    """
end

function hfun_youtube_placeholder(params)
    id = params[1]
    return """
    <div class="youtube-placeholder" id="$id-placeholder"></div>
    <script>
    // mounts the embedded player for video $id into the placeholder above
    </script>
    """
end

function hfun_yt_tsp(params)
    start = params[1]
    final = params[2]  # unused; kept so {{yt_tsp start final title...}} keeps its arity
    s = Time(0) + Second(start)
    # format the timestamp as m:ss, or h:mm:ss past one hour
    mm = lpad(minute(s), 2, '0')
    ss = lpad(second(s), 2, '0')
    if hour(s) < 1
        display = "$(minute(s)):$ss"
    else
        display = "$(hour(s)):$mm:$ss"
    end
    title = join(params[3:end], " ")
    return """
    $display $title <br>
    """
end

function hfun_youtube(params)
    id = params[1]
    return """
    <iframe width="100%" height="360" frameborder="0"
        src="https://www.youtube.com/embed/$(get(videos, id, id))"
        allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture"
        allowfullscreen></iframe>
    """
end

function hfun_youtube_start(params)
    id = params[1]
    start = params[2]
    return """
    <iframe width="100%" height="360" frameborder="0"
        src="https://www.youtube.com/embed/$(get(videos, id, id))?start=$start"
        allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture"
        allowfullscreen></iframe>
    """
end

function hfun_showtime(params)
    id = params[1]
    str = locvar(id)
    if isnothing(str)
        @warn "Unknown datetime variable $str"
        return ""
    end
    try
        DateTime(str, DATEFMT)
    catch err
        @warn "There was an error parsing date $str, the format is yyyy-mm-dd HH:MMp (see ?DateFormat)"
        rethrow(err)
    end
end


# Parse an ISO-8601 duration such as "PT1H2M3S" into a number of seconds.
function parse_duration(str)
    str = replace(str, r"^PT"=>"")
    hrex, mrex, srex = Regex.(string.("^([0-9]+)", ["H","M","S"]))

    t = 0
    hmatch = match(hrex, str)
    if !isnothing(hmatch)
        h = parse(Int, hmatch[1])
        t += 60*60*h
        str = replace(str, hrex=>"")
    end

    mmatch = match(mrex, str)
    if !isnothing(mmatch)
        m = parse(Int, mmatch[1])
        t += 60*m
        str = replace(str, mrex=>"")
    end

    smatch = match(srex, str)
    if !isnothing(smatch)
        s = parse(Int, smatch[1])
        t += s
        str = replace(str, srex=>"")
    end

    t
end

function hfun_go_live()
    seq = locvar("sequence")
    airtime = locvar("airtime")

    if isnothing(seq)
        @warn "airtime is set, but the `sequence` variable is not defined; " *
              "sequence is an array of video IDs to play in order on this page"
    end

    vid_ids = [get(videos, s, s) for s in seq]

    f = tempname()
    # Get the duration of each video
    download("https://www.googleapis.com/youtube/v3/videos?id=$(join(vid_ids, ","))&part=contentDetails&key=AIzaSyDZhbWHc2PTEFTx173MaTgddnWCGPqdbB8", f)
    dict = JSON.parse(String(read(f)))

    durations = [parse_duration(video["contentDetails"]["duration"])
                 for video in dict["items"]]


    jrepr(x) = sprint(io->JSON.print(io, x))
    """
    <script>
    // go-live script: plays the videos of the sequence in order, synced to the airtime
    </script>
    """
end
-------------------------------------------------------------------------------- /youtube_videos.jl: --------------------------------------------------------------------------------
# This dictionary maps easy-to-remember names to Youtube video IDs;
# after adding an ID here, you can use the {{youtube <name>}}
# syntax in your markdown files to embed the video into the page!
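# For example, given the entry "graph1" => "Ch0Iz8BJn98" below, writing
# {{youtube graph1}} in a page embeds that video via hfun_youtube above.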
videos = Dict(
    "course-intro" => "ZhC-DIrCe6A",
    "pytorch-tensors" => "BmAS8IH7n3c",
    "automatic-diff" => "Z6H3zakmn6E",
    "loss-functions" => "jReGEZXq4Ac",
    "optim-basics" => "UvM0hK4E2dc",
    "pytorch-module" => "OiyZXdnLHcI",
    "conv" => "HyotcucT-PE",
    "dataloading-emb" => "vm-ZusIUkiY",
    "collab-filt" => "TaEWBB00Els",
    "autoencoder" => "Z1OWiTFafpQ",
    "gan" => "tqhmilLuJRs",
    "rnn-1" => "4G681MnP_OA",
    "rnn-2" => "EsS95AxCnh0",
    "privacy-preserving" => "68aCbiPIgnM",
    "batches-seq" => "Fqx_RCwenfg",
    "intro-julia" => "oL_T_NVoz9w",
    "siamese" => "6x4IPITZ4dw",
    "ben-depth" => "l-J93Onvj70",
    "pbs-depth" => "cBrDJ0KHxCs",
    "dropout" => "gWiJr_y0Tgs",
    "batchnorm" => "cKSCyTODbDI",
    "resnets" => "LdJV4oHgO28",
    "graph1" => "Ch0Iz8BJn98",
    "graph2" => "o1CfrsSTRAU",
    "graph3" => "ZD0zwjEQhz8",
    "graphinv" => "J4iYhmXLHrM",
)
--------------------------------------------------------------------------------