├── .gitignore
├── AdvML_Challenge_2024_2.html
├── AdvML_Challenge_2024_2.md
├── LICENSE
├── README.md
├── notebooks
├── AdvML_Challenge_2023_1_old.ipynb
├── AdvML_Challenge_2023_2_old.ipynb
├── AdvML_UniTS_2024_Lab_01_Intro_to_Kernels.ipynb
├── AdvML_UniTS_2024_Lab_02_Kernel_ridge_regression_and_kPCA.ipynb
├── AdvML_UniTS_2024_Lab_03_DL_with_PyTorch
│ ├── AdvML_UniTS_2024_Lab_03_DL_with_PyTorch_01_basics.ipynb
│ ├── AdvML_UniTS_2024_Lab_03_DL_with_PyTorch_02_nnets.ipynb
│ └── AdvML_UniTS_2024_Lab_03_DL_with_PyTorch_03_advanced.ipynb
├── AdvML_UniTS_2024_Lab_04_FCN_Augmentation.ipynb
├── AdvML_UniTS_2024_Lab_04bis_FCN_Invariance.ipynb
├── AdvML_UniTS_2024_Lab_05_CNN_Invariance_Equivariance.ipynb
├── AdvML_UniTS_2024_Lab_06_Cortex_Hubel_Wiesel.ipynb
├── AdvML_UniTS_2024_Lab_07_Permutation_Equivariance.ipynb
└── AdvML_UniTS_2024_Lab_08_Implicit_Bias.ipynb
├── requirements.txt
└── solutions
├── AdvML_UniTS_2024_Lab_01_Intro_to_Kernels_Solved.ipynb
├── AdvML_UniTS_2024_Lab_02_Kernel_ridge_regression_and_kPCA_Solved.ipynb
├── AdvML_UniTS_2024_Lab_03_DL_with_PyTorch
├── AdvML_UniTS_2024_Lab_04_FCN_Augmentation_Solved.ipynb
├── AdvML_UniTS_2024_Lab_04bis_FCN_Invariance_Solved.ipynb
├── AdvML_UniTS_2024_Lab_05_CNN_Invariance_Equivariance_Solved.ipynb
├── AdvML_UniTS_2024_Lab_06_Cortex_Hubel_Wiesel_Solved.ipynb
├── AdvML_UniTS_2024_Lab_07_Permutation_Equivariance_Solved.ipynb
└── AdvML_UniTS_2024_Lab_08_Implicit_Bias_Solved.ipynb
/.gitignore:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2024 Fabio Anselmi
2 | # Copyright (c) 2024 AILab @ UniTS
3 | # All rights reserved.
4 | # Released under the terms of the MIT License
5 |
6 | # Byte-compiled / optimized / DLL files
7 | __pycache__/
8 | *.py[cod]
9 | *$py.class
10 |
11 | # C extensions
12 | *.so
13 |
14 | # Distribution / packaging
15 | .Python
16 | build/
17 | develop-eggs/
18 | dist/
19 | downloads/
20 | eggs/
21 | .eggs/
22 | lib/
23 | lib64/
24 | parts/
25 | sdist/
26 | var/
27 | wheels/
28 | share/python-wheels/
29 | *.egg-info/
30 | .installed.cfg
31 | *.egg
32 | MANIFEST
33 |
34 | # PyInstaller
35 | # Usually these files are written by a python script from a template
36 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
37 | *.manifest
38 | *.spec
39 |
40 | # Installer logs
41 | pip-log.txt
42 | pip-delete-this-directory.txt
43 |
44 | # Unit test / coverage reports
45 | htmlcov/
46 | .tox/
47 | .nox/
48 | .coverage
49 | .coverage.*
50 | .cache
51 | nosetests.xml
52 | coverage.xml
53 | *.cover
54 | *.py,cover
55 | .hypothesis/
56 | .pytest_cache/
57 | cover/
58 |
59 | # Translations
60 | *.mo
61 | *.pot
62 |
63 | # Django stuff:
64 | *.log
65 | local_settings.py
66 | db.sqlite3
67 | db.sqlite3-journal
68 |
69 | # Flask stuff:
70 | instance/
71 | .webassets-cache
72 |
73 | # Scrapy stuff:
74 | .scrapy
75 |
76 | # Sphinx documentation
77 | docs/_build/
78 |
79 | # PyBuilder
80 | .pybuilder/
81 | target/
82 |
83 | # Jupyter Notebook
84 | .ipynb_checkpoints
85 |
86 | # IPython
87 | profile_default/
88 | ipython_config.py
89 |
90 | # pyenv
91 | # For a library or package, you might want to ignore these files since the code is
92 | # intended to run in multiple environments; otherwise, check them in:
93 | .python-version
94 |
95 | # pipenv
96 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
97 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
98 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
99 | # install all needed dependencies.
100 | Pipfile.lock
101 |
102 | # poetry
103 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
104 | # This is especially recommended for binary packages to ensure reproducibility, and is more
105 | # commonly ignored for libraries.
106 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
107 | poetry.lock
108 |
109 | # pdm
110 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
111 | #pdm.lock
112 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
113 | # in version control.
114 | # https://pdm.fming.dev/#use-with-ide
115 | .pdm.toml
116 |
117 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
118 | __pypackages__/
119 |
120 | # Celery stuff
121 | celerybeat-schedule
122 | celerybeat.pid
123 |
124 | # SageMath parsed files
125 | *.sage.py
126 |
127 | # Environments
128 | .env
129 | .venv
130 | env/
131 | venv/
132 | ENV/
133 | env.bak/
134 | venv.bak/
135 |
136 | # Spyder project settings
137 | .spyderproject
138 | .spyproject
139 |
140 | # Rope project settings
141 | .ropeproject
142 |
143 | # mkdocs documentation
144 | /site
145 |
146 | # mypy
147 | .mypy_cache/
148 | .dmypy.json
149 | dmypy.json
150 |
151 | # Pyre type checker
152 | .pyre/
153 |
154 | # pytype static type analyzer
155 | .pytype/
156 |
157 | # Cython debug symbols
158 | cython_debug/
159 |
160 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
161 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
162 |
163 | .idea
164 | .idea/
165 | .idea/**
166 |
167 | # User-specific stuff
168 | .idea/**/workspace.xml
169 | .idea/**/tasks.xml
170 | .idea/**/usage.statistics.xml
171 | .idea/**/dictionaries
172 | .idea/**/shelf
173 |
174 | # AWS User-specific
175 | .idea/**/aws.xml
176 |
177 | # Generated files
178 | .idea/**/contentModel.xml
179 |
180 | # Sensitive or high-churn files
181 | .idea/**/dataSources/
182 | .idea/**/dataSources.ids
183 | .idea/**/dataSources.local.xml
184 | .idea/**/sqlDataSources.xml
185 | .idea/**/dynamic.xml
186 | .idea/**/uiDesigner.xml
187 | .idea/**/dbnavigator.xml
188 |
189 | # Gradle
190 | .idea/**/gradle.xml
191 | .idea/**/libraries
192 |
193 | # Gradle and Maven with auto-import
194 | # When using Gradle or Maven with auto-import, you should exclude module files,
195 | # since they will be recreated, and may cause churn. Uncomment if using
196 | # auto-import.
197 | .idea/artifacts
198 | .idea/compiler.xml
199 | .idea/jarRepositories.xml
200 | .idea/modules.xml
201 | .idea/*.iml
202 | .idea/modules
203 | *.iml
204 | *.ipr
205 |
206 | # CMake
207 | cmake-build-*/
208 |
209 | # Mongo Explorer plugin
210 | .idea/**/mongoSettings.xml
211 |
212 | # File-based project format
213 | *.iws
214 |
215 | # IntelliJ
216 | out/
217 |
218 | # mpeltonen/sbt-idea plugin
219 | .idea_modules/
220 |
221 | # JIRA plugin
222 | atlassian-ide-plugin.xml
223 |
224 | # Cursive Clojure plugin
225 | .idea/replstate.xml
226 |
227 | # SonarLint plugin
228 | .idea/sonarlint/
229 |
230 | # Crashlytics plugin (for Android Studio and IntelliJ)
231 | com_crashlytics_export_strings.xml
232 | crashlytics.properties
233 | crashlytics-build.properties
234 | fabric.properties
235 |
236 | # Editor-based Rest Client
237 | .idea/httpRequests
238 |
239 | # Android studio 3.1+ serialized cache file
240 | .idea/caches/build_file_checksums.ser
241 |
242 | # VSCode
243 | .vscode/*
244 | .ionide/
245 | **/.ionide/*
246 |
247 | # Local History for Visual Studio Code
248 | .history/
249 |
250 | # Built Visual Studio Code Extensions
251 | *.vsix
252 |
253 | # Old KDE settings
254 | .directory
255 |
256 | # Typesheds
257 | typeshed
258 | typeshed/
259 | **/typeshed/*
260 |
261 | # "Large" dataset
262 | *.tar.gz
263 | *-ubyte
264 | *-ubyte.gz
265 |
266 | # PyTorch saved/pickled objects
267 | *.pt
268 | *.pth
269 |
270 | # neptune.ai local files
271 | .neptune/
272 |
273 | # Weights and Biases local files
274 | .wandb/
275 |
276 | # Admin scripts
277 | publish.sh
278 | publish.ps1
279 | _solutions/
280 |
281 | # Working files
282 | __scratch__/
283 | **/__scratch__/
284 | **/__scratch__/**
285 |
286 | .gitconfig
287 |
288 | # TODOs:
289 | **/todo.md
290 |
--------------------------------------------------------------------------------
/AdvML_Challenge_2024_2.md:
--------------------------------------------------------------------------------
1 | # *Challenge \#2*: An empirical study on the learnability of functions by *NNs*
2 |
3 | In the following *challenge exercises*, we will empirically investigate the behaviour of deep neural networks with respect to the learning of specific classes of functions, and in specific training regimes.
4 |
5 | ### A. The effect of *under-* and *over-*parameterisation in the *Teacher/Student* setup
6 |
7 | In this exercise, we will train deep neural networks (*students*), supervisedly, on input/output pairs produced by another deep neural network with frozen weights (*teacher*).[^1] Given our ability to modulate the expressivity of both networks, this setup allows us to disentangle the effects of task hardness, model expressivity, and training dynamics.
8 |
9 | We will monitor the training and test loss of the *students* during training, as well as the final distribution of weights. We will do so in three regimes: when the *student* has much less (*under-parameterisation*), much more (*over-parameterisation*) or exactly the same learnable parameters (within a fixed given structure) as those frozen in the *teacher*.
10 |
11 | #### What to do?
12 |
13 | 1. Instantiate the *teacher* model $\mathcal{T}$, a fully-connected feedforward neural network mapping a $100$-dimensional input to a single output scalar. Use $3$ hidden layers of sizes, respectively: $75$, $50$, $10$. Use the $\mathsf{ReLU}$ activation function after all neurons, except for the output. Weights and biases should be initialised as *i.i.d.* samples from the Standard Normal distribution. Keep those parameters fixed for the rest of the exercise.
14 | 2. Generate the **test set** for the learning task, by repeatedly querying the *teacher* model. Inputs $\boldsymbol{x}_i \in \mathbb{R}^{100}$ should be obtained as samples from the multivariate Uniform distribution in the interval $[0,2]^{100}$, whereas the outputs as $y_i = \mathcal{T}(\boldsymbol{x}_i)$. Generate at least $6 \times 10^4$ datapoints and keep them fixed for the rest of the exercise. Such points will be used as a way to quantify generalisation error by the *student* model.
15 | 3. Instantiate the *student* model $\mathcal{S}$, a fully-connected feedforward neural network mapping a $100$-dimensional input to a single output scalar – as in the case of the teacher. Repeat the steps that follow with (at least) three different *student* models, architecturally identical to the *teacher* but with different number and width of the hidden layers.
16 | - $S_u$: one hidden layer of size $10$;
17 | - $S_e$: as for the *teacher*;
18 | - $S_o$: $4$ hidden layers of sizes $200$, $200$, $200$, $100$;
19 | 4. Train the *student* model on the *MSE* loss for a sufficiently large number of iterations, as to allow for the training and test loss to reach a quasi-stationary behaviour. To actually perform the training, harvest a fresh sample of $\mathsf{B}$ inputs (*i.e.* $\left\{{\boldsymbol{x}_1, \dots, \boldsymbol{x}_{\mathsf{B}}}\right\}$) per iteration, label each of them using the *teacher* model, and train the *student* on the given batch. Use an optimizer of your choice, taking care to tune (at least) its learning rate to minimize time to convergence. Do not use default learning rates assuming they are already optimal! Do not optimize batch-size (as it is scarcely effective when tuning also the learning rate[^2]): you can use $\mathsf{B}=128$ (or less, if you cannot make it fit into memory).
20 | As the training progresses, log the training loss (every batch, if you can). Additionally, log also the test-set loss every given number of batches (of your choice).
21 | 5. Once the training is over, evaluate the *student* model on the test set one last time. Additionally, collect (separately) weights and biases for each layer of the *student* network, and compare their distribution to that of the *teacher* network. Do the same for the collection of all weights and biases of the network (*i.e.* not on a layer-wise basis).
22 |
23 | Comment on the results collected, specifically in terms of: number of learnable parameters, trainability, generalisation, distributional convergence to target parameters. Do so individually in each case, as well as in comparison across the different *student* models.
24 |
25 |
26 |
27 | ### B. Function learning and hierarchical structure
28 |
29 | In this exercise, we will train a particular kind of *deep residual network*, supervisedly, on examples generated by two specific polynomials. Although their monomials share most of their respective properties, one polynomial shows a strongly hierarchical structure[^3], whereas the other does not. The hierarchical polynomial is $B_6$, *i.e.* the *sixth-order multivariate complete Bell polynomial*, which is defined as follows.
30 | $$
31 | B_6(x_1, x_2, x_3, x_4, x_5, x_6) = x_1^6 + 15x_2x_1^4 + 20x_3x_1^3 + 45x_2^2x_1^2 + 15x_2^3 + 60x_3x_2x_1 + 15x_4x_1^2 + 10x_3^2 + 15x_4x_2 + 6x_5x_1 + x_6 \text{.}
32 | $$
33 | We will analyse the generalisation error of the same model trained to reproduce each of the two polynomials, both in terms of general input/output mapping ability and sensitivity with respect to the variation of individual input components.
34 |
35 | #### What to do?
36 |
37 | 1. Define the non-hierarchical counterpart of $B_6$, which we will call $\tilde{B}_6: \mathbb{R}^6 \rightarrow \mathbb{R}$, with a *scrambled* monomial structure. In detail, start from the definition of $B_6$ and iteratively replace the $x_{i_k}$ from each $i^{\text{th}}$ monomial with a different $x_{i_{\sigma_{i}(k)}}$ so that:
38 |
39 | - $\tilde{B}_6$ still depends non-trivially on all six input variables $x_1, x_2, x_3, x_4, x_5, x_6$.
40 | - No two monomials of $\tilde{B}_6$ share the same permutation $\sigma_{i}$ of indices.
41 | - No two monomials (regardless of their coefficient), although not sharing the same permutation $\sigma_{i}$ of indices, can be rearranged as such by means of the commutative property of sums and/or products. This should be a concern only for the two terms $15x_4x_2$ and $6x_5x_1$.
42 |
43 | 2. Generate both a **training set** and a **test set** associated with $B_6$ and $\tilde{B}_6$. In particular:
44 |
45 | - Harvest *i.i.d.* input vectors $\boldsymbol{x} = (x_1,\dots,x_{6}) \in \mathbb{R}^{6}$ from the multivariate Uniform distribution in the interval $[0,2]^{6}$;
46 | - Compute the associated output scalar $y \in \mathbb{R}$ as $y = B_6(\boldsymbol{x})$ or $y = \tilde{B}_6(\boldsymbol{x})$. You do not need to share the same inputs $\boldsymbol{x}$ among the datasets generated by the two polynomials.
47 |
48 | Each training set should contain at least $10^{5}$ datapoints, whereas each test set at least $6\times 10^{4}$.
49 |
50 | 3. Instantiate the model as a *fully-connected residual* deep neural network, *i.e.* a fully-connected feedforward neural network where each layer is endowed with a ResNet-style skip connection. Layers of different sizes share no skip connection, which is simply dropped.
51 |
52 | Use a network with $9$ layers ($1$ input layer, $8$ hidden layers, $1$ output layer), where hidden layers have all size $50$. Use the $\mathsf{ReLU}$ activation function after all neurons, except for the output.
53 |
54 | 4. Train the model (in each of the two cases, *i.e.* $B_6$ and $\tilde{B}_6$) on the *MSE* loss for at least $30$ epochs, using a batch size $\mathsf{B}=20$. Use the $\mathsf{Adam}$ optimiser, with a learning rate tuned to minimise training error (independently across the two datasets). As the training progresses, log the training and test losses (at least once at the end of each epoch).
55 |
56 | 5. Once the training is over, evaluate the model one last time on the test set. The result of such evaluation will be used as the *final* generalisation error.
57 |
58 | 6. Investigate how the trained network models the dependency of the output on each input variable separately. To do so, proceed as follows.
59 |
60 | - Harvest a new input vector $\boldsymbol{x} = (x_1,\dots,x_{6}) \in \mathbb{R}^{6}$ from the multivariate Uniform distribution in the interval $[0,2]^{6}$;
61 | - For each of the input components, keep all the others fixed to their sampled value, whereas the one considered is evaluated on a fine uniform 1D grid $\mathcal{G} \subset [0,2]$.
62 | - Evaluate both the target polynomial and the trained model on all resulting input points so generated, grouping their results according to the variable that is swept along the input interval. Compare the results of the sweeps (it is better to do so graphically!).
63 |
64 | 7. **Optional:** Repeat step 6 some more times, and present results in an aggregate form. This is useful to average over the randomness introduced when evaluating the input/output dependency of single input components. Indeed, such evaluation strongly depends on the specific input values sampled in the first place before generating the sweeps.
65 |
66 | Comment on the results. How does the hierarchical structure of the function to be learnt influence the learning process and/or the *final* learnt model?
67 |
68 |
69 |
70 | [^1]: E. Gardner and B. Derrida; Three unfinished works on the optimal storage capacity of networks. Journal of Physics A: Mathematical and General, 22(12):1983–1994, 1989.
71 | [^2]: V. Godbole, G.E. Dahl, J. Gilmer, C.J. Shallue and Z. Nado; Deep Learning Tuning Playbook, 2023.
72 | [^3]: T. Poggio and M. Fraser; Compositional sparsity of learnable functions. Bulletin of the American Mathematical Society, 61:438-456, 2024.
73 |
74 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2024 Fabio Anselmi
4 | Copyright (c) 2024 AILab @ UniTS
5 | All rights reserved.
6 |
7 | Permission is hereby granted, free of charge, to any person obtaining a copy
8 | of this software and associated documentation files (the "Software"), to deal
9 | in the Software without restriction, including without limitation the rights
10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | copies of the Software, and to permit persons to whom the Software is
12 | furnished to do so, subject to the following conditions:
13 |
14 | The above copyright notice and this permission notice shall be included in all
15 | copies or substantial portions of the Software.
16 |
17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | SOFTWARE.
24 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Advanced Topics in Machine Learning 2024/2025
2 |
3 | Elective course for the *Data Science and Scientific Computing* M.Sc. programme @ University of Trieste
4 |
5 | Lecturer: Fabio Anselmi (email: [`fabio.anselmi@units.it`](mailto:fabio.anselmi@units.it))
6 |
7 | Labs:
8 |
9 | * [Lab 1](./notebooks/AdvML_UniTS_2024_Lab_01_Intro_to_Kernels.ipynb): **Kernels** and **features** ([solved](./solutions/AdvML_UniTS_2024_Lab_01_Intro_to_Kernels_Solved.ipynb))
10 |
11 | * [Lab 2](./notebooks/AdvML_UniTS_2024_Lab_02_Kernel_ridge_regression_and_kPCA.ipynb): *Kernelized* machine learning ([solved](./solutions/AdvML_UniTS_2024_Lab_02_Kernel_ridge_regression_and_kPCA_Solved.ipynb))
12 |
13 | * [Lab 3](./notebooks/AdvML_UniTS_2024_Lab_03_DL_with_PyTorch/AdvML_UniTS_2024_Lab_03_DL_with_PyTorch_02_nnets.ipynb): Deep Learning with `PyTorch` (not an actual *exercise notebook*; already *"solved"*!)
14 |
15 | * [Lab 4a](./notebooks/AdvML_UniTS_2024_Lab_04_FCN_Augmentation.ipynb): Effect of data augmentation on learned weights ([solved](./solutions/AdvML_UniTS_2024_Lab_04_FCN_Augmentation_Solved.ipynb))
16 |
17 | * [Lab 4b](./notebooks/AdvML_UniTS_2024_Lab_04bis_FCN_Invariance.ipynb): Invariance in a *shallow FCN* under data augmentation ([solved](./solutions/AdvML_UniTS_2024_Lab_04bis_FCN_Invariance_Solved.ipynb))
18 |
19 | * [Lab 5](./notebooks/AdvML_UniTS_2024_Lab_05_CNN_Invariance_Equivariance.ipynb): Invariance and Equivariance in a *CNN* ([solved](./solutions/AdvML_UniTS_2024_Lab_05_CNN_Invariance_Equivariance_Solved.ipynb))
20 |
21 | * [Lab 6](./notebooks/AdvML_UniTS_2024_Lab_06_Cortex_Hubel_Wiesel.ipynb): The *Hubel-Wiesel* model for cortical networks ([solved](./solutions/AdvML_UniTS_2024_Lab_06_Cortex_Hubel_Wiesel_Solved.ipynb))
22 |
23 | * [Lab 7](./notebooks/AdvML_UniTS_2024_Lab_07_Permutation_Equivariance.ipynb): *Deep sets* and *permutation equivariance* ([solved](./solutions/AdvML_UniTS_2024_Lab_07_Permutation_Equivariance_Solved.ipynb))
24 |
25 | * [Lab 8](./notebooks/AdvML_UniTS_2024_Lab_08_Implicit_Bias.ipynb): **Implicit bias** of gradient descent in *linear regression* ([solved](./solutions/AdvML_UniTS_2024_Lab_08_Implicit_Bias_Solved.ipynb))
26 |
27 | Challenges:
28 |
29 | * [Challenge 1](./notebooks/AdvML_Challenge_2023_1_old.ipynb): An analytical perambulation around *FashionMNIST*
30 |
31 | * [Challenge 2](./AdvML_Challenge_2024_2.md): An empirical study on the learnability of functions by *NNs*
32 |
33 | ---
34 |
35 | Please, fill the (very short) [**Final Projects form**](https://forms.gle/BsfG9BJfxuZy6tjN7) as soon as possible, to receive early feedback on the exam project! Thanks!
36 |
--------------------------------------------------------------------------------
/notebooks/AdvML_Challenge_2023_2_old.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "attachments": {},
5 | "cell_type": "markdown",
6 | "metadata": {},
7 | "source": [
8 | "# *Challenge 2*: *Discovering **Symmetries** in Data*\n",
9 | "\n",
10 | "Advanced Topics in Machine Learning -- Fall 2024, UniTS\n",
11 | "\n",
12 | "
"
13 | ]
14 | },
15 | {
16 | "attachments": {},
17 | "cell_type": "markdown",
18 | "metadata": {},
19 | "source": [
20 | "Generate a dataset composed by rotations of 2-dimensional vectors *w.r.t.* a finite subgroup of the rotation group (*e.g.* every 3-4 degrees). Shuffle the dataset. \n",
21 | "Generate the labels according to the orbits. \n",
22 | "\n",
23 | "Implement a *NN* with one *FC* layer and `ReLU` to classify the orbits elements according to the label. \n",
24 | "\n",
25 | "Write a Loss that contains 3 terms:\n",
26 | "- The regularization term as explained in class using [*soft-sort*](https://github.com/google-research/fast-soft-sort) on rows and columns of the Gramian of the first layer weights;\n",
27 | "- The cross-entropy with orbit labels;\n",
28 | "- The norm of the commutator between the covariance matrix of the data and that of the weights, as explained in class. \n",
29 | "\n",
30 | "You are encouraged to come up with other constraints instead of *1* and especially *3*. \n",
31 | "\n",
32 | "Test the invariance of the representation and plot the Gramian of the learned weights. Which type of matrix should you see? "
33 | ]
34 | }
35 | ],
36 | "metadata": {
37 | "kernelspec": {
38 | "display_name": "Python 3",
39 | "language": "python",
40 | "name": "python3"
41 | },
42 | "language_info": {
43 | "name": "python",
44 | "version": "3.10.8"
45 | },
46 | "orig_nbformat": 4
47 | },
48 | "nbformat": 4,
49 | "nbformat_minor": 2
50 | }
51 |
--------------------------------------------------------------------------------
/notebooks/AdvML_UniTS_2024_Lab_01_Intro_to_Kernels.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Lab 1: **Kernels** and **features**\n",
8 | "\n",
9 | "Advanced Topics in Machine Learning -- Fall 2024, UniTS\n",
10 | "\n",
11 | "
"
12 | ]
13 | },
14 | {
15 | "cell_type": "markdown",
16 | "metadata": {},
17 | "source": [
18 | "### Empirical verification of the *Kernel* $\\leftrightarrow$ *feature expansion* equivalence\n",
19 | "\n",
20 | "Recall the definition of a *kernel*:\n",
21 | "> Let $\\mathcal{X}$ be a non-empty set. A function $k: \\mathcal{X} \\times \\mathcal{X} \\rightarrow \\mathbb{R}$ is called a *kernel* if there exists a real-Hilbert space $\\mathcal{H}$ and a map $\\phi: \\mathcal{X} \\rightarrow \\mathcal{H}$ such that $\\forall x, x^\\prime \\in \\mathcal{X}$, $k(x, x^\\prime) := \\langle \\phi(x) , \\phi(x^\\prime) \\rangle_{\\mathcal{H}}$.\n",
22 | "\n",
23 | "To motivate the usefulness of kernelized ML methods, we can show that -- for $x\in\mathbb{R}^{d \in \mathbb{N}}$ -- the computation of $k(x, x^\prime)$ in kernel form is equivalent to the explicit scalar product $\langle \varphi(x) , \varphi(x^\prime) \rangle = \varphi(x)^{T} \varphi(x^\prime)$ of some corresponding expanded feature maps $\varphi: \mathbb{R}^{d} \rightarrow \mathbb{R}^{d^\prime}$ with generally $d^\prime \gg d$ (or even *infinite-dimensional* $\varphi$s), though significantly simpler and more efficient to compute.\n"
24 | "\n",
25 | "In the lab that follows, verify such equivalence for simple kernels: the non-uniform *quadratic* (in $\\mathbb{R}^{d}$) and the *Gaussian* (in $\\mathbb{R}$).\n",
26 | "\n",
27 | "For each kernel:\n",
28 | "\n",
29 | "1. Implement a function that computes the kernel between two arrays of coordinates;\n",
30 | "2. Derive the explicit feature map $\\varphi(x)$ corresponding to that kernel;\n",
31 | "3. Implement a function that computes such feature map for a given array of coordinates;\n",
32 | "4. Verify that the kernel computed by (1) and the scalar product of its arguments through (3) are indeed equivalent.\n",
33 | "\n",
34 | "**Hint**: in case of need, you can finitely approximate the feature map by Taylor expansion.\n"
35 | ]
36 | },
37 | {
38 | "cell_type": "code",
39 | "execution_count": null,
40 | "metadata": {},
41 | "outputs": [],
42 | "source": [
43 | "def nu_quadratic_kernel(x, x_prime):\n",
44 | " \"\"\"Compute the non-uniform quadratic kernel between two arrays of coordinates.\n",
45 | "\n",
46 | " Parameters\n",
47 | " ----------\n",
48 | " x : array-like, shape: (n_features)\n",
49 | " First array of coordinates.\n",
50 | " x_prime : array-like, shape: (n_features)\n",
51 | " Second array of coordinates.\n",
52 | "\n",
53 | " Returns\n",
54 | " -------\n",
55 | " k : array-like, shape: (1)\n",
56 | " Kernel value.\n",
57 | " \"\"\"\n",
58 | " # YOUR CODE HERE\n",
59 | " raise NotImplementedError()"
60 | ]
61 | },
62 | {
63 | "cell_type": "code",
64 | "execution_count": null,
65 | "metadata": {},
66 | "outputs": [],
67 | "source": [
68 | "def nu_quadratic_feature_map(x):\n",
69 | " \"\"\"Compute the feature map corresponding to the non-uniform quadratic kernel.\n",
70 | "\n",
71 | " Parameters\n",
72 | " ----------\n",
73 | " x : array-like, shape: (n_features)\n",
74 | " Array of coordinates.\n",
75 | "\n",
76 | " Returns\n",
77 | " -------\n",
78 | " phi_x : array-like, shape: (n_features)\n",
79 | " Feature map.\n",
80 | " \"\"\"\n",
81 | " # YOUR CODE HERE\n",
82 | " raise NotImplementedError()"
83 | ]
84 | },
85 | {
86 | "cell_type": "code",
87 | "execution_count": null,
88 | "metadata": {},
89 | "outputs": [],
90 | "source": [
91 | "# Check that the two functions are equivalent on a randomly-initialized array\n",
92 | "# YOUR CODE HERE"
93 | ]
94 | },
95 | {
96 | "cell_type": "code",
97 | "execution_count": null,
98 | "metadata": {},
99 | "outputs": [],
100 | "source": [
101 | "def gaussian_kernel(x, x_prime, sigma):\n",
102 | " \"\"\"Compute the Gaussian kernel between two arrays of coordinates.\n",
103 | "\n",
104 | " Parameters\n",
105 | " ----------\n",
106 | " x : array-like, shape: (n_features)\n",
107 | " First array of coordinates.\n",
108 | " x_prime : array-like, shape: (n_features)\n",
109 | " Second array of coordinates.\n",
110 | " sigma : float\n",
111 | " Kernel standard deviation.\n",
112 | "\n",
113 | " Returns\n",
114 | " -------\n",
115 | " k : array-like, shape: (1)\n",
116 | " Kernel value.\n",
117 | " \"\"\"\n",
118 | " # YOUR CODE HERE\n",
119 | " raise NotImplementedError()"
120 | ]
121 | },
122 | {
123 | "cell_type": "code",
124 | "execution_count": null,
125 | "metadata": {},
126 | "outputs": [],
127 | "source": [
128 | "def gaussian_feature_map(x, sigma):\n",
129 | " \"\"\"Compute the feature map corresponding to the Gaussian kernel.\n",
130 | "\n",
131 | " Parameters\n",
132 | " ----------\n",
133 | " x : array-like, shape: (n_features)\n",
134 | " Array of coordinates.\n",
135 | " sigma : float\n",
136 | " Kernel standard deviation.\n",
137 | "\n",
138 | " Returns\n",
139 | " -------\n",
140 | " phi_x : array-like, shape: (n_features)\n",
141 | " Feature map.\n",
142 | " \"\"\"\n",
143 | " # YOUR CODE HERE\n",
144 | " raise NotImplementedError()"
145 | ]
146 | },
147 | {
148 | "cell_type": "code",
149 | "execution_count": 1,
150 | "metadata": {},
151 | "outputs": [],
152 | "source": [
153 | "# Check the equivalence on randomly-initialized arrays\n",
154 | "# YOUR CODE HERE"
155 | ]
156 | }
157 | ],
158 | "metadata": {
159 | "colab": {
160 | "authorship_tag": "ABX9TyPkibvpTEMRILBn2/x8IuJj",
161 | "provenance": []
162 | },
163 | "kernelspec": {
164 | "display_name": "Python 3 (ipykernel)",
165 | "language": "python",
166 | "name": "python3"
167 | },
168 | "language_info": {
169 | "codemirror_mode": {
170 | "name": "ipython",
171 | "version": 3
172 | },
173 | "file_extension": ".py",
174 | "mimetype": "text/x-python",
175 | "name": "python",
176 | "nbconvert_exporter": "python",
177 | "pygments_lexer": "ipython3",
178 | "version": "3.10.9"
179 | }
180 | },
181 | "nbformat": 4,
182 | "nbformat_minor": 1
183 | }
184 |
--------------------------------------------------------------------------------
/notebooks/AdvML_UniTS_2024_Lab_02_Kernel_ridge_regression_and_kPCA.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "provenance": [],
7 | "authorship_tag": "ABX9TyPkibvpTEMRILBn2/x8IuJj"
8 | },
9 | "kernelspec": {
10 | "name": "python3",
11 | "display_name": "Python 3"
12 | },
13 | "language_info": {
14 | "name": "python"
15 | }
16 | },
17 | "cells": [
18 | {
19 | "cell_type": "markdown",
20 | "source": [
21 | "# Lab 2: *Kernelized* machine learning\n",
22 | "\n",
23 | "Advanced Topics in Machine Learning -- Fall 2024, UniTS\n",
24 | "\n",
25 | "
"
26 | ],
27 | "metadata": {
28 | "collapsed": false
29 | }
30 | },
31 | {
32 | "cell_type": "markdown",
33 | "source": [
34 | "### *Kernel Regression* and *Kernel Ridge Regression*\n",
35 | "\n",
36 | "Recall that the solution of **linear regression** can also be written as: $w=(X^{T}X)^{-1}X^{T}y=X^{T}(XX^{T})^{-1}y$\n",
37 | "\n",
38 | "Let $X\\in R^{N\\times d}$: we have $X^{T}X\\in R^{d\\times d}$ and $K=XX^{T}\\in R^{N\\times N}$. Whether it is more convenient to (pre)compute which matrix product (among $X^{T}X$, $XX^{T}$) depends on the $d/N$ ratio.\n",
39 | "\n",
40 | "As far as predictions are concerned, we have that: $f(z)=z^{T}w=z^{T} X^{T}(XX^{T})^{-1}y= \\alpha^{T}(z)K^{-1}y$, with $\\alpha(z)=z^{T}X^{T}=K(z,X)\\in R^{1\\times N}$.\n",
41 | "\n",
42 | "How can we move to the non-linear regression case? We just substitute $x\\rightarrow \\phi(x)$, and the reasoning above can be repeated!\n",
43 | "\n",
44 | "For more information, you can look up [this Medium article](https://knork.medium.com/linear-regression-in-python-from-scratch-with-kernels-e9c37f7975b9) or [this code-first tutorial](https://github.com/luigicarratino/Tutorial_Kernels_MLSS2019_London/blob/master/Tutorial%20Kernel.ipynb)."
45 | ],
46 | "metadata": {
47 | "collapsed": false
48 | }
49 | },
50 | {
51 | "cell_type": "code",
52 | "execution_count": 11,
53 | "outputs": [],
54 | "source": [
55 | "import numpy as np\n",
56 | "\n",
57 | "from sklearn.datasets import make_moons"
58 | ],
59 | "metadata": {
60 | "collapsed": false
61 | }
62 | },
63 | {
64 | "cell_type": "markdown",
65 | "source": [
66 | "**Exercise 1: linear data regression**\n",
 67 | "1. Generate and plot a dataset consisting of $100$ datapoints in the form $(x_i,y_i)$. The $x_i$ are sampled uniformly in $[2,30]$, whereas the $y_i$ are sampled from a Gaussian distribution centred at $2x_i + 50$ having unit variance."
68 | ],
69 | "metadata": {
70 | "collapsed": false
71 | }
72 | },
73 | {
74 | "cell_type": "code",
75 | "execution_count": 12,
76 | "outputs": [],
77 | "source": [
78 | "# YOUR CODE HERE"
79 | ],
80 | "metadata": {
81 | "collapsed": false
82 | }
83 | },
84 | {
85 | "cell_type": "markdown",
86 | "source": [
87 | "2. Fit a linear regression model to the data, **with no learnable intercept** (i.e. fix it to zero), and plot the predictions of the resulting model compared to the data."
88 | ],
89 | "metadata": {
90 | "collapsed": false
91 | }
92 | },
93 | {
94 | "cell_type": "code",
95 | "execution_count": 13,
96 | "outputs": [],
97 | "source": [
98 | "# YOUR CODE HERE"
99 | ],
100 | "metadata": {
101 | "collapsed": false
102 | }
103 | },
104 | {
105 | "cell_type": "markdown",
106 | "source": [
107 | "3. Fit a linear regression model to the data, **with learnable intercept**, and plot the predictions of the resulting model compared to the data."
108 | ],
109 | "metadata": {
110 | "collapsed": false
111 | }
112 | },
113 | {
114 | "cell_type": "code",
115 | "execution_count": 14,
116 | "outputs": [],
117 | "source": [
118 | "# YOUR CODE HERE"
119 | ],
120 | "metadata": {
121 | "collapsed": false
122 | }
123 | },
124 | {
125 | "cell_type": "markdown",
126 | "source": [
127 | "4. Fit a linear ridge regression model to the data, and plot the predictions of the resulting model compared to the data."
128 | ],
129 | "metadata": {
130 | "collapsed": false
131 | }
132 | },
133 | {
134 | "cell_type": "code",
135 | "execution_count": 15,
136 | "outputs": [],
137 | "source": [
138 | "# YOUR CODE HERE"
139 | ],
140 | "metadata": {
141 | "collapsed": false
142 | }
143 | },
144 | {
145 | "cell_type": "markdown",
146 | "source": [
147 | "**Exercise 2: Kernel regression on *periodic* data**\n",
148 | "1. The following dataset is given. Plot it."
149 | ],
150 | "metadata": {
151 | "collapsed": false
152 | }
153 | },
154 | {
155 | "cell_type": "code",
156 | "execution_count": 16,
157 | "outputs": [],
158 | "source": [
159 | "xkr = np.linspace(2, 30, 100)\n",
160 | "ykr = xkr + 4 * np.sin(xkr) + 4 * np.random.rand(xkr.shape[0])"
161 | ],
162 | "metadata": {
163 | "collapsed": false
164 | }
165 | },
166 | {
167 | "cell_type": "code",
168 | "execution_count": 17,
169 | "outputs": [],
170 | "source": [
171 | "# YOUR CODE HERE"
172 | ],
173 | "metadata": {
174 | "collapsed": false
175 | }
176 | },
177 | {
178 | "cell_type": "markdown",
179 | "source": [
180 | "2. Define a function that computes the Gaussian kernel value between two vectors, represented as numpy arrays."
181 | ],
182 | "metadata": {
183 | "collapsed": false
184 | }
185 | },
186 | {
187 | "cell_type": "code",
188 | "execution_count": 18,
189 | "outputs": [],
190 | "source": [
191 | "def gaussian_kernel(x1, x2, sigma):\n",
192 | " # YOUR CODE HERE\n",
193 | " ..."
194 | ],
195 | "metadata": {
196 | "collapsed": false
197 | }
198 | },
199 | {
200 | "cell_type": "markdown",
201 | "source": [
 202 | "3. With the function just defined, compute the Gram matrix of the dataset. Use a Gaussian kernel with $\\sigma=1$."
203 | ],
204 | "metadata": {
205 | "collapsed": false
206 | }
207 | },
208 | {
209 | "cell_type": "code",
210 | "execution_count": 19,
211 | "outputs": [],
212 | "source": [
213 | "# YOUR CODE HERE"
214 | ],
215 | "metadata": {
216 | "collapsed": false
217 | }
218 | },
219 | {
220 | "cell_type": "markdown",
221 | "source": [
222 | "4. Fit a kernel ridge regression model to the data, and plot the resulting model compared to the data."
223 | ],
224 | "metadata": {
225 | "collapsed": false
226 | }
227 | },
228 | {
229 | "cell_type": "code",
230 | "execution_count": 20,
231 | "outputs": [],
232 | "source": [
233 | "# YOUR CODE HERE"
234 | ],
235 | "metadata": {
236 | "collapsed": false
237 | }
238 | },
239 | {
240 | "cell_type": "markdown",
241 | "source": [
242 | "### *Kernel PCA* and the *Radial Basis Function* (RBF) Kernel\n",
243 | "\n",
244 | "Let us briefly recap the key concepts about PCA. We have a dataset $X\\in R^{N\\times d}$, and we want to find a new basis $Z\\in R^{N\\times d}$ such that the explained variance of the data, projected on the new basis, is maximized for any fixed number of components. This is equivalent to finding the eigenvectors of the covariance matrix $C=X^{T}X$.\n",
245 | "\n",
246 | "Such operation requires the data to be centered, i.e. $X_{i}^{(j)}-\\mu_{j} \\rightarrow X_{i}^{(j)}$, where $\\mu_{j}=\\frac{1}{N}\\sum_{i=1}^{N}X_{i}^{(j)}$ is the mean of the $j$-th feature.\n",
247 | "\n",
248 | "We can also write the eigenvalue problem component-wise, as follows: $Cz_{j}=\\lambda_{j}z_{j}$, where $z_{j}$ is the $j$-th eigenvector and $\\lambda_{j}$ is the corresponding eigenvalue. The eigenvectors are orthogonal, i.e. $z_{j}^{T}z_{k}=0$ for $j\\neq k$.\n",
249 | "\n",
250 | "Moving to the kernelized case, we operate the substitution $x\\rightarrow \\phi(x)$, and the eigenvalue problem becomes $C_{K}v=\\lambda v$ with $C_{K}=\\frac{1}{N}\\sum_{i}\\phi(x_{i})\\phi^{T}(x_{i})$, with $\\phi(\\cdot)$ being a generic feature map.\n",
251 | "\n",
 252 | "One can prove that solutions of the eigenvalue problem are in the form $v=\\sum_{i}\\alpha_{i}\\phi(x_{i})$. Taking the inner product of both sides of $C_{K}v=\\lambda v$ with $\\phi(x_{k})$ and substituting, we obtain $N\\lambda \\alpha=K \\alpha$.\n",
253 | "\n",
 254 | "Such reasoning still requires centering in feature space, i.e. $ \\phi(x_{i})-\\frac{1}{N}\\sum_{j=1}^{N}\\phi(x_{j}) \\rightarrow \\phi(x_{i})$. The resulting (centered) kernel $K$ is in the form $K := K-Id_{1/n}K - K Id_{1/n} + Id_{1/n}K Id_{1/n}$ with $Id_{1/n}$ the matrix with entries $1/n$.\n",
255 | "\n",
256 | "A more thorough explanation can be found in [this blogpost](https://sdat.ir/en/sdat-blog/python-kernel-tricks-and-nonlinear-dimensionality-reduction-via-rbf-kernel-pca).\n"
257 | ],
258 | "metadata": {
259 | "collapsed": false
260 | }
261 | },
262 | {
263 | "cell_type": "markdown",
264 | "source": [
265 | "**Exercise 3: Kernel PCA**\n",
266 | "1. A *Half Moons* dataset of $100$ points is given below. Plot it."
267 | ],
268 | "metadata": {
269 | "collapsed": false
270 | }
271 | },
272 | {
273 | "cell_type": "code",
274 | "execution_count": 21,
275 | "outputs": [],
276 | "source": [
277 | "xhm, yhm = make_moons(n_samples=100, random_state=123)"
278 | ],
279 | "metadata": {
280 | "collapsed": false
281 | }
282 | },
283 | {
284 | "cell_type": "code",
285 | "execution_count": 22,
286 | "outputs": [],
287 | "source": [
288 | "# YOUR CODE HERE"
289 | ],
290 | "metadata": {
291 | "collapsed": false
292 | }
293 | },
294 | {
295 | "cell_type": "markdown",
296 | "source": [
297 | "2. Apply the PCA algorithm to the dataset, and plot the resulting projection using the first 2 principal components.\n",
298 | "**Hint**: you can use the `PCA` class from `sklearn.decomposition`."
299 | ],
300 | "metadata": {
301 | "collapsed": false
302 | }
303 | },
304 | {
305 | "cell_type": "code",
306 | "execution_count": 23,
307 | "outputs": [],
308 | "source": [
309 | "# YOUR CODE HERE"
310 | ],
311 | "metadata": {
312 | "collapsed": false
313 | }
314 | },
315 | {
316 | "cell_type": "markdown",
317 | "source": [
318 | "3. Repeat the previous step, but plotting just the first principal component. Comment on the separability of the two classes."
319 | ],
320 | "metadata": {
321 | "collapsed": false
322 | }
323 | },
324 | {
325 | "cell_type": "code",
326 | "execution_count": 24,
327 | "outputs": [],
328 | "source": [
329 | "# YOUR CODE HERE"
330 | ],
331 | "metadata": {
332 | "collapsed": false
333 | }
334 | },
335 | {
336 | "cell_type": "markdown",
337 | "source": [
338 | "4. Implement a function that computes the *Radial Basis* PCA algorithm of the dataset, given as a NumPy array."
339 | ],
340 | "metadata": {
341 | "collapsed": false
342 | }
343 | },
344 | {
345 | "cell_type": "code",
346 | "execution_count": 25,
347 | "outputs": [],
348 | "source": [
349 | "def kpca(x_data, gamma, n_components):\n",
350 | " \"\"\"\n",
351 | " Implementation of a RBF kernel PCA.\n",
352 | "\n",
353 | " Arguments:\n",
354 | " x_data: A MxN dataset as NumPy array where the samples are stored as rows (M),\n",
355 | " and the attributes defined as columns (N).\n",
356 | " gamma: The free parameter (coefficient) for the RBF kernel.\n",
357 | " n_components: The number of components to be returned.\n",
358 | "\n",
359 | " \"\"\"\n",
360 | " # YOUR CODE HERE\n",
361 | " xpcs = ...\n",
362 | " return xpcs"
363 | ],
364 | "metadata": {
365 | "collapsed": false
366 | }
367 | },
368 | {
369 | "cell_type": "markdown",
370 | "source": [
371 | "5. Apply the function just defined to the dataset, and plot the resulting projection using the first 2 principal components."
372 | ],
373 | "metadata": {
374 | "collapsed": false
375 | }
376 | },
377 | {
378 | "cell_type": "code",
379 | "execution_count": 26,
380 | "outputs": [],
381 | "source": [
382 | "# YOUR CODE HERE"
383 | ],
384 | "metadata": {
385 | "collapsed": false
386 | }
387 | },
388 | {
389 | "cell_type": "markdown",
390 | "source": [
391 | "6. Repeat the previous step, but plotting just the first principal component. Comment on the separability of the two classes.\n"
392 | ],
393 | "metadata": {
394 | "collapsed": false
395 | }
396 | },
397 | {
398 | "cell_type": "code",
399 | "execution_count": 27,
400 | "outputs": [],
401 | "source": [
402 | "# YOUR CODE HERE"
403 | ],
404 | "metadata": {
405 | "collapsed": false
406 | }
407 | }
408 | ]
409 | }
410 |
--------------------------------------------------------------------------------
/notebooks/AdvML_UniTS_2024_Lab_03_DL_with_PyTorch/AdvML_UniTS_2024_Lab_03_DL_with_PyTorch_01_basics.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "attachments": {},
5 | "cell_type": "markdown",
6 | "metadata": {},
7 | "source": [
8 | "# Lab 3.1 | A *whirlwind tour* of `PyTorch`: **the basics**\n",
9 | "\n",
10 | "Advanced Topics in Machine Learning -- Fall 2024, UniTS\n",
11 | "\n",
12 | "
"
13 | ]
14 | },
15 | {
16 | "cell_type": "markdown",
17 | "metadata": {},
18 | "source": [
19 | "**NOTE:** This notebook is the same as the *solved* version."
20 | ]
21 | },
22 | {
23 | "attachments": {},
24 | "cell_type": "markdown",
25 | "metadata": {},
26 | "source": [
 27 | "[PyTorch](https://pytorch.org/) is a Python/C++ framework for:\n",
28 | "- Efficient numerical computing, with support for strong GPU acceleration & parallelism;\n",
29 | "- Automatic algorithmic differentiation (mainly in *reverse mode*, *tape-based*; but more recently also in *forward mode*);\n",
30 | "- Development of deep artificial neural models (a.k.a. *deep learning*);\n",
31 | "\n",
32 | "It is also well integrated with the *scientific Python stack*.\n",
33 | "\n",
34 | "The flexibility of PyTorch and its *Pythonic* interfaces make it the most widely adopted framework for research and development, both in academia and industry (especially industrial *R&D*).\n",
35 | "\n",
36 | "For more info about `PyTorch`, you can have a look at the [official documentation](https://pytorch.org/docs/stable/index.html) or refer to [this book](https://isip.piconepress.com/courses/temple/ece_4822/resources/books/Deep-Learning-with-PyTorch.pdf). \n",
37 | "For insights about the inner workings of *autodiff*, you can start exploring the topic from [this survey](https://arxiv.org/abs/1502.05767)."
38 | ]
39 | },
40 | {
41 | "cell_type": "code",
42 | "execution_count": 1,
43 | "metadata": {},
44 | "outputs": [],
45 | "source": [
46 | "# Optional\n",
47 | "# pip install icecream"
48 | ]
49 | },
50 | {
51 | "cell_type": "code",
52 | "execution_count": 2,
53 | "metadata": {},
54 | "outputs": [],
55 | "source": [
56 | "# It all begins with...\n",
57 | "import torch\n",
58 | "import torch as th # (Not necessary; a shorthand)\n",
59 | "\n",
60 | "import numpy as np # For comparison\n",
61 | "\n",
62 | "import matplotlib.pyplot as plt # For plotting\n",
63 | "\n",
64 | "from icecream import ic # For pretty-printing variables\n",
65 | "\n",
66 | "ic.configureOutput(prefix=\"\\n|> \") # For pretty-printing variables\n",
67 | "\n",
68 | "from torch import Tensor # For type annotations\n",
69 | "from numpy.typing import NDArray # For type annotations\n",
70 | "from typing import Any # For type annotations"
71 | ]
72 | },
73 | {
74 | "attachments": {},
75 | "cell_type": "markdown",
76 | "metadata": {},
77 | "source": [
78 | "## Basic operation with `Tensor`s\n",
79 | "\n",
80 | "The main building block of PyTorch's linear algebra capabilities is the `Tensor` class. A torch `Tensor` is the (loose) equivalent of NumPy's `ndarray` and most of the functionalities are the same as in NumPy. In general, it is always possible to perform the same logical/mathematical operations typical of NumPy on torch `Tensor`s."
81 | ]
82 | },
83 | {
84 | "cell_type": "code",
85 | "execution_count": 3,
86 | "metadata": {},
87 | "outputs": [
88 | {
89 | "name": "stderr",
90 | "output_type": "stream",
91 | "text": [
92 | "|> x: tensor([[1, 2, 3],\n",
93 | " [> x: tensor([[1, 2, 3],\n",
94 | " [4, 5, 6]])\n",
95 | "|> y: array([[1, 2, 3],\n",
96 | " [4, 5, 6]])\n"
97 | ]
98 | }
99 | ],
100 | "source": [
101 | "x: Tensor = th.tensor([[1, 2, 3], [4, 5, 6]])\n",
102 | "y: NDArray[Any] = np.array([[1, 2, 3], [4, 5, 6]])\n",
103 | "\n",
104 | "_ = ic(x)\n",
105 | "_ = ic(y)"
106 | ]
107 | },
108 | {
109 | "cell_type": "code",
110 | "execution_count": 4,
111 | "metadata": {},
112 | "outputs": [
113 | {
114 | "name": "stderr",
115 | "output_type": "stream",
116 | "text": [
117 | "|> x.size(): torch.Size> x.size(): torch.Size([2, 3])\n",
118 | "|> x.shape: torch.Size([2, 3])\n",
119 | "|> y.shape: (2, 3)\n"
120 | ]
121 | }
122 | ],
123 | "source": [
124 | "# Shapes and sizes\n",
125 | "_ = ic(x.size())\n",
126 | "_ = ic(x.shape)\n",
127 | "\n",
128 | "_ = ic(y.shape)"
129 | ]
130 | },
131 | {
132 | "cell_type": "code",
133 | "execution_count": 5,
134 | "metadata": {},
135 | "outputs": [
136 | {
137 | "name": "stderr",
138 | "output_type": "stream",
139 | "text": [
140 | "|> x.dtype: torch.int64\n",
141 | "> x.dtype: torch.int64\n",
142 | "|> y.dtype: dtype('int64')\n"
143 | ]
144 | }
145 | ],
146 | "source": [
147 | "# (d)types\n",
148 | "_ = ic(x.dtype)\n",
149 | "_ = ic(y.dtype)"
150 | ]
151 | },
152 | {
153 | "cell_type": "code",
154 | "execution_count": 6,
155 | "metadata": {},
156 | "outputs": [
157 | {
158 | "name": "stdout",
159 | "output_type": "stream",
160 | "text": [
161 | "dtype of x before casting: torch.int64\n",
162 | "dtype of x after casting: torch.float32\n"
163 | ]
164 | }
165 | ],
166 | "source": [
167 | "# (d)types \"casting\"\n",
168 | "print(\"dtype of x before casting:\", x.dtype)\n",
169 | "x: Tensor = x.float()\n",
170 | "print(\"dtype of x after casting:\", x.dtype)"
171 | ]
172 | },
173 | {
174 | "cell_type": "code",
175 | "execution_count": 7,
176 | "metadata": {},
177 | "outputs": [
178 | {
179 | "name": "stdout",
180 | "output_type": "stream",
181 | "text": [
182 | "dtype of x: torch.float16\n"
183 | ]
184 | }
185 | ],
186 | "source": [
187 | "# Or with more granular control\n",
188 | "x: Tensor = x.to(dtype=th.float16)\n",
189 | "print(\"dtype of x:\", x.dtype)"
190 | ]
191 | },
192 | {
193 | "attachments": {},
194 | "cell_type": "markdown",
195 | "metadata": {},
196 | "source": [
197 | "Note that you can build a tensor through the constructor `th.Tensor` (as opposed to `torch.tensor`, mind the capitalisation!). In this case, since `th.Tensor` is an alias for `th.FloatTensor`, the tensor you create will have type `th.float32`.\n",
198 | "\n",
199 | "More info on data types [here](https://pytorch.org/docs/stable/tensors.html)."
200 | ]
201 | },
202 | {
203 | "attachments": {},
204 | "cell_type": "markdown",
205 | "metadata": {},
206 | "source": [
207 | "Tensor slicing works exactly like in NumPy, by means of square brackets:"
208 | ]
209 | },
210 | {
211 | "cell_type": "code",
212 | "execution_count": 8,
213 | "metadata": {},
214 | "outputs": [
215 | {
216 | "name": "stderr",
217 | "output_type": "stream",
218 | "text": [
219 | "|> x: tensor([[[0.2516, 0.5426],> x: tensor([[[0.2516, 0.5426],\n",
220 | " [0.3589, 0.3382],\n",
221 | " [0.6716, 0.9616]],\n",
222 | " \n",
223 | " [[0.2841, 0.4554],\n",
224 | " [0.1629, 0.5155],\n",
225 | " [0.3716, 0.1191]]])\n"
226 | ]
227 | }
228 | ],
229 | "source": [
230 | "x: Tensor = th.rand(2, 3, 2)\n",
231 | "_ = ic(x)"
232 | ]
233 | },
234 | {
235 | "cell_type": "code",
236 | "execution_count": 9,
237 | "metadata": {},
238 | "outputs": [
239 | {
240 | "name": "stderr",
241 | "output_type": "stream",
242 | "text": [
243 | "|> x[0, 1, 1]: tensor> x[0, 1, 1]: tensor(0.3382)\n",
244 | "|> x[0, 1:, 1]: tensor([0.3382, 0.9616])\n",
245 | "|> x[:, ::2, :]: tensor([[[0.2516, 0.5426],\n",
246 | " [0.6716, 0.9616]],\n",
247 | " \n",
248 | " [[0.2841, 0.4554],\n",
249 | " [0.3716, 0.1191]]])\n"
250 | ]
251 | }
252 | ],
253 | "source": [
254 | "_ = ic(x[0, 1, 1])\n",
255 | "\n",
256 | "_ = ic(x[0, 1:, 1])\n",
257 | "\n",
258 | "_ = ic(x[:, ::2, :])"
259 | ]
260 | },
261 | {
262 | "cell_type": "code",
263 | "execution_count": 10,
264 | "metadata": {},
265 | "outputs": [
266 | {
267 | "name": "stderr",
268 | "output_type": "stream",
269 | "text": [
270 | "|> x[0, 1, 1].shape> x[0, 1, 1].shape: torch.Size([])\n",
271 | "|> th.tensor(3.14).shape: torch.Size([])\n",
272 | "|> th.tensor([3.14]).shape: torch.Size([1])\n"
273 | ]
274 | }
275 | ],
276 | "source": [
277 | "# Note: 0-dimensional tensors vs 1-dimensional tensors\n",
278 | "_ = ic(x[0, 1, 1].shape)\n",
279 | "\n",
280 | "_ = ic(th.tensor(3.14).shape)\n",
281 | "\n",
282 | "_ = ic(th.tensor([3.14]).shape)"
283 | ]
284 | },
285 | {
286 | "cell_type": "code",
287 | "execution_count": 11,
288 | "metadata": {},
289 | "outputs": [
290 | {
291 | "name": "stderr",
292 | "output_type": "stream",
293 | "text": [
294 | "|> x.numel(): 12\n",
295 | "> x.numel(): 12\n",
296 | "|> th.tensor(3.14).numel(): 1\n",
297 | "|> th.tensor([3.14]).numel(): 1\n"
298 | ]
299 | }
300 | ],
301 | "source": [
302 | "# Use of `numel`\n",
303 | "_ = ic(x.numel())\n",
304 | "\n",
305 | "_ = ic(th.tensor(3.14).numel())\n",
306 | "\n",
307 | "_ = ic(th.tensor([3.14]).numel())"
308 | ]
309 | },
310 | {
311 | "attachments": {},
312 | "cell_type": "markdown",
313 | "metadata": {},
314 | "source": [
315 | "### Tensor reshaping\n",
316 | "\n",
317 | "Changing the shape of a tensor can be a crucial operation. To have an idea of its application, just think of `RGB` images.\n",
318 | "These may be represented as $3\\times H\\times W$ tensors, where H and W stand for height and width of the image (in number of pixels). It is often needed to look at an image as a flattened (1D) vector of pixels:"
319 | ]
320 | },
321 | {
322 | "cell_type": "code",
323 | "execution_count": 12,
324 | "metadata": {},
325 | "outputs": [
326 | {
327 | "name": "stderr",
328 | "output_type": "stream",
329 | "text": [
330 | "|> img.shape: torch.Size([> img.shape: torch.Size([3, 8, 8])\n",
331 | "|> img2.shape: torch.Size([3, 64])\n"
332 | ]
333 | }
334 | ],
335 | "source": [
336 | "img: Tensor = th.stack(\n",
337 | " tensors=(th.ones(8, 8), th.zeros(8, 8), th.ones(8, 8) / 2), dim=0\n",
338 | ")\n",
339 | "\n",
340 | "img.reshape(\n",
341 | " 3, 64\n",
342 | ") # note that reshaping is not in place, so this call does not change the actual shape of img\n",
343 | "\n",
344 | "_ = ic(img.shape)\n",
345 | "img2: Tensor = img.reshape(3, 64)\n",
346 | "_ = ic(img2.shape)"
347 | ]
348 | },
349 | {
350 | "cell_type": "code",
351 | "execution_count": 13,
352 | "metadata": {},
353 | "outputs": [],
354 | "source": [
355 | "# plt.imshow(img.numpy()) # It errors: `TypeError: Invalid shape (3, 8, 8) for image data`"
356 | ]
357 | },
358 | {
359 | "cell_type": "code",
360 | "execution_count": 14,
361 | "metadata": {},
362 | "outputs": [
363 | {
364 | "data": {
365 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZgAAAGdCAYAAAAv9mXmAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy81sbWrAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAW0ElEQVR4nO3db2yV9fn48austIvNqcSRKVRRx8BpuuEfMMwhuI1GmAmoizqMzuGQwOIDlz0YNdnYgyFZTMA/sJhs8U+II1EzlYRAQNSNgM5VDQ7RxQ1QObIiirQqtkU+3we/n823Q0tP2w+H0+/rlVzJeuc+va8R1vfuc9O2KiJSAMAgG1buBQAYmgQGgCwEBoAsBAaALAQGgCwEBoAsBAaALAQGgCyqy3HR0aNHR3t7ezkuDcAAFQqFeOedd4553nEPzOjRo6NYLB7vywIwiBoaGo4ZmeP+Fpk7F4DK15ev5Z7BAJCFwACQhcAAkIXAAJCFwACQhcAAkIXAAJCFwACQhcAAkIXAAJCFwACQhcAAkIXAAJCFwACQhcAAkIXAAJCFwACQRb8Cs3Dhwti5c2ccOnQoWlpaYsqUKYO9FwAVruTAXHvttXHXXXfFkiVL4oILLojNmzfHunXr4owzzsixHwAVqioiUikveP755+Oll16Kn/3sZ93HduzYEU888UTcfvvtx3x9oVCItra2khcF4MRRX18f7e3tvZ5T0h3M8OHD46KLLooNGzb0OL5hw4a45JJLPvc1NTU1USgUegwAQ19JgRk5cmRUV1dHa2trj+Otra1x2mmnfe5rmpubo62trXuKxWL/twWgYvTrIX9KPd9Vq6qqOurYZ5YuXRr19fXd09DQ0J9LAlBhqks5ef/+/XH48OGj7la++tWvHnVX85nOzs7o7Ozs/4YAVKSS7mC6urrixRdfjKamph7Hm5qaYuvWrYO6GACVraQ7mIiIZcuWxapVq6KlpSWee+65mD9/fowZMybuu+++HPsBUKFKDswjjzwSX/nKV+LXv/51jBo1KrZv3x4/+MEP4q233sqxHwAVquTvgxko3wcDUPkG/ftgAKCvBAaALAQGgCwEBoAsBAaALAQGgCwEBoAsBAaALAQGgCwEBoAsBAaALAQGgCwEBoAsBAaALAQGgCwEBoAsBAaALAQGgCwEBoAsBAaALAQGgCwEBoAsBAaALAQGgCwEBoAsBAaALAQGgCwEBoAsBAaALAQGgCwEBoAsBAaALAQGgCwEBoAsBAaALEoOzKWXXhpr1qyJYrEYKaWYPXt2jr0AqHAlB6auri62bdsWt956a459ABgiqkt9wfr162P9+vU5dgFgCCk5MKWqqamJ2tra7o8LhULuSwJwAsj+kL+5uTna2tq6p1gs5r4kACeA7IFZunRp1NfXd09DQ0PuSwJwAsj+FllnZ2d0dnbmvgwAJxjfBwNAFiXfwdTV1cXXv/717o/PPvvsmDBhQrz//vvx9ttvD+pyAFSuqohIpbxg2rRp8eyzzx51/MEHH4y5c+ce8/WFQiHa2tpKuSQAJ5j6+vpob2/v9ZySAzNQAgNQ+foSGM9gAMhCYADIQmAAyEJgAMhCYADIQmAAyEJgAMhCYADIQmAAyEJgAMhCYADIQmAAyEJgAMhCYADIQmAAyKLk32gJDGFVVeXeoF8qc+vKVMrv9HIHA0AWAgNAFgIDQBYCA0AWAgNAFgIDQBYCA0AWAgNAFgIDQBYCA0AWAgNAFgIDQBYCA0AWAgNAFgIDQBYCA0AWAgNAFgIDQBYCA0AWJQVm0aJF8cILL0RbW1u0trbG448/HuPHj8+1GwAVrKTATJs2LVauXBmTJ0+OpqamqK6ujg0bNsRJJ52Uaz8AKlRVRKT+vnjkyJHx7rvvxtSpU2Pz5s19ek2hUIi2trb+XhLIqaqq3Bv0S2VuXZk++xpeX18f7e3tvZ5bPZALnXzyyRER8f7773/hOTU1NVFbW9tjOQCG
vgE95F+2bFls3rw5Xn311S88p7m5Odra2rqnWCwO5JIAVIh+v0W2YsWKuOKKK2LKlCm9RuPz7mBEBk5Q3iLjGLK/RXbPPffErFmzYurUqceMRWdnZ3R2dvbnMgBUsJIDc++998ZVV10Vl112WezevTvDSgAMBSUFZuXKlXH99dfH7Nmzo729PU499dSIiDh48GB88sknWRYEoDKV9Awmpc8/9Sc/+Uk89NBDffoc/pkynMA8g+EYsj2DqarQv3wAHH9+FhkAWQgMAFkIDABZCAwAWQgMAFkIDABZCAwAWQgMAFkIDABZCAwAWQgMAFkIDABZCAwAWQgMAFkIDABZCAwAWZT0C8cGU19+GxoAlcsdDABZCAwAWQgMAFkIDABZCAwAWQgMAFkIDABZCAwAWQgMAFkIDABZCAwAWQgMAFkIDABZCAwAWQgMAFkIDABZCAwAWQgMAFmUFJgFCxbEtm3b4uDBg3Hw4MHYunVrzJgxI9duAFSwkgKzZ8+eWLRoUUycODEmTpwYTz/9dDz55JNx3nnn5doPgAqWBjLvvfdeuvnmm/t8fqFQSCmlVCgUBnRdY4wxx39K+RpeHf00bNiwuOaaa6Kuri6ee+65LzyvpqYmamtruz8uFAr9vSQAFaakejU2Nqb29vbU1dWVDhw4kGbOnNnr+YsXL06fxx2MMcZU3pRyB1P1//9Dnw0fPjzGjBkTI0aMiB/+8Icxb968mDZtWrz22mufe/7n3cEUi8Wor6+P9vb2Ui4NQJkVCoVoa2vr09fwkgPz3zZu3Bj//ve/Y8GCBYO+HAAnllK+hg/4+2Cqqqp63KEAQERESQ/5lyxZEuvWrYu33347CoVC/OhHP4rLLrvM98IAcJSSAnPqqafGqlWrYtSoUXHw4MF45ZVXYsaMGfHUU0/l2g+AClVSYObNm5drDwCGGD+LDIAsBAaALAQGgCwEBoAsBAaALAQGgCwEBoAsBAaALAQGgCwEBoAsBAaALAQGgCwEBoAsBAaALAQGgCwEBoAsBAaALAQGgCwEBoAsBAaALAQGgCwEBoAsBAaALAQGgCwEBoAsBAaALAQGgCwEBoAsBAaALAQGgCwEBoAsBAaALAQGgCwEBoAsBAaALAYUmEWLFkVKKZYvXz5Y+wAwRPQ7MBMnToz58+fHtm3bBnMfAIaIfgWmrq4uHn744bjlllviwIEDg70TAENAvwKzcuXKWLt2bWzatGmw9wFgiKgu9QXXXXddXHjhhTFp0qQ+nV9TUxO1tbXdHxcKhVIvCUAFKukO5vTTT4+77747brjhhujo6OjTa5qbm6Otra17isVivxYFoLJURUTq68mzZ8+OJ554Ig4fPtx9rLq6Oo4cORJHjhyJ2traOHLkSI/XfN4dTLFYjPr6+mhvbx/4fwMAjptCoRBtbW19+hpe0ltkmzZtisbGxh7HHnjggXj99dfjd7/73VFxiYjo7OyMzs7OUi4DwBBQUmA+/PDDePXVV3sc++ijj+K999476jgA/7f5Tn4Asij5X5H9t+9+97uDsQcAQ4w7GACyEBgAshAYALIQGACyEBgAshAYALIQGACyEBgAshAYALIQGACyEBgAshAYALIQGACyEBgAshAYALIQGACyGPAvHKMSLC73Av22uHJXhyGptra2z+e6gwEgC4EBIAuBASALgQEgC4EBIAuBASALgQEgC4EBIAuBASALgQEgC4EBIAuBASALgQEgC4EBIAuBASALgQEgC4EBIAuBASALgQEgi5ICs3jx4kgp9Zi9e/fm2g2AClZd6gu2b98e06dP7/74008/HdSFABgaSg7M4cOHo7W1NccuAAwhJT+DGTduXBSLxdi5c2esXr06zj777F7Pr6mpiUKh0GMAGPpKCszf/va3+PGPfxyXX3553HLLLXHaaafF1q1b45RTTvnC1zQ3N0dbW1v3FIvFAS8NwImvpMCsX78+/vznP8f27dtj06ZNccUVV0RExE033fSFr1m6dGnU
19d3T0NDw8A2BqAilPwM5n/7+OOP4x//+EeMGzfuC8/p7OyMzs7OgVwGgAo0oO+DqampiXPPPdc/VQbgKCUF5s4774ypU6fGWWedFRdffHE89thjUV9fHw899FCu/QCoUCW9RXb66afH6tWrY+TIkfHuu+/G888/H5MnT4633nor134AVKiSAjNnzpxcewAwxPhZZABkITAAZCEwAGQhMABkITAAZCEwAGQhMABkITAAZCEwAGQhMABkITAAZCEwAGQhMABkITAAZCEwAGRR0u+DGUzNzc3R0dFRrssDkJk7GACyEBgAshAYALIQGACyEBgAshAYALIQGACyEBgAshAYALIQGACyEBgAshAYALIQGACyEBgAshAYALIQGACyEBgAshAYALIQGACyKDkwo0ePjlWrVsX+/fvjo48+ipdffjkuvPDCHLsBUMGqSzl5xIgRsWXLlnjmmWdi5syZsW/fvhg7dmx88MEHmdYDoFKVFJhf/vKX8fbbb8fNN9/cfezNN98c9KUAqHwlvUU2a9asaGlpiUceeSRaW1vjpZdeinnz5vX6mpqamigUCj0GgKGvpMB87Wtfi4ULF8Ybb7wRl19+edx3331xzz33xI033viFr2lubo62trbuKRaLA14agBNfVUSkvp7c0dERLS0t8Z3vfKf72N133x2TJk2KSy655HNfU1NTE7W1td0fFwqFKBaLsXTp0ujo6Oj/5gAcd7W1tdHc3Bz19fXR3t7e67kl3cHs3bs3duzY0ePYa6+9FmPGjPnC13R2dkZ7e3uPAWDoKykwW7ZsiXPOOafHsfHjx3vQD8BRSgrM8uXLY/LkydHc3Bxjx46NOXPmxPz582PlypW59gOgQpUUmJaWlrjqqqtizpw5sX379vjVr34Vt912W/zpT3/KtR8AFaqk74OJiFi7dm2sXbs2xy4ADCF+FhkAWQgMAFkIDABZCAwAWQgMAFkIDABZCAwAWQgMAFkIDABZCAwAWQgMAFkIDABZCAwAWQgMAFkIDABZCAwAWQgMAFkIDABZCAwAWQgMAFkIDABZCAwAWQgMAFkIDABZCAwAWQgMAFkIDABZCAwAWQgMAFkIDABZCAwAWQgMAFkIDABZCAwAWQgMAFmUFJhdu3ZFSumoWbFiRa79AKhQ1aWcPGnSpPjSl77U/XFjY2M89dRT8eijjw76YgBUtpICs3///h4fL1q0KP71r3/FX/7yl0FdCoDKV1Jg/rfhw4fHDTfcEMuWLev1vJqamqitre3+uFAo9PeSAFSQfj/kv/LKK2PEiBHx4IMP9npec3NztLW1dU+xWOzvJQGoIP0OzE9/+tNYt25d7N27t9fzli5dGvX19d3T0NDQ30sCUEH69RbZmDFjYvr06XH11Vcf89zOzs7o7Ozsz2UAqGD9uoOZO3du7Nu3L9auXTvY+wAwRJQcmKqqqpg7d2489NBD8emnn+bYCYAhoOTATJ8+Pc4888y4//77c+wDwBBR8jOYjRs3RlVVVY5dABhC/CwyALIQGACyEBgAshAYALIQGACyEBgAshAYALIQGACyEBgAshAYALIQGACyEBgAshAYALIQGACyEBgAsij598EMlpqamnJdGoB+KuVrd1VEpHyrHG306NFRLBaP5yUBGGQNDQ3xzjvv9HrOcQ9MxP+LTHt7+6B/3kKhEMViMRoaGrJ8/lzsfXzZ+/ir1N3t/cWf/1hxiSjTW2R9WWwg2tvbK+ovw2fsfXzZ+/ir1N3tffTn7QsP+QHIQmAAyGJIBaajoyN+85vfREdHR7lXKYm9jy97H3+Vuru9B6YsD/kBGPqG1B0MACcOgQEgC4EBIAuBASCLIROYhQsXxs6dO+PQoUPR0tISU6ZMKfdKx3TppZfGmjVrolgsRkopZs+eXe6V+mTRokXxwgsvRFtbW7S2tsbjjz8e48ePL/dax7RgwYLYtm1bHDx4MA4ePBhbt26NGTNmlHutki1atChSSrF8+fJyr9KrxYsX
R0qpx+zdu7fca/XJ6NGjY9WqVbF///746KOP4uWXX44LL7yw3Gsd065du476M08pxYoVK8qyz5AIzLXXXht33XVXLFmyJC644ILYvHlzrFu3Ls4444xyr9arurq62LZtW9x6663lXqUk06ZNi5UrV8bkyZOjqakpqqurY8OGDXHSSSeVe7Ve7dmzJxYtWhQTJ06MiRMnxtNPPx1PPvlknHfeeeVerc8mTpwY8+fPj23btpV7lT7Zvn17nHbaad3zzW9+s9wrHdOIESNiy5Yt0dXVFTNnzozzzjsvfvGLX8QHH3xQ7tWOadKkST3+vKdPnx4REY8++mjZdkqVPs8//3z6/e9/3+PYjh070h133FH23fo6KaU0e/bssu/Rnxk5cmRKKaVLL7207LuUOu+99166+eaby75HX6auri7985//TN///vfTM888k5YvX172nXqbxYsXp5dffrnse5Q6S5cuTX/961/LvsdgzPLly9Mbb7xRtutX/B3M8OHD46KLLooNGzb0OL5hw4a45JJLyrTV/y0nn3xyRES8//77Zd6k74YNGxbXXXdd1NXVxXPPPVfudfpk5cqVsXbt2ti0aVO5V+mzcePGRbFYjJ07d8bq1avj7LPPLvdKxzRr1qxoaWmJRx55JFpbW+Oll16KefPmlXutkg0fPjxuuOGGuP/++8u6R9krO5AZNWpUSimlb3/72z2ONzc3p9dff73s+/V1KvkO5sknn6yY/8fX2NiY2tvbU1dXVzpw4ECaOXNm2Xfqy1x33XXplVdeSbW1tSkiKuIOZsaMGenqq69OjY2N3Xdde/fuTaecckrZd+ttDh06lA4dOpSWLFmSzj///DR//vz08ccfpxtvvLHsu5Uy11xzTerq6kqjRo0q5x7l/4MYyHwWmMmTJ/c4fvvtt6fXXnut7Pv1dSo1MCtWrEi7du1KDQ0NZd+lLzN8+PA0duzYdNFFF6U77rgj7du3L5177rll36u3Of3009N//vOf9K1vfav7WCUE5r/npJNOSnv37k0///nPy75Lb9PR0ZG2bNnS49jdd9+dtm7dWvbdSpn169enNWvWlHuP8v9BDGSGDx+eurq60pVXXtnj+F133ZWeffbZsu/X16nEwNxzzz3prbfeSmeddVbZd+nvbNy4Md13331l36O3mT17dkoppa6uru5JKaVPP/00dXV1pWHDhpV9x77Ohg0bjnpeeqLN7t270x/+8IcexxYsWJD27NlT9t36OmPGjEmHDx9Os2bNKuseFf8MpqurK1588cVoamrqcbypqSm2bt1apq2GvnvvvTeuvvrq+N73vhe7d+8u9zr9VlVVFbW1teVeo1ebNm2KxsbGOP/887vn73//ezz88MNx/vnnx5EjR8q9Yp/U1NTEueeee8L/U+UtW7bEOeec0+PY+PHj48033yzTRqWbO3du7Nu3L9auXVvuVcpf24HOtddemzo6OtLcuXPTN77xjbRs2bLU3t6exowZU/bdepu6uro0YcKENGHChJRSSrfddluaMGFCOuOMM8q+W2+zcuXKdODAgTR16tR06qmnds+Xv/zlsu/W2yxZsiRNmTIlnXnmmamxsTH99re/TYcPH07Tp08v+26lTiW8RXbnnXemqVOnprPOOitdfPHFac2aNengwYMn/P8uJ06cmDo7O1Nzc3MaO3ZsmjNnTvrwww/T9ddfX/bd+jJVVVVp9+7daenSpWXfJU6ABQZlFi5cmHbt2pU++eST1NLSUhH/ZHbatGnp8zzwwANl3623+SI33XRT2Xfrbf74xz92/x1pbW1NGzdurMi4RFRGYFavXp2KxWLq6OhIe/bsSY899tgJ/7zrs7niiivSK6+8kg4dOpR27NiR5s2bV/ad+jpNTU0ppZTGjRtX9l38uH4Asqj4ZzAAnJgEBoAsBAaALAQGgCwEBoAsBAaALAQGgCwEBoAsBAaALAQGgCwEBoAsBAaALP4HroMzSz8FgsUAAAAASUVORK5C
YII=",
366 | "text/plain": [
367 | ""
368 | ]
369 | },
370 | "metadata": {},
371 | "output_type": "display_data"
372 | }
373 | ],
374 | "source": [
375 | "new_img: Tensor = img.reshape(8, 8, 3)\n",
376 | "_ = plt.imshow(new_img.numpy())"
377 | ]
378 | },
379 | {
380 | "attachments": {},
381 | "cell_type": "markdown",
382 | "metadata": {},
383 | "source": [
384 | "**Note**\n",
385 | "\n",
386 | "Difference between:\n",
387 | "- `view`: operations on contiguous memory;\n",
388 | "- `reshape`: operations on (non-)contiguous memory, using `view` wherever possible;\n",
389 | "- `permute`: explicit reordering of dimensions and memory;\n",
390 | "- `.contiguous()`: ensure that the tensor is stored in contiguous memory, with no other modification."
391 | ]
392 | },
393 | {
394 | "cell_type": "code",
395 | "execution_count": 15,
396 | "metadata": {},
397 | "outputs": [
398 | {
399 | "data": {
400 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZgAAAGdCAYAAAAv9mXmAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy81sbWrAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAWv0lEQVR4nO3dfWyV9fn48asO2oXmVOLMFFDUMXAaJirFMKfgNoiwJaAuyjC6BYMEFv/QmWV0yeb+GJLN/MAHWEg2n0IciZo4SYgExIcR0LmqweHD4gQUjljEB3pU7Cny+f2xrN9vh5aelg+H0+/rlVyJ5859el8h6tv73G2ti4gUAHCEHVftBQAYmAQGgCwEBoAsBAaALAQGgCwEBoAsBAaALAQGgCwGVeOiw4cPj1KpVI1LA9BPhUIh3n777cOed9QDM3z48CgWi0f7sgAcQSNGjDhsZI56YLruXEb8v4hS+WhfHoD+KNRHFG/u1adQVfmILCL+HZdSR9UuD0BeHvIDkIXAAJCFwACQhcAAkIXAAJCFwACQhcAAkIXAAJCFwACQhcAAkIXAAJCFwACQhcAAkIXAAJCFwACQhcAAkEWfArNgwYLYtm1b7N+/P1pbW+Oiiy460nsBUOMqDsxVV10Vt99+eyxatCjOO++82LhxYzz22GNx6qmn5tgPgBpVcWB+9rOfxd133x133313vPbaa3HTTTfFzp07Y8GCBTn2A6BGVRSYwYMHx/jx42PdunXdjq9bty4uvPDCz31PfX19FAqFbgPAwFdRYE488cQYNGhQtLW1dTve1tYWJ5988ue+p6WlJdrb27umWCz2fVsAakafHvKnlLq9rqurO+TYfyxevDiampq6ZsSIEX25JAA1ZlAlJ+/duzcOHDhwyN3KV7/61UPuav6jXC5HuVzu+4YA1KSK7mA6Ozvj+eefj6lTp3Y7PnXq1Ni8efMRXQyA2lbRHUxExJIlS2LlypXR2toazzzzTMybNy9GjhwZK1asyLEfADWq4sA8+OCD8ZWvfCV+/etfx7Bhw2Lr1q3x/e9/P956660c+wFQo+oi4vOfzmdSKBSivb09omlxRKnjaF4agP4qNES0t0RTU1OUSqUeT/W7yADIQmAAyEJgAMhCYADIQmAAyEJgAMhCYADIQmAAyEJgAMhCYADIQmAAyEJgAMhCYADIQmAAyEJgAMhCYADIQmAAyEJgAMhCYADIQmAAyEJgAMhCYADIQmAAyEJgAMhCYADIQmAAyEJgAMhCYADIQmAAyEJgAMhCYADIQmAAyEJgAMhCYADIQmAAyKLiwFx88cWxevXqKBaLkVKKmTNn5tgLgBpXcWAaGxtjy5YtccMNN+TYB4ABYlClb1i7dm2sXbs2xy4ADCAVB6ZS9fX10dDQ0PW6UCjkviQAx4DsD/lbWlqivb29a4rFYu5LAnAMyB6YxYsXR1NTU9eMGDEi9yUBOAZk/4isXC5HuVzOfRkAjjF+DgaALCq+g2lsbIyvf/3rXa/POOOMGDduXLz//vuxc+fOI7ocALWr4sA0NzfHU0891fV66dKlERFx3333xZw5c47YYgDUtooD8/TTT0ddXV2OXQAYQDyDASALgQEgC4EBIAuBASALgQEgC4EBIAuBASALgQEgC4EBIAuBASALgQEgC4EBIAuBASALgQEgC4EBIAuBASALgQEgC4EBIAuBASALgQEgC4EBIAuBASALgQEgC4EBIAuBASALgQEgC4EBIAuBASALgQEgC4EBIAuBASALgQEgC4EBIAuBASALgQEgi4oCs3Dhwnjuueeivb092tra4pFHHokxY8bk2g2AGlZRYCZPnhzLly+PiRMnxtSpU2PQoEGxbt26GDJkSK79AKhRgyo5efr06d1ez5kzJ959990YP358bNy48YguBkBtqygw/+3444+PiIj333//C8+pr6+PhoaGrteFQqE/lwSgRvTrIf+SJUti48aN8fLLL3/h
OS0tLdHe3t41xWKxP5cEoEb0OTDLli2Lc845J2bPnt3jeYsXL46mpqauGTFiRF8vCUAN6dNHZHfeeWfMmDEjJk2adNg7knK5HOVyuU/LAVC7Kg7MXXfdFZdffnlccsklsWPHjgwrATAQVBSY5cuXx9VXXx0zZ86MUqkUJ510UkRE7Nu3Lz799NMsCwJQmyp6BvPTn/40hg4dGk8//XS88847XTNr1qxc+wFQoyq6g6mrq8u1BwADjN9FBkAWAgNAFgIDQBYCA0AWAgNAFgIDQBYCA0AWAgNAFgIDQBYCA0AWAgNAFgIDQBYCA0AWAgNAFgIDQBYCA0AWAgNAFgIDQBYCA0AWAgNAFgIDQBYCA0AWAgNAFgIDQBYCA0AWAgNAFgIDQBYCA0AWAgNAFgIDQBYCA0AWAgNAFgIDQBYCA0AWAgNAFhUFZv78+bFly5bYt29f7Nu3LzZv3hzTpk3LtRsANayiwOzatSsWLlwYzc3N0dzcHE888UQ8+uijcfbZZ+faD4AaVRcRqT9f4L333ouf//zncc899/Tq/EKhEO3t7RFNiyNKHf25NABHW6Ehor0lmpqaolQq9XjqoL5e47jjjosrr7wyGhsb45lnnvnC8+rr66OhoeF/disU+npJAGpIxQ/5x44dG6VSKTo6OmLFihVx+eWXx6uvvvqF57e0tER7e3vXFIvFfi0MQG2o+COywYMHx8iRI2Po0KHxwx/+MObOnRuTJ0/+wsh83h1MsVj0ERlALargI7J+P4NZv359vPHGGzF//vze7eYZDEDtqiAw/f45mLq6um53KAAQUeFD/kWLFsVjjz0WO3fujEKhED/60Y/ikksu8bMwAByiosCcdNJJsXLlyhg2bFjs27cvXnrppZg2bVo8/vjjufYDoEZVFJi5c+fm2gOAAcbvIgMgC4EBIAuBASALgQEgC4EBIAuBASALgQEgC4EBIAuBASALgQEgC4EBIAuBASALgQEgC4EBIAuBASALgQEgC4EBIAuBASALgQEgC4EBIAuBASALgQEgC4EBIAuBASALgQEgC4EBIAuBASALgQEgC4EBIAuBASALgQEgC4EBIAuBASALgQEgC4EBIIt+BWbhwoWRUoqlS5ceqX0AGCD6HJjm5uaYN29ebNmy5UjuA8AA0afANDY2xgMPPBDXX399fPDBB0d6JwAGgD4FZvny5bFmzZrYsGHDkd4HgAFiUKVvmDVrVpx//vkxYcKEXp1fX18fDQ0NXa8LhUKllwSgBlV0B3PKKafEHXfcEddcc010dHT06j0tLS3R3t7eNcVisU+LAlBb6iIi9fbkmTNnxl/+8pc4cOBA17FBgwbFwYMH4+DBg9HQ0BAHDx7s9p7Pu4MpFosRTYsjSr2LFADHiEJDRHtLNDU1RalU6vHUij4i27BhQ4wdO7bbsXvvvTdee+21+N3vfndIXCIiyuVylMvlSi4DwABQUWA++uijePnll7sd+/jjj+O999475DgA/7f5SX4Asqj4u8j+23e+850jsQcAA4w7GACyEBgAshAYALIQGACyEBgAshAYALIQGACyEBgAshAYALIQGACyEBgAshAYALIQGACyEBgAshAYALIQGACyEBgAshAYALIQGACyEBgAshAYALIQGACyEBgAshAYALIQGACyEBgAshAYALIQGACyEBgAshAYALIQGACyEBgAshAYALIQGACyEBgAshAYALKoKDC33HJLpJS6ze7du3PtBkANG1TpG7Zu3RpTpkzpev3ZZ58d0YUAGBgqDsyBAweira0txy4ADCAVP4MZPXp0FIvF2LZtW6xatSrOOOOMHs+vr6+PQqHQbQAY+CoKzN/+9rf48Y9/HJdeemlcf/31cfLJJ8fmzZvjhBNO+ML3tLS0RHt7e9cUi8V+Lw3Asa8uIlJf3zxkyJB444034ve//30sXbr0c8+pr6+PhoaGrteFQuHfkWlaHFHq6OulAaiGQkNEe0s0NTVFqVTq8dSKn8H8
b5988kn84x//iNGjR3/hOeVyOcrlcn8uA0AN6tfPwdTX18dZZ53lW5UBOERFgbntttti0qRJcfrpp8cFF1wQDz/8cDQ1NcX999+faz8AalRFH5GdcsopsWrVqjjxxBPj3XffjWeffTYmTpwYb731Vq79AKhRFQVm9uzZufYAYIDxu8gAyEJgAMhCYADIQmAAyEJgAMhCYADIQmAAyEJgAMhCYADIQmAAyEJgAMhCYADIQmAAyEJgAMhCYADIQmAAyEJgAMhCYADIQmAAyEJgAMhCYADIQmAAyEJgAMhCYADIQmAAyEJgAMhCYADIQmAAyEJgAMhCYADIQmAAyEJgAMhCYADIQmAAyEJgAMii4sAMHz48Vq5cGXv37o2PP/44XnzxxTj//PNz7AZADRtUyclDhw6NTZs2xZNPPhnTp0+PPXv2xKhRo+LDDz/MtB4AtaqiwPziF7+InTt3xnXXXdd17M033zziSwFQ+yr6iGzGjBnR2toaDz74YLS1tcULL7wQc+fO7fE99fX1USgUug0AA19Fgfna174WCxYsiNdffz0uvfTSWLFiRdx5551x7bXXfuF7Wlpaor29vWuKxWK/lwbg2FcXEam3J3d0dERra2t8+9vf7jp2xx13xIQJE+LCCy/83PfU19dHQ0ND1+tCofDvyDQtjih19H1zAI6+QkNEe0s0NTVFqVTq8dSK7mB2794dr7zySrdjr776aowcOfIL31Mul6NUKnUbAAa+igKzadOmOPPMM7sdGzNmjAf9AByiosAsXbo0Jk6cGC0tLTFq1KiYPXt2zJs3L5YvX55rPwBqVEWBaW1tjcsvvzxmz54dW7dujV/96ldx4403xp///Odc+wFQoyr6OZiIiDVr1sSaNWty7ALAAOJ3kQGQhcAAkIXAAJCFwACQhcAAkIXAAJCFwACQhcAAkIXAAJCFwACQhcAAkIXAAJCFwACQhcAAkIXAAJCFwACQhcAAkIXAAJCFwACQhcAAkIXAAJCFwACQhcAAkIXAAJCFwACQhcAAkIXAAJCFwACQhcAAkIXAAJCFwACQhcAAkIXAAJCFwACQhcAAkEVFgdm+fXuklA6ZZcuW5doPgBo1qJKTJ0yYEF/60pe6Xo8dOzYef/zxeOihh474YgDUtooCs3fv3m6vFy5cGP/617/i6aefPqJLAVD7KgrM/zZ48OC45pprYsmSJT2eV19fHw0NDV2vC4VCXy8JQA3p80P+yy67LIYOHRr33Xdfj+e1tLREe3t71xSLxb5eEoAaUhcRqS9vXLt2bZTL5ZgxY0aP533eHUyxWIxoWhxR6ujLpQGolkJDRHtLNDU1RalU6vHUPn1ENnLkyJgyZUpcccUVhz23XC5HuVzuy2UAqGF9+ohszpw5sWfPnlizZs2R3geAAaLiwNTV1cWcOXPi/vvvj88++yzHTgAMABUHZsqUKXHaaafFPffck2MfAAaIip/BrF+/Purq6nLsAsAA4neRAZCFwACQhcAAkIXAAJCFwACQhcAAkIXAAJCFwACQhcAAkIXAAJCFwACQhcAAkIXAAJCFwACQhcAAkEXF/z+YI6ZQX7VLA9BHFfy7+6gHplAo/Psvijcf7UsDcIQUCoUolUo9nlMXEenorPM/hg8fftjF+qJQKESxWIwRI0Zk+fq52PvosvfRV6u72/uLv/7bb7992POq8hFZbxbrj1KpVFN/M/yHvY8uex99tbq7vQ/9ur3hIT8AWQgMAFkMqMB0dHTEb37zm+jo6Kj2KhWx99Fl76OvVne3d/9U5SE/AAPfgLqDAeDYITAAZCEwAGQhMABkMWACs2DBgti2bVvs378/Wltb46KLLqr2Sod18cUXx+rVq6NYLEZKKWbOnFntlXpl4cKF8dxzz0V7e3u0tbXFI488EmPGjKn2Woc1f/782LJlS+zbty/27dsXmzdvjmnTplV7rYotXLgwUkqxdOnSaq/So1tuuSVSSt1m9+7d1V6rV4YPHx4rV66M
vXv3xscffxwvvvhinH/++dVe67C2b99+yJ95SimWLVtWlX0GRGCuuuqquP3222PRokVx3nnnxcaNG+Oxxx6LU089tdqr9aixsTG2bNkSN9xwQ7VXqcjkyZNj+fLlMXHixJg6dWoMGjQo1q1bF0OGDKn2aj3atWtXLFy4MJqbm6O5uTmeeOKJePTRR+Pss8+u9mq91tzcHPPmzYstW7ZUe5Ve2bp1a5x88sld881vfrPaKx3W0KFDY9OmTdHZ2RnTp0+Ps88+O26++eb48MMPq73aYU2YMKHbn/eUKVMiIuKhhx6q2k6p1ufZZ59Nf/jDH7ode+WVV9Ktt95a9d16OymlNHPmzKrv0Zc58cQTU0opXXzxxVXfpdJ577330nXXXVf1PXozjY2N6Z///Gf63ve+l5588sm0dOnSqu/U09xyyy3pxRdfrPoelc7ixYvTX//616rvcSRm6dKl6fXXX6/a9Wv+Dmbw4MExfvz4WLduXbfj69atiwsvvLBKW/3fcvzxx0dExPvvv1/lTXrvuOOOi1mzZkVjY2M888wz1V6nV5YvXx5r1qyJDRs2VHuVXhs9enQUi8XYtm1brFq1Ks4444xqr3RYM2bMiNbW1njwwQejra0tXnjhhZg7d26116rY4MGD45prrol77rmnqntUvbL9mWHDhqWUUvrWt77V7XhLS0t67bXXqr5fb6eW72AeffTRmvkvvrFjx6ZSqZQ6OzvTBx98kKZPn171nXozs2bNSi+99FJqaGhIEVETdzDTpk1LV1xxRRo7dmzXXdfu3bvTCSecUPXdepr9+/en/fv3p0WLFqVzzz03zZs3L33yySfp2muvrfpulcyVV16ZOjs707Bhw6q5R/X/IPoz/wnMxIkTux3/5S9/mV599dWq79fbqdXALFu2LG3fvj2NGDGi6rv0ZgYPHpxGjRqVxo8fn2699da0Z8+edNZZZ1V9r57mlFNOSe+8804655xzuo7VQmD+e4YMGZJ2796dbrrppqrv0tN0dHSkTZs2dTt2xx13pM2bN1d9t0pm7dq1afXq1dXeo/p/EP2ZwYMHp87OznTZZZd1O3777benp556qur79XZqMTB33nlneuutt9Lpp59e9V36OuvXr08rVqyo+h49zcyZM1NKKXV2dnZNSil99tlnqbOzMx133HFV37G3s27dukOelx5rs2PHjvTHP/6x27H58+enXbt2VX233s7IkSPTgQMH0owZM6q6R80/g+ns7Iznn38+pk6d2u341KlTY/PmzVXaauC766674oorrojvfve7sWPHjmqv02d1dXXR0NBQ7TV6tGHDhhg7dmyce+65XfP3v/89HnjggTj33HPj4MGD1V6xV+rr6+Oss8465r9VedOmTXHmmWd2OzZmzJh48803q7RR5ebMmRN79uyJNWvWVHuV6te2v3PVVVeljo6ONGfOnPSNb3wjLVmyJJVKpTRy5Miq79bTNDY2pnHjxqVx48allFK68cYb07hx49Kpp55a9d16muXLl6cPPvggTZo0KZ100kld8+Uvf7nqu/U0ixYtShdddFE67bTT0tixY9Nvf/vbdODAgTRlypSq71bp1MJHZLfddluaNGlSOv3009MFF1yQVq9enfbt23fM/3PZ3NycyuVyamlpSaNGjUqzZ89OH330Ubr66qurvltvpq6uLu3YsSMtXry46rvEMbDAEZkFCxak7du3p08//TS1trbWxLfMTp48OX2ee++9t+q79TRf5Cc/+UnVd+tp/vSnP3X9PdLW1pbWr19fk3GJqI3ArFq1KhWLxdTR0ZF27dqVHn744WP+edd/5gc/+EF66aWX0v79+9Mrr7yS5s6dW/WdejtTp05NKaU0evToqu/i1/UDkEXNP4MB4NgkMABkITAAZCEwAGQhMABkITAAZCEwAGQhMABkITAAZCEwAGQhMABkITAAZPH/AbEw4qQvFuTrAAAAAElFTkSuQmCC",
401 | "text/plain": [
402 | ""
403 | ]
404 | },
405 | "metadata": {},
406 | "output_type": "display_data"
407 | }
408 | ],
409 | "source": [
410 | "new_img: Tensor = img.permute(1, 2, 0)\n",
411 | "_ = plt.imshow(new_img.numpy())"
412 | ]
413 | },
414 | {
415 | "cell_type": "code",
416 | "execution_count": 16,
417 | "metadata": {},
418 | "outputs": [
419 | {
420 | "name": "stderr",
421 | "output_type": "stream",
422 | "text": [
423 | "|> y.size(): torch.Size([3, 2])\n",
424 | "> y.size(): torch.Size([3, 2])\n",
425 | "|> y.contiguous().view(6): tensor([1, 4, 2, 5, 3, 6])\n"
426 | ]
427 | }
428 | ],
429 | "source": [
430 | "# See also:\n",
431 | "z: Tensor = th.tensor([[1, 2, 3], [4, 5, 6]])\n",
432 | "y: Tensor = z.t() # Transposition does not guarantee memory contiguity!\n",
433 | "_ = ic(y.size())\n",
434 | "# ic(y.view(6)) # It errors!\n",
435 | "_ = ic(y.contiguous().view(6))"
436 | ]
437 | },
438 | {
439 | "attachments": {},
440 | "cell_type": "markdown",
441 | "metadata": {},
442 | "source": [
443 | "### Linear algebra"
444 | ]
445 | },
446 | {
447 | "cell_type": "code",
448 | "execution_count": 17,
449 | "metadata": {},
450 | "outputs": [
451 | {
452 | "name": "stderr",
453 | "output_type": "stream",
454 | "text": [
455 | "|> x @ y: tensor([[> x @ y: tensor([[1.5202, 0.9676, 1.2438, 0.4088],\n",
456 | " [0.9676, 2.0481, 1.2334, 0.7898],\n",
457 | " [1.2438, 1.2334, 1.4995, 0.6067],\n",
458 | " [0.4088, 0.7898, 0.6067, 0.5024]])\n",
459 | "|> x.matmul(y): tensor([[1.5202, 0.9676, 1.2438, 0.4088],\n",
460 | " [0.9676, 2.0481, 1.2334, 0.7898],\n",
461 | " [1.2438, 1.2334, 1.4995, 0.6067],\n",
462 | " [0.4088, 0.7898, 0.6067, 0.5024]])\n",
463 | "|> th.matmul(x, y): tensor([[1.5202, 0.9676, 1.2438, 0.4088],\n",
464 | " [0.9676, 2.0481, 1.2334, 0.7898],\n",
465 | " [1.2438, 1.2334, 1.4995, 0.6067],\n",
466 | " [0.4088, 0.7898, 0.6067, 0.5024]])\n"
467 | ]
468 | }
469 | ],
470 | "source": [
471 | "x: Tensor = th.rand(4, 5)\n",
472 | "y: Tensor = x.T # matrix transposition; also .t()\n",
473 | "\n",
474 | "# All the same!\n",
475 | "_ = ic(x @ y)\n",
476 | "_ = ic(x.matmul(y))\n",
477 | "_ = ic(th.matmul(x, y))"
478 | ]
479 | },
480 | {
481 | "attachments": {},
482 | "cell_type": "markdown",
483 | "metadata": {},
484 | "source": [
485 | "Please note that the operator for matrix multiplication is `@`, not `*`, which indicates the Hadamard (element-wise) product instead."
486 | ]
487 | },
488 | {
489 | "cell_type": "code",
490 | "execution_count": 18,
491 | "metadata": {},
492 | "outputs": [
493 | {
494 | "name": "stderr",
495 | "output_type": "stream",
496 | "text": [
497 | "|> x * x: tensor([[1.4472e-02, > x * x: tensor([[1.4472e-02, 1.0694e-01, 6.4369e-01, 4.3950e-01, 3.1561e-01],\n",
498 | " [8.5509e-01, 5.4028e-01, 8.2724e-04, 5.8356e-02, 5.9352e-01],\n",
499 | " [2.9618e-01, 4.4405e-02, 6.7896e-01, 8.9673e-03, 4.7096e-01],\n",
500 | " [3.7688e-01, 6.1162e-02, 5.8633e-02, 5.2912e-03, 4.2259e-04]])\n"
501 | ]
502 | }
503 | ],
504 | "source": [
505 | "_ = ic(x * x)"
506 | ]
507 | },
508 | {
509 | "attachments": {},
510 | "cell_type": "markdown",
511 | "metadata": {},
512 | "source": [
513 |     "Multiplying a matrix element-wise by itself is equivalent to computing its element-wise square, which can also be done by running one of the following commands:"
514 | ]
515 | },
516 | {
517 | "cell_type": "code",
518 | "execution_count": 19,
519 | "metadata": {},
520 | "outputs": [
521 | {
522 | "name": "stderr",
523 | "output_type": "stream",
524 | "text": [
525 | "|> th.pow(x, 2): tensor([[> th.pow(x, 2): tensor([[1.4472e-02, 1.0694e-01, 6.4369e-01, 4.3950e-01, 3.1561e-01],\n",
526 | " [8.5509e-01, 5.4028e-01, 8.2724e-04, 5.8356e-02, 5.9352e-01],\n",
527 | " [2.9618e-01, 4.4405e-02, 6.7896e-01, 8.9673e-03, 4.7096e-01],\n",
528 | " [3.7688e-01, 6.1162e-02, 5.8633e-02, 5.2912e-03, 4.2259e-04]])\n",
529 | "|> x**2: tensor([[1.4472e-02, 1.0694e-01, 6.4369e-01, 4.3950e-01, 3.1561e-01],\n",
530 | " [8.5509e-01, 5.4028e-01, 8.2724e-04, 5.8356e-02, 5.9352e-01],\n",
531 | " [2.9618e-01, 4.4405e-02, 6.7896e-01, 8.9673e-03, 4.7096e-01],\n",
532 | " [3.7688e-01, 6.1162e-02, 5.8633e-02, 5.2912e-03, 4.2259e-04]])\n"
533 | ]
534 | }
535 | ],
536 | "source": [
537 | "_ = ic(th.pow(x, 2))\n",
538 | "_ = ic(x**2)"
539 | ]
540 | },
541 | {
542 | "attachments": {},
543 | "cell_type": "markdown",
544 | "metadata": {},
545 | "source": [
546 | "As in NumPy, there exists a `dot` function to compute the scalar product between vectors. Note that differently from NumPy, in torch this is **not** equivalent to matrix multiplication, as it is intended to work **only with 1D vectors**."
547 | ]
548 | },
549 | {
550 | "cell_type": "code",
551 | "execution_count": 20,
552 | "metadata": {},
553 | "outputs": [
554 | {
555 | "name": "stderr",
556 | "output_type": "stream",
557 | "text": [
558 | "|>> v1.shape: torch.Size([4]), v2.shape: torch.Size([4])\n",
559 | "|> v1.dot(v2): tensor(0.5170)\n",
560 | "|> v1.matmul(v2): tensor(0.5170)\n",
561 | "|> v1 @ v2: tensor(0.5170)\n"
562 | ]
563 | }
564 | ],
565 | "source": [
566 | "v1: Tensor = x[:, 1]\n",
567 | "v2: Tensor = x[:, 2]\n",
568 | "_ = ic(v1.shape, v2.shape)\n",
569 | "\n",
570 | "_ = ic(\n",
571 | " v1.dot(v2)\n",
572 | ") # in the case of 1D vectors, there is no difference between row and column vectors\n",
573 | "_ = ic(v1.matmul(v2))\n",
574 | "_ = ic(v1 @ v2)"
575 | ]
576 | },
577 | {
578 | "attachments": {},
579 | "cell_type": "markdown",
580 | "metadata": {},
581 | "source": [
582 | "If you want to do something fancier with two vectors, like multiplying a column by a row to obtain a matrix, you need to switch to 2D vectors by reshaping them.\n",
583 | "\n",
584 |     "When you reshape a tensor, you can leave one dimension unspecified (using -1), as it can be inferred automatically by `torch`."
585 | ]
586 | },
587 | {
588 | "cell_type": "code",
589 | "execution_count": 21,
590 | "metadata": {},
591 | "outputs": [
592 | {
593 | "name": "stderr",
594 | "output_type": "stream",
595 | "text": [
596 | "|> v1.shape: torch.Size([4, > v1.shape: torch.Size([4, 1]), v2.shape: torch.Size([1, 4])\n",
597 | "|> v1 @ v2: tensor([[0.2624, 0.0094, 0.2695, 0.0792],\n",
598 | " [0.5897, 0.0211, 0.6057, 0.1780],\n",
599 | " [0.1691, 0.0061, 0.1736, 0.0510],\n",
600 | " [0.1984, 0.0071, 0.2038, 0.0599]])\n"
601 | ]
602 | }
603 | ],
604 | "source": [
605 | "v1: Tensor = v1.reshape(-1, 1) # column vector\n",
606 | "v2: Tensor = v2.reshape(1, -1) # row vector\n",
607 | "\n",
608 | "_ = ic(v1.shape, v2.shape)\n",
609 | "_ = ic(v1 @ v2)"
610 | ]
611 | },
612 | {
613 | "cell_type": "code",
614 | "execution_count": 22,
615 | "metadata": {},
616 | "outputs": [],
617 | "source": [
618 | "# ic(v1.dot(v2)) # this doesn't work! dot works only on 1D tensors"
619 | ]
620 | },
621 | {
622 | "attachments": {},
623 | "cell_type": "markdown",
624 | "metadata": {},
625 | "source": [
626 | "### Reduction operations"
627 | ]
628 | },
629 | {
630 | "cell_type": "code",
631 | "execution_count": 23,
632 | "metadata": {},
633 | "outputs": [
634 | {
635 | "name": "stderr",
636 | "output_type": "stream",
637 | "text": [
638 | "|> x: tensor([[[0.2181, 0.1274],\n",
639 | " [0.4803> x: tensor([[[0.2181, 0.1274],\n",
640 | " [0.4803, 0.6080],\n",
641 | " [0.9447, 0.5521]],\n",
642 | " \n",
643 | " [[0.1235, 0.7560],\n",
644 | " [0.6936, 0.1067],\n",
645 | " [0.2273, 0.3385]]])\n"
646 | ]
647 | }
648 | ],
649 | "source": [
650 | "x: Tensor = th.rand(2, 3, 2)\n",
651 | "_ = ic(x)"
652 | ]
653 | },
654 | {
655 | "cell_type": "code",
656 | "execution_count": 24,
657 | "metadata": {},
658 | "outputs": [
659 | {
660 | "name": "stderr",
661 | "output_type": "stream",
662 | "text": [
663 | "|> x.sum(): tensor(5.1762)\n",
664 | "> x.sum(): tensor(5.1762)\n",
665 | "|> th.sum(x): tensor(5.1762)\n"
666 | ]
667 | }
668 | ],
669 | "source": [
670 | "ic(x.sum())\n",
671 | "_ = ic(th.sum(x))"
672 | ]
673 | },
674 | {
675 | "cell_type": "code",
676 | "execution_count": 25,
677 | "metadata": {},
678 | "outputs": [
679 | {
680 | "name": "stderr",
681 | "output_type": "stream",
682 | "text": [
683 | "|> x.mean(): tensor(0.4313)\n",
684 | "> x.mean(): tensor(0.4313)\n",
685 | "|> th.mean(x): tensor(0.4313)\n"
686 | ]
687 | }
688 | ],
689 | "source": [
690 | "_ = ic(x.mean())\n",
691 | "_ = ic(th.mean(x))"
692 | ]
693 | },
694 | {
695 | "cell_type": "code",
696 | "execution_count": 26,
697 | "metadata": {},
698 | "outputs": [
699 | {
700 | "name": "stderr",
701 | "output_type": "stream",
702 | "text": [
703 | "|> x.argmin(): tensor(9)\n",
704 | "> x.argmin(): tensor(9)\n",
705 | "|> th.argmin(x): tensor(9)\n"
706 | ]
707 | }
708 | ],
709 | "source": [
710 | "_ = ic(x.argmin())\n",
711 | "_ = ic(th.argmin(x))"
712 | ]
713 | },
714 | {
715 | "attachments": {},
716 | "cell_type": "markdown",
717 | "metadata": {},
718 | "source": [
719 | "It is sometimes useful to specify one or more dimensions to reduce (along which you want to perform your operations):"
720 | ]
721 | },
722 | {
723 | "cell_type": "code",
724 | "execution_count": 27,
725 | "metadata": {},
726 | "outputs": [
727 | {
728 | "name": "stderr",
729 | "output_type": "stream",
730 | "text": [
731 | "|> x.mean(dim=0): tensor([[0.1708, 0.4417],> x.mean(dim=0): tensor([[0.1708, 0.4417],\n",
732 | " [0.5869, 0.3574],\n",
733 | " [0.5860, 0.4453]])\n"
734 | ]
735 | }
736 | ],
737 | "source": [
738 | "_ = ic(x.mean(dim=0))"
739 | ]
740 | },
741 | {
742 | "cell_type": "code",
743 | "execution_count": 28,
744 | "metadata": {},
745 | "outputs": [
746 | {
747 | "name": "stderr",
748 | "output_type": "stream",
749 | "text": [
750 | "|> x.argmax(dim=1): tensor([[2, 1],\n",
751 | " [1, 0]])\n",
752 | "> x.argmax(dim=1): tensor([[2, 1],\n",
753 | " [1, 0]])\n"
754 | ]
755 | }
756 | ],
757 | "source": [
758 | "_ = ic(x.argmax(dim=1))"
759 | ]
760 | },
761 | {
762 | "cell_type": "code",
763 | "execution_count": 29,
764 | "metadata": {},
765 | "outputs": [
766 | {
767 | "name": "stderr",
768 | "output_type": "stream",
769 | "text": [
770 | "|> x.sum(dim=(0, 1)): > x.sum(dim=(0, 1)): tensor([2.6875, 2.4887])\n"
771 | ]
772 | }
773 | ],
774 | "source": [
775 | "_ = ic(x.sum(dim=(0, 1)))"
776 | ]
777 | },
778 | {
779 | "attachments": {},
780 | "cell_type": "markdown",
781 | "metadata": {},
782 | "source": [
783 | "## Example: Linear regression\n",
784 | "\n",
785 | "By using all the pieces we've seen till now, we can build our first *model* using PyTorch: a linear regressor, i.e.:\n",
786 | "\n",
787 | "$$\n",
788 | "y = XW + b\n",
789 | "$$\n",
790 | "\n",
791 | "which can also be simplified as:\n",
792 | "\n",
793 | "$$\n",
794 | "y = XW\n",
795 | "$$\n",
796 | "\n",
797 |     "if we incorporate the bias $b$ inside $W$ and append a column of ones to the right of $X$.\n",
798 | "\n",
799 | "\n",
800 | "We start by generating our data. We randomly sample $X$ as a $N\\times P$ tensor, meaning that we have 1000 datapoints and 100 features and produce $y$ as:\n",
801 | "$$\n",
802 | "y=XM+\\mathcal{N}(0,I)\n",
803 | "$$\n",
804 | "where $M$ is a randomly drawn projection vector (shape $P\\times 1$, same as our weights).\n",
805 | "We are adding some iid gaussian noise on the $y$ to avoid the interpolation regime, in which we could be fitting our data perfectly using a linear model."
806 | ]
807 | },
808 | {
809 | "cell_type": "code",
810 | "execution_count": 30,
811 | "metadata": {},
812 | "outputs": [],
813 | "source": [
814 | "N: int = 1000\n",
815 | "P: int = 100\n",
816 | "X: Tensor = th.rand(N, P)\n",
817 | "M: Tensor = th.rand(P, 1)\n",
818 | "y: Tensor = X @ M + th.normal(mean=th.zeros(N, 1), std=th.ones(N, 1))"
819 | ]
820 | },
821 | {
822 | "attachments": {},
823 | "cell_type": "markdown",
824 | "metadata": {},
825 | "source": [
826 | "We can add a column of ones to $X$ to include the bias:"
827 | ]
828 | },
829 | {
830 | "cell_type": "code",
831 | "execution_count": 31,
832 | "metadata": {},
833 | "outputs": [],
834 | "source": [
835 | "X: Tensor = th.cat(tensors=[X, th.ones(N, 1)], dim=1)"
836 | ]
837 | },
838 | {
839 | "attachments": {},
840 | "cell_type": "markdown",
841 | "metadata": {},
842 | "source": [
843 | "The regression can be fit with classical statistical methods such as Ordinary Least Squares, and the optimal $W$ has the form:\n",
844 | "\n",
845 | "$$\n",
846 | "W^*=(X^TX)^{-1}X^Ty\n",
847 | "$$\n"
848 | ]
849 | },
850 | {
851 | "cell_type": "code",
852 | "execution_count": 32,
853 | "metadata": {},
854 | "outputs": [],
855 | "source": [
856 | "W_star: Tensor = ((X.T @ X).inverse()) @ X.T @ y"
857 | ]
858 | },
859 | {
860 | "attachments": {},
861 | "cell_type": "markdown",
862 | "metadata": {},
863 | "source": [
864 | "To assess the quality of this fit we can evaluate the Mean Squared Error (MSE) between the original $y$ and the prediction:"
865 | ]
866 | },
867 | {
868 | "cell_type": "code",
869 | "execution_count": 33,
870 | "metadata": {},
871 | "outputs": [
872 | {
873 | "name": "stderr",
874 | "output_type": "stream",
875 | "text": [
876 | "|> loss: tensor(0.8603)\n",
877 | "> loss: tensor(0.8603)\n"
878 | ]
879 | }
880 | ],
881 | "source": [
882 | "loss: Tensor = th.nn.functional.mse_loss(input=X @ W_star, target=y)\n",
883 | "_ = ic(loss)"
884 | ]
885 | },
886 | {
887 | "attachments": {},
888 | "cell_type": "markdown",
889 | "metadata": {},
890 | "source": [
891 | "## Why not just `numpy` (1): Automatic differentiation"
892 | ]
893 | },
894 | {
895 | "attachments": {},
896 | "cell_type": "markdown",
897 | "metadata": {},
898 | "source": [
899 | "Let's take a look at how ``autograd`` collects gradients. We create two tensors ``a`` and ``b`` with\n",
900 | "``requires_grad=True``. This signals to ``autograd`` that every operation on them should be tracked."
901 | ]
902 | },
903 | {
904 | "cell_type": "code",
905 | "execution_count": 34,
906 | "metadata": {},
907 | "outputs": [],
908 | "source": [
909 | "a: Tensor = th.tensor([2.0, 3.0], requires_grad=True)\n",
910 | "b: Tensor = th.tensor([6.0, 4.0], requires_grad=True)"
911 | ]
912 | },
913 | {
914 | "attachments": {},
915 | "cell_type": "markdown",
916 | "metadata": {},
917 | "source": [
918 | "We create another tensor ``Q`` from ``a`` and ``b``.\n",
919 | "\n",
920 | "\\begin{align}Q = 3a^3 - b^2\\end{align}\n",
921 | "\n"
922 | ]
923 | },
924 | {
925 | "cell_type": "code",
926 | "execution_count": 35,
927 | "metadata": {},
928 | "outputs": [],
929 | "source": [
930 | "Q: Tensor = 3 * a**3 - b**2"
931 | ]
932 | },
933 | {
934 | "attachments": {},
935 | "cell_type": "markdown",
936 | "metadata": {},
937 | "source": [
938 | "We want to compute the gradients of ``Q`` w.r.t. ``a`` and ``b``, i.e.:\n",
939 | "\n",
940 | "\\begin{align}\\frac{\\partial Q}{\\partial a} = 9a^2\\end{align}\n",
941 | "\n",
942 | "\\begin{align}\\frac{\\partial Q}{\\partial b} = -2b\\end{align}\n",
943 | "\n",
944 | "We can do this by calling ``.backward()`` on any **scalar** function of ``Q``:"
945 | ]
946 | },
947 | {
948 | "cell_type": "code",
949 | "execution_count": 36,
950 | "metadata": {},
951 | "outputs": [],
952 | "source": [
953 | "Q.sum().backward()"
954 | ]
955 | },
956 | {
957 | "cell_type": "code",
958 | "execution_count": 37,
959 | "metadata": {},
960 | "outputs": [
961 | {
962 | "name": "stdout",
963 | "output_type": "stream",
964 | "text": [
965 | "tensor([True, True])\n",
966 | "tensor([True, True])\n"
967 | ]
968 | }
969 | ],
970 | "source": [
971 | "# check if collected gradients are correct\n",
972 | "print(9 * a**2 == a.grad)\n",
973 | "print(-2 * b == b.grad)"
974 | ]
975 | },
976 | {
977 | "attachments": {},
978 | "cell_type": "markdown",
979 | "metadata": {},
980 | "source": [
981 | "## Why not just `numpy` (2): GPU Acceleration"
982 | ]
983 | },
984 | {
985 | "cell_type": "code",
986 | "execution_count": 38,
987 | "metadata": {},
988 | "outputs": [
989 | {
990 | "name": "stderr",
991 | "output_type": "stream",
992 | "text": [
993 | "|> th.matmul(a, b): tensor> th.matmul(a, b): tensor([[256.6814, 249.5139, 255.2641, ..., 256.0125, 254.6015, 246.8795],\n",
994 | " [261.9914, 254.1315, 253.2242, ..., 251.7429, 247.9489, 241.0573],\n",
995 | " [252.4773, 243.8860, 251.4451, ..., 249.3845, 251.5149, 239.8638],\n",
996 | " ...,\n",
997 | " [254.6993, 242.6673, 245.9762, ..., 248.1128, 247.0720, 238.3586],\n",
998 | " [249.7507, 250.4346, 251.9537, ..., 250.1937, 241.0707, 238.6906],\n",
999 | " [248.8309, 241.1919, 247.3757, ..., 249.3362, 242.9924, 234.8426]])\n"
1000 | ]
1001 | }
1002 | ],
1003 | "source": [
1004 | "a: Tensor = th.rand(1000, 1000)\n",
1005 | "b: Tensor = th.rand(1000, 1000)\n",
1006 | "_ = ic(th.matmul(a, b))"
1007 | ]
1008 | },
1009 | {
1010 | "cell_type": "code",
1011 | "execution_count": 39,
1012 | "metadata": {},
1013 | "outputs": [],
1014 | "source": [
1015 | "for _ in range(1000):\n",
1016 | " r: Tensor = th.matmul(a, b)"
1017 | ]
1018 | },
1019 | {
1020 | "cell_type": "code",
1021 | "execution_count": 40,
1022 | "metadata": {},
1023 | "outputs": [
1024 | {
1025 | "name": "stderr",
1026 | "output_type": "stream",
1027 | "text": [
1028 | "|> th.matmul(a_cuda, b_cuda): tensor([[256.6814, 249.5140,> th.matmul(a_cuda, b_cuda): tensor([[256.6814, 249.5140, 255.2642, ..., 256.0125, 254.6014, 246.8796],\n",
1029 | " [261.9914, 254.1314, 253.2242, ..., 251.7429, 247.9490, 241.0573],\n",
1030 | " [252.4773, 243.8859, 251.4451, ..., 249.3844, 251.5150, 239.8637],\n",
1031 | " ...,\n",
1032 | " [254.6993, 242.6673, 245.9762, ..., 248.1129, 247.0721, 238.3586],\n",
1033 | " [249.7505, 250.4346, 251.9537, ..., 250.1937, 241.0707, 238.6907],\n",
1034 | " [248.8309, 241.1919, 247.3756, ..., 249.3362, 242.9924, 234.8425]],\n",
1035 | " device='cuda:0')\n"
1036 | ]
1037 | }
1038 | ],
1039 | "source": [
1040 | "a_cuda = a.cuda() # Or, generally: a.to(device)\n",
1041 | "b_cuda = b.cuda() # Or, generally: a.to(device)\n",
1042 | "_ = ic(th.matmul(a_cuda, b_cuda))"
1043 | ]
1044 | },
1045 | {
1046 | "cell_type": "code",
1047 | "execution_count": 41,
1048 | "metadata": {},
1049 | "outputs": [],
1050 | "source": [
1051 | "for _ in range(1000):\n",
1052 | " r = th.matmul(a_cuda, b_cuda)"
1053 | ]
1054 | }
1055 | ],
1056 | "metadata": {
1057 | "kernelspec": {
1058 | "display_name": "Python 3",
1059 | "language": "python",
1060 | "name": "python3"
1061 | },
1062 | "language_info": {
1063 | "codemirror_mode": {
1064 | "name": "ipython",
1065 | "version": 3
1066 | },
1067 | "file_extension": ".py",
1068 | "mimetype": "text/x-python",
1069 | "name": "python",
1070 | "nbconvert_exporter": "python",
1071 | "pygments_lexer": "ipython3",
1072 | "version": "3.10.12"
1073 | },
1074 | "orig_nbformat": 4
1075 | },
1076 | "nbformat": 4,
1077 | "nbformat_minor": 2
1078 | }
1079 |
--------------------------------------------------------------------------------
/notebooks/AdvML_UniTS_2024_Lab_03_DL_with_PyTorch/AdvML_UniTS_2024_Lab_03_DL_with_PyTorch_03_advanced.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "collapsed": false
7 | },
8 | "source": [
9 | "# Lab 3.3 | A *whirlwind tour* of `PyTorch`: ***advanced*/*on-demand* topics**\n",
10 | "\n",
11 | "Advanced Topics in Machine Learning -- Fall 2024, UniTS\n",
12 | "\n",
13 | "
"
14 | ]
15 | },
16 | {
17 | "cell_type": "markdown",
18 | "metadata": {
19 | "collapsed": false
20 | },
21 | "source": [
22 | "**NOTE:** This notebook is the same as the *solved* version."
23 | ]
24 | },
25 | {
26 | "cell_type": "markdown",
27 | "metadata": {},
28 | "source": [
29 | "If we managed to reach this far into the *tutorial* (on time), we went pretty fast! 👏🏻 \n",
30 | "\n",
31 | "What's next: you tell me! \n",
32 | "\n",
33 | "- Questions?\n",
34 | "\n",
35 | "- Doubts? Existential doubts?\n",
36 | "\n",
37 | "- (One of my all-time favourites: *fancy* optimisers / LR scheduler?)"
38 | ]
39 | },
40 | {
41 | "cell_type": "code",
42 | "execution_count": 1,
43 | "metadata": {},
44 | "outputs": [],
45 | "source": [
46 | "# We will need to live-code it, though..."
47 | ]
48 | }
49 | ],
50 | "metadata": {
51 | "colab": {
52 | "authorship_tag": "ABX9TyPkibvpTEMRILBn2/x8IuJj",
53 | "provenance": []
54 | },
55 | "kernelspec": {
56 | "display_name": "Python 3 (ipykernel)",
57 | "language": "python",
58 | "name": "python3"
59 | },
60 | "language_info": {
61 | "codemirror_mode": {
62 | "name": "ipython",
63 | "version": 3
64 | },
65 | "file_extension": ".py",
66 | "mimetype": "text/x-python",
67 | "name": "python",
68 | "nbconvert_exporter": "python",
69 | "pygments_lexer": "ipython3",
70 | "version": "3.10.12"
71 | }
72 | },
73 | "nbformat": 4,
74 | "nbformat_minor": 0
75 | }
76 |
--------------------------------------------------------------------------------
/notebooks/AdvML_UniTS_2024_Lab_04_FCN_Augmentation.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Lab 4: Effect of **data augmentation** on a *shallow, linear FCN*\n",
8 | "\n",
9 | "Advanced Topics in Machine Learning -- Fall 2024, UniTS\n",
10 | "\n",
11 | "
"
12 | ]
13 | },
14 | {
15 | "cell_type": "markdown",
16 | "metadata": {},
17 | "source": [
18 | "#### High-level overview\n",
19 | "\n",
20 | "In the *Lab* that follows, we will explore the effect of **data augmentation** on the structure of learned weights in a *neural network* model. As an example, we will consider a *shallow, linear FCN* (fully-connected network with just 1 layer and no non-linearities) that is trained on the MNIST *classification task*.\n",
21 | "\n",
22 | "On a high level, you need to:\n",
23 | "- Define and train a *shallow, linear FCN* on the MNIST dataset, using *training-set augmentation* by means of *random rotations*;\n",
24 | "- Extract the learned weights from the trained model;\n",
25 | "- Visualize the learned weights as *images* and comment on their structure.\n",
26 | "\n",
27 | "The rest of the notebook will guide you through more detailed steps you need to follow."
28 | ]
29 | },
30 | {
31 | "cell_type": "markdown",
32 | "metadata": {},
33 | "source": [
34 | "#### Loading the datasets and defining augmentation strategies\n",
35 | "\n",
36 | "Load the (training and test) *MNIST* datasets as we did in the previous lab, with the following meaningful differences:\n",
37 | "- Apply random rotations to the dataset used for training, using a rotation range of $[0, 180]$ degrees.\n",
38 | "- Apply normalization to both datasets, using a mean of $0.1307$ and a standard deviation of $0.3081$ (they are notable, pre-computed values for the MNIST **training** dataset);\n",
39 | "\n",
40 | "**Hint**: look up the documentation for the `transforms.RandomRotation` and `transforms.Normalize` classes.\n",
41 | "\n",
42 | "**Optional**: Visualize the augmented training dataset, to get a sense of the effect of the augmentation strategy."
43 | ]
44 | },
45 | {
46 | "cell_type": "code",
47 | "execution_count": 1,
48 | "metadata": {},
49 | "outputs": [],
50 | "source": [
51 | "# YOUR CODE HERE"
52 | ]
53 | },
54 | {
55 | "cell_type": "markdown",
56 | "metadata": {},
57 | "source": [
58 | "#### Model definition and training\n",
59 | "\n",
60 | "Define the model and train it on the classification task, as we did in the previous *Lab*. Use fixed-learning-rate *Stochastic Gradient Descent* (with no momentum) as the optimizer. Feel free to experiment with the other hyperparameters.\n",
61 | "\n",
62 | "**Optional**: Plot the training loss as the training progresses. Show also test loss at the beginning and at the end of training.\n"
63 | ]
64 | },
65 | {
66 | "cell_type": "code",
67 | "execution_count": 2,
68 | "metadata": {},
69 | "outputs": [],
70 | "source": [
71 | "# YOUR CODE HERE"
72 | ]
73 | },
74 | {
75 | "cell_type": "markdown",
76 | "metadata": {},
77 | "source": [
78 | "#### Weights extraction and visualization\n",
79 | "\n",
80 |     "Extract the tensors corresponding to learned weights from the trained model (they are stored as the `weight` attribute of the linear layer), and visualize them as images.\n",
81 | "\n",
82 | "Comment on the structure of the learned weights, in relation to the nature of the augmentation strategy.\n",
83 | "\n",
84 | "**Remark**: in order to be able to visualize the weights as images, they need to be appropriately scaled (as tensor) within the $[0,1]$ range.\n"
85 | ]
86 | },
87 | {
88 | "cell_type": "code",
89 | "execution_count": 3,
90 | "metadata": {},
91 | "outputs": [],
92 | "source": [
93 | "# YOUR CODE HERE"
94 | ]
95 | },
96 | {
97 | "cell_type": "markdown",
98 | "source": [
99 | "#### Optional I: Training with different augmentation strategies\n",
100 | "\n",
101 |     "Repeat the same process described above, but using a different augmentation strategy, namely:\n",
102 | "- Apply a central (vertical or horizontal, one only or both) flip to images in the training dataset;\n",
103 | "- Apply random translations to images in the training dataset, bound by an arbitrary maximum;\n",
104 | "\n",
105 | "Comment on the structure of the learned weights, in relation to the nature of the augmentation strategy.\n"
106 | ],
107 | "metadata": {
108 | "collapsed": false
109 | }
110 | },
111 | {
112 | "cell_type": "markdown",
113 | "source": [
114 | "#### Optional II: Testing for the invariance of the learned representation\n",
115 | "\n",
116 | "Test that the internal representation produced by the weights learned under data augmentation is indeed invariant to the same transformation used for data augmentation. You may follow the guidance given in [this notebook](AdvML_UniTS_2024_Lab_04bis_FCN_Invariance.ipynb).\n"
117 | ],
118 | "metadata": {
119 | "collapsed": false
120 | }
121 | }
122 | ],
123 | "metadata": {
124 | "colab": {
125 | "authorship_tag": "ABX9TyPkibvpTEMRILBn2/x8IuJj",
126 | "provenance": []
127 | },
128 | "kernelspec": {
129 | "display_name": "Python 3 (ipykernel)",
130 | "language": "python",
131 | "name": "python3"
132 | },
133 | "language_info": {
134 | "codemirror_mode": {
135 | "name": "ipython",
136 | "version": 3
137 | },
138 | "file_extension": ".py",
139 | "mimetype": "text/x-python",
140 | "name": "python",
141 | "nbconvert_exporter": "python",
142 | "pygments_lexer": "ipython3",
143 | "version": "3.10.9"
144 | }
145 | },
146 | "nbformat": 4,
147 | "nbformat_minor": 1
148 | }
149 |
--------------------------------------------------------------------------------
/notebooks/AdvML_UniTS_2024_Lab_04bis_FCN_Invariance.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Lab 4 bis: **Invariance** in a *shallow FCN* under data augmentation\n",
8 | "\n",
9 | "Advanced Topics in Machine Learning -- Fall 2024, UniTS\n",
10 | "\n",
11 | "
"
12 | ]
13 | },
14 | {
15 | "cell_type": "markdown",
16 | "metadata": {},
17 | "source": [
18 | "#### High-level overview\n",
19 | "\n",
20 | "In this *Lab*, we will understand how the effect of *data augmentation* (whose effect on learned **weights** has been analyzed in the previous lab) translates to the **representation** learned by the model.\n",
21 | "\n",
22 | "Specifically, we define *representation* the (ordered) set of activations of a *neural network* model, which is dependent on the input, and can be seen as the way the model *sees* the data as a result of the learning process.\n",
23 | "\n",
24 | "To accomplish this goal, we will:\n",
25 | "\n",
26 | "- Load the weights resulting from the training of the model described in the previous lab;\n",
27 | "- Learn how to extract the activations of a given layer of the model, in response to a given input;\n",
28 | "- Evaluate such activations on mutually-rotated versions of the same input, and compare such activations to assess their *invariance* with respect to the transformation."
29 | ]
30 | },
31 | {
32 | "cell_type": "markdown",
33 | "metadata": {},
34 | "source": [
35 | "#### Preliminary: adapt and re-run the previous notebook\n",
36 | "\n",
37 | "Before starting to delve into this lab, you should:\n",
38 | "- Go back to the previous *Lab* notebook;\n",
39 | "- Add the (single line of) code required to save the model weights after training;\n",
40 | "- Re-run the notebook, to make sure that the weights are saved correctly;\n"
41 | ]
42 | },
43 | {
44 | "cell_type": "markdown",
45 | "metadata": {},
46 | "source": [
47 | "#### (Re-)definition of the model\n",
48 | "\n",
49 | "Define the exact same model you used in the previous lab, and instantiate it."
50 | ]
51 | },
52 | {
53 | "cell_type": "code",
54 | "execution_count": 1,
55 | "metadata": {},
56 | "outputs": [],
57 | "source": [
58 | "# YOUR CODE HERE"
59 | ]
60 | },
61 | {
62 | "cell_type": "markdown",
63 | "metadata": {},
64 | "source": [
65 | "#### Weights loading\n",
66 | "\n",
67 | "Load into the instance of your model the weights you just saved from the adapted notebook."
68 | ]
69 | },
70 | {
71 | "cell_type": "code",
72 | "execution_count": 2,
73 | "metadata": {},
74 | "outputs": [],
75 | "source": [
76 | "# YOUR CODE HERE"
77 | ]
78 | },
79 | {
80 | "cell_type": "markdown",
81 | "metadata": {},
82 | "source": [
83 | "#### Data preparation\n",
84 | "\n",
85 | "To test for a given transformation invariance, you should have pairs of (test) data obtained from the same image: one original, and one transformed.\n",
86 | "\n",
87 | "**Hint**: if you want to offload the task to already implemented `torchvision.transforms`, notice (to your advantage) that -- since we are just testing the model -- the dataset need not be in shuffled order!\n"
88 | ]
89 | },
90 | {
91 | "cell_type": "code",
92 | "execution_count": 3,
93 | "metadata": {},
94 | "outputs": [],
95 | "source": [
96 | "# YOUR CODE HERE"
97 | ]
98 | },
99 | {
100 | "cell_type": "markdown",
101 | "metadata": {},
102 | "source": [
103 | "#### Activation extraction\n",
104 | "\n",
105 | "Write a function that extracts the activations of a given layer of the model, in response to a given input. Try to remain as generic as possible, since you may need to re-use it in the future.\n",
106 | "\n",
107 | "**Hint**: Look up in the documentation the purpose and features of `hook`s. If you are in trouble, just ask!\n"
108 | ]
109 | },
110 | {
111 | "cell_type": "code",
112 | "execution_count": 4,
113 | "metadata": {},
114 | "outputs": [],
115 | "source": [
116 | "# YOUR CODE HERE"
117 | ]
118 | },
119 | {
120 | "cell_type": "markdown",
121 | "metadata": {},
122 | "source": [
123 | "#### Invariance evaluation\n",
124 | "\n",
125 | "Recall the definition of *invariance* of (the result of) function $f$ with respect to transformation $g(\\cdot\\;; \\alpha)$ parametrized by $\\alpha$:\n",
126 | "\n",
127 | "$$f(g(x; \\alpha))=f(x)\\;\\;\\;\\; \\forall\\alpha$$\n",
128 | "\n",
129 | "With the function and data just defined, compare in a statistically-significant manner the activations of the model on the original and transformed versions of the same image. Comment on the results.\n"
130 | ]
131 | },
132 | {
133 | "cell_type": "code",
134 | "execution_count": null,
135 | "metadata": {},
136 | "outputs": [],
137 | "source": [
138 | "# YOUR CODE HERE"
139 | ]
140 | }
141 | ],
142 | "metadata": {
143 | "colab": {
144 | "authorship_tag": "ABX9TyPkibvpTEMRILBn2/x8IuJj",
145 | "provenance": []
146 | },
147 | "kernelspec": {
148 | "display_name": "Python 3 (ipykernel)",
149 | "language": "python",
150 | "name": "python3"
151 | },
152 | "language_info": {
153 | "codemirror_mode": {
154 | "name": "ipython",
155 | "version": 3
156 | },
157 | "file_extension": ".py",
158 | "mimetype": "text/x-python",
159 | "name": "python",
160 | "nbconvert_exporter": "python",
161 | "pygments_lexer": "ipython3",
162 | "version": "3.10.9"
163 | }
164 | },
165 | "nbformat": 4,
166 | "nbformat_minor": 1
167 | }
168 |
--------------------------------------------------------------------------------
/notebooks/AdvML_UniTS_2024_Lab_05_CNN_Invariance_Equivariance.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Lab 5: **Invariance** and **Equivariance** at different layers of a *CNN*\n",
8 | "\n",
9 | "Advanced Topics in Machine Learning -- Fall 2024, UniTS\n",
10 | "\n",
11 | "
"
12 | ]
13 | },
14 | {
15 | "cell_type": "markdown",
16 | "metadata": {},
17 | "source": [
18 | "#### Overview of the *Lab*\n",
19 | "\n",
20 | "In the following *Lab*, we will study the *invariance* and *equivariance* properties of specific layers within a *CNN*.\n",
21 | "\n",
22 | "Recall the definitions -- respectively -- of **invariance** and **equivariance** of (the result of) function $f$ with respect to transformation (expressed in the form of an operator) $P_{\\alpha}$ parametrized by $\\alpha$:\n",
23 | "\n",
24 | "- *Invariance*: $f(P_{\\alpha} x) = f(x)\\;\\;\\;\\; \\forall\\alpha$\n",
25 | "- *Equivariance*: $f(P_{\\alpha} x) = P_{\\alpha} f(x)\\;\\;\\;\\; \\forall\\alpha$\n",
26 | "\n",
27 | "According to theory, the training of a *CNN* with pooling should lead to a network whose:\n",
28 | "\n",
29 | "- *Convolutional* layers are *equivariant* to translation;\n",
30 | "- *Fully Connected* layers are *invariant* to translation.\n",
31 | "\n",
32 | "Due to the specific structure of convolutional layers, it is possible to show that the *equivariance* property gives rise to permuted activations in response to translation of inputs."
33 | ]
34 | },
35 | {
36 | "cell_type": "code",
37 | "execution_count": 1,
38 | "metadata": {},
39 | "outputs": [],
40 | "source": [
41 | "from torch import nn\n",
42 | "from torch.nn import functional as F"
43 | ]
44 | },
45 | {
46 | "cell_type": "markdown",
47 | "metadata": {},
48 | "source": [
49 | "The following *CNN* model is given, whose output -- for your convenience -- is a tuple, composed of the actual output of the network, the activation tensor after the *convolutional* layer, and the activation tensor after the *fully-connected* layer:"
50 | ]
51 | },
52 | {
53 | "cell_type": "code",
54 | "execution_count": 2,
55 | "metadata": {},
56 | "outputs": [],
57 | "source": [
58 | "class Model(nn.Module):\n",
59 | " def __init__(self):\n",
60 | " super(Model, self).__init__()\n",
61 | " self.conv1 = nn.Conv2d(1, 1, kernel_size=2)\n",
62 | " self.fc1 = nn.Linear(729, 10)\n",
63 | "\n",
64 | " def forward(self, x):\n",
65 | " x = self.conv1(x)\n",
66 | " conv2repr = x.clone().detach()\n",
67 | " x = F.relu(x)\n",
68 | " x = x.view(x.shape[0], -1)\n",
69 | " x = self.fc1(x)\n",
70 | " fc1repr = x.clone().detach()\n",
71 | " x = F.log_softmax(x, dim=1)\n",
72 | " return x, conv2repr, fc1repr"
73 | ]
74 | },
75 | {
76 | "cell_type": "markdown",
77 | "metadata": {},
78 | "source": [
79 | "Taking inspiration from previous *Labs*:\n",
80 | "\n",
81 | "1. Train the model on the (non-augmented) *MNIST* dataset;\n",
82 | "2. Prepare a test dataset composed of pairs of mutually translated images;\n",
83 | "3. Extract the activations of layers `conv1` and `fc1` and check whether they respect the invariance/equivariance property.\n",
84 | "\n",
85 | "**Hint**: To test for *equivariance*, it may be useful to notice that **sorting** is invariant to permutations!"
86 | ]
87 | },
88 | {
89 | "cell_type": "code",
90 | "execution_count": 3,
91 | "metadata": {},
92 | "outputs": [],
93 | "source": [
94 | "# YOUR CODE HERE"
95 | ]
96 | }
97 | ],
98 | "metadata": {
99 | "colab": {
100 | "authorship_tag": "ABX9TyPkibvpTEMRILBn2/x8IuJj",
101 | "provenance": []
102 | },
103 | "kernelspec": {
104 | "display_name": "Python 3 (ipykernel)",
105 | "language": "python",
106 | "name": "python3"
107 | },
108 | "language_info": {
109 | "codemirror_mode": {
110 | "name": "ipython",
111 | "version": 3
112 | },
113 | "file_extension": ".py",
114 | "mimetype": "text/x-python",
115 | "name": "python",
116 | "nbconvert_exporter": "python",
117 | "pygments_lexer": "ipython3",
118 | "version": "3.10.9"
119 | }
120 | },
121 | "nbformat": 4,
122 | "nbformat_minor": 1
123 | }
124 |
--------------------------------------------------------------------------------
/notebooks/AdvML_UniTS_2024_Lab_06_Cortex_Hubel_Wiesel.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Lab 6: The *Hubel-Wiesel* model of cortical networks\n",
8 | "\n",
9 | "Advanced Topics in Machine Learning -- Fall 2024, UniTS\n",
10 | "\n",
11 | "
"
12 | ]
13 | },
14 | {
15 | "cell_type": "markdown",
16 | "metadata": {},
17 | "source": [
18 | "#### Overview of *Hubel & Wiesel* model\n",
19 | "\n",
20 | "In this lab, we will attempt an implementation of (a *machine-learning-flavoured* version of) the *Hubel & Wiesel* model of cortical networks. The model is a simplified (and, today we know, not entirely realistic) model of the primary visual cortex of the mammalian brain.\n",
21 | "\n",
22 | "According to the model, the cortex is composed of sets of *simple cells* and *complex cells*.\n",
23 | "\n",
24 | "- *Simple cells* are sensitive to a specific orientation of the edges in the visual field.\n",
25 | "- *Complex cells* integrate the signal received by a set of *simple cells*.\n",
26 | "\n",
27 | "By abstracting away the anatomical details, we can write:\n",
28 | "\n",
29 | "- For *simple cells*: $s_{ij}(x)=x^{T}(g_{j}(w_{i}))$;\n",
30 | "- For *complex cells*: $c_{h,i}(x)=\\sum_{j}\\sigma_{h}(s_{ij}(x))$\n",
31 | "\n",
32 | "where $x$ is an input vector, $w_{i}$ is the weight vector of the $i$-th simple cell, $g_{j}$ is a specific realization of the $g$ transformation, $\\sigma_{h}$ is the activation function of the $h$-th complex cell, and $s_{ij}$ and $c_{h,i}$ are the output of the $i$-th simple cell and the $h$-th complex cell, respectively.\n",
33 | "\n",
34 | "From the definitions above, we expect:\n",
35 | "- *Simple cells* to be equivariant to transformations of the inputs, i.e. $s_{i}(gx)=P_{g}s_{i}(x)$;\n",
36 | "- *Complex cells* to be invariant to transformations of the inputs, i.e. $c_{h,i}(x) = c_{h,i}(gx)$,\n",
37 | "\n",
38 | "where $P_{g}$ is an (any!) element of the group generating the transformation $g$."
39 | ]
40 | },
41 | {
42 | "cell_type": "markdown",
43 | "metadata": {},
44 | "source": [
45 | "#### Lab to-do:\n",
46 | "\n",
47 | "1. Implement a *simple* cell of the *Hubel & Wiesel* model, using central (vertical) flip as the transformation $g$ of interest.\n",
48 | "Note that such transformation is associated to the unitary group generated by $g_1 = \\text{Identity}$ and $g_2 = \\text{Flip}$, and as such requires (and admits) only two specific transformations of the data to be implemented."
49 | ]
50 | },
51 | {
52 | "cell_type": "code",
53 | "execution_count": 1,
54 | "metadata": {},
55 | "outputs": [],
56 | "source": [
57 | "def simplecell(): ..."
58 | ]
59 | },
60 | {
61 | "cell_type": "markdown",
62 | "metadata": {},
63 | "source": [
64 | "2. Implement a *complex* cell of the *Hubel & Wiesel* model."
65 | ]
66 | },
67 | {
68 | "cell_type": "code",
69 | "execution_count": 2,
70 | "metadata": {},
71 | "outputs": [],
72 | "source": [
73 | "def complexcell(): ..."
74 | ]
75 | },
76 | {
77 | "cell_type": "markdown",
78 | "metadata": {},
79 | "source": [
80 | "3. Verify the expected properties of the *simple* and *complex* cells in terms of equivariance and invariance with respect to the same transformation $g$ described above, on synthetic data (you can generate some yourself!).\n"
81 | ]
82 | },
83 | {
84 | "cell_type": "code",
85 | "execution_count": null,
86 | "metadata": {},
87 | "outputs": [],
88 | "source": [
89 | "# YOUR CODE HERE"
90 | ]
91 | }
92 | ],
93 | "metadata": {
94 | "colab": {
95 | "authorship_tag": "ABX9TyPkibvpTEMRILBn2/x8IuJj",
96 | "provenance": []
97 | },
98 | "kernelspec": {
99 | "display_name": "Python 3 (ipykernel)",
100 | "language": "python",
101 | "name": "python3"
102 | },
103 | "language_info": {
104 | "codemirror_mode": {
105 | "name": "ipython",
106 | "version": 3
107 | },
108 | "file_extension": ".py",
109 | "mimetype": "text/x-python",
110 | "name": "python",
111 | "nbconvert_exporter": "python",
112 | "pygments_lexer": "ipython3",
113 | "version": "3.10.9"
114 | }
115 | },
116 | "nbformat": 4,
117 | "nbformat_minor": 1
118 | }
119 |
--------------------------------------------------------------------------------
/notebooks/AdvML_UniTS_2024_Lab_08_Implicit_Bias.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Lab 8: **Implicit bias** of gradient descent: the case of *linear regression*\n",
8 | "\n",
9 | "Advanced Topics in Machine Learning -- Fall 2024, UniTS\n",
10 | "\n",
11 | "
"
12 | ]
13 | },
14 | {
15 | "cell_type": "markdown",
16 | "metadata": {},
17 | "source": [
18 | "#### Overview of the *Lab*\n",
19 | "\n",
20 | "In this lab, we will study the *implicit bias* induced by *Gradient Descent* optimization in the simple case of *linear regression*, fitted on a *toy* dataset. In particular, we will show that *GD*-optimized weights converge to the **least norm** solution of the *linear regression* problem.\n",
21 | "\n",
22 | "An analysis of implicit bias induced by *Stochastic Gradient Descent* in *full-width linear fully-connected* and *full-width linear convolutional* neural networks (which are much more complex and expressive models!) is provided in [this paper](https://arxiv.org/abs/1806.00468).\n"
23 | ]
24 | },
25 | {
26 | "cell_type": "markdown",
27 | "source": [
28 | "#### Linear regression\n",
29 | "\n",
30 | "In the case of **linear regression**, fitted by means of *least squares*, we optimize the following loss function:\n",
31 | "$$\n",
32 | "L=\\|y-Xw\\|_{2}^{2}\n",
33 | "$$.\n",
34 | "\n",
35 | "If we choose the *GD* optimization algorithm, we perform weight updates proportional to the gradient of the loss function:\n",
36 | "$$\n",
37 | "\nabla_{w} L = -2X^{T}(y-Xw)\n",
38 | "$$.\n",
39 | "\n",
40 | "Additionally, notice that the **least norm** solution of the *linear regression* problem is given by:\n",
41 | "$$\n",
42 | "w^{*}=(X^{T}X)^{-1}X^{T}y\n",
43 | "$$.\n"
44 | ],
45 | "metadata": {
46 | "collapsed": false
47 | }
48 | },
49 | {
50 | "cell_type": "code",
51 | "execution_count": null,
52 | "outputs": [],
53 | "source": [
54 | "import numpy as np"
55 | ],
56 | "metadata": {
57 | "collapsed": false
58 | }
59 | },
60 | {
61 | "cell_type": "markdown",
62 | "source": [
63 | "#### To-do:\n",
64 | "\n",
65 | "The following *toy* dataset is provided:"
66 | ],
67 | "metadata": {
68 | "collapsed": false
69 | }
70 | },
71 | {
72 | "cell_type": "code",
73 | "execution_count": null,
74 | "outputs": [],
75 | "source": [
76 | "m, n = 1000, 10\n",
77 | "X = np.random.normal(0, 1, (m, n))\n",
78 | "b = X.dot(np.random.normal(0, 1, n))"
79 | ],
80 | "metadata": {
81 | "collapsed": false
82 | }
83 | },
84 | {
85 | "cell_type": "markdown",
86 | "source": [
87 | "1. Compute the *least norm* solution of the linear regression problem;\n",
88 | "2. Write a function that computes the gradient of the loss function, as required by *GD* optimization;\n",
89 | "3. Perform *GD* optimization of the linear regression problem iteratively, storing the weights at each iteration;\n",
90 | "4. Plot the evolution of the weights during *GD* optimization and comment.\n"
91 | ],
92 | "metadata": {
93 | "collapsed": false
94 | }
95 | }
96 | ],
97 | "metadata": {
98 | "colab": {
99 | "authorship_tag": "ABX9TyPkibvpTEMRILBn2/x8IuJj",
100 | "provenance": []
101 | },
102 | "kernelspec": {
103 | "display_name": "Python 3 (ipykernel)",
104 | "language": "python",
105 | "name": "python3"
106 | },
107 | "language_info": {
108 | "codemirror_mode": {
109 | "name": "ipython",
110 | "version": 3
111 | },
112 | "file_extension": ".py",
113 | "mimetype": "text/x-python",
114 | "name": "python",
115 | "nbconvert_exporter": "python",
116 | "pygments_lexer": "ipython3",
117 | "version": "3.10.9"
118 | }
119 | },
120 | "nbformat": 4,
121 | "nbformat_minor": 1
122 | }
123 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2024 Fabio Anselmi
2 | # Copyright (c) 2024 AILab @ UniTS
3 | # All rights reserved.
4 | # Released under the terms of the MIT License
5 |
6 | # Requirements in sync with Google Colab (17/09/2024)
7 |
8 | torch>=2.0.1
9 | torchvision>=0.15.2
10 | numpy>=1.23.5
11 | scipy>=1.11.2
12 | scikit-learn>=1.2.2
13 | matplotlib>=3.7.1
14 | tqdm>=4.66.1
15 | icecream>=2.1.3
16 |
--------------------------------------------------------------------------------
/solutions/AdvML_UniTS_2024_Lab_01_Intro_to_Kernels_Solved.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# [Solved] Lab 1: **Kernels** and **features**\n",
8 | "\n",
9 | "Advanced Topics in Machine Learning -- Fall 2024, UniTS\n",
10 | "\n",
11 | "
"
12 | ]
13 | },
14 | {
15 | "cell_type": "markdown",
16 | "metadata": {},
17 | "source": [
18 | "### Empirical verification of the *Kernel* $\\leftrightarrow$ *feature expansion* equivalence\n",
19 | "\n",
20 | "Recall the definition of a *kernel*:\n",
21 | "> Let $\\mathcal{X}$ be a non-empty set. A function $k: \\mathcal{X} \\times \\mathcal{X} \\rightarrow \\mathbb{R}$ is called a *kernel* if there exists a real-Hilbert space $\\mathcal{H}$ and a map $\\phi: \\mathcal{X} \\rightarrow \\mathcal{H}$ such that $\\forall x, x^\\prime \\in \\mathcal{X}$, $k(x, x^\\prime) := \\langle \\phi(x) , \\phi(x^\\prime) \\rangle_{\\mathcal{H}}$.\n",
22 | "\n",
23 | "To motivate the usefulness of kernelized ML methods, we can show that -- for $x\in\mathbb{R}^{d \in \mathbb{N}}$ -- the computation of $k(x, x^\prime)$ in kernel form is equivalent to the explicit scalar product $\langle \varphi(x) , \varphi(x^\prime) \rangle = \varphi(x)^{T} \varphi(x^\prime)$ of some corresponding expanded feature maps $\varphi: \mathbb{R}^{d} \rightarrow \mathbb{R}^{d^\prime}$ with generally $d^\prime \gg d$ (or even *infinite-dimensional* $\varphi$s), though significantly simpler and more efficient to compute.\n",
24 | "\n",
25 | "In the lab that follows, verify such equivalence for simple kernels: the non-uniform *quadratic* (in $\\mathbb{R}^{d}$) and the *Gaussian* (in $\\mathbb{R}$).\n",
26 | "\n",
27 | "For each kernel:\n",
28 | "\n",
29 | "1. Implement a function that computes the kernel between two arrays of coordinates;\n",
30 | "2. Derive the explicit feature map $\\varphi(x)$ corresponding to that kernel;\n",
31 | "3. Implement a function that computes such feature map for a given array of coordinates;\n",
32 | "4. Verify that the kernel computed by (1) and the scalar product of its arguments through (3) are indeed equivalent.\n",
33 | "\n",
34 | "**Hint**: in case of need, you can finitely approximate the feature map by Taylor expansion.\n"
35 | ]
36 | },
37 | {
38 | "cell_type": "code",
39 | "execution_count": 1,
40 | "outputs": [],
41 | "source": [
42 | "import itertools\n",
43 | "import math\n",
44 | "import numpy as np"
45 | ],
46 | "metadata": {
47 | "collapsed": false,
48 | "ExecuteTime": {
49 | "end_time": "2023-09-16T22:40:35.639481Z",
50 | "start_time": "2023-09-16T22:40:35.409551900Z"
51 | }
52 | }
53 | },
54 | {
55 | "cell_type": "code",
56 | "execution_count": 2,
57 | "metadata": {
58 | "ExecuteTime": {
59 | "end_time": "2023-09-16T22:40:35.656220600Z",
60 | "start_time": "2023-09-16T22:40:35.656220600Z"
61 | }
62 | },
63 | "outputs": [],
64 | "source": [
65 | "def nu_quadratic_kernel(x, x_prime):\n",
66 | " \"\"\"Compute the non-uniform quadratic kernel between two arrays of coordinates.\n",
67 | "\n",
68 | " Parameters\n",
69 | " ----------\n",
70 | " x : array-like, shape: (n_features)\n",
71 | " First array of coordinates.\n",
72 | " x_prime : array-like, shape: (n_features)\n",
73 | " Second array of coordinates.\n",
74 | "\n",
75 | " Returns\n",
76 | " -------\n",
77 | " k : array-like, shape: (1)\n",
78 | " Kernel value.\n",
79 | " \"\"\"\n",
80 | "\n",
81 | " x, x_prime = np.asarray(x), np.asarray(\n",
82 | " x_prime\n",
83 | " ) # Always a good practice; almost overhead-free.\n",
84 | " return (1 + np.dot(x, x_prime)) ** 2"
85 | ]
86 | },
87 | {
88 | "cell_type": "code",
89 | "execution_count": 3,
90 | "metadata": {
91 | "ExecuteTime": {
92 | "end_time": "2023-09-16T22:40:35.656220600Z",
93 | "start_time": "2023-09-16T22:40:35.656220600Z"
94 | }
95 | },
96 | "outputs": [],
97 | "source": [
98 | "def nu_quadratic_feature_map(x):\n",
99 | " \"\"\"Compute the feature map corresponding to the non-uniform quadratic kernel.\n",
100 | "\n",
101 | " Parameters\n",
102 | " ----------\n",
103 | " x : array-like, shape: (n_features)\n",
104 | " Array of coordinates.\n",
105 | "\n",
106 | " Returns\n",
107 | " -------\n",
108 | " phi_x : array-like, shape: (n_features)\n",
109 | " Feature map.\n",
110 | " \"\"\"\n",
111 | "\n",
112 | " x = np.asarray(x)\n",
113 | "\n",
114 | " # Mixed products\n",
115 | " mixed = np.asarray(\n",
116 | " [\n",
117 | " math.sqrt(2) * x[i] * x[j]\n",
118 | " for i, j in itertools.combinations(range(len(x)), 2)\n",
119 | " ]\n",
120 | " )\n",
121 | "\n",
122 | " return np.concatenate((np.asarray([1]), math.sqrt(2) * x, x**2, mixed))"
123 | ]
124 | },
125 | {
126 | "cell_type": "code",
127 | "execution_count": 4,
128 | "metadata": {
129 | "ExecuteTime": {
130 | "end_time": "2023-09-16T22:40:35.672014800Z",
131 | "start_time": "2023-09-16T22:40:35.657835700Z"
132 | }
133 | },
134 | "outputs": [
135 | {
136 | "name": "stdout",
137 | "output_type": "stream",
138 | "text": [
139 | "Success!\n"
140 | ]
141 | }
142 | ],
143 | "source": [
144 | "# Check the equivalence on randomly-initialized arrays\n",
145 | "\n",
146 | "v = np.random.randn(100)\n",
147 | "u = np.random.randn(100)\n",
148 | "\n",
149 | "kernel_val = nu_quadratic_kernel(v, u)\n",
150 | "feature_map_val = nu_quadratic_feature_map(v).dot(nu_quadratic_feature_map(u))\n",
151 | "\n",
152 | "if np.allclose(kernel_val, feature_map_val):\n",
153 | " print(\"Success!\")"
154 | ]
155 | },
156 | {
157 | "cell_type": "code",
158 | "execution_count": 5,
159 | "metadata": {
160 | "ExecuteTime": {
161 | "end_time": "2023-09-16T22:40:35.679547Z",
162 | "start_time": "2023-09-16T22:40:35.673021100Z"
163 | }
164 | },
165 | "outputs": [],
166 | "source": [
167 | "def gaussian_kernel(x, x_prime, sigma):\n",
168 | " \"\"\"Compute the Gaussian kernel between two arrays of coordinates.\n",
169 | "\n",
170 | " Parameters\n",
171 | " ----------\n",
172 | " x : array-like, shape: (n_features)\n",
173 | " First array of coordinates.\n",
174 | " x_prime : array-like, shape: (n_features)\n",
175 | " Second array of coordinates.\n",
176 | " sigma : float\n",
177 | " Kernel standard deviation.\n",
178 | "\n",
179 | " Returns\n",
180 | " -------\n",
181 | " k : array-like, shape: (1)\n",
182 | " Kernel value.\n",
183 | " \"\"\"\n",
184 | "\n",
185 | " x, x_prime = np.asarray(x), np.asarray(x_prime)\n",
186 | " return np.exp(-((x - x_prime) ** 2) / (2 * sigma**2))"
187 | ]
188 | },
189 | {
190 | "cell_type": "code",
191 | "execution_count": 6,
192 | "metadata": {
193 | "ExecuteTime": {
194 | "end_time": "2023-09-16T22:40:35.687066700Z",
195 | "start_time": "2023-09-16T22:40:35.673021100Z"
196 | }
197 | },
198 | "outputs": [],
199 | "source": [
200 | "def gaussian_feature_map(x, sigma, approx_order=100):\n",
201 | " \"\"\"Compute the feature map corresponding to the Gaussian kernel.\n",
202 | "\n",
203 | " Parameters\n",
204 | " ----------\n",
205 | " x : array-like, shape: (n_features)\n",
206 | " Array of coordinates.\n",
207 | " sigma : float\n",
208 | " Kernel standard deviation.\n",
209 | " approx_order : int, optional (default=100)\n",
210 | " Order of the Taylor expansion used to approximate the feature map.\n",
211 | "\n",
212 | " Returns\n",
213 | " -------\n",
214 | " phi_x : array-like, shape: (n_features)\n",
215 | " Feature map.\n",
216 | " \"\"\"\n",
217 | "\n",
218 | " x = np.asarray(x)\n",
219 | "\n",
220 | " common_factor = np.exp(-(x**2) / (2 * sigma**2))\n",
221 | "\n",
222 | " taylor = (\n",
223 | " common_factor\n",
224 | " * np.asarray(\n",
225 | " [\n",
226 | " (x / sigma) ** i / math.sqrt(math.factorial(i))\n",
227 | " for i in range(approx_order)\n",
228 | " ]\n",
229 | " ).flatten()\n",
230 | " )\n",
231 | " # Flattening is required as otherwise we would have [[...],[...],[...], ...]\n",
232 | "\n",
233 | " return taylor"
234 | ]
235 | },
236 | {
237 | "cell_type": "code",
238 | "execution_count": 7,
239 | "metadata": {
240 | "ExecuteTime": {
241 | "end_time": "2023-09-16T22:40:35.690690800Z",
242 | "start_time": "2023-09-16T22:40:35.679547Z"
243 | }
244 | },
245 | "outputs": [
246 | {
247 | "name": "stdout",
248 | "output_type": "stream",
249 | "text": [
250 | "Success!\n"
251 | ]
252 | }
253 | ],
254 | "source": [
255 | "# Check the equivalence on randomly-initialized arrays\n",
256 | "\n",
257 | "v = 100 * np.random.rand(1)\n",
258 | "u = 100 * np.random.rand(1)\n",
259 | "\n",
260 | "chosen_sigma = 10\n",
261 | "\n",
262 | "kernel_val = gaussian_kernel(v, u, sigma=chosen_sigma)\n",
263 | "feature_map_val = gaussian_feature_map(v, sigma=chosen_sigma).dot(\n",
264 | " gaussian_feature_map(u, sigma=chosen_sigma)\n",
265 | ")\n",
266 | "\n",
267 | "if np.allclose(kernel_val, feature_map_val):\n",
268 | " print(\"Success!\")"
269 | ]
270 | }
271 | ],
272 | "metadata": {
273 | "colab": {
274 | "authorship_tag": "ABX9TyPkibvpTEMRILBn2/x8IuJj",
275 | "provenance": []
276 | },
277 | "kernelspec": {
278 | "display_name": "Python 3 (ipykernel)",
279 | "language": "python",
280 | "name": "python3"
281 | },
282 | "language_info": {
283 | "codemirror_mode": {
284 | "name": "ipython",
285 | "version": 3
286 | },
287 | "file_extension": ".py",
288 | "mimetype": "text/x-python",
289 | "name": "python",
290 | "nbconvert_exporter": "python",
291 | "pygments_lexer": "ipython3",
292 | "version": "3.10.9"
293 | }
294 | },
295 | "nbformat": 4,
296 | "nbformat_minor": 1
297 | }
298 |
--------------------------------------------------------------------------------
/solutions/AdvML_UniTS_2024_Lab_03_DL_with_PyTorch:
--------------------------------------------------------------------------------
1 | ../notebooks/AdvML_UniTS_2024_Lab_03_DL_with_PyTorch
--------------------------------------------------------------------------------
/solutions/AdvML_UniTS_2024_Lab_04bis_FCN_Invariance_Solved.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# [Solved] Lab 4 bis: **Invariance** in a *shallow FCN* under data augmentation\n",
8 | "\n",
9 | "Advanced Topics in Machine Learning -- Fall 2024, UniTS\n",
10 | "\n",
11 | "
"
12 | ]
13 | },
14 | {
15 | "cell_type": "markdown",
16 | "metadata": {},
17 | "source": [
18 | "#### High-level overview\n",
19 | "\n",
20 | "In this *Lab*, we will understand how the effect of *data augmentation* (whose effect on learned **weights** has been analyzed in the previous lab) translates to the **representation** learned by the model.\n",
21 | "\n",
22 | "Specifically, we define *representation* the (ordered) set of activations of a *neural network* model, which is dependent on the input, and can be seen as the way the model *sees* the data as a result of the learning process.\n",
23 | "\n",
24 | "To accomplish this goal, we will:\n",
25 | "\n",
26 | "- Load the weights resulting from the training of the model described in the previous lab;\n",
27 | "- Learn how to extract the activations of a given layer of the model, in response to a given input;\n",
28 | "- Evaluate such activations on mutually-rotated versions of the same input, and compare such activations to assess their *invariance* with respect to the transformation."
29 | ]
30 | },
31 | {
32 | "cell_type": "markdown",
33 | "metadata": {},
34 | "source": [
35 | "#### Preliminary: adapt and re-run the previous notebook\n",
36 | "\n",
37 | "Before starting to delve into this lab, you should:\n",
38 | "- Go back to the previous *Lab* notebook;\n",
39 | "- Add the (single line of) code required to save the model weights after training;\n",
40 | "- Re-run the notebook, to make sure that the weights are saved correctly;\n"
41 | ]
42 | },
43 | {
44 | "cell_type": "code",
45 | "execution_count": 1,
46 | "metadata": {
47 | "ExecuteTime": {
48 | "end_time": "2023-09-17T00:59:23.883658900Z",
49 | "start_time": "2023-09-17T00:59:22.181232Z"
50 | }
51 | },
52 | "outputs": [],
53 | "source": [
54 | "import torch as th\n",
55 | "import torch.nn as nn\n",
56 | "import torch.nn.functional as F\n",
57 | "\n",
58 | "from torch.utils.data import DataLoader\n",
59 | "\n",
60 | "from torchvision import datasets\n",
61 | "from torchvision import transforms"
62 | ]
63 | },
64 | {
65 | "cell_type": "markdown",
66 | "metadata": {},
67 | "source": [
68 | "#### (Re-)definition of the model\n",
69 | "\n",
70 | "Define the exact same model you used in the previous lab, and instantiate it."
71 | ]
72 | },
73 | {
74 | "cell_type": "code",
75 | "execution_count": 2,
76 | "metadata": {
77 | "ExecuteTime": {
78 | "end_time": "2023-09-17T00:59:23.902354300Z",
79 | "start_time": "2023-09-17T00:59:23.889181500Z"
80 | }
81 | },
82 | "outputs": [],
83 | "source": [
84 | "# Model definition\n",
85 | "class MyModel(nn.Module):\n",
86 | " def __init__(self):\n",
87 | " super(MyModel, self).__init__()\n",
88 | " self.fc = nn.Linear(28 * 28, 10)\n",
89 | "\n",
90 | " def forward(self, x):\n",
91 | " x = x.flatten(start_dim=1)\n",
92 | " x = self.fc(x)\n",
93 | " x = F.log_softmax(x, dim=1) # More numerically stable than softmax\n",
94 | " return x\n",
95 | "\n",
96 | "\n",
97 | "# Model instantiation\n",
98 | "model = MyModel()"
99 | ]
100 | },
101 | {
102 | "cell_type": "markdown",
103 | "metadata": {},
104 | "source": [
105 | "#### Weights loading\n",
106 | "\n",
107 | "Load into the instance of your model the weights you just saved from the adapted notebook."
108 | ]
109 | },
110 | {
111 | "cell_type": "code",
112 | "execution_count": 3,
113 | "metadata": {
114 | "ExecuteTime": {
115 | "end_time": "2023-09-17T00:59:25.278325Z",
116 | "start_time": "2023-09-17T00:59:23.894646100Z"
117 | }
118 | },
119 | "outputs": [
120 | {
121 | "data": {
122 | "text/plain": "MyModel(\n (fc): Linear(in_features=784, out_features=10, bias=True)\n)"
123 | },
124 | "execution_count": 3,
125 | "metadata": {},
126 | "output_type": "execute_result"
127 | }
128 | ],
129 | "source": [
130 | "_ = model.load_state_dict(th.load(\"./models/rotation_invariant_slfcn.pth\"))\n",
131 | "model.eval()"
132 | ]
133 | },
134 | {
135 | "cell_type": "markdown",
136 | "metadata": {},
137 | "source": [
138 | "#### Data preparation\n",
139 | "\n",
140 | "To test for a given transformation invariance, you should have pairs of (test) data obtained from the same image: one original, and one transformed.\n",
141 | "\n",
142 | "**Hint**: if you want to offload the task to already implemented `torchvision.transforms`, notice (to your advantage) that -- since we are just testing the model -- the dataset need not be in shuffled order!\n"
143 | ]
144 | },
145 | {
146 | "cell_type": "code",
147 | "execution_count": 4,
148 | "metadata": {
149 | "ExecuteTime": {
150 | "end_time": "2023-09-17T00:59:25.278325Z",
151 | "start_time": "2023-09-17T00:59:25.268282200Z"
152 | }
153 | },
154 | "outputs": [],
155 | "source": [
156 | "# Hyperparameters\n",
157 | "BATCH_SIZE = 1024"
158 | ]
159 | },
160 | {
161 | "cell_type": "code",
162 | "execution_count": 5,
163 | "metadata": {
164 | "ExecuteTime": {
165 | "end_time": "2023-09-17T00:59:25.504866400Z",
166 | "start_time": "2023-09-17T00:59:25.273318700Z"
167 | }
168 | },
169 | "outputs": [],
170 | "source": [
171 | "# Defining transforms\n",
172 | "augmentation = transforms.RandomAffine(degrees=(0, 180), translate=None, scale=None)\n",
173 | "to_tensor = transforms.ToTensor()\n",
174 | "normalization = transforms.Normalize(mean=0.1307, std=0.3081)\n",
175 | "\n",
176 | "# Defining testing data-sets/loaders\n",
177 | "test_dataset = datasets.MNIST(\n",
178 | " root=\"./data\",\n",
179 | " train=False,\n",
180 | " # Original: no augmentation\n",
181 | " transform=transforms.Compose([to_tensor, normalization]),\n",
182 | " download=True,\n",
183 | ")\n",
184 | "test_dataset_rot = datasets.MNIST(\n",
185 | " root=\"./data\",\n",
186 | " train=False,\n",
187 | " # Transformed: augmented\n",
188 | " transform=transforms.Compose([augmentation, to_tensor, normalization]),\n",
189 | " download=True,\n",
190 | ")\n",
191 | "\n",
192 | "test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, shuffle=False)\n",
193 |     "test_loader_rot = DataLoader(dataset=test_dataset_rot, batch_size=BATCH_SIZE, shuffle=False)"
194 | ]
195 | },
196 | {
197 | "cell_type": "markdown",
198 | "metadata": {},
199 | "source": [
200 | "#### Activation extraction\n",
201 | "\n",
202 | "Write a function that extracts the activations of a given layer of the model, in response to a given input. Try to remain as generic as possible, since you may need to re-use it in the future.\n",
203 | "\n",
204 | "**Hint**: Look up in the documentation the purpose and features of `hook`s. If you are in trouble, just ask!\n"
205 | ]
206 | },
207 | {
208 | "cell_type": "code",
209 | "execution_count": 6,
210 | "metadata": {
211 | "ExecuteTime": {
212 | "end_time": "2023-09-17T00:59:25.525667900Z",
213 | "start_time": "2023-09-17T00:59:25.507949700Z"
214 | }
215 | },
216 | "outputs": [],
217 | "source": [
218 | "# Definition of a hook that outputs layer-specific activations\n",
219 | "def get_activations(_x, _model, _name):\n",
220 | " activations = {}\n",
221 | "\n",
222 | " def get_activation_hook(name):\n",
223 | " def hook(_model, _input, _output):\n",
224 | " _ = _model, _input\n",
225 | " activations[name] = _output.detach()\n",
226 | "\n",
227 | " return hook\n",
228 | "\n",
229 |     "    layer = getattr(_model, _name)\n",
230 |     "    handle = layer.register_forward_hook(get_activation_hook(_name))\n",
231 |     "    _ = _model(_x)\n",
232 |     "    handle.remove()  # Detach the hook: otherwise every call stacks another hook on the layer\n",
233 |     "    activation = activations[_name]\n",
234 |     "    return activation"
234 | ]
235 | },
236 | {
237 | "cell_type": "markdown",
238 | "metadata": {},
239 | "source": [
240 | "#### Invariance evaluation\n",
241 | "\n",
242 | "Recall the definition of *invariance* of (the result of) function $f$ with respect to transformation $g(\\cdot\\;; \\alpha)$ parametrized by $\\alpha$:\n",
243 | "\n",
244 | "$$f(g(x; \\alpha))=f(x)\\;\\;\\;\\; \\forall\\alpha$$\n",
245 | "\n",
246 | "With the function and data just defined, compare the activations of the model on the original and transformed versions of the same image. Comment on the results.\n"
247 | ]
248 | },
249 | {
250 | "cell_type": "code",
251 | "execution_count": 7,
252 | "metadata": {
253 | "ExecuteTime": {
254 | "end_time": "2023-09-17T00:59:28.136980100Z",
255 | "start_time": "2023-09-17T00:59:25.515423200Z"
256 | }
257 | },
258 | "outputs": [
259 | {
260 | "name": "stdout",
261 | "output_type": "stream",
262 | "text": [
263 | "Average Euclidean norm of the difference: 11.152886390686035\n",
264 | "Average Euclidean norm of the difference, control: 12.165810585021973\n"
265 | ]
266 | }
267 | ],
268 | "source": [
269 | "differences = []\n",
270 | "differences_control = []\n",
271 | "\n",
272 | "control_img = None\n",
273 | "\n",
274 |     "for i, ((images, label), (images_rot, label_rot)) in enumerate(zip(test_loader, test_loader_rot)):\n",
275 |     "    # The order is the same due to the fact that both dataloaders are not shuffled!\n",
276 |     "    # zip pairs aligned batches; re-creating the rotated loader's iterator inside the loop would restart it at batch 0 every time\n",
277 | "\n",
278 | " if i == 0:\n",
279 | " control_img = th.randn_like(images)\n",
280 | "\n",
281 | " act = get_activations(images, model, \"fc\")\n",
282 | " act_rot = get_activations(images_rot, model, \"fc\")\n",
283 | " act_control = get_activations(control_img, model, \"fc\")\n",
284 | "\n",
285 | " # When the non-augmented dataset is over, the rest of the batch is eventually filled with augmentations\n",
286 | " # This check (and fix) avoids dimension mismatches\n",
287 | " if act.shape[0] != act_rot.shape[0]:\n",
288 | " act_rot = act_rot[: act.shape[0]]\n",
289 | " act_control = act_control[: act.shape[0]]\n",
290 | "\n",
291 | " differences.append(act - act_rot)\n",
292 | " differences_control.append(act - act_control)\n",
293 | "\n",
294 | " control_img = images_rot\n",
295 | "\n",
296 | "avg_norm = th.linalg.norm(th.cat(differences, dim=0), dim=1).mean()\n",
297 | "avg_norm_control = th.linalg.norm(th.cat(differences_control, dim=0), dim=1).mean()\n",
298 | "\n",
299 | "print(f\"Average Euclidean norm of the difference: {avg_norm.item()}\")\n",
300 | "print(f\"Average Euclidean norm of the difference, control: {avg_norm_control.item()}\")"
301 | ]
302 | }
303 | ],
304 | "metadata": {
305 | "colab": {
306 | "authorship_tag": "ABX9TyPkibvpTEMRILBn2/x8IuJj",
307 | "provenance": []
308 | },
309 | "kernelspec": {
310 | "display_name": "Python 3 (ipykernel)",
311 | "language": "python",
312 | "name": "python3"
313 | },
314 | "language_info": {
315 | "codemirror_mode": {
316 | "name": "ipython",
317 | "version": 3
318 | },
319 | "file_extension": ".py",
320 | "mimetype": "text/x-python",
321 | "name": "python",
322 | "nbconvert_exporter": "python",
323 | "pygments_lexer": "ipython3",
324 | "version": "3.10.9"
325 | }
326 | },
327 | "nbformat": 4,
328 | "nbformat_minor": 4
329 | }
330 |
--------------------------------------------------------------------------------
/solutions/AdvML_UniTS_2024_Lab_06_Cortex_Hubel_Wiesel_Solved.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# [Solved] Lab 6: The *Hubel-Wiesel* model of cortical networks\n",
8 | "\n",
9 | "Advanced Topics in Machine Learning -- Fall 2023, UniTS\n",
10 | "\n",
11 | "
"
12 | ]
13 | },
14 | {
15 | "cell_type": "markdown",
16 | "metadata": {},
17 | "source": [
18 | "#### Overview of *Hubel & Wiesel* model\n",
19 | "\n",
20 | "In this lab, we will attempt an implementation of (a *machine-learning-flavoured* version of) the *Hubel & Wiesel* model of cortical networks. The model is a simplified (and, today we know, not entirely realistic) model of the primary visual cortex of the mammalian brain.\n",
21 | "\n",
22 |     "According to the model, the cortex is composed of sets of *simple cells* and *complex cells*.\n",
23 | "\n",
24 | "- *Simple cells* are sensitive to a specific orientation of the edges in the visual field.\n",
25 | "- *Complex cells* integrate the signal received by a set of *simple cells*.\n",
26 | "\n",
27 | "By abstracting away the anatomical details, we can write:\n",
28 | "\n",
29 | "- For *simple cells*: $s_{ij}(x)=x^{T}(g_{j}(w_{i}))$;\n",
30 | "- For *complex cells*: $c_{h,i}(x)=\\sum_{j}\\sigma_{h}(s_{ij}(x))$\n",
31 | "\n",
32 | "where $x$ is an input vector, $w_{i}$ is the weight vector of the $i$-th simple cell, $g_{j}$ is a specific realization of the $g$ transformation, $\\sigma_{h}$ is the activation function of the $h$-th complex cell, and $s_{ij}$ and $c_{h,i}$ are the output of the $i$-th simple cell and the $h$-th complex cell, respectively.\n",
33 | "\n",
34 | "From the definitions above, we expect:\n",
35 | "- *Simple cells* to be equivariant to transformations of the inputs, i.e. $s_{i}(gx)=P_{g}s_{i}(x)$;\n",
36 | "- *Complex cells* to be invariant to transformations of the inputs, i.e. $c_{h,i}(x) = c_{h,i}(gx)$,\n",
37 | "\n",
38 | "where $P_{g}$ is an (any!) element of the group generating the transformation $g$."
39 | ]
40 | },
41 | {
42 | "cell_type": "markdown",
43 | "metadata": {},
44 | "source": [
45 | "#### Lab to-do:\n",
46 | "\n",
47 | "1. Implement a *simple* cell of the *Hubel & Wiesel* model, using central (vertical) flip as the transformation $g$ of interest.\n",
48 | "Note that such transformation is associated to the unitary group generated by $g_1 = \\text{Identity}$ and $g_2 = \\text{Flip}$, and as such requires (and admits) only two specific transformations of the data to be implemented."
49 | ]
50 | },
51 | {
52 | "cell_type": "code",
53 | "execution_count": 1,
54 | "outputs": [],
55 | "source": [
56 | "from typing import Callable\n",
57 | "import numpy as np"
58 | ],
59 | "metadata": {
60 | "collapsed": false,
61 | "ExecuteTime": {
62 | "end_time": "2023-09-17T01:10:15.593212200Z",
63 | "start_time": "2023-09-17T01:10:15.528046700Z"
64 | }
65 | }
66 | },
67 | {
68 | "cell_type": "code",
69 | "execution_count": 2,
70 | "metadata": {
71 | "ExecuteTime": {
72 | "end_time": "2023-09-17T01:10:15.593212200Z",
73 | "start_time": "2023-09-17T01:10:15.593212200Z"
74 | }
75 | },
76 | "outputs": [],
77 | "source": [
78 | "def simplecell(x, g_i):\n",
79 | " if isinstance(g_i, Callable):\n",
80 | " return g_i(x)\n",
81 | " else:\n",
82 | " return np.matmul(g_i, x)"
83 | ]
84 | },
85 | {
86 | "cell_type": "code",
87 | "execution_count": 3,
88 | "outputs": [],
89 | "source": [
90 | "def flip_simplecells(x):\n",
91 | " return simplecell(x, np.eye(x.shape[0])), simplecell(x, np.fliplr)"
92 | ],
93 | "metadata": {
94 | "collapsed": false,
95 | "ExecuteTime": {
96 | "end_time": "2023-09-17T01:10:15.604134100Z",
97 | "start_time": "2023-09-17T01:10:15.593212200Z"
98 | }
99 | }
100 | },
101 | {
102 | "cell_type": "markdown",
103 | "metadata": {},
104 | "source": [
105 | "2. Implement a *complex* cell of the *Hubel & Wiesel* model."
106 | ]
107 | },
108 | {
109 | "cell_type": "code",
110 | "execution_count": 4,
111 | "metadata": {
112 | "ExecuteTime": {
113 | "end_time": "2023-09-17T01:10:15.613198400Z",
114 | "start_time": "2023-09-17T01:10:15.604134100Z"
115 | }
116 | },
117 | "outputs": [],
118 | "source": [
119 | "def complexcell(x_iterable):\n",
120 | " return 1 / (1 + np.exp(-np.sum(np.array(x_iterable), axis=0)))"
121 | ]
122 | },
123 | {
124 | "cell_type": "markdown",
125 | "metadata": {},
126 | "source": [
127 | "3. Verify the expected properties of the *simple* and *complex* cells in terms of equivariance and invariance with respect to the same transformation $g$ described above, on synthetic data (you can generate some yourself!).\n"
128 | ]
129 | },
130 | {
131 | "cell_type": "code",
132 | "execution_count": 5,
133 | "metadata": {
134 | "ExecuteTime": {
135 | "end_time": "2023-09-17T01:10:15.613198400Z",
136 | "start_time": "2023-09-17T01:10:15.607193200Z"
137 | }
138 | },
139 | "outputs": [],
140 | "source": [
141 | "# Generate some data\n",
142 | "xsize = (28, 28)\n",
143 | "nperms = 2\n",
144 | "\n",
145 | "xlist = [\n",
146 | " np.random.rand(*xsize),\n",
147 | "]\n",
148 | "for _ in range(nperms - 1):\n",
149 | " xlist.append(np.fliplr(xlist[0]))\n",
150 | "\n",
151 | "x = np.stack(xlist)"
152 | ]
153 | },
154 | {
155 | "cell_type": "code",
156 | "execution_count": 6,
157 | "outputs": [],
158 | "source": [
159 | "# Compute the outputs of the simple cells\n",
160 | "ylist_sc = []\n",
161 | "for x_i in x:\n",
162 | " ylist_sc.append(flip_simplecells(x_i))\n",
163 | "y_sc = np.stack(ylist_sc)"
164 | ],
165 | "metadata": {
166 | "collapsed": false,
167 | "ExecuteTime": {
168 | "end_time": "2023-09-17T01:10:15.653413600Z",
169 | "start_time": "2023-09-17T01:10:15.613198400Z"
170 | }
171 | }
172 | },
173 | {
174 | "cell_type": "code",
175 | "execution_count": 7,
176 | "outputs": [],
177 | "source": [
178 | "# Compute the outputs of the complex cells\n",
179 | "ylist_cc = []\n",
180 | "for y_i in y_sc:\n",
181 | " ylist_cc.append(complexcell(y_i))\n",
182 | "y_cc = np.stack(ylist_cc)"
183 | ],
184 | "metadata": {
185 | "collapsed": false,
186 | "ExecuteTime": {
187 | "end_time": "2023-09-17T01:10:15.654919500Z",
188 | "start_time": "2023-09-17T01:10:15.623309400Z"
189 | }
190 | }
191 | },
192 | {
193 | "cell_type": "code",
194 | "execution_count": 8,
195 | "outputs": [
196 | {
197 | "name": "stdout",
198 | "output_type": "stream",
199 | "text": [
200 | "Success (equivariance)!\n",
201 | "Success (invariance)!\n"
202 | ]
203 | }
204 | ],
205 | "source": [
206 | "# Equivariance of simple cells\n",
207 | "ysc_sorted = np.sort(y_sc.reshape(y_sc.shape[0], -1), axis=1)\n",
208 | "if np.isclose(ysc_sorted[0], ysc_sorted).all():\n",
209 | " print(\"Success (equivariance)!\")\n",
210 | "\n",
211 | "# Invariance of complex cells\n",
212 | "if np.isclose(\n",
213 | " y_cc.reshape(y_cc.shape[0], -1)[0], y_cc.reshape(y_cc.shape[0], -1)\n",
214 | ").all():\n",
215 | " print(\"Success (invariance)!\")"
216 | ],
217 | "metadata": {
218 | "collapsed": false,
219 | "ExecuteTime": {
220 | "end_time": "2023-09-17T01:10:15.654919500Z",
221 | "start_time": "2023-09-17T01:10:15.633084500Z"
222 | }
223 | }
224 | }
225 | ],
226 | "metadata": {
227 | "colab": {
228 | "authorship_tag": "ABX9TyPkibvpTEMRILBn2/x8IuJj",
229 | "provenance": []
230 | },
231 | "kernelspec": {
232 | "display_name": "Python 3 (ipykernel)",
233 | "language": "python",
234 | "name": "python3"
235 | },
236 | "language_info": {
237 | "codemirror_mode": {
238 | "name": "ipython",
239 | "version": 3
240 | },
241 | "file_extension": ".py",
242 | "mimetype": "text/x-python",
243 | "name": "python",
244 | "nbconvert_exporter": "python",
245 | "pygments_lexer": "ipython3",
246 | "version": "3.10.9"
247 | }
248 | },
249 | "nbformat": 4,
250 | "nbformat_minor": 1
251 | }
252 |
--------------------------------------------------------------------------------
/solutions/AdvML_UniTS_2024_Lab_07_Permutation_Equivariance_Solved.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "source": [
6 | "# [Solved] Lab 7: **Geometric Deep Learning** and *permutation equivariance*\n",
7 | "\n",
8 | "Advanced Topics in Machine Learning -- Fall 2023, UniTS\n",
9 | "\n",
10 | "
\n"
11 | ],
12 | "metadata": {
13 | "collapsed": false
14 | }
15 | },
16 | {
17 | "cell_type": "markdown",
18 | "source": [
19 | "#### Geometric Deep Learning\n",
20 | "\n",
21 | "The nowadays popular and growing subfield of *Geometric Deep Learning* is concerned with the study of deep learning models explicitly designed to learn representations that are invariant/equivariant to certain transformations. Such goal is very often achieved thanks to a *deeper* understanding of the mathematical properties of data, representations, and how a given model is able to capture them."
22 | ],
23 | "metadata": {
24 | "collapsed": false
25 | }
26 | },
27 | {
28 | "cell_type": "markdown",
29 | "source": [
30 | "#### *Deep Sets* and *permutation equivariance*\n",
31 | "\n",
32 | "In this lab, we will focus on a very simple mathematical construction, introduced in the [*Deep Sets* paper (2017)](https://arxiv.org/abs/1703.06114), able to exhibit in theory *permutation equivariance* with respect to input data, by design. Reading the paper is strongly recommended before starting this lab.\n",
33 | "\n",
34 | "Recall the *permutation equivariance* property, as stated in the paper:\n",
35 | "\n",
36 | "\n",
37 | "\n",
38 | "with $f$ being the model, $x$ a set of input data, and $\\pi$ a permutation of the indices.\n",
39 | "\n",
40 | "1. Implement such function (you can use *plain* `NumPy` !), as described in the following snippet from the paper:\n",
41 | "\n",
42 | "\n",
43 | "2. Check that the function is indeed permutation equivariant, applying it on synthetic data (you can generate them yourself), as we did in the previous lab(s) with images.\n"
44 | ],
45 | "metadata": {
46 | "collapsed": false
47 | }
48 | },
49 | {
50 | "cell_type": "code",
51 | "execution_count": 1,
52 | "outputs": [],
53 | "source": [
54 | "import functools\n",
55 | "import numpy as np"
56 | ],
57 | "metadata": {
58 | "collapsed": false,
59 | "ExecuteTime": {
60 | "end_time": "2023-09-17T01:14:05.214987Z",
61 | "start_time": "2023-09-17T01:14:05.138265400Z"
62 | }
63 | }
64 | },
65 | {
66 | "cell_type": "code",
67 | "execution_count": 2,
68 | "outputs": [],
69 | "source": [
70 | "def theta(_xshape, _lambda, _gamma):\n",
71 | " return _lambda * np.eye(_xshape) + _gamma * np.ones((_xshape, _xshape))\n",
72 | "\n",
73 | "\n",
74 | "def mish(_x):\n",
75 | " return _x * np.tanh(np.log(1 + np.exp(_x)))\n",
76 | "\n",
77 | "\n",
78 | "def f(_x, _lambda=1, _gamma=1):\n",
79 | " out = np.matmul(_x, theta(_x.shape[0], _lambda, _gamma))\n",
80 | " return mish(out) # Any nonlinearity suffices! :)"
81 | ],
82 | "metadata": {
83 | "collapsed": false,
84 | "ExecuteTime": {
85 | "end_time": "2023-09-17T01:14:05.228250500Z",
86 | "start_time": "2023-09-17T01:14:05.218063700Z"
87 | }
88 | }
89 | },
90 | {
91 | "cell_type": "code",
92 | "execution_count": 3,
93 | "outputs": [],
94 | "source": [
95 | "# Generate some data\n",
96 | "xsize = 200\n",
97 | "nperms = 200\n",
98 | "\n",
99 | "xlist = [\n",
100 | " np.random.rand(xsize),\n",
101 | "]\n",
102 | "for _ in range(nperms - 1):\n",
103 | " xlist.append(np.random.permutation(xlist[0]))\n",
104 | "\n",
105 | "x = np.stack(xlist)"
106 | ],
107 | "metadata": {
108 | "collapsed": false,
109 | "ExecuteTime": {
110 | "end_time": "2023-09-17T01:14:05.240813700Z",
111 | "start_time": "2023-09-17T01:14:05.218063700Z"
112 | }
113 | }
114 | },
115 | {
116 | "cell_type": "code",
117 | "execution_count": 4,
118 | "outputs": [],
119 | "source": [
120 | "# Apply function\n",
121 | "newf = functools.partial(f, _lambda=0.25, _gamma=0.4)\n",
122 | "y = np.apply_along_axis(newf, 1, x)"
123 | ],
124 | "metadata": {
125 | "collapsed": false,
126 | "ExecuteTime": {
127 | "end_time": "2023-09-17T01:14:05.290007100Z",
128 | "start_time": "2023-09-17T01:14:05.246223500Z"
129 | }
130 | }
131 | },
132 | {
133 | "cell_type": "code",
134 | "execution_count": 5,
135 | "outputs": [
136 | {
137 | "name": "stdout",
138 | "output_type": "stream",
139 | "text": [
140 | "Success!\n"
141 | ]
142 | }
143 | ],
144 | "source": [
145 | "ysorted = np.sort(y, axis=1)\n",
146 | "if np.isclose(ysorted[0], ysorted).all():\n",
147 | " print(\"Success!\")"
148 | ],
149 | "metadata": {
150 | "collapsed": false,
151 | "ExecuteTime": {
152 | "end_time": "2023-09-17T01:14:05.290007100Z",
153 | "start_time": "2023-09-17T01:14:05.278363400Z"
154 | }
155 | }
156 | }
157 | ],
158 | "metadata": {
159 | "kernelspec": {
160 | "display_name": "Python 3",
161 | "language": "python",
162 | "name": "python3"
163 | },
164 | "language_info": {
165 | "codemirror_mode": {
166 | "name": "ipython",
167 | "version": 2
168 | },
169 | "file_extension": ".py",
170 | "mimetype": "text/x-python",
171 | "name": "python",
172 | "nbconvert_exporter": "python",
173 | "pygments_lexer": "ipython2",
174 | "version": "2.7.6"
175 | }
176 | },
177 | "nbformat": 4,
178 | "nbformat_minor": 0
179 | }
180 |
--------------------------------------------------------------------------------
/solutions/AdvML_UniTS_2024_Lab_08_Implicit_Bias_Solved.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# [Solved] Lab 8: **Implicit bias** of gradient descent: the case of *linear regression*\n",
8 | "\n",
9 | "Advanced Topics in Machine Learning -- Fall 2023, UniTS\n",
10 | "\n",
11 | "
"
12 | ]
13 | },
14 | {
15 | "cell_type": "markdown",
16 | "metadata": {},
17 | "source": [
18 | "#### Overview of the *Lab*\n",
19 | "\n",
20 | "In this lab, we will study the *implicit bias* induced by *Gradient Descent* optimization in the simple case of *linear regression*, fitted on a *toy* dataset. In particular, we will show that *GD*-optimized weights converge to the **least norm** solution of the *linear regression* problem.\n",
21 | "\n",
22 | "An analysis of implicit bias induced by *Stochastic Gradient Descent* in *full-width linear fully-connected* and *full-width linear convolutional* neural networks (which are much more complex and expressive models!) is provided in [this paper](https://arxiv.org/abs/1806.00468).\n"
23 | ]
24 | },
25 | {
26 | "cell_type": "markdown",
27 | "source": [
28 | "#### Linear regression\n",
29 | "\n",
30 | "In the case of **linear regression**, fitted by means of *least squares*, we optimize the following loss function:\n",
31 | "$$\n",
32 | "L=\\|y-Xw\\|_{2}^{2}\n",
33 | "$$.\n",
34 | "\n",
35 | "If we choose the *GD* optimization algorithm, we perform weight updates proportional to the gradient of the loss function:\n",
36 | "$$\n",
37 |     "\\nabla_{w} L = -2X^{T}(y-Xw)\n",
38 | "$$.\n",
39 | "\n",
40 |     "Additionally, notice that the **least norm** solution of the *linear regression* problem is given by the Moore-Penrose pseudoinverse of $X$ (here $X^{T}X$ is invertible, since $X$ has full column rank):\n",
41 |     "$$\n",
42 |     "w^{*}=X^{+}y=(X^{T}X)^{-1}X^{T}y\n",
43 |     "$$.\n"
44 | ],
45 | "metadata": {
46 | "collapsed": false
47 | }
48 | },
49 | {
50 | "cell_type": "code",
51 | "execution_count": 1,
52 | "outputs": [],
53 | "source": [
54 | "import numpy as np\n",
55 | "import matplotlib.pyplot as plt"
56 | ],
57 | "metadata": {
58 | "collapsed": false,
59 | "ExecuteTime": {
60 | "end_time": "2023-09-17T01:16:45.788352200Z",
61 | "start_time": "2023-09-17T01:16:45.494480700Z"
62 | }
63 | }
64 | },
65 | {
66 | "cell_type": "markdown",
67 | "source": [
68 | "#### To-do:\n",
69 | "\n",
70 | "The following *toy* dataset is provided:"
71 | ],
72 | "metadata": {
73 | "collapsed": false
74 | }
75 | },
76 | {
77 | "cell_type": "code",
78 | "execution_count": 2,
79 | "outputs": [],
80 | "source": [
81 | "m, n = 1000, 10\n",
82 | "X = np.random.normal(0, 1, (m, n))\n",
83 | "b = X.dot(np.random.normal(0, 1, n))"
84 | ],
85 | "metadata": {
86 | "collapsed": false,
87 | "ExecuteTime": {
88 | "end_time": "2023-09-17T01:16:45.798408700Z",
89 | "start_time": "2023-09-17T01:16:45.793390400Z"
90 | }
91 | }
92 | },
93 | {
94 | "cell_type": "markdown",
95 | "source": [
96 | "1. Compute the *least norm* solution of the linear regression problem;\n",
97 | "2. Write a function that computes the gradient of the loss function, as required by *GD* optimization;\n",
98 | "3. Perform *GD* optimization of the linear regression problem iteratively, storing the weights at each iteration;\n",
99 | "4. Plot the evolution of the weights during *GD* optimization and comment.\n"
100 | ],
101 | "metadata": {
102 | "collapsed": false
103 | }
104 | },
105 | {
106 | "cell_type": "code",
107 | "execution_count": 3,
108 | "outputs": [],
109 | "source": [
110 | "def least_norm_reg(_x, _b, _eps):\n",
111 | " # The regularizer `eps` allows to make the matrix invertible!\n",
112 | " return np.linalg.inv(_x.T.dot(_x) + _eps * np.eye(n)).dot(_x.T).dot(_b)"
113 | ],
114 | "metadata": {
115 | "collapsed": false,
116 | "ExecuteTime": {
117 | "end_time": "2023-09-17T01:16:45.798408700Z",
118 | "start_time": "2023-09-17T01:16:45.798408700Z"
119 | }
120 | }
121 | },
122 | {
123 | "cell_type": "code",
124 | "execution_count": 4,
125 | "outputs": [],
126 | "source": [
127 | "def least_squares_reg_gradient(_x, _b, _w, _m):\n",
128 | " return _x.T.dot(_x.dot(_w) - _b) / _m"
129 | ],
130 | "metadata": {
131 | "collapsed": false,
132 | "ExecuteTime": {
133 | "end_time": "2023-09-17T01:16:45.808562700Z",
134 | "start_time": "2023-09-17T01:16:45.798408700Z"
135 | }
136 | }
137 | },
138 | {
139 | "cell_type": "code",
140 | "execution_count": 5,
141 | "outputs": [],
142 | "source": [
143 | "def run_gd(steps, init_w, grad_fx):\n",
144 | " ws = [init_w]\n",
145 | " for step in steps:\n",
146 | " ws.append(ws[-1] - step * grad_fx(ws[-1]))\n",
147 | " return ws"
148 | ],
149 | "metadata": {
150 | "collapsed": false,
151 | "ExecuteTime": {
152 | "end_time": "2023-09-17T01:16:45.837750100Z",
153 | "start_time": "2023-09-17T01:16:45.808562700Z"
154 | }
155 | }
156 | },
157 | {
158 | "cell_type": "code",
159 | "execution_count": 6,
160 | "outputs": [
161 | {
162 | "data": {
163 | "text/plain": "",
164 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAh8AAAGdCAYAAACyzRGfAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8pXeV/AAAACXBIWXMAAA9hAAAPYQGoP6dpAAA40klEQVR4nO3deXxU9b3/8feZmcxkITMhJGSRsO/7KrKouGHR4latWmtd7v3VVlApXZRrF7W1UW9rtcWlaGtpvbhdhWtrrdKKIKKVAEEERJZAwhISIGSyTpKZ8/sjyUAkLElm5kwyr+ej5zGZc87M+fAtmrff7/d8j2GapikAAIAIsVldAAAAiC2EDwAAEFGEDwAAEFGEDwAAEFGEDwAAEFGEDwAAEFGEDwAAEFGEDwAAEFEOqwv4skAgoP379ys5OVmGYVhdDgAAOAOmaaqiokLZ2dmy2U7dtxF14WP//v3KycmxugwAANAORUVF6tWr1ynPibrwkZycLKmxeLfbbXE1AADgTHi9XuXk5AR/j59K1IWP5qEWt9tN+AAAoJM5kykTTDgFAAARRfgAAAARRfgAAAARFXVzPgAA0cnv96u+vt7qMmAhu90uh8PR4aUwCB8AgNOqrKzU3r17ZZqm1aXAYomJicrKypLT6Wz3dxA+AACn5Pf7tXfvXiUmJio9PZ0FIGOUaZqqq6tTaWmpCgoKNGjQoNMuJnYyhA8AwCnV19fLNE2lp6crISHB6nJgoYSEBMXFxWnPnj2qq6tTfHx8u76HCacAgDNCjwcktbu3o8V3hKAOAACAM0b4AAAAEUX4AAAAEUX4AAAghhUVFWnGjBkaPny4Ro8erddeey3s14yZu128tfX64+oCHThaq0evHW11OQAARAWHw6EnnnhCY8eOVUlJicaPH6/LLrtMSUlJYbtmzPR8OGyGnvjndr2SV6SyqjqrywEARMD777+vvn37Bl9j2cnaIisrS2PHjpUk9ezZU6mpqTpy5EhYa4mZ8JHodCjb03g/8q5DlRZXAwBA9MnLy1MgEFBOTk5YrxMz4UOSBvTsJknaWVJlcSUAAESXw4cP61vf+pYWLVoU9mvFVPjon9Y4frWTng8AiHnnnHOOfvOb3wTfX3/99TIMQ1VVjf+Bun//fjmdTm3dutWqElsIZ70+n09XX321FixYoKlTp4as5pNpc/hYtWqVZs+erezsbBmGoWXLlp1wztatW3XFFVfI4/EoOTlZ55xzjgoLC0NRb4fQ8wEAaJaSkqKKigpJjXd8vPPOO0pOTlZZWZkkadGiRbrwwgs1bNgwK8sMCle9pmnq1ltv1YUXXqibb7455HW3ps3ho6qqSmPGjNHChQtbPb5z505Nnz5dQ4cO1fvvv6+NGzfqJz/5SbvXfw+l/mmN4WNXKT0fANBepmmquq7Bki2UT9Xt3r27Kisbfx8sXLhQN910k9LT01VWVqb6+notWrRI99xzT8iu11HhqvfDDz/UK6+8omXLlmns2LEaO3asNm3aFOryW2jzrbazZs3SrFmzTnr8/vvv12WXXabHHnssuK9///7tqy7EBvRsHHYpPFKten9AcfaYGnUCgJCoqfdr+E/fseTaWx66VInO0KwS0dyTUFVVpeeff14fffSR1qxZo7KyMi1dulTJycn6yle+0ubvPXr0qF599VV9+9vfbvX4Aw88oAcffPCU37F27VpNnDixQ/Xu2rVLmzdv1uzZs095renTpysQCJzhny40QvrbNxAI6K233tLgwYN16aWXqmfPnpo8eXKrQzPNfD6fvF5viy1cMt3xSnTa1RAwtedwddiuAwCIfs09CYsXL9aUKVM0ePBgud1ulZWV6amnntLdd9/drofpHT169JSTNufOnautW7eechs5cmSH63377bf1+eeft7n+SAjpImMlJSWqrKzUI488ol/84hd69NFH9Y9//EPXXHONVqxYofPPP/+Ez+Tm5p42AYaKYRjq
n56kz/Z5tau0UgOb5oAAAM5cQpxdWx661LJrh0pKSoo2b96sJ598Uk899ZQkye12a/Xq1dq4caPeeuut4LmPPvqo/vKXv8gwDN1333266aabtHv3bl1xxRUaOXKkPv30U51zzjn6/e9/r/vvv19btmzR2LFjdd111+n+++9vcd20tDSlpaWFtd6VK1fqxz/+sdLT07VkyRKtWbOmPU0UNiENH83dNldeeaW+973vSZLGjh2rNWvW6Nlnn201fCxYsEDz588Pvvd6vWG9v3hAejd9ts+rnaVMOgWA9jAMI2RDH1bq3r273nvvPfXt21cXX3yxpMZf5s8884zuuOMOdevW+B+oeXl5evXVV5WXl6fq6mpNmjRJF1xwgSTps88+0wsvvKDx48frqquu0uuvv66HH35Y27ZtU15eniX1StL555+vkSNHasmSJWFfs6M9QjrskpaWJofDoeHDh7fYP2zYsJPe7eJyueR2u1ts4cSkUwCAdGwY4/hJmm63WzU1NZo7d25w3+rVq/W1r31N8fHxSk1N1UUXXaS1a9dKkgYOHKgJEybIMAxdf/31Ye1hONN6m+3duzcqg4cU4p4Pp9OpSZMmadu2bS32f/HFF+rTp08oL9VuzZNOdxI+ACCmXXvttSfcPfPMM8/omWeeOeXnTNMMzq348pyQ9swROVNtqXfv3r0666yzwlZLR7W556OyslL5+fnKz8+XJBUUFCg/Pz/Ys/HDH/5Qr7zyip577jnt2LFDCxcu1F//+lfdeeedIS28vZp7PnaWVoX0li0AQNc0ffp0vfHGG/L5fCorK9OKFSs0adIkSdL27du1fv16maap1157TVOnTlVycnJwPQ6rFBQUKDs729IaTqXN4SMvL0/jxo3TuHHjJEnz58/XuHHj9NOf/lSSdPXVV+vZZ5/VY489plGjRun555/X66+/runTp4e28nbql5Ykw5DKa+p1hAfMAQBOY+LEibruuus0YcIEnXfeeXrwwQeVlZUlSRo9erQWLlyoUaNGKTU1Vddcc4169Oih8ePHa9SoUXr44YctqXnkyJHavn27Ro0aFZV3vLR52GXGjBmn7TG4/fbbdfvtt7e7qHBKcNp1VkqC9pbVaGdplXp0c1ldEgAgTPr27at58+YFX9vr3nvv1b333nvCfofDoT/+8Y8n7H/ppZfafa1Q6N69uzZs2NBiX6jaIhQ6/3Tlduif3k17y2q0q7RSZ/dLtbocAECYHP+L1upfuFaLpraIySU+B6Qz6RQA0DF9+/YN+e20sSImw0f/9ObbbVnrAwCASIvJ8EHPBwAA1onR8NHY81F4pFq+Br/F1QAAEFtiMnz0THapm8uhgCkV8oA5AAAiKibDh2EYDL0AAGCRmAwf0rFJpzxgDgDODKtCQwrN34OYDR/0fADAmbHbGx9jX1fHqtCQqqsbpyvExcW1+zticpExidttAeBMORwOJSYmqrS0VHFxcbLZYva/W2OaaZqqrq5WSUmJUlJSgqG0PWI2fAwIDrtUtnhCIQCgJcMwlJWVpYKCAu3Zs8fqcmCxlJQUZWZmdug7YjZ89OmRKMOQKmobVFrpU8/keKtLAoCo5XQ6NWjQIIZeYlxcXFyHejyaxWz4iI+zK6d7ogqPVGtXaRXhAwBOw2azKT6ef1ei42J64I5JpwAARF5Mhw8mnQIAEHkxHT6On3QKAAAiI6bDR/+mYRd6PgAAiJyYDh/NPR9FZdWqrecBcwAAREJMh4+0bk4lxztkmtLuw/R+AAAQCTEdPhofMMekUwAAIimmw4d03KTTEiadAgAQCTEfPoKTTg/R8wEAQCTEfPjgdlsAACKL8HHc7bamaVpcDQAAXV/Mh4/ePRJltxmq9DWopMJndTkAAHR5MR8+XA67cronSGLSKQAAkRDz4UM6bt4Hk04BAAg7woekAT253RYAgEghfEjqn8bttgAARArhQ/R8AAAQSW0OH6tWrdLs2bOVnZ0twzC0bNmyk557xx13
yDAMPfHEEx0oMfyaez72Ha1RTR0PmAMAIJzaHD6qqqo0ZswYLVy48JTnLVu2TP/+97+VnZ3d7uIiJTXJqZTEOElSAUMvAACElaOtH5g1a5ZmzZp1ynP27dunuXPn6p133tHll1/e7uIixTAM9U9L0vrCo9pZWqnh2W6rSwIAoMtqc/g4nUAgoJtvvlk//OEPNWLEiNOe7/P55PMdW9zL6/WGuqQzMiC9m9YXHuXptgAAhFnIJ5w++uijcjgcuvvuu8/o/NzcXHk8nuCWk5MT6pLOSHDSKc94AQAgrEIaPtatW6cnn3xSf/rTn2QYxhl9ZsGCBSovLw9uRUVFoSzpjB273ZbwAQBAOIU0fHzwwQcqKSlR79695XA45HA4tGfPHn3/+99X3759W/2My+WS2+1usVnh2O22VQoEeMAcAADhEtI5HzfffLMuvvjiFvsuvfRS3XzzzbrttttCeamQ652aKIfNUE29X8XeWmWnJFhdEgAAXVKbw0dlZaV27NgRfF9QUKD8/Hylpqaqd+/e6tGjR4vz4+LilJmZqSFDhnS82jCKs9vUu0eidpVWaVdpFeEDAIAwafOwS15ensaNG6dx48ZJkubPn69x48bppz/9aciLi7T+aUw6BQAg3Nrc8zFjxgyZ5pnPidi9e3dbL2GZAT2T9M+t0i7CBwAAYcOzXY4zIL2554O1PgAACBfCx3EGpDfdbkvPBwAAYUP4OE5zz8f+8lpV+RosrgYAgK6J8HGclESneia7JEmfF1dYXA0AAF0T4eNLmh8qt+WANc+YAQCgqyN8fMnwrKbwsZ/wAQBAOBA+voSeDwAAwovw8SXNPR+fH/CqwR+wuBoAALoewseX9OmRpESnXb6GgHYfZr0PAABCjfDxJXaboaGZyZKkzcz7AAAg5AgfrWDeBwAA4UP4aMXwLI8k7ngBACAcCB+tCPZ87Pe26SF6AADg9AgfrRiSkSybIR2uqlNphc/qcgAA6FIIH61IcNrVv+k5L5uZ9wEAQEgRPk6ClU4BAAgPwsdJcMcLAADhQfg4ieaej630fAAAEFKEj5MY1hQ+Cg5XqcrXYHE1AAB0HYSPk0hPdqlnskumKX1eXGF1OQAAdBmEj1Ng3gcAAKFH+DgF7ngBACD0CB+nQM8HAAChR/g4heaej88PeNXgD1hcDQAAXQPh4xT69EhSotMuX0NAuw9XWV0OAABdAuHjFOw2Q0MzkyVJm5n3AQBASBA+ToN5HwAAhBbh4zSGZ3kkcccLAAChQvg4jWDPx36vTNO0uBoAADo/wsdpDMlIls2QDlfVqbTCZ3U5AAB0em0OH6tWrdLs2bOVnZ0twzC0bNmy4LH6+nrde++9GjVqlJKSkpSdna1vfetb2r9/fyhrjqgEp13907tJkjYz7wMAgA5rc/ioqqrSmDFjtHDhwhOOVVdXa/369frJT36i9evX64033tAXX3yhK664IiTFWoWVTgEACB1HWz8wa9YszZo1q9VjHo9Hy5cvb7Hvd7/7nc4++2wVFhaqd+/e7avSYsOz3Xpz437ueAEAIATaHD7aqry8XIZhKCUlpdXjPp9PPt+xuRReb/T9gm/u+dhKzwcAAB0W1gmntbW1uu+++/SNb3xDbre71XNyc3Pl8XiCW05OTjhLapdhTeGj4HCVqnwNFlcDAEDnFrbwUV9frxtuuEGBQEBPP/30Sc9bsGCBysvLg1tRUVG4Smq39GSXeia7ZJrS58UVVpcDAECnFpbwUV9fr69//esqKCjQ8uXLT9rrIUkul0tut7vFFo1Y6RQAgNAIefhoDh7bt2/XP//5T/Xo0SPUl7AEd7wAABAabZ5wWllZqR07dgTfFxQUKD8/X6mpqcrOzta1116r9evX629/+5v8fr+Ki4slSampqXI6naGrPMLo+QAAIDTaHD7y8vJ0wQUXBN/Pnz9fknTLLbfogQce0JtvvilJGjt2bIvPrVixQjNmzGh/pRZr7vn4/IBXDf6AHHYWhwUA
oD3aHD5mzJhxymecdNXnn/TpkaREp13VdX7tPlylgT2TrS4JAIBOif98P0N2m6GhmY2BYzPzPgAAaDfCRxsw7wMAgI4jfLTB8CyPJO54AQCgIwgfbRDs+djv7bJzWwAACDfCRxsMyUiWzZAOV9WptMJ3+g8AAIATED7aIMFpV//0bpKkzcz7AACgXQgfbcRKpwAAdAzho4244wUAgI4hfLRRc8/HVno+AABoF8JHGw1rCh8Fh6tU5WuwuBoAADofwkcbpSe7lO2Jl2lKG/cetbocAAA6HcJHO4zv012StH5PmcWVAADQ+RA+2mFCU/hYR/gAAKDNCB/tMLFPqiRpfeFRBQKsdAoAQFsQPtphaFayEuLsKq+p165DlVaXAwBAp0L4aIc4u01jchofMpe3m6EXAADagvDRTsz7AACgfQgf7RQMH4WEDwAA2oLw0U7jezeGj12lVTpSVWdxNQAAdB6Ej3ZKSXRqYM/GJ9xuoPcDAIAzRvjogAlNvR95zPsAAOCMET46gEmnAAC0HeGjA5qXWd9YdFT1/oDF1QAA0DkQPjqgf1qSUhLj5GsIaMt+r9XlAADQKRA+OsBmM4LzPhh6AQDgzBA+Omg8630AANAmhI8OCk463V0m0+QhcwAAnA7ho4PG9EqR3Wao2Fur/eW1VpcDAEDUI3x0UILTrhHZbknM+wAA4EwQPkKgean19YQPAABOq83hY9WqVZo9e7ays7NlGIaWLVvW4rhpmnrggQeUnZ2thIQEzZgxQ5s3bw5VvVFpYl/ueAEA4Ey1OXxUVVVpzJgxWrhwYavHH3vsMT3++ONauHCh1q5dq8zMTF1yySWqqKjocLHRqnnS6ZYDXlX5GiyuBgCA6OZo6wdmzZqlWbNmtXrMNE098cQTuv/++3XNNddIkhYvXqyMjAwtWbJEd9xxR8eqjVJZngRle+K1v7xWG/ce1dQBaVaXBABA1ArpnI+CggIVFxdr5syZwX0ul0vnn3++1qxZ0+pnfD6fvF5vi60zal7vg3kfAACcWkjDR3FxsSQpIyOjxf6MjIzgsS/Lzc2Vx+MJbjk5OaEsKWJ4yBwAAGcmLHe7GIbR4r1pmifsa7ZgwQKVl5cHt6KionCUFHYT+6RKktYXHlUgwGJjAACcTEjDR2ZmpiSd0MtRUlJyQm9IM5fLJbfb3WLrjIZmJSshzq7ymnrtLK20uhwAAKJWSMNHv379lJmZqeXLlwf31dXVaeXKlZo6dWooLxV14uw2jcnxSGLoBQCAU2lz+KisrFR+fr7y8/MlNU4yzc/PV2FhoQzD0Lx58/TLX/5SS5cu1WeffaZbb71ViYmJ+sY3vhHq2qMO8z4AADi9Nt9qm5eXpwsuuCD4fv78+ZKkW265RX/605/0ox/9SDU1NbrzzjtVVlamyZMn691331VycnLoqo5SE3jCLQAAp2WYUfYoVq/XK4/Ho/Ly8k43/+NodZ3GPtQ45LT+J5coNclpcUUAAERGW35/82yXEEpJdGpgz26SpA30fgAA0CrCR4hNaHrIXB7zPgAAaBXhI8SYdAoAwKkRPkKseZn1jUVHVe8PWFwNAADRh/ARYv3TkpSSGCdfQ0Bb9nfO59QAABBOhI8Qs9mM4LwPhl4AADgR4SMMxjPvAwCAkyJ8hEHzpNO8PUcUZcuoAABgOcJHGIzplSKHzdBBr097y2qsLgcAgKhC+AiDBKdd43qnSJJWbS+1thgAAKIM4SNMzhuULkn64ItDFlcCAEB0IXyEybmDG8PHhzsPqYH1PgAACCJ8hMmoszxKSYxTRW2DNu49anU5AABEDcJHmNhthqYNTJMkrWLoBQCAIMJHGJ3fNO+DSacAABxD+Aijcwc39nxsLDqq8up6i6sBACA6ED7CKMuToEE9uylgNk48BQAAhI+wO7f5lluGXgAAkET4CLvzBh+bdMpS6wAAED7CbnK/HnI6bNp3tEa7DlVZXQ4AAJYjfIRZgtOus/umSpJWfcHQCwAAhI8I
OHdQ49DLB9uZdAoAAOEjAs5rWmr9o52H5WvwW1wNAADWInxEwNDMZKUnu1RT79e6PWVWlwMAgKUIHxFgGEZw6IWl1gEAsY7wESHnsd4HAACSCB8RM72p52Pzfq9KK3wWVwMAgHUIHxGS1s2lEdluSdKHOxh6AQDELsJHBDXf9cJ6HwCAWEb4iKDgpNPtLLUOAIhdIQ8fDQ0N+vGPf6x+/fopISFB/fv310MPPaRAIBDqS3U6E/p0V6LTrkOVPm09UGF1OQAAWMIR6i989NFH9eyzz2rx4sUaMWKE8vLydNttt8nj8eiee+4J9eU6FZfDrnP699B7n5fog+2lGt40BwQAgFgS8p6Pjz76SFdeeaUuv/xy9e3bV9dee61mzpypvLy8UF+qUzovOPTCvA8AQGwKefiYPn26/vWvf+mLL76QJG3cuFGrV6/WZZdd1ur5Pp9PXq+3xdaVnds06XRtQZlq6lhqHQAQe0I+7HLvvfeqvLxcQ4cOld1ul9/v18MPP6wbb7yx1fNzc3P14IMPhrqMqNU/LUlnpSRo39EafVxwWBcM6Wl1SQAARFTIez5eeeUVvfjii1qyZInWr1+vxYsX61e/+pUWL17c6vkLFixQeXl5cCsqKgp1SVHFMAydN7jpKbcstQ4AiEEh7/n44Q9/qPvuu0833HCDJGnUqFHas2ePcnNzdcstt5xwvsvlksvlCnUZUe28Qel66ZMi5n0AAGJSyHs+qqurZbO1/Fq73c6ttseZOiBNNkPaUVKp/UdrrC4HAICICnn4mD17th5++GG99dZb2r17t5YuXarHH39cV199dagv1Wl5EuM0NidFEg+aAwDEnpCHj9/97ne69tprdeedd2rYsGH6wQ9+oDvuuEM///nPQ32pTu3cpqfcrtrOvA8AQGwxzChb59vr9crj8ai8vFxud9ddhGvdnjJ97Zk18iTEaf1PLpHdZlhdEgAA7daW398828UiY3p55I53qLymXnm7j1hdDgAAEUP4sIjDbtMlwzMlSX/fdMDiagAAiBzCh4W+OjpLkvT2Z8UKBKJq9AsAgLAhfFho2sA0ueMdKqnwKW9PmdXlAAAQEYQPCzkdx4Ze3vp0v8XVAAAQGYQPizH0AgCINYQPizH0AgCINYQPizH0AgCINYSPKMDQCwAglhA+ogBDLwCAWEL4iAIMvQAAYgnhI0ocP/TiZ+gFANCFET6iRIuhF571AgDowggfUeL4oRee9QIA6MoIH1GEoRcAQCwgfEQRhl4AALGA8BFFGHoBAMQCwkeUYegFANDVET6iDEMvAICujvARZRh6AQB0dYSPKNQ89PJ3hl4AAF0Q4SMKNQ+9lDL0AgDogggfUYihFwBAV0b4iFIMvQAAuirCR5Ri6AUA0FURPqIUQy8AgK6K8BHFGHoBAHRFhI8oNm1gmjwJcSqt8Gn1jkNWlwMAQEgQPqKY02HT1ePOkiQt+fcei6sBACA0CB9R7huTe0uS/rm1RAe9tRZXAwBAx4UlfOzbt0/f/OY31aNHDyUmJmrs2LFat25dOC7V5Q3OSNakvt3lD5h6ZW2R1eUAANBhIQ8fZWVlmjZtmuLi4vT2229ry5Yt+vWvf62UlJRQXypmNPd+vPxJIRNPAQCdniPUX/joo48qJydHL7zwQnBf3759Q32ZmDJrZJYe/OsW7S+v1covSnTh0AyrSwIAoN1C3vPx5ptvauLEibruuuvUs2dPjRs3Ts8999xJz/f5fPJ6vS02tBQfZ9e143tJkv7n40KLqwEAoGNCHj527dqlZ555RoMGDdI777yj73znO7r77rv15z//udXzc3Nz5fF4gltOTk6oS+oSbmwaelmxrUT7j9ZYXA0AAO1nmKYZ0kkETqdTEydO1Jo1a4L77r77bq1du1YfffTRCef7fD75fL7ge6/Xq5ycHJWXl8vtdoeytE7vxkUf66Ndh3X3RYM0/5LBVpcDAECQ1+uVx+M5o9/fIe/5yMrK0vDhw1vsGzZs
mAoLWx8ucLlccrvdLTa0rnni6StrC9XgD1hcDQAA7RPy8DFt2jRt27atxb4vvvhCffr0CfWlYs6lIzLVI8mpg16f/vV5idXlAADQLiEPH9/73vf08ccf65e//KV27NihJUuWaNGiRZozZ06oLxVznA6brpvYOCdmyb+ZeAoA6JxCHj4mTZqkpUuX6qWXXtLIkSP185//XE888YRuuummUF8qJt14dmP4WLW9VEVHqi2uBgCAtgv5hNOOasuElVh18x/+rQ+2H9KdMwboR18ZanU5AABYO+EU4XdT08TTV/P2qq6BiacAgM6F8NEJXTQsQ+nJLh2q9Gn5loNWlwMAQJsQPjqhOLtN1zdPPP1kj8XVAADQNoSPTuqGs3NkGNKHOw6r4FCV1eUAAHDGCB+dVK/uiZoxOF2S9NIn3HYLAOg8CB+d2DcmNy7c9r/r9srX4Le4GgAAzgzhoxO7YEi6sjzxOlJVp398Vmx1OQAAnBHCRyfmsNt0/aTGiaf/w4qnAIBOgvDRyV0/KUc2Q/qk4Ii2HvBaXQ4AAKdF+OjksjwJmjUqS5K0cMUOi6sBAOD0CB9dwF0XDpQk/X3TAe0oqbC4GgAATo3w0QUMzXRr5vAMmab01IqdVpcDAMApET66iLsuHCRJ+r/8fdrNomMAgChG+OgiRvXy6IIh6QqY0tPvM/cDABC9CB9dyF0XNfZ+vLF+n4qOVFtcDQAArSN8dCHje3fX9IFpagiYenYlcz8AANGJ8NHFNN/58lreXhWX11pcDQAAJyJ8dDGT+/fQ2f1SVecP0PsBAIhKhI8u6O6mO19e+qRQJRX0fgAAogvhowuaNrCHxvVOka8hoOc/KLC6HAAAWiB8dEGGYQR7P178eI+OVNVZXBEAAMcQPrqoGUPSNeosj6rr/PrD6l1WlwMAQBDho4syDENzm+58Wbxmj8qr6y2uCACARoSPLuySYRkampmsSl+DXljD3A8AQHQgfHRhNtux3o8/ri5QRS29HwAA6xE+urhZI7M0ID1J3toG/fmjPVaXAwAA4aOrsx/X+/GH1QWq9DVYXBEAINYRPmLA7NHZ6peWpCNVdVr4Hk+8BQBYi/ARAxx2m+6/bJgk6Q+rd2lXaaXFFQEAYhnhI0ZcNKynZgxJV73f1EN/2yLTNK0uCQAQo8IePnJzc2UYhubNmxfuS+EUDMPQT786XHF2Q+9vK9W/tpZYXRIAIEaFNXysXbtWixYt0ujRo8N5GZyh/und9B/T+0uSHvrbFtXW+y2uCAAQi8IWPiorK3XTTTfpueeeU/fu3cN1GbTRXRcOVIbbpcIj1XpuFcuuAwAiL2zhY86cObr88st18cUXn/I8n88nr9fbYkP4JLkc+q+myadPvb9D+47WWFwRACDWhCV8vPzyy1q/fr1yc3NPe25ubq48Hk9wy8nJCUdJOM4VY7J1dt9U1dYH9Mu3tlpdDgAgxoQ8fBQVFemee+7Riy++qPj4+NOev2DBApWXlwe3oqKiUJeELzEMQw9cMUI2Q3pr0wGt2XHI6pIAADEk5OFj3bp1Kikp0YQJE+RwOORwOLRy5Ur99re/lcPhkN/fcpKjy+WS2+1usSH8hme79c1z+kiSHvjrZtX7AxZXBACIFSEPHxdddJE2bdqk/Pz84DZx4kTddNNNys/Pl91uD/Ul0U7zLxms7olx+uJgpf7Cc18AABES8vCRnJyskSNHttiSkpLUo0cPjRw5MtSXQwekJDr1w0uHSpJ+s/wLlVb4LK4IABALWOE0xl0/KUcjz3Krwtegx/7xudXlAABigGFG2TrbXq9XHo9H5eXlzP+IkHV7yvS1Z9ZIkpbeOVXjerMuCwCgbdry+5ueD2hCn+66ZvxZkqQH3tysQCCq8igAoIshfECSdN+soermcmjj3nIt+oCVTwEA4UP4gCSpZ3K8fnx548qnv353mzbtLbe4IgBAV0X4QND1k3L0lRGZqvebuuflDaqua7C6JABAF0T4QJBhGMq9
ZpQy3fHadahKP/8bS68DAEKP8IEWuic59fjXx8gwpJc+KdQ/Piu2uiQAQBdD+MAJpg5M07fP7S9Juu+NT1VcXmtxRQCAroTwgVZ9f+YQjTzLraPV9fr+a/ncfgsACBnCB1rldNj05A3jFB9n04c7Duv51dx+CwAIDcIHTmpAejf99KsjJEn//c42fbaP228BAB1H+MAp3Xh2ji4dkRG8/bamzm91SQCATo7wgVMyDEOPXDNaGW6XdpZW6edvbbG6JABAJ0f4wGl1T3Lq19eNlSQt+Xeh3t3M7bcAgPYjfOCMTB+Upm+f13j77b2vf6q9ZdUWVwQA6KwIHzhjP2i6/basul63/2mtvLX1VpcEAOiECB84Y06HTYtunqgMt0tfHKzUnS+uV70/YHVZAIBOhvCBNslOSdAfbpmkRKddq3cc0o+XfibTZAEyAMCZI3ygzUae5dHvbhwnmyG9klekp9/faXVJAIBOhPCBdrloWIZ+NvvYAmR/3bjf4ooAAJ0F4QPtdsvUvrp9Wj9J0vdf26i83UcsrggA0BkQPtAh918+TJcMz1BdQ0D/78952n2oyuqSAABRjvCBDrHbDD15w1iN7uVRWXW9bvvTWpVV1VldFgAgihE+0GGJToeev2WizkpJUMGhKt3xl3XyNfAMGABA6wgfCImeyfH6462TlOxy6JPdR3Tv/36qQIBbcAEAJyJ8IGSGZCbr6W+Ol8NmaFn+fi14Y5P8BBAAwJcQPhBS5w5K16+uGxNcA2T+q/msggoAaIHwgZC7atxZ+t2NjT0g/5e/X3OXrFddAwEEANCI8IGwuHx0lp795gQ57Ta9s/mgvv2XPNXWMwkVAED4QBhdPDxDf7h1ouLjbHp/W6lue2GtqnwNVpcFALAY4QNhde6gdC2+7WwlOe36aNdhfeuPn8hbW291WQAAC4U8fOTm5mrSpElKTk5Wz549ddVVV2nbtm2hvgw6kcn9e+jF/5wsd7xD6/aU6abn/s1CZAAQw0IePlauXKk5c+bo448/1vLly9XQ0KCZM2eqqoplt2PZuN7d9dK3z1FqklOb9pXrxuc+VmmFz+qyAAAWMEzTDOtCDKWlperZs6dWrlyp884777Tne71eeTwelZeXy+12h7M0WGD7wQp94/l/q7TCp/7pSXrh1knq0yPJ6rIAAB3Ult/fYZ/zUV5eLklKTU1t9bjP55PX622xoesalJGsV++YomxPvHaVVmn271br/W0lVpcFAIigsIYP0zQ1f/58TZ8+XSNHjmz1nNzcXHk8nuCWk5MTzpIQBfqlJemNO6dpXO8UeWsbdNuf1uqpFTsU5k44AECUCOuwy5w5c/TWW29p9erV6tWrV6vn+Hw++XzHxv69Xq9ycnIYdokBvga/Hnhzs176pEiS9JURmfrV18eom8thcWUAgLaKimGXu+66S2+++aZWrFhx0uAhSS6XS263u8WG2OBy2JV7zWj98upRirMb+sfmYl391IfaVVppdWkAgDAKefgwTVNz587VG2+8offee0/9+vUL9SXQxXxjcm+9/O0p6pns0vaSSl258EP9a+tBq8sCAIRJyMPHnDlz9OKLL2rJkiVKTk5WcXGxiouLVVNTE+pLoQuZ0Ke7/nbXdE3s010Vvgb9x+I8PfnP7QrwVFwA6HJCPufDMIxW97/wwgu69dZbT/t5brWNbXUNAf38b1v0l4/3SJIuHpahx64drdQkp8WVAQBOpS2/v8O+zkdbET4gSa/mFenHyz5TXUNAPZKceujKkbp8dJbVZQEATiIqJpwCHfH1iTl647tTNSQjWYer6jRnyXp998V1rIoKAF0A4QNRa+RZHr151zTddeFA2W2G3v6sWDN/s1L/l7+PNUEAoBMjfCCquRx2fX/mEP3fnGkaluVWWXW97nk5X//vz+tU4q21ujwAQDsQPtApjDzLozfnTtP8SwYrzm7on1sP6uLHV+p/1+2lFwQAOhnCBzqNOLtNd180SH+9a7pGneWR
t7ZBP3hto27701oWJgOAToTwgU5naKZbS++cqh99ZYicdpve31aqS36zSj9Z9hkTUgGgE+BWW3RqO0oqlfv3rfrX541Pxk1y2nXH+QP0n+f2U6KTZ8QAQKSwzgdizkc7Dyv37a36dG+5JKlnskvfu2SwrpvQSw47HXwAEG6ED8SkQMDUW5sO6LF3PlfRkcbl/Af27Kb7vjJUFw3redLVdwEAHUf4QEzzNfj1Px8X6rfvbdfR6npJ0tn9UjXngoE6b1AaIQQAwoDwAUgqr6nXsyt36o+rC+RrCEiShmYm6z/P7a8rxmTL6WA4BgBChfABHGf/0Ro9/0GBXl5bqOo6vyQpw+3SbdP66caze8uTEGdxhQDQ+RE+gFaUV9drySeFeuHDApU03ZKb5LTrhrN767ZpfdWre6LFFQJA50X4AE6hriGgNzfu13OrdmnbwQpJkt1m6LJRWbpxUo7O6d9DNhvzQgCgLQgfwBkwTVMrvyjVcx/s0oc7Dgf3n5WSoK9N6KVrx/dS7x70hgDAmSB8AG302b5yLfmkUH/N368KX0Nw/+R+qbp2Qi9dNipLSS4WLQOAkyF8AO1UW+/XO5uL9b/r9mr1jkNq/qcj0WnXZaOy9LXxvXR2v1TZGZYBgBYIH0AI7D9ao6Ub9um1vCLtPlwd3N8jyamLh2Vo5ogMTRuYpvg4u4VVAkB0IHwAIWSaptbtKdNreXv19mcH5K09NiyT6LTr/MHpmjkiQxcOyZAnkdt2AcQmwgcQJvX+gD4pOKJ3Nxfr3S0HdaC8NnjMYTM0uX+qLhmWoemD0jUgPYnVVAHEDMIHEAGmaeqzfV69u6VY724+GLxtt1mG26WpA9I0dUAPTR2YprNSEiyqFADCj/ABWGD3oSot33JQ731eonWFZaprWtK9WZ8eicEwMmVAD6V1c1lUKQCEHuEDsFhtvV/r95Tpw52HtGbnYX26t1z+QMt/1HqnJmpc7xSNzWnchme75XIweRVA50T4AKJMRW29Pik4ojU7D+vDHYf0eXHFCec47TYNy3ZrXM6xQNI7NZHVVgF0CoQPIMqVV9dr496jyi86th2pqjvhvESnXUMykzU0061hWY2vQzKTeRgegKhD+AA6GdM0VXSkRhuKyrShsDGMbDngPWHeSLNsT7yGZjUGkQHp3dQvLUn905LUPckZ4coBoBHhA+gCGvwB7T5cpa0HKvR5sVefH6jQ58UV2ne05qSfSUmMawoi3dQ/PUn90pLUt0eSclITlBxPbwmA8CF8AF1YeU29vjhYoc8PeLXtYIUKDlWpoLRK+49bc6Q17niHenVP1FndE9Sre0LjzynNPyfIkxDHuiQA2o3wAcSgmjq/dh+uagwjh6q0s7RSBYeqtPtQlcqq60/7eZfDpgx3vDLcrqbXL/8cr7RuTnVzOQgpAE7Qlt/fPKYT6CISnHYNy3JrWNaJ/9BX+Rq072iN9pXVaG9ZtfaW1Wjv0RrtLWvcd6jSJ19DQIVHqlV4pLqVbz/GabepRzdn45bkUo9uTqV1c6lHklOpSU51T3QqJTGuaXPKkxCnOLstXH9sAJ1Q2MLH008/rf/+7//WgQMHNGLECD3xxBM699xzw3U5AKeQ5HJocEayBmckt3q8tt6v0gqfir21Ouit1UGvr+m15fvqOr/q/AEdKK9tsbT86XRzOeRJiAuGEnd8nJLjHUpuej3+vbvpNcllVzeXQ0kuhxLi7NxyDHQhYQkfr7zyiubNm6enn35a06ZN0+9//3vNmjVLW7ZsUe/evcNxSQAdEB9nV05qonJSE095Xk2dX4erfDpcWafDVT4dqqxr/LnSpyNVdTpUVafy6jqVVdfraHVd8CF8lb4GVTb1vrSHYUiJcXYlNYWRJJddiU6HEp12JTrtio+zKyGu8eeEOLsSnA4lxNmU0HTM5bArPs7W9HPja+Nmk8vRuM/psMlhMxhSAiIgLHM+Jk+erPHjx+uZZ54J7hs2bJiuuuoq5ebmnvKzzPkA
ug5/wJS3pl5l1XU6WlOv8urGnytqG1RRW6+K2gZ5W/xcHzxW7fOrqq5BgQjOSrMZktNhk9NukyvO3vjaFEycDpvi7DbF2Q3F2RvPibPbFOdo3Oe02+SwG3LYGt877DbF2RpfHXZDcbam4/bGkOOwGXLYDdltje/tNkNxTe/tRuP75s1ha/nebjNkNwzZDEM2m469P/7VZshmqPEc49h7whXCxdI5H3V1dVq3bp3uu+++FvtnzpypNWvWhPpyAKKY3Waoe5Kz3euPmKap2vqAKn0Nqq5raHr1N776/Kqua1BtvV/VdX7V1PtV0/R6/Pvaer98DQHV1vubtoB8DX756gOqbfCr3n8s3QRMqbY+oNr6gNTUa9PVGMFAciyYNP9sGJLNdmyf8aXzJMlmO/beOO77ml8br9H8eclQ48867nxDxz5jqHGHreln47jPNeck40ufbQ5Qzft0/Lk69vnmP++xY01foBO/R1KL7zj+usfvPP57W9bQ8ri+vL9F5jvJOcefcQbf2drnmus+nTi7ofsvH37a88Il5OHj0KFD8vv9ysjIaLE/IyNDxcXFJ5zv8/nk8/mC771eb6hLAtBJGYahBKddCU67pPA8iM8fMFXXEFBdQ1MoaQiozh+Qr77xtXl/g9+UryGgev+xrc5vqr5pX11DQPUBUw3+gBoCpur9ATX4TTUEAqr3N+6vD5gKBEzV+035A43n+QNm8Dx/07GA2bjfHzCD5/gDpvxm8/kBmaaC7wPB19P/eYOfa3wXljZF9HM6bF0rfDT7cvIyTbPVNJabm6sHH3wwXGUAwCnZbccHnM69EJtpNgaQ5kASOO69eVxAMc1jQcY0jwWSgGkGvyNgmgoE1LSv6VUKntO4T8HrqPF/J5x//LlmcJ8kNV276f3x56tpn6lj9QW/K/iHPe548M9/bF/TKVKLa365huM/2zKINX/X8e9bnt/8/sRzjv//48v7zRbHj99/YhA84ftOOOPEk1o7p7XJFVZP4A55+EhLS5Pdbj+hl6OkpOSE3hBJWrBggebPnx987/V6lZOTE+qyAKDLMwxDdqMxUAHRLOQ33zudTk2YMEHLly9vsX/58uWaOnXqCee7XC653e4WGwAA6LrCMuwyf/583XzzzZo4caKmTJmiRYsWqbCwUN/5znfCcTkAANCJhCV8XH/99Tp8+LAeeughHThwQCNHjtTf//539enTJxyXAwAAnQjPdgEAAB3Wlt/fPHABAABEFOEDAABEFOEDAABEFOEDAABEFOEDAABEFOEDAABEFOEDAABEFOEDAABEFOEDAABEVFiWV++I5gVXvV6vxZUAAIAz1fx7+0wWTo+68FFRUSFJysnJsbgSAADQVhUVFfJ4PKc8J+qe7RIIBLR//34lJyfLMIyQfrfX61VOTo6Kiop4bkwY0c6RQTtHDm0dGbRzZISrnU3TVEVFhbKzs2WznXpWR9T1fNhsNvXq1Sus13C73fzFjgDaOTJo58ihrSODdo6McLTz6Xo8mjHhFAAARBThAwAARFRMhQ+Xy6Wf/exncrlcVpfSpdHOkUE7Rw5tHRm0c2REQztH3YRTAADQtcVUzwcAALAe4QMAAEQU4QMAAEQU4QMAAERUzISPp59+Wv369VN8fLwmTJigDz74wOqSOr1Vq1Zp9uzZys7OlmEYWrZsWYvjpmnqgQceUHZ2thISEjRjxgxt3rzZmmI7sdzcXE2aNEnJycnq2bOnrrrqKm3btq3FObR1xz3zzDMaPXp0cOGlKVOm6O233w4ep43DIzc3V4ZhaN68ecF9tHXHPfDAAzIMo8WWmZkZPG51G8dE+HjllVc0b9483X///dqwYYPOPfdczZo1S4WFhVaX1qlVVVVpzJgxWrhwYavHH3vsMT3++ONauHCh1q5dq8zMTF1yySXB5/fgzKxcuVJz5szRxx9/rOXLl6uhoUEzZ85UVVVV8BzauuN69eqlRx55RHl5ecrLy9OFF16oK6+8MvgvZNo4
9NauXatFixZp9OjRLfbT1qExYsQIHThwILht2rQpeMzyNjZjwNlnn21+5zvfabFv6NCh5n333WdRRV2PJHPp0qXB94FAwMzMzDQfeeSR4L7a2lrT4/GYzz77rAUVdh0lJSWmJHPlypWmadLW4dS9e3fz+eefp43DoKKiwhw0aJC5fPly8/zzzzfvuece0zT5+xwqP/vZz8wxY8a0eiwa2rjL93zU1dVp3bp1mjlzZov9M2fO1Jo1ayyqqusrKChQcXFxi3Z3uVw6//zzafcOKi8vlySlpqZKoq3Dwe/36+WXX1ZVVZWmTJlCG4fBnDlzdPnll+viiy9usZ+2Dp3t27crOztb/fr10w033KBdu3ZJio42jroHy4XaoUOH5Pf7lZGR0WJ/RkaGiouLLaqq62tu29bafc+ePVaU1CWYpqn58+dr+vTpGjlypCTaOpQ2bdqkKVOmqLa2Vt26ddPSpUs1fPjw4L+QaePQePnll7V+/XqtXbv2hGP8fQ6NyZMn689//rMGDx6sgwcP6he/+IWmTp2qzZs3R0Ubd/nw0cwwjBbvTdM8YR9Cj3YPrblz5+rTTz/V6tWrTzhGW3fckCFDlJ+fr6NHj+r111/XLbfcopUrVwaP08YdV1RUpHvuuUfvvvuu4uPjT3oebd0xs2bNCv48atQoTZkyRQMGDNDixYt1zjnnSLK2jbv8sEtaWprsdvsJvRwlJSUnpD6ETvOsato9dO666y69+eabWrFihXr16hXcT1uHjtPp1MCBAzVx4kTl5uZqzJgxevLJJ2njEFq3bp1KSko0YcIEORwOORwOrVy5Ur/97W/lcDiC7Ulbh1ZSUpJGjRql7du3R8Xf5y4fPpxOpyZMmKDly5e32L98+XJNnTrVoqq6vn79+ikzM7NFu9fV1WnlypW0exuZpqm5c+fqjTfe0Hvvvad+/fq1OE5bh49pmvL5fLRxCF100UXatGmT8vPzg9vEiRN10003KT8/X/3796etw8Dn82nr1q3KysqKjr/PEZnWarGXX37ZjIuLM//whz+YW7ZsMefNm2cmJSWZu3fvtrq0Tq2iosLcsGGDuWHDBlOS+fjjj5sbNmww9+zZY5qmaT7yyCOmx+Mx33jjDXPTpk3mjTfeaGZlZZler9fiyjuX7373u6bH4zHff/9988CBA8Gturo6eA5t3XELFiwwV61aZRYUFJiffvqp+V//9V+mzWYz3333XdM0aeNwOv5uF9OkrUPh+9//vvn++++bu3btMj/++GPzq1/9qpmcnBz8vWd1G8dE+DBN03zqqafMPn36mE6n0xw/fnzwNkW034oVK0xJJ2y33HKLaZqNt3P97Gc/MzMzM02Xy2Wed9555qZNm6wtuhNqrY0lmS+88ELwHNq6426//fbgvyPS09PNiy66KBg8TJM2Dqcvhw/auuOuv/56Mysry4yLizOzs7PNa665xty8eXPwuNVtbJimaUamjwUAACAG5nwAAIDoQvgAAAARRfgAAAARRfgAAAARRfgAAAARRfgAAAARRfgAAAARRfgAAAARRfgAAAARRfgAAAARRfgAAAARRfgAAAAR9f8BiU2vk8vvxooAAAAASUVORK5CYII="
165 | },
166 | "metadata": {},
167 | "output_type": "display_data"
168 | }
169 | ],
170 | "source": [
171 | "# Least norm solution\n",
172 | "w_opt = least_norm_reg(X, b, 1e-6)\n",
173 | "\n",
174 | "# Initialize weights to 0\n",
175 | "w0 = np.zeros(n)\n",
176 | "\n",
177 |     "# Run GD for 50 steps\n",
178 | "grad_w = lambda _w: least_squares_reg_gradient(X, b, _w, m)\n",
179 | "wslist = run_gd([0.1] * 50, w0, grad_w)\n",
180 | "\n",
181 | "# Plot\n",
182 | "_ = plt.plot(\n",
183 | " range(len(wslist)),\n",
184 | " [np.linalg.norm(w_opt - w) ** 2 for w in wslist],\n",
185 | " label=\"$|\\!|w_{\\mathrm{opt}}-w_{t}|\\!|^2$\",\n",
186 | ")\n",
187 | "_ = plt.legend()"
188 | ],
189 | "metadata": {
190 | "collapsed": false,
191 | "ExecuteTime": {
192 | "end_time": "2023-09-17T01:16:46.039825300Z",
193 | "start_time": "2023-09-17T01:16:45.818471400Z"
194 | }
195 | }
196 | }
197 | ],
198 | "metadata": {
199 | "colab": {
200 | "authorship_tag": "ABX9TyPkibvpTEMRILBn2/x8IuJj",
201 | "provenance": []
202 | },
203 | "kernelspec": {
204 | "display_name": "Python 3 (ipykernel)",
205 | "language": "python",
206 | "name": "python3"
207 | },
208 | "language_info": {
209 | "codemirror_mode": {
210 | "name": "ipython",
211 | "version": 3
212 | },
213 | "file_extension": ".py",
214 | "mimetype": "text/x-python",
215 | "name": "python",
216 | "nbconvert_exporter": "python",
217 | "pygments_lexer": "ipython3",
218 | "version": "3.10.9"
219 | }
220 | },
221 | "nbformat": 4,
222 | "nbformat_minor": 1
223 | }
224 |
--------------------------------------------------------------------------------