├── .gitignore
├── LICENSE.md
├── README.md
├── assets
│   ├── banner.png
│   └── video_play.png
├── lab1
│   ├── PT_Part1_Intro.ipynb
│   ├── PT_Part2_Music_Generation.ipynb
│   ├── README.md
│   ├── TF_Part1_Intro.ipynb
│   ├── TF_Part2_Music_Generation.ipynb
│   ├── img
│   │   ├── add-graph.png
│   │   ├── computation-graph-2.png
│   │   ├── computation-graph.png
│   │   ├── lab1ngram.png
│   │   ├── lstm_inference.png
│   │   ├── lstm_unrolled-01-01.png
│   │   ├── lstm_unrolled-01.png
│   │   ├── lstm_unrolled.png
│   │   └── music_waveform.png
│   └── solutions
│       ├── PT_Part1_Intro_Solution.ipynb
│       ├── PT_Part2_Music_Generation_Solution.ipynb
│       ├── TF_Part1_Intro_Solution.ipynb
│       └── TF_Part2_Music_Generation_Solution.ipynb
├── lab2
│   ├── PT_Part1_MNIST.ipynb
│   ├── PT_Part2_Debiasing.ipynb
│   ├── TF_Part1_MNIST.ipynb
│   ├── TF_Part2_Debiasing.ipynb
│   ├── img
│   │   ├── DB-VAE.png
│   │   ├── SS-VAE.png
│   │   ├── convnet_fig.png
│   │   ├── mnist_2layers_arch.png
│   │   └── mnist_model.png
│   └── solutions
│       ├── PT_Part1_MNIST_Solution.ipynb
│       ├── PT_Part2_Debiasing_Solution.ipynb
│       ├── TF_Part1_MNIST_Solution.ipynb
│       └── TF_Part2_Debiasing_Solution.ipynb
├── lab3
│   ├── LLM_Finetuning.ipynb
│   ├── README.md
│   ├── img
│   │   └── yoda_wallpaper.jpg
│   └── solutions
│       └── LLM_Finetuning_Solution.ipynb
├── mitdeeplearning
│   ├── __init__.py
│   ├── bin
│   │   └── abc2wav
│   ├── data
│   │   ├── faces
│   │   │   ├── DF
│   │   │   │   ├── 10.png
│   │   │   │   ├── 19.png
│   │   │   │   ├── 6.png
│   │   │   │   ├── 7.png
│   │   │   │   └── 9.png
│   │   │   ├── DM
│   │   │   │   ├── 20.png
│   │   │   │   ├── 3.png
│   │   │   │   ├── 5.png
│   │   │   │   ├── 8.png
│   │   │   │   └── 9.png
│   │   │   ├── LF
│   │   │   │   ├── 1.png
│   │   │   │   ├── 11.png
│   │   │   │   ├── 2.png
│   │   │   │   ├── 4.png
│   │   │   │   └── 8.png
│   │   │   └── LM
│   │   │       ├── 1.png
│   │   │       ├── 11.png
│   │   │       ├── 5.png
│   │   │       ├── 8.png
│   │   │       └── 9.png
│   │   ├── irish.abc
│   │   └── text_styles
│   │       ├── leprechaun.txt
│   │       └── yoda.txt
│   ├── lab1.py
│   ├── lab2.py
│   ├── lab3.py
│   ├── lab3_old.py
│   └── util.py
├── setup.cfg
├── setup.py
├── test.py
└── xtra_labs
    ├── llm_finetune
    │   ├── NOT_FINAL
    │   ├── benchmark.csv
    │   ├── draft.py
    │   ├── spider.png
    │   └── utils.py
    ├── rl_pong
    │   ├── RL.ipynb
    │   ├── img
    │   │   ├── COMING SOON
    │   │   └── vista_overview.png
    │   └── solutions
    │       └── RL_Solution.ipynb
    ├── rl_selfdriving
    │   ├── RL.ipynb
    │   ├── img
    │   │   ├── COMING SOON
    │   │   └── vista_overview.png
    │   └── solutions
    │       └── RL_Solution.ipynb
    └── uncertainty
        ├── Part1_IntroductionCapsa.ipynb
        └── Part2_BiasAndUncertainty.ipynb
/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | .ipynb_checkpoints
3 |
4 |
5 | *.pyc
6 | *.h5
7 | lab2/logs/*
8 |
9 |
10 | # Byte-compiled / optimized / DLL files
11 | __pycache__/
12 | *.py[cod]
13 | *$py.class
14 |
15 | # C extensions
16 | *.so
17 |
18 | # Distribution / packaging
19 | .Python
20 | build/
21 | develop-eggs/
22 | dist/
23 | downloads/
24 | eggs/
25 | .eggs/
26 | lib/
27 | lib64/
28 | parts/
29 | sdist/
30 | var/
31 | wheels/
32 | pip-wheel-metadata/
33 | share/python-wheels/
34 | *.egg-info/
35 | .installed.cfg
36 | *.egg
37 | MANIFEST
38 |
39 | # PyInstaller
40 | # Usually these files are written by a python script from a template
41 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
42 | *.manifest
43 | *.spec
44 |
45 | # Installer logs
46 | pip-log.txt
47 | pip-delete-this-directory.txt
48 |
49 | # Unit test / coverage reports
50 | htmlcov/
51 | .tox/
52 | .nox/
53 | .coverage
54 | .coverage.*
55 | .cache
56 | nosetests.xml
57 | coverage.xml
58 | *.cover
59 | *.py,cover
60 | .hypothesis/
61 | .pytest_cache/
62 |
63 | # Translations
64 | *.mo
65 | *.pot
66 |
67 | # Django stuff:
68 | *.log
69 | local_settings.py
70 | db.sqlite3
71 | db.sqlite3-journal
72 |
73 | # Flask stuff:
74 | instance/
75 | .webassets-cache
76 |
77 | # Scrapy stuff:
78 | .scrapy
79 |
80 | # Sphinx documentation
81 | docs/_build/
82 |
83 | # PyBuilder
84 | target/
85 |
86 | # Jupyter Notebook
87 | .ipynb_checkpoints
88 |
89 | # IPython
90 | profile_default/
91 | ipython_config.py
92 |
93 | # pyenv
94 | .python-version
95 |
96 | # pipenv
97 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
98 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
99 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
100 | # install all needed dependencies.
101 | #Pipfile.lock
102 |
103 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
104 | __pypackages__/
105 |
106 | # Celery stuff
107 | celerybeat-schedule
108 | celerybeat.pid
109 |
110 | # SageMath parsed files
111 | *.sage.py
112 |
113 | # Environments
114 | .env
115 | .venv
116 | env/
117 | venv/
118 | ENV/
119 | env.bak/
120 | venv.bak/
121 |
122 | # Spyder project settings
123 | .spyderproject
124 | .spyproject
125 |
126 | # Rope project settings
127 | .ropeproject
128 |
129 | # mkdocs documentation
130 | /site
131 |
132 | # mypy
133 | .mypy_cache/
134 | .dmypy.json
135 | dmypy.json
136 |
137 | # Pyre type checker
138 | .pyre/
139 |
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2025 [MIT Introduction to Deep Learning](http://introtodeeplearning.com/)
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | The above copyright notice and this permission notice shall be included in all
12 | copies or substantial portions of the Software.
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19 | SOFTWARE.
20 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | [![MIT Introduction to Deep Learning](assets/banner.png)](http://introtodeeplearning.com)
2 |
3 | This repository contains all of the code and software labs for [MIT Introduction to Deep Learning](http://introtodeeplearning.com)! All lecture slides and videos are available on the program website.
4 |
5 | # Instructions
6 | MIT Introduction to Deep Learning software labs are designed to be completed at your own pace. At the end of each of the labs, there will be instructions on how you can submit your materials as part of the lab competitions. These instructions include what information must be submitted and in what format.
7 |
8 | ## Opening the labs in Google Colaboratory:
9 |
10 | The 2025 Introduction to Deep Learning labs will be run in Google's Colaboratory, a Jupyter notebook environment that runs entirely in the cloud, so you don't need to download anything. To run these labs, you must have a Google account.
11 |
12 | In this GitHub repo, navigate to the lab folder you want to run (`lab1`, `lab2`, `lab3`) and open the appropriate Python notebook (\*.ipynb). Click the "Run in Colab" link at the top of the lab. That's it!
13 |
14 | ## Running the labs
15 | Now, to run the labs, open the Jupyter notebook on Colab. Navigate to the "Runtime" tab --> "Change runtime type". In the pop-up window, under "Runtime type" select "Python 3", and under "Hardware accelerator" select "GPU". Go through the notebooks and fill in the `#TODO` cells to get the code to run!
16 |
17 |
18 | ### MIT Deep Learning package
19 | You might notice that inside the labs we install the `mitdeeplearning` Python package from the Python Package Index (PyPI):
20 |
21 | `pip install mitdeeplearning`
22 |
23 | This package contains convenience functions that we use throughout the course and can be imported like any other Python package.
24 |
25 | `>>> import mitdeeplearning as mdl`
26 |
27 | We do this for you in each of the labs, but the package is also open source under the same license so you can also use it outside the class.
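
For example, here is a minimal sketch of using the package (assuming the lab 1 helper `load_training_data`, which loads the ABC-notation song dataset bundled with the package):

```python
import mitdeeplearning as mdl

# Load the lab 1 training data: a list of songs in ABC notation
songs = mdl.lab1.load_training_data()
print(f"Loaded {len(songs)} songs")
```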
28 |
29 | ## Lecture Videos
30 |
31 | [![MIT Introduction to Deep Learning lecture videos](assets/video_play.png)](https://www.youtube.com/watch?v=njKP3FqW3Sk&list=PLtBw6njQRU-rwp5__7C0oIVt26ZgjG9NI&index=1)
32 |
33 | All lecture videos are available publicly online and linked above! Use and/or modification of lecture slides outside of MIT Introduction to Deep Learning must reference:
34 |
35 | > © MIT Introduction to Deep Learning
36 | >
37 | > http://introtodeeplearning.com
38 |
39 | ## License
40 | All code in this repository is copyright 2025 [MIT Introduction to Deep Learning](http://introtodeeplearning.com). All Rights Reserved.
41 |
42 | Licensed under the MIT License. You may not use this file except in compliance with the License. Use and/or modification of this code outside of MIT Introduction to Deep Learning must reference:
43 |
44 | > © MIT Introduction to Deep Learning
45 | >
46 | > http://introtodeeplearning.com
47 |
--------------------------------------------------------------------------------
/assets/banner.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/2e90e0461ffa952f5855d4a89a32742796794527/assets/banner.png
--------------------------------------------------------------------------------
/assets/video_play.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/2e90e0461ffa952f5855d4a89a32742796794527/assets/video_play.png
--------------------------------------------------------------------------------
/lab1/PT_Part1_Intro.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "id": "WBk0ZDWY-ff8"
7 | },
8 | "source": [
9 | "
\n",
18 | "\n",
19 | "# Copyright Information\n"
20 | ]
21 | },
22 | {
23 | "cell_type": "code",
24 | "execution_count": null,
25 | "metadata": {
26 | "id": "3eI6DUic-6jo"
27 | },
28 | "outputs": [],
29 | "source": [
30 | "# Copyright 2025 MIT Introduction to Deep Learning. All Rights Reserved.\n",
31 | "#\n",
32 | "# Licensed under the MIT License. You may not use this file except in compliance\n",
33 | "# with the License. Use and/or modification of this code outside of MIT Introduction\n",
34 | "# to Deep Learning must reference:\n",
35 | "#\n",
36 | "# © MIT Introduction to Deep Learning\n",
37 | "# http://introtodeeplearning.com\n",
38 | "#"
39 | ]
40 | },
41 | {
42 | "cell_type": "markdown",
43 | "metadata": {
44 | "id": "57knM8jrYZ2t"
45 | },
46 | "source": [
47 | "# Lab 1: Intro to PyTorch and Music Generation with RNNs\n",
48 | "\n",
49 | "In this lab, you'll get exposure to using PyTorch and learn how it can be used for deep learning. Go through the code and run each cell. Along the way, you'll encounter several ***TODO*** blocks -- follow the instructions to fill them out before running those cells and continuing.\n",
50 | "\n",
51 | "\n",
52 | "# Part 1: Intro to PyTorch\n",
53 | "\n",
54 | "## 0.1 Install PyTorch\n",
55 | "\n",
56 | "[PyTorch](https://pytorch.org/) is a popular deep learning library known for its flexibility and ease of use. Here we'll learn how computations are represented and how to define a simple neural network in PyTorch. For all the labs in Introduction to Deep Learning 2025, there will be a PyTorch version available.\n",
57 | "\n",
58 | "Let's install PyTorch and a couple of dependencies."
59 | ]
60 | },
61 | {
62 | "cell_type": "code",
63 | "execution_count": null,
64 | "metadata": {
65 | "id": "LkaimNJfYZ2w"
66 | },
67 | "outputs": [],
68 | "source": [
69 | "import torch\n",
70 | "import torch.nn as nn\n",
71 | "\n",
72 | "# Download and import the MIT Introduction to Deep Learning package\n",
73 | "!pip install mitdeeplearning --quiet\n",
74 | "import mitdeeplearning as mdl\n",
75 | "\n",
76 | "import numpy as np\n",
77 | "import matplotlib.pyplot as plt"
78 | ]
79 | },
80 | {
81 | "cell_type": "markdown",
82 | "metadata": {
83 | "id": "2QNMcdP4m3Vs"
84 | },
85 | "source": [
86 | "## 1.1 What is PyTorch?\n",
87 | "\n",
88 | "PyTorch is a machine learning library, like TensorFlow. At its core, PyTorch provides an interface for creating and manipulating [tensors](https://pytorch.org/docs/stable/tensors.html), which are data structures that you can think of as multi-dimensional arrays. Tensors are represented as n-dimensional arrays of base datatypes such as a string or integer -- they provide a way to generalize vectors and matrices to higher dimensions. PyTorch provides the ability to perform computation on these tensors, define neural networks, and train them efficiently.\n",
89 | "\n",
90 | "The [```shape```](https://pytorch.org/docs/stable/generated/torch.Tensor.shape.html#torch.Tensor.shape) of a PyTorch tensor defines its number of dimensions and the size of each dimension. The `ndim` or [```dim```](https://pytorch.org/docs/stable/generated/torch.Tensor.dim.html#torch.Tensor.dim) of a PyTorch tensor provides the number of dimensions (n-dimensions) -- this is equivalent to the tensor's rank (as is used in TensorFlow), and you can also think of this as the tensor's order or degree.\n",
91 | "\n",
92 | "Let’s start by creating some tensors and inspecting their properties:\n"
93 | ]
94 | },
95 | {
96 | "cell_type": "code",
97 | "execution_count": null,
98 | "metadata": {
99 | "id": "tFxztZQInlAB"
100 | },
101 | "outputs": [],
102 | "source": [
103 | "integer = torch.tensor(1234)\n",
104 | "decimal = torch.tensor(3.14159265359)\n",
105 | "\n",
106 | "print(f\"`integer` is a {integer.ndim}-d Tensor: {integer}\")\n",
107 | "print(f\"`decimal` is a {decimal.ndim}-d Tensor: {decimal}\")\n"
108 | ]
109 | },
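  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "As a quick aside (not part of the original lab text): every tensor also carries a `dtype`, which PyTorch infers from the data you pass in:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# dtype is inferred at creation time\n",
    "print(integer.dtype)  # torch.int64 -- inferred from a Python int\n",
    "print(decimal.dtype)  # torch.float32 -- inferred from a Python float"
   ]
  },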
110 | {
111 | "cell_type": "markdown",
112 | "metadata": {
113 | "id": "-dljcPUcoJZ6"
114 | },
115 | "source": [
116 | "Vectors and lists can be used to create 1-d tensors:"
117 | ]
118 | },
119 | {
120 | "cell_type": "code",
121 | "execution_count": null,
122 | "metadata": {
123 | "id": "oaHXABe8oPcO"
124 | },
125 | "outputs": [],
126 | "source": [
127 | "fibonacci = torch.tensor([1, 1, 2, 3, 5, 8])\n",
128 | "count_to_100 = torch.tensor(range(100))\n",
129 | "\n",
130 | "print(f\"`fibonacci` is a {fibonacci.ndim}-d Tensor with shape: {fibonacci.shape}\")\n",
131 | "print(f\"`count_to_100` is a {count_to_100.ndim}-d Tensor with shape: {count_to_100.shape}\")\n"
132 | ]
133 | },
134 | {
135 | "cell_type": "markdown",
136 | "metadata": {
137 | "id": "gvffwkvtodLP"
138 | },
139 | "source": [
140 | "Next, let’s create 2-d (i.e., matrices) and higher-rank tensors. In image processing and computer vision, we will use 4-d Tensors with dimensions corresponding to batch size, number of color channels, image height, and image width."
141 | ]
142 | },
143 | {
144 | "cell_type": "code",
145 | "execution_count": null,
146 | "metadata": {
147 | "id": "tFeBBe1IouS3"
148 | },
149 | "outputs": [],
150 | "source": [
151 | "### Defining higher-order Tensors ###\n",
152 | "\n",
153 | "'''TODO: Define a 2-d Tensor'''\n",
154 | "matrix = # TODO\n",
155 | "\n",
156 | "assert isinstance(matrix, torch.Tensor), \"matrix must be a torch Tensor object\"\n",
157 | "assert matrix.ndim == 2\n",
158 | "\n",
159 | "'''TODO: Define a 4-d Tensor.'''\n",
160 | "# Use torch.zeros to initialize a 4-d Tensor of zeros with size 10 x 3 x 256 x 256.\n",
161 | "# You can think of this as 10 images where each image is RGB 256 x 256.\n",
162 | "images = # TODO\n",
163 | "\n",
164 | "assert isinstance(images, torch.Tensor), \"images must be a torch Tensor object\"\n",
165 | "assert images.ndim == 4, \"images must have 4 dimensions\"\n",
166 | "assert images.shape == (10, 3, 256, 256), \"images is incorrect shape\"\n",
167 | "print(f\"images is a {images.ndim}-d Tensor with shape: {images.shape}\")"
168 | ]
169 | },
170 | {
171 | "cell_type": "markdown",
172 | "metadata": {
173 | "id": "wkaCDOGapMyl"
174 | },
175 | "source": [
176 | "As you have seen, the `shape` of a tensor provides the number of elements in each tensor dimension. The `shape` is quite useful, and we'll use it often. You can also use slicing to access subtensors within a higher-rank tensor:"
177 | ]
178 | },
179 | {
180 | "cell_type": "code",
181 | "execution_count": null,
182 | "metadata": {
183 | "id": "FhaufyObuLEG"
184 | },
185 | "outputs": [],
186 | "source": [
187 | "row_vector = matrix[1]\n",
188 | "column_vector = matrix[:, 1]\n",
189 | "scalar = matrix[0, 1]\n",
190 | "\n",
191 | "print(f\"`row_vector`: {row_vector}\")\n",
192 | "print(f\"`column_vector`: {column_vector}\")\n",
193 | "print(f\"`scalar`: {scalar}\")"
194 | ]
195 | },
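  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A further slicing sketch (an aside beyond the original text): ranges and negative indices work just as they do in NumPy:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(matrix[0, :])         # the entire first row\n",
    "print(matrix[-1, -1])       # the last element of the last row\n",
    "print(count_to_100[10:15])  # elements 10 through 14"
   ]
  },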
196 | {
197 | "cell_type": "markdown",
198 | "metadata": {
199 | "id": "iD3VO-LZYZ2z"
200 | },
201 | "source": [
202 | "## 1.2 Computations on Tensors\n",
203 | "\n",
204 | "A convenient way to think about and visualize computations in a machine learning framework like PyTorch is in terms of graphs. We can define this graph in terms of tensors, which hold data, and the mathematical operations that act on these tensors in some order. Let's look at a simple example, and define this computation using PyTorch:\n",
205 | "\n",
206 | ""
207 | ]
208 | },
209 | {
210 | "cell_type": "code",
211 | "execution_count": null,
212 | "metadata": {
213 | "id": "X_YJrZsxYZ2z"
214 | },
215 | "outputs": [],
216 | "source": [
217 | "# Create the nodes in the graph and initialize values\n",
218 | "a = torch.tensor(15)\n",
219 | "b = torch.tensor(61)\n",
220 | "\n",
221 | "# Add them!\n",
222 | "c1 = torch.add(a, b)\n",
223 | "c2 = a + b # PyTorch overrides the \"+\" operation so that it is able to act on Tensors\n",
224 | "print(f\"c1: {c1}\")\n",
225 | "print(f\"c2: {c2}\")\n"
226 | ]
227 | },
228 | {
229 | "cell_type": "markdown",
230 | "metadata": {
231 | "id": "Mbfv_QOiYZ23"
232 | },
233 | "source": [
234 | "Notice how we've created a computation graph consisting of PyTorch operations, and how the output is a tensor with value 76 -- we've just created a computation graph consisting of operations, and it's executed them and given us back the result.\n",
235 | "\n",
236 | "Now let's consider a slightly more complicated example:\n",
237 | "\n",
238 | "\n",
239 | "\n",
240 | "Here, we take two inputs, `a, b`, and compute an output `e`. Each node in the graph represents an operation that takes some input, does some computation, and passes its output to another node.\n",
241 | "\n",
242 | "Let's define a simple function in PyTorch to construct this computation function:"
243 | ]
244 | },
245 | {
246 | "cell_type": "code",
247 | "execution_count": null,
248 | "metadata": {
249 | "id": "PJnfzpWyYZ23",
250 | "scrolled": true
251 | },
252 | "outputs": [],
253 | "source": [
254 | "### Defining Tensor computations ###\n",
255 | "\n",
256 | "# Construct a simple computation function\n",
257 | "def func(a, b):\n",
258 | " '''TODO: Define the operation for c, d, e.'''\n",
259 | " c = # TODO\n",
260 | " d = # TODO\n",
261 | " e = # TODO\n",
262 | " return e\n"
263 | ]
264 | },
265 | {
266 | "cell_type": "markdown",
267 | "metadata": {
268 | "id": "AwrRfDMS2-oy"
269 | },
270 | "source": [
271 | "Now, we can call this function to execute the computation graph given some inputs `a,b`:"
272 | ]
273 | },
274 | {
275 | "cell_type": "code",
276 | "execution_count": null,
277 | "metadata": {
278 | "id": "pnwsf8w2uF7p"
279 | },
280 | "outputs": [],
281 | "source": [
282 | "# Consider example values for a,b\n",
283 | "a, b = 1.5, 2.5\n",
284 | "# Execute the computation\n",
285 | "e_out = func(a, b)\n",
286 | "print(f\"e_out: {e_out}\")"
287 | ]
288 | },
289 | {
290 | "cell_type": "markdown",
291 | "metadata": {
292 | "id": "6HqgUIUhYZ29"
293 | },
294 | "source": [
295 | "Notice how our output is a tensor with value defined by the output of the computation, and that the output has no shape as it is a single scalar value."
296 | ]
297 | },
298 | {
299 | "cell_type": "markdown",
300 | "metadata": {
301 | "id": "1h4o9Bb0YZ29"
302 | },
303 | "source": [
304 | "## 1.3 Neural networks in PyTorch\n",
305 | "We can also define neural networks in PyTorch. PyTorch uses [``torch.nn.Module``](https://pytorch.org/docs/stable/generated/torch.nn.Module.html), which serves as a base class for all neural network modules in PyTorch and thus provides a framework for building and training neural networks.\n",
306 | "\n",
307 | "Let's consider the example of a simple perceptron defined by just one dense (aka fully-connected or linear) layer: $ y = \\sigma(Wx + b) $, where $W$ represents a matrix of weights, $b$ is a bias, $x$ is the input, $\\sigma$ is the sigmoid activation function, and $y$ is the output.\n",
308 | "\n",
309 | "\n",
310 | "\n",
311 | "We will use `torch.nn.Module` to define layers -- the building blocks of neural networks. Layers implement common neural networks operations. In PyTorch, when we implement a layer, we subclass `nn.Module` and define the parameters of the layer as attributes of our new class. We also define and override a function [``forward``](https://pytorch.org/docs/stable/generated/torch.nn.Module.html#torch.nn.Module.forward), which will define the forward pass computation that is performed at every step. All classes subclassing `nn.Module` should override the `forward` function.\n",
312 | "\n",
313 | "Let's write a dense layer class to implement a perceptron defined above."
314 | ]
315 | },
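  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "As a warm-up before the exercise below (an aside, not part of the original lab), here is a minimal `nn.Module` sketch showing the `__init__`/`forward` pattern: a layer that just scales its input by a learnable factor. The name `ScaleLayer` is ours, purely for illustration."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "### Minimal nn.Module sketch (illustration only) ###\n",
    "\n",
    "class ScaleLayer(torch.nn.Module):\n",
    "    def __init__(self):\n",
    "        super(ScaleLayer, self).__init__()\n",
    "        # A single learnable parameter, initialized to 1\n",
    "        self.scale = torch.nn.Parameter(torch.ones(1))\n",
    "\n",
    "    def forward(self, x):\n",
    "        # Forward pass: multiply the input by the learnable scale\n",
    "        return self.scale * x\n",
    "\n",
    "print(ScaleLayer()(torch.tensor([1.0, 2.0, 3.0])))"
   ]
  },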
316 | {
317 | "cell_type": "code",
318 | "execution_count": null,
319 | "metadata": {
320 | "id": "HutbJk-1kHPh"
321 | },
322 | "outputs": [],
323 | "source": [
324 | "### Defining a dense layer ###\n",
325 | "\n",
326 | "# num_inputs: number of input nodes\n",
327 | "# num_outputs: number of output nodes\n",
328 | "# x: input to the layer\n",
329 | "\n",
330 | "class OurDenseLayer(torch.nn.Module):\n",
331 | " def __init__(self, num_inputs, num_outputs):\n",
332 | " super(OurDenseLayer, self).__init__()\n",
333 | " # Define and initialize parameters: a weight matrix W and bias b\n",
334 | " # Note that the parameter initialize is random!\n",
335 | " self.W = torch.nn.Parameter(torch.randn(num_inputs, num_outputs))\n",
336 | " self.bias = torch.nn.Parameter(torch.randn(num_outputs))\n",
337 | "\n",
338 | " def forward(self, x):\n",
339 | " '''TODO: define the operation for z (hint: use torch.matmul).'''\n",
340 | " z = # TODO\n",
341 | "\n",
342 | " '''TODO: define the operation for out (hint: use torch.sigmoid).'''\n",
343 | " y = # TODO\n",
344 | " return y\n"
345 | ]
346 | },
347 | {
348 | "cell_type": "markdown",
349 | "metadata": {
350 | "id": "GqeEbn959hV_"
351 | },
352 | "source": [
353 | "Now, let's test the output of our layer."
354 | ]
355 | },
356 | {
357 | "cell_type": "code",
358 | "execution_count": null,
359 | "metadata": {
360 | "id": "2yxjCPa69hV_"
361 | },
362 | "outputs": [],
363 | "source": [
364 | "# Define a layer and test the output!\n",
365 | "num_inputs = 2\n",
366 | "num_outputs = 3\n",
367 | "layer = OurDenseLayer(num_inputs, num_outputs)\n",
368 | "x_input = torch.tensor([[1, 2.]])\n",
369 | "y = layer(x_input)\n",
370 | "\n",
371 | "print(f\"input shape: {x_input.shape}\")\n",
372 | "print(f\"output shape: {y.shape}\")\n",
373 | "print(f\"output result: {y}\")"
374 | ]
375 | },
376 | {
377 | "cell_type": "markdown",
378 | "metadata": {
379 | "id": "Jt1FgM7qYZ3D"
380 | },
381 | "source": [
382 | "Conveniently, PyTorch has defined a number of ```nn.Modules``` (or Layers) that are commonly used in neural networks, for example a [```nn.Linear```](https://pytorch.org/docs/stable/generated/torch.nn.Linear.html) or [`nn.Sigmoid`](https://pytorch.org/docs/stable/generated/torch.nn.Sigmoid.html) module.\n",
383 | "\n",
384 | "Now, instead of using a single ```Module``` to define our simple neural network, we'll use the [`nn.Sequential`](https://pytorch.org/docs/stable/generated/torch.nn.Sequential.html) module from PyTorch and a single [`nn.Linear` ](https://pytorch.org/docs/stable/generated/torch.nn.Linear.html) layer to define our network. With the `Sequential` API, you can readily create neural networks by stacking together layers like building blocks."
385 | ]
386 | },
387 | {
388 | "cell_type": "code",
389 | "execution_count": null,
390 | "metadata": {
391 | "id": "7WXTpmoL6TDz"
392 | },
393 | "outputs": [],
394 | "source": [
395 | "### Defining a neural network using the PyTorch Sequential API ###\n",
396 | "\n",
397 | "# define the number of inputs and outputs\n",
398 | "n_input_nodes = 2\n",
399 | "n_output_nodes = 3\n",
400 | "\n",
401 | "# Define the model\n",
402 | "'''TODO: Use the Sequential API to define a neural network with a\n",
403 | " single linear (dense!) layer, followed by non-linearity to compute z'''\n",
404 | "model = nn.Sequential( ''' TODO ''' )\n"
405 | ]
406 | },
407 | {
408 | "cell_type": "markdown",
409 | "metadata": {
410 | "id": "HDGcwYfUyR-U"
411 | },
412 | "source": [
413 | "We've defined our model using the Sequential API. Now, we can test it out using an example input:"
414 | ]
415 | },
416 | {
417 | "cell_type": "code",
418 | "execution_count": null,
419 | "metadata": {
420 | "id": "zKhp6XqCFFa0"
421 | },
422 | "outputs": [],
423 | "source": [
424 | "# Test the model with example input\n",
425 | "x_input = torch.tensor([[1, 2.]])\n",
426 | "model_output = model(x_input)\n",
427 | "print(f\"input shape: {x_input.shape}\")\n",
428 | "print(f\"output shape: {y.shape}\")\n",
429 | "print(f\"output result: {y}\")"
430 | ]
431 | },
432 | {
433 | "cell_type": "markdown",
434 | "metadata": {
435 | "id": "596NvsOOtr9F"
436 | },
437 | "source": [
438 | "With PyTorch, we can create more flexible models by subclassing [`nn.Module`](https://pytorch.org/docs/stable/generated/torch.nn.Module.html). The `nn.Module` class allows us to group layers together flexibly to define new architectures.\n",
439 | "\n",
440 | "As we saw earlier with `OurDenseLayer`, we can subclass `nn.Module` to create a class for our model, and then define the forward pass through the network using the `forward` function. Subclassing affords the flexibility to define custom layers, custom training loops, custom activation functions, and custom models. Let's define the same neural network model as above (i.e., Linear layer with an activation function after it), now using subclassing and using PyTorch's built in linear layer from `nn.Linear`."
441 | ]
442 | },
443 | {
444 | "cell_type": "code",
445 | "execution_count": null,
446 | "metadata": {
447 | "id": "K4aCflPVyViD"
448 | },
449 | "outputs": [],
450 | "source": [
451 | "### Defining a model using subclassing ###\n",
452 | "\n",
453 | "class LinearWithSigmoidActivation(nn.Module):\n",
454 | " def __init__(self, num_inputs, num_outputs):\n",
455 | " super(LinearWithSigmoidActivation, self).__init__()\n",
456 | " '''TODO: define a model with a single Linear layer and sigmoid activation.'''\n",
457 | " self.linear = '''TODO: linear layer'''\n",
458 | " self.activation = '''TODO: sigmoid activation'''\n",
459 | "\n",
460 | " def forward(self, inputs):\n",
461 | " linear_output = self.linear(inputs)\n",
462 | " output = self.activation(linear_output)\n",
463 | " return output\n"
464 | ]
465 | },
466 | {
467 | "cell_type": "markdown",
468 | "metadata": {
469 | "id": "goKCQ9dEGzRn"
470 | },
471 | "source": [
472 | "Let's test out our new model, using an example input, setting `n_input_nodes=2` and `n_output_nodes=3` as before."
473 | ]
474 | },
475 | {
476 | "cell_type": "code",
477 | "execution_count": null,
478 | "metadata": {
479 | "id": "V-eNhSyRG6hl"
480 | },
481 | "outputs": [],
482 | "source": [
483 | "n_input_nodes = 2\n",
484 | "n_output_nodes = 3\n",
485 | "model = LinearWithSigmoidActivation(n_input_nodes, n_output_nodes)\n",
486 | "x_input = torch.tensor([[1, 2.]])\n",
487 | "y = model(x_input)\n",
488 | "print(f\"input shape: {x_input.shape}\")\n",
489 | "print(f\"output shape: {y.shape}\")\n",
490 | "print(f\"output result: {y}\")"
491 | ]
492 | },
493 | {
494 | "cell_type": "markdown",
495 | "metadata": {
496 | "id": "HTIFMJLAzsyE"
497 | },
498 | "source": [
499 | "Importantly, `nn.Module` affords us a lot of flexibility to define custom models. For example, we can use boolean arguments in the `forward` function to specify different network behaviors, for example different behaviors during training and inference. Let's suppose under some instances we want our network to simply output the input, without any perturbation. We define a boolean argument `isidentity` to control this behavior:"
500 | ]
501 | },
502 | {
503 | "cell_type": "code",
504 | "execution_count": null,
505 | "metadata": {
506 | "id": "P7jzGX5D1xT5"
507 | },
508 | "outputs": [],
509 | "source": [
510 | "### Custom behavior with subclassing nn.Module ###\n",
511 | "\n",
512 | "class LinearButSometimesIdentity(nn.Module):\n",
513 | " def __init__(self, num_inputs, num_outputs):\n",
514 | " super(LinearButSometimesIdentity, self).__init__()\n",
515 | " self.linear = nn.Linear(num_inputs, num_outputs)\n",
516 | "\n",
517 | " '''TODO: Implement the behavior where the network outputs the input, unchanged,\n",
518 | " under control of the isidentity argument.'''\n",
519 | " def forward(self, inputs, isidentity=False):\n",
520 | " ''' TODO '''\n"
521 | ]
522 | },
523 | {
524 | "cell_type": "markdown",
525 | "metadata": {
526 | "id": "Ku4rcCGx5T3y"
527 | },
528 | "source": [
529 | "Let's test this behavior:"
530 | ]
531 | },
532 | {
533 | "cell_type": "code",
534 | "execution_count": null,
535 | "metadata": {
536 | "id": "NzC0mgbk5dp2"
537 | },
538 | "outputs": [],
539 | "source": [
540 | "# Test the IdentityModel\n",
541 | "model = LinearButSometimesIdentity(num_inputs=2, num_outputs=3)\n",
542 | "x_input = torch.tensor([[1, 2.]])\n",
543 | "\n",
544 | "'''TODO: pass the input into the model and call with and without the input identity option.'''\n",
545 | "out_with_linear = # TODO\n",
546 | "\n",
547 | "out_with_identity = # TODO\n",
548 | "\n",
549 | "print(f\"input: {x_input}\")\n",
550 | "print(\"Network linear output: {}; network identity output: {}\".format(out_with_linear, out_with_identity))"
551 | ]
552 | },
553 | {
554 | "cell_type": "markdown",
555 | "metadata": {
556 | "id": "7V1dEqdk6VI5"
557 | },
558 | "source": [
559 | "Now that we have learned how to define layers and models in PyTorch using both the Sequential API and subclassing `nn.Module`, we're ready to turn our attention to how to actually implement network training with backpropagation."
560 | ]
561 | },
562 | {
563 | "cell_type": "markdown",
564 | "metadata": {
565 | "id": "dQwDhKn8kbO2"
566 | },
567 | "source": [
568 | "## 1.4 Automatic Differentiation in PyTorch\n",
569 | "\n",
570 | "In PyTorch, [`torch.autograd`](https://pytorch.org/docs/stable/autograd.html) is used for [automatic differentiation](https://en.wikipedia.org/wiki/Automatic_differentiation), which is critical for training deep learning models with [backpropagation](https://en.wikipedia.org/wiki/Backpropagation).\n",
571 | "\n",
572 | "We will use the PyTorch [`.backward()`](https://pytorch.org/docs/stable/generated/torch.Tensor.backward.html) method to trace operations for computing gradients. On a tensor, the [`requires_grad`](https://pytorch.org/docs/stable/generated/torch.Tensor.requires_grad_.html) attribute controls whether autograd should record operations on that tensor. When a forward pass is made through the network, PyTorch builds a computational graph dynamically; then, to compute the gradient, the `backward()` method is called to perform backpropagation.\n",
573 | "\n",
574 | "Let's compute the gradient of $ y = x^2 $:"
575 | ]
576 | },
577 | {
578 | "cell_type": "code",
579 | "execution_count": null,
580 | "metadata": {
581 | "id": "tdkqk8pw5yJM"
582 | },
583 | "outputs": [],
584 | "source": [
585 | "### Gradient computation ###\n",
586 | "\n",
587 | "# y = x^2\n",
588 | "# Example: x = 3.0\n",
589 | "x = torch.tensor(3.0, requires_grad=True)\n",
590 | "y = x ** 2\n",
591 | "y.backward() # Compute the gradient\n",
592 | "\n",
593 | "dy_dx = x.grad\n",
594 | "print(\"dy_dx of y=x^2 at x=3.0 is: \", dy_dx)\n",
595 | "assert dy_dx == 6.0\n"
596 | ]
597 | },
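  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "One more sketch (an aside beyond the original text): `backward()` populates `.grad` on *every* tensor with `requires_grad=True` that contributed to the output, so we can differentiate with respect to several inputs in one call:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# z = 3a^2 + b, differentiated with respect to both a and b\n",
    "a = torch.tensor(2.0, requires_grad=True)\n",
    "b = torch.tensor(5.0, requires_grad=True)\n",
    "z = 3 * a**2 + b\n",
    "z.backward()\n",
    "print(a.grad)  # dz/da = 6a = 12\n",
    "print(b.grad)  # dz/db = 1"
   ]
  },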
598 | {
599 | "cell_type": "markdown",
600 | "metadata": {
601 | "id": "JhU5metS5xF3"
602 | },
603 | "source": [
604 | "In training neural networks, we use differentiation and stochastic gradient descent (SGD) to optimize a loss function. Now that we have a sense of how PyTorch's autograd can be used to compute and access derivatives, we will look at an example where we use automatic differentiation and SGD to find the minimum of $ L=(x-x_f)^2 $. Here $x_f$ is a variable for a desired value we are trying to optimize for; $L$ represents a loss that we are trying to minimize. While we can clearly solve this problem analytically ($ x_{min}=x_f $), considering how we can compute this using PyTorch's autograd sets us up nicely for future labs where we use gradient descent to optimize entire neural network losses."
605 | ]
606 | },
607 | {
608 | "cell_type": "code",
609 | "execution_count": null,
610 | "metadata": {
611 | "attributes": {
612 | "classes": [
613 | "py"
614 | ],
615 | "id": ""
616 | },
617 | "id": "7g1yWiSXqEf-"
618 | },
619 | "outputs": [],
620 | "source": [
621 | "### Function minimization with autograd and gradient descent ###\n",
622 | "\n",
623 | "# Initialize a random value for our intial x\n",
624 | "x = torch.randn(1)\n",
625 | "print(f\"Initializing x={x.item()}\")\n",
626 | "\n",
627 | "learning_rate = 1e-2 # Learning rate\n",
628 | "history = []\n",
629 | "x_f = 4 # Target value\n",
630 | "\n",
631 | "\n",
632 | "# We will run gradient descent for a number of iterations. At each iteration, we compute the loss,\n",
633 | "# compute the derivative of the loss with respect to x, and perform the update.\n",
634 | "for i in range(500):\n",
635 | " x = torch.tensor([x], requires_grad=True)\n",
636 | "\n",
637 | " # TODO: Compute the loss as the square of the difference between x and x_f\n",
638 | " loss = # TODO\n",
639 | "\n",
640 | " # Backpropagate through the loss to compute gradients\n",
641 | " loss.backward()\n",
642 | "\n",
643 | " # Update x with gradient descent\n",
644 | " x = x.item() - learning_rate * x.grad\n",
645 | "\n",
646 | " history.append(x.item())\n",
647 | "\n",
648 | "# Plot the evolution of x as we optimize toward x_f!\n",
649 | "plt.plot(history)\n",
650 | "plt.plot([0, 500], [x_f, x_f])\n",
651 | "plt.legend(('Predicted', 'True'))\n",
652 | "plt.xlabel('Iteration')\n",
653 | "plt.ylabel('x value')\n",
654 | "plt.show()\n"
655 | ]
656 | },
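  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "In practice (an aside, not part of the original lab), you would let an optimizer such as `torch.optim.SGD` apply the update instead of re-wrapping `x` by hand. A minimal sketch of the same minimization:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "x = torch.randn(1, requires_grad=True)\n",
    "optimizer = torch.optim.SGD([x], lr=1e-2)\n",
    "\n",
    "for i in range(500):\n",
    "    optimizer.zero_grad()      # clear the gradient from the previous step\n",
    "    loss = (x - 4.0) ** 2      # L = (x - x_f)^2 with x_f = 4\n",
    "    loss.backward()            # compute dL/dx\n",
    "    optimizer.step()           # gradient descent update on x\n",
    "\n",
    "print(x.item())  # should be close to 4.0"
   ]
  },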
657 | {
658 | "cell_type": "markdown",
659 | "metadata": {
660 | "id": "pC7czCwk3ceH"
661 | },
662 | "source": [
663 | "Now, we have covered the fundamental concepts of PyTorch -- tensors, operations, neural networks, and automatic differentiation. Fire!!\n"
664 | ]
665 | }
666 | ],
667 | "metadata": {
668 | "accelerator": "GPU",
669 | "colab": {
670 | "collapsed_sections": [
671 | "WBk0ZDWY-ff8"
672 | ],
673 | "name": "PT_Part1_Intro.ipynb",
674 | "provenance": []
675 | },
676 | "kernelspec": {
677 | "display_name": "Python 3",
678 | "language": "python",
679 | "name": "python3"
680 | },
681 | "language_info": {
682 | "codemirror_mode": {
683 | "name": "ipython",
684 | "version": 3
685 | },
686 | "file_extension": ".py",
687 | "mimetype": "text/x-python",
688 | "name": "python",
689 | "nbconvert_exporter": "python",
690 | "pygments_lexer": "ipython3",
691 | "version": "3.10.6"
692 | },
693 | "vscode": {
694 | "interpreter": {
695 | "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
696 | }
697 | }
698 | },
699 | "nbformat": 4,
700 | "nbformat_minor": 0
701 | }
702 |
--------------------------------------------------------------------------------
/lab1/README.md:
--------------------------------------------------------------------------------
1 | # MIT 6.S191 Lab 1: Intro to Deep Learning in Python and Music Generation with RNNs
2 |
3 | ![Music waveform](img/music_waveform.png)
4 | ## Part 1: Intro to Deep Learning in Python -- TensorFlow and PyTorch
5 | TensorFlow ("TF") and PyTorch ("PT") are software libraries used in machine learning. Here we'll learn how computations are represented and how to define simple neural networks in TensorFlow and PyTorch. The TensorFlow labs will be prefixed by `TF`; PyTorch labs will be prefixed by `PT`.
6 |
7 | TensorFlow uses a high-level API called [Keras](https://www.tensorflow.org/guide/keras) that provides a powerful, intuitive framework for building and training deep learning models. In the TensorFlow Intro (`TF_Part1_Intro`) you will learn the basics of computations in TensorFlow, the Keras API, and TensorFlow 2.0's imperative execution style.
8 |
9 | [PyTorch](https://pytorch.org/) is a popular deep learning library known for its flexibility, ease of use, and dynamic execution. In the PyTorch Intro (`PT_Part1_Intro`) you will learn the basics of computations in PyTorch and how to define neural networks using either the sequential API or `torch.nn.Module` subclassing.
10 |
11 | ## Part 2: Music Generation with RNNs
12 | In the second portion of the lab, we will play around with building a Recurrent Neural Network (RNN) for music generation. We will be using a "character RNN" to predict the next character of sheet music in ABC notation. Finally, we will sample from this model to generate a brand new music file that has never been heard before!
13 |
14 |
--------------------------------------------------------------------------------
/lab1/TF_Part1_Intro.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "id": "WBk0ZDWY-ff8"
7 | },
8 | "source": [
9 | "\n",
18 | "\n",
19 | "# Copyright Information\n"
20 | ]
21 | },
22 | {
23 | "cell_type": "code",
24 | "execution_count": null,
25 | "metadata": {
26 | "id": "3eI6DUic-6jo"
27 | },
28 | "outputs": [],
29 | "source": [
30 | "# Copyright 2025 MIT Introduction to Deep Learning. All Rights Reserved.\n",
31 | "#\n",
32 | "# Licensed under the MIT License. You may not use this file except in compliance\n",
33 | "# with the License. Use and/or modification of this code outside of MIT Introduction\n",
34 | "# to Deep Learning must reference:\n",
35 | "#\n",
36 | "# © MIT Introduction to Deep Learning\n",
37 | "# http://introtodeeplearning.com\n",
38 | "#"
39 | ]
40 | },
41 | {
42 | "cell_type": "markdown",
43 | "metadata": {
44 | "id": "57knM8jrYZ2t"
45 | },
46 | "source": [
47 | "# Lab 1: Intro to TensorFlow and Music Generation with RNNs\n",
48 | "\n",
49 | "In this lab, you'll get exposure to using TensorFlow and learn how it can be used for solving deep learning tasks. Go through the code and run each cell. Along the way, you'll encounter several ***TODO*** blocks -- follow the instructions to fill them out before running those cells and continuing.\n",
50 | "\n",
51 | "\n",
52 | "# Part 1: Intro to TensorFlow\n",
53 | "\n",
54 | "## 0.1 Install TensorFlow\n",
55 | "\n",
56 | "TensorFlow is a software library extensively used in machine learning. Here we'll learn how computations are represented and how to define a simple neural network in TensorFlow. For all the TensorFlow labs in Introduction to Deep Learning 2025, we'll be using TensorFlow 2, which affords great flexibility and the ability to imperatively execute operations, just like in Python. You'll notice that TensorFlow 2 is quite similar to Python in its syntax and imperative execution. Let's install TensorFlow and a couple of dependencies.\n"
57 | ]
58 | },
59 | {
60 | "cell_type": "code",
61 | "execution_count": null,
62 | "metadata": {
63 | "id": "LkaimNJfYZ2w"
64 | },
65 | "outputs": [],
66 | "source": [
67 | "import tensorflow as tf\n",
68 | "\n",
69 | "# Download and import the MIT Introduction to Deep Learning package\n",
70 | "!pip install mitdeeplearning --quiet\n",
71 | "import mitdeeplearning as mdl\n",
72 | "\n",
73 | "import numpy as np\n",
74 | "import matplotlib.pyplot as plt"
75 | ]
76 | },
77 | {
78 | "cell_type": "markdown",
79 | "metadata": {
80 | "id": "2QNMcdP4m3Vs"
81 | },
82 | "source": [
83 | "## 1.1 Why is TensorFlow called TensorFlow?\n",
84 | "\n",
85 | "TensorFlow is called 'TensorFlow' because it handles the flow (node/mathematical operation) of Tensors, which are data structures that you can think of as multi-dimensional arrays. Tensors are represented as n-dimensional arrays of base dataypes such as a string or integer -- they provide a way to generalize vectors and matrices to higher dimensions.\n",
86 | "\n",
87 | "The ```shape``` of a Tensor defines its number of dimensions and the size of each dimension. The ```rank``` of a Tensor provides the number of dimensions (n-dimensions) -- you can also think of this as the Tensor's order or degree.\n",
88 | "\n",
89 | "Let's first look at 0-d Tensors, of which a scalar is an example:"
90 | ]
91 | },
92 | {
93 | "cell_type": "code",
94 | "execution_count": null,
95 | "metadata": {
96 | "id": "tFxztZQInlAB"
97 | },
98 | "outputs": [],
99 | "source": [
100 | "sport = tf.constant(\"Tennis\", tf.string)\n",
101 | "number = tf.constant(1.41421356237, tf.float64)\n",
102 | "\n",
103 | "print(\"`sport` is a {}-d Tensor\".format(tf.rank(sport).numpy()))\n",
104 | "print(\"`number` is a {}-d Tensor\".format(tf.rank(number).numpy()))"
105 | ]
106 | },
107 | {
108 | "cell_type": "markdown",
109 | "metadata": {
110 | "id": "-dljcPUcoJZ6"
111 | },
112 | "source": [
113 | "Vectors and lists can be used to create 1-d Tensors:"
114 | ]
115 | },
116 | {
117 | "cell_type": "code",
118 | "execution_count": null,
119 | "metadata": {
120 | "id": "oaHXABe8oPcO"
121 | },
122 | "outputs": [],
123 | "source": [
124 | "sports = tf.constant([\"Tennis\", \"Basketball\"], tf.string)\n",
125 | "numbers = tf.constant([3.141592, 1.414213, 2.71821], tf.float64)\n",
126 | "\n",
127 | "print(\"`sports` is a {}-d Tensor with shape: {}\".format(tf.rank(sports).numpy(), tf.shape(sports)))\n",
128 | "print(\"`numbers` is a {}-d Tensor with shape: {}\".format(tf.rank(numbers).numpy(), tf.shape(numbers)))"
129 | ]
130 | },
131 | {
132 | "cell_type": "markdown",
133 | "metadata": {
134 | "id": "gvffwkvtodLP"
135 | },
136 | "source": [
137 | "Next we consider creating 2-d (i.e., matrices) and higher-rank Tensors. For examples, in future labs involving image processing and computer vision, we will use 4-d Tensors. Here the dimensions correspond to the number of example images in our batch, image height, image width, and the number of color channels."
138 | ]
139 | },
140 | {
141 | "cell_type": "code",
142 | "execution_count": null,
143 | "metadata": {
144 | "id": "tFeBBe1IouS3"
145 | },
146 | "outputs": [],
147 | "source": [
148 | "### Defining higher-order Tensors ###\n",
149 | "\n",
150 | "'''TODO: Define a 2-d Tensor'''\n",
151 | "matrix = # TODO\n",
152 | "\n",
153 | "assert isinstance(matrix, tf.Tensor), \"matrix must be a tf Tensor object\"\n",
154 | "assert tf.rank(matrix).numpy() == 2"
155 | ]
156 | },
157 | {
158 | "cell_type": "code",
159 | "execution_count": null,
160 | "metadata": {
161 | "id": "Zv1fTn_Ya_cz"
162 | },
163 | "outputs": [],
164 | "source": [
165 | "'''TODO: Define a 4-d Tensor.'''\n",
166 | "# Use tf.zeros to initialize a 4-d Tensor of zeros with size 10 x 256 x 256 x 3.\n",
167 | "# You can think of this as 10 images where each image is RGB 256 x 256.\n",
168 | "images = # TODO\n",
169 | "\n",
170 | "assert isinstance(images, tf.Tensor), \"matrix must be a tf Tensor object\"\n",
171 | "assert tf.rank(images).numpy() == 4, \"matrix must be of rank 4\"\n",
172 | "assert tf.shape(images).numpy().tolist() == [10, 256, 256, 3], \"matrix is incorrect shape\""
173 | ]
174 | },
175 | {
176 | "cell_type": "markdown",
177 | "metadata": {
178 | "id": "wkaCDOGapMyl"
179 | },
180 | "source": [
181 | "As you have seen, the ```shape``` of a Tensor provides the number of elements in each Tensor dimension. The ```shape``` is quite useful, and we'll use it often. You can also use slicing to access subtensors within a higher-rank Tensor:"
182 | ]
183 | },
184 | {
185 | "cell_type": "code",
186 | "execution_count": null,
187 | "metadata": {
188 | "id": "FhaufyObuLEG"
189 | },
190 | "outputs": [],
191 | "source": [
192 | "row_vector = matrix[1]\n",
193 | "column_vector = matrix[:,1]\n",
194 | "scalar = matrix[0, 1]\n",
195 | "\n",
196 | "print(\"`row_vector`: {}\".format(row_vector.numpy()))\n",
197 | "print(\"`column_vector`: {}\".format(column_vector.numpy()))\n",
198 | "print(\"`scalar`: {}\".format(scalar.numpy()))"
199 | ]
200 | },
201 | {
202 | "cell_type": "markdown",
203 | "metadata": {
204 | "id": "iD3VO-LZYZ2z"
205 | },
206 | "source": [
207 | "## 1.2 Computations on Tensors\n",
208 | "\n",
209 | "A convenient way to think about and visualize computations in TensorFlow is in terms of graphs. We can define this graph in terms of Tensors, which hold data, and the mathematical operations that act on these Tensors in some order. Let's look at a simple example, and define this computation using TensorFlow:\n",
210 | "\n",
211 | ""
212 | ]
213 | },
214 | {
215 | "cell_type": "code",
216 | "execution_count": null,
217 | "metadata": {
218 | "id": "X_YJrZsxYZ2z"
219 | },
220 | "outputs": [],
221 | "source": [
222 | "# Create the nodes in the graph, and initialize values\n",
223 | "a = tf.constant(15)\n",
224 | "b = tf.constant(61)\n",
225 | "\n",
226 | "# Add them!\n",
227 | "c1 = tf.add(a,b)\n",
228 | "c2 = a + b # TensorFlow overrides the \"+\" operation so that it is able to act on Tensors\n",
229 | "print(c1)\n",
230 | "print(c2)"
231 | ]
232 | },
233 | {
234 | "cell_type": "markdown",
235 | "metadata": {
236 | "id": "Mbfv_QOiYZ23"
237 | },
238 | "source": [
239 | "Notice how we've created a computation graph consisting of TensorFlow operations, and how the output is a Tensor with value 76 -- we've just created a computation graph consisting of operations, and it's executed them and given us back the result.\n",
240 | "\n",
241 | "Now let's consider a slightly more complicated example:\n",
242 | "\n",
243 | "\n",
244 | "\n",
245 | "Here, we take two inputs, `a, b`, and compute an output `e`. Each node in the graph represents an operation that takes some input, does some computation, and passes its output to another node.\n",
246 | "\n",
247 | "Let's define a simple function in TensorFlow to construct this computation function:"
248 | ]
249 | },
250 | {
251 | "cell_type": "code",
252 | "execution_count": null,
253 | "metadata": {
254 | "id": "PJnfzpWyYZ23",
255 | "scrolled": true
256 | },
257 | "outputs": [],
258 | "source": [
259 | "### Defining Tensor computations ###\n",
260 | "\n",
261 | "# Construct a simple computation function\n",
262 | "def func(a,b):\n",
263 | " '''TODO: Define the operation for c, d, e (use tf.add, tf.subtract, tf.multiply).'''\n",
264 | " c = # TODO\n",
265 | " d = # TODO\n",
266 | " e = # TODO\n",
267 | " return e"
268 | ]
269 | },
270 | {
271 | "cell_type": "markdown",
272 | "metadata": {
273 | "id": "AwrRfDMS2-oy"
274 | },
275 | "source": [
276 | "Now, we can call this function to execute the computation graph given some inputs `a,b`:"
277 | ]
278 | },
279 | {
280 | "cell_type": "code",
281 | "execution_count": null,
282 | "metadata": {
283 | "id": "pnwsf8w2uF7p"
284 | },
285 | "outputs": [],
286 | "source": [
287 | "# Consider example values for a,b\n",
288 | "a, b = 1.5, 2.5\n",
289 | "# Execute the computation\n",
290 | "e_out = func(a,b)\n",
291 | "print(e_out)"
292 | ]
293 | },
294 | {
295 | "cell_type": "markdown",
296 | "metadata": {
297 | "id": "6HqgUIUhYZ29"
298 | },
299 | "source": [
300 | "Notice how our output is a Tensor with value defined by the output of the computation, and that the output has no shape as it is a single scalar value."
301 | ]
302 | },
303 | {
304 | "cell_type": "markdown",
305 | "metadata": {
306 | "id": "1h4o9Bb0YZ29"
307 | },
308 | "source": [
309 | "## 1.3 Neural networks in TensorFlow\n",
310 | "We can also define neural networks in TensorFlow. TensorFlow uses a high-level API called [Keras](https://www.tensorflow.org/guide/keras) that provides a powerful, intuitive framework for building and training deep learning models.\n",
311 | "\n",
312 | "Let's first consider the example of a simple perceptron defined by just one dense layer: $ y = \\sigma(Wx + b)$, where $W$ represents a matrix of weights, $b$ is a bias, $x$ is the input, $\\sigma$ is the sigmoid activation function, and $y$ is the output. We can also visualize this operation using a graph:\n",
313 | "\n",
314 | "\n",
315 | "\n",
316 | "Tensors can flow through abstract types called [```Layers```](https://www.tensorflow.org/api_docs/python/tf/keras/layers/Layer) -- the building blocks of neural networks. ```Layers``` implement common neural networks operations, and are used to update weights, compute losses, and define inter-layer connectivity. We will first define a ```Layer``` to implement the simple perceptron defined above."
317 | ]
318 | },
319 | {
320 | "cell_type": "code",
321 | "execution_count": null,
322 | "metadata": {
323 | "id": "HutbJk-1kHPh"
324 | },
325 | "outputs": [],
326 | "source": [
327 | "### Defining a network Layer ###\n",
328 | "\n",
329 | "# n_output_nodes: number of output nodes\n",
330 | "# input_shape: shape of the input\n",
331 | "# x: input to the layer\n",
332 | "\n",
333 | "class OurDenseLayer(tf.keras.layers.Layer):\n",
334 | " def __init__(self, n_output_nodes):\n",
335 | " super(OurDenseLayer, self).__init__()\n",
336 | " self.n_output_nodes = n_output_nodes\n",
337 | "\n",
338 | " def build(self, input_shape):\n",
339 | " d = int(input_shape[-1])\n",
340 | " # Define and initialize parameters: a weight matrix W and bias b\n",
341 | " # Note that parameter initialization is random!\n",
342 | " self.W = self.add_weight(\"weight\", shape=[d, self.n_output_nodes]) # note the dimensionality\n",
343 | " self.b = self.add_weight(\"bias\", shape=[1, self.n_output_nodes]) # note the dimensionality\n",
344 | "\n",
345 | " def call(self, x):\n",
346 | " '''TODO: define the operation for z (hint: use tf.matmul)'''\n",
347 | " z = # TODO\n",
348 | "\n",
349 | " '''TODO: define the operation for out (hint: use tf.sigmoid)'''\n",
350 | " y = # TODO\n",
351 | " return y\n",
352 | "\n",
353 | "# Since layer parameters are initialized randomly, we will set a random seed for reproducibility\n",
354 | "tf.keras.utils.set_random_seed(1)\n",
355 | "layer = OurDenseLayer(3)\n",
356 | "layer.build((1,2))\n",
357 | "x_input = tf.constant([[1,2.]], shape=(1,2))\n",
358 | "y = layer.call(x_input)\n",
359 | "\n",
360 | "# test the output!\n",
361 | "print(y.numpy())\n",
362 | "mdl.lab1.test_custom_dense_layer_output(y)"
363 | ]
364 | },
365 | {
366 | "cell_type": "markdown",
367 | "metadata": {
368 | "id": "Jt1FgM7qYZ3D"
369 | },
370 | "source": [
371 | "Conveniently, TensorFlow has defined a number of ```Layers``` that are commonly used in neural networks, for example a [```Dense```](https://www.tensorflow.org/api_docs/python/tf/keras/layers/Dense?version=stable). Now, instead of using a single ```Layer``` to define our simple neural network, we'll use the [`Sequential`](https://www.tensorflow.org/versions/r2.0/api_docs/python/tf/keras/Sequential) model from Keras and a single [`Dense` ](https://www.tensorflow.org/versions/r2.0/api_docs/python/tf/keras/layers/Dense) layer to define our network. With the `Sequential` API, you can readily create neural networks by stacking together layers like building blocks."
372 | ]
373 | },
374 | {
375 | "cell_type": "code",
376 | "execution_count": null,
377 | "metadata": {
378 | "id": "7WXTpmoL6TDz"
379 | },
380 | "outputs": [],
381 | "source": [
382 | "### Defining a neural network using the Sequential API ###\n",
383 | "\n",
384 | "# Import relevant packages\n",
385 | "from tensorflow.keras import Sequential\n",
386 | "from tensorflow.keras.layers import Dense\n",
387 | "\n",
388 | "# Define the number of outputs\n",
389 | "n_output_nodes = 3\n",
390 | "\n",
391 | "# First define the model\n",
392 | "model = Sequential()\n",
393 | "\n",
394 | "'''TODO: Define a dense (fully connected) layer to compute z'''\n",
395 | "# Remember: dense layers are defined by the parameters W and b!\n",
396 | "# You can read more about the initialization of W and b in the TF documentation :)\n",
397 | "# https://www.tensorflow.org/api_docs/python/tf/keras/layers/Dense?version=stable\n",
398 | "dense_layer = # TODO\n",
399 | "\n",
400 | "# Add the dense layer to the model\n",
401 | "model.add(dense_layer)\n"
402 | ]
403 | },
404 | {
405 | "cell_type": "markdown",
406 | "metadata": {
407 | "id": "HDGcwYfUyR-U"
408 | },
409 | "source": [
410 | "That's it! We've defined our model using the Sequential API. Now, we can test it out using an example input:"
411 | ]
412 | },
413 | {
414 | "cell_type": "code",
415 | "execution_count": null,
416 | "metadata": {
417 | "id": "sg23OczByRDb"
418 | },
419 | "outputs": [],
420 | "source": [
421 | "# Test model with example input\n",
422 | "x_input = tf.constant([[1,2.]], shape=(1,2))\n",
423 | "\n",
424 | "'''TODO: feed input into the model and predict the output!'''\n",
425 | "model_output = # TODO\n",
426 | "print(model_output)"
427 | ]
428 | },
429 | {
430 | "cell_type": "markdown",
431 | "metadata": {
432 | "id": "596NvsOOtr9F"
433 | },
434 | "source": [
435 | "In addition to defining models using the `Sequential` API, we can also define neural networks by directly subclassing the [`Model`](https://www.tensorflow.org/api_docs/python/tf/keras/Model?version=stable) class, which groups layers together to enable model training and inference. The `Model` class captures what we refer to as a \"model\" or as a \"network\". Using Subclassing, we can create a class for our model, and then define the forward pass through the network using the `call` function. Subclassing affords the flexibility to define custom layers, custom training loops, custom activation functions, and custom models. Let's define the same neural network as above now using Subclassing rather than the `Sequential` model."
436 | ]
437 | },
438 | {
439 | "cell_type": "code",
440 | "execution_count": null,
441 | "metadata": {
442 | "id": "K4aCflPVyViD"
443 | },
444 | "outputs": [],
445 | "source": [
446 | "### Defining a model using subclassing ###\n",
447 | "\n",
448 | "from tensorflow.keras import Model\n",
449 | "from tensorflow.keras.layers import Dense\n",
450 | "\n",
451 | "class SubclassModel(tf.keras.Model):\n",
452 | "\n",
453 | " # In __init__, we define the Model's layers\n",
454 | " def __init__(self, n_output_nodes):\n",
455 | " super(SubclassModel, self).__init__()\n",
456 | " '''TODO: Our model consists of a single Dense layer. Define this layer.'''\n",
457 | " self.dense_layer = '''TODO: Dense Layer'''\n",
458 | "\n",
459 | " # In the call function, we define the Model's forward pass.\n",
460 | " def call(self, inputs):\n",
461 | " return self.dense_layer(inputs)"
462 | ]
463 | },
464 | {
465 | "cell_type": "markdown",
466 | "metadata": {
467 | "id": "U0-lwHDk4irB"
468 | },
469 | "source": [
470 | "Just like the model we built using the `Sequential` API, let's test out our `SubclassModel` using an example input.\n",
471 | "\n"
472 | ]
473 | },
474 | {
475 | "cell_type": "code",
476 | "execution_count": null,
477 | "metadata": {
478 | "id": "LhB34RA-4gXb"
479 | },
480 | "outputs": [],
481 | "source": [
482 | "n_output_nodes = 3\n",
483 | "model = SubclassModel(n_output_nodes)\n",
484 | "\n",
485 | "x_input = tf.constant([[1,2.]], shape=(1,2))\n",
486 | "\n",
487 | "print(model.call(x_input))"
488 | ]
489 | },
490 | {
491 | "cell_type": "markdown",
492 | "metadata": {
493 | "id": "HTIFMJLAzsyE"
494 | },
495 | "source": [
496 | "Importantly, Subclassing affords us a lot of flexibility to define custom models. For example, we can use boolean arguments in the `call` function to specify different network behaviors, for example different behaviors during training and inference. Let's suppose under some instances we want our network to simply output the input, without any perturbation. We define a boolean argument `isidentity` to control this behavior:"
497 | ]
498 | },
499 | {
500 | "cell_type": "code",
501 | "execution_count": null,
502 | "metadata": {
503 | "id": "P7jzGX5D1xT5"
504 | },
505 | "outputs": [],
506 | "source": [
507 | "### Defining a model using subclassing and specifying custom behavior ###\n",
508 | "\n",
509 | "from tensorflow.keras import Model\n",
510 | "from tensorflow.keras.layers import Dense\n",
511 | "\n",
512 | "class IdentityModel(tf.keras.Model):\n",
513 | "\n",
514 | " # As before, in __init__ we define the Model's layers\n",
515 | " # Since our desired behavior involves the forward pass, this part is unchanged\n",
516 | " def __init__(self, n_output_nodes):\n",
517 | " super(IdentityModel, self).__init__()\n",
518 | " self.dense_layer = tf.keras.layers.Dense(n_output_nodes, activation='sigmoid')\n",
519 | "\n",
520 | " '''TODO: Implement the behavior where the network outputs the input, unchanged, under control of the isidentity argument.'''\n",
521 | " def call(self, inputs, isidentity=False):\n",
522 | " ### TODO"
523 | ]
524 | },
525 | {
526 | "cell_type": "markdown",
527 | "metadata": {
528 | "id": "Ku4rcCGx5T3y"
529 | },
530 | "source": [
531 | "Let's test this behavior:"
532 | ]
533 | },
534 | {
535 | "cell_type": "code",
536 | "execution_count": null,
537 | "metadata": {
538 | "id": "NzC0mgbk5dp2"
539 | },
540 | "outputs": [],
541 | "source": [
542 | "n_output_nodes = 3\n",
543 | "model = IdentityModel(n_output_nodes)\n",
544 | "\n",
545 | "x_input = tf.constant([[1,2.]], shape=(1,2))\n",
546 | "'''TODO: pass the input into the model and call with and without the input identity option.'''\n",
547 | "out_activate = # TODO\n",
548 | "out_identity = # TODO\n",
549 | "\n",
550 | "print(\"Network output with activation: {}; network identity output: {}\".format(out_activate.numpy(), out_identity.numpy()))"
551 | ]
552 | },
553 | {
554 | "cell_type": "markdown",
555 | "metadata": {
556 | "id": "7V1dEqdk6VI5"
557 | },
558 | "source": [
559 | "Now that we have learned how to define `Layers` as well as neural networks in TensorFlow using both the `Sequential` and Subclassing APIs, we're ready to turn our attention to how to actually implement network training with backpropagation."
560 | ]
561 | },
562 | {
563 | "cell_type": "markdown",
564 | "metadata": {
565 | "id": "dQwDhKn8kbO2"
566 | },
567 | "source": [
568 | "## 1.4 Automatic differentiation in TensorFlow\n",
569 | "\n",
570 | "[Automatic differentiation](https://en.wikipedia.org/wiki/Automatic_differentiation)\n",
571 | "is one of the most important parts of TensorFlow and is the backbone of training with\n",
572 | "[backpropagation](https://en.wikipedia.org/wiki/Backpropagation). We will use the TensorFlow GradientTape [`tf.GradientTape`](https://www.tensorflow.org/api_docs/python/tf/GradientTape?version=stable) to trace operations for computing gradients later.\n",
573 | "\n",
574 | "When a forward pass is made through the network, all forward-pass operations get recorded to a \"tape\"; then, to compute the gradient, the tape is played backwards. By default, the tape is discarded after it is played backwards; this means that a particular `tf.GradientTape` can only\n",
575 | "compute one gradient, and subsequent calls throw a runtime error. However, we can compute multiple gradients over the same computation by creating a ```persistent``` gradient tape.\n",
576 | "\n",
577 | "First, we will look at how we can compute gradients using GradientTape and access them for computation. We define the simple function $ y = x^2$ and compute the gradient:"
578 | ]
579 | },
580 | {
581 | "cell_type": "code",
582 | "execution_count": null,
583 | "metadata": {
584 | "id": "tdkqk8pw5yJM"
585 | },
586 | "outputs": [],
587 | "source": [
588 | "### Gradient computation with GradientTape ###\n",
589 | "\n",
590 | "# y = x^2\n",
591 | "# Example: x = 3.0\n",
592 | "x = tf.Variable(3.0)\n",
593 | "\n",
594 | "# Initiate the gradient tape\n",
595 | "with tf.GradientTape() as tape:\n",
596 | " # Define the function\n",
597 | " y = x * x\n",
598 | "# Access the gradient -- derivative of y with respect to x\n",
599 | "dy_dx = tape.gradient(y, x)\n",
600 | "\n",
601 | "assert dy_dx.numpy() == 6.0"
602 | ]
603 | },
604 | {
605 | "cell_type": "markdown",
606 | "metadata": {
607 | "id": "JhU5metS5xF3"
608 | },
609 | "source": [
610 | "In training neural networks, we use differentiation and stochastic gradient descent (SGD) to optimize a loss function. Now that we have a sense of how `GradientTape` can be used to compute and access derivatives, we will look at an example where we use automatic differentiation and SGD to find the minimum of $L=(x-x_f)^2$. Here $x_f$ is a variable for a desired value we are trying to optimize for; $L$ represents a loss that we are trying to minimize. While we can clearly solve this problem analytically ($x_{min}=x_f$), considering how we can compute this using `GradientTape` sets us up nicely for future labs where we use gradient descent to optimize entire neural network losses."
611 | ]
612 | },
613 | {
614 | "cell_type": "code",
615 | "execution_count": null,
616 | "metadata": {
617 | "attributes": {
618 | "classes": [
619 | "py"
620 | ],
621 | "id": ""
622 | },
623 | "id": "7g1yWiSXqEf-"
624 | },
625 | "outputs": [],
626 | "source": [
627 | "### Function minimization with automatic differentiation and SGD ###\n",
628 | "\n",
629 | "# Initialize a random value for our initial x\n",
630 | "x = tf.Variable([tf.random.normal([1])])\n",
631 | "print(\"Initializing x={}\".format(x.numpy()))\n",
632 | "\n",
633 | "learning_rate = 1e-2 # learning rate for SGD\n",
634 | "history = []\n",
635 | "# Define the target value\n",
636 | "x_f = 4\n",
637 | "\n",
638 | "# We will run SGD for a number of iterations. At each iteration, we compute the loss,\n",
639 | "# compute the derivative of the loss with respect to x, and perform the SGD update.\n",
640 | "for i in range(500):\n",
641 | " with tf.GradientTape() as tape:\n",
642 | " '''TODO: define the loss as described above'''\n",
643 | " loss = # TODO\n",
644 | "\n",
645 | " # loss minimization using gradient tape\n",
646 | " grad = tape.gradient(loss, x) # compute the derivative of the loss with respect to x\n",
647 | " new_x = x - learning_rate*grad # sgd update\n",
648 | " x.assign(new_x) # update the value of x\n",
649 | " history.append(x.numpy()[0])\n",
650 | "\n",
651 | "# Plot the evolution of x as we optimize towards x_f!\n",
652 | "plt.plot(history)\n",
653 | "plt.plot([0, 500],[x_f,x_f])\n",
654 | "plt.legend(('Predicted', 'True'))\n",
655 | "plt.xlabel('Iteration')\n",
656 | "plt.ylabel('x value')"
657 | ]
658 | },
659 | {
660 | "cell_type": "markdown",
661 | "metadata": {
662 | "id": "pC7czCwk3ceH"
663 | },
664 | "source": [
665 | "`GradientTape` provides an extremely flexible framework for automatic differentiation. In order to backpropagate errors through a neural network, we track the forward pass on the tape, use this information to determine the gradients, and then use these gradients for optimization with SGD.\n"
666 | ]
667 | }
668 | ],
669 | "metadata": {
670 | "accelerator": "GPU",
671 | "colab": {
672 | "collapsed_sections": [
673 | "WBk0ZDWY-ff8"
674 | ],
675 | "name": "TF_Part1_Intro.ipynb",
676 | "provenance": []
677 | },
678 | "kernelspec": {
679 | "display_name": "Python 3",
680 | "language": "python",
681 | "name": "python3"
682 | },
683 | "language_info": {
684 | "name": "python",
685 | "version": "3.9.6"
686 | },
687 | "vscode": {
688 | "interpreter": {
689 | "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
690 | }
691 | }
692 | },
693 | "nbformat": 4,
694 | "nbformat_minor": 0
695 | }
696 |
--------------------------------------------------------------------------------
/lab1/img/add-graph.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/2e90e0461ffa952f5855d4a89a32742796794527/lab1/img/add-graph.png
--------------------------------------------------------------------------------
/lab1/img/computation-graph-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/2e90e0461ffa952f5855d4a89a32742796794527/lab1/img/computation-graph-2.png
--------------------------------------------------------------------------------
/lab1/img/computation-graph.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/2e90e0461ffa952f5855d4a89a32742796794527/lab1/img/computation-graph.png
--------------------------------------------------------------------------------
/lab1/img/lab1ngram.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/2e90e0461ffa952f5855d4a89a32742796794527/lab1/img/lab1ngram.png
--------------------------------------------------------------------------------
/lab1/img/lstm_inference.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/2e90e0461ffa952f5855d4a89a32742796794527/lab1/img/lstm_inference.png
--------------------------------------------------------------------------------
/lab1/img/lstm_unrolled-01-01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/2e90e0461ffa952f5855d4a89a32742796794527/lab1/img/lstm_unrolled-01-01.png
--------------------------------------------------------------------------------
/lab1/img/lstm_unrolled-01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/2e90e0461ffa952f5855d4a89a32742796794527/lab1/img/lstm_unrolled-01.png
--------------------------------------------------------------------------------
/lab1/img/lstm_unrolled.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/2e90e0461ffa952f5855d4a89a32742796794527/lab1/img/lstm_unrolled.png
--------------------------------------------------------------------------------
/lab1/img/music_waveform.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/2e90e0461ffa952f5855d4a89a32742796794527/lab1/img/music_waveform.png
--------------------------------------------------------------------------------
/lab1/solutions/TF_Part1_Intro_Solution.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "id": "WBk0ZDWY-ff8"
7 | },
8 | "source": [
9 | "\n",
18 | "\n",
19 | "# Copyright Information\n"
20 | ]
21 | },
22 | {
23 | "cell_type": "code",
24 | "execution_count": null,
25 | "metadata": {
26 | "id": "3eI6DUic-6jo"
27 | },
28 | "outputs": [],
29 | "source": [
30 | "# Copyright 2025 MIT Introduction to Deep Learning. All Rights Reserved.\n",
31 | "#\n",
32 | "# Licensed under the MIT License. You may not use this file except in compliance\n",
33 | "# with the License. Use and/or modification of this code outside of MIT Introduction\n",
34 | "# to Deep Learning must reference:\n",
35 | "#\n",
36 | "# © MIT Introduction to Deep Learning\n",
37 | "# http://introtodeeplearning.com\n",
38 | "#"
39 | ]
40 | },
41 | {
42 | "cell_type": "markdown",
43 | "metadata": {
44 | "id": "57knM8jrYZ2t"
45 | },
46 | "source": [
47 | "# Lab 1: Intro to TensorFlow and Music Generation with RNNs\n",
48 | "\n",
49 | "In this lab, you'll get exposure to using TensorFlow and learn how it can be used for solving deep learning tasks. Go through the code and run each cell. Along the way, you'll encounter several ***TODO*** blocks -- follow the instructions to fill them out before running those cells and continuing.\n",
50 | "\n",
51 | "\n",
52 | "# Part 1: Intro to TensorFlow\n",
53 | "\n",
54 | "## 0.1 Install TensorFlow\n",
55 | "\n",
56 | "TensorFlow is a software library extensively used in machine learning. Here we'll learn how computations are represented and how to define a simple neural network in TensorFlow. For all the TensorFlow labs in Introduction to Deep Learning 2025, we'll be using TensorFlow 2, which affords great flexibility and the ability to execute operations imperatively, just as in Python -- you'll notice that its syntax and execution feel quite familiar. Let's install TensorFlow and a couple of dependencies.\n"
57 | ]
58 | },
59 | {
60 | "cell_type": "code",
61 | "execution_count": null,
62 | "metadata": {
63 | "id": "LkaimNJfYZ2w"
64 | },
65 | "outputs": [],
66 | "source": [
67 | "import tensorflow as tf\n",
68 | "\n",
69 | "# Download and import the MIT Introduction to Deep Learning package\n",
70 | "!pip install mitdeeplearning --quiet\n",
71 | "import mitdeeplearning as mdl\n",
72 | "\n",
73 | "import numpy as np\n",
74 | "import matplotlib.pyplot as plt"
75 | ]
76 | },
77 | {
78 | "cell_type": "markdown",
79 | "metadata": {
80 | "id": "2QNMcdP4m3Vs"
81 | },
82 | "source": [
83 | "## 1.1 Why is TensorFlow called TensorFlow?\n",
84 | "\n",
85 | "TensorFlow is called 'TensorFlow' because it handles the flow of Tensors through graphs of nodes (mathematical operations). Tensors are data structures that you can think of as multi-dimensional arrays: they are represented as n-dimensional arrays of base datatypes such as a string or integer, and they provide a way to generalize vectors and matrices to higher dimensions.\n",
86 | "\n",
87 | "The ```shape``` of a Tensor defines its number of dimensions and the size of each dimension. The ```rank``` of a Tensor provides the number of dimensions (n-dimensions) -- you can also think of this as the Tensor's order or degree.\n",
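88 | "\n",
89 | "For instance (an illustrative check):\n",
90 | "\n",
91 | "```python\n",
92 | "t = tf.zeros([2, 3])\n",
93 | "print(tf.rank(t).numpy())  # 2 -- the number of dimensions\n",
94 | "print(t.shape)             # (2, 3) -- the size of each dimension\n",
95 | "```\n",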
88 | "\n",
89 | "Let's first look at 0-d Tensors, of which a scalar is an example:"
90 | ]
91 | },
92 | {
93 | "cell_type": "code",
94 | "execution_count": null,
95 | "metadata": {
96 | "id": "tFxztZQInlAB"
97 | },
98 | "outputs": [],
99 | "source": [
100 | "sport = tf.constant(\"Tennis\", tf.string)\n",
101 | "number = tf.constant(1.41421356237, tf.float64)\n",
102 | "\n",
103 | "print(\"`sport` is a {}-d Tensor\".format(tf.rank(sport).numpy()))\n",
104 | "print(\"`number` is a {}-d Tensor\".format(tf.rank(number).numpy()))"
105 | ]
106 | },
107 | {
108 | "cell_type": "markdown",
109 | "metadata": {
110 | "id": "-dljcPUcoJZ6"
111 | },
112 | "source": [
113 | "Vectors and lists can be used to create 1-d Tensors:"
114 | ]
115 | },
116 | {
117 | "cell_type": "code",
118 | "execution_count": null,
119 | "metadata": {
120 | "id": "oaHXABe8oPcO"
121 | },
122 | "outputs": [],
123 | "source": [
124 | "sports = tf.constant([\"Tennis\", \"Basketball\"], tf.string)\n",
125 | "numbers = tf.constant([3.141592, 1.414213, 2.718281], tf.float64)\n",
126 | "\n",
127 | "print(\"`sports` is a {}-d Tensor with shape: {}\".format(tf.rank(sports).numpy(), tf.shape(sports)))\n",
128 | "print(\"`numbers` is a {}-d Tensor with shape: {}\".format(tf.rank(numbers).numpy(), tf.shape(numbers)))"
129 | ]
130 | },
131 | {
132 | "cell_type": "markdown",
133 | "metadata": {
134 | "id": "gvffwkvtodLP"
135 | },
136 | "source": [
137 | "Next we consider creating 2-d (i.e., matrices) and higher-rank Tensors. For example, in future labs involving image processing and computer vision, we will use 4-d Tensors. Here the dimensions correspond to the number of example images in our batch, image height, image width, and the number of color channels."
138 | ]
139 | },
140 | {
141 | "cell_type": "code",
142 | "execution_count": null,
143 | "metadata": {
144 | "id": "tFeBBe1IouS3"
145 | },
146 | "outputs": [],
147 | "source": [
148 | "### Defining higher-order Tensors ###\n",
149 | "\n",
150 | "'''TODO: Define a 2-d Tensor'''\n",
151 | "matrix = tf.constant([[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0]]) # TODO\n",
152 | "# matrix = # TODO\n",
153 | "\n",
154 | "assert isinstance(matrix, tf.Tensor), \"matrix must be a tf Tensor object\"\n",
155 | "assert tf.rank(matrix).numpy() == 2"
156 | ]
157 | },
158 | {
159 | "cell_type": "code",
160 | "execution_count": null,
161 | "metadata": {
162 | "id": "Zv1fTn_Ya_cz"
163 | },
164 | "outputs": [],
165 | "source": [
166 | "'''TODO: Define a 4-d Tensor.'''\n",
167 | "# Use tf.zeros to initialize a 4-d Tensor of zeros with size 10 x 256 x 256 x 3.\n",
168 | "# You can think of this as 10 images where each image is RGB 256 x 256.\n",
169 | "images = tf.zeros([10, 256, 256, 3]) # TODO\n",
170 | "# images = # TODO\n",
171 | "\n",
172 | "assert isinstance(images, tf.Tensor), \"images must be a tf Tensor object\"\n",
173 | "assert tf.rank(images).numpy() == 4, \"images must be of rank 4\"\n",
174 | "assert tf.shape(images).numpy().tolist() == [10, 256, 256, 3], \"images is incorrect shape\""
175 | ]
176 | },
177 | {
178 | "cell_type": "markdown",
179 | "metadata": {
180 | "id": "wkaCDOGapMyl"
181 | },
182 | "source": [
183 | "As you have seen, the ```shape``` of a Tensor provides the number of elements in each Tensor dimension. The ```shape``` is quite useful, and we'll use it often. You can also use slicing to access subtensors within a higher-rank Tensor:"
184 | ]
185 | },
186 | {
187 | "cell_type": "code",
188 | "execution_count": null,
189 | "metadata": {
190 | "id": "FhaufyObuLEG"
191 | },
192 | "outputs": [],
193 | "source": [
194 | "row_vector = matrix[1]\n",
195 | "column_vector = matrix[:,1]\n",
196 | "scalar = matrix[0, 1]\n",
197 | "\n",
198 | "print(\"`row_vector`: {}\".format(row_vector.numpy()))\n",
199 | "print(\"`column_vector`: {}\".format(column_vector.numpy()))\n",
200 | "print(\"`scalar`: {}\".format(scalar.numpy()))"
201 | ]
202 | },
203 | {
204 | "cell_type": "markdown",
205 | "metadata": {
206 | "id": "iD3VO-LZYZ2z"
207 | },
208 | "source": [
209 | "## 1.2 Computations on Tensors\n",
210 | "\n",
211 | "A convenient way to think about and visualize computations in TensorFlow is in terms of graphs. We can define this graph in terms of Tensors, which hold data, and the mathematical operations that act on these Tensors in some order. Let's look at a simple example, and define this computation using TensorFlow:\n",
212 | "\n",
213 | ""
214 | ]
215 | },
216 | {
217 | "cell_type": "code",
218 | "execution_count": null,
219 | "metadata": {
220 | "id": "X_YJrZsxYZ2z"
221 | },
222 | "outputs": [],
223 | "source": [
224 | "# Create the nodes in the graph, and initialize values\n",
225 | "a = tf.constant(15)\n",
226 | "b = tf.constant(61)\n",
227 | "\n",
228 | "# Add them!\n",
229 | "c1 = tf.add(a,b)\n",
230 | "c2 = a + b # TensorFlow overrides the \"+\" operation so that it is able to act on Tensors\n",
231 | "print(c1)\n",
232 | "print(c2)"
233 | ]
234 | },
235 | {
236 | "cell_type": "markdown",
237 | "metadata": {
238 | "id": "Mbfv_QOiYZ23"
239 | },
240 | "source": [
241 | "Notice how we've created a computation graph consisting of TensorFlow operations, and how the output is a Tensor with value 76 -- TensorFlow has executed the operations and given us back the result.\n",
242 | "\n",
243 | "Now let's consider a slightly more complicated example:\n",
244 | "\n",
245 | "\n",
246 | "\n",
247 | "Here, we take two inputs, `a, b`, and compute an output `e`. Each node in the graph represents an operation that takes some input, does some computation, and passes its output to another node.\n",
248 | "\n",
249 | "Let's define a simple function in TensorFlow to construct this computation function:"
250 | ]
251 | },
252 | {
253 | "cell_type": "code",
254 | "execution_count": null,
255 | "metadata": {
256 | "id": "PJnfzpWyYZ23",
257 | "scrolled": true
258 | },
259 | "outputs": [],
260 | "source": [
261 | "### Defining Tensor computations ###\n",
262 | "\n",
263 | "# Construct a simple computation function\n",
264 | "def func(a,b):\n",
265 | " '''TODO: Define the operation for c, d, e (use tf.add, tf.subtract, tf.multiply).'''\n",
266 | " c = tf.add(a, b)\n",
267 | " # c = # TODO\n",
268 | " d = tf.subtract(b, 1)\n",
269 | " # d = # TODO\n",
270 | " e = tf.multiply(c, d)\n",
271 | " # e = # TODO\n",
272 | " return e"
273 | ]
274 | },
275 | {
276 | "cell_type": "markdown",
277 | "metadata": {
278 | "id": "AwrRfDMS2-oy"
279 | },
280 | "source": [
281 | "Now, we can call this function to execute the computation graph given some inputs `a,b`:"
282 | ]
283 | },
284 | {
285 | "cell_type": "code",
286 | "execution_count": null,
287 | "metadata": {
288 | "id": "pnwsf8w2uF7p"
289 | },
290 | "outputs": [],
291 | "source": [
292 | "# Consider example values for a,b\n",
293 | "a, b = 1.5, 2.5\n",
294 | "# Execute the computation\n",
295 | "e_out = func(a,b)\n",
296 | "print(e_out)"
297 | ]
298 | },
299 | {
300 | "cell_type": "markdown",
301 | "metadata": {
302 | "id": "6HqgUIUhYZ29"
303 | },
304 | "source": [
305 | "Notice how our output is a Tensor whose value is defined by the output of the computation, and that, since the output is a single scalar value, its shape is empty."
306 | ]
307 | },
308 | {
309 | "cell_type": "markdown",
310 | "metadata": {
311 | "id": "1h4o9Bb0YZ29"
312 | },
313 | "source": [
314 | "## 1.3 Neural networks in TensorFlow\n",
315 | "We can also define neural networks in TensorFlow. TensorFlow uses a high-level API called [Keras](https://www.tensorflow.org/guide/keras) that provides a powerful, intuitive framework for building and training deep learning models.\n",
316 | "\n",
317 | "Let's first consider the example of a simple perceptron defined by just one dense layer: $ y = \\sigma(Wx + b)$, where $W$ represents a matrix of weights, $b$ is a bias, $x$ is the input, $\\sigma$ is the sigmoid activation function, and $y$ is the output. We can also visualize this operation using a graph:\n",
318 | "\n",
319 | "\n",
320 | "\n",
321 | "Tensors can flow through abstract types called [```Layers```](https://www.tensorflow.org/api_docs/python/tf/keras/layers/Layer) -- the building blocks of neural networks. ```Layers``` implement common neural network operations, and are used to update weights, compute losses, and define inter-layer connectivity. We will first define a ```Layer``` to implement the simple perceptron defined above."
322 | ]
323 | },
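324 |   {
325 |    "cell_type": "markdown",
326 |    "metadata": {},
327 |    "source": [
328 |     "As a quick numerical sketch (with hand-picked, illustrative values), we can compute $\\sigma(Wx + b)$ directly with TensorFlow ops before wrapping it in a `Layer`:\n",
329 |     "\n",
330 |     "```python\n",
331 |     "W = tf.constant([[1.0], [2.0]])   # weights: 2 inputs -> 1 output\n",
332 |     "b = tf.constant([[0.5]])          # bias\n",
333 |     "x = tf.constant([[1.0, 2.0]])     # a single example with 2 features\n",
334 |     "y = tf.sigmoid(tf.matmul(x, W) + b)  # sigmoid(1*1 + 2*2 + 0.5) = sigmoid(5.5) ~= 0.996\n",
335 |     "```"
336 |    ]
337 |   },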
324 | {
325 | "cell_type": "code",
326 | "execution_count": null,
327 | "metadata": {
328 | "id": "HutbJk-1kHPh"
329 | },
330 | "outputs": [],
331 | "source": [
332 | "### Defining a network Layer ###\n",
333 | "\n",
334 | "# n_output_nodes: number of output nodes\n",
335 | "# input_shape: shape of the input\n",
336 | "# x: input to the layer\n",
337 | "\n",
338 | "class OurDenseLayer(tf.keras.layers.Layer):\n",
339 | " def __init__(self, n_output_nodes):\n",
340 | " super(OurDenseLayer, self).__init__()\n",
341 | " self.n_output_nodes = n_output_nodes\n",
342 | "\n",
343 | " def build(self, input_shape):\n",
344 | " d = int(input_shape[-1])\n",
345 | " # Define and initialize parameters: a weight matrix W and bias b\n",
346 | " # Note that parameter initialization is random!\n",
347 | " self.W = self.add_weight(\"weight\", shape=[d, self.n_output_nodes]) # note the dimensionality\n",
348 | " self.b = self.add_weight(\"bias\", shape=[1, self.n_output_nodes]) # note the dimensionality\n",
349 | "\n",
350 | " def call(self, x):\n",
351 | " '''TODO: define the operation for z (hint: use tf.matmul)'''\n",
352 | " z = tf.matmul(x, self.W) + self.b # TODO\n",
353 | " # z = # TODO\n",
354 | "\n",
355 | " '''TODO: define the operation for out (hint: use tf.sigmoid)'''\n",
356 | " y = tf.sigmoid(z) # TODO\n",
357 | " # y = # TODO\n",
358 | " return y\n",
359 | "\n",
360 | "# Since layer parameters are initialized randomly, we will set a random seed for reproducibility\n",
361 | "tf.keras.utils.set_random_seed(1)\n",
362 | "layer = OurDenseLayer(3)\n",
363 | "layer.build((1,2))\n",
364 | "x_input = tf.constant([[1,2.]], shape=(1,2))\n",
365 | "y = layer.call(x_input)\n",
366 | "\n",
367 | "# test the output!\n",
368 | "print(y.numpy())\n",
369 | "mdl.lab1.test_custom_dense_layer_output(y)"
370 | ]
371 | },
372 | {
373 | "cell_type": "markdown",
374 | "metadata": {
375 | "id": "Jt1FgM7qYZ3D"
376 | },
377 | "source": [
378 | "Conveniently, TensorFlow has defined a number of ```Layers``` that are commonly used in neural networks, for example a [```Dense```](https://www.tensorflow.org/api_docs/python/tf/keras/layers/Dense?version=stable) layer. Now, instead of using a single ```Layer``` to define our simple neural network, we'll use the [`Sequential`](https://www.tensorflow.org/versions/r2.0/api_docs/python/tf/keras/Sequential) model from Keras and a single [`Dense`](https://www.tensorflow.org/versions/r2.0/api_docs/python/tf/keras/layers/Dense) layer to define our network. With the `Sequential` API, you can readily create neural networks by stacking together layers like building blocks."
379 | ]
380 | },
381 | {
382 | "cell_type": "code",
383 | "execution_count": null,
384 | "metadata": {
385 | "id": "7WXTpmoL6TDz"
386 | },
387 | "outputs": [],
388 | "source": [
389 | "### Defining a neural network using the Sequential API ###\n",
390 | "\n",
391 | "# Import relevant packages\n",
392 | "from tensorflow.keras import Sequential\n",
393 | "from tensorflow.keras.layers import Dense\n",
394 | "\n",
395 | "# Define the number of outputs\n",
396 | "n_output_nodes = 3\n",
397 | "\n",
398 | "# First define the model\n",
399 | "model = Sequential()\n",
400 | "\n",
401 | "'''TODO: Define a dense (fully connected) layer to compute z'''\n",
402 | "# Remember: dense layers are defined by the parameters W and b!\n",
403 | "# You can read more about the initialization of W and b in the TF documentation :)\n",
404 | "# https://www.tensorflow.org/api_docs/python/tf/keras/layers/Dense?version=stable\n",
405 | "dense_layer = Dense(n_output_nodes, activation='sigmoid') # TODO\n",
406 | "# dense_layer = # TODO\n",
407 | "\n",
408 | "# Add the dense layer to the model\n",
409 | "model.add(dense_layer)\n"
410 | ]
411 | },
412 | {
413 | "cell_type": "markdown",
414 | "metadata": {
415 | "id": "HDGcwYfUyR-U"
416 | },
417 | "source": [
418 | "That's it! We've defined our model using the `Sequential` API. Now, we can test it out using an example input:"
419 | ]
420 | },
421 | {
422 | "cell_type": "code",
423 | "execution_count": null,
424 | "metadata": {
425 | "id": "sg23OczByRDb"
426 | },
427 | "outputs": [],
428 | "source": [
429 | "# Test model with example input\n",
430 | "x_input = tf.constant([[1,2.]], shape=(1,2))\n",
431 | "\n",
432 | "'''TODO: feed input into the model and predict the output!'''\n",
433 | "model_output = model(x_input).numpy()\n",
434 | "# model_output = # TODO\n",
435 | "print(model_output)"
436 | ]
437 | },
438 | {
439 | "cell_type": "markdown",
440 | "metadata": {
441 | "id": "596NvsOOtr9F"
442 | },
443 | "source": [
444 | "In addition to defining models using the `Sequential` API, we can also define neural networks by directly subclassing the [`Model`](https://www.tensorflow.org/api_docs/python/tf/keras/Model?version=stable) class, which groups layers together to enable model training and inference. The `Model` class captures what we refer to as a \"model\" or as a \"network\". Using Subclassing, we can create a class for our model, and then define the forward pass through the network using the `call` function. Subclassing affords the flexibility to define custom layers, custom training loops, custom activation functions, and custom models. Let's define the same neural network as above now using Subclassing rather than the `Sequential` model."
445 | ]
446 | },
447 | {
448 | "cell_type": "code",
449 | "execution_count": null,
450 | "metadata": {
451 | "id": "K4aCflPVyViD"
452 | },
453 | "outputs": [],
454 | "source": [
455 | "### Defining a model using subclassing ###\n",
456 | "\n",
457 | "from tensorflow.keras import Model\n",
458 | "from tensorflow.keras.layers import Dense\n",
459 | "\n",
460 | "class SubclassModel(tf.keras.Model):\n",
461 | "\n",
462 | " # In __init__, we define the Model's layers\n",
463 | " def __init__(self, n_output_nodes):\n",
464 | " super(SubclassModel, self).__init__()\n",
465 | " '''TODO: Our model consists of a single Dense layer. Define this layer.'''\n",
466 | " self.dense_layer = Dense(n_output_nodes, activation='sigmoid') # TODO\n",
467 | " # self.dense_layer = '''TODO: Dense Layer'''\n",
468 | "\n",
469 | " # In the call function, we define the Model's forward pass.\n",
470 | " def call(self, inputs):\n",
471 | " return self.dense_layer(inputs)"
472 | ]
473 | },
474 | {
475 | "cell_type": "markdown",
476 | "metadata": {
477 | "id": "U0-lwHDk4irB"
478 | },
479 | "source": [
480 | "Just like the model we built using the `Sequential` API, let's test out our `SubclassModel` using an example input.\n",
481 | "\n"
482 | ]
483 | },
484 | {
485 | "cell_type": "code",
486 | "execution_count": null,
487 | "metadata": {
488 | "id": "LhB34RA-4gXb"
489 | },
490 | "outputs": [],
491 | "source": [
492 | "n_output_nodes = 3\n",
493 | "model = SubclassModel(n_output_nodes)\n",
494 | "\n",
495 | "x_input = tf.constant([[1,2.]], shape=(1,2))\n",
496 | "\n",
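497 | "# Note: calling the model directly, i.e. model(x_input), is equivalent here and more idiomatic\n",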
497 | "print(model.call(x_input))"
498 | ]
499 | },
500 | {
501 | "cell_type": "markdown",
502 | "metadata": {
503 | "id": "HTIFMJLAzsyE"
504 | },
505 | "source": [
506 | "Importantly, Subclassing affords us a lot of flexibility to define custom models. For example, we can use boolean arguments in the `call` function to specify different network behaviors, such as different behaviors during training versus inference. Suppose that in some instances we want our network to simply output the input, without any perturbation. We define a boolean argument `isidentity` to control this behavior:"
507 | ]
508 | },
509 | {
510 | "cell_type": "code",
511 | "execution_count": null,
512 | "metadata": {
513 | "id": "P7jzGX5D1xT5"
514 | },
515 | "outputs": [],
516 | "source": [
517 | "### Defining a model using subclassing and specifying custom behavior ###\n",
518 | "\n",
519 | "from tensorflow.keras import Model\n",
520 | "from tensorflow.keras.layers import Dense\n",
521 | "\n",
522 | "class IdentityModel(tf.keras.Model):\n",
523 | "\n",
524 | " # As before, in __init__ we define the Model's layers\n",
525 | " # Since our desired behavior involves the forward pass, this part is unchanged\n",
526 | " def __init__(self, n_output_nodes):\n",
527 | " super(IdentityModel, self).__init__()\n",
528 | " self.dense_layer = tf.keras.layers.Dense(n_output_nodes, activation='sigmoid')\n",
529 | "\n",
530 | " '''TODO: Implement the behavior where the network outputs the input, unchanged, under control of the isidentity argument.'''\n",
531 | " def call(self, inputs, isidentity=False):\n",
532 | " x = self.dense_layer(inputs)\n",
533 | " if isidentity: # TODO\n",
534 | " return inputs # TODO\n",
535 | " return x\n",
536 | "\n",
537 | " # def call(self, inputs, isidentity=False):\n",
538 | " # TODO"
539 | ]
540 | },
541 | {
542 | "cell_type": "markdown",
543 | "metadata": {
544 | "id": "Ku4rcCGx5T3y"
545 | },
546 | "source": [
547 | "Let's test this behavior:"
548 | ]
549 | },
550 | {
551 | "cell_type": "code",
552 | "execution_count": null,
553 | "metadata": {
554 | "id": "NzC0mgbk5dp2"
555 | },
556 | "outputs": [],
557 | "source": [
558 | "n_output_nodes = 3\n",
559 | "model = IdentityModel(n_output_nodes)\n",
560 | "\n",
561 | "x_input = tf.constant([[1,2.]], shape=(1,2))\n",
562 | "'''TODO: pass the input into the model and call with and without the input identity option.'''\n",
563 | "out_activate = model.call(x_input) # TODO\n",
564 | "# out_activate = # TODO\n",
565 | "out_identity = model.call(x_input, isidentity=True) # TODO\n",
566 | "# out_identity = # TODO\n",
567 | "\n",
568 | "print(\"Network output with activation: {}; network identity output: {}\".format(out_activate.numpy(), out_identity.numpy()))"
569 | ]
570 | },
571 | {
572 | "cell_type": "markdown",
573 | "metadata": {
574 | "id": "7V1dEqdk6VI5"
575 | },
576 | "source": [
577 | "Now that we have learned how to define `Layers` as well as neural networks in TensorFlow using both the `Sequential` and Subclassing APIs, we're ready to turn our attention to how to actually implement network training with backpropagation."
578 | ]
579 | },
580 | {
581 | "cell_type": "markdown",
582 | "metadata": {
583 | "id": "dQwDhKn8kbO2"
584 | },
585 | "source": [
586 | "## 1.4 Automatic differentiation in TensorFlow\n",
587 | "\n",
588 | "[Automatic differentiation](https://en.wikipedia.org/wiki/Automatic_differentiation)\n",
589 | "is one of the most important parts of TensorFlow and is the backbone of training with\n",
590 | "[backpropagation](https://en.wikipedia.org/wiki/Backpropagation). We will use the TensorFlow GradientTape [`tf.GradientTape`](https://www.tensorflow.org/api_docs/python/tf/GradientTape?version=stable) to trace operations for computing gradients later.\n",
591 | "\n",
592 | "When a forward pass is made through the network, all forward-pass operations get recorded to a \"tape\"; then, to compute the gradient, the tape is played backwards. By default, the tape is discarded after it is played backwards; this means that a particular `tf.GradientTape` can only\n",
593 | "compute one gradient, and subsequent calls throw a runtime error. However, we can compute multiple gradients over the same computation by creating a ```persistent``` gradient tape.\n",
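594 | "\n",
595 | "For instance, a persistent tape lets us take several gradients from a single recording (a minimal sketch, separate from the lab's exercises):\n",
596 | "\n",
597 | "```python\n",
598 | "x = tf.Variable(2.0)\n",
599 | "with tf.GradientTape(persistent=True) as tape:\n",
600 | "    y = x * x   # y = x^2\n",
601 | "    z = y * y   # z = x^4\n",
602 | "dy_dx = tape.gradient(y, x)  # 2x = 4.0\n",
603 | "dz_dx = tape.gradient(z, x)  # 4x^3 = 32.0\n",
604 | "del tape  # release the tape's resources once finished\n",
605 | "```\n",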
594 | "\n",
595 | "First, we will look at how we can compute gradients using GradientTape and access them for computation. We define the simple function $ y = x^2$ and compute the gradient:"
596 | ]
597 | },
598 | {
599 | "cell_type": "code",
600 | "execution_count": null,
601 | "metadata": {
602 | "id": "tdkqk8pw5yJM"
603 | },
604 | "outputs": [],
605 | "source": [
606 | "### Gradient computation with GradientTape ###\n",
607 | "\n",
608 | "# y = x^2\n",
609 | "# Example: x = 3.0\n",
610 | "x = tf.Variable(3.0)\n",
611 | "\n",
612 | "# Initiate the gradient tape\n",
613 | "with tf.GradientTape() as tape:\n",
614 | " # Define the function\n",
615 | " y = x * x\n",
616 | "# Access the gradient -- derivative of y with respect to x\n",
617 | "dy_dx = tape.gradient(y, x)\n",
618 | "\n",
619 | "assert dy_dx.numpy() == 6.0"
620 | ]
621 | },
622 | {
623 | "cell_type": "markdown",
624 | "metadata": {
625 | "id": "JhU5metS5xF3"
626 | },
627 | "source": [
628 | "In training neural networks, we use differentiation and stochastic gradient descent (SGD) to optimize a loss function. Now that we have a sense of how `GradientTape` can be used to compute and access derivatives, we will look at an example where we use automatic differentiation and SGD to find the minimum of $L=(x-x_f)^2$. Here $x_f$ is a variable for a desired value we are trying to optimize for; $L$ represents a loss that we are trying to minimize. While we can clearly solve this problem analytically ($x_{min}=x_f$), considering how we can compute this using `GradientTape` sets us up nicely for future labs where we use gradient descent to optimize entire neural network losses."
629 | ]
630 | },
631 | {
632 | "cell_type": "code",
633 | "execution_count": null,
634 | "metadata": {
635 | "attributes": {
636 | "classes": [
637 | "py"
638 | ],
639 | "id": ""
640 | },
641 | "id": "7g1yWiSXqEf-"
642 | },
643 | "outputs": [],
644 | "source": [
645 | "### Function minimization with automatic differentiation and SGD ###\n",
646 | "\n",
647 | "# Initialize a random value for our initial x\n",
648 | "x = tf.Variable([tf.random.normal([1])])\n",
649 | "print(\"Initializing x={}\".format(x.numpy()))\n",
650 | "\n",
651 | "learning_rate = 1e-2 # learning rate for SGD\n",
652 | "history = []\n",
653 | "# Define the target value\n",
654 | "x_f = 4\n",
655 | "\n",
656 | "# We will run SGD for a number of iterations. At each iteration, we compute the loss,\n",
657 | "# compute the derivative of the loss with respect to x, and perform the SGD update.\n",
658 | "for i in range(500):\n",
659 | " with tf.GradientTape() as tape:\n",
660 | " '''TODO: define the loss as described above'''\n",
661 | " loss = (x - x_f)**2 # \"forward pass\": record the current loss on the tape\n",
662 | " # loss = # TODO\n",
663 | "\n",
664 | " # loss minimization using gradient tape\n",
665 | " grad = tape.gradient(loss, x) # compute the derivative of the loss with respect to x\n",
666 | " new_x = x - learning_rate*grad # sgd update\n",
667 | " x.assign(new_x) # update the value of x\n",
668 | " history.append(x.numpy()[0])\n",
669 | "\n",
670 | "# Plot the evolution of x as we optimize towards x_f!\n",
671 | "plt.plot(history)\n",
672 | "plt.plot([0, 500],[x_f,x_f])\n",
673 | "plt.legend(('Predicted', 'True'))\n",
674 | "plt.xlabel('Iteration')\n",
675 | "plt.ylabel('x value')"
676 | ]
677 | },
678 | {
679 | "cell_type": "markdown",
680 | "metadata": {
681 | "id": "pC7czCwk3ceH"
682 | },
683 | "source": [
684 | "`GradientTape` provides an extremely flexible framework for automatic differentiation. In order to backpropagate errors through a neural network, we track the forward pass on the tape, use this information to determine the gradients, and then use these gradients for optimization with SGD.\n",
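685 | "\n",
686 | "As an aside, the same update can be written with a built-in optimizer instead of the manual `assign`-based step. A minimal sketch (illustrative; it uses `x_f` as defined above):\n",
687 | "\n",
688 | "```python\n",
689 | "optimizer = tf.keras.optimizers.SGD(learning_rate=1e-2)\n",
690 | "x = tf.Variable([tf.random.normal([1])])\n",
691 | "for i in range(500):\n",
692 | "    with tf.GradientTape() as tape:\n",
693 | "        loss = (x - x_f)**2  # x_f: the target value, as defined above\n",
694 | "    grads = tape.gradient(loss, [x])\n",
695 | "    optimizer.apply_gradients(zip(grads, [x]))  # applies the SGD update to x\n",
696 | "```"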
685 | ]
686 | }
687 | ],
688 | "metadata": {
689 | "accelerator": "GPU",
690 | "colab": {
691 | "collapsed_sections": [
692 | "WBk0ZDWY-ff8"
693 | ],
694 | "name": "TF_Part1_Intro_Solution.ipynb",
695 | "provenance": []
696 | },
697 | "kernelspec": {
698 | "display_name": "Python 3",
699 | "language": "python",
700 | "name": "python3"
701 | },
702 | "language_info": {
703 | "name": "python",
704 | "version": "3.9.6"
705 | },
706 | "vscode": {
707 | "interpreter": {
708 | "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
709 | }
710 | }
711 | },
712 | "nbformat": 4,
713 | "nbformat_minor": 0
714 | }
715 |
--------------------------------------------------------------------------------
/lab2/TF_Part1_MNIST.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "id": "Xmf_JRJa_N8C"
7 | },
8 | "source": [
9 | "\n",
18 | "\n",
19 | "# Copyright Information"
20 | ]
21 | },
22 | {
23 | "cell_type": "code",
24 | "execution_count": null,
25 | "metadata": {
26 | "id": "gKA_J7bdP33T"
27 | },
28 | "outputs": [],
29 | "source": [
30 | "# Copyright 2025 MIT Introduction to Deep Learning. All Rights Reserved.\n",
31 | "#\n",
32 | "# Licensed under the MIT License. You may not use this file except in compliance\n",
33 | "# with the License. Use and/or modification of this code outside of MIT Introduction\n",
34 | "# to Deep Learning must reference:\n",
35 | "#\n",
36 | "# © MIT Introduction to Deep Learning\n",
37 | "# http://introtodeeplearning.com\n",
38 | "#"
39 | ]
40 | },
41 | {
42 | "cell_type": "markdown",
43 | "metadata": {
44 | "id": "Cm1XpLftPi4A"
45 | },
46 | "source": [
47 | "# Laboratory 2: Computer Vision\n",
48 | "\n",
49 | "# Part 1: MNIST Digit Classification\n",
50 | "\n",
51 | "In the first portion of this lab, we will build and train a convolutional neural network (CNN) for classification of handwritten digits from the famous [MNIST](http://yann.lecun.com/exdb/mnist/) dataset. The MNIST dataset consists of 60,000 training images and 10,000 test images. Our classes are the digits 0-9.\n",
52 | "\n",
53 | "First, let's download the course repository, install dependencies, and import the relevant packages we'll need for this lab."
54 | ]
55 | },
56 | {
57 | "cell_type": "code",
58 | "execution_count": null,
59 | "metadata": {
60 | "id": "RsGqx_ai_N8F"
61 | },
62 | "outputs": [],
63 | "source": [
64 | "# Import TensorFlow 2.0\n",
65 | "# !pip install tensorflow\n",
66 | "import tensorflow as tf\n",
67 | "\n",
68 | "# MIT introduction to deep learning package\n",
69 | "!pip install mitdeeplearning --quiet\n",
70 | "import mitdeeplearning as mdl\n",
71 | "\n",
72 | "# other packages\n",
73 | "import matplotlib.pyplot as plt\n",
74 | "import numpy as np\n",
75 | "import random\n",
76 | "from tqdm import tqdm"
77 | ]
78 | },
79 | {
80 | "cell_type": "markdown",
81 | "metadata": {
82 | "id": "nCpHDxX1bzyZ"
83 | },
84 | "source": [
85 | "We'll also install Comet. If you followed the instructions from Lab 1, you should have your Comet account set up. Enter your API key below."
86 | ]
87 | },
88 | {
89 | "cell_type": "code",
90 | "execution_count": null,
91 | "metadata": {
92 | "id": "GSR_PAqjbzyZ"
93 | },
94 | "outputs": [],
95 | "source": [
96 | "!pip install comet_ml > /dev/null 2>&1\n",
97 | "import comet_ml\n",
98 | "# TODO: ENTER YOUR API KEY HERE!!\n",
99 | "COMET_API_KEY = \"\"\n",
100 | "\n",
101 | "# Check that we are using a GPU, if not switch runtimes\n",
102 | "# using Runtime > Change Runtime Type > GPU\n",
103 | "assert len(tf.config.list_physical_devices('GPU')) > 0\n",
104 | "assert COMET_API_KEY != \"\", \"Please insert your Comet API Key\""
105 | ]
106 | },
107 | {
108 | "cell_type": "code",
109 | "source": [
110 | "# start a first comet experiment for the first part of the lab\n",
111 | "comet_ml.init(project_name=\"6S191_lab2_part1_NN\")\n",
112 | "comet_model_1 = comet_ml.Experiment()"
113 | ],
114 | "metadata": {
115 | "id": "wGPDtVxvTtPk"
116 | },
117 | "execution_count": null,
118 | "outputs": []
119 | },
120 | {
121 | "cell_type": "markdown",
122 | "metadata": {
123 | "id": "HKjrdUtX_N8J"
124 | },
125 | "source": [
126 | "## 1.1 MNIST dataset\n",
127 | "\n",
128 | "Let's download and load the dataset and display a few random samples from it:"
129 | ]
130 | },
131 | {
132 | "cell_type": "code",
133 | "execution_count": null,
134 | "metadata": {
135 | "id": "p2dQsHI3_N8K"
136 | },
137 | "outputs": [],
138 | "source": [
139 | "mnist = tf.keras.datasets.mnist\n",
140 | "(train_images, train_labels), (test_images, test_labels) = mnist.load_data()\n",
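141 | "# Scale pixel values to [0, 1], add a trailing channel dimension (28, 28) -> (28, 28, 1), and cast dtypes\n",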
141 | "train_images = (np.expand_dims(train_images, axis=-1)/255.).astype(np.float32)\n",
142 | "train_labels = (train_labels).astype(np.int64)\n",
143 | "test_images = (np.expand_dims(test_images, axis=-1)/255.).astype(np.float32)\n",
144 | "test_labels = (test_labels).astype(np.int64)"
145 | ]
146 | },
147 | {
148 | "cell_type": "markdown",
149 | "metadata": {
150 | "id": "5ZtUqOqePsRD"
151 | },
152 | "source": [
153 | "Our training set is made up of 28x28 grayscale images of handwritten digits.\n",
154 | "\n",
155 | "Let's visualize what some of these images and their corresponding training labels look like."
156 | ]
157 | },
158 | {
159 | "cell_type": "code",
160 | "execution_count": null,
161 | "metadata": {
162 | "id": "bDBsR2lP_N8O",
163 | "scrolled": true
164 | },
165 | "outputs": [],
166 | "source": [
167 | "plt.figure(figsize=(10,10))\n",
168 | "random_inds = np.random.choice(60000,36)\n",
169 | "for i in range(36):\n",
170 | " plt.subplot(6,6,i+1)\n",
171 | " plt.xticks([])\n",
172 | " plt.yticks([])\n",
173 | " plt.grid(False)\n",
174 | " image_ind = random_inds[i]\n",
175 | " plt.imshow(np.squeeze(train_images[image_ind]), cmap=plt.cm.binary)\n",
176 | " plt.xlabel(train_labels[image_ind])\n",
177 | "comet_model_1.log_figure(figure=plt)"
178 | ]
179 | },
180 | {
181 | "cell_type": "markdown",
182 | "metadata": {
183 | "id": "V6hd3Nt1_N8q"
184 | },
185 | "source": [
186 | "## 1.2 Neural Network for Handwritten Digit Classification\n",
187 | "\n",
188 | "We'll first build a simple neural network consisting of two fully connected layers and apply this to the digit classification task. Our network will ultimately output a probability distribution over the 10 digit classes (0-9). The first architecture we will build is depicted below:\n",
189 | "\n",
190 | "\n"
191 | ]
192 | },
193 | {
194 | "cell_type": "markdown",
195 | "metadata": {
196 | "id": "rphS2rMIymyZ"
197 | },
198 | "source": [
199 | "### Fully connected neural network architecture\n",
200 | "To define the architecture of this first fully connected neural network, we'll once again use the Keras API and define the model using the [`Sequential`](https://www.tensorflow.org/api_docs/python/tf/keras/models/Sequential) class. Note how we first use a [`Flatten`](https://www.tensorflow.org/api_docs/python/tf/keras/layers/Flatten) layer, which flattens the input so that it can be fed into the model.\n",
201 | "\n",
202 | "In this next block, you'll define the fully connected layers of this simple network."
203 | ]
204 | },
205 | {
206 | "cell_type": "code",
207 | "execution_count": null,
208 | "metadata": {
209 | "id": "MMZsbjAkDKpU"
210 | },
211 | "outputs": [],
212 | "source": [
213 | "def build_fc_model():\n",
214 | " fc_model = tf.keras.Sequential([\n",
215 | " # First define a Flatten layer\n",
216 | " tf.keras.layers.Flatten(),\n",
217 | "\n",
218 | " # '''TODO: Define the activation function for the first fully connected (Dense) layer.'''\n",
219 | " tf.keras.layers.Dense(128, activation= '''TODO'''),\n",
220 | "\n",
221 | " # '''TODO: Define the second Dense layer to output the classification probabilities'''\n",
222 | " '''[TODO Dense layer to output classification probabilities]'''\n",
223 | "\n",
224 | " ])\n",
225 | " return fc_model\n",
226 | "\n",
227 | "model = build_fc_model()"
228 | ]
229 | },
230 | {
231 | "cell_type": "markdown",
232 | "metadata": {
233 | "id": "VtGZpHVKz5Jt"
234 | },
235 | "source": [
236 | "As we progress through this next portion, you may find that you'll want to make changes to the architecture defined above. **Note that in order to update the model later on, you'll need to re-run the above cell to re-initialize the model.**"
237 | ]
238 | },
239 | {
240 | "cell_type": "markdown",
241 | "metadata": {
242 | "id": "mVN1_AeG_N9N"
243 | },
244 | "source": [
245 | "Let's take a step back and think about the network we've just created. The first layer in this network, `tf.keras.layers.Flatten`, transforms the format of the images from a 2d-array (28 x 28 pixels) to a 1d-array of 28 * 28 = 784 pixels. You can think of this layer as unstacking rows of pixels in the image and lining them up. There are no learned parameters in this layer; it only reformats the data.\n",
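246 | "\n",
247 | "A quick shape check makes this concrete (illustrative; it uses the `train_images` array loaded above):\n",
248 | "\n",
249 | "```python\n",
250 | "flat = tf.keras.layers.Flatten()(train_images[:1])\n",
251 | "print(train_images[:1].shape, '->', flat.shape)  # (1, 28, 28, 1) -> (1, 784)\n",
252 | "```\n",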
246 | "\n",
247 | "After the pixels are flattened, the network consists of a sequence of two `tf.keras.layers.Dense` layers. These are fully-connected neural layers. The first `Dense` layer has 128 nodes (or neurons). The second (and last) layer (which you've defined!) should return an array of probability scores that sum to 1. Each node contains a score that indicates the probability that the current image belongs to one of the handwritten digit classes.\n",
248 | "\n",
249 | "That defines our fully connected model!"
250 | ]
251 | },
252 | {
253 | "cell_type": "markdown",
254 | "metadata": {
255 | "id": "gut8A_7rCaW6"
256 | },
257 | "source": [
258 | "\n",
259 | "\n",
260 | "### Compile the model\n",
261 | "\n",
262 | "Before training the model, we need to define a few more settings. These are added during the model's [`compile`](https://www.tensorflow.org/api_docs/python/tf/keras/models/Sequential#compile) step:\n",
263 | "\n",
264 | "* *Loss function* — This measures how accurate the model is during training. As was covered in lecture, during training we want to minimize this function, which will \"steer\" the model in the right direction.\n",
265 | "* *Optimizer* — This defines how the model is updated based on the data it sees and its loss function.\n",
266 | "* *Metrics* — Here we can define metrics used to monitor the training and testing steps. In this example, we'll look at the *accuracy*, the fraction of the images that are correctly classified.\n",
267 | "\n",
268 | "We'll start out by using a stochastic gradient descent (SGD) optimizer initialized with a learning rate of 0.1. Since we are performing a categorical classification task, we'll want to use the [cross entropy loss](https://www.tensorflow.org/api_docs/python/tf/keras/metrics/sparse_categorical_crossentropy).\n",
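269 | "For a single example with true label $y$ and predicted class probabilities $p$, this loss is simply $-\\log p_y$: it is small when the model assigns high probability to the correct class.\n",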
269 | "\n",
270 | "You'll want to experiment with both the choice of optimizer and learning rate and evaluate how these affect the accuracy of the trained model."
271 | ]
272 | },
273 | {
274 | "cell_type": "code",
275 | "execution_count": null,
276 | "metadata": {
277 | "id": "Lhan11blCaW7"
278 | },
279 | "outputs": [],
280 | "source": [
281 | "'''TODO: Experiment with different optimizers and learning rates. How do these affect\n",
282 | " the accuracy of the trained model? Which optimizers and/or learning rates yield\n",
283 | " the best performance?'''\n",
284 | "model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=1e-1),\n",
285 | " loss='sparse_categorical_crossentropy',\n",
286 | " metrics=['accuracy'])"
287 | ]
288 | },
289 | {
290 | "cell_type": "markdown",
291 | "metadata": {
292 | "id": "qKF6uW-BCaW-"
293 | },
294 | "source": [
295 | "### Train the model\n",
296 | "\n",
297 | "We're now ready to train our model, which will involve feeding the training data (`train_images` and `train_labels`) into the model, and then asking it to learn the associations between images and labels. We'll also need to define the batch size and the number of epochs, or iterations over the MNIST dataset, to use during training.\n",
298 | "\n",
299 | "In Lab 1, we saw how we can use `GradientTape` to optimize losses and train models with stochastic gradient descent. After defining the model settings in the `compile` step, we can also accomplish training by calling the [`fit`](https://www.tensorflow.org/api_docs/python/tf/keras/models/Sequential#fit) method on an instance of the `Model` class. We will use this to train our fully connected model.\n"
300 | ]
301 | },
302 | {
303 | "cell_type": "code",
304 | "execution_count": null,
305 | "metadata": {
306 | "id": "EFMbIqIvQ2X0"
307 | },
308 | "outputs": [],
309 | "source": [
310 | "# Define the batch size and the number of epochs to use during training\n",
311 | "BATCH_SIZE = 64\n",
312 | "EPOCHS = 5\n",
313 | "\n",
314 | "model.fit(train_images, train_labels, batch_size=BATCH_SIZE, epochs=EPOCHS)\n",
315 | "comet_model_1.end()"
316 | ]
317 | },
318 | {
319 | "cell_type": "markdown",
320 | "metadata": {
321 | "id": "W3ZVOhugCaXA"
322 | },
323 | "source": [
324 | "As the model trains, the loss and accuracy metrics are displayed. With five epochs and a learning rate of 0.1, this fully connected model should achieve an accuracy of approximately 0.97 (or 97%) on the training data."
325 | ]
326 | },
327 | {
328 | "cell_type": "markdown",
329 | "metadata": {
330 | "id": "oEw4bZgGCaXB"
331 | },
332 | "source": [
333 | "### Evaluate accuracy on the test dataset\n",
334 | "\n",
335 | "Now that we've trained the model, we can ask it to make predictions about a test set that it hasn't seen before. In this example, the `test_images` array comprises our test dataset. To evaluate accuracy, we can check to see if the model's predictions match the labels from the `test_labels` array.\n",
336 | "\n",
337 | "Use the [`evaluate`](https://www.tensorflow.org/api_docs/python/tf/keras/models/Sequential#evaluate) method to evaluate the model on the test dataset!"
338 | ]
339 | },
340 | {
341 | "cell_type": "code",
342 | "execution_count": null,
343 | "metadata": {
344 | "id": "VflXLEeECaXC"
345 | },
346 | "outputs": [],
347 | "source": [
348 | "'''TODO: Use the evaluate method to test the model!'''\n",
349 | "test_loss, test_acc = # TODO\n",
350 | "\n",
351 | "print('Test accuracy:', test_acc)"
352 | ]
353 | },
354 | {
355 | "cell_type": "markdown",
356 | "metadata": {
357 | "id": "yWfgsmVXCaXG"
358 | },
359 | "source": [
360 | "You may observe that the accuracy on the test dataset is a little lower than the accuracy on the training dataset. This gap between training accuracy and test accuracy is an example of *overfitting*, when a machine learning model performs worse on new data than on its training data.\n",
361 | "\n",
362 | "What is the highest accuracy you can achieve with this first fully connected model? Since the handwritten digit classification task is pretty straightforward, you may be wondering how we can do better...\n",
363 | "\n",
364 | ""
365 | ]
366 | },
367 | {
368 | "cell_type": "markdown",
369 | "metadata": {
370 | "id": "baIw9bDf8v6Z"
371 | },
372 | "source": [
373 | "## 1.3 Convolutional Neural Network (CNN) for handwritten digit classification"
374 | ]
375 | },
376 | {
377 | "cell_type": "markdown",
378 | "metadata": {
379 | "id": "_J72Yt1o_fY7"
380 | },
381 | "source": [
382 | "As we saw in lecture, convolutional neural networks (CNNs) are particularly well-suited for a variety of tasks in computer vision, and have achieved near-perfect accuracies on the MNIST dataset. We will now build a CNN composed of two convolutional and pooling layers, followed by two fully connected layers, which will ultimately output a probability distribution over the 10 digit classes (0-9). The CNN we will be building is depicted below:\n",
383 | "\n",
384 | ""
385 | ]
386 | },
387 | {
388 | "cell_type": "markdown",
389 | "metadata": {
390 | "id": "EEHqzbJJAEoR"
391 | },
392 | "source": [
393 | "### Define the CNN model\n",
394 | "\n",
395 | "We'll use the same training and test datasets as before, and proceed similarly as our fully connected network to define and train our new CNN model. To do this we will explore two layers we have not encountered before: you can use [`keras.layers.Conv2D` ](https://www.tensorflow.org/api_docs/python/tf/keras/layers/Conv2D) to define convolutional layers and [`keras.layers.MaxPool2D`](https://www.tensorflow.org/api_docs/python/tf/keras/layers/MaxPool2D) to define the pooling layers. Use the parameters shown in the network architecture above to define these layers and build the CNN model."
396 | ]
397 | },
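398 |     {
399 |      "cell_type": "markdown",
400 |      "metadata": {},
401 |      "source": [
402 |       "As a generic illustration of how these two layer types are constructed (the argument values below are arbitrary and are not the parameters of this lab's architecture):\n",
403 |       "\n",
404 |       "```python\n",
405 |       "conv = tf.keras.layers.Conv2D(filters=32, kernel_size=(3,3), activation='relu')\n",
406 |       "pool = tf.keras.layers.MaxPool2D(pool_size=(2,2))\n",
407 |       "```"
408 |      ]
409 |     },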
398 | {
399 | "cell_type": "code",
400 | "execution_count": null,
401 | "metadata": {
402 | "id": "vec9qcJs-9W5"
403 | },
404 | "outputs": [],
405 | "source": [
406 | "def build_cnn_model():\n",
407 | " cnn_model = tf.keras.Sequential([\n",
408 | "\n",
409 | " # TODO: Define the first convolutional layer\n",
410 | "      tf.keras.layers.Conv2D('''TODO'''),\n",
411 | "\n",
412 | " # TODO: Define the first max pooling layer\n",
413 | "      tf.keras.layers.MaxPool2D('''TODO'''),\n",
414 | "\n",
415 | " # TODO: Define the second convolutional layer\n",
416 | "      tf.keras.layers.Conv2D('''TODO'''),\n",
417 | "\n",
418 | " # TODO: Define the second max pooling layer\n",
419 | "      tf.keras.layers.MaxPool2D('''TODO'''),\n",
420 | "\n",
421 | " tf.keras.layers.Flatten(),\n",
422 | " tf.keras.layers.Dense(128, activation=tf.nn.relu),\n",
423 | "\n",
424 | " # TODO: Define the last Dense layer to output the classification\n",
425 | "      # probabilities. Pay attention to the activation needed for a\n",
426 | "      # probability output\n",
427 | " '''[TODO Dense layer to output classification probabilities]'''\n",
428 | " ])\n",
429 | "\n",
430 | " return cnn_model\n",
431 | "\n",
432 | "cnn_model = build_cnn_model()\n",
433 | "# Initialize the model by passing some data through\n",
434 | "cnn_model.predict(train_images[[0]])\n",
435 | "# Print the summary of the layers in the model.\n",
436 | "print(cnn_model.summary())"
437 | ]
438 | },
439 | {
440 | "cell_type": "markdown",
441 | "metadata": {
442 | "id": "kUAXIBynCih2"
443 | },
444 | "source": [
445 | "### Train and test the CNN model\n",
446 | "\n",
447 | "Now, as before, we can define the loss function, optimizer, and metrics through the `compile` method. Compile the CNN model with an optimizer and learning rate of choice:"
448 | ]
449 | },
450 | {
451 | "cell_type": "code",
452 | "execution_count": null,
453 | "metadata": {
454 | "id": "vheyanDkCg6a"
455 | },
456 | "outputs": [],
457 | "source": [
458 | "comet_ml.init(project_name=\"6.s191lab2_part1_CNN\")\n",
459 | "comet_model_2 = comet_ml.Experiment()\n",
460 | "\n",
461 | "'''TODO: Define the compile operation with your optimizer and learning rate of choice'''\n",
462 | "cnn_model.compile(optimizer='''TODO''', loss='''TODO''', metrics=['accuracy']) # TODO"
463 | ]
464 | },
465 | {
466 | "cell_type": "markdown",
467 | "metadata": {
468 | "id": "U19bpRddC7H_"
469 | },
470 | "source": [
471 | "As was the case with the fully connected model, we can train our CNN using the `fit` method via the Keras API."
472 | ]
473 | },
474 | {
475 | "cell_type": "code",
476 | "execution_count": null,
477 | "metadata": {
478 | "id": "YdrGZVmWDK4p"
479 | },
480 | "outputs": [],
481 | "source": [
482 | "'''TODO: Use model.fit to train the CNN model, with the same batch_size and number of epochs previously used.'''\n",
483 | "cnn_model.fit('''TODO''')\n",
484 | "# comet_model_2.end()"
485 | ]
486 | },
487 | {
488 | "cell_type": "markdown",
489 | "metadata": {
490 | "id": "pEszYWzgDeIc"
491 | },
492 | "source": [
493 | "Great! Now that we've trained the model, let's evaluate it on the test dataset using the [`evaluate`](https://www.tensorflow.org/api_docs/python/tf/keras/models/Sequential#evaluate) method:"
494 | ]
495 | },
496 | {
497 | "cell_type": "code",
498 | "execution_count": null,
499 | "metadata": {
500 | "id": "JDm4znZcDtNl"
501 | },
502 | "outputs": [],
503 | "source": [
504 | "'''TODO: Use the evaluate method to test the model!'''\n",
505 | "test_loss, test_acc = # TODO\n",
506 | "\n",
507 | "print('Test accuracy:', test_acc)"
508 | ]
509 | },
510 | {
511 | "cell_type": "markdown",
512 | "metadata": {
513 | "id": "2rvEgK82Glv9"
514 | },
515 | "source": [
516 | "What is the highest accuracy you're able to achieve using the CNN model, and how does the accuracy of the CNN model compare to the accuracy of the simple fully connected network? What optimizers and learning rates seem to be optimal for training the CNN model?\n",
517 | "\n",
518 | "Feel free to click the Comet links to investigate the training/accuracy curves for your model."
519 | ]
520 | },
521 | {
522 | "cell_type": "markdown",
523 | "metadata": {
524 | "id": "xsoS7CPDCaXH"
525 | },
526 | "source": [
527 | "### Make predictions with the CNN model\n",
528 | "\n",
529 | "With the model trained, we can use it to make predictions about some images. The [`predict`](https://www.tensorflow.org/api_docs/python/tf/keras/models/Sequential#predict) function call generates the output predictions given a set of input samples.\n"
530 | ]
531 | },
532 | {
533 | "cell_type": "code",
534 | "execution_count": null,
535 | "metadata": {
536 | "id": "Gl91RPhdCaXI"
537 | },
538 | "outputs": [],
539 | "source": [
540 | "predictions = cnn_model.predict(test_images)"
541 | ]
542 | },
543 | {
544 | "cell_type": "markdown",
545 | "metadata": {
546 | "id": "x9Kk1voUCaXJ"
547 | },
548 | "source": [
549 | "With this function call, the model has predicted the label for each image in the testing set. Let's take a look at the prediction for the first image in the test dataset:"
550 | ]
551 | },
552 | {
553 | "cell_type": "code",
554 | "execution_count": null,
555 | "metadata": {
556 | "id": "3DmJEUinCaXK"
557 | },
558 | "outputs": [],
559 | "source": [
560 | "predictions[0]"
561 | ]
562 | },
563 | {
564 | "cell_type": "markdown",
565 | "metadata": {
566 | "id": "-hw1hgeSCaXN"
567 | },
568 | "source": [
569 | "As you can see, a prediction is an array of 10 numbers. Recall that the output of our model is a probability distribution over the 10 digit classes. Thus, these numbers describe the model's \"confidence\" that the image corresponds to each of the 10 different digits.\n",
570 | "\n",
571 | "Let's look at the digit that has the highest confidence for the first image in the test dataset:"
572 | ]
573 | },
574 | {
575 | "cell_type": "code",
576 | "execution_count": null,
577 | "metadata": {
578 | "id": "qsqenuPnCaXO"
579 | },
580 | "outputs": [],
581 | "source": [
582 | "'''TODO: identify the digit with the highest confidence prediction for the first\n",
583 | " image in the test dataset. '''\n",
584 | "prediction = # TODO\n",
585 | "\n",
586 | "print(prediction)"
587 | ]
588 | },
589 | {
590 | "cell_type": "markdown",
591 | "metadata": {
592 | "id": "E51yS7iCCaXO"
593 | },
594 | "source": [
595 | "So, the model is most confident that this image is a \"???\". We can check the test label (remember, this is the true identity of the digit) to see if this prediction is correct:"
596 | ]
597 | },
598 | {
599 | "cell_type": "code",
600 | "execution_count": null,
601 | "metadata": {
602 | "id": "Sd7Pgsu6CaXP"
603 | },
604 | "outputs": [],
605 | "source": [
606 | "print(\"Label of this digit is:\", test_labels[0])\n",
607 | "plt.imshow(test_images[0,:,:,0], cmap=plt.cm.binary)\n",
608 | "comet_model_2.log_figure(figure=plt)"
609 | ]
610 | },
611 | {
612 | "cell_type": "markdown",
613 | "metadata": {
614 | "id": "ygh2yYC972ne"
615 | },
616 | "source": [
617 | "It is! Let's visualize the classification results on the MNIST dataset. We will plot images from the test dataset along with their predicted label, as well as a histogram that provides the prediction probabilities for each of the digits:"
618 | ]
619 | },
620 | {
621 | "cell_type": "code",
622 | "execution_count": null,
623 | "metadata": {
624 | "id": "HV5jw-5HwSmO"
625 | },
626 | "outputs": [],
627 | "source": [
628 | "#@title Change the slider to look at the model's predictions! { run: \"auto\" }\n",
629 | "\n",
630 | "image_index = 79 #@param {type:\"slider\", min:0, max:100, step:1}\n",
631 | "plt.subplot(1,2,1)\n",
632 | "mdl.lab2.plot_image_prediction(image_index, predictions, test_labels, test_images)\n",
633 | "plt.subplot(1,2,2)\n",
634 | "mdl.lab2.plot_value_prediction(image_index, predictions, test_labels)\n",
635 | "comet_model_2.log_figure(figure=plt)"
636 | ]
637 | },
638 | {
639 | "cell_type": "markdown",
640 | "metadata": {
641 | "id": "kgdvGD52CaXR"
642 | },
643 | "source": [
644 | "We can also plot several images along with their predictions, where correct prediction labels are blue and incorrect prediction labels are grey. The number gives the percent confidence (out of 100) for the predicted label. Note the model can be very confident in an incorrect prediction!"
645 | ]
646 | },
647 | {
648 | "cell_type": "code",
649 | "execution_count": null,
650 | "metadata": {
651 | "id": "hQlnbqaw2Qu_"
652 | },
653 | "outputs": [],
654 | "source": [
655 | "# Plots the first X test images, their predicted label, and the true label\n",
656 | "# Color correct predictions in blue, incorrect predictions in red\n",
657 | "num_rows = 5\n",
658 | "num_cols = 4\n",
659 | "num_images = num_rows*num_cols\n",
660 | "plt.figure(figsize=(2*2*num_cols, 2*num_rows))\n",
661 | "for i in range(num_images):\n",
662 | " plt.subplot(num_rows, 2*num_cols, 2*i+1)\n",
663 | " mdl.lab2.plot_image_prediction(i, predictions, test_labels, test_images)\n",
664 | " plt.subplot(num_rows, 2*num_cols, 2*i+2)\n",
665 | " mdl.lab2.plot_value_prediction(i, predictions, test_labels)\n",
666 | "comet_model_2.log_figure(figure=plt)\n",
667 | "comet_model_2.end()\n"
668 | ]
669 | },
670 | {
671 | "cell_type": "markdown",
672 | "metadata": {
673 | "id": "k-2glsRiMdqa"
674 | },
675 | "source": [
676 | "## 1.4 Training the model 2.0\n",
677 | "\n",
678 | "Earlier in the lab, we used the [`fit`](https://www.tensorflow.org/api_docs/python/tf/keras/models/Sequential#fit) function call to train the model. This function is quite high-level and intuitive, which is really useful for simpler models. As you may be able to tell, this function abstracts away many details in the training call, and we have less control over training model, which could be useful in other contexts.\n",
679 | "\n",
680 | "As an alternative to this, we can use the [`tf.GradientTape`](https://www.tensorflow.org/api_docs/python/tf/GradientTape) class to record differentiation operations during training, and then call the [`tf.GradientTape.gradient`](https://www.tensorflow.org/api_docs/python/tf/GradientTape#gradient) function to actually compute the gradients. You may recall seeing this in Lab 1 Part 1, but let's take another look at this here.\n",
681 | "\n",
682 | "We'll use this framework to train our `cnn_model` using stochastic gradient descent."
683 | ]
684 | },
685 | {
686 | "cell_type": "code",
687 | "execution_count": null,
688 | "metadata": {
689 | "id": "Wq34id-iN1Ml"
690 | },
691 | "outputs": [],
692 | "source": [
693 | "# Rebuild the CNN model\n",
694 | "cnn_model = build_cnn_model()\n",
695 | "\n",
696 | "batch_size = 12\n",
697 | "loss_history = mdl.util.LossHistory(smoothing_factor=0.95) # to record the evolution of the loss\n",
698 | "plotter = mdl.util.PeriodicPlotter(sec=2, xlabel='Iterations', ylabel='Loss', scale='semilogy')\n",
699 | "optimizer = tf.keras.optimizers.SGD(learning_rate=1e-2) # define our optimizer\n",
700 | "\n",
701 | "comet_ml.init(project_name=\"6.s191lab2_part1_CNN2\")\n",
702 | "comet_model_3 = comet_ml.Experiment()\n",
703 | "\n",
704 | "if hasattr(tqdm, '_instances'): tqdm._instances.clear() # clear if it exists\n",
705 | "\n",
706 | "for idx in tqdm(range(0, train_images.shape[0], batch_size)):\n",
707 | " # First grab a batch of training data and convert the input images to tensors\n",
708 | " (images, labels) = (train_images[idx:idx+batch_size], train_labels[idx:idx+batch_size])\n",
709 | " images = tf.convert_to_tensor(images, dtype=tf.float32)\n",
710 | "\n",
711 | " # GradientTape to record differentiation operations\n",
712 | " with tf.GradientTape() as tape:\n",
713 | " #'''TODO: feed the images into the model and obtain the predictions'''\n",
714 | " logits = # TODO\n",
715 | "\n",
716 | " #'''TODO: compute the categorical cross entropy loss\n",
717 | " loss_value = tf.keras.backend.sparse_categorical_crossentropy('''TODO''', '''TODO''') # TODO\n",
718 | " comet_model_3.log_metric(\"loss\", loss_value.numpy().mean(), step=idx)\n",
719 | "\n",
720 | " loss_history.append(loss_value.numpy().mean()) # append the loss to the loss_history record\n",
721 | " plotter.plot(loss_history.get())\n",
722 | "\n",
723 | " # Backpropagation\n",
724 | " '''TODO: Use the tape to compute the gradient against all parameters in the CNN model.\n",
725 | " Use cnn_model.trainable_variables to access these parameters.'''\n",
726 | " grads = # TODO\n",
727 | " optimizer.apply_gradients(zip(grads, cnn_model.trainable_variables))\n",
728 | "\n",
729 | "comet_model_3.log_figure(figure=plt)\n",
730 | "comet_model_3.end()\n"
731 | ]
732 | },
733 | {
734 | "cell_type": "markdown",
735 | "metadata": {
736 | "id": "3cNtDhVaqEdR"
737 | },
738 | "source": [
739 | "## 1.5 Conclusion\n",
740 | "In this part of the lab, you had the chance to play with different MNIST classifiers with different architectures (fully-connected layers only, CNN), and experiment with how different hyperparameters affect accuracy (learning rate, etc.). The next part of the lab explores another application of CNNs, facial detection, and some drawbacks of AI systems in real world applications, like issues of bias."
741 | ]
742 | }
743 | ],
744 | "metadata": {
745 | "accelerator": "GPU",
746 | "colab": {
747 | "collapsed_sections": [
748 | "Xmf_JRJa_N8C"
749 | ],
750 | "name": "TF_Part1_MNIST.ipynb",
751 | "provenance": []
752 | },
753 | "kernelspec": {
754 | "display_name": "Python 3",
755 | "name": "python3"
756 | },
757 | "language_info": {
758 | "codemirror_mode": {
759 | "name": "ipython",
760 | "version": 3
761 | },
762 | "file_extension": ".py",
763 | "mimetype": "text/x-python",
764 | "name": "python",
765 | "nbconvert_exporter": "python",
766 | "pygments_lexer": "ipython3",
767 | "version": "3.9.6"
768 | }
769 | },
770 | "nbformat": 4,
771 | "nbformat_minor": 0
772 | }
773 |
--------------------------------------------------------------------------------
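As a reference for the custom training loop in section 1.4 of the notebook above, here is a minimal sketch of how its GradientTape TODOs could be completed. This is a sketch, not the official solution (see lab2/solutions); it assumes the notebook's `cnn_model`, `optimizer`, `images`, and `labels` are in scope.

import tensorflow as tf

with tf.GradientTape() as tape:
    # Forward pass: the model maps the image batch to per-class probabilities
    logits = cnn_model(images)
    # Compare the integer labels against the predicted distributions
    loss_value = tf.keras.backend.sparse_categorical_crossentropy(labels, logits)

# Differentiate the recorded operations w.r.t. all trainable parameters
grads = tape.gradient(loss_value, cnn_model.trainable_variables)
optimizer.apply_gradients(zip(grads, cnn_model.trainable_variables))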
/lab2/img/DB-VAE.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/2e90e0461ffa952f5855d4a89a32742796794527/lab2/img/DB-VAE.png
--------------------------------------------------------------------------------
/lab2/img/SS-VAE.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/2e90e0461ffa952f5855d4a89a32742796794527/lab2/img/SS-VAE.png
--------------------------------------------------------------------------------
/lab2/img/convnet_fig.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/2e90e0461ffa952f5855d4a89a32742796794527/lab2/img/convnet_fig.png
--------------------------------------------------------------------------------
/lab2/img/mnist_2layers_arch.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/2e90e0461ffa952f5855d4a89a32742796794527/lab2/img/mnist_2layers_arch.png
--------------------------------------------------------------------------------
/lab2/img/mnist_model.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/2e90e0461ffa952f5855d4a89a32742796794527/lab2/img/mnist_model.png
--------------------------------------------------------------------------------
/lab3/README.md:
--------------------------------------------------------------------------------
1 | # MIT 6.S191 Lab 3: Fine-Tune an LLM, You Must!
2 |
3 | 
4 | In this lab, you will fine-tune a multi-billion parameter large language model (LLM). We will go through several fundamental concepts of LLMs, including tokenization, templates, and fine-tuning. The lab provides a complete pipeline for fine-tuning a language model to generate responses in a specific style, and you will explore not only the fine-tuning itself but also ways to evaluate the model's performance.
5 |
6 | You will use Google's [Gemma 2B](https://huggingface.co/google/gemma-2b-it) model as the base language model to fine-tune; [Liquid AI's](https://www.liquid.ai/) [LFM-40B](https://www.liquid.ai/liquid-foundation-models) as an evaluation "judge" model; and Comet ML's [Opik](https://www.comet.com/site/products/opik/) as a framework for streamlined LLM evaluation.
--------------------------------------------------------------------------------
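To make the tokenization and template concepts concrete, here is a small sketch using the Hugging Face `transformers` API with the Gemma model named above (access to the gated `google/gemma-2b-it` checkpoint is assumed; the message text is made up):

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b-it")

# Chat templates wrap raw text in the turn markers the model was trained on
messages = [{"role": "user", "content": "Speak like Yoda, can you?"}]
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
print(prompt)                         # the templated string, with turn markers
print(tokenizer.encode(prompt)[:10])  # the first few token ids the model actually sees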
/lab3/img/yoda_wallpaper.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/2e90e0461ffa952f5855d4a89a32742796794527/lab3/img/yoda_wallpaper.jpg
--------------------------------------------------------------------------------
/mitdeeplearning/__init__.py:
--------------------------------------------------------------------------------
1 | import mitdeeplearning.util
2 |
3 | import mitdeeplearning.lab1
4 | import mitdeeplearning.lab2
5 | import mitdeeplearning.lab3
6 | import mitdeeplearning.lab3_old
7 |
--------------------------------------------------------------------------------
/mitdeeplearning/bin/abc2wav:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | abcfile=$1
4 | suffix=${abcfile%.abc}
5 | abc2midi $abcfile -o "$suffix.mid"
6 | timidity "$suffix.mid" -Ow "$suffix.wav"
7 | rm "$suffix.abc" "$suffix.mid"
8 |
--------------------------------------------------------------------------------
/mitdeeplearning/data/faces/DF/10.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/2e90e0461ffa952f5855d4a89a32742796794527/mitdeeplearning/data/faces/DF/10.png
--------------------------------------------------------------------------------
/mitdeeplearning/data/faces/DF/19.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/2e90e0461ffa952f5855d4a89a32742796794527/mitdeeplearning/data/faces/DF/19.png
--------------------------------------------------------------------------------
/mitdeeplearning/data/faces/DF/6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/2e90e0461ffa952f5855d4a89a32742796794527/mitdeeplearning/data/faces/DF/6.png
--------------------------------------------------------------------------------
/mitdeeplearning/data/faces/DF/7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/2e90e0461ffa952f5855d4a89a32742796794527/mitdeeplearning/data/faces/DF/7.png
--------------------------------------------------------------------------------
/mitdeeplearning/data/faces/DF/9.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/2e90e0461ffa952f5855d4a89a32742796794527/mitdeeplearning/data/faces/DF/9.png
--------------------------------------------------------------------------------
/mitdeeplearning/data/faces/DM/20.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/2e90e0461ffa952f5855d4a89a32742796794527/mitdeeplearning/data/faces/DM/20.png
--------------------------------------------------------------------------------
/mitdeeplearning/data/faces/DM/3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/2e90e0461ffa952f5855d4a89a32742796794527/mitdeeplearning/data/faces/DM/3.png
--------------------------------------------------------------------------------
/mitdeeplearning/data/faces/DM/5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/2e90e0461ffa952f5855d4a89a32742796794527/mitdeeplearning/data/faces/DM/5.png
--------------------------------------------------------------------------------
/mitdeeplearning/data/faces/DM/8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/2e90e0461ffa952f5855d4a89a32742796794527/mitdeeplearning/data/faces/DM/8.png
--------------------------------------------------------------------------------
/mitdeeplearning/data/faces/DM/9.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/2e90e0461ffa952f5855d4a89a32742796794527/mitdeeplearning/data/faces/DM/9.png
--------------------------------------------------------------------------------
/mitdeeplearning/data/faces/LF/1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/2e90e0461ffa952f5855d4a89a32742796794527/mitdeeplearning/data/faces/LF/1.png
--------------------------------------------------------------------------------
/mitdeeplearning/data/faces/LF/11.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/2e90e0461ffa952f5855d4a89a32742796794527/mitdeeplearning/data/faces/LF/11.png
--------------------------------------------------------------------------------
/mitdeeplearning/data/faces/LF/2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/2e90e0461ffa952f5855d4a89a32742796794527/mitdeeplearning/data/faces/LF/2.png
--------------------------------------------------------------------------------
/mitdeeplearning/data/faces/LF/4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/2e90e0461ffa952f5855d4a89a32742796794527/mitdeeplearning/data/faces/LF/4.png
--------------------------------------------------------------------------------
/mitdeeplearning/data/faces/LF/8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/2e90e0461ffa952f5855d4a89a32742796794527/mitdeeplearning/data/faces/LF/8.png
--------------------------------------------------------------------------------
/mitdeeplearning/data/faces/LM/1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/2e90e0461ffa952f5855d4a89a32742796794527/mitdeeplearning/data/faces/LM/1.png
--------------------------------------------------------------------------------
/mitdeeplearning/data/faces/LM/11.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/2e90e0461ffa952f5855d4a89a32742796794527/mitdeeplearning/data/faces/LM/11.png
--------------------------------------------------------------------------------
/mitdeeplearning/data/faces/LM/5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/2e90e0461ffa952f5855d4a89a32742796794527/mitdeeplearning/data/faces/LM/5.png
--------------------------------------------------------------------------------
/mitdeeplearning/data/faces/LM/8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/2e90e0461ffa952f5855d4a89a32742796794527/mitdeeplearning/data/faces/LM/8.png
--------------------------------------------------------------------------------
/mitdeeplearning/data/faces/LM/9.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/2e90e0461ffa952f5855d4a89a32742796794527/mitdeeplearning/data/faces/LM/9.png
--------------------------------------------------------------------------------
/mitdeeplearning/lab1.py:
--------------------------------------------------------------------------------
1 | import os
2 | import regex as re
3 | import subprocess
4 | import urllib
5 | import numpy as np
6 | import tensorflow as tf
7 |
8 | from IPython.display import Audio
9 |
10 |
11 | cwd = os.path.dirname(__file__)
12 |
13 |
14 | def load_training_data():
15 | with open(os.path.join(cwd, "data", "irish.abc"), "r") as f:
16 | text = f.read()
17 | songs = extract_song_snippet(text)
18 | return songs
19 |
20 |
21 | def extract_song_snippet(text):
22 | pattern = "(^|\n\n)(.*?)\n\n"
23 | search_results = re.findall(pattern, text, overlapped=True, flags=re.DOTALL)
24 | songs = [song[1] for song in search_results]
25 | print("Found {} songs in text".format(len(songs)))
26 | return songs
27 |
28 |
29 | def save_song_to_abc(song, filename="tmp"):
30 | save_name = "{}.abc".format(filename)
31 | with open(save_name, "w") as f:
32 | f.write(song)
33 | return filename
34 |
35 |
36 | def abc2wav(abc_file):
37 | path_to_tool = os.path.join(cwd, "bin", "abc2wav")
38 | cmd = "{} {}".format(path_to_tool, abc_file)
39 | return os.system(cmd)
40 |
41 |
42 | def play_wav(wav_file):
43 | return Audio(wav_file)
44 |
45 |
46 | def play_song(song):
47 | basename = save_song_to_abc(song)
48 | ret = abc2wav(basename + ".abc")
49 | if ret == 0: # succeeded (os.system returns 0 on success)
50 | return play_wav(basename + ".wav")
51 | return None
52 |
53 |
54 | def play_generated_song(generated_text):
55 | songs = extract_song_snippet(generated_text)
56 | if len(songs) == 0:
57 | print(
58 | "No valid songs found in generated text. Try training the \
59 | model longer or increasing the amount of generated music to \
60 | ensure complete songs are generated!"
61 | )
62 | return  # nothing to play
63 | played = [play_song(song) is not None for song in songs]
64 | if not any(played):  # no song converted successfully
65 | print(
66 | "None of the songs were valid, try training longer to improve \
67 | syntax."
68 | )
69 |
70 |
71 | def test_batch_func_types(func, args):
72 | ret = func(*args)
73 | assert len(ret) == 2, "[FAIL] get_batch must return two arguments (input and label)"
74 | assert type(ret[0]) == np.ndarray, "[FAIL] test_batch_func_types: x is not np.array"
75 | assert type(ret[1]) == np.ndarray, "[FAIL] test_batch_func_types: y is not np.array"
76 | print("[PASS] test_batch_func_types")
77 | return True
78 |
79 |
80 | def test_batch_func_shapes(func, args):
81 | dataset, seq_length, batch_size = args
82 | x, y = func(*args)
83 | correct = (batch_size, seq_length)
84 | assert (
85 | x.shape == correct
86 | ), "[FAIL] test_batch_func_shapes: x {} is not correct shape {}".format(
87 | x.shape, correct
88 | )
89 | assert (
90 | y.shape == correct
91 | ), "[FAIL] test_batch_func_shapes: y {} is not correct shape {}".format(
92 | y.shape, correct
93 | )
94 | print("[PASS] test_batch_func_shapes")
95 | return True
96 |
97 |
98 | def test_batch_func_next_step(func, args):
99 | x, y = func(*args)
100 | assert (
101 | x[:, 1:] == y[:, :-1]
102 | ).all(), "[FAIL] test_batch_func_next_step: x_{t} must equal y_{t-1} for all t"
103 | print("[PASS] test_batch_func_next_step")
104 | return True
105 |
106 |
107 | def test_custom_dense_layer_output(y):
108 | # define the ground truth value for the array
109 | true_y = np.array([[0.27064407, 0.1826951, 0.50374055]], dtype="float32")
110 | assert tf.shape(y).numpy().tolist() == list(
111 | true_y.shape
112 | ), "[FAIL] output is of incorrect shape. expected {} but got {}".format(
113 | true_y.shape, y.numpy().shape
114 | )
115 | np.testing.assert_almost_equal(
116 | y.numpy(),
117 | true_y,
118 | decimal=7,
119 | err_msg="[FAIL] output is of incorrect value. expected {} but got {}".format(
120 | true_y, y.numpy()
121 | ),
122 | verbose=True,
123 | )
124 | print("[PASS] test_custom_dense_layer_output")
125 | return True
126 |
--------------------------------------------------------------------------------
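The test_batch_func_* checks above pin down the contract of the batch function students write in Lab 1: return numpy arrays x, y of shape (batch_size, seq_length) with y shifted one step ahead of x. A minimal sketch satisfying that contract (not the official solution; `vectorized_songs` is assumed to be a 1-D numpy array of token ids):

import numpy as np

def get_batch(vectorized_songs, seq_length, batch_size):
    # Random start indices, leaving room for the one-step-ahead target
    idx = np.random.choice(vectorized_songs.shape[0] - seq_length - 1, batch_size)
    x_batch = np.array([vectorized_songs[i:i + seq_length] for i in idx])
    y_batch = np.array([vectorized_songs[i + 1:i + seq_length + 1] for i in idx])
    return x_batch, y_batch  # x[:, 1:] == y[:, :-1] by construction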
/mitdeeplearning/lab2.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import os
3 | import matplotlib.pyplot as plt
4 | import numpy as np
5 | import tensorflow as tf
6 | import time
7 | import h5py
8 | import sys
9 | import glob
10 |
11 | IM_SHAPE = (64, 64, 3)
12 |
13 |
14 | def plot_image_prediction(i, predictions_array, true_label, img):
15 | predictions_array, true_label, img = predictions_array[i], true_label[i], img[i]
16 | plt.grid(False)
17 | plt.xticks([])
18 | plt.yticks([])
19 |
20 | plt.imshow(np.squeeze(img), cmap=plt.cm.binary)
21 |
22 | predicted_label = np.argmax(predictions_array)
23 | if predicted_label == true_label:
24 | color = "blue"
25 | else:
26 | color = "red"
27 |
28 | plt.xlabel(
29 | "{} {:2.0f}% ({})".format(
30 | predicted_label, 100 * np.max(predictions_array), true_label
31 | ),
32 | color=color,
33 | )
34 |
35 |
36 | def plot_value_prediction(i, predictions_array, true_label):
37 | predictions_array, true_label = predictions_array[i], true_label[i]
38 | plt.grid(False)
39 | plt.xticks([])
40 | plt.yticks([])
41 | thisplot = plt.bar(range(10), predictions_array, color="#777777")
42 | plt.ylim([0, 1])
43 | predicted_label = np.argmax(predictions_array)
44 |
45 | thisplot[predicted_label].set_color("red")
46 | thisplot[true_label].set_color("blue")
47 |
48 |
49 | class TrainingDatasetLoader(object):
50 | def __init__(self, data_path, channels_last=True):
51 | print("Opening {}".format(data_path))
52 | sys.stdout.flush()
53 |
54 | self.cache = h5py.File(data_path, "r")
55 |
56 | print("Loading data into memory...")
57 | sys.stdout.flush()
58 | self.images = self.cache["images"][:]
59 | self.channels_last = channels_last
60 | self.labels = self.cache["labels"][:].astype(np.float32)
61 | self.image_dims = self.images.shape
62 | n_train_samples = self.image_dims[0]
63 |
64 | self.train_inds = np.random.permutation(np.arange(n_train_samples))
65 |
66 | self.pos_train_inds = self.train_inds[self.labels[self.train_inds, 0] == 1.0]
67 | self.neg_train_inds = self.train_inds[self.labels[self.train_inds, 0] != 1.0]
68 |
69 | def get_train_size(self):
70 | return self.train_inds.shape[0]
71 |
72 | def get_train_steps_per_epoch(self, batch_size, factor=10):
73 | return self.get_train_size() // factor // batch_size
74 |
75 | def get_batch(self, n, only_faces=False, p_pos=None, p_neg=None, return_inds=False):
76 | if only_faces:
77 | selected_inds = np.random.choice(
78 | self.pos_train_inds, size=n, replace=False, p=p_pos
79 | )
80 | else:
81 | selected_pos_inds = np.random.choice(
82 | self.pos_train_inds, size=n // 2, replace=False, p=p_pos
83 | )
84 | selected_neg_inds = np.random.choice(
85 | self.neg_train_inds, size=n // 2, replace=False, p=p_neg
86 | )
87 | selected_inds = np.concatenate((selected_pos_inds, selected_neg_inds))
88 |
89 | sorted_inds = np.sort(selected_inds)
90 | train_img = (self.images[sorted_inds, :, :, ::-1] / 255.0).astype(np.float32)
91 | train_label = self.labels[sorted_inds, ...]
92 |
93 | if not self.channels_last:
94 | train_img = np.ascontiguousarray(
95 | np.transpose(train_img, (0, 3, 1, 2))
96 | ) # [B, H, W, C] -> [B, C, H, W]
97 | return (
98 | (train_img, train_label, sorted_inds)
99 | if return_inds
100 | else (train_img, train_label)
101 | )
102 |
103 | def get_n_most_prob_faces(self, prob, n):
104 | idx = np.argsort(prob)[::-1]
105 | most_prob_inds = self.pos_train_inds[idx[: 10 * n : 10]]
106 | return (self.images[most_prob_inds, ...] / 255.0).astype(np.float32)
107 |
108 | def get_all_train_faces(self):
109 | return self.images[self.pos_train_inds]
110 |
111 |
112 | def get_test_faces(channels_last=True):
113 | cwd = os.path.dirname(__file__)
114 | images = {"LF": [], "LM": [], "DF": [], "DM": []}
115 | for key in images.keys():
116 | files = glob.glob(os.path.join(cwd, "data", "faces", key, "*.png"))
117 | for file in sorted(files):
118 | image = cv2.resize(cv2.imread(file), (64, 64))[:, :, ::-1] / 255.0
119 | if not channels_last:
120 | image = np.transpose(image, (2, 0, 1))
121 | images[key].append(image)
122 |
123 | return images["LF"], images["LM"], images["DF"], images["DM"]
124 |
--------------------------------------------------------------------------------
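A sketch of how TrainingDatasetLoader is driven from the Lab 2 notebook; the HDF5 path below is a placeholder for the dataset the notebook downloads, not a file shipped with this package:

import mitdeeplearning as mdl

loader = mdl.lab2.TrainingDatasetLoader("train_face.h5")  # hypothetical local path
x, y = loader.get_batch(32)   # 16 face + 16 non-face images scaled to [0, 1]
steps = loader.get_train_steps_per_epoch(batch_size=32)
faces = loader.get_all_train_faces()  # raw uint8 images of all positive examples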
/mitdeeplearning/lab3.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | from openai import OpenAI
4 | from datasets import load_dataset
5 | from torch.utils.data import DataLoader
6 |
7 |
8 | cwd = os.path.dirname(__file__)
9 |
10 | def create_dataloader(style):
11 | ds = load_dataset("databricks/databricks-dolly-15k", split="train")
12 | with open(os.path.join(cwd, f"data/text_styles/{style}.txt"), "r") as f:
13 | new_responses = [line.strip().replace("\\n", "\n") for line in f]
14 |
15 | # Update the entire dataset at once with the new responses
16 | ds_ = ds.select(range(len(new_responses)))
17 | ds_ = ds_.map(
18 | lambda x, idx: {"response_style": new_responses[idx]},
19 | with_indices=True,
20 | num_proc=1
21 | )
22 |
23 | n = len(new_responses)
24 | ds_test = ds.select(range(n, n+n))
25 |
26 | # Create a dataloader
27 | dataloader = DataLoader(ds_, batch_size=1, shuffle=True)
28 | dataloader_test = DataLoader(ds_test, batch_size=1, shuffle=True)
29 | return dataloader, dataloader_test
30 |
31 |
32 |
33 | class LLMClient:
34 | def __init__(self, model: str, api_key: str, api_base: str = "https://openrouter.ai/api/v1"):
35 | self.llm_client = OpenAI(api_key=api_key, base_url=api_base)
36 | self.model = model
37 |
38 | def ask(self, user: str, system: str = None, **kwargs):
39 | messages = [{"role": "user", "content": user}]
40 | if system:
41 | messages.insert(0, {"role": "system", "content": system})
42 | res = self.llm_client.chat.completions.create(
43 | model=self.model,
44 | messages=messages,
45 | **kwargs
46 | )
47 | return res
48 |
49 |
50 | yoda_test_text = (
51 | "Wisdom, sought by many, found by few, it is. Haste not, patience have. "
52 | "For in stillness, answers come. Much to learn, still you have. "
53 | "Fear leads to anger; anger, to hate. Down the dark path, guide you it will. "
54 | "Trust the Force, you must. Powerful ally it is. Life it creates, surrounds, binds. "
55 | "Adventure, excitement, a Jedi craves not these things. Discipline, balance, seek you should. "
56 | "Hmm, clearer now is the path, yes? Help you more, I can, if needed it is. "
57 | "Endless, the journey of learning is. Stay true to your path, and clarity you will find. "
58 | "Remember, the Force flows through all, but your heart determines how it shapes your destiny. "
59 | "Much more to teach, I have. Ready, are you? Mmm."
60 | )
61 |
62 |
63 |
64 | # class Llama(LLMClient):
65 | # def __init__(self, api_key: str):
66 | # """
67 | # Initialize the LlamaFree model client.
68 |
69 | # LlamaFree is available from LlamaFree.
70 | # Provide your LlamaFree API key (`api_key`) to access.
71 | # """
72 | # # super().__init__(model="meta-llama/llama-3.2-3b-instruct", api_key=api_key)
73 | # super().__init__(model="meta-llama/llama-3.1-8b-instruct", api_key=api_key)
74 |
75 |
76 | # class LFM40B(LLMClient):
77 | # def __init__(self, api_key: str):
78 | # """
79 | # Initialize the LFM-40B model client.
80 |
81 | # LFM-40B is available from Lambda Labs.
82 | # Provide your Lambda Labs API key (`api_key`) to access.
83 | # """
84 | # api_base = "https://api.lambdalabs.com/v1"
85 | # super().__init__(model="lfm-40b", api_base=api_base, api_key=api_key)
86 |
--------------------------------------------------------------------------------
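A sketch of how create_dataloader and LLMClient fit together; the model name and API key are placeholders, and the style string must match a file in data/text_styles ("yoda" or "leprechaun"):

from mitdeeplearning.lab3 import LLMClient, create_dataloader, yoda_test_text

train_loader, test_loader = create_dataloader("yoda")

client = LLMClient(model="meta-llama/llama-3.1-8b-instruct", api_key="YOUR_API_KEY")
res = client.ask(user=f"Does this text sound like Yoda? {yoda_test_text}")
print(res.choices[0].message.content)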
/mitdeeplearning/lab3_old.py:
--------------------------------------------------------------------------------
1 | import io
2 | import base64
3 | from IPython.display import HTML
4 | import gym
5 | import numpy as np
6 | import cv2
7 |
8 |
9 | def play_video(filename, width=None):
10 | encoded = base64.b64encode(io.open(filename, "r+b").read())
11 | video_width = 'width="' + str(width) + '"' if width is not None else ""
12 | embedded = HTML(
13 | data="""
14 | """.format(video_width, encoded.decode("ascii"))
17 | )
18 |
19 | return embedded
20 |
21 |
22 | def preprocess_pong(image):
23 | I = image[35:195] # Crop
24 | I = I[::2, ::2, 0] # Downsample width and height by a factor of 2
25 | I[I == 144] = 0 # Remove background type 1
26 | I[I == 109] = 0 # Remove background type 2
27 | I[I != 0] = 1 # Set remaining elements (paddles, ball, etc.) to 1
28 | I = cv2.dilate(I, np.ones((3, 3), np.uint8), iterations=1)
29 | I = I[::2, ::2, np.newaxis]
30 | return I.astype(np.float32)  # np.float alias was removed from NumPy; use an explicit dtype
31 |
32 |
33 | def pong_change(prev, curr):
34 | prev = preprocess_pong(prev)
35 | curr = preprocess_pong(curr)
36 | I = prev - curr
37 | # I = (I - I.min()) / (I.max() - I.min() + 1e-10)
38 | return I
39 |
40 |
41 | class Memory:
42 | def __init__(self):
43 | self.clear()
44 |
45 | # Resets/restarts the memory buffer
46 | def clear(self):
47 | self.observations = []
48 | self.actions = []
49 | self.rewards = []
50 |
51 | # Add observations, actions, rewards to memory
52 | def add_to_memory(self, new_observation, new_action, new_reward):
53 | self.observations.append(new_observation)
54 | self.actions.append(new_action)
55 | self.rewards.append(new_reward)
56 |
57 |
58 | def aggregate_memories(memories):
59 | batch_memory = Memory()
60 |
61 | for memory in memories:
62 | for step in zip(memory.observations, memory.actions, memory.rewards):
63 | batch_memory.add_to_memory(*step)
64 |
65 | return batch_memory
66 |
67 |
68 | def parallelized_collect_rollout(batch_size, envs, model, choose_action):
69 | assert (
70 | len(envs) == batch_size
71 | ), "Number of parallel environments must be equal to the batch size."
72 |
73 | memories = [Memory() for _ in range(batch_size)]
74 | next_observations = [single_env.reset() for single_env in envs]
75 | previous_frames = [obs for obs in next_observations]
76 | done = [False] * batch_size
77 | rewards = [0] * batch_size
78 |
79 | while True:
80 | current_frames = [obs for obs in next_observations]
81 | diff_frames = [
82 | pong_change(prev, curr)
83 | for (prev, curr) in zip(previous_frames, current_frames)
84 | ]
85 |
86 | diff_frames_not_done = [
87 | diff_frames[b] for b in range(batch_size) if not done[b]
88 | ]
89 | actions_not_done = choose_action(
90 | model, np.array(diff_frames_not_done), single=False
91 | )
92 |
93 | actions = [None] * batch_size
94 | ind_not_done = 0
95 | for b in range(batch_size):
96 | if not done[b]:
97 | actions[b] = actions_not_done[ind_not_done]
98 | ind_not_done += 1
99 |
100 | for b in range(batch_size):
101 | if done[b]:
102 | continue
103 | next_observations[b], rewards[b], done[b], info = envs[b].step(actions[b])
104 | previous_frames[b] = current_frames[b]
105 | memories[b].add_to_memory(diff_frames[b], actions[b], rewards[b])
106 |
107 | if all(done):
108 | break
109 |
110 | return memories
111 |
112 |
113 | def save_video_of_model(model, env_name, suffix=""):
114 | import skvideo.io
115 | from pyvirtualdisplay import Display
116 |
117 | display = Display(visible=0, size=(400, 300))
118 | display.start()
119 |
120 | env = gym.make(env_name)
121 | obs = env.reset()
122 | prev_obs = obs
123 |
124 | filename = env_name + suffix + ".mp4"
125 | output_video = skvideo.io.FFmpegWriter(filename)
126 |
127 | counter = 0
128 | done = False
129 | while not done:
130 | frame = env.render(mode="rgb_array")
131 | output_video.writeFrame(frame)
132 |
133 | if "CartPole" in env_name:
134 | input_obs = obs
135 | elif "Pong" in env_name:
136 | input_obs = pong_change(prev_obs, obs)
137 | else:
138 | raise ValueError(f"Unknown env for saving: {env_name}")
139 |
140 | action = model(np.expand_dims(input_obs, 0)).numpy().argmax()
141 |
142 | prev_obs = obs
143 | obs, reward, done, info = env.step(action)
144 | counter += 1
145 |
146 | output_video.close()
147 | print("Successfully saved {} frames into {}!".format(counter, filename))
148 | return filename
149 |
150 |
151 | def save_video_of_memory(memory, filename, size=(512, 512)):
152 | import skvideo.io
153 |
154 | output_video = skvideo.io.FFmpegWriter(filename)
155 |
156 | for observation in memory.observations:
157 | output_video.writeFrame(cv2.resize(255 * observation, size))
158 |
159 | output_video.close()
160 | return filename
161 |
--------------------------------------------------------------------------------
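For context, a sketch of how Memory, pong_change, and aggregate_memories compose into a single-environment rollout; the random action is a stand-in for the notebook's learned policy, and the old 4-tuple gym step API used throughout this file is assumed:

import gym

env = gym.make("Pong-v0")
memory = Memory()
obs = env.reset()
prev_obs = obs
done = False
while not done:
    action = env.action_space.sample()  # placeholder for choose_action(model, ...)
    diff = pong_change(prev_obs, obs)   # motion cue fed to the policy
    prev_obs = obs
    obs, reward, done, info = env.step(action)
    memory.add_to_memory(diff, action, reward)

batch = aggregate_memories([memory])    # merge rollouts into one training batch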
/mitdeeplearning/util.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import time
3 | import numpy as np
4 |
5 | from IPython import display as ipythondisplay
6 | from string import Formatter
7 |
8 |
9 | def display_model(model):
10 | import tensorflow as tf
11 | tf.keras.utils.plot_model(model, to_file="tmp.png", show_shapes=True)
12 | return ipythondisplay.Image("tmp.png")
13 |
14 |
15 | def plot_sample(x, y, vae, backend='tf'):
16 | """Plot original and reconstructed images side by side.
17 |
18 | Args:
19 | x: Input images array of shape [B, H, W, C] (TF) or [B, C, H, W] (PT)
20 | y: Labels array of shape [B] where 1 indicates a face
21 | vae: VAE model (TensorFlow or PyTorch)
22 | backend: 'tf' or 'pt' indicating which framework to use
23 | """
24 | plt.figure(figsize=(2, 1))
25 |
26 | if backend == 'tf':
27 | idx = np.where(y == 1)[0][0]
28 | _, _, _, recon = vae(x)
29 | recon = np.clip(recon, 0, 1)
30 |
31 | elif backend == 'pt':
32 | import torch
33 | y = y.detach().cpu().numpy()
34 | face_indices = np.where(y == 1)[0]
35 | idx = face_indices[0] if len(face_indices) > 0 else 0
36 |
37 | with torch.inference_mode():
38 | _, _, _, recon = vae(x)
39 | recon = torch.clamp(recon, 0, 1)
40 | recon = recon.permute(0, 2, 3, 1).detach().cpu().numpy()
41 | x = x.permute(0, 2, 3, 1).detach().cpu().numpy()
42 |
43 | else:
44 | raise ValueError("framework must be 'tf' or 'pt'")
45 |
46 | plt.subplot(1, 2, 1)
47 | plt.imshow(x[idx])
48 | plt.grid(False)
49 |
50 | plt.subplot(1, 2, 2)
51 | plt.imshow(recon[idx])
52 | plt.grid(False)
53 |
54 | if backend == 'pt':
55 | plt.show()
56 |
57 |
58 | class LossHistory:
59 | def __init__(self, smoothing_factor=0.0):
60 | self.alpha = smoothing_factor
61 | self.loss = []
62 |
63 | def append(self, value):
64 | self.loss.append(
65 | self.alpha * self.loss[-1] + (1 - self.alpha) * value
66 | if len(self.loss) > 0
67 | else value
68 | )
69 |
70 | def get(self):
71 | return self.loss
72 |
73 |
74 | class PeriodicPlotter:
75 | def __init__(self, sec, xlabel="", ylabel="", scale=None):
76 | self.xlabel = xlabel
77 | self.ylabel = ylabel
78 | self.sec = sec
79 | self.scale = scale
80 |
81 | self.tic = time.time()
82 |
83 | def plot(self, data):
84 | if time.time() - self.tic > self.sec:
85 | plt.cla()
86 |
87 | if self.scale is None:
88 | plt.plot(data)
89 | elif self.scale == "semilogx":
90 | plt.semilogx(data)
91 | elif self.scale == "semilogy":
92 | plt.semilogy(data)
93 | elif self.scale == "loglog":
94 | plt.loglog(data)
95 | else:
96 | raise ValueError("unrecognized parameter scale {}".format(self.scale))
97 |
98 | plt.xlabel(self.xlabel)
99 | plt.ylabel(self.ylabel)
100 | ipythondisplay.clear_output(wait=True)
101 | ipythondisplay.display(plt.gcf())
102 |
103 | self.tic = time.time()
104 |
105 |
106 | def create_grid_of_images(xs, size=(5, 5)):
107 | """Combine a list of images into a single image grid by stacking them into an array of shape `size`"""
108 |
109 | grid = []
110 | counter = 0
111 | for i in range(size[0]):
112 | row = []
113 | for j in range(size[1]):
114 | row.append(xs[counter])
115 | counter += 1
116 | row = np.hstack(row)
117 | grid.append(row)
118 | grid = np.vstack(grid)
119 | return grid
120 |
--------------------------------------------------------------------------------
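A small sketch of how LossHistory and PeriodicPlotter are wired into the labs' training loops; the decaying loss here is synthetic:

import mitdeeplearning as mdl

history = mdl.util.LossHistory(smoothing_factor=0.95)  # exponentially smoothed record
plotter = mdl.util.PeriodicPlotter(sec=2, xlabel="Iterations", ylabel="Loss", scale="semilogy")

for step in range(1, 1001):
    loss = 1.0 / step  # stand-in for a real training loss
    history.append(loss)
    plotter.plot(history.get())  # redraws at most once every `sec` seconds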
/setup.cfg:
--------------------------------------------------------------------------------
1 | # Inside of setup.cfg
2 | [metadata]
3 | description_file = README.md
4 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from pkg_resources import DistributionNotFound, get_distribution
2 | from distutils.core import setup
3 |
4 |
5 | def get_dist(pkgname):
6 | try:
7 | return get_distribution(pkgname)
8 | except DistributionNotFound:
9 | return None
10 |
11 | install_deps = [
12 | 'comet_ml',
13 | 'numpy',
14 | 'regex',
15 | 'tqdm',
16 | 'gym',
17 | 'opik',
18 | 'openai',
19 | 'transformers',
20 | 'datasets',
21 | 'peft',
22 | 'lion-pytorch',
23 | ]
24 | tf_ver = '2.0.0a'
25 | if get_dist('tensorflow>='+tf_ver) is None and get_dist('tensorflow_gpu>='+tf_ver) is None:
26 | install_deps.append('tensorflow>='+tf_ver)
27 |
28 | setup(
29 | name = 'mitdeeplearning', # How you named your package folder (MyLib)
30 | packages = ['mitdeeplearning'], # Choose the same as "name"
31 | version = '0.7.5', # Start with a small number and increase it with every change you make
32 | license='MIT', # Choose a license from here: https://help.github.com/articles/licensing-a-repository
33 | description = 'Official software labs for MIT Introduction to Deep Learning (http://introtodeeplearning.com)', # Give a short description about your library
34 | author = 'Alexander Amini', # Type in your name
35 | author_email = 'introtodeeplearning-staff@mit.edu', # Type in your E-Mail
36 | url = 'http://introtodeeplearning.com', # Provide either the link to your github or to your website
37 | download_url = 'https://github.com/MITDeepLearning/introtodeeplearning/archive/v0.7.5.tar.gz', # I explain this later on
38 | keywords = ['deep learning', 'neural networks', 'tensorflow', 'introduction'], # Keywords that define your package best
39 | install_requires=install_deps,
40 | classifiers=[
41 | 'Development Status :: 3 - Alpha', # Choose either "3 - Alpha", "4 - Beta" or "5 - Production/Stable" as the current state of your package
42 | 'License :: OSI Approved :: MIT License', # Again, pick a license
43 | 'Programming Language :: Python :: 3', # Specify which Python versions you want to support
44 | 'Programming Language :: Python :: 3.6',
45 | ],
46 | package_data={
47 | 'mitdeeplearning': ['bin/*', 'data/*', 'data/text_styles/*', 'data/faces/DF/*', 'data/faces/DM/*', 'data/faces/LF/*', 'data/faces/LM/*'],
48 | },
49 |
50 | )
51 |
--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
1 | import mitdeeplearning as mdl
2 |
3 | songs = mdl.lab1.load_training_data()
4 |
5 | basename = mdl.lab1.save_song_to_abc(songs[0])
6 | ret = mdl.lab1.abc2wav(basename+'.abc')
7 |
8 | import pdb; pdb.set_trace()
9 |
--------------------------------------------------------------------------------
/xtra_labs/llm_finetune/NOT_FINAL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/2e90e0461ffa952f5855d4a89a32742796794527/xtra_labs/llm_finetune/NOT_FINAL
--------------------------------------------------------------------------------
/xtra_labs/llm_finetune/draft.py:
--------------------------------------------------------------------------------
1 | """
2 | Drafting lab flow in script format using PyTorch
3 | """
4 | from datasets import load_dataset
5 | import math
6 | import numpy as np
7 | import pandas as pd
8 | import random
9 | import torch
10 | import torch.nn as nn
11 | import torch.nn.functional as F
12 | from torch.nn import CrossEntropyLoss
13 | from torch.optim import Adam
14 | import transformers
15 | from trl import SFTTrainer
16 | from tqdm import tqdm
17 |
18 | from utils import run_benchmark, make_spider_plot
19 |
20 | # Part 1
21 |
22 | # TEXT: overview of LLM lab
23 | # Load pretrained LLM (medium size model)
24 |
25 | # model_name = "facebook/opt-1.3b"
26 | model_name = "facebook/opt-125m"
27 | model = transformers.AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")
28 | tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)
29 |
30 | # TEXT: explain tokenizer
31 | # Include cell for tokenizer inspection
32 |
33 | # TEXT: explain how LLMs are trained for next token prediction
34 | # Write a function to predict next token
35 | def predict_next_token(probs, tokenizer):
36 | new_token = np.random.choice(len(probs), p=probs.numpy())
37 | print(tokenizer.decode(new_token), end='', flush=True)
38 | return new_token
39 |
40 | # TEXT: explain that next token prediction must be called multiple times for inference
41 | # Call in loop for autoregressive inference
42 | def generate(start_text, model, tokenizer, num_steps=20, temp=1.):
43 | print(start_text, end="")
44 | x = tokenizer.encode(start_text)
45 | num_start = len(x)
46 |
47 | for i in range(num_steps):
48 | input_tensor = torch.tensor(x).view(1, -1).to("cuda")
49 | logits = model(input_tensor).logits
50 | probs = F.softmax(logits/temp, dim=-1)[0, -1, :].cpu().detach()
51 |
52 | new_token = predict_next_token(probs, tokenizer)
53 | x.append(new_token)
54 |
55 | output = tokenizer.decode(x[num_start:])
56 | return output
57 |
58 | def generate_pt(model, tokenizer, text, num_steps=50, until=None, temp=1.):
59 | device = model.device
60 | print(text, end='', flush=True)
61 | x = tokenizer.encode(text)
62 | enc_until = tokenizer.encode(until)[1:] if until is not None else None  # unused; kept from an earlier draft
63 | num_start = len(x)
64 |
65 | decoded = tokenizer.decode(x)
66 |
67 | for step in range(num_steps):
68 | with torch.no_grad():
69 | input_tensor = torch.reshape(torch.LongTensor(x), [1, -1]).to(device)
70 | logits = model(input_tensor).logits
71 | probs = F.softmax(logits/temp, dim=-1)[0, -1, :]
72 | probs = probs.detach().cpu().numpy()
73 |
74 | new_token = np.random.choice(len(probs), p=probs)
75 | x.append(new_token)
76 |
77 | new_decoded = tokenizer.decode(x)
78 | new_part = new_decoded[len(decoded):]
79 | decoded = new_decoded
80 |
81 | print(new_part, end='', flush=True)
82 | text += new_part
83 |
84 | if until is not None and text.endswith(until):  # stop once the stop string is generated
85 | break
86 |
87 |
88 | output = tokenizer.decode(x[num_start:])
89 | print("\n", flush=True)
90 | return output
91 |
92 | # Test autoregressive generation
93 | # while True:
94 | # print("\n\n\n\n\n")
95 | # input_text = input("Prompt: ")
96 | # output = generate(input_text, model, tokenizer)
97 |
98 | # TEXT: some background on LLM benchmarking
99 | # Load benchmark dataset and evaluate model
100 | benchmark_dataset = pd.read_csv("benchmark.csv")
101 | # category_accs_1300m, avg_acc_1300m = run_benchmark(model, tokenizer, benchmark_dataset)
102 |
103 | # TEXT: ask them to make a prediction on how accuracy will be affected by different model sizes
104 |
105 | # Benchmark smaller model
106 | # model_name_350m = "facebook/opt-350m"
107 | # model_350m = transformers.AutoModelForCausalLM.from_pretrained(model_name_350m, device_map="auto")
108 | # tokenizer_350m = transformers.AutoTokenizer.from_pretrained(model_name_350m)
109 |
110 | # category_accs_350m, avg_acc_350m = run_benchmark(model_350m, tokenizer_350m, benchmark_dataset)
111 |
112 | # Benchmark larger model
113 | # model_name_2700m = "facebook/opt-2.7b"
114 | # model_2700m = transformers.AutoModelForCausalLM.from_pretrained(model_name_2700m, device_map="auto")
115 | # tokenizer_2700m = transformers.AutoTokenizer.from_pretrained(model_name_2700m)
116 |
117 | # category_accs_2700m, avg_acc_2700m = run_benchmark(model_2700m, tokenizer_2700m, benchmark_dataset)
118 |
119 | # Spider plot
120 |
121 | # benchmark_data = {"350M-Model": category_accs_350m, "1300M-Model": category_accs_1300m, "2700M-Model": category_accs_2700m}
122 | # benchmark_data = {"350M-Model": category_accs_1300m}
123 | # make_spider_plot(benchmark_data)
124 |
125 | def print_lora_params(module, layer_type):
126 | summ = 0
127 | for name, child in module.named_children():
128 | if isinstance(child, layer_type):
129 | num_params = sum(p.numel() for p in child.parameters() if p.requires_grad)
130 |
131 | print(name, num_params, child.in_features, child.out_features, (child.in_features * 8 + child.out_features * 8 == num_params))
132 |
133 | summ += num_params
134 | else:
135 | summ += print_lora_params(child, layer_type)
136 |
137 | return summ
138 |
139 | # Part 2
140 |
141 | # inspect current model
142 | # print(model)
143 |
144 | # summ = print_lora_params(model, nn.Linear)
145 |
146 | # print("with function", summ)
147 |
148 | # print("without function", sum(p.numel() for p in model.parameters() if p.requires_grad))
149 |
150 | # # freeze all parameter gradients
151 | for param in model.parameters():
152 | param.requires_grad = False
153 |
154 | # new LoRA linear layer class
155 | class LoRALinear(nn.Module):
156 | def __init__(
157 | self,
158 | in_features: int,
159 | out_features: int,
160 | pretrained_weight: torch.Tensor,
161 | pretrained_bias: torch.Tensor,
162 | r: int = 8,
163 | lora_alpha: int = 8,
164 | lora_dropout: float = 0.1,
165 | **kwargs
166 | ):
167 | super(LoRALinear, self).__init__()
168 |
169 | self.r = r
170 | self.in_features = in_features
171 | self.out_features = out_features
172 | self.lora_alpha = lora_alpha
173 |
174 | self.weight = nn.Parameter(pretrained_weight)
175 | self.weight.requires_grad = False
176 |
177 | if pretrained_bias is not None:
178 | self.bias = nn.Parameter(pretrained_bias)
179 | self.bias.requires_grad = False
180 | else:
181 | self.bias = None
182 |
183 | # from https://github.com/microsoft/LoRA/blob/main/loralib/layers.py
184 | self.lora_A = nn.Parameter(self.weight.new_zeros((r, in_features)))
185 | self.lora_B = nn.Parameter(self.weight.new_zeros((out_features, r)))
186 | self.scaling = self.lora_alpha / self.r
187 | self.lora_dropout = nn.Dropout(p=lora_dropout)
188 |
189 | def forward(self, x: torch.Tensor):
190 | result = F.linear(x, self.weight, bias=self.bias)
191 | result += (self.lora_dropout(x) @ self.lora_A.transpose(0, 1) @ self.lora_B.transpose(0, 1)) * self.scaling
192 | return result
193 |
194 | # replace linear layers in model recursively
195 | def replace_linear_with_lora(module):
196 | for name, child in module.named_children():
197 | if isinstance(child, nn.Linear):
198 | setattr(module, name, LoRALinear(child.in_features, child.out_features, child.weight, child.bias))
199 | else:
200 | replace_linear_with_lora(child)
201 |
202 | replace_linear_with_lora(model)
203 |
204 |
205 |
206 | # summ = print_lora_params(model, LoRALinear)
207 |
208 | # print("with function", summ)
209 |
210 | # print("without function", sum(p.numel() for p in model.parameters() if p.requires_grad))
211 |
212 |
213 | # inspect new model
214 | # print(model)
215 |
216 | # load chat dataset
217 | dataset_name = "timdettmers/openassistant-guanaco"
218 | ft_dataset = load_dataset(dataset_name, split="train")
219 |
220 | # train model (barebones loop)
221 | context_length = 768
222 | loss_fn = CrossEntropyLoss()
223 |
224 | learning_rate = 1e-4
225 | optimizer = Adam(model.parameters(), lr=learning_rate)
226 | num_epochs = 5
227 |
228 | model = model.to("cuda")
229 |
230 | ### Train the model
231 | # Define some training args
232 | args = transformers.TrainingArguments("/home/dnori/introtodeeplearning/xtra_labs/llm_finetune/outputs",
233 | per_device_train_batch_size=1,
234 | logging_first_step=True,
235 | logging_steps=20,
236 | save_steps=100,
237 | )
238 |
239 | # Define a callback to check the progress on a sample question
240 | class PrinterCallback(transformers.TrainerCallback):
241 | def on_log(self, args, state, control, model, logs=None, **kwargs):
242 | start_text = "### Human: When the weather is sunny, what color is the sky?### Assistant:"
243 | generate_pt(model, tokenizer, start_text, num_steps=200, until="###")
244 |
245 | # Actually train the model
246 | trainer = SFTTrainer(
247 | model,
248 | args=args,
249 | train_dataset=ft_dataset,
250 | dataset_text_field="text",
251 | max_seq_length=context_length,
252 | callbacks=[PrinterCallback()]
253 | )
254 | trainer.train()
255 |
256 |
257 | # for epoch in range(num_epochs):
258 | # total_loss = 0
259 | # num_batches = 0
260 |
261 | # for batch in tqdm(ft_dataset):
262 | # prompt = batch["text"]
263 |
264 | # # encode with tokenizer
265 | # x = tokenizer.encode(prompt)
266 | # x_tensor = torch.tensor(x).view(1, -1).to("cuda")
267 | # max_len = min(context_length, x_tensor.shape[1]-1)
268 | # selected_len = random.randint(1,max_len)
269 |
270 | # input_tensor = x_tensor[:,:selected_len]
271 | # target_tensor = x_tensor[0,1:selected_len+1]
272 |
273 | # # zero gradients
274 | # optimizer.zero_grad()
275 |
276 | # # run through model
277 | # logits = model(input_tensor).logits[0]
278 |
279 | # # apply loss
280 | # loss = loss_fn(logits, target_tensor)
281 |
282 | # # backpropagation
283 | # loss.backward()
284 |
285 | # # optimizer step
286 | # optimizer.step()
287 |
288 | # total_loss += loss.item()
289 | # num_batches += 1
290 |
291 | # # Print average loss for the epoch
292 | # average_loss = total_loss / num_batches
293 | # print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {average_loss}")
294 |
295 | # # evaluate finetuned model on benchmark
296 | # category_accs_1300m_ft, avg_acc_1300m_ft = run_benchmark(model, tokenizer, benchmark_dataset)
297 |
298 | # add to spider plot
299 | # benchmark_data = {"350M-Model": category_accs_350m, "1300M-Model": category_accs_1300m, "1300M-Model-Finetuned": category_accs_1300m_ft, "2700M-Model": category_accs_2700m}
300 | # benchmark_data = {"350M-Model": category_accs_1300m, "350M-Model-Finetuned": category_accs_1300m_ft}
301 | # make_spider_plot(benchmark_data)
--------------------------------------------------------------------------------
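A quick standalone sanity check of the LoRALinear wrapper defined above; the layer sizes and rank are illustrative:

import torch
import torch.nn as nn

base = nn.Linear(16, 8)
lora = LoRALinear(16, 8, base.weight.data, base.bias.data, r=4)

x = torch.randn(2, 16)
print(lora(x).shape)  # torch.Size([2, 8]); matches base(x) since lora_B starts at zero
# Only the low-rank factors remain trainable:
print([n for n, p in lora.named_parameters() if p.requires_grad])  # ['lora_A', 'lora_B']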
/xtra_labs/llm_finetune/spider.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/2e90e0461ffa952f5855d4a89a32742796794527/xtra_labs/llm_finetune/spider.png
--------------------------------------------------------------------------------
/xtra_labs/llm_finetune/utils.py:
--------------------------------------------------------------------------------
1 | """
2 | Contains functions that the students will not interface with
3 | """
4 | import matplotlib.pyplot as plt
5 | import numpy as np
6 | import pandas as pd
7 | import tensorflow as tf
8 | import torch
9 | import torch.nn.functional as F
10 | from tqdm import tqdm
11 |
12 | def run_benchmark(model, tokenizer, dataset, few_shot=7, num_steps=500, verbose=False):
13 | device = model.device
14 | dataset["Correct"] = 0.0
15 |
16 | # Loop through every question in the benchmark
17 | for step, row in tqdm(dataset.iterrows(), total=len(dataset)):
18 | question = row['Question']
19 | pre_text = f"### Human: {question}### Assistant:"
20 | len_prefix = len(tokenizer.encode(pre_text))
21 |
22 | # Run the model individually with each of the four responses.
23 | # Measure the model's logprob for outputing each of the four responses.
24 | # Choose the answer with the highest logprob
25 | logprobs = []
26 | answers = []
27 | for choice in ["A", "B", "C", "D"]:
28 | answer = row[f'Answer {choice}']
29 | text = f"{pre_text} {answer}"
30 |
31 | # Run the model
32 | with torch.no_grad():
33 | x = tokenizer.encode(text, return_tensors="pt").to(device)
34 | logits = model(x).logits
35 | probs = F.softmax(logits, dim=-1)[0, :-1, :] # shape: [seq_len-1, vocab_size]
36 | y = x[0, 1:] # shape: [seq_len-1]
37 |
38 | # Compute the log probability for this answer to appear (average logprob over the answer tokens)
39 | next_token_prob = np.array([probs[i, y[i]].item() for i in range(y.shape[0])])
40 | num_ans_tokens = x.shape[1] - len_prefix
41 | logprob = np.mean(np.log(next_token_prob[-num_ans_tokens:]))
42 | logprobs.append(logprob)
43 | answers.append(answer)
44 |
45 | # Check for the correct answer (always the zero-th index, by definition)
46 | correct = np.argmax(logprobs) == 0
47 |
48 | # Record if the model got the answer correct or not.
49 | # Optionally print the question -> prediction if verbose
50 | dataset.at[step, "Correct"] = float(correct)
51 | if verbose:
52 | print(f"[{correct}] {question} -> {answers[np.argmax(logprobs)]}")
53 |
54 |
55 | # Group by the categories and compute the average accuracy
56 | accs = dataset.groupby("Category")["Correct"].mean()
57 | sorted_accs = accs.sort_values()
58 | print(sorted_accs)
59 |
60 | return accs, dataset["Correct"].mean()
61 |
62 | def make_spider_plot(data):
63 | """
64 | data: dictionary mapping each entity name to a pd.Series, where the
65 | Series index gives the plot labels and the values give performance scores
66 | """
67 | colors = ['#1aaf6c', '#429bf4', '#d42cea', '#f58a2c']  # one color per plotted series; extend for more
68 | i = 0
69 | fig, ax = plt.subplots(figsize=(8,6), subplot_kw=dict(polar=True))
70 | for k,v in data.items():
71 | labels = v.index.tolist()
72 | values = v.values.tolist()
73 |
74 | num_vars = len(labels)
75 | angles = np.linspace(0, 2 * np.pi, num_vars, endpoint=False).tolist()
76 | angles += angles[:1]
77 | values += values[:1]
78 |
79 | ax.plot(angles, values, color=colors[i], linewidth=1, label=k)
80 | ax.fill(angles, values, color=colors[i], alpha=0.25)
81 |
82 | i+=1
83 |
84 | ax.set_theta_offset(np.pi / 2)
85 | ax.set_theta_direction(-1)
86 | ax.set_thetagrids(np.degrees(angles[:-1]), labels)
87 | for label, angle in zip(ax.get_xticklabels(), angles):
88 | if angle in (0, np.pi):
89 | label.set_horizontalalignment('center')
90 | elif 0 < angle < np.pi:
91 | label.set_horizontalalignment('left')
92 | else:
93 | label.set_horizontalalignment('right')
94 |
95 | ax.set_ylim(0, 1)
96 | ax.set_rlabel_position(180 / num_vars)
97 |
98 | ax.legend(loc='upper right', bbox_to_anchor=(1.3, 1.1))
99 |
100 | plt.savefig("spider.png")
101 |
102 |
103 |
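104 | 
105 | # --- Usage sketch (not part of the student-facing lab code) ---
106 | # A minimal example of how these helpers might be driven. Assumptions are
107 | # flagged inline: a HuggingFace causal LM (the checkpoint name below is
108 | # illustrative, not necessarily the lab's), a GPU, and a benchmark CSV with
109 | # the columns run_benchmark expects.
110 | if __name__ == "__main__":
111 |     from transformers import AutoModelForCausalLM, AutoTokenizer
112 | 
113 |     name = "facebook/opt-350m"  # assumption: any HF causal LM works here
114 |     model = AutoModelForCausalLM.from_pretrained(name).to("cuda")  # assumes a GPU
115 |     tokenizer = AutoTokenizer.from_pretrained(name)
116 | 
117 |     benchmark = pd.read_csv("benchmark.csv")  # 'Question', 'Answer A'..'D', 'Category'
118 |     accs, avg_acc = run_benchmark(model, tokenizer, benchmark, verbose=True)
119 |     print(f"Average accuracy: {avg_acc:.3f}")
120 |     make_spider_plot({"350M-Model": accs})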
--------------------------------------------------------------------------------
/xtra_labs/rl_pong/img/COMING SOON:
--------------------------------------------------------------------------------
1 | COMING SOON
2 |
--------------------------------------------------------------------------------
/xtra_labs/rl_pong/img/vista_overview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/2e90e0461ffa952f5855d4a89a32742796794527/xtra_labs/rl_pong/img/vista_overview.png
--------------------------------------------------------------------------------
/xtra_labs/rl_selfdriving/img/COMING SOON:
--------------------------------------------------------------------------------
1 | COMING SOON
2 |
--------------------------------------------------------------------------------
/xtra_labs/rl_selfdriving/img/vista_overview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/2e90e0461ffa952f5855d4a89a32742796794527/xtra_labs/rl_selfdriving/img/vista_overview.png
--------------------------------------------------------------------------------
/xtra_labs/uncertainty/Part1_IntroductionCapsa.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "attachments": {},
5 | "cell_type": "markdown",
6 | "metadata": {
7 | "id": "SWa-rLfIlTaf"
8 | },
9 | "source": [
10 | "\n",
19 | "\n",
20 | "# Copyright Information"
21 | ]
22 | },
23 | {
24 | "cell_type": "code",
25 | "execution_count": null,
26 | "metadata": {
27 | "id": "-LohleBMlahL"
28 | },
29 | "outputs": [],
30 | "source": [
31 | "# Copyright 2023 MIT Introduction to Deep Learning. All Rights Reserved.\n",
32 | "# \n",
33 | "# Licensed under the MIT License. You may not use this file except in compliance\n",
34 | "# with the License. Use and/or modification of this code outside of MIT Introduction\n",
35 | "# to Deep Learning must reference:\n",
36 | "#\n",
37 | "# © MIT Introduction to Deep Learning\n",
38 | "# http://introtodeeplearning.com\n",
39 | "#"
40 | ]
41 | },
42 | {
43 | "cell_type": "markdown",
44 | "metadata": {
45 | "id": "ckzz5Hus-hJB"
46 | },
47 | "source": [
48 | "# Laboratory 3: Debiasing, Uncertainty, and Robustness\n",
49 | "\n",
50 | "# Part 1: Introduction to Capsa\n",
51 | "\n",
52 | "In this lab, we'll explore different ways to make deep learning models more **robust** and **trustworthy**.\n",
53 | "\n",
54 | "To achieve this it is critical to be able to identify and diagnose issues of bias and uncertainty in deep learning models, as we explored in the Facial Detection Lab 2. We need benchmarks that uniformly measure how uncertain a given model is, and we need principled ways of measuring bias and uncertainty. To that end, in this lab, we'll utilize [Capsa](https://github.com/themis-ai/capsa), a risk-estimation wrapping library developed by [Themis AI](https://themisai.io/). Capsa supports the estimation of three different types of ***risk***, defined as measures of how robust and trustworthy our model is. These are:\n",
55 | "1. **Representation bias**: reflects how likely combinations of features are to appear in a given dataset. Often, certain combinations of features are severely under-represented in datasets, which means models learn them less well and can thus lead to unwanted bias.\n",
56 | "2. **Data uncertainty**: reflects noise in the data, for example when sensors have noisy measurements, classes in datasets have low separations, and generally when very similar inputs lead to drastically different outputs. Also known as *aleatoric* uncertainty. \n",
57 | "3. **Model uncertainty**: captures the areas of our underlying data distribution that the model has not yet learned or has difficulty learning. Areas of high model uncertainty can be due to out-of-distribution (OOD) samples or data that is harder to learn. Also known as *epistemic* uncertainty."
58 | ]
59 | },
60 | {
61 | "cell_type": "markdown",
62 | "metadata": {
63 | "id": "o02MyoDrnNqP"
64 | },
65 | "source": [
66 | "## CAPSA overview\n",
67 | "\n",
68 | "This lab introduces Capsa and its functionalities, to next build automated tools that use Capsa to mitigate the underlying issues of bias and uncertainty.\n",
69 | "\n",
70 | "The core idea behind [Capsa](https://themisai.io/capsa/) is that any deep learning model of interest can be ***wrapped*** -- just like wrapping a gift -- to be made ***aware of its own risks***. Risk is captured in representation bias, data uncertainty, and model uncertainty.\n",
71 | "\n",
72 | "\n",
73 | "\n",
74 | "This means that Capsa takes the user's original model as input, and modifies it minimally to create a risk-aware variant while preserving the model's underlying structure and training pipeline. Capsa is a one-line addition to any training workflow in TensorFlow. In this part of the lab, we'll apply Capsa's risk estimation methods to a simple regression problem to further explore the notions of bias and uncertainty. \n",
75 | "\n",
76 | "Please refer to [Capsa's documentation](https://themisai.io/capsa/) for additional details."
77 | ]
78 | },
79 | {
80 | "cell_type": "markdown",
81 | "metadata": {
82 | "id": "hF0uSqk-nwmA"
83 | },
84 | "source": [
85 | "Let's get started by installing the necessary dependencies:"
86 | ]
87 | },
88 | {
89 | "cell_type": "code",
90 | "execution_count": null,
91 | "metadata": {
92 | "id": "NdXF4Reyj6yy"
93 | },
94 | "outputs": [],
95 | "source": [
96 | "# Import Tensorflow 2.0\n",
97 | "%tensorflow_version 2.x\n",
98 | "import tensorflow as tf\n",
99 | "\n",
100 | "import IPython\n",
101 | "import functools\n",
102 | "import matplotlib.pyplot as plt\n",
103 | "import numpy as np\n",
104 | "from tqdm import tqdm\n",
105 | "\n",
106 | "# Download and import the MIT Introduction to Deep Learning package\n",
107 | "!pip install mitdeeplearning\n",
108 | "import mitdeeplearning as mdl\n",
109 | "\n",
110 | "# Download and import Capsa\n",
111 | "!pip install capsa\n",
112 | "import capsa"
113 | ]
114 | },
115 | {
116 | "cell_type": "markdown",
117 | "metadata": {
118 | "id": "xzEcxjKHn8gc"
119 | },
120 | "source": [
121 | "## 1.1 Dataset\n",
122 | "\n",
123 | "We will build understanding of bias and uncertainty by training a neural network for a simple 2D regression task: modeling the function $y = x^3$. We will use Capsa to analyze this dataset and the performance of the model. Noise and missing-ness will be injected into the dataset.\n",
124 | "\n",
125 | "Let's generate the dataset and visualize it:"
126 | ]
127 | },
128 | {
129 | "cell_type": "code",
130 | "execution_count": null,
131 | "metadata": {
132 | "id": "fH40EhC1j9dH"
133 | },
134 | "outputs": [],
135 | "source": [
136 | "# Get the data for the cubic function, injected with noise and missing-ness\n",
137 | "# This is just a toy dataset that we can use to test some of the wrappers on\n",
138 | "def gen_data(x_min, x_max, n, train=True):\n",
139 | " if train: \n",
140 | " x = np.random.triangular(x_min, 2, x_max, size=(n, 1))\n",
141 | " else: \n",
142 | " x = np.linspace(x_min, x_max, n).reshape(n, 1)\n",
143 | "\n",
144 | " sigma = 2*np.exp(-(x+1)**2/1) + 0.2 if train else np.zeros_like(x)\n",
145 | " y = x**3/6 + np.random.normal(0, sigma).astype(np.float32)\n",
146 | "\n",
147 | " return x, y\n",
148 | "\n",
149 | "# Plot the dataset and visualize the train and test datapoints\n",
150 | "x_train, y_train = gen_data(-4, 4, 2000, train=True) # train data\n",
151 | "x_test, y_test = gen_data(-6, 6, 500, train=False) # test data\n",
152 | "\n",
153 | "plt.figure(figsize=(10, 6))\n",
154 | "plt.plot(x_test, y_test, c='r', zorder=-1, label='ground truth')\n",
155 | "plt.scatter(x_train, y_train, s=1.5, label='train data')\n",
156 | "plt.legend()"
157 | ]
158 | },
159 | {
160 | "cell_type": "markdown",
161 | "metadata": {
162 | "id": "Fz3UxT8vuN95"
163 | },
164 | "source": [
165 | "In the plot above, the blue points are the training data, which will be used as inputs to train the neural network model. The red line is the ground truth data, which will be used to evaluate the performance of the model.\n",
166 | "\n",
167 | "#### **TODO: Inspecting the 2D regression dataset**\n",
168 | "\n",
169 | " Write short (~1 sentence) answers to the questions below to complete the `TODO`s:\n",
170 | "\n",
171 | "1. What are your observations about where the train data and test data lie relative to each other?\n",
172 | "2. What, if any, areas do you expect to have high/low aleatoric (data) uncertainty?\n",
173 | "3. What, if any, areas do you expect to have high/low epistemic (model) uncertainty?"
174 | ]
175 | },
176 | {
177 | "cell_type": "markdown",
178 | "metadata": {
179 | "id": "mXMOYRHnv8tF"
180 | },
181 | "source": [
182 | "## 1.2 Regression on cubic dataset\n",
183 | "\n",
184 | "Next we will define a small dense neural network model that can predict `y` given `x`: this is a classical regression task! We will build the model and use the [`model.fit()`](https://www.tensorflow.org/api_docs/python/tf/keras/Model#fit) function to train the model -- normally, without any risk-awareness -- using the train dataset that we visualized above."
185 | ]
186 | },
187 | {
188 | "cell_type": "code",
189 | "execution_count": null,
190 | "metadata": {
191 | "id": "7p1XwfZVuB68"
192 | },
193 | "outputs": [],
194 | "source": [
195 | "### Define and train a dense NN model for the regression task###\n",
196 | "\n",
197 | "'''Function to define a small dense NN'''\n",
198 | "def create_dense_NN():\n",
199 | " return tf.keras.Sequential(\n",
200 | " [\n",
201 | " tf.keras.Input(shape=(1,)),\n",
202 | " tf.keras.layers.Dense(32, \"relu\"),\n",
203 | " tf.keras.layers.Dense(32, \"relu\"),\n",
204 | " tf.keras.layers.Dense(32, \"relu\"),\n",
205 | " tf.keras.layers.Dense(1),\n",
206 | " ]\n",
207 | " )\n",
208 | "\n",
209 | "dense_NN = create_dense_NN()\n",
210 | "\n",
211 | "# Build the model for regression, defining the loss function and optimizer\n",
212 | "dense_NN.compile(\n",
213 | " optimizer=tf.keras.optimizers.Adam(learning_rate=5e-3),\n",
214 | " loss=tf.keras.losses.MeanSquaredError(), # MSE loss for the regression task\n",
215 | ")\n",
216 | "\n",
217 | "# Train the model for 30 epochs using model.fit().\n",
218 | "loss_history = dense_NN.fit(x_train, y_train, epochs=30)"
219 | ]
220 | },
221 | {
222 | "cell_type": "markdown",
223 | "metadata": {
224 | "id": "ovwYBUG3wTDv"
225 | },
226 | "source": [
227 | "Now, we are ready to evaluate our neural network. We use the test data to assess performance on the regression task, and visualize the predicted values against the true values.\n",
228 | "\n",
229 | "Given your observation of the data in the previous plot, where do you expect the model to perform well? Let's test the model and see:"
230 | ]
231 | },
232 | {
233 | "cell_type": "code",
234 | "execution_count": null,
235 | "metadata": {
236 | "id": "fb-EklZywR4D"
237 | },
238 | "outputs": [],
239 | "source": [
240 | "# Pass the test data through the network and predict the y values\n",
241 | "y_predicted = dense_NN.predict(x_test)\n",
242 | "\n",
243 | "# Visualize the true (x, y) pairs for the test data vs. the predicted values\n",
244 | "plt.figure(figsize=(10, 6))\n",
245 | "plt.scatter(x_train, y_train, s=1.5, label='train data')\n",
246 | "plt.plot(x_test, y_test, c='r', zorder=-1, label='ground truth')\n",
247 | "plt.plot(x_test, y_predicted, c='b', zorder=0, label='predicted')\n",
248 | "plt.legend()"
249 | ]
250 | },
251 | {
252 | "cell_type": "markdown",
253 | "metadata": {
254 | "id": "7Vktjwfu0ReH"
255 | },
256 | "source": [
257 | "\n",
258 | "#### **TODO: Analyzing the performance of standard regression model**\n",
259 | "\n",
260 | "Write short (~1 sentence) answers to the questions below to complete the `TODO`s:\n",
261 | "\n",
262 | "1. Where does the model perform well?\n",
263 | "2. Where does the model perform poorly?"
264 | ]
265 | },
266 | {
267 | "cell_type": "markdown",
268 | "metadata": {
269 | "id": "7MzvM48JyZMO"
270 | },
271 | "source": [
272 | "## 1.3 Evaluating bias\n",
273 | "\n",
274 | "Now that we've seen what the predictions from this model look like, we will identify and quantify bias and uncertainty in this problem. We first consider bias.\n",
275 | "\n",
276 | "Recall that *representation bias* reflects how likely combinations of features are to appear in a given dataset. Capsa calculates how likely combinations of features are by using a histogram estimation approach: the `capsa.HistogramWrapper`. For low-dimensional data, the `capsa.HistogramWrapper` bins the input directly into discrete categories and measures the density. More details of the `HistogramWrapper` and how it can be used are [available here](https://themisai.io/capsa/api_documentation/HistogramWrapper.html).\n",
277 | "\n",
278 | "We start by taking our `dense_NN` and wrapping it with the `capsa.HistogramWrapper`:"
279 | ]
280 | },
281 | {
282 | "cell_type": "code",
283 | "execution_count": null,
284 | "metadata": {
285 | "id": "AVv-knsCwOp9"
286 | },
287 | "outputs": [],
288 | "source": [
289 | "### Wrap the dense network for bias estimation ###\n",
290 | "\n",
291 | "standard_dense_NN = create_dense_NN()\n",
292 | "bias_wrapped_dense_NN = capsa.HistogramWrapper(\n",
293 | " standard_dense_NN, # the original model\n",
294 | " num_bins=20,\n",
295 | " queue_size=2000, # how many samples to track\n",
296 | " target_hidden_layer=False # for low-dimensional data (like this dataset), we can estimate biases directly from data\n",
297 | ")"
298 | ]
299 | },
300 | {
301 | "cell_type": "markdown",
302 | "metadata": {
303 | "id": "UFHO7LKcz8uP"
304 | },
305 | "source": [
306 | "Now that we've wrapped the classifier, let's re-train it to update the bias estimates as we train. We can use the exact same training pipeline, using `compile` to build the model and `model.fit()` to train the model:"
307 | ]
308 | },
309 | {
310 | "cell_type": "code",
311 | "execution_count": null,
312 | "metadata": {
313 | "id": "SkyD3rsqy2ff"
314 | },
315 | "outputs": [],
316 | "source": [
317 | "### Compile and train the wrapped model! ###\n",
318 | "\n",
319 | "# Build the model for regression, defining the loss function and optimizer\n",
320 | "bias_wrapped_dense_NN.compile(\n",
321 | " optimizer=tf.keras.optimizers.Adam(learning_rate=2e-3),\n",
322 | " loss=tf.keras.losses.MeanSquaredError(), # MSE loss for the regression task\n",
323 | ")\n",
324 | "\n",
325 | "# Train the wrapped model for 30 epochs.\n",
326 | "loss_history_bias_wrap = bias_wrapped_dense_NN.fit(x_train, y_train, epochs=30)\n",
327 | "\n",
328 | "print(\"Done training model with Bias Wrapper!\")"
329 | ]
330 | },
331 | {
332 | "cell_type": "markdown",
333 | "metadata": {
334 | "id": "_6iVeeqq0f_H"
335 | },
336 | "source": [
337 | "We can now use our wrapped model to assess the bias for a given test input. With the wrapping capability, Capsa neatly allows us to output a *bias score* along with the predicted target value. This bias score reflects the density of data surrounding an input point -- the higher the score, the greater the data representation and density. The wrapped, risk-aware model outputs the predicted target and bias score after it is called!\n",
338 | "\n",
339 | "Let's see how it is done:"
340 | ]
341 | },
342 | {
343 | "cell_type": "code",
344 | "execution_count": null,
345 | "metadata": {
346 | "id": "tZ17eCbP0YM4"
347 | },
348 | "outputs": [],
349 | "source": [
350 | "### Generate and visualize bias scores for data in test set ###\n",
351 | "\n",
352 | "# Call the risk-aware model to generate scores\n",
353 | "predictions, bias = bias_wrapped_dense_NN(x_test)\n",
354 | "\n",
355 | "# Visualize the relationship between the input data x and the bias\n",
356 | "fig, ax = plt.subplots(2, 1, figsize=(8,6))\n",
357 | "ax[0].plot(x_test, bias, label='bias')\n",
358 | "ax[0].set_ylabel('Estimated Bias')\n",
359 | "ax[0].legend()\n",
360 | "\n",
361 | "# Let's compare against the ground truth density distribution\n",
362 | "# should roughly align with our estimated bias in this toy example\n",
363 | "ax[1].hist(x_train, 50, label='ground truth')\n",
364 | "ax[1].set_xlim(-6, 6)\n",
365 | "ax[1].set_ylabel('True Density')\n",
366 | "ax[1].legend();"
367 | ]
368 | },
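  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "### (Optional) Manual sketch of the histogram density idea ###\n",
    "\n",
    "# For intuition only -- a rough version of histogram-based density\n",
    "# estimation, not Capsa's internal implementation. Bin the training\n",
    "# inputs, then look up the normalized bin count for each test input.\n",
    "counts, edges = np.histogram(x_train.flatten(), bins=20, density=True)\n",
    "bin_idx = np.digitize(x_test.flatten(), edges) - 1\n",
    "bin_idx = np.clip(bin_idx, 0, len(counts) - 1)  # out-of-range points map to edge bins\n",
    "density_sketch = counts[bin_idx]\n",
    "\n",
    "plt.figure(figsize=(8, 3))\n",
    "plt.plot(x_test.flatten(), density_sketch, label='manual density estimate')\n",
    "plt.xlabel('x')\n",
    "plt.legend()"
   ]
  },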
369 | {
370 | "cell_type": "markdown",
371 | "metadata": {
372 | "id": "HpDMT_1FERQE"
373 | },
374 | "source": [
375 | "#### **TODO: Evaluating bias with wrapped regression model**\n",
376 | "\n",
377 | "Write short (~1 sentence) answers to the questions below to complete the `TODO`s:\n",
378 | "\n",
379 | "1. How does the bias score relate to the train/test data density from the first plot?\n",
380 | "2. What is one limitation of the Histogram approach that simply bins the data based on frequency?"
381 | ]
382 | },
383 | {
384 | "cell_type": "markdown",
385 | "metadata": {
386 | "id": "PvS8xR_q27Ec"
387 | },
388 | "source": [
389 | "# 1.4 Estimating data uncertainty\n",
390 | "\n",
391 | "Next we turn our attention to uncertainty, first focusing on the uncertainty in the data -- the aleatoric uncertainty.\n",
392 | "\n",
393 | "As introduced in Lecture 5 on Robust & Trustworthy Deep Learning, in regression we can estimate aleatoric uncertainty by training the model to predict both a target value and a variance for every input. Because we estimate both a mean and variance for every input, this method is called Mean Variance Estimation (MVE). MVE involves modifying the output layer to predict both the mean and variance, and changing the loss to reflect the prediction likelihood.\n",
394 | "\n",
395 | "Capsa automatically implements these changes for us: we can wrap a given model using `capsa.MVEWrapper` to use MVE to estimate aleatoric uncertainty. All we have to do is define the model and the loss function to evaluate its predictions! More details of the `MVEWrapper` and how it can be used are [available here](https://themisai.io/capsa/api_documentation/MVEWrapper.html).\n",
396 | "\n",
397 | "Let's take our standard network, wrap it with `capsa.MVEWrapper`, build the wrapped model, and then train it for the regression task. Finally, we evaluate performance of the resulting model by quantifying the aleatoric uncertainty across the data space: "
398 | ]
399 | },
400 | {
401 | "cell_type": "code",
402 | "execution_count": null,
403 | "metadata": {
404 | "id": "sxmm-2sd3G9u"
405 | },
406 | "outputs": [],
407 | "source": [
408 | "### Estimating data uncertainty with Capsa wrapping ###\n",
409 | "\n",
410 | "standard_dense_NN = create_dense_NN()\n",
411 | "# Wrap the dense network for aleatoric uncertainty estimation\n",
412 | "mve_wrapped_NN = capsa.MVEWrapper(standard_dense_NN)\n",
413 | "\n",
414 | "# Build the model for regression, defining the loss function and optimizer\n",
415 | "mve_wrapped_NN.compile(\n",
416 | " optimizer=tf.keras.optimizers.Adam(learning_rate=1e-2),\n",
417 | " loss=tf.keras.losses.MeanSquaredError(), # MSE loss for the regression task\n",
418 | ")\n",
419 | "\n",
420 | "# Train the wrapped model for 30 epochs.\n",
421 | "loss_history_mve_wrap = mve_wrapped_NN.fit(x_train, y_train, epochs=30)\n",
422 | "\n",
423 | "# Call the uncertainty-aware model to generate outputs for the test data\n",
424 | "x_test_clipped = np.clip(x_test, x_train.min(), x_train.max())\n",
425 | "prediction = mve_wrapped_NN(x_test_clipped)"
426 | ]
427 | },
428 | {
429 | "cell_type": "code",
430 | "execution_count": null,
431 | "metadata": {
432 | "id": "dT2Rx8JCg3NR"
433 | },
434 | "outputs": [],
435 | "source": [
436 | "# Capsa makes the aleatoric uncertainty an attribute of the prediction!\n",
437 | "pred = np.array(prediction.y_hat).flatten()\n",
438 | "unc = np.sqrt(prediction.aleatoric).flatten() # out.aleatoric is the predicted variance\n",
439 | "\n",
440 | "# Visualize the aleatoric uncertainty across the data space\n",
441 | "plt.figure(figsize=(10, 6))\n",
442 | "plt.scatter(x_train, y_train, s=1.5, label='train data')\n",
443 | "plt.plot(x_test, y_test, c='r', zorder=-1, label='ground truth')\n",
444 | "plt.fill_between(x_test_clipped.flatten(), pred-2*unc, pred+2*unc, \n",
445 | " color='b', alpha=0.2, label='aleatoric')\n",
446 | "plt.legend()"
447 | ]
448 | },
449 | {
450 | "cell_type": "markdown",
451 | "metadata": {
452 | "id": "ZFeArgRX9U9s"
453 | },
454 | "source": [
455 | "#### **TODO: Estimating aleatoric uncertainty**\n",
456 | "\n",
457 | "Write short (~1 sentence) answers to the questions below to complete the `TODO`s:\n",
458 | "\n",
459 | "1. For what values of $x$ is the aleatoric uncertainty high or increasing suddenly?\n",
460 | "2. How does your answer in (1) relate to how the $x$ values are distributed?"
461 | ]
462 | },
463 | {
464 | "cell_type": "markdown",
465 | "metadata": {
466 | "id": "6FC5WPRT5lAb"
467 | },
468 | "source": [
469 | "# 1.5 Estimating model uncertainty\n",
470 | "\n",
471 | "Finally, we use Capsa for estimating the uncertainty underlying the model predictions -- the epistemic uncertainty. In this example, we'll use ensembles, which essentially copy the model `N` times and average predictions across all runs for a more robust prediction, and also calculate the variance of the `N` runs to estimate the uncertainty.\n",
472 | "\n",
473 | "Capsa provides a neat wrapper, `capsa.EnsembleWrapper`, to make an ensemble from an input model. Just like with aleatoric estimation, we can take our standard dense network model, wrap it with `capsa.EnsembleWrapper`, build the wrapped model, and then train it for the regression task. More details of the `EnsembleWrapper` and how it can be used are [available here](https://themisai.io/capsa/api_documentation/EnsembleWrapper.html).\n",
474 | "\n",
475 | "Finally, we evaluate the resulting model by quantifying the epistemic uncertainty on the test data:"
476 | ]
477 | },
478 | {
479 | "cell_type": "code",
480 | "execution_count": null,
481 | "metadata": {
482 | "id": "SuRlhq2c5Fob"
483 | },
484 | "outputs": [],
485 | "source": [
486 | "### Estimating model uncertainty with Capsa wrapping ###\n",
487 | "\n",
488 | "standard_dense_NN = create_dense_NN()\n",
489 | "# Wrap the dense network for epistemic uncertainty estimation with an Ensemble\n",
490 | "ensemble_NN = capsa.EnsembleWrapper(standard_dense_NN)\n",
491 | "\n",
492 | "# Build the model for regression, defining the loss function and optimizer\n",
493 | "ensemble_NN.compile(\n",
494 | " optimizer=tf.keras.optimizers.Adam(learning_rate=3e-3),\n",
495 | " loss=tf.keras.losses.MeanSquaredError(), # MSE loss for the regression task\n",
496 | ")\n",
497 | "\n",
498 | "# Train the wrapped model for 30 epochs.\n",
499 | "loss_history_ensemble = ensemble_NN.fit(x_train, y_train, epochs=30)\n",
500 | "\n",
501 | "# Call the uncertainty-aware model to generate outputs for the test data\n",
502 | "prediction = ensemble_NN(x_test)"
503 | ]
504 | },
505 | {
506 | "cell_type": "code",
507 | "execution_count": null,
508 | "metadata": {
509 | "id": "eauNoKDOj_ZT"
510 | },
511 | "outputs": [],
512 | "source": [
513 | "# Capsa makes the epistemic uncertainty an attribute of the prediction!\n",
514 | "pred = np.array(prediction.y_hat).flatten()\n",
515 | "unc = np.array(prediction.epistemic).flatten()\n",
516 | "\n",
517 | "# Visualize the aleatoric uncertainty across the data space\n",
518 | "plt.figure(figsize=(10, 6))\n",
519 | "plt.scatter(x_train, y_train, s=1.5, label='train data')\n",
520 | "plt.plot(x_test, y_test, c='r', zorder=-1, label='ground truth')\n",
521 | "plt.fill_between(x_test.flatten(), pred-20*unc, pred+20*unc, color='b', alpha=0.2, label='epistemic')\n",
522 | "plt.legend()"
523 | ]
524 | },
525 | {
526 | "cell_type": "markdown",
527 | "metadata": {
528 | "id": "N4LMn2tLPBdg"
529 | },
530 | "source": [
531 | "#### **TODO: Estimating epistemic uncertainty**\n",
532 | "\n",
533 | "Write short (~1 sentence) answers to the questions below to complete the `TODO`s:\n",
534 | "\n",
535 | "1. For what values of $x$ is the epistemic uncertainty high or increasing suddenly?\n",
536 | "2. How does your answer in (1) relate to how the $x$ values are distributed (refer back to original plot)? Think about both the train and test data.\n",
537 | "3. How could you reduce the epistemic uncertainty in regions where it is high?"
538 | ]
539 | },
540 | {
541 | "cell_type": "markdown",
542 | "metadata": {
543 | "id": "CkpvkOL06jRd"
544 | },
545 | "source": [
546 | "# 1.6 Conclusion\n",
547 | "\n",
548 | "You've just analyzed the bias, aleatoric uncertainty, and epistemic uncertainty for your first risk-aware model! This is a task that data scientists do constantly to determine methods of improving their models and datasets.\n",
549 | "\n",
550 | "In the next part of the lab, you'll continue to build off of these concepts to study them in the context of facial detection systems: not only diagnosing issues of bias and uncertainty, but also developing solutions to *mitigate* these risks.\n",
551 | "\n",
552 | ""
553 | ]
554 | },
555 | {
556 | "cell_type": "code",
557 | "execution_count": null,
558 | "metadata": {
559 | "id": "nIpfPcpjlsKK"
560 | },
561 | "outputs": [],
562 | "source": []
563 | }
564 | ],
565 | "metadata": {
566 | "colab": {
567 | "include_colab_link": true,
568 | "provenance": []
569 | },
570 | "kernelspec": {
571 | "display_name": "Python 3",
572 | "name": "python3"
573 | },
574 | "language_info": {
575 | "name": "python"
576 | }
577 | },
578 | "nbformat": 4,
579 | "nbformat_minor": 0
580 | }
581 |
--------------------------------------------------------------------------------