├── README.md
├── week01
    ├── data
    │   ├── target.npy
    │   └── train.npy
    └── week01_linear_models.ipynb
├── week04
    ├── images
    │   ├── gan.png
    │   ├── pca.png
    │   ├── denoising.jpg
    │   ├── autoencoder.png
    │   ├── noise_to_face.png
    │   ├── similar_images.jpg
    │   ├── transpose_conv.jpg
    │   └── nvidia_cool_gan.png
    └── lfw_dataset.py
├── week05
    ├── images
    │   ├── rnn.png
    │   └── char-nn.png
    ├── week05_part_of_speech_tagging.ipynb
    ├── week05_generating_names_with_rnn.ipynb
    └── week05_sga_text_generation.ipynb
├── week06
    ├── data
    │   ├── img_0.jpg
    │   ├── img_1.jpg
    │   ├── img_10.jpg
    │   ├── img_11.jpg
    │   ├── img_12.jpg
    │   ├── img_13.jpg
    │   ├── img_14.jpg
    │   ├── img_15.jpg
    │   ├── img_16.jpg
    │   ├── img_17.jpg
    │   ├── img_18.jpg
    │   ├── img_19.jpg
    │   ├── img_2.jpg
    │   ├── img_20.jpg
    │   ├── img_21.jpg
    │   ├── img_22.jpg
    │   ├── img_23.jpg
    │   ├── img_24.jpg
    │   ├── img_25.jpg
    │   ├── img_26.jpg
    │   ├── img_27.jpg
    │   ├── img_28.jpg
    │   ├── img_29.jpg
    │   ├── img_3.jpg
    │   ├── img_4.jpg
    │   ├── img_5.jpg
    │   ├── img_6.jpg
    │   ├── img_7.jpg
    │   ├── img_8.jpg
    │   └── img_9.jpg
    ├── images
    │   ├── flatten_help.jpg
    │   ├── inceptionv3.png
    │   ├── encoder_decoder.png
    │   └── encoder_decoder_explained.png
    ├── grading_utils.py
    └── beheaded_inception3.py
├── week03
    ├── images
    │   ├── cifar10.jpg
    │   ├── flowers.jpg
    │   ├── center_crop.jpg
    │   └── inceptionv3.png
    ├── SGA1_Object_Detection.ipynb
    └── week03_finetuning_inception.ipynb
├── colab_instructions.md
├── utils
    ├── util.py
    ├── setup_colab.py
    ├── tqdm_utils.py
    ├── grading.py
    └── download_utils.py
├── .gitignore
└── week02
    ├── week02_digits_classification.ipynb
    └── week02_numpy_neural_network.ipynb
/README.md: -------------------------------------------------------------------------------- 1 | # intro-to-dl-pytorch -------------------------------------------------------------------------------- /week01/data/target.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hse-aml/intro-to-dl-pytorch/HEAD/week01/data/target.npy -------------------------------------------------------------------------------- /week01/data/train.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hse-aml/intro-to-dl-pytorch/HEAD/week01/data/train.npy -------------------------------------------------------------------------------- /week04/images/gan.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hse-aml/intro-to-dl-pytorch/HEAD/week04/images/gan.png -------------------------------------------------------------------------------- /week04/images/pca.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hse-aml/intro-to-dl-pytorch/HEAD/week04/images/pca.png -------------------------------------------------------------------------------- /week05/images/rnn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hse-aml/intro-to-dl-pytorch/HEAD/week05/images/rnn.png -------------------------------------------------------------------------------- /week06/data/img_0.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hse-aml/intro-to-dl-pytorch/HEAD/week06/data/img_0.jpg -------------------------------------------------------------------------------- /week06/data/img_1.jpg: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/hse-aml/intro-to-dl-pytorch/HEAD/week06/data/img_1.jpg -------------------------------------------------------------------------------- /week06/data/img_10.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hse-aml/intro-to-dl-pytorch/HEAD/week06/data/img_10.jpg -------------------------------------------------------------------------------- /week06/data/img_11.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hse-aml/intro-to-dl-pytorch/HEAD/week06/data/img_11.jpg -------------------------------------------------------------------------------- /week06/data/img_12.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hse-aml/intro-to-dl-pytorch/HEAD/week06/data/img_12.jpg -------------------------------------------------------------------------------- /week06/data/img_13.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hse-aml/intro-to-dl-pytorch/HEAD/week06/data/img_13.jpg -------------------------------------------------------------------------------- /week06/data/img_14.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hse-aml/intro-to-dl-pytorch/HEAD/week06/data/img_14.jpg -------------------------------------------------------------------------------- /week06/data/img_15.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hse-aml/intro-to-dl-pytorch/HEAD/week06/data/img_15.jpg -------------------------------------------------------------------------------- /week06/data/img_16.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hse-aml/intro-to-dl-pytorch/HEAD/week06/data/img_16.jpg -------------------------------------------------------------------------------- /week06/data/img_17.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hse-aml/intro-to-dl-pytorch/HEAD/week06/data/img_17.jpg -------------------------------------------------------------------------------- /week06/data/img_18.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hse-aml/intro-to-dl-pytorch/HEAD/week06/data/img_18.jpg -------------------------------------------------------------------------------- /week06/data/img_19.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hse-aml/intro-to-dl-pytorch/HEAD/week06/data/img_19.jpg -------------------------------------------------------------------------------- /week06/data/img_2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hse-aml/intro-to-dl-pytorch/HEAD/week06/data/img_2.jpg -------------------------------------------------------------------------------- /week06/data/img_20.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hse-aml/intro-to-dl-pytorch/HEAD/week06/data/img_20.jpg -------------------------------------------------------------------------------- /week06/data/img_21.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/hse-aml/intro-to-dl-pytorch/HEAD/week06/data/img_21.jpg -------------------------------------------------------------------------------- /week06/data/img_22.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hse-aml/intro-to-dl-pytorch/HEAD/week06/data/img_22.jpg -------------------------------------------------------------------------------- /week06/data/img_23.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hse-aml/intro-to-dl-pytorch/HEAD/week06/data/img_23.jpg -------------------------------------------------------------------------------- /week06/data/img_24.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hse-aml/intro-to-dl-pytorch/HEAD/week06/data/img_24.jpg -------------------------------------------------------------------------------- /week06/data/img_25.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hse-aml/intro-to-dl-pytorch/HEAD/week06/data/img_25.jpg -------------------------------------------------------------------------------- /week06/data/img_26.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hse-aml/intro-to-dl-pytorch/HEAD/week06/data/img_26.jpg -------------------------------------------------------------------------------- /week06/data/img_27.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hse-aml/intro-to-dl-pytorch/HEAD/week06/data/img_27.jpg -------------------------------------------------------------------------------- /week06/data/img_28.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hse-aml/intro-to-dl-pytorch/HEAD/week06/data/img_28.jpg -------------------------------------------------------------------------------- /week06/data/img_29.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hse-aml/intro-to-dl-pytorch/HEAD/week06/data/img_29.jpg -------------------------------------------------------------------------------- /week06/data/img_3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hse-aml/intro-to-dl-pytorch/HEAD/week06/data/img_3.jpg -------------------------------------------------------------------------------- /week06/data/img_4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hse-aml/intro-to-dl-pytorch/HEAD/week06/data/img_4.jpg -------------------------------------------------------------------------------- /week06/data/img_5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hse-aml/intro-to-dl-pytorch/HEAD/week06/data/img_5.jpg -------------------------------------------------------------------------------- /week06/data/img_6.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hse-aml/intro-to-dl-pytorch/HEAD/week06/data/img_6.jpg 
-------------------------------------------------------------------------------- /week06/data/img_7.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hse-aml/intro-to-dl-pytorch/HEAD/week06/data/img_7.jpg -------------------------------------------------------------------------------- /week06/data/img_8.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hse-aml/intro-to-dl-pytorch/HEAD/week06/data/img_8.jpg -------------------------------------------------------------------------------- /week06/data/img_9.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hse-aml/intro-to-dl-pytorch/HEAD/week06/data/img_9.jpg -------------------------------------------------------------------------------- /week03/images/cifar10.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hse-aml/intro-to-dl-pytorch/HEAD/week03/images/cifar10.jpg -------------------------------------------------------------------------------- /week03/images/flowers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hse-aml/intro-to-dl-pytorch/HEAD/week03/images/flowers.jpg -------------------------------------------------------------------------------- /week05/images/char-nn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hse-aml/intro-to-dl-pytorch/HEAD/week05/images/char-nn.png -------------------------------------------------------------------------------- /week04/images/denoising.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hse-aml/intro-to-dl-pytorch/HEAD/week04/images/denoising.jpg -------------------------------------------------------------------------------- /week03/images/center_crop.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hse-aml/intro-to-dl-pytorch/HEAD/week03/images/center_crop.jpg -------------------------------------------------------------------------------- /week03/images/inceptionv3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hse-aml/intro-to-dl-pytorch/HEAD/week03/images/inceptionv3.png -------------------------------------------------------------------------------- /week04/images/autoencoder.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hse-aml/intro-to-dl-pytorch/HEAD/week04/images/autoencoder.png -------------------------------------------------------------------------------- /week06/images/flatten_help.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hse-aml/intro-to-dl-pytorch/HEAD/week06/images/flatten_help.jpg -------------------------------------------------------------------------------- /week06/images/inceptionv3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hse-aml/intro-to-dl-pytorch/HEAD/week06/images/inceptionv3.png -------------------------------------------------------------------------------- /week04/images/noise_to_face.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/hse-aml/intro-to-dl-pytorch/HEAD/week04/images/noise_to_face.png -------------------------------------------------------------------------------- /week04/images/similar_images.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hse-aml/intro-to-dl-pytorch/HEAD/week04/images/similar_images.jpg -------------------------------------------------------------------------------- /week04/images/transpose_conv.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hse-aml/intro-to-dl-pytorch/HEAD/week04/images/transpose_conv.jpg -------------------------------------------------------------------------------- /week04/images/nvidia_cool_gan.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hse-aml/intro-to-dl-pytorch/HEAD/week04/images/nvidia_cool_gan.png -------------------------------------------------------------------------------- /week06/images/encoder_decoder.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hse-aml/intro-to-dl-pytorch/HEAD/week06/images/encoder_decoder.png -------------------------------------------------------------------------------- /week06/images/encoder_decoder_explained.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hse-aml/intro-to-dl-pytorch/HEAD/week06/images/encoder_decoder_explained.png
--------------------------------------------------------------------------------
/week06/grading_utils.py:
--------------------------------------------------------------------------------
1 | def test_vocab(vocab):
2 |     return len(vocab)
3 | 
4 | 
5 | def test_network(network):
6 |     return int(all(param.grad is not None for param in network.parameters()))
7 | 
8 | 
9 | def test_batch(batch):
10 |     return int(batch[0].shape[0])
11 | 
--------------------------------------------------------------------------------
/colab_instructions.md:
--------------------------------------------------------------------------------
1 | # Running on Google Colab (tested for all weeks)
2 | Google has released its own flavour of Jupyter called Colab, which has free GPUs!
3 | 
4 | Here's how you can use it:
5 | 1. Open https://colab.research.google.com, click **Sign in** in the upper right corner, use your Google credentials to sign in.
6 | 2. Click the **GITHUB** tab, paste https://github.com/hse-aml/intro-to-dl-pytorch and press Enter
7 | 3. Choose the notebook you want to open, e.g. week01/week01_linear_models.ipynb
8 | 4. Click **File -> Save a copy in Drive...** to save your progress in Google Drive
9 | 5. Click **Runtime -> Change runtime type** and select **GPU** in the Hardware accelerator box
10 | 6. Start by **executing** the first few cells that download dependencies and import packages
11 | 7. Enjoy the assignment!
12 | 8. If you run many notebooks on Colab, they can continue to eat up memory;
13 |    you can kill them with `! pkill -9 python3` and check with `! nvidia-smi` that GPU memory is freed.
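
For example, week02_digits_classification.ipynb first shells out (`shred -u setup_colab.py`, then `wget .../utils/setup_colab.py -O setup_colab.py`) and then runs:

```python
import setup_colab

setup_colab.setup_week02()  # downloads grading.py and any week-specific files
```

Other weeks call their own `setup_weekXX()` helper from utils/setup_colab.py in the same way.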
14 | 
--------------------------------------------------------------------------------
/utils/util.py:
--------------------------------------------------------------------------------
1 | """Some auxiliary files used for the honor track numpy assignment"""
2 | import numpy as np
3 | 
4 | 
5 | def eval_numerical_gradient(f, x, verbose=False, h=0.00001):
6 |     """Evaluates the gradient df/dx via finite differences:
7 |     df/dx ~ (f(x+h) - f(x-h)) / 2h
8 |     Adapted from https://github.com/ddtm/dl-course/ (our ysda course).
9 |     """
10 |     fx = f(x)  # evaluate function value at original point
11 |     grad = np.zeros_like(x)
12 |     # iterate over all indices in x
13 |     it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
14 |     while not it.finished:
15 | 
16 |         # evaluate function at x+h
17 |         ix = it.multi_index
18 |         oldval = x[ix]
19 |         x[ix] = oldval + h  # increment by h
20 |         fxph = f(x)  # evaluate f(x + h)
21 |         x[ix] = oldval - h
22 |         fxmh = f(x)  # evaluate f(x - h)
23 |         x[ix] = oldval  # restore
24 | 
25 |         # compute the partial derivative with the centered formula
26 |         grad[ix] = (fxph - fxmh) / (2 * h)  # the slope
27 |         if verbose:
28 |             print(ix, grad[ix])
29 |         it.iternext()  # step to next dimension
30 | 
31 |     return grad
32 | 
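For example, a minimal self-check against an analytic gradient (a usage sketch only; it assumes this file was downloaded next to the notebook, as `setup_week02_honor()` does):

```python
import numpy as np
from util import eval_numerical_gradient

x = np.array([1.0, 2.0, 3.0])
grad = eval_numerical_gradient(lambda v: (v ** 2).sum(), x)
print(grad)  # close to the analytic gradient 2*x = [2. 4. 6.]
```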
--------------------------------------------------------------------------------
/utils/setup_colab.py:
--------------------------------------------------------------------------------
1 | import os
2 | 
3 | 
4 | def download_github_code(path):
5 |     filename = path.rsplit('/')[-1]
6 |     os.system('shred -u {}'.format(filename))
7 |     os.system('wget -q https://raw.githubusercontent.com/hse-aml/intro-to-dl-pytorch/main/{} -O {}'.format(path, filename))
8 | 
9 | 
10 | def setup_week01():
11 |     download_github_code('utils/grading.py')
12 |     download_github_code('week01/data/train.npy')
13 |     download_github_code('week01/data/target.npy')
14 | 
15 | 
16 | def setup_week02():
17 |     download_github_code('utils/grading.py')
18 | 
19 | 
20 | def setup_week02_honor():
21 |     download_github_code('utils/tqdm_utils.py')
22 |     download_github_code('utils/util.py')
23 | 
24 | 
25 | def setup_week03_1():
26 |     download_github_code('utils/grading.py')
27 | 
28 | 
29 | def setup_week03_2():
30 |     download_github_code('utils/tqdm_utils.py')
31 |     download_github_code('utils/download_utils.py')
32 |     download_github_code('utils/grading.py')
33 | 
34 | 
35 | def setup_week04():
36 |     download_github_code('week04/lfw_dataset.py')
37 |     download_github_code('utils/tqdm_utils.py')
38 |     download_github_code('utils/download_utils.py')
39 |     download_github_code('utils/grading.py')
40 | 
41 | 
42 | def setup_week05():
43 |     download_github_code('utils/grading.py')
44 |     download_github_code('week05/names.txt')
45 | 
46 | 
47 | def setup_week06():
48 |     download_github_code('utils/grading.py')
49 |     download_github_code('week06/beheaded_inception3.py')
50 |     download_github_code('week06/grading_utils.py')
51 |     os.system('wget -qO- https://github.com/hse-aml/intro-to-dl-pytorch/releases/download/final_project/handout.tar.gz | tar -xzvf - -C .')
52 | 
53 |     for i in range(30):
54 |         os.system(f'wget -q https://raw.githubusercontent.com/hse-aml/intro-to-dl-pytorch/main/week06/data/img_{i}.jpg -O data/img_{i}.jpg')
55 | 
--------------------------------------------------------------------------------
/week06/beheaded_inception3.py:
--------------------------------------------------------------------------------
1 | import torch, torch.nn as nn
2 | import torch.nn.functional as F
3 | from torch.autograd import Variable
4 | from torchvision.models.inception import Inception3
5 | from warnings import warn
6 | from torch.utils.model_zoo import load_url
7 | 
8 | 
9 | class BeheadedInception3(Inception3):
10 |     """ Like torchvision.models.inception.Inception3 but the head goes separately """
11 | 
12 |     def forward(self, x):
13 |         if self.transform_input:
14 |             x = x.clone()
15 |             x[:, 0] = x[:, 0] * (0.229 / 0.5) + (0.485 - 0.5) / 0.5
16 |             x[:, 1] = x[:, 1] * (0.224 / 0.5) + (0.456 - 0.5) / 0.5
17 |             x[:, 2] = x[:, 2] * (0.225 / 0.5) + (0.406 - 0.5) / 0.5
18 |         else: warn("Input isn't transformed")
19 |         x = self.Conv2d_1a_3x3(x)
20 |         x = self.Conv2d_2a_3x3(x)
21 |         x = self.Conv2d_2b_3x3(x)
22 |         x = F.max_pool2d(x, kernel_size=3, stride=2)
23 |         x = self.Conv2d_3b_1x1(x)
24 |         x = self.Conv2d_4a_3x3(x)
25 |         x = F.max_pool2d(x, kernel_size=3, stride=2)
26 |         x = self.Mixed_5b(x)
27 |         x = self.Mixed_5c(x)
28 |         x = self.Mixed_5d(x)
29 |         x = self.Mixed_6a(x)
30 |         x = self.Mixed_6b(x)
31 |         x = self.Mixed_6c(x)
32 |         x = self.Mixed_6d(x)
33 |         x = self.Mixed_6e(x)
34 |         x = self.Mixed_7a(x)
35 |         x = self.Mixed_7b(x)
36 |         x_for_attn = x = self.Mixed_7c(x)
37 |         # 8 x 8 x 2048
38 |         x = F.avg_pool2d(x, kernel_size=8)
39 |         # 1 x 1 x 2048
40 |         x_for_capt = x = x.view(x.size(0), -1)
41 |         # 2048
42 |         x = self.fc(x)
43 |         # 1000 (num_classes)
44 |         return x_for_attn, x_for_capt, x
45 | 
46 | 
47 | def beheaded_inception_v3(transform_input=True):
48 |     model = BeheadedInception3(transform_input=transform_input)
49 |     inception_url = 'https://download.pytorch.org/models/inception_v3_google-1a9a5a14.pth'
50 |     model.load_state_dict(load_url(inception_url))
51 |     return model
52 | 
53 | 
--------------------------------------------------------------------------------
/utils/tqdm_utils.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | from __future__ import print_function 4 | import tqdm 5 | tqdm.monitor_interval = 0 # workaround for https://github.com/tqdm/tqdm/issues/481 6 | 7 | 8 | class SimpleTqdm(): 9 | def __init__(self, iterable=None, total=None, **kwargs): 10 | self.iterable = list(iterable) if iterable is not None else None 11 | self.total = len(self.iterable) if self.iterable is not None else total 12 | assert self.iterable is not None or self.total is not None 13 | self.current_step = 0 14 | self.print_frequency = max(self.total // 50, 1) 15 | self.desc = "" 16 | 17 | def set_description_str(self, desc): 18 | self.desc = desc 19 | 20 | def set_description(self, desc): 21 | self.desc = desc 22 | 23 | def update(self, steps): 24 | last_print_step = (self.current_step // self.print_frequency) * self.print_frequency 25 | i = 1 26 | while last_print_step + i * self.print_frequency <= self.current_step + steps: 27 | print("*", end='') 28 | i += 1 29 | self.current_step += steps 30 | 31 | def close(self): 32 | print("\n" + self.desc) 33 | 34 | def __iter__(self): 35 | assert self.iterable is not None 36 | self.index = 0 37 | return self 38 | 39 | def __next__(self): 40 | if self.index < self.total: 41 | element = self.iterable[self.index] 42 | self.update(1) 43 | self.index += 1 44 | return element 45 | else: 46 | self.close() 47 | raise StopIteration 48 | 49 | 50 | def use_simple_tqdm(): 51 | try: 52 | import google.colab 53 | import os 54 | return not bool(int(os.environ.get("EXPERIMENTAL_TQDM", "0"))) 55 | except ImportError: 56 | return False 57 | 58 | 59 | def tqdm_notebook_failsafe(*args, **kwargs): 60 | if use_simple_tqdm(): 61 | # tqdm is broken on Google Colab 62 | return SimpleTqdm(*args, 
**kwargs) 63 | else: 64 | return tqdm.tqdm_notebook(*args, **kwargs) 65 | -------------------------------------------------------------------------------- /week04/lfw_dataset.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | import cv2 4 | import pandas as pd 5 | import tarfile 6 | 7 | 8 | ATTRS_NAME = "lfw_attributes.txt" # http://www.cs.columbia.edu/CAVE/databases/pubfig/download/lfw_attributes.txt 9 | IMAGES_NAME = "lfw-deepfunneled.tgz" # http://vis-www.cs.umass.edu/lfw/lfw-deepfunneled.tgz 10 | RAW_IMAGES_NAME = "lfw.tgz" # http://vis-www.cs.umass.edu/lfw/lfw.tgz 11 | 12 | 13 | def decode_image_from_raw_bytes(raw_bytes): 14 | img = cv2.imdecode(np.asarray(bytearray(raw_bytes), dtype=np.uint8), 1) 15 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 16 | return img 17 | 18 | 19 | def load_lfw_dataset( 20 | use_raw=False, 21 | dx=80, dy=80, 22 | dimx=45, dimy=45): 23 | 24 | # read attrs 25 | df_attrs = pd.read_csv(ATTRS_NAME, sep='\t', skiprows=1) 26 | df_attrs.columns = list(df_attrs.columns)[1:] + ["NaN"] 27 | df_attrs = df_attrs.drop("NaN", axis=1) 28 | imgs_with_attrs = set(map(tuple, df_attrs[["person", "imagenum"]].values)) 29 | 30 | # read photos 31 | all_photos = [] 32 | photo_ids = [] 33 | 34 | with tarfile.open(RAW_IMAGES_NAME if use_raw else IMAGES_NAME) as f: 35 | for m in f.getmembers(): 36 | if m.isfile() and m.name.endswith(".jpg"): 37 | # prepare image 38 | img = decode_image_from_raw_bytes(f.extractfile(m).read()) 39 | img = img[dy:-dy, dx:-dx] 40 | img = cv2.resize(img, (dimx, dimy)) 41 | # parse person 42 | fname = os.path.split(m.name)[-1] 43 | fname_splitted = fname[:-4].replace('_', ' ').split() 44 | person_id = ' '.join(fname_splitted[:-1]) 45 | photo_number = int(fname_splitted[-1]) 46 | if (person_id, photo_number) in imgs_with_attrs: 47 | all_photos.append(img) 48 | photo_ids.append({'person': person_id, 'imagenum': photo_number}) 49 | 50 | photo_ids = pd.DataFrame(photo_ids) 51 | all_photos = np.stack(all_photos).astype('uint8') 52 | 53 | # preserve photo_ids order! 54 | all_attrs = photo_ids.merge(df_attrs, on=('person', 'imagenum')).drop(["person", "imagenum"], axis=1) 55 | 56 | return all_photos, all_attrs 57 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | # solved notebooks 132 | *_solved* 133 | 134 | # any zipped 135 | *.zip 136 | -------------------------------------------------------------------------------- /utils/grading.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import json 3 | 4 | 5 | class Grader(object): 6 | def __init__(self, assignment_key, all_parts=()): 7 | """ 8 | Assignment key is the way to tell Coursera which problem is being submitted. 9 | """ 10 | self.submission_page = 'https://www.coursera.org/api/onDemandProgrammingScriptSubmissions.v1' 11 | self.assignment_key = assignment_key 12 | self.answers = {part: None for part in all_parts} 13 | 14 | def submit(self, email, token): 15 | submission = { 16 | "assignmentKey": self.assignment_key, 17 | "submitterEmail": email, 18 | "secret": token, 19 | "parts": {} 20 | } 21 | for part, output in self.answers.items(): 22 | if output is not None: 23 | submission["parts"][part] = {"output": output} 24 | else: 25 | submission["parts"][part] = dict() 26 | 27 | request = requests.post(self.submission_page, data=json.dumps(submission)) 28 | response = request.json() 29 | if request.status_code == 201: 30 | print('Submitted to Coursera platform. See results on assignment page!') 31 | elif u'details' in response and u'learnerMessage' in response[u'details']: 32 | print(response[u'details'][u'learnerMessage']) 33 | else: 34 | print("Unknown response from Coursera: {}".format(request.status_code)) 35 | print(response) 36 | 37 | def set_answer(self, part, answer): 38 | """ 39 | Adds an answer for submission. Answer is expected either as string, number, or an iterable of numbers. 
40 | 41 | Args: 42 | part: str, assignment part id 43 | answer: answer to submit. If non iterable, appends repr(answer). If string, is appended as provided. 44 | If an iterable and not string, converted to space-delimited repr() of members. 45 | """ 46 | if isinstance(answer, str): 47 | self.answers[part] = answer 48 | else: 49 | try: 50 | self.answers[part] = " ".join(map(repr, answer)) 51 | except TypeError: 52 | self.answers[part] = repr(answer) 53 | 54 | 55 | def array_to_grader(array, epsilon=1e-4): 56 | """ 57 | Utility function to help preparing Coursera grading conditions descriptions. 58 | 59 | Args: 60 | array: iterable of numbers, the correct answers 61 | epsilon: the generated expression will accept the answers with this absolute difference with provided values 62 | 63 | Returns: 64 | String. A Coursera grader expression that checks whether the user submission is in (array - epsilon, array + epsilon) 65 | """ 66 | res = [] 67 | for element in array: 68 | if isinstance(element, int): 69 | res.append("[{0}, {0}]".format(element)) 70 | else: 71 | res.append("({0}, {1})".format(element - epsilon, element + epsilon)) 72 | return " ".join(res) 73 | -------------------------------------------------------------------------------- /utils/download_utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | import os 4 | import shutil 5 | import requests 6 | import time 7 | from functools import wraps 8 | import traceback 9 | import tqdm_utils 10 | 11 | 12 | # https://www.saltycrane.com/blog/2009/11/trying-out-retry-decorator-python/ 13 | def retry(ExceptionToCheck, tries=4, delay=3, backoff=2): 14 | def deco_retry(f): 15 | 16 | @wraps(f) 17 | def f_retry(*args, **kwargs): 18 | mtries, mdelay = tries, delay 19 | while mtries > 1: 20 | try: 21 | return f(*args, **kwargs) 22 | except KeyboardInterrupt as e: 23 | raise e 24 | except ExceptionToCheck as e: 25 | print("%s, retrying in %d seconds..." 
% (str(e), mdelay)) 26 | traceback.print_exc() 27 | time.sleep(mdelay) 28 | mtries -= 1 29 | mdelay *= backoff 30 | return f(*args, **kwargs) 31 | 32 | return f_retry # true decorator 33 | 34 | return deco_retry 35 | 36 | 37 | @retry(Exception) 38 | def download_file(url, file_path): 39 | r = requests.get(url, stream=True) 40 | total_size = int(r.headers.get('content-length')) 41 | bar = tqdm_utils.tqdm_notebook_failsafe(total=total_size, unit='B', unit_scale=True) 42 | bar.set_description(os.path.split(file_path)[-1]) 43 | incomplete_download = False 44 | try: 45 | with open(file_path, 'wb', buffering=16 * 1024 * 1024) as f: 46 | for chunk in r.iter_content(4 * 1024 * 1024): 47 | f.write(chunk) 48 | bar.update(len(chunk)) 49 | except Exception as e: 50 | raise e 51 | finally: 52 | bar.close() 53 | if os.path.exists(file_path) and os.path.getsize(file_path) != total_size: 54 | incomplete_download = True 55 | os.remove(file_path) 56 | if incomplete_download: 57 | raise Exception("Incomplete download") 58 | 59 | 60 | def download_from_github(version, fn, target_dir): 61 | url = "https://github.com/hse-aml/intro-to-dl/releases/download/{0}/{1}".format(version, fn) 62 | file_path = os.path.join(target_dir, fn) 63 | download_file(url, file_path) 64 | 65 | 66 | def sequential_downloader(version, fns, target_dir): 67 | os.makedirs(target_dir, exist_ok=True) 68 | for fn in fns: 69 | download_from_github(version, fn, target_dir) 70 | 71 | 72 | def link_all_files_from_dir(src_dir, dst_dir): 73 | os.makedirs(dst_dir, exist_ok=True) 74 | if not os.path.exists(src_dir): 75 | # Coursera "readonly/readonly" bug workaround 76 | src_dir = src_dir.replace("readonly", "readonly/readonly") 77 | for fn in os.listdir(src_dir): 78 | src_file = os.path.join(src_dir, fn) 79 | dst_file = os.path.join(dst_dir, fn) 80 | if os.name == "nt": 81 | shutil.copyfile(src_file, dst_file) 82 | else: 83 | if os.path.islink(dst_file): 84 | os.remove(dst_file) 85 | if not os.path.exists(dst_file): 86 | os.symlink(os.path.abspath(src_file), dst_file) 87 | 88 | 89 | def download_week_3_resources(save_path): 90 | # Originals: 91 | # http://www.robots.ox.ac.uk/~vgg/data/flowers/102/102flowers.tgz 92 | # http://www.robots.ox.ac.uk/~vgg/data/flowers/102/imagelabels.mat 93 | sequential_downloader( 94 | "v0.3", 95 | [ 96 | "102flowers.tgz", 97 | "imagelabels.mat" 98 | ], 99 | save_path 100 | ) 101 | 102 | 103 | def download_week_4_resources(save_path): 104 | # Originals 105 | # http://www.cs.columbia.edu/CAVE/databases/pubfig/download/lfw_attributes.txt 106 | # http://vis-www.cs.umass.edu/lfw/lfw-deepfunneled.tgz 107 | # http://vis-www.cs.umass.edu/lfw/lfw.tgz 108 | sequential_downloader( 109 | "v0.4", 110 | [ 111 | "lfw-deepfunneled.tgz", 112 | "lfw.tgz", 113 | "lfw_attributes.txt" 114 | ], 115 | save_path 116 | ) 117 | 118 | 119 | def download_week_6_resources(save_path): 120 | # Originals: 121 | # http://msvocds.blob.core.windows.net/annotations-1-0-3/captions_train-val2014.zip 122 | sequential_downloader( 123 | "v0.1", 124 | [ 125 | "captions_train-val2014.zip", 126 | "train2014_sample.zip", 127 | "train_img_embeds.pickle", 128 | "train_img_fns.pickle", 129 | "val2014_sample.zip", 130 | "val_img_embeds.pickle", 131 | "val_img_fns.pickle" 132 | ], 133 | save_path 134 | ) 135 | 136 | 137 | def link_week_3_resources(): 138 | link_all_files_from_dir("../readonly/week3/", ".") 139 | 140 | 141 | def link_week_4_resources(): 142 | link_all_files_from_dir("../readonly/week4/", ".") 143 | 144 | 145 | def link_week_6_resources(): 146 | 
link_all_files_from_dir("../readonly/week6/", ".") 147 | -------------------------------------------------------------------------------- /week02/week02_digits_classification.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"week02_digits_classification.ipynb","provenance":[],"collapsed_sections":[]},"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.8.5"}},"cells":[{"cell_type":"markdown","metadata":{"id":"iz75Onm1BmVZ"},"source":["# MNIST digits classification with PyTorch\n","\n","In this programming assignment you will implement your first neural network and train it to classify handwritten digits."]},{"cell_type":"code","metadata":{"id":"CR5ons9CxI26"},"source":["%%bash\n","\n","shred -u setup_colab.py\n","\n","wget https://raw.githubusercontent.com/hse-aml/intro-to-dl-pytorch/main/utils/setup_colab.py -O setup_colab.py"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"BzrfY5t_xI27"},"source":["import setup_colab\n","\n","setup_colab.setup_week02()"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"M75gw2yqBmVl"},"source":["import numpy as np\n","\n","%matplotlib inline\n","import matplotlib.pyplot as plt\n","\n","import tqdm\n","import itertools\n","import collections\n","from IPython import display\n","\n","import torch\n","from torch import nn\n","from torch import optim\n","from torch.utils.data import DataLoader\n","\n","from torchvision.datasets import MNIST\n","from torchvision.transforms import ToTensor\n","from torchvision.utils import make_grid"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"OjtpMRyiHdbt"},"source":["#auxiliary stuff\n","class AverageMeter:\n"," \n"," def __init__(self):\n"," self.reset()\n","\n"," def reset(self):\n"," self.val = 0\n"," self.avg = 0\n"," self.sum = 0\n"," self.count = 0\n","\n"," def update(self, val, n=1):\n"," self.val = val\n"," self.sum += val * n\n"," self.count += n\n"," self.avg = self.sum / self.count"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"LFrkxLQRBmVm"},"source":["### Fill in your Coursera token and email\n","To successfully submit your answers to our grader, please fill in your Coursera submission token and email."]},{"cell_type":"code","metadata":{"id":"Yobu6DjTBmVn"},"source":["import grading\n","\n","grader = grading.Grader(\n"," assignment_key=\"jNcGh-dHRvuN45xP616Dyw\",\n"," all_parts=[\"zGwHg\", \"5Ww9B\"]\n",")"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"I9Q5SMbUBmVn"},"source":["# token expires every 30 min\n","COURSERA_TOKEN = \"### YOUR TOKEN HERE ###\"\n","COURSERA_EMAIL = \"### YOUR EMAIL HERE ###\""],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"SRIiXmfFBmVn"},"source":["## MNIST dataset\n","\n","In this task we will work with MNIST dataset which contains 60000 28x28 images of handwritten digits from 0 to 9. \n","\n","For the data processing we'll use `torchvision` library. It is very simple and easy to use library for computer vision & deep learning. 
For a deep dive into the library you can check out the github page: https://github.com/pytorch/vision\n","\n","First, we set up datasets and dataloaders:"]},
{"cell_type":"code","metadata":{"id":"q-7Ut2_iBmVo"},"source":["# use it to convert from PIL to torch.Tensor\n","image_transform = ToTensor()\n","\n","train_dataset = MNIST(root='./', train=True, download=True, transform=image_transform)\n","test_dataset = MNIST(root='./', train=False, download=True, transform=image_transform)"],"execution_count":null,"outputs":[]},
{"cell_type":"code","metadata":{"id":"-uuRj-5nBmVp"},"source":["BATCH_SIZE = 32\n","\n","train_dataloader = DataLoader(\n","    train_dataset,\n","    batch_size=BATCH_SIZE,\n","    shuffle=True,\n",")\n","\n","test_dataloader = DataLoader(\n","    test_dataset,\n","    batch_size=BATCH_SIZE,\n",")"],"execution_count":null,"outputs":[]},
{"cell_type":"markdown","metadata":{"id":"_zQK0y3OBmVp"},"source":["Let's look at a batch of images:"]},
{"cell_type":"code","metadata":{"id":"Ndo1muoOBmVp"},"source":["example_batch = list(itertools.islice(train_dataloader, 1))[0]\n","images, labels = example_batch\n","\n","# make a grid of images\n","grid_images = make_grid(images, 8).permute(1, 2, 0)\n","\n","print(\"Labels of images: \", labels.view(-1, 8).tolist())\n","\n","plt.figure(figsize=(20, 10))\n","plt.imshow(grid_images)"],"execution_count":null,"outputs":[]},
{"cell_type":"markdown","metadata":{"id":"IUoHQGAnBmVq"},"source":["The task is to train a model that will be able to take an image as an input and predict the class label for it (from 0 to 9) as an output.\n","\n","As the main metric we will use accuracy:"]},
{"cell_type":"code","metadata":{"id":"rn8vmPkfBmVr"},"source":["def calculate_accuracy(prediction, target):\n","    # Note that prediction.shape == target.shape == [B, ]\n","    \n","    matching = (prediction == target).float()\n","    return matching.mean()"],"execution_count":null,"outputs":[]},
{"cell_type":"markdown","metadata":{"id":"pUqclU3HBmVr"},"source":["## Linear Model\n","\n","Let's start with a linear model and implement it as a neural network in PyTorch. \n","\n","The linear model takes a batch of $B$ images as an input, applies a linear transformation to them and outputs logits. For each image $x$ it returns a vector of logits $z$:\n","$$z = x \\cdot W + b $$\n","\n","Here x.shape = [1, 28 * 28], z.shape = [1, 10].\n","\n","Then, we can make the prediction $\\hat{y}$ for $x$ by taking the class with the maximum logit, or obtain probabilities $p$ for all classes by applying the softmax function to the vector of logits:\n","$$\\hat{y} = \\arg\\max_k z_k \\quad p_k = \\frac{e^{z_k}}{\\sum_{i=0}^{9}{e^{z_i}}} \\quad k = 0..9$$\n"]},
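{"cell_type":"markdown","metadata":{},"source":["A quick numeric sanity check of the formulas above, on a toy logits vector (illustrative only, not part of the graded pipeline):"]},
{"cell_type":"code","metadata":{},"source":["# toy logits for one object: softmax gives probabilities, argmax gives the predicted class\n","z = torch.tensor([[2.0, 0.5, -1.0]])\n","p = torch.softmax(z, dim=-1)\n","print(p, p.sum().item())  # probabilities sum to 1\n","print(z.argmax(dim=-1))   # tensor([0])"],"execution_count":null,"outputs":[]},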
{"cell_type":"code","metadata":{"id":"cr-sj3X9BmVr"},"source":["# Any neural network in PyTorch is a class with trainable (i.e. requires_grad=True) parameters.\n","# For a more detailed tutorial look at https://pytorch.org/tutorials/beginner/nn_tutorial.html\n","\n","class LinearModel(nn.Module):  # inheritance from nn.Module is required\n","    \n","    def __init__(self, input_dim: int, output_dim: int):\n","        super().__init__()  # don't forget to initialize the base class\n","        \n","        # initialize weight and bias\n","        # NOTE that using nn.Parameter is required\n","        # Don't use just torch.Tensor\n","        self.weight = nn.Parameter(torch.randn(output_dim, input_dim))\n","        self.bias = nn.Parameter(torch.zeros(output_dim))\n","        \n","        # initialize weight correctly\n","        self.reset_parameters()\n","    \n","    def reset_parameters(self):\n","        nn.init.kaiming_normal_(self.weight)\n","    \n","    def forward(self, input: torch.Tensor):\n","        # We expect input.shape == [B, 1, 28, 28] and need to output logits.shape == [B, 10]\n","        \n","        ### YOUR SOLUTION ###\n","        \n","        return logits"],"execution_count":null,"outputs":[]},
{"cell_type":"code","metadata":{"id":"UNZ_jPKixqhs"},"source":["INPUT_DIM = 28 * 28\n","OUTPUT_DIM = 10 # num classes\n","\n","linear_model = LinearModel(INPUT_DIM, OUTPUT_DIM)"],"execution_count":null,"outputs":[]},
{"cell_type":"code","metadata":{"id":"kefMlVIMyCzn"},"source":["# validate shapes\n","\n","assert linear_model.weight.shape == (OUTPUT_DIM, INPUT_DIM)\n","assert linear_model.bias.shape == (OUTPUT_DIM, )\n","assert linear_model.forward(example_batch[0]).shape == (BATCH_SIZE, 10)"],"execution_count":null,"outputs":[]},
{"cell_type":"markdown","metadata":{"id":"88KjRwzwyQQr"},"source":["To train the model we will need a loss and an optimizer. \n","\n","We will use the cross-entropy loss; for one object it looks as follows:\n","$$\\text{cross-entropy}(y, p) = -\\sum_{k=0}^{9}{\\log(p_k)[y = k]}$$ \n","\n","where $y$ is the true label, $p_k$ is the predicted probability for class $k$, and \n","$$\n","[x]=\\begin{cases}\n","    1, \\quad \\text{if $x$ is true} \\\\\n","    0, \\quad \\text{otherwise}\n","\\end{cases}\n","$$\n","\n","Cross-entropy minimization pushes $p_k$ close to 1 when $y = k$, which is what we want.\n","\n","The CrossEntropyLoss criterion in PyTorch combines the softmax and the loss calculation."]},
{"cell_type":"code","metadata":{"id":"tSYCvwVRBmVs"},"source":["criterion = nn.CrossEntropyLoss()\n","optimizer = optim.SGD(linear_model.parameters(), lr=1e-2, momentum=0.9, nesterov=True)"],"execution_count":null,"outputs":[]},
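{"cell_type":"markdown","metadata":{},"source":["A small check of the criterion on toy tensors (illustrative only): cross-entropy equals the negative log-probability assigned to the true class."]},
{"cell_type":"code","metadata":{},"source":["# cross-entropy == -log softmax probability at the true class\n","toy_logits = torch.tensor([[2.0, 0.5, -1.0]])\n","toy_target = torch.tensor([0])\n","manual = -torch.log_softmax(toy_logits, dim=-1)[0, 0]\n","print(manual.item(), criterion(toy_logits, toy_target).item())  # the two values match"],"execution_count":null,"outputs":[]},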
and accuracy\n"," train_loss_meter.update(loss.item())\n"," train_accuracy_meter.update(\n"," calculate_accuracy(\n"," prediction.detach(),\n"," labels\n"," ).item()\n"," )\n"," \n"," # save average train loss and accuracy\n"," HISTORY['train_loss'].append(train_loss_meter.avg)\n"," HISTORY['train_accuracy'].append(train_accuracy_meter.avg)\n"," \n"," # testing loop\n"," for test_batch in test_dataloader:\n"," images, labels = test_batch\n"," images = images.to(DEVICE)\n"," labels = labels.to(DEVICE)\n"," \n"," # аdd `with torch.no_grad()' to avoid computing gradients of weights\n"," with torch.no_grad():\n"," # do everything like we did in training loop\n"," logits = linear_model(images)\n"," prediction = logits.argmax(dim=-1)\n"," loss = criterion(logits, labels)\n"," \n"," test_loss_meter.update(loss.item())\n"," test_accuracy_meter.update(\n"," calculate_accuracy(\n"," prediction,\n"," labels\n"," ).item()\n"," )\n"," \n"," # save average test accuracy loss and accuracy\n"," HISTORY['test_loss'].append(test_loss_meter.avg)\n"," HISTORY['test_accuracy'].append(test_accuracy_meter.avg)\n"," \n"," # visualize all together\n"," display.clear_output()\n"," fig, axes = plt.subplots(1, 2, figsize=(20, 7))\n"," axes[0].set_title('Loss (Cross Entropy)')\n"," axes[0].plot(HISTORY['train_loss'], label='Train Loss')\n"," axes[0].plot(HISTORY['test_loss'], label='Test Loss')\n"," axes[0].grid()\n"," axes[0].legend(fontsize=20)\n"," \n"," axes[1].set_title('Accuracy')\n"," axes[1].plot(HISTORY['train_accuracy'], label='Train Accuracy')\n"," axes[1].plot(HISTORY['test_accuracy'], label='Test Accuracy')\n"," axes[1].grid()\n"," axes[1].legend(fontsize=20)\n"," \n"," plt.show()"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"tVK59LTcBmVw"},"source":["assert HISTORY['test_accuracy'][-1] > 0.92"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"vS9LhPeMxI3B"},"source":["ans_part1 = HISTORY['test_accuracy'][-1]"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"Z6bMyMCKBmVx"},"source":["## GRADED PART, DO NOT CHANGE!\n","grader.set_answer(\"zGwHg\", ans_part1)"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"wj2EH27KxI3C"},"source":["# you can make submission with answers so far to check yourself at this stage\n","grader.submit(COURSERA_EMAIL, COURSERA_TOKEN)"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"AhNvJjljBmVx"},"source":["## MLP with hidden layers\n","\n","Previously we've coded a fully-connected linear layer with matrix multiplication by hand. But usually people code only very specific layers by hand, all standard layers are already implemented in PyTorch. The analog of our LinearModel in PyTorch is nn.Linear\n","\n","Now define an MLP with 2 hidden layers. \n","- Do not forget to use nonlinearities between linear layers, for example, nn.ReLU\n","- nn.Sequential help you to combine several layers into one model "]},{"cell_type":"code","metadata":{"id":"AwlSa-JDBmVy"},"source":["INPUT_DIM = 28 * 28\n","OUTPUT_DIM = 10 # num classes\n","\n","# HINT\n","# Use nn.Sequential, nn.Linear and nn.ReLU\n","\n","### YOUR SOLUTION ###"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"grSmgqzJBmVy"},"source":["Use the code from above to train the model. 
{"cell_type":"markdown","metadata":{"id":"grSmgqzJBmVy"},"source":["Use the code from above to train the model. You're aiming for ~0.97 test accuracy here."]},
{"cell_type":"code","metadata":{"id":"MKEDLGhwBmVy","scrolled":false},"source":["HISTORY = collections.defaultdict(list)\n","\n","### YOUR SOLUTION ###"],"execution_count":null,"outputs":[]},
{"cell_type":"code","metadata":{"id":"iO9wWf-0BmVz"},"source":["ans_part2 = HISTORY['test_accuracy'][-1]"],"execution_count":null,"outputs":[]},
{"cell_type":"code","metadata":{"id":"fm-wxw3yxI3D"},"source":["## GRADED PART, DO NOT CHANGE!\n","grader.set_answer(\"5Ww9B\", ans_part2)"],"execution_count":null,"outputs":[]},
{"cell_type":"code","metadata":{"id":"6uaveQVWxI3D"},"source":["grader.submit(COURSERA_EMAIL, COURSERA_TOKEN)"],"execution_count":null,"outputs":[]}]}
--------------------------------------------------------------------------------
/week05/week05_part_of_speech_tagging.ipynb:
--------------------------------------------------------------------------------
1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"accelerator":"GPU","colab":{"name":"week05_part_of_speech_tagging.ipynb","provenance":[],"collapsed_sections":[]},"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.8.5"}},"cells":[
{"cell_type":"markdown","metadata":{"collapsed":true,"id":"WwNfzDcnXJoN"},"source":["## Part Of Speech Tagging\n","\n","Unlike our previous experience with language modelling, this time around we learn the mapping between two different kinds of elements.\n","\n","This setting is common for a range of useful problems:\n","* Speech Recognition - processing human voice into text\n","* Part Of Speech Tagging - for morphology-aware search and as an auxiliary task for most NLP problems\n","* Named Entity Recognition - for chat bots and web crawlers\n","* Protein structure prediction - for bioinformatics\n","\n","In this programming assignment we will work with part-of-speech tagging. As the name suggests, it's about converting a sequence of words into a sequence of part-of-speech tags. We'll use a reduced tag set for simplicity:\n","\n","### POS-tags\n","- ADJ - adjective (new, good, high, ...)\n","- ADP - adposition (on, of, at, ...)\n","- ADV - adverb (really, already, still, ...)\n","- CONJ - conjunction (and, or, but, ...)\n","- DET - determiner, article (the, a, some, ...)\n","- NOUN - noun (year, home, costs, ...)\n","- NUM - numeral (twenty-four, fourth, 1991, ...)\n","- PRT - particle (at, on, out, ...)\n","- PRON - pronoun (he, their, her, ...)\n","- VERB - verb (is, say, told, ...)\n","- . - punctuation marks (. , ;)\n","- X - other (ersatz, esprit, dunno, ...)\n","\n","__Disclaimer:__ This assignment is ungraded."]},
, ;)\n","- X\t- other\t(ersatz, esprit, dunno, ...)\n","\n","__Disclaimer:__ This assignment is ungraded."]},{"cell_type":"code","metadata":{"id":"zTG0uxEbXJoP","scrolled":true},"source":["import nltk\n","import sys\n","import numpy as np\n","\n","nltk.download('brown')\n","nltk.download('universal_tagset')\n","\n","data = nltk.corpus.brown.tagged_sents(tagset='universal')\n","all_tags = ['#EOS#','#UNK#','ADV', 'NOUN', 'ADP', 'PRON', 'DET', '.', 'PRT', 'VERB', 'X', 'NUM', 'CONJ', 'ADJ']\n","\n","data = np.array([[(word.lower(),tag) for word, tag in sentence] for sentence in data])"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"8jRfjw3FXJoQ"},"source":["from sklearn.model_selection import train_test_split\n","train_data, test_data = train_test_split(data, test_size=0.25, random_state=42)"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"L2AHSrM1XJoR"},"source":["from IPython.display import HTML, display\n","def draw(sentence):\n"," words,tags = zip(*sentence)\n"," display(HTML('{tags}{words}
'.format(\n"," words = '{}'.format(''.join(words)),\n"," tags = '{}'.format(''.join(tags)))))\n"," \n"," \n","draw(data[11])\n","draw(data[10])\n","draw(data[7])"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"SbwpLW0mXJoS"},"source":["### Building vocabularies\n","\n","Just like before, we have to build a mapping from tokens to integer ids. This time around, our model operates on a word level, processing one word per RNN step. This means we'll have to deal with far larger vocabulary.\n","\n","Luckily for us, we only receive those words as input i.e. we don't have to predict them. This means we can have a large vocabulary for free by using word embeddings."]},{"cell_type":"code","metadata":{"id":"9qOGuB1xXJoS"},"source":["from collections import Counter\n","word_counts = Counter()\n","for sentence in data:\n"," words,tags = zip(*sentence)\n"," word_counts.update(words)\n","\n","all_words = ['#EOS#','#UNK#'] + list(list(zip(*word_counts.most_common(10000)))[0])\n","\n","#let's measure what fraction of data words are in the dictionary\n","print(\"Coverage = %.5f\"%(float(sum(word_counts[w] for w in all_words)) / sum(word_counts.values())))"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"kCNS_olOXJoT"},"source":["from collections import defaultdict\n","word_to_id = defaultdict(lambda: 1, {word: i for i, word in enumerate(all_words)})\n","tag_to_id = {tag:i for i, tag in enumerate(all_tags)}"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"FHs7kEoqXJoU"},"source":["convert words and tags into fixed-size matrix"]},{"cell_type":"code","metadata":{"id":"b58RLWShXJoU"},"source":["def to_matrix(lines, token_to_id, max_len=None, pad=0, dtype='int32', time_major=False):\n"," \"\"\"Converts a list of names into rnn-digestable matrix with paddings added after the end\"\"\"\n"," \n"," max_len = max_len or max(map(len,lines))\n"," matrix = np.empty([len(lines),max_len],dtype)\n"," matrix.fill(pad)\n","\n"," for i in range(len(lines)):\n"," line_ix = list(map(token_to_id.__getitem__,lines[i]))[:max_len]\n"," matrix[i,:len(line_ix)] = line_ix\n","\n"," return matrix.T if time_major else matrix"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"pCxwnAbPXJoV"},"source":["batch_words, batch_tags = zip(*[zip(*sentence) for sentence in data[-3:]])\n","\n","print(\"Word ids:\")\n","print(to_matrix(batch_words,word_to_id))\n","print(\"Tag ids:\")\n","print(to_matrix(batch_tags,tag_to_id))"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"collapsed":true,"id":"rT8HO5rVXJoW"},"source":["### Build and train a simple model\n","\n","In this lab we'll focus on a high-level PyTorch interface to recurrent neural networks, which we tried at the end of the previous lab."]},{"cell_type":"code","metadata":{"id":"wBqW72t7XJoX"},"source":["import torch\n","from torch import nn"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"S0A3N-4pXJoX"},"source":["class SimpleModel(nn.Module):\n"," \n"," def __init__(self):\n"," super().__init__()\n"," \n"," self.rnn = nn.Sequential(\n"," nn.Embedding(len(all_words), 64),\n"," nn.RNN(64, 64, batch_first=True)\n"," )\n"," self.classifier = nn.Sequential(\n"," nn.Linear(64, len(all_tags)),\n"," )\n"," \n"," def forward(self, input):\n"," output, _ = self.rnn(input)\n"," return self.classifier(output)"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"DKijbze4XJoY"},"source":["We will use data 
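{"cell_type":"markdown","metadata":{},"source":["A quick shape check on random toy ids (illustrative only):"]},
{"cell_type":"code","metadata":{},"source":["# [B, T] integer ids -> embedding [B, T, 64] -> RNN output [B, T, 64] -> logits [B, T, len(all_tags)]\n","toy_batch = torch.randint(0, len(all_words), (2, 5))\n","print(SimpleModel()(toy_batch).shape)  # torch.Size([2, 5, 14])"],"execution_count":null,"outputs":[]},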
generator for batch training:"]},{"cell_type":"code","metadata":{"id":"OmksTBr0XJoY"},"source":["BATCH_SIZE = 128\n","\n","def generate_batches(sentences, batch_size=BATCH_SIZE, max_len=None, pad=0):\n"," assert isinstance(sentences, np.ndarray), \"Make sure sentences is a numpy array\"\n"," \n"," while True:\n"," indices = np.random.permutation(np.arange(len(sentences)))\n"," for start in range(0, len(indices) - 1, batch_size):\n"," batch_indices = indices[start:start + batch_size]\n"," batch_words, batch_tags = [],[]\n"," for sent in sentences[batch_indices]:\n"," words,tags = zip(*sent)\n"," batch_words.append(words)\n"," batch_tags.append(tags)\n","\n"," batch_words = to_matrix(batch_words, word_to_id, max_len, pad)\n"," batch_tags = to_matrix(batch_tags, tag_to_id, max_len, pad)\n","\n"," yield batch_words, batch_tags\n"," "],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"QWOPvdXlcMHc"},"source":["# import stuff\n","from torch import optim\n","\n","from tqdm import tqdm\n","from itertools import islice\n","\n","# auxiliary stuff\n","class AverageMeter:\n"," \n"," def __init__(self):\n"," self.reset()\n","\n"," def reset(self):\n"," self.val = 0\n"," self.avg = 0\n"," self.sum = 0\n"," self.count = 0\n","\n"," def update(self, val, n=1):\n"," self.val = val\n"," self.sum += val * n\n"," self.count += n\n"," self.avg = self.sum / self.count"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"wOZ9fMfNXJoZ"},"source":["### Training\n","Here we do not compute the loss on padded symbols: we pass the `ignore_index` argument to `CrossEntropyLoss`."]},{"cell_type":"code","metadata":{"id":"RezsxflyXJob"},"source":["NUM_EPOCH = 10\n","DEVICE = torch.device('cuda')\n","\n","model = SimpleModel().to(DEVICE)\n","optimizer = optim.Adam(model.parameters(), 1e-3)\n","\n","# ignore padding index\n","criterion = nn.CrossEntropyLoss(ignore_index=0)"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"dYtTvq7gXJob"},"source":["for _ in range(NUM_EPOCH):\n","\n"," loss_meter = AverageMeter()\n"," for batch in islice(generate_batches(train_data), 0, len(train_data) // BATCH_SIZE):\n"," word_id, tag_id = batch\n"," word_id = torch.from_numpy(word_id).long().to(DEVICE)\n"," tag_id = torch.from_numpy(tag_id).long().to(DEVICE)\n"," \n"," logits = model(word_id).transpose(-1, -2)\n"," \n"," loss = criterion(logits, tag_id)\n"," \n"," optimizer.zero_grad()\n"," loss.backward()\n"," optimizer.step()\n","\n"," loss_meter.update(loss.item())\n"," \n"," print(loss_meter.avg)"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"Obv6ezi6iWNo"},"source":["def compute_test_accuracy(model):\n"," test_words, test_tags = zip(*[zip(*sentence) for sentence in test_data])\n"," test_words, test_tags = to_matrix(test_words, word_to_id), to_matrix(test_tags, tag_to_id)\n","\n"," test_words = torch.from_numpy(test_words).long().to(DEVICE)\n"," test_tags = torch.from_numpy(test_tags).long().to(DEVICE)\n","\n"," predicted_tags = model(test_words).argmax(dim=-1)\n","\n"," numerator = torch.sum(torch.logical_and((predicted_tags == test_tags), (test_tags != 0)))\n"," denominator = torch.sum(test_words != 0)\n"," accuracy = (numerator / denominator).item()\n"," return accuracy"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"IIYQ5ZRXZB6r"},"source":["accuracy = 
compute_test_accuracy(model)"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"RGjUQ4R0Yyoy"},"source":["print(\"Final accuracy: %.5f\" % accuracy)\n","\n","assert accuracy > 0.94"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"IX3AjVt6XJoe"},"source":["### Task I: getting all bidirectional\n","\n","Since we're analyzing a full sequence, it's legal for us to look into future data.\n","\n","A simple way to achieve that is to go both directions at once, making a __bidirectional RNN__.\n","\n","Try setting the `bidirectional` argument to True in `nn.RNN`. You will need to adjust the dimensions of the classifier layer too, since a bidirectional RNN outputs twice as many features!\n","\n","Your first task is to use such a layer for our POS-tagger."]},{"cell_type":"code","metadata":{"id":"A8h3k8VeXJof"},"source":["# Define a model that utilizes bidirectional nn.RNN\n","\n","### Your code here ###"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"Vz1DM5X1XJog"},"source":["acc = compute_test_accuracy(model)\n","print(\"\\nFinal accuracy: %.5f\"%acc)\n","\n","assert acc>0.96, \"Bidirectional RNNs are better than this!\"\n","print(\"Well done!\")"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"RdW2nBw-XJog"},"source":["### Task II: now go and improve it\n","\n","You guessed it. We're now going to ask you to come up with a better network.\n","\n","Here are a few tips:\n","\n","* __Go beyond nn.RNN__: there are `nn.LSTM` and `nn.GRU`\n"," * You can also use 1D Convolutions (`nn.Conv1d`). They are often as good as recurrent layers but with less overfitting.\n","* __Stack more layers__: if there is a common motif to this course, it's stacking layers\n"," * Try adding recurrent and 1D conv layers on top of one another\n"," * Just remember that bigger networks may need more epochs to train\n","* __Gradient clipping__: If your training isn't as stable as you'd like, try to use `nn.utils.clip_grad_norm_`.\n"," * Which is to say, it's a good idea to watch over your loss curve at each minibatch. \n","* __Regularization__: you can apply dropout as usual but also in an RNN-specific way\n"," * `nn.Dropout` works in between RNN layers\n"," * Recurrent layers also have a `dropout` parameter\n","* __More words!__: You can obtain greater performance by expanding your model's input dictionary from 10,000 up to every single word!\n"," * Just make sure your model doesn't overfit due to so many parameters.\n"," * Combined with regularizers or pre-trained word-vectors this could be really good, because right now our model is blind to >5% of words.\n","* __The most important advice__: don't cram in everything at once!\n"," * If you stuff in a lot of modifications, some of them will almost inevitably be detrimental, and you'll never know which ones.\n"," * Instead, try to go in small iterations and record experiment results to guide further search.\n"," \n","There's some advanced stuff waiting at the end of the notebook.\n"," \n","Good hunting!"]},{"cell_type":"code","metadata":{"id":"Hm_NIQ5rXJoh"},"source":["# "],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"6RXKBxmxXJoh"},"source":["acc = compute_test_accuracy(model)\n","print(\"\\nFinal accuracy: %.5f\"%acc)\n","\n","if acc >= 0.99:\n"," print(\"Awesome! The sky was the limit and yet you scored even higher!\")\n","elif acc >= 0.98:\n"," print(\"Excellent! Whatever dark magic you used, it certainly did its trick.\")\n","elif acc >= 0.97:\n"," print(\"Well done! 
If this were a graded assignment, you would have gotten a 100% score.\")\n","elif acc > 0.96:\n"," print(\"Just a few more iterations!\")\n","else:\n"," print(\"There seems to be something broken in the model. Unless you know what you're doing, try taking the bidirectional RNN and adding one enhancement at a time to see where the problem is.\")"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"8Isvs4IIXJoi"},"source":["```\n","\n","```\n","\n","```\n","\n","```\n","\n","```\n","\n","```\n","\n","\n","#### Some advanced stuff\n","Here are a few more tips on how to improve training that are a bit trickier to implement. We strongly suggest that you try them _after_ you've got a good initial model.\n","* __Use pre-trained embeddings__: you can use pre-trained weights from [here](http://ahogrammer.com/2017/01/20/the-list-of-pretrained-word-embeddings/) to kickstart your Embedding layer.\n"," * Use nn.Embedding.from_pretrained to initialize the embedding layer with a pretrained matrix.\n"," * When using pre-trained embeddings, keep in mind that the pre-trained model's dictionary is different from your own.\n"," * You may want to freeze the parameters of the embedding layer for the first several epochs of fine-tuning, or not fine-tune them at all. In the first case you can set a zero learning rate for this parameter group; in the second, just use the freeze argument of nn.Embedding.from_pretrained.\n","* __More efficient batching__: right now training spends a lot of time iterating over \"0\"s\n"," * This happens because each batch is always padded to the length of its longest sentence\n"," * You can speed things up by pre-generating batches of sentences with similar lengths and feeding the model a randomly chosen pre-generated batch.\n"," * This technically breaks the i.i.d. 
assumption, but it works unless you come up with some insane rnn architectures.\n","* __Structured loss functions__: since we're tagging the whole sequence at once, we might as well train our network to do so.\n"," * There's more than one way to do so, but we'd recommend starting with [Conditional Random Fields](http://blog.echen.me/2012/01/03/introduction-to-conditional-random-fields/)\n"," * You can read an [official PyTorch tutorial on Bi-LSTM Conditional Random Field](https://pytorch.org/tutorials/beginner/nlp/advanced_tutorial.html#bi-lstm-conditional-random-field-discussion).\n"]}]} -------------------------------------------------------------------------------- /week01/week01_linear_models.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"week01_linear_models.ipynb","provenance":[],"collapsed_sections":["GDqx6q0yq67H","s7SxSkvlq67K"]},"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.8.5"}},"cells":[{"cell_type":"markdown","metadata":{"id":"lswrtMSkq662"},"source":["# Linear Models and Optimization\n","\n","In this programming assignment you will implement a linear classifier and train it using stochastic gradient descent and its modifications, with numpy."]},{"cell_type":"code","metadata":{"id":"x8BZCfO7wemu"},"source":["%%bash\n","\n","shred -u setup_colab.py\n","\n","wget https://raw.githubusercontent.com/hse-aml/intro-to-dl-pytorch/main/utils/setup_colab.py -O setup_colab.py"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"xU9neFw9wemv"},"source":["import setup_colab\n","\n","setup_colab.setup_week01()"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"MHTssL7sq66-"},"source":["import numpy as np\n","\n","%matplotlib inline\n","import matplotlib.pyplot as plt"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"XTFUF_ohwem1"},"source":["import grading\n","\n","grader = grading.Grader(\n"," assignment_key=\"UaHtvpEFEee0XQ6wjK-hZg\",\n"," all_parts=[\"xU7U4\", \"HyTF6\", \"uNidL\", \"ToK7N\", \"GBdgZ\", \"dLdHG\"]\n",")"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"O6MGCofdq_HN"},"source":["### Fill in your Coursera token and email\n","To successfully submit your answers to our grader, please fill in your Coursera submission token and email."]},{"cell_type":"code","metadata":{"id":"d5EY-Cb6q66_"},"source":["# token expires every 30 min\n","COURSERA_TOKEN = \"### YOUR TOKEN HERE ###\"\n","COURSERA_EMAIL = \"### YOUR EMAIL HERE ###\""],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"YUInirwOq66_"},"source":["## Two-dimensional classification\n","\n","To make things more intuitive, let's solve a 2D classification problem with synthetic data."]},{"cell_type":"code","metadata":{"id":"B1S-9WROq66_"},"source":["with open('train.npy', 'rb') as fin:\n"," X = np.load(fin)\n"," \n","with open('target.npy', 'rb') as fin:\n"," y = np.load(fin)\n","\n","plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.Paired, s=20)\n","plt.show()"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"eTcj9rELq67A"},"source":["# Task\n","\n","## Features\n","\n","As you can see, the data above isn't linearly separable. 
Therefore we should add features (or use a non-linear model). Note that the decision boundary between the two classes has the form of a circle, so we can add quadratic features to make the problem linearly separable. The idea behind this is displayed in the image below:\n","\n","![](kernel.png)"]},{"cell_type":"code","metadata":{"id":"k7qjcYkgq67A"},"source":["def expand(X):\n"," \"\"\"\n"," Adds quadratic features. \n"," This expansion allows your linear model to make non-linear separation.\n"," \n"," For each sample (row in matrix), compute an expanded row:\n"," [feature0, feature1, feature0^2, feature1^2, feature0*feature1, 1]\n"," \n"," :param X: matrix of features, shape [n_samples,2]\n"," :returns: expanded features of shape [n_samples,6]\n"," \"\"\"\n"," X_expanded = np.zeros((X.shape[0], 6))\n"," \n"," # TODO:"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"Do2BlIUhq67A"},"source":["X_expanded = expand(X)"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"MRcg2FG2q67B"},"source":["Here are some tests for your implementation of the `expand` function."]},{"cell_type":"code","metadata":{"id":"UOWyKIFWq67B"},"source":["# simple test on a few handpicked points\n","\n","dummy_X = np.array([\n"," [0,0],\n"," [1,0],\n"," [2.61,-1.28],\n"," [-0.59,2.1]\n"," ])\n","\n","# call your expand function\n","dummy_expanded = expand(dummy_X)\n","\n","# what it should have returned: x0 x1 x0^2 x1^2 x0*x1 1\n","dummy_expanded_ans = np.array([[ 0. , 0. , 0. , 0. , 0. , 1. ],\n"," [ 1. , 0. , 1. , 0. , 0. , 1. ],\n"," [ 2.61 , -1.28 , 6.8121, 1.6384, -3.3408, 1. ],\n"," [-0.59 , 2.1 , 0.3481, 4.41 , -1.239 , 1. ]])\n","\n","# tests\n","assert isinstance(dummy_expanded,np.ndarray), \"please make sure you return a numpy array\"\n","assert dummy_expanded.shape == dummy_expanded_ans.shape, \"please make sure your shape is correct\"\n","assert np.allclose(dummy_expanded,dummy_expanded_ans,1e-3), \"Something's out of order with features\"\n","\n","print(\"Seems legit!\")\n"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"l-NsE48yq67C"},"source":["## Logistic regression\n","\n","To classify objects we will estimate the probability that an object belongs to class '1'. 
To predict this probability we will pass the output of a linear model through the logistic function:\n","\n","$$ a(x; w) = \\langle w, x \\rangle $$\n","$$ P( y=1 \\; \\big| \\; x, \\, w) = \\dfrac{1}{1 + \\exp(- \\langle w, x \\rangle)} = \\sigma(\\langle w, x \\rangle)$$\n"]},{"cell_type":"code","metadata":{"id":"XiU5-LsCq67C"},"source":["def probability(X, w):\n"," \"\"\"\n"," Given input features and weights\n"," return predicted probabilities of y==1 given x, P(y=1|x), see description above\n"," \n"," Don't forget to use the expand(X) function (where necessary) in this and subsequent functions.\n"," \n"," :param X: feature matrix X of shape [n_samples,6] (expanded)\n"," :param w: weight vector w of shape [6] for each of the expanded features\n"," :returns: an array of predicted probabilities in the [0,1] interval.\n"," \"\"\"\n","\n"," # TODO:"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"j2cwD41Cq67D"},"source":["dummy_weights = np.linspace(-1, 1, 6)\n","ans_part1 = probability(X_expanded[:1, :], dummy_weights)[0]"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"gxVq6_sXq67D"},"source":["## GRADED PART, DO NOT CHANGE!\n","grader.set_answer(\"xU7U4\", ans_part1)"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"nzXoOVXtq67D"},"source":["# you can make submission with answers so far to check yourself at this stage\n","grader.submit(COURSERA_EMAIL, COURSERA_TOKEN)"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"2fDmcMTCq67D"},"source":["In logistic regression the optimal parameters $w$ are found by cross-entropy minimization:\n","\n","Loss for one sample: $$ l(x_i, y_i, w) = - \\left[ {y_i \\cdot \\log P(y_i = 1 \\, | \\, x_i,w) + (1-y_i) \\cdot \\log (1-P(y_i = 1\\, | \\, x_i,w))}\\right] $$\n","\n","Loss for many samples: $$ L(X, \\vec{y}, w) = {1 \\over \\ell} \\sum_{i=1}^\\ell l(x_i, y_i, w) $$\n","\n"]},{"cell_type":"code","metadata":{"id":"IpZSZ1-8q67D"},"source":["def compute_loss(X, y, w):\n"," \"\"\"\n"," Given feature matrix X [n_samples,6], target vector [n_samples] of 1/0,\n"," and weight vector w [6], compute scalar loss function L using formula above.\n"," Keep in mind that our loss is averaged over all samples (rows) in X.\n"," \"\"\"\n"," # TODO:"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"Bym6Zuq_q67E"},"source":["# use output of this cell to fill answer field \n","ans_part2 = compute_loss(X_expanded, y, dummy_weights)"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"ulkJuTwkq67E"},"source":["## GRADED PART, DO NOT CHANGE!\n","grader.set_answer(\"HyTF6\", ans_part2)"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"AEKOi1JNq67E"},"source":["# you can make submission with answers so far to check yourself at this stage\n","grader.submit(COURSERA_EMAIL, COURSERA_TOKEN)"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"uHARoiTqq67E"},"source":["Since we train our model with gradient descent, we should compute gradients.\n","\n","To be specific, we need the derivative of the loss function with respect to each weight [6 of them].\n","\n","$$ \\nabla_w L = {1 \\over \\ell} \\sum_{i=1}^\\ell \\nabla_w l(x_i, y_i, w) $$ \n","\n","We won't be giving you the exact formula this time; instead, try figuring out the derivative with pen and paper. 
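A quick way to validate your pen-and-paper result is the finite-difference check mentioned in the next paragraph. Below is a minimal sketch of such a checker; it is an illustration only, not part of the graded assignment, and it assumes your `compute_loss` is already implemented:

```python
def numeric_grad(X, y, w, eps=1e-5):
    """Central finite-difference estimate of the gradient of compute_loss at w."""
    w = w.astype(float)
    grad = np.zeros_like(w)
    for j in range(len(w)):
        w_plus, w_minus = w.copy(), w.copy()
        w_plus[j] += eps   # shift one coordinate up...
        w_minus[j] -= eps  # ...and down
        grad[j] = (compute_loss(X, y, w_plus) - compute_loss(X, y, w_minus)) / (2 * eps)
    return grad

# sanity check once compute_grad is written:
# assert np.allclose(compute_grad(X_expanded, y, w), numeric_grad(X_expanded, y, w), atol=1e-4)
```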
\n","\n","As usual, we've made a small test for you, but if you need more, feel free to check your math against finite differences (estimate how $L$ changes if you shift $w$ by $10^{-5}$ or so)."]},{"cell_type":"code","metadata":{"id":"fNfm4VZRq67F"},"source":["def compute_grad(X, y, w):\n"," \"\"\"\n"," Given feature matrix X [n_samples,6], target vector [n_samples] of 1/0,\n"," and weight vector w [6], compute vector [6] of derivatives of L over each weights.\n"," Keep in mind that our loss is averaged over all samples (rows) in X.\n"," \"\"\"\n"," \n"," # TODO"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"XZIff14Zq67F"},"source":["# use output of this cell to fill answer field \n","ans_part3 = np.linalg.norm(compute_grad(X_expanded, y, dummy_weights))"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"RQ4Erw1xq67G"},"source":["## GRADED PART, DO NOT CHANGE!\n","grader.set_answer(\"uNidL\", ans_part3)"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"1YVXUNOMq67G"},"source":["# you can make submission with answers so far to check yourself at this stage\n","grader.submit(COURSERA_EMAIL, COURSERA_TOKEN)"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"mFZvWPPBq67H"},"source":["Here's an auxiliary function that visualizes the predictions:"]},{"cell_type":"code","metadata":{"id":"kmQPfWd1q67H"},"source":["from IPython import display\n","\n","h = 0.01\n","x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1\n","y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1\n","xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))\n","\n","def visualize(X, y, w, history):\n"," \"\"\"draws classifier prediction with matplotlib magic\"\"\"\n"," Z = probability(expand(np.c_[xx.ravel(), yy.ravel()]), w)\n"," Z = Z.reshape(xx.shape)\n"," plt.subplot(1, 2, 1)\n"," plt.contourf(xx, yy, Z, alpha=0.8)\n"," plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.Paired)\n"," plt.xlim(xx.min(), xx.max())\n"," plt.ylim(yy.min(), yy.max())\n"," \n"," plt.subplot(1, 2, 2)\n"," plt.plot(history)\n"," plt.grid()\n"," ymin, ymax = plt.ylim()\n"," plt.ylim(0, ymax)\n"," display.clear_output(wait=True)\n"," plt.show()"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"79uNfp65q67H"},"source":["visualize(X, y, dummy_weights, [0.5, 0.5, 0.25])"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"GDqx6q0yq67H"},"source":["## Training\n","In this section we'll use the functions you wrote to train our classifier using stochastic gradient descent.\n","\n","You can try change hyperparameters like batch size, learning rate and so on to find the best one, but use our hyperparameters when fill answers."]},{"cell_type":"markdown","metadata":{"id":"9GXkq6VCq67H"},"source":["## Mini-batch SGD\n","\n","Stochastic gradient descent just takes a random batch of $m$ samples on each iteration, calculates a gradient of the loss on it and makes a step:\n","$$ w_t = w_{t-1} - \\eta \\dfrac{1}{m} \\sum_{j=1}^m \\nabla_w l(x_{i_j}, y_{i_j}, w_t) $$\n","\n"]},{"cell_type":"code","metadata":{"id":"8n6rfTrLq67I"},"source":["# please use np.random.seed(42), eta=0.1, n_iter=100 and batch_size=4 for deterministic results\n","\n","np.random.seed(42)\n","w = np.array([0, 0, 0, 0, 0, 1])\n","\n","eta= 0.1 # learning rate\n","\n","n_iter = 100\n","batch_size = 4\n","loss = np.zeros(n_iter)\n","plt.figure(figsize=(12, 5))\n","\n","for i in range(n_iter):\n"," ind = 
np.random.choice(X_expanded.shape[0], batch_size)\n"," loss[i] = compute_loss(X_expanded, y, w)\n"," if i % 10 == 0:\n"," visualize(X_expanded[ind, :], y[ind], w, loss)\n","\n"," # Keep in mind that compute_grad already does averaging over batch for you!\n"," # TODO:\n","\n","visualize(X, y, w, loss)\n","plt.clf()"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"xFNsqMkdq67I"},"source":["# use output of this cell to fill answer field \n","\n","ans_part4 = compute_loss(X_expanded, y, w)"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"7dutmAB-q67I"},"source":["## GRADED PART, DO NOT CHANGE!\n","grader.set_answer(\"ToK7N\", ans_part4)"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"wYbPHz2Zq67I"},"source":["# you can make submission with answers so far to check yourself at this stage\n","grader.submit(COURSERA_EMAIL, COURSERA_TOKEN)"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"BvusiENKq67I"},"source":["## SGD with momentum\n","\n","Momentum is a method that helps accelerate SGD in the relevant direction and dampens oscillations as can be seen in image below. It does this by adding a fraction $\\alpha$ of the update vector of the past time step to the current update vector.\n","
\n","
\n","\n","$$ \\nu_t = \\alpha \\nu_{t-1} + \\eta\\dfrac{1}{m} \\sum_{j=1}^m \\nabla_w l(x_{i_j}, y_{i_j}, w_t) $$\n","$$ w_t = w_{t-1} - \\nu_t$$\n","\n","
\n","\n","\n","![](sgd.png)\n"]},{"cell_type":"code","metadata":{"id":"BbC2_UBzq67J"},"source":["# please use np.random.seed(42), eta=0.05, alpha=0.9, n_iter=100 and batch_size=4 for deterministic results\n","np.random.seed(42)\n","w = np.array([0, 0, 0, 0, 0, 1])\n","\n","eta = 0.05 # learning rate\n","alpha = 0.9 # momentum\n","nu = np.zeros_like(w)\n","\n","n_iter = 100\n","batch_size = 4\n","loss = np.zeros(n_iter)\n","plt.figure(figsize=(12, 5))\n","\n","for i in range(n_iter):\n"," ind = np.random.choice(X_expanded.shape[0], batch_size)\n"," loss[i] = compute_loss(X_expanded, y, w)\n"," if i % 10 == 0:\n"," visualize(X_expanded[ind, :], y[ind], w, loss)\n","\n"," # TODO:\n","\n","visualize(X, y, w, loss)\n","plt.clf()"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"rAFfLT7Gq67J"},"source":["# use output of this cell to fill answer field \n","\n","ans_part5 = compute_loss(X_expanded, y, w)"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"pzuqS2Voq67K"},"source":["## GRADED PART, DO NOT CHANGE!\n","grader.set_answer(\"GBdgZ\", ans_part5)"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"bbLEEyB4q67K"},"source":["# you can make submission with answers so far to check yourself at this stage\n","grader.submit(COURSERA_EMAIL, COURSERA_TOKEN)"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"s7SxSkvlq67K"},"source":["## RMSprop\n","\n","Implement RMSPROP algorithm, which use squared gradients to adjust learning rate:\n","\n","$$ G_j^t = \\alpha G_j^{t-1} + (1 - \\alpha) g_{tj}^2 $$\n","$$ w_j^t = w_j^{t-1} - \\dfrac{\\eta}{\\sqrt{G_j^t + \\varepsilon}} g_{tj} $$"]},{"cell_type":"code","metadata":{"id":"KphpIssPq67L"},"source":["# please use np.random.seed(42), eta=0.1, alpha=0.9, n_iter=100 and batch_size=4 for deterministic results\n","np.random.seed(42)\n","\n","w = np.array([0, 0, 0, 0, 0, 1.])\n","\n","eta = 0.1 # learning rate\n","alpha = 0.9 # moving average of gradient norm squared\n","g2 = None # we start with None so that you can update this value correctly on the first iteration\n","eps = 1e-8\n","\n","n_iter = 100\n","batch_size = 4\n","loss = np.zeros(n_iter)\n","plt.figure(figsize=(12,5))\n","for i in range(n_iter):\n"," ind = np.random.choice(X_expanded.shape[0], batch_size)\n"," loss[i] = compute_loss(X_expanded, y, w)\n"," if i % 10 == 0:\n"," visualize(X_expanded[ind, :], y[ind], w, loss)\n","\n"," # TODO:\n","\n","visualize(X, y, w, loss)\n","plt.clf()"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"LFyg9IyTq67L"},"source":["# use output of this cell to fill answer field \n","ans_part6 = compute_loss(X_expanded, y, w)"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"tppAYN1xq67L"},"source":["## GRADED PART, DO NOT CHANGE!\n","grader.set_answer(\"dLdHG\", ans_part6)"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"Kth0Hxh2q67L"},"source":["grader.submit(COURSERA_EMAIL, COURSERA_TOKEN)"],"execution_count":null,"outputs":[]}]} -------------------------------------------------------------------------------- /week03/SGA1_Object_Detection.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "accelerator": "GPU", 6 | "colab": { 7 | "name": "SGA1_Object Detection.ipynb", 8 | "provenance": [], 9 | "collapsed_sections": [] 10 | }, 11 | "kernelspec": { 12 | "display_name": "Python 
3", 13 | "name": "python3" 14 | } 15 | }, 16 | "cells": [ 17 | { 18 | "cell_type": "markdown", 19 | "metadata": { 20 | "id": "_MaSHiIyGd23" 21 | }, 22 | "source": [ 23 | "# Object Detection" 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": { 29 | "id": "ZcPTRljhXhhI" 30 | }, 31 | "source": [ 32 | "In this assignment, you will implement a fruit detector. \n", 33 | "The task is divided into steps for simpler navigation.\n", 34 | "\n", 35 | "Let's start!" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "metadata": { 41 | "id": "fcggFBc_FdC2" 42 | }, 43 | "source": [ 44 | "# we will need this library to process the labeling\n", 45 | "! pip install xmltodict" 46 | ], 47 | "execution_count": null, 48 | "outputs": [] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "metadata": { 53 | "id": "HsTOiuplEugf" 54 | }, 55 | "source": [ 56 | "import numpy as np\n", 57 | "import torch\n", 58 | "from torch import nn\n", 59 | "from torch.nn import functional as F\n", 60 | "from torch.utils.data import Dataset, DataLoader\n", 61 | "import xmltodict\n", 62 | "import json\n", 63 | "import glob\n", 64 | "import cv2\n", 65 | "import os\n", 66 | "import torchvision\n", 67 | "import matplotlib.pyplot as plt\n", 68 | "\n", 69 | "import torchvision\n", 70 | "import torchvision.transforms as T\n", 71 | "from torchvision.models.detection.faster_rcnn import FastRCNNPredictor\n", 72 | "from torchvision.models.detection import fasterrcnn_resnet50_fpn" 73 | ], 74 | "execution_count": null, 75 | "outputs": [] 76 | }, 77 | { 78 | "cell_type": "markdown", 79 | "metadata": { 80 | "id": "XfbyE1_tIL1x" 81 | }, 82 | "source": [ 83 | "## Step 0. Dataset" 84 | ] 85 | }, 86 | { 87 | "cell_type": "markdown", 88 | "metadata": { 89 | "id": "KgTskp7RbqFZ" 90 | }, 91 | "source": [ 92 | "First, let's load the data that you can download [here](https://drive.google.com/file/d/1Ve5e9qdy_sUCMM4qXWrw8ecURg2af9Cm/view?usp=sharing). \n", 93 | "\n", 94 | "We have already written a dataset class for you and we encourage you to figure out how it works." 
95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "metadata": { 100 | "id": "LelLza0-FR_Y" 101 | }, 102 | "source": [ 103 | "class2tag = {\"apple\": 1, \"orange\": 2, \"banana\": 3}\n", 104 | "\n", 105 | "\n", 106 | "class FruitDataset(Dataset):\n", 107 | " def __init__(self, data_dir, transform=None):\n", 108 | " self.images = []\n", 109 | " self.annotations = []\n", 110 | " self.transform = transform\n", 111 | " for annotation in glob.glob(data_dir + \"/*xml\"):\n", 112 | " image_fname = os.path.splitext(annotation)[0] + \".jpg\"\n", 113 | " self.images.append(cv2.cvtColor(cv2.imread(image_fname), cv2.COLOR_BGR2RGB))\n", 114 | " with open(annotation) as f:\n", 115 | " annotation_dict = xmltodict.parse(f.read())\n", 116 | " bboxes = []\n", 117 | " labels = []\n", 118 | " objects = annotation_dict[\"annotation\"][\"object\"]\n", 119 | " if not isinstance(objects, list):\n", 120 | " objects = [objects]\n", 121 | " for obj in objects:\n", 122 | " bndbox = obj[\"bndbox\"]\n", 123 | " bbox = [bndbox[\"xmin\"], bndbox[\"ymin\"], bndbox[\"xmax\"], bndbox[\"ymax\"]]\n", 124 | " bbox = list(map(int, bbox))\n", 125 | " bboxes.append(torch.tensor(bbox))\n", 126 | " labels.append(class2tag[obj[\"name\"]])\n", 127 | " self.annotations.append(\n", 128 | " {\"boxes\": torch.stack(bboxes).float(), \"labels\": torch.tensor(labels)}\n", 129 | " )\n", 130 | "\n", 131 | " def __getitem__(self, i):\n", 132 | " if self.transform:\n", 133 | " # the following code is correct if you use albumentations\n", 134 | " # if you use torchvision transforms you have to modify it\n", 135 | " res = self.transform(\n", 136 | " image=self.images[i],\n", 137 | " bboxes=self.annotations[i][\"boxes\"],\n", 138 | " labels=self.annotations[i][\"labels\"],\n", 139 | " )\n", 140 | " return res[\"image\"], {\n", 141 | " \"boxes\": torch.tensor(res[\"bboxes\"]),\n", 142 | " \"labels\": torch.tensor(res[\"labels\"]),\n", 143 | " }\n", 144 | " else:\n", 145 | " return self.images[i], self.annotations[i]\n", 146 | "\n", 147 | " def __len__(self):\n", 148 | " return len(self.images)" 149 | ], 150 | "execution_count": null, 151 | "outputs": [] 152 | }, 153 | { 154 | "cell_type": "markdown", 155 | "metadata": { 156 | "id": "-OCL52YoiigR" 157 | }, 158 | "source": [ 159 | "## Step 1. Intersection over Union (10 points)" 160 | ] 161 | }, 162 | { 163 | "cell_type": "markdown", 164 | "metadata": { 165 | "id": "bsybDfSlIrFL" 166 | }, 167 | "source": [ 168 | "In the [Object Detection task](https://en.wikipedia.org/wiki/Object_detection), you need to find objects of a certain class on the image and locate their positions (using the bounding box). The model should predict the coordinates of the bounding box `[x0, y0, x1, y1]` and the label for this box. The model can predict multiple candidate bounding boxes for an object. We will select candidates using [Intersection Over Union](https://en.wikipedia.org/wiki/Jaccard_index)." 169 | ] 170 | }, 171 | { 172 | "cell_type": "markdown", 173 | "metadata": { 174 | "id": "otUTl7TMi6Ts" 175 | }, 176 | "source": [ 177 | "\n" 178 | ] 179 | }, 180 | { 181 | "cell_type": "markdown", 182 | "metadata": { 183 | "id": "QPZqTTOgjX_K" 184 | }, 185 | "source": [ 186 | "Implement a function that will calculate IoU for bounding boxes." 
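For reference, IoU is just the intersection area divided by the union area. One possible implementation to check your own solution against (a sketch, not necessarily the graded reference):

```python
def iou_reference(dt_bbox, gt_bbox):
    # corners of the intersection rectangle (empty if the boxes do not overlap)
    x0, y0 = max(dt_bbox[0], gt_bbox[0]), max(dt_bbox[1], gt_bbox[1])
    x1, y1 = min(dt_bbox[2], gt_bbox[2]), min(dt_bbox[3], gt_bbox[3])
    intersection = max(0, x1 - x0) * max(0, y1 - y0)
    area_dt = (dt_bbox[2] - dt_bbox[0]) * (dt_bbox[3] - dt_bbox[1])
    area_gt = (gt_bbox[2] - gt_bbox[0]) * (gt_bbox[3] - gt_bbox[1])
    return intersection / (area_dt + area_gt - intersection)

# iou_reference([0, 0, 2, 2], [1, 1, 3, 3]) == 1 / 7, matching the expected value below
```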
187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "metadata": { 192 | "id": "gtCPABU8dXOz" 193 | }, 194 | "source": [ 195 | "def intersection_over_union(dt_bbox, gt_bbox):\n", 196 | " \"\"\"\n", 197 | " Intersection over Union between two bboxes\n", 198 | " :param dt_bbox: list or numpy array of size (4,) [x0, y0, x1, y1]\n", 199 | " :param gt_bbox: list or numpy array of size (4,) [x0, y0, x1, y1]\n", 200 | " :return : intersection over union\n", 201 | " \"\"\"\n", 202 | " ## YOUR CODE HERE\n", 203 | " \n", 204 | " return iou" 205 | ], 206 | "execution_count": null, 207 | "outputs": [] 208 | }, 209 | { 210 | "cell_type": "markdown", 211 | "metadata": { 212 | "id": "n2XtWBs9jq1I" 213 | }, 214 | "source": [ 215 | "If the function is implemented correctly, then the execution of the following cell will produce:\n", 216 | "\n", 217 | "**0.14285714285714285**" 218 | ] 219 | }, 220 | { 221 | "cell_type": "code", 222 | "metadata": { 223 | "id": "wXZFFOTBjjBX" 224 | }, 225 | "source": [ 226 | "dt_bbox = [0, 0, 2, 2]\n", 227 | "gt_bbox = [1, 1, 3, 3]\n", 228 | "intersection_over_union(dt_bbox, gt_bbox)" 229 | ], 230 | "execution_count": null, 231 | "outputs": [] 232 | }, 233 | { 234 | "cell_type": "markdown", 235 | "metadata": { 236 | "id": "Loadw2Krkq_a" 237 | }, 238 | "source": [ 239 | "## Step 2. Evaluate Sample (15 points)" 240 | ] 241 | }, 242 | { 243 | "cell_type": "markdown", 244 | "metadata": { 245 | "id": "SIrVulAQLidT" 246 | }, 247 | "source": [ 248 | "We now have to evaluate the predictions of the model. To do this, we will write a function that will do the following:\n", 249 | "1. Take model predictions and ground truth bounding boxes and labels as inputs.\n", 250 | "2. For each bounding box from the prediction, find the closest bounding box among the answers.\n", 251 | "3. For each found pair of bounding boxes, check whether the IoU is greater than a certain threshold `iou_threshold`. If the **IoU** exceeds the threshold, then we consider this answer as **True Positive**.\n", 252 | "4. Remove a matched bounding box from the evaluation.\n", 253 | "5. For each predicted bounding box, return the detection score and whether we were able to match it or not." 
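The `## YOUR CODE HERE` part of the next cell boils down to a greedy search for the unmatched ground-truth box of the same class with the highest IoU. A possible sketch (variable names follow the ones already used in the cell):

```python
max_IoU, max_gt_id = 0, -1
for gt_id in range(len(gt_labels)):
    if gt_labels[gt_id] != dt_label:
        continue  # only boxes of the same class can match
    IoU = intersection_over_union(dt_bbox, gt_bboxes[gt_id, :])
    if IoU > max_IoU:
        max_IoU, max_gt_id = IoU, gt_id
```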
254 | ] 255 | }, 256 | { 257 | "cell_type": "code", 258 | "metadata": { 259 | "id": "oK54_evGzyQg" 260 | }, 261 | "source": [ 262 | "def evaluate_sample(target_pred, target_true, iou_threshold=0.5):\n", 263 | " # ground truth\n", 264 | " gt_bboxes = target_true['boxes'].numpy()\n", 265 | " gt_labels = target_true['labels'].numpy()\n", 266 | "\n", 267 | " # predictions\n", 268 | " dt_bboxes = target_pred['boxes'].numpy()\n", 269 | " dt_labels = target_pred['labels'].numpy()\n", 270 | " dt_scores = target_pred['scores'].numpy()\n", 271 | "\n", 272 | " results = []\n", 273 | " # for each bounding box from the prediction, find the closest bounding box among the answers\n", 274 | " for detection_id in range(len(dt_labels)):\n", 275 | " dt_bbox = dt_bboxes[detection_id, :]\n", 276 | " dt_label = dt_labels[detection_id]\n", 277 | " dt_score = dt_scores[detection_id]\n", 278 | "\n", 279 | " detection_result_dict = {'score': dt_score}\n", 280 | "\n", 281 | " ## YOUR CODE HERE\n", 282 | " \n", 283 | " if max_gt_id >= 0 and max_IoU >= iou_threshold:\n", 284 | " # mark as True Positive\n", 285 | " detection_result_dict['TP'] = 1\n", 286 | " # delete matched bounding box\n", 287 | " gt_labels = np.delete(gt_labels, max_gt_id, axis=0)\n", 288 | " gt_bboxes = np.delete(gt_bboxes, max_gt_id, axis=0)\n", 289 | "\n", 290 | " else:\n", 291 | " detection_result_dict['TP'] = 0\n", 292 | "\n", 293 | " results.append(detection_result_dict)\n", 294 | "\n", 295 | " return results" 296 | ], 297 | "execution_count": null, 298 | "outputs": [] 299 | }, 300 | { 301 | "cell_type": "markdown", 302 | "metadata": { 303 | "id": "wgZX7BqUPUtk" 304 | }, 305 | "source": [ 306 | "## Step 3. Evaluate Model (15 points)" 307 | ] 308 | }, 309 | { 310 | "cell_type": "markdown", 311 | "metadata": { 312 | "id": "H5eWfC29RCKJ" 313 | }, 314 | "source": [ 315 | "To assess the quality of the model, we will use the [mAP](https://jonathan-hui.medium.com/\\map-mean-average-precision-for-object-detection-45c121a31173) metric defined as AP Area under the curve. To do this, you will need to calculate `recall` and` precision`." 
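Since `results` is sorted by score, precision and recall at every score cut-off follow from a cumulative sum over the TP flags. A sketch of the `## YOUR CODE HERE` part of the `evaluate` function below (`nbr_boxes` is the total ground-truth box count the function accumulates):

```python
tp = np.array([r['TP'] for r in results])
tp_cumsum = np.cumsum(tp)
precision = tp_cumsum / np.arange(1, len(tp) + 1)  # TP / (predictions considered so far)
recall = tp_cumsum / nbr_boxes                     # TP / (all ground-truth boxes)
```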
316 | ] 317 | }, 318 | { 319 | "cell_type": "code", 320 | "metadata": { 321 | "id": "WK04btkgQAq3" 322 | }, 323 | "source": [ 324 | "from sklearn.metrics import auc" 325 | ], 326 | "execution_count": null, 327 | "outputs": [] 328 | }, 329 | { 330 | "cell_type": "code", 331 | "metadata": { 332 | "id": "jhmZOkQajpwZ" 333 | }, 334 | "source": [ 335 | "def evaluate(model, test_loader, device):\n", 336 | " results = []\n", 337 | " model.eval()\n", 338 | " nbr_boxes = 0\n", 339 | " with torch.no_grad():\n", 340 | " for batch, (images, targets_true) in enumerate(test_loader):\n", 341 | " images = list(image.to(device).float() for image in images)\n", 342 | " targets_pred = model(images)\n", 343 | "\n", 344 | " targets_true = [{k: v.cpu().float() for k, v in t.items()} for t in targets_true]\n", 345 | " targets_pred = [{k: v.cpu().float() for k, v in t.items()} for t in targets_pred]\n", 346 | "\n", 347 | " for i in range(len(targets_true)):\n", 348 | " target_true = targets_true[i]\n", 349 | " target_pred = targets_pred[i]\n", 350 | " nbr_boxes += target_true['labels'].shape[0]\n", 351 | "\n", 352 | " results.extend(evaluate_sample(target_pred, target_true))\n", 353 | "\n", 354 | " results = sorted(results, key=lambda k: k['score'], reverse=True)\n", 355 | "\n", 356 | " # compute precision and recall to calculate mAP\n", 357 | "\n", 358 | " ## YOUR CODE HERE\n", 359 | "\n", 360 | " return auc(recall, precision)" 361 | ], 362 | "execution_count": null, 363 | "outputs": [] 364 | }, 365 | { 366 | "cell_type": "markdown", 367 | "metadata": { 368 | "id": "4quQZewevyvp" 369 | }, 370 | "source": [ 371 | "## Step 4. Train functions (30 points)" 372 | ] 373 | }, 374 | { 375 | "cell_type": "markdown", 376 | "metadata": { 377 | "id": "DNBQcU_cSgts" 378 | }, 379 | "source": [ 380 | "Now define the functions for training the model." 381 | ] 382 | }, 383 | { 384 | "cell_type": "code", 385 | "metadata": { 386 | "id": "26JW24UJSmaq" 387 | }, 388 | "source": [ 389 | "def train_one_epoch(model, train_dataloader, optimizer, device):\n", 390 | " # YOUR CODE HERE\n", 391 | " # TRAIN YOUR MODEL ON THE train_dataloader\n", 392 | " pass\n", 393 | "\n", 394 | "\n", 395 | "def train(model, train_dataloader, val_dataloader, optimizer, device, n_epochs=10):\n", 396 | " for epoch in range(n_epochs):\n", 397 | " model.eval()\n", 398 | " test_auc = evaluate(model, val_dataloader, device=device)\n", 399 | " print(\"AUC ON TEST: {:.4f}\".format(test_auc))\n", 400 | " model.train()\n", 401 | " train_one_epoch(model, train_dataloader, optimizer, device=device)" 402 | ], 403 | "execution_count": null, 404 | "outputs": [] 405 | }, 406 | { 407 | "cell_type": "markdown", 408 | "metadata": { 409 | "id": "a5zi3LMUwXao" 410 | }, 411 | "source": [ 412 | "## Step 5. Train model (30 points)" 413 | ] 414 | }, 415 | { 416 | "cell_type": "markdown", 417 | "metadata": { 418 | "id": "gOIm5e6TT7Pm" 419 | }, 420 | "source": [ 421 | "Train the model for object detection on a training dataset and achieve a PR-AUC of at least 0.91 on a test dataset. You can use models from `torchvision`." 422 | ] 423 | }, 424 | { 425 | "cell_type": "markdown", 426 | "metadata": { 427 | "id": "-AJ0Bi_JUHoe" 428 | }, 429 | "source": [ 430 | "It is mandatory to use augmentation for training to achieve the desired result on the test. Use the `torchvision.transforms` module or the [albumentations](https://albumentations.ai/) library. The latter library is especially convenient since it can calculate the new coordinates of bounding boxes itself after image transformations. 
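For illustration, here is one possible shape for the two stubs involved: a minimal `albumentations` pipeline (the exact transforms are an assumption, pick your own) and a training epoch for torchvision-style detection models, which return a dict of losses when called in train mode with targets:

```python
import albumentations as A
from albumentations.pytorch import ToTensorV2

# bbox_params makes albumentations recompute the boxes along with the image
train_transform = A.Compose(
    [A.HorizontalFlip(p=0.5), A.RandomBrightnessContrast(p=0.2), ToTensorV2()],
    bbox_params=A.BboxParams(format="pascal_voc", label_fields=["labels"]),
)

def train_one_epoch(model, train_dataloader, optimizer, device):
    model.train()
    for images, targets in train_dataloader:
        images = [img.to(device).float() for img in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
        loss_dict = model(images, targets)  # e.g. fasterrcnn_resnet50_fpn returns a dict of partial losses
        loss = sum(loss_dict.values())
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
```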
We advise you to pay attention to this [tutorial](https://albumentations.ai/docs/getting_started/bounding_boxes_augmentation/). Please note that the code written in the dataset above is only correct if you are using `albumentations`." 431 | ] 432 | }, 433 | { 434 | "cell_type": "code", 435 | "metadata": { 436 | "id": "OfX1ayfrTgNO" 437 | }, 438 | "source": [ 439 | "train_transform = # YOUR CODE FOR AUGMENTATIONS\n", 440 | "val_transform = # YOUR CODE FOR VALIDATION AUGMENTATIONS\n", 441 | "# HINT: TRAIN TRANSFORM OBVIOUSLY SHOULD BE HARDER THAN THOSE FOR VALIDATION\n", 442 | "\n", 443 | "train_dataset = FruitDataset(\"./train_zip/train\", transform=train_transform)\n", 444 | "val_dataset = FruitDataset(\"./test_zip/test\", transform=val_transform)" 445 | ], 446 | "execution_count": null, 447 | "outputs": [] 448 | }, 449 | { 450 | "cell_type": "code", 451 | "metadata": { 452 | "id": "rN_qlKqnwidK" 453 | }, 454 | "source": [ 455 | "model = # YOUR CODE, CREATE MODEL FOR OBJECT DETECTION\r\n", 456 | "# HINT: YOU CAN USE torchvision.models AND torchvision.models.detection\r\n", 457 | "# READ OFFICIAL DOCS FOR MORE INFO\r\n", 458 | "\r\n", 459 | "optimizer = # SELECT YOUR OPTIMIZER\r\n", 460 | "train_dataloader = # CREATE YOUR DATALOADER, SELECT APPROPRIATE batch_size\r\n", 461 | "val_dataloader = # CREATE VALIDATION DATALOADER\r\n", 462 | "n_epochs = # SELECT APPROPRIATE NUMBER OF EPOCHS\r\n", 463 | "device = torch.device(\"cuda:0\") if torch.cuda.is_available() else torch.device(\"cpu\")\r\n", 464 | "\r\n", 465 | "train(model, train_dataloader, val_dataloader, optimizer, device, n_epochs)" 466 | ], 467 | "execution_count": null, 468 | "outputs": [] 469 | }, 470 | { 471 | "cell_type": "markdown", 472 | "metadata": { 473 | "id": "c60A2kk0S8R9" 474 | }, 475 | "source": [ 476 | "Output the final quality of the model." 477 | ] 478 | }, 479 | { 480 | "cell_type": "code", 481 | "metadata": { 482 | "id": "q0pjpdNGS3n-" 483 | }, 484 | "source": [ 485 | "evaluate(model, val_dataloader, device)" 486 | ], 487 | "execution_count": null, 488 | "outputs": [] 489 | }, 490 | { 491 | "cell_type": "markdown", 492 | "metadata": { 493 | "id": "RdAOThYUS-91" 494 | }, 495 | "source": [ 496 | "Draw predicted bounding boxes for any two images from the test dataset."
497 | ] 498 | }, 499 | { 500 | "cell_type": "code", 501 | "metadata": { 502 | "id": "qCrtr-x9TA8G" 503 | }, 504 | "source": [ 505 | "image, labels = next(iter(val_dataset))\r\n", 506 | "pred = model(image.unsqueeze(0).to(device))[0]" 507 | ], 508 | "execution_count": null, 509 | "outputs": [] 510 | }, 511 | { 512 | "cell_type": "code", 513 | "metadata": { 514 | "id": "iUQq3OxvTGti" 515 | }, 516 | "source": [ 517 | "from PIL import ImageDraw\r\n", 518 | "\r\n", 519 | "image = torchvision.transforms.ToPILImage()(image)\r\n", 520 | "draw = ImageDraw.Draw(image)\r\n", 521 | "for box in labels['boxes']:\r\n", 522 | " draw.rectangle([(box[0], box[1]), (box[2], box[3])], outline='green')\r\n", 523 | " \r\n", 524 | "for box in pred['boxes']:\r\n", 525 | " draw.rectangle([(box[0], box[1]), (box[2], box[3])], outline='red')\r\n", 526 | "image" 527 | ], 528 | "execution_count": null, 529 | "outputs": [] 530 | } 531 | ] 532 | } -------------------------------------------------------------------------------- /week05/week05_generating_names_with_rnn.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "ffuHOST6CsLm" 7 | }, 8 | "source": [ 9 | "# Generating names with recurrent neural networks\n", 10 | "\n", 11 | "In this programming assignment you'll find yourself delving into the heart (and other intestines) of recurrent neural networks on a class of toy problems.\n", 12 | "\n", 13 | "Struggling to find a name for a variable? Let's see how you'll come up with a name for your son/daughter. Surely no human has expertise over what is a good child name, so let us train an RNN instead." 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": null, 19 | "metadata": { 20 | "id": "WulV-Skdzc8Y" 21 | }, 22 | "outputs": [], 23 | "source": [ 24 | "%%bash\n", 25 | "\n", 26 | "shred -u setup_colab.py\n", 27 | "\n", 28 | "wget https://raw.githubusercontent.com/hse-aml/intro-to-dl-pytorch/main/utils/setup_colab.py -O setup_colab.py" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "metadata": { 35 | "id": "NBDyjj2ezc8Y" 36 | }, 37 | "outputs": [], 38 | "source": [ 39 | "import setup_colab\n", 40 | "\n", 41 | "setup_colab.setup_week05()" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "metadata": { 48 | "id": "dloEnPemCsLt" 49 | }, 50 | "outputs": [], 51 | "source": [ 52 | "import numpy as np\n", 53 | "import matplotlib.pyplot as plt" 54 | ] 55 | }, 56 | { 57 | "cell_type": "markdown", 58 | "metadata": { 59 | "id": "zOVCrUnHQmJT" 60 | }, 61 | "source": [ 62 | "### Fill in your Coursera token and email\n", 63 | "To successfully submit your answers to our grader, please fill in your Coursera submission token and email." 
64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": null, 69 | "metadata": { 70 | "id": "UAeDUQxwQnUa" 71 | }, 72 | "outputs": [], 73 | "source": [ 74 | "import grading\n", 75 | "\n", 76 | "grader = grading.Grader(\n", 77 | " assignment_key=\"cULEpp2NEeemQBKZKgu93A\",\n", 78 | " all_parts=[\"pttMO\", \"uly0D\", \"mf20L\", \"zwTu9\"]\n", 79 | ")" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": null, 85 | "metadata": { 86 | "id": "hnklRR5-QnXO" 87 | }, 88 | "outputs": [], 89 | "source": [ 90 | "# token expires every 30 min\n", 91 | "COURSERA_TOKEN = \"### YOUR TOKEN HERE ###\"\n", 92 | "COURSERA_EMAIL = \"### YOUR EMAIL HERE ###\"" 93 | ] 94 | }, 95 | { 96 | "cell_type": "markdown", 97 | "metadata": { 98 | "id": "mTT2uW5xCsLu" 99 | }, 100 | "source": [ 101 | "# Load data\n", 102 | "The dataset contains ~8k names from different cultures, all in latin transcript.\n", 103 | "\n", 104 | "This notebook has been designed so as to allow you to quickly swap names for something similar: deep learning article titles, IKEA furniture, pokemon names, etc." 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": null, 110 | "metadata": { 111 | "ExecuteTime": { 112 | "end_time": "2018-08-13T20:26:42.701832Z", 113 | "start_time": "2018-08-13T20:26:42.697766Z" 114 | }, 115 | "id": "dhFyOX6PCsLv" 116 | }, 117 | "outputs": [], 118 | "source": [ 119 | "start_token = \" \" # so that the network knows that we're generating a first token\n", 120 | "\n", 121 | "# this is the token for padding,\n", 122 | "# we will add fake pad token at the end of names \n", 123 | "# to make them of equal size for further batching\n", 124 | "pad_token = \"#\"\n", 125 | "\n", 126 | "with open(\"names.txt\") as f:\n", 127 | " names = f.read()[:-1].split('\\n')\n", 128 | " names = [start_token + name for name in names]" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": null, 134 | "metadata": { 135 | "ExecuteTime": { 136 | "end_time": "2018-08-13T20:26:42.707885Z", 137 | "start_time": "2018-08-13T20:26:42.703302Z" 138 | }, 139 | "id": "Kf43mc6CCsLv" 140 | }, 141 | "outputs": [], 142 | "source": [ 143 | "print('number of samples:', len(names))\n", 144 | "for x in names[::1000]:\n", 145 | " print(x)" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": null, 151 | "metadata": { 152 | "ExecuteTime": { 153 | "end_time": "2018-08-13T20:26:42.857411Z", 154 | "start_time": "2018-08-13T20:26:42.709371Z" 155 | }, 156 | "id": "72rNxCG9CsLv" 157 | }, 158 | "outputs": [], 159 | "source": [ 160 | "MAX_LENGTH = max(map(len, names))\n", 161 | "print(\"max length:\", MAX_LENGTH)\n", 162 | "\n", 163 | "plt.title('Sequence length distribution')\n", 164 | "plt.hist(list(map(len, names)), bins=25);" 165 | ] 166 | }, 167 | { 168 | "cell_type": "markdown", 169 | "metadata": { 170 | "id": "T3VEZIuHCsLv" 171 | }, 172 | "source": [ 173 | "# Text processing\n", 174 | "\n", 175 | "First we need to collect a \"vocabulary\" of all unique tokens i.e. unique characters. We can then encode inputs as a sequence of character ids." 
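One possible way to build the vocabulary asked for in the next cell (a sketch; any consistent ordering of the characters works):

```python
# every distinct character across all names (start_token ' ' is already in them),
# plus the pad token we introduced above
tokens = sorted(set(''.join(names)) | {pad_token})
```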
176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": null, 181 | "metadata": { 182 | "ExecuteTime": { 183 | "end_time": "2018-08-13T20:26:42.864592Z", 184 | "start_time": "2018-08-13T20:26:42.858725Z" 185 | }, 186 | "id": "CLCx5pkcCsLw" 187 | }, 188 | "outputs": [], 189 | "source": [ 190 | "tokens = ### YOUR CODE HERE: all unique characters in the dataset ###\n", 191 | "\n", 192 | "num_tokens = len(tokens)\n", 193 | "print ('num_tokens = ', num_tokens)\n", 194 | "\n", 195 | "assert 50 < num_tokens < 60, \"Names should contain within 50 and 60 unique tokens depending on encoding\"" 196 | ] 197 | }, 198 | { 199 | "cell_type": "markdown", 200 | "metadata": { 201 | "id": "61QZczNkCsLw" 202 | }, 203 | "source": [ 204 | "### Cast everything from symbols into identifiers\n", 205 | "\n", 206 | "Instead of symbols we'll feed our recurrent neural network with ids of characters from our dictionary.\n", 207 | "\n", 208 | "To create such dictionary, let's assign `token_to_id`" 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": null, 214 | "metadata": { 215 | "ExecuteTime": { 216 | "end_time": "2018-08-13T20:26:42.870330Z", 217 | "start_time": "2018-08-13T20:26:42.866135Z" 218 | }, 219 | "id": "9AxRKAp0CsLx" 220 | }, 221 | "outputs": [], 222 | "source": [ 223 | "token_to_id =### YOUR CODE HERE: create a dictionary of {symbol -> its index in tokens}\n", 224 | "\n", 225 | "assert len(tokens) == len(token_to_id), \"dictionaries must have same size\"" 226 | ] 227 | }, 228 | { 229 | "cell_type": "code", 230 | "execution_count": null, 231 | "metadata": { 232 | "ExecuteTime": { 233 | "end_time": "2018-08-13T20:26:42.875943Z", 234 | "start_time": "2018-08-13T20:26:42.871834Z" 235 | }, 236 | "id": "vJ4tU0V-CsLx" 237 | }, 238 | "outputs": [], 239 | "source": [ 240 | "def to_matrix(lines, max_len=None, pad=token_to_id[pad_token], dtype='int32', batch_first = True):\n", 241 | " \"\"\"Casts a list of names into rnn-digestable matrix\"\"\"\n", 242 | " \n", 243 | " max_len = max_len or max(map(len, lines))\n", 244 | " lines_ix = np.zeros([len(lines), max_len], dtype) + pad\n", 245 | "\n", 246 | " for i in range(len(lines)):\n", 247 | " line_ix = [token_to_id[c] for c in lines[i]]\n", 248 | " lines_ix[i, :len(line_ix)] = line_ix\n", 249 | " \n", 250 | " if not batch_first: # convert [batch, time] into [time, batch]\n", 251 | " lines_ix = np.transpose(lines_ix)\n", 252 | "\n", 253 | " return lines_ix" 254 | ] 255 | }, 256 | { 257 | "cell_type": "code", 258 | "execution_count": null, 259 | "metadata": { 260 | "ExecuteTime": { 261 | "end_time": "2018-08-13T20:26:42.883107Z", 262 | "start_time": "2018-08-13T20:26:42.877186Z" 263 | }, 264 | "id": "kyBkrX0BCsLx" 265 | }, 266 | "outputs": [], 267 | "source": [ 268 | "# Example: cast 4 random names to padded matrices (so that we can easily batch them)\n", 269 | "print('\\n'.join(names[::2000]))\n", 270 | "print(to_matrix(names[::2000]))" 271 | ] 272 | }, 273 | { 274 | "cell_type": "markdown", 275 | "metadata": { 276 | "id": "rn0in_1ICsLx" 277 | }, 278 | "source": [ 279 | "# Defining a recurrent neural network\n", 280 | "\n", 281 | "We can rewrite recurrent neural network as a consecutive application of dense layer to input $x_t$ and previous rnn state $h_t$. 
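In symbols, one step of such a vanilla RNN cell (with the tanh nonlinearity, which is also the `nn.RNN` default) looks like this, where $[\,\cdot\,;\,\cdot\,]$ denotes concatenation:

$$ h_{t+1} = \tanh\big(W \,[\,\text{emb}(x_t)\,;\,h_t\,] + b\big) $$
$$ \text{logits}_{t+1} = W_{out}\, h_{t+1} + b_{out} $$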
This is exactly what we're gonna do now.\n", 282 | "\n", 283 | "\n", 284 | "Since we're training a language model, there should also be:\n", 285 | "* An embedding layer that converts character id x_t to a vector.\n", 286 | "* An output layer that predicts probabilities of next phoneme based on h_t+1" 287 | ] 288 | }, 289 | { 290 | "cell_type": "code", 291 | "execution_count": null, 292 | "metadata": { 293 | "id": "Fq_BI6hpUPqS" 294 | }, 295 | "outputs": [], 296 | "source": [ 297 | "import torch, torch.nn as nn\n", 298 | "import torch.nn.functional as F" 299 | ] 300 | }, 301 | { 302 | "cell_type": "code", 303 | "execution_count": null, 304 | "metadata": { 305 | "ExecuteTime": { 306 | "end_time": "2018-08-13T20:26:44.044903Z", 307 | "start_time": "2018-08-13T20:26:44.041084Z" 308 | }, 309 | "id": "Yq0HH3UfCsLy" 310 | }, 311 | "outputs": [], 312 | "source": [ 313 | "class CharRNNCell(nn.Module):\n", 314 | " \"\"\"\n", 315 | " Implement the scheme above as torch module\n", 316 | " \"\"\"\n", 317 | " \n", 318 | " def __init__(self, num_tokens=len(tokens), embedding_size=16, rnn_num_units=64):\n", 319 | " super().__init__()\n", 320 | " self.num_units = rnn_num_units\n", 321 | " \n", 322 | " self.embedding = nn.Embedding(num_tokens, embedding_size)\n", 323 | " self.rnn_update = nn.Linear(embedding_size + rnn_num_units, rnn_num_units)\n", 324 | " self.rnn_to_logits = nn.Linear(rnn_num_units, num_tokens)\n", 325 | " \n", 326 | " def forward(self, x, h_prev):\n", 327 | " \"\"\"\n", 328 | " This method computes h_next(x, h_prev) and log P(x_next | h_next)\n", 329 | " We'll call it repeatedly to produce the whole sequence.\n", 330 | " \n", 331 | " :param x: batch of character ids, int64[batch_size]\n", 332 | " :param h_prev: previous rnn hidden states, float32 matrix [batch, rnn_num_units]\n", 333 | " \"\"\"\n", 334 | " # get vector embedding of x\n", 335 | " x_emb = ### YOUR CODE HERE ###\n", 336 | " # compute next hidden state using self.rnn_update\n", 337 | " # hint: use torch.cat(..., dim=...) 
for concatenation\n", 338 | "\n", 339 | " h_next = ### YOUR CODE HERE ###\n", 340 | " \n", 341 | " assert h_next.size() == h_prev.size()\n", 342 | " \n", 343 | " #compute logits for next character probs\n", 344 | " logits = ### YOUR CODE HERE ###\n", 345 | " \n", 346 | " return h_next, logits\n", 347 | " \n", 348 | " def initial_state(self, batch_size):\n", 349 | " \"\"\" return rnn state before it processes first input (aka h0) \"\"\"\n", 350 | " return torch.zeros(batch_size, self.num_units)" 351 | ] 352 | }, 353 | { 354 | "cell_type": "markdown", 355 | "metadata": { 356 | "id": "RukLWQHqCsLy" 357 | }, 358 | "source": [ 359 | "# RNN: loop\n", 360 | "\n", 361 | "Once `rnn_one_step` is ready, let's apply it in a loop over name characters to get predictions -- we will generate names character by character starting with start_token:\n", 362 | "\n", 363 | "" 364 | ] 365 | }, 366 | { 367 | "cell_type": "code", 368 | "execution_count": null, 369 | "metadata": { 370 | "ExecuteTime": { 371 | "end_time": "2018-08-13T20:26:44.342948Z", 372 | "start_time": "2018-08-13T20:26:44.056136Z" 373 | }, 374 | "id": "JetmBlR3CsLy" 375 | }, 376 | "outputs": [], 377 | "source": [ 378 | "def rnn_loop(char_rnn, batch_ix):\n", 379 | " \"\"\"\n", 380 | " Computes logits_seq(next_character) for all time-steps in batch_ix\n", 381 | " :param batch_ix: an int32 matrix of shape [batch, time], output of to_matrix(lines)\n", 382 | " \"\"\"\n", 383 | " batch_size, max_length = batch_ix.size()\n", 384 | " hid_state = char_rnn.initial_state(batch_size)\n", 385 | " logits_seq = []\n", 386 | "\n", 387 | " for x_t in batch_ix.transpose(0,1):\n", 388 | " hid_state, logits = char_rnn(x_t, hid_state) # <-- here we call your one-step code\n", 389 | " logits_seq.append(logits)\n", 390 | " \n", 391 | " return torch.stack(logits_seq, dim=1)" 392 | ] 393 | }, 394 | { 395 | "cell_type": "markdown", 396 | "metadata": { 397 | "id": "EQmar3Z6vzWA" 398 | }, 399 | "source": [ 400 | "Check that the output of rnn_loop has the right format:" 401 | ] 402 | }, 403 | { 404 | "cell_type": "code", 405 | "execution_count": null, 406 | "metadata": { 407 | "id": "pq_nvp_hCsLz" 408 | }, 409 | "outputs": [], 410 | "source": [ 411 | "batch_ix = to_matrix(names[:5])\n", 412 | "batch_ix = torch.tensor(batch_ix, dtype=torch.int64)\n", 413 | "\n", 414 | "logits_seq = rnn_loop(CharRNNCell(), batch_ix)" 415 | ] 416 | }, 417 | { 418 | "cell_type": "code", 419 | "execution_count": null, 420 | "metadata": { 421 | "id": "sIE8z_NPvdJs" 422 | }, 423 | "outputs": [], 424 | "source": [ 425 | "## GRADED PART, DO NOT CHANGE!\n", 426 | "grader.set_answer(\"mf20L\", tuple(logits_seq.size()))" 427 | ] 428 | }, 429 | { 430 | "cell_type": "markdown", 431 | "metadata": { 432 | "id": "xs6EJtCyCsL0" 433 | }, 434 | "source": [ 435 | "## Training\n", 436 | "We train our char-rnn exactly the same way we train any deep learning model, the only difference is that this time we sample strings. \n", 437 | "\n", 438 | "To compute the loss in a vectorized manner, we can take `batch_ix[:, 1:]` -- a matrix of token ids shifted 1 step to the left so i-th element is acutally the \"next token\" for i-th prediction. 
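For the loss line in the training loop below, one standard option is to flatten the batch and time dimensions so that `CrossEntropyLoss` sees a plain classification problem (a sketch, assuming `criterion = nn.CrossEntropyLoss()` as in the next cell):

```python
loss = criterion(
    predictions_logits.reshape(-1, num_tokens),  # [batch * time, num_tokens]
    actual_next_tokens.reshape(-1),              # [batch * time]
)
```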
" 439 | ] 440 | }, 441 | { 442 | "cell_type": "code", 443 | "execution_count": null, 444 | "metadata": { 445 | "id": "n9MFAe_bCsL1" 446 | }, 447 | "outputs": [], 448 | "source": [ 449 | "from IPython.display import clear_output\n", 450 | "from random import sample\n", 451 | "\n", 452 | "DEVICE = torch.device('cpu') # you can change to `cuda`\n", 453 | "\n", 454 | "char_rnn = CharRNNCell().to(DEVICE)\n", 455 | "opt = torch.optim.Adam(char_rnn.parameters())\n", 456 | "criterion = nn.CrossEntropyLoss()\n", 457 | "history = []" 458 | ] 459 | }, 460 | { 461 | "cell_type": "code", 462 | "execution_count": null, 463 | "metadata": { 464 | "id": "NfylXTtzCsL1" 465 | }, 466 | "outputs": [], 467 | "source": [ 468 | "for i in range(1000):\n", 469 | " # for simplisity we will \n", 470 | " batch_ix = to_matrix(sample(names, 32))\n", 471 | " batch_ix = torch.tensor(batch_ix, dtype=torch.int64).to(DEVICE)\n", 472 | " \n", 473 | " # do forward pass\n", 474 | " logits_seq = rnn_loop(char_rnn, batch_ix)\n", 475 | "\n", 476 | " # make shifted versions of batch and predictions to compute the loss \n", 477 | " predictions_logits = logits_seq[:, :-1]\n", 478 | " actual_next_tokens = batch_ix[:, 1:]\n", 479 | " \n", 480 | " # compute loss\n", 481 | " loss = ### YOUR CODE HERE ###\n", 482 | " \n", 483 | " # train with backprop\n", 484 | " ### YOUR CODE HERE ###\n", 485 | "\n", 486 | "assert np.mean(history[:10]) > np.mean(history[-10:]), \"RNN didn't converge.\"" 487 | ] 488 | }, 489 | { 490 | "cell_type": "code", 491 | "execution_count": null, 492 | "metadata": { 493 | "id": "_HVv8Wjn6EHm" 494 | }, 495 | "outputs": [], 496 | "source": [ 497 | "## GRADED PART, DO NOT CHANGE!\n", 498 | "grader.set_answer(\"zwTu9\", int(np.mean(history[:10]) > np.mean(history[-10:])))" 499 | ] 500 | }, 501 | { 502 | "cell_type": "markdown", 503 | "metadata": { 504 | "id": "Y5-v3rfx4TSp" 505 | }, 506 | "source": [ 507 | "Here we computed loss over all symbols including pad tokens at the end of each name. In practice it would be better to exclude all pad tokens except one for each sequence. We need our model to be able to generate one pad token at the end of the sequence to mark the end of the sequence, but there is no need to generate all next pad tokens (we use them just for \n", 508 | "convenient data representation). \n", 509 | "\n", 510 | "Parameter ignore_index of CrossEntropyLoss allows to do so.\n", 511 | "\n" 512 | ] 513 | }, 514 | { 515 | "cell_type": "markdown", 516 | "metadata": { 517 | "id": "MVXLPYYECsL1" 518 | }, 519 | "source": [ 520 | "## RNN: sampling\n", 521 | "Once we've trained our network a bit, let's get to actually generating stuff. All we need is the single rnn step function you have defined in char_rnn.forward." 522 | ] 523 | }, 524 | { 525 | "cell_type": "code", 526 | "execution_count": null, 527 | "metadata": { 528 | "id": "FwI8hhSxCsL1" 529 | }, 530 | "outputs": [], 531 | "source": [ 532 | "def generate_sample(char_rnn, seed_phrase=' ', max_length=MAX_LENGTH, temperature=1.0):\n", 533 | " '''\n", 534 | " The function generates text given a start phrase.\n", 535 | " :param seed_phrase: prefix characters. The RNN is asked to continue the phrase\n", 536 | " :param max_length: maximum output length, including seed_phrase\n", 537 | " :param temperature: coefficient for sampling. 
Higher temperature produces more chaotic outputs,\n", 538 | " smaller temperature converges to the single most likely output\n", 539 | " '''\n", 540 | " \n", 541 | " x_sequence = [token_to_id[token] for token in seed_phrase]\n", 542 | " x_sequence = torch.tensor([x_sequence], dtype=torch.int64)\n", 543 | " hid_state = char_rnn.initial_state(batch_size=1)\n", 544 | " \n", 545 | " #feed the seed phrase, if any\n", 546 | " for i in range(len(seed_phrase) - 1):\n", 547 | " hid_state, _ = char_rnn(x_sequence[:, i], hid_state)\n", 548 | " \n", 549 | " #start generating\n", 550 | " for _ in range(max_length - len(seed_phrase)):\n", 551 | " hid_state, logits_next = char_rnn(x_sequence[:, -1], hid_state)\n", 552 | " p_next = F.softmax(logits_next / temperature, dim=-1).data.numpy()[0]\n", 553 | " \n", 554 | " # sample next token and push it back into x_sequence\n", 555 | " next_ix = np.random.choice(num_tokens,p=p_next)\n", 556 | " next_ix = torch.tensor([[next_ix]], dtype=torch.int64)\n", 557 | " x_sequence = torch.cat([x_sequence, next_ix], dim=1)\n", 558 | " \n", 559 | " return ''.join([tokens[ix] for ix in x_sequence.data.numpy()[0]])" 560 | ] 561 | }, 562 | { 563 | "cell_type": "code", 564 | "execution_count": null, 565 | "metadata": { 566 | "id": "vHdkvurmCsL2" 567 | }, 568 | "outputs": [], 569 | "source": [ 570 | "for _ in range(10):\n", 571 | " print(generate_sample(char_rnn))" 572 | ] 573 | }, 574 | { 575 | "cell_type": "code", 576 | "execution_count": null, 577 | "metadata": { 578 | "id": "2RnK9FZACsL2" 579 | }, 580 | "outputs": [], 581 | "source": [ 582 | "for _ in range(10):\n", 583 | " print(generate_sample(char_rnn, seed_phrase=' Trump'))" 584 | ] 585 | }, 586 | { 587 | "cell_type": "markdown", 588 | "metadata": { 589 | "id": "FbukAsjoCsL8" 590 | }, 591 | "source": [ 592 | "## A higher-level implementation\n", 593 | "What we just did is a manual low-level implementation of an RNN. While it's cool, we guess you won't like the idea of re-writing it from scratch on every occasion.\n", 594 | "\n", 595 | "As you might have guessed, torch has a solution for this. To be more specific, there are two options:\n", 596 | "\n", 597 | "* `nn.RNNCell(emb_size, rnn_num_units)` - implements a single step of an RNN, just like you did: basically concat-linear-tanh.\n", 598 | "* `nn.RNN(emb_size, rnn_num_units)` - implements the whole rnn_loop for you.\n", 599 | "\n", 600 | "There's also `nn.LSTMCell` vs `nn.LSTM`, `nn.GRUCell` vs `nn.GRU`, and so on.\n", 601 | "\n", 602 | "Below is a quick sketch of both modules on random tensors; after that, we'll rewrite the char_rnn and rnn_loop using the high-level rnn API."
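 603 | ] 604 | }, 605 | { 606 | "cell_type": "code", 607 | "execution_count": null, 608 | "metadata": {}, 609 | "outputs": [], 610 | "source": [ 611 | "# a quick sketch on random data (added for illustration; not part of the assignment):\n", 612 | "# nn.RNNCell consumes one time-step, nn.RNN consumes the whole sequence at once\n", 613 | "demo_cell = nn.RNNCell(16, 64) # input size 16, hidden size 64\n", 614 | "demo_rnn = nn.RNN(16, 64, batch_first=True)\n", 615 | "\n", 616 | "x = torch.randn(5, 10, 16) # [batch, time, emb_size]\n", 617 | "h = torch.zeros(5, 64) # initial hidden state\n", 618 | "for t in range(x.shape[1]): # the manual loop, one step at a time\n", 619 | " h = demo_cell(x[:, t], h)\n", 620 | "\n", 621 | "out, h_last = demo_rnn(x) # the same loop, done for you\n", 622 | "print(out.shape, h_last.shape) # [5, 10, 64] and [1, 5, 64]" 623 | ] 624 | },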
 625 | { 626 | "cell_type": "code", 627 | "execution_count": null, 628 | "metadata": { 629 | "id": "UacxOUHMCsL9" 630 | }, 631 | "outputs": [], 632 | "source": [ 633 | "class CharRNNLoop(nn.Module):\n", 634 | " def __init__(self, num_tokens=num_tokens, emb_size=16, rnn_num_units=64):\n", 635 | " super(self.__class__, self).__init__()\n", 636 | " self.emb = nn.Embedding(num_tokens, emb_size)\n", 637 | " self.rnn = nn.RNN(emb_size, rnn_num_units, batch_first=True)\n", 638 | " self.hid_to_logits = nn.Linear(rnn_num_units, num_tokens)\n", 639 | " \n", 640 | " def forward(self, x):\n", 641 | " \"\"\"\n", 642 | " Computes log P(next_character) for all time-steps in x\n", 643 | " :param x: an int32 matrix of shape [batch, time], output of to_matrix(lines)\n", 644 | " :output next_logp: a float32 tensor [batch, time, dictionary_size]\n", 645 | " \"\"\"\n", 646 | " ### YOUR CODE HERE ###\n" 647 | ] 648 | }, 649 | { 650 | "cell_type": "markdown", 651 | "metadata": { 652 | "id": "jouqgGYkCG3z" 653 | }, 654 | "source": [ 655 | "Train the model using the same training code and check that it works very similarly to our hand-written RNN. " 656 | ] 657 | }, 658 | { 659 | "cell_type": "code", 660 | "execution_count": null, 661 | "metadata": { 662 | "id": "NMQSqcfRBPLD" 663 | }, 664 | "outputs": [], 665 | "source": [ 666 | "model = CharRNNLoop().to(DEVICE)\n", 667 | "opt = torch.optim.Adam(model.parameters())\n", 668 | "history_high = [] # put the history in this variable for grading" 669 | ] 670 | }, 671 | { 672 | "cell_type": "code", 673 | "execution_count": null, 674 | "metadata": { 675 | "id": "RiK3feiNBdTk" 676 | }, 677 | "outputs": [], 678 | "source": [ 679 | "### YOUR CODE HERE ###\n", 680 | "\n", 681 | "assert np.mean(history_high[:10]) > np.mean(history_high[-10:]), \"RNN didn't converge.\"" 682 | ] 683 | }, 684 | { 685 | "cell_type": "code", 686 | "execution_count": null, 687 | "metadata": { 688 | "id": "HpWe48ahB0VE" 689 | }, 690 | "outputs": [], 691 | "source": [ 692 | "## GRADED PART, DO NOT CHANGE!\n", 693 | "grader.set_answer(\"pttMO\", int(np.mean(history_high[:10]) > np.mean(history_high[-10:])))\n", 694 | "grader.set_answer(\"uly0D\", len(set([generate_sample(char_rnn, ' Sad') for _ in range(25)])))" 695 | ] 696 | }, 697 | { 698 | "cell_type": "code", 699 | "execution_count": null, 700 | "metadata": { 701 | "id": "HOFdJpvqzc8i" 702 | }, 703 | "outputs": [], 704 | "source": [ 705 | "grader.submit(COURSERA_EMAIL, COURSERA_TOKEN)" 706 | ] 707 | }, 708 | { 709 | "cell_type": "markdown", 710 | "metadata": { 711 | "id": "HnOnFHU-CsL9" 712 | }, 713 | "source": [ 714 | "Here's another example, this time with an LSTM:" 715 | ] 716 | }, 717 | { 718 | "cell_type": "code", 719 | "execution_count": null, 720 | "metadata": { 721 | "id": "j3sn0jpUCsL9" 722 | }, 723 | "outputs": [], 724 | "source": [ 725 | "import torch, torch.nn as nn\n", 726 | "import torch.nn.functional as F\n", 727 | "\n", 728 | "\n", 729 | "class CharLSTMCell(nn.Module):\n", 730 | " \"\"\"\n", 731 | " Implements something like CharRNNCell, but with LSTM\n", 732 | " \"\"\"\n", 733 | " def __init__(self, num_tokens=len(tokens), embedding_size=16, rnn_num_units=64):\n", 734 | " super().__init__()\n", 735 | " \n", 736 | " self.num_units = rnn_num_units\n", 737 | " self.emb = nn.Embedding(num_tokens, embedding_size)\n", 738 | " self.lstm = nn.LSTMCell(embedding_size, rnn_num_units)\n", 739 | " self.rnn_to_logits = nn.Linear(rnn_num_units, num_tokens)\n", 740 | " \n", 741 | " def forward(self, x, prev_state):\n", 742 | " (prev_h, prev_c) = 
prev_state\n", 743 | " (next_h, next_c) = self.lstm(self.emb(x), (prev_h, prev_c))\n", 744 | " logits = self.rnn_to_logits(next_h)\n", 745 | " \n", 746 | " return (next_h, next_c), logits\n", 747 | " \n", 748 | " def initial_state(self, batch_size):\n", 749 | " \"\"\" LSTM has two state variables, cell and hid \"\"\"\n", 750 | " return torch.zeros(batch_size, self.num_units), torch.zeros(batch_size, self.num_units)\n", 751 | " \n", 752 | "char_lstm = CharLSTMCell()" 753 | ] 754 | }, 755 | { 756 | "cell_type": "markdown", 757 | "metadata": { 758 | "id": "pMRUeIzNCsL8" 759 | }, 760 | "source": [ 761 | "# Try it out!\n", 762 | "\n", 763 | "__Disclaimer:__ This part of the assignment is entirely optional. You won't receive bonus points for it. However, it's a fun thing to do. Please share your results on the course forums.\n", 764 | "\n", 765 | "You've just implemented a recurrent language model that can be tasked with generating any kind of sequence, so there's plenty of data you can try it on:\n", 766 | "\n", 767 | "* Novels/poems/songs of your favorite author\n", 768 | "* News titles/clickbait titles\n", 769 | "* Source code of Linux or Tensorflow\n", 770 | "* Molecules in [smiles](https://en.wikipedia.org/wiki/Simplified_molecular-input_line-entry_system) format\n", 771 | "* Melody in notes/chords format\n", 772 | "* IKEA catalog titles\n", 773 | "* Pokemon names\n", 774 | "* Cards from Magic, the Gathering / Hearthstone\n", 775 | "\n", 776 | "If you're willing to give it a try, here's what you wanna look at:\n", 777 | "* Current data format is a sequence of lines, so a novel can be formatted as a list of sentences. Alternatively, you can change data preprocessing altogether.\n", 778 | "* While some datasets are readily available, others can only be scraped from the web. Try `Selenium` or `Scrapy` for that.\n", 779 | "* Make sure MAX_LENGTH is adjusted for longer datasets. \n", 780 | "* More complex tasks require a larger RNN architecture; try more neurons or several layers. They will also require more training iterations.\n", 781 | "* Long-term dependencies in music, novels or molecules are better handled with an LSTM or GRU.\n", 782 | "\n", 783 | "__Good hunting!__" 784 | ] 785 | } 786 | ], 787 | "metadata": { 788 | "colab": { 789 | "collapsed_sections": [], 790 | "name": "week05_generating_names_with_rnn.ipynb", 791 | "provenance": [] 792 | }, 793 | "kernelspec": { 794 | "display_name": "Python 3", 795 | "language": "python", 796 | "name": "python3" 797 | }, 798 | "language_info": { 799 | "codemirror_mode": { 800 | "name": "ipython", 801 | "version": 3 802 | }, 803 | "file_extension": ".py", 804 | "mimetype": "text/x-python", 805 | "name": "python", 806 | "nbconvert_exporter": "python", 807 | "pygments_lexer": "ipython3", 808 | "version": "3.8.5" 809 | } 810 | }, 811 | "nbformat": 4, 812 | "nbformat_minor": 1 813 | } -------------------------------------------------------------------------------- /week02/week02_numpy_neural_network.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "RHVHERZ9yQCg" 7 | }, 8 | "source": [ 9 | "### Your very own neural network\n", 10 | "\n", 11 | "In this programming assignment we're going to build a neural network using naught but pure numpy and steel nerves. It's going to be fun, we promise!\n", 12 | "\n", 13 | "__Disclaimer:__ This assignment is ungraded.
14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": null, 19 | "metadata": { 20 | "id": "Qfl0MexHxKfA" 21 | }, 22 | "outputs": [], 23 | "source": [ 24 | "%%bash\n", 25 | "\n", 26 | "shred -u setup_colab.py\n", 27 | "\n", 28 | "wget https://raw.githubusercontent.com/hse-aml/intro-to-dl-pytorch/main/utils/setup_colab.py -O setup_colab.py" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "metadata": { 35 | "id": "Wd4ktmBUxKfB" 36 | }, 37 | "outputs": [], 38 | "source": [ 39 | "import setup_colab\n", 40 | "\n", 41 | "setup_colab.setup_week02_honor()" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "metadata": { 48 | "id": "ZsP6AyE2yQCo" 49 | }, 50 | "outputs": [], 51 | "source": [ 52 | "import tqdm_utils" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "metadata": { 59 | "id": "Nm1Oc2pOyQCp" 60 | }, 61 | "outputs": [], 62 | "source": [ 63 | "from __future__ import print_function\n", 64 | "import numpy as np\n", 65 | "np.random.seed(42)" 66 | ] 67 | }, 68 | { 69 | "cell_type": "markdown", 70 | "metadata": { 71 | "id": "RBRdghpvyQCq" 72 | }, 73 | "source": [ 74 | "Here goes our main class: a layer that can do .forward() and .backward() passes." 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": null, 80 | "metadata": { 81 | "id": "3Zbf2fgkyQCq" 82 | }, 83 | "outputs": [], 84 | "source": [ 85 | "class Layer:\n", 86 | " \"\"\"\n", 87 | " A building block. Each layer is capable of performing two things:\n", 88 | " \n", 89 | " - Process input to get output: output = layer.forward(input)\n", 90 | " \n", 91 | " - Propagate gradients through itself: grad_input = layer.backward(input, grad_output)\n", 92 | " \n", 93 | " Some layers also have learnable parameters which they update during layer.backward.\n", 94 | " \"\"\"\n", 95 | " def __init__(self):\n", 96 | " \"\"\"Here you can initialize layer parameters (if any) and auxiliary stuff.\"\"\"\n", 97 | " # A dummy layer does nothing\n", 98 | " pass\n", 99 | " \n", 100 | " def forward(self, input):\n", 101 | " \"\"\"\n", 102 | " Takes input data of shape [batch, input_units], returns output data [batch, output_units]\n", 103 | " \"\"\"\n", 104 | " # A dummy layer just returns whatever it gets as input.\n", 105 | " return input\n", 106 | "\n", 107 | " def backward(self, input, grad_output):\n", 108 | " \"\"\"\n", 109 | " Performs a backpropagation step through the layer, with respect to the given input.\n", 110 | " \n", 111 | " To compute loss gradients w.r.t input, you need to apply chain rule (backprop):\n", 112 | " \n", 113 | " d loss / d x = (d loss / d layer) * (d layer / d x)\n", 114 | " \n", 115 | " Luckily, you already receive d loss / d layer as input, so you only need to multiply it by d layer / d x.\n", 116 | " \n", 117 | " If your layer has parameters (e.g. dense layer), you also need to update them here using d loss / d layer\n", 118 | " \"\"\"\n", 119 | " # The gradient of a dummy layer is precisely grad_output, but we'll write it more explicitly\n", 120 | " num_units = input.shape[1]\n", 121 | " \n", 122 | " d_layer_d_input = np.eye(num_units)\n", 123 | " \n", 124 | " return np.dot(grad_output, d_layer_d_input) # chain rule" 125 | ] 126 | }, 127 | { 128 | "cell_type": "markdown", 129 | "metadata": { 130 | "id": "r6zh8GqGyQCr" 131 | }, 132 | "source": [ 133 | "### The road ahead\n", 134 | "\n", 135 | "We're going to build a neural network that classifies MNIST digits. 
To do so, we'll need a few building blocks:\n", 136 | "- Dense layer - a fully-connected layer, $f(X)=X \\cdot W + \\vec{b}$\n", 137 | "- ReLU layer (or any other nonlinearity you want)\n", 138 | "- Loss function - crossentropy\n", 139 | "- Backprop algorithm - stochastic gradient descent with backpropagated gradients\n", 140 | "\n", 141 | "Let's approach them one at a time.\n" 142 | ] 143 | }, 144 | { 145 | "cell_type": "markdown", 146 | "metadata": { 147 | "id": "rqY4Y-jnyQCr" 148 | }, 149 | "source": [ 150 | "### Nonlinearity layer\n", 151 | "\n", 152 | "This is the simplest layer you can get: it simply applies a nonlinearity to each element of its input." 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": null, 158 | "metadata": { 159 | "id": "gU5L2X80yQCr" 160 | }, 161 | "outputs": [], 162 | "source": [ 163 | "class ReLU(Layer):\n", 164 | " def __init__(self):\n", 165 | " \"\"\"ReLU layer simply applies elementwise rectified linear unit to all inputs\"\"\"\n", 166 | " pass\n", 167 | " \n", 168 | " def forward(self, input):\n", 169 | " \"\"\"Apply elementwise ReLU to [batch, input_units] matrix\"\"\"\n", 170 | " ### YOUR CODE HERE ###\n", 171 | " \n", 172 | " def backward(self, input, grad_output):\n", 173 | " \"\"\"Compute gradient of loss w.r.t. ReLU input\"\"\"\n", 174 | " relu_grad = input > 0\n", 175 | " return grad_output*relu_grad " 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": null, 181 | "metadata": { 182 | "id": "kLbDWVndyQCs" 183 | }, 184 | "outputs": [], 185 | "source": [ 186 | "# some tests\n", 187 | "from util import eval_numerical_gradient\n", 188 | "x = np.linspace(-1,1,10*32).reshape([10,32])\n", 189 | "l = ReLU()\n", 190 | "grads = l.backward(x,np.ones([10,32])/(32*10))\n", 191 | "numeric_grads = eval_numerical_gradient(lambda x: l.forward(x).mean(), x=x)\n", 192 | "assert np.allclose(grads, numeric_grads, rtol=1e-3, atol=0),\\\n", 193 | " \"gradient returned by your layer does not match the numerically computed gradient\"" 194 | ] 195 | }, 196 | { 197 | "cell_type": "markdown", 198 | "metadata": { 199 | "id": "RPM7MAtQyQCs" 200 | }, 201 | "source": [ 202 | "#### Instant primer: lambda functions\n", 203 | "\n", 204 | "In Python, you can define functions in one line using the `lambda` syntax: `lambda param1, param2: expression`\n", 205 | "\n", 206 | "For example: `f = lambda x, y: x+y` is equivalent to a normal function:\n", 207 | "\n", 208 | "```\n", 209 | "def f(x,y):\n", 210 | " return x+y\n", 211 | "```\n", 212 | "For more information, click [here](http://www.secnetix.de/olli/Python/lambda_functions.hawk). " 213 | ] 214 | }, 215 | { 216 | "cell_type": "markdown", 217 | "metadata": { 218 | "id": "qH8Ka4NnyQCs" 219 | }, 220 | "source": [ 221 | "### Dense layer\n", 222 | "\n", 223 | "Now let's build something more complicated. Unlike nonlinearity, a dense layer actually has something to learn.\n", 224 | "\n", 225 | "A dense layer applies an affine transformation. In a vectorized form, it can be described as:\n", 226 | "$$f(X)= X \\cdot W + \\vec b $$\n", 227 | "\n", 228 | "Where \n", 229 | "* X is an object-feature matrix of shape [batch_size, num_features],\n", 230 | "* W is a weight matrix [num_features, num_outputs] \n", 231 | "* and b is a vector of num_outputs biases.\n", 232 | "\n", 233 | "Both W and b are initialized during layer creation and updated each time backward is called. A tiny standalone shape check follows below."
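 234 | ] 235 | }, 236 | { 237 | "cell_type": "markdown", 238 | "metadata": {}, 239 | "source": [ 240 | "To keep the shapes straight, here is a tiny standalone illustration of the formula above (the `*_demo` names are ours, added for illustration; they are not part of the assignment):" 241 | ] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "execution_count": null, 246 | "metadata": {}, 247 | "outputs": [], 248 | "source": [ 249 | "# standalone illustration of f(X) = X W + b and the shapes involved\n", 250 | "X_demo = np.random.randn(2, 3) # [batch_size, num_features]\n", 251 | "W_demo = np.random.randn(3, 4) # [num_features, num_outputs]\n", 252 | "b_demo = np.zeros(4) # [num_outputs]\n", 253 | "out_demo = X_demo.dot(W_demo) + b_demo # -> [batch_size, num_outputs]\n", 254 | "print(out_demo.shape) # (2, 4)" 255 | ] 256 | },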
 257 | { 258 | "cell_type": "code", 259 | "execution_count": null, 260 | "metadata": { 261 | "id": "GEHhgyQXyQCs" 262 | }, 263 | "outputs": [], 264 | "source": [ 265 | "class Dense(Layer):\n", 266 | " def __init__(self, input_units, output_units, learning_rate=0.1):\n", 267 | " \"\"\"\n", 268 | " A dense layer is a layer which performs a learned affine transformation:\n", 269 | " f(x) = x @ W + b\n", 270 | " \"\"\"\n", 271 | " self.learning_rate = learning_rate\n", 272 | " \n", 273 | " # initialize weights with small random numbers. We use normal initialization, \n", 274 | " # but surely there is something better. Try this once you've got it working: http://bit.ly/2vTlmaJ\n", 275 | " self.weights = np.random.randn(input_units, output_units)*0.01\n", 276 | " self.biases = np.zeros(output_units)\n", 277 | " \n", 278 | " def forward(self,input):\n", 279 | " \"\"\"\n", 280 | " Perform an affine transformation:\n", 281 | " f(x) = x @ W + b\n", 282 | " \n", 283 | " input shape: [batch, input_units]\n", 284 | " output shape: [batch, output units]\n", 285 | " \"\"\"\n", 286 | " return ### YOUR CODE HERE ###\n", 287 | " \n", 288 | " def backward(self,input,grad_output):\n", 289 | " \n", 290 | " # compute d f / d x = d f / d dense * d dense / d x\n", 291 | " # where d dense/ d x = weights transposed\n", 292 | " grad_input = ### YOUR CODE HERE ###\n", 293 | " \n", 294 | " # compute gradient w.r.t. weights and biases\n", 295 | " grad_weights = ### YOUR CODE HERE ###\n", 296 | " grad_biases = ### YOUR CODE HERE ###\n", 297 | " \n", 298 | " assert grad_weights.shape == self.weights.shape and grad_biases.shape == self.biases.shape\n", 299 | " # Here we perform a stochastic gradient descent step. \n", 300 | " # Later on, you can try replacing that with something better.\n", 301 | " self.weights = self.weights - self.learning_rate * grad_weights\n", 302 | " self.biases = self.biases - self.learning_rate * grad_biases\n", 303 | " \n", 304 | " return grad_input" 305 | ] 306 | }, 307 | { 308 | "cell_type": "markdown", 309 | "metadata": { 310 | "id": "NqbRAsgxyQCt" 311 | }, 312 | "source": [ 313 | "### Testing the dense layer\n", 314 | "\n", 315 | "Here we have a few tests to make sure your dense layer works properly. You can just run them, get 3 \"well done\"s and forget they ever existed.\n", 316 | "\n", 317 | "... or not get 3 \"well done\"s and go fix stuff. If that is the case, here are some tips for you:\n", 318 | "* Make sure you compute gradients for W and b as __sum of gradients over batch__, not mean over gradients. Grad_output is already divided by batch size.\n", 319 | "* If you're debugging, try saving gradients in class fields, like \"self.grad_w = grad_w\", or print the first 3-5 weights. This helps debugging.\n", 320 | "* If nothing else helps, try ignoring the tests and proceed to network training. If it trains alright, you may be off by something that does not affect network training." 321 | ] 322 | }, 323 | { 324 | "cell_type": "code", 325 | "execution_count": null, 326 | "metadata": { 327 | "id": "8qu2R1NFyQCu", 328 | "outputId": "404908b2-da7a-4e9d-f896-c8fdd2232fc8" 329 | }, 330 | "outputs": [], 331 | "source": [ 332 | "l = Dense(128, 150)\n", 333 | "\n", 334 | "assert -0.05 < l.weights.mean() < 0.05 and 1e-3 < l.weights.std() < 1e-1,\\\n", 335 | " \"The initial weights must have zero mean and small variance. \"\\\n", 336 | " \"If you know what you're doing, remove this assertion.\"\n", 337 | "assert -0.05 < l.biases.mean() < 0.05, \"Biases must be zero mean. 
Ignore if you have a reason to do otherwise.\"\n", 317 | "\n", 318 | "# To test the outputs, we explicitly set weights with fixed values. DO NOT DO THAT IN AN ACTUAL NETWORK!\n", 319 | "l = Dense(3,4)\n", 320 | "\n", 321 | "x = np.linspace(-1,1,2*3).reshape([2,3])\n", 322 | "l.weights = np.linspace(-1,1,3*4).reshape([3,4])\n", 323 | "l.biases = np.linspace(-1,1,4)\n", 324 | "\n", 325 | "assert np.allclose(l.forward(x),np.array([[ 0.07272727, 0.41212121, 0.75151515, 1.09090909],\n", 326 | " [-0.90909091, 0.08484848, 1.07878788, 2.07272727]]))\n", 327 | "print(\"Well done!\")" 328 | ] 329 | }, 330 | { 331 | "cell_type": "code", 332 | "execution_count": null, 333 | "metadata": { 334 | "id": "NoFuwEFFyQCv", 335 | "outputId": "65f20202-1265-4af5-a8dd-972a7170a375" 336 | }, 337 | "outputs": [], 338 | "source": [ 339 | "# To test the grads, we use gradients obtained via finite differences\n", 340 | "\n", 341 | "from util import eval_numerical_gradient\n", 342 | "\n", 343 | "x = np.linspace(-1,1,10*32).reshape([10,32])\n", 344 | "l = Dense(32,64,learning_rate=0)\n", 345 | "\n", 346 | "numeric_grads = eval_numerical_gradient(lambda x: l.forward(x).sum(),x)\n", 347 | "grads = l.backward(x,np.ones([10,64]))\n", 348 | "\n", 349 | "assert np.allclose(grads,numeric_grads,rtol=1e-3,atol=0), \"input gradient does not match numeric grad\"\n", 350 | "print(\"Well done!\")" 351 | ] 352 | }, 353 | { 354 | "cell_type": "code", 355 | "execution_count": null, 356 | "metadata": { 357 | "id": "Dnv9cOT_yQCv", 358 | "outputId": "788cbe48-4cad-4183-e559-29a618cdbbe4" 359 | }, 360 | "outputs": [], 361 | "source": [ 362 | "# test gradients w.r.t. params\n", 363 | "def compute_out_given_wb(w,b):\n", 364 | " l = Dense(32,64,learning_rate=1)\n", 365 | " l.weights = np.array(w)\n", 366 | " l.biases = np.array(b)\n", 367 | " x = np.linspace(-1,1,10*32).reshape([10,32])\n", 368 | " return l.forward(x)\n", 369 | " \n", 370 | "def compute_grad_by_params(w,b):\n", 371 | " l = Dense(32,64,learning_rate=1)\n", 372 | " l.weights = np.array(w)\n", 373 | " l.biases = np.array(b)\n", 374 | " x = np.linspace(-1,1,10*32).reshape([10,32])\n", 375 | " l.backward(x,np.ones([10,64]) / 10.)\n", 376 | " return w - l.weights, b - l.biases\n", 377 | " \n", 378 | "w,b = np.random.randn(32,64), np.linspace(-1,1,64)\n", 379 | "\n", 380 | "numeric_dw = eval_numerical_gradient(lambda w: compute_out_given_wb(w,b).mean(0).sum(),w )\n", 381 | "numeric_db = eval_numerical_gradient(lambda b: compute_out_given_wb(w,b).mean(0).sum(),b )\n", 382 | "grad_w,grad_b = compute_grad_by_params(w,b)\n", 383 | "\n", 384 | "assert np.allclose(numeric_dw,grad_w,rtol=1e-3,atol=0), \"weight gradient does not match numeric weight gradient\"\n", 385 | "assert np.allclose(numeric_db,grad_b,rtol=1e-3,atol=0), \"bias gradient does not match numeric bias gradient\"\n", 386 | "print(\"Well done!\")" 387 | ] 388 | }, 389 | { 390 | "cell_type": "markdown", 391 | "metadata": { 392 | "id": "7ddbVIoOyQCw" 393 | }, 394 | "source": [ 395 | "### The loss function\n", 396 | "\n", 397 | "Since we want to predict probabilities, it would be logical for us to define a softmax nonlinearity on top of our network and compute the loss given the predicted probabilities. 
However, there is a better way to do so.\n", 398 | "\n", 399 | "If you write down the expression for crossentropy as a function of softmax logits (a), you'll see:\n", 400 | "\n", 401 | "$$ loss = - \\log \\space {e^{a_{correct}} \\over {\\underset i \\sum e^{a_i} } } $$\n", 402 | "\n", 403 | "If you take a closer look, you'll see that it can be rewritten as:\n", 404 | "\n", 405 | "$$ loss = - a_{correct} + \\log {\\underset i \\sum e^{a_i} } $$\n", 406 | "\n", 407 | "It's called Log-softmax and it's better than naive log(softmax(a)) in all aspects:\n", 408 | "* Better numerical stability\n", 409 | "* Easier to get the derivative right\n", 410 | "* Marginally faster to compute\n", 411 | "\n", 412 | "So why not just use log-softmax throughout our computation and never actually bother to estimate probabilities?\n", 413 | "\n", 414 | "Here you are! We've defined both loss functions for you so that you can focus on the neural network part." 415 | ] 416 | }, 417 | { 418 | "cell_type": "code", 419 | "execution_count": null, 420 | "metadata": { 421 | "id": "Kd12ULJzyQCw" 422 | }, 423 | "outputs": [], 424 | "source": [ 425 | "def softmax_crossentropy_with_logits(logits,reference_answers):\n", 426 | " \"\"\"Compute crossentropy from logits[batch,n_classes] and ids of correct answers\"\"\"\n", 427 | " logits_for_answers = logits[np.arange(len(logits)),reference_answers]\n", 428 | " \n", 429 | " xentropy = - logits_for_answers + np.log(np.sum(np.exp(logits),axis=-1))\n", 430 | " \n", 431 | " return xentropy\n", 432 | "\n", 433 | "def grad_softmax_crossentropy_with_logits(logits,reference_answers):\n", 434 | " \"\"\"Compute crossentropy gradient from logits[batch,n_classes] and ids of correct answers\"\"\"\n", 435 | " ones_for_answers = np.zeros_like(logits)\n", 436 | " ones_for_answers[np.arange(len(logits)),reference_answers] = 1\n", 437 | " \n", 438 | " softmax = np.exp(logits) / np.exp(logits).sum(axis=-1,keepdims=True)\n", 439 | " \n", 440 | " return (- ones_for_answers + softmax) / logits.shape[0]" 441 | ] 442 | }, 443 | { 444 | "cell_type": "code", 445 | "execution_count": null, 446 | "metadata": { 447 | "id": "ls2kg386yQCw" 448 | }, 449 | "outputs": [], 450 | "source": [ 451 | "logits = np.linspace(-1,1,500).reshape([50,10])\n", 452 | "answers = np.arange(50)%10\n", 453 | "\n", 454 | "softmax_crossentropy_with_logits(logits,answers)\n", 455 | "grads = grad_softmax_crossentropy_with_logits(logits,answers)\n", 456 | "numeric_grads = eval_numerical_gradient(lambda l: softmax_crossentropy_with_logits(l,answers).mean(),logits)\n", 457 | "\n", 458 | "assert np.allclose(numeric_grads,grads,rtol=1e-3,atol=0), \"The reference implementation has just failed. Someone has just changed the rules of math.\"" 459 | ] 460 | }, 461 | { 462 | "cell_type": "markdown", 463 | "metadata": { 464 | "id": "fok7whlUyQCx" 465 | }, 466 | "source": [ 467 | "### Full network\n", 468 | "\n", 469 | "Now let's combine what we've just built into a working neural network. As we announced, we're gonna use this monster to classify handwritten digits, so let's get them loaded." 470 | ] 471 | }, 472 | { 473 | "cell_type": "markdown", 474 | "metadata": { 475 | "id": "BvjgtCUTyQCx" 476 | }, 477 | "source": [ 478 | "We will download the data using PyTorch. 
" 479 | ] 480 | }, 481 | { 482 | "cell_type": "code", 483 | "execution_count": null, 484 | "metadata": { 485 | "id": "tqy9a-D0yQCx", 486 | "outputId": "06637152-af5c-460c-edda-c622c2c7adb3" 487 | }, 488 | "outputs": [], 489 | "source": [ 490 | "!pip install torchvision" 491 | ] 492 | }, 493 | { 494 | "cell_type": "code", 495 | "execution_count": null, 496 | "metadata": { 497 | "id": "7kZv6irQyQCx", 498 | "outputId": "fcf2164e-596b-4a98-e0a6-c090da418370" 499 | }, 500 | "outputs": [], 501 | "source": [ 502 | "# import numpy and matplotlib\n", 503 | "%pylab inline\n", 504 | "\n", 505 | "import torchvision\n", 506 | "\n", 507 | "transform = torchvision.transforms.Compose([\n", 508 | " torchvision.transforms.ToTensor(),\n", 509 | " torchvision.transforms.Lambda(lambda x: x.flatten())\n", 510 | "])\n", 511 | "\n", 512 | "train_dataset = torchvision.datasets.MNIST(root='.', train=True,\n", 513 | " download=True, transform=transform)\n", 514 | "test_dataset = torchvision.datasets.MNIST(root='.', train=True,\n", 515 | " download=True, transform=transform)" 516 | ] 517 | }, 518 | { 519 | "cell_type": "code", 520 | "execution_count": null, 521 | "metadata": { 522 | "id": "yr02M6H-yQCy" 523 | }, 524 | "outputs": [], 525 | "source": [ 526 | "X_train, y_train = [], []\n", 527 | "for i in range(len(train_dataset)):\n", 528 | " x, y = train_dataset[i]\n", 529 | " X_train.append(x.numpy())\n", 530 | " y_train.append(y)\n", 531 | "\n", 532 | "X_train = np.array(X_train)\n", 533 | "y_train = np.array(y_train)\n", 534 | "\n", 535 | "# we reserve the last 10000 training examples for validation\n", 536 | "X_train, X_val = X_train[:-10000], X_train[-10000:]\n", 537 | "y_train, y_val = y_train[:-10000], y_train[-10000:]\n", 538 | "\n", 539 | "X_test, y_test = [], []\n", 540 | "for i in range(len(test_dataset)):\n", 541 | " x, y = test_dataset[i]\n", 542 | " X_test.append(x.numpy())\n", 543 | " y_test.append(y)\n", 544 | "\n", 545 | "X_test = np.array(X_test)\n", 546 | "y_test = np.array(y_test)" 547 | ] 548 | }, 549 | { 550 | "cell_type": "code", 551 | "execution_count": null, 552 | "metadata": { 553 | "id": "fzzXwlFEyQCy", 554 | "outputId": "f4e81edd-7cc9-4e96-e83d-8299d6a78d68" 555 | }, 556 | "outputs": [], 557 | "source": [ 558 | "plt.figure(figsize=[6, 6])\n", 559 | "\n", 560 | "for i in range(4):\n", 561 | " plt.subplot(2, 2, i + 1)\n", 562 | " plt.title(f\"Label: {y_train[i]}\")\n", 563 | " plt.imshow(X_train[i].reshape([28, 28]), cmap='gray')" 564 | ] 565 | }, 566 | { 567 | "cell_type": "code", 568 | "execution_count": null, 569 | "metadata": { 570 | "id": "KtIwOlejyQCy" 571 | }, 572 | "outputs": [], 573 | "source": [] 574 | }, 575 | { 576 | "cell_type": "markdown", 577 | "metadata": { 578 | "id": "aOKsQ0fiyQCz" 579 | }, 580 | "source": [ 581 | "We'll define network as a list of layers, each applied on top of previous one. In this setting, computing predictions and training becomes trivial." 
582 | ] 583 | }, 584 | { 585 | "cell_type": "code", 586 | "execution_count": null, 587 | "metadata": { 588 | "id": "-QlPCek8yQCz" 589 | }, 590 | "outputs": [], 591 | "source": [ 592 | "network = []\n", 593 | "network.append(Dense(X_train.shape[1],100))\n", 594 | "network.append(ReLU())\n", 595 | "network.append(Dense(100,200))\n", 596 | "network.append(ReLU())\n", 597 | "network.append(Dense(200,10))" 598 | ] 599 | }, 600 | { 601 | "cell_type": "code", 602 | "execution_count": null, 603 | "metadata": { 604 | "id": "dk4zSNiuyQCz" 605 | }, 606 | "outputs": [], 607 | "source": [ 608 | "def forward(network, X):\n", 609 | " \"\"\"\n", 610 | " Compute activations of all network layers by applying them sequentially.\n", 611 | " Return a list of activations for each layer. \n", 612 | " Make sure last activation corresponds to network logits.\n", 613 | " \"\"\"\n", 614 | " activations = []\n", 615 | " input = X\n", 616 | "\n", 617 | " # \n", 618 | " \n", 619 | " assert len(activations) == len(network)\n", 620 | " return activations\n", 621 | "\n", 622 | "def predict(network,X):\n", 623 | " \"\"\"\n", 624 | " Compute network predictions.\n", 625 | " \"\"\"\n", 626 | " logits = forward(network,X)[-1]\n", 627 | " return logits.argmax(axis=-1)\n", 628 | "\n", 629 | "def train(network,X,y):\n", 630 | " \"\"\"\n", 631 | " Train your network on a given batch of X and y.\n", 632 | " You first need to run forward to get all layer activations.\n", 633 | " Then you can run layer.backward going from last to first layer.\n", 634 | " \n", 635 | " After you called backward for all layers, all Dense layers have already made one gradient step.\n", 636 | " \"\"\"\n", 637 | " \n", 638 | " # Get the layer activations\n", 639 | " layer_activations = forward(network,X)\n", 640 | " layer_inputs = [X]+layer_activations #layer_input[i] is an input for network[i]\n", 641 | " logits = layer_activations[-1]\n", 642 | " \n", 643 | " # Compute the loss and the initial gradient\n", 644 | " loss = softmax_crossentropy_with_logits(logits,y)\n", 645 | " loss_grad = grad_softmax_crossentropy_with_logits(logits,y)\n", 646 | " \n", 647 | " # \n", 648 | " \n", 649 | " return np.mean(loss)" 650 | ] 651 | }, 652 | { 653 | "cell_type": "markdown", 654 | "metadata": { 655 | "id": "zhMVSMPCyQCz" 656 | }, 657 | "source": [ 658 | "Instead of tests, we provide you with a training loop that prints training and validation accuracies on every epoch.\n", 659 | "\n", 660 | "If your implementation of forward and backward are correct, your accuracy should grow from 90~93% to >97% with the default network." 661 | ] 662 | }, 663 | { 664 | "cell_type": "markdown", 665 | "metadata": { 666 | "id": "pQ2rJwWfyQC0" 667 | }, 668 | "source": [ 669 | "### Training loop\n", 670 | "\n", 671 | "As usual, we split data into minibatches, feed each such minibatch into the network and update weights." 
672 | ] 673 | }, 674 | { 675 | "cell_type": "code", 676 | "execution_count": null, 677 | "metadata": { 678 | "id": "tlTQvjA3yQC0" 679 | }, 680 | "outputs": [], 681 | "source": [ 682 | "def iterate_minibatches(inputs, targets, batchsize, shuffle=False):\n", 683 | " assert len(inputs) == len(targets)\n", 684 | " if shuffle:\n", 685 | " indices = np.random.permutation(len(inputs))\n", 686 | " for start_idx in tqdm_utils.tqdm_notebook_failsafe(range(0, len(inputs) - batchsize + 1, batchsize)):\n", 687 | " if shuffle:\n", 688 | " excerpt = indices[start_idx:start_idx + batchsize]\n", 689 | " else:\n", 690 | " excerpt = slice(start_idx, start_idx + batchsize)\n", 691 | " yield inputs[excerpt], targets[excerpt]" 692 | ] 693 | }, 694 | { 695 | "cell_type": "code", 696 | "execution_count": null, 697 | "metadata": { 698 | "id": "6BAUHRBhyQC0" 699 | }, 700 | "outputs": [], 701 | "source": [ 702 | "from IPython.display import clear_output\n", 703 | "train_log = []\n", 704 | "val_log = []" 705 | ] 706 | }, 707 | { 708 | "cell_type": "code", 709 | "execution_count": null, 710 | "metadata": { 711 | "id": "yg8lzMfYyQC0" 712 | }, 713 | "outputs": [], 714 | "source": [ 715 | "for epoch in range(25):\n", 716 | "\n", 717 | " for x_batch,y_batch in iterate_minibatches(X_train,y_train,batchsize=32,shuffle=True):\n", 718 | " train(network,x_batch,y_batch)\n", 719 | " \n", 720 | " train_log.append(np.mean(predict(network,X_train)==y_train))\n", 721 | " val_log.append(np.mean(predict(network,X_val)==y_val))\n", 722 | " \n", 723 | " clear_output()\n", 724 | " print(\"Epoch\",epoch)\n", 725 | " print(\"Train accuracy:\",train_log[-1])\n", 726 | " print(\"Val accuracy:\",val_log[-1])\n", 727 | " plt.plot(train_log,label='train accuracy')\n", 728 | " plt.plot(val_log,label='val accuracy')\n", 729 | " plt.legend(loc='best')\n", 730 | " plt.grid()\n", 731 | " plt.show()\n", 732 | " " 733 | ] 734 | }, 735 | { 736 | "cell_type": "markdown", 737 | "metadata": { 738 | "id": "vRe2Y4MdyQC0" 739 | }, 740 | "source": [ 741 | "### Try it out!\n", 742 | "\n", 743 | "Congradulations, you managed to get this far! Now you can chose one or more options what to do next. \n", 744 | "\n", 745 | "\n", 746 | "#### Option I: initialization\n", 747 | "* Implement Dense layer with Xavier initialization as explained [here](http://bit.ly/2vTlmaJ). Compare xavier initialization to default initialization on deep networks (5+ layers).\n", 748 | "\n", 749 | "#### Option II: regularization\n", 750 | "* Implement a version of Dense layer with L2 regularization penalty: when updating Dense Layer weights, adjust gradients to minimize\n", 751 | "\n", 752 | "$$ Loss = Crossentropy + \\alpha \\cdot \\underset i \\sum {w_i}^2 $$\n", 753 | "\n", 754 | "Check that regularization mitigates overfitting in case of abundantly large number of neurons. 
Consider tuning $\\alpha$ for better results.\n", 755 | "\n", 756 | "#### Option III: optimization\n", 757 | "* Implement a version of the Dense layer that uses momentum/RMSProp or whatever method worked best for you last time.\n", 758 | "\n", 759 | "Most of those methods require persistent parameters like momentum direction or moving average grad norm, but you can easily store those params inside your layers.\n", 760 | "\n", 761 | "Compare your chosen method's performance with that of vanilla SGD.\n", 762 | "\n", 763 | "### Some advanced stuff\n", 764 | "If you are still with us and want more, consider implementing Batch Normalization ([guide](https://gab41.lab41.org/batch-normalization-what-the-hey-d480039a9e3b)) or Dropout ([guide](https://medium.com/@amarbudhiraja/https-medium-com-amarbudhiraja-learning-less-to-learn-better-dropout-in-deep-machine-learning-74334da4bfc5)). Note, however, that those \"layers\" behave differently when training and when predicting on the test set.\n", 765 | "\n", 766 | "* Dropout:\n", 767 | " * During training: drop units randomly with probability __p__ and multiply everything by __1/(1-p)__\n", 768 | " * During final prediction: do nothing; pretend there's no dropout\n", 769 | " \n", 770 | "* Batch normalization\n", 771 | " * During training, it subtracts the mean-over-batch, divides by the std-over-batch, and updates its running mean and variance.\n", 772 | " * During final prediction, it uses the accumulated mean and variance.\n", 773 | "\n", 774 | "A tiny numpy sketch of the dropout recipe is given below.\n" 775 | ] 776 | },
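 777 | { 778 | "cell_type": "markdown", 779 | "metadata": {}, 780 | "source": [ 781 | "To make the dropout recipe concrete, here is a tiny numpy sketch (an illustration with made-up values and names of our choosing, not a full Layer implementation):" 782 | ] 783 | }, 784 | { 785 | "cell_type": "code", 786 | "execution_count": null, 787 | "metadata": {}, 788 | "outputs": [], 789 | "source": [ 790 | "p = 0.5 # drop probability\n", 791 | "h = np.ones((1, 8)) # some activations\n", 792 | "mask = np.random.rand(*h.shape) > p # keep each unit with probability 1-p\n", 793 | "h_train = h * mask / (1. - p) # training: drop and rescale by 1/(1-p)\n", 794 | "h_test = h # final prediction: do nothing\n", 795 | "print(h_train)" 796 | ] 797 | } 798 | ], 799 | "metadata": { 800 | "colab": { 801 | "collapsed_sections": [], 802 | "name": "week02_numpy_neural_network_honor.ipynb", 803 | "provenance": [] 804 | }, 805 | "kernelspec": { 806 | "display_name": "Python 3", 807 | "language": "python", 808 | "name": "python3" 809 | }, 810 | "language_info": { 811 | "codemirror_mode": { 812 | "name": "ipython", 813 | "version": 3 814 | }, 815 | "file_extension": ".py", 816 | "mimetype": "text/x-python", 817 | "name": "python", 818 | "nbconvert_exporter": "python", 819 | "pygments_lexer": "ipython3", 820 | "version": "3.8.5" 821 | }, 822 | "toc": { 823 | "colors": { 824 | "hover_highlight": "#DAA520", 825 | "navigate_num": "#000000", 826 | "navigate_text": "#333333", 827 | "running_highlight": "#FF0000", 828 | "selected_highlight": "#FFD700", 829 | "sidebar_border": "#EEEEEE", 830 | "wrapper_background": "#FFFFFF" 831 | }, 832 | "moveMenuLeft": true, 833 | "nav_menu": { 834 | "height": "264px", 835 | "width": "252px" 836 | }, 837 | "navigate_menu": true, 838 | "number_sections": true, 839 | "sideBar": true, 840 | "threshold": 4, 841 | "toc_cell": false, 842 | "toc_section_display": "block", 843 | "toc_window_display": false, 844 | "widenNotebook": false 845 | } 846 | }, 847 | "nbformat": 4, 848 | "nbformat_minor": 1 849 | } -------------------------------------------------------------------------------- /week03/week03_finetuning_inception.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "H9cgWox0eiRB" 7 | }, 8 | "source": [ 9 | "# Fine-tuning InceptionV3 for flowers classification\n", 10 | "\n", 11 | "In this programming assignment you will fine-tune the InceptionV3 architecture for a flowers classification task.\n", 12 | "\n", 13 | "InceptionV3 architecture (https://research.googleblog.com/2016/03/train-your-own-image-classifier-with.html):\n", 14 | "\n", 15 | "\n", 16 | "The flowers classification dataset (http://www.robots.ox.ac.uk/~vgg/data/flowers/102/index.html) consists of 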
102 flower categories commonly occurring in the United Kingdom. Each class contains between 40 and 258 images:\n", 17 | "" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": null, 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "%%bash\n", 27 | "\n", 28 | "shred -u setup_colab.py\n", 29 | "\n", 30 | "wget https://raw.githubusercontent.com/hse-aml/intro-to-dl-pytorch/main/utils/setup_colab.py -O setup_colab.py" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "import setup_colab\n", 40 | "\n", 41 | "setup_colab.setup_week03_2()" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "metadata": { 48 | "ExecuteTime": { 49 | "end_time": "2021-02-14T15:26:09.068129Z", 50 | "start_time": "2021-02-14T15:26:08.219347Z" 51 | }, 52 | "colab": { 53 | "base_uri": "https://localhost:8080/" 54 | }, 55 | "executionInfo": { 56 | "elapsed": 551, 57 | "status": "ok", 58 | "timestamp": 1613301368013, 59 | "user": { 60 | "displayName": "Alexander Markovich", 61 | "photoUrl": "", 62 | "userId": "05353592946685554048" 63 | }, 64 | "user_tz": -180 65 | }, 66 | "id": "yRsMV_1ReiRJ", 67 | "outputId": "6ac5089e-8352-4cdf-e0d9-4ecbc131e90e", 68 | "scrolled": false 69 | }, 70 | "outputs": [], 71 | "source": [ 72 | "%matplotlib inline\n", 73 | "import matplotlib.pyplot as plt\n", 74 | "\n", 75 | "import random\n", 76 | "import tqdm\n", 77 | "import collections\n", 78 | "from IPython import display\n", 79 | "\n", 80 | "import torch\n", 81 | "from torch import nn\n", 82 | "from torch import optim\n", 83 | "from torch.utils.data import DataLoader\n", 84 | "\n", 85 | "from torchvision import datasets, transforms, models\n", 86 | "\n", 87 | "from sklearn.model_selection import train_test_split" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": null, 93 | "metadata": { 94 | "ExecuteTime": { 95 | "end_time": "2021-02-14T15:26:10.693869Z", 96 | "start_time": "2021-02-14T15:26:10.690502Z" 97 | }, 98 | "executionInfo": { 99 | "elapsed": 1241, 100 | "status": "ok", 101 | "timestamp": 1613301369415, 102 | "user": { 103 | "displayName": "Alexander Markovich", 104 | "photoUrl": "", 105 | "userId": "05353592946685554048" 106 | }, 107 | "user_tz": -180 108 | }, 109 | "id": "l_4fuEj3eiRL" 110 | }, 111 | "outputs": [], 112 | "source": [ 113 | "# auxiliary stuff\n", 114 | "def calculate_accuracy(prediction, target):\n", 115 | " # Note that prediction.shape == target.shape == [B, ]\n", 116 | " \n", 117 | " matching = (prediction == target).float()\n", 118 | " return matching.mean()\n", 119 | "\n", 120 | "\n", 121 | "class AverageMeter:\n", 122 | " \n", 123 | " def __init__(self):\n", 124 | " self.reset()\n", 125 | "\n", 126 | " def reset(self):\n", 127 | " self.val = 0\n", 128 | " self.avg = 0\n", 129 | " self.sum = 0\n", 130 | " self.count = 0\n", 131 | "\n", 132 | " def update(self, val, n=1):\n", 133 | " self.val = val\n", 134 | " self.sum += val * n\n", 135 | " self.count += n\n", 136 | " self.avg = self.sum / self.count" 137 | ] 138 | }, 139 | { 140 | "cell_type": "markdown", 141 | "metadata": { 142 | "id": "10EpF5WxeiRM" 143 | }, 144 | "source": [ 145 | "# Fill in your Coursera token and email\n", 146 | "To successfully submit your answers to our grader, please fill in your Coursera submission token and email." 
147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": null, 152 | "metadata": { 153 | "executionInfo": { 154 | "elapsed": 886, 155 | "status": "ok", 156 | "timestamp": 1613301369803, 157 | "user": { 158 | "displayName": "Alexander Markovich", 159 | "photoUrl": "", 160 | "userId": "05353592946685554048" 161 | }, 162 | "user_tz": -180 163 | }, 164 | "id": "Pv_j04eUeiRM", 165 | "scrolled": false 166 | }, 167 | "outputs": [], 168 | "source": [ 169 | "import grading \n", 170 | "\n", 171 | "grader = grading.Grader(\n", 172 | " assignment_key=\"2v-uxpD7EeeMxQ6FWsz5LA\", \n", 173 | " all_parts=[\"wuwwC\", \"qRsZ1\"]\n", 174 | ")" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": null, 180 | "metadata": { 181 | "executionInfo": { 182 | "elapsed": 585, 183 | "status": "ok", 184 | "timestamp": 1613301369804, 185 | "user": { 186 | "displayName": "Alexander Markovich", 187 | "photoUrl": "", 188 | "userId": "05353592946685554048" 189 | }, 190 | "user_tz": -180 191 | }, 192 | "id": "j4Nvaj_HeiRM", 193 | "scrolled": false 194 | }, 195 | "outputs": [], 196 | "source": [ 197 | "# token expires every 30 min\n", 198 | "COURSERA_TOKEN = ### YOUR TOKEN HERE\n", 199 | "COURSERA_EMAIL = ### YOUR EMAIL HERE" 200 | ] 201 | }, 202 | { 203 | "cell_type": "markdown", 204 | "metadata": { 205 | "id": "Kj0T4NEGeiRM" 206 | }, 207 | "source": [ 208 | "# Load dataset" 209 | ] 210 | }, 211 | { 212 | "cell_type": "markdown", 213 | "metadata": { 214 | "id": "Bpvl_o4TeiRN" 215 | }, 216 | "source": [ 217 | "Dataset was downloaded for you, it takes 12 min and 400mb.\n", 218 | "Relevant links (just in case):\n", 219 | "- http://www.robots.ox.ac.uk/~vgg/data/flowers/102/index.html\n", 220 | "- http://www.robots.ox.ac.uk/~vgg/data/flowers/102/102flowers.tgz\n", 221 | "- http://www.robots.ox.ac.uk/~vgg/data/flowers/102/imagelabels.mat" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": null, 227 | "metadata": { 228 | "colab": { 229 | "base_uri": "https://localhost:8080/" 230 | }, 231 | "executionInfo": { 232 | "elapsed": 8012, 233 | "status": "ok", 234 | "timestamp": 1613301378451, 235 | "user": { 236 | "displayName": "Alexander Markovich", 237 | "photoUrl": "", 238 | "userId": "05353592946685554048" 239 | }, 240 | "user_tz": -180 241 | }, 242 | "id": "WEl2X-PCeiRN", 243 | "outputId": "6d4e790b-dd54-4ba6-9a05-94d67d707b58", 244 | "scrolled": false 245 | }, 246 | "outputs": [], 247 | "source": [ 248 | "import download_utils\n", 249 | "\n", 250 | "download_utils.download_week_3_resources('flowers')" 251 | ] 252 | }, 253 | { 254 | "cell_type": "code", 255 | "execution_count": null, 256 | "metadata": { 257 | "executionInfo": { 258 | "elapsed": 3692, 259 | "status": "ok", 260 | "timestamp": 1613301378670, 261 | "user": { 262 | "displayName": "Alexander Markovich", 263 | "photoUrl": "", 264 | "userId": "05353592946685554048" 265 | }, 266 | "user_tz": -180 267 | }, 268 | "id": "yeqEDTbheiRN" 269 | }, 270 | "outputs": [], 271 | "source": [ 272 | "# unpack data into flowers/\n", 273 | "!tar -zxvf flowers/102flowers.tgz -C flowers/" 274 | ] 275 | }, 276 | { 277 | "cell_type": "code", 278 | "execution_count": null, 279 | "metadata": { 280 | "executionInfo": { 281 | "elapsed": 2968, 282 | "status": "ok", 283 | "timestamp": 1613301378671, 284 | "user": { 285 | "displayName": "Alexander Markovich", 286 | "photoUrl": "", 287 | "userId": "05353592946685554048" 288 | }, 289 | "user_tz": -180 290 | }, 291 | "id": "9yn3HyAfeiRN" 292 | }, 293 | "outputs": [], 294 | "source": [ 
295 | "from scipy.io import loadmat\n", 296 | "\n", 297 | "# `datasets.ImageFolder` is a generic data loader\n", 298 | "# where the images are arranged in this way:\n", 299 | "# root/dog/xxx.png\n", 300 | "# root/dog/xxy.png\n", 301 | "# root/dog/xxz.png\n", 302 | "# ...\n", 303 | "# root/cat/123.png\n", 304 | "# root/cat/nsdf3.png\n", 305 | "# root/cat/asd932_.png\n", 306 | "\n", 307 | "class FlowersDataset(datasets.ImageFolder):\n", 308 | " \n", 309 | " def __init__(self, path: str, transform = None):\n", 310 | " super(FlowersDataset, self).__init__(path, transform=transform)\n", 311 | " \n", 312 | " # load labels from `*.mat` file\n", 313 | " self.labels = loadmat(f'{path}/imagelabels.mat')['labels'][0] - 1\n", 314 | " self.labels = self.labels.tolist()\n", 315 | " self.classes = list(set(self.labels))\n", 316 | " \n", 317 | " def __getitem__(self, index: int):\n", 318 | " image, _ = super().__getitem__(index)\n", 319 | " label = self.labels[index]\n", 320 | " return image, label\n", 321 | " \n", 322 | " def __len__(self):\n", 323 | " return len(self.labels)" 324 | ] 325 | }, 326 | { 327 | "cell_type": "code", 328 | "execution_count": null, 329 | "metadata": { 330 | "executionInfo": { 331 | "elapsed": 2325, 332 | "status": "ok", 333 | "timestamp": 1613301378672, 334 | "user": { 335 | "displayName": "Alexander Markovich", 336 | "photoUrl": "", 337 | "userId": "05353592946685554048" 338 | }, 339 | "user_tz": -180 340 | }, 341 | "id": "U2_pK2xLeiRO" 342 | }, 343 | "outputs": [], 344 | "source": [ 345 | "dataset = FlowersDataset('flowers', transforms.PILToTensor())" 346 | ] 347 | }, 348 | { 349 | "cell_type": "code", 350 | "execution_count": null, 351 | "metadata": { 352 | "colab": { 353 | "base_uri": "https://localhost:8080/", 354 | "height": 305 355 | }, 356 | "executionInfo": { 357 | "elapsed": 1682, 358 | "status": "ok", 359 | "timestamp": 1613301379463, 360 | "user": { 361 | "displayName": "Alexander Markovich", 362 | "photoUrl": "", 363 | "userId": "05353592946685554048" 364 | }, 365 | "user_tz": -180 366 | }, 367 | "id": "It0ryjl8eiRO", 368 | "outputId": "02daecd6-12e6-41c5-a473-3d1fa6ef9277" 369 | }, 370 | "outputs": [], 371 | "source": [ 372 | "image, label = dataset[232]\n", 373 | "print(label)\n", 374 | "plt.imshow(image.permute(1, 2, 0))" 375 | ] 376 | }, 377 | { 378 | "cell_type": "markdown", 379 | "metadata": { 380 | "id": "d9qQphe5eiRO" 381 | }, 382 | "source": [ 383 | "## Prepare images for model" 384 | ] 385 | }, 386 | { 387 | "cell_type": "markdown", 388 | "metadata": { 389 | "id": "myjLSMOIeiRO" 390 | }, 391 | "source": [ 392 | "We will take a center crop from each image like this:\n", 393 | "\n", 394 | "\n", 395 | "And then resize image to IMG_SIZE x IMG_SIZE, where IMG_SIZE = 299" 396 | ] 397 | }, 398 | { 399 | "cell_type": "code", 400 | "execution_count": null, 401 | "metadata": { 402 | "executionInfo": { 403 | "elapsed": 639, 404 | "status": "ok", 405 | "timestamp": 1613301383275, 406 | "user": { 407 | "displayName": "Alexander Markovich", 408 | "photoUrl": "", 409 | "userId": "05353592946685554048" 410 | }, 411 | "user_tz": -180 412 | }, 413 | "id": "Zk7V2tXbeiRO" 414 | }, 415 | "outputs": [], 416 | "source": [ 417 | "IMAGE_SIZE = 299\n", 418 | "\n", 419 | "class CenterCrop2(nn.Module):\n", 420 | " \n", 421 | " def __init__(self):\n", 422 | " super(CenterCrop2, self).__init__()\n", 423 | " \n", 424 | " def forward(self, image: torch.Tensor):\n", 425 | " h, w = image.shape[-2:]\n", 426 | " \n", 427 | " s = h if w > h else w\n", 428 | " image = ### YOUR CODE HERE 
###\n", 429 | " return image\n", 430 | "\n", 431 | "prepare_transforms = transforms.Compose([\n", 432 | " transforms.PILToTensor(),\n", 433 | " \n", 434 | " # Convert from uint8 ([0, 255]) to float32 ([0.0, 0.1])\n", 435 | " transforms.Lambda(lambda image: image.float() / 255.),\n", 436 | " \n", 437 | " # Center crop\n", 438 | " CenterCrop2(),\n", 439 | " transforms.Resize(IMAGE_SIZE),\n", 440 | "\n", 441 | " # Normalization\n", 442 | " # This is necessary because the original model was trained\n", 443 | " # with normalization and expects normalized input\n", 444 | " transforms.Normalize(\n", 445 | " mean=[0.485, 0.456, 0.406],\n", 446 | " std=[0.229, 0.224, 0.225]\n", 447 | " )\n", 448 | "])" 449 | ] 450 | }, 451 | { 452 | "cell_type": "code", 453 | "execution_count": null, 454 | "metadata": { 455 | "executionInfo": { 456 | "elapsed": 593, 457 | "status": "ok", 458 | "timestamp": 1613301387075, 459 | "user": { 460 | "displayName": "Alexander Markovich", 461 | "photoUrl": "", 462 | "userId": "05353592946685554048" 463 | }, 464 | "user_tz": -180 465 | }, 466 | "id": "M-i7RvWBeiRP" 467 | }, 468 | "outputs": [], 469 | "source": [ 470 | "dataset = FlowersDataset('flowers', prepare_transforms)" 471 | ] 472 | }, 473 | { 474 | "cell_type": "code", 475 | "execution_count": null, 476 | "metadata": { 477 | "colab": { 478 | "base_uri": "https://localhost:8080/", 479 | "height": 323 480 | }, 481 | "executionInfo": { 482 | "elapsed": 1402, 483 | "status": "ok", 484 | "timestamp": 1613301389754, 485 | "user": { 486 | "displayName": "Alexander Markovich", 487 | "photoUrl": "", 488 | "userId": "05353592946685554048" 489 | }, 490 | "user_tz": -180 491 | }, 492 | "id": "R9fJyT3ueiRP", 493 | "outputId": "d1ab0684-5a03-4e95-9d40-8f94ae9f44df" 494 | }, 495 | "outputs": [], 496 | "source": [ 497 | "image, label = dataset[232]\n", 498 | "print(image.shape, label)\n", 499 | "plt.imshow(image.permute(1, 2, 0))" 500 | ] 501 | }, 502 | { 503 | "cell_type": "code", 504 | "execution_count": null, 505 | "metadata": {}, 506 | "outputs": [], 507 | "source": [ 508 | "dummy_image = torch.rand((3, 300, 250))\n", 509 | "cropped_image = CenterCrop2().forward(dummy_image)" 510 | ] 511 | }, 512 | { 513 | "cell_type": "code", 514 | "execution_count": null, 515 | "metadata": {}, 516 | "outputs": [], 517 | "source": [ 518 | "# GRADED PART, DO NOT CHANGE!\n", 519 | "grader.set_answer(\"qRsZ1\", cropped_image.shape)" 520 | ] 521 | }, 522 | { 523 | "cell_type": "code", 524 | "execution_count": null, 525 | "metadata": {}, 526 | "outputs": [], 527 | "source": [ 528 | "# you can make submission with answers so far to check yourself at this stage\n", 529 | "grader.submit(COURSERA_EMAIL, COURSERA_TOKEN)" 530 | ] 531 | }, 532 | { 533 | "cell_type": "markdown", 534 | "metadata": { 535 | "id": "xLRLnBZ6eiRP" 536 | }, 537 | "source": [ 538 | "## Split into train/test" 539 | ] 540 | }, 541 | { 542 | "cell_type": "code", 543 | "execution_count": null, 544 | "metadata": { 545 | "executionInfo": { 546 | "elapsed": 730, 547 | "status": "ok", 548 | "timestamp": 1613301392051, 549 | "user": { 550 | "displayName": "Alexander Markovich", 551 | "photoUrl": "", 552 | "userId": "05353592946685554048" 553 | }, 554 | "user_tz": -180 555 | }, 556 | "id": "k2GT1v5NeiRP" 557 | }, 558 | "outputs": [], 559 | "source": [ 560 | "indexes = list(range(len(dataset)))\n", 561 | "train_indexes, test_indexes = train_test_split(\n", 562 | " indexes,\n", 563 | " test_size=0.2,\n", 564 | " random_state=42,\n", 565 | " stratify=dataset.labels\n", 566 | ")" 567 | ] 568 | }, 
 590 | { 591 | "cell_type": "code", 592 | "execution_count": null, 593 | "metadata": { 594 | "executionInfo": { 595 | "elapsed": 540, 596 | "status": "ok", 597 | "timestamp": 1613301393600, 598 | "user": { 599 | "displayName": "Alexander Markovich", 600 | "photoUrl": "", 601 | "userId": "05353592946685554048" 602 | }, 603 | "user_tz": -180 604 | }, 605 | "id": "_0jAS8I1eiRP" 606 | }, 607 | "outputs": [], 608 | "source": [ 609 | "train_dataset = torch.utils.data.Subset(dataset, train_indexes)\n", 610 | "test_dataset = torch.utils.data.Subset(dataset, test_indexes)" 611 | ] 612 | }, 613 | { 614 | "cell_type": "code", 615 | "execution_count": null, 616 | "metadata": { 617 | "executionInfo": { 618 | "elapsed": 549, 619 | "status": "ok", 620 | "timestamp": 1613301395036, 621 | "user": { 622 | "displayName": "Alexander Markovich", 623 | "photoUrl": "", 624 | "userId": "05353592946685554048" 625 | }, 626 | "user_tz": -180 627 | }, 628 | "id": "Do4dMgGPeiRP" 629 | }, 630 | "outputs": [], 631 | "source": [ 632 | "assert (len(train_dataset) + len(test_dataset)) == len(dataset)" 633 | ] 634 | }, 635 | { 636 | "cell_type": "code", 637 | "execution_count": null, 638 | "metadata": { 639 | "executionInfo": { 640 | "elapsed": 689, 641 | "status": "ok", 642 | "timestamp": 1613301397282, 643 | "user": { 644 | "displayName": "Alexander Markovich", 645 | "photoUrl": "", 646 | "userId": "05353592946685554048" 647 | }, 648 | "user_tz": -180 649 | }, 650 | "id": "x8OqtblDeiRQ" 651 | }, 652 | "outputs": [], 653 | "source": [ 654 | "BATCH_SIZE = 32\n", 655 | "\n", 656 | "# `pin_memory` speeds up processing if you use a GPU\n", 657 | "# `num_workers` also speeds up processing by using additional worker processes\n", 658 | "train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True,\n", 659 | " num_workers=4, pin_memory=True)\n", 660 | "test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE,\n", 661 | " num_workers=4, pin_memory=True)" 662 | ] 663 | }, 664 | { 665 | "cell_type": "code", 666 | "execution_count": null, 667 | "metadata": { 668 | "colab": { 669 | "base_uri": "https://localhost:8080/", 670 | "height": 559 671 | }, 672 | "executionInfo": { 673 | "elapsed": 2631, 674 | "status": "ok", 675 | "timestamp": 1613301407158, 676 | "user": { 677 | "displayName": "Alexander Markovich", 678 | "photoUrl": "", 679 | "userId": "05353592946685554048" 680 | }, 681 | "user_tz": -180 682 | }, 683 | "id": "jC_evNB1eiRQ", 684 | "outputId": "2639ccc1-4224-4ebb-d4b8-2e28e7b5a322" 685 | }, 686 | "outputs": [], 687 | "source": [ 688 | "cols = 8\n", 689 | "rows = 2\n", 690 | "\n", 691 | "fig = plt.figure(figsize=(20, 5))\n", 692 | "\n", 693 | "for i in range(cols):\n", 694 | " for j in range(rows):\n", 695 | " random_index = random.randint(0, len(train_dataset) - 1)\n", 696 | " \n", 697 | " ax = fig.add_subplot(rows, cols, i * rows + j + 1)\n", 698 | " ax.grid(False)\n", 699 | " ax.axis('off')\n", 700 | " \n", 701 | " image, label = train_dataset[random_index]\n", 702 | " \n", 703 | " # move the channel dimension to the end\n", 704 | " images = image.permute(1, 2, 0)\n", 705 | " \n", 706 | " ax.imshow(images)\n", 707 | " ax.set_title(label)\n", 708 | "\n", 709 | "plt.show()" 710 | ] 711 | }, 712 | { 713 | "cell_type": "markdown", 714 | "metadata": { 715 | "id": "33Wl-GgdeiRQ" 716 | }, 717 | "source": [ 718 | "# Training model" 719 | ] 720 | }, 721 | { 722 | "cell_type": "markdown", 723 | "metadata": { 724 | "id": "qncR5sC1eiRQ" 725 | }, 726 | "source": [ 727 | "You cannot train such a huge architecture from 
scratch with such a small dataset.\n", 707 | "\n", 708 | "But by fine-tuning the last layers of a pre-trained network, you can get a pretty good classifier very quickly." 709 | ] 710 | }, 711 | { 712 | "cell_type": "code", 713 | "execution_count": null, 714 | "metadata": { 715 | "executionInfo": { 716 | "elapsed": 529, 717 | "status": "ok", 718 | "timestamp": 1613301409733, 719 | "user": { 720 | "displayName": "Alexander Markovich", 721 | "photoUrl": "", 722 | "userId": "05353592946685554048" 723 | }, 724 | "user_tz": -180 725 | }, 726 | "id": "fW1C5QW2eiRR" 727 | }, 728 | "outputs": [], 729 | "source": [ 730 | "class PretrainedInceptionV3(nn.Module):\n", 731 | " \n", 732 | " def __init__(self, output_dim: int):\n", 733 | " super(PretrainedInceptionV3, self).__init__()\n", 734 | " \n", 735 | " # Download the pretrained model\n", 736 | " self.barebone = models.inception.inception_v3(\n", 737 | " pretrained=True, progress=True, aux_logits=False\n", 738 | " )\n", 739 | " \n", 740 | " # Freeze all layers\n", 741 | " for p in self.barebone.parameters():\n", 742 | " p.requires_grad_(False)\n", 743 | " \n", 744 | " # Unfreeze the penultimate block\n", 745 | " # so the network adjusts to the new data faster\n", 746 | " for p in self.barebone.Mixed_7c.parameters():\n", 747 | " p.requires_grad_(True)\n", 748 | " \n", 749 | " # Replace the last Linear layer\n", 750 | " in_features_final_fc = self.barebone.fc.in_features\n", 751 | " self.barebone.fc = nn.Linear(in_features_final_fc, output_dim)\n", 752 | " \n", 753 | " def forward(self, input: torch.Tensor):\n", 754 | " return self.barebone(input)" 755 | ] 756 | }, 757 | { 758 | "cell_type": "markdown", 759 | "metadata": { 760 | "id": "WSiurJXAeiRR" 761 | }, 762 | "source": [ 763 | "## Init train pipeline" 764 | ] 765 | }, 766 | { 767 | "cell_type": "code", 768 | "execution_count": null, 769 | "metadata": { 770 | "colab": { 771 | "base_uri": "https://localhost:8080/" 772 | }, 773 | "executionInfo": { 774 | "elapsed": 974, 775 | "status": "ok", 776 | "timestamp": 1613301441231, 777 | "user": { 778 | "displayName": "Alexander Markovich", 779 | "photoUrl": "", 780 | "userId": "05353592946685554048" 781 | }, 782 | "user_tz": -180 783 | }, 784 | "id": "U8dzLTN_eiRR", 785 | "outputId": "87828ac2-8aaf-49c4-e792-00b07c1aaf8d" 786 | }, 787 | "outputs": [], 788 | "source": [ 789 | "NUM_EPOCH = 20\n", 790 | "DEVICE = torch.device('cuda:0')\n", 791 | "HISTORY = collections.defaultdict(list)\n", 792 | "\n", 793 | "model = PretrainedInceptionV3(output_dim=len(train_dataset.dataset.classes)).to(DEVICE)\n", 794 | "criterion = nn.CrossEntropyLoss()\n", 795 | "optimizer = optim.Adam(\n", 796 | " [p for p in model.parameters() if p.requires_grad],\n", 797 | " lr=1e-2\n", 798 | ")\n", 799 | "\n", 800 | "# Set up learning rate scheduler\n", 801 | "# This will speed up the convergence of the model\n", 802 | "lr_scheduler = optim.lr_scheduler.MultiStepLR(\n", 803 | " optimizer=optimizer,\n", 804 | " milestones=[7, 15],\n", 805 | " gamma=0.1,\n", 806 | " verbose=True\n", 807 | ")" 808 | ] 809 | }, 810 | { 811 | "cell_type": "code", 812 | "execution_count": null, 813 | "metadata": { 814 | "colab": { 815 | "base_uri": "https://localhost:8080/", 816 | "height": 1000 817 | }, 818 | "executionInfo": { 819 | "elapsed": 25719, 820 | "status": "error", 821 | "timestamp": 1613302592535, 822 | "user": { 823 | "displayName": "Alexander Markovich", 824 | "photoUrl": "", 825 | "userId": "05353592946685554048" 826 | }, 827 | "user_tz": -180 828 | }, 829 | "id": "xwdHAZpVeiRT", 
830 | "outputId": "c6677906-e739-4939-962a-ab1f44b45f70" 831 | }, 832 | "outputs": [], 833 | "source": [ 834 | "for epoch in range(NUM_EPOCH):\n", 835 | "    # AverageMeter accumulates the running average of a metric\n", 836 | "    train_loss_meter = AverageMeter()\n", 837 | "    train_accuracy_meter = AverageMeter()\n", 838 | "    test_loss_meter = AverageMeter()\n", 839 | "    test_accuracy_meter = AverageMeter()\n", 840 | "    \n", 841 | "    # training loop\n", 842 | "    # sets the module in training mode -- it is important for layers like nn.Dropout\n", 843 | "    model.train()\n", 844 | "    # wrap `train_dataloader` in tqdm to visualize progress\n", 845 | "    for train_batch in tqdm.tqdm(train_dataloader):\n", 846 | "        \n", 847 | "        # unpack the batch and move it to the chosen device (for example, a GPU)\n", 848 | "        images, labels = train_batch\n", 849 | "        images = images.to(DEVICE)\n", 850 | "        labels = labels.to(DEVICE)\n", 851 | "        \n", 852 | "        # do forward pass\n", 853 | "        logits = model(images)\n", 854 | "        prediction = logits.argmax(dim=-1)\n", 855 | "        \n", 856 | "        # calculate loss (CrossEntropy)\n", 857 | "        loss = criterion(logits, labels)\n", 858 | "        \n", 859 | "        # zero out the previous gradients of our model parameters\n", 860 | "        optimizer.zero_grad()\n", 861 | "        \n", 862 | "        # calculate new gradients\n", 863 | "        loss.backward()\n", 864 | "        \n", 865 | "        # do optimization step\n", 866 | "        optimizer.step()\n", 867 | "        \n", 868 | "        # update the running averages of loss and accuracy\n", 869 | "        train_loss_meter.update(loss.item())\n", 870 | "        train_accuracy_meter.update(\n", 871 | "            calculate_accuracy(\n", 872 | "                prediction.detach(),\n", 873 | "                labels\n", 874 | "            ).item()\n", 875 | "        )\n", 876 | "    \n", 877 | "    # update lr_scheduler\n", 878 | "    lr_scheduler.step()\n", 879 | "    \n", 880 | "    # save average train loss and accuracy\n", 881 | "    HISTORY['train_loss'].append(train_loss_meter.avg)\n", 882 | "    HISTORY['train_accuracy'].append(train_accuracy_meter.avg)\n", 883 | "    \n", 884 | "    # lr_scheduler.get_last_lr() returns a list of LRs (one per parameter group)\n", 885 | "    HISTORY['learning_rate'].append(lr_scheduler.get_last_lr()[0])\n", 886 | "    \n", 887 | "    # testing loop\n", 888 | "    # sets the module in evaluation mode\n", 889 | "    model.eval()\n", 890 | "    for test_batch in test_dataloader:\n", 891 | "        images, labels = test_batch\n", 892 | "        images = images.to(DEVICE)\n", 893 | "        labels = labels.to(DEVICE)\n", 894 | "        \n", 895 | "        # add `with torch.no_grad()` to avoid computing gradients of the weights\n", 896 | "        with torch.no_grad():\n", 897 | "            # do everything as in the training loop\n", 898 | "            logits = model(images)\n", 899 | "            prediction = logits.argmax(dim=-1)\n", 900 | "            loss = criterion(logits, labels)\n", 901 | "            \n", 902 | "            test_loss_meter.update(loss.item())\n", 903 | "            test_accuracy_meter.update(\n", 904 | "                calculate_accuracy(\n", 905 | "                    prediction,\n", 906 | "                    labels\n", 907 | "                ).item()\n", 908 | "            )\n", 909 | "    \n", 910 | "    # save average test loss and accuracy\n", 911 | "    HISTORY['test_loss'].append(test_loss_meter.avg)\n", 912 | "    HISTORY['test_accuracy'].append(test_accuracy_meter.avg)\n", 913 | "    \n", 914 | "    # visualize everything together\n", 915 | "    display.clear_output()\n", 916 | "    fig, axes = plt.subplots(1, 3, figsize=(20, 10))\n", 917 | "    \n", 918 | "    axes[0].set_title('Loss (Cross Entropy)')\n", 919 | "    axes[0].plot(HISTORY['train_loss'], label='Train Loss')\n", 920 | "    axes[0].plot(HISTORY['test_loss'], label='Test Loss')\n", 921 | "    axes[0].grid()\n", 922 | "    axes[0].legend(fontsize=20)\n",
923 | "    \n", 924 | "    axes[1].set_title('Accuracy')\n", 925 | "    axes[1].plot(HISTORY['train_accuracy'], label='Train Accuracy')\n", 926 | "    axes[1].plot(HISTORY['test_accuracy'], label='Test Accuracy')\n", 927 | "    axes[1].grid()\n", 928 | "    axes[1].legend(fontsize=20)\n", 929 | "    \n", 930 | "    axes[2].set_title('Learning Rate')\n", 931 | "    axes[2].plot(HISTORY['learning_rate'])\n", 932 | "    axes[2].grid()\n", 933 | "    \n", 934 | "    plt.show()" 935 | ] 936 | }, 937 | { 938 | "cell_type": "code", 939 | "execution_count": null, 940 | "metadata": { 941 | "executionInfo": { 942 | "elapsed": 544, 943 | "status": "ok", 944 | "timestamp": 1613302603840, 945 | "user": { 946 | "displayName": "Alexander Markovich", 947 | "photoUrl": "", 948 | "userId": "05353592946685554048" 949 | }, 950 | "user_tz": -180 951 | }, 952 | "id": "hmvdQuxWvXhM" 953 | }, 954 | "outputs": [], 955 | "source": [ 956 | "assert max(HISTORY['test_accuracy']) > 0.95" 957 | ] 958 | }, 959 | { 960 | "cell_type": "code", 961 | "execution_count": null, 962 | "metadata": { 963 | "id": "n1VGLh0veiRU" 964 | }, 965 | "outputs": [], 966 | "source": [ 967 | "## GRADED PART, DO NOT CHANGE!\n", 968 | "grader.set_answer(\"wuwwC\", max(HISTORY['test_accuracy']))" 969 | ] 970 | }, 971 | { 972 | "cell_type": "code", 973 | "execution_count": null, 974 | "metadata": { 975 | "id": "01ESdQ1-eiRV" 976 | }, 977 | "outputs": [], 978 | "source": [ 979 | "# you can submit your answers so far to check yourself at this stage\n", 980 | "grader.submit(COURSERA_EMAIL, COURSERA_TOKEN)" 981 | ] 982 | } 983 | ], 984 | "metadata": { 985 | "accelerator": "GPU", 986 | "colab": { 987 | "collapsed_sections": [], 988 | "name": "week3_task2_fine_tuning_clean.ipynb", 989 | "provenance": [], 990 | "toc_visible": true 991 | }, 992 | "kernelspec": { 993 | "display_name": "Python 3", 994 | "language": "python", 995 | "name": "python3" 996 | }, 997 | "language_info": { 998 | "codemirror_mode": { 999 | "name": "ipython", 1000 | "version": 3 1001 | }, 1002 | "file_extension": ".py", 1003 | "mimetype": "text/x-python", 1004 | "name": "python", 1005 | "nbconvert_exporter": "python", 1006 | "pygments_lexer": "ipython3", 1007 | "version": "3.8.5" 1008 | }, 1009 | "toc": { 1010 | "colors": { 1011 | "hover_highlight": "#DAA520", 1012 | "navigate_num": "#000000", 1013 | "navigate_text": "#333333", 1014 | "running_highlight": "#FF0000", 1015 | "selected_highlight": "#FFD700", 1016 | "sidebar_border": "#EEEEEE", 1017 | "wrapper_background": "#FFFFFF" 1018 | }, 1019 | "moveMenuLeft": true, 1020 | "nav_menu": { 1021 | "height": "120px", 1022 | "width": "252px" 1023 | }, 1024 | "navigate_menu": true, 1025 | "number_sections": true, 1026 | "sideBar": true, 1027 | "threshold": 4, 1028 | "toc_cell": false, 1029 | "toc_section_display": "block", 1030 | "toc_window_display": false, 1031 | "widenNotebook": false 1032 | } 1033 | }, 1034 | "nbformat": 4, 1035 | "nbformat_minor": 1 1036 | } -------------------------------------------------------------------------------- /week05/week05_sga_text_generation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "kernelspec": { 6 | "display_name": "Python [default]", 7 | "language": "python", 8 | "name": "python3" 9 | }, 10 | "language_info": { 11 | "codemirror_mode": { 12 | "name": "ipython", 13 | "version": 3 14 | }, 15 | "file_extension": ".py", 16 | "mimetype": "text/x-python", 17 | "name": "python", 18 | "nbconvert_exporter": "python", 19
| "pygments_lexer": "ipython3", 20 | "version": "3.6.4" 21 | }, 22 | "colab": { 23 | "name": "SGA2_Text Generation.ipynb", 24 | "provenance": [], 25 | "collapsed_sections": [] 26 | }, 27 | "accelerator": "GPU" 28 | }, 29 | "cells": [ 30 | { 31 | "cell_type": "markdown", 32 | "metadata": { 33 | "id": "1Oiu5Fabu3ue" 34 | }, 35 | "source": [ 36 | "# Text Generation\n", 37 | "## Character-Level LSTM in PyTorch" 38 | ] 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "metadata": { 43 | "id": "1W8R8WgZceEk" 44 | }, 45 | "source": [ 46 | "In this assignment, you will train a Recurrent Neural Network to generate text one character at a time. The task is divided into steps for easier navigation.\n", 47 | "\n", 48 | "Let's start!" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "metadata": { 54 | "id": "sqUOE2flceEl" 55 | }, 56 | "source": [ 57 | "import numpy as np\n", 58 | "import torch\n", 59 | "from torch import nn\n", 60 | "import torch.nn.functional as F" 61 | ], 62 | "execution_count": null, 63 | "outputs": [] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "metadata": { 68 | "id": "-JY1C-COvVRb" 69 | }, 70 | "source": [ 71 | "##Step 0. Data" 72 | ] 73 | }, 74 | { 75 | "cell_type": "markdown", 76 | "metadata": { 77 | "id": "_wHfCDyzceEl" 78 | }, 79 | "source": [ 80 | "As training data, we will use \"War and Peace\" by Leo Tolstoy. The book in plain text format can be downloaded from [the Project Gutenberg website](https://www.gutenberg.org/ebooks/2600).\n", 81 | "\n", 82 | "Download the file and load the data." 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "metadata": { 88 | "id": "CnskyXUl4K3I" 89 | }, 90 | "source": [ 91 | "# open text file and read in data as `text`\n", 92 | "with open('2600-0.txt', 'r') as f:\n", 93 | "    text = f.read()" 94 | ], 95 | "execution_count": null, 96 | "outputs": [] 97 | }, 98 | { 99 | "cell_type": "markdown", 100 | "metadata": { 101 | "id": "Jp1Ljc4mceEl" 102 | }, 103 | "source": [ 104 | "Let's view the first 100 characters:" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "metadata": { 110 | "colab": { 111 | "base_uri": "https://localhost:8080/", 112 | "height": 35 113 | }, 114 | "id": "NUb_mCJS5W5i", 115 | "outputId": "73797486-20b2-4031-992f-5fb728db201a" 116 | }, 117 | "source": [ 118 | "text[:100]" 119 | ], 120 | "execution_count": null, 121 | "outputs": [ 122 | { 123 | "output_type": "execute_result", 124 | "data": { 125 | "application/vnd.google.colaboratory.intrinsic+json": { 126 | "type": "string" 127 | }, 128 | "text/plain": [ 129 | "'\\ufeff\\nThe Project Gutenberg EBook of War and Peace, by Leo Tolstoy\\n\\nThis eBook is for the use of anyone '" 130 | ] 131 | }, 132 | "metadata": { 133 | "tags": [] 134 | }, 135 | "execution_count": 3 136 | } 137 | ] 138 | }, 139 | { 140 | "cell_type": "markdown", 141 | "metadata": { 142 | "id": "O4TwJP8f5eOt" 143 | }, 144 | "source": [ 145 | "The actual book text starts at character `7277`:" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "metadata": { 151 | "id": "7VctmLQfceEl", 152 | "colab": { 153 | "base_uri": "https://localhost:8080/", 154 | "height": 52 155 | }, 156 | "outputId": "5db9958a-05c7-4625-d072-42444692360c" 157 | }, 158 | "source": [ 159 | "text[7277:7500]" 160 | ], 161 | "execution_count": null, 162 | "outputs": [ 163 | { 164 | "output_type": "execute_result", 165 | "data": { 166 | "application/vnd.google.colaboratory.intrinsic+json": { 167 | "type": "string" 168 | }, 169 | "text/plain": [ 170 | "'CHAPTER I\\n\\n“Well, Prince, so Genoa and Lucca are now just family estates of the\\nBuonapartes. 
But I warn you, if you don’t tell me that this means war,\nif you still try to defend the infamies and horrors perpetrated by that\n'" 171 | ] 172 | }, 173 | "metadata": { 174 | "tags": [] 175 | }, 176 | "execution_count": 4 177 | } 178 | ] 179 | }, 180 | { 181 | "cell_type": "markdown", 182 | "metadata": { 183 | "id": "6iG2vAfmwhWN" 184 | }, 185 | "source": [ 186 | "##Step 1. Tokenization (15 points)" 187 | ] 188 | }, 189 | { 190 | "cell_type": "markdown", 191 | "metadata": { 192 | "id": "4iC21bopceEl" 193 | }, 194 | "source": [ 195 | "Now let's create dictionaries for converting characters to integers and vice versa. This makes it easier to feed characters to the neural network as input." 196 | ] 197 | }, 198 | { 199 | "cell_type": "code", 200 | "metadata": { 201 | "id": "tYVlmnxLceEl" 202 | }, 203 | "source": [ 204 | "# Create two dictionaries:\n", 205 | "# int2char -- maps integers to characters\n", 206 | "# char2int -- maps characters to unique integers\n", 207 | "\n", 208 | "## YOUR CODE HERE\n", 209 | "\n", 210 | "# encode the text\n", 211 | "encoded = np.array([char2int[ch] for ch in text])" 212 | ], 213 | "execution_count": null, 214 | "outputs": [] 215 | }, 216 | { 217 | "cell_type": "markdown", 218 | "metadata": { 219 | "id": "oJIzwzSwceEl" 220 | }, 221 | "source": [ 222 | "Let's see what the encoding of characters into integers looks like:" 223 | ] 224 | }, 225 | { 226 | "cell_type": "code", 227 | "metadata": { 228 | "id": "WK1MYr_9ceEl", 229 | "colab": { 230 | "base_uri": "https://localhost:8080/" 231 | }, 232 | "outputId": "3a1bdf92-49f7-4a28-d0ae-2d7ba7013512" 233 | }, 234 | "source": [ 235 | "encoded[:100]" 236 | ], 237 | "execution_count": null, 238 | "outputs": [ 239 | { 240 | "output_type": "execute_result", 241 | "data": { 242 | "text/plain": [ 243 | "array([  3,  95,  30,  21,  52,  54,  45,  78,  38,  56,  52,   5,  92,  54,  76,  41,  92,\n", 244 | "        52,  66,   1,  52,  78,  42,  54,  60,   4,  38,  38,  27,  54,  38,  93,  54,  73,\n", 245 | "        62,  78,  54,  62,  66,  68,  54,  45,  52,  62,   5,  52,  25,  54,   1,  46,  54,\n", 246 | "        19,  52,  38,  54,  30,  38,  11,  98,  92,  38,  46,  95,  95,  30,  21,  90,  98,\n", 247 | "        54,  52,   4,  38,  38,  27,  54,  90,  98,  54,  93,  38,  78,  54,  92,  21,  52,\n", 248 | "        54,  41,  98,  52,  54,  38,  93,  54,  62,  66,  46,  38,  66,  52,  54])" 249 | ] 250 | }, 251 | "metadata": { 252 | "tags": [] 253 | }, 254 | "execution_count": 6 255 | } 256 | ] 257 | }, 258 | { 259 | "cell_type": "markdown", 260 | "metadata": { 261 | "id": "VEzrUe5Qyjz8" 262 | }, 263 | "source": [ 264 | "##Step 2. 
One-hot encoding (15 points)" 265 | ] 266 | }, 267 | { 268 | "cell_type": "markdown", 269 | "metadata": { 270 | "id": "azltQy-gceEl" 271 | }, 272 | "source": [ 273 | "LSTM expects one-hot encoded input: each character is converted to an integer (via our dictionary) and then to a vector that contains a 1 at the corresponding position and zeros everywhere else.\n", 274 | "\n", 275 | "Implement a function that performs this encoding.\n" 276 | ] 277 | }, 278 | { 279 | "cell_type": "code", 280 | "metadata": { 281 | "id": "OnahALhiceEl" 282 | }, 283 | "source": [ 284 | "def one_hot_encode(arr, n_labels):\n", 285 | "    \n", 286 | "    # arr - array of integers\n", 287 | "    # n_labels - number of labels (the size of a one-hot-encoded vector)\n", 288 | "\n", 289 | "    ## YOUR CODE HERE\n", 290 | "    \n", 291 | "    return one_hot" 292 | ], 293 | "execution_count": null, 294 | "outputs": [] 295 | }, 296 | { 297 | "cell_type": "code", 298 | "metadata": { 299 | "id": "L3lTdLKfceEl", 300 | "colab": { 301 | "base_uri": "https://localhost:8080/" 302 | }, 303 | "outputId": "ee6a6534-e772-452e-e8c6-19b945a39f56" 304 | }, 305 | "source": [ 306 | "# check that the function works correctly\n", 307 | "test_indx = np.array([[7, 2, 5]])\n", 308 | "one_hot = one_hot_encode(test_indx, 8)\n", 309 | "\n", 310 | "print(one_hot)" 311 | ], 312 | "execution_count": null, 313 | "outputs": [ 314 | { 315 | "output_type": "stream", 316 | "text": [ 317 | "[[[0. 0. 0. 0. 0. 0. 0. 1.]\n", 318 | "  [0. 0. 1. 0. 0. 0. 0. 0.]\n", 319 | "  [0. 0. 0. 0. 0. 1. 0. 0.]]]\n" 320 | ], 321 | "name": "stdout" 322 | } 323 | ] 324 | }, 325 | { 326 | "cell_type": "markdown", 327 | "metadata": { 328 | "id": "G7qAGC8R0lF1" 329 | }, 330 | "source": [ 331 | "The output must be\n", 332 | "\n", 333 | "```\n", 334 | "[[[0. 0. 0. 0. 0. 0. 0. 1.]\n", 335 | "  [0. 0. 1. 0. 0. 0. 0. 0.]\n", 336 | "  [0. 0. 0. 0. 0. 1. 0. 0.]]]\n", 337 | "```" 338 | ] 339 | }, 340 | { 341 | "cell_type": "markdown", 342 | "metadata": { 343 | "id": "_Qxw5Dg41HAO" 344 | }, 345 | "source": [ 346 | "##Step 3. Mini-batches (20 points)" 347 | ] 348 | }, 349 | { 350 | "cell_type": "markdown", 351 | "metadata": { 352 | "id": "50zKx6pM43RB" 353 | }, 354 | "source": [ 355 | "To train the neural network, we will organize mini-batches as follows: divide the entire input sequence `arr` into the desired number of subsequences (the `batch_size` parameter), and feed windows of length `seq_length` into the network." 356 | ] 357 | }, 358 | { 359 | "cell_type": "markdown", 360 | "metadata": { 361 | "id": "9YyL91CuceEl" 362 | }, 363 | "source": [ 364 | "\n" 365 | ] 366 | }, 367 | { 368 | "cell_type": "markdown", 369 | "metadata": { 370 | "id": "xzTSxOr37JyR" 371 | }, 372 | "source": [ 373 | "### How to create mini-batches\n", 374 | "\n", 375 | "\n", 376 | "1. Find the total number of full batches `n_batches` that fit in the text.\n", 377 | "2. Discard the leftover text that does not fit into full batches.\n", 378 | "3. Split the text into `n_batches` batches.\n", 379 | "4. Get `x` and `y` for each batch, 
where `y` is a version of `x` shifted by one.\n", 380 | "\n" 381 | ] 382 | }, 383 | { 384 | "cell_type": "code", 385 | "metadata": { 386 | "id": "8vmDKLiOceEl" 387 | }, 388 | "source": [ 389 | "def get_batches(arr, batch_size, seq_length):\n", 390 | "    # Create a generator that returns batches of size batch_size x seq_length\n", 391 | "    \n", 392 | "    batch_size_total = batch_size * seq_length\n", 393 | "    ## Get the number of batches we can make\n", 394 | "    n_batches = ## YOUR CODE HERE\n", 395 | "    \n", 396 | "    ## Keep only enough characters to make full batches\n", 397 | "    arr = ## YOUR CODE HERE\n", 398 | "    \n", 399 | "    ## Reshape into batch_size rows\n", 400 | "    arr = ## YOUR CODE HERE\n", 401 | "    \n", 402 | "    ## Iterate over the batches using a window of size seq_length\n", 403 | "    for n in range(0, arr.shape[1], seq_length):\n", 404 | "        x = ## YOUR CODE HERE\n", 405 | "        # The target is a version of x shifted by one (do not forget border conditions)\n", 406 | "        y = ## YOUR CODE HERE\n", 407 | "        yield x, y" 408 | ], 409 | "execution_count": null, 410 | "outputs": [] 411 | }, 412 | { 413 | "cell_type": "markdown", 414 | "metadata": { 415 | "id": "s9uKOvbqceEl" 416 | }, 417 | "source": [ 418 | "Let's check how our function works." 419 | ] 420 | }, 421 | { 422 | "cell_type": "code", 423 | "metadata": { 424 | "id": "qtKlLXi1ceEl" 425 | }, 426 | "source": [ 427 | "batches = get_batches(encoded, 4, 30)\n", 428 | "x, y = next(batches)" 429 | ], 430 | "execution_count": null, 431 | "outputs": [] 432 | }, 433 | { 434 | "cell_type": "code", 435 | "metadata": { 436 | "id": "Rg5MUTqqceEl", 437 | "colab": { 438 | "base_uri": "https://localhost:8080/" 439 | }, 440 | "outputId": "ed16bb01-29a8-4370-a2be-a87f26077e91" 441 | }, 442 | "source": [ 443 | "# printing out the first 10 items in a sequence\n", 444 | "print('x\n', x[:10, :10])\n", 445 | "print('\ny\n', y[:10, :10])" 446 | ], 447 | "execution_count": null, 448 | "outputs": [ 449 | { 450 | "output_type": "stream", 451 | "text": [ 452 | "x\n", 453 | " [[  3  95  30  21  52  54  45  78  38  56]\n", 454 | " [ 54  98  62  46  94  54  16  38  41  10]\n", 455 | " [ 54  62  66  68  54 100  21  90   5  21]\n", 456 | " [ 54  21  62  68  54  62  98  27  52  68]]\n", 457 | "\n", 458 | "y\n", 459 | " [[ 95  30  21  52  54  45  78  38  56  52]\n", 460 | " [ 98  62  46  94  54  16  38  41  10  78]\n", 461 | " [ 62  66  68  54 100  21  90   5  21  54]\n", 462 | " [ 21  62  68  54  62  98  27  52  68  54]]\n" 463 | ], 464 | "name": "stdout" 465 | } 466 | ] 467 | }, 468 | { 469 | "cell_type": "markdown", 470 | "metadata": { 471 | "id": "R_qHIAEIceEl" 472 | }, 473 | "source": [ 474 | "If you've implemented get_batches correctly, the result should look something like this (exact numbers may vary):\n", 475 | "```\n", 476 | "x\n", 477 | " [[ 48  94  12 110  32  96 107  34  18 106]\n", 478 | " [ 96  78   1  69  32  34  18  34  27   3]\n", 479 | " [  3  96   0  18  96  86  18 100  34  96]\n", 480 | " [ 32  96 110  42 101  96   0  18  19  96]]\n", 481 | "\n", 482 | "y\n", 483 | " [[ 94  12 110  32  96 107  34  18 106  32]\n", 484 | " [ 78   1  69  32  34  18  34  27   3  94]\n", 485 | " [ 96   0  18  96  86  18 100  34  96   1]\n", 486 | " [ 96 110  42 101  96   0  18  19  96   3]]\n", 487 | " ```\n", 488 | "Make sure the data is shifted one step for `y`." 489 | ] 490 | }, 491 | { 492 | "cell_type": "markdown", 493 | "metadata": { 494 | "id": "tholjONVAJUa" 495 | }, 496 | "source": [ 497 | "##Step 4. 
Defining the network (40 points)" 498 | ] 499 | }, 500 | { 501 | "cell_type": "markdown", 502 | "metadata": { 503 | "id": "E7s5eRaoceEl" 504 | }, 505 | "source": [ 506 | "### Model Architecture\n", 507 | "\n", 508 | "The proposed architecture is as follows:\n", 509 | "\n", 510 | "* Define an [LSTM layer](https://pytorch.org/docs/stable/generated/torch.nn.LSTM.html#torch.nn.LSTM) with `dropout=drop_prob` and `batch_first=True` (since the batch dimension comes first in our input).\n", 511 | "* Define a [Dropout layer](https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html#torch.nn.Dropout) with `drop_prob`.\n", 512 | "* Define a [Linear layer](https://pytorch.org/docs/stable/generated/torch.nn.Linear.html#torch.nn.Linear) with `in_features=n_hidden` and `out_features` equal to the number of characters." 513 | ] 514 | }, 515 | { 516 | "cell_type": "code", 517 | "metadata": { 518 | "id": "HlTnDntHceEl", 519 | "colab": { 520 | "base_uri": "https://localhost:8080/" 521 | }, 522 | "outputId": "67fed249-72c3-42e6-c363-95b6e077171c" 523 | }, 524 | "source": [ 525 | "# check if GPU is available\n", 526 | "train_on_gpu = torch.cuda.is_available()\n", 527 | "if(train_on_gpu):\n", 528 | "    print('Training on GPU')\n", 529 | "else: \n", 530 | "    print('No GPU available, training on CPU; consider making n_epochs very small.')" 531 | ], 532 | "execution_count": null, 533 | "outputs": [ 534 | { 535 | "output_type": "stream", 536 | "text": [ 537 | "Training on GPU\n" 538 | ], 539 | "name": "stdout" 540 | } 541 | ] 542 | }, 543 | { 544 | "cell_type": "code", 545 | "metadata": { 546 | "id": "lg-SvaGhceEl" 547 | }, 548 | "source": [ 549 | "class CharRNN(nn.Module):\n", 550 | "    \n", 551 | "    def __init__(self, tokens, n_hidden=256, n_layers=2,\n", 552 | "                               drop_prob=0.5, lr=0.001):\n", 553 | "        super().__init__()\n", 554 | "        self.drop_prob = drop_prob\n", 555 | "        self.n_layers = n_layers\n", 556 | "        self.n_hidden = n_hidden\n", 557 | "        self.lr = lr\n", 558 | "        \n", 559 | "        # creating character dictionaries\n", 560 | "        self.chars = tokens\n", 561 | "        self.int2char = dict(enumerate(self.chars))\n", 562 | "        self.char2int = {ch: ii for ii, ch in self.int2char.items()}\n", 563 | "        \n", 564 | "        # Define the LSTM layer\n", 565 | "        ## YOUR CODE HERE\n", 566 | "\n", 567 | "        # Define a dropout layer\n", 568 | "        ## YOUR CODE HERE\n", 569 | "\n", 570 | "        # Define the final, fully-connected output layer\n", 571 | "        ## YOUR CODE HERE\n", 572 | "    \n", 573 | "    \n", 574 | "    def forward(self, x, hidden):\n", 575 | "        ''' Forward pass through the network. \n", 576 | "            The inputs are `x` and the hidden/cell state tuple `hidden`. 
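Here `x` is a one-hot batch of shape (batch_size, seq_length, len(self.chars)), since we set batch_first=True, and `hidden` is a pair (h, c) of tensors, each of shape (n_layers, batch_size, n_hidden), as produced by init_hidden below. 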
'''\n", 577 | "        \n", 578 | "        # Get the outputs and the new hidden state from the lstm\n", 579 | "        ## YOUR CODE HERE\n", 580 | "\n", 581 | "        # Pass through a dropout layer\n", 582 | "        ## YOUR CODE HERE\n", 583 | "\n", 584 | "        # Stack up LSTM outputs using view\n", 585 | "        # you may need to use contiguous to reshape the output\n", 586 | "        ## YOUR CODE HERE\n", 587 | "\n", 588 | "        # Put x through the fully-connected layer\n", 589 | "        ## YOUR CODE HERE\n", 590 | "\n", 591 | "        # return the final output and the hidden state\n", 592 | "        return out, hidden\n", 593 | "    \n", 594 | "    \n", 595 | "    def init_hidden(self, batch_size):\n", 596 | "        ''' Initializes hidden state '''\n", 597 | "        # Create two new tensors with sizes n_layers x batch_size x n_hidden,\n", 598 | "        # initialized to zero, for hidden state and cell state of LSTM\n", 599 | "        weight = next(self.parameters()).data\n", 600 | "        \n", 601 | "        if (train_on_gpu):\n", 602 | "            hidden = (weight.new(self.n_layers, batch_size, self.n_hidden).zero_().cuda(),\n", 603 | "                  weight.new(self.n_layers, batch_size, self.n_hidden).zero_().cuda())\n", 604 | "        else:\n", 605 | "            hidden = (weight.new(self.n_layers, batch_size, self.n_hidden).zero_(),\n", 606 | "                      weight.new(self.n_layers, batch_size, self.n_hidden).zero_())\n", 607 | "        \n", 608 | "        return hidden\n", 609 | "        " 610 | ], 611 | "execution_count": null, 612 | "outputs": [] 613 | }, 614 | { 615 | "cell_type": "markdown", 616 | "metadata": { 617 | "id": "VPX7bf08EipB" 618 | }, 619 | "source": [ 620 | "##Step 5. Train the model (10 points)" 621 | ] 622 | }, 623 | { 624 | "cell_type": "markdown", 625 | "metadata": { 626 | "id": "zBgcPj1mGOZN" 627 | }, 628 | "source": [ 629 | "We use cross-entropy as the loss function, Adam as the optimizer, and [`clip_grad_norm_`](https://pytorch.org/docs/stable/_modules/torch/nn/utils/clip_grad.html) to mitigate exploding gradients."
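,
"\n",
"As a quick sanity check on shapes (a standalone sketch, not part of the graded solution): `nn.CrossEntropyLoss` expects logits of shape `(N, C)` and integer class targets of shape `(N,)`. That is why the training loop below flattens the LSTM outputs and the targets to `batch_size * seq_length` rows before computing the loss:\n",
"\n",
"```python\n",
"import torch\n",
"from torch import nn\n",
"\n",
"criterion = nn.CrossEntropyLoss()\n",
"N, C = 10 * 50, 83                 # batch_size * seq_length (train() defaults), placeholder vocab size\n",
"logits = torch.randn(N, C)         # stands in for the view-flattened network output\n",
"targets = torch.randint(0, C, (N,))\n",
"loss = criterion(logits, targets)  # a scalar tensor\n",
"```"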
630 | ] 631 | }, 632 | { 633 | "cell_type": "code", 634 | "metadata": { 635 | "id": "lv8VkRI0ceEl" 636 | }, 637 | "source": [ 638 | "def train(net, data, epochs=10, batch_size=10, seq_length=50, lr=0.001, clip=5, val_frac=0.1, print_every=10):\n", 639 | "    ''' Training a network \n", 640 | "    \n", 641 | "        Arguments\n", 642 | "        ---------\n", 643 | "        lr: learning rate\n", 644 | "        clip: gradient clipping\n", 645 | "        val_frac: Fraction of data to hold out for validation\n", 646 | "        print_every: Number of steps for printing training and validation loss\n", 647 | "    \n", 648 | "    '''\n", 649 | "    net.train()\n", 650 | "    \n", 651 | "    opt = torch.optim.Adam(net.parameters(), lr=lr)\n", 652 | "    criterion = nn.CrossEntropyLoss()\n", 653 | "    \n", 654 | "    # create training and validation data\n", 655 | "    val_idx = int(len(data)*(1-val_frac))\n", 656 | "    data, val_data = data[:val_idx], data[val_idx:]\n", 657 | "    \n", 658 | "    if(train_on_gpu):\n", 659 | "        net.cuda()\n", 660 | "    \n", 661 | "    counter = 0\n", 662 | "    n_chars = len(net.chars)\n", 663 | "    for e in range(epochs):\n", 664 | "        # initialize hidden state\n", 665 | "        h = net.init_hidden(batch_size)\n", 666 | "        \n", 667 | "        for x, y in get_batches(data, batch_size, seq_length):\n", 668 | "            counter += 1\n", 669 | "            \n", 670 | "            # One-hot encode our data and make them Torch tensors\n", 671 | "            x = one_hot_encode(x, n_chars)\n", 672 | "            inputs, targets = torch.from_numpy(x), torch.from_numpy(y)\n", 673 | "            \n", 674 | "            if(train_on_gpu):\n", 675 | "                inputs, targets = inputs.cuda(), targets.cuda()\n", 676 | "\n", 677 | "            # Creating new variables for the hidden state, otherwise\n", 678 | "            # we'd backprop through the entire training history\n", 679 | "            h = tuple([each.data for each in h])\n", 680 | "\n", 681 | "            net.zero_grad()\n", 682 | "            \n", 683 | "            output, h = net(inputs, h)\n", 684 | "            \n", 685 | "            loss = criterion(output, targets.view(batch_size*seq_length).long())\n", 686 | "            loss.backward()\n", 687 | "\n", 688 | "            # `clip_grad_norm_` helps prevent the exploding gradient problem in RNNs\n", 689 | "            nn.utils.clip_grad_norm_(net.parameters(), clip)\n", 690 | "            opt.step()\n", 691 | "            \n", 692 | "            # loss stats\n", 693 | "            if counter % print_every == 0:\n", 694 | "                # Get validation loss\n", 695 | "                val_h = net.init_hidden(batch_size)\n", 696 | "                val_losses = []\n", 697 | "                net.eval()\n", 698 | "                for x, y in get_batches(val_data, batch_size, seq_length):\n", 699 | "                    # One-hot encode our data and make them Torch tensors\n", 700 | "                    x = one_hot_encode(x, n_chars)\n", 701 | "                    x, y = torch.from_numpy(x), torch.from_numpy(y)\n", 702 | "                    \n", 703 | "                    # Creating new variables for the hidden state, otherwise\n", 704 | "                    # we'd backprop through the entire training history\n", 705 | "                    val_h = tuple([each.data for each in val_h])\n", 706 | "                    \n", 707 | "                    inputs, targets = x, y\n", 708 | "                    if(train_on_gpu):\n", 709 | "                        inputs, targets = inputs.cuda(), targets.cuda()\n", 710 | "\n", 711 | "                    output, val_h = net(inputs, val_h)\n", 712 | "                    val_loss = criterion(output, targets.view(batch_size*seq_length).long())\n", 713 | "                \n", 714 | "                    val_losses.append(val_loss.item())\n", 715 | "                \n", 716 | "                net.train() # reset to train mode after iterating through the validation data\n", 717 | "                \n", 718 | "                print(\"Epoch: {}/{}...\".format(e+1, epochs),\n", 719 | "                      \"Step: {}...\".format(counter),\n", 720 | "                      \"Loss: {:.4f}...\".format(loss.item()),\n", 721 | "                      \"Val Loss: {:.4f}\".format(np.mean(val_losses)))" 722 | ], 723 | "execution_count": null, 724 | "outputs": [] 725 | }, 726 | { 
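"cell_type": "markdown",
"metadata": {},
"source": [
"For orientation, a starting configuration in the spirit of classic char-rnn experiments (an assumption for illustration, not the graded answer) could be:\n",
"\n",
"```python\n",
"n_hidden = 512    # size of the LSTM hidden state\n",
"n_layers = 2      # number of stacked LSTM layers\n",
"batch_size = 128  # parallel subsequences per batch\n",
"seq_length = 100  # characters per training window\n",
"n_epochs = 20     # start smaller for a quick smoke test\n",
"```\n",
"\n",
"Larger hidden sizes tend to produce more coherent samples at the cost of slower training."
]
},
{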
727 | "cell_type": "markdown", 728 | "metadata": { 729 | "id": "Gt0q4KGEceEm" 730 | }, 731 | "source": [ 732 | "Initialize the model and set hyperparameters." 733 | ] 734 | }, 735 | { 736 | "cell_type": "code", 737 | "metadata": { 738 | "id": "OOgs59nDceEm" 739 | }, 740 | "source": [ 741 | "# Set your model hyperparameters\n", 742 | "\n", 743 | "n_hidden = ## YOUR CODE HERE\n", 744 | "n_layers = ## YOUR CODE HERE\n", 745 | "\n", 746 | "net = CharRNN(chars, n_hidden, n_layers)\n", 747 | "print(net)" 748 | ], 749 | "execution_count": null, 750 | "outputs": [] 751 | }, 752 | { 753 | "cell_type": "markdown", 754 | "metadata": { 755 | "id": "XHy6mECuceEm" 756 | }, 757 | "source": [ 758 | "Set training hyperparameters." 759 | ] 760 | }, 761 | { 762 | "cell_type": "code", 763 | "metadata": { 764 | "scrolled": true, 765 | "id": "ABqi9klKceEm" 766 | }, 767 | "source": [ 768 | "batch_size = ## YOUR CODE HERE\n", 769 | "seq_length = ## YOUR CODE HERE\n", 770 | "# start small if you are just testing initial behavior\n", 771 | "n_epochs = ## YOUR CODE HERE\n", 772 | "\n", 773 | "# train the model\n", 774 | "train(net, encoded, epochs=n_epochs, batch_size=batch_size, seq_length=seq_length, lr=0.001, print_every=10)" 775 | ], 776 | "execution_count": null, 777 | "outputs": [] 778 | }, 779 | { 780 | "cell_type": "markdown", 781 | "metadata": { 782 | "id": "n-Nqbv1CICR7" 783 | }, 784 | "source": [ 785 | "##Step 6. Making Predictions" 786 | ] 787 | }, 788 | { 789 | "cell_type": "markdown", 790 | "metadata": { 791 | "id": "K2sJhx5iceEm" 792 | }, 793 | "source": [ 794 | "Now that the model is trained, we want to predict the next character given a sequence of input characters: we pass a character in, the network predicts the next character, we feed that character back in, get another prediction, and so on.\n", 795 | "\n", 796 | "### Details of a `predict` function\n", 797 | "\n", 798 | "Our RNN's output comes from a fully connected layer that produces **scores for the next character**. To actually pick the next character, we apply the softmax function, which gives us a **probability** distribution that we can sample from.\n", 799 | "\n", 800 | "**Top K sampling**\n", 801 | "\n", 802 | "Our predictions come from a categorical probability distribution over all possible characters. We can make the sampling process smarter by considering only the $K$ most likely characters. This prevents the network from producing completely absurd characters, while still introducing some noise and randomness into the generated text. This technique is called [top K](https://pytorch.org/docs/stable/generated/torch.topk.html#torch.topk) sampling."
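,
"\n",
"A tiny illustration of the `torch.topk` step (the scores here are made up):\n",
"\n",
"```python\n",
"import torch\n",
"import torch.nn.functional as F\n",
"\n",
"scores = torch.tensor([2.0, 0.5, 1.0, 0.1])  # fake next-character scores\n",
"p = F.softmax(scores, dim=0)\n",
"top_p, top_idx = p.topk(2)   # the 2 largest probabilities and their indices\n",
"top_p = top_p / top_p.sum()  # renormalize before sampling, as in predict() below\n",
"```"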
803 | ] 804 | }, 805 | { 806 | "cell_type": "code", 807 | "metadata": { 808 | "id": "QEIRW_B2ceEm" 809 | }, 810 | "source": [ 811 | "def predict(net, char, h=None, top_k=None):\n", 812 | "        ''' Given a character, predict the next character.\n", 813 | "            Returns the predicted character and the hidden state.\n", 814 | "        '''\n", 815 | "        \n", 816 | "        # tensor inputs\n", 817 | "        x = np.array([[net.char2int[char]]])\n", 818 | "        x = one_hot_encode(x, len(net.chars))\n", 819 | "        inputs = torch.from_numpy(x)\n", 820 | "        \n", 821 | "        if(train_on_gpu):\n", 822 | "            inputs = inputs.cuda()\n", 823 | "        \n", 824 | "        # detach hidden state from history\n", 825 | "        h = tuple([each.data for each in h])\n", 826 | "        # get the output of the model\n", 827 | "        out, h = net(inputs, h)\n", 828 | "\n", 829 | "        # get the character probabilities\n", 830 | "        p = F.softmax(out, dim=1).data\n", 831 | "        if(train_on_gpu):\n", 832 | "            p = p.cpu() # move to cpu\n", 833 | "        \n", 834 | "        # get top characters\n", 835 | "        if top_k is None:\n", 836 | "            top_ch = np.arange(len(net.chars))\n", 837 | "        else:\n", 838 | "            p, top_ch = p.topk(top_k)\n", 839 | "            top_ch = top_ch.numpy().squeeze()\n", 840 | "        \n", 841 | "        # select the likely next character with some element of randomness\n", 842 | "        p = p.numpy().squeeze()\n", 843 | "        char = np.random.choice(top_ch, p=p/p.sum())\n", 844 | "        \n", 845 | "        # return the encoded value of the predicted char and the hidden state\n", 846 | "        return net.int2char[char], h" 847 | ], 848 | "execution_count": null, 849 | "outputs": [] 850 | }, 851 | { 852 | "cell_type": "markdown", 853 | "metadata": { 854 | "id": "OG38j3gQceEm" 855 | }, 856 | "source": [ 857 | "To generate text, we first feed in some initial characters; let's call them `prime`." 858 | ] 859 | }, 860 | { 861 | "cell_type": "code", 862 | "metadata": { 863 | "id": "P9vpB5gRceEm" 864 | }, 865 | "source": [ 866 | "def sample(net, size, prime='The', top_k=None):\n", 867 | "        \n", 868 | "    if(train_on_gpu):\n", 869 | "        net.cuda()\n", 870 | "    else:\n", 871 | "        net.cpu()\n", 872 | "    \n", 873 | "    net.eval() # eval mode\n", 874 | "    \n", 875 | "    # First off, run through the prime characters\n", 876 | "    chars = [ch for ch in prime]\n", 877 | "    h = net.init_hidden(1)\n", 878 | "    for ch in prime:\n", 879 | "        char, h = predict(net, ch, h, top_k=top_k)\n", 880 | "\n", 881 | "    chars.append(char)\n", 882 | "    \n", 883 | "    # Now pass in the previous character and get a new one\n", 884 | "    for ii in range(size):\n", 885 | "        char, h = predict(net, chars[-1], h, top_k=top_k)\n", 886 | "        chars.append(char)\n", 887 | "\n", 888 | "    return ''.join(chars)" 889 | ], 890 | "execution_count": null, 891 | "outputs": [] 892 | }, 893 | { 894 | "cell_type": "code", 895 | "metadata": { 896 | "id": "BqmFA9eEceEm", 897 | "colab": { 898 | "base_uri": "https://localhost:8080/" 899 | }, 900 | "outputId": "df57ac2f-0ca4-4802-e2c8-e838dc08c846" 901 | }, 902 | "source": [ 903 | "print(sample(net, 1000, prime='Anna', top_k=5))" 904 | ], 905 | "execution_count": null, 906 | "outputs": [ 907 | { 908 | "output_type": "stream", 909 | "text": [ 910 | "Annatzen\n", 911 | "conscious is what there would\n", 912 | "be he wrote a bed of men without my daughter. I am going to spit a barcher, at his can on the sitting old man on to herself her sake to meen it is a chirch sight. 
It is now that the Emperor says by the same said.”\n", 913 | "\n", 914 | "“Why, all how so my dear!\n", 915 | "I will get about anything.”\n", 916 | "\n", 917 | "“Will your excellency,” said the countess a wold shouting with a\n", 918 | "feeling\n", 919 | "of\n", 920 | "angry and a stritten at the drawing room, and the count, the\n", 921 | "staff,\n", 922 | "help the study of happiness, and the parts around and state of\n", 923 | "way. She was naming the room with a\n", 924 | "smile of whites who was ball on her free his white hand and as had a position that she had bore taken in the depriments. She was not it was a single to spent this canden.\n", 925 | "\n", 926 | "“There wand and here in this and that this, I am gering about the service. If he was? How\n", 927 | "thinks we her side. A corrude and the midst of the country!” he said in the stall of this sige of the same ation that he could not look at the counte who had seen him turned on the crew and s\n" 928 | ], 929 | "name": "stdout" 930 | } 931 | ] 932 | }, 933 | { 934 | "cell_type": "code", 935 | "metadata": { 936 | "id": "TH1ag4h1ceEm" 937 | }, 938 | "source": [ 939 | "" 940 | ], 941 | "execution_count": null, 942 | "outputs": [] 943 | } 944 | ] 945 | } --------------------------------------------------------------------------------