├── .gitignore ├── AlexNet ├── README.md ├── alexnet.py ├── plots │ └── alexnet_metrics.png ├── train_alexnet.ipynb ├── trainer.py └── utils.py ├── LinearRegression ├── eval.ipynb └── linear_regression.py ├── LogisticRegression ├── eval.ipynb └── logistic_regression.py ├── README.md └── ResNet ├── README.md ├── plots ├── baseline_pytorch_resnet18_metrics.png └── resnet18_metrics.png ├── resnet18.py ├── train_resnet18.ipynb └── trainer.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 160 | #.idea/ 161 | -------------------------------------------------------------------------------- /AlexNet/README.md: -------------------------------------------------------------------------------- 1 | # AlexNet Implementation 2 | A toy project to learn, implement, and train the famous AlexNet Architecture from scratch from the 2012 paper 3 | "[ImageNet Classification with Deep Convolutional Neural Networks](https://proceedings.neurips.cc/paper/2012/file/c399862d3b9d6b76c8436e924a68c45b-Paper.pdf)" simply because we can. 4 | 5 | ## Some Personal Details on Model Training 6 | - Find a suitable batch size for training. Sticking to $128$ in accordance with AlexNet paper. 7 | - Ensure that data preprocessing transformations are appropriate and desired. 8 | 9 | # Results 10 | 11 | The final results after training for ~$30$ epochs are as follows: 12 | `Test Top-1 accuracy: 31.754350662231445 % | Top-5 accuracy: 58.7618670886076` 13 | 14 | ![alexnet results](plots/alexnet_metrics.png) 15 | 16 | Considering that the [original AlexNet paper](https://proceedings.neurips.cc/paper/2012/file/c399862d3b9d6b76c8436e924a68c45b-Paper.pdf) reports top-1 and top-5 error rate scores of 67.4% and 40.9% 17 | (Top-1 and top-5 accuracies are inversely equivalent being 32.6% and 59.1% respectively) on ImageNet, we can say that the implementation is sufficiently accurate for the Tiny ImageNet dataset. 18 | 19 | 20 | # Acknowledgements 21 | - [ImageNet Classification with Deep Convolutional Neural Networks](https://proceedings.neurips.cc/paper/2012/file/c399862d3b9d6b76c8436e924a68c45b-Paper.pdf) 22 | - [dansuh17/alexnet-pytorch](https://github.com/dansuh17/alexnet-pytorch) 23 | - [Writing AlexNet from Scratch in PyTorch](https://blog.paperspace.com/alexnet-pytorch/#data-loading) 24 | - https://pytorch.org/hub/pytorch_vision_alexnet/ 25 | - [Difference between AlexNet, VGGNet, ResNet, and Inception](https://towardsdatascience.com/the-w3h-of-alexnet-vggnet-resnet-and-inception-7baaaecccc96) -------------------------------------------------------------------------------- /AlexNet/alexnet.py: -------------------------------------------------------------------------------- 1 | """ 2 | Implementing the famous AlexNet Architecture from scratch from the 2012 paper 3 | "ImageNet Classification with Deep Convolutional Neural Networks" 4 | """ 5 | 6 | import torch 7 | import torch.nn as nn 8 | 9 | 10 | class AlexNet(nn.Module): 11 | 12 | def __init__(self, n_classes: int = 1000) -> None: 13 | super().__init__() 14 | 15 | self.features = nn.Sequential( 16 | nn.Conv2d(3, 96, kernel_size=11, stride=4), 17 | nn.ReLU(), 18 | nn.LocalResponseNorm(size=5, alpha=10e-4, beta=0.75, k=2), 19 | nn.MaxPool2d(kernel_size=3, stride=2), 20 | nn.Conv2d(96, 256, kernel_size=5, padding=2), 21 | nn.ReLU(), 22 | nn.LocalResponseNorm(size=5, alpha=10e-4, beta=0.75, k=2), 23 | nn.MaxPool2d(kernel_size=3, stride=2), 24 | nn.Conv2d(256, 384, kernel_size=3, padding=1), 25 | nn.ReLU(), 26 | nn.Conv2d(384, 384, kernel_size=3, padding=1), 27 | nn.ReLU(), 28 | nn.Conv2d(384, 256, kernel_size=3, padding=1), 29 | nn.ReLU(), 30 | nn.MaxPool2d(kernel_size=3, stride=2), 31 | ) 32 | 33 | self.classifier = nn.Sequential( 34 | nn.Dropout(0.5), 35 | nn.Linear(in_features=6 * 6 * 256, out_features=4096), 36 | nn.ReLU(), 37 | nn.Dropout(0.5), 38 | nn.Linear(in_features=4096, out_features=4096), 39 | nn.ReLU(), 40 | nn.Linear(in_features=4096, out_features=n_classes), 41 | ) 42 | 43 | def forward(self, x: torch.Tensor) -> torch.Tensor: 44 | 45 | x = self.features(x) # [bs, 256, 6, 6] 46 | x = x.reshape(x.size(0), -1) # reshape to [bs, 6*6*256] = [bs, 9216] 47 | o = self.classifier(x) 48 | 49 | return o 50 | 51 | 52 | # test basic forward pass of AlexNet 53 | if __name__ == "__main__": 54 | alexnet = AlexNet() 55 | n_params = sum(p.numel() for p in alexnet.parameters()) 56 | n_trainable_params = sum(p.numel() for p in alexnet.parameters() if p.requires_grad) 57 | 58 | print(f"Number of parameters: {n_params}") 59 | print(f"Number of trainable parameters: {n_trainable_params}") 60 | 61 | # paper mentions 224 x 224 but seems to be a mistake? 62 | dummy_image = torch.randn(1, 3, 227, 227) 63 | out = alexnet(dummy_image) 64 | 65 | assert out.shape == (1, 1000), f"Expected shape: (1, 1000) | Actual shape: {out.shape}" 66 | 67 | print(f"\nModel Summary:\n========\n{alexnet}") 68 | -------------------------------------------------------------------------------- /AlexNet/plots/alexnet_metrics.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aandyw/StuffFromScratch/74578e8ccfd4bc40eaa06d9082e48ee6dfb85fb8/AlexNet/plots/alexnet_metrics.png -------------------------------------------------------------------------------- /AlexNet/trainer.py: -------------------------------------------------------------------------------- 1 | """Trainer class for AlexNet""" 2 | 3 | import sys, os 4 | import datetime 5 | import matplotlib.pyplot as plt 6 | from tqdm import tqdm 7 | import logging 8 | 9 | import torch 10 | import torch.nn as nn 11 | import torch.optim as optim 12 | from torch.utils.data import DataLoader 13 | 14 | 15 | class Trainer: 16 | def __init__( 17 | self, 18 | model: nn.Module, 19 | model_name: str = "alexnet", 20 | batch_size: int = 256, 21 | learning_rate: float = 0.01, 22 | weight_decay: float = 0.0005, 23 | momentum: float = 0.9, 24 | num_epochs: int = 30, 25 | check_val_every_n_epoch: int = 1, 26 | device: str = "cpu", 27 | checkpoints_dir: str = "checkpoints", 28 | ) -> None: 29 | """Trainer object to facilitate training and evaluation""" 30 | 31 | self.model = model 32 | self.model_name = model_name 33 | 34 | # training configurations 35 | self.batch_size = batch_size # does nothing; mainly for viz 36 | self.learning_rate = learning_rate 37 | self.momentum = momentum 38 | self.weight_decay = weight_decay 39 | self.num_epochs = num_epochs 40 | self.check_val_every_n_epoch = check_val_every_n_epoch 41 | self.device = device 42 | self.model.to(self.device) 43 | 44 | # set loss function and optimizer 45 | self.criterion = nn.CrossEntropyLoss() 46 | 47 | # SGD used by original alexnet paper 48 | self.optimizer = optim.SGD( 49 | self.model.parameters(), lr=self.learning_rate, momentum=self.momentum, weight_decay=self.weight_decay 50 | ) 51 | 52 | # Decays lr of each parameter group by 0.1 every step_size epochs 53 | self.scheduler = optim.lr_scheduler.StepLR(self.optimizer, step_size=7, gamma=0.1) 54 | 55 | # model metrics 56 | self.train_losses = [] 57 | self.train_accuracies = [] 58 | self.train_top_k_accuracies = [] 59 | self.val_losses = [] 60 | self.val_accuracies = [] 61 | self.val_top_k_accuracies = [] 62 | 63 | # logging info 64 | logging.basicConfig(stream=sys.stdout, level=logging.INFO, format="%(levelname)s | %(message)s") 65 | self.logger = logging.getLogger() 66 | 67 | # create checkpoints directory 68 | self.checkpoints_dir = checkpoints_dir 69 | os.makedirs(self.checkpoints_dir, exist_ok=True) # create plots dir 70 | 71 | def train(self, train_dataloader: DataLoader, val_dataloader: DataLoader) -> None: 72 | """Train the AlexNet Model""" 73 | 74 | for epoch in range(self.num_epochs): 75 | self.model.train() # set model to train 76 | 77 | # loss tracking metrics 78 | running_loss = 0.0 79 | running_vloss = 0.0 80 | batch_loss = 0.0 81 | running_acc = 0.0 82 | running_top_k_acc = 0.0 83 | 84 | pbar = tqdm(enumerate(train_dataloader), total=len(train_dataloader)) 85 | 86 | for i, (inputs, labels) in pbar: 87 | inputs, labels = inputs.to(self.device), labels.to(self.device) 88 | 89 | # zero gradients for every batch 90 | self.optimizer.zero_grad() 91 | 92 | # compute predictions + loss 93 | outputs = self.model(inputs) # predicted class 94 | loss = self.criterion(outputs, labels) 95 | 96 | # compute training accuracy 97 | running_acc += self.__accuracy(outputs, labels) 98 | running_top_k_acc += self._top_k_accuracy(outputs, labels, k=5) 99 | 100 | # perform backpropagation 101 | loss.backward() # compute gradients 102 | self.optimizer.step() # update model parameters 103 | 104 | # gather data and report 105 | running_loss += loss.item() 106 | batch_loss += loss.item() 107 | if i % 10 == 0: 108 | batch_loss = batch_loss / 10 # loss per batch 109 | pbar.set_postfix({"loss": round(batch_loss, 5)}) 110 | batch_loss = 0.0 111 | 112 | self.scheduler.step() 113 | 114 | train_accuracy = running_acc / len(train_dataloader) 115 | train_top_k_accuracy = running_top_k_acc / len(train_dataloader) 116 | avg_loss = running_loss / len(train_dataloader) 117 | 118 | prev_loss = self.train_losses[-1][0] if self.train_losses else float("inf") 119 | if avg_loss <= prev_loss: 120 | self.save(epoch + 1, avg_loss) 121 | 122 | self.train_accuracies.append((epoch, train_accuracy.cpu())) 123 | self.train_top_k_accuracies.append((epoch, train_top_k_accuracy)) 124 | self.train_losses.append((epoch, avg_loss)) 125 | 126 | if epoch % self.check_val_every_n_epoch == 0: 127 | self.model.eval() # set model to evaluation 128 | with torch.no_grad(): 129 | running_val_acc = 0 130 | running_val_top_k_acc = 0 131 | for inputs, labels in val_dataloader: 132 | inputs, labels = inputs.to(self.device), labels.to(self.device) 133 | 134 | outputs = self.model(inputs) 135 | loss = self.criterion(outputs, labels) 136 | 137 | running_vloss += loss.item() 138 | 139 | # compute validtion accuracy 140 | running_val_acc += self.__accuracy(outputs, labels) 141 | running_val_top_k_acc += self._top_k_accuracy(outputs, labels, k=5) 142 | 143 | val_top_k_accuracy = running_val_top_k_acc / len(val_dataloader) 144 | self.val_top_k_accuracies.append((epoch, val_top_k_accuracy)) 145 | 146 | val_accuracy = running_val_acc / len(val_dataloader) 147 | self.val_accuracies.append((epoch, val_accuracy.cpu())) 148 | 149 | avg_vloss = running_vloss / len(val_dataloader) 150 | self.val_losses.append((epoch, avg_vloss)) 151 | 152 | self.logger.info( 153 | f"[EPOCH {epoch + 1}] LOSS : train={avg_loss} val={avg_vloss} | ACCURACY (Top-1) : train={train_accuracy} val={val_accuracy} | TOP-5 : train={train_top_k_accuracy} val={val_top_k_accuracy}" 154 | ) 155 | 156 | def test(self, test_dataloader: DataLoader) -> None: 157 | """Test the AlexNet Model""" 158 | 159 | correct = 0 160 | top_5 = 0 161 | 162 | self.model.eval() 163 | with torch.no_grad(): 164 | for inputs, labels in test_dataloader: 165 | inputs, labels = inputs.to(self.device), labels.to(self.device) 166 | outputs = self.model(inputs) 167 | correct += self.__accuracy(outputs, labels) 168 | top_5 += self._top_k_accuracy(outputs, labels, k=5) 169 | 170 | self.logger.info( 171 | f"Test accuracy: {(correct / len(test_dataloader)) * 100} % | Top-5 accuracy: {(top_5 / len(test_dataloader)) * 100}" 172 | ) 173 | 174 | def plot_metrics(self) -> None: 175 | """Create plots for model metrics""" 176 | 177 | os.makedirs("plots", exist_ok=True) # create plots dir 178 | 179 | t_iters, t_loss = list(zip(*self.train_losses)) 180 | _, v_loss = list(zip(*self.val_losses)) 181 | _, acc = list(zip(*self.train_accuracies)) 182 | _, v_acc = list(zip(*self.val_accuracies)) 183 | 184 | fig, ax = plt.subplots(1, 2, figsize=(12, 5)) 185 | fig.suptitle(f"Model: [{self.model_name}]") 186 | 187 | ax[0].set_title(f"Loss Curve (batch_size={self.batch_size}, lr={self.learning_rate}), momentum={self.momentum}") 188 | ax[0].plot(t_iters, t_loss) 189 | ax[0].plot(t_iters, v_loss) 190 | ax[0].set_xlabel("Epochs") 191 | ax[0].set_ylabel("Loss") 192 | ax[0].legend(["Train", "Validation"]) 193 | ax[0].set_xticks(t_iters) 194 | 195 | ax[1].set_title( 196 | f"Accuracy Curve (batch_size={self.batch_size}, lr={self.learning_rate}), momentum={self.momentum}" 197 | ) 198 | ax[1].plot(t_iters, acc) 199 | ax[1].plot(t_iters, v_acc) 200 | ax[1].set_xlabel("Epochs") 201 | ax[1].set_ylabel("Accuracy") 202 | ax[1].legend(["Train", "Validation"]) 203 | ax[1].set_xticks(t_iters) 204 | 205 | fig.savefig(f"plots/{self.model_name}_metrics.png") 206 | plt.show() 207 | 208 | def _top_k_accuracy(self, outputs: torch.Tensor, labels: torch.Tensor, k: int) -> float: 209 | """Top-K accuracy. Top-1 is the equivalent to regular accuracy.""" 210 | 211 | values, indices = torch.topk(outputs, k) 212 | topk_correct = indices.eq(labels.view(-1, 1).expand_as(indices)) 213 | accuracy = topk_correct.sum().item() / labels.size(0) 214 | 215 | return accuracy 216 | 217 | def __accuracy(self, outputs: torch.Tensor, labels: torch.Tensor) -> float: 218 | """Compute accuracy given outputs as logits""" 219 | 220 | preds = torch.argmax(outputs, dim=1) 221 | accuracy = torch.sum(preds == labels) / len(preds) 222 | 223 | return accuracy 224 | 225 | def save(self, epoch: int, loss: float) -> None: 226 | """Save model""" 227 | 228 | time = datetime.datetime.now().strftime("%Y%m%d_%H%M%S") 229 | checkpoint_path = os.path.join(self.checkpoints_dir, f"{self.model_name}_e{epoch}_{time}.pt") 230 | state = { 231 | "epoch": epoch, 232 | "model": self.model.state_dict(), 233 | "optimizer": self.optimizer.state_dict(), 234 | "loss": loss, 235 | } 236 | torch.save(state, checkpoint_path) 237 | 238 | def load(self, checkpoint_name: str) -> None: 239 | """Load model""" 240 | 241 | checkpoint_path = os.path.join(self.checkpoints_dir, checkpoint_name) 242 | checkpoint = torch.load(checkpoint_path) 243 | self.model.load_state_dict(checkpoint["model"]) 244 | self.optimizer.load_state_dict(checkpoint["optimizer"]) 245 | -------------------------------------------------------------------------------- /AlexNet/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | 5 | 6 | def get_device(): 7 | """Get available device""" 8 | 9 | if torch.cuda.is_available(): 10 | print("Using CUDA...") 11 | return torch.device("cuda") 12 | elif torch.backends.mps.is_available() and torch.backends.mps.is_built(): 13 | print("Using MPS...") 14 | return torch.device("mps") 15 | else: 16 | print("Using CPU...") 17 | return torch.device("cpu") 18 | 19 | 20 | def imshow(img): 21 | """Display image""" 22 | 23 | img = img / 2 + 0.5 # unnormalize 24 | npimg = img.numpy() 25 | plt.imshow(np.transpose(npimg, (1, 2, 0))) 26 | plt.show() 27 | -------------------------------------------------------------------------------- /LinearRegression/linear_regression.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy 3 | 4 | 5 | class LinearRegression: 6 | """Linear Regression with Least Squared Error""" 7 | 8 | def __init__(self, epochs=1000, learning_rate=1e-2): 9 | self.epochs = epochs 10 | self.learning_rate = learning_rate 11 | self.W = None # weights 12 | self.b = None # bias 13 | self.losses = [] 14 | 15 | def __compute_loss(self, y, y_pred): 16 | # Mean Squared Error (MSE) is our cost function 17 | 18 | least_squares = (y_pred - y) ** 2 19 | return np.mean(least_squares) 20 | 21 | def fit(self, X, y): 22 | N, features = X.shape 23 | self.W = np.random.randn(features) 24 | self.b = 0 25 | 26 | for epoch in range(self.epochs): 27 | y_pred = self.predict(X) 28 | loss = self.__compute_loss(y, y_pred) # MSE loss 29 | 30 | ### compute gradients ### 31 | residuals = y_pred - y 32 | grad_W = (2 / N) * np.matmul(X.T, residuals) 33 | grad_b = (2 / N) * np.sum(residuals) 34 | 35 | ### parameter updates ### 36 | self.W -= self.learning_rate * grad_W 37 | self.b -= self.learning_rate * grad_b 38 | self.losses.append(loss) 39 | 40 | if (epoch + 1) % 1000 == 0: 41 | print(f"[Epoch {epoch + 1}/{self.epochs}] Loss: {round(loss, 5)}") 42 | 43 | def predict(self, X): 44 | return np.matmul(X, self.W) + self.b 45 | -------------------------------------------------------------------------------- /LogisticRegression/eval.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Baseline Evaluation with Sklearn" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 15, 13 | "metadata": {}, 14 | "outputs": [ 15 | { 16 | "name": "stdout", 17 | "output_type": "stream", 18 | "text": [ 19 | "The autoreload extension is already loaded. To reload it, use:\n", 20 | " %reload_ext autoreload\n" 21 | ] 22 | } 23 | ], 24 | "source": [ 25 | "%load_ext autoreload\n", 26 | "%autoreload 2\n", 27 | "import random\n", 28 | "import numpy as np\n", 29 | "import pandas as pd\n", 30 | "import matplotlib.pyplot as plt\n", 31 | "from sklearn.model_selection import train_test_split\n", 32 | "from sklearn.datasets import load_breast_cancer\n", 33 | "from sklearn.linear_model import LogisticRegression\n", 34 | "from sklearn.metrics import accuracy_score, confusion_matrix, ConfusionMatrixDisplay" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 16, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "def sklearn_to_df(data_loader):\n", 44 | " X_data = data_loader.data\n", 45 | " X_columns = data_loader.feature_names\n", 46 | " X = pd.DataFrame(X_data, columns=X_columns)\n", 47 | "\n", 48 | " y_data = data_loader.target\n", 49 | " label_names = data_loader.target_names\n", 50 | " y = pd.Series(y_data, name='target')\n", 51 | "\n", 52 | " return X, y, label_names\n", 53 | "\n", 54 | "X, y, label_names = sklearn_to_df(load_breast_cancer())\n", 55 | "\n", 56 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": 17, 62 | "metadata": {}, 63 | "outputs": [ 64 | { 65 | "name": "stderr", 66 | "output_type": "stream", 67 | "text": [ 68 | "/Users/andy/miniconda3/envs/40.319/lib/python3.10/site-packages/sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs failed to converge (status=1):\n", 69 | "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n", 70 | "\n", 71 | "Increase the number of iterations (max_iter) or scale the data as shown in:\n", 72 | " https://scikit-learn.org/stable/modules/preprocessing.html\n", 73 | "Please also refer to the documentation for alternative solver options:\n", 74 | " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", 75 | " n_iter_i = _check_optimize_result(\n" 76 | ] 77 | }, 78 | { 79 | "data": { 80 | "text/html": [ 81 | "
LogisticRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" 82 | ], 83 | "text/plain": [ 84 | "LogisticRegression()" 85 | ] 86 | }, 87 | "execution_count": 17, 88 | "metadata": {}, 89 | "output_type": "execute_result" 90 | } 91 | ], 92 | "source": [ 93 | "baseline_model = LogisticRegression(max_iter=100)\n", 94 | "baseline_model.fit(X_train, y_train)" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": 18, 100 | "metadata": {}, 101 | "outputs": [], 102 | "source": [ 103 | "def evaluate(model):\n", 104 | " y_pred = model.predict(X_test)\n", 105 | "\n", 106 | " accuracy = accuracy_score(y_test, y_pred)\n", 107 | " conf_matrix = confusion_matrix(y_test, y_pred)\n", 108 | "\n", 109 | " print(f'Accuracy: {accuracy*100:.2f}%')\n", 110 | "\n", 111 | " disp = ConfusionMatrixDisplay(confusion_matrix=conf_matrix, display_labels=label_names)\n", 112 | " disp.plot(cmap=plt.cm.Blues) # You can change the color map if you like\n", 113 | " plt.title('Confusion Matrix')\n", 114 | " plt.show()\n", 115 | "\n", 116 | "def plot_losses(losses):\n", 117 | " plt.plot(losses)\n", 118 | " plt.title('Training Loss')\n", 119 | " plt.xlabel('Epoch')\n", 120 | " plt.ylabel('Loss')\n", 121 | " plt.show()" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 19, 127 | "metadata": {}, 128 | "outputs": [ 129 | { 130 | "name": "stdout", 131 | "output_type": "stream", 132 | "text": [ 133 | "Accuracy: 95.61%\n" 134 | ] 135 | }, 136 | { 137 | "data": { 138 | "image/png": "", 139 | "text/plain": [ 140 | "
" 141 | ] 142 | }, 143 | "metadata": {}, 144 | "output_type": "display_data" 145 | } 146 | ], 147 | "source": [ 148 | "evaluate(baseline_model)" 149 | ] 150 | }, 151 | { 152 | "cell_type": "markdown", 153 | "metadata": {}, 154 | "source": [ 155 | "# Evaluating My Implementation" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": 20, 161 | "metadata": {}, 162 | "outputs": [], 163 | "source": [ 164 | "from logistic_regression import LogisticRegression as MyLogisticRegression" 165 | ] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": 40, 170 | "metadata": {}, 171 | "outputs": [ 172 | { 173 | "name": "stdout", 174 | "output_type": "stream", 175 | "text": [ 176 | "[Epoch 100/1000] Loss: 12.93496\n", 177 | "[Epoch 200/1000] Loss: 3.32343\n", 178 | "[Epoch 300/1000] Loss: 3.37494\n", 179 | "[Epoch 400/1000] Loss: 10.7662\n", 180 | "[Epoch 500/1000] Loss: 2.17328\n", 181 | "[Epoch 600/1000] Loss: 2.27783\n", 182 | "[Epoch 700/1000] Loss: 2.49438\n", 183 | "[Epoch 800/1000] Loss: 2.68703\n", 184 | "[Epoch 900/1000] Loss: 2.40905\n", 185 | "[Epoch 1000/1000] Loss: 2.41401\n" 186 | ] 187 | } 188 | ], 189 | "source": [ 190 | "# set seed for reproducibility\n", 191 | "np.random.seed(0)\n", 192 | "\n", 193 | "my_model = MyLogisticRegression()\n", 194 | "my_model.fit(X_train.values, y_train)" 195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "execution_count": 41, 200 | "metadata": {}, 201 | "outputs": [ 202 | { 203 | "name": "stdout", 204 | "output_type": "stream", 205 | "text": [ 206 | "Accuracy: 94.74%\n" 207 | ] 208 | }, 209 | { 210 | "data": { 211 | "image/png": "", 212 | "text/plain": [ 213 | "
" 214 | ] 215 | }, 216 | "metadata": {}, 217 | "output_type": "display_data" 218 | }, 219 | { 220 | "data": { 221 | "image/png": "", 222 | "text/plain": [ 223 | "
" 224 | ] 225 | }, 226 | "metadata": {}, 227 | "output_type": "display_data" 228 | } 229 | ], 230 | "source": [ 231 | "evaluate(my_model)\n", 232 | "plot_losses(my_model.losses)" 233 | ] 234 | }, 235 | { 236 | "cell_type": "markdown", 237 | "metadata": {}, 238 | "source": [ 239 | "Our implementation is a success! However, notice that the LogisticRegression implemented by Sklearn converges much faster than our implementation. This is because it is using a solver called Limited-memory Broyden–Fletcher–Goldfarb–Shanno algorithm (LBFGS) which converges much faster than Gradient Descent (GD) which is what our implementation is using." 240 | ] 241 | }, 242 | { 243 | "cell_type": "markdown", 244 | "metadata": {}, 245 | "source": [ 246 | "# Ackowledgements" 247 | ] 248 | }, 249 | { 250 | "cell_type": "markdown", 251 | "metadata": {}, 252 | "source": [ 253 | "A few resources that helped out:\n", 254 | "- https://developer.ibm.com/articles/implementing-logistic-regression-from-scratch-in-python/\n", 255 | "- https://medium.com/@koushikkushal95/logistic-regression-from-scratch-dfb8527a4226" 256 | ] 257 | } 258 | ], 259 | "metadata": { 260 | "kernelspec": { 261 | "display_name": "40.319", 262 | "language": "python", 263 | "name": "python3" 264 | }, 265 | "language_info": { 266 | "codemirror_mode": { 267 | "name": "ipython", 268 | "version": 3 269 | }, 270 | "file_extension": ".py", 271 | "mimetype": "text/x-python", 272 | "name": "python", 273 | "nbconvert_exporter": "python", 274 | "pygments_lexer": "ipython3", 275 | "version": "3.10.8" 276 | } 277 | }, 278 | "nbformat": 4, 279 | "nbformat_minor": 2 280 | } 281 | -------------------------------------------------------------------------------- /LogisticRegression/logistic_regression.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy 3 | 4 | 5 | class LogisticRegression: 6 | 7 | def __init__(self, epochs=1000, learning_rate=1e-2, threshold=0.5): 8 | self.epochs = epochs 9 | self.learning_rate = learning_rate 10 | self.threshold = threshold 11 | self.W = None # weights 12 | self.b = None # bias 13 | self.losses = [] 14 | 15 | def __sigmoid(self, z): 16 | # sigmoid activation function 17 | 18 | # return 1 / (1 + np.exp(-z)) 19 | return scipy.special.expit(z) # handles np.exp(.) overflow 20 | 21 | def __compute_loss(self, y, y_pred, epsilon=1e-9): 22 | # binary cross-entropy loss (BCE) 23 | 24 | # epsilon added to prevent log(0) 25 | return -np.mean(y * np.log(y_pred + epsilon) + (1 - y) * np.log(1 - y_pred + epsilon)) 26 | 27 | def fit(self, X, y): 28 | N, features = X.shape 29 | self.W = np.random.randn(features) 30 | self.b = 0 31 | 32 | for epoch in range(self.epochs): 33 | z = np.matmul(X, self.W) + self.b 34 | y_pred = self.__sigmoid(z) 35 | loss = self.__compute_loss(y, y_pred) 36 | 37 | ### compute gradients ### 38 | residuals = y_pred - y 39 | grad_W = (1 / N) * np.matmul(X.T, residuals) 40 | grad_b = (1 / N) * np.sum(residuals) 41 | 42 | ### parameter updates ### 43 | self.W -= self.learning_rate * grad_W 44 | self.b -= self.learning_rate * grad_b 45 | self.losses.append(loss) 46 | 47 | if (epoch + 1) % 100 == 0: 48 | print(f"[Epoch {epoch + 1}/{self.epochs}] Loss: {round(loss, 5)}") 49 | 50 | def predict(self, X): 51 | z = np.matmul(X, self.W) + self.b 52 | y_pred = self.__sigmoid(z) 53 | y_pred = np.where(y_pred >= self.threshold, 1, 0) 54 | return y_pred 55 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ツ 2 | 3 | # Implementations 4 | 5 | ## Traditional 6 | |Implementation | Dataset | Notebooks | 7 | | --- | --- | --- | 8 | | Simple Linear Regression | Dummy + Diabetes Dataset (regression) | [![Python](https://img.shields.io/badge/python-3670A0?style=for-the-badge&logo=python&logoColor=ffdd54)](LinearRegression/eval.ipynb) | 9 | | Logistic Regression | Breast Cancer Wisconsin Dataset | [![Python](https://img.shields.io/badge/python-3670A0?style=for-the-badge&logo=python&logoColor=ffdd54)](LogisticRegression/eval.ipynb) | 10 | 11 | ## Deep Learning 12 | |Implementation | Dataset | Notebooks | 13 | | --- | --- | --- | 14 | | AlexNet | Tiny ImageNet | [![PyTorch](https://img.shields.io/badge/PyTorch-%23EE4C2C.svg?style=for-the-badge&logo=PyTorch&logoColor=white)](AlexNet/train_alexnet.ipynb) | 15 | | ResNet-18 | CIFAR-10 | [![PyTorch](https://img.shields.io/badge/PyTorch-%23EE4C2C.svg?style=for-the-badge&logo=PyTorch&logoColor=white)](ResNet/train_resnet18.ipynb) | 16 | 17 | ## Computer Vision 18 | |Implementation | Dataset | Notebooks | 19 | | --- | --- | --- | 20 | | U-Net Architecture | SOME RANDOM SEGMENTATION DATASET | [![PyTorch](https://img.shields.io/badge/PyTorch-%23EE4C2C.svg?style=for-the-badge&logo=PyTorch&logoColor=white)](...) | 21 | 22 | 23 | --- 24 | 25 | # Roadmap 26 | - [x] Linear Regression 27 | - [x] Logistic Regression 28 | - [ ] Autoencoder 29 | - [ ] Variational Autoencoder (VAE) 30 | - [ ] Generative Adversarial Network (GAN) 31 | - [ ] Graph Neural Network 32 | - [x] ResNet18 + Residual Layars 33 | - [ ] U-Net Architecture [WIP] 34 | - [x] AlexNet 35 | 36 | ### Sequence Models 37 | - [ ] Recurrent Neural Network (RNN) 38 | - [ ] Long Short-Term Memory (LSTM) 39 | - [ ] Gated Recurrent Unit (GRU) 40 | 41 | 42 | ### Misc 43 | - [ ] Ensembling + XGBoost 44 | 45 | 46 | # Acknowledgements 47 | This readme layout inspired by [rasbt/deeplearning-models](https://github.com/rasbt/deeplearning-models) 48 | -------------------------------------------------------------------------------- /ResNet/README.md: -------------------------------------------------------------------------------- 1 | # ResNet-18 Implementation 2 | A toy project to learn, implement, and train our own ResNet-18 on CIFAR-10. 3 | 4 | 5 | # Results 6 | 7 | ### Existing PyTorch ResNet-18 Model (Baseline) 8 | ![baseline pytorch resnet18](plots/baseline_pytorch_resnet18_metrics.png) 9 | 10 | 11 | ### Our ResNet-18 Implementation 12 | We load the pretrained weights from PyTorch's `ResNet18_Weights.IMAGENET1K_V1`. 13 | 14 | 15 | ![ours resnet18](plots/resnet18_metrics.png) 16 | 17 | 18 | # Acknowledgements 19 | - [PyTorch CIFAR10 Training Tutorial](https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html) 20 | - [A Detailed Introduction to ResNet and Its Implementation in PyTorch](https://medium.com/@freshtechyy/a-detailed-introduction-to-resnet-and-its-implementation-in-pytorch-744b13c8074a) by Huili Yu 21 | - [Let's reproduce GPT-2 (124M)](https://www.youtube.com/watch?v=l8pRSuU81PU) by Andrej Karpathy 22 | - [Helpful conventions for PyTorch model building](https://github.com/FrancescoSaverioZuppichini/Pytorch-how-and-when-to-use-Module-Sequential-ModuleList-and-ModuleDict/blob/master/README.md) by FrancescoSaverioZuppichini 23 | 24 | -------------------------------------------------------------------------------- /ResNet/plots/baseline_pytorch_resnet18_metrics.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aandyw/StuffFromScratch/74578e8ccfd4bc40eaa06d9082e48ee6dfb85fb8/ResNet/plots/baseline_pytorch_resnet18_metrics.png -------------------------------------------------------------------------------- /ResNet/plots/resnet18_metrics.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aandyw/StuffFromScratch/74578e8ccfd4bc40eaa06d9082e48ee6dfb85fb8/ResNet/plots/resnet18_metrics.png -------------------------------------------------------------------------------- /ResNet/resnet18.py: -------------------------------------------------------------------------------- 1 | """ 2 | Building an 18-layer residual network (ResNet-18) from scratch. 3 | From the paper "Deep Residual Learning for Image Recognition" (https://arxiv.org/abs/1512.03385) 4 | """ 5 | 6 | import torchvision 7 | import torch 8 | import torch.nn as nn 9 | 10 | 11 | class BasicBlock(nn.Module): 12 | """The Residual Block""" 13 | 14 | def __init__(self, in_channels: int, out_channels: int, stride: int = 1, downsample: bool = False) -> None: 15 | """ 16 | Create the Residual Block 17 | 18 | Args: 19 | in_channels (int): number of input channels 20 | out_channels (int): number of output channels 21 | stride (int): stride of first 3x3 convolution layer 22 | downsample (bool): whether to adjust for spatial dimensions due to downsampling via stride=2 23 | """ 24 | super().__init__() 25 | self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False) 26 | self.bn1 = nn.BatchNorm2d(out_channels) 27 | self.relu = nn.ReLU(inplace=True) 28 | self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False) 29 | self.bn2 = nn.BatchNorm2d(out_channels) 30 | 31 | # For downsampling, the skip connection will pass through the 1x1 conv layer with stride of 2 to 32 | # match the spatial dimension of the downsampled feature maps and channels for the add operation. 33 | # 34 | # More specifically, the 'downsample block' is used for layer 2, 3, 4 of ResNet18 where the first conv2d 35 | # layer of the BasicBlock uses a stride of 2 instead of 1 to downsample feature maps for a larger 36 | # receptive field. 37 | # This is why we need to carefully craft our 'downsample block' to make sure spatial dimensions are 38 | # not disrupted when we add the skip connection in these residual blocks. 39 | self.downsample = None 40 | if downsample: 41 | self.downsample = nn.Sequential( 42 | nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False), 43 | nn.BatchNorm2d(out_channels), 44 | ) 45 | 46 | def forward(self, x: torch.Tensor) -> torch.Tensor: 47 | identity = x.clone() 48 | x = self.relu(self.bn1(self.conv1(x))) 49 | x = self.bn2(self.conv2(x)) 50 | 51 | if self.downsample: # if layer not None 52 | identity = self.downsample(identity) 53 | 54 | x += identity 55 | o = self.relu(x) 56 | 57 | return o 58 | 59 | 60 | class ResNet18(nn.Module): 61 | """The ResNet-18 Model""" 62 | 63 | def __init__(self, n_classes: int = 10) -> None: 64 | """ 65 | Create the ResNet-18 Model 66 | 67 | Args: 68 | n_classes (int, optional): The number of output classes we predict for. Defaults to 10. 69 | """ 70 | super().__init__() 71 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=(3, 3), bias=False) 72 | self.bn1 = nn.BatchNorm2d(64) 73 | self.relu = nn.ReLU(inplace=True) 74 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 75 | 76 | self.layer1 = nn.Sequential( 77 | BasicBlock(64, 64), 78 | BasicBlock(64, 64), 79 | ) 80 | self.layer2 = nn.Sequential( 81 | BasicBlock(64, 128, stride=2, downsample=True), 82 | BasicBlock(128, 128), 83 | ) 84 | self.layer3 = nn.Sequential( 85 | BasicBlock(128, 256, stride=2, downsample=True), 86 | BasicBlock(256, 256), 87 | ) 88 | self.layer4 = nn.Sequential( 89 | BasicBlock(256, 512, stride=2, downsample=True), 90 | BasicBlock(512, 512), 91 | ) 92 | self.avgpool = nn.AdaptiveAvgPool2d(output_size=(1, 1)) 93 | 94 | # our fully connected layer will be different to accomodate for CIFAR-10 95 | self.fc = nn.Linear(in_features=512, out_features=n_classes) 96 | 97 | def forward(self, x: torch.Tensor) -> torch.Tensor: 98 | x = self.maxpool(self.relu(self.bn1(self.conv1(x)))) 99 | 100 | x = self.layer1(x) 101 | x = self.layer2(x) 102 | x = self.layer3(x) 103 | x = self.layer4(x) 104 | x = self.avgpool(x) # [bs, 512, 1, 1] 105 | 106 | x = torch.squeeze(x) # reshape to [bs, 512] 107 | o = self.fc(x) 108 | 109 | return o 110 | 111 | @classmethod 112 | def from_pretrained(cls, model_type: str) -> nn.Module: 113 | """ 114 | Load pretrained PyTorch ResNet-18 weights into our ResNet-18 implementation 115 | 116 | Inspired by Andrej Karpathy from 'Let's reproduce GPT-2 (124M)' 117 | (https://www.youtube.com/watch?v=l8pRSuU81PU) 118 | """ 119 | 120 | assert model_type in {"resnet18"}, "only supports resnet18" 121 | print("loading weights from pytorch pretrained resnet18") 122 | 123 | # our model 124 | model = ResNet18(n_classes=10) 125 | r18 = model.state_dict() 126 | r18_keys = r18.keys() 127 | 128 | # pretrained pytorch resnet18 model 129 | p_model = torchvision.models.resnet18(weights=torchvision.models.ResNet18_Weights.IMAGENET1K_V1) 130 | p_r18 = p_model.state_dict() 131 | p_r18_keys = p_r18.keys() 132 | 133 | assert len(p_r18_keys) == len(r18_keys), f"mistmatched keys: {len(p_r18_keys)} != {len(r18_keys)}" 134 | # load weights from pretrained 135 | for k in p_r18_keys: 136 | if k.startswith("fc"): # skip fc layer, we add our own for CIFAR-10 137 | continue 138 | 139 | assert p_r18[k].shape == r18[k].shape 140 | with torch.no_grad(): 141 | r18[k].copy_(p_r18[k]) 142 | 143 | return model 144 | -------------------------------------------------------------------------------- /ResNet/trainer.py: -------------------------------------------------------------------------------- 1 | import sys, os 2 | import matplotlib.pyplot as plt 3 | from tqdm import tqdm 4 | import logging 5 | 6 | import torch 7 | import torch.nn as nn 8 | import torch.optim as optim 9 | from torch.utils.data import DataLoader 10 | 11 | 12 | class Trainer: 13 | def __init__( 14 | self, 15 | model: nn.Module, 16 | model_name: str = "resnet18", 17 | batch_size: int = 256, 18 | learning_rate: float = 0.01, 19 | num_epochs: int = 30, 20 | check_val_every_n_epoch: int = 1, 21 | device: str = "cpu", 22 | ) -> None: 23 | """Trainer object to facilitate training and evaluation""" 24 | 25 | self.model = model 26 | self.model_name = model_name 27 | 28 | # training configurations 29 | self.batch_size = batch_size # does nothing; mainly for viz 30 | self.learning_rate = learning_rate 31 | self.num_epochs = num_epochs 32 | self.check_val_every_n_epoch = check_val_every_n_epoch 33 | self.device = device 34 | self.model.to(self.device) 35 | 36 | # set loss function and optimizer 37 | self.criterion = nn.CrossEntropyLoss() 38 | 39 | # SGD used by original paper "Deep Residual Learning for Image Recognition" 40 | self.optimizer = optim.SGD(self.model.parameters(), lr=self.learning_rate, momentum=0.9) 41 | self.scheduler = optim.lr_scheduler.StepLR(self.optimizer, step_size=7, gamma=0.1) 42 | 43 | # model metrics 44 | self.train_losses = [] 45 | self.train_accuracies = [] 46 | self.val_losses = [] 47 | self.val_accuracies = [] 48 | 49 | # logging info 50 | logging.basicConfig(stream=sys.stdout, level=logging.INFO, format="%(levelname)s | %(message)s") 51 | self.logger = logging.getLogger() 52 | 53 | def train(self, train_dataloader: DataLoader, val_dataloader: DataLoader) -> None: 54 | """Train the ResNet-18 Model""" 55 | 56 | for epoch in range(self.num_epochs): 57 | self.model.train() # set model to train 58 | 59 | # loss tracking metrics 60 | running_loss = 0.0 61 | running_vloss = 0.0 62 | batch_loss = 0.0 63 | running_acc = 0.0 64 | 65 | pbar = tqdm(enumerate(train_dataloader), total=len(train_dataloader)) 66 | 67 | for i, (inputs, labels) in pbar: 68 | inputs, labels = inputs.to(self.device), labels.to(self.device) 69 | 70 | # zero gradients for every batch 71 | self.optimizer.zero_grad() 72 | 73 | # compute predictions + loss 74 | outputs = self.model(inputs) # predicted class 75 | loss = self.criterion(outputs, labels) 76 | 77 | # compute training accuracy 78 | running_acc += self.__accuracy(outputs, labels) 79 | 80 | # perform backpropagation 81 | loss.backward() # compute gradients 82 | self.optimizer.step() # update model parameters 83 | 84 | # gather data and report 85 | running_loss += loss.item() 86 | batch_loss += loss.item() 87 | if i % 10 == 0: 88 | batch_loss = batch_loss / 10 # loss per batch 89 | pbar.set_postfix({"loss": round(batch_loss, 5)}) 90 | batch_loss = 0.0 91 | 92 | self.scheduler.step() 93 | 94 | train_accuracy = running_acc / len(train_dataloader) 95 | self.train_accuracies.append((epoch, train_accuracy.cpu())) 96 | 97 | avg_loss = running_loss / len(train_dataloader) 98 | self.train_losses.append((epoch, avg_loss)) 99 | 100 | if epoch % self.check_val_every_n_epoch == 0: 101 | self.model.eval() # set model to evaluation 102 | with torch.no_grad(): 103 | running_val_acc = 0 104 | for inputs, labels in val_dataloader: 105 | inputs, labels = inputs.to(self.device), labels.to(self.device) 106 | 107 | outputs = self.model(inputs) 108 | loss = self.criterion(outputs, labels) 109 | 110 | running_vloss += loss.item() 111 | # compute validtion accuracy 112 | running_val_acc += self.__accuracy(outputs, labels) 113 | 114 | val_accuracy = running_val_acc / len(val_dataloader) 115 | self.val_accuracies.append((epoch, val_accuracy.cpu())) 116 | 117 | avg_vloss = running_vloss / len(val_dataloader) 118 | self.val_losses.append((epoch, avg_vloss)) 119 | 120 | self.logger.info( 121 | f"[EPOCH {epoch + 1}] LOSS : train={avg_loss} val={avg_vloss} | ACCURACY : train={train_accuracy} val={val_accuracy}" 122 | ) 123 | 124 | def test(self, test_dataloader: DataLoader) -> None: 125 | """Test the ResNet-18 Model""" 126 | 127 | correct = 0 128 | self.model.eval() 129 | with torch.no_grad(): 130 | for inputs, labels in test_dataloader: 131 | inputs, labels = inputs.to(self.device), labels.to(self.device) 132 | outputs = self.model(inputs) 133 | correct += self.__accuracy(outputs, labels) 134 | 135 | self.logger.info(f"Test accuracy: {(correct / len(test_dataloader)) * 100} %") 136 | 137 | def plot_metrics(self) -> None: 138 | """Create plots for model metrics""" 139 | 140 | os.makedirs("plots", exist_ok=True) # create plots dir 141 | 142 | t_iters, t_loss = list(zip(*self.train_losses)) 143 | _, v_loss = list(zip(*self.val_losses)) 144 | _, acc = list(zip(*self.train_accuracies)) 145 | _, v_acc = list(zip(*self.val_accuracies)) 146 | 147 | fig, ax = plt.subplots(1, 2, figsize=(12, 5)) 148 | fig.suptitle(f"Model: [{self.model_name}]") 149 | 150 | ax[0].set_title(f"Loss Curve (batch_size={self.batch_size}, lr={self.learning_rate})") 151 | ax[0].plot(t_iters, t_loss) 152 | ax[0].plot(t_iters, v_loss) 153 | ax[0].set_xlabel("Epochs") 154 | ax[0].set_ylabel("Loss") 155 | ax[0].legend(["Train", "Validation"]) 156 | ax[0].set_xticks(t_iters) 157 | 158 | ax[1].set_title(f"Accuracy Curve (batch_size={self.batch_size}, lr={self.learning_rate})") 159 | ax[1].plot(t_iters, acc) 160 | ax[1].plot(t_iters, v_acc) 161 | ax[1].set_xlabel("Epochs") 162 | ax[1].set_ylabel("Accuracy") 163 | ax[1].legend(["Train", "Validation"]) 164 | ax[1].set_xticks(t_iters) 165 | 166 | fig.savefig(f"plots/{self.model_name}_metrics.png") 167 | plt.show() 168 | 169 | def __accuracy(self, outputs: torch.Tensor, labels: torch.Tensor) -> float: 170 | """Compute accuracy given outputs as logits""" 171 | 172 | preds = torch.argmax(outputs, dim=1) 173 | return torch.sum(preds == labels) / len(preds) 174 | --------------------------------------------------------------------------------