├── .gitignore
├── AlexNet
├── README.md
├── alexnet.py
├── plots
│ └── alexnet_metrics.png
├── train_alexnet.ipynb
├── trainer.py
└── utils.py
├── LinearRegression
├── eval.ipynb
└── linear_regression.py
├── LogisticRegression
├── eval.ipynb
└── logistic_regression.py
├── README.md
└── ResNet
├── README.md
├── plots
├── baseline_pytorch_resnet18_metrics.png
└── resnet18_metrics.png
├── resnet18.py
├── train_resnet18.ipynb
└── trainer.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | share/python-wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | MANIFEST
28 |
29 | # PyInstaller
30 | # Usually these files are written by a python script from a template
31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest
33 | *.spec
34 |
35 | # Installer logs
36 | pip-log.txt
37 | pip-delete-this-directory.txt
38 |
39 | # Unit test / coverage reports
40 | htmlcov/
41 | .tox/
42 | .nox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *.cover
49 | *.py,cover
50 | .hypothesis/
51 | .pytest_cache/
52 | cover/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | .pybuilder/
76 | target/
77 |
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 |
81 | # IPython
82 | profile_default/
83 | ipython_config.py
84 |
85 | # pyenv
86 | # For a library or package, you might want to ignore these files since the code is
87 | # intended to run in multiple environments; otherwise, check them in:
88 | # .python-version
89 |
90 | # pipenv
91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
94 | # install all needed dependencies.
95 | #Pipfile.lock
96 |
97 | # poetry
98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99 | # This is especially recommended for binary packages to ensure reproducibility, and is more
100 | # commonly ignored for libraries.
101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 |
104 | # pdm
105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | # in version control.
109 | # https://pdm.fming.dev/#use-with-ide
110 | .pdm.toml
111 |
112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113 | __pypackages__/
114 |
115 | # Celery stuff
116 | celerybeat-schedule
117 | celerybeat.pid
118 |
119 | # SageMath parsed files
120 | *.sage.py
121 |
122 | # Environments
123 | .env
124 | .venv
125 | env/
126 | venv/
127 | ENV/
128 | env.bak/
129 | venv.bak/
130 |
131 | # Spyder project settings
132 | .spyderproject
133 | .spyproject
134 |
135 | # Rope project settings
136 | .ropeproject
137 |
138 | # mkdocs documentation
139 | /site
140 |
141 | # mypy
142 | .mypy_cache/
143 | .dmypy.json
144 | dmypy.json
145 |
146 | # Pyre type checker
147 | .pyre/
148 |
149 | # pytype static type analyzer
150 | .pytype/
151 |
152 | # Cython debug symbols
153 | cython_debug/
154 |
155 | # PyCharm
156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158 | # and can be added to the global gitignore or merged into this file. For a more nuclear
159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160 | #.idea/
161 |
--------------------------------------------------------------------------------
/AlexNet/README.md:
--------------------------------------------------------------------------------
1 | # AlexNet Implementation
2 | A toy project to learn, implement, and train the famous AlexNet Architecture from scratch from the 2012 paper
3 | "[ImageNet Classification with Deep Convolutional Neural Networks](https://proceedings.neurips.cc/paper/2012/file/c399862d3b9d6b76c8436e924a68c45b-Paper.pdf)" simply because we can.
4 |
5 | ## Some Personal Details on Model Training
6 | - Find a suitable batch size for training. Sticking to $128$ in accordance with AlexNet paper.
7 | - Ensure that data preprocessing transformations are appropriate and desired.
8 |
9 | # Results
10 |
11 | The final results after training for ~$30$ epochs are as follows:
12 | `Test Top-1 accuracy: 31.754350662231445 % | Top-5 accuracy: 58.7618670886076`
13 |
14 | 
15 |
16 | Considering that the [original AlexNet paper](https://proceedings.neurips.cc/paper/2012/file/c399862d3b9d6b76c8436e924a68c45b-Paper.pdf) reports top-1 and top-5 error rates of 67.4% and 40.9%
17 | (i.e. top-1 and top-5 accuracies of 32.6% and 59.1%, respectively) on ImageNet, we can say that the implementation is sufficiently accurate for the Tiny ImageNet dataset.
18 |
19 |
20 | # Acknowledgements
21 | - [ImageNet Classification with Deep Convolutional Neural Networks](https://proceedings.neurips.cc/paper/2012/file/c399862d3b9d6b76c8436e924a68c45b-Paper.pdf)
22 | - [dansuh17/alexnet-pytorch](https://github.com/dansuh17/alexnet-pytorch)
23 | - [Writing AlexNet from Scratch in PyTorch](https://blog.paperspace.com/alexnet-pytorch/#data-loading)
24 | - https://pytorch.org/hub/pytorch_vision_alexnet/
25 | - [Difference between AlexNet, VGGNet, ResNet, and Inception](https://towardsdatascience.com/the-w3h-of-alexnet-vggnet-resnet-and-inception-7baaaecccc96)
--------------------------------------------------------------------------------
/AlexNet/alexnet.py:
--------------------------------------------------------------------------------
1 | """
2 | Implementing the famous AlexNet Architecture from scratch from the 2012 paper
3 | "ImageNet Classification with Deep Convolutional Neural Networks"
4 | """
5 |
6 | import torch
7 | import torch.nn as nn
8 |
9 |
class AlexNet(nn.Module):
    """AlexNet convolutional network (Krizhevsky et al., 2012).

    ``features`` is five convolutions with ReLU, local response normalisation
    after the first two, and three overlapping max-pools. ``classifier`` is
    three fully connected layers with dropout in front of the first two.

    The first fully connected layer expects a flattened ``6 x 6 x 256`` feature
    map, which is what a ``3 x 227 x 227`` input produces
    (227 -> 55 -> 27 -> 13 -> 6 through the strided conv and the three pools).

    Args:
        n_classes: number of output logits.
    """

    def __init__(self, n_classes: int = 1000) -> None:
        super().__init__()

        # LRN hyper-parameters published in the paper: n=5, alpha=1e-4,
        # beta=0.75, k=2. Note that the literal ``10e-4`` equals 1e-3, i.e.
        # ten times the published alpha, so it is spelled ``1e-4`` here.
        self.features = nn.Sequential(
            nn.Conv2d(3, 96, kernel_size=11, stride=4),
            nn.ReLU(),
            nn.LocalResponseNorm(size=5, alpha=1e-4, beta=0.75, k=2),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(96, 256, kernel_size=5, padding=2),
            nn.ReLU(),
            nn.LocalResponseNorm(size=5, alpha=1e-4, beta=0.75, k=2),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(256, 384, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(384, 384, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )

        self.classifier = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(in_features=6 * 6 * 256, out_features=4096),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(in_features=4096, out_features=4096),
            nn.ReLU(),
            nn.Linear(in_features=4096, out_features=n_classes),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return class logits of shape ``[bs, n_classes]`` for a batch of images."""

        x = self.features(x)  # [bs, 256, 6, 6] for 227 x 227 inputs
        x = x.reshape(x.size(0), -1)  # flatten to [bs, 6*6*256] = [bs, 9216]
        o = self.classifier(x)

        return o
50 |
51 |
# Smoke test: build AlexNet, report its parameter counts, and check that one
# forward pass yields the expected logits shape.
if __name__ == "__main__":
    alexnet = AlexNet()

    n_params = 0
    n_trainable_params = 0
    for p in alexnet.parameters():
        n_params += p.numel()
        if p.requires_grad:
            n_trainable_params += p.numel()

    print(f"Number of parameters: {n_params}")
    print(f"Number of trainable parameters: {n_trainable_params}")

    # The paper states 224 x 224 inputs, but the layer arithmetic only works
    # out to a 6 x 6 feature map with 227 x 227.
    dummy_image = torch.randn(1, 3, 227, 227)
    out = alexnet(dummy_image)

    assert out.shape == (1, 1000), f"Expected shape: (1, 1000) | Actual shape: {out.shape}"

    print(f"\nModel Summary:\n========\n{alexnet}")
68 |
--------------------------------------------------------------------------------
/AlexNet/plots/alexnet_metrics.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aandyw/StuffFromScratch/74578e8ccfd4bc40eaa06d9082e48ee6dfb85fb8/AlexNet/plots/alexnet_metrics.png
--------------------------------------------------------------------------------
/AlexNet/trainer.py:
--------------------------------------------------------------------------------
1 | """Trainer class for AlexNet"""
2 |
3 | import sys, os
4 | import datetime
5 | import matplotlib.pyplot as plt
6 | from tqdm import tqdm
7 | import logging
8 |
9 | import torch
10 | import torch.nn as nn
11 | import torch.optim as optim
12 | from torch.utils.data import DataLoader
13 |
14 |
class Trainer:
    """Training and evaluation harness for a classification ``nn.Module``.

    Uses cross-entropy loss, SGD with momentum and weight decay, and a StepLR
    schedule that multiplies the learning rate by 0.1 every 7 epochs.
    Per-epoch metrics are stored as ``(epoch, value)`` tuples in the
    ``train_*`` and ``val_*`` lists; accuracies are plain Python floats.
    """

    def __init__(
        self,
        model: nn.Module,
        model_name: str = "alexnet",
        batch_size: int = 256,
        learning_rate: float = 0.01,
        weight_decay: float = 0.0005,
        momentum: float = 0.9,
        num_epochs: int = 30,
        check_val_every_n_epoch: int = 1,
        device: str = "cpu",
        checkpoints_dir: str = "checkpoints",
    ) -> None:
        """Trainer object to facilitate training and evaluation.

        Args:
            model: network to train; it is moved to ``device``.
            model_name: used in checkpoint and plot file names.
            batch_size: only shown in plot titles; the dataloaders decide the
                real batch size.
            learning_rate: initial SGD learning rate.
            weight_decay: SGD weight decay.
            momentum: SGD momentum.
            num_epochs: number of passes over the training dataloader.
            check_val_every_n_epoch: validation runs on epochs where
                ``epoch % check_val_every_n_epoch == 0``.
            device: device the model and batches are moved to.
            checkpoints_dir: directory for checkpoints (created if missing).
        """

        self.model = model
        self.model_name = model_name

        # training configurations
        self.batch_size = batch_size  # does nothing; mainly for viz
        self.learning_rate = learning_rate
        self.momentum = momentum
        self.weight_decay = weight_decay
        self.num_epochs = num_epochs
        self.check_val_every_n_epoch = check_val_every_n_epoch
        self.device = device
        self.model.to(self.device)

        # set loss function and optimizer
        self.criterion = nn.CrossEntropyLoss()

        # SGD used by original alexnet paper
        self.optimizer = optim.SGD(
            self.model.parameters(), lr=self.learning_rate, momentum=self.momentum, weight_decay=self.weight_decay
        )

        # Decays lr of each parameter group by 0.1 every step_size epochs
        self.scheduler = optim.lr_scheduler.StepLR(self.optimizer, step_size=7, gamma=0.1)

        # model metrics, each a list of (epoch, value) tuples
        self.train_losses = []
        self.train_accuracies = []
        self.train_top_k_accuracies = []
        self.val_losses = []
        self.val_accuracies = []
        self.val_top_k_accuracies = []

        # logging info
        logging.basicConfig(stream=sys.stdout, level=logging.INFO, format="%(levelname)s | %(message)s")
        self.logger = logging.getLogger()

        # create checkpoints directory
        self.checkpoints_dir = checkpoints_dir
        os.makedirs(self.checkpoints_dir, exist_ok=True)

    def train(self, train_dataloader: DataLoader, val_dataloader: DataLoader) -> None:
        """Train the model for ``num_epochs`` epochs, validating periodically.

        A checkpoint is written whenever the average training loss of an epoch
        is no worse than that of the previous epoch (always on the first one).
        """

        for epoch in range(self.num_epochs):
            avg_loss, train_accuracy, train_top_k_accuracy = self._train_one_epoch(train_dataloader)
            self.scheduler.step()

            # must run before this epoch's loss is appended to train_losses
            if self._improved_over_previous_epoch(avg_loss):
                self.save(epoch + 1, avg_loss)

            self.train_accuracies.append((epoch, train_accuracy))
            self.train_top_k_accuracies.append((epoch, train_top_k_accuracy))
            self.train_losses.append((epoch, avg_loss))

            # Reset every epoch so a skipped validation is logged as None
            # instead of silently repeating an older epoch's numbers.
            avg_vloss = val_accuracy = val_top_k_accuracy = None
            if epoch % self.check_val_every_n_epoch == 0:
                avg_vloss, val_accuracy, val_top_k_accuracy = self._evaluate(val_dataloader)
                self.val_top_k_accuracies.append((epoch, val_top_k_accuracy))
                self.val_accuracies.append((epoch, val_accuracy))
                self.val_losses.append((epoch, avg_vloss))

            self.logger.info(
                f"[EPOCH {epoch + 1}] LOSS : train={avg_loss} val={avg_vloss} | ACCURACY (Top-1) : train={train_accuracy} val={val_accuracy} | TOP-5 : train={train_top_k_accuracy} val={val_top_k_accuracy}"
            )

    def _train_one_epoch(self, dataloader: DataLoader) -> tuple:
        """Run one optimisation pass; return (avg loss, top-1 acc, top-5 acc) averaged over batches."""

        self.model.train()  # set model to train

        report_every = 10  # batches per progress-bar loss update
        running_loss = 0.0
        window_loss = 0.0
        running_acc = 0.0
        running_top_k_acc = 0.0

        pbar = tqdm(enumerate(dataloader), total=len(dataloader))
        for i, (inputs, labels) in pbar:
            inputs, labels = inputs.to(self.device), labels.to(self.device)

            # zero gradients for every batch
            self.optimizer.zero_grad()

            # compute predictions + loss
            outputs = self.model(inputs)  # logits, one row per sample
            loss = self.criterion(outputs, labels)

            # compute training accuracy
            running_acc += self.__accuracy(outputs, labels)
            running_top_k_acc += self._top_k_accuracy(outputs, labels, k=5)

            # perform backpropagation
            loss.backward()  # compute gradients
            self.optimizer.step()  # update model parameters

            # gather data and report
            batch_loss = loss.item()
            running_loss += batch_loss
            window_loss += batch_loss
            # (i + 1) so every report averages exactly `report_every` batches
            if (i + 1) % report_every == 0:
                pbar.set_postfix({"loss": round(window_loss / report_every, 5)})
                window_loss = 0.0

        n_batches = len(dataloader)
        return running_loss / n_batches, running_acc / n_batches, running_top_k_acc / n_batches

    def _evaluate(self, dataloader: DataLoader) -> tuple:
        """Evaluate without gradients; return (avg loss, top-1 acc, top-5 acc) averaged over batches."""

        self.model.eval()  # set model to evaluation

        running_loss = 0.0
        running_acc = 0.0
        running_top_k_acc = 0.0

        with torch.no_grad():
            for inputs, labels in dataloader:
                inputs, labels = inputs.to(self.device), labels.to(self.device)

                outputs = self.model(inputs)
                running_loss += self.criterion(outputs, labels).item()
                running_acc += self.__accuracy(outputs, labels)
                running_top_k_acc += self._top_k_accuracy(outputs, labels, k=5)

        n_batches = len(dataloader)
        return running_loss / n_batches, running_acc / n_batches, running_top_k_acc / n_batches

    def _improved_over_previous_epoch(self, avg_loss: float) -> bool:
        """Return True when ``avg_loss`` is no worse than the last recorded training loss."""

        # entries are (epoch, loss): index 1 is the loss, index 0 is the epoch number
        prev_loss = self.train_losses[-1][1] if self.train_losses else float("inf")
        return avg_loss <= prev_loss

    def test(self, test_dataloader: DataLoader) -> None:
        """Log top-1 and top-5 accuracy (in percent) on the test dataloader."""

        _, accuracy, top_k_accuracy = self._evaluate(test_dataloader)

        self.logger.info(f"Test accuracy: {accuracy * 100} % | Top-5 accuracy: {top_k_accuracy * 100}")

    def plot_metrics(self) -> None:
        """Plot loss and accuracy curves, save them under ``plots/`` and show them."""

        if not self.train_losses:
            self.logger.warning("No training metrics recorded; nothing to plot.")
            return

        os.makedirs("plots", exist_ok=True)  # create plots dir

        t_iters, t_loss = zip(*self.train_losses)
        _, acc = zip(*self.train_accuracies)
        # Validation may only run on a subset of epochs, so it needs its own
        # x-axis; plotting it against t_iters fails on a length mismatch.
        v_iters, v_loss = zip(*self.val_losses) if self.val_losses else ((), ())
        v_acc_iters, v_acc = zip(*self.val_accuracies) if self.val_accuracies else ((), ())

        fig, ax = plt.subplots(1, 2, figsize=(12, 5))
        fig.suptitle(f"Model: [{self.model_name}]")

        ax[0].set_title(f"Loss Curve (batch_size={self.batch_size}, lr={self.learning_rate}), momentum={self.momentum}")
        ax[0].plot(t_iters, t_loss)
        ax[0].plot(v_iters, v_loss)
        ax[0].set_xlabel("Epochs")
        ax[0].set_ylabel("Loss")
        ax[0].legend(["Train", "Validation"])
        ax[0].set_xticks(t_iters)

        ax[1].set_title(
            f"Accuracy Curve (batch_size={self.batch_size}, lr={self.learning_rate}), momentum={self.momentum}"
        )
        ax[1].plot(t_iters, acc)
        ax[1].plot(v_acc_iters, v_acc)
        ax[1].set_xlabel("Epochs")
        ax[1].set_ylabel("Accuracy")
        ax[1].legend(["Train", "Validation"])
        ax[1].set_xticks(t_iters)

        fig.savefig(f"plots/{self.model_name}_metrics.png")
        plt.show()

    def _top_k_accuracy(self, outputs: torch.Tensor, labels: torch.Tensor, k: int) -> float:
        """Top-K accuracy. Top-1 is the equivalent to regular accuracy.

        ``k`` is capped at the number of classes so models with fewer than
        ``k`` outputs do not make ``torch.topk`` raise.
        """

        k = min(k, outputs.size(-1))
        _, indices = torch.topk(outputs, k)
        topk_correct = indices.eq(labels.view(-1, 1).expand_as(indices))
        accuracy = topk_correct.sum().item() / labels.size(0)

        return accuracy

    def __accuracy(self, outputs: torch.Tensor, labels: torch.Tensor) -> float:
        """Compute accuracy given outputs as logits"""

        preds = torch.argmax(outputs, dim=1)
        # .item() yields a Python float, matching the annotation and
        # keeping device tensors out of the metric lists
        accuracy = (preds == labels).sum().item() / len(preds)

        return accuracy

    def save(self, epoch: int, loss: float) -> None:
        """Write model, optimizer and scheduler state to a timestamped checkpoint file."""

        time = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        checkpoint_path = os.path.join(self.checkpoints_dir, f"{self.model_name}_e{epoch}_{time}.pt")
        state = {
            "epoch": epoch,
            "model": self.model.state_dict(),
            "optimizer": self.optimizer.state_dict(),
            "scheduler": self.scheduler.state_dict(),
            "loss": loss,
        }
        torch.save(state, checkpoint_path)

    def load(self, checkpoint_name: str) -> None:
        """Restore model and optimizer (and scheduler, if present) from ``checkpoints_dir``."""

        checkpoint_path = os.path.join(self.checkpoints_dir, checkpoint_name)
        # map_location lets a checkpoint saved on one device load on another
        checkpoint = torch.load(checkpoint_path, map_location=self.device)
        self.model.load_state_dict(checkpoint["model"])
        self.optimizer.load_state_dict(checkpoint["optimizer"])
        # older checkpoints were written without scheduler state
        if "scheduler" in checkpoint:
            self.scheduler.load_state_dict(checkpoint["scheduler"])
245 |
--------------------------------------------------------------------------------
/AlexNet/utils.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import matplotlib.pyplot as plt
3 | import numpy as np
4 |
5 |
def get_device():
    """Return the preferred torch device: CUDA first, then MPS, otherwise CPU.

    Prints which backend was selected.
    """

    if torch.cuda.is_available():
        print("Using CUDA...")
        return torch.device("cuda")

    # only consulted when CUDA is unavailable
    mps_backend = torch.backends.mps
    if mps_backend.is_available() and mps_backend.is_built():
        print("Using MPS...")
        return torch.device("mps")

    print("Using CPU...")
    return torch.device("cpu")
18 |
19 |
def imshow(img):
    """Display an image tensor with matplotlib.

    Args:
        img: image tensor with the channel axis first; the transpose below
            moves axis 0 to the end for matplotlib. ``img / 2 + 0.5`` undoes a
            normalisation with mean 0.5 and std 0.5 (assumed to be how the
            caller normalised the data; confirm against the transforms used).
    """

    img = img / 2 + 0.5  # unnormalize
    # detach + cpu so tensors that require grad or live on CUDA/MPS can be
    # converted; Tensor.numpy() raises for both
    npimg = img.detach().cpu().numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()
27 |
--------------------------------------------------------------------------------
/LinearRegression/linear_regression.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import scipy
3 |
4 |
class LinearRegression:
    """Linear regression fitted with batch gradient descent on the MSE loss.

    Attributes set by ``fit``:
        W: weight vector of shape ``(features,)``.
        b: scalar bias.
        losses: MSE recorded at every epoch of the most recent ``fit`` call.
    """

    def __init__(self, epochs=1000, learning_rate=1e-2):
        self.epochs = epochs
        self.learning_rate = learning_rate
        self.W = None  # weights
        self.b = None  # bias
        self.losses = []

    def __compute_loss(self, y, y_pred):
        """Return the Mean Squared Error (MSE) between targets and predictions."""

        least_squares = (y_pred - y) ** 2
        return np.mean(least_squares)

    def fit(self, X, y):
        """Fit weights and bias to ``X`` of shape ``(N, features)`` and ``N`` targets ``y``.

        Weights are re-initialised from ``np.random.randn`` and the loss
        history is cleared on every call.
        """

        X = np.asarray(X, dtype=float)
        # Flatten the targets: a column vector (N, 1) would otherwise broadcast
        # against the (N,) predictions into an (N, N) residual matrix.
        y = np.asarray(y, dtype=float).reshape(-1)

        N, features = X.shape
        self.W = np.random.randn(features)
        self.b = 0.0
        self.losses = []  # do not mix in losses from an earlier fit

        for epoch in range(self.epochs):
            y_pred = self.predict(X)
            loss = self.__compute_loss(y, y_pred)  # MSE loss

            ### compute gradients ###
            residuals = y_pred - y
            grad_W = (2 / N) * np.matmul(X.T, residuals)
            grad_b = (2 / N) * np.sum(residuals)

            ### parameter updates ###
            self.W -= self.learning_rate * grad_W
            self.b -= self.learning_rate * grad_b
            self.losses.append(loss)

            if (epoch + 1) % 1000 == 0:
                print(f"[Epoch {epoch + 1}/{self.epochs}] Loss: {round(loss, 5)}")

    def predict(self, X):
        """Return ``X @ W + b`` for the fitted parameters."""

        return np.matmul(X, self.W) + self.b
45 |
--------------------------------------------------------------------------------
/LogisticRegression/eval.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Baseline Evaluation with Sklearn"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 15,
13 | "metadata": {},
14 | "outputs": [
15 | {
16 | "name": "stdout",
17 | "output_type": "stream",
18 | "text": [
19 | "The autoreload extension is already loaded. To reload it, use:\n",
20 | " %reload_ext autoreload\n"
21 | ]
22 | }
23 | ],
24 | "source": [
25 | "%load_ext autoreload\n",
26 | "%autoreload 2\n",
27 | "import random\n",
28 | "import numpy as np\n",
29 | "import pandas as pd\n",
30 | "import matplotlib.pyplot as plt\n",
31 | "from sklearn.model_selection import train_test_split\n",
32 | "from sklearn.datasets import load_breast_cancer\n",
33 | "from sklearn.linear_model import LogisticRegression\n",
34 | "from sklearn.metrics import accuracy_score, confusion_matrix, ConfusionMatrixDisplay"
35 | ]
36 | },
37 | {
38 | "cell_type": "code",
39 | "execution_count": 16,
40 | "metadata": {},
41 | "outputs": [],
42 | "source": [
43 | "def sklearn_to_df(data_loader):\n",
44 | " X_data = data_loader.data\n",
45 | " X_columns = data_loader.feature_names\n",
46 | " X = pd.DataFrame(X_data, columns=X_columns)\n",
47 | "\n",
48 | " y_data = data_loader.target\n",
49 | " label_names = data_loader.target_names\n",
50 | " y = pd.Series(y_data, name='target')\n",
51 | "\n",
52 | " return X, y, label_names\n",
53 | "\n",
54 | "X, y, label_names = sklearn_to_df(load_breast_cancer())\n",
55 | "\n",
56 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)"
57 | ]
58 | },
59 | {
60 | "cell_type": "code",
61 | "execution_count": 17,
62 | "metadata": {},
63 | "outputs": [
64 | {
65 | "name": "stderr",
66 | "output_type": "stream",
67 | "text": [
68 | "/Users/andy/miniconda3/envs/40.319/lib/python3.10/site-packages/sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs failed to converge (status=1):\n",
69 | "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n",
70 | "\n",
71 | "Increase the number of iterations (max_iter) or scale the data as shown in:\n",
72 | " https://scikit-learn.org/stable/modules/preprocessing.html\n",
73 | "Please also refer to the documentation for alternative solver options:\n",
74 | " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
75 | " n_iter_i = _check_optimize_result(\n"
76 | ]
77 | },
78 | {
79 | "data": {
80 | "text/html": [
81 | "
LogisticRegression() In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org. "
82 | ],
83 | "text/plain": [
84 | "LogisticRegression()"
85 | ]
86 | },
87 | "execution_count": 17,
88 | "metadata": {},
89 | "output_type": "execute_result"
90 | }
91 | ],
92 | "source": [
93 | "baseline_model = LogisticRegression(max_iter=100)\n",
94 | "baseline_model.fit(X_train, y_train)"
95 | ]
96 | },
97 | {
98 | "cell_type": "code",
99 | "execution_count": 18,
100 | "metadata": {},
101 | "outputs": [],
102 | "source": [
103 | "def evaluate(model):\n",
104 | " y_pred = model.predict(X_test)\n",
105 | "\n",
106 | " accuracy = accuracy_score(y_test, y_pred)\n",
107 | " conf_matrix = confusion_matrix(y_test, y_pred)\n",
108 | "\n",
109 | " print(f'Accuracy: {accuracy*100:.2f}%')\n",
110 | "\n",
111 | " disp = ConfusionMatrixDisplay(confusion_matrix=conf_matrix, display_labels=label_names)\n",
112 | " disp.plot(cmap=plt.cm.Blues) # You can change the color map if you like\n",
113 | " plt.title('Confusion Matrix')\n",
114 | " plt.show()\n",
115 | "\n",
116 | "def plot_losses(losses):\n",
117 | " plt.plot(losses)\n",
118 | " plt.title('Training Loss')\n",
119 | " plt.xlabel('Epoch')\n",
120 | " plt.ylabel('Loss')\n",
121 | " plt.show()"
122 | ]
123 | },
124 | {
125 | "cell_type": "code",
126 | "execution_count": 19,
127 | "metadata": {},
128 | "outputs": [
129 | {
130 | "name": "stdout",
131 | "output_type": "stream",
132 | "text": [
133 | "Accuracy: 95.61%\n"
134 | ]
135 | },
136 | {
137 | "data": {
138 | "image/png": "",
139 | "text/plain": [
140 | ""
141 | ]
142 | },
143 | "metadata": {},
144 | "output_type": "display_data"
145 | }
146 | ],
147 | "source": [
148 | "evaluate(baseline_model)"
149 | ]
150 | },
151 | {
152 | "cell_type": "markdown",
153 | "metadata": {},
154 | "source": [
155 | "# Evaluating My Implementation"
156 | ]
157 | },
158 | {
159 | "cell_type": "code",
160 | "execution_count": 20,
161 | "metadata": {},
162 | "outputs": [],
163 | "source": [
164 | "from logistic_regression import LogisticRegression as MyLogisticRegression"
165 | ]
166 | },
167 | {
168 | "cell_type": "code",
169 | "execution_count": 40,
170 | "metadata": {},
171 | "outputs": [
172 | {
173 | "name": "stdout",
174 | "output_type": "stream",
175 | "text": [
176 | "[Epoch 100/1000] Loss: 12.93496\n",
177 | "[Epoch 200/1000] Loss: 3.32343\n",
178 | "[Epoch 300/1000] Loss: 3.37494\n",
179 | "[Epoch 400/1000] Loss: 10.7662\n",
180 | "[Epoch 500/1000] Loss: 2.17328\n",
181 | "[Epoch 600/1000] Loss: 2.27783\n",
182 | "[Epoch 700/1000] Loss: 2.49438\n",
183 | "[Epoch 800/1000] Loss: 2.68703\n",
184 | "[Epoch 900/1000] Loss: 2.40905\n",
185 | "[Epoch 1000/1000] Loss: 2.41401\n"
186 | ]
187 | }
188 | ],
189 | "source": [
190 | "# set seed for reproducibility\n",
191 | "np.random.seed(0)\n",
192 | "\n",
193 | "my_model = MyLogisticRegression()\n",
194 | "my_model.fit(X_train.values, y_train)"
195 | ]
196 | },
197 | {
198 | "cell_type": "code",
199 | "execution_count": 41,
200 | "metadata": {},
201 | "outputs": [
202 | {
203 | "name": "stdout",
204 | "output_type": "stream",
205 | "text": [
206 | "Accuracy: 94.74%\n"
207 | ]
208 | },
209 | {
210 | "data": {
211 | "image/png": "",
212 | "text/plain": [
213 | ""
214 | ]
215 | },
216 | "metadata": {},
217 | "output_type": "display_data"
218 | },
219 | {
220 | "data": {
221 | "image/png": "",
222 | "text/plain": [
223 | ""
224 | ]
225 | },
226 | "metadata": {},
227 | "output_type": "display_data"
228 | }
229 | ],
230 | "source": [
231 | "evaluate(my_model)\n",
232 | "plot_losses(my_model.losses)"
233 | ]
234 | },
235 | {
236 | "cell_type": "markdown",
237 | "metadata": {},
238 | "source": [
239 | "Our implementation is a success! However, notice that the LogisticRegression implemented by Sklearn converges much faster than our implementation. This is because it is using a solver called Limited-memory Broyden–Fletcher–Goldfarb–Shanno algorithm (LBFGS) which converges much faster than Gradient Descent (GD) which is what our implementation is using."
240 | ]
241 | },
242 | {
243 | "cell_type": "markdown",
244 | "metadata": {},
245 | "source": [
246 | "# Acknowledgements"
247 | ]
248 | },
249 | {
250 | "cell_type": "markdown",
251 | "metadata": {},
252 | "source": [
253 | "A few resources that helped out:\n",
254 | "- https://developer.ibm.com/articles/implementing-logistic-regression-from-scratch-in-python/\n",
255 | "- https://medium.com/@koushikkushal95/logistic-regression-from-scratch-dfb8527a4226"
256 | ]
257 | }
258 | ],
259 | "metadata": {
260 | "kernelspec": {
261 | "display_name": "40.319",
262 | "language": "python",
263 | "name": "python3"
264 | },
265 | "language_info": {
266 | "codemirror_mode": {
267 | "name": "ipython",
268 | "version": 3
269 | },
270 | "file_extension": ".py",
271 | "mimetype": "text/x-python",
272 | "name": "python",
273 | "nbconvert_exporter": "python",
274 | "pygments_lexer": "ipython3",
275 | "version": "3.10.8"
276 | }
277 | },
278 | "nbformat": 4,
279 | "nbformat_minor": 2
280 | }
281 |
--------------------------------------------------------------------------------
/LogisticRegression/logistic_regression.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import scipy
3 |
4 |
class LogisticRegression:
    """Binary logistic regression fitted with batch gradient descent on BCE loss.

    Attributes set by ``fit``:
        W: weight vector of shape ``(features,)``.
        b: scalar bias.
        losses: BCE recorded at every epoch of the most recent ``fit`` call.
    """

    def __init__(self, epochs=1000, learning_rate=1e-2, threshold=0.5):
        self.epochs = epochs
        self.learning_rate = learning_rate
        self.threshold = threshold  # probability at or above which predict() outputs 1
        self.W = None  # weights
        self.b = None  # bias
        self.losses = []

    def __sigmoid(self, z):
        """Sigmoid activation, computed with ``expit`` to avoid ``np.exp`` overflow."""

        # Import the submodule explicitly: a bare ``import scipy`` is not
        # guaranteed to make ``scipy.special`` available as an attribute.
        from scipy.special import expit

        return expit(z)

    def __compute_loss(self, y, y_pred, epsilon=1e-9):
        """Binary cross-entropy loss (BCE); ``epsilon`` keeps the logs away from log(0)."""

        return -np.mean(y * np.log(y_pred + epsilon) + (1 - y) * np.log(1 - y_pred + epsilon))

    def fit(self, X, y):
        """Fit weights and bias to ``X`` of shape ``(N, features)`` and ``N`` binary labels ``y``.

        Weights are re-initialised from ``np.random.randn`` and the loss
        history is cleared on every call.
        """

        X = np.asarray(X, dtype=float)
        # Flatten the labels: a column vector (N, 1) would otherwise broadcast
        # against the (N,) predictions into an (N, N) residual matrix.
        y = np.asarray(y, dtype=float).reshape(-1)

        N, features = X.shape
        self.W = np.random.randn(features)
        self.b = 0.0
        self.losses = []  # do not mix in losses from an earlier fit

        for epoch in range(self.epochs):
            z = np.matmul(X, self.W) + self.b
            y_pred = self.__sigmoid(z)
            loss = self.__compute_loss(y, y_pred)

            ### compute gradients ###
            residuals = y_pred - y
            grad_W = (1 / N) * np.matmul(X.T, residuals)
            grad_b = (1 / N) * np.sum(residuals)

            ### parameter updates ###
            self.W -= self.learning_rate * grad_W
            self.b -= self.learning_rate * grad_b
            self.losses.append(loss)

            if (epoch + 1) % 100 == 0:
                print(f"[Epoch {epoch + 1}/{self.epochs}] Loss: {round(loss, 5)}")

    def predict(self, X):
        """Return 0/1 labels: 1 where the predicted probability is >= ``threshold``."""

        z = np.matmul(X, self.W) + self.b
        y_pred = self.__sigmoid(z)
        y_pred = np.where(y_pred >= self.threshold, 1, 0)
        return y_pred
55 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ツ
2 |
3 | # Implementations
4 |
5 | ## Traditional
6 | |Implementation | Dataset | Notebooks |
7 | | --- | --- | --- |
8 | | Simple Linear Regression | Dummy + Diabetes Dataset (regression) | [](LinearRegression/eval.ipynb) |
9 | | Logistic Regression | Breast Cancer Wisconsin Dataset | [](LogisticRegression/eval.ipynb) |
10 |
11 | ## Deep Learning
12 | |Implementation | Dataset | Notebooks |
13 | | --- | --- | --- |
14 | | AlexNet | Tiny ImageNet | [](AlexNet/train_alexnet.ipynb) |
15 | | ResNet-18 | CIFAR-10 | [](ResNet/train_resnet18.ipynb) |
16 |
17 | ## Computer Vision
18 | |Implementation | Dataset | Notebooks |
19 | | --- | --- | --- |
20 | | U-Net Architecture | SOME RANDOM SEGMENTATION DATASET | [](...) |
21 |
22 |
23 | ---
24 |
25 | # Roadmap
26 | - [x] Linear Regression
27 | - [x] Logistic Regression
28 | - [ ] Autoencoder
29 | - [ ] Variational Autoencoder (VAE)
30 | - [ ] Generative Adversarial Network (GAN)
31 | - [ ] Graph Neural Network
32 | - [x] ResNet18 + Residual Layers
33 | - [ ] U-Net Architecture [WIP]
34 | - [x] AlexNet
35 |
36 | ### Sequence Models
37 | - [ ] Recurrent Neural Network (RNN)
38 | - [ ] Long Short-Term Memory (LSTM)
39 | - [ ] Gated Recurrent Unit (GRU)
40 |
41 |
42 | ### Misc
43 | - [ ] Ensembling + XGBoost
44 |
45 |
46 | # Acknowledgements
47 | This readme layout inspired by [rasbt/deeplearning-models](https://github.com/rasbt/deeplearning-models)
48 |
--------------------------------------------------------------------------------
/ResNet/README.md:
--------------------------------------------------------------------------------
1 | # ResNet-18 Implementation
2 | A toy project to learn, implement, and train our own ResNet-18 on CIFAR-10.
3 |
4 |
5 | # Results
6 |
7 | ### Existing PyTorch ResNet-18 Model (Baseline)
8 | 
9 |
10 |
11 | ### Our ResNet-18 Implementation
12 | We load the pretrained weights from PyTorch's `ResNet18_Weights.IMAGENET1K_V1`.
13 |
14 |
15 | 
16 |
17 |
18 | # Acknowledgements
19 | - [PyTorch CIFAR10 Training Tutorial](https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html)
20 | - [A Detailed Introduction to ResNet and Its Implementation in PyTorch](https://medium.com/@freshtechyy/a-detailed-introduction-to-resnet-and-its-implementation-in-pytorch-744b13c8074a) by Huili Yu
21 | - [Let's reproduce GPT-2 (124M)](https://www.youtube.com/watch?v=l8pRSuU81PU) by Andrej Karpathy
22 | - [Helpful conventions for PyTorch model building](https://github.com/FrancescoSaverioZuppichini/Pytorch-how-and-when-to-use-Module-Sequential-ModuleList-and-ModuleDict/blob/master/README.md) by FrancescoSaverioZuppichini
23 |
24 |
--------------------------------------------------------------------------------
/ResNet/plots/baseline_pytorch_resnet18_metrics.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aandyw/StuffFromScratch/74578e8ccfd4bc40eaa06d9082e48ee6dfb85fb8/ResNet/plots/baseline_pytorch_resnet18_metrics.png
--------------------------------------------------------------------------------
/ResNet/plots/resnet18_metrics.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aandyw/StuffFromScratch/74578e8ccfd4bc40eaa06d9082e48ee6dfb85fb8/ResNet/plots/resnet18_metrics.png
--------------------------------------------------------------------------------
/ResNet/resnet18.py:
--------------------------------------------------------------------------------
1 | """
2 | Building an 18-layer residual network (ResNet-18) from scratch.
3 | From the paper "Deep Residual Learning for Image Recognition" (https://arxiv.org/abs/1512.03385)
4 | """
5 |
6 | import torchvision
7 | import torch
8 | import torch.nn as nn
9 |
10 |
class BasicBlock(nn.Module):
    """The Residual Block

    Two 3x3 conv + batch-norm stages whose result is added to a skip connection
    before the final ReLU.
    """

    def __init__(self, in_channels: int, out_channels: int, stride: int = 1, downsample: bool = False) -> None:
        """
        Create the Residual Block

        Args:
            in_channels (int): number of input channels
            out_channels (int): number of output channels
            stride (int): stride of first 3x3 convolution layer
            downsample (bool): force a 1x1 projection on the skip connection. A projection is also
                added automatically whenever `stride != 1` or `in_channels != out_channels`,
                because the residual add would otherwise fail on mismatched shapes.
        """
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)

        # For downsampling, the skip connection passes through a 1x1 conv layer with the same stride as
        # conv1 so that its spatial dimensions and channel count match the main branch for the add.
        #
        # In ResNet18 this is needed for the first block of layers 2, 3 and 4, where conv1 uses a stride
        # of 2 to downsample the feature maps (and the channel count doubles).
        needs_projection = downsample or stride != 1 or in_channels != out_channels
        self.downsample = None
        if needs_projection:
            self.downsample = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels),
            )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Apply the residual block

        Args:
            x (torch.Tensor): input feature maps, laid out as expected by `nn.Conv2d`

        Returns:
            torch.Tensor: ReLU(F(x) + shortcut(x))
        """
        # No clone is needed: `x` is never modified in place (the in-place ReLU and the in-place add
        # below both act on freshly created tensors), so the skip connection can alias the input.
        identity = x if self.downsample is None else self.downsample(x)

        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        out += identity
        return self.relu(out)
58 |
59 |
class ResNet18(nn.Module):
    """The ResNet-18 Model"""

    def __init__(self, n_classes: int = 10) -> None:
        """
        Create the ResNet-18 Model

        Args:
            n_classes (int, optional): The number of output classes we predict for. Defaults to 10.
        """
        super().__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=(3, 3), bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.layer1 = nn.Sequential(
            BasicBlock(64, 64),
            BasicBlock(64, 64),
        )
        self.layer2 = nn.Sequential(
            BasicBlock(64, 128, stride=2, downsample=True),
            BasicBlock(128, 128),
        )
        self.layer3 = nn.Sequential(
            BasicBlock(128, 256, stride=2, downsample=True),
            BasicBlock(256, 256),
        )
        self.layer4 = nn.Sequential(
            BasicBlock(256, 512, stride=2, downsample=True),
            BasicBlock(512, 512),
        )
        self.avgpool = nn.AdaptiveAvgPool2d(output_size=(1, 1))

        # our fully connected layer will be different to accommodate for CIFAR-10
        self.fc = nn.Linear(in_features=512, out_features=n_classes)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Compute class logits for a batch of images

        Args:
            x (torch.Tensor): batch of 3-channel images, [bs, 3, H, W]

        Returns:
            torch.Tensor: logits of shape [bs, n_classes]
        """
        x = self.maxpool(self.relu(self.bn1(self.conv1(x))))

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.avgpool(x)  # [bs, 512, 1, 1]

        # Flatten everything except the batch dimension -> [bs, 512].
        # torch.squeeze(x) would also drop the batch dimension when bs == 1.
        x = torch.flatten(x, 1)
        o = self.fc(x)

        return o

    @classmethod
    def from_pretrained(cls, model_type: str, n_classes: int = 10) -> nn.Module:
        """
        Load pretrained PyTorch ResNet-18 weights into our ResNet-18 implementation

        Every tensor except the final fully connected layer is copied from torchvision's
        `ResNet18_Weights.IMAGENET1K_V1` checkpoint; the fc layer keeps its fresh initialisation.

        Inspired by Andrej Karpathy from 'Let's reproduce GPT-2 (124M)'
        (https://www.youtube.com/watch?v=l8pRSuU81PU)

        Args:
            model_type (str): name of the pretrained model, only "resnet18" is supported
            n_classes (int, optional): number of output classes of the new fc layer. Defaults to 10.

        Returns:
            nn.Module: our model initialised with the pretrained backbone weights

        Raises:
            ValueError: if `model_type` is not supported
        """

        if model_type not in {"resnet18"}:
            raise ValueError("only supports resnet18")
        print("loading weights from pytorch pretrained resnet18")

        # our model
        model = cls(n_classes=n_classes)
        r18 = model.state_dict()  # tensors share storage with the model's parameters/buffers

        # pretrained pytorch resnet18 model
        p_model = torchvision.models.resnet18(weights=torchvision.models.ResNet18_Weights.IMAGENET1K_V1)
        p_r18 = p_model.state_dict()

        assert len(p_r18) == len(r18), f"mismatched keys: {len(p_r18)} != {len(r18)}"

        # load weights from pretrained
        with torch.no_grad():
            for k, pretrained_tensor in p_r18.items():
                if k.startswith("fc"):  # skip fc layer, we add our own for CIFAR-10
                    continue

                assert pretrained_tensor.shape == r18[k].shape
                # in-place copy writes straight into the model because of the shared storage
                r18[k].copy_(pretrained_tensor)

        return model
144 |
--------------------------------------------------------------------------------
/ResNet/trainer.py:
--------------------------------------------------------------------------------
1 | import sys, os
2 | import matplotlib.pyplot as plt
3 | from tqdm import tqdm
4 | import logging
5 |
6 | import torch
7 | import torch.nn as nn
8 | import torch.optim as optim
9 | from torch.utils.data import DataLoader
10 |
11 |
class Trainer:
    """Facilitates training, validation, testing and metric plotting for a classification model"""

    def __init__(
        self,
        model: nn.Module,
        model_name: str = "resnet18",
        batch_size: int = 256,
        learning_rate: float = 0.01,
        num_epochs: int = 30,
        check_val_every_n_epoch: int = 1,
        device: str = "cpu",
    ) -> None:
        """
        Trainer object to facilitate training and evaluation

        Args:
            model (nn.Module): model to train; it is moved to `device`
            model_name (str): name used in the plot title and the saved plot filename
            batch_size (int): only used for plot titles; batching is decided by the dataloaders
            learning_rate (float): learning rate of the SGD optimizer
            num_epochs (int): number of training epochs
            check_val_every_n_epoch (int): run validation on every n-th epoch (starting with the first)
            device (str): device on which the model and the batches are placed
        """

        self.model = model
        self.model_name = model_name

        # training configurations
        self.batch_size = batch_size  # does nothing; mainly for viz
        self.learning_rate = learning_rate
        self.num_epochs = num_epochs
        self.check_val_every_n_epoch = check_val_every_n_epoch
        self.device = device
        self.model.to(self.device)

        # set loss function and optimizer
        self.criterion = nn.CrossEntropyLoss()

        # SGD used by original paper "Deep Residual Learning for Image Recognition"
        self.optimizer = optim.SGD(self.model.parameters(), lr=self.learning_rate, momentum=0.9)
        self.scheduler = optim.lr_scheduler.StepLR(self.optimizer, step_size=7, gamma=0.1)

        # model metrics, stored as (epoch, value) pairs
        self.train_losses = []
        self.train_accuracies = []
        self.val_losses = []
        self.val_accuracies = []

        # logging info
        logging.basicConfig(stream=sys.stdout, level=logging.INFO, format="%(levelname)s | %(message)s")
        self.logger = logging.getLogger()

    def train(self, train_dataloader: DataLoader, val_dataloader: DataLoader) -> None:
        """
        Train the model and record per-epoch metrics

        Args:
            train_dataloader (DataLoader): yields (inputs, labels) training batches
            val_dataloader (DataLoader): yields (inputs, labels) validation batches
        """

        report_every = 10  # refresh the progress-bar loss every `report_every` batches

        for epoch in range(self.num_epochs):
            self.model.train()  # set model to train

            # loss/accuracy tracking, weighted by sample so a smaller last batch does not skew the mean
            loss_sum = 0.0
            window_loss = 0.0
            n_correct = 0
            n_seen = 0

            pbar = tqdm(enumerate(train_dataloader), total=len(train_dataloader))

            for i, (inputs, labels) in pbar:
                inputs, labels = inputs.to(self.device), labels.to(self.device)

                # zero gradients for every batch
                self.optimizer.zero_grad()

                # compute predictions + loss
                outputs = self.model(inputs)  # class logits
                loss = self.criterion(outputs, labels)

                # compute training accuracy
                n_correct += self._count_correct(outputs, labels)
                n_seen += len(labels)

                # perform backpropagation
                loss.backward()  # compute gradients
                self.optimizer.step()  # update model parameters

                # gather data and report
                loss_value = loss.item()
                loss_sum += loss_value * len(labels)  # criterion returns the batch mean
                window_loss += loss_value
                if (i + 1) % report_every == 0:
                    # exactly `report_every` batches were accumulated since the last report
                    pbar.set_postfix({"loss": round(window_loss / report_every, 5)})
                    window_loss = 0.0

            self.scheduler.step()

            train_accuracy = n_correct / max(n_seen, 1)
            self.train_accuracies.append((epoch, train_accuracy))

            avg_loss = loss_sum / max(n_seen, 1)
            self.train_losses.append((epoch, avg_loss))

            if epoch % self.check_val_every_n_epoch == 0:
                avg_vloss, val_accuracy = self._evaluate(val_dataloader)
                self.val_accuracies.append((epoch, val_accuracy))
                self.val_losses.append((epoch, avg_vloss))

                self.logger.info(
                    f"[EPOCH {epoch + 1}] LOSS : train={avg_loss} val={avg_vloss} | ACCURACY : train={train_accuracy} val={val_accuracy}"
                )
            else:
                # validation was skipped this epoch, so do not report stale validation numbers
                self.logger.info(f"[EPOCH {epoch + 1}] LOSS : train={avg_loss} | ACCURACY : train={train_accuracy}")

    def test(self, test_dataloader: DataLoader) -> float:
        """
        Test the model

        Args:
            test_dataloader (DataLoader): yields (inputs, labels) test batches

        Returns:
            float: fraction of correctly classified samples, in [0, 1]
        """

        _, accuracy = self._evaluate(test_dataloader)
        self.logger.info(f"Test accuracy: {accuracy * 100} %")
        return accuracy

    def plot_metrics(self) -> None:
        """
        Create plots for model metrics and save them to `plots/{model_name}_metrics.png`

        Raises:
            ValueError: if no training metrics have been recorded yet
        """

        if not self.train_losses:
            raise ValueError("no metrics recorded; call train() before plot_metrics()")

        os.makedirs("plots", exist_ok=True)  # create plots dir

        t_iters, t_loss = zip(*self.train_losses)
        _, acc = zip(*self.train_accuracies)

        fig, ax = plt.subplots(1, 2, figsize=(12, 5))
        fig.suptitle(f"Model: [{self.model_name}]")

        ax[0].set_title(f"Loss Curve (batch_size={self.batch_size}, lr={self.learning_rate})")
        ax[0].plot(t_iters, t_loss, label="Train")
        ax[0].set_ylabel("Loss")

        ax[1].set_title(f"Accuracy Curve (batch_size={self.batch_size}, lr={self.learning_rate})")
        ax[1].plot(t_iters, acc, label="Train")
        ax[1].set_ylabel("Accuracy")

        # Validation may run on fewer epochs than training (check_val_every_n_epoch > 1),
        # so it is plotted against its own epoch axis.
        if self.val_losses:
            v_iters, v_loss = zip(*self.val_losses)
            ax[0].plot(v_iters, v_loss, label="Validation")
        if self.val_accuracies:
            va_iters, v_acc = zip(*self.val_accuracies)
            ax[1].plot(va_iters, v_acc, label="Validation")

        for axis in ax:
            axis.set_xlabel("Epochs")
            axis.legend()
            axis.set_xticks(t_iters)

        fig.savefig(f"plots/{self.model_name}_metrics.png")
        plt.show()

    def _evaluate(self, dataloader: DataLoader) -> "tuple[float, float]":
        """
        Run the model over a dataloader without gradient tracking

        Leaves the model in evaluation mode.

        Returns:
            tuple[float, float]: (mean loss per sample, fraction of correct predictions)
        """

        loss_sum = 0.0
        n_correct = 0
        n_seen = 0

        self.model.eval()  # set model to evaluation
        with torch.no_grad():
            for inputs, labels in dataloader:
                inputs, labels = inputs.to(self.device), labels.to(self.device)

                outputs = self.model(inputs)
                loss = self.criterion(outputs, labels)

                loss_sum += loss.item() * len(labels)  # criterion returns the batch mean
                n_correct += self._count_correct(outputs, labels)
                n_seen += len(labels)

        n_seen = max(n_seen, 1)  # an empty dataloader yields 0.0 instead of a division by zero
        return loss_sum / n_seen, n_correct / n_seen

    @staticmethod
    def _count_correct(outputs: torch.Tensor, labels: torch.Tensor) -> int:
        """Count the predictions (argmax over dim 1 of the logits) that match the labels"""

        preds = torch.argmax(outputs, dim=1)
        return int(torch.sum(preds == labels).item())

    def __accuracy(self, outputs: torch.Tensor, labels: torch.Tensor) -> float:
        """Compute accuracy of a single batch given outputs as logits"""

        if len(labels) == 0:
            return 0.0
        return self._count_correct(outputs, labels) / len(labels)
174 |
--------------------------------------------------------------------------------