├── LeNet_300_MNIST_Magnitude_Winning_Ticket_Distribution_91.18900266306589.h5 ├── README.md ├── LeNet300_MNIST_Itraining_torch.py ├── Check_Winning_Ticket.ipynb ├── Conv_4_LTH_CIFAR_10_winning_ticket_verification.ipynb ├── Quantization_LTH_LeNet_300_100_MNIST.ipynb └── LeNet_300_100-Iterative_Pruning.ipynb /LeNet_300_MNIST_Magnitude_Winning_Ticket_Distribution_91.18900266306589.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arjun-majumdar/Lottery_Ticket_Hypothesis-TensorFlow_2/HEAD/LeNet_300_MNIST_Magnitude_Winning_Ticket_Distribution_91.18900266306589.h5 -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # The Lottery Ticket Hypothesis: Finding Sparse, Trainable Neural Networks - Using TensorFlow 2 2 | 3 | A GitHub repository implementing __The Lottery Ticket Hypothesis__ paper by _Jonathan Frankle & Michael Carbin_ 4 | 5 | "lottery ticket hypothesis:" dense, randomly-initialized, feed-forward and/or convolutional networks contain subnetworks ("winning tickets") that - when trained in isolation - reach test accuracy comparable to the original network in a similar number of iterations. The winning tickets we find have won the initialization lottery: their connections have initial weights that make training particularly effective. 6 | 7 | The paper can be downloaded from: 8 | [The Lottery Ticket Hypothesis](https://arxiv.org/abs/1803.03635) 9 | 10 | 11 | # Comparing Rewinding and Fine-tuning in Neural Network Pruning - using PyTorch 2.X 12 | 13 | Implementation for the paper __Comparing Rewinding and Fine-tuning in Neural Network Pruning__ by Alex Renda et al. 14 | 15 | 16 | ## LTH Codes: 17 | 1. MNIST dataset using 300-100-10 Dense Fully connected neural network winning ticket identification. 18 | 1. MNIST dataset using LeNet-5 Convolutional Neural Networks. 19 | 1. Validation of the winning ticket identified for MNIST and CIFAR-10 dataset using relevant neural networks. 20 | 1. Conv-2/4/6 Convolutional Neural Network (CNN) for CIFAR10 dataset; pruning network till 0.5% of original connections remain and observe training and testing accuracies and losses. 21 | 1. Pruning Algorithm implementation: numpy based unstructured, layer-wise, absolute magnitude pruning and _tensorflow_model_optimization_ toolkit based pruning (not the focus of most codes) 22 | 23 | 24 | 25 | 26 | ### Prerequisites for the code to run: 27 | - Python 3.X 28 | - numpy 1.17 and/or above 29 | - TensorFlow 2.0 30 | - PyTorch 2.X 31 | - [tensorflow_model_optimization](https://www.tensorflow.org/model_optimization/guide/pruning/pruning_with_keras) (not focused on) 32 | -------------------------------------------------------------------------------- /LeNet300_MNIST_Itraining_torch.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import torch 4 | import torch.nn.functional as F 5 | import torch.nn as nn 6 | import numpy as np 7 | import pickle 8 | from tqdm import tqdm, trange 9 | from LeNet300_swish_torch import LeNet300, init_weights 10 | from get_mnist_data import mnist_dataset 11 | 12 | 13 | print(f"torch version: {torch.__version__}") 14 | 15 | # Check if there are multiple devices (i.e., GPU cards)- 16 | print(f"Number of GPU(s) available = {torch.cuda.device_count()}") 17 | 18 | if torch.cuda.is_available(): 19 | print(f"Current GPU: {torch.cuda.current_device()}") 20 | print(f"Current GPU name: {torch.cuda.get_device_name(torch.cuda.current_device())}") 21 | else: 22 | print("PyTorch does not have access to GPU") 23 | 24 | # Device configuration- 25 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 26 | print(f'Available device is {device}\n\n') 27 | 28 | 29 | path_files = "/home/amajumdar/Downloads/.data/" 30 | batch_size = 512 31 | 32 | train_dataset, test_dataset, train_loader, test_loader = mnist_dataset( 33 | path_to_files = path_files, batch_size = batch_size 34 | ) 35 | 36 | 37 | model = LeNet300(beta = 1.0) 38 | model.apply(init_weights) 39 | 40 | # Save randomly initialized parameters- 41 | torch.save(model.state_dict(), "LeNet300_randomwts.pth") 42 | 43 | 44 | def count_trainable_params(model): 45 | # Count number of layer-wise parameters and total parameters- 46 | tot_params = 0 47 | for param in model.parameters(): 48 | layer_param = torch.count_nonzero(param) 49 | tot_params += layer_param.item() 50 | 51 | return tot_params 52 | 53 | tot_params = count_trainable_params(model) 54 | 55 | 56 | class CosineScheduler: 57 | def __init__( 58 | self, max_update, 59 | base_lr = 0.01, final_lr = 0, 60 | warmup_steps = 0, warmup_begin_lr = 0 61 | ): 62 | self.base_lr_orig = base_lr 63 | self.max_update = max_update 64 | self.final_lr = final_lr 65 | self.warmup_steps = warmup_steps 66 | self.warmup_begin_lr = warmup_begin_lr 67 | self.max_steps = self.max_update - self.warmup_steps 68 | 69 | 70 | def get_warmup_lr(self, epoch): 71 | increase = (self.base_lr_orig - self.warmup_begin_lr) \ 72 | * float(epoch) / float(self.warmup_steps) 73 | return self.warmup_begin_lr + increase 74 | 75 | 76 | def __call__(self, epoch): 77 | if epoch < self.warmup_steps: 78 | return self.get_warmup_lr(epoch) 79 | if epoch <= self.max_update: 80 | self.base_lr = self.final_lr + ( 81 | self.base_lr_orig - self.final_lr) * (1 + np.cos( 82 | np.pi * (epoch - self.warmup_steps) / self.max_steps)) / 2 83 | return self.base_lr 84 | 85 | 86 | def train_one_epoch( 87 | model, train_loader, 88 | train_dataset, optimizer 89 | ): 90 | ''' 91 | Function to perform one epoch of training by using 'train_loader'. 92 | Returns loss and number of correct predictions for this epoch. 93 | ''' 94 | running_loss = 0.0 95 | running_corrects = 0.0 96 | 97 | model.train() 98 | 99 | with tqdm(train_loader, unit = 'batch') as tepoch: 100 | for images, labels in tepoch: 101 | tepoch.set_description(f"Training: ") 102 | images = images.reshape(-1, 28 * 28) 103 | images = images.to(device) 104 | labels = labels.to(device) 105 | 106 | # Get model predictions- 107 | preds = model(images) 108 | 109 | # Compute loss- 110 | # output layer applies log-softmax (row-wise), hence, use 111 | # NLL-loss instead of Cross-entropy cost function- 112 | # loss = torch.nn.functional.nll_loss(preds, labels) 113 | cost_fn = nn.CrossEntropyLoss() 114 | loss = cost_fn(preds, labels) 115 | 116 | # Empty accumulated gradients- 117 | optimizer.zero_grad() 118 | 119 | # Perform backprop- 120 | loss.backward() 121 | 122 | # Update parameters- 123 | optimizer.step() 124 | 125 | ''' 126 | # LR scheduler- 127 | global step 128 | optimizer.param_groups[0]['lr'] = custom_lr_scheduler.get_lr(step) 129 | step += 1 130 | ''' 131 | 132 | # Compute model's performance statistics- 133 | running_loss += loss.item() * images.size(0) 134 | _, predicted = torch.max(preds, 1) 135 | running_corrects += torch.sum(predicted == labels.data) 136 | 137 | tepoch.set_postfix( 138 | loss = running_loss / len(train_dataset), 139 | accuracy = (running_corrects.double().cpu().numpy() / len(train_dataset)) * 100 140 | ) 141 | 142 | train_loss = running_loss / len(train_dataset) 143 | train_acc = (running_corrects.double() / len(train_dataset)) * 100 144 | 145 | return train_loss, train_acc.cpu().numpy() 146 | 147 | 148 | def test_one_epoch(model, test_loader, test_dataset): 149 | total = 0.0 150 | correct = 0.0 151 | running_loss_test = 0.0 152 | 153 | with torch.no_grad(): 154 | with tqdm(test_loader, unit = 'batch') as tepoch: 155 | for images, labels in tepoch: 156 | tepoch.set_description(f"Testing: ") 157 | images = images.reshape(-1, 28 * 28) 158 | images = images.to(device) 159 | labels = labels.to(device) 160 | 161 | # Set model to evaluation mode- 162 | model.eval() 163 | 164 | # Predict using trained model- 165 | outputs = model(images) 166 | _, y_pred = torch.max(outputs, 1) 167 | 168 | # Compute validation loss- 169 | # J_test = torch.nn.functional.nll_loss(outputs, labels) 170 | cost_fn = nn.CrossEntropyLoss() 171 | 172 | J_test = loss = cost_fn(outputs, labels) 173 | 174 | running_loss_test += J_test.item() * labels.size(0) 175 | 176 | # Total number of labels- 177 | total += labels.size(0) 178 | 179 | # Total number of correct predictions- 180 | correct += (y_pred == labels).sum() 181 | 182 | tepoch.set_postfix( 183 | test_loss = running_loss_test / len(test_dataset), 184 | test_acc = 100 * (correct.cpu().numpy() / total) 185 | ) 186 | 187 | 188 | # return (running_loss_val, correct, total) 189 | test_loss = running_loss_test / len(test_dataset) 190 | test_acc = (correct / total) * 100 191 | 192 | return test_loss, test_acc.cpu().numpy() 193 | 194 | 195 | def train_until_convergence( 196 | model, 197 | train_dataset, test_dataset, 198 | train_loader, test_loader, 199 | num_epochs = 50, warmup_epochs = 10, 200 | best_test_acc = 90 201 | ): 202 | 203 | # Python3 dict to contain training metrics- 204 | train_history = {} 205 | 206 | # Initialize parameters saving 'best' models- 207 | # best_test_acc = 90 208 | # num_epochs = 50 209 | 210 | # Use SGD optimizer- 211 | optimizer = torch.optim.SGD( 212 | params = model.parameters(), lr = 0.0001, 213 | momentum = 0.9, weight_decay = 5e-4 214 | ) 215 | 216 | # Decay lr in cosine manner unitl 45th epoch- 217 | scheduler = CosineScheduler( 218 | max_update = 45, base_lr = 0.03, 219 | final_lr = 0.001, warmup_steps = warmup_epochs, 220 | warmup_begin_lr = 0.0001 221 | ) 222 | 223 | 224 | for epoch in range(1, num_epochs + 1): 225 | 226 | # Update LR scheduler- 227 | for param_group in optimizer.param_groups: 228 | param_group['lr'] = scheduler(epoch) 229 | 230 | # Train and validate model for 1 epoch- 231 | train_loss, train_acc = train_one_epoch( 232 | model = model, train_loader = train_loader, 233 | train_dataset = train_dataset, 234 | optimizer = optimizer 235 | ) 236 | 237 | test_loss, test_acc = test_one_epoch( 238 | model = model, test_loader = test_loader, 239 | test_dataset = test_dataset 240 | ) 241 | 242 | curr_lr = optimizer.param_groups[0]['lr'] 243 | 244 | print(f"\nepoch: {epoch + 1} train loss = {train_loss:.4f}, " 245 | f"train accuracy = {train_acc:.2f}%, test loss = {test_loss:.4f}" 246 | f", test accuracy = {test_acc:.2f}% " 247 | f"LR = {curr_lr:.4f}\n") 248 | 249 | train_history[epoch + 1] = { 250 | 'loss': train_loss, 'acc': train_acc, 251 | 'test_loss': test_loss, 'test_acc': test_acc, 252 | 'lr': curr_lr, 253 | } 254 | 255 | # Save best weights achieved until now- 256 | if (test_acc > best_test_acc): 257 | # update 'best_val_loss' variable to lowest loss encountered so far- 258 | best_test_acc = test_acc 259 | 260 | print(f"Saving model with highest test acc = {test_acc:.3f}%\n") 261 | 262 | # Save trained model with 'best' testing accuracy- 263 | torch.save(model.state_dict(), "LeNet300_best_testacc_model.pth") 264 | torch.save(optimizer.state_dict(), "LeNet300_best_optimizer.pth") 265 | 266 | return train_history 267 | 268 | 269 | train_history = train_until_convergence( 270 | model = model, 271 | train_dataset = train_dataset, test_dataset = test_dataset, 272 | train_loader = train_loader, test_loader = test_loader, 273 | num_epochs = 50, warmup_epochs = 10, 274 | best_test_acc = 90 275 | ) 276 | 277 | with open("LeNet300_train_history.pkl", "wb") as file: 278 | pickle.dump(train_history, file) 279 | del file 280 | 281 | 282 | -------------------------------------------------------------------------------- /Check_Winning_Ticket.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import tensorflow as tf\n", 10 | "import numpy as np\n", 11 | "import math\n", 12 | "import tensorflow_model_optimization as tfmot\n", 13 | "from tensorflow_model_optimization.sparsity import keras as sparsity\n", 14 | "# from tensorflow.keras import datasets, layers, models\n", 15 | "import matplotlib.pyplot as plt\n", 16 | "from tensorflow.keras.layers import AveragePooling2D, Conv2D\n", 17 | "from tensorflow.keras import models, layers, datasets\n", 18 | "from tensorflow.keras.layers import Dense, Flatten, Reshape, Input, InputLayer\n", 19 | "from tensorflow.keras.models import Sequential, Model\n", 20 | "from tensorflow.keras.initializers import RandomNormal\n", 21 | "# import math\n", 22 | "from sklearn.metrics import accuracy_score, precision_score, recall_score" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 2, 28 | "metadata": {}, 29 | "outputs": [ 30 | { 31 | "data": { 32 | "text/plain": [ 33 | "'2.0.0'" 34 | ] 35 | }, 36 | "execution_count": 2, 37 | "metadata": {}, 38 | "output_type": "execute_result" 39 | } 40 | ], 41 | "source": [ 42 | "tf.__version__" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 3, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "batch_size = 32\n", 52 | "num_classes = 10\n", 53 | "num_epochs = 50" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": 4, 59 | "metadata": {}, 60 | "outputs": [], 61 | "source": [ 62 | "# Data preprocessing and cleadning:\n", 63 | "# input image dimensions\n", 64 | "img_rows, img_cols = 28, 28\n", 65 | "\n", 66 | "# Load MNIST dataset-\n", 67 | "(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": 5, 73 | "metadata": {}, 74 | "outputs": [ 75 | { 76 | "name": "stdout", 77 | "output_type": "stream", 78 | "text": [ 79 | "\n", 80 | "'input_shape' which will be used = (28, 28, 1)\n", 81 | "\n" 82 | ] 83 | } 84 | ], 85 | "source": [ 86 | "if tf.keras.backend.image_data_format() == 'channels_first':\n", 87 | " X_train = X_train.reshape(X_train.shape[0], 1, img_rows, img_cols)\n", 88 | " X_test = X_test.reshape(X_test.shape[0], 1, img_rows, img_cols)\n", 89 | " input_shape = (1, img_rows, img_cols)\n", 90 | "else:\n", 91 | " X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, 1)\n", 92 | " X_test = X_test.reshape(X_test.shape[0], img_rows, img_cols, 1)\n", 93 | " input_shape = (img_rows, img_cols, 1)\n", 94 | "\n", 95 | "print(\"\\n'input_shape' which will be used = {0}\\n\".format(input_shape))" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 6, 101 | "metadata": {}, 102 | "outputs": [], 103 | "source": [ 104 | "# Convert datasets to floating point types-\n", 105 | "X_train = X_train.astype('float32')\n", 106 | "X_test = X_test.astype('float32')\n", 107 | "\n", 108 | "# Normalize the training and testing datasets-\n", 109 | "X_train /= 255.0\n", 110 | "X_test /= 255.0" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": 7, 116 | "metadata": {}, 117 | "outputs": [], 118 | "source": [ 119 | "# convert class vectors/target to binary class matrices or one-hot encoded values-\n", 120 | "y_train = tf.keras.utils.to_categorical(y_train, num_classes)\n", 121 | "y_test = tf.keras.utils.to_categorical(y_test, num_classes)" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 8, 127 | "metadata": {}, 128 | "outputs": [], 129 | "source": [ 130 | "# Reshape training and testing sets-\n", 131 | "X_train = X_train.reshape(X_train.shape[0], 784)\n", 132 | "X_test = X_test.reshape(X_test.shape[0], 784)" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": 9, 138 | "metadata": {}, 139 | "outputs": [ 140 | { 141 | "name": "stdout", 142 | "output_type": "stream", 143 | "text": [ 144 | "\n", 145 | "Dimensions of training and testing sets are:\n", 146 | "X_train.shape = (60000, 784), y_train = (60000, 10)\n", 147 | "X_test.shape = (10000, 784), y_test = (10000, 10)\n" 148 | ] 149 | } 150 | ], 151 | "source": [ 152 | "print(\"\\nDimensions of training and testing sets are:\")\n", 153 | "print(\"X_train.shape = {0}, y_train = {1}\".format(X_train.shape, y_train.shape))\n", 154 | "print(\"X_test.shape = {0}, y_test = {1}\".format(X_test.shape, y_test.shape))" 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": 10, 160 | "metadata": {}, 161 | "outputs": [], 162 | "source": [ 163 | "l = tf.keras.layers" 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": 11, 169 | "metadata": {}, 170 | "outputs": [], 171 | "source": [ 172 | "def nn_model():\n", 173 | " \"\"\"\n", 174 | " Function to create LeNet 300-100-10\n", 175 | " model for MNIST classification\n", 176 | " \"\"\"\n", 177 | "\n", 178 | " model = Sequential()\n", 179 | "\n", 180 | " model.add(l.InputLayer(input_shape=(784, )))\n", 181 | "\n", 182 | " model.add(Flatten())\n", 183 | "\n", 184 | " model.add(Dense(units = 300, activation='relu', kernel_initializer=tf.initializers.GlorotUniform()))\n", 185 | "\n", 186 | " # model.add(l.Dropout(0.2))\n", 187 | "\n", 188 | " model.add(Dense(units = 100, activation='relu', kernel_initializer=tf.initializers.GlorotUniform()))\n", 189 | "\n", 190 | " # model.add(l.Dropout(0.1))\n", 191 | "\n", 192 | " model.add(Dense(units = num_classes, activation='softmax'))\n", 193 | " \n", 194 | " return model\n" 195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "execution_count": 12, 200 | "metadata": {}, 201 | "outputs": [], 202 | "source": [ 203 | "# Specify the parameters to be used for layer-wise pruning, NO PRUNING is done here:\n", 204 | "pruning_params_unpruned = {\n", 205 | " 'pruning_schedule': sparsity.ConstantSparsity(\n", 206 | " target_sparsity=0.0, begin_step=0,\n", 207 | " end_step = 0, frequency=100\n", 208 | " )\n", 209 | "}" 210 | ] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "execution_count": 13, 215 | "metadata": {}, 216 | "outputs": [], 217 | "source": [ 218 | "def pruned_nn(pruning_params):\n", 219 | " \"\"\"\n", 220 | " Function to define the architecture of a neural network model\n", 221 | " following 300 100 architecture for MNIST dataset and using\n", 222 | " provided parameter which are used to prune the model.\n", 223 | " \n", 224 | " Input: 'pruning_params' Python 3 dictionary containing parameters which are used for pruning\n", 225 | " Output: Returns designed and compiled neural network model\n", 226 | " \"\"\"\n", 227 | " \n", 228 | " pruned_model = Sequential()\n", 229 | " pruned_model.add(l.InputLayer(input_shape=(784, )))\n", 230 | " pruned_model.add(Flatten())\n", 231 | " pruned_model.add(sparsity.prune_low_magnitude(\n", 232 | " Dense(units = 300, activation='relu', kernel_initializer=tf.initializers.GlorotUniform()),\n", 233 | " **pruning_params))\n", 234 | " # pruned_model.add(l.Dropout(0.2))\n", 235 | " pruned_model.add(sparsity.prune_low_magnitude(\n", 236 | " Dense(units = 100, activation='relu', kernel_initializer=tf.initializers.GlorotUniform()),\n", 237 | " **pruning_params))\n", 238 | " # pruned_model.add(l.Dropout(0.1))\n", 239 | " pruned_model.add(sparsity.prune_low_magnitude(\n", 240 | " Dense(units = num_classes, activation='softmax'),\n", 241 | " **pruning_params))\n", 242 | " \n", 243 | " # Compile pruned CNN-\n", 244 | " pruned_model.compile(\n", 245 | " loss=tf.keras.losses.categorical_crossentropy,\n", 246 | " # optimizer='adam',\n", 247 | " optimizer=tf.keras.optimizers.Adam(lr = 0.001),\n", 248 | " metrics=['accuracy'])\n", 249 | " \n", 250 | " return pruned_model\n" 251 | ] 252 | }, 253 | { 254 | "cell_type": "code", 255 | "execution_count": null, 256 | "metadata": {}, 257 | "outputs": [], 258 | "source": [] 259 | }, 260 | { 261 | "cell_type": "code", 262 | "execution_count": 14, 263 | "metadata": {}, 264 | "outputs": [ 265 | { 266 | "name": "stdout", 267 | "output_type": "stream", 268 | "text": [ 269 | "WARNING:tensorflow:From /opt/conda/lib/python3.7/site-packages/tensorflow_model_optimization/python/core/sparsity/keras/pruning_wrapper.py:183: Layer.add_variable (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version.\n", 270 | "Instructions for updating:\n", 271 | "Please use `layer.add_weight` method instead.\n" 272 | ] 273 | } 274 | ], 275 | "source": [ 276 | "# Initialize model-\n", 277 | "model = pruned_nn(pruning_params_unpruned)" 278 | ] 279 | }, 280 | { 281 | "cell_type": "code", 282 | "execution_count": 15, 283 | "metadata": {}, 284 | "outputs": [], 285 | "source": [ 286 | "# Load winning ticket weights-\n", 287 | "model.load_weights(\"Winning_Ticket_Weights_Experimental.h5\")" 288 | ] 289 | }, 290 | { 291 | "cell_type": "code", 292 | "execution_count": 16, 293 | "metadata": {}, 294 | "outputs": [], 295 | "source": [ 296 | "# Strip the pruning wrappers from pruned model-\n", 297 | "model_stripped = sparsity.strip_pruning(model)" 298 | ] 299 | }, 300 | { 301 | "cell_type": "code", 302 | "execution_count": 17, 303 | "metadata": {}, 304 | "outputs": [ 305 | { 306 | "name": "stdout", 307 | "output_type": "stream", 308 | "text": [ 309 | "\n", 310 | "In Winning Ticket, number of nonzero parameters in each layer are: \n", 311 | "\n", 312 | "49627\n", 313 | "0\n", 314 | "6330\n", 315 | "0\n", 316 | "211\n", 317 | "0\n", 318 | "\n", 319 | "Total number of trainable parameters = 56168\n", 320 | "\n" 321 | ] 322 | } 323 | ], 324 | "source": [ 325 | "print(\"\\nIn Winning Ticket, number of nonzero parameters in each layer are: \\n\")\n", 326 | "\n", 327 | "model_sum_params = 0\n", 328 | "\n", 329 | "for layer in model_stripped.trainable_weights:\n", 330 | " print(tf.math.count_nonzero(layer, axis = None).numpy())\n", 331 | " model_sum_params += tf.math.count_nonzero(layer, axis = None).numpy()\n", 332 | "\n", 333 | "print(\"\\nTotal number of trainable parameters = {0}\\n\".format(model_sum_params))" 334 | ] 335 | }, 336 | { 337 | "cell_type": "code", 338 | "execution_count": null, 339 | "metadata": {}, 340 | "outputs": [], 341 | "source": [] 342 | }, 343 | { 344 | "cell_type": "code", 345 | "execution_count": null, 346 | "metadata": {}, 347 | "outputs": [], 348 | "source": [] 349 | }, 350 | { 351 | "cell_type": "code", 352 | "execution_count": 20, 353 | "metadata": {}, 354 | "outputs": [], 355 | "source": [ 356 | "# Instantiate a new neural network model for which, the mask is to be created,\n", 357 | "# according to the paper-\n", 358 | "mask_model = pruned_nn(pruning_params_unpruned)" 359 | ] 360 | }, 361 | { 362 | "cell_type": "code", 363 | "execution_count": 21, 364 | "metadata": {}, 365 | "outputs": [], 366 | "source": [ 367 | "# Load weights of GradientTape trained and PRUNED model-\n", 368 | "# mask_model.load_weights(\"Pruned_Weights.h5\")\n", 369 | "mask_model.load_weights(\"Winning_Ticket_Weights_Experimental.h5\")" 370 | ] 371 | }, 372 | { 373 | "cell_type": "code", 374 | "execution_count": 22, 375 | "metadata": {}, 376 | "outputs": [], 377 | "source": [ 378 | "# Strip the model of its pruning parameters-\n", 379 | "mask_model_stripped = sparsity.strip_pruning(mask_model)" 380 | ] 381 | }, 382 | { 383 | "cell_type": "code", 384 | "execution_count": 23, 385 | "metadata": {}, 386 | "outputs": [], 387 | "source": [ 388 | "# For each layer, for each weight which is 0, leave it, as is.\n", 389 | "# And for weights which survive the pruning,reinitialize it to ONE (1)-\n", 390 | "\n", 391 | "for wts in mask_model_stripped.trainable_weights:\n", 392 | " wts.assign(tf.where(tf.equal(wts, 0.), 0., 1.))" 393 | ] 394 | }, 395 | { 396 | "cell_type": "code", 397 | "execution_count": null, 398 | "metadata": {}, 399 | "outputs": [], 400 | "source": [] 401 | }, 402 | { 403 | "cell_type": "code", 404 | "execution_count": null, 405 | "metadata": {}, 406 | "outputs": [], 407 | "source": [] 408 | }, 409 | { 410 | "cell_type": "markdown", 411 | "metadata": {}, 412 | "source": [ 413 | "### Prepare dataset for _GradientTape_:" 414 | ] 415 | }, 416 | { 417 | "cell_type": "code", 418 | "execution_count": 24, 419 | "metadata": {}, 420 | "outputs": [], 421 | "source": [ 422 | "# Create training and testing datasets-\n", 423 | "train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))\n", 424 | "test_dataset = tf.data.Dataset.from_tensor_slices((X_test, y_test))" 425 | ] 426 | }, 427 | { 428 | "cell_type": "code", 429 | "execution_count": 25, 430 | "metadata": {}, 431 | "outputs": [], 432 | "source": [ 433 | "train_dataset = train_dataset.shuffle(buffer_size = 20000, reshuffle_each_iteration = True).batch(batch_size = batch_size, drop_remainder = False)" 434 | ] 435 | }, 436 | { 437 | "cell_type": "code", 438 | "execution_count": 26, 439 | "metadata": {}, 440 | "outputs": [], 441 | "source": [ 442 | "test_dataset = test_dataset.batch(batch_size=batch_size, drop_remainder=False)" 443 | ] 444 | }, 445 | { 446 | "cell_type": "code", 447 | "execution_count": 27, 448 | "metadata": {}, 449 | "outputs": [], 450 | "source": [ 451 | "# Choose an optimizer and loss function for training-\n", 452 | "loss_fn = tf.keras.losses.CategoricalCrossentropy()\n", 453 | "optimizer = tf.keras.optimizers.Adam(lr = 0.001)" 454 | ] 455 | }, 456 | { 457 | "cell_type": "code", 458 | "execution_count": 28, 459 | "metadata": {}, 460 | "outputs": [], 461 | "source": [ 462 | "# Select metrics to measure the error & accuracy of model.\n", 463 | "# These metrics accumulate the values over epochs and then\n", 464 | "# print the overall result-\n", 465 | "train_loss = tf.keras.metrics.Mean(name = 'train_loss')\n", 466 | "train_accuracy = tf.keras.metrics.CategoricalAccuracy(name = 'train_accuracy')\n", 467 | "\n", 468 | "test_loss = tf.keras.metrics.Mean(name = 'test_loss')\n", 469 | "test_accuracy = tf.keras.metrics.CategoricalAccuracy(name = 'test_accuracy')" 470 | ] 471 | }, 472 | { 473 | "cell_type": "code", 474 | "execution_count": 29, 475 | "metadata": {}, 476 | "outputs": [], 477 | "source": [ 478 | "@tf.function\n", 479 | "def train_one_step(model, mask_model, optimizer, x, y):\n", 480 | " '''\n", 481 | " def train_step(data, labels):\n", 482 | " Function to compute one step of gradient descent optimization\n", 483 | " '''\n", 484 | " with tf.GradientTape() as tape:\n", 485 | " # Make predictions using defined model-\n", 486 | " y_pred = model(x)\n", 487 | "\n", 488 | " # Compute loss-\n", 489 | " loss = loss_fn(y, y_pred)\n", 490 | " \n", 491 | " # Compute gradients wrt defined loss and weights and biases-\n", 492 | " grads = tape.gradient(loss, model.trainable_variables)\n", 493 | " \n", 494 | " # type(grads)\n", 495 | " # list\n", 496 | " \n", 497 | " # List to hold element-wise multiplication between-\n", 498 | " # computed gradient and masks-\n", 499 | " grad_mask_mul = []\n", 500 | " \n", 501 | " # Perform element-wise multiplication between computed gradients and masks-\n", 502 | " for grad_layer, mask in zip(grads, mask_model.trainable_weights):\n", 503 | " grad_mask_mul.append(tf.math.multiply(grad_layer, mask))\n", 504 | " \n", 505 | " # Apply computed gradients to model's weights and biases-\n", 506 | " optimizer.apply_gradients(zip(grad_mask_mul, model.trainable_variables))\n", 507 | "\n", 508 | " # Compute accuracy-\n", 509 | " train_loss(loss)\n", 510 | " train_accuracy(y, y_pred)\n", 511 | "\n", 512 | " return None" 513 | ] 514 | }, 515 | { 516 | "cell_type": "code", 517 | "execution_count": 30, 518 | "metadata": {}, 519 | "outputs": [], 520 | "source": [ 521 | "@tf.function\n", 522 | "def test_step(model, optimizer, data, labels):\n", 523 | " \"\"\"\n", 524 | " Function to test model performance\n", 525 | " on testing dataset\n", 526 | " \"\"\"\n", 527 | " \n", 528 | " predictions = model(data)\n", 529 | " t_loss = loss_fn(labels, predictions)\n", 530 | "\n", 531 | " test_loss(t_loss)\n", 532 | " test_accuracy(labels, predictions)\n", 533 | "\n", 534 | " return None\n" 535 | ] 536 | }, 537 | { 538 | "cell_type": "code", 539 | "execution_count": null, 540 | "metadata": {}, 541 | "outputs": [], 542 | "source": [] 543 | }, 544 | { 545 | "cell_type": "code", 546 | "execution_count": 31, 547 | "metadata": {}, 548 | "outputs": [], 549 | "source": [ 550 | "# Dictionary to hold scalar metrics-\n", 551 | "history = {}\n", 552 | "\n", 553 | "history['accuracy'] = np.zeros(num_epochs)\n", 554 | "history['val_accuracy'] = np.zeros(num_epochs)\n", 555 | "history['loss'] = np.zeros(num_epochs)\n", 556 | "history['val_loss'] = np.zeros(num_epochs)" 557 | ] 558 | }, 559 | { 560 | "cell_type": "code", 561 | "execution_count": 32, 562 | "metadata": {}, 563 | "outputs": [], 564 | "source": [ 565 | "# User input-\n", 566 | "minimum_delta = 0.001\n", 567 | "patience = 3" 568 | ] 569 | }, 570 | { 571 | "cell_type": "code", 572 | "execution_count": 33, 573 | "metadata": {}, 574 | "outputs": [], 575 | "source": [ 576 | "best_val_loss = 1\n", 577 | "loc_patience = 0" 578 | ] 579 | }, 580 | { 581 | "cell_type": "code", 582 | "execution_count": 34, 583 | "metadata": {}, 584 | "outputs": [ 585 | { 586 | "name": "stdout", 587 | "output_type": "stream", 588 | "text": [ 589 | "Epoch 1, Loss: 0.1759, Accuracy: 98.9948, Test Loss: 0.0746, Test Accuracy: 99.534950\n", 590 | "Total number of trainable parameters = 56168\n", 591 | "\n", 592 | "Epoch 2, Loss: 0.0457, Accuracy: 99.7279, Test Loss: 0.0566, Test Accuracy: 99.618980\n", 593 | "Total number of trainable parameters = 56168\n", 594 | "\n", 595 | "Epoch 3, Loss: 0.0246, Accuracy: 99.8562, Test Loss: 0.0575, Test Accuracy: 99.628975\n", 596 | "Total number of trainable parameters = 56168\n", 597 | "\n", 598 | "Epoch 4, Loss: 0.0148, Accuracy: 99.9216, Test Loss: 0.0589, Test Accuracy: 99.656952\n", 599 | "Total number of trainable parameters = 56168\n", 600 | "\n", 601 | "Epoch 5, Loss: 0.0088, Accuracy: 99.9549, Test Loss: 0.0689, Test Accuracy: 99.633965\n", 602 | "Total number of trainable parameters = 56168\n", 603 | "\n", 604 | "\n", 605 | "'EarlyStopping' called!\n", 606 | "\n" 607 | ] 608 | } 609 | ], 610 | "source": [ 611 | "for epoch in range(num_epochs):\n", 612 | " \n", 613 | " if loc_patience >= patience:\n", 614 | " print(\"\\n'EarlyStopping' called!\\n\")\n", 615 | " break\n", 616 | " \n", 617 | " # Reset the metrics at the start of the next epoch\n", 618 | " train_loss.reset_states()\n", 619 | " train_accuracy.reset_states()\n", 620 | " test_loss.reset_states()\n", 621 | " test_accuracy.reset_states()\n", 622 | " \n", 623 | " for x, y in train_dataset:\n", 624 | " # train_step(x, y)\n", 625 | " train_one_step(model_stripped, mask_model_stripped, optimizer, x, y)\n", 626 | "\n", 627 | " for x_t, y_t in test_dataset:\n", 628 | " # test_step(x_t, y_t)\n", 629 | " test_step(model_stripped, optimizer, x_t, y_t)\n", 630 | "\n", 631 | " template = 'Epoch {0}, Loss: {1:.4f}, Accuracy: {2:.4f}, Test Loss: {3:.4f}, Test Accuracy: {4:4f}'\n", 632 | " \n", 633 | " history['accuracy'][epoch] = train_accuracy.result()\n", 634 | " history['loss'][epoch] = train_loss.result()\n", 635 | " history['val_loss'][epoch] = test_loss.result()\n", 636 | " history['val_accuracy'][epoch] = test_accuracy.result()\n", 637 | "\n", 638 | " print(template.format(epoch + 1, \n", 639 | " train_loss.result(), train_accuracy.result()*100,\n", 640 | " test_loss.result(), test_accuracy.result()*100))\n", 641 | " \n", 642 | " # Count number of non-zero parameters in each layer and in total-\n", 643 | " # print(\"layer-wise manner model, number of nonzero parameters in each layer are: \\n\")\n", 644 | "\n", 645 | " model_sum_params = 0\n", 646 | " \n", 647 | " for layer in model_stripped.trainable_weights:\n", 648 | " # print(tf.math.count_nonzero(layer, axis = None).numpy())\n", 649 | " model_sum_params += tf.math.count_nonzero(layer, axis = None).numpy()\n", 650 | " \n", 651 | " print(\"Total number of trainable parameters = {0}\\n\".format(model_sum_params))\n", 652 | "\n", 653 | " \n", 654 | " # Code for manual Early Stopping:\n", 655 | " if np.abs(test_loss.result() < best_val_loss) >= minimum_delta:\n", 656 | " # update 'best_val_loss' variable to lowest loss encountered so far-\n", 657 | " best_val_loss = test_loss.result()\n", 658 | " \n", 659 | " # reset 'loc_patience' variable-\n", 660 | " loc_patience = 0\n", 661 | " \n", 662 | " else: # there is no improvement in monitored metric 'val_loss'\n", 663 | " loc_patience += 1 # number of epochs without any improvement\n", 664 | " " 665 | ] 666 | }, 667 | { 668 | "cell_type": "code", 669 | "execution_count": 35, 670 | "metadata": {}, 671 | "outputs": [], 672 | "source": [ 673 | "# Resize numpy arrays according to the epoch when 'EarlyStopping' was called-\n", 674 | "for metrics in history.keys():\n", 675 | " history[metrics] = np.resize(history[metrics], new_shape=epoch)" 676 | ] 677 | }, 678 | { 679 | "cell_type": "code", 680 | "execution_count": 36, 681 | "metadata": {}, 682 | "outputs": [ 683 | { 684 | "data": { 685 | "text/plain": [ 686 | "[0.06900685519112594, 0.9815]" 687 | ] 688 | }, 689 | "execution_count": 36, 690 | "metadata": {}, 691 | "output_type": "execute_result" 692 | } 693 | ], 694 | "source": [ 695 | "model.evaluate(X_test, y_test, verbose=0)" 696 | ] 697 | }, 698 | { 699 | "cell_type": "code", 700 | "execution_count": 37, 701 | "metadata": {}, 702 | "outputs": [], 703 | "source": [ 704 | "y_pred = model.predict_classes(X_test)" 705 | ] 706 | }, 707 | { 708 | "cell_type": "code", 709 | "execution_count": 41, 710 | "metadata": {}, 711 | "outputs": [ 712 | { 713 | "name": "stdout", 714 | "output_type": "stream", 715 | "text": [ 716 | "\n", 717 | "Accuracy of Winning Ticket (5-round) = 0.9815\n", 718 | "\n" 719 | ] 720 | } 721 | ], 722 | "source": [ 723 | "accuracy = accuracy_score(np.argmax(y_test, axis = 1), y_pred)\n", 724 | "\n", 725 | "print(\"\\nAccuracy of Winning Ticket (5-round) = {0:.4f}\\n\".format(accuracy))" 726 | ] 727 | }, 728 | { 729 | "cell_type": "code", 730 | "execution_count": null, 731 | "metadata": {}, 732 | "outputs": [], 733 | "source": [] 734 | } 735 | ], 736 | "metadata": { 737 | "kernelspec": { 738 | "display_name": "Python 3", 739 | "language": "python", 740 | "name": "python3" 741 | }, 742 | "language_info": { 743 | "codemirror_mode": { 744 | "name": "ipython", 745 | "version": 3 746 | }, 747 | "file_extension": ".py", 748 | "mimetype": "text/x-python", 749 | "name": "python", 750 | "nbconvert_exporter": "python", 751 | "pygments_lexer": "ipython3", 752 | "version": "3.7.6" 753 | } 754 | }, 755 | "nbformat": 4, 756 | "nbformat_minor": 4 757 | } 758 | -------------------------------------------------------------------------------- /Conv_4_LTH_CIFAR_10_winning_ticket_verification.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# The Lottery Ticket Hypothesis - Conv-4 CNN for CIFAR-10 _winning ticket_ verification:\n", 8 | "\n", 9 | "Conv-4 Convolutional Neural Network the following architecture:\n", 10 | "\n", 11 | "1. __Convolutional Layers:__ 64, 64, pool\n", 12 | "1. __Convolutional Layers:__ 128, 128, pool\n", 13 | "1. __Dense Layers:__ 256, 256, 10\n", 14 | "\n", 15 | "Filter/Kernel size for convolutional layers is 3 x 3, with padding and stride of 1.\n", 16 | "\n", 17 | "Filter and Stride for max-pooling layers is 2 x 2.\n", 18 | "\n", 19 | "This CNN is used to verify the veracity of the sub-network (or, winning ticket) found using the iterative pruning rounds from _The Lottery Ticket Hypothesis_ paper." 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 1, 25 | "metadata": {}, 26 | "outputs": [ 27 | { 28 | "name": "stderr", 29 | "output_type": "stream", 30 | "text": [ 31 | "/opt/conda/lib/python3.7/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:541: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", 32 | " _np_qint8 = np.dtype([(\"qint8\", np.int8, 1)])\n", 33 | "/opt/conda/lib/python3.7/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:542: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", 34 | " _np_quint8 = np.dtype([(\"quint8\", np.uint8, 1)])\n", 35 | "/opt/conda/lib/python3.7/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:543: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", 36 | " _np_qint16 = np.dtype([(\"qint16\", np.int16, 1)])\n", 37 | "/opt/conda/lib/python3.7/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:544: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", 38 | " _np_quint16 = np.dtype([(\"quint16\", np.uint16, 1)])\n", 39 | "/opt/conda/lib/python3.7/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:545: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", 40 | " _np_qint32 = np.dtype([(\"qint32\", np.int32, 1)])\n", 41 | "/opt/conda/lib/python3.7/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:550: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", 42 | " np_resource = np.dtype([(\"resource\", np.ubyte, 1)])\n" 43 | ] 44 | } 45 | ], 46 | "source": [ 47 | "import tensorflow as tf\n", 48 | "import numpy as np\n", 49 | "import matplotlib.pyplot as plt\n", 50 | "import math\n", 51 | "import tensorflow_model_optimization as tfmot\n", 52 | "from tensorflow_model_optimization.sparsity import keras as sparsity\n", 53 | "# from tensorflow.keras import datasets, layers, models\n", 54 | "import matplotlib.pyplot as plt\n", 55 | "from tensorflow.keras.layers import AveragePooling2D, Conv2D, MaxPooling2D, ReLU\n", 56 | "from tensorflow.keras import models, layers, datasets\n", 57 | "from tensorflow.keras.layers import Dense, Flatten, Reshape, Input, InputLayer\n", 58 | "from tensorflow.keras.models import Sequential, Model\n", 59 | "from tensorflow.keras.initializers import RandomNormal\n", 60 | "# import math\n", 61 | "from sklearn.metrics import accuracy_score, precision_score, recall_score" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": null, 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 2, 74 | "metadata": {}, 75 | "outputs": [ 76 | { 77 | "data": { 78 | "text/plain": [ 79 | "'2.0.0'" 80 | ] 81 | }, 82 | "execution_count": 2, 83 | "metadata": {}, 84 | "output_type": "execute_result" 85 | } 86 | ], 87 | "source": [ 88 | "tf.__version__" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": 3, 94 | "metadata": {}, 95 | "outputs": [ 96 | { 97 | "name": "stdout", 98 | "output_type": "stream", 99 | "text": [ 100 | "env: CUDA_DEVICE_ORDER=PCI_BUS_ID\n", 101 | "env: CUDA_VISIBLE_DEVICES=1\n" 102 | ] 103 | } 104 | ], 105 | "source": [ 106 | "%env CUDA_DEVICE_ORDER=PCI_BUS_ID\n", 107 | "%env CUDA_VISIBLE_DEVICES=1" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": null, 113 | "metadata": {}, 114 | "outputs": [], 115 | "source": [] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": 4, 120 | "metadata": {}, 121 | "outputs": [], 122 | "source": [ 123 | "batch_size = 60\n", 124 | "num_classes = 10\n", 125 | "num_epochs = 100" 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": 5, 131 | "metadata": {}, 132 | "outputs": [], 133 | "source": [ 134 | "# Data preprocessing and cleaning:\n", 135 | "# input image dimensions\n", 136 | "img_rows, img_cols = 32, 32\n", 137 | "\n", 138 | "# Load CIFAR-10 dataset-\n", 139 | "(X_train, y_train), (X_test, y_test) = tf.keras.datasets.cifar10.load_data()" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": 6, 145 | "metadata": {}, 146 | "outputs": [ 147 | { 148 | "name": "stdout", 149 | "output_type": "stream", 150 | "text": [ 151 | "\n", 152 | "'input_shape' which will be used = (32, 32, 3)\n", 153 | "\n" 154 | ] 155 | } 156 | ], 157 | "source": [ 158 | "if tf.keras.backend.image_data_format() == 'channels_first':\n", 159 | " X_train = X_train.reshape(X_train.shape[0], 3, img_rows, img_cols)\n", 160 | " X_test = X_test.reshape(X_test.shape[0], 3, img_rows, img_cols)\n", 161 | " input_shape = (3, img_rows, img_cols)\n", 162 | "else:\n", 163 | " X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, 3)\n", 164 | " X_test = X_test.reshape(X_test.shape[0], img_rows, img_cols, 3)\n", 165 | " input_shape = (img_rows, img_cols, 3)\n", 166 | "\n", 167 | "print(\"\\n'input_shape' which will be used = {0}\\n\".format(input_shape))" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": 7, 173 | "metadata": {}, 174 | "outputs": [], 175 | "source": [ 176 | "# Convert datasets to floating point types-\n", 177 | "X_train = X_train.astype('float32')\n", 178 | "X_test = X_test.astype('float32')\n", 179 | "\n", 180 | "# Normalize the training and testing datasets-\n", 181 | "X_train /= 255.0\n", 182 | "X_test /= 255.0" 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": 8, 188 | "metadata": {}, 189 | "outputs": [], 190 | "source": [ 191 | "# convert class vectors/target to binary class matrices or one-hot encoded values-\n", 192 | "y_train = tf.keras.utils.to_categorical(y_train, num_classes)\n", 193 | "y_test = tf.keras.utils.to_categorical(y_test, num_classes)" 194 | ] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "execution_count": 9, 199 | "metadata": {}, 200 | "outputs": [ 201 | { 202 | "name": "stdout", 203 | "output_type": "stream", 204 | "text": [ 205 | "\n", 206 | "Dimensions of training and testing sets are:\n", 207 | "X_train.shape = (50000, 32, 32, 3), y_train.shape = (50000, 10)\n", 208 | "X_test.shape = (10000, 32, 32, 3), y_test.shape = (10000, 10)\n" 209 | ] 210 | } 211 | ], 212 | "source": [ 213 | "print(\"\\nDimensions of training and testing sets are:\")\n", 214 | "print(\"X_train.shape = {0}, y_train.shape = {1}\".format(X_train.shape, y_train.shape))\n", 215 | "print(\"X_test.shape = {0}, y_test.shape = {1}\".format(X_test.shape, y_test.shape))" 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": null, 221 | "metadata": {}, 222 | "outputs": [], 223 | "source": [] 224 | }, 225 | { 226 | "cell_type": "code", 227 | "execution_count": null, 228 | "metadata": {}, 229 | "outputs": [], 230 | "source": [] 231 | }, 232 | { 233 | "cell_type": "markdown", 234 | "metadata": {}, 235 | "source": [ 236 | "### Prepare CIFAR10 dataset for _GradientTape_ training:" 237 | ] 238 | }, 239 | { 240 | "cell_type": "code", 241 | "execution_count": 10, 242 | "metadata": {}, 243 | "outputs": [], 244 | "source": [ 245 | "# Create training and testing datasets-\n", 246 | "train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))\n", 247 | "test_dataset = tf.data.Dataset.from_tensor_slices((X_test, y_test))" 248 | ] 249 | }, 250 | { 251 | "cell_type": "code", 252 | "execution_count": 11, 253 | "metadata": {}, 254 | "outputs": [], 255 | "source": [ 256 | "train_dataset = train_dataset.shuffle(buffer_size = 20000, reshuffle_each_iteration = True).batch(batch_size = batch_size, drop_remainder = False)" 257 | ] 258 | }, 259 | { 260 | "cell_type": "code", 261 | "execution_count": 12, 262 | "metadata": {}, 263 | "outputs": [], 264 | "source": [ 265 | "test_dataset = test_dataset.batch(batch_size=batch_size, drop_remainder=False)" 266 | ] 267 | }, 268 | { 269 | "cell_type": "code", 270 | "execution_count": 13, 271 | "metadata": {}, 272 | "outputs": [], 273 | "source": [ 274 | "# Choose an optimizer and loss function for training-\n", 275 | "loss_fn = tf.keras.losses.CategoricalCrossentropy()\n", 276 | "optimizer = tf.keras.optimizers.Adam(lr = 0.0003)" 277 | ] 278 | }, 279 | { 280 | "cell_type": "code", 281 | "execution_count": 14, 282 | "metadata": {}, 283 | "outputs": [], 284 | "source": [ 285 | "# Select metrics to measure the error & accuracy of model.\n", 286 | "# These metrics accumulate the values over epochs and then\n", 287 | "# print the overall result-\n", 288 | "train_loss = tf.keras.metrics.Mean(name = 'train_loss')\n", 289 | "train_accuracy = tf.keras.metrics.CategoricalAccuracy(name = 'train_accuracy')\n", 290 | "\n", 291 | "test_loss = tf.keras.metrics.Mean(name = 'test_loss')\n", 292 | "test_accuracy = tf.keras.metrics.CategoricalAccuracy(name = 'test_accuracy')" 293 | ] 294 | }, 295 | { 296 | "cell_type": "code", 297 | "execution_count": null, 298 | "metadata": {}, 299 | "outputs": [], 300 | "source": [] 301 | }, 302 | { 303 | "cell_type": "code", 304 | "execution_count": 15, 305 | "metadata": {}, 306 | "outputs": [ 307 | { 308 | "name": "stdout", 309 | "output_type": "stream", 310 | "text": [ 311 | "'end_step parameter' for this dataset = 83400\n" 312 | ] 313 | } 314 | ], 315 | "source": [ 316 | "# The model is first trained without any pruning for 'num_epochs' epochs-\n", 317 | "epochs = num_epochs\n", 318 | "\n", 319 | "num_train_samples = X_train.shape[0]\n", 320 | "\n", 321 | "end_step = np.ceil(1.0 * num_train_samples / batch_size).astype(np.int32) * epochs\n", 322 | "\n", 323 | "print(\"'end_step parameter' for this dataset = {0}\".format(end_step))" 324 | ] 325 | }, 326 | { 327 | "cell_type": "code", 328 | "execution_count": 16, 329 | "metadata": {}, 330 | "outputs": [], 331 | "source": [ 332 | "# Specify the parameters to be used for layer-wise pruning, NO PRUNING is done here:\n", 333 | "pruning_params_unpruned = {\n", 334 | " 'pruning_schedule': sparsity.ConstantSparsity(\n", 335 | " target_sparsity=0.0, begin_step=0,\n", 336 | " end_step = end_step, frequency=100\n", 337 | " )\n", 338 | "}" 339 | ] 340 | }, 341 | { 342 | "cell_type": "code", 343 | "execution_count": null, 344 | "metadata": {}, 345 | "outputs": [], 346 | "source": [] 347 | }, 348 | { 349 | "cell_type": "code", 350 | "execution_count": 17, 351 | "metadata": {}, 352 | "outputs": [], 353 | "source": [ 354 | "l = tf.keras.layers" 355 | ] 356 | }, 357 | { 358 | "cell_type": "code", 359 | "execution_count": null, 360 | "metadata": {}, 361 | "outputs": [], 362 | "source": [] 363 | }, 364 | { 365 | "cell_type": "code", 366 | "execution_count": 18, 367 | "metadata": {}, 368 | "outputs": [], 369 | "source": [ 370 | "def pruned_nn(pruning_params_conv, pruning_params_fc, pruning_params_op):\n", 371 | " \"\"\"\n", 372 | " Function to define the architecture of a neural network model\n", 373 | " following Conv-2 architecture for CIFAR-10 dataset and using\n", 374 | " provided parameter which are used to prune the model.\n", 375 | " \n", 376 | " Conv-4 architecture-\n", 377 | " 64, 64, pool -- convolutions\n", 378 | " 128, 128, pool -- convolutions\n", 379 | " 256, 256, 10 -- fully connected layers\n", 380 | " \n", 381 | " Input: 'pruning_params' Python 3 dictionary containing parameters which are used for pruning\n", 382 | " Output: Returns designed and compiled neural network model\n", 383 | " \"\"\"\n", 384 | " \n", 385 | " pruned_model = Sequential()\n", 386 | " \n", 387 | " pruned_model.add(sparsity.prune_low_magnitude(\n", 388 | " Conv2D(\n", 389 | " filters = 64, kernel_size = (3, 3),\n", 390 | " activation='relu', kernel_initializer = tf.initializers.GlorotUniform(),\n", 391 | " strides = (1, 1), padding = 'same',\n", 392 | " input_shape=(32, 32, 3)\n", 393 | " ),\n", 394 | " **pruning_params_conv)\n", 395 | " )\n", 396 | " \n", 397 | " pruned_model.add(sparsity.prune_low_magnitude(\n", 398 | " Conv2D(\n", 399 | " filters = 64, kernel_size = (3, 3),\n", 400 | " activation='relu', kernel_initializer = tf.initializers.GlorotUniform(),\n", 401 | " strides = (1, 1), padding = 'same'\n", 402 | " ),\n", 403 | " **pruning_params_conv)\n", 404 | " )\n", 405 | " \n", 406 | " pruned_model.add(sparsity.prune_low_magnitude(\n", 407 | " MaxPooling2D(\n", 408 | " pool_size = (2, 2),\n", 409 | " strides = (2, 2)\n", 410 | " ),\n", 411 | " **pruning_params_conv)\n", 412 | " )\n", 413 | " \n", 414 | " pruned_model.add(sparsity.prune_low_magnitude(\n", 415 | " Conv2D(\n", 416 | " filters = 128, kernel_size = (3, 3),\n", 417 | " activation='relu', kernel_initializer = tf.initializers.GlorotUniform(),\n", 418 | " strides = (1, 1), padding = 'same'\n", 419 | " ),\n", 420 | " **pruning_params_conv)\n", 421 | " )\n", 422 | "\n", 423 | " pruned_model.add(sparsity.prune_low_magnitude(\n", 424 | " Conv2D(\n", 425 | " filters = 128, kernel_size = (3, 3),\n", 426 | " activation='relu', kernel_initializer = tf.initializers.GlorotUniform(),\n", 427 | " strides = (1, 1), padding = 'same'\n", 428 | " ),\n", 429 | " **pruning_params_conv)\n", 430 | " )\n", 431 | "\n", 432 | " pruned_model.add(sparsity.prune_low_magnitude(\n", 433 | " MaxPooling2D(\n", 434 | " pool_size = (2, 2),\n", 435 | " strides = (2, 2)\n", 436 | " ),\n", 437 | " **pruning_params_conv)\n", 438 | " )\n", 439 | "\n", 440 | " \n", 441 | " pruned_model.add(Flatten())\n", 442 | " \n", 443 | " pruned_model.add(sparsity.prune_low_magnitude(\n", 444 | " Dense(\n", 445 | " units = 256, activation='relu',\n", 446 | " kernel_initializer = tf.initializers.GlorotUniform()\n", 447 | " ),\n", 448 | " **pruning_params_fc)\n", 449 | " )\n", 450 | " \n", 451 | " pruned_model.add(sparsity.prune_low_magnitude(\n", 452 | " Dense(\n", 453 | " units = 256, activation='relu',\n", 454 | " kernel_initializer = tf.initializers.GlorotUniform()\n", 455 | " ),\n", 456 | " **pruning_params_fc)\n", 457 | " )\n", 458 | " \n", 459 | " pruned_model.add(sparsity.prune_low_magnitude(\n", 460 | " Dense(\n", 461 | " units = 10, activation='softmax'\n", 462 | " ),\n", 463 | " **pruning_params_op)\n", 464 | " )\n", 465 | " \n", 466 | "\n", 467 | " # Compile pruned CNN-\n", 468 | " pruned_model.compile(\n", 469 | " loss=tf.keras.losses.categorical_crossentropy,\n", 470 | " # optimizer='adam',\n", 471 | " optimizer=tf.keras.optimizers.Adam(lr = 0.0003),\n", 472 | " metrics=['accuracy']\n", 473 | " )\n", 474 | " \n", 475 | " \n", 476 | " return pruned_model\n" 477 | ] 478 | }, 479 | { 480 | "cell_type": "code", 481 | "execution_count": null, 482 | "metadata": {}, 483 | "outputs": [], 484 | "source": [] 485 | }, 486 | { 487 | "cell_type": "code", 488 | "execution_count": 19, 489 | "metadata": {}, 490 | "outputs": [ 491 | { 492 | "name": "stdout", 493 | "output_type": "stream", 494 | "text": [ 495 | "WARNING:tensorflow:From /opt/conda/lib/python3.7/site-packages/tensorflow_model_optimization/python/core/sparsity/keras/pruning_wrapper.py:183: Layer.add_variable (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version.\n", 496 | "Instructions for updating:\n", 497 | "Please use `layer.add_weight` method instead.\n" 498 | ] 499 | } 500 | ], 501 | "source": [ 502 | "# Initialize a CNN model-\n", 503 | "orig_model = pruned_nn(pruning_params_unpruned, pruning_params_unpruned, pruning_params_unpruned)" 504 | ] 505 | }, 506 | { 507 | "cell_type": "code", 508 | "execution_count": 19, 509 | "metadata": {}, 510 | "outputs": [ 511 | { 512 | "data": { 513 | "text/plain": [ 514 | "'\\nimport os\\n\\n# Change to where the winning ticket is saved-\\nos.chdir(\"Run_2/Conv_4_CIFAR10/\")\\n'" 515 | ] 516 | }, 517 | "execution_count": 19, 518 | "metadata": {}, 519 | "output_type": "execute_result" 520 | } 521 | ], 522 | "source": [ 523 | "'''\n", 524 | "import os\n", 525 | "\n", 526 | "# Change to where the winning ticket is saved-\n", 527 | "os.chdir(\"Run_2/Conv_4_CIFAR10/\")\n", 528 | "'''" 529 | ] 530 | }, 531 | { 532 | "cell_type": "code", 533 | "execution_count": null, 534 | "metadata": {}, 535 | "outputs": [], 536 | "source": [] 537 | }, 538 | { 539 | "cell_type": "code", 540 | "execution_count": 20, 541 | "metadata": {}, 542 | "outputs": [], 543 | "source": [ 544 | "# Load weights from before-\n", 545 | "orig_model.load_weights(\"Conv_4_CIFAR10_Winning_Ticket_Distribution_94.6035541009015.h5\")" 546 | ] 547 | }, 548 | { 549 | "cell_type": "code", 550 | "execution_count": 21, 551 | "metadata": {}, 552 | "outputs": [], 553 | "source": [ 554 | "# Strip model of it's pruning parameters-\n", 555 | "orig_model_stripped = sparsity.strip_pruning(orig_model)" 556 | ] 557 | }, 558 | { 559 | "cell_type": "code", 560 | "execution_count": 22, 561 | "metadata": {}, 562 | "outputs": [ 563 | { 564 | "name": "stdout", 565 | "output_type": "stream", 566 | "text": [ 567 | "Model: \"sequential\"\n", 568 | "_________________________________________________________________\n", 569 | "Layer (type) Output Shape Param # \n", 570 | "=================================================================\n", 571 | "conv2d (Conv2D) (None, 32, 32, 64) 1792 \n", 572 | "_________________________________________________________________\n", 573 | "conv2d_1 (Conv2D) (None, 32, 32, 64) 36928 \n", 574 | "_________________________________________________________________\n", 575 | "max_pooling2d (MaxPooling2D) (None, 16, 16, 64) 0 \n", 576 | "_________________________________________________________________\n", 577 | "conv2d_2 (Conv2D) (None, 16, 16, 128) 73856 \n", 578 | "_________________________________________________________________\n", 579 | "conv2d_3 (Conv2D) (None, 16, 16, 128) 147584 \n", 580 | "_________________________________________________________________\n", 581 | "max_pooling2d_1 (MaxPooling2 (None, 8, 8, 128) 0 \n", 582 | "_________________________________________________________________\n", 583 | "flatten (Flatten) (None, 8192) 0 \n", 584 | "_________________________________________________________________\n", 585 | "dense (Dense) (None, 256) 2097408 \n", 586 | "_________________________________________________________________\n", 587 | "dense_1 (Dense) (None, 256) 65792 \n", 588 | "_________________________________________________________________\n", 589 | "dense_2 (Dense) (None, 10) 2570 \n", 590 | "=================================================================\n", 591 | "Total params: 2,425,930\n", 592 | "Trainable params: 2,425,930\n", 593 | "Non-trainable params: 0\n", 594 | "_________________________________________________________________\n" 595 | ] 596 | } 597 | ], 598 | "source": [ 599 | "# Get stripped defined model summary-\n", 600 | "orig_model_stripped.summary()" 601 | ] 602 | }, 603 | { 604 | "cell_type": "code", 605 | "execution_count": null, 606 | "metadata": {}, 607 | "outputs": [], 608 | "source": [] 609 | }, 610 | { 611 | "cell_type": "code", 612 | "execution_count": null, 613 | "metadata": {}, 614 | "outputs": [], 615 | "source": [] 616 | }, 617 | { 618 | "cell_type": "markdown", 619 | "metadata": {}, 620 | "source": [ 621 | "### Create mask using winning ticket:" 622 | ] 623 | }, 624 | { 625 | "cell_type": "code", 626 | "execution_count": 23, 627 | "metadata": {}, 628 | "outputs": [], 629 | "source": [ 630 | "# Instantiate a new neural network model for which, the mask is to be created,\n", 631 | "# according to the paper-\n", 632 | "mask_model = pruned_nn(pruning_params_unpruned, pruning_params_unpruned, pruning_params_unpruned)" 633 | ] 634 | }, 635 | { 636 | "cell_type": "code", 637 | "execution_count": 24, 638 | "metadata": {}, 639 | "outputs": [], 640 | "source": [ 641 | "# Load weights of PRUNED model-\n", 642 | "# mask_model.set_weights(orig_model.get_weights())\n", 643 | "mask_model.load_weights(\"Conv_4_CIFAR10_Winning_Ticket_Distribution_94.6035541009015.h5\")" 644 | ] 645 | }, 646 | { 647 | "cell_type": "code", 648 | "execution_count": 25, 649 | "metadata": {}, 650 | "outputs": [], 651 | "source": [ 652 | "# Strip the model of its pruning parameters-\n", 653 | "mask_model_stripped = sparsity.strip_pruning(mask_model)" 654 | ] 655 | }, 656 | { 657 | "cell_type": "code", 658 | "execution_count": 26, 659 | "metadata": {}, 660 | "outputs": [], 661 | "source": [ 662 | "# For each layer, for each weight which is 0, leave it, as is.\n", 663 | "# And for weights which survive the pruning,reinitialize it to ONE (1)-\n", 664 | "for wts in mask_model_stripped.trainable_weights:\n", 665 | " wts.assign(tf.where(tf.equal(wts, 0.), 0., 1.))\n" 666 | ] 667 | }, 668 | { 669 | "cell_type": "code", 670 | "execution_count": 28, 671 | "metadata": {}, 672 | "outputs": [ 673 | { 674 | "name": "stdout", 675 | "output_type": "stream", 676 | "text": [ 677 | "\n", 678 | "Number of mask parameters = 130097\n", 679 | "\n" 680 | ] 681 | } 682 | ], 683 | "source": [ 684 | "# Count number of mask parameters-\n", 685 | "mask_sum_params = 0\n", 686 | "\n", 687 | "for layer in mask_model_stripped.trainable_weights:\n", 688 | " mask_sum_params += tf.math.count_nonzero(layer, axis = None).numpy()\n", 689 | "\n", 690 | "print(\"\\nNumber of mask parameters = {0}\\n\".format(mask_sum_params))" 691 | ] 692 | }, 693 | { 694 | "cell_type": "code", 695 | "execution_count": null, 696 | "metadata": {}, 697 | "outputs": [], 698 | "source": [] 699 | }, 700 | { 701 | "cell_type": "code", 702 | "execution_count": 29, 703 | "metadata": {}, 704 | "outputs": [ 705 | { 706 | "name": "stdout", 707 | "output_type": "stream", 708 | "text": [ 709 | "\n", 710 | "Number of training weights = 2425930 and non-trainabel weights = 2425040.0\n", 711 | "\n", 712 | "Total number of parameters = 4850970.0\n", 713 | "\n" 714 | ] 715 | } 716 | ], 717 | "source": [ 718 | "# Count number of trainable and non-trainable parameters-\n", 719 | "\n", 720 | "import tensorflow.keras.backend as K\n", 721 | "\n", 722 | "\n", 723 | "trainable_wts = np.sum([K.count_params(w) for w in orig_model.trainable_weights])\n", 724 | "non_trainable_wts = np.sum([K.count_params(w) for w in orig_model.non_trainable_weights])\n", 725 | "\n", 726 | "print(\"\\nNumber of training weights = {0} and non-trainabel weights = {1}\\n\".format(\n", 727 | " trainable_wts, non_trainable_wts\n", 728 | "))\n", 729 | "print(\"Total number of parameters = {0}\\n\".format(trainable_wts + non_trainable_wts))\n" 730 | ] 731 | }, 732 | { 733 | "cell_type": "code", 734 | "execution_count": null, 735 | "metadata": {}, 736 | "outputs": [], 737 | "source": [] 738 | }, 739 | { 740 | "cell_type": "code", 741 | "execution_count": 30, 742 | "metadata": {}, 743 | "outputs": [], 744 | "source": [ 745 | "# Count number of non-zero parameters in winning ticket-1-\n", 746 | "pruned_sum_params = 0\n", 747 | " \n", 748 | "for layer in orig_model_stripped.trainable_weights:\n", 749 | " # print(tf.math.count_nonzero(layer, axis = None).numpy())\n", 750 | " pruned_sum_params += tf.math.count_nonzero(layer, axis = None).numpy()\n" 751 | ] 752 | }, 753 | { 754 | "cell_type": "code", 755 | "execution_count": 31, 756 | "metadata": {}, 757 | "outputs": [ 758 | { 759 | "name": "stdout", 760 | "output_type": "stream", 761 | "text": [ 762 | "\n", 763 | "Number of non-zero parameters in Conv-4 winning ticket (CIFAR-10) = 130097 with 94.6372% of weights pruned\n", 764 | "\n" 765 | ] 766 | } 767 | ], 768 | "source": [ 769 | "print(\"\\nNumber of non-zero parameters in Conv-4 winning ticket (CIFAR-10) = {0} with {1:.4f}% of weights pruned\\n\".format(pruned_sum_params, 100 - (pruned_sum_params / trainable_wts) * 100))" 770 | ] 771 | }, 772 | { 773 | "cell_type": "code", 774 | "execution_count": null, 775 | "metadata": {}, 776 | "outputs": [], 777 | "source": [] 778 | }, 779 | { 780 | "cell_type": "code", 781 | "execution_count": 32, 782 | "metadata": {}, 783 | "outputs": [], 784 | "source": [ 785 | "@tf.function\n", 786 | "def train_one_step(model, mask_model, optimizer, x, y):\n", 787 | " '''\n", 788 | " Function to compute one step of gradient descent optimization\n", 789 | " '''\n", 790 | " with tf.GradientTape() as tape:\n", 791 | " # Make predictions using defined model-\n", 792 | " y_pred = model(x)\n", 793 | "\n", 794 | " # Compute loss-\n", 795 | " loss = loss_fn(y, y_pred)\n", 796 | " \n", 797 | " # Compute gradients wrt defined loss and weights and biases-\n", 798 | " grads = tape.gradient(loss, model.trainable_variables)\n", 799 | " \n", 800 | " # type(grads)\n", 801 | " # list\n", 802 | "\n", 803 | " # List to hold element-wise multiplication between-\n", 804 | " # computed gradient and masks-\n", 805 | " grad_mask_mul = []\n", 806 | " \n", 807 | " # Perform element-wise multiplication between computed gradients and masks-\n", 808 | " for grad_layer, mask in zip(grads, mask_model.trainable_weights):\n", 809 | " grad_mask_mul.append(tf.math.multiply(grad_layer, mask))\n", 810 | " \n", 811 | " # Apply computed gradients to model's weights and biases-\n", 812 | " optimizer.apply_gradients(zip(grad_mask_mul, model.trainable_variables))\n", 813 | "\n", 814 | " # Compute accuracy-\n", 815 | " train_loss(loss)\n", 816 | " train_accuracy(y, y_pred)\n", 817 | "\n", 818 | " return None\n", 819 | " \n", 820 | " \n", 821 | "@tf.function\n", 822 | "def test_step(model, optimizer, data, labels):\n", 823 | " \"\"\"\n", 824 | " Function to test model performance\n", 825 | " on testing dataset\n", 826 | " \"\"\"\n", 827 | " \n", 828 | " predictions = model(data)\n", 829 | " t_loss = loss_fn(labels, predictions)\n", 830 | "\n", 831 | " test_loss(t_loss)\n", 832 | " test_accuracy(labels, predictions)\n", 833 | "\n", 834 | " return None\n" 835 | ] 836 | }, 837 | { 838 | "cell_type": "code", 839 | "execution_count": null, 840 | "metadata": {}, 841 | "outputs": [], 842 | "source": [] 843 | }, 844 | { 845 | "cell_type": "code", 846 | "execution_count": null, 847 | "metadata": {}, 848 | "outputs": [], 849 | "source": [] 850 | }, 851 | { 852 | "cell_type": "code", 853 | "execution_count": 33, 854 | "metadata": {}, 855 | "outputs": [], 856 | "source": [ 857 | "# User input parameters for Early Stopping in manual implementation-\n", 858 | "minimum_delta = 0.001\n", 859 | "patience = 3" 860 | ] 861 | }, 862 | { 863 | "cell_type": "code", 864 | "execution_count": 32, 865 | "metadata": {}, 866 | "outputs": [], 867 | "source": [ 868 | "# best_val_loss = 100\n", 869 | "# loc_patience = 0" 870 | ] 871 | }, 872 | { 873 | "cell_type": "code", 874 | "execution_count": null, 875 | "metadata": {}, 876 | "outputs": [], 877 | "source": [] 878 | }, 879 | { 880 | "cell_type": "code", 881 | "execution_count": 34, 882 | "metadata": {}, 883 | "outputs": [ 884 | { 885 | "name": "stdout", 886 | "output_type": "stream", 887 | "text": [ 888 | "Epoch 1, Loss: 1.3280, Accuracy: 54.7720, Test Loss: 1.0256, Test Accuracy: 64.450005\n", 889 | "Total number of trainable parameters = 130097\n", 890 | "\n", 891 | "Epoch 2, Loss: 0.8605, Accuracy: 70.5660, Test Loss: 0.8211, Test Accuracy: 72.050003\n", 892 | "Total number of trainable parameters = 130097\n", 893 | "\n", 894 | "Epoch 3, Loss: 0.6581, Accuracy: 77.4980, Test Loss: 0.7663, Test Accuracy: 73.979996\n", 895 | "Total number of trainable parameters = 130097\n", 896 | "\n", 897 | "Epoch 4, Loss: 0.5323, Accuracy: 81.8320, Test Loss: 0.7427, Test Accuracy: 74.959999\n", 898 | "Total number of trainable parameters = 130097\n", 899 | "\n", 900 | "Epoch 5, Loss: 0.4317, Accuracy: 85.2220, Test Loss: 0.7322, Test Accuracy: 76.430000\n", 901 | "Total number of trainable parameters = 130097\n", 902 | "\n", 903 | "Epoch 6, Loss: 0.3509, Accuracy: 88.0640, Test Loss: 0.7600, Test Accuracy: 76.560005\n", 904 | "Total number of trainable parameters = 130097\n", 905 | "\n", 906 | "Epoch 7, Loss: 0.2912, Accuracy: 90.0380, Test Loss: 0.8234, Test Accuracy: 76.380005\n", 907 | "Total number of trainable parameters = 130097\n", 908 | "\n", 909 | "Epoch 8, Loss: 0.2387, Accuracy: 92.0400, Test Loss: 0.8644, Test Accuracy: 76.290001\n", 910 | "Total number of trainable parameters = 130097\n", 911 | "\n", 912 | "\n", 913 | "'EarlyStopping' called!\n", 914 | "\n" 915 | ] 916 | } 917 | ], 918 | "source": [ 919 | "# Train winning ticket using 'GradientTape' to observe it's training behavior-\n", 920 | " \n", 921 | "# Initialize parameters for Early Stopping manual implementation-\n", 922 | "best_val_loss = 100\n", 923 | "loc_patience = 0\n", 924 | " \n", 925 | "for epoch in range(num_epochs):\n", 926 | " \n", 927 | " # print(\"\\n\\nEpoch: {0}\\n\\n\".format(epoch + 1))\n", 928 | " \n", 929 | " if loc_patience >= patience:\n", 930 | " print(\"\\n'EarlyStopping' called!\\n\")\n", 931 | " break\n", 932 | " \n", 933 | " # Reset the metrics at the start of the next epoch\n", 934 | " train_loss.reset_states()\n", 935 | " train_accuracy.reset_states()\n", 936 | " test_loss.reset_states()\n", 937 | " test_accuracy.reset_states()\n", 938 | " \n", 939 | " \n", 940 | " for x, y in train_dataset:\n", 941 | " train_one_step(orig_model_stripped, mask_model_stripped, optimizer, x, y)\n", 942 | "\n", 943 | " for x_t, y_t in test_dataset:\n", 944 | " test_step(orig_model_stripped, optimizer, x_t, y_t)\n", 945 | "\n", 946 | " template = 'Epoch {0}, Loss: {1:.4f}, Accuracy: {2:.4f}, Test Loss: {3:.4f}, Test Accuracy: {4:4f}'\n", 947 | " \n", 948 | " print(template.format(epoch + 1, \n", 949 | " train_loss.result(), train_accuracy.result()*100,\n", 950 | " test_loss.result(), test_accuracy.result()*100))\n", 951 | " \n", 952 | " # Count number of non-zero parameters in each layer and in total-\n", 953 | " # print(\"layer-wise manner model, number of nonzero parameters in each layer are: \\n\")\n", 954 | "\n", 955 | " model_sum_params = 0\n", 956 | " \n", 957 | " for layer in orig_model_stripped.trainable_weights:\n", 958 | " # print(tf.math.count_nonzero(layer, axis = None).numpy())\n", 959 | " model_sum_params += tf.math.count_nonzero(layer, axis = None).numpy()\n", 960 | " \n", 961 | " print(\"Total number of trainable parameters = {0}\\n\".format(model_sum_params))\n", 962 | "\n", 963 | " \n", 964 | " # Code for manual Early Stopping:\n", 965 | " if (test_loss.result() < best_val_loss) and (np.abs(test_loss.result() - best_val_loss) >= minimum_delta):\n", 966 | " # update 'best_val_loss' variable to lowest loss encountered so far-\n", 967 | " best_val_loss = test_loss.result()\n", 968 | " \n", 969 | " # reset 'loc_patience' variable-\n", 970 | " loc_patience = 0\n", 971 | " \n", 972 | " else: # there is no improvement in monitored metric 'val_loss'\n", 973 | " loc_patience += 1 # number of epochs without any improvement\n", 974 | "\n", 975 | " " 976 | ] 977 | }, 978 | { 979 | "cell_type": "code", 980 | "execution_count": null, 981 | "metadata": {}, 982 | "outputs": [], 983 | "source": [] 984 | }, 985 | { 986 | "cell_type": "code", 987 | "execution_count": null, 988 | "metadata": {}, 989 | "outputs": [], 990 | "source": [] 991 | }, 992 | { 993 | "cell_type": "code", 994 | "execution_count": 35, 995 | "metadata": {}, 996 | "outputs": [], 997 | "source": [ 998 | "import pickle, numpy as np" 999 | ] 1000 | }, 1001 | { 1002 | "cell_type": "code", 1003 | "execution_count": 36, 1004 | "metadata": {}, 1005 | "outputs": [], 1006 | "source": [ 1007 | "# Load Python 3 dictionary containing training metrics-\n", 1008 | "with open(\"Conv4_history_main_Winning_Ticket_Distribution_Experiment_2_Random_Weights_Experiment_2.pkl\", \"rb\") as f:\n", 1009 | " hm = pickle.load(f)" 1010 | ] 1011 | }, 1012 | { 1013 | "cell_type": "code", 1014 | "execution_count": 42, 1015 | "metadata": {}, 1016 | "outputs": [ 1017 | { 1018 | "name": "stdout", 1019 | "output_type": "stream", 1020 | "text": [ 1021 | "\n", 1022 | "Conv-4 val_accuracy = 75.6000% using 8 epochs containing 100.00% of weights\n", 1023 | "\n" 1024 | ] 1025 | } 1026 | ], 1027 | "source": [ 1028 | "epoch_length = len(hm[1]['val_accuracy'])\n", 1029 | "\n", 1030 | "print(\"\\nConv-4 val_accuracy = {0:.4f}% using {1} epochs containing {2:.2f}% of weights\\n\".format(\n", 1031 | " hm[1]['val_accuracy'][epoch_length - 1], epoch_length, 100\n", 1032 | "))\n" 1033 | ] 1034 | }, 1035 | { 1036 | "cell_type": "code", 1037 | "execution_count": null, 1038 | "metadata": {}, 1039 | "outputs": [], 1040 | "source": [] 1041 | }, 1042 | { 1043 | "cell_type": "code", 1044 | "execution_count": null, 1045 | "metadata": {}, 1046 | "outputs": [], 1047 | "source": [] 1048 | }, 1049 | { 1050 | "cell_type": "markdown", 1051 | "metadata": {}, 1052 | "source": [ 1053 | "### Observation:\n", 1054 | "\n", 1055 | "1. The over-parameterized, original Conv-4 CNN needed 8 epochs to reach a validation accuracy of 75.6%\n", 1056 | "1. The winning ticket is pruned to __94.6372%__ and needs 8 epochs to reach a __higher validation accuracy of 76.29%__\n", 1057 | "\n", 1058 | "This result shows the success of the _The Lottery Ticket Hypothesis_ applied to Conv-4 CNN for CIFAR-10 dataset." 1059 | ] 1060 | }, 1061 | { 1062 | "cell_type": "code", 1063 | "execution_count": null, 1064 | "metadata": {}, 1065 | "outputs": [], 1066 | "source": [] 1067 | }, 1068 | { 1069 | "cell_type": "code", 1070 | "execution_count": null, 1071 | "metadata": {}, 1072 | "outputs": [], 1073 | "source": [] 1074 | } 1075 | ], 1076 | "metadata": { 1077 | "kernelspec": { 1078 | "display_name": "Python 3", 1079 | "language": "python", 1080 | "name": "python3" 1081 | }, 1082 | "language_info": { 1083 | "codemirror_mode": { 1084 | "name": "ipython", 1085 | "version": 3 1086 | }, 1087 | "file_extension": ".py", 1088 | "mimetype": "text/x-python", 1089 | "name": "python", 1090 | "nbconvert_exporter": "python", 1091 | "pygments_lexer": "ipython3", 1092 | "version": "3.7.3" 1093 | } 1094 | }, 1095 | "nbformat": 4, 1096 | "nbformat_minor": 4 1097 | } 1098 | -------------------------------------------------------------------------------- /Quantization_LTH_LeNet_300_100_MNIST.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Quantization + LTH: LeNet-300-100 for MNIST" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [ 15 | { 16 | "name": "stderr", 17 | "output_type": "stream", 18 | "text": [ 19 | "/home/arjun/.local/lib/python3.8/site-packages/statsmodels/tools/_testing.py:19: FutureWarning: pandas.util.testing is deprecated. Use the functions in the public API at pandas.testing instead.\n", 20 | " import pandas.util.testing as tm\n" 21 | ] 22 | } 23 | ], 24 | "source": [ 25 | "import tensorflow as tf\n", 26 | "import numpy as np\n", 27 | "import matplotlib.pyplot as plt\n", 28 | "import seaborn as sns\n", 29 | "import math\n", 30 | "import tensorflow_model_optimization as tfmot\n", 31 | "# from tensorflow_model_optimization.sparsity import keras as sparsity\n", 32 | "# from tensorflow.keras import datasets, layers, models\n", 33 | "\n", 34 | "from tensorflow.keras.layers import AveragePooling2D, Conv2D, MaxPooling2D, ReLU\n", 35 | "from tensorflow.keras import models, layers, datasets\n", 36 | "from tensorflow.keras.layers import Dense, Flatten, Reshape, Input, InputLayer\n", 37 | "from tensorflow.keras.models import Sequential, Model\n", 38 | "from tensorflow.keras.initializers import RandomNormal\n", 39 | "\n", 40 | "from sklearn.metrics import accuracy_score, precision_score, recall_score\n" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": null, 53 | "metadata": {}, 54 | "outputs": [], 55 | "source": [] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": 2, 60 | "metadata": {}, 61 | "outputs": [ 62 | { 63 | "data": { 64 | "text/plain": [ 65 | "'2.2.0'" 66 | ] 67 | }, 68 | "execution_count": 2, 69 | "metadata": {}, 70 | "output_type": "execute_result" 71 | } 72 | ], 73 | "source": [ 74 | "tf.__version__" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 3, 80 | "metadata": {}, 81 | "outputs": [ 82 | { 83 | "name": "stdout", 84 | "output_type": "stream", 85 | "text": [ 86 | "env: CUDA_DEVICE_ORDER=PCI_BUS_ID\n", 87 | "env: CUDA_VISIBLE_DEVICES=2\n" 88 | ] 89 | } 90 | ], 91 | "source": [ 92 | "%env CUDA_DEVICE_ORDER=PCI_BUS_ID\n", 93 | "%env CUDA_VISIBLE_DEVICES=2" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": 3, 99 | "metadata": {}, 100 | "outputs": [], 101 | "source": [ 102 | "batch_size = 60\n", 103 | "num_classes = 10\n", 104 | "num_epochs = 100" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": 4, 110 | "metadata": {}, 111 | "outputs": [], 112 | "source": [ 113 | "# Data preprocessing and cleadning:\n", 114 | "# input image dimensions\n", 115 | "img_rows, img_cols = 28, 28\n", 116 | "\n", 117 | "# Load MNIST dataset-\n", 118 | "(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": 5, 124 | "metadata": {}, 125 | "outputs": [ 126 | { 127 | "name": "stdout", 128 | "output_type": "stream", 129 | "text": [ 130 | "\n", 131 | "'input_shape' which will be used = (28, 28, 1)\n", 132 | "\n" 133 | ] 134 | } 135 | ], 136 | "source": [ 137 | "if tf.keras.backend.image_data_format() == 'channels_first':\n", 138 | " X_train = X_train.reshape(X_train.shape[0], 1, img_rows, img_cols)\n", 139 | " X_test = X_test.reshape(X_test.shape[0], 1, img_rows, img_cols)\n", 140 | " input_shape = (1, img_rows, img_cols)\n", 141 | "else:\n", 142 | " X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, 1)\n", 143 | " X_test = X_test.reshape(X_test.shape[0], img_rows, img_cols, 1)\n", 144 | " input_shape = (img_rows, img_cols, 1)\n", 145 | "\n", 146 | "print(\"\\n'input_shape' which will be used = {0}\\n\".format(input_shape))" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": 6, 152 | "metadata": {}, 153 | "outputs": [], 154 | "source": [ 155 | "# Convert datasets to floating point types-\n", 156 | "X_train = X_train.astype('float32')\n", 157 | "X_test = X_test.astype('float32')\n", 158 | "\n", 159 | "# Normalize the training and testing datasets-\n", 160 | "X_train /= 255.0\n", 161 | "X_test /= 255.0" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": 7, 167 | "metadata": {}, 168 | "outputs": [], 169 | "source": [ 170 | "# convert class vectors/target to binary class matrices or one-hot encoded values-\n", 171 | "y_train = tf.keras.utils.to_categorical(y_train, num_classes)\n", 172 | "y_test = tf.keras.utils.to_categorical(y_test, num_classes)" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": 8, 178 | "metadata": {}, 179 | "outputs": [ 180 | { 181 | "data": { 182 | "text/plain": [ 183 | "((60000, 10), (10000, 10))" 184 | ] 185 | }, 186 | "execution_count": 8, 187 | "metadata": {}, 188 | "output_type": "execute_result" 189 | } 190 | ], 191 | "source": [ 192 | "y_train.shape, y_test.shape" 193 | ] 194 | }, 195 | { 196 | "cell_type": "code", 197 | "execution_count": 9, 198 | "metadata": {}, 199 | "outputs": [ 200 | { 201 | "name": "stdout", 202 | "output_type": "stream", 203 | "text": [ 204 | "\n", 205 | "X_train.shape = (60000, 28, 28, 1), y_train.shape = (60000, 10)\n", 206 | "\n", 207 | "X_test.shape = (10000, 28, 28, 1), y_test.shape = (10000, 10)\n", 208 | "\n" 209 | ] 210 | } 211 | ], 212 | "source": [ 213 | "print(\"\\nX_train.shape = {0}, y_train.shape = {1}\".format(X_train.shape, y_train.shape))\n", 214 | "print(\"\\nX_test.shape = {0}, y_test.shape = {1}\\n\".format(X_test.shape, y_test.shape))" 215 | ] 216 | }, 217 | { 218 | "cell_type": "code", 219 | "execution_count": null, 220 | "metadata": {}, 221 | "outputs": [], 222 | "source": [] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": 10, 227 | "metadata": {}, 228 | "outputs": [], 229 | "source": [ 230 | "# Reshape training and testing sets-\n", 231 | "X_train = X_train.reshape(X_train.shape[0], 784)\n", 232 | "X_test = X_test.reshape(X_test.shape[0], 784)" 233 | ] 234 | }, 235 | { 236 | "cell_type": "code", 237 | "execution_count": 11, 238 | "metadata": {}, 239 | "outputs": [ 240 | { 241 | "name": "stdout", 242 | "output_type": "stream", 243 | "text": [ 244 | "\n", 245 | "Dimensions of training and testing sets are:\n", 246 | "X_train.shape = (60000, 784), y_train.shape = (60000, 10)\n", 247 | "X_test.shape = (10000, 784), y_test.shape = (10000, 10)\n" 248 | ] 249 | } 250 | ], 251 | "source": [ 252 | "print(\"\\nDimensions of training and testing sets are:\")\n", 253 | "print(\"X_train.shape = {0}, y_train.shape = {1}\".format(X_train.shape, y_train.shape))\n", 254 | "print(\"X_test.shape = {0}, y_test.shape = {1}\".format(X_test.shape, y_test.shape))" 255 | ] 256 | }, 257 | { 258 | "cell_type": "code", 259 | "execution_count": null, 260 | "metadata": {}, 261 | "outputs": [], 262 | "source": [] 263 | }, 264 | { 265 | "cell_type": "code", 266 | "execution_count": null, 267 | "metadata": {}, 268 | "outputs": [], 269 | "source": [] 270 | }, 271 | { 272 | "cell_type": "markdown", 273 | "metadata": {}, 274 | "source": [ 275 | "### Prepare MNIST dataset for _GradientTape_ training:" 276 | ] 277 | }, 278 | { 279 | "cell_type": "code", 280 | "execution_count": 12, 281 | "metadata": {}, 282 | "outputs": [], 283 | "source": [ 284 | "# Create training and testing datasets-\n", 285 | "train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))\n", 286 | "test_dataset = tf.data.Dataset.from_tensor_slices((X_test, y_test))" 287 | ] 288 | }, 289 | { 290 | "cell_type": "code", 291 | "execution_count": 13, 292 | "metadata": {}, 293 | "outputs": [], 294 | "source": [ 295 | "train_dataset = train_dataset.shuffle(buffer_size = 20000, reshuffle_each_iteration = True).batch(batch_size = batch_size, drop_remainder = False)" 296 | ] 297 | }, 298 | { 299 | "cell_type": "code", 300 | "execution_count": 14, 301 | "metadata": {}, 302 | "outputs": [], 303 | "source": [ 304 | "test_dataset = test_dataset.batch(batch_size=batch_size, drop_remainder=False)" 305 | ] 306 | }, 307 | { 308 | "cell_type": "code", 309 | "execution_count": 15, 310 | "metadata": {}, 311 | "outputs": [], 312 | "source": [ 313 | "# Choose an optimizer and loss function for training-\n", 314 | "loss_fn = tf.keras.losses.CategoricalCrossentropy()\n", 315 | "optimizer = tf.keras.optimizers.Adam(lr = 0.0012)" 316 | ] 317 | }, 318 | { 319 | "cell_type": "code", 320 | "execution_count": 16, 321 | "metadata": {}, 322 | "outputs": [], 323 | "source": [ 324 | "# Select metrics to measure the error & accuracy of model.\n", 325 | "# These metrics accumulate the values over epochs and then\n", 326 | "# print the overall result-\n", 327 | "train_loss = tf.keras.metrics.Mean(name = 'train_loss')\n", 328 | "train_accuracy = tf.keras.metrics.CategoricalAccuracy(name = 'train_accuracy')\n", 329 | "\n", 330 | "test_loss = tf.keras.metrics.Mean(name = 'test_loss')\n", 331 | "test_accuracy = tf.keras.metrics.CategoricalAccuracy(name = 'test_accuracy')" 332 | ] 333 | }, 334 | { 335 | "cell_type": "code", 336 | "execution_count": null, 337 | "metadata": {}, 338 | "outputs": [], 339 | "source": [] 340 | }, 341 | { 342 | "cell_type": "code", 343 | "execution_count": null, 344 | "metadata": {}, 345 | "outputs": [], 346 | "source": [] 347 | }, 348 | { 349 | "cell_type": "code", 350 | "execution_count": 17, 351 | "metadata": {}, 352 | "outputs": [], 353 | "source": [ 354 | "def lenet_nn():\n", 355 | "\t\"\"\"\n", 356 | "\tFunction to define the architecture of a neural network model\n", 357 | "\tfollowing 300 100 Dense Fully-Connected architecture for MNIST\n", 358 | "\tdataset.\n", 359 | " \n", 360 | "\tOutput: Returns designed and compiled neural network model\n", 361 | "\t\"\"\"\n", 362 | " \n", 363 | "\tmodel = Sequential()\n", 364 | "\tmodel.add(InputLayer(input_shape=(784, )))\n", 365 | "\t# model.add(Flatten())\n", 366 | "\tmodel.add(\n", 367 | "\t\tDense(\n", 368 | "\t\t\tunits = 300, activation='relu',\n", 369 | "\t\t\tkernel_initializer=tf.initializers.GlorotUniform()\n", 370 | "\t\t\t)\n", 371 | "\t\t)\n", 372 | "\n", 373 | "\t# model.add(l.Dropout(0.2))\n", 374 | "\n", 375 | "\tmodel.add(\n", 376 | "\t\tDense(\n", 377 | "\t\t\tunits = 100, activation='relu',\n", 378 | "\t\t\tkernel_initializer=tf.initializers.GlorotUniform()\n", 379 | "\t\t\t)\n", 380 | "\t\t)\n", 381 | " \n", 382 | "\t# model.add(l.Dropout(0.1))\n", 383 | "\n", 384 | "\tmodel.add(\n", 385 | "\t\tDense(\n", 386 | "\t\t\tunits = num_classes, activation='softmax'\n", 387 | "\t\t\t)\n", 388 | "\t\t)\n", 389 | " \n", 390 | "\n", 391 | "\t# Compile pruned NN-\n", 392 | "\tmodel.compile(\n", 393 | "\t\tloss=tf.keras.losses.categorical_crossentropy,\n", 394 | "\t\t# optimizer='adam',\n", 395 | "\t\toptimizer=tf.keras.optimizers.Adam(lr = 0.0012),\n", 396 | "\t\tmetrics=['accuracy'])\n", 397 | " \n", 398 | "\treturn model\n" 399 | ] 400 | }, 401 | { 402 | "cell_type": "code", 403 | "execution_count": null, 404 | "metadata": {}, 405 | "outputs": [], 406 | "source": [] 407 | }, 408 | { 409 | "cell_type": "code", 410 | "execution_count": 18, 411 | "metadata": {}, 412 | "outputs": [], 413 | "source": [ 414 | "# Initialize model-\n", 415 | "model = lenet_nn()" 416 | ] 417 | }, 418 | { 419 | "cell_type": "code", 420 | "execution_count": 19, 421 | "metadata": {}, 422 | "outputs": [], 423 | "source": [ 424 | "# Load weights of winning ticket-\n", 425 | "model.load_weights(\"/home/arjun/Desktop/Codes/Lottery_Hypothesis-Resources/Latest_Works/LTH_Experiments/Experiment_number_5/LeNet_300_100_MNIST/LeNet_300_MNIST_Magnitude_Winning_Ticket_Distribution_91.18900266306589.h5\")" 426 | ] 427 | }, 428 | { 429 | "cell_type": "code", 430 | "execution_count": 59, 431 | "metadata": {}, 432 | "outputs": [ 433 | { 434 | "name": "stdout", 435 | "output_type": "stream", 436 | "text": [ 437 | "layer: (784, 300) has 20204 non-zero parameters\n", 438 | "layer: (300,) has 0 non-zero parameters\n", 439 | "layer: (300, 100) has 2577 non-zero parameters\n", 440 | "layer: (100,) has 0 non-zero parameters\n", 441 | "layer: (100, 10) has 314 non-zero parameters\n", 442 | "layer: (10,) has 0 non-zero parameters\n", 443 | "\n", 444 | "Total # of non-zero parameters = 23095\n", 445 | "\n" 446 | ] 447 | } 448 | ], 449 | "source": [ 450 | "# Count number of non-zero parameters-\n", 451 | "winning_params = 0\n", 452 | "\n", 453 | "for layer in model.trainable_weights:\n", 454 | " nonzeroparams = tf.math.count_nonzero(layer, axis = None).numpy()\n", 455 | " print(\"layer: {0} has {1} non-zero parameters\".format(layer.shape, nonzeroparams))\n", 456 | " winning_params += nonzeroparams\n", 457 | "\n", 458 | "print(\"\\nTotal # of non-zero parameters = {0}\\n\".format(winning_params))" 459 | ] 460 | }, 461 | { 462 | "cell_type": "code", 463 | "execution_count": null, 464 | "metadata": {}, 465 | "outputs": [], 466 | "source": [] 467 | }, 468 | { 469 | "cell_type": "code", 470 | "execution_count": 21, 471 | "metadata": {}, 472 | "outputs": [ 473 | { 474 | "name": "stdout", 475 | "output_type": "stream", 476 | "text": [ 477 | "\n", 478 | "Number of training weights = 266610 and non-trainabel weights = 0.0\n", 479 | "\n", 480 | "Total number of parameters = 266610.0\n", 481 | "\n" 482 | ] 483 | } 484 | ], 485 | "source": [ 486 | "import tensorflow.keras.backend as K\n", 487 | "\n", 488 | "\n", 489 | "# METHOD-1: This also counts biases\n", 490 | "\n", 491 | "trainable_wts = np.sum([K.count_params(w) for w in model.trainable_weights])\n", 492 | "non_trainable_wts = np.sum([K.count_params(w) for w in model.non_trainable_weights])\n", 493 | "\n", 494 | "print(\"\\nNumber of training weights = {0} and non-trainabel weights = {1}\\n\".format(\n", 495 | " trainable_wts, non_trainable_wts\n", 496 | "))\n", 497 | "print(\"Total number of parameters = {0}\\n\".format(trainable_wts + non_trainable_wts))\n" 498 | ] 499 | }, 500 | { 501 | "cell_type": "code", 502 | "execution_count": 23, 503 | "metadata": {}, 504 | "outputs": [ 505 | { 506 | "name": "stdout", 507 | "output_type": "stream", 508 | "text": [ 509 | "\n", 510 | "91.3375% of parameters have been pruned\n", 511 | "\n" 512 | ] 513 | } 514 | ], 515 | "source": [ 516 | "print(\"\\n{0:.4f}% of parameters have been pruned\\n\".format((trainable_wts - params) / trainable_wts * 100))" 517 | ] 518 | }, 519 | { 520 | "cell_type": "code", 521 | "execution_count": null, 522 | "metadata": {}, 523 | "outputs": [], 524 | "source": [] 525 | }, 526 | { 527 | "cell_type": "code", 528 | "execution_count": null, 529 | "metadata": {}, 530 | "outputs": [], 531 | "source": [] 532 | }, 533 | { 534 | "cell_type": "code", 535 | "execution_count": 33, 536 | "metadata": {}, 537 | "outputs": [], 538 | "source": [ 539 | "# Create mask using winning ticket-\n", 540 | "\n", 541 | "# Instantiate a new neural network model for which, the mask is to be created,\n", 542 | "mask_model = lenet_nn()\n", 543 | " \n", 544 | "# Load weights of PRUNED model-\n", 545 | "mask_model.set_weights(model.get_weights())\n", 546 | " \n", 547 | "# For each layer, for each weight which is 0, leave it, as is.\n", 548 | "# And for weights which survive the pruning,reinitialize it to ONE (1)-\n", 549 | "for wts in mask_model.trainable_weights:\n", 550 | " wts.assign(tf.where(tf.equal(wts, 0.), 0., 1.))\n" 551 | ] 552 | }, 553 | { 554 | "cell_type": "code", 555 | "execution_count": 34, 556 | "metadata": {}, 557 | "outputs": [ 558 | { 559 | "name": "stdout", 560 | "output_type": "stream", 561 | "text": [ 562 | "layer: (784, 300) has 20204 non-zero masks\n", 563 | "layer: (300,) has 0 non-zero masks\n", 564 | "layer: (300, 100) has 2577 non-zero masks\n", 565 | "layer: (100,) has 0 non-zero masks\n", 566 | "layer: (100, 10) has 314 non-zero masks\n", 567 | "layer: (10,) has 0 non-zero masks\n", 568 | "\n", 569 | "Total # of non-zero masks = 23095\n", 570 | "\n" 571 | ] 572 | } 573 | ], 574 | "source": [ 575 | "# Count number of non-zero masks-\n", 576 | "mask_params = 0\n", 577 | "\n", 578 | "for layer in mask_model.trainable_weights:\n", 579 | " nonzeroparams = tf.math.count_nonzero(layer, axis = None).numpy()\n", 580 | " print(\"layer: {0} has {1} non-zero masks\".format(layer.shape, nonzeroparams))\n", 581 | " mask_params += nonzeroparams\n", 582 | "\n", 583 | "print(\"\\nTotal # of non-zero masks = {0}\\n\".format(mask_params))" 584 | ] 585 | }, 586 | { 587 | "cell_type": "code", 588 | "execution_count": 60, 589 | "metadata": {}, 590 | "outputs": [ 591 | { 592 | "name": "stdout", 593 | "output_type": "stream", 594 | "text": [ 595 | "\n", 596 | "number of non-zero parameters and masks matches!\n" 597 | ] 598 | } 599 | ], 600 | "source": [ 601 | "if mask_params == winning_params:\n", 602 | " print(\"\\nnumber of non-zero parameters and masks matches!\")\n", 603 | "else:\n", 604 | " print(\"\\nERROR! number of non-zero parameters and masks DO NOT MATCH!\")" 605 | ] 606 | }, 607 | { 608 | "cell_type": "code", 609 | "execution_count": null, 610 | "metadata": {}, 611 | "outputs": [], 612 | "source": [] 613 | }, 614 | { 615 | "cell_type": "code", 616 | "execution_count": null, 617 | "metadata": {}, 618 | "outputs": [], 619 | "source": [] 620 | }, 621 | { 622 | "cell_type": "markdown", 623 | "metadata": {}, 624 | "source": [ 625 | "### Clone and fine-tune pre-trained model with quantization aware training:" 626 | ] 627 | }, 628 | { 629 | "cell_type": "code", 630 | "execution_count": 24, 631 | "metadata": {}, 632 | "outputs": [], 633 | "source": [ 634 | "quantize_model = tfmot.quantization.keras.quantize_model\n", 635 | "\n", 636 | "# q_aware stands for for quantization aware.\n", 637 | "q_aware_model = quantize_model(model)" 638 | ] 639 | }, 640 | { 641 | "cell_type": "code", 642 | "execution_count": 25, 643 | "metadata": {}, 644 | "outputs": [], 645 | "source": [ 646 | "# 'quantize_model' requires recompilation-\n", 647 | "q_aware_model.compile(\n", 648 | " optimizer = tf.keras.optimizers.Adam(lr = 0.0012),\n", 649 | " loss=tf.keras.losses.categorical_crossentropy,\n", 650 | " metrics=['accuracy']\n", 651 | ")\n" 652 | ] 653 | }, 654 | { 655 | "cell_type": "code", 656 | "execution_count": 26, 657 | "metadata": {}, 658 | "outputs": [ 659 | { 660 | "name": "stdout", 661 | "output_type": "stream", 662 | "text": [ 663 | "Model: \"sequential\"\n", 664 | "_________________________________________________________________\n", 665 | "Layer (type) Output Shape Param # \n", 666 | "=================================================================\n", 667 | "quant_dense (QuantizeWrapper (None, 300) 235505 \n", 668 | "_________________________________________________________________\n", 669 | "quant_dense_1 (QuantizeWrapp (None, 100) 30105 \n", 670 | "_________________________________________________________________\n", 671 | "quant_dense_2 (QuantizeWrapp (None, 10) 1015 \n", 672 | "=================================================================\n", 673 | "Total params: 266,625\n", 674 | "Trainable params: 266,610\n", 675 | "Non-trainable params: 15\n", 676 | "_________________________________________________________________\n" 677 | ] 678 | } 679 | ], 680 | "source": [ 681 | "# Get quantization aware model summary-\n", 682 | "q_aware_model.summary()" 683 | ] 684 | }, 685 | { 686 | "cell_type": "code", 687 | "execution_count": null, 688 | "metadata": {}, 689 | "outputs": [], 690 | "source": [] 691 | }, 692 | { 693 | "cell_type": "code", 694 | "execution_count": null, 695 | "metadata": {}, 696 | "outputs": [], 697 | "source": [] 698 | }, 699 | { 700 | "cell_type": "markdown", 701 | "metadata": {}, 702 | "source": [ 703 | "### Train winning ticket model-" 704 | ] 705 | }, 706 | { 707 | "cell_type": "code", 708 | "execution_count": 39, 709 | "metadata": {}, 710 | "outputs": [], 711 | "source": [ 712 | "# User input parameters for Early Stopping in manual implementation-\n", 713 | "minimum_delta = 0.001\n", 714 | "patience = 3" 715 | ] 716 | }, 717 | { 718 | "cell_type": "code", 719 | "execution_count": 40, 720 | "metadata": {}, 721 | "outputs": [], 722 | "source": [ 723 | "best_val_loss = 100\n", 724 | "loc_patience = 0" 725 | ] 726 | }, 727 | { 728 | "cell_type": "code", 729 | "execution_count": null, 730 | "metadata": {}, 731 | "outputs": [], 732 | "source": [] 733 | }, 734 | { 735 | "cell_type": "code", 736 | "execution_count": 41, 737 | "metadata": {}, 738 | "outputs": [], 739 | "source": [ 740 | "# Initialize a new LeNet-300-100 model-\n", 741 | "winning_ticket_model = lenet_nn()\n", 742 | "\n", 743 | "# Load weights of winning ticket-\n", 744 | "winning_ticket_model.set_weights(model.get_weights())" 745 | ] 746 | }, 747 | { 748 | "cell_type": "code", 749 | "execution_count": null, 750 | "metadata": {}, 751 | "outputs": [], 752 | "source": [] 753 | }, 754 | { 755 | "cell_type": "code", 756 | "execution_count": null, 757 | "metadata": {}, 758 | "outputs": [], 759 | "source": [] 760 | }, 761 | { 762 | "cell_type": "code", 763 | "execution_count": 42, 764 | "metadata": {}, 765 | "outputs": [], 766 | "source": [ 767 | "# Define 'train_one_step()' and 'test_step()' functions here-\n", 768 | "@tf.function\n", 769 | "def train_one_step(model, mask_model, optimizer, x, y):\n", 770 | " '''\n", 771 | " Function to compute one step of gradient descent optimization\n", 772 | " '''\n", 773 | " with tf.GradientTape() as tape:\n", 774 | " # Make predictions using defined model-\n", 775 | " y_pred = model(x)\n", 776 | "\n", 777 | " # Compute loss-\n", 778 | " loss = loss_fn(y, y_pred)\n", 779 | " \n", 780 | " # Compute gradients wrt defined loss and weights and biases-\n", 781 | " grads = tape.gradient(loss, model.trainable_variables)\n", 782 | " \n", 783 | " # type(grads)\n", 784 | " # list\n", 785 | " \n", 786 | " # List to hold element-wise multiplication between-\n", 787 | " # computed gradient and masks-\n", 788 | " grad_mask_mul = []\n", 789 | " \n", 790 | " # Perform element-wise multiplication between computed gradients and masks-\n", 791 | " for grad_layer, mask in zip(grads, mask_model.trainable_weights):\n", 792 | " grad_mask_mul.append(tf.math.multiply(grad_layer, mask))\n", 793 | " \n", 794 | " # Apply computed gradients to model's weights and biases-\n", 795 | " optimizer.apply_gradients(zip(grad_mask_mul, model.trainable_variables))\n", 796 | "\n", 797 | " # Compute accuracy-\n", 798 | " train_loss(loss)\n", 799 | " train_accuracy(y, y_pred)\n", 800 | "\n", 801 | " return None\n", 802 | " \n", 803 | " \n", 804 | "@tf.function\n", 805 | "def test_step(model, optimizer, data, labels):\n", 806 | " \"\"\"\n", 807 | " Function to test model performance\n", 808 | " on testing dataset\n", 809 | " \"\"\"\n", 810 | " \n", 811 | " predictions = model(data)\n", 812 | " t_loss = loss_fn(labels, predictions)\n", 813 | "\n", 814 | " test_loss(t_loss)\n", 815 | " test_accuracy(labels, predictions)\n", 816 | "\n", 817 | " return None\n", 818 | "\n" 819 | ] 820 | }, 821 | { 822 | "cell_type": "code", 823 | "execution_count": 43, 824 | "metadata": {}, 825 | "outputs": [ 826 | { 827 | "name": "stdout", 828 | "output_type": "stream", 829 | "text": [ 830 | "Epoch 1, Loss: 0.1101, Accuracy: 97.2017, Test Loss: 0.0658, Test Accuracy: 97.989998\n", 831 | "Total number of trainable parameters = 23095\n", 832 | "\n", 833 | "Epoch 2, Loss: 0.0340, Accuracy: 99.0883, Test Loss: 0.0567, Test Accuracy: 98.199997\n", 834 | "Total number of trainable parameters = 23095\n", 835 | "\n", 836 | "Epoch 3, Loss: 0.0211, Accuracy: 99.4500, Test Loss: 0.0553, Test Accuracy: 98.259995\n", 837 | "Total number of trainable parameters = 23095\n", 838 | "\n", 839 | "Epoch 4, Loss: 0.0136, Accuracy: 99.6750, Test Loss: 0.0586, Test Accuracy: 98.229996\n", 840 | "Total number of trainable parameters = 23095\n", 841 | "\n", 842 | "Epoch 5, Loss: 0.0097, Accuracy: 99.7700, Test Loss: 0.0639, Test Accuracy: 98.110001\n", 843 | "Total number of trainable parameters = 23095\n", 844 | "\n", 845 | "Epoch 6, Loss: 0.0064, Accuracy: 99.8750, Test Loss: 0.0619, Test Accuracy: 98.299995\n", 846 | "Total number of trainable parameters = 23095\n", 847 | "\n", 848 | "\n", 849 | "'EarlyStopping' called!\n", 850 | "\n" 851 | ] 852 | } 853 | ], 854 | "source": [ 855 | "# Train model using 'GradientTape'-\n", 856 | " \n", 857 | "# Initialize parameters for Early Stopping manual implementation-\n", 858 | "# best_val_loss = 100\n", 859 | "# loc_patience = 0\n", 860 | " \n", 861 | "for epoch in range(num_epochs):\n", 862 | " \n", 863 | " if loc_patience >= patience:\n", 864 | " print(\"\\n'EarlyStopping' called!\\n\")\n", 865 | " break\n", 866 | " \n", 867 | " # Reset the metrics at the start of the next epoch\n", 868 | " train_loss.reset_states()\n", 869 | " train_accuracy.reset_states()\n", 870 | " test_loss.reset_states()\n", 871 | " test_accuracy.reset_states()\n", 872 | " \n", 873 | " \n", 874 | " for x, y in train_dataset:\n", 875 | " train_one_step(winning_ticket_model, mask_model, optimizer, x, y)\n", 876 | "\n", 877 | "\n", 878 | " for x_t, y_t in test_dataset:\n", 879 | " test_step(winning_ticket_model, optimizer, x_t, y_t)\n", 880 | "\n", 881 | " template = 'Epoch {0}, Loss: {1:.4f}, Accuracy: {2:.4f}, Test Loss: {3:.4f}, Test Accuracy: {4:4f}'\n", 882 | " \n", 883 | " '''\n", 884 | " # 'i' is the index for number of pruning rounds-\n", 885 | " history_main[i]['accuracy'][epoch] = train_accuracy.result() * 100\n", 886 | " history_main[i]['loss'][epoch] = train_loss.result()\n", 887 | " history_main[i]['val_loss'][epoch] = test_loss.result()\n", 888 | " history_main[i]['val_accuracy'][epoch] = test_accuracy.result() * 100\n", 889 | " ''' \n", 890 | "\n", 891 | " print(template.format(\n", 892 | " epoch + 1, train_loss.result(),\n", 893 | " train_accuracy.result()*100, test_loss.result(),\n", 894 | " test_accuracy.result()*100)\n", 895 | " )\n", 896 | " \n", 897 | " # Count number of non-zero parameters in each layer and in total-\n", 898 | " # print(\"layer-wise manner model, number of nonzero parameters in each layer are: \\n\")\n", 899 | " model_sum_params = 0\n", 900 | " \n", 901 | " for layer in winning_ticket_model.trainable_weights:\n", 902 | " # print(tf.math.count_nonzero(layer, axis = None).numpy())\n", 903 | " model_sum_params += tf.math.count_nonzero(layer, axis = None).numpy()\n", 904 | " \n", 905 | " print(\"Total number of trainable parameters = {0}\\n\".format(model_sum_params))\n", 906 | "\n", 907 | " \n", 908 | " # Code for manual Early Stopping:\n", 909 | " if np.abs(test_loss.result() < best_val_loss) >= minimum_delta:\n", 910 | " # update 'best_val_loss' variable to lowest loss encountered so far-\n", 911 | " best_val_loss = test_loss.result()\n", 912 | " \n", 913 | " # reset 'loc_patience' variable-\n", 914 | " loc_patience = 0\n", 915 | " \n", 916 | " else: # there is no improvement in monitored metric 'val_loss'\n", 917 | " loc_patience += 1 # number of epochs without any improvement\n" 918 | ] 919 | }, 920 | { 921 | "cell_type": "code", 922 | "execution_count": null, 923 | "metadata": {}, 924 | "outputs": [], 925 | "source": [] 926 | }, 927 | { 928 | "cell_type": "code", 929 | "execution_count": null, 930 | "metadata": {}, 931 | "outputs": [], 932 | "source": [] 933 | }, 934 | { 935 | "cell_type": "markdown", 936 | "metadata": {}, 937 | "source": [ 938 | "### Train _Quantized_ winning ticket model:" 939 | ] 940 | }, 941 | { 942 | "cell_type": "code", 943 | "execution_count": 44, 944 | "metadata": {}, 945 | "outputs": [], 946 | "source": [ 947 | "# User input parameters for Early Stopping in manual implementation-\n", 948 | "minimum_delta = 0.001\n", 949 | "patience = 3" 950 | ] 951 | }, 952 | { 953 | "cell_type": "code", 954 | "execution_count": 45, 955 | "metadata": {}, 956 | "outputs": [], 957 | "source": [ 958 | "best_val_loss = 100\n", 959 | "loc_patience = 0" 960 | ] 961 | }, 962 | { 963 | "cell_type": "code", 964 | "execution_count": null, 965 | "metadata": {}, 966 | "outputs": [], 967 | "source": [] 968 | }, 969 | { 970 | "cell_type": "code", 971 | "execution_count": 41, 972 | "metadata": {}, 973 | "outputs": [], 974 | "source": [ 975 | "# Initialize a new LeNet-300-100 model-\n", 976 | "# winning_ticket_model = lenet_nn()\n", 977 | "\n", 978 | "# Load weights of winning ticket-\n", 979 | "# winning_ticket_model.set_weights(model.get_weights())" 980 | ] 981 | }, 982 | { 983 | "cell_type": "code", 984 | "execution_count": null, 985 | "metadata": {}, 986 | "outputs": [], 987 | "source": [] 988 | }, 989 | { 990 | "cell_type": "code", 991 | "execution_count": null, 992 | "metadata": {}, 993 | "outputs": [], 994 | "source": [] 995 | }, 996 | { 997 | "cell_type": "code", 998 | "execution_count": 46, 999 | "metadata": {}, 1000 | "outputs": [], 1001 | "source": [ 1002 | "# Define 'train_one_step()' and 'test_step()' functions here-\n", 1003 | "@tf.function\n", 1004 | "def train_one_step(model, mask_model, optimizer, x, y):\n", 1005 | " '''\n", 1006 | " Function to compute one step of gradient descent optimization\n", 1007 | " '''\n", 1008 | " with tf.GradientTape() as tape:\n", 1009 | " # Make predictions using defined model-\n", 1010 | " y_pred = model(x)\n", 1011 | "\n", 1012 | " # Compute loss-\n", 1013 | " loss = loss_fn(y, y_pred)\n", 1014 | " \n", 1015 | " # Compute gradients wrt defined loss and weights and biases-\n", 1016 | " grads = tape.gradient(loss, model.trainable_variables)\n", 1017 | " \n", 1018 | " # type(grads)\n", 1019 | " # list\n", 1020 | " \n", 1021 | " # List to hold element-wise multiplication between-\n", 1022 | " # computed gradient and masks-\n", 1023 | " grad_mask_mul = []\n", 1024 | " \n", 1025 | " # Perform element-wise multiplication between computed gradients and masks-\n", 1026 | " for grad_layer, mask in zip(grads, mask_model.trainable_weights):\n", 1027 | " grad_mask_mul.append(tf.math.multiply(grad_layer, mask))\n", 1028 | " \n", 1029 | " # Apply computed gradients to model's weights and biases-\n", 1030 | " optimizer.apply_gradients(zip(grad_mask_mul, model.trainable_variables))\n", 1031 | "\n", 1032 | " # Compute accuracy-\n", 1033 | " train_loss(loss)\n", 1034 | " train_accuracy(y, y_pred)\n", 1035 | "\n", 1036 | " return None\n", 1037 | " \n", 1038 | " \n", 1039 | "@tf.function\n", 1040 | "def test_step(model, optimizer, data, labels):\n", 1041 | " \"\"\"\n", 1042 | " Function to test model performance\n", 1043 | " on testing dataset\n", 1044 | " \"\"\"\n", 1045 | " \n", 1046 | " predictions = model(data)\n", 1047 | " t_loss = loss_fn(labels, predictions)\n", 1048 | "\n", 1049 | " test_loss(t_loss)\n", 1050 | " test_accuracy(labels, predictions)\n", 1051 | "\n", 1052 | " return None\n", 1053 | "\n" 1054 | ] 1055 | }, 1056 | { 1057 | "cell_type": "code", 1058 | "execution_count": 49, 1059 | "metadata": {}, 1060 | "outputs": [ 1061 | { 1062 | "name": "stdout", 1063 | "output_type": "stream", 1064 | "text": [ 1065 | "Epoch 1/3\n", 1066 | "1000/1000 [==============================] - 5s 5ms/step - loss: 0.0392 - accuracy: 0.9874\n", 1067 | "Epoch 2/3\n", 1068 | "1000/1000 [==============================] - 5s 5ms/step - loss: 0.0311 - accuracy: 0.9894\n", 1069 | "Epoch 3/3\n", 1070 | "1000/1000 [==============================] - 5s 5ms/step - loss: 0.0266 - accuracy: 0.9911\n" 1071 | ] 1072 | } 1073 | ], 1074 | "source": [ 1075 | "history_q_aware = q_aware_model.fit(\n", 1076 | " x = X_train, y = y_train,\n", 1077 | " batch_size = batch_size,\n", 1078 | " epochs = 3\n", 1079 | ")\n" 1080 | ] 1081 | }, 1082 | { 1083 | "cell_type": "code", 1084 | "execution_count": null, 1085 | "metadata": {}, 1086 | "outputs": [], 1087 | "source": [] 1088 | }, 1089 | { 1090 | "cell_type": "code", 1091 | "execution_count": 54, 1092 | "metadata": {}, 1093 | "outputs": [], 1094 | "source": [ 1095 | "_, baseline_model_accuracy = winning_ticket_model.evaluate(X_test, y_test, verbose=0)\n", 1096 | "_, q_aware_model_accuracy = q_aware_model.evaluate(X_test, y_test, verbose=0)\n" 1097 | ] 1098 | }, 1099 | { 1100 | "cell_type": "markdown", 1101 | "metadata": {}, 1102 | "source": [ 1103 | "### There is minimal to no loss in test accuracy after quantization aware training, compared to the baseline:" 1104 | ] 1105 | }, 1106 | { 1107 | "cell_type": "code", 1108 | "execution_count": 55, 1109 | "metadata": {}, 1110 | "outputs": [ 1111 | { 1112 | "name": "stdout", 1113 | "output_type": "stream", 1114 | "text": [ 1115 | "Baseline test accuracy: 0.9829999804496765\n", 1116 | "Quant test accuracy: 0.9796000123023987\n" 1117 | ] 1118 | } 1119 | ], 1120 | "source": [ 1121 | "print('Baseline test accuracy:', baseline_model_accuracy)\n", 1122 | "print('Quant test accuracy:', q_aware_model_accuracy)" 1123 | ] 1124 | }, 1125 | { 1126 | "cell_type": "code", 1127 | "execution_count": null, 1128 | "metadata": {}, 1129 | "outputs": [], 1130 | "source": [] 1131 | }, 1132 | { 1133 | "cell_type": "code", 1134 | "execution_count": 57, 1135 | "metadata": {}, 1136 | "outputs": [ 1137 | { 1138 | "name": "stdout", 1139 | "output_type": "stream", 1140 | "text": [ 1141 | "layer: (300,) has 294 non-zero parameter\n", 1142 | "layer: (784, 300) has 204983 non-zero parameter\n", 1143 | "layer: (100,) has 99 non-zero parameter\n", 1144 | "layer: (300, 100) has 29100 non-zero parameter\n", 1145 | "layer: (10,) has 10 non-zero parameter\n", 1146 | "layer: (100, 10) has 990 non-zero parameter\n", 1147 | "\n", 1148 | "Total number of non-zero parameters = 235476\n", 1149 | "\n" 1150 | ] 1151 | } 1152 | ], 1153 | "source": [ 1154 | "q_params = 0\n", 1155 | "\n", 1156 | "for layer in q_aware_model.trainable_weights:\n", 1157 | " params = tf.math.count_nonzero(layer, axis = None).numpy()\n", 1158 | " print(\"layer: {0} has {1} non-zero parameter\".format(layer.shape, params))\n", 1159 | " q_params += params\n", 1160 | " \n", 1161 | "print(\"\\nTotal number of non-zero parameters = {0}\\n\".format(q_params))" 1162 | ] 1163 | }, 1164 | { 1165 | "cell_type": "code", 1166 | "execution_count": 61, 1167 | "metadata": {}, 1168 | "outputs": [ 1169 | { 1170 | "data": { 1171 | "text/plain": [ 1172 | "23095" 1173 | ] 1174 | }, 1175 | "execution_count": 61, 1176 | "metadata": {}, 1177 | "output_type": "execute_result" 1178 | } 1179 | ], 1180 | "source": [ 1181 | "winning_params" 1182 | ] 1183 | }, 1184 | { 1185 | "cell_type": "code", 1186 | "execution_count": null, 1187 | "metadata": {}, 1188 | "outputs": [], 1189 | "source": [] 1190 | }, 1191 | { 1192 | "cell_type": "code", 1193 | "execution_count": 47, 1194 | "metadata": {}, 1195 | "outputs": [ 1196 | { 1197 | "ename": "InvalidArgumentError", 1198 | "evalue": " var and grad do not have the same shape[10] [100,10]\n\t [[node Adam/Adam/update_4/ResourceApplyAdam (defined at :29) ]] [Op:__inference_train_one_step_20360]\n\nErrors may have originated from an input operation.\nInput Source operations connected to node Adam/Adam/update_4/ResourceApplyAdam:\n Mul_4 (defined at :26)\t\n sequential/quant_dense_2/BiasAdd/ReadVariableOp/resource (defined at /home/arjun/.local/lib/python3.8/site-packages/tensorflow_model_optimization/python/core/quantization/keras/quantize_wrapper.py:162)\n\nFunction call stack:\ntrain_one_step\n", 1199 | "output_type": "error", 1200 | "traceback": [ 1201 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 1202 | "\u001b[0;31mInvalidArgumentError\u001b[0m Traceback (most recent call last)", 1203 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 19\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 20\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mtrain_dataset\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 21\u001b[0;31m \u001b[0mtrain_one_step\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mq_aware_model\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmask_model\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moptimizer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 22\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 23\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 1204 | "\u001b[0;32m~/.local/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args, **kwds)\u001b[0m\n\u001b[1;32m 578\u001b[0m \u001b[0mxla_context\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mExit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 579\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 580\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 581\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 582\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mtracing_count\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_tracing_count\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 1205 | "\u001b[0;32m~/.local/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py\u001b[0m in \u001b[0;36m_call\u001b[0;34m(self, *args, **kwds)\u001b[0m\n\u001b[1;32m 642\u001b[0m \u001b[0;31m# Lifting succeeded, so variables are initialized and we can run the\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 643\u001b[0m \u001b[0;31m# stateless function.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 644\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_stateless_fn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 645\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 646\u001b[0m \u001b[0mcanon_args\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcanon_kwds\u001b[0m \u001b[0;34m=\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m\\\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 1206 | "\u001b[0;32m~/.local/lib/python3.8/site-packages/tensorflow/python/eager/function.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 2418\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_lock\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2419\u001b[0m \u001b[0mgraph_function\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwargs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_maybe_define_function\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2420\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mgraph_function\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_filtered_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# pylint: disable=protected-access\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2421\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2422\u001b[0m \u001b[0;34m@\u001b[0m\u001b[0mproperty\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 1207 | "\u001b[0;32m~/.local/lib/python3.8/site-packages/tensorflow/python/eager/function.py\u001b[0m in \u001b[0;36m_filtered_call\u001b[0;34m(self, args, kwargs)\u001b[0m\n\u001b[1;32m 1659\u001b[0m \u001b[0;31m`\u001b[0m\u001b[0margs\u001b[0m\u001b[0;31m`\u001b[0m \u001b[0;32mand\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m`\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;31m`\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1660\u001b[0m \"\"\"\n\u001b[0;32m-> 1661\u001b[0;31m return self._call_flat(\n\u001b[0m\u001b[1;32m 1662\u001b[0m (t for t in nest.flatten((args, kwargs), expand_composites=True)\n\u001b[1;32m 1663\u001b[0m if isinstance(t, (ops.Tensor,\n", 1208 | "\u001b[0;32m~/.local/lib/python3.8/site-packages/tensorflow/python/eager/function.py\u001b[0m in \u001b[0;36m_call_flat\u001b[0;34m(self, args, captured_inputs, cancellation_manager)\u001b[0m\n\u001b[1;32m 1743\u001b[0m and executing_eagerly):\n\u001b[1;32m 1744\u001b[0m \u001b[0;31m# No tape is watching; skip to running the function.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1745\u001b[0;31m return self._build_call_outputs(self._inference_function.call(\n\u001b[0m\u001b[1;32m 1746\u001b[0m ctx, args, cancellation_manager=cancellation_manager))\n\u001b[1;32m 1747\u001b[0m forward_backward = self._select_forward_and_backward_functions(\n", 1209 | "\u001b[0;32m~/.local/lib/python3.8/site-packages/tensorflow/python/eager/function.py\u001b[0m in \u001b[0;36mcall\u001b[0;34m(self, ctx, args, cancellation_manager)\u001b[0m\n\u001b[1;32m 591\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0m_InterpolateFunctionError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 592\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mcancellation_manager\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 593\u001b[0;31m outputs = execute.execute(\n\u001b[0m\u001b[1;32m 594\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msignature\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 595\u001b[0m \u001b[0mnum_outputs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_num_outputs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 1210 | "\u001b[0;32m~/.local/lib/python3.8/site-packages/tensorflow/python/eager/execute.py\u001b[0m in \u001b[0;36mquick_execute\u001b[0;34m(op_name, num_outputs, inputs, attrs, ctx, name)\u001b[0m\n\u001b[1;32m 57\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 58\u001b[0m \u001b[0mctx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mensure_initialized\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 59\u001b[0;31m tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,\n\u001b[0m\u001b[1;32m 60\u001b[0m inputs, attrs, num_outputs)\n\u001b[1;32m 61\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mcore\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_NotOkStatusException\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 1211 | "\u001b[0;31mInvalidArgumentError\u001b[0m: var and grad do not have the same shape[10] [100,10]\n\t [[node Adam/Adam/update_4/ResourceApplyAdam (defined at :29) ]] [Op:__inference_train_one_step_20360]\n\nErrors may have originated from an input operation.\nInput Source operations connected to node Adam/Adam/update_4/ResourceApplyAdam:\n Mul_4 (defined at :26)\t\n sequential/quant_dense_2/BiasAdd/ReadVariableOp/resource (defined at /home/arjun/.local/lib/python3.8/site-packages/tensorflow_model_optimization/python/core/quantization/keras/quantize_wrapper.py:162)\n\nFunction call stack:\ntrain_one_step\n" 1212 | ] 1213 | } 1214 | ], 1215 | "source": [ 1216 | "# Train model using 'GradientTape'-\n", 1217 | " \n", 1218 | "# Initialize parameters for Early Stopping manual implementation-\n", 1219 | "# best_val_loss = 100\n", 1220 | "# loc_patience = 0\n", 1221 | " \n", 1222 | "for epoch in range(num_epochs):\n", 1223 | " \n", 1224 | " if loc_patience >= patience:\n", 1225 | " print(\"\\n'EarlyStopping' called!\\n\")\n", 1226 | " break\n", 1227 | " \n", 1228 | " # Reset the metrics at the start of the next epoch\n", 1229 | " train_loss.reset_states()\n", 1230 | " train_accuracy.reset_states()\n", 1231 | " test_loss.reset_states()\n", 1232 | " test_accuracy.reset_states()\n", 1233 | " \n", 1234 | " \n", 1235 | " for x, y in train_dataset:\n", 1236 | " train_one_step(q_aware_model, mask_model, optimizer, x, y)\n", 1237 | "\n", 1238 | "\n", 1239 | " for x_t, y_t in test_dataset:\n", 1240 | " test_step(q_aware_model, optimizer, x_t, y_t)\n", 1241 | "\n", 1242 | " template = 'Epoch {0}, Loss: {1:.4f}, Accuracy: {2:.4f}, Test Loss: {3:.4f}, Test Accuracy: {4:4f}'\n", 1243 | " \n", 1244 | " '''\n", 1245 | " # 'i' is the index for number of pruning rounds-\n", 1246 | " history_main[i]['accuracy'][epoch] = train_accuracy.result() * 100\n", 1247 | " history_main[i]['loss'][epoch] = train_loss.result()\n", 1248 | " history_main[i]['val_loss'][epoch] = test_loss.result()\n", 1249 | " history_main[i]['val_accuracy'][epoch] = test_accuracy.result() * 100\n", 1250 | " ''' \n", 1251 | "\n", 1252 | " print(template.format(\n", 1253 | " epoch + 1, train_loss.result(),\n", 1254 | " train_accuracy.result()*100, test_loss.result(),\n", 1255 | " test_accuracy.result()*100)\n", 1256 | " )\n", 1257 | " \n", 1258 | " # Count number of non-zero parameters in each layer and in total-\n", 1259 | " # print(\"layer-wise manner model, number of nonzero parameters in each layer are: \\n\")\n", 1260 | " model_sum_params = 0\n", 1261 | " \n", 1262 | " for layer in winning_ticket_model.trainable_weights:\n", 1263 | " # print(tf.math.count_nonzero(layer, axis = None).numpy())\n", 1264 | " model_sum_params += tf.math.count_nonzero(layer, axis = None).numpy()\n", 1265 | " \n", 1266 | " print(\"Total number of trainable parameters = {0}\\n\".format(model_sum_params))\n", 1267 | "\n", 1268 | " \n", 1269 | " # Code for manual Early Stopping:\n", 1270 | " if np.abs(test_loss.result() < best_val_loss) >= minimum_delta:\n", 1271 | " # update 'best_val_loss' variable to lowest loss encountered so far-\n", 1272 | " best_val_loss = test_loss.result()\n", 1273 | " \n", 1274 | " # reset 'loc_patience' variable-\n", 1275 | " loc_patience = 0\n", 1276 | " \n", 1277 | " else: # there is no improvement in monitored metric 'val_loss'\n", 1278 | " loc_patience += 1 # number of epochs without any improvement\n" 1279 | ] 1280 | }, 1281 | { 1282 | "cell_type": "code", 1283 | "execution_count": null, 1284 | "metadata": {}, 1285 | "outputs": [], 1286 | "source": [] 1287 | } 1288 | ], 1289 | "metadata": { 1290 | "kernelspec": { 1291 | "display_name": "Python 3", 1292 | "language": "python", 1293 | "name": "python3" 1294 | }, 1295 | "language_info": { 1296 | "codemirror_mode": { 1297 | "name": "ipython", 1298 | "version": 3 1299 | }, 1300 | "file_extension": ".py", 1301 | "mimetype": "text/x-python", 1302 | "name": "python", 1303 | "nbconvert_exporter": "python", 1304 | "pygments_lexer": "ipython3", 1305 | "version": "3.8.3" 1306 | } 1307 | }, 1308 | "nbformat": 4, 1309 | "nbformat_minor": 4 1310 | } 1311 | -------------------------------------------------------------------------------- /LeNet_300_100-Iterative_Pruning.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "enhanced-reaction", 6 | "metadata": {}, 7 | "source": [ 8 | "# Iterative Pruning: _LeNet-300-100_ on MNIST" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "id": "defined-postcard", 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import torch\n", 19 | "import torch.nn as nn\n", 20 | "import torchvision\n", 21 | "import torch.nn.functional as F\n", 22 | "import numpy as np\n", 23 | "import torchvision.transforms as transforms\n", 24 | "import matplotlib.pyplot as plt\n", 25 | "import os" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": null, 31 | "id": "explicit-sunset", 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 2, 39 | "id": "constant-geneva", 40 | "metadata": {}, 41 | "outputs": [ 42 | { 43 | "name": "stdout", 44 | "output_type": "stream", 45 | "text": [ 46 | "PyTorch version: 1.7.1\n" 47 | ] 48 | } 49 | ], 50 | "source": [ 51 | "print(f\"PyTorch version: {torch.__version__}\")" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": null, 57 | "id": "shared-velvet", 58 | "metadata": {}, 59 | "outputs": [], 60 | "source": [] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 3, 65 | "id": "absent-panel", 66 | "metadata": {}, 67 | "outputs": [ 68 | { 69 | "name": "stdout", 70 | "output_type": "stream", 71 | "text": [ 72 | "Available device: cpu\n" 73 | ] 74 | } 75 | ], 76 | "source": [ 77 | "# GPU device configuration-\n", 78 | "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n", 79 | "\n", 80 | "print(f\"Available device: {device}\")" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": null, 86 | "id": "proud-buffer", 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": 4, 94 | "id": "expensive-parking", 95 | "metadata": {}, 96 | "outputs": [], 97 | "source": [ 98 | "# Hyper-parameters-\n", 99 | "input_size = 784 # 28 x 28, flattened to be 1-D tensor\n", 100 | "hidden_size = 100\n", 101 | "num_classes = 10\n", 102 | "num_epochs = 20\n", 103 | "batch_size = 32\n", 104 | "learning_rate = 0.0012" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": null, 110 | "id": "substantial-faith", 111 | "metadata": {}, 112 | "outputs": [], 113 | "source": [] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": 5, 118 | "id": "patient-bruce", 119 | "metadata": {}, 120 | "outputs": [], 121 | "source": [ 122 | "os.chdir(\"/home/arjun/Documents/Programs/Python_Codes/PyTorch_Resources/Good_Codes/\")" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": 6, 128 | "id": "rotary-jacket", 129 | "metadata": {}, 130 | "outputs": [], 131 | "source": [ 132 | "# MNIST dataset statistics:\n", 133 | "# mean = tensor([0.1307]) & std dev = tensor([0.3081])\n", 134 | "mean = np.array([0.1307])\n", 135 | "std_dev = np.array([0.3081])" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": 7, 141 | "id": "accredited-fleet", 142 | "metadata": {}, 143 | "outputs": [], 144 | "source": [ 145 | "# Define data set transformations to apply-\n", 146 | "transforms_apply = transforms.Compose([\n", 147 | " transforms.ToTensor(),\n", 148 | " transforms.Normalize(mean = mean, std = std_dev)\n", 149 | " ])" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": 8, 155 | "id": "meaningful-means", 156 | "metadata": {}, 157 | "outputs": [], 158 | "source": [ 159 | "# MNIST dataset-\n", 160 | "train_dataset = torchvision.datasets.MNIST(\n", 161 | " root = './data', train = True,\n", 162 | " transform = transforms_apply, download = True\n", 163 | " )\n", 164 | "\n", 165 | "test_dataset = torchvision.datasets.MNIST(\n", 166 | " root = './data', train = False,\n", 167 | " transform = transforms_apply\n", 168 | " )" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": 9, 174 | "id": "cognitive-afghanistan", 175 | "metadata": {}, 176 | "outputs": [ 177 | { 178 | "name": "stdout", 179 | "output_type": "stream", 180 | "text": [ 181 | "len(train_dataset): 60000 & len(test_dataset): 10000\n" 182 | ] 183 | } 184 | ], 185 | "source": [ 186 | "print(f\"len(train_dataset): {len(train_dataset)} & len(test_dataset): {len(test_dataset)}\")" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": 10, 192 | "id": "lesbian-conditions", 193 | "metadata": {}, 194 | "outputs": [], 195 | "source": [ 196 | "# Create dataloader-\n", 197 | "train_loader = torch.utils.data.DataLoader(\n", 198 | " dataset = train_dataset, batch_size = batch_size,\n", 199 | " shuffle = True\n", 200 | " )\n", 201 | "\n", 202 | "test_loader = torch.utils.data.DataLoader(\n", 203 | " dataset = test_dataset, batch_size = batch_size,\n", 204 | " shuffle = False\n", 205 | " )" 206 | ] 207 | }, 208 | { 209 | "cell_type": "code", 210 | "execution_count": 11, 211 | "id": "billion-second", 212 | "metadata": {}, 213 | "outputs": [ 214 | { 215 | "name": "stdout", 216 | "output_type": "stream", 217 | "text": [ 218 | "len(train_loader) = 1875 & len(test_loader) = 313\n" 219 | ] 220 | } 221 | ], 222 | "source": [ 223 | "print(f\"len(train_loader) = {len(train_loader)} & len(test_loader) = {len(test_loader)}\")" 224 | ] 225 | }, 226 | { 227 | "cell_type": "code", 228 | "execution_count": null, 229 | "id": "local-breed", 230 | "metadata": {}, 231 | "outputs": [], 232 | "source": [] 233 | }, 234 | { 235 | "cell_type": "code", 236 | "execution_count": 12, 237 | "id": "graduate-light", 238 | "metadata": {}, 239 | "outputs": [ 240 | { 241 | "data": { 242 | "text/plain": [ 243 | "(torch.Size([32, 1, 28, 28]), torch.Size([32]))" 244 | ] 245 | }, 246 | "execution_count": 12, 247 | "metadata": {}, 248 | "output_type": "execute_result" 249 | } 250 | ], 251 | "source": [ 252 | "images, labels = next(iter(train_loader))\n", 253 | "\n", 254 | "images.shape, labels.shape" 255 | ] 256 | }, 257 | { 258 | "cell_type": "code", 259 | "execution_count": 13, 260 | "id": "abroad-amplifier", 261 | "metadata": {}, 262 | "outputs": [ 263 | { 264 | "name": "stdout", 265 | "output_type": "stream", 266 | "text": [ 267 | "img_samples.shape = torch.Size([32, 1, 28, 28]), labels.shape = torch.Size([32])\n" 268 | ] 269 | } 270 | ], 271 | "source": [ 272 | "# Sanity check- one batch of data\n", 273 | "examples = iter(train_loader)\n", 274 | "\n", 275 | "# Unpack-\n", 276 | "img_samples, labels = examples.next()\n", 277 | "print(f\"img_samples.shape = {img_samples.shape}, labels.shape = {labels.shape}\")\n", 278 | "# We have '1' due to grey-scale images." 279 | ] 280 | }, 281 | { 282 | "cell_type": "code", 283 | "execution_count": 14, 284 | "id": "premium-preparation", 285 | "metadata": {}, 286 | "outputs": [ 287 | { 288 | "data": { 289 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXAAAAD6CAYAAAC4RRw1AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAeBklEQVR4nO3de5BUxdkG8OcVUWIQBAVcEVmSIELUBEIMFuSTREmAioFERFABFViTYAQlkRUCBIQAiuaCBIJyVQtiCQplNCuCSiyNcilUYEUuJdeVS5AgBoNAf3/s2OludmZnZ86cOX3m+VVR+/b2zJzWl21m3+nTLUopEBGRf87I9wCIiCgznMCJiDzFCZyIyFOcwImIPMUJnIjIU5zAiYg8ldUELiJdRWSziGwVkdKgBkX5xbzGF3MbL5LpOnARqQXgAwBdAOwGsBpAX6XUpuCGR2FjXuOLuY2fM7N47lUAtiqltgOAiCwC0ANA0r8MIsK7hiJCKSVJuphXvx1USjVK0lej3DKvkVJlXrMpoTQFsMto7058zyIiJSKyRkTWZHEtCg/z6rcdKfqqzS3zGllV5jWbd+BVvYM77V9spdQsALMA/ovuCeY1vqrNLfPql2zege8G0MxoXwxgb3bDoQhgXuOLuY2ZbCbw1QBaikgLETkLQB8Ay4IZFuUR8xpfzG3MZFxCUUqdEJG7AJQBqAVgjlJqY2Ajo7xgXuOLuY2fjJcRZnQx1tQiI8UqlBpjXiNlrVKqfRAvxLxGSpV55Z2YRESe4gROROQpTuBERJ7KZh04kddatWql45UrV1p9F110UdLnTZkyxWqXlnJLEcoPvgMnIvIUJ3AiIk+xhEIFa/DgwTouKiqy+p588kmrvWfPHh3v3r07twOjwDz22GNW+4477rDa48aN0/H48eNDGVOQ+A6ciMhTnMCJiDzFCZyIyFOsgXuguLhYx6+++qrVN3PmTB1Pnjw5pBH56aGHHrLaw4YNS/rYGTNmWO033ngjF0OiHHNr3qdOnbLa9evX1/GZZ9rT4YkTJ3I3sIDwHTgRkac4gRMReYq7EUaQWwoZNGiQjs8++2yr79xzz83oGoWyG+Gll16q41WrVll9jRs31rH7/3zs2LFW+/PPP8/B6HKCuxEaTp48abXdEorpa1/7mtXesSPV6XSh426ERERxwgmciMhTnMCJiDzFZYQRMXHiRB3fd999Vt+xY8d0fOutt4Y2pjh45plndGzWvAGgrKxMxx7XvKmA8R04EZGnOIETEXmKJZQ8MXfCA4A777xTx8ePH7f6+vfvr+Nnn302twPzXO/eva32ZZddlvSxb7/9to5ZMiEf8R04EZGnOIETEXmKEzgRkadYA0/45je/qeMlS5ZYfcOHD9dxNjXoDh066PjBBx+0+ioqKnTsLhX8+9//nvE1C427tYC5w5x7ks7s2bNDGRPlzxlnxPs9arz/64iIYqzaCVxE5ojIfhHZYHyvoYgsF5Etia8NcjtMChrzGl/MbeFIp4QyD8CjABYY3ysFsEIpNVlEShPtEcEPL3fMkgkAvPjiizr++OOPrT5zuVlN9O3b12pPmzZNx/v27bP6xowZo+OQSibzEMO8plo2OHfuXKu9c+fOXA8nX+YhhrlNpk6dOlZ71KhROnZ3H3Tbhw8f1rEPBzi4qn0HrpRaBeCQ8+0eAOYn4vkAegY7LMo15jW+mNvCkWkNvIlSqgIAEl8bV/N48gPzGl/MbQzlfBWKiJQAKMn1dShczGs8Ma9+yXQC3yciRUqpChEpArA/2QOVUrMAzALCP+GjVq1aVnv06NE6vvvuu60+s+48YoRdGtyzZ0/a1zRPzFmwYIHV99lnn+n4l7/8pdW3fPnytK+RQ17kNZWbb745ad9TTz0V4kgiJ63cRjWvqZjLcwGgtLQ07eean0vV5Oc8KjItoSwDMCARDwCwNJjhUJ4xr/HF3MZQOssIFwJ4E0ArEdktIgMBTAbQRUS2AOiSaJNHmNf4Ym4LR7UlFKVU3yRd1wY8lkBceOGFOp4xY4bVZx7gfPvtt1t9S5dm9obE3EUQAO644w4dr1u3zuq7/vrrdbx/f9LqRCh8y2sqTZs21bF76HMhilNuKTXeiUlE5ClO4EREnuIETkTkqdjtRmjuMFevXj2rb9CgQTrevHlzRq/frl07qz15sv1ZkLnj3fe+9z2r7+DBgxldk1L78Y9/rOOGDRvmcSRE4eI7cCIiT3ECJyLyVOxKKA0a/G+XzDZt2lh9vXr10nFZWVnS13jvvfes9llnnaXjCRMmWH3169e32uYdliyZhOPAgQM6dg8nrl27dlqv4eaxZcuWVnvgwIE6btGiRdLXmTdvntVetGhRWten4JiHOLgHOnz44YdWe/z48WEMKWf4DpyIyFOcwImIPMUJnIjIU2LeXp7zi4Wwu1mPHj10/MADD1h9l19+edLniYiO3333XavPvD370ksvtfrM+isAXHXVVTresWNHGiPOD6WUVP+o9ERp1zp3R7mioiIdu6f1mJ+X/OEPf7D6vvOd71jtTz/9VMfbtm2z+oqLi3Xs1lzNA7Lvvfdeq+/QIffMhUCsVUq1D+KFopTXVF544QWr3aVLFx27+Vi9erXVdncyjLAq88p34EREnuIETkTkKU7gRESeit06cHNb2Oeff97qM+th55xzjtXXv39/Hf/2t7+1+sxaqatRo0ZWe+vWrTqeNGmS1WeuCXZv5T958mTSa1AwBg8ebLW3b9+uY/fzkeHDh1vt9evX6/iVV16x+jp27Khj9zQn8+/Va6+9ZvXNnTs3jVFTVTp37qxj83On6tx33305GE3+8B04EZGnOIETEXkqdssIM2UuJ3J/RV6zZo2Op06davW5t2r/9Kc/1fFNN91k9ZlLFX//+99bfQ8//LCO9+7dm+6wMxbXZYTuNghf//rX03rewoULrfYtt9yS0fXdpYqbNm3SsXlwNgB07949o2tUI5bLCOvUqWO1//znP+u4X79+SZ9nLuMEgAEDBlht86DxiOMyQiKiOOEETkTkKU7gRESeit0ywnS5t8QvX75cx+7t2ObJ82ZNsyrPPPOMjkeOHGn1jRo1SsfDhg2z+s477zwdm1uXUs1MnDjRaj/xxBM6rlWrltVn1j+nTJmS24EBuOKKK3J+jbi68MILrXaqurfJ3AIB8KrmnRa+Ayci8hQncCIiTxVUCeWiiy7S8eLFi62+8vJyHV9zzTVW37FjxzK6nnv6h3kn4F//+ler75133snoGmRzT8AZPXq0jlu3bm31ffTRRzo+fvx4xtc0y3Fjx45N+jj3zmDKnLvLYDLm0t044jtwIiJPcQInIvJUtRO4iDQTkVdEpFxENorI0MT3G4rIchHZkviafMcnihzmNbZqM6+Fo9pb6UWkCECRUmqdiJwLYC2AngBuA3BIKTVZREoBNFBKjUj+Svm/NdfcHfDXv/611Tdu3Dgduyf5xNRFiEleUzFvbX/ppZesvosvvljH7ulJ7gk95s6B5g6DAHDPPffo2D2xvqKiQsfmSTFA9UtSM/QugNvjllfz1CMA2LJlS1rPc3eZdHcB9Uhmt9IrpSqUUusS8ScAygE0BdADwPzEw+aj8i8JeYJ5ja3PmdfCUaNVKCJSDKAtgLcANFFKVQCVk4GINE7ynBIAJVmOk3KIeY0n5jX+0t6NUETqAngNwESl1BIROayUOs/o/1gplbKuFvavZO7dluaOc40b239/mzVrFsqYouKL3Qh9zGumWrVqZbVffvllHTdt2jSQa5hLEwHguuuu03GOSiautUqp9nHLa6YlFHe3UI9lvhuhiNQGsBjAU0qpL/Zn3Jeoj39RJ98f1EgpHMxrPDGvhSOdVSgCYDaAcqXUI0bXMgBfbK47AMBS97kUXcxrrDGvBSKdGnhHAP0AvCci6xPfGwlgMoCnRWQggJ0AbszJCClXmNd4qgvmtWBUO4ErpV4HkOx+1GuDHU6whgwZYrWvvPJKHd92220hjyZafM5rptwlZGZ9ulevXlZfSYn9OZ75GcmCBQusPrMe627R8P7772c22MwdTXHaUizzmsqgQYOs9uOPP56nkeQG78QkIvIUJ3AiIk/F7lDjTp066dg9nPi5557T8Y03FnYJMK6HGlM8DzXmMkIeakxEFCucwImIPMUJnIjIU7E7kefEiRP5HgIRBcw93SpGte2s8B04EZGnOIETEXkqdiWUf/7znzrmr1lEFGd8B05E5ClO4EREnuIETkTkKU7gRESe4gROROQpTuBERJ7iBE5E5ClO4EREnuIETkTkKU7gRESeCvtW+oMAdgC4IBFHQSGOpXnAr8e8phbmWILMLfOaWt7zGuqRavqiImuCOvYpWxxLcKI0fo4lOFEaP8diYwmFiMhTnMCJiDyVrwl8Vp6uWxWOJThRGj/HEpwojZ9jMeSlBk5ERNljCYWIyFOcwImIPBXqBC4iXUVks4hsFZHSMK+duP4cEdkvIhuM7zUUkeUisiXxtUEI42gmIq+ISLmIbBSRofkaSxCYV2sssckt82qNJZJ5DW0CF5FaAKYD6AagDYC+ItImrOsnzAPQ1fleKYAVSqmWAFYk2rl2AsBwpVRrAB0ADEn8v8jHWLLCvJ4mFrllXk8TzbwqpUL5A+BqAGVG+34A94d1feO6xQA2GO3NAIoScRGAzXkY01IAXaIwFuaVuWVe/clrmCWUpgB2Ge3die/lWxOlVAUAJL42DvPiIlIMoC2At/I9lgwxr0l4nlvmNYko5TXMCVyq+F5Br2EUkboAFgMYppQ6ku/xZIh5rUIMcsu8ViFqeQ1zAt8NoJnRvhjA3hCvn8w+ESkCgMTX/WFcVERqo/IvwlNKqSX5HEuWmFdHTHLLvDqimNcwJ/DVAFqKSAsROQtAHwDLQrx+MssADEjEA1BZ28opEREAswGUK6UeyedYAsC8GmKUW+bVENm8hlz47w7gAwDbAIzKwwcPCwFUAPgcle8wBgI4H5WfHm9JfG0Ywjg6ofLX0XcBrE/86Z6PsTCvzC3z6m9eeSs9EZGneCcmEZGnOIETEXkqqwk837faUm4wr/HF3MZMFkX9Wqj8cOMrAM4C8A6ANtU8R/FPNP4wr7H9cyCo3Ebgv4V/qslrNu/ArwKwVSm1XSl1HMAiAD2yeD2KBubVbztS9DG3/qoyr9lM4GndaisiJSKyRkTWZHEtCg/zGl/V5pZ59cuZWTw3rVttlVKzkDh6SERO66fIYV7jq9rcMq9+yeYdeFRvtaXsMK/xxdzGTDYTeFRvtaXsMK/xxdzGTMYlFKXUCRG5C0AZKj/dnqOU2hjYyCgvmNf4Ym7jJ9Rb6VlTiw6lVFX10Iwwr5GyVinVPogXYl4jpcq88k5MIiJPcQInIvIUJ3AiIk9xAici8hQncCIiT3ECJyLyVDa30lMN9e7dW8eLFi2y+iqP3Kt0+PBhq+9HP/qRjj/88EOrb8+ePcENkChmGjVqZLVnzpxptXv27Klj82cQAMrLy3XcuXNnq+/AgQPBDDBLfAdOROQpTuBERJ7iBE5E5CneSp9DN9xwg9WeN2+ejs8555yMXnPEiBFWe+rUqRm9TiHeSt+uXTurbeZn5MiRVt9tt91mtVeuXKnjXbt2IcIK/lb6yy67TMcvvvii1XfJJZdYbXP+c2vgZt/y5cutvm7dumU9zhrirfRERHHCCZyIyFNcRpilpUuXWm3z165vf/vbVl+mZROynX322VZ7ypQpOv7qV7+a9HmtW7e22i1atNDxqVOnrL45c+ZY7V69euk44iWUguMuFdy0aZOO3RLxf/7zH6v97LPP6njz5s1WX2lpqY6/9a1vWX1mKWbnzp01HHFw+A6ciMhTnMCJiDzFCZyIyFOsgVfh3HPPtdpNmzZN+tjrr7/eaud6WebYsWOt9quvvqrjNWvW5PTa+WTWvefPn2/13XjjjWEPJxD9+/fX8bp166y+DRs2hD0cb91///1W2/wZdH8ezf/ngF0Dd5mfi0yYMMHqGzx4sI5Hjx6d/mADxnfgRESe4gROROQpllCq8P3vf99qL1myJE8jOZ27FPHMM+OZwrp161rtRx55RMdRLpnUr1/fas+YMUPHzZs3t/ratm2r471791p9+/bt0/G4ceOsvpdeeinrcfqua9euOh46dKjVZy4VrEnJxDVp0iQdt2nTxupzS175wnfgRESe4gROROQpTuBERJ6KZwE1A+bSwSFDhuRxJKk9/fTTVnv79u15Gklu9ejRw2oPHDgwo9f56KOPdPz4448nfZy7HPQb3/hG2tfo16+fjvv27Wv1/fCHP0zrNczb+t32+eefn/ZY4srcYRCwlw66SwUPHjyo46BOzjFzHCV8B05E5KlqJ3ARmSMi+0Vkg/G9hiKyXES2JL42yO0wKWjMa3wxt4Wj2gMdROT/ABwFsEApdXniew8COKSUmiwipQAaKKVGpHqdxPPyukF8nTp1dDxx4kSrz/wVOtWOdi53E/hPP/1Ux+7OZ7Vq1dJxgwbJf37cnfHMHQ9LSkqsvkOHDqU9Vsc1iHBeb7nlFqu9YMGCtJ7nHlr70EMP6dg9ENpUVFRktd2lo+aBD25JY+7cuTquyY6TGzdu1PHx48etPjPPW7dutfqOHDmS6mXXArgXAeQ23z+vpieeeMJqm38/3DnM/Jl0+15//XWrfe+99+p47dq1WY8zhzI70EEptQqAO0v0APDF/czzAfTMdnQULuY1vpjbwpHph5hNlFIVAKCUqhCRxskeKCIlAEqS9VOkMK/xlVZumVe/5HwVilJqFoBZQLR+JaPsMK/xxLz6JdMJfJ+IFCX+JS8CsD/IQQXF3VXQrHsHtVTQPZHHvFXXrduaS6HM+qfLrWubp8HkmBd5TcVcQgakrnubKioqrPbVV19ttc1Djvv06WP1pVv3fuGFF6z2rbfequN///vfab1GFrzOrbuMMNWOg8keBwCdOnWy2n/72990PG3aNKvP/ZwsijJdRrgMwIBEPADA0hSPJX8wr/HF3MZQOssIFwJ4E0ArEdktIgMBTAbQRUS2AOiSaJNHmNf4Ym4LR7XLCAO9WMg1tT/+8Y9W+6677sr6NadPn26177777rSfm24JxS0DNGnSJO1rpEspJdU/Kj25yOuXvvQlq71s2TIdu7tFmsxlnIB9B51b7jK5v6L37NnTao8ZM0bH7qHKJveAW7Pc4ub86NGjSV8nC1UuN8tEvmvg99xzj46nTp1q9ZlLBd27JM3dGt0ypntnrDn//etf/7L6unXrpuMILDHMbBkhERFFEydwIiJPcQInIvJU7HYjnDz5f5/N/OxnPwvkNc2696hRowJ5TUrt2LFjVtvMQaoa+Je//GWr/dhjj+n4s88+s/rKysp0/Ktf/crqu/3229Meq7kE0D0tKM4HTedaq1atdOx+Vvf+++/r2D1lx9zCwqxjA8ADDzxgtc1dDd0tEswlhp07d056/XziO3AiIk9xAici8pSXJZR69erp2CyZAJmXTU6cOKHjhx9+2Oozf83KxpVXXqljdxdDs71hwwaQzVwCaO4gB9gHHrvMX4vNpYgAcPLkSR1nczj0TTfdpGOWTHLD/Xkxlwi7u36mMnr0aKttltXc8kqjRo10fMMNN1h9UblLk+/AiYg8xQmciMhTnMCJiDzlZQ38iiuu0PGdd95p9WW6NcCjjz6q46Bq3m7dbPbs2Tp2x2m2f/7znwdy/Tgx//88//zzVp+55NDdUc6sbbt17kzr3itWrLDa69evz+h1KLXWrVvr2P152bRpUyDXMGvZbdu2tfrM7RRKS0utvsWLF+s4n0sK+Q6ciMhTnMCJiDzFCZyIyFNe1sDdtd+Z+NOf/mS1ze1Cg+Juc1mTE8spuW3btiVt79271+ozb4l3t4jNlLudbDZryCm57373uzqu7nT5IPzmN7+x2ubWs+4WDVH5WeY7cCIiT3ECJyLylJe/+3Xs2FHHNVk2+OSTT+rYXRb03//+N6OxuCe53HzzzTp2T/9I1yWXXGK1P/jgg4xepxC5SwzNkkpQJRT3YNwOHTro2N0ZjzKX6uBi8+cuqGV87uv87ne/0/GECROsvp/85Cc6XrduXSDXzwTfgRMReYoTOBGRpziBExF5yssaeLoOHz5stVeuXKnjVDXv5s2bW+3rrrsu6WPNE1+A9Gvybl37H//4h467d+9u9b388stpvSbZ9WgA6N27d86vaS5JZQ08OOZSQfdzB3Obilxt7WqeUu9uZ3vBBRfk5Jo1xXfgRESe4gROROQpL0so5q8zqUoW7q89Z5zxv3+v3ENrzaWJ7jK+a6+9Nu1rpFtCMUsmAFBSUpLW8yg1t9w1dOjQtJ43fvx4q3306FGr/eCDDyZ9bu3atXXcpEkTq2/fvn1pXZ9OV15ermPz5xMABg0apOO//OUvVt/BgwcDub65VDHVocr5xHfgRESeqnYCF5FmIvKKiJSLyEYRGZr4fkMRWS4iWxJfG+R+uBQU5jW2ajOvhSOdd+AnAAxXSrUG0AHAEBFpA6AUwAqlVEsAKxJt8gfzGl/Ma4GotgaulKoAUJGIPxGRcgBNAfQA0DnxsPkAXgUwIiejPH1MaT2ufv36Vts9bT7VY4MYy5EjR6y2WZ/dvXt3RtcLShTzmk87d+602j/4wQ/Sfm6dOnV0fPnll1t9eaiBf66UWgf4n9cDBw7o2P2sqbi4WMevvfaa1WcuMaxJrbpr165W2/z8xD35vqysLO3XzaUafYgpIsUA2gJ4C0CTxCQApVSFiDRO8pwSAPyELsKY13hiXuMv7QlcROoCWAxgmFLqiPsvYjJKqVkAZiVeI7MDKylnmNd4Yl4LQ1oTuIjURuVfhqeUUksS394nIkWJf82LAOzP1SCDkmmZJJVDhw5Z7VWrVunY3M0MANauXRv49bMRl7wGYcqUKVa7YcOGaT938ODBOnYPPM6HuOR10qRJOjYPOAbs3QBbtWpl9a1evVrH/fv3t/rcO2XN15k5c6bVZ5ZH3ed5s4xQKv/png2gXCn1iNG1DMCARDwAwNLgh0e5wrzGGvNaINJ5B94RQD8A74nI+sT3RgKYDOBpERkIYCeAG3MyQsoV5jWe6oJ5LRjprEJ5HUCyAlryWxQp0pjX2DqqlGJeC4TU5ESbrC8W0Ici06ZN0/EvfvGLIF4ybdOnT7fab775ptVeuHBhmMPJWIof8hqL0odd7du3t9rLli3TsXube6beeOMNq92nTx8d79mzJ5BrZGGtUqp99Q+rXpTy6p58Ze5A6J60ZG6ZcerUqaR9br+7VNCse7u19DyoMq+8lZ6IyFOcwImIPOVlCaVevXo6btOmjdVnLgdzN4FPpW/fvjp278ozbdy40Wp/8sknaV8jSuJaQnGZd0Y+99xzVl+LFi3Sfh1zCah5ODZgH+gQAbEsobjMQ1fcA8rNnT3d+c1dD79p0yYdjxkzxuqL2OEcLKEQEcUJJ3AiIk9xAici8pSXNXDKXqHUwE3t2rWz2uZh0e42C2+//bbVNg9H3rVrVw5GF5iCqIEXINbAiYjihBM4EZGnvDzUmCgT69ats9o12XGQKIr4DpyIyFOcwImIPMUJnIjIU5zAiYg8xQmciMhTnMCJiDzFCZyIyFOcwImIPMUJnIjIU5zAiYg8Ffat9AcB7ABwQSKOgkIcS/PqH1IjzGtqYY4lyNwyr6nlPa+hbierLyqyJqgtL7PFsQQnSuPnWIITpfFzLDaWUIiIPMUJnIjIU/mawGfl6bpV4ViCE6XxcyzBidL4ORZDXmrgRESUPZZQiIg8xQmciMhToU7gItJVRDaLyFYRKQ3z2onrzxGR/SKywfheQxFZLiJbEl8bhDCOZiLyioiUi8hGERmar7EEgXm1xhKb3DKv1lgimdfQJnARqQVgOoBuANoA6CsibcK6fsI8AF2d75UCWKGUaglgRaKdaycADFdKtQbQAcCQxP+LfIwlK8zraWKRW+b1NNHMq1IqlD8ArgZQZrTvB3B/WNc3rlsMYIPR3gygKBEXAdichzEtBdAlCmNhXplb5tWfvIZZQmkKYJfR3p34Xr41UUpVAEDia+MwLy4ixQDaAngr32PJEPOahOe5ZV6TiFJew5zApYrvFfQaRhGpC2AxgGFKqSP5Hk+GmNcqxCC3zGsVopbXMCfw3QCaGe2LAewN8frJ7BORIgBIfN0fxkVFpDYq/yI8pZRaks+xZIl5dcQkt8yrI4p5DXMCXw2gpYi0EJGzAPQBsCzE6yezDMCARDwAlbWtnBIRATAbQLlS6pF8jiUAzKshRrllXg2RzWvIhf/uAD4AsA3AqDx88LAQQAWAz1H5DmMggPNR+enxlsTXhiGMoxMqfx19F8D6xJ/u+RgL88rcMq/+5pW30hMReYp3YhIReYoTOBGRpziBExF5ihM4EZGnOIETEXmKEzgRkac4gRMReer/AexO0x5Zn7nQAAAAAElFTkSuQmCC\n", 290 | "text/plain": [ 291 | "
" 292 | ] 293 | }, 294 | "metadata": { 295 | "needs_background": "light" 296 | }, 297 | "output_type": "display_data" 298 | } 299 | ], 300 | "source": [ 301 | "# images[0][0].shape # the second index '0' accesses the 1st channel\n", 302 | "# torch.Size([28, 28])\n", 303 | "\n", 304 | "# Visualize the digits-\n", 305 | "for i in range(6):\n", 306 | " plt.subplot(2, 3, i + 1) # 2 rows & 3 columns\n", 307 | " plt.imshow(images[i][0], cmap = 'gray') # '0' to access first channel\n", 308 | " # plt.imshow(img_samples[i][0], cmap='gray') # '0' to access first channel\n", 309 | "plt.show()" 310 | ] 311 | }, 312 | { 313 | "cell_type": "code", 314 | "execution_count": null, 315 | "id": "separated-lewis", 316 | "metadata": {}, 317 | "outputs": [], 318 | "source": [] 319 | }, 320 | { 321 | "cell_type": "code", 322 | "execution_count": 15, 323 | "id": "filled-magic", 324 | "metadata": {}, 325 | "outputs": [ 326 | { 327 | "data": { 328 | "text/plain": [ 329 | "(tensor(-0.4242), tensor(2.8215))" 330 | ] 331 | }, 332 | "execution_count": 15, 333 | "metadata": {}, 334 | "output_type": "execute_result" 335 | } 336 | ], 337 | "source": [ 338 | "# Sanity check- Check whether transformations have been applied.\n", 339 | "# Look at first image out of 32 images-\n", 340 | "img_samples[0, :, :, :].min(), img_samples[0, :, :, :].max()" 341 | ] 342 | }, 343 | { 344 | "cell_type": "code", 345 | "execution_count": null, 346 | "id": "suburban-vulnerability", 347 | "metadata": {}, 348 | "outputs": [], 349 | "source": [] 350 | }, 351 | { 352 | "cell_type": "code", 353 | "execution_count": null, 354 | "id": "confused-unemployment", 355 | "metadata": {}, 356 | "outputs": [], 357 | "source": [] 358 | }, 359 | { 360 | "cell_type": "code", 361 | "execution_count": 16, 362 | "id": "guided-distribution", 363 | "metadata": {}, 364 | "outputs": [], 365 | "source": [ 366 | "class LeNet300(nn.Module):\n", 367 | " def __init__(self):\n", 368 | " super(LeNet300, self).__init__()\n", 369 | " \n", 370 | " # Define layers-\n", 371 | " self.fc1 = nn.Linear(in_features = input_size, out_features = 300)\n", 372 | " self.fc2 = nn.Linear(in_features = 300, out_features = 100)\n", 373 | " self.output = nn.Linear(in_features = 100, out_features = 10)\n", 374 | " \n", 375 | " self.weights_initialization()\n", 376 | " \n", 377 | " \n", 378 | " def forward(self, x):\n", 379 | " out = F.relu(self.fc1(x))\n", 380 | " out = F.relu(self.fc2(out))\n", 381 | " return self.output(out)\n", 382 | " \n", 383 | " \n", 384 | " def weights_initialization(self):\n", 385 | " '''\n", 386 | " When we define all the modules such as the layers in '__init__()'\n", 387 | " method above, these are all stored in 'self.modules()'.\n", 388 | " We go through each module one by one. This is the entire network,\n", 389 | " basically.\n", 390 | " '''\n", 391 | " for m in self.modules():\n", 392 | " if isinstance(m, nn.Linear):\n", 393 | " nn.init.xavier_normal_(m.weight)\n", 394 | " nn.init.constant_(m.bias, 0)\n" 395 | ] 396 | }, 397 | { 398 | "cell_type": "code", 399 | "execution_count": null, 400 | "id": "weighted-genius", 401 | "metadata": {}, 402 | "outputs": [], 403 | "source": [] 404 | }, 405 | { 406 | "cell_type": "code", 407 | "execution_count": 17, 408 | "id": "twenty-ecology", 409 | "metadata": {}, 410 | "outputs": [], 411 | "source": [ 412 | "# Initialize an instance of LeNet-300-100 dense neural network-\n", 413 | "model = LeNet300()" 414 | ] 415 | }, 416 | { 417 | "cell_type": "code", 418 | "execution_count": null, 419 | "id": "solar-virtue", 420 | "metadata": {}, 421 | "outputs": [], 422 | "source": [] 423 | }, 424 | { 425 | "cell_type": "code", 426 | "execution_count": 18, 427 | "id": "dated-mixture", 428 | "metadata": {}, 429 | "outputs": [ 430 | { 431 | "name": "stdout", 432 | "output_type": "stream", 433 | "text": [ 434 | "layer name: fc1.weight, has shape: torch.Size([300, 784])\n", 435 | "bias layer name: fc1.bias has shape: torch.Size([300])\n", 436 | "layer name: fc2.weight, has shape: torch.Size([100, 300])\n", 437 | "bias layer name: fc2.bias has shape: torch.Size([100])\n", 438 | "layer name: output.weight, has shape: torch.Size([10, 100])\n", 439 | "bias layer name: output.bias has shape: torch.Size([10])\n" 440 | ] 441 | } 442 | ], 443 | "source": [ 444 | "for name, param in model.named_parameters():\n", 445 | " # We do not prune bias term\n", 446 | " if 'weight' in name:\n", 447 | " print(f\"layer name: {name}, has shape: {param.shape}\")\n", 448 | " elif 'bias' in name:\n", 449 | " print(f\"bias layer name: {name} has shape: {param.shape}\")" 450 | ] 451 | }, 452 | { 453 | "cell_type": "code", 454 | "execution_count": null, 455 | "id": "conceptual-mechanics", 456 | "metadata": {}, 457 | "outputs": [], 458 | "source": [] 459 | }, 460 | { 461 | "cell_type": "code", 462 | "execution_count": 19, 463 | "id": "buried-worst", 464 | "metadata": {}, 465 | "outputs": [], 466 | "source": [ 467 | "# Define loss and optimizer-\n", 468 | "loss = nn.CrossEntropyLoss() # applies softmax for us\n", 469 | "optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate)" 470 | ] 471 | }, 472 | { 473 | "cell_type": "code", 474 | "execution_count": null, 475 | "id": "competitive-homeless", 476 | "metadata": {}, 477 | "outputs": [], 478 | "source": [ 479 | "# Print optimizer's state_dict\n", 480 | "print(\"Optimizer's 'state_dict':\")\n", 481 | "for var_name in optimizer.state_dict():\n", 482 | " print(f\"var_name: {var_name} \\t {optimizer.state_dict()[var_name]}\")" 483 | ] 484 | }, 485 | { 486 | "cell_type": "code", 487 | "execution_count": null, 488 | "id": "registered-container", 489 | "metadata": {}, 490 | "outputs": [], 491 | "source": [] 492 | }, 493 | { 494 | "cell_type": "code", 495 | "execution_count": 20, 496 | "id": "explicit-there", 497 | "metadata": {}, 498 | "outputs": [ 499 | { 500 | "name": "stdout", 501 | "output_type": "stream", 502 | "text": [ 503 | "num_steps = 1875 & len(train_dataset)/batch_size = 1875.0\n", 504 | "number of training steps in one epoch = 1875\n" 505 | ] 506 | } 507 | ], 508 | "source": [ 509 | "# Training loop-\n", 510 | "num_steps = len(train_loader)\n", 511 | "\n", 512 | "print(f\"num_steps = {num_steps} & len(train_dataset)/batch_size = {len(train_dataset) / batch_size}\")\n", 513 | "print(f\"number of training steps in one epoch = {num_steps}\")" 514 | ] 515 | }, 516 | { 517 | "cell_type": "code", 518 | "execution_count": null, 519 | "id": "auburn-ukraine", 520 | "metadata": {}, 521 | "outputs": [], 522 | "source": [] 523 | }, 524 | { 525 | "cell_type": "code", 526 | "execution_count": 25, 527 | "id": "objective-pearl", 528 | "metadata": {}, 529 | "outputs": [], 530 | "source": [ 531 | "def count_params(model):\n", 532 | " \n", 533 | " tot_params = 0\n", 534 | " for layer_name, param in model.named_parameters():\n", 535 | " # print(f\"{layer_name}.shape = {param.shape} has {torch.count_nonzero(param.data)} non-zero params\")\n", 536 | " tot_params += torch.count_nonzero(param.data)\n", 537 | " \n", 538 | " return tot_params" 539 | ] 540 | }, 541 | { 542 | "cell_type": "code", 543 | "execution_count": null, 544 | "id": "macro-titanium", 545 | "metadata": {}, 546 | "outputs": [], 547 | "source": [] 548 | }, 549 | { 550 | "cell_type": "code", 551 | "execution_count": null, 552 | "id": "minute-evolution", 553 | "metadata": {}, 554 | "outputs": [], 555 | "source": [] 556 | }, 557 | { 558 | "cell_type": "code", 559 | "execution_count": null, 560 | "id": "southeast-diesel", 561 | "metadata": {}, 562 | "outputs": [], 563 | "source": [] 564 | }, 565 | { 566 | "cell_type": "code", 567 | "execution_count": null, 568 | "id": "worldwide-correction", 569 | "metadata": {}, 570 | "outputs": [], 571 | "source": [] 572 | }, 573 | { 574 | "cell_type": "markdown", 575 | "id": "leading-paintball", 576 | "metadata": {}, 577 | "source": [ 578 | "### Train defined model:" 579 | ] 580 | }, 581 | { 582 | "cell_type": "code", 583 | "execution_count": 21, 584 | "id": "stuffed-andrews", 585 | "metadata": {}, 586 | "outputs": [], 587 | "source": [ 588 | "# User input parameters for Early Stopping in manual implementation-\n", 589 | "minimum_delta = 0.001\n", 590 | "patience = 5\n", 591 | "\n", 592 | "# Initialize parameters for Early Stopping manual implementation-\n", 593 | "best_val_loss = 100\n", 594 | "loc_patience = 0" 595 | ] 596 | }, 597 | { 598 | "cell_type": "code", 599 | "execution_count": 22, 600 | "id": "happy-istanbul", 601 | "metadata": {}, 602 | "outputs": [], 603 | "source": [ 604 | "# Python3 lists to store model training metrics-\n", 605 | "training_acc = []\n", 606 | "validation_acc = []\n", 607 | "training_loss = []\n", 608 | "validation_loss = []" 609 | ] 610 | }, 611 | { 612 | "cell_type": "code", 613 | "execution_count": null, 614 | "id": "conservative-workshop", 615 | "metadata": {}, 616 | "outputs": [], 617 | "source": [] 618 | }, 619 | { 620 | "cell_type": "code", 621 | "execution_count": 23, 622 | "id": "foreign-cooperation", 623 | "metadata": {}, 624 | "outputs": [], 625 | "source": [ 626 | "def train_with_grad_freezing(model, epoch):\n", 627 | " '''\n", 628 | " Function to train one epoch of training dataset.\n", 629 | " '''\n", 630 | " \n", 631 | " running_loss = 0.0\n", 632 | " running_corrects = 0.0\n", 633 | " \n", 634 | " for batch, (images, labels) in enumerate(train_loader):\n", 635 | " # Reshape images first-\n", 636 | " # 32, 1, 28, 28\n", 637 | " # Input size needs to be 32, 784-\n", 638 | " images = images.reshape(-1, 28 * 28 * 1).to(device)\n", 639 | " # Tries to push to GPU if available\n", 640 | " labels = labels.to(device)\n", 641 | " # images, labels = images.reshape(-1, 28 * 28 * 1).to(device), labels.to(device)\n", 642 | " \n", 643 | " # Set defined model to training mode-\n", 644 | " model.train()\n", 645 | " \n", 646 | " # Backward pass-\n", 647 | " optimizer.zero_grad() # empty accumulated gradients\n", 648 | "\n", 649 | " # Forward pass-\n", 650 | " outputs = model(images)\n", 651 | "\n", 652 | " # Compute loss-\n", 653 | " J = loss(outputs, labels)\n", 654 | " \n", 655 | " # Perform backpropagation-\n", 656 | " J.backward()\n", 657 | " \n", 658 | " # Freezing Pruned weights by making their gradients Zero\n", 659 | " for layer_name, param in model.named_parameters():\n", 660 | " if 'weight' in layer_name:\n", 661 | " tensor = param.data.cpu().numpy()\n", 662 | " grad_tensor = param.grad.data.cpu().numpy()\n", 663 | " # grad_tensor = np.where(tensor < EPS, 0, grad_tensor)\n", 664 | " grad_tensor = np.where(tensor == 0, 0, grad_tensor)\n", 665 | " param.grad.data = torch.from_numpy(grad_tensor).to(device)\n", 666 | "\n", 667 | " # Update parameters-\n", 668 | " optimizer.step()\n", 669 | " \n", 670 | " # Compute model's performance statistics-\n", 671 | " running_loss += J.item() * images.size(0)\n", 672 | " _, predicted = torch.max(outputs, 1)\n", 673 | " running_corrects += torch.sum(predicted == labels.data)\n", 674 | "\n", 675 | " '''\n", 676 | " # Print information every 100 steps-\n", 677 | " if (batch + 1) % 100 == 0:\n", 678 | " print(f\"epoch {epoch + 1}/{num_epochs}, step {batch + 1}/{num_steps}, loss = {J.item():.4f}\")\n", 679 | " '''\n", 680 | " \n", 681 | " epoch_loss = running_loss / len(train_dataset)\n", 682 | " epoch_acc = running_corrects.double() / len(train_dataset)\n", 683 | " \n", 684 | " return epoch_loss, epoch_acc\n", 685 | " \n" 686 | ] 687 | }, 688 | { 689 | "cell_type": "code", 690 | "execution_count": null, 691 | "id": "killing-surveillance", 692 | "metadata": {}, 693 | "outputs": [], 694 | "source": [] 695 | }, 696 | { 697 | "cell_type": "code", 698 | "execution_count": 24, 699 | "id": "spread-lodge", 700 | "metadata": {}, 701 | "outputs": [], 702 | "source": [ 703 | "def test(model, epoch):\n", 704 | " '''\n", 705 | " Function to validate performance of trained 'model' on testing set.\n", 706 | " '''\n", 707 | " \n", 708 | " running_loss_val = 0.0\n", 709 | "\n", 710 | " total = 0\n", 711 | " correct = 0\n", 712 | " \n", 713 | " with torch.no_grad():\n", 714 | " for images, labels in test_loader:\n", 715 | "\n", 716 | " # Place features (images) and targets (labels) to GPU-\n", 717 | " # images = images.to(device)\n", 718 | " images = images.reshape(-1, 28 * 28 * 1).to(device)\n", 719 | " labels = labels.to(device)\n", 720 | " # images, labels = images.reshape(-1, 28 * 28 * 1).to(device), targets.to(device)\n", 721 | " # print(f\"images.shape = {images.shape}, labels.shape = {labels.shape}\")\n", 722 | " \n", 723 | " # Set model to evaluation mode-\n", 724 | " model.eval()\n", 725 | " \n", 726 | " # Make predictions using trained model-\n", 727 | " outputs = model(images)\n", 728 | " _, y_pred = torch.max(outputs, 1)\n", 729 | "\n", 730 | " # Compute validation loss-\n", 731 | " J_val = loss(outputs, labels)\n", 732 | "\n", 733 | " running_loss_val += J_val.item() * labels.size(0)\n", 734 | " \n", 735 | " # Total number of labels-\n", 736 | " total += labels.size(0)\n", 737 | "\n", 738 | " # Total number of correct predictions-\n", 739 | " correct += (y_pred == labels).sum()\n", 740 | "\n", 741 | " epoch_val_loss = running_loss_val / len(test_dataset)\n", 742 | " val_acc = 100 * (correct / total)\n", 743 | " \n", 744 | " return epoch_val_loss, val_acc\n", 745 | "\n", 746 | " " 747 | ] 748 | }, 749 | { 750 | "cell_type": "code", 751 | "execution_count": null, 752 | "id": "authentic-vision", 753 | "metadata": {}, 754 | "outputs": [], 755 | "source": [] 756 | }, 757 | { 758 | "cell_type": "code", 759 | "execution_count": null, 760 | "id": "arbitrary-heritage", 761 | "metadata": {}, 762 | "outputs": [], 763 | "source": [] 764 | }, 765 | { 766 | "cell_type": "code", 767 | "execution_count": 26, 768 | "id": "collectible-swing", 769 | "metadata": {}, 770 | "outputs": [ 771 | { 772 | "name": "stdout", 773 | "output_type": "stream", 774 | "text": [ 775 | "\n", 776 | "epoch: 1, # of params = 266610, training loss = 0.2036, training accuracy = 93.77%, val_loss = 0.1190 & val_accuracy = 96.21%\n", 777 | "\n", 778 | "\n", 779 | "Saving model with lowest val_loss = 0.1190\n", 780 | "\n", 781 | "epoch: 2, # of params = 266610, training loss = 0.0968, training accuracy = 97.04%, val_loss = 0.1003 & val_accuracy = 96.91%\n", 782 | "\n", 783 | "\n", 784 | "Saving model with lowest val_loss = 0.1003\n", 785 | "\n", 786 | "epoch: 3, # of params = 266610, training loss = 0.0738, training accuracy = 97.65%, val_loss = 0.1087 & val_accuracy = 96.67%\n", 787 | "\n", 788 | "\n", 789 | "epoch: 4, # of params = 266610, training loss = 0.0608, training accuracy = 98.06%, val_loss = 0.0971 & val_accuracy = 97.31%\n", 790 | "\n", 791 | "\n", 792 | "Saving model with lowest val_loss = 0.0971\n", 793 | "\n", 794 | "epoch: 5, # of params = 266610, training loss = 0.0523, training accuracy = 98.36%, val_loss = 0.0992 & val_accuracy = 97.24%\n", 795 | "\n", 796 | "\n", 797 | "epoch: 6, # of params = 266610, training loss = 0.0419, training accuracy = 98.65%, val_loss = 0.0917 & val_accuracy = 97.65%\n", 798 | "\n", 799 | "\n", 800 | "Saving model with lowest val_loss = 0.0917\n", 801 | "\n", 802 | "epoch: 7, # of params = 266610, training loss = 0.0387, training accuracy = 98.79%, val_loss = 0.0983 & val_accuracy = 97.59%\n", 803 | "\n", 804 | "\n", 805 | "epoch: 8, # of params = 266610, training loss = 0.0344, training accuracy = 98.92%, val_loss = 0.1141 & val_accuracy = 97.47%\n", 806 | "\n", 807 | "\n", 808 | "epoch: 9, # of params = 266610, training loss = 0.0332, training accuracy = 98.97%, val_loss = 0.1070 & val_accuracy = 97.84%\n", 809 | "\n", 810 | "\n", 811 | "epoch: 10, # of params = 266610, training loss = 0.0300, training accuracy = 99.11%, val_loss = 0.1006 & val_accuracy = 97.86%\n", 812 | "\n", 813 | "\n", 814 | "epoch: 11, # of params = 266610, training loss = 0.0295, training accuracy = 99.09%, val_loss = 0.1223 & val_accuracy = 97.52%\n", 815 | "\n", 816 | "\n", 817 | "\n", 818 | "Early stopping called. Exiting model training!\n", 819 | "\n", 820 | "\n" 821 | ] 822 | } 823 | ], 824 | "source": [ 825 | "# Training loop-\n", 826 | "for curr_epoch in range(1, num_epochs):\n", 827 | " \n", 828 | " if loc_patience >= patience:\n", 829 | " print(\"\\n\\nEarly stopping called. Exiting model training!\\n\\n\")\n", 830 | " break\n", 831 | " \n", 832 | " # epoch_loss, epoch_acc = train(model = model, epoch = curr_epoch)\n", 833 | " epoch_loss, epoch_acc = train_with_grad_freezing(model = model, epoch = curr_epoch)\n", 834 | " epoch_val_loss, val_acc = test(model = model, epoch = curr_epoch)\n", 835 | " \n", 836 | " remaining_params = count_params(model)\n", 837 | " # Pruned LeNet-300-100 model has 226730 trainable parameters\n", 838 | " \n", 839 | " print(f\"\\nepoch: {curr_epoch}, # of params = {remaining_params}, training loss = {epoch_loss:.4f}, training accuracy = {epoch_acc * 100:.2f}%, val_loss = {epoch_val_loss:.4f} & val_accuracy = {val_acc:.2f}%\\n\") \n", 840 | " # print(f\"\\nepoch: {curr_epoch} training loss = {epoch_loss:.4f}, training accuracy = {epoch_acc * 100:.2f}%, val_loss = {epoch_val_loss:.4f} & val_accuracy = {val_acc:.2f}%\\n\")\n", 841 | "\n", 842 | " \n", 843 | " # Code for manual Early Stopping:\n", 844 | " # if np.abs(epoch_val_loss < best_val_loss) >= minimum_delta:\n", 845 | " if (epoch_val_loss < best_val_loss) and np.abs(epoch_val_loss - best_val_loss) >= minimum_delta:\n", 846 | " # print(f\"epoch_val_loss = {epoch_val_loss:.4f}, best_val_loss = {best_val_loss:.4f}\")\n", 847 | " \n", 848 | " # update 'best_val_loss' variable to lowest loss encountered so far-\n", 849 | " best_val_loss = epoch_val_loss\n", 850 | " \n", 851 | " # reset 'loc_patience' variable-\n", 852 | " loc_patience = 0\n", 853 | " \n", 854 | " print(f\"\\nSaving model with lowest val_loss = {epoch_val_loss:.4f}\")\n", 855 | " \n", 856 | " # Save trained model with validation accuracy-\n", 857 | " # torch.save(model.state_dict, f\"LeNet-300-100_Trained_{val_acc}.pth\")\n", 858 | " torch.save(model.state_dict(), \"LeNet-300-100_Trained.pth\")\n", 859 | " \n", 860 | " else: # there is no improvement in monitored metric 'val_loss'\n", 861 | " loc_patience += 1 # number of epochs without any improvement\n", 862 | "\n", 863 | "\n", 864 | " training_acc.append(epoch_acc * 100)\n", 865 | " validation_acc.append(val_acc)\n", 866 | " training_loss.append(epoch_loss)\n", 867 | " validation_loss.append(epoch_val_loss)\n", 868 | " " 869 | ] 870 | }, 871 | { 872 | "cell_type": "code", 873 | "execution_count": null, 874 | "id": "empirical-swift", 875 | "metadata": {}, 876 | "outputs": [], 877 | "source": [] 878 | }, 879 | { 880 | "cell_type": "code", 881 | "execution_count": 28, 882 | "id": "declared-thursday", 883 | "metadata": {}, 884 | "outputs": [], 885 | "source": [ 886 | "os.chdir(\"/home/arjun/Deep_Learning_Resources/LTH-Resources/\")" 887 | ] 888 | }, 889 | { 890 | "cell_type": "code", 891 | "execution_count": 29, 892 | "id": "published-compiler", 893 | "metadata": {}, 894 | "outputs": [], 895 | "source": [ 896 | "# Initialize a new model for best weights achieved during training-\n", 897 | "best_model = LeNet300()" 898 | ] 899 | }, 900 | { 901 | "cell_type": "code", 902 | "execution_count": 30, 903 | "id": "preceding-receipt", 904 | "metadata": {}, 905 | "outputs": [ 906 | { 907 | "data": { 908 | "text/plain": [ 909 | "" 910 | ] 911 | }, 912 | "execution_count": 30, 913 | "metadata": {}, 914 | "output_type": "execute_result" 915 | } 916 | ], 917 | "source": [ 918 | "best_model.load_state_dict(torch.load('LeNet-300-100_Trained.pth'))" 919 | ] 920 | }, 921 | { 922 | "cell_type": "code", 923 | "execution_count": null, 924 | "id": "indie-provision", 925 | "metadata": {}, 926 | "outputs": [], 927 | "source": [] 928 | }, 929 | { 930 | "cell_type": "code", 931 | "execution_count": 31, 932 | "id": "exceptional-poverty", 933 | "metadata": {}, 934 | "outputs": [], 935 | "source": [ 936 | "# Compute trained model's metrics on validation data-\n", 937 | "val_loss, val_acc = test(model = best_model, epoch = 1)" 938 | ] 939 | }, 940 | { 941 | "cell_type": "code", 942 | "execution_count": 32, 943 | "id": "precious-highlight", 944 | "metadata": {}, 945 | "outputs": [ 946 | { 947 | "name": "stdout", 948 | "output_type": "stream", 949 | "text": [ 950 | "\n", 951 | "LeNet-300-100 trained model metrics:\n", 952 | "val_loss = 0.0917 & val_accuracy = 97.65%\n" 953 | ] 954 | } 955 | ], 956 | "source": [ 957 | "print(\"\\nLeNet-300-100 trained model metrics:\")\n", 958 | "print(f\"val_loss = {val_loss:.4f} & val_accuracy = {val_acc:.2f}%\")" 959 | ] 960 | }, 961 | { 962 | "cell_type": "code", 963 | "execution_count": null, 964 | "id": "infectious-syndication", 965 | "metadata": {}, 966 | "outputs": [], 967 | "source": [] 968 | }, 969 | { 970 | "cell_type": "code", 971 | "execution_count": 33, 972 | "id": "korean-raising", 973 | "metadata": {}, 974 | "outputs": [], 975 | "source": [ 976 | "# Delete 'model' since it's performance degraded due to 'patience' which led to over-fitting-\n", 977 | "del model" 978 | ] 979 | }, 980 | { 981 | "cell_type": "code", 982 | "execution_count": null, 983 | "id": "close-league", 984 | "metadata": {}, 985 | "outputs": [], 986 | "source": [] 987 | }, 988 | { 989 | "cell_type": "code", 990 | "execution_count": null, 991 | "id": "empirical-renewal", 992 | "metadata": {}, 993 | "outputs": [], 994 | "source": [] 995 | }, 996 | { 997 | "cell_type": "code", 998 | "execution_count": 34, 999 | "id": "incoming-projection", 1000 | "metadata": {}, 1001 | "outputs": [], 1002 | "source": [ 1003 | "def prune_lenet(model, pruning_params_fc, pruning_params_op):\n", 1004 | " '''\n", 1005 | " Function to prune top p% of trained weights using the provided parameters using\n", 1006 | " magnitude-based weight pruning.\n", 1007 | " \n", 1008 | " Inputs:\n", 1009 | " 'model' is the PyTorch 1.7 defined neural network\n", 1010 | " 'pruning_params_fc' is the percentage of weights to prune for dense, fully-connected layer\n", 1011 | " 'pruning_params_op' is the percentage of weights to prune for output layer\n", 1012 | "\n", 1013 | " Returns:\n", 1014 | " Python dict containing pruned layers\n", 1015 | " '''\n", 1016 | " \n", 1017 | " # Python3 dict to hold pruned weights-\n", 1018 | " pruned_d = {}\n", 1019 | " \n", 1020 | " # Sample code- populate each layer with relevant weights-\n", 1021 | " for layer_name, param in best_model.named_parameters():\n", 1022 | " # pruned_d[layer_name] = torch.zeros_like(param.data)\n", 1023 | " x = param.data.numpy()\n", 1024 | " \n", 1025 | " if len(x.shape) == 2 and x.shape[0] != 10:\n", 1026 | " # FC layer-\n", 1027 | " # print(layer_name, param.shape)\n", 1028 | " \n", 1029 | " # Compute absolute value of 'x'-\n", 1030 | " x_abs = np.abs(x)\n", 1031 | "\n", 1032 | " # Mask values to zero which are less than 'p' in terms of magnitude-\n", 1033 | " x_abs[x_abs < np.percentile(x_abs, pruning_params_fc)] = 0\n", 1034 | "\n", 1035 | " # Where 'x_abs' equals 0, keep 0, else, replace with values of 'x'-\n", 1036 | " # OR\n", 1037 | " # If x_abs == 0 (condition) is True, use the value of 0, otherwise\n", 1038 | " # use the value in 'x'\n", 1039 | " x_mod = np.where(x_abs == 0, 0, x)\n", 1040 | " \n", 1041 | " # Counts the number of non-zero values in the array 'x_mod'-\n", 1042 | " # np.count_nonzero(x_mod)\n", 1043 | " \n", 1044 | " # pruned_weights.append(x_mod)\n", 1045 | " pruned_d[layer_name] = torch.from_numpy(x_mod)\n", 1046 | " \n", 1047 | " elif len(x.shape) == 2 and x.shape[0] == 10:\n", 1048 | " # print(\"output layer\", param.shape)\n", 1049 | " \n", 1050 | " # Output layer-\n", 1051 | " # print(layer_name, param.shape)\n", 1052 | " \n", 1053 | " # Compute absolute value of 'x'-\n", 1054 | " x_abs = np.abs(x)\n", 1055 | "\n", 1056 | " # Mask values to zero which are less than 'p' in terms of magnitude-\n", 1057 | " x_abs[x_abs < np.percentile(x_abs, pruning_params_op)] = 0\n", 1058 | "\n", 1059 | " # Where 'x_abs' equals 0, keep 0, else, replace with values of 'x'-\n", 1060 | " # OR\n", 1061 | " # If x_abs == 0 (condition) is True, use the value of 0, otherwise\n", 1062 | " # use the value in 'x'\n", 1063 | " x_mod = np.where(x_abs == 0, 0, x)\n", 1064 | " \n", 1065 | " # Counts the number of non-zero values in the array 'x_mod'-\n", 1066 | " # np.count_nonzero(x_mod)\n", 1067 | " \n", 1068 | " # pruned_weights.append(x_mod)\n", 1069 | " pruned_d[layer_name] = torch.from_numpy(x_mod)\n", 1070 | " \n", 1071 | " else:\n", 1072 | " pruned_d[layer_name] = param.data\n", 1073 | "\n", 1074 | " \n", 1075 | " return pruned_d\n", 1076 | " \n", 1077 | " " 1078 | ] 1079 | }, 1080 | { 1081 | "cell_type": "code", 1082 | "execution_count": null, 1083 | "id": "psychological-deposit", 1084 | "metadata": {}, 1085 | "outputs": [], 1086 | "source": [] 1087 | }, 1088 | { 1089 | "cell_type": "code", 1090 | "execution_count": 43, 1091 | "id": "periodic-commercial", 1092 | "metadata": {}, 1093 | "outputs": [], 1094 | "source": [ 1095 | "# Prune 15% of smallest magnitude weights in FC layers and 10% in output layer-\n", 1096 | "pruned_d = prune_lenet(model = best_model, pruning_params_fc = 15, pruning_params_op = 10)" 1097 | ] 1098 | }, 1099 | { 1100 | "cell_type": "code", 1101 | "execution_count": 36, 1102 | "id": "banner-narrative", 1103 | "metadata": {}, 1104 | "outputs": [ 1105 | { 1106 | "data": { 1107 | "text/plain": [ 1108 | "dict" 1109 | ] 1110 | }, 1111 | "execution_count": 36, 1112 | "metadata": {}, 1113 | "output_type": "execute_result" 1114 | } 1115 | ], 1116 | "source": [ 1117 | "type(pruned_d)" 1118 | ] 1119 | }, 1120 | { 1121 | "cell_type": "code", 1122 | "execution_count": null, 1123 | "id": "thorough-equation", 1124 | "metadata": {}, 1125 | "outputs": [], 1126 | "source": [] 1127 | }, 1128 | { 1129 | "cell_type": "code", 1130 | "execution_count": 44, 1131 | "id": "humanitarian-scott", 1132 | "metadata": {}, 1133 | "outputs": [ 1134 | { 1135 | "data": { 1136 | "text/plain": [ 1137 | "" 1138 | ] 1139 | }, 1140 | "execution_count": 44, 1141 | "metadata": {}, 1142 | "output_type": "execute_result" 1143 | } 1144 | ], 1145 | "source": [ 1146 | "# Initialize and load pruned Python3 dict into a new model-\n", 1147 | "pruned_model = LeNet300()\n", 1148 | "pruned_model.load_state_dict(pruned_d)" 1149 | ] 1150 | }, 1151 | { 1152 | "cell_type": "code", 1153 | "execution_count": null, 1154 | "id": "accepting-ancient", 1155 | "metadata": {}, 1156 | "outputs": [], 1157 | "source": [] 1158 | }, 1159 | { 1160 | "cell_type": "code", 1161 | "execution_count": 45, 1162 | "id": "handed-arthritis", 1163 | "metadata": {}, 1164 | "outputs": [], 1165 | "source": [ 1166 | "params_pruned = count_params(pruned_model)" 1167 | ] 1168 | }, 1169 | { 1170 | "cell_type": "code", 1171 | "execution_count": 46, 1172 | "id": "innovative-wound", 1173 | "metadata": {}, 1174 | "outputs": [ 1175 | { 1176 | "name": "stdout", 1177 | "output_type": "stream", 1178 | "text": [ 1179 | "# of non-zero parameters in pruned model = 226730\n" 1180 | ] 1181 | } 1182 | ], 1183 | "source": [ 1184 | "print(f\"# of non-zero parameters in pruned model = {params_pruned.numpy()}\")" 1185 | ] 1186 | }, 1187 | { 1188 | "cell_type": "code", 1189 | "execution_count": null, 1190 | "id": "conscious-space", 1191 | "metadata": {}, 1192 | "outputs": [], 1193 | "source": [] 1194 | }, 1195 | { 1196 | "cell_type": "code", 1197 | "execution_count": null, 1198 | "id": "bridal-player", 1199 | "metadata": {}, 1200 | "outputs": [], 1201 | "source": [] 1202 | }, 1203 | { 1204 | "cell_type": "code", 1205 | "execution_count": null, 1206 | "id": "arabic-algeria", 1207 | "metadata": {}, 1208 | "outputs": [], 1209 | "source": [] 1210 | }, 1211 | { 1212 | "cell_type": "code", 1213 | "execution_count": null, 1214 | "id": "gorgeous-strand", 1215 | "metadata": {}, 1216 | "outputs": [], 1217 | "source": [] 1218 | }, 1219 | { 1220 | "cell_type": "markdown", 1221 | "id": "frank-convenience", 1222 | "metadata": {}, 1223 | "source": [ 1224 | "### Re-train pruned model:" 1225 | ] 1226 | }, 1227 | { 1228 | "cell_type": "code", 1229 | "execution_count": 47, 1230 | "id": "fiscal-medline", 1231 | "metadata": {}, 1232 | "outputs": [], 1233 | "source": [ 1234 | "# User input parameters for Early Stopping in manual implementation-\n", 1235 | "minimum_delta = 0.001\n", 1236 | "patience = 5\n", 1237 | "\n", 1238 | "# Initialize parameters for Early Stopping manual implementation-\n", 1239 | "best_val_loss = 100\n", 1240 | "loc_patience = 0" 1241 | ] 1242 | }, 1243 | { 1244 | "cell_type": "code", 1245 | "execution_count": 48, 1246 | "id": "indian-keyboard", 1247 | "metadata": {}, 1248 | "outputs": [], 1249 | "source": [ 1250 | "# Python3 lists to store model training metrics-\n", 1251 | "training_acc = []\n", 1252 | "validation_acc = []\n", 1253 | "training_loss = []\n", 1254 | "validation_loss = []" 1255 | ] 1256 | }, 1257 | { 1258 | "cell_type": "code", 1259 | "execution_count": null, 1260 | "id": "photographic-tutorial", 1261 | "metadata": {}, 1262 | "outputs": [], 1263 | "source": [] 1264 | }, 1265 | { 1266 | "cell_type": "code", 1267 | "execution_count": 49, 1268 | "id": "lovely-playback", 1269 | "metadata": {}, 1270 | "outputs": [ 1271 | { 1272 | "name": "stdout", 1273 | "output_type": "stream", 1274 | "text": [ 1275 | "\n", 1276 | "epoch: 1, # of params = 226730, training loss = 0.0285, training accuracy = 99.04%, val_loss = 0.0910 & val_accuracy = 97.68%\n", 1277 | "\n", 1278 | "\n", 1279 | "Saving model with lowest val_loss = 0.0910\n", 1280 | "\n", 1281 | "epoch: 2, # of params = 226730, training loss = 0.0285, training accuracy = 99.04%, val_loss = 0.0910 & val_accuracy = 97.68%\n", 1282 | "\n", 1283 | "\n", 1284 | "epoch: 3, # of params = 226730, training loss = 0.0285, training accuracy = 99.04%, val_loss = 0.0910 & val_accuracy = 97.68%\n", 1285 | "\n", 1286 | "\n", 1287 | "epoch: 4, # of params = 226730, training loss = 0.0285, training accuracy = 99.04%, val_loss = 0.0910 & val_accuracy = 97.68%\n", 1288 | "\n", 1289 | "\n", 1290 | "epoch: 5, # of params = 226730, training loss = 0.0285, training accuracy = 99.04%, val_loss = 0.0910 & val_accuracy = 97.68%\n", 1291 | "\n", 1292 | "\n", 1293 | "epoch: 6, # of params = 226730, training loss = 0.0285, training accuracy = 99.04%, val_loss = 0.0910 & val_accuracy = 97.68%\n", 1294 | "\n", 1295 | "\n", 1296 | "\n", 1297 | "Early stopping called. Exiting model training!\n", 1298 | "\n", 1299 | "\n" 1300 | ] 1301 | } 1302 | ], 1303 | "source": [ 1304 | "# Training loop-\n", 1305 | "for curr_epoch in range(1, num_epochs):\n", 1306 | " \n", 1307 | " if loc_patience >= patience:\n", 1308 | " print(\"\\n\\nEarly stopping called. Exiting model training!\\n\\n\")\n", 1309 | " break\n", 1310 | " \n", 1311 | " # epoch_loss, epoch_acc = train(model = model, epoch = curr_epoch)\n", 1312 | " epoch_loss, epoch_acc = train_with_grad_freezing(model = pruned_model, epoch = curr_epoch)\n", 1313 | " epoch_val_loss, val_acc = test(model = pruned_model, epoch = curr_epoch)\n", 1314 | " \n", 1315 | " remaining_params = count_params(pruned_model)\n", 1316 | " \n", 1317 | " print(f\"\\nepoch: {curr_epoch}, # of params = {remaining_params}, training loss = {epoch_loss:.4f}, training accuracy = {epoch_acc * 100:.2f}%, val_loss = {epoch_val_loss:.4f} & val_accuracy = {val_acc:.2f}%\\n\") \n", 1318 | " # print(f\"\\nepoch: {curr_epoch} training loss = {epoch_loss:.4f}, training accuracy = {epoch_acc * 100:.2f}%, val_loss = {epoch_val_loss:.4f} & val_accuracy = {val_acc:.2f}%\\n\")\n", 1319 | "\n", 1320 | " \n", 1321 | " # Code for manual Early Stopping:\n", 1322 | " # if np.abs(epoch_val_loss < best_val_loss) >= minimum_delta:\n", 1323 | " if (epoch_val_loss < best_val_loss) and np.abs(epoch_val_loss - best_val_loss) >= minimum_delta:\n", 1324 | " # print(f\"epoch_val_loss = {epoch_val_loss:.4f}, best_val_loss = {best_val_loss:.4f}\")\n", 1325 | " \n", 1326 | " # update 'best_val_loss' variable to lowest loss encountered so far-\n", 1327 | " best_val_loss = epoch_val_loss\n", 1328 | " \n", 1329 | " # reset 'loc_patience' variable-\n", 1330 | " loc_patience = 0\n", 1331 | " \n", 1332 | " print(f\"\\nSaving model with lowest val_loss = {epoch_val_loss:.4f}\")\n", 1333 | " \n", 1334 | " # Save trained model with validation accuracy-\n", 1335 | " # torch.save(model.state_dict, f\"LeNet-300-100_Trained_{val_acc}.pth\")\n", 1336 | " torch.save(pruned_model.state_dict(), \"LeNet-300-100_Test_Trained.pth\")\n", 1337 | " \n", 1338 | " else: # there is no improvement in monitored metric 'val_loss'\n", 1339 | " loc_patience += 1 # number of epochs without any improvement\n", 1340 | "\n", 1341 | "\n", 1342 | " training_acc.append(epoch_acc * 100)\n", 1343 | " validation_acc.append(val_acc)\n", 1344 | " training_loss.append(epoch_loss)\n", 1345 | " validation_loss.append(epoch_val_loss)\n", 1346 | " " 1347 | ] 1348 | }, 1349 | { 1350 | "cell_type": "code", 1351 | "execution_count": null, 1352 | "id": "exceptional-tissue", 1353 | "metadata": {}, 1354 | "outputs": [], 1355 | "source": [] 1356 | }, 1357 | { 1358 | "cell_type": "code", 1359 | "execution_count": null, 1360 | "id": "beginning-singing", 1361 | "metadata": {}, 1362 | "outputs": [], 1363 | "source": [] 1364 | } 1365 | ], 1366 | "metadata": { 1367 | "kernelspec": { 1368 | "display_name": "Python 3", 1369 | "language": "python", 1370 | "name": "python3" 1371 | }, 1372 | "language_info": { 1373 | "codemirror_mode": { 1374 | "name": "ipython", 1375 | "version": 3 1376 | }, 1377 | "file_extension": ".py", 1378 | "mimetype": "text/x-python", 1379 | "name": "python", 1380 | "nbconvert_exporter": "python", 1381 | "pygments_lexer": "ipython3", 1382 | "version": "3.8.8" 1383 | } 1384 | }, 1385 | "nbformat": 4, 1386 | "nbformat_minor": 5 1387 | } 1388 | --------------------------------------------------------------------------------