├── LeNet_300_MNIST_Magnitude_Winning_Ticket_Distribution_91.18900266306589.h5
├── README.md
├── LeNet300_MNIST_Itraining_torch.py
├── Check_Winning_Ticket.ipynb
├── Conv_4_LTH_CIFAR_10_winning_ticket_verification.ipynb
├── Quantization_LTH_LeNet_300_100_MNIST.ipynb
└── LeNet_300_100-Iterative_Pruning.ipynb


/LeNet_300_MNIST_Magnitude_Winning_Ticket_Distribution_91.18900266306589.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/arjun-majumdar/Lottery_Ticket_Hypothesis-TensorFlow_2/HEAD/LeNet_300_MNIST_Magnitude_Winning_Ticket_Distribution_91.18900266306589.h5


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # The Lottery Ticket Hypothesis: Finding Sparse, Trainable Neural Networks - Using TensorFlow 2
 2 | 
 3 | A GitHub repository implementing __The Lottery Ticket Hypothesis__ paper by _Jonathan Frankle & Michael Carbin_
 4 | 
 5 | "lottery ticket hypothesis:" dense, randomly-initialized, feed-forward and/or convolutional networks contain subnetworks ("winning tickets") that - when trained in isolation - reach test accuracy comparable to the original network in a similar number of iterations. The winning tickets we find have won the initialization lottery: their connections have initial weights that make training particularly effective. 
 6 | 
 7 | The paper can be downloaded from:
 8 | [The Lottery Ticket Hypothesis](https://arxiv.org/abs/1803.03635)
 9 | 
10 | 
11 | # Comparing Rewinding and Fine-tuning in Neural Network Pruning - using PyTorch 2.X
12 | 
13 | Implementation for the paper __Comparing Rewinding and Fine-tuning in Neural Network Pruning__ by Alex Renda et al.
14 | 
15 | 
16 | ## LTH Codes:
17 | 1. Winning ticket identification on the MNIST dataset using a 300-100-10 fully connected (dense) neural network.
18 | 1. MNIST dataset using LeNet-5 Convolutional Neural Networks.
19 | 1. Validation of the winning ticket identified for MNIST and CIFAR-10 dataset using relevant neural networks.
20 | 1. Conv-2/4/6 Convolutional Neural Networks (CNNs) for the CIFAR-10 dataset; each network is pruned until 0.5% of the original connections remain, and the training and testing accuracies and losses are observed.
21 | 1. Pruning Algorithm implementation: numpy based unstructured, layer-wise, absolute magnitude pruning and _tensorflow_model_optimization_ toolkit based pruning (not the focus of most codes)
22 | 
23 | 
24 | 
25 | 
26 | ### Prerequisites for the code to run:
27 | - Python 3.X
28 | - numpy 1.17 or above
29 | - TensorFlow 2.0
30 | - PyTorch 2.X
31 | - [tensorflow_model_optimization](https://www.tensorflow.org/model_optimization/guide/pruning/pruning_with_keras) (not focused on)
32 | 


--------------------------------------------------------------------------------
/LeNet300_MNIST_Itraining_torch.py:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | import torch
  4 | import torch.nn.functional as F
  5 | import torch.nn as nn
  6 | import numpy as np
  7 | import pickle
  8 | from tqdm import tqdm, trange
  9 | from LeNet300_swish_torch import LeNet300, init_weights
 10 | from get_mnist_data import mnist_dataset
 11 | 
 12 | 
 13 | print(f"torch version: {torch.__version__}")
 14 | 
 15 | # Check if there are multiple devices (i.e., GPU cards)-
 16 | print(f"Number of GPU(s) available = {torch.cuda.device_count()}")
 17 | 
 18 | if torch.cuda.is_available():
 19 |     print(f"Current GPU: {torch.cuda.current_device()}")
 20 |     print(f"Current GPU name: {torch.cuda.get_device_name(torch.cuda.current_device())}")
 21 | else:
 22 |     print("PyTorch does not have access to GPU")
 23 | 
 24 | # Device configuration-
 25 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 26 | print(f'Available device is {device}\n\n')
 27 | 
 28 | 
 29 | path_files = "/home/amajumdar/Downloads/.data/"
 30 | batch_size = 512
 31 | 
 32 | train_dataset, test_dataset, train_loader, test_loader = mnist_dataset(
 33 |     path_to_files = path_files, batch_size = batch_size
 34 |     )
 35 | 
 36 | 
 37 | model = LeNet300(beta = 1.0)
 38 | model.apply(init_weights)
 39 | 
 40 | # Save randomly initialized parameters-
 41 | torch.save(model.state_dict(), "LeNet300_randomwts.pth")
 42 | 
 43 | 
 44 | def count_trainable_params(model):
 45 |     # Count number of layer-wise parameters and total parameters-
 46 |     tot_params = 0
 47 |     for param in model.parameters():
 48 |         layer_param = torch.count_nonzero(param)
 49 |         tot_params += layer_param.item()
 50 | 
 51 |     return tot_params
 52 | 
# Number of non-zero parameters in the freshly initialized model-
tot_params = count_trainable_params(model)
 54 | 
 55 | 
 56 | class CosineScheduler:
 57 |     def __init__(
 58 |         self, max_update,
 59 |         base_lr = 0.01, final_lr = 0,
 60 |         warmup_steps = 0, warmup_begin_lr = 0
 61 |     ):
 62 |         self.base_lr_orig = base_lr
 63 |         self.max_update = max_update
 64 |         self.final_lr = final_lr
 65 |         self.warmup_steps = warmup_steps
 66 |         self.warmup_begin_lr = warmup_begin_lr
 67 |         self.max_steps = self.max_update - self.warmup_steps
 68 | 
 69 | 
 70 |     def get_warmup_lr(self, epoch):
 71 |         increase = (self.base_lr_orig - self.warmup_begin_lr) \
 72 |                        * float(epoch) / float(self.warmup_steps)
 73 |         return self.warmup_begin_lr + increase
 74 | 
 75 | 
 76 |     def __call__(self, epoch):
 77 |         if epoch < self.warmup_steps:
 78 |             return self.get_warmup_lr(epoch)
 79 |         if epoch <= self.max_update:
 80 |             self.base_lr = self.final_lr + (
 81 |                 self.base_lr_orig - self.final_lr) * (1 + np.cos(
 82 |                 np.pi * (epoch - self.warmup_steps) / self.max_steps)) / 2
 83 |         return self.base_lr
 84 | 
 85 | 
 86 | def train_one_epoch(
 87 |     model, train_loader,
 88 |     train_dataset, optimizer
 89 |     ):
 90 |     '''
 91 |     Function to perform one epoch of training by using 'train_loader'.
 92 |     Returns loss and number of correct predictions for this epoch.
 93 |     '''
 94 |     running_loss = 0.0
 95 |     running_corrects = 0.0
 96 | 
 97 |     model.train()
 98 | 
 99 |     with tqdm(train_loader, unit = 'batch') as tepoch:
100 |         for images, labels in tepoch:
101 |             tepoch.set_description(f"Training: ")
102 |             images = images.reshape(-1, 28 * 28)
103 |             images = images.to(device)
104 |             labels = labels.to(device)
105 | 
106 |             # Get model predictions-
107 |             preds = model(images)
108 | 
109 |             # Compute loss-
110 |             # output layer applies log-softmax (row-wise), hence, use
111 |             # NLL-loss instead of Cross-entropy cost function-
112 |             # loss = torch.nn.functional.nll_loss(preds, labels)
113 |             cost_fn = nn.CrossEntropyLoss()
114 |             loss = cost_fn(preds, labels)
115 | 
116 |             # Empty accumulated gradients-
117 |             optimizer.zero_grad()
118 | 
119 |             # Perform backprop-
120 |             loss.backward()
121 | 
122 |             # Update parameters-
123 |             optimizer.step()
124 | 
125 |             '''
126 |             # LR scheduler-
127 |             global step
128 |             optimizer.param_groups[0]['lr'] = custom_lr_scheduler.get_lr(step)
129 |             step += 1
130 |             '''
131 | 
132 |             # Compute model's performance statistics-
133 |             running_loss += loss.item() * images.size(0)
134 |             _, predicted = torch.max(preds, 1)
135 |             running_corrects += torch.sum(predicted == labels.data)
136 | 
137 |             tepoch.set_postfix(
138 |                 loss = running_loss / len(train_dataset),
139 |                 accuracy = (running_corrects.double().cpu().numpy() / len(train_dataset)) * 100
140 |             )
141 | 
142 |     train_loss = running_loss / len(train_dataset)
143 |     train_acc = (running_corrects.double() / len(train_dataset)) * 100
144 | 
145 |     return train_loss, train_acc.cpu().numpy()
146 | 
147 | 
def test_one_epoch(model, test_loader, test_dataset):
    '''
    Function to evaluate 'model' once over 'test_loader' without tracking
    gradients.

    Every batch is flattened to 28 * 28 features and moved to the
    module-level 'device'; the model is expected to live on that device.
    The model is switched to evaluation mode and left in that mode.

    Returns a tuple (test_loss, test_acc):
    - test_loss: sum of per-batch loss * batch size, divided by
      len(test_dataset).
    - test_acc: percentage of correct predictions over the number of labels
      seen, as a 0-dim numpy value (NaN if the loader yields no batches).
    '''
    total = 0.0
    # Tensor accumulator (instead of a Python float) so that '.cpu()' below
    # is valid even if the loader yields no batches-
    correct = torch.zeros((), device = device)
    running_loss_test = 0.0

    # Loss module is stateless; build it once instead of once per batch-
    cost_fn = nn.CrossEntropyLoss()

    # Set model to evaluation mode (once, not per batch)-
    model.eval()

    with torch.no_grad():
        with tqdm(test_loader, unit = 'batch') as tepoch:
            tepoch.set_description("Testing: ")

            for images, labels in tepoch:
                images = images.reshape(-1, 28 * 28)
                images = images.to(device)
                labels = labels.to(device)

                # Predict using trained model-
                outputs = model(images)
                _, y_pred = torch.max(outputs, 1)

                # Compute test loss-
                J_test = cost_fn(outputs, labels)

                running_loss_test += J_test.item() * labels.size(0)

                # Total number of labels-
                total += labels.size(0)

                # Total number of correct predictions-
                correct += (y_pred == labels).sum()

                tepoch.set_postfix(
                    test_loss = running_loss_test / len(test_dataset),
                    test_acc = 100 * (correct.cpu().numpy() / total)
                )


    test_loss = running_loss_test / len(test_dataset)
    test_acc = (correct / total) * 100

    return test_loss, test_acc.cpu().numpy()
193 | 
194 | 
def train_until_convergence(
    model,
    train_dataset, test_dataset,
    train_loader, test_loader,
    num_epochs = 50, warmup_epochs = 10,
    best_test_acc = 90
    ):
    '''
    Function to train 'model' for 'num_epochs' epochs with SGD and a
    warm-up + cosine-decay learning rate, evaluating after every epoch.

    Whenever the test accuracy exceeds the best value seen so far (starting
    from 'best_test_acc', in percent), the model and optimizer state dicts
    are written to "LeNet300_best_testacc_model.pth" and
    "LeNet300_best_optimizer.pth" in the current working directory.

    Returns a dict mapping the 1-based epoch number to a dict with keys
    'loss', 'acc', 'test_loss', 'test_acc' and 'lr'.
    '''

    # Python3 dict to contain training metrics-
    train_history = {}

    # Use SGD optimizer. The 'lr' given here is only a placeholder: it is
    # overwritten from the scheduler at the start of every epoch-
    optimizer = torch.optim.SGD(
        params = model.parameters(), lr = 0.0001,
        momentum = 0.9, weight_decay = 5e-4
    )

    # Decay lr in cosine manner until 45th epoch-
    scheduler = CosineScheduler(
        max_update = 45, base_lr = 0.03,
        final_lr = 0.001, warmup_steps = warmup_epochs,
        warmup_begin_lr = 0.0001
    )


    for epoch in range(1, num_epochs + 1):

        # Update LR scheduler-
        for param_group in optimizer.param_groups:
            param_group['lr'] = scheduler(epoch)

        # Train and validate model for 1 epoch-
        train_loss, train_acc = train_one_epoch(
            model = model, train_loader = train_loader,
            train_dataset = train_dataset,
            optimizer = optimizer
        )

        test_loss, test_acc = test_one_epoch(
            model = model, test_loader = test_loader,
            test_dataset = test_dataset
        )

        curr_lr = optimizer.param_groups[0]['lr']

        # 'epoch' is already 1-based (see range() above), so it is reported
        # and stored as-is; adding 1 mislabelled the epochs as 2..N+1-
        print(f"\nepoch: {epoch} train loss = {train_loss:.4f}, "
            f"train accuracy = {train_acc:.2f}%, test loss = {test_loss:.4f}"
            f", test accuracy = {test_acc:.2f}% "
            f"LR = {curr_lr:.4f}\n")

        train_history[epoch] = {
            'loss': train_loss, 'acc': train_acc,
            'test_loss': test_loss, 'test_acc': test_acc,
            'lr': curr_lr,
        }

        # Save best weights achieved until now-
        if (test_acc > best_test_acc):
            # update 'best_test_acc' to the highest test accuracy
            # encountered so far-
            best_test_acc = test_acc

            print(f"Saving model with highest test acc = {test_acc:.3f}%\n")

            # Save trained model with 'best' testing accuracy-
            torch.save(model.state_dict(), "LeNet300_best_testacc_model.pth")
            torch.save(optimizer.state_dict(), "LeNet300_best_optimizer.pth")

    return train_history
267 | 
268 | 
# Run the full training schedule on the module-level model and data
# loaders; checkpoints are written by train_until_convergence() itself-
train_history = train_until_convergence(
    model = model,
    train_dataset = train_dataset, test_dataset = test_dataset,
    train_loader = train_loader, test_loader = test_loader,
    num_epochs = 50, warmup_epochs = 10,
    best_test_acc = 90
)

# Persist the per-epoch metrics dict for later inspection-
with open("LeNet300_train_history.pkl", "wb") as file:
    pickle.dump(train_history, file)
# The 'with' block has already closed the file; this only removes the
# leftover name from the module namespace-
del file
280 | 
281 | 
282 | 


--------------------------------------------------------------------------------
/Check_Winning_Ticket.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import tensorflow as tf\n",
 10 |     "import numpy as np\n",
 11 |     "import math\n",
 12 |     "import tensorflow_model_optimization as tfmot\n",
 13 |     "from tensorflow_model_optimization.sparsity import keras as sparsity\n",
 14 |     "# from tensorflow.keras import datasets, layers, models\n",
 15 |     "import matplotlib.pyplot as plt\n",
 16 |     "from tensorflow.keras.layers import AveragePooling2D, Conv2D\n",
 17 |     "from tensorflow.keras import models, layers, datasets\n",
 18 |     "from tensorflow.keras.layers import Dense, Flatten, Reshape, Input, InputLayer\n",
 19 |     "from tensorflow.keras.models import Sequential, Model\n",
 20 |     "from tensorflow.keras.initializers import RandomNormal\n",
 21 |     "# import math\n",
 22 |     "from sklearn.metrics import accuracy_score, precision_score, recall_score"
 23 |    ]
 24 |   },
 25 |   {
 26 |    "cell_type": "code",
 27 |    "execution_count": 2,
 28 |    "metadata": {},
 29 |    "outputs": [
 30 |     {
 31 |      "data": {
 32 |       "text/plain": [
 33 |        "'2.0.0'"
 34 |       ]
 35 |      },
 36 |      "execution_count": 2,
 37 |      "metadata": {},
 38 |      "output_type": "execute_result"
 39 |     }
 40 |    ],
 41 |    "source": [
 42 |     "tf.__version__"
 43 |    ]
 44 |   },
 45 |   {
 46 |    "cell_type": "code",
 47 |    "execution_count": 3,
 48 |    "metadata": {},
 49 |    "outputs": [],
 50 |    "source": [
 51 |     "batch_size = 32\n",
 52 |     "num_classes = 10\n",
 53 |     "num_epochs = 50"
 54 |    ]
 55 |   },
 56 |   {
 57 |    "cell_type": "code",
 58 |    "execution_count": 4,
 59 |    "metadata": {},
 60 |    "outputs": [],
 61 |    "source": [
 62 |     "# Data preprocessing and cleaning:\n",
 63 |     "# input image dimensions\n",
 64 |     "img_rows, img_cols = 28, 28\n",
 65 |     "\n",
 66 |     "# Load MNIST dataset-\n",
 67 |     "(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()"
 68 |    ]
 69 |   },
 70 |   {
 71 |    "cell_type": "code",
 72 |    "execution_count": 5,
 73 |    "metadata": {},
 74 |    "outputs": [
 75 |     {
 76 |      "name": "stdout",
 77 |      "output_type": "stream",
 78 |      "text": [
 79 |       "\n",
 80 |       "'input_shape' which will be used = (28, 28, 1)\n",
 81 |       "\n"
 82 |      ]
 83 |     }
 84 |    ],
 85 |    "source": [
 86 |     "if tf.keras.backend.image_data_format() == 'channels_first':\n",
 87 |     "    X_train = X_train.reshape(X_train.shape[0], 1, img_rows, img_cols)\n",
 88 |     "    X_test = X_test.reshape(X_test.shape[0], 1, img_rows, img_cols)\n",
 89 |     "    input_shape = (1, img_rows, img_cols)\n",
 90 |     "else:\n",
 91 |     "    X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, 1)\n",
 92 |     "    X_test = X_test.reshape(X_test.shape[0], img_rows, img_cols, 1)\n",
 93 |     "    input_shape = (img_rows, img_cols, 1)\n",
 94 |     "\n",
 95 |     "print(\"\\n'input_shape' which will be used = {0}\\n\".format(input_shape))"
 96 |    ]
 97 |   },
 98 |   {
 99 |    "cell_type": "code",
100 |    "execution_count": 6,
101 |    "metadata": {},
102 |    "outputs": [],
103 |    "source": [
104 |     "# Convert datasets to floating point types-\n",
105 |     "X_train = X_train.astype('float32')\n",
106 |     "X_test = X_test.astype('float32')\n",
107 |     "\n",
108 |     "# Normalize the training and testing datasets-\n",
109 |     "X_train /= 255.0\n",
110 |     "X_test /= 255.0"
111 |    ]
112 |   },
113 |   {
114 |    "cell_type": "code",
115 |    "execution_count": 7,
116 |    "metadata": {},
117 |    "outputs": [],
118 |    "source": [
119 |     "# convert class vectors/target to binary class matrices or one-hot encoded values-\n",
120 |     "y_train = tf.keras.utils.to_categorical(y_train, num_classes)\n",
121 |     "y_test = tf.keras.utils.to_categorical(y_test, num_classes)"
122 |    ]
123 |   },
124 |   {
125 |    "cell_type": "code",
126 |    "execution_count": 8,
127 |    "metadata": {},
128 |    "outputs": [],
129 |    "source": [
130 |     "# Reshape training and testing sets-\n",
131 |     "X_train = X_train.reshape(X_train.shape[0], 784)\n",
132 |     "X_test = X_test.reshape(X_test.shape[0], 784)"
133 |    ]
134 |   },
135 |   {
136 |    "cell_type": "code",
137 |    "execution_count": 9,
138 |    "metadata": {},
139 |    "outputs": [
140 |     {
141 |      "name": "stdout",
142 |      "output_type": "stream",
143 |      "text": [
144 |       "\n",
145 |       "Dimensions of training and testing sets are:\n",
146 |       "X_train.shape = (60000, 784), y_train = (60000, 10)\n",
147 |       "X_test.shape = (10000, 784), y_test = (10000, 10)\n"
148 |      ]
149 |     }
150 |    ],
151 |    "source": [
152 |     "print(\"\\nDimensions of training and testing sets are:\")\n",
153 |     "print(\"X_train.shape = {0}, y_train = {1}\".format(X_train.shape, y_train.shape))\n",
154 |     "print(\"X_test.shape = {0}, y_test = {1}\".format(X_test.shape, y_test.shape))"
155 |    ]
156 |   },
157 |   {
158 |    "cell_type": "code",
159 |    "execution_count": 10,
160 |    "metadata": {},
161 |    "outputs": [],
162 |    "source": [
163 |     "l = tf.keras.layers"
164 |    ]
165 |   },
166 |   {
167 |    "cell_type": "code",
168 |    "execution_count": 11,
169 |    "metadata": {},
170 |    "outputs": [],
171 |    "source": [
172 |     "def nn_model():\n",
173 |     "    \"\"\"\n",
174 |     "    Function to create LeNet 300-100-10\n",
175 |     "    model for MNIST classification\n",
176 |     "    \"\"\"\n",
177 |     "\n",
178 |     "    model = Sequential()\n",
179 |     "\n",
180 |     "    model.add(l.InputLayer(input_shape=(784, )))\n",
181 |     "\n",
182 |     "    model.add(Flatten())\n",
183 |     "\n",
184 |     "    model.add(Dense(units = 300, activation='relu', kernel_initializer=tf.initializers.GlorotUniform()))\n",
185 |     "\n",
186 |     "    # model.add(l.Dropout(0.2))\n",
187 |     "\n",
188 |     "    model.add(Dense(units = 100, activation='relu', kernel_initializer=tf.initializers.GlorotUniform()))\n",
189 |     "\n",
190 |     "    # model.add(l.Dropout(0.1))\n",
191 |     "\n",
192 |     "    model.add(Dense(units = num_classes, activation='softmax'))\n",
193 |     "    \n",
194 |     "    return model\n"
195 |    ]
196 |   },
197 |   {
198 |    "cell_type": "code",
199 |    "execution_count": 12,
200 |    "metadata": {},
201 |    "outputs": [],
202 |    "source": [
203 |     "# Specify the parameters to be used for layer-wise pruning, NO PRUNING is done here:\n",
204 |     "pruning_params_unpruned = {\n",
205 |     "    'pruning_schedule': sparsity.ConstantSparsity(\n",
206 |     "        target_sparsity=0.0, begin_step=0,\n",
207 |     "        end_step = 0, frequency=100\n",
208 |     "    )\n",
209 |     "}"
210 |    ]
211 |   },
212 |   {
213 |    "cell_type": "code",
214 |    "execution_count": 13,
215 |    "metadata": {},
216 |    "outputs": [],
217 |    "source": [
218 |     "def pruned_nn(pruning_params):\n",
219 |     "    \"\"\"\n",
220 |     "    Function to define the architecture of a neural network model\n",
221 |     "    following 300 100 architecture for MNIST dataset and using\n",
222 |     "    provided parameter which are used to prune the model.\n",
223 |     "    \n",
224 |     "    Input: 'pruning_params' Python 3 dictionary containing parameters which are used for pruning\n",
225 |     "    Output: Returns designed and compiled neural network model\n",
226 |     "    \"\"\"\n",
227 |     "    \n",
228 |     "    pruned_model = Sequential()\n",
229 |     "    pruned_model.add(l.InputLayer(input_shape=(784, )))\n",
230 |     "    pruned_model.add(Flatten())\n",
231 |     "    pruned_model.add(sparsity.prune_low_magnitude(\n",
232 |     "        Dense(units = 300, activation='relu', kernel_initializer=tf.initializers.GlorotUniform()),\n",
233 |     "        **pruning_params))\n",
234 |     "    # pruned_model.add(l.Dropout(0.2))\n",
235 |     "    pruned_model.add(sparsity.prune_low_magnitude(\n",
236 |     "        Dense(units = 100, activation='relu', kernel_initializer=tf.initializers.GlorotUniform()),\n",
237 |     "        **pruning_params))\n",
238 |     "    # pruned_model.add(l.Dropout(0.1))\n",
239 |     "    pruned_model.add(sparsity.prune_low_magnitude(\n",
240 |     "        Dense(units = num_classes, activation='softmax'),\n",
241 |     "        **pruning_params))\n",
242 |     "    \n",
243 |     "    # Compile pruned model-\n",
244 |     "    pruned_model.compile(\n",
245 |     "        loss=tf.keras.losses.categorical_crossentropy,\n",
246 |     "        # optimizer='adam',\n",
247 |     "        optimizer=tf.keras.optimizers.Adam(lr = 0.001),\n",
248 |     "        metrics=['accuracy'])\n",
249 |     "    \n",
250 |     "    return pruned_model\n"
251 |    ]
252 |   },
253 |   {
254 |    "cell_type": "code",
255 |    "execution_count": null,
256 |    "metadata": {},
257 |    "outputs": [],
258 |    "source": []
259 |   },
260 |   {
261 |    "cell_type": "code",
262 |    "execution_count": 14,
263 |    "metadata": {},
264 |    "outputs": [
265 |     {
266 |      "name": "stdout",
267 |      "output_type": "stream",
268 |      "text": [
269 |       "WARNING:tensorflow:From /opt/conda/lib/python3.7/site-packages/tensorflow_model_optimization/python/core/sparsity/keras/pruning_wrapper.py:183: Layer.add_variable (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version.\n",
270 |       "Instructions for updating:\n",
271 |       "Please use `layer.add_weight` method instead.\n"
272 |      ]
273 |     }
274 |    ],
275 |    "source": [
276 |     "# Initialize model-\n",
277 |     "model = pruned_nn(pruning_params_unpruned)"
278 |    ]
279 |   },
280 |   {
281 |    "cell_type": "code",
282 |    "execution_count": 15,
283 |    "metadata": {},
284 |    "outputs": [],
285 |    "source": [
286 |     "# Load winning ticket weights-\n",
287 |     "model.load_weights(\"Winning_Ticket_Weights_Experimental.h5\")"
288 |    ]
289 |   },
290 |   {
291 |    "cell_type": "code",
292 |    "execution_count": 16,
293 |    "metadata": {},
294 |    "outputs": [],
295 |    "source": [
296 |     "# Strip the pruning wrappers from pruned model-\n",
297 |     "model_stripped = sparsity.strip_pruning(model)"
298 |    ]
299 |   },
300 |   {
301 |    "cell_type": "code",
302 |    "execution_count": 17,
303 |    "metadata": {},
304 |    "outputs": [
305 |     {
306 |      "name": "stdout",
307 |      "output_type": "stream",
308 |      "text": [
309 |       "\n",
310 |       "In Winning Ticket, number of nonzero parameters in each layer are: \n",
311 |       "\n",
312 |       "49627\n",
313 |       "0\n",
314 |       "6330\n",
315 |       "0\n",
316 |       "211\n",
317 |       "0\n",
318 |       "\n",
319 |       "Total number of trainable parameters = 56168\n",
320 |       "\n"
321 |      ]
322 |     }
323 |    ],
324 |    "source": [
325 |     "print(\"\\nIn Winning Ticket, number of nonzero parameters in each layer are: \\n\")\n",
326 |     "\n",
327 |     "model_sum_params = 0\n",
328 |     "\n",
329 |     "for layer in model_stripped.trainable_weights:\n",
330 |     "    print(tf.math.count_nonzero(layer, axis = None).numpy())\n",
331 |     "    model_sum_params += tf.math.count_nonzero(layer, axis = None).numpy()\n",
332 |     "\n",
333 |     "print(\"\\nTotal number of trainable parameters = {0}\\n\".format(model_sum_params))"
334 |    ]
335 |   },
336 |   {
337 |    "cell_type": "code",
338 |    "execution_count": null,
339 |    "metadata": {},
340 |    "outputs": [],
341 |    "source": []
342 |   },
343 |   {
344 |    "cell_type": "code",
345 |    "execution_count": null,
346 |    "metadata": {},
347 |    "outputs": [],
348 |    "source": []
349 |   },
350 |   {
351 |    "cell_type": "code",
352 |    "execution_count": 20,
353 |    "metadata": {},
354 |    "outputs": [],
355 |    "source": [
356 |     "# Instantiate a new neural network model for which, the mask is to be created,\n",
357 |     "# according to the paper-\n",
358 |     "mask_model = pruned_nn(pruning_params_unpruned)"
359 |    ]
360 |   },
361 |   {
362 |    "cell_type": "code",
363 |    "execution_count": 21,
364 |    "metadata": {},
365 |    "outputs": [],
366 |    "source": [
367 |     "# Load weights of GradientTape trained and PRUNED model-\n",
368 |     "# mask_model.load_weights(\"Pruned_Weights.h5\")\n",
369 |     "mask_model.load_weights(\"Winning_Ticket_Weights_Experimental.h5\")"
370 |    ]
371 |   },
372 |   {
373 |    "cell_type": "code",
374 |    "execution_count": 22,
375 |    "metadata": {},
376 |    "outputs": [],
377 |    "source": [
378 |     "# Strip the model of its pruning parameters-\n",
379 |     "mask_model_stripped = sparsity.strip_pruning(mask_model)"
380 |    ]
381 |   },
382 |   {
383 |    "cell_type": "code",
384 |    "execution_count": 23,
385 |    "metadata": {},
386 |    "outputs": [],
387 |    "source": [
388 |     "# For each layer, for each weight which is 0, leave it, as is.\n",
389 |     "# And for weights which survive the pruning,reinitialize it to ONE (1)-\n",
390 |     "\n",
391 |     "for wts in mask_model_stripped.trainable_weights:\n",
392 |     "    wts.assign(tf.where(tf.equal(wts, 0.), 0., 1.))"
393 |    ]
394 |   },
395 |   {
396 |    "cell_type": "code",
397 |    "execution_count": null,
398 |    "metadata": {},
399 |    "outputs": [],
400 |    "source": []
401 |   },
402 |   {
403 |    "cell_type": "code",
404 |    "execution_count": null,
405 |    "metadata": {},
406 |    "outputs": [],
407 |    "source": []
408 |   },
409 |   {
410 |    "cell_type": "markdown",
411 |    "metadata": {},
412 |    "source": [
413 |     "### Prepare dataset for _GradientTape_:"
414 |    ]
415 |   },
416 |   {
417 |    "cell_type": "code",
418 |    "execution_count": 24,
419 |    "metadata": {},
420 |    "outputs": [],
421 |    "source": [
422 |     "# Create training and testing datasets-\n",
423 |     "train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))\n",
424 |     "test_dataset = tf.data.Dataset.from_tensor_slices((X_test, y_test))"
425 |    ]
426 |   },
427 |   {
428 |    "cell_type": "code",
429 |    "execution_count": 25,
430 |    "metadata": {},
431 |    "outputs": [],
432 |    "source": [
433 |     "train_dataset = train_dataset.shuffle(buffer_size = 20000, reshuffle_each_iteration = True).batch(batch_size = batch_size, drop_remainder = False)"
434 |    ]
435 |   },
436 |   {
437 |    "cell_type": "code",
438 |    "execution_count": 26,
439 |    "metadata": {},
440 |    "outputs": [],
441 |    "source": [
442 |     "test_dataset = test_dataset.batch(batch_size=batch_size, drop_remainder=False)"
443 |    ]
444 |   },
445 |   {
446 |    "cell_type": "code",
447 |    "execution_count": 27,
448 |    "metadata": {},
449 |    "outputs": [],
450 |    "source": [
451 |     "# Choose an optimizer and loss function for training-\n",
452 |     "loss_fn = tf.keras.losses.CategoricalCrossentropy()\n",
453 |     "optimizer = tf.keras.optimizers.Adam(lr = 0.001)"
454 |    ]
455 |   },
456 |   {
457 |    "cell_type": "code",
458 |    "execution_count": 28,
459 |    "metadata": {},
460 |    "outputs": [],
461 |    "source": [
462 |     "# Select metrics to measure the error & accuracy of model.\n",
463 |     "# These metrics accumulate the values over epochs and then\n",
464 |     "# print the overall result-\n",
465 |     "train_loss = tf.keras.metrics.Mean(name = 'train_loss')\n",
466 |     "train_accuracy = tf.keras.metrics.CategoricalAccuracy(name = 'train_accuracy')\n",
467 |     "\n",
468 |     "test_loss = tf.keras.metrics.Mean(name = 'test_loss')\n",
469 |     "test_accuracy = tf.keras.metrics.CategoricalAccuracy(name = 'test_accuracy')"
470 |    ]
471 |   },
472 |   {
473 |    "cell_type": "code",
474 |    "execution_count": 29,
475 |    "metadata": {},
476 |    "outputs": [],
477 |    "source": [
478 |     "@tf.function\n",
479 |     "def train_one_step(model, mask_model, optimizer, x, y):\n",
480 |     "    '''\n",
481 |     "    def train_step(data, labels):\n",
482 |     "    Function to compute one step of gradient descent optimization\n",
483 |     "    '''\n",
484 |     "    with tf.GradientTape() as tape:\n",
485 |     "        # Make predictions using defined model-\n",
486 |     "        y_pred = model(x)\n",
487 |     "\n",
488 |     "        # Compute loss-\n",
489 |     "        loss = loss_fn(y, y_pred)\n",
490 |     "        \n",
491 |     "    # Compute gradients wrt defined loss and weights and biases-\n",
492 |     "    grads = tape.gradient(loss, model.trainable_variables)\n",
493 |     "    \n",
494 |     "    # type(grads)\n",
495 |     "    # list\n",
496 |     "    \n",
497 |     "    # List to hold element-wise multiplication between-\n",
498 |     "    # computed gradient and masks-\n",
499 |     "    grad_mask_mul = []\n",
500 |     "    \n",
501 |     "    # Perform element-wise multiplication between computed gradients and masks-\n",
502 |     "    for grad_layer, mask in zip(grads, mask_model.trainable_weights):\n",
503 |     "        grad_mask_mul.append(tf.math.multiply(grad_layer, mask))\n",
504 |     "    \n",
505 |     "    # Apply computed gradients to model's weights and biases-\n",
506 |     "    optimizer.apply_gradients(zip(grad_mask_mul, model.trainable_variables))\n",
507 |     "\n",
508 |     "    # Compute accuracy-\n",
509 |     "    train_loss(loss)\n",
510 |     "    train_accuracy(y, y_pred)\n",
511 |     "\n",
512 |     "    return None"
513 |    ]
514 |   },
515 |   {
516 |    "cell_type": "code",
517 |    "execution_count": 30,
518 |    "metadata": {},
519 |    "outputs": [],
520 |    "source": [
521 |     "@tf.function\n",
522 |     "def test_step(model, optimizer, data, labels):\n",
523 |     "    \"\"\"\n",
524 |     "    Function to test model performance\n",
525 |     "    on testing dataset\n",
526 |     "    \"\"\"\n",
527 |     "    \n",
528 |     "    predictions = model(data)\n",
529 |     "    t_loss = loss_fn(labels, predictions)\n",
530 |     "\n",
531 |     "    test_loss(t_loss)\n",
532 |     "    test_accuracy(labels, predictions)\n",
533 |     "\n",
534 |     "    return None\n"
535 |    ]
536 |   },
537 |   {
538 |    "cell_type": "code",
539 |    "execution_count": null,
540 |    "metadata": {},
541 |    "outputs": [],
542 |    "source": []
543 |   },
544 |   {
545 |    "cell_type": "code",
546 |    "execution_count": 31,
547 |    "metadata": {},
548 |    "outputs": [],
549 |    "source": [
550 |     "# Dictionary to hold scalar metrics-\n",
551 |     "history = {}\n",
552 |     "\n",
553 |     "history['accuracy'] = np.zeros(num_epochs)\n",
554 |     "history['val_accuracy'] = np.zeros(num_epochs)\n",
555 |     "history['loss'] = np.zeros(num_epochs)\n",
556 |     "history['val_loss'] = np.zeros(num_epochs)"
557 |    ]
558 |   },
559 |   {
560 |    "cell_type": "code",
561 |    "execution_count": 32,
562 |    "metadata": {},
563 |    "outputs": [],
564 |    "source": [
565 |     "# User input-\n",
566 |     "minimum_delta = 0.001\n",
567 |     "patience = 3"
568 |    ]
569 |   },
570 |   {
571 |    "cell_type": "code",
572 |    "execution_count": 33,
573 |    "metadata": {},
574 |    "outputs": [],
575 |    "source": [
576 |     "best_val_loss = 1\n",
577 |     "loc_patience = 0"
578 |    ]
579 |   },
580 |   {
581 |    "cell_type": "code",
582 |    "execution_count": 34,
583 |    "metadata": {},
584 |    "outputs": [
585 |     {
586 |      "name": "stdout",
587 |      "output_type": "stream",
588 |      "text": [
589 |       "Epoch 1, Loss: 0.1759, Accuracy: 98.9948, Test Loss: 0.0746, Test Accuracy: 99.534950\n",
590 |       "Total number of trainable parameters = 56168\n",
591 |       "\n",
592 |       "Epoch 2, Loss: 0.0457, Accuracy: 99.7279, Test Loss: 0.0566, Test Accuracy: 99.618980\n",
593 |       "Total number of trainable parameters = 56168\n",
594 |       "\n",
595 |       "Epoch 3, Loss: 0.0246, Accuracy: 99.8562, Test Loss: 0.0575, Test Accuracy: 99.628975\n",
596 |       "Total number of trainable parameters = 56168\n",
597 |       "\n",
598 |       "Epoch 4, Loss: 0.0148, Accuracy: 99.9216, Test Loss: 0.0589, Test Accuracy: 99.656952\n",
599 |       "Total number of trainable parameters = 56168\n",
600 |       "\n",
601 |       "Epoch 5, Loss: 0.0088, Accuracy: 99.9549, Test Loss: 0.0689, Test Accuracy: 99.633965\n",
602 |       "Total number of trainable parameters = 56168\n",
603 |       "\n",
604 |       "\n",
605 |       "'EarlyStopping' called!\n",
606 |       "\n"
607 |      ]
608 |     }
609 |    ],
610 |    "source": [
611 |     "for epoch in range(num_epochs):\n",
612 |     "    \n",
613 |     "    if loc_patience >= patience:\n",
614 |     "        print(\"\\n'EarlyStopping' called!\\n\")\n",
615 |     "        break\n",
616 |     "        \n",
617 |     "    # Reset the metrics at the start of the next epoch\n",
618 |     "    train_loss.reset_states()\n",
619 |     "    train_accuracy.reset_states()\n",
620 |     "    test_loss.reset_states()\n",
621 |     "    test_accuracy.reset_states()\n",
622 |     "            \n",
623 |     "    for x, y in train_dataset:\n",
624 |     "        # train_step(x, y)\n",
625 |     "        train_one_step(model_stripped, mask_model_stripped, optimizer, x, y)\n",
626 |     "\n",
627 |     "    for x_t, y_t in test_dataset:\n",
628 |     "        # test_step(x_t, y_t)\n",
629 |     "        test_step(model_stripped, optimizer, x_t, y_t)\n",
630 |     "\n",
631 |     "    template = 'Epoch {0}, Loss: {1:.4f}, Accuracy: {2:.4f}, Test Loss: {3:.4f}, Test Accuracy: {4:4f}'\n",
632 |     "    \n",
633 |     "    history['accuracy'][epoch] = train_accuracy.result()\n",
634 |     "    history['loss'][epoch] = train_loss.result()\n",
635 |     "    history['val_loss'][epoch] = test_loss.result()\n",
636 |     "    history['val_accuracy'][epoch] = test_accuracy.result()\n",
637 |     "\n",
638 |     "    print(template.format(epoch + 1, \n",
639 |     "                              train_loss.result(), train_accuracy.result()*100,\n",
640 |     "                              test_loss.result(), test_accuracy.result()*100))\n",
641 |     "    \n",
642 |     "    # Count number of non-zero parameters in each layer and in total-\n",
643 |     "    # print(\"layer-wise manner model, number of nonzero parameters in each layer are: \\n\")\n",
644 |     "\n",
645 |     "    model_sum_params = 0\n",
646 |     "    \n",
647 |     "    for layer in model_stripped.trainable_weights:\n",
648 |     "        # print(tf.math.count_nonzero(layer, axis = None).numpy())\n",
649 |     "        model_sum_params += tf.math.count_nonzero(layer, axis = None).numpy()\n",
650 |     "    \n",
651 |     "    print(\"Total number of trainable parameters = {0}\\n\".format(model_sum_params))\n",
652 |     "\n",
653 |     "    \n",
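654 |     "    # Code for manual Early Stopping. NOTE(review): the test below applies np.abs() to the boolean 'loss < best' comparison, so for 0 < minimum_delta <= 1 any decrease counts as an improvement and 'minimum_delta' has no effect - confirm intent:\n",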
655 |     "    if np.abs(test_loss.result() < best_val_loss) >= minimum_delta:\n",
656 |     "        # update 'best_val_loss' variable to lowest loss encountered so far-\n",
657 |     "        best_val_loss = test_loss.result()\n",
658 |     "        \n",
659 |     "        # reset 'loc_patience' variable-\n",
660 |     "        loc_patience = 0\n",
661 |     "        \n",
662 |     "    else:  # there is no improvement in monitored metric 'val_loss'\n",
663 |     "        loc_patience += 1  # number of epochs without any improvement\n",
664 |     "    "
665 |    ]
666 |   },
667 |   {
668 |    "cell_type": "code",
669 |    "execution_count": 35,
670 |    "metadata": {},
671 |    "outputs": [],
672 |    "source": [
673 |     "# Resize numpy arrays according to the epoch when 'EarlyStopping' was called-\n",
674 |     "for metrics in history.keys():\n",
675 |     "    history[metrics] = np.resize(history[metrics], new_shape=epoch)"
676 |    ]
677 |   },
678 |   {
679 |    "cell_type": "code",
680 |    "execution_count": 36,
681 |    "metadata": {},
682 |    "outputs": [
683 |     {
684 |      "data": {
685 |       "text/plain": [
686 |        "[0.06900685519112594, 0.9815]"
687 |       ]
688 |      },
689 |      "execution_count": 36,
690 |      "metadata": {},
691 |      "output_type": "execute_result"
692 |     }
693 |    ],
694 |    "source": [
695 |     "model.evaluate(X_test, y_test, verbose=0)"
696 |    ]
697 |   },
698 |   {
699 |    "cell_type": "code",
700 |    "execution_count": 37,
701 |    "metadata": {},
702 |    "outputs": [],
703 |    "source": [
704 |     "y_pred = model.predict_classes(X_test)"
705 |    ]
706 |   },
707 |   {
708 |    "cell_type": "code",
709 |    "execution_count": 41,
710 |    "metadata": {},
711 |    "outputs": [
712 |     {
713 |      "name": "stdout",
714 |      "output_type": "stream",
715 |      "text": [
716 |       "\n",
717 |       "Accuracy of Winning Ticket (5-round) = 0.9815\n",
718 |       "\n"
719 |      ]
720 |     }
721 |    ],
722 |    "source": [
723 |     "accuracy = accuracy_score(np.argmax(y_test, axis = 1), y_pred)\n",
724 |     "\n",
725 |     "print(\"\\nAccuracy of Winning Ticket (5-round) = {0:.4f}\\n\".format(accuracy))"
726 |    ]
727 |   },
728 |   {
729 |    "cell_type": "code",
730 |    "execution_count": null,
731 |    "metadata": {},
732 |    "outputs": [],
733 |    "source": []
734 |   }
735 |  ],
736 |  "metadata": {
737 |   "kernelspec": {
738 |    "display_name": "Python 3",
739 |    "language": "python",
740 |    "name": "python3"
741 |   },
742 |   "language_info": {
743 |    "codemirror_mode": {
744 |     "name": "ipython",
745 |     "version": 3
746 |    },
747 |    "file_extension": ".py",
748 |    "mimetype": "text/x-python",
749 |    "name": "python",
750 |    "nbconvert_exporter": "python",
751 |    "pygments_lexer": "ipython3",
752 |    "version": "3.7.6"
753 |   }
754 |  },
755 |  "nbformat": 4,
756 |  "nbformat_minor": 4
757 | }
758 | 


--------------------------------------------------------------------------------
/Conv_4_LTH_CIFAR_10_winning_ticket_verification.ipynb:
--------------------------------------------------------------------------------
   1 | {
   2 |  "cells": [
   3 |   {
   4 |    "cell_type": "markdown",
   5 |    "metadata": {},
   6 |    "source": [
   7 |     "# The Lottery Ticket Hypothesis - Conv-4 CNN for CIFAR-10 _winning ticket_ verification:\n",
   8 |     "\n",
   9 |     "The Conv-4 Convolutional Neural Network has the following architecture:\n",
  10 |     "\n",
  11 |     "1. __Convolutional Layers:__ 64, 64, pool\n",
  12 |     "1. __Convolutional Layers:__ 128, 128, pool\n",
  13 |     "1. __Dense Layers:__ 256, 256, 10\n",
  14 |     "\n",
  15 |     "Filter/Kernel size for convolutional layers is 3 x 3, with padding and stride of 1.\n",
  16 |     "\n",
  17 |     "Pool size and stride for max-pooling layers are both 2 x 2.\n",
  18 |     "\n",
  19 |     "This CNN is used to verify the sub-network (or winning ticket) found using the iterative pruning rounds described in _The Lottery Ticket Hypothesis_ paper."
  20 |    ]
  21 |   },
  22 |   {
  23 |    "cell_type": "code",
  24 |    "execution_count": 1,
  25 |    "metadata": {},
  26 |    "outputs": [
  27 |     {
  28 |      "name": "stderr",
  29 |      "output_type": "stream",
  30 |      "text": [
  31 |       "/opt/conda/lib/python3.7/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:541: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
  32 |       "  _np_qint8 = np.dtype([(\"qint8\", np.int8, 1)])\n",
  33 |       "/opt/conda/lib/python3.7/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:542: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
  34 |       "  _np_quint8 = np.dtype([(\"quint8\", np.uint8, 1)])\n",
  35 |       "/opt/conda/lib/python3.7/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:543: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
  36 |       "  _np_qint16 = np.dtype([(\"qint16\", np.int16, 1)])\n",
  37 |       "/opt/conda/lib/python3.7/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:544: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
  38 |       "  _np_quint16 = np.dtype([(\"quint16\", np.uint16, 1)])\n",
  39 |       "/opt/conda/lib/python3.7/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:545: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
  40 |       "  _np_qint32 = np.dtype([(\"qint32\", np.int32, 1)])\n",
  41 |       "/opt/conda/lib/python3.7/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:550: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
  42 |       "  np_resource = np.dtype([(\"resource\", np.ubyte, 1)])\n"
  43 |      ]
  44 |     }
  45 |    ],
  46 |    "source": [
  47 |     "import tensorflow as tf\n",
  48 |     "import numpy as np\n",
  49 |     "import matplotlib.pyplot as plt\n",
  50 |     "import math\n",
  51 |     "import tensorflow_model_optimization as tfmot\n",
  52 |     "from tensorflow_model_optimization.sparsity import keras as sparsity\n",
  53 |     "# from tensorflow.keras import datasets, layers, models\n",
  54 |     "import matplotlib.pyplot as plt\n",
  55 |     "from tensorflow.keras.layers import AveragePooling2D, Conv2D, MaxPooling2D, ReLU\n",
  56 |     "from tensorflow.keras import models, layers, datasets\n",
  57 |     "from tensorflow.keras.layers import Dense, Flatten, Reshape, Input, InputLayer\n",
  58 |     "from tensorflow.keras.models import Sequential, Model\n",
  59 |     "from tensorflow.keras.initializers import RandomNormal\n",
  60 |     "# import math\n",
  61 |     "from sklearn.metrics import accuracy_score, precision_score, recall_score"
  62 |    ]
  63 |   },
  64 |   {
  65 |    "cell_type": "code",
  66 |    "execution_count": null,
  67 |    "metadata": {},
  68 |    "outputs": [],
  69 |    "source": []
  70 |   },
  71 |   {
  72 |    "cell_type": "code",
  73 |    "execution_count": 2,
  74 |    "metadata": {},
  75 |    "outputs": [
  76 |     {
  77 |      "data": {
  78 |       "text/plain": [
  79 |        "'2.0.0'"
  80 |       ]
  81 |      },
  82 |      "execution_count": 2,
  83 |      "metadata": {},
  84 |      "output_type": "execute_result"
  85 |     }
  86 |    ],
  87 |    "source": [
  88 |     "tf.__version__"
  89 |    ]
  90 |   },
  91 |   {
  92 |    "cell_type": "code",
  93 |    "execution_count": 3,
  94 |    "metadata": {},
  95 |    "outputs": [
  96 |     {
  97 |      "name": "stdout",
  98 |      "output_type": "stream",
  99 |      "text": [
 100 |       "env: CUDA_DEVICE_ORDER=PCI_BUS_ID\n",
 101 |       "env: CUDA_VISIBLE_DEVICES=1\n"
 102 |      ]
 103 |     }
 104 |    ],
 105 |    "source": [
 106 |     "%env CUDA_DEVICE_ORDER=PCI_BUS_ID\n",
 107 |     "%env CUDA_VISIBLE_DEVICES=1"
 108 |    ]
 109 |   },
 110 |   {
 111 |    "cell_type": "code",
 112 |    "execution_count": null,
 113 |    "metadata": {},
 114 |    "outputs": [],
 115 |    "source": []
 116 |   },
 117 |   {
 118 |    "cell_type": "code",
 119 |    "execution_count": 4,
 120 |    "metadata": {},
 121 |    "outputs": [],
 122 |    "source": [
 123 |     "batch_size = 60\n",
 124 |     "num_classes = 10\n",
 125 |     "num_epochs = 100"
 126 |    ]
 127 |   },
 128 |   {
 129 |    "cell_type": "code",
 130 |    "execution_count": 5,
 131 |    "metadata": {},
 132 |    "outputs": [],
 133 |    "source": [
 134 |     "# Data preprocessing and cleaning:\n",
 135 |     "# input image dimensions\n",
 136 |     "img_rows, img_cols = 32, 32\n",
 137 |     "\n",
 138 |     "# Load CIFAR-10 dataset-\n",
 139 |     "(X_train, y_train), (X_test, y_test) = tf.keras.datasets.cifar10.load_data()"
 140 |    ]
 141 |   },
 142 |   {
 143 |    "cell_type": "code",
 144 |    "execution_count": 6,
 145 |    "metadata": {},
 146 |    "outputs": [
 147 |     {
 148 |      "name": "stdout",
 149 |      "output_type": "stream",
 150 |      "text": [
 151 |       "\n",
 152 |       "'input_shape' which will be used = (32, 32, 3)\n",
 153 |       "\n"
 154 |      ]
 155 |     }
 156 |    ],
 157 |    "source": [
 158 |     "if tf.keras.backend.image_data_format() == 'channels_first':\n",
 159 |     "    X_train = X_train.reshape(X_train.shape[0], 3, img_rows, img_cols)\n",
 160 |     "    X_test = X_test.reshape(X_test.shape[0], 3, img_rows, img_cols)\n",
 161 |     "    input_shape = (3, img_rows, img_cols)\n",
 162 |     "else:\n",
 163 |     "    X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, 3)\n",
 164 |     "    X_test = X_test.reshape(X_test.shape[0], img_rows, img_cols, 3)\n",
 165 |     "    input_shape = (img_rows, img_cols, 3)\n",
 166 |     "\n",
 167 |     "print(\"\\n'input_shape' which will be used = {0}\\n\".format(input_shape))"
 168 |    ]
 169 |   },
 170 |   {
 171 |    "cell_type": "code",
 172 |    "execution_count": 7,
 173 |    "metadata": {},
 174 |    "outputs": [],
 175 |    "source": [
 176 |     "# Convert datasets to floating point types-\n",
 177 |     "X_train = X_train.astype('float32')\n",
 178 |     "X_test = X_test.astype('float32')\n",
 179 |     "\n",
 180 |     "# Normalize the training and testing datasets-\n",
 181 |     "X_train /= 255.0\n",
 182 |     "X_test /= 255.0"
 183 |    ]
 184 |   },
 185 |   {
 186 |    "cell_type": "code",
 187 |    "execution_count": 8,
 188 |    "metadata": {},
 189 |    "outputs": [],
 190 |    "source": [
 191 |     "# convert class vectors/target to binary class matrices or one-hot encoded values-\n",
 192 |     "y_train = tf.keras.utils.to_categorical(y_train, num_classes)\n",
 193 |     "y_test = tf.keras.utils.to_categorical(y_test, num_classes)"
 194 |    ]
 195 |   },
 196 |   {
 197 |    "cell_type": "code",
 198 |    "execution_count": 9,
 199 |    "metadata": {},
 200 |    "outputs": [
 201 |     {
 202 |      "name": "stdout",
 203 |      "output_type": "stream",
 204 |      "text": [
 205 |       "\n",
 206 |       "Dimensions of training and testing sets are:\n",
 207 |       "X_train.shape = (50000, 32, 32, 3), y_train.shape = (50000, 10)\n",
 208 |       "X_test.shape = (10000, 32, 32, 3), y_test.shape = (10000, 10)\n"
 209 |      ]
 210 |     }
 211 |    ],
 212 |    "source": [
 213 |     "print(\"\\nDimensions of training and testing sets are:\")\n",
 214 |     "print(\"X_train.shape = {0}, y_train.shape = {1}\".format(X_train.shape, y_train.shape))\n",
 215 |     "print(\"X_test.shape = {0}, y_test.shape = {1}\".format(X_test.shape, y_test.shape))"
 216 |    ]
 217 |   },
 218 |   {
 219 |    "cell_type": "code",
 220 |    "execution_count": null,
 221 |    "metadata": {},
 222 |    "outputs": [],
 223 |    "source": []
 224 |   },
 225 |   {
 226 |    "cell_type": "code",
 227 |    "execution_count": null,
 228 |    "metadata": {},
 229 |    "outputs": [],
 230 |    "source": []
 231 |   },
 232 |   {
 233 |    "cell_type": "markdown",
 234 |    "metadata": {},
 235 |    "source": [
 236 |     "### Prepare CIFAR10 dataset for _GradientTape_ training:"
 237 |    ]
 238 |   },
 239 |   {
 240 |    "cell_type": "code",
 241 |    "execution_count": 10,
 242 |    "metadata": {},
 243 |    "outputs": [],
 244 |    "source": [
 245 |     "# Create training and testing datasets-\n",
 246 |     "train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))\n",
 247 |     "test_dataset = tf.data.Dataset.from_tensor_slices((X_test, y_test))"
 248 |    ]
 249 |   },
 250 |   {
 251 |    "cell_type": "code",
 252 |    "execution_count": 11,
 253 |    "metadata": {},
 254 |    "outputs": [],
 255 |    "source": [
 256 |     "train_dataset = train_dataset.shuffle(buffer_size = 20000, reshuffle_each_iteration = True).batch(batch_size = batch_size, drop_remainder = False)"
 257 |    ]
 258 |   },
 259 |   {
 260 |    "cell_type": "code",
 261 |    "execution_count": 12,
 262 |    "metadata": {},
 263 |    "outputs": [],
 264 |    "source": [
 265 |     "test_dataset = test_dataset.batch(batch_size=batch_size, drop_remainder=False)"
 266 |    ]
 267 |   },
 268 |   {
 269 |    "cell_type": "code",
 270 |    "execution_count": 13,
 271 |    "metadata": {},
 272 |    "outputs": [],
 273 |    "source": [
 274 |     "# Choose an optimizer and loss function for training-\n",
 275 |     "loss_fn = tf.keras.losses.CategoricalCrossentropy()\n",
 276 |     "optimizer = tf.keras.optimizers.Adam(lr = 0.0003)"
 277 |    ]
 278 |   },
 279 |   {
 280 |    "cell_type": "code",
 281 |    "execution_count": 14,
 282 |    "metadata": {},
 283 |    "outputs": [],
 284 |    "source": [
 285 |     "# Select metrics to measure the error & accuracy of model.\n",
 286 |     "# These metrics accumulate the values over epochs and then\n",
 287 |     "# print the overall result-\n",
 288 |     "train_loss = tf.keras.metrics.Mean(name = 'train_loss')\n",
 289 |     "train_accuracy = tf.keras.metrics.CategoricalAccuracy(name = 'train_accuracy')\n",
 290 |     "\n",
 291 |     "test_loss = tf.keras.metrics.Mean(name = 'test_loss')\n",
 292 |     "test_accuracy = tf.keras.metrics.CategoricalAccuracy(name = 'test_accuracy')"
 293 |    ]
 294 |   },
 295 |   {
 296 |    "cell_type": "code",
 297 |    "execution_count": null,
 298 |    "metadata": {},
 299 |    "outputs": [],
 300 |    "source": []
 301 |   },
 302 |   {
 303 |    "cell_type": "code",
 304 |    "execution_count": 15,
 305 |    "metadata": {},
 306 |    "outputs": [
 307 |     {
 308 |      "name": "stdout",
 309 |      "output_type": "stream",
 310 |      "text": [
 311 |       "'end_step parameter' for this dataset =  83400\n"
 312 |      ]
 313 |     }
 314 |    ],
 315 |    "source": [
 316 |     "# The model is first trained without any pruning for 'num_epochs' epochs-\n",
 317 |     "epochs = num_epochs\n",
 318 |     "\n",
 319 |     "num_train_samples = X_train.shape[0]\n",
 320 |     "\n",
 321 |     "end_step = np.ceil(1.0 * num_train_samples / batch_size).astype(np.int32) * epochs\n",
 322 |     "\n",
 323 |     "print(\"'end_step parameter' for this dataset =  {0}\".format(end_step))"
 324 |    ]
 325 |   },
 326 |   {
 327 |    "cell_type": "code",
 328 |    "execution_count": 16,
 329 |    "metadata": {},
 330 |    "outputs": [],
 331 |    "source": [
 332 |     "# Specify the parameters to be used for layer-wise pruning, NO PRUNING is done here:\n",
 333 |     "pruning_params_unpruned = {\n",
 334 |     "    'pruning_schedule': sparsity.ConstantSparsity(\n",
 335 |     "        target_sparsity=0.0, begin_step=0,\n",
 336 |     "        end_step = end_step, frequency=100\n",
 337 |     "    )\n",
 338 |     "}"
 339 |    ]
 340 |   },
 341 |   {
 342 |    "cell_type": "code",
 343 |    "execution_count": null,
 344 |    "metadata": {},
 345 |    "outputs": [],
 346 |    "source": []
 347 |   },
 348 |   {
 349 |    "cell_type": "code",
 350 |    "execution_count": 17,
 351 |    "metadata": {},
 352 |    "outputs": [],
 353 |    "source": [
 354 |     "l = tf.keras.layers"
 355 |    ]
 356 |   },
 357 |   {
 358 |    "cell_type": "code",
 359 |    "execution_count": null,
 360 |    "metadata": {},
 361 |    "outputs": [],
 362 |    "source": []
 363 |   },
 364 |   {
 365 |    "cell_type": "code",
 366 |    "execution_count": 18,
 367 |    "metadata": {},
 368 |    "outputs": [],
 369 |    "source": [
 370 |     "def pruned_nn(pruning_params_conv, pruning_params_fc, pruning_params_op):\n",
 371 |     "    \"\"\"\n",
 372 |     "    Function to define the architecture of a neural network model\n",
 373 |     "    following the Conv-4 architecture for the CIFAR-10 dataset, using\n",
 374 |     "    the provided parameters, which are used to prune the model.\n",
 375 |     "    \n",
 376 |     "    Conv-4 architecture-\n",
 377 |     "    64, 64, pool  -- convolutions\n",
 378 |     "    128, 128, pool -- convolutions\n",
 379 |     "    256, 256, 10  -- fully connected layers\n",
 380 |     "    \n",
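 381 |     "    Input: 'pruning_params_conv', 'pruning_params_fc' and 'pruning_params_op' - Python 3 dictionaries of pruning parameters for the convolutional/pooling, hidden dense and output layers respectively\n",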
 382 |     "    Output: Returns designed and compiled neural network model\n",
 383 |     "    \"\"\"\n",
 384 |     "    \n",
 385 |     "    pruned_model = Sequential()\n",
 386 |     "    \n",
 387 |     "    pruned_model.add(sparsity.prune_low_magnitude(\n",
 388 |     "        Conv2D(\n",
 389 |     "            filters = 64, kernel_size = (3, 3),\n",
 390 |     "            activation='relu', kernel_initializer = tf.initializers.GlorotUniform(),\n",
 391 |     "            strides = (1, 1), padding = 'same',\n",
 392 |     "            input_shape=(32, 32, 3)\n",
 393 |     "        ),\n",
 394 |     "        **pruning_params_conv)\n",
 395 |     "    )\n",
 396 |     "        \n",
 397 |     "    pruned_model.add(sparsity.prune_low_magnitude(\n",
 398 |     "        Conv2D(\n",
 399 |     "            filters = 64, kernel_size = (3, 3),\n",
 400 |     "            activation='relu', kernel_initializer = tf.initializers.GlorotUniform(),\n",
 401 |     "            strides = (1, 1), padding = 'same'\n",
 402 |     "        ),\n",
 403 |     "        **pruning_params_conv)\n",
 404 |     "    )\n",
 405 |     "    \n",
 406 |     "    pruned_model.add(sparsity.prune_low_magnitude(\n",
 407 |     "        MaxPooling2D(\n",
 408 |     "            pool_size = (2, 2),\n",
 409 |     "            strides = (2, 2)\n",
 410 |     "        ),\n",
 411 |     "        **pruning_params_conv)\n",
 412 |     "    )\n",
 413 |     "    \n",
 414 |     "    pruned_model.add(sparsity.prune_low_magnitude(\n",
 415 |     "        Conv2D(\n",
 416 |     "            filters = 128, kernel_size = (3, 3),\n",
 417 |     "            activation='relu', kernel_initializer = tf.initializers.GlorotUniform(),\n",
 418 |     "            strides = (1, 1), padding = 'same'\n",
 419 |     "        ),\n",
 420 |     "        **pruning_params_conv)\n",
 421 |     "    )\n",
 422 |     "\n",
 423 |     "    pruned_model.add(sparsity.prune_low_magnitude(\n",
 424 |     "        Conv2D(\n",
 425 |     "            filters = 128, kernel_size = (3, 3),\n",
 426 |     "            activation='relu', kernel_initializer = tf.initializers.GlorotUniform(),\n",
 427 |     "            strides = (1, 1), padding = 'same'\n",
 428 |     "        ),\n",
 429 |     "        **pruning_params_conv)\n",
 430 |     "    )\n",
 431 |     "\n",
 432 |     "    pruned_model.add(sparsity.prune_low_magnitude(\n",
 433 |     "        MaxPooling2D(\n",
 434 |     "            pool_size = (2, 2),\n",
 435 |     "            strides = (2, 2)\n",
 436 |     "        ),\n",
 437 |     "        **pruning_params_conv)\n",
 438 |     "    )\n",
 439 |     "\n",
 440 |     "    \n",
 441 |     "    pruned_model.add(Flatten())\n",
 442 |     "    \n",
 443 |     "    pruned_model.add(sparsity.prune_low_magnitude(\n",
 444 |     "        Dense(\n",
 445 |     "            units = 256, activation='relu',\n",
 446 |     "            kernel_initializer = tf.initializers.GlorotUniform()\n",
 447 |     "        ),\n",
 448 |     "        **pruning_params_fc)\n",
 449 |     "    )\n",
 450 |     "    \n",
 451 |     "    pruned_model.add(sparsity.prune_low_magnitude(\n",
 452 |     "        Dense(\n",
 453 |     "            units = 256, activation='relu',\n",
 454 |     "            kernel_initializer = tf.initializers.GlorotUniform()\n",
 455 |     "        ),\n",
 456 |     "        **pruning_params_fc)\n",
 457 |     "    )\n",
 458 |     "    \n",
 459 |     "    pruned_model.add(sparsity.prune_low_magnitude(\n",
 460 |     "        Dense(\n",
 461 |     "            units = 10, activation='softmax'\n",
 462 |     "        ),\n",
 463 |     "        **pruning_params_op)\n",
 464 |     "    )\n",
 465 |     "    \n",
 466 |     "\n",
 467 |     "    # Compile pruned CNN-\n",
 468 |     "    pruned_model.compile(\n",
 469 |     "        loss=tf.keras.losses.categorical_crossentropy,\n",
 470 |     "        # optimizer='adam',\n",
 471 |     "        optimizer=tf.keras.optimizers.Adam(lr = 0.0003),\n",
 472 |     "        metrics=['accuracy']\n",
 473 |     "    )\n",
 474 |     "    \n",
 475 |     "    \n",
 476 |     "    return pruned_model\n"
 477 |    ]
 478 |   },
 479 |   {
 480 |    "cell_type": "code",
 481 |    "execution_count": null,
 482 |    "metadata": {},
 483 |    "outputs": [],
 484 |    "source": []
 485 |   },
 486 |   {
 487 |    "cell_type": "code",
 488 |    "execution_count": 19,
 489 |    "metadata": {},
 490 |    "outputs": [
 491 |     {
 492 |      "name": "stdout",
 493 |      "output_type": "stream",
 494 |      "text": [
 495 |       "WARNING:tensorflow:From /opt/conda/lib/python3.7/site-packages/tensorflow_model_optimization/python/core/sparsity/keras/pruning_wrapper.py:183: Layer.add_variable (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version.\n",
 496 |       "Instructions for updating:\n",
 497 |       "Please use `layer.add_weight` method instead.\n"
 498 |      ]
 499 |     }
 500 |    ],
 501 |    "source": [
 502 |     "# Initialize a CNN model-\n",
 503 |     "orig_model = pruned_nn(pruning_params_unpruned, pruning_params_unpruned, pruning_params_unpruned)"
 504 |    ]
 505 |   },
 506 |   {
 507 |    "cell_type": "code",
 508 |    "execution_count": 19,
 509 |    "metadata": {},
 510 |    "outputs": [
 511 |     {
 512 |      "data": {
 513 |       "text/plain": [
 514 |        "'\\nimport os\\n\\n# Change to where the winning ticket is saved-\\nos.chdir(\"Run_2/Conv_4_CIFAR10/\")\\n'"
 515 |       ]
 516 |      },
 517 |      "execution_count": 19,
 518 |      "metadata": {},
 519 |      "output_type": "execute_result"
 520 |     }
 521 |    ],
 522 |    "source": [
 523 |     "'''\n",
 524 |     "import os\n",
 525 |     "\n",
 526 |     "# Change to where the winning ticket is saved-\n",
 527 |     "os.chdir(\"Run_2/Conv_4_CIFAR10/\")\n",
 528 |     "'''"
 529 |    ]
 530 |   },
 531 |   {
 532 |    "cell_type": "code",
 533 |    "execution_count": null,
 534 |    "metadata": {},
 535 |    "outputs": [],
 536 |    "source": []
 537 |   },
 538 |   {
 539 |    "cell_type": "code",
 540 |    "execution_count": 20,
 541 |    "metadata": {},
 542 |    "outputs": [],
 543 |    "source": [
 544 |     "# Load weights from before-\n",
 545 |     "orig_model.load_weights(\"Conv_4_CIFAR10_Winning_Ticket_Distribution_94.6035541009015.h5\")"
 546 |    ]
 547 |   },
 548 |   {
 549 |    "cell_type": "code",
 550 |    "execution_count": 21,
 551 |    "metadata": {},
 552 |    "outputs": [],
 553 |    "source": [
 554 |     "# Strip model of its pruning parameters-\n",
 555 |     "orig_model_stripped = sparsity.strip_pruning(orig_model)"
 556 |    ]
 557 |   },
 558 |   {
 559 |    "cell_type": "code",
 560 |    "execution_count": 22,
 561 |    "metadata": {},
 562 |    "outputs": [
 563 |     {
 564 |      "name": "stdout",
 565 |      "output_type": "stream",
 566 |      "text": [
 567 |       "Model: \"sequential\"\n",
 568 |       "_________________________________________________________________\n",
 569 |       "Layer (type)                 Output Shape              Param #   \n",
 570 |       "=================================================================\n",
 571 |       "conv2d (Conv2D)              (None, 32, 32, 64)        1792      \n",
 572 |       "_________________________________________________________________\n",
 573 |       "conv2d_1 (Conv2D)            (None, 32, 32, 64)        36928     \n",
 574 |       "_________________________________________________________________\n",
 575 |       "max_pooling2d (MaxPooling2D) (None, 16, 16, 64)        0         \n",
 576 |       "_________________________________________________________________\n",
 577 |       "conv2d_2 (Conv2D)            (None, 16, 16, 128)       73856     \n",
 578 |       "_________________________________________________________________\n",
 579 |       "conv2d_3 (Conv2D)            (None, 16, 16, 128)       147584    \n",
 580 |       "_________________________________________________________________\n",
 581 |       "max_pooling2d_1 (MaxPooling2 (None, 8, 8, 128)         0         \n",
 582 |       "_________________________________________________________________\n",
 583 |       "flatten (Flatten)            (None, 8192)              0         \n",
 584 |       "_________________________________________________________________\n",
 585 |       "dense (Dense)                (None, 256)               2097408   \n",
 586 |       "_________________________________________________________________\n",
 587 |       "dense_1 (Dense)              (None, 256)               65792     \n",
 588 |       "_________________________________________________________________\n",
 589 |       "dense_2 (Dense)              (None, 10)                2570      \n",
 590 |       "=================================================================\n",
 591 |       "Total params: 2,425,930\n",
 592 |       "Trainable params: 2,425,930\n",
 593 |       "Non-trainable params: 0\n",
 594 |       "_________________________________________________________________\n"
 595 |      ]
 596 |     }
 597 |    ],
 598 |    "source": [
 599 |     "# Print the summary of the stripped model-\n",
 600 |     "orig_model_stripped.summary()"
 601 |    ]
 602 |   },
 603 |   {
 604 |    "cell_type": "code",
 605 |    "execution_count": null,
 606 |    "metadata": {},
 607 |    "outputs": [],
 608 |    "source": []
 609 |   },
 610 |   {
 611 |    "cell_type": "code",
 612 |    "execution_count": null,
 613 |    "metadata": {},
 614 |    "outputs": [],
 615 |    "source": []
 616 |   },
 617 |   {
 618 |    "cell_type": "markdown",
 619 |    "metadata": {},
 620 |    "source": [
 621 |     "### Create mask using winning ticket:"
 622 |    ]
 623 |   },
 624 |   {
 625 |    "cell_type": "code",
 626 |    "execution_count": 23,
 627 |    "metadata": {},
 628 |    "outputs": [],
 629 |    "source": [
 630 |     "# Instantiate a new neural network model for which the mask is to be created,\n",
 631 |     "# according to the paper-\n",
 632 |     "mask_model = pruned_nn(pruning_params_unpruned, pruning_params_unpruned, pruning_params_unpruned)"
 633 |    ]
 634 |   },
 635 |   {
 636 |    "cell_type": "code",
 637 |    "execution_count": 24,
 638 |    "metadata": {},
 639 |    "outputs": [],
 640 |    "source": [
 641 |     "# Load weights of PRUNED model-\n",
 642 |     "# mask_model.set_weights(orig_model.get_weights())\n",
 643 |     "mask_model.load_weights(\"Conv_4_CIFAR10_Winning_Ticket_Distribution_94.6035541009015.h5\")"
 644 |    ]
 645 |   },
 646 |   {
 647 |    "cell_type": "code",
 648 |    "execution_count": 25,
 649 |    "metadata": {},
 650 |    "outputs": [],
 651 |    "source": [
 652 |     "# Strip the model of its pruning parameters-\n",
 653 |     "mask_model_stripped = sparsity.strip_pruning(mask_model)"
 654 |    ]
 655 |   },
 656 |   {
 657 |    "cell_type": "code",
 658 |    "execution_count": 26,
 659 |    "metadata": {},
 660 |    "outputs": [],
 661 |    "source": [
 662 |     "# For each layer, leave every weight which is 0 as it is.\n",
 663 |     "# And for weights which survive the pruning, reinitialize them to ONE (1)-\n",
 664 |     "for wts in mask_model_stripped.trainable_weights:\n",
 665 |     "    wts.assign(tf.where(tf.equal(wts, 0.), 0., 1.))\n"
 666 |    ]
 667 |   },
 668 |   {
 669 |    "cell_type": "code",
 670 |    "execution_count": 28,
 671 |    "metadata": {},
 672 |    "outputs": [
 673 |     {
 674 |      "name": "stdout",
 675 |      "output_type": "stream",
 676 |      "text": [
 677 |       "\n",
 678 |       "Number of mask parameters = 130097\n",
 679 |       "\n"
 680 |      ]
 681 |     }
 682 |    ],
 683 |    "source": [
 684 |     "# Count number of mask parameters-\n",
 685 |     "mask_sum_params = 0\n",
 686 |     "\n",
 687 |     "for layer in mask_model_stripped.trainable_weights:\n",
 688 |     "    mask_sum_params += tf.math.count_nonzero(layer, axis = None).numpy()\n",
 689 |     "\n",
 690 |     "print(\"\\nNumber of mask parameters = {0}\\n\".format(mask_sum_params))"
 691 |    ]
 692 |   },
 693 |   {
 694 |    "cell_type": "code",
 695 |    "execution_count": null,
 696 |    "metadata": {},
 697 |    "outputs": [],
 698 |    "source": []
 699 |   },
 700 |   {
 701 |    "cell_type": "code",
 702 |    "execution_count": 29,
 703 |    "metadata": {},
 704 |    "outputs": [
 705 |     {
 706 |      "name": "stdout",
 707 |      "output_type": "stream",
 708 |      "text": [
 709 |       "\n",
 710 |       "Number of training weights = 2425930 and non-trainabel weights = 2425040.0\n",
 711 |       "\n",
 712 |       "Total number of parameters = 4850970.0\n",
 713 |       "\n"
 714 |      ]
 715 |     }
 716 |    ],
 717 |    "source": [
 718 |     "# Count number of trainable and non-trainable parameters-\n",
 719 |     "\n",
 720 |     "import tensorflow.keras.backend as K\n",
 721 |     "\n",
 722 |     "\n",
 723 |     "trainable_wts = np.sum([K.count_params(w) for w in orig_model.trainable_weights])\n",
 724 |     "non_trainable_wts = np.sum([K.count_params(w) for w in orig_model.non_trainable_weights])\n",
 725 |     "\n",
 726 |     "print(\"\\nNumber of training weights = {0} and non-trainabel weights = {1}\\n\".format(\n",
 727 |     "    trainable_wts, non_trainable_wts\n",
 728 |     "))\n",
 729 |     "print(\"Total number of parameters = {0}\\n\".format(trainable_wts + non_trainable_wts))\n"
 730 |    ]
 731 |   },
 732 |   {
 733 |    "cell_type": "code",
 734 |    "execution_count": null,
 735 |    "metadata": {},
 736 |    "outputs": [],
 737 |    "source": []
 738 |   },
 739 |   {
 740 |    "cell_type": "code",
 741 |    "execution_count": 30,
 742 |    "metadata": {},
 743 |    "outputs": [],
 744 |    "source": [
 745 |     "# Count number of non-zero parameters in winning ticket-1-\n",
 746 |     "pruned_sum_params = 0\n",
 747 |     "    \n",
 748 |     "for layer in orig_model_stripped.trainable_weights:\n",
 749 |     "    # print(tf.math.count_nonzero(layer, axis = None).numpy())\n",
 750 |     "    pruned_sum_params += tf.math.count_nonzero(layer, axis = None).numpy()\n"
 751 |    ]
 752 |   },
 753 |   {
 754 |    "cell_type": "code",
 755 |    "execution_count": 31,
 756 |    "metadata": {},
 757 |    "outputs": [
 758 |     {
 759 |      "name": "stdout",
 760 |      "output_type": "stream",
 761 |      "text": [
 762 |       "\n",
 763 |       "Number of non-zero parameters in Conv-4 winning ticket (CIFAR-10) = 130097 with 94.6372% of weights pruned\n",
 764 |       "\n"
 765 |      ]
 766 |     }
 767 |    ],
 768 |    "source": [
 769 |     "print(\"\\nNumber of non-zero parameters in Conv-4 winning ticket (CIFAR-10) = {0} with {1:.4f}% of weights pruned\\n\".format(pruned_sum_params, 100 - (pruned_sum_params / trainable_wts) * 100))"
 770 |    ]
 771 |   },
 772 |   {
 773 |    "cell_type": "code",
 774 |    "execution_count": null,
 775 |    "metadata": {},
 776 |    "outputs": [],
 777 |    "source": []
 778 |   },
 779 |   {
 780 |    "cell_type": "code",
 781 |    "execution_count": 32,
 782 |    "metadata": {},
 783 |    "outputs": [],
 784 |    "source": [
 785 |     "@tf.function\n",
 786 |     "def train_one_step(model, mask_model, optimizer, x, y):\n",
 787 |     "    '''\n",
 788 |     "    Function to compute one step of gradient descent optimization\n",
 789 |     "    '''\n",
 790 |     "    with tf.GradientTape() as tape:\n",
 791 |     "        # Make predictions using defined model-\n",
 792 |     "        y_pred = model(x)\n",
 793 |     "\n",
 794 |     "        # Compute loss-\n",
 795 |     "        loss = loss_fn(y, y_pred)\n",
 796 |     "            \n",
 797 |     "    # Compute gradients wrt defined loss and weights and biases-\n",
 798 |     "    grads = tape.gradient(loss, model.trainable_variables)\n",
 799 |     "    \n",
 800 |     "    # type(grads)\n",
 801 |     "    # list\n",
 802 |     "\n",
 803 |     "    # List to hold element-wise multiplication between-\n",
 804 |     "    # computed gradient and masks-\n",
 805 |     "    grad_mask_mul = []\n",
 806 |     "    \n",
 807 |     "    # Perform element-wise multiplication between computed gradients and masks-\n",
 808 |     "    for grad_layer, mask in zip(grads, mask_model.trainable_weights):\n",
 809 |     "        grad_mask_mul.append(tf.math.multiply(grad_layer, mask))\n",
 810 |     "    \n",
 811 |     "    # Apply computed gradients to model's weights and biases-\n",
 812 |     "    optimizer.apply_gradients(zip(grad_mask_mul, model.trainable_variables))\n",
 813 |     "\n",
 814 |     "    # Record loss and accuracy for this batch-\n",
 815 |     "    train_loss(loss)\n",
 816 |     "    train_accuracy(y, y_pred)\n",
 817 |     "\n",
 818 |     "    return None\n",
 819 |     "    \n",
 820 |     "    \n",
 821 |     "@tf.function\n",
 822 |     "def test_step(model, optimizer, data, labels):\n",
 823 |     "    \"\"\"\n",
 824 |     "    Function to test model performance\n",
 825 |     "    on testing dataset\n",
 826 |     "    \"\"\"\n",
 827 |     "    \n",
 828 |     "    predictions = model(data)\n",
 829 |     "    t_loss = loss_fn(labels, predictions)\n",
 830 |     "\n",
 831 |     "    test_loss(t_loss)\n",
 832 |     "    test_accuracy(labels, predictions)\n",
 833 |     "\n",
 834 |     "    return None\n"
 835 |    ]
 836 |   },
 837 |   {
 838 |    "cell_type": "code",
 839 |    "execution_count": null,
 840 |    "metadata": {},
 841 |    "outputs": [],
 842 |    "source": []
 843 |   },
 844 |   {
 845 |    "cell_type": "code",
 846 |    "execution_count": null,
 847 |    "metadata": {},
 848 |    "outputs": [],
 849 |    "source": []
 850 |   },
 851 |   {
 852 |    "cell_type": "code",
 853 |    "execution_count": 33,
 854 |    "metadata": {},
 855 |    "outputs": [],
 856 |    "source": [
 857 |     "# User input parameters for Early Stopping in manual implementation-\n",
 858 |     "minimum_delta = 0.001\n",
 859 |     "patience = 3"
 860 |    ]
 861 |   },
 862 |   {
 863 |    "cell_type": "code",
 864 |    "execution_count": 32,
 865 |    "metadata": {},
 866 |    "outputs": [],
 867 |    "source": [
 868 |     "# best_val_loss = 100\n",
 869 |     "# loc_patience = 0"
 870 |    ]
 871 |   },
 872 |   {
 873 |    "cell_type": "code",
 874 |    "execution_count": null,
 875 |    "metadata": {},
 876 |    "outputs": [],
 877 |    "source": []
 878 |   },
 879 |   {
 880 |    "cell_type": "code",
 881 |    "execution_count": 34,
 882 |    "metadata": {},
 883 |    "outputs": [
 884 |     {
 885 |      "name": "stdout",
 886 |      "output_type": "stream",
 887 |      "text": [
 888 |       "Epoch 1, Loss: 1.3280, Accuracy: 54.7720, Test Loss: 1.0256, Test Accuracy: 64.450005\n",
 889 |       "Total number of trainable parameters = 130097\n",
 890 |       "\n",
 891 |       "Epoch 2, Loss: 0.8605, Accuracy: 70.5660, Test Loss: 0.8211, Test Accuracy: 72.050003\n",
 892 |       "Total number of trainable parameters = 130097\n",
 893 |       "\n",
 894 |       "Epoch 3, Loss: 0.6581, Accuracy: 77.4980, Test Loss: 0.7663, Test Accuracy: 73.979996\n",
 895 |       "Total number of trainable parameters = 130097\n",
 896 |       "\n",
 897 |       "Epoch 4, Loss: 0.5323, Accuracy: 81.8320, Test Loss: 0.7427, Test Accuracy: 74.959999\n",
 898 |       "Total number of trainable parameters = 130097\n",
 899 |       "\n",
 900 |       "Epoch 5, Loss: 0.4317, Accuracy: 85.2220, Test Loss: 0.7322, Test Accuracy: 76.430000\n",
 901 |       "Total number of trainable parameters = 130097\n",
 902 |       "\n",
 903 |       "Epoch 6, Loss: 0.3509, Accuracy: 88.0640, Test Loss: 0.7600, Test Accuracy: 76.560005\n",
 904 |       "Total number of trainable parameters = 130097\n",
 905 |       "\n",
 906 |       "Epoch 7, Loss: 0.2912, Accuracy: 90.0380, Test Loss: 0.8234, Test Accuracy: 76.380005\n",
 907 |       "Total number of trainable parameters = 130097\n",
 908 |       "\n",
 909 |       "Epoch 8, Loss: 0.2387, Accuracy: 92.0400, Test Loss: 0.8644, Test Accuracy: 76.290001\n",
 910 |       "Total number of trainable parameters = 130097\n",
 911 |       "\n",
 912 |       "\n",
 913 |       "'EarlyStopping' called!\n",
 914 |       "\n"
 915 |      ]
 916 |     }
 917 |    ],
 918 |    "source": [
 919 |     "# Train winning ticket using 'GradientTape' to observe its training behavior-\n",
 920 |     "    \n",
 921 |     "# Initialize parameters for Early Stopping manual implementation-\n",
 922 |     "best_val_loss = 100\n",
 923 |     "loc_patience = 0\n",
 924 |     "    \n",
 925 |     "for epoch in range(num_epochs):\n",
 926 |     "    \n",
 927 |     "    # print(\"\\n\\nEpoch: {0}\\n\\n\".format(epoch + 1))\n",
 928 |     "    \n",
 929 |     "    if loc_patience >= patience:\n",
 930 |     "        print(\"\\n'EarlyStopping' called!\\n\")\n",
 931 |     "        break\n",
 932 |     "        \n",
 933 |     "    # Reset the metrics at the start of the next epoch\n",
 934 |     "    train_loss.reset_states()\n",
 935 |     "    train_accuracy.reset_states()\n",
 936 |     "    test_loss.reset_states()\n",
 937 |     "    test_accuracy.reset_states()\n",
 938 |     "        \n",
 939 |     "    \n",
 940 |     "    for x, y in train_dataset:\n",
 941 |     "        train_one_step(orig_model_stripped, mask_model_stripped, optimizer, x, y)\n",
 942 |     "\n",
 943 |     "    for x_t, y_t in test_dataset:\n",
 944 |     "        test_step(orig_model_stripped, optimizer, x_t, y_t)\n",
 945 |     "\n",
 946 |     "    template = 'Epoch {0}, Loss: {1:.4f}, Accuracy: {2:.4f}, Test Loss: {3:.4f}, Test Accuracy: {4:4f}'\n",
 947 |     "    \n",
 948 |     "    print(template.format(epoch + 1, \n",
 949 |     "                          train_loss.result(), train_accuracy.result()*100,\n",
 950 |     "                          test_loss.result(), test_accuracy.result()*100))\n",
 951 |     "        \n",
 952 |     "    # Count number of non-zero parameters in each layer and in total-\n",
 953 |     "    # print(\"layer-wise manner model, number of nonzero parameters in each layer are: \\n\")\n",
 954 |     "\n",
 955 |     "    model_sum_params = 0\n",
 956 |     "    \n",
 957 |     "    for layer in orig_model_stripped.trainable_weights:\n",
 958 |     "        # print(tf.math.count_nonzero(layer, axis = None).numpy())\n",
 959 |     "        model_sum_params += tf.math.count_nonzero(layer, axis = None).numpy()\n",
 960 |     "    \n",
 961 |     "    print(\"Total number of trainable parameters = {0}\\n\".format(model_sum_params))\n",
 962 |     "\n",
 963 |     "    \n",
 964 |     "    # Code for manual Early Stopping:\n",
 965 |     "    if (test_loss.result() < best_val_loss) and (np.abs(test_loss.result() - best_val_loss) >= minimum_delta):\n",
 966 |     "        # update 'best_val_loss' variable to lowest loss encountered so far-\n",
 967 |     "        best_val_loss = test_loss.result()\n",
 968 |     "        \n",
 969 |     "        # reset 'loc_patience' variable-\n",
 970 |     "        loc_patience = 0\n",
 971 |     "        \n",
 972 |     "    else:  # there is no improvement in monitored metric 'val_loss'\n",
 973 |     "        loc_patience += 1  # number of epochs without any improvement\n",
 974 |     "\n",
 975 |     "    "
 976 |    ]
 977 |   },
 978 |   {
 979 |    "cell_type": "code",
 980 |    "execution_count": null,
 981 |    "metadata": {},
 982 |    "outputs": [],
 983 |    "source": []
 984 |   },
 985 |   {
 986 |    "cell_type": "code",
 987 |    "execution_count": null,
 988 |    "metadata": {},
 989 |    "outputs": [],
 990 |    "source": []
 991 |   },
 992 |   {
 993 |    "cell_type": "code",
 994 |    "execution_count": 35,
 995 |    "metadata": {},
 996 |    "outputs": [],
 997 |    "source": [
 998 |     "import pickle, numpy as np"
 999 |    ]
1000 |   },
1001 |   {
1002 |    "cell_type": "code",
1003 |    "execution_count": 36,
1004 |    "metadata": {},
1005 |    "outputs": [],
1006 |    "source": [
1007 |     "# Load Python 3 dictionary containing training metrics-\n",
1008 |     "with open(\"Conv4_history_main_Winning_Ticket_Distribution_Experiment_2_Random_Weights_Experiment_2.pkl\", \"rb\") as f:\n",
1009 |     "    hm = pickle.load(f)"
1010 |    ]
1011 |   },
1012 |   {
1013 |    "cell_type": "code",
1014 |    "execution_count": 42,
1015 |    "metadata": {},
1016 |    "outputs": [
1017 |     {
1018 |      "name": "stdout",
1019 |      "output_type": "stream",
1020 |      "text": [
1021 |       "\n",
1022 |       "Conv-4 val_accuracy = 75.6000% using 8 epochs containing 100.00% of weights\n",
1023 |       "\n"
1024 |      ]
1025 |     }
1026 |    ],
1027 |    "source": [
1028 |     "epoch_length = len(hm[1]['val_accuracy'])\n",
1029 |     "\n",
1030 |     "print(\"\\nConv-4 val_accuracy = {0:.4f}% using {1} epochs containing {2:.2f}% of weights\\n\".format(\n",
1031 |     "    hm[1]['val_accuracy'][epoch_length - 1], epoch_length, 100\n",
1032 |     "))\n"
1033 |    ]
1034 |   },
1035 |   {
1036 |    "cell_type": "code",
1037 |    "execution_count": null,
1038 |    "metadata": {},
1039 |    "outputs": [],
1040 |    "source": []
1041 |   },
1042 |   {
1043 |    "cell_type": "code",
1044 |    "execution_count": null,
1045 |    "metadata": {},
1046 |    "outputs": [],
1047 |    "source": []
1048 |   },
1049 |   {
1050 |    "cell_type": "markdown",
1051 |    "metadata": {},
1052 |    "source": [
1053 |     "### Observation:\n",
1054 |     "\n",
1055 |     "1. The over-parameterized, original Conv-4 CNN needed 8 epochs to reach a validation accuracy of 75.6%\n",
1056 |     "1. The winning ticket has __94.6372%__ of its weights pruned and needs 8 epochs to reach a __higher validation accuracy of 76.29%__\n",
1057 |     "\n",
1058 |     "This result shows the success of _The Lottery Ticket Hypothesis_ applied to the Conv-4 CNN on the CIFAR-10 dataset."
1059 |    ]
1060 |   },
1061 |   {
1062 |    "cell_type": "code",
1063 |    "execution_count": null,
1064 |    "metadata": {},
1065 |    "outputs": [],
1066 |    "source": []
1067 |   },
1068 |   {
1069 |    "cell_type": "code",
1070 |    "execution_count": null,
1071 |    "metadata": {},
1072 |    "outputs": [],
1073 |    "source": []
1074 |   }
1075 |  ],
1076 |  "metadata": {
1077 |   "kernelspec": {
1078 |    "display_name": "Python 3",
1079 |    "language": "python",
1080 |    "name": "python3"
1081 |   },
1082 |   "language_info": {
1083 |    "codemirror_mode": {
1084 |     "name": "ipython",
1085 |     "version": 3
1086 |    },
1087 |    "file_extension": ".py",
1088 |    "mimetype": "text/x-python",
1089 |    "name": "python",
1090 |    "nbconvert_exporter": "python",
1091 |    "pygments_lexer": "ipython3",
1092 |    "version": "3.7.3"
1093 |   }
1094 |  },
1095 |  "nbformat": 4,
1096 |  "nbformat_minor": 4
1097 | }
1098 | 


--------------------------------------------------------------------------------
/Quantization_LTH_LeNet_300_100_MNIST.ipynb:
--------------------------------------------------------------------------------
   1 | {
   2 |  "cells": [
   3 |   {
   4 |    "cell_type": "markdown",
   5 |    "metadata": {},
   6 |    "source": [
   7 |     "# Quantization + LTH: LeNet-300-100 for MNIST"
   8 |    ]
   9 |   },
  10 |   {
  11 |    "cell_type": "code",
  12 |    "execution_count": 1,
  13 |    "metadata": {},
  14 |    "outputs": [
  15 |     {
  16 |      "name": "stderr",
  17 |      "output_type": "stream",
  18 |      "text": [
  19 |       "/home/arjun/.local/lib/python3.8/site-packages/statsmodels/tools/_testing.py:19: FutureWarning: pandas.util.testing is deprecated. Use the functions in the public API at pandas.testing instead.\n",
  20 |       "  import pandas.util.testing as tm\n"
  21 |      ]
  22 |     }
  23 |    ],
  24 |    "source": [
  25 |     "import tensorflow as tf\n",
  26 |     "import numpy as np\n",
  27 |     "import matplotlib.pyplot as plt\n",
  28 |     "import seaborn as sns\n",
  29 |     "import math\n",
  30 |     "import tensorflow_model_optimization as tfmot\n",
  31 |     "# from tensorflow_model_optimization.sparsity import keras as sparsity\n",
  32 |     "# from tensorflow.keras import datasets, layers, models\n",
  33 |     "\n",
  34 |     "from tensorflow.keras.layers import AveragePooling2D, Conv2D, MaxPooling2D, ReLU\n",
  35 |     "from tensorflow.keras import models, layers, datasets\n",
  36 |     "from tensorflow.keras.layers import Dense, Flatten, Reshape, Input, InputLayer\n",
  37 |     "from tensorflow.keras.models import Sequential, Model\n",
  38 |     "from tensorflow.keras.initializers import RandomNormal\n",
  39 |     "\n",
  40 |     "from sklearn.metrics import accuracy_score, precision_score, recall_score\n"
  41 |    ]
  42 |   },
  43 |   {
  44 |    "cell_type": "code",
  45 |    "execution_count": null,
  46 |    "metadata": {},
  47 |    "outputs": [],
  48 |    "source": []
  49 |   },
  50 |   {
  51 |    "cell_type": "code",
  52 |    "execution_count": null,
  53 |    "metadata": {},
  54 |    "outputs": [],
  55 |    "source": []
  56 |   },
  57 |   {
  58 |    "cell_type": "code",
  59 |    "execution_count": 2,
  60 |    "metadata": {},
  61 |    "outputs": [
  62 |     {
  63 |      "data": {
  64 |       "text/plain": [
  65 |        "'2.2.0'"
  66 |       ]
  67 |      },
  68 |      "execution_count": 2,
  69 |      "metadata": {},
  70 |      "output_type": "execute_result"
  71 |     }
  72 |    ],
  73 |    "source": [
  74 |     "tf.__version__"
  75 |    ]
  76 |   },
  77 |   {
  78 |    "cell_type": "code",
  79 |    "execution_count": 3,
  80 |    "metadata": {},
  81 |    "outputs": [
  82 |     {
  83 |      "name": "stdout",
  84 |      "output_type": "stream",
  85 |      "text": [
  86 |       "env: CUDA_DEVICE_ORDER=PCI_BUS_ID\n",
  87 |       "env: CUDA_VISIBLE_DEVICES=2\n"
  88 |      ]
  89 |     }
  90 |    ],
  91 |    "source": [
  92 |     "%env CUDA_DEVICE_ORDER=PCI_BUS_ID\n",
  93 |     "%env CUDA_VISIBLE_DEVICES=2"
  94 |    ]
  95 |   },
  96 |   {
  97 |    "cell_type": "code",
  98 |    "execution_count": 3,
  99 |    "metadata": {},
 100 |    "outputs": [],
 101 |    "source": [
 102 |     "batch_size = 60\n",
 103 |     "num_classes = 10\n",
 104 |     "num_epochs = 100"
 105 |    ]
 106 |   },
 107 |   {
 108 |    "cell_type": "code",
 109 |    "execution_count": 4,
 110 |    "metadata": {},
 111 |    "outputs": [],
 112 |    "source": [
 113 |     "# Data preprocessing and cleaning:\n",
 114 |     "# input image dimensions\n",
 115 |     "img_rows, img_cols = 28, 28\n",
 116 |     "\n",
 117 |     "# Load MNIST dataset-\n",
 118 |     "(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()"
 119 |    ]
 120 |   },
 121 |   {
 122 |    "cell_type": "code",
 123 |    "execution_count": 5,
 124 |    "metadata": {},
 125 |    "outputs": [
 126 |     {
 127 |      "name": "stdout",
 128 |      "output_type": "stream",
 129 |      "text": [
 130 |       "\n",
 131 |       "'input_shape' which will be used = (28, 28, 1)\n",
 132 |       "\n"
 133 |      ]
 134 |     }
 135 |    ],
 136 |    "source": [
 137 |     "if tf.keras.backend.image_data_format() == 'channels_first':\n",
 138 |     "    X_train = X_train.reshape(X_train.shape[0], 1, img_rows, img_cols)\n",
 139 |     "    X_test = X_test.reshape(X_test.shape[0], 1, img_rows, img_cols)\n",
 140 |     "    input_shape = (1, img_rows, img_cols)\n",
 141 |     "else:\n",
 142 |     "    X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, 1)\n",
 143 |     "    X_test = X_test.reshape(X_test.shape[0], img_rows, img_cols, 1)\n",
 144 |     "    input_shape = (img_rows, img_cols, 1)\n",
 145 |     "\n",
 146 |     "print(\"\\n'input_shape' which will be used = {0}\\n\".format(input_shape))"
 147 |    ]
 148 |   },
 149 |   {
 150 |    "cell_type": "code",
 151 |    "execution_count": 6,
 152 |    "metadata": {},
 153 |    "outputs": [],
 154 |    "source": [
 155 |     "# Convert datasets to floating point types-\n",
 156 |     "X_train = X_train.astype('float32')\n",
 157 |     "X_test = X_test.astype('float32')\n",
 158 |     "\n",
 159 |     "# Normalize the training and testing datasets-\n",
 160 |     "X_train /= 255.0\n",
 161 |     "X_test /= 255.0"
 162 |    ]
 163 |   },
 164 |   {
 165 |    "cell_type": "code",
 166 |    "execution_count": 7,
 167 |    "metadata": {},
 168 |    "outputs": [],
 169 |    "source": [
 170 |     "# convert class vectors/target to binary class matrices or one-hot encoded values-\n",
 171 |     "y_train = tf.keras.utils.to_categorical(y_train, num_classes)\n",
 172 |     "y_test = tf.keras.utils.to_categorical(y_test, num_classes)"
 173 |    ]
 174 |   },
 175 |   {
 176 |    "cell_type": "code",
 177 |    "execution_count": 8,
 178 |    "metadata": {},
 179 |    "outputs": [
 180 |     {
 181 |      "data": {
 182 |       "text/plain": [
 183 |        "((60000, 10), (10000, 10))"
 184 |       ]
 185 |      },
 186 |      "execution_count": 8,
 187 |      "metadata": {},
 188 |      "output_type": "execute_result"
 189 |     }
 190 |    ],
 191 |    "source": [
 192 |     "y_train.shape, y_test.shape"
 193 |    ]
 194 |   },
 195 |   {
 196 |    "cell_type": "code",
 197 |    "execution_count": 9,
 198 |    "metadata": {},
 199 |    "outputs": [
 200 |     {
 201 |      "name": "stdout",
 202 |      "output_type": "stream",
 203 |      "text": [
 204 |       "\n",
 205 |       "X_train.shape = (60000, 28, 28, 1), y_train.shape = (60000, 10)\n",
 206 |       "\n",
 207 |       "X_test.shape = (10000, 28, 28, 1), y_test.shape = (10000, 10)\n",
 208 |       "\n"
 209 |      ]
 210 |     }
 211 |    ],
 212 |    "source": [
 213 |     "print(\"\\nX_train.shape = {0}, y_train.shape = {1}\".format(X_train.shape, y_train.shape))\n",
 214 |     "print(\"\\nX_test.shape = {0}, y_test.shape = {1}\\n\".format(X_test.shape, y_test.shape))"
 215 |    ]
 216 |   },
 217 |   {
 218 |    "cell_type": "code",
 219 |    "execution_count": null,
 220 |    "metadata": {},
 221 |    "outputs": [],
 222 |    "source": []
 223 |   },
 224 |   {
 225 |    "cell_type": "code",
 226 |    "execution_count": 10,
 227 |    "metadata": {},
 228 |    "outputs": [],
 229 |    "source": [
 230 |     "# Reshape training and testing sets-\n",
 231 |     "X_train = X_train.reshape(X_train.shape[0], 784)\n",
 232 |     "X_test = X_test.reshape(X_test.shape[0], 784)"
 233 |    ]
 234 |   },
 235 |   {
 236 |    "cell_type": "code",
 237 |    "execution_count": 11,
 238 |    "metadata": {},
 239 |    "outputs": [
 240 |     {
 241 |      "name": "stdout",
 242 |      "output_type": "stream",
 243 |      "text": [
 244 |       "\n",
 245 |       "Dimensions of training and testing sets are:\n",
 246 |       "X_train.shape = (60000, 784), y_train.shape = (60000, 10)\n",
 247 |       "X_test.shape = (10000, 784), y_test.shape = (10000, 10)\n"
 248 |      ]
 249 |     }
 250 |    ],
 251 |    "source": [
 252 |     "print(\"\\nDimensions of training and testing sets are:\")\n",
 253 |     "print(\"X_train.shape = {0}, y_train.shape = {1}\".format(X_train.shape, y_train.shape))\n",
 254 |     "print(\"X_test.shape = {0}, y_test.shape = {1}\".format(X_test.shape, y_test.shape))"
 255 |    ]
 256 |   },
 257 |   {
 258 |    "cell_type": "code",
 259 |    "execution_count": null,
 260 |    "metadata": {},
 261 |    "outputs": [],
 262 |    "source": []
 263 |   },
 264 |   {
 265 |    "cell_type": "code",
 266 |    "execution_count": null,
 267 |    "metadata": {},
 268 |    "outputs": [],
 269 |    "source": []
 270 |   },
 271 |   {
 272 |    "cell_type": "markdown",
 273 |    "metadata": {},
 274 |    "source": [
 275 |     "### Prepare MNIST dataset for _GradientTape_ training:"
 276 |    ]
 277 |   },
 278 |   {
 279 |    "cell_type": "code",
 280 |    "execution_count": 12,
 281 |    "metadata": {},
 282 |    "outputs": [],
 283 |    "source": [
 284 |     "# Create training and testing datasets-\n",
 285 |     "train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))\n",
 286 |     "test_dataset = tf.data.Dataset.from_tensor_slices((X_test, y_test))"
 287 |    ]
 288 |   },
 289 |   {
 290 |    "cell_type": "code",
 291 |    "execution_count": 13,
 292 |    "metadata": {},
 293 |    "outputs": [],
 294 |    "source": [
 295 |     "train_dataset = train_dataset.shuffle(buffer_size = 20000, reshuffle_each_iteration = True).batch(batch_size = batch_size, drop_remainder = False)"
 296 |    ]
 297 |   },
 298 |   {
 299 |    "cell_type": "code",
 300 |    "execution_count": 14,
 301 |    "metadata": {},
 302 |    "outputs": [],
 303 |    "source": [
 304 |     "test_dataset = test_dataset.batch(batch_size=batch_size, drop_remainder=False)"
 305 |    ]
 306 |   },
 307 |   {
 308 |    "cell_type": "code",
 309 |    "execution_count": 15,
 310 |    "metadata": {},
 311 |    "outputs": [],
 312 |    "source": [
 313 |     "# Choose an optimizer and loss function for training-\n",
 314 |     "loss_fn = tf.keras.losses.CategoricalCrossentropy()\n",
 315 |     "optimizer = tf.keras.optimizers.Adam(lr = 0.0012)"
 316 |    ]
 317 |   },
 318 |   {
 319 |    "cell_type": "code",
 320 |    "execution_count": 16,
 321 |    "metadata": {},
 322 |    "outputs": [],
 323 |    "source": [
 324 |     "# Select metrics to measure the error & accuracy of model.\n",
 325 |     "# These metrics accumulate the values over epochs and then\n",
 326 |     "# print the overall result-\n",
 327 |     "train_loss = tf.keras.metrics.Mean(name = 'train_loss')\n",
 328 |     "train_accuracy = tf.keras.metrics.CategoricalAccuracy(name = 'train_accuracy')\n",
 329 |     "\n",
 330 |     "test_loss = tf.keras.metrics.Mean(name = 'test_loss')\n",
 331 |     "test_accuracy = tf.keras.metrics.CategoricalAccuracy(name = 'test_accuracy')"
 332 |    ]
 333 |   },
 334 |   {
 335 |    "cell_type": "code",
 336 |    "execution_count": null,
 337 |    "metadata": {},
 338 |    "outputs": [],
 339 |    "source": []
 340 |   },
 341 |   {
 342 |    "cell_type": "code",
 343 |    "execution_count": null,
 344 |    "metadata": {},
 345 |    "outputs": [],
 346 |    "source": []
 347 |   },
 348 |   {
 349 |    "cell_type": "code",
 350 |    "execution_count": 17,
 351 |    "metadata": {},
 352 |    "outputs": [],
 353 |    "source": [
 354 |     "def lenet_nn():\n",
 355 |     "\t\"\"\"\n",
 356 |     "\tFunction to define the architecture of a neural network model\n",
 357 |     "\tfollowing 300 100 Dense Fully-Connected architecture for MNIST\n",
 358 |     "\tdataset.\n",
 359 |     "    \n",
 360 |     "\tOutput: Returns designed and compiled neural network model\n",
 361 |     "\t\"\"\"\n",
 362 |     "    \n",
 363 |     "\tmodel = Sequential()\n",
 364 |     "\tmodel.add(InputLayer(input_shape=(784, )))\n",
 365 |     "\t# model.add(Flatten())\n",
 366 |     "\tmodel.add(\n",
 367 |     "\t\tDense(\n",
 368 |     "\t\t\tunits = 300, activation='relu',\n",
 369 |     "\t\t\tkernel_initializer=tf.initializers.GlorotUniform()\n",
 370 |     "\t\t\t)\n",
 371 |     "\t\t)\n",
 372 |     "\n",
 373 |     "\t# model.add(l.Dropout(0.2))\n",
 374 |     "\n",
 375 |     "\tmodel.add(\n",
 376 |     "\t\tDense(\n",
 377 |     "\t\t\tunits = 100, activation='relu',\n",
 378 |     "\t\t\tkernel_initializer=tf.initializers.GlorotUniform()\n",
 379 |     "\t\t\t)\n",
 380 |     "\t\t)\n",
 381 |     "        \n",
 382 |     "\t# model.add(l.Dropout(0.1))\n",
 383 |     "\n",
 384 |     "\tmodel.add(\n",
 385 |     "\t\tDense(\n",
 386 |     "\t\t\tunits = num_classes, activation='softmax'\n",
 387 |     "\t\t\t)\n",
 388 |     "\t\t)\n",
 389 |     "    \n",
 390 |     "\n",
 391 |     "\t# Compile pruned NN-\n",
 392 |     "\tmodel.compile(\n",
 393 |     "\t\tloss=tf.keras.losses.categorical_crossentropy,\n",
 394 |     "\t\t# optimizer='adam',\n",
 395 |     "\t\toptimizer=tf.keras.optimizers.Adam(lr = 0.0012),\n",
 396 |     "\t\tmetrics=['accuracy'])\n",
 397 |     "    \n",
 398 |     "\treturn model\n"
 399 |    ]
 400 |   },
 401 |   {
 402 |    "cell_type": "code",
 403 |    "execution_count": null,
 404 |    "metadata": {},
 405 |    "outputs": [],
 406 |    "source": []
 407 |   },
 408 |   {
 409 |    "cell_type": "code",
 410 |    "execution_count": 18,
 411 |    "metadata": {},
 412 |    "outputs": [],
 413 |    "source": [
 414 |     "# Initialize model-\n",
 415 |     "model = lenet_nn()"
 416 |    ]
 417 |   },
 418 |   {
 419 |    "cell_type": "code",
 420 |    "execution_count": 19,
 421 |    "metadata": {},
 422 |    "outputs": [],
 423 |    "source": [
 424 |     "# Load weights of winning ticket-\n",
 425 |     "model.load_weights(\"/home/arjun/Desktop/Codes/Lottery_Hypothesis-Resources/Latest_Works/LTH_Experiments/Experiment_number_5/LeNet_300_100_MNIST/LeNet_300_MNIST_Magnitude_Winning_Ticket_Distribution_91.18900266306589.h5\")"
 426 |    ]
 427 |   },
 428 |   {
 429 |    "cell_type": "code",
 430 |    "execution_count": 59,
 431 |    "metadata": {},
 432 |    "outputs": [
 433 |     {
 434 |      "name": "stdout",
 435 |      "output_type": "stream",
 436 |      "text": [
 437 |       "layer: (784, 300) has 20204 non-zero parameters\n",
 438 |       "layer: (300,) has 0 non-zero parameters\n",
 439 |       "layer: (300, 100) has 2577 non-zero parameters\n",
 440 |       "layer: (100,) has 0 non-zero parameters\n",
 441 |       "layer: (100, 10) has 314 non-zero parameters\n",
 442 |       "layer: (10,) has 0 non-zero parameters\n",
 443 |       "\n",
 444 |       "Total # of non-zero parameters = 23095\n",
 445 |       "\n"
 446 |      ]
 447 |     }
 448 |    ],
 449 |    "source": [
 450 |     "# Count number of non-zero parameters-\n",
 451 |     "winning_params = 0\n",
 452 |     "\n",
 453 |     "for layer in model.trainable_weights:\n",
 454 |     "    nonzeroparams = tf.math.count_nonzero(layer, axis = None).numpy()\n",
 455 |     "    print(\"layer: {0} has {1} non-zero parameters\".format(layer.shape, nonzeroparams))\n",
 456 |     "    winning_params += nonzeroparams\n",
 457 |     "\n",
 458 |     "print(\"\\nTotal # of non-zero parameters = {0}\\n\".format(winning_params))"
 459 |    ]
 460 |   },
 461 |   {
 462 |    "cell_type": "code",
 463 |    "execution_count": null,
 464 |    "metadata": {},
 465 |    "outputs": [],
 466 |    "source": []
 467 |   },
 468 |   {
 469 |    "cell_type": "code",
 470 |    "execution_count": 21,
 471 |    "metadata": {},
 472 |    "outputs": [
 473 |     {
 474 |      "name": "stdout",
 475 |      "output_type": "stream",
 476 |      "text": [
 477 |       "\n",
 478 |       "Number of training weights = 266610 and non-trainabel weights = 0.0\n",
 479 |       "\n",
 480 |       "Total number of parameters = 266610.0\n",
 481 |       "\n"
 482 |      ]
 483 |     }
 484 |    ],
 485 |    "source": [
 486 |     "import tensorflow.keras.backend as K\n",
 487 |     "\n",
 488 |     "\n",
 489 |     "# METHOD-1: This also counts biases\n",
 490 |     "\n",
 491 |     "trainable_wts = np.sum([K.count_params(w) for w in model.trainable_weights])\n",
 492 |     "non_trainable_wts = np.sum([K.count_params(w) for w in model.non_trainable_weights])\n",
 493 |     "\n",
 494 |     "print(\"\\nNumber of training weights = {0} and non-trainabel weights = {1}\\n\".format(\n",
 495 |     "    trainable_wts, non_trainable_wts\n",
 496 |     "))\n",
 497 |     "print(\"Total number of parameters = {0}\\n\".format(trainable_wts + non_trainable_wts))\n"
 498 |    ]
 499 |   },
 500 |   {
 501 |    "cell_type": "code",
 502 |    "execution_count": 23,
 503 |    "metadata": {},
 504 |    "outputs": [
 505 |     {
 506 |      "name": "stdout",
 507 |      "output_type": "stream",
 508 |      "text": [
 509 |       "\n",
 510 |       "91.3375% of parameters have been pruned\n",
 511 |       "\n"
 512 |      ]
 513 |     }
 514 |    ],
 515 |    "source": [
 516 |     "print(\"\\n{0:.4f}% of parameters have been pruned\\n\".format((trainable_wts - winning_params) / trainable_wts * 100))  # 'winning_params' = non-zero weights counted above"
 517 |    ]
 518 |   },
 519 |   {
 520 |    "cell_type": "code",
 521 |    "execution_count": null,
 522 |    "metadata": {},
 523 |    "outputs": [],
 524 |    "source": []
 525 |   },
 526 |   {
 527 |    "cell_type": "code",
 528 |    "execution_count": null,
 529 |    "metadata": {},
 530 |    "outputs": [],
 531 |    "source": []
 532 |   },
 533 |   {
 534 |    "cell_type": "code",
 535 |    "execution_count": 33,
 536 |    "metadata": {},
 537 |    "outputs": [],
 538 |    "source": [
 539 |     "# Create mask using winning ticket-\n",
 540 |     "\n",
 541 |     "# Instantiate a new neural network model that will hold the mask-\n",
 542 |     "mask_model = lenet_nn()\n",
 543 |     "    \n",
 544 |     "# Load weights of PRUNED model-\n",
 545 |     "mask_model.set_weights(model.get_weights())\n",
 546 |     "    \n",
 547 |     "# For each layer, leave every weight that is 0 unchanged,\n",
 548 |     "# and set every weight that survived pruning to ONE (1)-\n",
 549 |     "for wts in mask_model.trainable_weights:\n",
 550 |     "    wts.assign(tf.where(tf.equal(wts, 0.), 0., 1.))\n"
 551 |    ]
 552 |   },
 553 |   {
 554 |    "cell_type": "code",
 555 |    "execution_count": 34,
 556 |    "metadata": {},
 557 |    "outputs": [
 558 |     {
 559 |      "name": "stdout",
 560 |      "output_type": "stream",
 561 |      "text": [
 562 |       "layer: (784, 300) has 20204 non-zero masks\n",
 563 |       "layer: (300,) has 0 non-zero masks\n",
 564 |       "layer: (300, 100) has 2577 non-zero masks\n",
 565 |       "layer: (100,) has 0 non-zero masks\n",
 566 |       "layer: (100, 10) has 314 non-zero masks\n",
 567 |       "layer: (10,) has 0 non-zero masks\n",
 568 |       "\n",
 569 |       "Total # of non-zero masks = 23095\n",
 570 |       "\n"
 571 |      ]
 572 |     }
 573 |    ],
 574 |    "source": [
 575 |     "# Count number of non-zero masks-\n",
 576 |     "mask_params = 0\n",
 577 |     "\n",
 578 |     "for layer in mask_model.trainable_weights:\n",
 579 |     "    nonzeroparams = tf.math.count_nonzero(layer, axis = None).numpy()\n",
 580 |     "    print(\"layer: {0} has {1} non-zero masks\".format(layer.shape, nonzeroparams))\n",
 581 |     "    mask_params += nonzeroparams\n",
 582 |     "\n",
 583 |     "print(\"\\nTotal # of non-zero masks = {0}\\n\".format(mask_params))"
 584 |    ]
 585 |   },
 586 |   {
 587 |    "cell_type": "code",
 588 |    "execution_count": 60,
 589 |    "metadata": {},
 590 |    "outputs": [
 591 |     {
 592 |      "name": "stdout",
 593 |      "output_type": "stream",
 594 |      "text": [
 595 |       "\n",
 596 |       "number of non-zero parameters and masks matches!\n"
 597 |      ]
 598 |     }
 599 |    ],
 600 |    "source": [
 601 |     "if mask_params == winning_params:\n",
 602 |     "    print(\"\\nnumber of non-zero parameters and masks matches!\")\n",
 603 |     "else:\n",
 604 |     "    print(\"\\nERROR! number of non-zero parameters and masks DO NOT MATCH!\")"
 605 |    ]
 606 |   },
 607 |   {
 608 |    "cell_type": "code",
 609 |    "execution_count": null,
 610 |    "metadata": {},
 611 |    "outputs": [],
 612 |    "source": []
 613 |   },
 614 |   {
 615 |    "cell_type": "code",
 616 |    "execution_count": null,
 617 |    "metadata": {},
 618 |    "outputs": [],
 619 |    "source": []
 620 |   },
 621 |   {
 622 |    "cell_type": "markdown",
 623 |    "metadata": {},
 624 |    "source": [
 625 |     "### Clone and fine-tune pre-trained model with quantization aware training:"
 626 |    ]
 627 |   },
 628 |   {
 629 |    "cell_type": "code",
 630 |    "execution_count": 24,
 631 |    "metadata": {},
 632 |    "outputs": [],
 633 |    "source": [
 634 |     "quantize_model = tfmot.quantization.keras.quantize_model\n",
 635 |     "\n",
 636 |     "# q_aware stands for quantization aware.\n",
 637 |     "q_aware_model = quantize_model(model)"
 638 |    ]
 639 |   },
 640 |   {
 641 |    "cell_type": "code",
 642 |    "execution_count": 25,
 643 |    "metadata": {},
 644 |    "outputs": [],
 645 |    "source": [
 646 |     "# 'quantize_model' returns a new model, which must be compiled before training-\n",
 647 |     "q_aware_model.compile(\n",
 648 |     "    optimizer = tf.keras.optimizers.Adam(learning_rate = 0.0012),  # 'lr' is a deprecated alias\n",
 649 |     "    loss=tf.keras.losses.categorical_crossentropy,\n",
 650 |     "    metrics=['accuracy']\n",
 651 |     ")\n"
 652 |    ]
 653 |   },
 654 |   {
 655 |    "cell_type": "code",
 656 |    "execution_count": 26,
 657 |    "metadata": {},
 658 |    "outputs": [
 659 |     {
 660 |      "name": "stdout",
 661 |      "output_type": "stream",
 662 |      "text": [
 663 |       "Model: \"sequential\"\n",
 664 |       "_________________________________________________________________\n",
 665 |       "Layer (type)                 Output Shape              Param #   \n",
 666 |       "=================================================================\n",
 667 |       "quant_dense (QuantizeWrapper (None, 300)               235505    \n",
 668 |       "_________________________________________________________________\n",
 669 |       "quant_dense_1 (QuantizeWrapp (None, 100)               30105     \n",
 670 |       "_________________________________________________________________\n",
 671 |       "quant_dense_2 (QuantizeWrapp (None, 10)                1015      \n",
 672 |       "=================================================================\n",
 673 |       "Total params: 266,625\n",
 674 |       "Trainable params: 266,610\n",
 675 |       "Non-trainable params: 15\n",
 676 |       "_________________________________________________________________\n"
 677 |      ]
 678 |     }
 679 |    ],
 680 |    "source": [
 681 |     "# Get quantization aware model summary-\n",
 682 |     "q_aware_model.summary()"
 683 |    ]
 684 |   },
 685 |   {
 686 |    "cell_type": "code",
 687 |    "execution_count": null,
 688 |    "metadata": {},
 689 |    "outputs": [],
 690 |    "source": []
 691 |   },
 692 |   {
 693 |    "cell_type": "code",
 694 |    "execution_count": null,
 695 |    "metadata": {},
 696 |    "outputs": [],
 697 |    "source": []
 698 |   },
 699 |   {
 700 |    "cell_type": "markdown",
 701 |    "metadata": {},
 702 |    "source": [
 703 |     "### Train winning ticket model-"
 704 |    ]
 705 |   },
 706 |   {
 707 |    "cell_type": "code",
 708 |    "execution_count": 39,
 709 |    "metadata": {},
 710 |    "outputs": [],
 711 |    "source": [
 712 |     "# User input parameters for Early Stopping in manual implementation-\n",
 713 |     "minimum_delta = 0.001\n",
 714 |     "patience = 3"
 715 |    ]
 716 |   },
 717 |   {
 718 |    "cell_type": "code",
 719 |    "execution_count": 40,
 720 |    "metadata": {},
 721 |    "outputs": [],
 722 |    "source": [
 723 |     "best_val_loss = 100\n",
 724 |     "loc_patience = 0"
 725 |    ]
 726 |   },
 727 |   {
 728 |    "cell_type": "code",
 729 |    "execution_count": null,
 730 |    "metadata": {},
 731 |    "outputs": [],
 732 |    "source": []
 733 |   },
 734 |   {
 735 |    "cell_type": "code",
 736 |    "execution_count": 41,
 737 |    "metadata": {},
 738 |    "outputs": [],
 739 |    "source": [
 740 |     "# Initialize a new LeNet-300-100 model-\n",
 741 |     "winning_ticket_model = lenet_nn()\n",
 742 |     "\n",
 743 |     "# Load weights of winning ticket-\n",
 744 |     "winning_ticket_model.set_weights(model.get_weights())"
 745 |    ]
 746 |   },
 747 |   {
 748 |    "cell_type": "code",
 749 |    "execution_count": null,
 750 |    "metadata": {},
 751 |    "outputs": [],
 752 |    "source": []
 753 |   },
 754 |   {
 755 |    "cell_type": "code",
 756 |    "execution_count": null,
 757 |    "metadata": {},
 758 |    "outputs": [],
 759 |    "source": []
 760 |   },
 761 |   {
 762 |    "cell_type": "code",
 763 |    "execution_count": 42,
 764 |    "metadata": {},
 765 |    "outputs": [],
 766 |    "source": [
 767 |     "# Define 'train_one_step()' and 'test_step()' functions here-\n",
 768 |     "@tf.function\n",
 769 |     "def train_one_step(model, mask_model, optimizer, x, y):\n",
 770 |     "    '''\n",
 771 |     "    Function to compute one step of gradient descent optimization\n",
 772 |     "    '''\n",
 773 |     "    with tf.GradientTape() as tape:\n",
 774 |     "        # Make predictions using defined model-\n",
 775 |     "        y_pred = model(x)\n",
 776 |     "\n",
 777 |     "        # Compute loss-\n",
 778 |     "        loss = loss_fn(y, y_pred)\n",
 779 |     "        \n",
 780 |     "    # Compute gradients wrt defined loss and weights and biases-\n",
 781 |     "    grads = tape.gradient(loss, model.trainable_variables)\n",
 782 |     "    \n",
 783 |     "    # type(grads)\n",
 784 |     "    # list\n",
 785 |     "    \n",
 786 |     "    # List to hold element-wise multiplication between-\n",
 787 |     "    # computed gradient and masks-\n",
 788 |     "    grad_mask_mul = []\n",
 789 |     "    \n",
 790 |     "    # Perform element-wise multiplication between computed gradients and masks-\n",
 791 |     "    for grad_layer, mask in zip(grads, mask_model.trainable_weights):\n",
 792 |     "        grad_mask_mul.append(tf.math.multiply(grad_layer, mask))\n",
 793 |     "    \n",
 794 |     "    # Apply computed gradients to model's weights and biases-\n",
 795 |     "    optimizer.apply_gradients(zip(grad_mask_mul, model.trainable_variables))\n",
 796 |     "\n",
 797 |     "    # Update running loss and accuracy metrics-\n",
 798 |     "    train_loss(loss)\n",
 799 |     "    train_accuracy(y, y_pred)\n",
 800 |     "\n",
 801 |     "    return None\n",
 802 |     "    \n",
 803 |     "    \n",
 804 |     "@tf.function\n",
 805 |     "def test_step(model, optimizer, data, labels):\n",
 806 |     "    \"\"\"\n",
 807 |     "    Function to test model performance\n",
 808 |     "    on testing dataset\n",
 809 |     "    \"\"\"\n",
 810 |     "    \n",
 811 |     "    predictions = model(data)\n",
 812 |     "    t_loss = loss_fn(labels, predictions)\n",
 813 |     "\n",
 814 |     "    test_loss(t_loss)\n",
 815 |     "    test_accuracy(labels, predictions)\n",
 816 |     "\n",
 817 |     "    return None\n",
 818 |     "\n"
 819 |    ]
 820 |   },
 821 |   {
 822 |    "cell_type": "code",
 823 |    "execution_count": 43,
 824 |    "metadata": {},
 825 |    "outputs": [
 826 |     {
 827 |      "name": "stdout",
 828 |      "output_type": "stream",
 829 |      "text": [
 830 |       "Epoch 1, Loss: 0.1101, Accuracy: 97.2017, Test Loss: 0.0658, Test Accuracy: 97.989998\n",
 831 |       "Total number of trainable parameters = 23095\n",
 832 |       "\n",
 833 |       "Epoch 2, Loss: 0.0340, Accuracy: 99.0883, Test Loss: 0.0567, Test Accuracy: 98.199997\n",
 834 |       "Total number of trainable parameters = 23095\n",
 835 |       "\n",
 836 |       "Epoch 3, Loss: 0.0211, Accuracy: 99.4500, Test Loss: 0.0553, Test Accuracy: 98.259995\n",
 837 |       "Total number of trainable parameters = 23095\n",
 838 |       "\n",
 839 |       "Epoch 4, Loss: 0.0136, Accuracy: 99.6750, Test Loss: 0.0586, Test Accuracy: 98.229996\n",
 840 |       "Total number of trainable parameters = 23095\n",
 841 |       "\n",
 842 |       "Epoch 5, Loss: 0.0097, Accuracy: 99.7700, Test Loss: 0.0639, Test Accuracy: 98.110001\n",
 843 |       "Total number of trainable parameters = 23095\n",
 844 |       "\n",
 845 |       "Epoch 6, Loss: 0.0064, Accuracy: 99.8750, Test Loss: 0.0619, Test Accuracy: 98.299995\n",
 846 |       "Total number of trainable parameters = 23095\n",
 847 |       "\n",
 848 |       "\n",
 849 |       "'EarlyStopping' called!\n",
 850 |       "\n"
 851 |      ]
 852 |     }
 853 |    ],
 854 |    "source": [
 855 |     "# Train winning ticket model using 'GradientTape' with manual Early Stopping-\n",
 856 |     "    \n",
 857 |     "# Early Stopping state is initialized in an earlier cell, equivalent to-\n",
 858 |     "# best_val_loss = 100\n",
 859 |     "# loc_patience = 0\n",
 860 |     "    \n",
 861 |     "for epoch in range(num_epochs):\n",
 862 |     "    \n",
 863 |     "    if loc_patience >= patience:\n",
 864 |     "        print(\"\\n'EarlyStopping' called!\\n\")\n",
 865 |     "        break\n",
 866 |     "        \n",
 867 |     "    # Reset the metrics at the start of the next epoch\n",
 868 |     "    train_loss.reset_states()\n",
 869 |     "    train_accuracy.reset_states()\n",
 870 |     "    test_loss.reset_states()\n",
 871 |     "    test_accuracy.reset_states()\n",
 872 |     "            \n",
 873 |     "    \n",
 874 |     "    for x, y in train_dataset:\n",
 875 |     "        train_one_step(winning_ticket_model, mask_model, optimizer, x, y)\n",
 876 |     "\n",
 877 |     "\n",
 878 |     "    for x_t, y_t in test_dataset:\n",
 879 |     "        test_step(winning_ticket_model, optimizer, x_t, y_t)\n",
 880 |     "\n",
 881 |     "    template = 'Epoch {0}, Loss: {1:.4f}, Accuracy: {2:.4f}, Test Loss: {3:.4f}, Test Accuracy: {4:.4f}'\n",
 882 |     "    \n",
 883 |     "    '''\n",
 884 |     "    # 'i' is the index for number of pruning rounds-\n",
 885 |     "    history_main[i]['accuracy'][epoch] = train_accuracy.result() * 100\n",
 886 |     "    history_main[i]['loss'][epoch] = train_loss.result()\n",
 887 |     "    history_main[i]['val_loss'][epoch] = test_loss.result()\n",
 888 |     "    history_main[i]['val_accuracy'][epoch] = test_accuracy.result() * 100\n",
 889 |     "    ''' \n",
 890 |     "\n",
 891 |     "    print(template.format(\n",
 892 |     "        epoch + 1, train_loss.result(),\n",
 893 |     "        train_accuracy.result()*100, test_loss.result(),\n",
 894 |     "        test_accuracy.result()*100)\n",
 895 |     "         )\n",
 896 |     "    \n",
 897 |     "    # Count number of non-zero parameters in each layer and in total-\n",
 898 |     "    # print(\"layer-wise manner model, number of nonzero parameters in each layer are: \\n\")\n",
 899 |     "    model_sum_params = 0\n",
 900 |     "    \n",
 901 |     "    for layer in winning_ticket_model.trainable_weights:\n",
 902 |     "        # print(tf.math.count_nonzero(layer, axis = None).numpy())\n",
 903 |     "        model_sum_params += tf.math.count_nonzero(layer, axis = None).numpy()\n",
 904 |     "    \n",
 905 |     "    print(\"Total number of trainable parameters = {0}\\n\".format(model_sum_params))\n",
 906 |     "\n",
 907 |     "    \n",
 908 |     "    # Manual Early Stopping: improvement means val loss dropped by >= 'minimum_delta'-\n",
 909 |     "    if best_val_loss - test_loss.result() >= minimum_delta:\n",
 910 |     "        # update 'best_val_loss' variable to lowest loss encountered so far-\n",
 911 |     "        best_val_loss = test_loss.result()\n",
 912 |     "        \n",
 913 |     "        # reset 'loc_patience' variable-\n",
 914 |     "        loc_patience = 0\n",
 915 |     "        \n",
 916 |     "    else:  # there is no improvement in monitored metric 'val_loss'\n",
 917 |     "        loc_patience += 1  # number of epochs without any improvement\n"
 918 |    ]
 919 |   },
 920 |   {
 921 |    "cell_type": "code",
 922 |    "execution_count": null,
 923 |    "metadata": {},
 924 |    "outputs": [],
 925 |    "source": []
 926 |   },
 927 |   {
 928 |    "cell_type": "code",
 929 |    "execution_count": null,
 930 |    "metadata": {},
 931 |    "outputs": [],
 932 |    "source": []
 933 |   },
 934 |   {
 935 |    "cell_type": "markdown",
 936 |    "metadata": {},
 937 |    "source": [
 938 |     "### Train _Quantized_ winning ticket model:"
 939 |    ]
 940 |   },
 941 |   {
 942 |    "cell_type": "code",
 943 |    "execution_count": 44,
 944 |    "metadata": {},
 945 |    "outputs": [],
 946 |    "source": [
 947 |     "# User input parameters for Early Stopping in manual implementation-\n",
 948 |     "minimum_delta = 0.001\n",
 949 |     "patience = 3"
 950 |    ]
 951 |   },
 952 |   {
 953 |    "cell_type": "code",
 954 |    "execution_count": 45,
 955 |    "metadata": {},
 956 |    "outputs": [],
 957 |    "source": [
 958 |     "best_val_loss = 100\n",
 959 |     "loc_patience = 0"
 960 |    ]
 961 |   },
 962 |   {
 963 |    "cell_type": "code",
 964 |    "execution_count": null,
 965 |    "metadata": {},
 966 |    "outputs": [],
 967 |    "source": []
 968 |   },
 969 |   {
 970 |    "cell_type": "code",
 971 |    "execution_count": 41,
 972 |    "metadata": {},
 973 |    "outputs": [],
 974 |    "source": [
 975 |     "# Initialize a new LeNet-300-100 model-\n",
 976 |     "# winning_ticket_model = lenet_nn()\n",
 977 |     "\n",
 978 |     "# Load weights of winning ticket-\n",
 979 |     "# winning_ticket_model.set_weights(model.get_weights())"
 980 |    ]
 981 |   },
 982 |   {
 983 |    "cell_type": "code",
 984 |    "execution_count": null,
 985 |    "metadata": {},
 986 |    "outputs": [],
 987 |    "source": []
 988 |   },
 989 |   {
 990 |    "cell_type": "code",
 991 |    "execution_count": null,
 992 |    "metadata": {},
 993 |    "outputs": [],
 994 |    "source": []
 995 |   },
 996 |   {
 997 |    "cell_type": "code",
 998 |    "execution_count": 46,
 999 |    "metadata": {},
1000 |    "outputs": [],
1001 |    "source": [
1002 |     "# Define 'train_one_step()' and 'test_step()' functions here-\n",
1003 |     "@tf.function\n",
1004 |     "def train_one_step(model, mask_model, optimizer, x, y):\n",
1005 |     "    '''\n",
1006 |     "    Function to compute one step of gradient descent optimization\n",
1007 |     "    '''\n",
1008 |     "    with tf.GradientTape() as tape:\n",
1009 |     "        # Make predictions using defined model-\n",
1010 |     "        y_pred = model(x)\n",
1011 |     "\n",
1012 |     "        # Compute loss-\n",
1013 |     "        loss = loss_fn(y, y_pred)\n",
1014 |     "        \n",
1015 |     "    # Compute gradients wrt defined loss and weights and biases-\n",
1016 |     "    grads = tape.gradient(loss, model.trainable_variables)\n",
1017 |     "    \n",
1018 |     "    # type(grads)\n",
1019 |     "    # list\n",
1020 |     "    \n",
1021 |     "    # List to hold element-wise multiplication between-\n",
1022 |     "    # computed gradient and masks-\n",
1023 |     "    grad_mask_mul = []\n",
1024 |     "    \n",
1025 |     "    # Perform element-wise multiplication between computed gradients and masks-\n",
1026 |     "    for grad_layer, mask in zip(grads, mask_model.trainable_weights):\n",
1027 |     "        grad_mask_mul.append(tf.math.multiply(grad_layer, mask))\n",
1028 |     "    \n",
1029 |     "    # Apply computed gradients to model's weights and biases-\n",
1030 |     "    optimizer.apply_gradients(zip(grad_mask_mul, model.trainable_variables))\n",
1031 |     "\n",
1032 |     "    # Update running loss and accuracy metrics-\n",
1033 |     "    train_loss(loss)\n",
1034 |     "    train_accuracy(y, y_pred)\n",
1035 |     "\n",
1036 |     "    return None\n",
1037 |     "    \n",
1038 |     "    \n",
1039 |     "@tf.function\n",
1040 |     "def test_step(model, optimizer, data, labels):\n",
1041 |     "    \"\"\"\n",
1042 |     "    Function to test model performance\n",
1043 |     "    on testing dataset\n",
1044 |     "    \"\"\"\n",
1045 |     "    \n",
1046 |     "    predictions = model(data)\n",
1047 |     "    t_loss = loss_fn(labels, predictions)\n",
1048 |     "\n",
1049 |     "    test_loss(t_loss)\n",
1050 |     "    test_accuracy(labels, predictions)\n",
1051 |     "\n",
1052 |     "    return None\n",
1053 |     "\n"
1054 |    ]
1055 |   },
1056 |   {
1057 |    "cell_type": "code",
1058 |    "execution_count": 49,
1059 |    "metadata": {},
1060 |    "outputs": [
1061 |     {
1062 |      "name": "stdout",
1063 |      "output_type": "stream",
1064 |      "text": [
1065 |       "Epoch 1/3\n",
1066 |       "1000/1000 [==============================] - 5s 5ms/step - loss: 0.0392 - accuracy: 0.9874\n",
1067 |       "Epoch 2/3\n",
1068 |       "1000/1000 [==============================] - 5s 5ms/step - loss: 0.0311 - accuracy: 0.9894\n",
1069 |       "Epoch 3/3\n",
1070 |       "1000/1000 [==============================] - 5s 5ms/step - loss: 0.0266 - accuracy: 0.9911\n"
1071 |      ]
1072 |     }
1073 |    ],
1074 |    "source": [
1075 |     "history_q_aware = q_aware_model.fit(\n",
1076 |     "    x = X_train, y = y_train,\n",
1077 |     "    batch_size = batch_size,\n",
1078 |     "    epochs = 3\n",
1079 |     ")\n"
1080 |    ]
1081 |   },
1082 |   {
1083 |    "cell_type": "code",
1084 |    "execution_count": null,
1085 |    "metadata": {},
1086 |    "outputs": [],
1087 |    "source": []
1088 |   },
1089 |   {
1090 |    "cell_type": "code",
1091 |    "execution_count": 54,
1092 |    "metadata": {},
1093 |    "outputs": [],
1094 |    "source": [
1095 |     "_, baseline_model_accuracy = winning_ticket_model.evaluate(X_test, y_test, verbose=0)\n",
1096 |     "_, q_aware_model_accuracy = q_aware_model.evaluate(X_test, y_test, verbose=0)\n"
1097 |    ]
1098 |   },
1099 |   {
1100 |    "cell_type": "markdown",
1101 |    "metadata": {},
1102 |    "source": [
1103 |     "### There is minimal to no loss in test accuracy after quantization aware training, compared to the baseline:"
1104 |    ]
1105 |   },
1106 |   {
1107 |    "cell_type": "code",
1108 |    "execution_count": 55,
1109 |    "metadata": {},
1110 |    "outputs": [
1111 |     {
1112 |      "name": "stdout",
1113 |      "output_type": "stream",
1114 |      "text": [
1115 |       "Baseline test accuracy: 0.9829999804496765\n",
1116 |       "Quant test accuracy: 0.9796000123023987\n"
1117 |      ]
1118 |     }
1119 |    ],
1120 |    "source": [
1121 |     "print('Baseline test accuracy:', baseline_model_accuracy)\n",
1122 |     "print('Quant test accuracy:', q_aware_model_accuracy)"
1123 |    ]
1124 |   },
1125 |   {
1126 |    "cell_type": "code",
1127 |    "execution_count": null,
1128 |    "metadata": {},
1129 |    "outputs": [],
1130 |    "source": []
1131 |   },
1132 |   {
1133 |    "cell_type": "code",
1134 |    "execution_count": 57,
1135 |    "metadata": {},
1136 |    "outputs": [
1137 |     {
1138 |      "name": "stdout",
1139 |      "output_type": "stream",
1140 |      "text": [
1141 |       "layer: (300,) has 294 non-zero parameter\n",
1142 |       "layer: (784, 300) has 204983 non-zero parameter\n",
1143 |       "layer: (100,) has 99 non-zero parameter\n",
1144 |       "layer: (300, 100) has 29100 non-zero parameter\n",
1145 |       "layer: (10,) has 10 non-zero parameter\n",
1146 |       "layer: (100, 10) has 990 non-zero parameter\n",
1147 |       "\n",
1148 |       "Total number of non-zero parameters = 235476\n",
1149 |       "\n"
1150 |      ]
1151 |     }
1152 |    ],
1153 |    "source": [
1154 |     "q_params = 0\n",
1155 |     "\n",
1156 |     "for layer in q_aware_model.trainable_weights:\n",
1157 |     "    params = tf.math.count_nonzero(layer, axis = None).numpy()\n",
1158 |     "    print(\"layer: {0} has {1} non-zero parameter\".format(layer.shape, params))\n",
1159 |     "    q_params += params\n",
1160 |     "    \n",
1161 |     "print(\"\\nTotal number of non-zero parameters = {0}\\n\".format(q_params))"
1162 |    ]
1163 |   },
1164 |   {
1165 |    "cell_type": "code",
1166 |    "execution_count": 61,
1167 |    "metadata": {},
1168 |    "outputs": [
1169 |     {
1170 |      "data": {
1171 |       "text/plain": [
1172 |        "23095"
1173 |       ]
1174 |      },
1175 |      "execution_count": 61,
1176 |      "metadata": {},
1177 |      "output_type": "execute_result"
1178 |     }
1179 |    ],
1180 |    "source": [
1181 |     "winning_params"
1182 |    ]
1183 |   },
1184 |   {
1185 |    "cell_type": "code",
1186 |    "execution_count": null,
1187 |    "metadata": {},
1188 |    "outputs": [],
1189 |    "source": []
1190 |   },
1191 |   {
1192 |    "cell_type": "code",
1193 |    "execution_count": 47,
1194 |    "metadata": {},
1195 |    "outputs": [
1196 |     {
1197 |      "ename": "InvalidArgumentError",
1198 |      "evalue": " var and grad do not have the same shape[10] [100,10]\n\t [[node Adam/Adam/update_4/ResourceApplyAdam (defined at <ipython-input-37-9c297d161e54>:29) ]] [Op:__inference_train_one_step_20360]\n\nErrors may have originated from an input operation.\nInput Source operations connected to node Adam/Adam/update_4/ResourceApplyAdam:\n Mul_4 (defined at <ipython-input-37-9c297d161e54>:26)\t\n sequential/quant_dense_2/BiasAdd/ReadVariableOp/resource (defined at /home/arjun/.local/lib/python3.8/site-packages/tensorflow_model_optimization/python/core/quantization/keras/quantize_wrapper.py:162)\n\nFunction call stack:\ntrain_one_step\n",
1199 |      "output_type": "error",
1200 |      "traceback": [
1201 |       "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
1202 |       "\u001b[0;31mInvalidArgumentError\u001b[0m                      Traceback (most recent call last)",
1203 |       "\u001b[0;32m<ipython-input-47-bca851ce138d>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m     19\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     20\u001b[0m     \u001b[0;32mfor\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mtrain_dataset\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 21\u001b[0;31m         \u001b[0mtrain_one_step\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mq_aware_model\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmask_model\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moptimizer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     22\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     23\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
1204 |       "\u001b[0;32m~/.local/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args, **kwds)\u001b[0m\n\u001b[1;32m    578\u001b[0m         \u001b[0mxla_context\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mExit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    579\u001b[0m     \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 580\u001b[0;31m       \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    581\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    582\u001b[0m     \u001b[0;32mif\u001b[0m \u001b[0mtracing_count\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_tracing_count\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
1205 |       "\u001b[0;32m~/.local/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py\u001b[0m in \u001b[0;36m_call\u001b[0;34m(self, *args, **kwds)\u001b[0m\n\u001b[1;32m    642\u001b[0m         \u001b[0;31m# Lifting succeeded, so variables are initialized and we can run the\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    643\u001b[0m         \u001b[0;31m# stateless function.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 644\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_stateless_fn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    645\u001b[0m     \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    646\u001b[0m       \u001b[0mcanon_args\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcanon_kwds\u001b[0m \u001b[0;34m=\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m\\\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
1206 |       "\u001b[0;32m~/.local/lib/python3.8/site-packages/tensorflow/python/eager/function.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m   2418\u001b[0m     \u001b[0;32mwith\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_lock\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2419\u001b[0m       \u001b[0mgraph_function\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwargs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_maybe_define_function\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2420\u001b[0;31m     \u001b[0;32mreturn\u001b[0m \u001b[0mgraph_function\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_filtered_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m  \u001b[0;31m# pylint: disable=protected-access\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   2421\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2422\u001b[0m   \u001b[0;34m@\u001b[0m\u001b[0mproperty\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
1207 |       "\u001b[0;32m~/.local/lib/python3.8/site-packages/tensorflow/python/eager/function.py\u001b[0m in \u001b[0;36m_filtered_call\u001b[0;34m(self, args, kwargs)\u001b[0m\n\u001b[1;32m   1659\u001b[0m       \u001b[0;31m`\u001b[0m\u001b[0margs\u001b[0m\u001b[0;31m`\u001b[0m \u001b[0;32mand\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m`\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;31m`\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1660\u001b[0m     \"\"\"\n\u001b[0;32m-> 1661\u001b[0;31m     return self._call_flat(\n\u001b[0m\u001b[1;32m   1662\u001b[0m         (t for t in nest.flatten((args, kwargs), expand_composites=True)\n\u001b[1;32m   1663\u001b[0m          if isinstance(t, (ops.Tensor,\n",
1208 |       "\u001b[0;32m~/.local/lib/python3.8/site-packages/tensorflow/python/eager/function.py\u001b[0m in \u001b[0;36m_call_flat\u001b[0;34m(self, args, captured_inputs, cancellation_manager)\u001b[0m\n\u001b[1;32m   1743\u001b[0m         and executing_eagerly):\n\u001b[1;32m   1744\u001b[0m       \u001b[0;31m# No tape is watching; skip to running the function.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1745\u001b[0;31m       return self._build_call_outputs(self._inference_function.call(\n\u001b[0m\u001b[1;32m   1746\u001b[0m           ctx, args, cancellation_manager=cancellation_manager))\n\u001b[1;32m   1747\u001b[0m     forward_backward = self._select_forward_and_backward_functions(\n",
1209 |       "\u001b[0;32m~/.local/lib/python3.8/site-packages/tensorflow/python/eager/function.py\u001b[0m in \u001b[0;36mcall\u001b[0;34m(self, ctx, args, cancellation_manager)\u001b[0m\n\u001b[1;32m    591\u001b[0m       \u001b[0;32mwith\u001b[0m \u001b[0m_InterpolateFunctionError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    592\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mcancellation_manager\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 593\u001b[0;31m           outputs = execute.execute(\n\u001b[0m\u001b[1;32m    594\u001b[0m               \u001b[0mstr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msignature\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    595\u001b[0m               \u001b[0mnum_outputs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_num_outputs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
1210 |       "\u001b[0;32m~/.local/lib/python3.8/site-packages/tensorflow/python/eager/execute.py\u001b[0m in \u001b[0;36mquick_execute\u001b[0;34m(op_name, num_outputs, inputs, attrs, ctx, name)\u001b[0m\n\u001b[1;32m     57\u001b[0m   \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     58\u001b[0m     \u001b[0mctx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mensure_initialized\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 59\u001b[0;31m     tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,\n\u001b[0m\u001b[1;32m     60\u001b[0m                                         inputs, attrs, num_outputs)\n\u001b[1;32m     61\u001b[0m   \u001b[0;32mexcept\u001b[0m \u001b[0mcore\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_NotOkStatusException\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
1211 |       "\u001b[0;31mInvalidArgumentError\u001b[0m:  var and grad do not have the same shape[10] [100,10]\n\t [[node Adam/Adam/update_4/ResourceApplyAdam (defined at <ipython-input-37-9c297d161e54>:29) ]] [Op:__inference_train_one_step_20360]\n\nErrors may have originated from an input operation.\nInput Source operations connected to node Adam/Adam/update_4/ResourceApplyAdam:\n Mul_4 (defined at <ipython-input-37-9c297d161e54>:26)\t\n sequential/quant_dense_2/BiasAdd/ReadVariableOp/resource (defined at /home/arjun/.local/lib/python3.8/site-packages/tensorflow_model_optimization/python/core/quantization/keras/quantize_wrapper.py:162)\n\nFunction call stack:\ntrain_one_step\n"
1212 |      ]
1213 |     }
1214 |    ],
1215 |    "source": [
1216 |     "# Train model using 'GradientTape'-\n",
1217 |     "    \n",
1218 |     "# Initialize parameters for Early Stopping manual implementation-\n",
1219 |     "# best_val_loss = 100\n",
1220 |     "# loc_patience = 0\n",
1221 |     "    \n",
1222 |     "for epoch in range(num_epochs):\n",
1223 |     "    \n",
1224 |     "    if loc_patience >= patience:\n",
1225 |     "        print(\"\\n'EarlyStopping' called!\\n\")\n",
1226 |     "        break\n",
1227 |     "        \n",
1228 |     "    # Reset the metrics at the start of the next epoch\n",
1229 |     "    train_loss.reset_states()\n",
1230 |     "    train_accuracy.reset_states()\n",
1231 |     "    test_loss.reset_states()\n",
1232 |     "    test_accuracy.reset_states()\n",
1233 |     "            \n",
1234 |     "    \n",
1235 |     "    for x, y in train_dataset:\n",
1236 |     "        train_one_step(q_aware_model, mask_model, optimizer, x, y)\n",
1237 |     "\n",
1238 |     "\n",
1239 |     "    for x_t, y_t in test_dataset:\n",
1240 |     "        test_step(q_aware_model, optimizer, x_t, y_t)\n",
1241 |     "\n",
1242 |     "    template = 'Epoch {0}, Loss: {1:.4f}, Accuracy: {2:.4f}, Test Loss: {3:.4f}, Test Accuracy: {4:4f}'\n",
1243 |     "    \n",
1244 |     "    '''\n",
1245 |     "    # 'i' is the index for number of pruning rounds-\n",
1246 |     "    history_main[i]['accuracy'][epoch] = train_accuracy.result() * 100\n",
1247 |     "    history_main[i]['loss'][epoch] = train_loss.result()\n",
1248 |     "    history_main[i]['val_loss'][epoch] = test_loss.result()\n",
1249 |     "    history_main[i]['val_accuracy'][epoch] = test_accuracy.result() * 100\n",
1250 |     "    ''' \n",
1251 |     "\n",
1252 |     "    print(template.format(\n",
1253 |     "        epoch + 1, train_loss.result(),\n",
1254 |     "        train_accuracy.result()*100, test_loss.result(),\n",
1255 |     "        test_accuracy.result()*100)\n",
1256 |     "         )\n",
1257 |     "    \n",
1258 |     "    # Count number of non-zero parameters in each layer and in total-\n",
1259 |     "    # print(\"layer-wise manner model, number of nonzero parameters in each layer are: \\n\")\n",
1260 |     "    model_sum_params = 0\n",
1261 |     "    \n",
1262 |     "    for layer in winning_ticket_model.trainable_weights:\n",
1263 |     "        # print(tf.math.count_nonzero(layer, axis = None).numpy())\n",
1264 |     "        model_sum_params += tf.math.count_nonzero(layer, axis = None).numpy()\n",
1265 |     "    \n",
1266 |     "    print(\"Total number of trainable parameters = {0}\\n\".format(model_sum_params))\n",
1267 |     "\n",
1268 |     "    \n",
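1269 |     "    # Code for manual Early Stopping. NOTE(review): the condition below takes np.abs() of a boolean comparison, so 'minimum_delta' does not act as a threshold on the size of the improvement; presumably 'best_val_loss - test_loss.result() >= minimum_delta' was intended - confirm:\n",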
1270 |     "    if np.abs(test_loss.result() < best_val_loss) >= minimum_delta:\n",
1271 |     "        # update 'best_val_loss' variable to lowest loss encountered so far-\n",
1272 |     "        best_val_loss = test_loss.result()\n",
1273 |     "        \n",
1274 |     "        # reset 'loc_patience' variable-\n",
1275 |     "        loc_patience = 0\n",
1276 |     "        \n",
1277 |     "    else:  # there is no improvement in monitored metric 'val_loss'\n",
1278 |     "        loc_patience += 1  # number of epochs without any improvement\n"
1279 |    ]
1280 |   },
1281 |   {
1282 |    "cell_type": "code",
1283 |    "execution_count": null,
1284 |    "metadata": {},
1285 |    "outputs": [],
1286 |    "source": []
1287 |   }
1288 |  ],
1289 |  "metadata": {
1290 |   "kernelspec": {
1291 |    "display_name": "Python 3",
1292 |    "language": "python",
1293 |    "name": "python3"
1294 |   },
1295 |   "language_info": {
1296 |    "codemirror_mode": {
1297 |     "name": "ipython",
1298 |     "version": 3
1299 |    },
1300 |    "file_extension": ".py",
1301 |    "mimetype": "text/x-python",
1302 |    "name": "python",
1303 |    "nbconvert_exporter": "python",
1304 |    "pygments_lexer": "ipython3",
1305 |    "version": "3.8.3"
1306 |   }
1307 |  },
1308 |  "nbformat": 4,
1309 |  "nbformat_minor": 4
1310 | }
1311 | 


--------------------------------------------------------------------------------
/LeNet_300_100-Iterative_Pruning.ipynb:
--------------------------------------------------------------------------------
   1 | {
   2 |  "cells": [
   3 |   {
   4 |    "cell_type": "markdown",
   5 |    "id": "enhanced-reaction",
   6 |    "metadata": {},
   7 |    "source": [
   8 |     "# Iterative Pruning: _LeNet-300-100_ on MNIST"
   9 |    ]
  10 |   },
  11 |   {
  12 |    "cell_type": "code",
  13 |    "execution_count": 1,
  14 |    "id": "defined-postcard",
  15 |    "metadata": {},
  16 |    "outputs": [],
  17 |    "source": [
  18 |     "import torch\n",
  19 |     "import torch.nn as nn\n",
  20 |     "import torchvision\n",
  21 |     "import torch.nn.functional as F\n",
  22 |     "import numpy as np\n",
  23 |     "import torchvision.transforms as transforms\n",
  24 |     "import matplotlib.pyplot as plt\n",
  25 |     "import os"
  26 |    ]
  27 |   },
  28 |   {
  29 |    "cell_type": "code",
  30 |    "execution_count": null,
  31 |    "id": "explicit-sunset",
  32 |    "metadata": {},
  33 |    "outputs": [],
  34 |    "source": []
  35 |   },
  36 |   {
  37 |    "cell_type": "code",
  38 |    "execution_count": 2,
  39 |    "id": "constant-geneva",
  40 |    "metadata": {},
  41 |    "outputs": [
  42 |     {
  43 |      "name": "stdout",
  44 |      "output_type": "stream",
  45 |      "text": [
  46 |       "PyTorch version: 1.7.1\n"
  47 |      ]
  48 |     }
  49 |    ],
  50 |    "source": [
  51 |     "print(f\"PyTorch version: {torch.__version__}\")"
  52 |    ]
  53 |   },
  54 |   {
  55 |    "cell_type": "code",
  56 |    "execution_count": null,
  57 |    "id": "shared-velvet",
  58 |    "metadata": {},
  59 |    "outputs": [],
  60 |    "source": []
  61 |   },
  62 |   {
  63 |    "cell_type": "code",
  64 |    "execution_count": 3,
  65 |    "id": "absent-panel",
  66 |    "metadata": {},
  67 |    "outputs": [
  68 |     {
  69 |      "name": "stdout",
  70 |      "output_type": "stream",
  71 |      "text": [
  72 |       "Available device: cpu\n"
  73 |      ]
  74 |     }
  75 |    ],
  76 |    "source": [
  77 |     "# GPU device configuration-\n",
  78 |     "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n",
  79 |     "\n",
  80 |     "print(f\"Available device: {device}\")"
  81 |    ]
  82 |   },
  83 |   {
  84 |    "cell_type": "code",
  85 |    "execution_count": null,
  86 |    "id": "proud-buffer",
  87 |    "metadata": {},
  88 |    "outputs": [],
  89 |    "source": []
  90 |   },
  91 |   {
  92 |    "cell_type": "code",
  93 |    "execution_count": 4,
  94 |    "id": "expensive-parking",
  95 |    "metadata": {},
  96 |    "outputs": [],
  97 |    "source": [
  98 |     "# Hyper-parameters-\n",
  99 |     "input_size = 784    # 28 x 28, flattened to be 1-D tensor\n",
 100 |     "hidden_size = 100\n",
 101 |     "num_classes = 10\n",
 102 |     "num_epochs = 20\n",
 103 |     "batch_size = 32\n",
 104 |     "learning_rate = 0.0012"
 105 |    ]
 106 |   },
 107 |   {
 108 |    "cell_type": "code",
 109 |    "execution_count": null,
 110 |    "id": "substantial-faith",
 111 |    "metadata": {},
 112 |    "outputs": [],
 113 |    "source": []
 114 |   },
 115 |   {
 116 |    "cell_type": "code",
 117 |    "execution_count": 5,
 118 |    "id": "patient-bruce",
 119 |    "metadata": {},
 120 |    "outputs": [],
 121 |    "source": [
 122 |     "os.chdir(\"/home/arjun/Documents/Programs/Python_Codes/PyTorch_Resources/Good_Codes/\")"
 123 |    ]
 124 |   },
 125 |   {
 126 |    "cell_type": "code",
 127 |    "execution_count": 6,
 128 |    "id": "rotary-jacket",
 129 |    "metadata": {},
 130 |    "outputs": [],
 131 |    "source": [
 132 |     "# MNIST dataset statistics:\n",
 133 |     "# mean = tensor([0.1307]) & std dev = tensor([0.3081])\n",
 134 |     "mean = np.array([0.1307])\n",
 135 |     "std_dev = np.array([0.3081])"
 136 |    ]
 137 |   },
 138 |   {
 139 |    "cell_type": "code",
 140 |    "execution_count": 7,
 141 |    "id": "accredited-fleet",
 142 |    "metadata": {},
 143 |    "outputs": [],
 144 |    "source": [
 145 |     "# Define data set transformations to apply-\n",
 146 |     "transforms_apply = transforms.Compose([\n",
 147 |     "    transforms.ToTensor(),\n",
 148 |     "    transforms.Normalize(mean = mean, std = std_dev)\n",
 149 |     "    ])"
 150 |    ]
 151 |   },
 152 |   {
 153 |    "cell_type": "code",
 154 |    "execution_count": 8,
 155 |    "id": "meaningful-means",
 156 |    "metadata": {},
 157 |    "outputs": [],
 158 |    "source": [
 159 |     "# MNIST dataset-\n",
 160 |     "train_dataset = torchvision.datasets.MNIST(\n",
 161 |     "        root = './data', train = True,\n",
 162 |     "        transform = transforms_apply, download = True\n",
 163 |     "        )\n",
 164 |     "\n",
 165 |     "test_dataset = torchvision.datasets.MNIST(\n",
 166 |     "        root = './data', train = False,\n",
 167 |     "        transform = transforms_apply\n",
 168 |     "        )"
 169 |    ]
 170 |   },
 171 |   {
 172 |    "cell_type": "code",
 173 |    "execution_count": 9,
 174 |    "id": "cognitive-afghanistan",
 175 |    "metadata": {},
 176 |    "outputs": [
 177 |     {
 178 |      "name": "stdout",
 179 |      "output_type": "stream",
 180 |      "text": [
 181 |       "len(train_dataset): 60000 & len(test_dataset): 10000\n"
 182 |      ]
 183 |     }
 184 |    ],
 185 |    "source": [
 186 |     "print(f\"len(train_dataset): {len(train_dataset)} & len(test_dataset): {len(test_dataset)}\")"
 187 |    ]
 188 |   },
 189 |   {
 190 |    "cell_type": "code",
 191 |    "execution_count": 10,
 192 |    "id": "lesbian-conditions",
 193 |    "metadata": {},
 194 |    "outputs": [],
 195 |    "source": [
 196 |     "# Create dataloader-\n",
 197 |     "train_loader = torch.utils.data.DataLoader(\n",
 198 |     "        dataset = train_dataset, batch_size = batch_size,\n",
 199 |     "        shuffle = True\n",
 200 |     "        )\n",
 201 |     "\n",
 202 |     "test_loader = torch.utils.data.DataLoader(\n",
 203 |     "        dataset = test_dataset, batch_size = batch_size,\n",
 204 |     "        shuffle = False\n",
 205 |     "        )"
 206 |    ]
 207 |   },
 208 |   {
 209 |    "cell_type": "code",
 210 |    "execution_count": 11,
 211 |    "id": "billion-second",
 212 |    "metadata": {},
 213 |    "outputs": [
 214 |     {
 215 |      "name": "stdout",
 216 |      "output_type": "stream",
 217 |      "text": [
 218 |       "len(train_loader) = 1875 & len(test_loader) = 313\n"
 219 |      ]
 220 |     }
 221 |    ],
 222 |    "source": [
 223 |     "print(f\"len(train_loader) = {len(train_loader)} & len(test_loader) = {len(test_loader)}\")"
 224 |    ]
 225 |   },
 226 |   {
 227 |    "cell_type": "code",
 228 |    "execution_count": null,
 229 |    "id": "local-breed",
 230 |    "metadata": {},
 231 |    "outputs": [],
 232 |    "source": []
 233 |   },
 234 |   {
 235 |    "cell_type": "code",
 236 |    "execution_count": 12,
 237 |    "id": "graduate-light",
 238 |    "metadata": {},
 239 |    "outputs": [
 240 |     {
 241 |      "data": {
 242 |       "text/plain": [
 243 |        "(torch.Size([32, 1, 28, 28]), torch.Size([32]))"
 244 |       ]
 245 |      },
 246 |      "execution_count": 12,
 247 |      "metadata": {},
 248 |      "output_type": "execute_result"
 249 |     }
 250 |    ],
 251 |    "source": [
 252 |     "images, labels = next(iter(train_loader))\n",
 253 |     "\n",
 254 |     "images.shape, labels.shape"
 255 |    ]
 256 |   },
 257 |   {
 258 |    "cell_type": "code",
 259 |    "execution_count": 13,
 260 |    "id": "abroad-amplifier",
 261 |    "metadata": {},
 262 |    "outputs": [
 263 |     {
 264 |      "name": "stdout",
 265 |      "output_type": "stream",
 266 |      "text": [
 267 |       "img_samples.shape = torch.Size([32, 1, 28, 28]), labels.shape = torch.Size([32])\n"
 268 |      ]
 269 |     }
 270 |    ],
 271 |    "source": [
 272 |     "# Sanity check- one batch of data\n",
 273 |     "examples = iter(train_loader)\n",
 274 |     "\n",
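 275 |     "# Unpack one batch- NOTE(review): the iterator's '.next()' method is unavailable in newer PyTorch releases; 'next(examples)' is the portable form - confirm for your version.\n",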
 276 |     "img_samples, labels = examples.next()\n",
 277 |     "print(f\"img_samples.shape = {img_samples.shape}, labels.shape = {labels.shape}\")\n",
 278 |     "# The second dimension (channels) is 1 because the images are grey-scale."
 279 |    ]
 280 |   },
 281 |   {
 282 |    "cell_type": "code",
 283 |    "execution_count": 14,
 284 |    "id": "premium-preparation",
 285 |    "metadata": {},
 286 |    "outputs": [
 287 |     {
 288 |      "data": {
 289 |       "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXAAAAD6CAYAAAC4RRw1AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAeBklEQVR4nO3de5BUxdkG8OcVUWIQBAVcEVmSIELUBEIMFuSTREmAioFERFABFViTYAQlkRUCBIQAiuaCBIJyVQtiCQplNCuCSiyNcilUYEUuJdeVS5AgBoNAf3/s2OludmZnZ86cOX3m+VVR+/b2zJzWl21m3+nTLUopEBGRf87I9wCIiCgznMCJiDzFCZyIyFOcwImIPMUJnIjIU5zAiYg8ldUELiJdRWSziGwVkdKgBkX5xbzGF3MbL5LpOnARqQXgAwBdAOwGsBpAX6XUpuCGR2FjXuOLuY2fM7N47lUAtiqltgOAiCwC0ANA0r8MIsK7hiJCKSVJuphXvx1USjVK0lej3DKvkVJlXrMpoTQFsMto7058zyIiJSKyRkTWZHEtCg/z6rcdKfqqzS3zGllV5jWbd+BVvYM77V9spdQsALMA/ovuCeY1vqrNLfPql2zege8G0MxoXwxgb3bDoQhgXuOLuY2ZbCbw1QBaikgLETkLQB8Ay4IZFuUR8xpfzG3MZFxCUUqdEJG7AJQBqAVgjlJqY2Ajo7xgXuOLuY2fjJcRZnQx1tQiI8UqlBpjXiNlrVKqfRAvxLxGSpV55Z2YRESe4gROROQpTuBERJ7KZh04kddatWql45UrV1p9F110UdLnTZkyxWqXlnJLEcoPvgMnIvIUJ3AiIk+xhEIFa/DgwTouKiqy+p588kmrvWfPHh3v3r07twOjwDz22GNW+4477rDa48aN0/H48eNDGVOQ+A6ciMhTnMCJiDzFCZyIyFOsgXuguLhYx6+++qrVN3PmTB1Pnjw5pBH56aGHHrLaw4YNS/rYGTNmWO033ngjF0OiHHNr3qdOnbLa9evX1/GZZ9rT4YkTJ3I3sIDwHTgRkac4gRMReYq7EUaQWwoZNGiQjs8++2yr79xzz83oGoWyG+Gll16q41WrVll9jRs31rH7/3zs2LFW+/PPP8/B6HKCuxEaTp48abXdEorpa1/7mtXesSPV6XSh426ERERxwgmciMhTnMCJiDzFZYQRMXHiRB3fd999Vt+xY8d0fOutt4Y2pjh45plndGzWvAGgrKxMxx7XvKmA8R04EZGnOIETEXmKJZQ8MXfCA4A777xTx8ePH7f6+vfvr+Nnn302twPzXO/eva32ZZddlvSxb7/9to5ZMiEf8R04EZGnOIETEXmKEzgRkadYA0/45je/qeMlS5ZYfcOHD9dxNjXoDh066PjBBx+0+ioqKnTsLhX8+9//nvE1C427tYC5w5x7ks7s2bNDGRPlzxlnxPs9arz/64iIYqzaCVxE5ojIfhHZYHyvoYgsF5Etia8NcjtMChrzGl/MbeFIp4QyD8CjABYY3ysFsEIpNVlEShPtEcEPL3fMkgkAvPjiizr++OOPrT5zuVlN9O3b12pPmzZNx/v27bP6xowZo+OQSibzEMO8plo2OHfuXKu9c+fOXA8nX+YhhrlNpk6dOlZ71KhROnZ3H3Tbhw8f1rEPBzi4qn0HrpRaBeCQ8+0eAOYn4vkAegY7LMo15jW+mNvCkWkNvIlSqgIAEl8bV/N48gPzGl/MbQzlfBWKiJQAKMn1dShczGs8Ma9+yXQC3yciRUqpChEpArA/2QOVUrMAzALCP+GjVq1aVnv06NE6vvvuu60+s+48YoRdGtyzZ0/a1zRPzFmwYIHV99lnn+n4l7/8pdW3fPnytK+RQ17kNZWbb745ad9TTz0V4kgiJ63cRjWvqZjLcwGgtLQ07eean0vV5Oc8KjItoSwDMCARDwCwNJjhUJ4xr/HF3MZQOssIFwJ4E0ArEdktI
gMBTAbQRUS2AOiSaJNHmNf4Ym4LR7UlFKVU3yRd1wY8lkBceOGFOp4xY4bVZx7gfPvtt1t9S5dm9obE3EUQAO644w4dr1u3zuq7/vrrdbx/f9LqRCh8y2sqTZs21bF76HMhilNuKTXeiUlE5ClO4EREnuIETkTkqdjtRmjuMFevXj2rb9CgQTrevHlzRq/frl07qz15sv1ZkLnj3fe+9z2r7+DBgxldk1L78Y9/rOOGDRvmcSRE4eI7cCIiT3ECJyLyVOxKKA0a/G+XzDZt2lh9vXr10nFZWVnS13jvvfes9llnnaXjCRMmWH3169e32uYdliyZhOPAgQM6dg8nrl27dlqv4eaxZcuWVnvgwIE6btGiRdLXmTdvntVetGhRWten4JiHOLgHOnz44YdWe/z48WEMKWf4DpyIyFOcwImIPMUJnIjIU2LeXp7zi4Wwu1mPHj10/MADD1h9l19+edLniYiO3333XavPvD370ksvtfrM+isAXHXVVTresWNHGiPOD6WUVP+o9ERp1zp3R7mioiIdu6f1mJ+X/OEPf7D6vvOd71jtTz/9VMfbtm2z+oqLi3Xs1lzNA7Lvvfdeq+/QIffMhUCsVUq1D+KFopTXVF544QWr3aVLFx27+Vi9erXVdncyjLAq88p34EREnuIETkTkKU7gRESeit06cHNb2Oeff97qM+th55xzjtXXv39/Hf/2t7+1+sxaqatRo0ZWe+vWrTqeNGmS1WeuCXZv5T958mTSa1AwBg8ebLW3b9+uY/fzkeHDh1vt9evX6/iVV16x+jp27Khj9zQn8+/Va6+9ZvXNnTs3jVFTVTp37qxj83On6tx33305GE3+8B04EZGnOIETEXkqdssIM2UuJ3J/RV6zZo2Op06davW5t2r/9Kc/1fFNN91k9ZlLFX//+99bfQ8//LCO9+7dm+6wMxbXZYTuNghf//rX03rewoULrfYtt9yS0fXdpYqbNm3SsXlwNgB07949o2tUI5bLCOvUqWO1//znP+u4X79+SZ9nLuMEgAEDBlht86DxiOMyQiKiOOEETkTkKU7gRESeit0ywnS5t8QvX75cx+7t2ObJ82ZNsyrPPPOMjkeOHGn1jRo1SsfDhg2z+s477zwdm1uXUs1MnDjRaj/xxBM6rlWrltVn1j+nTJmS24EBuOKKK3J+jbi68MILrXaqurfJ3AIB8KrmnRa+Ayci8hQncCIiTxVUCeWiiy7S8eLFi62+8vJyHV9zzTVW37FjxzK6nnv6h3kn4F//+ler75133snoGmRzT8AZPXq0jlu3bm31ffTRRzo+fvx4xtc0y3Fjx45N+jj3zmDKnLvLYDLm0t044jtwIiJPcQInIvJUtRO4iDQTkVdEpFxENorI0MT3G4rIchHZkviafMcnihzmNbZqM6+Fo9pb6UWkCECRUmqdiJwLYC2AngBuA3BIKTVZREoBNFBKjUj+Svm/NdfcHfDXv/611Tdu3Dgduyf5xNRFiEleUzFvbX/ppZesvosvvljH7ulJ7gk95s6B5g6DAHDPPffo2D2xvqKiQsfmSTFA9UtSM/QugNvjllfz1CMA2LJlS1rPc3eZdHcB9Uhmt9IrpSqUUusS8ScAygE0BdADwPzEw+aj8i8JeYJ5ja3PmdfCUaNVKCJSDKAtgLcANFFKVQCVk4GINE7ynBIAJVmOk3KIeY0n5jX+0t6NUETqAngNwESl1BIROayUOs/o/1gplbKuFvavZO7dluaOc40b239/mzVrFsqYouKL3Qh9zGumWrVqZbVffvllHTdt2jSQa5hLEwHguuuu03GOSiautUqp9nHLa6YlFHe3UI9lvhuhiNQGsBjAU0qpL/Zn3Jeoj39RJ98f1EgpHMxrPDGvhSOdVSgCYDaAcqXUI0bXMgBfbK47AMBS97kUXcxrrDGvBSKdGnhHAP0AvCci6xPfGwlgMoCnRWQggJ0AbszJCClXmNd4qgvmtWBUO4ErpV4HkOx+1GuDHU6wh
gwZYrWvvPJKHd92220hjyZafM5rptwlZGZ9ulevXlZfSYn9OZ75GcmCBQusPrMe627R8P7772c22MwdTXHaUizzmsqgQYOs9uOPP56nkeQG78QkIvIUJ3AiIk/F7lDjTp066dg9nPi5557T8Y03FnYJMK6HGlM8DzXmMkIeakxEFCucwImIPMUJnIjIU7E7kefEiRP5HgIRBcw93SpGte2s8B04EZGnOIETEXkqdiWUf/7znzrmr1lEFGd8B05E5ClO4EREnuIETkTkKU7gRESe4gROROQpTuBERJ7iBE5E5ClO4EREnuIETkTkKU7gRESeCvtW+oMAdgC4IBFHQSGOpXnAr8e8phbmWILMLfOaWt7zGuqRavqiImuCOvYpWxxLcKI0fo4lOFEaP8diYwmFiMhTnMCJiDyVrwl8Vp6uWxWOJThRGj/HEpwojZ9jMeSlBk5ERNljCYWIyFOcwImIPBXqBC4iXUVks4hsFZHSMK+duP4cEdkvIhuM7zUUkeUisiXxtUEI42gmIq+ISLmIbBSRofkaSxCYV2sssckt82qNJZJ5DW0CF5FaAKYD6AagDYC+ItImrOsnzAPQ1fleKYAVSqmWAFYk2rl2AsBwpVRrAB0ADEn8v8jHWLLCvJ4mFrllXk8TzbwqpUL5A+BqAGVG+34A94d1feO6xQA2GO3NAIoScRGAzXkY01IAXaIwFuaVuWVe/clrmCWUpgB2Ge3die/lWxOlVAUAJL42DvPiIlIMoC2At/I9lgwxr0l4nlvmNYko5TXMCVyq+F5Br2EUkboAFgMYppQ6ku/xZIh5rUIMcsu8ViFqeQ1zAt8NoJnRvhjA3hCvn8w+ESkCgMTX/WFcVERqo/IvwlNKqSX5HEuWmFdHTHLLvDqimNcwJ/DVAFqKSAsROQtAHwDLQrx+MssADEjEA1BZ28opEREAswGUK6UeyedYAsC8GmKUW+bVENm8hlz47w7gAwDbAIzKwwcPCwFUAPgcle8wBgI4H5WfHm9JfG0Ywjg6ofLX0XcBrE/86Z6PsTCvzC3z6m9eeSs9EZGneCcmEZGnOIETEXkqqwk837faUm4wr/HF3MZMFkX9Wqj8cOMrAM4C8A6ANtU8R/FPNP4wr7H9cyCo3Ebgv4V/qslrNu/ArwKwVSm1XSl1HMAiAD2yeD2KBubVbztS9DG3/qoyr9lM4GndaisiJSKyRkTWZHEtCg/zGl/V5pZ59cuZWTw3rVttlVKzkDh6SERO66fIYV7jq9rcMq9+yeYdeFRvtaXsMK/xxdzGTDYTeFRvtaXsMK/xxdzGTMYlFKXUCRG5C0AZKj/dnqOU2hjYyCgvmNf4Ym7jJ9Rb6VlTiw6lVFX10Iwwr5GyVinVPogXYl4jpcq88k5MIiJPcQInIvIUJ3AiIk9xAici8hQncCIiT3ECJyLyVDa30lMN9e7dW8eLFi2y+iqP3Kt0+PBhq+9HP/qRjj/88EOrb8+ePcENkChmGjVqZLVnzpxptXv27Klj82cQAMrLy3XcuXNnq+/AgQPBDDBLfAdOROQpTuBERJ7iBE5E5CneSp9DN9xwg9WeN2+ejs8555yMXnPEiBFWe+rUqRm9TiHeSt+uXTurbeZn5MiRVt9tt91mtVeuXKnjXbt2IcIK/lb6yy67TMcvvvii1XfJJZdYbXP+c2vgZt/y5cutvm7dumU9zhrirfRERHHCCZyIyFNcRpilpUuXWm3z165vf/vbVl+mZROynX322VZ7ypQpOv7qV7+a9HmtW7e22i1atNDxqVOnrL45c+ZY7V69euk44iWUguMuFdy0aZOO3RLxf/7zH6v97LPP6njz5s1WX2lpqY6/9a1vWX1mKWbnzp01HHFw+A6ciMhTnMCJiDzFCZyIyFOsgVfh3HPPtdpNmzZN+tjrr7/eaud6WebYsWOt9quvvqrjNWvW5PTa+WTWvefPn2/13XjjjWEPJxD9+/fX8bp166y+DRs2hD0cb91///1W2/wZd
H8ezf/ngF0Dd5mfi0yYMMHqGzx4sI5Hjx6d/mADxnfgRESe4gROROQpllCq8P3vf99qL1myJE8jOZ27FPHMM+OZwrp161rtRx55RMdRLpnUr1/fas+YMUPHzZs3t/ratm2r471791p9+/bt0/G4ceOsvpdeeinrcfqua9euOh46dKjVZy4VrEnJxDVp0iQdt2nTxupzS175wnfgRESe4gROROQpTuBERJ6KZwE1A+bSwSFDhuRxJKk9/fTTVnv79u15Gklu9ejRw2oPHDgwo9f56KOPdPz4448nfZy7HPQb3/hG2tfo16+fjvv27Wv1/fCHP0zrNczb+t32+eefn/ZY4srcYRCwlw66SwUPHjyo46BOzjFzHCV8B05E5KlqJ3ARmSMi+0Vkg/G9hiKyXES2JL42yO0wKWjMa3wxt4Wj2gMdROT/ABwFsEApdXniew8COKSUmiwipQAaKKVGpHqdxPPyukF8nTp1dDxx4kSrz/wVOtWOdi53E/hPP/1Ux+7OZ7Vq1dJxgwbJf37cnfHMHQ9LSkqsvkOHDqU9Vsc1iHBeb7nlFqu9YMGCtJ7nHlr70EMP6dg9ENpUVFRktd2lo+aBD25JY+7cuTquyY6TGzdu1PHx48etPjPPW7dutfqOHDmS6mXXArgXAeQ23z+vpieeeMJqm38/3DnM/Jl0+15//XWrfe+99+p47dq1WY8zhzI70EEptQqAO0v0APDF/czzAfTMdnQULuY1vpjbwpHph5hNlFIVAKCUqhCRxskeKCIlAEqS9VOkMK/xlVZumVe/5HwVilJqFoBZQLR+JaPsMK/xxLz6JdMJfJ+IFCX+JS8CsD/IQQXF3VXQrHsHtVTQPZHHvFXXrduaS6HM+qfLrWubp8HkmBd5TcVcQgakrnubKioqrPbVV19ttc1Djvv06WP1pVv3fuGFF6z2rbfequN///vfab1GFrzOrbuMMNWOg8keBwCdOnWy2n/72990PG3aNKvP/ZwsijJdRrgMwIBEPADA0hSPJX8wr/HF3MZQOssIFwJ4E0ArEdktIgMBTAbQRUS2AOiSaJNHmNf4Ym4LR7XLCAO9WMg1tT/+8Y9W+6677sr6NadPn26177777rSfm24JxS0DNGnSJO1rpEspJdU/Kj25yOuXvvQlq71s2TIdu7tFmsxlnIB9B51b7jK5v6L37NnTao8ZM0bH7qHKJveAW7Pc4ub86NGjSV8nC1UuN8tEvmvg99xzj46nTp1q9ZlLBd27JM3dGt0ypntnrDn//etf/7L6unXrpuMILDHMbBkhERFFEydwIiJPcQInIvJU7HYjnDz5f5/N/OxnPwvkNc2696hRowJ5TUrt2LFjVtvMQaoa+Je//GWr/dhjj+n4s88+s/rKysp0/Ktf/crqu/3229Meq7kE0D0tKM4HTedaq1atdOx+Vvf+++/r2D1lx9zCwqxjA8ADDzxgtc1dDd0tEswlhp07d056/XziO3AiIk9xAici8pSXJZR69erp2CyZAJmXTU6cOKHjhx9+2Oozf83KxpVXXqljdxdDs71hwwaQzVwCaO4gB9gHHrvMX4vNpYgAcPLkSR1nczj0TTfdpGOWTHLD/Xkxlwi7u36mMnr0aKttltXc8kqjRo10fMMNN1h9UblLk+/AiYg8xQmciMhTnMCJiDzlZQ38iiuu0PGdd95p9WW6NcCjjz6q46Bq3m7dbPbs2Tp2x2m2f/7znwdy/Tgx//88//zzVp+55NDdUc6sbbt17kzr3itWrLDa69evz+h1KLXWrVvr2P152bRpUyDXMGvZbdu2tfrM7RRKS0utvsWLF+s4n0sK+Q6ciMhTnMCJiDzFCZyIyFNe1sDdtd+Z+NOf/mS1ze1Cg+Juc1mTE8spuW3btiVt79271+ozb4l3t4jNlLudbDZryCm57373uzqu7nT5IPzmN7+x2ubWs+4WDVH5WeY7cCIiT3ECJyLylJe/+3Xs2FHHNVk2+OSTT+rYXRb03//+N
6OxuCe53HzzzTp2T/9I1yWXXGK1P/jgg4xepxC5SwzNkkpQJRT3YNwOHTro2N0ZjzKX6uBi8+cuqGV87uv87ne/0/GECROsvp/85Cc6XrduXSDXzwTfgRMReYoTOBGRpziBExF5yssaeLoOHz5stVeuXKnjVDXv5s2bW+3rrrsu6WPNE1+A9Gvybl37H//4h467d+9u9b388stpvSbZ9WgA6N27d86vaS5JZQ08OOZSQfdzB3Obilxt7WqeUu9uZ3vBBRfk5Jo1xXfgRESe4gROROQpL0so5q8zqUoW7q89Z5zxv3+v3ENrzaWJ7jK+a6+9Nu1rpFtCMUsmAFBSUpLW8yg1t9w1dOjQtJ43fvx4q3306FGr/eCDDyZ9bu3atXXcpEkTq2/fvn1pXZ9OV15ermPz5xMABg0apOO//OUvVt/BgwcDub65VDHVocr5xHfgRESeqnYCF5FmIvKKiJSLyEYRGZr4fkMRWS4iWxJfG+R+uBQU5jW2ajOvhSOdd+AnAAxXSrUG0AHAEBFpA6AUwAqlVEsAKxJt8gfzGl/Ma4GotgaulKoAUJGIPxGRcgBNAfQA0DnxsPkAXgUwIiejPH1MaT2ufv36Vts9bT7VY4MYy5EjR6y2WZ/dvXt3RtcLShTzmk87d+602j/4wQ/Sfm6dOnV0fPnll1t9eaiBf66UWgf4n9cDBw7o2P2sqbi4WMevvfaa1WcuMaxJrbpr165W2/z8xD35vqysLO3XzaUafYgpIsUA2gJ4C0CTxCQApVSFiDRO8pwSAPyELsKY13hiXuMv7QlcROoCWAxgmFLqiPsvYjJKqVkAZiVeI7MDKylnmNd4Yl4LQ1oTuIjURuVfhqeUUksS394nIkWJf82LAOzP1SCDkmmZJJVDhw5Z7VWrVunY3M0MANauXRv49bMRl7wGYcqUKVa7YcOGaT938ODBOnYPPM6HuOR10qRJOjYPOAbs3QBbtWpl9a1evVrH/fv3t/rcO2XN15k5c6bVZ5ZH3ed5s4xQKv/png2gXCn1iNG1DMCARDwAwNLgh0e5wrzGGvNaINJ5B94RQD8A74nI+sT3RgKYDOBpERkIYCeAG3MyQsoV5jWe6oJ5LRjprEJ5HUCyAlryWxQp0pjX2DqqlGJeC4TU5ESbrC8W0Ici06ZN0/EvfvGLIF4ybdOnT7fab775ptVeuHBhmMPJWIof8hqL0odd7du3t9rLli3TsXube6beeOMNq92nTx8d79mzJ5BrZGGtUqp99Q+rXpTy6p58Ze5A6J60ZG6ZcerUqaR9br+7VNCse7u19DyoMq+8lZ6IyFOcwImIPOVlCaVevXo6btOmjdVnLgdzN4FPpW/fvjp278ozbdy40Wp/8sknaV8jSuJaQnGZd0Y+99xzVl+LFi3Sfh1zCah5ODZgH+gQAbEsobjMQ1fcA8rNnT3d+c1dD79p0yYdjxkzxuqL2OEcLKEQEcUJJ3AiIk9xAici8pSXNXDKXqHUwE3t2rWz2uZh0e42C2+//bbVNg9H3rVrVw5GF5iCqIEXINbAiYjihBM4EZGnvDzUmCgT69ats9o12XGQKIr4DpyIyFOcwImIPMUJnIjIU5zAiYg8xQmciMhTnMCJiDzFCZyIyFOcwImIPMUJnIjIU5zAiYg8Ffat9AcB7ABwQSKOgkIcS/PqH1IjzGtqYY4lyNwyr6nlPa+hbierLyqyJqgtL7PFsQQnSuPnWIITpfFzLDaWUIiIPMUJnIjIU/mawGfl6bpV4ViCE6XxcyzBidL4ORZDXmrgRESUPZZQiIg8xQmciMhToU7gItJVRDaLyFYRKQ3z2onrzxGR/SKywfheQxFZLiJbEl8bhDCOZiLyioiUi8hGERmar7EEgXm1xhKb3DKv1lgimdfQJnARqQVgOoBuANoA6CsibcK6fsI8AF2d75UCWKGUaglgRaKdaycADFdKtQbQAcCQxP+LfIwlK8zraWKRW+b1NNHMq1IqlD8ArgZQZrTvB
3B/WNc3rlsMYIPR3gygKBEXAdichzEtBdAlCmNhXplb5tWfvIZZQmkKYJfR3p34Xr41UUpVAEDia+MwLy4ixQDaAngr32PJEPOahOe5ZV6TiFJew5zApYrvFfQaRhGpC2AxgGFKqSP5Hk+GmNcqxCC3zGsVopbXMCfw3QCaGe2LAewN8frJ7BORIgBIfN0fxkVFpDYq/yI8pZRaks+xZIl5dcQkt8yrI4p5DXMCXw2gpYi0EJGzAPQBsCzE6yezDMCARDwAlbWtnBIRATAbQLlS6pF8jiUAzKshRrllXg2RzWvIhf/uAD4AsA3AqDx88LAQQAWAz1H5DmMggPNR+enxlsTXhiGMoxMqfx19F8D6xJ/u+RgL88rcMq/+5pW30hMReYp3YhIReYoTOBGRpziBExF5ihM4EZGnOIETEXmKEzgRkac4gRMReer/AexO0x5Zn7nQAAAAAElFTkSuQmCC\n",
 290 |       "text/plain": [
 291 |        "<Figure size 432x288 with 6 Axes>"
 292 |       ]
 293 |      },
 294 |      "metadata": {
 295 |       "needs_background": "light"
 296 |      },
 297 |      "output_type": "display_data"
 298 |     }
 299 |    ],
 300 |    "source": [
 301 |     "# images[0][0].shape      # the second index '0' accesses the 1st channel\n",
 302 |     "# torch.Size([28, 28])\n",
 303 |     "\n",
 304 |     "# Visualize the digits-\n",
 305 |     "for i in range(6):\n",
 306 |     "    plt.subplot(2, 3, i + 1)    # 2 rows & 3 columns\n",
 307 |     "    plt.imshow(images[i][0], cmap = 'gray')     # '0' to access first channel\n",
 308 |     "    # plt.imshow(img_samples[i][0], cmap='gray')  # '0' to access first channel\n",
 309 |     "plt.show()"
 310 |    ]
 311 |   },
 312 |   {
 313 |    "cell_type": "code",
 314 |    "execution_count": null,
 315 |    "id": "separated-lewis",
 316 |    "metadata": {},
 317 |    "outputs": [],
 318 |    "source": []
 319 |   },
 320 |   {
 321 |    "cell_type": "code",
 322 |    "execution_count": 15,
 323 |    "id": "filled-magic",
 324 |    "metadata": {},
 325 |    "outputs": [
 326 |     {
 327 |      "data": {
 328 |       "text/plain": [
 329 |        "(tensor(-0.4242), tensor(2.8215))"
 330 |       ]
 331 |      },
 332 |      "execution_count": 15,
 333 |      "metadata": {},
 334 |      "output_type": "execute_result"
 335 |     }
 336 |    ],
 337 |    "source": [
 338 |     "# Sanity check- Check whether transformations have been applied.\n",
 339 |     "# Look at first image out of 32 images-\n",
 340 |     "img_samples[0, :, :, :].min(), img_samples[0, :, :, :].max()"
 341 |    ]
 342 |   },
 343 |   {
 344 |    "cell_type": "code",
 345 |    "execution_count": null,
 346 |    "id": "suburban-vulnerability",
 347 |    "metadata": {},
 348 |    "outputs": [],
 349 |    "source": []
 350 |   },
 351 |   {
 352 |    "cell_type": "code",
 353 |    "execution_count": null,
 354 |    "id": "confused-unemployment",
 355 |    "metadata": {},
 356 |    "outputs": [],
 357 |    "source": []
 358 |   },
 359 |   {
 360 |    "cell_type": "code",
 361 |    "execution_count": 16,
 362 |    "id": "guided-distribution",
 363 |    "metadata": {},
 364 |    "outputs": [],
 365 |    "source": [
 366 |     "class LeNet300(nn.Module):\n",
 367 |     "    def __init__(self):\n",
 368 |     "        super(LeNet300, self).__init__()\n",
 369 |     "        \n",
 370 |     "        # Define layers-\n",
 371 |     "        self.fc1 = nn.Linear(in_features = input_size, out_features = 300)\n",
 372 |     "        self.fc2 = nn.Linear(in_features = 300, out_features = 100)\n",
 373 |     "        self.output = nn.Linear(in_features = 100, out_features = 10)\n",
 374 |     "        \n",
 375 |     "        self.weights_initialization()\n",
 376 |     "    \n",
 377 |     "    \n",
 378 |     "    def forward(self, x):\n",
 379 |     "        out = F.relu(self.fc1(x))\n",
 380 |     "        out = F.relu(self.fc2(out))\n",
 381 |     "        return self.output(out)\n",
 382 |     "    \n",
 383 |     "    \n",
 384 |     "    def weights_initialization(self):\n",
 385 |     "        '''\n",
 386 |     "        Initialize the parameters of every linear layer in the network.\n",
 387 |     "        'self.modules()' iterates over the modules of this network, including\n",
 388 |     "        the layers defined in '__init__()' above. For each 'nn.Linear' layer, the\n",
 389 |     "        weights get Xavier (Glorot) normal initialization and the biases are set to 0.\n",
 390 |     "        '''\n",
 391 |     "        for m in self.modules():\n",
 392 |     "            if isinstance(m, nn.Linear):\n",
 393 |     "                nn.init.xavier_normal_(m.weight)\n",
 394 |     "                nn.init.constant_(m.bias, 0)\n"
 395 |    ]
 396 |   },
 397 |   {
 398 |    "cell_type": "code",
 399 |    "execution_count": null,
 400 |    "id": "weighted-genius",
 401 |    "metadata": {},
 402 |    "outputs": [],
 403 |    "source": []
 404 |   },
 405 |   {
 406 |    "cell_type": "code",
 407 |    "execution_count": 17,
 408 |    "id": "twenty-ecology",
 409 |    "metadata": {},
 410 |    "outputs": [],
 411 |    "source": [
 412 |     "# Initialize an instance of LeNet-300-100 dense neural network-\n",
 413 |     "model = LeNet300()"
 414 |    ]
 415 |   },
 416 |   {
 417 |    "cell_type": "code",
 418 |    "execution_count": null,
 419 |    "id": "solar-virtue",
 420 |    "metadata": {},
 421 |    "outputs": [],
 422 |    "source": []
 423 |   },
 424 |   {
 425 |    "cell_type": "code",
 426 |    "execution_count": 18,
 427 |    "id": "dated-mixture",
 428 |    "metadata": {},
 429 |    "outputs": [
 430 |     {
 431 |      "name": "stdout",
 432 |      "output_type": "stream",
 433 |      "text": [
 434 |       "layer name: fc1.weight, has shape: torch.Size([300, 784])\n",
 435 |       "bias layer name: fc1.bias has shape: torch.Size([300])\n",
 436 |       "layer name: fc2.weight, has shape: torch.Size([100, 300])\n",
 437 |       "bias layer name: fc2.bias has shape: torch.Size([100])\n",
 438 |       "layer name: output.weight, has shape: torch.Size([10, 100])\n",
 439 |       "bias layer name: output.bias has shape: torch.Size([10])\n"
 440 |      ]
 441 |     }
 442 |    ],
 443 |    "source": [
 444 |     "for name, param in model.named_parameters():\n",
 445 |     "    # We do not prune bias term\n",
 446 |     "    if 'weight' in name:\n",
 447 |     "        print(f\"layer name: {name}, has shape: {param.shape}\")\n",
 448 |     "    elif 'bias' in name:\n",
 449 |     "        print(f\"bias layer name: {name} has shape: {param.shape}\")"
 450 |    ]
 451 |   },
 452 |   {
 453 |    "cell_type": "code",
 454 |    "execution_count": null,
 455 |    "id": "conceptual-mechanics",
 456 |    "metadata": {},
 457 |    "outputs": [],
 458 |    "source": []
 459 |   },
 460 |   {
 461 |    "cell_type": "code",
 462 |    "execution_count": 19,
 463 |    "id": "buried-worst",
 464 |    "metadata": {},
 465 |    "outputs": [],
 466 |    "source": [
 467 |     "# Define loss and optimizer-\n",
 468 |     "loss = nn.CrossEntropyLoss()    # applies softmax for us\n",
 469 |     "optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate)"
 470 |    ]
 471 |   },
 472 |   {
 473 |    "cell_type": "code",
 474 |    "execution_count": null,
 475 |    "id": "competitive-homeless",
 476 |    "metadata": {},
 477 |    "outputs": [],
 478 |    "source": [
 479 |     "# Print optimizer's state_dict\n",
 480 |     "print(\"Optimizer's 'state_dict':\")\n",
 481 |     "for var_name in optimizer.state_dict():\n",
 482 |     "    print(f\"var_name: {var_name} \\t {optimizer.state_dict()[var_name]}\")"
 483 |    ]
 484 |   },
 485 |   {
 486 |    "cell_type": "code",
 487 |    "execution_count": null,
 488 |    "id": "registered-container",
 489 |    "metadata": {},
 490 |    "outputs": [],
 491 |    "source": []
 492 |   },
 493 |   {
 494 |    "cell_type": "code",
 495 |    "execution_count": 20,
 496 |    "id": "explicit-there",
 497 |    "metadata": {},
 498 |    "outputs": [
 499 |     {
 500 |      "name": "stdout",
 501 |      "output_type": "stream",
 502 |      "text": [
 503 |       "num_steps = 1875 & len(train_dataset)/batch_size = 1875.0\n",
 504 |       "number of training steps in one epoch = 1875\n"
 505 |      ]
 506 |     }
 507 |    ],
 508 |    "source": [
 509 |     "# Training loop-\n",
 510 |     "num_steps = len(train_loader)\n",
 511 |     "\n",
 512 |     "print(f\"num_steps = {num_steps} & len(train_dataset)/batch_size = {len(train_dataset) / batch_size}\")\n",
 513 |     "print(f\"number of training steps in one epoch = {num_steps}\")"
 514 |    ]
 515 |   },
 516 |   {
 517 |    "cell_type": "code",
 518 |    "execution_count": null,
 519 |    "id": "auburn-ukraine",
 520 |    "metadata": {},
 521 |    "outputs": [],
 522 |    "source": []
 523 |   },
 524 |   {
 525 |    "cell_type": "code",
 526 |    "execution_count": 25,
 527 |    "id": "objective-pearl",
 528 |    "metadata": {},
 529 |    "outputs": [],
 530 |    "source": [
 531 |     "def count_params(model):\n",
 532 |     "    \n",
 533 |     "    tot_params = 0\n",
 534 |     "    for layer_name, param in model.named_parameters():\n",
 535 |     "        # print(f\"{layer_name}.shape = {param.shape} has {torch.count_nonzero(param.data)} non-zero params\")\n",
 536 |     "        tot_params += torch.count_nonzero(param.data)\n",
 537 |     "    \n",
 538 |     "    return tot_params"
 539 |    ]
 540 |   },
 541 |   {
 542 |    "cell_type": "code",
 543 |    "execution_count": null,
 544 |    "id": "macro-titanium",
 545 |    "metadata": {},
 546 |    "outputs": [],
 547 |    "source": []
 548 |   },
 549 |   {
 550 |    "cell_type": "code",
 551 |    "execution_count": null,
 552 |    "id": "minute-evolution",
 553 |    "metadata": {},
 554 |    "outputs": [],
 555 |    "source": []
 556 |   },
 557 |   {
 558 |    "cell_type": "code",
 559 |    "execution_count": null,
 560 |    "id": "southeast-diesel",
 561 |    "metadata": {},
 562 |    "outputs": [],
 563 |    "source": []
 564 |   },
 565 |   {
 566 |    "cell_type": "code",
 567 |    "execution_count": null,
 568 |    "id": "worldwide-correction",
 569 |    "metadata": {},
 570 |    "outputs": [],
 571 |    "source": []
 572 |   },
 573 |   {
 574 |    "cell_type": "markdown",
 575 |    "id": "leading-paintball",
 576 |    "metadata": {},
 577 |    "source": [
 578 |     "### Train defined model:"
 579 |    ]
 580 |   },
 581 |   {
 582 |    "cell_type": "code",
 583 |    "execution_count": 21,
 584 |    "id": "stuffed-andrews",
 585 |    "metadata": {},
 586 |    "outputs": [],
 587 |    "source": [
 588 |     "# User input parameters for Early Stopping in manual implementation-\n",
 589 |     "minimum_delta = 0.001\n",
 590 |     "patience = 5\n",
 591 |     "\n",
 592 |     "# Initialize parameters for Early Stopping manual implementation-\n",
 593 |     "best_val_loss = 100\n",
 594 |     "loc_patience = 0"
 595 |    ]
 596 |   },
 597 |   {
 598 |    "cell_type": "code",
 599 |    "execution_count": 22,
 600 |    "id": "happy-istanbul",
 601 |    "metadata": {},
 602 |    "outputs": [],
 603 |    "source": [
 604 |     "# Python3 lists to store model training metrics-\n",
 605 |     "training_acc = []\n",
 606 |     "validation_acc = []\n",
 607 |     "training_loss = []\n",
 608 |     "validation_loss = []"
 609 |    ]
 610 |   },
 611 |   {
 612 |    "cell_type": "code",
 613 |    "execution_count": null,
 614 |    "id": "conservative-workshop",
 615 |    "metadata": {},
 616 |    "outputs": [],
 617 |    "source": []
 618 |   },
 619 |   {
 620 |    "cell_type": "code",
 621 |    "execution_count": 23,
 622 |    "id": "foreign-cooperation",
 623 |    "metadata": {},
 624 |    "outputs": [],
 625 |    "source": [
 626 |     "def train_with_grad_freezing(model, epoch):\n",
 627 |     "    '''\n",
 628 |     "    Function to train 'model' for one epoch over the training dataset; gradients of zero-valued (pruned) weights are zeroed before each optimizer step.\n",
 629 |     "    '''\n",
 630 |     "        \n",
 631 |     "    running_loss = 0.0\n",
 632 |     "    running_corrects = 0.0\n",
 633 |     "        \n",
 634 |     "    for batch, (images, labels) in enumerate(train_loader):\n",
 635 |     "        # Reshape images first-\n",
 636 |     "        # 32, 1, 28, 28\n",
 637 |     "        # Input size needs to be 32, 784-\n",
 638 |     "        images = images.reshape(-1, 28 * 28 * 1).to(device)\n",
 639 |     "        # Tries to push to GPU if available\n",
 640 |     "        labels = labels.to(device)\n",
 641 |     "        # images, labels = images.reshape(-1, 28 * 28 * 1).to(device), labels.to(device)\n",
 642 |     "        \n",
 643 |     "        # Set defined model to training mode-\n",
 644 |     "        model.train()\n",
 645 |     "        \n",
 646 |     "        # Reset gradients before the forward pass-\n",
 647 |     "        optimizer.zero_grad()   # empty accumulated gradients\n",
 648 |     "\n",
 649 |     "        # Forward pass-\n",
 650 |     "        outputs = model(images)\n",
 651 |     "\n",
 652 |     "        # Compute loss-\n",
 653 |     "        J = loss(outputs, labels)\n",
 654 |     "        \n",
 655 |     "        # Perform backpropagation-\n",
 656 |     "        J.backward()\n",
 657 |     "        \n",
 658 |     "        # Freezing Pruned weights by making their gradients Zero\n",
 659 |     "        for layer_name, param in model.named_parameters():\n",
 660 |     "            if 'weight' in layer_name:\n",
 661 |     "                tensor = param.data.cpu().numpy()\n",
 662 |     "                grad_tensor = param.grad.data.cpu().numpy()\n",
 663 |     "                # grad_tensor = np.where(tensor < EPS, 0, grad_tensor)\n",
 664 |     "                grad_tensor = np.where(tensor == 0, 0, grad_tensor)\n",
 665 |     "                param.grad.data = torch.from_numpy(grad_tensor).to(device)\n",
 666 |     "\n",
 667 |     "        # Update parameters-\n",
 668 |     "        optimizer.step()\n",
 669 |     "        \n",
 670 |     "        # Compute model's performance statistics-\n",
 671 |     "        running_loss += J.item() * images.size(0)\n",
 672 |     "        _, predicted = torch.max(outputs, 1)\n",
 673 |     "        running_corrects += torch.sum(predicted == labels.data)\n",
 674 |     "\n",
 675 |     "        '''\n",
 676 |     "        # Print information every 100 steps-\n",
 677 |     "        if (batch + 1) % 100 == 0:\n",
 678 |     "            print(f\"epoch {epoch + 1}/{num_epochs}, step {batch + 1}/{num_steps}, loss = {J.item():.4f}\")\n",
 679 |     "        '''\n",
 680 |     "        \n",
 681 |     "    epoch_loss = running_loss / len(train_dataset)\n",
 682 |     "    epoch_acc = running_corrects.double() / len(train_dataset)\n",
 683 |     "    \n",
 684 |     "    return epoch_loss, epoch_acc\n",
 685 |     " \n"
 686 |    ]
 687 |   },
 688 |   {
 689 |    "cell_type": "code",
 690 |    "execution_count": null,
 691 |    "id": "killing-surveillance",
 692 |    "metadata": {},
 693 |    "outputs": [],
 694 |    "source": []
 695 |   },
 696 |   {
 697 |    "cell_type": "code",
 698 |    "execution_count": 24,
 699 |    "id": "spread-lodge",
 700 |    "metadata": {},
 701 |    "outputs": [],
 702 |    "source": [
 703 |     "def test(model, epoch):\n",
 704 |     "    '''\n",
 705 |     "    Function to validate performance of trained 'model' on testing set.\n",
 706 |     "    '''\n",
 707 |     "    \n",
 708 |     "    running_loss_val = 0.0\n",
 709 |     "\n",
 710 |     "    total = 0\n",
 711 |     "    correct = 0\n",
 712 |     "    \n",
 713 |     "    with torch.no_grad():\n",
 714 |     "        for images, labels in test_loader:\n",
 715 |     "\n",
 716 |     "            # Place features (images) and targets (labels) to GPU-\n",
 717 |     "            # images = images.to(device)\n",
 718 |     "            images = images.reshape(-1, 28 * 28 * 1).to(device)\n",
 719 |     "            labels = labels.to(device)\n",
 720 |     "            # images, labels = images.reshape(-1, 28 * 28 * 1).to(device), targets.to(device)\n",
 721 |     "            # print(f\"images.shape = {images.shape}, labels.shape = {labels.shape}\")\n",
 722 |     "            \n",
 723 |     "            # Set model to evaluation mode-\n",
 724 |     "            model.eval()\n",
 725 |     "    \n",
 726 |     "            # Make predictions using trained model-\n",
 727 |     "            outputs = model(images)\n",
 728 |     "            _, y_pred = torch.max(outputs, 1)\n",
 729 |     "\n",
 730 |     "            # Compute validation loss-\n",
 731 |     "            J_val = loss(outputs, labels)\n",
 732 |     "\n",
 733 |     "            running_loss_val += J_val.item() * labels.size(0)\n",
 734 |     "    \n",
 735 |     "            # Total number of labels-\n",
 736 |     "            total += labels.size(0)\n",
 737 |     "\n",
 738 |     "            # Total number of correct predictions-\n",
 739 |     "            correct += (y_pred == labels).sum()\n",
 740 |     "\n",
 741 |     "    epoch_val_loss = running_loss_val / len(test_dataset)\n",
 742 |     "    val_acc = 100 * (correct / total)\n",
 743 |     "    \n",
 744 |     "    return epoch_val_loss, val_acc\n",
 745 |     "\n",
 746 |     "    "
 747 |    ]
 748 |   },
 749 |   {
 750 |    "cell_type": "code",
 751 |    "execution_count": null,
 752 |    "id": "authentic-vision",
 753 |    "metadata": {},
 754 |    "outputs": [],
 755 |    "source": []
 756 |   },
 757 |   {
 758 |    "cell_type": "code",
 759 |    "execution_count": null,
 760 |    "id": "arbitrary-heritage",
 761 |    "metadata": {},
 762 |    "outputs": [],
 763 |    "source": []
 764 |   },
 765 |   {
 766 |    "cell_type": "code",
 767 |    "execution_count": 26,
 768 |    "id": "collectible-swing",
 769 |    "metadata": {},
 770 |    "outputs": [
 771 |     {
 772 |      "name": "stdout",
 773 |      "output_type": "stream",
 774 |      "text": [
 775 |       "\n",
 776 |       "epoch: 1, # of params = 266610, training loss = 0.2036, training accuracy = 93.77%, val_loss = 0.1190 & val_accuracy = 96.21%\n",
 777 |       "\n",
 778 |       "\n",
 779 |       "Saving model with lowest val_loss = 0.1190\n",
 780 |       "\n",
 781 |       "epoch: 2, # of params = 266610, training loss = 0.0968, training accuracy = 97.04%, val_loss = 0.1003 & val_accuracy = 96.91%\n",
 782 |       "\n",
 783 |       "\n",
 784 |       "Saving model with lowest val_loss = 0.1003\n",
 785 |       "\n",
 786 |       "epoch: 3, # of params = 266610, training loss = 0.0738, training accuracy = 97.65%, val_loss = 0.1087 & val_accuracy = 96.67%\n",
 787 |       "\n",
 788 |       "\n",
 789 |       "epoch: 4, # of params = 266610, training loss = 0.0608, training accuracy = 98.06%, val_loss = 0.0971 & val_accuracy = 97.31%\n",
 790 |       "\n",
 791 |       "\n",
 792 |       "Saving model with lowest val_loss = 0.0971\n",
 793 |       "\n",
 794 |       "epoch: 5, # of params = 266610, training loss = 0.0523, training accuracy = 98.36%, val_loss = 0.0992 & val_accuracy = 97.24%\n",
 795 |       "\n",
 796 |       "\n",
 797 |       "epoch: 6, # of params = 266610, training loss = 0.0419, training accuracy = 98.65%, val_loss = 0.0917 & val_accuracy = 97.65%\n",
 798 |       "\n",
 799 |       "\n",
 800 |       "Saving model with lowest val_loss = 0.0917\n",
 801 |       "\n",
 802 |       "epoch: 7, # of params = 266610, training loss = 0.0387, training accuracy = 98.79%, val_loss = 0.0983 & val_accuracy = 97.59%\n",
 803 |       "\n",
 804 |       "\n",
 805 |       "epoch: 8, # of params = 266610, training loss = 0.0344, training accuracy = 98.92%, val_loss = 0.1141 & val_accuracy = 97.47%\n",
 806 |       "\n",
 807 |       "\n",
 808 |       "epoch: 9, # of params = 266610, training loss = 0.0332, training accuracy = 98.97%, val_loss = 0.1070 & val_accuracy = 97.84%\n",
 809 |       "\n",
 810 |       "\n",
 811 |       "epoch: 10, # of params = 266610, training loss = 0.0300, training accuracy = 99.11%, val_loss = 0.1006 & val_accuracy = 97.86%\n",
 812 |       "\n",
 813 |       "\n",
 814 |       "epoch: 11, # of params = 266610, training loss = 0.0295, training accuracy = 99.09%, val_loss = 0.1223 & val_accuracy = 97.52%\n",
 815 |       "\n",
 816 |       "\n",
 817 |       "\n",
 818 |       "Early stopping called. Exiting model training!\n",
 819 |       "\n",
 820 |       "\n"
 821 |      ]
 822 |     }
 823 |    ],
 824 |    "source": [
 825 |     "# Training loop-\n",
 826 |     "for curr_epoch in range(1, num_epochs):\n",
 827 |     "    \n",
 828 |     "    if loc_patience >= patience:\n",
 829 |     "        print(\"\\n\\nEarly stopping called. Exiting model training!\\n\\n\")\n",
 830 |     "        break\n",
 831 |     "        \n",
 832 |     "    # epoch_loss, epoch_acc = train(model = model, epoch = curr_epoch)\n",
 833 |     "    epoch_loss, epoch_acc = train_with_grad_freezing(model = model, epoch = curr_epoch)\n",
 834 |     "    epoch_val_loss, val_acc = test(model = model, epoch = curr_epoch)\n",
 835 |     "    \n",
 836 |     "    remaining_params = count_params(model)\n",
 837 |     "    # For reference: the pruned LeNet-300-100 model has 226730 non-zero parameters\n",
 838 |     "    \n",
 839 |     "    print(f\"\\nepoch: {curr_epoch}, # of params = {remaining_params}, training loss = {epoch_loss:.4f}, training accuracy = {epoch_acc * 100:.2f}%, val_loss = {epoch_val_loss:.4f} & val_accuracy = {val_acc:.2f}%\\n\")    \n",
 840 |     "    # print(f\"\\nepoch: {curr_epoch} training loss = {epoch_loss:.4f}, training accuracy = {epoch_acc * 100:.2f}%, val_loss = {epoch_val_loss:.4f} & val_accuracy = {val_acc:.2f}%\\n\")\n",
 841 |     "\n",
 842 |     "    \n",
 843 |     "    # Code for manual Early Stopping:\n",
 844 |     "    # if np.abs(epoch_val_loss < best_val_loss) >= minimum_delta:\n",
 845 |     "    if (epoch_val_loss < best_val_loss) and np.abs(epoch_val_loss - best_val_loss) >= minimum_delta:\n",
 846 |     "        # print(f\"epoch_val_loss = {epoch_val_loss:.4f}, best_val_loss = {best_val_loss:.4f}\")\n",
 847 |     "        \n",
 848 |     "        # update 'best_val_loss' variable to lowest loss encountered so far-\n",
 849 |     "        best_val_loss = epoch_val_loss\n",
 850 |     "        \n",
 851 |     "        # reset 'loc_patience' variable-\n",
 852 |     "        loc_patience = 0\n",
 853 |     "        \n",
 854 |     "        print(f\"\\nSaving model with lowest val_loss = {epoch_val_loss:.4f}\")\n",
 855 |     "        \n",
 856 |     "        # Save weights of the model with the lowest validation loss so far-\n",
 857 |     "        # torch.save(model.state_dict, f\"LeNet-300-100_Trained_{val_acc}.pth\")\n",
 858 |     "        torch.save(model.state_dict(), \"LeNet-300-100_Trained.pth\")\n",
 859 |     "        \n",
 860 |     "    else:  # there is no improvement in monitored metric 'val_loss'\n",
 861 |     "        loc_patience += 1  # number of epochs without any improvement\n",
 862 |     "\n",
 863 |     "\n",
 864 |     "    training_acc.append(epoch_acc * 100)\n",
 865 |     "    validation_acc.append(val_acc)\n",
 866 |     "    training_loss.append(epoch_loss)\n",
 867 |     "    validation_loss.append(epoch_val_loss)\n",
 868 |     "    "
 869 |    ]
 870 |   },
 871 |   {
 872 |    "cell_type": "code",
 873 |    "execution_count": null,
 874 |    "id": "empirical-swift",
 875 |    "metadata": {},
 876 |    "outputs": [],
 877 |    "source": []
 878 |   },
 879 |   {
 880 |    "cell_type": "code",
 881 |    "execution_count": 28,
 882 |    "id": "declared-thursday",
 883 |    "metadata": {},
 884 |    "outputs": [],
 885 |    "source": [
 886 |     "os.chdir(\"/home/arjun/Deep_Learning_Resources/LTH-Resources/\")"
 887 |    ]
 888 |   },
 889 |   {
 890 |    "cell_type": "code",
 891 |    "execution_count": 29,
 892 |    "id": "published-compiler",
 893 |    "metadata": {},
 894 |    "outputs": [],
 895 |    "source": [
 896 |     "# Initialize a new model for best weights achieved during training-\n",
 897 |     "best_model = LeNet300()"
 898 |    ]
 899 |   },
 900 |   {
 901 |    "cell_type": "code",
 902 |    "execution_count": 30,
 903 |    "id": "preceding-receipt",
 904 |    "metadata": {},
 905 |    "outputs": [
 906 |     {
 907 |      "data": {
 908 |       "text/plain": [
 909 |        "<All keys matched successfully>"
 910 |       ]
 911 |      },
 912 |      "execution_count": 30,
 913 |      "metadata": {},
 914 |      "output_type": "execute_result"
 915 |     }
 916 |    ],
 917 |    "source": [
 918 |     "best_model.load_state_dict(torch.load('LeNet-300-100_Trained.pth'))"
 919 |    ]
 920 |   },
 921 |   {
 922 |    "cell_type": "code",
 923 |    "execution_count": null,
 924 |    "id": "indie-provision",
 925 |    "metadata": {},
 926 |    "outputs": [],
 927 |    "source": []
 928 |   },
 929 |   {
 930 |    "cell_type": "code",
 931 |    "execution_count": 31,
 932 |    "id": "exceptional-poverty",
 933 |    "metadata": {},
 934 |    "outputs": [],
 935 |    "source": [
 936 |     "# Compute trained model's metrics on validation data-\n",
 937 |     "val_loss, val_acc = test(model = best_model, epoch = 1)"
 938 |    ]
 939 |   },
 940 |   {
 941 |    "cell_type": "code",
 942 |    "execution_count": 32,
 943 |    "id": "precious-highlight",
 944 |    "metadata": {},
 945 |    "outputs": [
 946 |     {
 947 |      "name": "stdout",
 948 |      "output_type": "stream",
 949 |      "text": [
 950 |       "\n",
 951 |       "LeNet-300-100 trained model metrics:\n",
 952 |       "val_loss = 0.0917 & val_accuracy = 97.65%\n"
 953 |      ]
 954 |     }
 955 |    ],
 956 |    "source": [
 957 |     "print(\"\\nLeNet-300-100 trained model metrics:\")\n",
 958 |     "print(f\"val_loss = {val_loss:.4f} & val_accuracy = {val_acc:.2f}%\")"
 959 |    ]
 960 |   },
 961 |   {
 962 |    "cell_type": "code",
 963 |    "execution_count": null,
 964 |    "id": "infectious-syndication",
 965 |    "metadata": {},
 966 |    "outputs": [],
 967 |    "source": []
 968 |   },
 969 |   {
 970 |    "cell_type": "code",
 971 |    "execution_count": 33,
 972 |    "id": "korean-raising",
 973 |    "metadata": {},
 974 |    "outputs": [],
 975 |    "source": [
 976 |     "# Delete 'model' since its performance degraded: it kept training for 'patience' more epochs after the best val_loss, which led to over-fitting-\n",
 977 |     "del model"
 978 |    ]
 979 |   },
 980 |   {
 981 |    "cell_type": "code",
 982 |    "execution_count": null,
 983 |    "id": "close-league",
 984 |    "metadata": {},
 985 |    "outputs": [],
 986 |    "source": []
 987 |   },
 988 |   {
 989 |    "cell_type": "code",
 990 |    "execution_count": null,
 991 |    "id": "empirical-renewal",
 992 |    "metadata": {},
 993 |    "outputs": [],
 994 |    "source": []
 995 |   },
 996 |   {
 997 |    "cell_type": "code",
 998 |    "execution_count": 34,
 999 |    "id": "incoming-projection",
1000 |    "metadata": {},
1001 |    "outputs": [],
1002 |    "source": [
1003 |     "def prune_lenet(model, pruning_params_fc, pruning_params_op):\n",
1004 |     "    '''\n",
1005 |     "    Function to prune (set to zero) the smallest-magnitude p% of trained weights in each\n",
1006 |     "    weight matrix, using magnitude-based weight pruning with the provided percentages.\n",
1007 |     "    \n",
1008 |     "    Inputs:\n",
1009 |     "    'model' is the PyTorch 1.7 defined neural network\n",
1010 |     "    'pruning_params_fc' is the percentage of weights to prune for dense, fully-connected layer\n",
1011 |     "    'pruning_params_op' is the percentage of weights to prune for output layer\n",
1012 |     "\n",
1013 |     "    Returns:\n",
1014 |     "    Python dict containing pruned layers\n",
1015 |     "    '''\n",
1016 |     "    \n",
1017 |     "    # Python3 dict to hold pruned weights-\n",
1018 |     "    pruned_d = {}\n",
1019 |     "    \n",
1020 |     "    # Populate 'pruned_d' with every parameter tensor. NOTE(review): the loop reads the global 'best_model', not the 'model' argument-\n",
1021 |     "    for layer_name, param in best_model.named_parameters():\n",
1022 |     "        # pruned_d[layer_name] = torch.zeros_like(param.data)\n",
1023 |     "        x = param.data.numpy()\n",
1024 |     "    \n",
1025 |     "        if len(x.shape) == 2 and x.shape[0] != 10:\n",
1026 |     "            # FC layer-\n",
1027 |     "            # print(layer_name, param.shape)\n",
1028 |     "    \n",
1029 |     "            # Compute absolute value of 'x'-\n",
1030 |     "            x_abs = np.abs(x)\n",
1031 |     "\n",
1032 |     "            # Zero out magnitudes that fall below the 'pruning_params_fc'-th percentile-\n",
1033 |     "            x_abs[x_abs < np.percentile(x_abs, pruning_params_fc)] = 0\n",
1034 |     "\n",
1035 |     "            # Where 'x_abs' equals 0, keep 0, else, replace with values of 'x'-\n",
1036 |     "            # OR\n",
1037 |     "            # If x_abs == 0 (condition) is True, use the value of 0, otherwise\n",
1038 |     "            # use the value in 'x'\n",
1039 |     "            x_mod = np.where(x_abs == 0, 0, x)\n",
1040 |     "    \n",
1041 |     "            # Counts the number of non-zero values in the array 'x_mod'-\n",
1042 |     "            # np.count_nonzero(x_mod)\n",
1043 |     "    \n",
1044 |     "            # pruned_weights.append(x_mod)\n",
1045 |     "            pruned_d[layer_name] = torch.from_numpy(x_mod)\n",
1046 |     "    \n",
1047 |     "        elif len(x.shape) == 2 and x.shape[0] == 10:\n",
1048 |     "            # print(\"output layer\", param.shape)\n",
1049 |     "        \n",
1050 |     "            # Output layer-\n",
1051 |     "            # print(layer_name, param.shape)\n",
1052 |     "    \n",
1053 |     "            # Compute absolute value of 'x'-\n",
1054 |     "            x_abs = np.abs(x)\n",
1055 |     "\n",
1056 |     "            # Zero out magnitudes that fall below the 'pruning_params_op'-th percentile-\n",
1057 |     "            x_abs[x_abs < np.percentile(x_abs, pruning_params_op)] = 0\n",
1058 |     "\n",
1059 |     "            # Where 'x_abs' equals 0, keep 0, else, replace with values of 'x'-\n",
1060 |     "            # OR\n",
1061 |     "            # If x_abs == 0 (condition) is True, use the value of 0, otherwise\n",
1062 |     "            # use the value in 'x'\n",
1063 |     "            x_mod = np.where(x_abs == 0, 0, x)\n",
1064 |     "    \n",
1065 |     "            # Counts the number of non-zero values in the array 'x_mod'-\n",
1066 |     "            # np.count_nonzero(x_mod)\n",
1067 |     "    \n",
1068 |     "            # pruned_weights.append(x_mod)\n",
1069 |     "            pruned_d[layer_name] = torch.from_numpy(x_mod)\n",
1070 |     "    \n",
1071 |     "        else:\n",
1072 |     "            pruned_d[layer_name] = param.data\n",
1073 |     "\n",
1074 |     "    \n",
1075 |     "    return pruned_d\n",
1076 |     "    \n",
1077 |     "    "
1078 |    ]
1079 |   },
1080 |   {
1081 |    "cell_type": "code",
1082 |    "execution_count": null,
1083 |    "id": "psychological-deposit",
1084 |    "metadata": {},
1085 |    "outputs": [],
1086 |    "source": []
1087 |   },
1088 |   {
1089 |    "cell_type": "code",
1090 |    "execution_count": 43,
1091 |    "id": "periodic-commercial",
1092 |    "metadata": {},
1093 |    "outputs": [],
1094 |    "source": [
1095 |     "# Prune 15% of smallest magnitude weights in FC layers and 10% in output layer-\n",
1096 |     "pruned_d = prune_lenet(model = best_model, pruning_params_fc = 15, pruning_params_op = 10)"
1097 |    ]
1098 |   },
1099 |   {
1100 |    "cell_type": "code",
1101 |    "execution_count": 36,
1102 |    "id": "banner-narrative",
1103 |    "metadata": {},
1104 |    "outputs": [
1105 |     {
1106 |      "data": {
1107 |       "text/plain": [
1108 |        "dict"
1109 |       ]
1110 |      },
1111 |      "execution_count": 36,
1112 |      "metadata": {},
1113 |      "output_type": "execute_result"
1114 |     }
1115 |    ],
1116 |    "source": [
1117 |     "type(pruned_d)"
1118 |    ]
1119 |   },
1120 |   {
1121 |    "cell_type": "code",
1122 |    "execution_count": null,
1123 |    "id": "thorough-equation",
1124 |    "metadata": {},
1125 |    "outputs": [],
1126 |    "source": []
1127 |   },
1128 |   {
1129 |    "cell_type": "code",
1130 |    "execution_count": 44,
1131 |    "id": "humanitarian-scott",
1132 |    "metadata": {},
1133 |    "outputs": [
1134 |     {
1135 |      "data": {
1136 |       "text/plain": [
1137 |        "<All keys matched successfully>"
1138 |       ]
1139 |      },
1140 |      "execution_count": 44,
1141 |      "metadata": {},
1142 |      "output_type": "execute_result"
1143 |     }
1144 |    ],
1145 |    "source": [
1146 |     "# Initialize and load pruned Python3 dict into a new model-\n",
1147 |     "pruned_model = LeNet300()\n",
1148 |     "pruned_model.load_state_dict(pruned_d)"
1149 |    ]
1150 |   },
1151 |   {
1152 |    "cell_type": "code",
1153 |    "execution_count": null,
1154 |    "id": "accepting-ancient",
1155 |    "metadata": {},
1156 |    "outputs": [],
1157 |    "source": []
1158 |   },
1159 |   {
1160 |    "cell_type": "code",
1161 |    "execution_count": 45,
1162 |    "id": "handed-arthritis",
1163 |    "metadata": {},
1164 |    "outputs": [],
1165 |    "source": [
1166 |     "params_pruned = count_params(pruned_model)"
1167 |    ]
1168 |   },
1169 |   {
1170 |    "cell_type": "code",
1171 |    "execution_count": 46,
1172 |    "id": "innovative-wound",
1173 |    "metadata": {},
1174 |    "outputs": [
1175 |     {
1176 |      "name": "stdout",
1177 |      "output_type": "stream",
1178 |      "text": [
1179 |       "# of non-zero parameters in pruned model = 226730\n"
1180 |      ]
1181 |     }
1182 |    ],
1183 |    "source": [
1184 |     "print(f\"# of non-zero parameters in pruned model = {params_pruned.numpy()}\")"
1185 |    ]
1186 |   },
1187 |   {
1188 |    "cell_type": "code",
1189 |    "execution_count": null,
1190 |    "id": "conscious-space",
1191 |    "metadata": {},
1192 |    "outputs": [],
1193 |    "source": []
1194 |   },
1195 |   {
1196 |    "cell_type": "code",
1197 |    "execution_count": null,
1198 |    "id": "bridal-player",
1199 |    "metadata": {},
1200 |    "outputs": [],
1201 |    "source": []
1202 |   },
1203 |   {
1204 |    "cell_type": "code",
1205 |    "execution_count": null,
1206 |    "id": "arabic-algeria",
1207 |    "metadata": {},
1208 |    "outputs": [],
1209 |    "source": []
1210 |   },
1211 |   {
1212 |    "cell_type": "code",
1213 |    "execution_count": null,
1214 |    "id": "gorgeous-strand",
1215 |    "metadata": {},
1216 |    "outputs": [],
1217 |    "source": []
1218 |   },
1219 |   {
1220 |    "cell_type": "markdown",
1221 |    "id": "frank-convenience",
1222 |    "metadata": {},
1223 |    "source": [
1224 |     "### Re-train pruned model:"
1225 |    ]
1226 |   },
1227 |   {
1228 |    "cell_type": "code",
1229 |    "execution_count": 47,
1230 |    "id": "fiscal-medline",
1231 |    "metadata": {},
1232 |    "outputs": [],
1233 |    "source": [
1234 |     "# User input parameters for Early Stopping in manual implementation-\n",
1235 |     "minimum_delta = 0.001\n",
1236 |     "patience = 5\n",
1237 |     "\n",
1238 |     "# Initialize parameters for Early Stopping manual implementation-\n",
1239 |     "best_val_loss = 100\n",
1240 |     "loc_patience = 0"
1241 |    ]
1242 |   },
1243 |   {
1244 |    "cell_type": "code",
1245 |    "execution_count": 48,
1246 |    "id": "indian-keyboard",
1247 |    "metadata": {},
1248 |    "outputs": [],
1249 |    "source": [
1250 |     "# Python3 lists to store model training metrics-\n",
1251 |     "training_acc = []\n",
1252 |     "validation_acc = []\n",
1253 |     "training_loss = []\n",
1254 |     "validation_loss = []"
1255 |    ]
1256 |   },
1257 |   {
1258 |    "cell_type": "code",
1259 |    "execution_count": null,
1260 |    "id": "photographic-tutorial",
1261 |    "metadata": {},
1262 |    "outputs": [],
1263 |    "source": []
1264 |   },
1265 |   {
1266 |    "cell_type": "code",
1267 |    "execution_count": 49,
1268 |    "id": "lovely-playback",
1269 |    "metadata": {},
1270 |    "outputs": [
1271 |     {
1272 |      "name": "stdout",
1273 |      "output_type": "stream",
1274 |      "text": [
1275 |       "\n",
1276 |       "epoch: 1, # of params = 226730, training loss = 0.0285, training accuracy = 99.04%, val_loss = 0.0910 & val_accuracy = 97.68%\n",
1277 |       "\n",
1278 |       "\n",
1279 |       "Saving model with lowest val_loss = 0.0910\n",
1280 |       "\n",
1281 |       "epoch: 2, # of params = 226730, training loss = 0.0285, training accuracy = 99.04%, val_loss = 0.0910 & val_accuracy = 97.68%\n",
1282 |       "\n",
1283 |       "\n",
1284 |       "epoch: 3, # of params = 226730, training loss = 0.0285, training accuracy = 99.04%, val_loss = 0.0910 & val_accuracy = 97.68%\n",
1285 |       "\n",
1286 |       "\n",
1287 |       "epoch: 4, # of params = 226730, training loss = 0.0285, training accuracy = 99.04%, val_loss = 0.0910 & val_accuracy = 97.68%\n",
1288 |       "\n",
1289 |       "\n",
1290 |       "epoch: 5, # of params = 226730, training loss = 0.0285, training accuracy = 99.04%, val_loss = 0.0910 & val_accuracy = 97.68%\n",
1291 |       "\n",
1292 |       "\n",
1293 |       "epoch: 6, # of params = 226730, training loss = 0.0285, training accuracy = 99.04%, val_loss = 0.0910 & val_accuracy = 97.68%\n",
1294 |       "\n",
1295 |       "\n",
1296 |       "\n",
1297 |       "Early stopping called. Exiting model training!\n",
1298 |       "\n",
1299 |       "\n"
1300 |      ]
1301 |     }
1302 |    ],
1303 |    "source": [
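1304 |     "# Training loop- NOTE(review): train_with_grad_freezing() uses the global 'optimizer', which was built from the original 'model' parameters, so it does not appear to update 'pruned_model' (metrics are identical every epoch); create a new optimizer for 'pruned_model' before re-training-\n",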
1305 |     "for curr_epoch in range(1, num_epochs):\n",
1306 |     "    \n",
1307 |     "    if loc_patience >= patience:\n",
1308 |     "        print(\"\\n\\nEarly stopping called. Exiting model training!\\n\\n\")\n",
1309 |     "        break\n",
1310 |     "        \n",
1311 |     "    # epoch_loss, epoch_acc = train(model = model, epoch = curr_epoch)\n",
1312 |     "    epoch_loss, epoch_acc = train_with_grad_freezing(model = pruned_model, epoch = curr_epoch)\n",
1313 |     "    epoch_val_loss, val_acc = test(model = pruned_model, epoch = curr_epoch)\n",
1314 |     "    \n",
1315 |     "    remaining_params = count_params(pruned_model)\n",
1316 |     "    \n",
1317 |     "    print(f\"\\nepoch: {curr_epoch}, # of params = {remaining_params}, training loss = {epoch_loss:.4f}, training accuracy = {epoch_acc * 100:.2f}%, val_loss = {epoch_val_loss:.4f} & val_accuracy = {val_acc:.2f}%\\n\")    \n",
1318 |     "    # print(f\"\\nepoch: {curr_epoch} training loss = {epoch_loss:.4f}, training accuracy = {epoch_acc * 100:.2f}%, val_loss = {epoch_val_loss:.4f} & val_accuracy = {val_acc:.2f}%\\n\")\n",
1319 |     "\n",
1320 |     "    \n",
1321 |     "    # Code for manual Early Stopping:\n",
1322 |     "    # if np.abs(epoch_val_loss < best_val_loss) >= minimum_delta:\n",
1323 |     "    if (epoch_val_loss < best_val_loss) and np.abs(epoch_val_loss - best_val_loss) >= minimum_delta:\n",
1324 |     "        # print(f\"epoch_val_loss = {epoch_val_loss:.4f}, best_val_loss = {best_val_loss:.4f}\")\n",
1325 |     "        \n",
1326 |     "        # update 'best_val_loss' variable to lowest loss encountered so far-\n",
1327 |     "        best_val_loss = epoch_val_loss\n",
1328 |     "        \n",
1329 |     "        # reset 'loc_patience' variable-\n",
1330 |     "        loc_patience = 0\n",
1331 |     "        \n",
1332 |     "        print(f\"\\nSaving model with lowest val_loss = {epoch_val_loss:.4f}\")\n",
1333 |     "        \n",
1334 |     "        # Save state_dict of the model with the lowest validation loss so far-\n",
1335 |     "        # torch.save(model.state_dict, f\"LeNet-300-100_Trained_{val_acc}.pth\")\n",
1336 |     "        torch.save(pruned_model.state_dict(), \"LeNet-300-100_Test_Trained.pth\")\n",
1337 |     "        \n",
1338 |     "    else:  # no improvement of at least 'minimum_delta' in monitored metric 'val_loss'\n",
1339 |     "        loc_patience += 1  # number of epochs without any improvement\n",
1340 |     "\n",
1341 |     "\n",
1342 |     "    training_acc.append(epoch_acc * 100)\n",
1343 |     "    validation_acc.append(val_acc)\n",
1344 |     "    training_loss.append(epoch_loss)\n",
1345 |     "    validation_loss.append(epoch_val_loss)\n",
1346 |     "    "
1347 |    ]
1348 |   },
1349 |   {
1350 |    "cell_type": "code",
1351 |    "execution_count": null,
1352 |    "id": "exceptional-tissue",
1353 |    "metadata": {},
1354 |    "outputs": [],
1355 |    "source": []
1356 |   },
1357 |   {
1358 |    "cell_type": "code",
1359 |    "execution_count": null,
1360 |    "id": "beginning-singing",
1361 |    "metadata": {},
1362 |    "outputs": [],
1363 |    "source": []
1364 |   }
1365 |  ],
1366 |  "metadata": {
1367 |   "kernelspec": {
1368 |    "display_name": "Python 3",
1369 |    "language": "python",
1370 |    "name": "python3"
1371 |   },
1372 |   "language_info": {
1373 |    "codemirror_mode": {
1374 |     "name": "ipython",
1375 |     "version": 3
1376 |    },
1377 |    "file_extension": ".py",
1378 |    "mimetype": "text/x-python",
1379 |    "name": "python",
1380 |    "nbconvert_exporter": "python",
1381 |    "pygments_lexer": "ipython3",
1382 |    "version": "3.8.8"
1383 |   }
1384 |  },
1385 |  "nbformat": 4,
1386 |  "nbformat_minor": 5
1387 | }
1388 | 


--------------------------------------------------------------------------------