├── README.md
└── DNN.py

/README.md:
--------------------------------------------------------------------------------
## Image Classification on the CIFAR-10 Dataset using Multi-Layer Perceptrons in Python from scratch

#### The CIFAR-10 dataset can be found here - https://www.cs.toronto.edu/~kriz/cifar.html
#### Download the Python batches version of the dataset and extract it to %pwd/cifar-10-batches-py/

Implements an N-layer deep neural network with backpropagation for CIFAR-10 image classification, written from scratch in Python (Python 3 and NumPy) to build a basic understanding of the backpropagation and gradient descent algorithms.

Warning: due to the lack of GPU support and the network having more than one hidden layer, the model trains very slowly.
--------------------------------------------------------------------------------
/DNN.py:
--------------------------------------------------------------------------------
import pickle
import numpy as np


def unpickle(fileName):
    '''
    Description: retrieve data from a CIFAR-10 pickle
    Params: fileName = filename to unpickle
    Outputs: unpickled dict
    '''
    with open(fileName, 'rb') as f:
        # encoding='latin1' keeps the original Python 2 string keys intact
        data_dict = pickle.load(f, encoding='latin1')
    return data_dict


def merge_batches(num_to_load=1):
    '''
    Description: merge batches of CIFAR-10 data pickles
    Params: num_to_load = number of batches of CIFAR-10 to load and merge
    Outputs: merged features and labels from the specified no. of batches
    '''
    for i in range(num_to_load):
        fileName = "cifar-10-batches-py/data_batch_" + str(i + 1)
        data = unpickle(fileName)
        if i == 0:
            features = data["data"]
            labels = np.array(data["labels"])
        else:
            features = np.append(features, data["data"], axis=0)
            labels = np.append(labels, data["labels"], axis=0)
    return features, labels


def one_hot_encode(data):
    '''
    Description: encode target label IDs to one-hot vectors of size L, where L
                 is the number of unique labels
    Params: data = list of label IDs
    Outputs: list of one-hot vectors
    '''
    one_hot = np.zeros((data.shape[0], 10))
    one_hot[np.arange(data.shape[0]), data] = 1
    return one_hot


def normalize(data):
    '''
    Description: normalize pixel values to [0, 1]
    Params: data = list of image pixel features
    Outputs: normalized image pixel features
    '''
    return data / 255.0


def preprocess(num_to_load=1):
    '''
    Description: helper function to load and preprocess CIFAR-10 training batches
    Params: num_to_load = number of batches of CIFAR-10 to load and merge
    Outputs: pre-processed CIFAR-10 image features and labels
    '''
    X, y = merge_batches(num_to_load=num_to_load)
    X = normalize(X)
    X = X.reshape(-1, 3072, 1)
    y = one_hot_encode(y)
    y = y.reshape(-1, 10, 1)
    return X, y


def dataset_split(X, y, ratio=0.8):
    '''
    Description: helper function to split training data into training and
                 validation sets
    Params: X = image features
            y = labels
            ratio = fraction of the data to use for training
    Outputs: training data (features and labels) and validation data
    '''
    split = int(ratio * X.shape[0])
    indices = np.random.permutation(X.shape[0])
    training_idx, val_idx = indices[:split], indices[split:]
    X_train, X_val = X[training_idx, :], X[val_idx, :]
    y_train, y_val = y[training_idx, :], y[val_idx, :]
    print("Records in Training Dataset", X_train.shape[0])
    print("Records in Validation Dataset", X_val.shape[0])
    return X_train, y_train, X_val, y_val
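
# The helper below is an expository addition (hypothetical, never called by the
# script): it sketches the shapes the preprocessing pipeline above produces.
# One CIFAR-10 batch holds 10000 images of 32*32*3 = 3072 pixels each, so with
# the default 0.8 split the assertions below should hold.
def _shape_check_demo():
    X, y = preprocess(num_to_load=1)
    assert X.shape == (10000, 3072, 1)  # one column vector per image
    assert y.shape == (10000, 10, 1)    # one-hot labels over the 10 classes
    X_train, y_train, X_val, y_val = dataset_split(X, y, ratio=0.8)
    assert X_train.shape[0] == 8000 and X_val.shape[0] == 2000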

def sigmoid(out):
    '''
    Description: sigmoid activation
    Params: out = a list/matrix to apply the activation to
    Outputs: sigmoid-activated list/matrix
    '''
    return 1.0 / (1.0 + np.exp(-out))


def delta_sigmoid(out):
    '''
    Description: derivative of the sigmoid activation
    Params: out = a list/matrix to apply the derivative to
    Outputs: delta(sigmoid)-activated list/matrix
    '''
    return sigmoid(out) * (1 - sigmoid(out))


def SigmoidCrossEntropyLoss(a, y):
    '''
    Description: calculate the sigmoid cross-entropy loss
    Params: a = activation
            y = target one-hot vector
    Outputs: a loss value
    '''
    return np.sum(np.nan_to_num(-y * np.log(a) - (1 - y) * np.log(1 - a)))
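
# Quick numerical check (an illustrative addition, not used in training): the
# analytic delta_sigmoid should match a centered finite-difference estimate of
# sigmoid's slope. Calling _check_delta_sigmoid() should raise no assertion.
def _check_delta_sigmoid(eps=1e-5):
    z = np.linspace(-5, 5, 11)
    numeric = (sigmoid(z + eps) - sigmoid(z - eps)) / (2 * eps)
    assert np.allclose(delta_sigmoid(z), numeric, atol=1e-6)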

class DNN(object):
    '''
    Description: class to define the Deep Neural Network
    '''

    def __init__(self, sizes):
        '''
        Description: initialize the biases and weights using a Gaussian
                     distribution with mean 0 and variance 1.
                     Biases are not set for the 1st layer, i.e. the input layer.
        Params: sizes = a list of length L, where L is the number of layers in
                the deep neural network and each element is the number of
                neurons in that layer.
                The first and last elements correspond to the input layer and
                the output layer respectively; intermediate layers are hidden
                layers.
        '''
        self.num_layers = len(sizes)
        # setting appropriate dimensions for weights and biases
        self.biases = [np.random.randn(y, 1) for y in sizes[1:]]
        self.weights = [np.random.randn(y, x)
                        for x, y in zip(sizes[:-1], sizes[1:])]

    def feedforward(self, x):
        '''
        Description: forward-passes an image feature vector through the deep
                     neural network architecture.
        Params: x = image features
        Outputs: 2 lists storing the outputs and activations at every layer;
                 the 1st list is non-activated and the 2nd is activated.
                 The last element of the 2nd list holds the scores against the
                 10 labels in the dataset.
        '''
        activation = x
        activations = [x]  # list to store activations for every layer
        outs = []  # list to store out vectors for every layer
        for b, w in zip(self.biases, self.weights):
            out = np.dot(w, activation) + b
            outs.append(out)
            activation = sigmoid(out)
            activations.append(activation)
        return outs, activations

    def get_batch(self, X, y, batch_size):
        '''
        Description: a data iterator for batching image features and labels
        Params: X, y = lists of features and corresponding labels to be batched
                batch_size = size of the batch
        Outputs: a batch of image features and labels of size = batch_size
        '''
        for batch_idx in range(0, X.shape[0], batch_size):
            batch = zip(X[batch_idx:batch_idx + batch_size],
                        y[batch_idx:batch_idx + batch_size])
            yield batch

    def train(self, X, y, batch_size=100, learning_rate=0.2, epochs=1000):
        '''
        Description: trains image features against corresponding labels batch
                     by batch. The weights and biases of the neural network are
                     updated through backpropagation on batches using SGD.
                     del_b and del_w have the same sizes as the biases and
                     weights of all the layers; they contain the gradients used
                     to update the weights and biases.
        Params: X, y = lists of training features and corresponding labels
                batch_size = size of the batch
                learning_rate = eta; controls the size of changes in weights & biases
                epochs = no. of times to iterate over the whole data
        '''
        n_batches = X.shape[0] // batch_size
        for j in range(epochs):
            batch_iter = self.get_batch(X, y, batch_size)
            for i in range(n_batches):
                batch = next(batch_iter)
                # same shape as self.biases
                del_b = [np.zeros(b.shape) for b in self.biases]
                # same shape as self.weights
                del_w = [np.zeros(w.shape) for w in self.weights]
                for batch_X, batch_y in batch:
                    # accumulate all the bias and weight gradients
                    loss, delta_del_b, delta_del_w = self.backpropagate(
                        batch_X, batch_y)
                    del_b = [db + ddb for db, ddb in zip(del_b, delta_del_b)]
                    del_w = [dw + ddw for dw, ddw in zip(del_w, delta_del_w)]
                # update the weights and biases with the accumulated gradients
                # (partial derivatives), scaled by learning_rate / batch_size
                self.weights = [w - (learning_rate / batch_size)
                                * delw for w, delw in zip(self.weights, del_w)]
                self.biases = [b - (learning_rate / batch_size)
                               * delb for b, delb in zip(self.biases, del_b)]
            # note: the reported loss is that of the last sample processed
            print("\nEpoch %d complete\tLoss: %f\n" % (j, loss))
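    # Expository sketch (a hypothetical method, never called by the script):
    # the update in train() is plain mini-batch SGD applied to every parameter.
    # For a single scalar weight w with gradient g accumulated over a batch:
    #     w <- w - (learning_rate / batch_size) * g
    # e.g. with learning_rate=0.2, batch_size=100 and g=5.0, w moves by -0.01.
    def _sgd_update_demo(self, w=0.3, g=5.0, learning_rate=0.2, batch_size=100):
        return w - (learning_rate / batch_size) * g  # -> 0.29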
    def backpropagate(self, x, y):
        '''
        Description: based on the derivative (delta) of the cost function, the
                     gradients (rate of change of the cost function with
                     respect to weights and biases) of the weights and biases
                     are calculated.
                     del_b and del_w have the same sizes as the biases and
                     weights of all the layers; they are the gradients (partial
                     derivatives) used to update the weights and biases.
                     The cost function here is the sigmoid cross-entropy cost,
                     hence the cost derivative is:
                     delta C = activation(output_layer) - target
        Params: x, y = training feature and corresponding label (a training pair)
        Outputs: del_b: gradients of the biases
                 del_w: gradients of the weights
        '''
        del_b = [np.zeros(b.shape) for b in self.biases]
        del_w = [np.zeros(w.shape) for w in self.weights]

        # forward-pass once through the neural net to calculate the current
        # loss or cost; outs and activations are lists storing the out vectors
        # and activations for every layer
        outs, activations = self.feedforward(x)
        # cost function:
        loss = SigmoidCrossEntropyLoss(activations[-1], y)
        # derivative of the sigmoid cross-entropy cost to be minimized
        delta_cost = activations[-1] - y
        # backward pass to reduce cost: gradients at the output layer
        delta = delta_cost
        del_b[-1] = delta
        del_w[-1] = np.dot(delta, activations[-2].T)

        # update the gradients of each layer using reverse (negative) indexing,
        # propagating the gradients of later layers back to the current layer
        # so that the gradients of the weights and biases at each layer can be
        # calculated
        for l in range(2, self.num_layers):
            out = outs[-l]
            delta_activation = delta_sigmoid(out)
            delta = np.dot(self.weights[-l + 1].T, delta) * delta_activation
            del_b[-l] = delta
            del_w[-l] = np.dot(delta, activations[-l - 1].T)
        return (loss, del_b, del_w)

    def eval(self, X, y):
        '''
        Description: based on the trained (updated) weights and biases, predict
                     labels, compare them with the original labels and
                     calculate accuracy
        Params: X, y = data examples from the validation dataset (image
                features, labels)
        Outputs: accuracy of prediction
        '''
        count = 0
        for x, _y in zip(X, y):
            outs, activations = self.feedforward(x)
            # the position of the maximum value is the predicted label
            if np.argmax(activations[-1]) == np.argmax(_y):
                count += 1
        print("Accuracy: %f" % ((float(count) / X.shape[0]) * 100))
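    # A minimal numerical gradient check (an illustrative addition, not part
    # of the original pipeline): compares one analytic bias gradient from
    # backpropagate() against a centered finite difference of the loss. Handy
    # for convincing yourself the backward pass is correct on a single example.
    def gradient_check(self, x, y, eps=1e-5):
        _, del_b, _ = self.backpropagate(x, y)
        self.biases[0][0, 0] += eps
        loss_plus = SigmoidCrossEntropyLoss(self.feedforward(x)[1][-1], y)
        self.biases[0][0, 0] -= 2 * eps
        loss_minus = SigmoidCrossEntropyLoss(self.feedforward(x)[1][-1], y)
        self.biases[0][0, 0] += eps  # restore the original value
        numeric = (loss_plus - loss_minus) / (2 * eps)
        return del_b[0][0, 0], numeric  # the two values should nearly agree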
    def predict(self, X):
        '''
        Description: based on the trained (updated) weights and biases, predict
                     labels for images which do not have labels.
        Params: X = list of features of unknown images
        Outputs: list containing the predicted label for each unknown image
        '''
        labels = unpickle("cifar-10-batches-py/batches.meta")["label_names"]
        preds = np.array([])
        for x in X:
            outs, activations = self.feedforward(x)
            preds = np.append(preds, np.argmax(activations[-1]))
        preds = np.array([labels[int(p)] for p in preds])
        return preds


def main():
    X, y = preprocess(num_to_load=1)
    X_train, y_train, X_val, y_val = dataset_split(X, y)
    # 32*32*3 = 3072; the height and width of an image in the dataset are 32,
    # and 3 is for the RGB channels.
    # [3072, 50, 30, 10] implies a neural network with an input layer of size
    # 3072, 2 hidden layers of sizes 50 and 30, and an output layer of size 10,
    # hence 4 layers (including the input layer); more elements can be added
    # to the list to increase the number of layers
    model = DNN([3072, 50, 30, 10])  # initialize the model
    model.train(X_train, y_train, epochs=15)  # train the model
    model.eval(X_val, y_val)  # check accuracy using the validation set
    # preprocess the test dataset
    test_X = unpickle("cifar-10-batches-py/test_batch")["data"] / 255.0
    test_X = test_X.reshape(-1, 3072, 1)
    # make predictions on the test dataset
    print(model.predict(test_X))


if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------