├── README.md
└── DNN.py

/README.md:
--------------------------------------------------------------------------------
## Image Classification on the CIFAR-10 Dataset using Multi-Layer Perceptrons in Python from scratch

#### The CIFAR-10 dataset can be found here - https://www.cs.toronto.edu/~kriz/cifar.html
#### Download the Python batches version of the dataset and extract it to %pwd/cifar-10-batches-py/

Implements an N-layer deep neural network with backpropagation for CIFAR-10 image classification, written from scratch in Python (Python 3 and NumPy) to build a basic understanding of the backpropagation and gradient descent algorithms.

Warning: due to the lack of GPU support and the network having more than one hidden layer, the model trains very slowly.
--------------------------------------------------------------------------------
/DNN.py:
--------------------------------------------------------------------------------
import pickle
import numpy as np


def unpickle(fileName):
    '''
    Description: retrieve data from a CIFAR-10 pickle
    Params: fileName = filename to unpickle
    Outputs: unpickled dict
    '''
    with open(fileName, 'rb') as f:
        # encoding='latin1' keeps the original Python 2 string keys intact
        data_dict = pickle.load(f, encoding='latin1')
    return data_dict


def merge_batches(num_to_load=1):
    '''
    Description: merge batches of CIFAR-10 data pickles
    Params: num_to_load = number of batches of CIFAR-10 to load and merge
    Outputs: merged features and labels from the specified no. of batches
    '''
    for i in range(num_to_load):
        fileName = "cifar-10-batches-py/data_batch_" + str(i + 1)
        data = unpickle(fileName)
        if i == 0:
            features = data["data"]
            labels = np.array(data["labels"])
        else:
            features = np.append(features, data["data"], axis=0)
            labels = np.append(labels, data["labels"], axis=0)
    return features, labels


def one_hot_encode(data):
    '''
    Description: encode target label IDs to one-hot vectors of size L, where L
                 is the number of unique labels
    Params: data = list of label IDs
    Outputs: list of one-hot vectors
    '''
    one_hot = np.zeros((data.shape[0], 10))
    one_hot[np.arange(data.shape[0]), data] = 1
    return one_hot


def normalize(data):
    '''
    Description: normalize pixel values to [0, 1]
    Params: data = list of image pixel features
    Outputs: normalized image pixel features
    '''
    return data / 255.0


def preprocess(num_to_load=1):
    '''
    Description: helper function to load and preprocess CIFAR-10 training batches
    Params: num_to_load = number of batches of CIFAR-10 to load and merge
    Outputs: pre-processed CIFAR-10 image features and labels
    '''
    X, y = merge_batches(num_to_load=num_to_load)
    X = normalize(X)
    X = X.reshape(-1, 3072, 1)
    y = one_hot_encode(y)
    y = y.reshape(-1, 10, 1)
    return X, y


def dataset_split(X, y, ratio=0.8):
    '''
    Description: helper function to split training data into training and
                 validation sets
    Params: X = image features
            y = labels
            ratio = fraction of the data to use for training
    Outputs: training data (features and labels) and validation data
    '''
    split = int(ratio * X.shape[0])
    indices = np.random.permutation(X.shape[0])
    training_idx, val_idx = indices[:split], indices[split:]
    X_train, X_val = X[training_idx, :], X[val_idx, :]
    y_train, y_val = y[training_idx, :], y[val_idx, :]
    print("Records in Training Dataset", X_train.shape[0])
    print("Records in Validation Dataset", X_val.shape[0])
    return X_train, y_train, X_val, y_val
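
# The helper below is an expository addition (hypothetical, never called by the
# script): it sketches the shapes the preprocessing pipeline above produces.
# One CIFAR-10 batch holds 10000 images of 32*32*3 = 3072 pixels each, so with
# the default 0.8 split the assertions below should hold.
def _shape_check_demo():
    X, y = preprocess(num_to_load=1)
    assert X.shape == (10000, 3072, 1)  # one column vector per image
    assert y.shape == (10000, 10, 1)    # one-hot labels over the 10 classes
    X_train, y_train, X_val, y_val = dataset_split(X, y, ratio=0.8)
    assert X_train.shape[0] == 8000 and X_val.shape[0] == 2000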

def sigmoid(out):
    '''
    Description: sigmoid activation
    Params: out = a list/matrix to apply the activation to
    Outputs: sigmoid-activated list/matrix
    '''
    return 1.0 / (1.0 + np.exp(-out))


def delta_sigmoid(out):
    '''
    Description: derivative of the sigmoid activation
    Params: out = a list/matrix to apply the derivative to
    Outputs: delta(sigmoid)-activated list/matrix
    '''
    return sigmoid(out) * (1 - sigmoid(out))


def SigmoidCrossEntropyLoss(a, y):
    '''
    Description: calculate the sigmoid cross-entropy loss
    Params: a = activation
            y = target one-hot vector
    Outputs: a loss value
    '''
    return np.sum(np.nan_to_num(-y * np.log(a) - (1 - y) * np.log(1 - a)))
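
# Quick numerical check (an illustrative addition, not used in training): the
# analytic delta_sigmoid should match a centered finite-difference estimate of
# sigmoid's slope. Calling _check_delta_sigmoid() should raise no assertion.
def _check_delta_sigmoid(eps=1e-5):
    z = np.linspace(-5, 5, 11)
    numeric = (sigmoid(z + eps) - sigmoid(z - eps)) / (2 * eps)
    assert np.allclose(delta_sigmoid(z), numeric, atol=1e-6)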

class DNN(object):
    '''
    Description: class to define the Deep Neural Network
    '''

    def __init__(self, sizes):
        '''
        Description: initialize the biases and weights using a Gaussian
                     distribution with mean 0 and variance 1.
                     Biases are not set for the 1st layer, i.e. the input layer.
        Params: sizes = a list of length L, where L is the number of layers in
                the deep neural network and each element is the number of
                neurons in that layer.
                The first and last elements correspond to the input layer and
                the output layer respectively; intermediate layers are hidden
                layers.
        '''
        self.num_layers = len(sizes)
        # setting appropriate dimensions for weights and biases
        self.biases = [np.random.randn(y, 1) for y in sizes[1:]]
        self.weights = [np.random.randn(y, x)
                        for x, y in zip(sizes[:-1], sizes[1:])]

    def feedforward(self, x):
        '''
        Description: forward-passes an image feature vector through the deep
                     neural network architecture.
        Params: x = image features
        Outputs: 2 lists storing the outputs and activations at every layer;
                 the 1st list is non-activated and the 2nd is activated.
                 The last element of the 2nd list holds the scores against the
                 10 labels in the dataset.
        '''
        activation = x
        activations = [x]  # list to store activations for every layer
        outs = []  # list to store out vectors for every layer
        for b, w in zip(self.biases, self.weights):
            out = np.dot(w, activation) + b
            outs.append(out)
            activation = sigmoid(out)
            activations.append(activation)
        return outs, activations

    def get_batch(self, X, y, batch_size):
        '''
        Description: a data iterator for batching image features and labels
        Params: X, y = lists of features and corresponding labels to be batched
                batch_size = size of the batch
        Outputs: a batch of image features and labels of size = batch_size
        '''
        for batch_idx in range(0, X.shape[0], batch_size):
            batch = zip(X[batch_idx:batch_idx + batch_size],
                        y[batch_idx:batch_idx + batch_size])
            yield batch

    def train(self, X, y, batch_size=100, learning_rate=0.2, epochs=1000):
        '''
        Description: trains image features against corresponding labels batch
                     by batch. The weights and biases of the neural network are
                     updated through backpropagation on batches using SGD.
                     del_b and del_w have the same sizes as the biases and
                     weights of all the layers; they contain the gradients used
                     to update the weights and biases.
        Params: X, y = lists of training features and corresponding labels
                batch_size = size of the batch
                learning_rate = eta; controls the size of changes in weights & biases
                epochs = no. of times to iterate over the whole data
        '''
        n_batches = X.shape[0] // batch_size
        for j in range(epochs):
            batch_iter = self.get_batch(X, y, batch_size)
            for i in range(n_batches):
                batch = next(batch_iter)
                # same shape as self.biases
                del_b = [np.zeros(b.shape) for b in self.biases]
                # same shape as self.weights
                del_w = [np.zeros(w.shape) for w in self.weights]
                for batch_X, batch_y in batch:
                    # accumulate all the bias and weight gradients
                    loss, delta_del_b, delta_del_w = self.backpropagate(
                        batch_X, batch_y)
                    del_b = [db + ddb for db, ddb in zip(del_b, delta_del_b)]
                    del_w = [dw + ddw for dw, ddw in zip(del_w, delta_del_w)]
                # update the weights and biases with the accumulated gradients
                # (partial derivatives), scaled by learning_rate / batch_size
                self.weights = [w - (learning_rate / batch_size)
                                * delw for w, delw in zip(self.weights, del_w)]
                self.biases = [b - (learning_rate / batch_size)
                               * delb for b, delb in zip(self.biases, del_b)]
            # note: the reported loss is that of the last sample processed
            print("\nEpoch %d complete\tLoss: %f\n" % (j, loss))
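    # Expository sketch (a hypothetical method, never called by the script):
    # the update in train() is plain mini-batch SGD applied to every parameter.
    # For a single scalar weight w with gradient g accumulated over a batch:
    #     w <- w - (learning_rate / batch_size) * g
    # e.g. with learning_rate=0.2, batch_size=100 and g=5.0, w moves by -0.01.
    def _sgd_update_demo(self, w=0.3, g=5.0, learning_rate=0.2, batch_size=100):
        return w - (learning_rate / batch_size) * g  # -> 0.29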
    def backpropagate(self, x, y):
        '''
        Description: based on the derivative (delta) of the cost function, the
                     gradients (rate of change of the cost function with
                     respect to weights and biases) of the weights and biases
                     are calculated.
                     del_b and del_w have the same sizes as the biases and
                     weights of all the layers; they are the gradients (partial
                     derivatives) used to update the weights and biases.
                     The cost function here is the sigmoid cross-entropy cost,
                     hence the cost derivative is:
                     delta C = activation(output_layer) - target
        Params: x, y = training feature and corresponding label (a training pair)
        Outputs: del_b: gradients of the biases
                 del_w: gradients of the weights
        '''
        del_b = [np.zeros(b.shape) for b in self.biases]
        del_w = [np.zeros(w.shape) for w in self.weights]

        # forward-pass once through the neural net to calculate the current
        # loss or cost; outs and activations are lists storing the out vectors
        # and activations for every layer
        outs, activations = self.feedforward(x)
        # cost function:
        loss = SigmoidCrossEntropyLoss(activations[-1], y)
        # derivative of the sigmoid cross-entropy cost to be minimized
        delta_cost = activations[-1] - y
        # backward pass to reduce cost: gradients at the output layer
        delta = delta_cost
        del_b[-1] = delta
        del_w[-1] = np.dot(delta, activations[-2].T)

        # update the gradients of each layer using reverse (negative) indexing,
        # propagating the gradients of later layers back to the current layer
        # so that the gradients of the weights and biases at each layer can be
        # calculated
        for l in range(2, self.num_layers):
            out = outs[-l]
            delta_activation = delta_sigmoid(out)
            delta = np.dot(self.weights[-l + 1].T, delta) * delta_activation
            del_b[-l] = delta
            del_w[-l] = np.dot(delta, activations[-l - 1].T)
        return (loss, del_b, del_w)

    def eval(self, X, y):
        '''
        Description: based on the trained (updated) weights and biases, predict
                     labels, compare them with the original labels and
                     calculate accuracy
        Params: X, y = data examples from the validation dataset (image
                features, labels)
        Outputs: accuracy of prediction
        '''
        count = 0
        for x, _y in zip(X, y):
            outs, activations = self.feedforward(x)
            # the position of the maximum value is the predicted label
            if np.argmax(activations[-1]) == np.argmax(_y):
                count += 1
        print("Accuracy: %f" % ((float(count) / X.shape[0]) * 100))
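    # A minimal numerical gradient check (an illustrative addition, not part
    # of the original pipeline): compares one analytic bias gradient from
    # backpropagate() against a centered finite difference of the loss. Handy
    # for convincing yourself the backward pass is correct on a single example.
    def gradient_check(self, x, y, eps=1e-5):
        _, del_b, _ = self.backpropagate(x, y)
        self.biases[0][0, 0] += eps
        loss_plus = SigmoidCrossEntropyLoss(self.feedforward(x)[1][-1], y)
        self.biases[0][0, 0] -= 2 * eps
        loss_minus = SigmoidCrossEntropyLoss(self.feedforward(x)[1][-1], y)
        self.biases[0][0, 0] += eps  # restore the original value
        numeric = (loss_plus - loss_minus) / (2 * eps)
        return del_b[0][0, 0], numeric  # the two values should nearly agree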
    def predict(self, X):
        '''
        Description: based on the trained (updated) weights and biases, predict
                     labels for images which do not have labels.
        Params: X = list of features of unknown images
        Outputs: list containing the predicted label for each unknown image
        '''
        labels = unpickle("cifar-10-batches-py/batches.meta")["label_names"]
        preds = np.array([])
        for x in X:
            outs, activations = self.feedforward(x)
            preds = np.append(preds, np.argmax(activations[-1]))
        preds = np.array([labels[int(p)] for p in preds])
        return preds


def main():
    X, y = preprocess(num_to_load=1)
    X_train, y_train, X_val, y_val = dataset_split(X, y)
    # 32*32*3 = 3072; the height and width of an image in the dataset are 32,
    # and 3 is for the RGB channels.
    # [3072, 50, 30, 10] implies a neural network with an input layer of size
    # 3072, 2 hidden layers of sizes 50 and 30, and an output layer of size 10,
    # hence 4 layers (including the input layer); more elements can be added
    # to the list to increase the number of layers
    model = DNN([3072, 50, 30, 10])  # initialize the model
    model.train(X_train, y_train, epochs=15)  # train the model
    model.eval(X_val, y_val)  # check accuracy using the validation set
    # preprocess the test dataset
    test_X = unpickle("cifar-10-batches-py/test_batch")["data"] / 255.0
    test_X = test_X.reshape(-1, 3072, 1)
    # make predictions on the test dataset
    print(model.predict(test_X))


if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------