# Softmax Regression (repository: siddharth-agrawal/Softmax-Regression)
#
# Repository contents:
#   README
#   softmaxRegression.py
#   t10k-images-idx3-ubyte.gz
#   t10k-labels-idx1-ubyte.gz
#   train-images-idx3-ubyte.gz
#   train-labels-idx1-ubyte.gz
#
# README
# ------
# -> This is a solution to the Softmax Regression exercise in the Stanford UFLDL
#    Tutorial (http://ufldl.stanford.edu/wiki/index.php/Exercise:Softmax_Regression)
# -> The code has been written in Python using Scipy and Numpy
# -> The code is bound by The MIT License (MIT)
#
# Running the code:
#
# -> Download the gzipped data files and the code file 'softmaxRegression.py'
# -> Put them in the same folder, extract the gzip archives and run the program by
#    typing 'python softmaxRegression.py' in the command line
# -> You should get an output similar to 'Accuracy : 0.9262', which signifies an
#    accuracy of 92.6% (the weights are initialized from a time-based seed, so the
#    exact figure can vary slightly between runs)
# -> The code takes about 5 minutes to execute on an i3 processor
#
# ---------------------------------------------------------------------------------
# softmaxRegression.py
# ---------------------------------------------------------------------------------

# This piece of software is bound by The MIT License (MIT)
# Copyright (c) 2014 Siddharth Agrawal
# Code written by : Siddharth Agrawal
# Email ID : siddharth.950@gmail.com

import array
import os
import struct
import time

import numpy
import scipy.optimize
import scipy.sparse

# Magic numbers stored in the first four bytes of the two kinds of data file
# read by this module (checked so that a wrong or still-compressed file is
# rejected instead of being silently parsed as garbage).
_IMAGE_FILE_MAGIC = 2051
_LABEL_FILE_MAGIC = 2049


class SoftmaxRegression(object):
    """Softmax (multinomial logistic) regression with L2 weight decay.

    The parameters are kept unrolled in ``self.theta`` as a column vector of
    ``num_classes * input_size`` values; the methods reshape it to
    ``(num_classes, input_size)`` before use.
    """

    def __init__(self, input_size, num_classes, lamda):
        """Store the hyper-parameters and randomly initialize ``theta``.

        input_size  -- length of one input vector
        num_classes -- number of classes
        lamda       -- weight decay parameter
        """

        self.input_size = input_size
        self.num_classes = num_classes
        self.lamda = lamda

        # Small Gaussian weights, seeded from the clock (runs are not reproducible).
        rand = numpy.random.RandomState(int(time.time()))
        self.theta = 0.005 * numpy.asarray(rand.normal(size=(num_classes * input_size, 1)))

    def getGroundTruth(self, labels):
        """Return the one-hot ground-truth array for ``labels``.

        labels -- integer class indices, any shape (flattened internally)

        Returns a dense ``(num_classes, num_examples)`` array with a 1 at
        ``[labels[i], i]`` and 0 elsewhere.  Raises ValueError (from scipy) if
        a label is outside ``0 .. num_classes - 1``.
        """

        labels = numpy.asarray(labels).flatten()
        num_examples = len(labels)

        # CSR layout with exactly one stored element per row: row i holds a
        # single 1 in column labels[i].
        data = numpy.ones(num_examples)
        indptr = numpy.arange(num_examples + 1)

        # The shape is given explicitly; otherwise scipy infers the number of
        # columns from the largest label present, and a batch that lacks the
        # highest class would produce fewer than num_classes rows.
        ground_truth = scipy.sparse.csr_matrix(
            (data, labels, indptr), shape=(num_examples, self.num_classes))

        return ground_truth.toarray().T

    def _classScores(self, theta, input):
        """Return the ``(num_classes, num_examples)`` linear scores theta * input."""

        theta = theta.reshape(self.num_classes, self.input_size)
        return numpy.dot(theta, input)

    def softmaxCost(self, theta, input, labels):
        """Return ``[cost, gradient]`` of the regularized softmax objective.

        theta  -- parameters, reshapeable to (num_classes, input_size)
        input  -- data array of shape (input_size, num_examples)
        labels -- class indices, one per example

        The gradient is returned unrolled (1-D) so that the pair can be fed
        to ``scipy.optimize.minimize(..., jac=True)``.
        """

        ground_truth = self.getGroundTruth(labels)
        theta = theta.reshape(self.num_classes, self.input_size)
        num_examples = input.shape[1]

        # Log-sum-exp with the per-example maximum subtracted: mathematically
        # identical to log(exp(s) / sum(exp(s))) but cannot overflow, and the
        # log-probabilities stay finite so 0 * log(0) never yields NaN.
        scores = numpy.dot(theta, input)
        scores = scores - numpy.max(scores, axis=0)
        log_probabilities = scores - numpy.log(numpy.sum(numpy.exp(scores), axis=0))
        probabilities = numpy.exp(log_probabilities)

        # Average negative log-likelihood of the correct classes.
        traditional_cost = -numpy.sum(ground_truth * log_probabilities) / num_examples

        # L2 weight decay term.
        weight_decay = 0.5 * self.lamda * numpy.sum(theta * theta)

        cost = traditional_cost + weight_decay

        theta_grad = -numpy.dot(ground_truth - probabilities, numpy.transpose(input))
        theta_grad = theta_grad / num_examples + self.lamda * theta
        theta_grad = numpy.array(theta_grad).flatten()

        return [cost, theta_grad]

    def softmaxPredict(self, theta, input):
        """Return the predicted class of every column of ``input``.

        The result is a float array of shape ``(num_examples, 1)``.
        """

        # Softmax is monotonic in the scores, so the most probable class is
        # simply the one with the largest score; skipping exp() avoids overflow.
        scores = self._classScores(theta, input)

        predictions = numpy.zeros((input.shape[1], 1))
        predictions[:, 0] = numpy.argmax(scores, axis=0)

        return predictions


def _readIDXFile(file_name, expected_magic, num_dimensions):
    """Read a data file made of big-endian uint32 header fields plus raw bytes.

    Returns ``(dimensions, values)`` where ``dimensions`` is the tuple of
    ``num_dimensions`` sizes following the magic number and ``values`` is a
    uint8 array holding exactly ``product(dimensions)`` payload bytes (any
    trailing bytes are ignored).  Raises ValueError on a truncated file or an
    unexpected magic number.
    """

    header_size = 4 * (1 + num_dimensions)

    # 'with' guarantees the handle is closed even if parsing fails.
    with open(file_name, 'rb') as idx_file:
        header = idx_file.read(header_size)
        payload = idx_file.read()

    if len(header) != header_size:
        raise ValueError("%s: file too short to contain a header" % file_name)

    fields = struct.unpack('>%dI' % (1 + num_dimensions), header)
    if fields[0] != expected_magic:
        raise ValueError("%s: unexpected magic number %d (expected %d); "
                         "is the file extracted and of the right kind?"
                         % (file_name, fields[0], expected_magic))

    dimensions = fields[1:]
    expected_size = 1
    for dimension in dimensions:
        expected_size *= dimension

    values = numpy.frombuffer(payload, dtype=numpy.uint8)
    if values.size < expected_size:
        raise ValueError("%s: expected %d data bytes, found %d"
                         % (file_name, expected_size, values.size))

    return dimensions, values[:expected_size]


def loadMNISTImages(file_name):
    """Load the images stored in ``file_name``.

    Returns a float array of shape ``(num_rows * num_cols, num_examples)``
    (one image per column) with pixel values scaled from 0..255 to 0..1.
    """

    (num_examples, num_rows, num_cols), pixels = _readIDXFile(
        file_name, _IMAGE_FILE_MAGIC, 3)

    # Images are stored back to back; reshape to one row per image, then
    # transpose so that every column is one example.
    images = pixels.reshape(num_examples, num_rows * num_cols).T

    # Convert to float before dividing so the result is a true division on
    # both Python 2 and Python 3.
    return numpy.ascontiguousarray(images, dtype=numpy.float64) / 255.0


def loadMNISTLabels(file_name):
    """Load the labels stored in ``file_name``.

    Returns an integer array of shape ``(num_examples, 1)``.
    """

    (num_examples,), raw_labels = _readIDXFile(file_name, _LABEL_FILE_MAGIC, 1)

    # The builtin int replaces the 'numpy.int' alias, which newer NumPy
    # releases no longer provide.
    return raw_labels.astype(int).reshape(num_examples, 1)


def _findDataFile(*candidates):
    """Return the first existing path among ``candidates``.

    Falls back to the first candidate when none exists, so that the
    subsequent open() reports a meaningful "file not found" error.
    """

    for candidate in candidates:
        if os.path.exists(candidate):
            return candidate
    return candidates[0]


def executeSoftmaxRegression():
    """Load the data, train the model and print the test-set accuracy."""

    input_size = 784        # input vector size
    num_classes = 10        # number of classes
    lamda = 0.0001          # weight decay parameter
    max_iterations = 100    # number of optimization iterations

    # The archives shipped with this repository extract to hyphenated names
    # ('train-images-idx3-ubyte'); the dotted names are still accepted.
    training_data = loadMNISTImages(
        _findDataFile('train-images-idx3-ubyte', 'train-images.idx3-ubyte'))
    training_labels = loadMNISTLabels(
        _findDataFile('train-labels-idx1-ubyte', 'train-labels.idx1-ubyte'))

    regressor = SoftmaxRegression(input_size, num_classes, lamda)

    # Run the L-BFGS algorithm to get the optimal parameter values.
    opt_solution = scipy.optimize.minimize(regressor.softmaxCost, regressor.theta,
                                           args=(training_data, training_labels,),
                                           method='L-BFGS-B', jac=True,
                                           options={'maxiter': max_iterations})
    opt_theta = opt_solution.x

    test_data = loadMNISTImages(
        _findDataFile('t10k-images-idx3-ubyte', 't10k-images.idx3-ubyte'))
    test_labels = loadMNISTLabels(
        _findDataFile('t10k-labels-idx1-ubyte', 't10k-labels.idx1-ubyte'))

    predictions = regressor.softmaxPredict(opt_theta, test_data)

    correct = test_labels[:, 0] == predictions[:, 0]

    # Single pre-formatted argument: prints identically on Python 2 and 3.
    print("Accuracy : %s" % numpy.mean(correct))


if __name__ == '__main__':
    executeSoftmaxRegression()

# ---------------------------------------------------------------------------------
# Data files (stored in the repository as gzip archives)
# ---------------------------------------------------------------------------------
# /t10k-images-idx3-ubyte.gz:
#   https://raw.githubusercontent.com/siddharth-agrawal/Softmax-Regression/89860eba0c1b4ba178ae5e7d394c7df678207bed/t10k-images-idx3-ubyte.gz
# /t10k-labels-idx1-ubyte.gz:
#   https://raw.githubusercontent.com/siddharth-agrawal/Softmax-Regression/89860eba0c1b4ba178ae5e7d394c7df678207bed/t10k-labels-idx1-ubyte.gz
# /train-images-idx3-ubyte.gz:
#   https://raw.githubusercontent.com/siddharth-agrawal/Softmax-Regression/89860eba0c1b4ba178ae5e7d394c7df678207bed/train-images-idx3-ubyte.gz
# /train-labels-idx1-ubyte.gz:
#   https://raw.githubusercontent.com/siddharth-agrawal/Softmax-Regression/89860eba0c1b4ba178ae5e7d394c7df678207bed/train-labels-idx1-ubyte.gz