├── README
├── softmaxRegression.py
├── t10k-images-idx3-ubyte.gz
├── t10k-labels-idx1-ubyte.gz
├── train-images-idx3-ubyte.gz
└── train-labels-idx1-ubyte.gz


/README:
--------------------------------------------------------------------------------
 1 | -> This is a solution to the Softmax Regression exercise in the Stanford UFLDL Tutorial (http://ufldl.stanford.edu/wiki/index.php/Exercise:Softmax_Regression)
 2 | -> The code has been written in Python using Scipy and Numpy
 3 | -> The code is bound by The MIT License (MIT)
 4 | 
 5 | Running the code:
 6 | 
 7 | -> Download the gzip-compressed data files and the code file 'softmaxRegression.py'
 8 | -> Put them in the same folder, extract the '.gz' archives (for example with 'gunzip'), and run the program by typing 'python softmaxRegression.py' on the command line
 9 | -> You should get an output saying 'Accuracy : 0.9262', which corresponds to an accuracy of 92.6%
10 | -> The code takes about 5 minutes to execute on an i3 processor
11 | 


--------------------------------------------------------------------------------
/softmaxRegression.py:
--------------------------------------------------------------------------------
  1 | # This piece of software is bound by The MIT License (MIT)
  2 | # Copyright (c) 2014 Siddharth Agrawal
  3 | # Code written by : Siddharth Agrawal
  4 | # Email ID : siddharth.950@gmail.com
  5 | 
  6 | import struct
  7 | import numpy
  8 | import array
  9 | import time
 10 | import scipy.sparse
 11 | import scipy.optimize
 12 | 
 13 | ###########################################################################################
 14 | """ The Softmax Regression class """
 15 | 
 16 | class SoftmaxRegression(object):
 17 | 
 18 |     #######################################################################################
 19 |     """ Initialization of Regressor object """
 20 | 
 21 |     def __init__(self, input_size, num_classes, lamda):
 22 |     
 23 |         """ Initialize parameters of the Regressor object """
 24 |     
 25 |         self.input_size  = input_size  # input vector size
 26 |         self.num_classes = num_classes # number of classes
 27 |         self.lamda       = lamda       # weight decay parameter
 28 |         
 29 |         """ Randomly initialize the class weights """
 30 |         
 31 |         rand = numpy.random.RandomState(int(time.time()))
 32 |         
 33 |         self.theta = 0.005 * numpy.asarray(rand.normal(size = (num_classes*input_size, 1)))
 34 |     
 35 |     #######################################################################################
 36 |     """ Returns the groundtruth matrix for a set of labels """
 37 |         
 38 |     def getGroundTruth(self, labels):
 39 |     
 40 |         """ Prepare data needed to construct groundtruth matrix """
 41 |     
 42 |         labels = numpy.array(labels).flatten()
 43 |         data   = numpy.ones(len(labels))
 44 |         indptr = numpy.arange(len(labels)+1)
 45 |         
 46 |         """ Compute the groundtruth matrix and return """
 47 |         
 48 |         ground_truth = scipy.sparse.csr_matrix((data, labels, indptr))
 49 |         ground_truth = numpy.transpose(ground_truth.todense())
 50 |         
 51 |         return ground_truth
 52 |         
 53 |     #######################################################################################
 54 |     """ Returns the cost and gradient of 'theta' at a particular 'theta' """
 55 |         
 56 |     def softmaxCost(self, theta, input, labels):
 57 |     
 58 |         """ Compute the groundtruth matrix """
 59 |     
 60 |         ground_truth = self.getGroundTruth(labels)
 61 |         
 62 |         """ Reshape 'theta' for ease of computation """
 63 |         
 64 |         theta = theta.reshape(self.num_classes, self.input_size)
 65 |         
 66 |         """ Compute the class probabilities for each example """
 67 |         
 68 |         theta_x       = numpy.dot(theta, input)
 69 |         hypothesis    = numpy.exp(theta_x)      
 70 |         probabilities = hypothesis / numpy.sum(hypothesis, axis = 0)
 71 |         
 72 |         """ Compute the traditional cost term """
 73 |         
 74 |         cost_examples    = numpy.multiply(ground_truth, numpy.log(probabilities))
 75 |         traditional_cost = -(numpy.sum(cost_examples) / input.shape[1])
 76 |         
 77 |         """ Compute the weight decay term """
 78 |         
 79 |         theta_squared = numpy.multiply(theta, theta)
 80 |         weight_decay  = 0.5 * self.lamda * numpy.sum(theta_squared)
 81 |         
 82 |         """ Add both terms to get the cost """
 83 |         
 84 |         cost = traditional_cost + weight_decay
 85 |         
 86 |         """ Compute and unroll 'theta' gradient """
 87 |         
 88 |         theta_grad = -numpy.dot(ground_truth - probabilities, numpy.transpose(input))
 89 |         theta_grad = theta_grad / input.shape[1] + self.lamda * theta
 90 |         theta_grad = numpy.array(theta_grad)
 91 |         theta_grad = theta_grad.flatten()
 92 |         
 93 |         return [cost, theta_grad]
 94 |     
 95 |     #######################################################################################
 96 |     """ Returns predicted classes for a set of inputs """
 97 |             
 98 |     def softmaxPredict(self, theta, input):
 99 |     
100 |         """ Reshape 'theta' for ease of computation """
101 |     
102 |         theta = theta.reshape(self.num_classes, self.input_size)
103 |         
104 |         """ Compute the class probabilities for each example """
105 |         
106 |         theta_x       = numpy.dot(theta, input)
107 |         hypothesis    = numpy.exp(theta_x)      
108 |         probabilities = hypothesis / numpy.sum(hypothesis, axis = 0)
109 |         
110 |         """ Give the predictions based on probability values """
111 |         
112 |         predictions = numpy.zeros((input.shape[1], 1))
113 |         predictions[:, 0] = numpy.argmax(probabilities, axis = 0)
114 |         
115 |         return predictions
116 | 
117 | ###########################################################################################
118 | """ Loads the images from the provided file name """
119 | 
def loadMNISTImages(file_name):
    """Load an uncompressed IDX3 image file as a matrix of pixel columns.

    file_name : path of the image file

    Returns a float array of shape (num_rows * num_cols, num_examples);
    column i holds the pixels of image i in file order, scaled from
    [0, 255] down to [0, 1]. Bytes beyond the announced image data are
    ignored.

    Raises struct.error when the 16-byte header is incomplete, and
    ValueError when the magic number is not 2051 or the file contains
    fewer pixel bytes than the header announces.
    """

    # 'with' releases the file handle even when parsing fails below
    with open(file_name, 'rb') as image_file:
        header     = image_file.read(16)
        pixels_raw = image_file.read()

    # Header: four big-endian unsigned 32-bit integers
    magic, num_examples, num_rows, num_cols = struct.unpack('>IIII', header)

    if magic != 2051:
        raise ValueError('%s is not an IDX3 image file (magic number %d)'
                         % (file_name, magic))

    image_size = num_rows * num_cols
    num_pixels = image_size * num_examples

    if len(pixels_raw) < num_pixels:
        raise ValueError('%s is truncated: expected %d pixel bytes, found %d'
                         % (file_name, num_pixels, len(pixels_raw)))

    # Images are stored one after another, so a (num_examples, image_size)
    # view transposed puts every image in its own column
    pixels     = numpy.frombuffer(pixels_raw, dtype = numpy.uint8, count = num_pixels)
    dataset    = numpy.empty((image_size, num_examples))
    dataset[:] = pixels.reshape(num_examples, image_size).T

    # Normalize and return the dataset
    return dataset / 255.0
160 | 
161 | ###########################################################################################
162 | """ Loads the image labels from the provided file name """
163 |     
def loadMNISTLabels(file_name):
    """Load an uncompressed IDX1 label file as a column of integers.

    file_name : path of the label file

    Returns an integer array of shape (num_examples, 1) holding the labels
    in file order.

    Raises struct.error when the 8-byte header is incomplete, and
    ValueError when the magic number is not 2049 or the number of label
    bytes differs from the count announced in the header.
    """

    # 'with' releases the file handle even when parsing fails below
    with open(file_name, 'rb') as label_file:
        header     = label_file.read(8)
        labels_raw = label_file.read()

    # Header: two big-endian unsigned 32-bit integers
    magic, num_examples = struct.unpack('>II', header)

    if magic != 2049:
        raise ValueError('%s is not an IDX1 label file (magic number %d)'
                         % (file_name, magic))

    if len(labels_raw) != num_examples:
        raise ValueError('%s announces %d labels but contains %d'
                         % (file_name, num_examples, len(labels_raw)))

    # 'int' replaces the 'numpy.int' alias, which recent NumPy releases no
    # longer provide; labels are unsigned bytes, hence uint8
    labels       = numpy.zeros((num_examples, 1), dtype = int)
    labels[:, 0] = numpy.frombuffer(labels_raw, dtype = numpy.uint8)

    return labels
193 | 
194 | ###########################################################################################
195 | """ Loads data, trains the model and predicts classes for test data """
196 | 
def _findDataFile(*candidates):
    """Return the first candidate path that is an existing file.

    When none exists the first candidate is returned unchanged, so the
    subsequent open() reports the missing file under its primary name.
    """

    import os

    for name in candidates:
        if os.path.isfile(name):
            return name

    return candidates[0]

def executeSoftmaxRegression():
    """Train a softmax regressor on MNIST and print its test accuracy.

    Reads the four MNIST files from the current directory, fits the
    parameters with L-BFGS-B and prints a line of the form
    'Accuracy : <fraction of correctly classified test images>'.
    """

    # Parameters of the Regressor

    input_size     = 784    # input vector size
    num_classes    = 10     # number of classes
    lamda          = 0.0001 # weight decay parameter
    max_iterations = 100    # number of optimization iterations

    # Load MNIST training images and labels. Depending on the extraction
    # tool the files end up named with a '.' or a '-' before 'idx', so
    # both spellings are tried.

    training_data   = loadMNISTImages(_findDataFile('train-images.idx3-ubyte',
                                                    'train-images-idx3-ubyte'))
    training_labels = loadMNISTLabels(_findDataFile('train-labels.idx1-ubyte',
                                                    'train-labels-idx1-ubyte'))

    # Initialize Softmax Regressor with the above parameters

    regressor = SoftmaxRegression(input_size, num_classes, lamda)

    # Run the L-BFGS algorithm to get the optimal parameter values. The
    # initial point is flattened because 'minimize' expects 'x0' to be
    # one-dimensional, while 'theta' is stored as a column vector.

    opt_solution  = scipy.optimize.minimize(regressor.softmaxCost, regressor.theta.flatten(),
                                            args = (training_data, training_labels,), method = 'L-BFGS-B',
                                            jac = True, options = {'maxiter': max_iterations})
    opt_theta     = opt_solution.x

    # Load MNIST test images and labels

    test_data   = loadMNISTImages(_findDataFile('t10k-images.idx3-ubyte',
                                                't10k-images-idx3-ubyte'))
    test_labels = loadMNISTLabels(_findDataFile('t10k-labels.idx1-ubyte',
                                                't10k-labels-idx1-ubyte'))

    # Obtain predictions from the trained model

    predictions = regressor.softmaxPredict(opt_theta, test_data)

    # Print accuracy of the trained model. A single-argument print(...)
    # produces the same line under both Python 2 and Python 3.

    correct = test_labels[:, 0] == predictions[:, 0]
    print("Accuracy : " + str(numpy.mean(correct)))
235 |     
# Run the experiment only when this file is executed as a script, so that
# importing the module does not start a training run
if __name__ == '__main__':
    executeSoftmaxRegression()
237 | 


--------------------------------------------------------------------------------
/t10k-images-idx3-ubyte.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/siddharth-agrawal/Softmax-Regression/89860eba0c1b4ba178ae5e7d394c7df678207bed/t10k-images-idx3-ubyte.gz


--------------------------------------------------------------------------------
/t10k-labels-idx1-ubyte.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/siddharth-agrawal/Softmax-Regression/89860eba0c1b4ba178ae5e7d394c7df678207bed/t10k-labels-idx1-ubyte.gz


--------------------------------------------------------------------------------
/train-images-idx3-ubyte.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/siddharth-agrawal/Softmax-Regression/89860eba0c1b4ba178ae5e7d394c7df678207bed/train-images-idx3-ubyte.gz


--------------------------------------------------------------------------------
/train-labels-idx1-ubyte.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/siddharth-agrawal/Softmax-Regression/89860eba0c1b4ba178ae5e7d394c7df678207bed/train-labels-idx1-ubyte.gz


--------------------------------------------------------------------------------