├── A layman's guide to the project.pdf ├── Code ├── Images │ ├── HU200_EP1000_R0.1.jpg │ ├── Image_MNIST_Training_Row_1.jpg │ ├── TS100_EP100_R5.jpg │ └── ValidationHoldout7030.JPG ├── MNISTDataset.mat ├── calculationOfPixels.m ├── checkNNGradients.m ├── checkTestAccuracy.m ├── checkTrainingAccuracy.m ├── computeNumericalGradient.m ├── createExternalImage.m ├── createMNISTDataset.m ├── debugInitializeWeights.m ├── displayData.m ├── fmincg.m ├── getAllFileNames.m ├── gradientDescent.m ├── loadMNISTImages.m ├── loadMNISTLabels.m ├── neuralNetwork.m ├── neuralNetworkWeights - HU200_EP1000_R0.1.mat ├── neuralNetworkWeights-98.35.mat ├── neuralNetworkWeights.mat ├── nnCostFunction.m ├── pre-processing of image.m ├── predict.m ├── predictExternalImage.m ├── predictPercentExternalImage.m ├── predictPercentage.m ├── randInitializeWeights.m ├── savedNeuralNetworkWeights.mat ├── sigmoid.m ├── sigmoidGradient.m ├── t10k-images.idx3-ubyte ├── t10k-labels.idx1-ubyte ├── train-images.idx3-ubyte ├── train-labels.idx1-ubyte ├── trainNeuralNetwork.m ├── try_run.m ├── validationCurve.m └── validationCurveHoldout.m ├── LICENSE ├── README.md └── Sample Images ├── First Set ├── Eight.png ├── Five.png ├── Four.png ├── Nine.png ├── One.png ├── Seven.png ├── Six.png ├── Three.png ├── Two.png └── Zero.png ├── Sample Test ├── Eight.jpg ├── Five.png ├── Four.jpg ├── Nine.png ├── One.png ├── Seven.png ├── Six.png ├── Three.png ├── Two.jpg ├── Zero.png └── info.txt ├── Second Set ├── Eight(2).png ├── Five(2).png ├── Five(2.1).png ├── Four(2).png ├── Nine(2).png ├── One(2).png ├── Seven(2).png ├── Six(2).png ├── Six(2.1).png ├── Three(2).png ├── Two(2).png └── Zero(2).png └── Third Set ├── Eight(3).jpg ├── Five(3).jpg ├── Four(3).jpg ├── Nine(3).jpg ├── One(3).jpg ├── Seven(3).jpg ├── Six(3).jpg ├── Three(3).jpg ├── Two(3).jpg └── Zero(3).jpg /A layman's guide to the project.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/deyjishnu/digit-recognition/d872ac5d7f3356fc93f6c428dbee234c180de0ae/A layman's guide to the project.pdf -------------------------------------------------------------------------------- /Code/Images/HU200_EP1000_R0.1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deyjishnu/digit-recognition/d872ac5d7f3356fc93f6c428dbee234c180de0ae/Code/Images/HU200_EP1000_R0.1.jpg -------------------------------------------------------------------------------- /Code/Images/Image_MNIST_Training_Row_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deyjishnu/digit-recognition/d872ac5d7f3356fc93f6c428dbee234c180de0ae/Code/Images/Image_MNIST_Training_Row_1.jpg -------------------------------------------------------------------------------- /Code/Images/TS100_EP100_R5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deyjishnu/digit-recognition/d872ac5d7f3356fc93f6c428dbee234c180de0ae/Code/Images/TS100_EP100_R5.jpg -------------------------------------------------------------------------------- /Code/Images/ValidationHoldout7030.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deyjishnu/digit-recognition/d872ac5d7f3356fc93f6c428dbee234c180de0ae/Code/Images/ValidationHoldout7030.JPG -------------------------------------------------------------------------------- /Code/MNISTDataset.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deyjishnu/digit-recognition/d872ac5d7f3356fc93f6c428dbee234c180de0ae/Code/MNISTDataset.mat -------------------------------------------------------------------------------- /Code/calculationOfPixels.m: 
--------------------------------------------------------------------------------
1 | function [ width,height ] = calculationOfPixels( imagefile )
2 | %CALCULATIONOFPIXELS Choose a width and height whose product is 784.
3 | %   [width, height] = CALCULATIONOFPIXELS(imagefile) reads the image in
4 | %   imagefile, rounds its row and column counts to the nearest multiple of
5 | %   28 (the lower multiple wins a tie), then shrinks the larger of the two
6 | %   in steps of 7 until width*height == 784.
7 | %
8 | %   If that search cannot land on 784 exactly (a 280x280 image, or a side
9 | %   of 14 pixels or fewer, for example) the result falls back to 28 x 28
10 | %   instead of looping forever.
11 |
12 | TARGET = 784;   % pixel count the result must multiply out to
13 |
14 | I = imread(imagefile);
15 | % Ask for rows and columns separately: [h,w] = size(I) would fold the
16 | % colour planes of an RGB image into w.
17 | h = size(I, 1);
18 | w = size(I, 2);
19 |
20 | h = roundToMultiple(h, 28);
21 | w = roundToMultiple(w, 28);
22 |
23 | % Shrink the larger dimension in steps of 7; stop at zero so the search
24 | % always ends.
25 | if h > w
26 |     while h > 0 && w*h ~= TARGET
27 |         h = h - 7;
28 |     end
29 | else
30 |     while w > 0 && w*h ~= TARGET
31 |         w = w - 7;
32 |     end
33 | end
34 |
35 | if w*h ~= TARGET
36 |     % No exact fit was found
37 |     w = 28;
38 |     h = 28;
39 | end
40 |
41 | width = w;
42 | height = h;
43 | end
44 |
45 | function v = roundToMultiple(v, step)
46 | %ROUNDTOMULTIPLE Nearest multiple of step to v; the lower one wins a tie.
47 | lo = v;
48 | hi = v;
49 | % Stop as soon as EITHER candidate is a multiple. The original loop used
50 | % "||", which required both at once and never ended unless v was a
51 | % multiple of 14.
52 | while mod(lo, step) ~= 0 && mod(hi, step) ~= 0
53 |     lo = lo - 1;
54 |     hi = hi + 1;
55 | end
56 | if mod(lo, step) == 0
57 |     v = lo;
58 | else
59 |     v = hi;
60 | end
61 | end
62 |
63 |
--------------------------------------------------------------------------------
/Code/checkNNGradients.m:
--------------------------------------------------------------------------------
1 | function checkNNGradients(lambda)
2 | %CHECKNNGRADIENTS Creates a small neural network to check the
3 | %backpropagation gradients
4 | % CHECKNNGRADIENTS(lambda) Creates a small neural network to check the
5 | % backpropagation gradients, it will output the analytical gradients
6 | % produced by our backprop code and the numerical gradients (computed
7 | % using computeNumericalGradient). These two gradient computations should
8 | % result in very similar values.
9 | % 10 | 11 | if ~exist('lambda', 'var') || isempty(lambda) 12 | lambda = 0; 13 | end 14 | 15 | input_layer_size = 3; 16 | hidden_layer_size = 5; 17 | num_labels = 3; 18 | m = 5; 19 | 20 | % We generate some 'random' test data 21 | Theta1 = debugInitializeWeights(hidden_layer_size, input_layer_size); 22 | Theta2 = debugInitializeWeights(num_labels, hidden_layer_size); 23 | % Reusing debugInitializeWeights to generate X 24 | X = debugInitializeWeights(m, input_layer_size - 1); 25 | y = 1 + mod(1:m, num_labels)'; 26 | 27 | % Unroll parameters 28 | nn_params = [Theta1(:) ; Theta2(:)]; 29 | 30 | % Short hand for cost function 31 | costFunc = @(p) nnCostFunction(p, input_layer_size, hidden_layer_size, ... 32 | num_labels, X, y, lambda); 33 | 34 | [cost, grad] = costFunc(nn_params); 35 | numgrad = computeNumericalGradient(costFunc, nn_params); 36 | 37 | % Visually examine the two gradient computations. The two columns 38 | % we get should be very similar. 39 | disp([numgrad grad]); 40 | fprintf(['The above two columns we get should be very similar.\n' ... 41 | '(Left-Our Numerical Gradient, Right-Analytical Gradient)\n\n']); 42 | 43 | % Evaluate the norm of the difference between two solutions. 44 | % If we have a correct implementation, and assuming we used EPSILON = 0.0001 45 | % in computeNumericalGradient.m, then diff below should be less than 1e-9 46 | diff = norm(numgrad-grad)/norm(numgrad+grad); 47 | 48 | fprintf(['If our backpropagation implementation is correct, then \n' ... 49 | 'the relative difference will be small (less than 1e-9). \n' ... 50 | '\nRelative Difference: %g\n'], diff); 51 | 52 | end 53 | -------------------------------------------------------------------------------- /Code/checkTestAccuracy.m: -------------------------------------------------------------------------------- 1 | %% ================= Implement Predict for Test Set ================= 2 | % Now we would like to use it to predict the test(or validation) labels. 
3 | % You will now implement the "predict" function to use the neural network to 4 | % predict the labels of the test set. This lets you test the neural network 5 | % on images on which it is not trained. 6 | 7 | % Initialization 8 | clear ; close all; clc 9 | 10 | %% =========== Part 1: Loading and Visualizing Data ============= 11 | % We start the exercise by first loading and visualizing the dataset. 12 | 13 | % Load Test Data 14 | fprintf('Loading and Visualizing Data ...\n') 15 | 16 | load('MNISTDataset'); 17 | testX = testImages((1:10000),:); 18 | testy = testLabels((1:10000),:); 19 | n = size(testX, 1); 20 | load('neuralNetworkWeights.mat'); 21 | 22 | % Randomly select 100 data points to display 23 | sel = randperm(size(testX, 1)); 24 | sel = sel(1:100); 25 | 26 | displayData(testX(sel, :)); 27 | 28 | fprintf('Program paused. Press enter to continue.\n'); 29 | pause; 30 | 31 | pred = predict(Theta1, Theta2, testX); 32 | 33 | fprintf('\nTest Set Accuracy: %f\n', mean(double(pred == testy)) * 100); 34 | 35 | fprintf('Program paused. Press enter to continue.\n'); 36 | pause; 37 | 38 | %% =========== Part 2: Predicting test images ============= 39 | % To give you an idea of the network's output, you can also run 40 | % through the examples one at the a time to see what it is predicting. 
41 |
42 | % Randomly permute examples
43 | rp = randperm(n);
44 |
45 | for i = 1:n
46 |     % Display
47 |     fprintf('\nDisplaying Example Image\n');
48 |     displayData(testX(rp(i), :));
49 |
50 |     pred = predict(Theta1, Theta2, testX(rp(i),:));
51 |     fprintf('\nNeural Network Prediction: %d (digit %d)\n', pred, mod(pred, 10));
52 |
53 |     % Pause with quit option
54 |     s = input('Paused - press enter to continue, q to exit:','s');
55 |     if s == 'q'
56 |         break
57 |     end
58 | end
--------------------------------------------------------------------------------
/Code/checkTrainingAccuracy.m:
--------------------------------------------------------------------------------
1 | %% ================= Predict on the Training Set =================
2 | % Uses the saved network weights to predict the label of every training
3 | % image, prints the training set accuracy, and then steps through the
4 | % training images one at a time showing the prediction for each.
5 |
6 | % Initialization
7 | clear ; close all; clc
8 |
9 | %% =========== Part 1: Loading and Visualizing Data =============
10 | % Load the dataset of handwritten digits and display a random sample.
11 |
12 | % Load Training Data
13 | fprintf('Loading and Visualizing Data ...\n')
14 |
15 | % Load the dataset once (the file was previously loaded twice in a row)
16 | load('MNISTDataset');
17 | X = trainingImages((1:60000),:);
18 | y = trainingLabels((1:60000),:);
19 | m = size(X, 1);
20 | % Expected to provide Theta1 and Theta2, which are used below
21 | load('neuralNetworkWeights.mat');
22 |
23 | % Randomly select 100 data points to display
24 | sel = randperm(size(X, 1));
25 | sel = sel(1:100);
26 |
27 | displayData(X(sel, :));
28 |
29 | fprintf('Program paused. Press enter to continue.\n');
30 | pause;
31 | pred = predict(Theta1, Theta2, X);
32 |
33 | fprintf('\nTraining Set Accuracy: %f\n', mean(double(pred == y)) * 100);
34 | fprintf('Program paused. Press enter to continue.\n');
35 | pause;
36 |
37 | %% =========== Part 2: Predicting training images =============
38 | % To give an idea of the network's output, run through the examples
39 | % one at a time to see what it is predicting.
40 |
41 | % Randomly permute examples
42 | rp = randperm(m);
43 |
44 | for i = 1:m
45 |     % Display
46 |     fprintf('\nDisplaying Example Image\n');
47 |     displayData(X(rp(i), :));
48 |
49 |     pred = predict(Theta1, Theta2, X(rp(i),:));
50 |     % Label 10 stands for the digit 0, hence mod(pred, 10)
51 |     fprintf('\nNeural Network Prediction: %d (digit %d)\n', pred, mod(pred, 10));
52 |
53 |     % Pause with quit option
54 |     s = input('Paused - press enter to continue, q to exit:','s');
55 |     if s == 'q'
56 |         break
57 |     end
58 | end
--------------------------------------------------------------------------------
/Code/computeNumericalGradient.m:
--------------------------------------------------------------------------------
1 | function numgrad = computeNumericalGradient(J, theta)
2 | %COMPUTENUMERICALGRADIENT Computes the gradient using "finite differences"
3 | %and gives us a numerical estimate of the gradient.
4 | % numgrad = COMPUTENUMERICALGRADIENT(J, theta) computes the numerical
5 | % gradient of the function J around theta. Calling y = J(theta) should
6 | % return the function value at theta.
7 |
8 | % Notes: The following code implements numerical gradient checking, and
9 | % returns the numerical gradient. It sets numgrad(i) to (a numerical
10 | % approximation of) the partial derivative of J with respect to the
11 | % i-th input argument, evaluated at theta. (i.e., numgrad(i) should
12 | % be (approximately) the partial derivative of J with respect
13 | % to theta(i).)
14 | %
15 |
16 | numgrad = zeros(size(theta));
17 | perturb = zeros(size(theta));
18 | e = 1e-4;
19 | for p = 1:numel(theta)
20 |     % Set perturbation vector
21 |     perturb(p) = e;
22 |     loss1 = J(theta - perturb);
23 |     loss2 = J(theta + perturb);
24 |     % Compute Numerical Gradient
25 |     numgrad(p) = (loss2 - loss1) / (2*e);
26 |     perturb(p) = 0;
27 | end
28 |
29 | end
30 |
--------------------------------------------------------------------------------
/Code/createExternalImage.m:
--------------------------------------------------------------------------------
1 | function createExternalImage(row_num, imagename)
2 | %CREATEEXTERNALIMAGE Display and save one MNIST training image.
3 | %   CREATEEXTERNALIMAGE(row_num, imagename) takes the pixels in row
4 | %   'row_num' of trainingImages (from MNISTDataset.mat), shows them as a
5 | %   28x28 image and writes that image to the file 'imagename'.
6 | %
7 | %   row_num   defaults to 1.
8 | %   imagename defaults to 'Image_MNIST_Training_Row_<row_num>.jpg'.
9 |
10 | %% Initialization
11 |
12 | if ~exist('row_num', 'var') || isempty(row_num)
13 |     row_num = 1;
14 | end
15 |
16 | if ~exist('imagename', 'var') || isempty(imagename)
17 |     imagename = ['Image_MNIST_Training_Row_' num2str(row_num) '.jpg'];
18 | end
19 |
20 | % Load only the variable that is needed from the MNIST Dataset
21 | data = load('MNISTDataset.mat', 'trainingImages');
22 | a = data.trainingImages(row_num, :);
23 |
24 | fprintf('Displaying and saving requested image ...\n');
25 |
26 | % Reshape the row vector to a 28*28 pixel image
27 | a = reshape(a, 28, 28);
28 | imshow(a);
29 |
30 | % Save the image
31 | imwrite(mat2gray(a),imagename);
32 |
33 | % Find information about the image
34 | % info = imfinfo(imagename);
35 | % disp(info.ColorType);
36 |
37 | % End the message with a newline, like every other message in the project
38 | fprintf('Image saved with the name: %s\n', imagename);
39 | end
--------------------------------------------------------------------------------
/Code/createMNISTDataset.m:
--------------------------------------------------------------------------------
1 | %% ============= Create the MNISTDataset.mat from ubyte files =============
2 | % Requires the files:
3 | % train-images.idx3-ubyte
4 | % train-labels.idx1-ubyte
5 | % t10k-images.idx3-ubyte
6 | % t10k-labels.idx1-ubyte
7 | % You can download these files from http://yann.lecun.com/exdb/mnist/
8 |
9 | %% Initialization
10 | clear ; close all; clc
11 |
12 | fprintf('Loading and Saving Data ...\n')
13 |
14 | % Load the training images and their labels
15 | trainingImages = loadMNISTImages('train-images.idx3-ubyte');
16 | trainingLabels = loadMNISTLabels('train-labels.idx1-ubyte');
17 |
18 | % Change the labels which are '0' to '10'
19 | trainingLabels(trainingLabels == 0) = 10;
20 |
21 | % Load the test images and their labels
22 | testImages = loadMNISTImages('t10k-images.idx3-ubyte');
23 | testLabels = loadMNISTLabels('t10k-labels.idx1-ubyte');
24 |
25 | % Change the labels which are '0' to '10'
26 | testLabels(testLabels == 0) = 10;
27 |
28 | % Save the train images and labels, and test images and labels to file:
29 | % 'MNISTDataset'. The variables are named explicitly so that nothing else
30 | % that happens to be in the workspace ends up in the file.
31 | save('MNISTDataset', 'trainingImages', 'trainingLabels', ...
32 |      'testImages', 'testLabels');
33 |
34 | fprintf('Done\n')
--------------------------------------------------------------------------------
/Code/debugInitializeWeights.m:
--------------------------------------------------------------------------------
1 | function W = debugInitializeWeights(fan_out, fan_in)
2 | %DEBUGINITIALIZEWEIGHTS Initialize the weights of a layer with fan_in
3 | %incoming connections and fan_out outgoing connections using a fixed
4 | %strategy, this will help you later in debugging
5 | %   W = DEBUGINITIALIZEWEIGHTS(fan_out, fan_in) initializes the weights
6 | %   of a layer with fan_in incoming connections and fan_out outgoing
7 | %   connections using a fixed set of values
8 | %
9 | %   W is a fan_out x (1 + fan_in) matrix; its first column holds the
10 | %   "bias" terms
11 | %
12 |
13 | % Set W to zeros
14 | W = zeros(fan_out, 1 + fan_in);
15 |
16 | % Initialize W using "sin", this ensures that W is always of the same
17 | % values and will be useful for debugging
18 | W = reshape(sin(1:numel(W)), size(W)) / 10;
19 |
20 | % =========================================================================
21 |
22 | end
23 | -------------------------------------------------------------------------------- /Code/displayData.m: -------------------------------------------------------------------------------- 1 | function [h, display_array] = displayData(X, example_width) 2 | %DISPLAYDATA Display 2D data in a nice grid 3 | % [h, display_array] = DISPLAYDATA(X, example_width) displays 2D data 4 | % stored in X in a nice grid. It returns the figure handle h and the 5 | % displayed array if requested. 6 | 7 | % Set example_width automatically if not passed in 8 | if ~exist('example_width', 'var') || isempty(example_width) 9 | example_width = round(sqrt(size(X, 2))); 10 | end 11 | 12 | % Gray Image 13 | colormap(gray); 14 | 15 | % Compute rows, cols 16 | [m, n] = size(X); 17 | example_height = (n / example_width); 18 | 19 | % Compute number of items to display 20 | display_rows = floor(sqrt(m)); 21 | display_cols = ceil(m / display_rows); 22 | 23 | % Between images padding 24 | pad = 1; 25 | 26 | % Setup blank display 27 | display_array = - ones(pad + display_rows * (example_height + pad), ... 28 | pad + display_cols * (example_width + pad)); 29 | 30 | % Copy each example into a patch on the display array 31 | curr_ex = 1; 32 | for j = 1:display_rows 33 | for i = 1:display_cols 34 | if curr_ex > m, 35 | break; 36 | end 37 | % Copy the patch 38 | 39 | % Get the max value of the patch 40 | max_val = max(abs(X(curr_ex, :))); 41 | display_array(pad + (j - 1) * (example_height + pad) + (1:example_height), ... 42 | pad + (i - 1) * (example_width + pad) + (1:example_width)) = ... 
43 | reshape(X(curr_ex, :), example_height, example_width) / max_val; 44 | curr_ex = curr_ex + 1; 45 | end 46 | if curr_ex > m, 47 | break; 48 | end 49 | end 50 | 51 | % Display Image 52 | h = imagesc(display_array, [-1 1]); 53 | 54 | % Do not show axis 55 | axis image off 56 | 57 | drawnow; 58 | 59 | end 60 | -------------------------------------------------------------------------------- /Code/fmincg.m: -------------------------------------------------------------------------------- 1 | function [X, fX, i] = fmincg(f, X, options, P1, P2, P3, P4, P5) 2 | % Minimize a continuous differentialble multivariate function. Starting point 3 | % is given by "X" (D by 1), and the function named in the string "f", must 4 | % return a function value and a vector of partial derivatives. The Polack- 5 | % Ribiere flavour of conjugate gradients is used to compute search directions, 6 | % and a line search using quadratic and cubic polynomial approximations and the 7 | % Wolfe-Powell stopping criteria is used together with the slope ratio method 8 | % for guessing initial step sizes. Additionally a bunch of checks are made to 9 | % make sure that exploration is taking place and that extrapolation will not 10 | % be unboundedly large. The "length" gives the length of the run: if it is 11 | % positive, it gives the maximum number of line searches, if negative its 12 | % absolute gives the maximum allowed number of function evaluations. You can 13 | % (optionally) give "length" a second component, which will indicate the 14 | % reduction in function value to be expected in the first line-search (defaults 15 | % to 1.0). The function returns when either its length is up, or if no further 16 | % progress can be made (ie, we are at a minimum, or so close that due to 17 | % numerical problems, we cannot get any closer). 
If the function terminates 18 | % within a few iterations, it could be an indication that the function value 19 | % and derivatives are not consistent (ie, there may be a bug in the 20 | % implementation of your "f" function). The function returns the found 21 | % solution "X", a vector of function values "fX" indicating the progress made 22 | % and "i" the number of iterations (line searches or function evaluations, 23 | % depending on the sign of "length") used. 24 | % 25 | % Usage: [X, fX, i] = fmincg(f, X, options, P1, P2, P3, P4, P5) 26 | % 27 | % See also: checkgrad 28 | % 29 | % Copyright (C) 2001 and 2002 by Carl Edward Rasmussen. Date 2002-02-13 30 | % 31 | % 32 | % (C) Copyright 1999, 2000 & 2001, Carl Edward Rasmussen 33 | % 34 | % Permission is granted for anyone to copy, use, or modify these 35 | % programs and accompanying documents for purposes of research or 36 | % education, provided this copyright notice is retained, and note is 37 | % made of any changes that have been made. 38 | % 39 | % These programs and documents are distributed without any warranty, 40 | % express or implied. As the programs were written for research 41 | % purposes only, they have not been tested to the degree that would be 42 | % advisable in any important application. All use of these programs is 43 | % entirely at the user's own risk. 
44 | % 45 | % [ml-class] Changes Made: 46 | % 1) Function name and argument specifications 47 | % 2) Output display 48 | % 49 | 50 | % Read options 51 | if exist('options', 'var') && ~isempty(options) && isfield(options, 'MaxIter') 52 | length = options.MaxIter; 53 | else 54 | length = 100; 55 | end 56 | 57 | 58 | RHO = 0.01; % a bunch of constants for line searches 59 | SIG = 0.5; % RHO and SIG are the constants in the Wolfe-Powell conditions 60 | INT = 0.1; % don't reevaluate within 0.1 of the limit of the current bracket 61 | EXT = 3.0; % extrapolate maximum 3 times the current bracket 62 | MAX = 20; % max 20 function evaluations per line search 63 | RATIO = 100; % maximum allowed slope ratio 64 | 65 | argstr = ['feval(f, X']; % compose string used to call function 66 | for i = 1:(nargin - 3) 67 | argstr = [argstr, ',P', int2str(i)]; 68 | end 69 | argstr = [argstr, ')']; 70 | 71 | if max(size(length)) == 2, red=length(2); length=length(1); else red=1; end 72 | S=['Iteration ']; 73 | 74 | i = 0; % zero the run length counter 75 | ls_failed = 0; % no previous line search has failed 76 | fX = []; 77 | [f1 df1] = eval(argstr); % get function value and gradient 78 | i = i + (length<0); % count epochs?! 79 | s = -df1; % search direction is steepest 80 | d1 = -s'*s; % this is the slope 81 | z1 = red/(1-d1); % initial step is red/(|s|+1) 82 | 83 | while i < abs(length) % while not finished 84 | i = i + (length>0); % count iterations?! 85 | 86 | X0 = X; f0 = f1; df0 = df1; % make a copy of current values 87 | X = X + z1*s; % begin line search 88 | [f2 df2] = eval(argstr); 89 | i = i + (length<0); % count epochs?! 
90 | d2 = df2'*s; 91 | f3 = f1; d3 = d1; z3 = -z1; % initialize point 3 equal to point 1 92 | if length>0, M = MAX; else M = min(MAX, -length-i); end 93 | success = 0; limit = -1; % initialize quanteties 94 | while 1 95 | while ((f2 > f1+z1*RHO*d1) || (d2 > -SIG*d1)) && (M > 0) 96 | limit = z1; % tighten the bracket 97 | if f2 > f1 98 | z2 = z3 - (0.5*d3*z3*z3)/(d3*z3+f2-f3); % quadratic fit 99 | else 100 | A = 6*(f2-f3)/z3+3*(d2+d3); % cubic fit 101 | B = 3*(f3-f2)-z3*(d3+2*d2); 102 | z2 = (sqrt(B*B-A*d2*z3*z3)-B)/A; % numerical error possible - ok! 103 | end 104 | if isnan(z2) || isinf(z2) 105 | z2 = z3/2; % if we had a numerical problem then bisect 106 | end 107 | z2 = max(min(z2, INT*z3),(1-INT)*z3); % don't accept too close to limits 108 | z1 = z1 + z2; % update the step 109 | X = X + z2*s; 110 | [f2 df2] = eval(argstr); 111 | M = M - 1; i = i + (length<0); % count epochs?! 112 | d2 = df2'*s; 113 | z3 = z3-z2; % z3 is now relative to the location of z2 114 | end 115 | if f2 > f1+z1*RHO*d1 || d2 > -SIG*d1 116 | break; % this is a failure 117 | elseif d2 > SIG*d1 118 | success = 1; break; % success 119 | elseif M == 0 120 | break; % failure 121 | end 122 | A = 6*(f2-f3)/z3+3*(d2+d3); % make cubic extrapolation 123 | B = 3*(f3-f2)-z3*(d3+2*d2); 124 | z2 = -d2*z3*z3/(B+sqrt(B*B-A*d2*z3*z3)); % num. error possible - ok! 125 | if ~isreal(z2) || isnan(z2) || isinf(z2) || z2 < 0 % num prob or wrong sign? 126 | if limit < -0.5 % if we have no upper limit 127 | z2 = z1 * (EXT-1); % the extrapolate the maximum amount 128 | else 129 | z2 = (limit-z1)/2; % otherwise bisect 130 | end 131 | elseif (limit > -0.5) && (z2+z1 > limit) % extraplation beyond max? 132 | z2 = (limit-z1)/2; % bisect 133 | elseif (limit < -0.5) && (z2+z1 > z1*EXT) % extrapolation beyond limit 134 | z2 = z1*(EXT-1.0); % set to extrapolation limit 135 | elseif z2 < -z3*INT 136 | z2 = -z3*INT; 137 | elseif (limit > -0.5) && (z2 < (limit-z1)*(1.0-INT)) % too close to limit? 
138 | z2 = (limit-z1)*(1.0-INT); 139 | end 140 | f3 = f2; d3 = d2; z3 = -z2; % set point 3 equal to point 2 141 | z1 = z1 + z2; X = X + z2*s; % update current estimates 142 | [f2 df2] = eval(argstr); 143 | M = M - 1; i = i + (length<0); % count epochs?! 144 | d2 = df2'*s; 145 | end % end of line search 146 | 147 | if success % if line search succeeded 148 | f1 = f2; fX = [fX' f1]'; 149 | fprintf('%s %4i | Cost: %4.6e\r', S, i, f1); 150 | s = (df2'*df2-df1'*df2)/(df1'*df1)*s - df2; % Polack-Ribiere direction 151 | tmp = df1; df1 = df2; df2 = tmp; % swap derivatives 152 | d2 = df1'*s; 153 | if d2 > 0 % new slope must be negative 154 | s = -df1; % otherwise use steepest direction 155 | d2 = -s'*s; 156 | end 157 | z1 = z1 * min(RATIO, d1/(d2-realmin)); % slope ratio but max RATIO 158 | d1 = d2; 159 | ls_failed = 0; % this line search did not fail 160 | else 161 | X = X0; f1 = f0; df1 = df0; % restore point from before failed line search 162 | if ls_failed || i > abs(length) % line search failed twice in a row 163 | break; % or we ran out of time, so we give up 164 | end 165 | tmp = df1; df1 = df2; df2 = tmp; % swap derivatives 166 | s = -df1; % try steepest 167 | d1 = -s'*s; 168 | z1 = 1/(1-d1); 169 | ls_failed = 1; % this line search failed 170 | end 171 | if exist('OCTAVE_VERSION') 172 | fflush(stdout); 173 | end 174 | end 175 | fprintf('\n'); 176 | -------------------------------------------------------------------------------- /Code/getAllFileNames.m: -------------------------------------------------------------------------------- 1 | function fileList = getAllFileNames(dirName) 2 | 3 | dirData = dir(dirName); % Get the data for the current directory 4 | dirIndex = [dirData.isdir]; % Find the index for directories 5 | fileList = {dirData(~dirIndex).name}'; % Get a list of the files 6 | if ~isempty(fileList) 7 | fileList = cellfun(@(x) fullfile(dirName,x),... 
% Prepend path to files 8 | fileList,'UniformOutput',false); 9 | end 10 | subDirs = {dirData(dirIndex).name}; % Get a list of the subdirectories 11 | validIndex = ~ismember(subDirs,{'.','..'}); % Find index of subdirectories 12 | % that are not '.' or '..' 13 | for iDir = find(validIndex) % Loop over valid subdirectories 14 | nextDir = fullfile(dirName,subDirs{iDir}); % Get the subdirectory path 15 | fileList = [fileList; getAllFileNames(nextDir)]; % Recursively call getAllFileNames 16 | end 17 | 18 | end 19 | -------------------------------------------------------------------------------- /Code/gradientDescent.m: -------------------------------------------------------------------------------- 1 | function [nn_params, J_history] = gradientDescent(nn_params, ... 2 | input_layer_size, ... 3 | hidden_layer_size, ... 4 | num_labels, ... 5 | X, y, lambda, alpha, MaxIter) 6 | %GRADIENTDESCENT Performs gradient descent to learn theta 7 | % [nn_params, J_history] = gradientDescent(nn_params, ... 8 | % input_layer_size, ... 9 | % hidden_layer_size, ... 10 | % num_labels, ... 11 | % X, y, lambda, alpha, MaxIter) updates theta by 12 | % taking num_iters gradient steps with learning rate alpha 13 | 14 | % Initialize some useful values 15 | J_history = zeros(MaxIter, 1); 16 | 17 | for iter = 1:MaxIter 18 | 19 | [J, grad] = nnCostFunction(nn_params, ... 20 | input_layer_size, ... 21 | hidden_layer_size, ... 22 | num_labels, ... 23 | X, y, lambda); 24 | % ====================== CODE ====================== 25 | % Instructions: Perform a single gradient step on the parameter vector 26 | % theta. 27 | % 28 | % Hint: While debugging, it can be useful to print out the values 29 | % of the cost function (computeCostMulti) and gradient here. 
30 | %
31 |
32 |     nn_params = nn_params - alpha*grad;
33 |
34 |     % ============================================================
35 |
36 |     % Save the cost J in every iteration
37 |     J_history(iter) = J;
38 |
39 |     fprintf('Iteration %4i | Cost: %4.6e\n', iter, J);
40 | end
41 |
42 | end
43 |
--------------------------------------------------------------------------------
/Code/loadMNISTImages.m:
--------------------------------------------------------------------------------
1 | function images = loadMNISTImages(filename)
2 | %LOADMNISTIMAGES Read an MNIST idx3-ubyte image file.
3 | %   images = LOADMNISTIMAGES(filename) returns a
4 | %   [number of images] x [rows*cols] matrix of doubles scaled to [0,1].
5 | %   Each row holds the pixels of one image.
6 | %
7 | %   Raises an error if the file cannot be opened or does not start with
8 | %   the image-file magic number 2051.
9 |
10 | fp = fopen(filename, 'r', 'b');
11 | if(fp == -1)
12 |     error('Could not open file');
13 | end
14 | % Close the file however this function exits, including the error paths
15 | closeFile = onCleanup(@() fclose(fp));
16 |
17 | header = fread(fp, 1, 'int32');
18 | % isequal also rejects an empty read (empty file), which "~=" let through
19 | if ~isequal(header, 2051)
20 |     error('Invalid image file header');
21 | end
22 |
23 | numImages = fread(fp, 1, 'int32');
24 | numRows = fread(fp, 1, 'int32');
25 | numCols = fread(fp, 1, 'int32');
26 |
27 | images = fread(fp, inf, 'unsigned char');
28 | images = reshape(images, numCols, numRows, numImages);
29 | images = permute(images,[2 1 3]);
30 |
31 | % Reshape to #pixels x #examples
32 | images = reshape(images, size(images, 1) * size(images, 2), size(images, 3));
33 |
34 | % Transpose to #examples x #pixels
35 | images = images';
36 |
37 | % Convert to double and rescale to [0,1]
38 | images = double(images)/ 255;
39 |
40 | end
--------------------------------------------------------------------------------
/Code/loadMNISTLabels.m:
--------------------------------------------------------------------------------
1 | function labels = loadMNISTLabels(filename)
2 | %LOADMNISTLABELS Read an MNIST idx1-ubyte label file.
3 | %   labels = LOADMNISTLABELS(filename) returns a
4 | %   [number of MNIST images]x1 matrix containing the labels for the
5 | %   MNIST images.
6 | %
7 | %   Raises an error if the file cannot be opened, does not start with the
8 | %   label-file magic number 2049, or holds a different number of labels
9 | %   than its header announces.
10 |
11 | fp = fopen(filename, 'r', 'b');
12 | if(fp == -1)
13 |     error('Could not open file');
14 | end
15 | % Close the file however this function exits, including the error paths
16 | closeFile = onCleanup(@() fclose(fp));
17 |
18 | header = fread(fp, 1, 'int32');
19 | % isequal also rejects an empty read (empty file), which "~=" let through
20 | if ~isequal(header, 2049)
21 |     error('Invalid label file header');
22 | end
23 |
24 | numLabels = fread(fp, 1, 'int32');
25 |
26 | labels = fread(fp, inf, 'unsigned char');
27 |
28 | if(size(labels,1) ~= numLabels)
29 |     error('Mismatch in label count');
30 | end
31 |
32 | end
--------------------------------------------------------------------------------
/Code/neuralNetwork.m:
--------------------------------------------------------------------------------
1 | function neuralNetwork(hidden_layer_size, num_train_images, num_test_images, MaxIter, reg_param)
2 | % LOADDATA Loads the dataset of MNIST
3 | % function neuralNetwork(hidden_layer_size, num_train_images, num_test_images, MaxIter, reg_param)
4 | % is used to run all the parts of our neural network.
5 |
6 | %% Machine Learning - Neural Network Learning & Prediction
7 |
8 | % Instructions
9 | % ------------
10 | %
11 | % This file can run all the codes and programs of the neural network
12 | % without user intervention. After running each part of this code, the
13 | % function pauses and waits for the user to continue with the next part.
14 | % Currently this function requires the following created functions: 15 | 16 | % displayData.m -> Display 2D data in a nice grid 17 | % nnCostFunction.m -> Implements the neural network cost function for a two layer neural network which performs classification 18 | % sigmoidGradient.m -> returns the gradient of the sigmoid function 19 | % randInitializeWeights.m -> Randomly initialize the weights of a layer of neurons in the neural network 20 | % checkNNGradients.m -> Creates a small neural network to check the backpropagation gradients 21 | % validationCurveHoldout.m -> Generate the training and validation errors needed to plot a validation curve that we can use to select regularization parameter 22 | % fmincg.m -> a function which works similarly to "fminunc" 23 | % predict.m -> Predict the label of an input for a trained neural network 24 | 25 | %% Initialization 26 | clear ; close all; clc 27 | 28 | if ~exist('hidden_layer_size', 'var') || isempty(hidden_layer_size) 29 | hidden_layer_size = 25; 30 | end 31 | 32 | if ~exist('num_train_images', 'var') || isempty(num_train_images) 33 | num_train_images = 60000; 34 | end 35 | 36 | if ~exist('num_test_images', 'var') || isempty(num_test_images) 37 | num_test_images = 10000; 38 | end 39 | 40 | if ~exist('MaxIter', 'var') || isempty(MaxIter) 41 | MaxIter = 30; 42 | end 43 | 44 | if ~exist('reg_param', 'var') || isempty(reg_param) 45 | reg_param = 0.1; 46 | end 47 | 48 | %% Setup the parameters you will use for this exercise 49 | input_layer_size = 784; % 28x28 Input Images of Digits 50 | num_labels = 10; % 10 labels, from 1 to 10 51 | % (note that we have mapped "0" to label 10) 52 | 53 | %% =========== Part 1: Loading and Visualizing Data ============= 54 | % We start by first loading and visualizing the dataset. 55 | % We will be working with a dataset that contains handwritten digits. 
56 | 57 | % Load Training Data 58 | fprintf('Loading and Visualizing Data ...\n') 59 | 60 | load('MNISTDataset'); 61 | X = trainingImages((1:num_train_images),:); 62 | y = trainingLabels((1:num_train_images),:); 63 | testX = testImages((1:num_test_images),:); 64 | testy = testLabels((1:num_test_images),:); 65 | 66 | m = size(X, 1); 67 | n = size(testX, 1); 68 | 69 | % Randomly select 100 data points to display 70 | sel = randperm(size(X, 1)); 71 | sel = sel(1:100); 72 | 73 | displayData(X(sel, :)); 74 | 75 | fprintf('Program paused. Press enter to continue.\n'); 76 | pause; 77 | 78 | %% ================ Part 2: Loading Parameters ================ 79 | % In this part, we load some pre-initialized 80 | % neural network parameters. 81 | 82 | % Initialize variables Theta1 and Theta2 83 | Theta1 = zeros(hidden_layer_size, input_layer_size + 1); 84 | Theta2 = zeros(num_labels, hidden_layer_size + 1); 85 | 86 | fprintf('\nLoading Saved Neural Network Parameters ...\n') 87 | 88 | % Load the weights into variables Theta1 and Theta2 89 | load('savedNeuralNetworkWeights.mat'); 90 | 91 | % Unroll parameters 92 | nn_params = [Theta1(:) ; Theta2(:)]; 93 | 94 | %% ================ Part 3: Compute Cost (Feedforward) ================ 95 | % To the neural network, we should first start by implementing the 96 | % feedforward part of the neural network that returns the cost only. We 97 | % should run the code in nnCostFunction.m to return cost. After 98 | % implementing the feedforward to compute the cost, we can verify that 99 | % our implementation is correct by verifying that we get the same cost 100 | % as for the fixed debugging parameters. 101 | % 102 | % We will implement the feedforward cost *without* regularization 103 | % first so that it will be easier for us to debug. Later, in part 4, we 104 | % will implement the regularized cost. 105 | % 106 | fprintf('\nFeedforward Using Neural Network ...\n') 107 | 108 | % Weight regularization parameter (we set this to 0 here). 
109 | lambda = 0; 110 | 111 | J = nnCostFunction(nn_params, input_layer_size, hidden_layer_size, ... 112 | num_labels, X, y, lambda); 113 | 114 | fprintf(['Cost at parameters (loaded from savedNeuralNetworkWeights): %f '... 115 | '\n(this value should be about 0.153726)\n'], J); 116 | 117 | fprintf('\nProgram paused. Press enter to continue.\n'); 118 | pause; 119 | 120 | %% =============== Part 4: Implement Regularization =============== 121 | % Once our cost function implementation is correct, we should now 122 | % continue to implement the regularization with the cost. 123 | % 124 | 125 | fprintf('\nChecking Cost Function (w/ Regularization) ... \n') 126 | 127 | % Weight regularization parameter (we set this to 1 here). 128 | lambda = 1; 129 | 130 | J = nnCostFunction(nn_params, input_layer_size, hidden_layer_size, ... 131 | num_labels, X, y, lambda); 132 | 133 | fprintf(['Cost at parameters (loaded from savedNeuralNetworkWeights): %f '... 134 | '\n(this value should be about 0.192581)\n'], J); 135 | 136 | fprintf('Program paused. Press enter to continue.\n'); 137 | pause; 138 | 139 | %% ================ Part 5: Sigmoid Gradient ================ 140 | % Before we start implementing the neural network, we will first 141 | % implement the gradient for the sigmoid function. 142 | 143 | fprintf('\nEvaluating sigmoid gradient...\n') 144 | 145 | g = sigmoidGradient([-1 -0.5 0 0.5 1]); 146 | fprintf('Sigmoid gradient evaluated at [-1 -0.5 0 0.5 1]:\n '); 147 | fprintf('%f ', g); 148 | fprintf('\n\n'); 149 | 150 | fprintf('Program paused. Press enter to continue.\n'); 151 | pause; 152 | 153 | %% ================ Part 6: Initializing Parameters ================ 154 | % In this part, we will be starting to implement a two 155 | % layer neural network that classifies digits. 
We will start by 156 | % implementing a function to initialize the weights of the neural network 157 | % (randInitializeWeights.m) 158 | 159 | fprintf('\nInitializing Neural Network Parameters ...\n') 160 | 161 | initial_Theta1 = randInitializeWeights(input_layer_size, hidden_layer_size); 162 | initial_Theta2 = randInitializeWeights(hidden_layer_size, num_labels); 163 | 164 | % Unroll parameters 165 | initial_nn_params = [initial_Theta1(:) ; initial_Theta2(:)]; 166 | 167 | %% =============== Part 7: Implement Backpropagation =============== 168 | % Once our cost calculation is complete, we should implement the 169 | % backpropagation algorithm for the neural network. We should add to the 170 | % code we've written in nnCostFunction.m to return the partial 171 | % derivatives of the parameters. 172 | % 173 | fprintf('\nChecking Backpropagation... \n'); 174 | 175 | % Check gradients by running checkNNGradients 176 | checkNNGradients; 177 | 178 | fprintf('\nProgram paused. Press enter to continue.\n'); 179 | pause; 180 | 181 | %% =============== Part 8: Implement Regularization =============== 182 | % Once our backpropagation implementation is correct, we should now 183 | % continue to implement the regularization with the cost and gradient. 184 | % 185 | 186 | fprintf('\nChecking Backpropagation (w/ Regularization) ... \n') 187 | 188 | % Check gradients by running checkNNGradients 189 | lambda = 3; 190 | checkNNGradients(lambda); 191 | 192 | % Also output the costFunction debugging values 193 | debug_J = nnCostFunction(nn_params, input_layer_size, ... 194 | hidden_layer_size, num_labels, X, y, lambda); 195 | 196 | fprintf(['\n\nCost at (fixed) debugging parameters (w/ lambda = %f): %f ' ... 197 | '\n(for lambda = 3, this value should be about 0.270292)\n\n'], lambda, debug_J); 198 | 199 | fprintf('Program paused. 
Press enter to continue.\n'); 200 | pause; 201 | 202 | %% =========== Part 9: Validation for Selecting Lambda ============= 203 | % We will now implement validationCurve to test various values of 204 | % lambda on a validation set. We will then use this to select the 205 | % "best" lambda value. 206 | % 207 | 208 | % K-fold Cross Validation 209 | % num_folds = 6; 210 | % [lambda_vec, error_train, error_val] = validationCurve(input_layer_size, hidden_layer_size, ... 211 | % num_labels, X, y, MaxIter, num_folds); 212 | 213 | [lambda_vec, error_train, error_val] = validationCurveHoldout(input_layer_size, hidden_layer_size, ... 214 | num_labels, X, y, MaxIter, 0.3); 215 | 216 | 217 | close all; 218 | plot(lambda_vec, error_train, lambda_vec, error_val); 219 | legend('Train', 'Cross Validation'); 220 | xlabel('lambda'); 221 | ylabel('Error'); 222 | 223 | fprintf('lambda\t\tTrain Error\tValidation Error\n'); 224 | for i = 1:length(lambda_vec) 225 | fprintf(' %f\t%f\t%f\n', ... 226 | lambda_vec(i), error_train(i), error_val(i)); 227 | end 228 | 229 | fprintf('Program paused. Press enter to continue.\n'); 230 | pause; 231 | 232 | %% =================== Part 10: Training NN =================== 233 | % We have now implemented all the code necessary to train a neural 234 | % network. To train the neural network, we will now use "fmincg", which 235 | % is a function which works similarly to "fminunc". These 236 | % advanced optimizers are able to train our cost functions efficiently as 237 | % long as we provide them with the gradient computations. 238 | % 239 | fprintf('\nTraining Neural Network... \n') 240 | 241 | % Afterwards change the MaxIter to a larger 242 | % value to see how more training helps. 243 | options = optimset('MaxIter', MaxIter); 244 | 245 | % We should also try different values of lambda 246 | lambda = reg_param; 247 | 248 | % Create "short hand" for the cost function to be minimized 249 | costFunction = @(p) nnCostFunction(p, ... 
250 | input_layer_size, ... 251 | hidden_layer_size, ... 252 | num_labels, X, y, lambda); 253 | 254 | % Now, costFunction is a function that takes in only one argument (the 255 | % neural network parameters) 256 | [nn_params, cost] = fmincg(costFunction, initial_nn_params, options); 257 | 258 | % Obtain Theta1 and Theta2 back from nn_params 259 | Theta1 = reshape(nn_params(1:hidden_layer_size * (input_layer_size + 1)), ... 260 | hidden_layer_size, (input_layer_size + 1)); 261 | 262 | Theta2 = reshape(nn_params((1 + (hidden_layer_size * (input_layer_size + 1))):end), ... 263 | num_labels, (hidden_layer_size + 1)); 264 | 265 | save('neuralNetworkWeights.mat','Theta1','Theta2') 266 | fprintf('Program paused. Press enter to continue.\n'); 267 | pause; 268 | 269 | 270 | %% ================= Part 11: Visualize Weights ================= 271 | % We can now "visualize" what the neural network is learning by 272 | % displaying the hidden units to see what features they are capturing in 273 | % the data. 274 | 275 | fprintf('\nVisualizing Neural Network... \n') 276 | 277 | % Switch on the below line afterwards 278 | displayData(Theta1(:, 2:end)); 279 | 280 | fprintf('\nProgram paused. Press enter to continue.\n'); 281 | pause; 282 | 283 | %% ================= Part 12: Implement Predict for Training Set ================= 284 | % After training the neural network, we would like to use it to predict 285 | % the labels. We will now implement the "predict" function to use the 286 | % neural network to predict the labels of the training set. This lets 287 | % us compute the training set accuracy. 288 | 289 | pred = predict(Theta1, Theta2, X); 290 | 291 | fprintf('\nTraining Set Accuracy: %f\n', mean(double(pred == y)) * 100); 292 | 293 | fprintf('Program paused. Press enter to continue.\n'); 294 | pause; 295 | 296 | % To give an idea of the network's output, we can also run 297 | % through the examples one at the a time to see what it is predicting. 
298 | 299 | % Randomly permute examples 300 | rp = randperm(m); 301 | 302 | for i = 1:m 303 | % Display 304 | fprintf('\nDisplaying Example Image\n'); 305 | displayData(X(rp(i), :)); 306 | 307 | pred = predict(Theta1, Theta2, X(rp(i),:)); 308 | fprintf('\nNeural Network Prediction: %d (digit %d)\n', pred, mod(pred, 10)); 309 | 310 | % Pause with quit option 311 | s = input('Paused - press enter to continue, q to exit:','s'); 312 | if s == 'q' 313 | break 314 | end 315 | end 316 | 317 | fprintf('Program paused. Press enter to continue.\n'); 318 | pause; 319 | 320 | 321 | %% ================= Part 13: Implement Predict for Test Set ================= 322 | % Now we would like to use it to predict the test(or validation) labels. 323 | % We will now implement the "predict" function to use the neural network to 324 | % predict the labels of the test set. This lets us test the neural network 325 | % on images on which it is not trained. 326 | 327 | pred = predict(Theta1, Theta2, testX); 328 | 329 | fprintf('\nTest Set Accuracy: %f\n', mean(double(pred == testy)) * 100); 330 | 331 | fprintf('Program paused. Press enter to continue.\n'); 332 | pause; 333 | 334 | % To giv an idea of the network's output,we can also run 335 | % through the examples one at the a time to see what it is predicting. 
336 | 337 | % Randomly permute examples 338 | rp = randperm(n); 339 | 340 | for i = 1:n 341 | % Display 342 | fprintf('\nDisplaying Example Image\n'); 343 | displayData(testX(rp(i), :)); 344 | 345 | pred = predict(Theta1, Theta2, testX(rp(i),:)); 346 | fprintf('\nNeural Network Prediction: %d (digit %d)\n', pred, mod(pred, 10)); 347 | 348 | % Pause with quit option 349 | s = input('Paused - press enter to continue, q to exit:','s'); 350 | if s == 'q' 351 | break 352 | end 353 | end 354 | 355 | end -------------------------------------------------------------------------------- /Code/neuralNetworkWeights - HU200_EP1000_R0.1.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deyjishnu/digit-recognition/d872ac5d7f3356fc93f6c428dbee234c180de0ae/Code/neuralNetworkWeights - HU200_EP1000_R0.1.mat -------------------------------------------------------------------------------- /Code/neuralNetworkWeights-98.35.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deyjishnu/digit-recognition/d872ac5d7f3356fc93f6c428dbee234c180de0ae/Code/neuralNetworkWeights-98.35.mat -------------------------------------------------------------------------------- /Code/neuralNetworkWeights.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deyjishnu/digit-recognition/d872ac5d7f3356fc93f6c428dbee234c180de0ae/Code/neuralNetworkWeights.mat -------------------------------------------------------------------------------- /Code/nnCostFunction.m: -------------------------------------------------------------------------------- 1 | function [J, grad] = nnCostFunction(nn_params, ... 2 | input_layer_size, ... 3 | hidden_layer_size, ... 4 | num_labels, ... 
5 | X, y, lambda) 6 | %NNCOSTFUNCTION Implements the neural network cost function for a two layer 7 | %neural network which performs classification 8 | % [J grad] = NNCOSTFUNCTON(nn_params, hidden_layer_size, num_labels, ... 9 | % X, y, lambda) computes the cost and gradient of the neural network. The 10 | % parameters for the neural network are "unrolled" into the vector 11 | % nn_params and need to be converted back into the weight matrices. 12 | % 13 | % The returned parameter grad should be a "unrolled" vector of the 14 | % partial derivatives of the neural network. 15 | % 16 | 17 | % Reshape nn_params back into the parameters Theta1 and Theta2, the weight matrices 18 | % for our 2 layer neural network 19 | Theta1 = reshape(nn_params(1:hidden_layer_size * (input_layer_size + 1)), ... 20 | hidden_layer_size, (input_layer_size + 1)); 21 | 22 | Theta2 = reshape(nn_params((1 + (hidden_layer_size * (input_layer_size + 1))):end), ... 23 | num_labels, (hidden_layer_size + 1)); 24 | 25 | % Setup some useful variables 26 | m = size(X, 1); 27 | 28 | % We need to return the following variables correctly 29 | J = 0; 30 | Theta1_grad = zeros(size(Theta1)); 31 | Theta2_grad = zeros(size(Theta2)); 32 | 33 | % ====================== CODE ====================== 34 | % Instructions: We should complete the code by working through the 35 | % following parts. 36 | % 37 | % Part 1: Feedforward the neural network and return the cost in the 38 | % variable J. After implementing Part 1, we can verify that our 39 | % cost function computation is correct by verifying the cost 40 | % computed in neuralNetwork.m 41 | % 42 | % Part 2: Implement the backpropagation algorithm to compute the gradients 43 | % Theta1_grad and Theta2_grad. We should return the partial derivatives of 44 | % the cost function with respect to Theta1 and Theta2 in Theta1_grad and 45 | % Theta2_grad, respectively. 
After implementing Part 2, we can check 46 | % that our implementation is correct by running checkNNGradients 47 | % 48 | % Note: The vector y passed into the function is a vector of labels 49 | % containing values from 1..K. We need to map this vector into a 50 | % binary vector of 1's and 0's to be used with the neural network 51 | % cost function. 52 | % 53 | % Part 3: Implement regularization with the cost function and gradients. 54 | % 55 | % Hint: We can implement this around the code for 56 | % backpropagation. That is, we can compute the gradients for 57 | % the regularization separately and then add them to Theta1_grad 58 | % and Theta2_grad from Part 2. 59 | % 60 | 61 | % Part 1: Feedforward Propagation & Cost Function 62 | %-------------------------------------------------------------- 63 | X = [ones(m,1) X]; 64 | a1 = X; 65 | z2 = a1 * Theta1'; 66 | a2 = sigmoid(z2); 67 | a2 = [ones(m,1) a2]; 68 | a3 = sigmoid(a2 * Theta2'); 69 | 70 | yv = bsxfun(@eq, y, 1:num_labels); 71 | 72 | cost = -yv.*log(a3) - (1-yv).*log(1 - a3); 73 | 74 | J = (1/m) * sum(sum(cost,2)); 75 | 76 | reg = (lambda/(2*m)) * (sum(sum(Theta1(:,2:end).^2)) + sum(sum(Theta2(:,2:end).^2))); 77 | 78 | J = J + reg; 79 | 80 | % Part 2: Backpropagation Algorithm 81 | % ------------------------------------------------------------- 82 | delta3 = a3 - yv; 83 | 84 | delta3_X_Theta2 = delta3 * Theta2; 85 | delta2 = delta3_X_Theta2(:,2:end) .* sigmoidGradient(z2); 86 | 87 | % Accumulated Gradient for Theta1 88 | Delta1 = delta2' * a1; 89 | 90 | % Accumulated Gradient for Theta2 91 | Delta2 = delta3' * a2; 92 | 93 | % Gradient for Theta1 94 | Theta1_grad = Delta1 / m + lambda*[zeros(hidden_layer_size , 1) Theta1(:,2:end)] / m; 95 | 96 | % Gradient for Theta 2 97 | Theta2_grad = Delta2 / m + lambda*[zeros(num_labels , 1) Theta2(:,2:end)] / m; 98 | 99 | % ========================================================================= 100 | 101 | % Unroll gradients 102 | grad = [Theta1_grad(:) ; 
Theta2_grad(:)]; 103 | 104 | end 105 | -------------------------------------------------------------------------------- /Code/pre-processing of image.m: -------------------------------------------------------------------------------- 1 | %%the code for segmentation done in class by us.. 2 | 3 | clc;clear all; 4 | 5 | %%fetching file 6 | [fname path]=uigetfile('*.*','enter an image'); 7 | fname=strcat(path,fname); 8 | 9 | %reading into variable 10 | c=imread(fname); 11 | 12 | %2d conversion 13 | c=rgb2gray(c); 14 | 15 | %inverting image 16 | c=~c; 17 | 18 | %measuring area of digits 19 | se = strel('square',7); 20 | im_close = imclose(c, se); 21 | s = regionprops(im_close, 'BoundingBox'); 22 | 23 | %creating boxes around digits 24 | bb = round(reshape([s.BoundingBox], 4, []).'); 25 | figure; 26 | imshow(c); 27 | 28 | %extracting boxes with individual digits 29 | for idx = 1 : numel(s) 30 | rectangle('Position', bb(idx,:), 'edgecolor', 'red'); 31 | end 32 | 33 | %string the segmented digits into array 34 | num = cell(1, numel(s)); 35 | for idx = 1 : numel(s) 36 | num{idx} = c(bb(idx,2):bb(idx,2)+bb(idx,4)-1, bb(idx,1):bb(idx,1)+bb(idx,3)-1); 37 | end 38 | figure; 39 | imshow(num{5}); 40 | 41 | %writing a digit into a image file 42 | imwrite(mat2gray(num{5}),'test1.jpg'); 43 | I=imread('test1.jpg'); 44 | 45 | %binary conversion 46 | BW=imbinarize(I); 47 | figure; 48 | imshow(BW); 49 | 50 | %filling up spaces 51 | filled=imfill(BW,'holes'); 52 | figure; 53 | imshow(filled); 54 | 55 | 56 | 57 | %%%%% the indexes for the images are counted up and down. 58 | %%%%%%% i mean to say that in the diagram .. 
function p = predictExternalImage(imagefile)
%PREDICTEXTERNALIMAGE Predict the digit contained in an external image file.
%   p = PREDICTEXTERNALIMAGE(imagefile) reads the image, resizes it to
%   28-by-28, converts it to grayscale when it has three colour planes,
%   inverts it, scales it to [0, 1], flattens it into a row vector and
%   classifies it with the Theta1/Theta2 weights stored in
%   neuralNetworkWeights.mat.
%
%   p is the label returned by predict; the digit printed on the console
%   is mod(p, 10), so label 10 is printed as 0.
%
%   Raises an error when imagefile is missing or empty.

%% Initialization

if ~exist('imagefile', 'var') || isempty(imagefile)
    error('Pass an image file as an argument');
end

fprintf('Displaying 28-by-28 grayscale image: %s ...\n', imagefile);

% Read the image
a = imread(imagefile);

% Resize image to a 28*28 pixel image
a = imresize(a,[28 28]);

% Convert to grayscale only when the image is RGB. Files that are already
% single-plane are left alone, matching predictPercentExternalImage.
% NOTE(review): rgb2gray is documented for M-by-N-by-3 input - confirm how
% the installed release treats 2-D input.
if size(a, 3) == 3
    a = rgb2gray(a);
end

% Display white image on black background
% NOTE(review): 255 - a assumes 8-bit intensities - confirm for other
% image classes.
a = 255 - a;
imshow(a);

% Feature Scaling to match MNIST Data
a = double(a)/255;

% Convert to a row vector
a = a(:)';

% Load .mat file containing Theta1 and Theta2
load('neuralNetworkWeights.mat');

% Predict the label of the image using the 'predict' function
p = predict(Theta1, Theta2, a);

% Display the predicted digit on the console
fprintf('Predicted digit: %.0f\n', mod(p,10));
end
load('neuralNetworkWeights.mat'); 45 | 46 | % Predict the label of the image using the 'predict' function 47 | [percent, predict] = predictPercentage(Theta1, Theta2, a); 48 | 49 | % Display the predicted digit on the console 50 | fprintf('Predicted digit: %.0f Probability: %.2f%%\n', mod(predict,10), (percent * 100)); 51 | end -------------------------------------------------------------------------------- /Code/predictPercentage.m: -------------------------------------------------------------------------------- 1 | function [percent, predict] = predictPercentage(Theta1, Theta2, X) 2 | %PREDICTPERCENTAGE Predict the label of an input and the probability given a trained neural network 3 | % p = PREDICT(Theta1, Theta2, X) outputs the predicted label of X and its probability given the 4 | % trained weights of a neural network (Theta1, Theta2) 5 | 6 | % Useful values 7 | m = size(X, 1); 8 | 9 | % You need to return the following variables correctly 10 | predict = zeros(size(X, 1), 1); 11 | 12 | h1 = sigmoid([ones(m, 1) X] * Theta1'); 13 | h2 = sigmoid([ones(m, 1) h1] * Theta2'); 14 | [percent, predict] = max(h2, [], 2); 15 | end -------------------------------------------------------------------------------- /Code/randInitializeWeights.m: -------------------------------------------------------------------------------- 1 | function W = randInitializeWeights(L_in, L_out) 2 | %RANDINITIALIZEWEIGHTS Randomly initialize the weights of a layer with L_in 3 | %incoming connections and L_out outgoing connections 4 | % W = RANDINITIALIZEWEIGHTS(L_in, L_out) randomly initializes the weights 5 | % of a layer with L_in incoming connections and L_out outgoing 6 | % connections. 
function g = sigmoidGradient(z)
%SIGMOIDGRADIENT Element-wise derivative of the sigmoid function.
%   g = SIGMOIDGRADIENT(z) evaluates sigmoid(z) .* (1 - sigmoid(z)) for
%   every element of z. z may be a scalar, a vector or a matrix; g is
%   computed element by element from it.

% Evaluate the sigmoid once and reuse it for both factors
activation = sigmoid(z);
g = activation .* (1 - activation);

end
/Code/train-images.idx3-ubyte: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deyjishnu/digit-recognition/d872ac5d7f3356fc93f6c428dbee234c180de0ae/Code/train-images.idx3-ubyte -------------------------------------------------------------------------------- /Code/train-labels.idx1-ubyte: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deyjishnu/digit-recognition/d872ac5d7f3356fc93f6c428dbee234c180de0ae/Code/train-labels.idx1-ubyte -------------------------------------------------------------------------------- /Code/trainNeuralNetwork.m: -------------------------------------------------------------------------------- 1 | function [nn_params, cost] = trainNeuralNetwork(input_layer_size, ... 2 | hidden_layer_size, ... 3 | num_labels, X, y, MaxIter, reg_param) 4 | %TRAINLINEARREG Trains neural network given a dataset (X, y) and a 5 | %regularization parameter lambda 6 | % [nn_params, cost] = trainNeuralNetwork(input_layer_size, 7 | % hidden_layer_size, num_labels, X, y, MaxIter, reg_param) 8 | % trains the neural network using the dataset (X, y) and 9 | % regularization parameter lambda. Returns the 10 | % trained parameters theta. 11 | % 12 | 13 | fprintf('\nInitializing Neural Network Parameters ...\n') 14 | 15 | initial_Theta1 = randInitializeWeights(input_layer_size, hidden_layer_size); 16 | initial_Theta2 = randInitializeWeights(hidden_layer_size, num_labels); 17 | 18 | % Unroll parameters 19 | initial_nn_params = [initial_Theta1(:) ; initial_Theta2(:)]; 20 | 21 | options = optimset('MaxIter', MaxIter); 22 | 23 | % We should also try different values of lambda 24 | lambda = reg_param; 25 | 26 | % Create "short hand" for the cost function to be minimized 27 | costFunction = @(p) nnCostFunction(p, ... 28 | input_layer_size, ... 29 | hidden_layer_size, ... 
function [lambda_vec, error_train, error_val] = ...
    validationCurve(input_layer_size, hidden_layer_size, ...
                    num_labels, X, y, MaxIter, num_folds)
%VALIDATIONCURVE K-fold train and validation errors for a set of lambdas.
%   [lambda_vec, error_train, error_val] = ...
%       VALIDATIONCURVE(input_layer_size, hidden_layer_size, ...
%                       num_labels, X, y, MaxIter, num_folds)
%   trains the network once per fold for every lambda in lambda_vec and
%   returns, for each lambda, the unregularized cost averaged over the
%   folds on the training part (error_train) and on the held-out part
%   (error_val). error_train(i) and error_val(i) belong to lambda_vec(i).
%
%   X, y      - examples (one per row) and their labels
%   MaxIter   - iteration limit forwarded to trainNeuralNetwork
%   num_folds - number of contiguous folds; must be an integer between 2
%               and the number of rows of X
%
%   Raises validationCurve:badFolds for an invalid num_folds.

% Number of images in X
m = size(X, 1);

if num_folds < 2 || num_folds > m || num_folds ~= floor(num_folds)
    error('validationCurve:badFolds', ...
          'num_folds must be an integer between 2 and the number of examples (%d).', m);
end

% Integer fold boundaries: fold j covers rows fold_edges(j)+1 .. fold_edges(j+1).
% When m is divisible by num_folds these are the equal-sized blocks
% (j-1)*m/num_folds+1 .. j*m/num_folds; otherwise the sizes differ by at
% most one row instead of producing fractional indices.
fold_edges = round(linspace(0, m, num_folds + 1));

% Selected values of lambda
lambda_vec = [0.1 0.3 1 3]';

error_train = zeros(length(lambda_vec), 1);
error_val = zeros(length(lambda_vec), 1);

for i = 1:length(lambda_vec)
    lambda = lambda_vec(i);

    for j = 1:num_folds
        % Rows of fold j are held out; the rest, in original order, train
        is_val = false(m, 1);
        is_val((fold_edges(j) + 1):fold_edges(j + 1)) = true;

        Xval = X(is_val, :);
        yval = y(is_val, :);
        Xtrain = X(~is_val, :);
        ytrain = y(~is_val, :);

        nn_params = trainNeuralNetwork(input_layer_size, ...
                                       hidden_layer_size, ...
                                       num_labels, Xtrain, ytrain, MaxIter, lambda);

        % Errors are measured with lambda = 0 so that the regularization
        % term does not enter the reported cost
        error_train(i) = error_train(i) + nnCostFunction(nn_params, ...
                                                         input_layer_size, ...
                                                         hidden_layer_size, ...
                                                         num_labels, ...
                                                         Xtrain, ytrain, 0);

        error_val(i) = error_val(i) + nnCostFunction(nn_params, ...
                                                     input_layer_size, ...
                                                     hidden_layer_size, ...
                                                     num_labels, ...
                                                     Xval, yval, 0);
    end
end

% Average over the folds exactly once, after all sums are complete.
% Dividing the whole vectors inside the lambda loop would divide earlier
% entries again on every later pass.
error_train = error_train / num_folds;
error_val = error_val / num_folds;

end
The 33 | % vector lambda_vec contains the different lambda parameters 34 | % to use for each calculation of the errors, i.e, 35 | % error_train(i), and error_val(i) should give 36 | % us the errors obtained after training with 37 | % lambda = lambda_vec(i) 38 | % 39 | % Note: We can loop over lambda_vec with the following: 40 | % 41 | % for i = 1:length(lambda_vec) 42 | % lambda = lambda_vec(i); 43 | % % Compute train / val errors when training neural 44 | % % network with regularization parameter lambda 45 | % % We should store the result in error_train(i) 46 | % % and error_val(i) 47 | % .... 48 | % 49 | % end 50 | % 51 | % 52 | 53 | for i = 1:length(lambda_vec) 54 | lambda = lambda_vec(i); 55 | [nn_params] = trainNeuralNetwork(input_layer_size, ... 56 | hidden_layer_size, ... 57 | num_labels, Xtrain, ytrain, MaxIter, lambda); 58 | 59 | error_train(i) = nnCostFunction(nn_params, input_layer_size, ... 60 | hidden_layer_size, num_labels, ... 61 | Xtrain, ytrain, 0); 62 | 63 | error_val(i)= nnCostFunction(nn_params, input_layer_size, ... 64 | hidden_layer_size, num_labels, ... 
65 | Xval, yval, 0); 66 | end 67 | % ========================================================================= 68 | 69 | end 70 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Jishnu Dey 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # B.Tech Final Year Project - Handwritten Digits Recognition Using Neural Networks 2 | # Duration: Oct 2016 – Jun 2017 3 | 4 | The purpose of this project is to take handwritten digits as input, process the digits, train the neural network algorithm with the processed data, to recognize the pattern and successfully identify the test digits. 
The popular MNIST dataset is used for training and testing purposes. The IDE used is MATLAB. 5 | 6 | # Procedure in short 7 | This project presents a study on the recognition of handwritten digits (from 0 to 9) from the popular MNIST dataset using neural networks. I built a two-layer neural network (one hidden layer) and tried different techniques and observed the training set accuracy and test set accuracy. With the final network of connections and weights, obtained by using 200 hidden neurons and training for 1000 iterations with the 'fmincg' optimization algorithm and regularisation parameter = 0.1, I achieved 98.29% test set accuracy, i.e. Test Set Error Percent = 1.71, which is on par with the accuracies achieved using similar classifiers by LeCun and Simard. Other than the 'fmincg' function, I have not used any prebuilt functions. My model: 2-layer NN, 200 HU, Cross-Entropy Loss (without any preprocessing). 8 | 9 | # Test Set Accuracy achieved with various parameters on the MNIST dataset 10 | 11 | 12 | Number of hidden neurons | Number of iterations (epochs) | Training Set Accuracy (%) | Test Set Accuracy (%) 13 | ---------- | ---------- | ---------- | ---------- 14 | 25 | 30 | 89.9533 | 90.18 15 | 50 | 30 | 91.54 | 91.57 16 | 100 | 30 | 90.29 | 90.94 17 | 200 | 100 | 96.261667 | 96.07 18 | 200 | 1000 | 100 | __98.29__ 19 | --- 20 | We achieved a maximum accuracy of 98.29% (error = 1.71%) on the test set of the MNIST dataset without using any ready-made toolkit. 21 | 22 | # Code Info (or How to run the code) 23 | #### Start with the [_neuralNetwork.m_](https://github.com/deyjishnu/digit-recognition/blob/master/Code/neuralNetwork.m) code in the [_Code_](https://github.com/deyjishnu/digit-recognition/blob/master/Code) folder. This neuralNetwork.m uses many functions, many of which are defined in the other .m files in the _Code_ folder. Adequate comments are provided in the right places for understanding the code.
24 | 25 | * [_neuralNetwork.m_](https://github.com/deyjishnu/digit-recognition/blob/master/Code/neuralNetwork.m) -> Main code 26 | 27 | * _.idx3-ubyte_ -> Contains the original training images and labels 28 | 29 | * _[loadMNISTImages.m](https://github.com/deyjishnu/digit-recognition/blob/master/Code/loadMNISTImages.m), [loadMNISTLabels.m](https://github.com/deyjishnu/digit-recognition/blob/master/Code/loadMNISTLabels.m)_ -> Loads the training images and their labels from the original idx3-ubyte files 30 | 31 | * _[displayData.m](https://github.com/deyjishnu/digit-recognition/blob/master/Code/displayData.m)_ -> Displays 2D data in a nice grid 32 | 33 | * _[nnCostFunction.m](https://github.com/deyjishnu/digit-recognition/blob/master/Code/nnCostFunction.m)_ -> Implements the neural network cost function for a two layer neural network which performs classification 34 | 35 | * _[sigmoidGradient.m](https://github.com/deyjishnu/digit-recognition/blob/master/Code/sigmoidGradient.m)_ -> Returns the gradient of the sigmoid function 36 | 37 | * _[sigmoid.m](https://github.com/deyjishnu/digit-recognition/blob/master/Code/sigmoid.m)_ -> Computes sigmoid function 38 | 39 | * _[randInitializeWeights.m](https://github.com/deyjishnu/digit-recognition/blob/master/Code/randInitializeWeights.m)_ -> Randomly initialize the weights of a layer of neurons in the neural network 40 | 41 | * _[checkNNGradients.m](https://github.com/deyjishnu/digit-recognition/blob/master/Code/checkNNGradients.m)_ -> Creates a small neural network to check the backpropagation gradients 42 | 43 | * _[validationCurveHoldout.m](https://github.com/deyjishnu/digit-recognition/blob/master/Code/validationCurveHoldout.m)_ -> Generates the training and validation errors needed to plot a validation curve that we can use to select regularization parameter 44 | 45 | * _[fmincg.m](https://github.com/deyjishnu/digit-recognition/blob/master/Code/fmincg.m)_ -> a function which works similarly to "fminunc" 46 | 47 | * 
_[predict.m](https://github.com/deyjishnu/digit-recognition/blob/master/Code/predict.m)_ -> Predicts the label of an input given a trained neural network 48 | 49 | * _[predictExternalImage.m](https://github.com/deyjishnu/digit-recognition/blob/master/Code/predictExternalImage.m)_ -> Predicts what digit is contained in an external image for a trained neural network 50 | 51 | * _[predictPercentExternalImage.m](https://github.com/deyjishnu/digit-recognition/blob/master/Code/predictPercentExternalImage.m)_ -> Predicts what digit is contained in an external image, along with the probability given a trained neural network 52 | 53 | * _neuralNetworkWeights - HU200_EP1000_R0.1.mat, neuralNetworkWeights-98.35.mat, savedNeuralNetworkWeights.mat_ -> Saved Neural Network weights 54 | 55 | # Sample Images 56 | Image sets present inside the [_Sample Images_](https://github.com/deyjishnu/digit-recognition/tree/master/Sample%20Images) folder are created using Microsoft Paint. These are 28x28 pixel images of digits 0-9 to mimic the MNIST dataset. These images are used for external image recognition. 57 | 58 | # Where to learn the Basics and More 59 | We have a detailed guide for the project in [_A layman's guide to the project_](https://github.com/deyjishnu/digit-recognition/blob/master/A%20layman's%20guide%20to%20the%20project.pdf). 60 | Two research papers which helped us: 61 | 1. [Handwritten Digit Recognition with a Back-Propagation Network](https://papers.nips.cc/paper/293-handwritten-digit-recognition-with-a-back-propagation-network) by LeCun et al 62 | 2. 
[A Set of Features Extraction Methods for the Recognition of the Isolated Handwritten Digits](https://www.researchgate.net/profile/Salim_Ouchtati/publication/271908379_A_Set_of_Features_Extraction_Methods_for_the_Recognition_of_the_Isolated_Handwritten_Digits/links/57ec4a4d08aebb1961ffa8f5/A-Set-of-Features-Extraction-Methods-for-the-Recognition-of-the-Isolated-Handwritten-Digits.pdf) by _Ouchtati, S and Redjimi, M and Bedda, M_ 63 | 64 | If you want to learn what neural networks are and how to start with Matlab, you can start with this popular [course](https://www.coursera.org/learn/machine-learning) by Andrew Ng. 65 | 66 | # Acknowledgement 67 | 68 | Effective noise removal from external images is a very complex process and 69 | requires in-depth knowledge in this domain. And without good noise 70 | removal, it is impossible to achieve a good success rate in detecting 71 | digits from external images. 72 | 73 | As we didn’t prioritise the processing of external images over getting good results on the test data set, the results on external images are not good and are inconsistent. 74 | 75 | The segmentation algorithm can also be vastly improved to identify individual 76 | objects from all types of images, without false positives.
77 | -------------------------------------------------------------------------------- /Sample Images/First Set/Eight.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deyjishnu/digit-recognition/d872ac5d7f3356fc93f6c428dbee234c180de0ae/Sample Images/First Set/Eight.png -------------------------------------------------------------------------------- /Sample Images/First Set/Five.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deyjishnu/digit-recognition/d872ac5d7f3356fc93f6c428dbee234c180de0ae/Sample Images/First Set/Five.png -------------------------------------------------------------------------------- /Sample Images/First Set/Four.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deyjishnu/digit-recognition/d872ac5d7f3356fc93f6c428dbee234c180de0ae/Sample Images/First Set/Four.png -------------------------------------------------------------------------------- /Sample Images/First Set/Nine.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deyjishnu/digit-recognition/d872ac5d7f3356fc93f6c428dbee234c180de0ae/Sample Images/First Set/Nine.png -------------------------------------------------------------------------------- /Sample Images/First Set/One.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deyjishnu/digit-recognition/d872ac5d7f3356fc93f6c428dbee234c180de0ae/Sample Images/First Set/One.png -------------------------------------------------------------------------------- /Sample Images/First Set/Seven.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deyjishnu/digit-recognition/d872ac5d7f3356fc93f6c428dbee234c180de0ae/Sample Images/First 
Set/Seven.png -------------------------------------------------------------------------------- /Sample Images/First Set/Six.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deyjishnu/digit-recognition/d872ac5d7f3356fc93f6c428dbee234c180de0ae/Sample Images/First Set/Six.png -------------------------------------------------------------------------------- /Sample Images/First Set/Three.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deyjishnu/digit-recognition/d872ac5d7f3356fc93f6c428dbee234c180de0ae/Sample Images/First Set/Three.png -------------------------------------------------------------------------------- /Sample Images/First Set/Two.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deyjishnu/digit-recognition/d872ac5d7f3356fc93f6c428dbee234c180de0ae/Sample Images/First Set/Two.png -------------------------------------------------------------------------------- /Sample Images/First Set/Zero.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deyjishnu/digit-recognition/d872ac5d7f3356fc93f6c428dbee234c180de0ae/Sample Images/First Set/Zero.png -------------------------------------------------------------------------------- /Sample Images/Sample Test/Eight.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deyjishnu/digit-recognition/d872ac5d7f3356fc93f6c428dbee234c180de0ae/Sample Images/Sample Test/Eight.jpg -------------------------------------------------------------------------------- /Sample Images/Sample Test/Five.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deyjishnu/digit-recognition/d872ac5d7f3356fc93f6c428dbee234c180de0ae/Sample 
Images/Sample Test/Five.png -------------------------------------------------------------------------------- /Sample Images/Sample Test/Four.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deyjishnu/digit-recognition/d872ac5d7f3356fc93f6c428dbee234c180de0ae/Sample Images/Sample Test/Four.jpg -------------------------------------------------------------------------------- /Sample Images/Sample Test/Nine.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deyjishnu/digit-recognition/d872ac5d7f3356fc93f6c428dbee234c180de0ae/Sample Images/Sample Test/Nine.png -------------------------------------------------------------------------------- /Sample Images/Sample Test/One.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deyjishnu/digit-recognition/d872ac5d7f3356fc93f6c428dbee234c180de0ae/Sample Images/Sample Test/One.png -------------------------------------------------------------------------------- /Sample Images/Sample Test/Seven.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deyjishnu/digit-recognition/d872ac5d7f3356fc93f6c428dbee234c180de0ae/Sample Images/Sample Test/Seven.png -------------------------------------------------------------------------------- /Sample Images/Sample Test/Six.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deyjishnu/digit-recognition/d872ac5d7f3356fc93f6c428dbee234c180de0ae/Sample Images/Sample Test/Six.png -------------------------------------------------------------------------------- /Sample Images/Sample Test/Three.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/deyjishnu/digit-recognition/d872ac5d7f3356fc93f6c428dbee234c180de0ae/Sample Images/Sample Test/Three.png -------------------------------------------------------------------------------- /Sample Images/Sample Test/Two.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deyjishnu/digit-recognition/d872ac5d7f3356fc93f6c428dbee234c180de0ae/Sample Images/Sample Test/Two.jpg -------------------------------------------------------------------------------- /Sample Images/Sample Test/Zero.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deyjishnu/digit-recognition/d872ac5d7f3356fc93f6c428dbee234c180de0ae/Sample Images/Sample Test/Zero.png -------------------------------------------------------------------------------- /Sample Images/Sample Test/info.txt: -------------------------------------------------------------------------------- 1 | Zero.png 0 --> 99.97 2 | One.png 1 --> 61.18 3 | Two(3).jpg 2 --> 46.25 4 | Three(2).png 9 --> 1.89 5 | Four(3).jpg 4 --> 99.04 6 | Five.png 5 --> 74.87 7 | Six(2).png 6 --> 99.46 8 | Seven.png 7 --> 99.40 9 | Eight(3).jpg 8 --> 89.20 10 | Nine.png 7 --> 39.29 -------------------------------------------------------------------------------- /Sample Images/Second Set/Eight(2).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deyjishnu/digit-recognition/d872ac5d7f3356fc93f6c428dbee234c180de0ae/Sample Images/Second Set/Eight(2).png -------------------------------------------------------------------------------- /Sample Images/Second Set/Five(2).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deyjishnu/digit-recognition/d872ac5d7f3356fc93f6c428dbee234c180de0ae/Sample Images/Second Set/Five(2).png 
-------------------------------------------------------------------------------- /Sample Images/Second Set/Five(2.1).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deyjishnu/digit-recognition/d872ac5d7f3356fc93f6c428dbee234c180de0ae/Sample Images/Second Set/Five(2.1).png -------------------------------------------------------------------------------- /Sample Images/Second Set/Four(2).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deyjishnu/digit-recognition/d872ac5d7f3356fc93f6c428dbee234c180de0ae/Sample Images/Second Set/Four(2).png -------------------------------------------------------------------------------- /Sample Images/Second Set/Nine(2).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deyjishnu/digit-recognition/d872ac5d7f3356fc93f6c428dbee234c180de0ae/Sample Images/Second Set/Nine(2).png -------------------------------------------------------------------------------- /Sample Images/Second Set/One(2).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deyjishnu/digit-recognition/d872ac5d7f3356fc93f6c428dbee234c180de0ae/Sample Images/Second Set/One(2).png -------------------------------------------------------------------------------- /Sample Images/Second Set/Seven(2).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deyjishnu/digit-recognition/d872ac5d7f3356fc93f6c428dbee234c180de0ae/Sample Images/Second Set/Seven(2).png -------------------------------------------------------------------------------- /Sample Images/Second Set/Six(2).png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/deyjishnu/digit-recognition/d872ac5d7f3356fc93f6c428dbee234c180de0ae/Sample Images/Second Set/Six(2).png -------------------------------------------------------------------------------- /Sample Images/Second Set/Six(2.1).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deyjishnu/digit-recognition/d872ac5d7f3356fc93f6c428dbee234c180de0ae/Sample Images/Second Set/Six(2.1).png -------------------------------------------------------------------------------- /Sample Images/Second Set/Three(2).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deyjishnu/digit-recognition/d872ac5d7f3356fc93f6c428dbee234c180de0ae/Sample Images/Second Set/Three(2).png -------------------------------------------------------------------------------- /Sample Images/Second Set/Two(2).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deyjishnu/digit-recognition/d872ac5d7f3356fc93f6c428dbee234c180de0ae/Sample Images/Second Set/Two(2).png -------------------------------------------------------------------------------- /Sample Images/Second Set/Zero(2).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deyjishnu/digit-recognition/d872ac5d7f3356fc93f6c428dbee234c180de0ae/Sample Images/Second Set/Zero(2).png -------------------------------------------------------------------------------- /Sample Images/Third Set/Eight(3).jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deyjishnu/digit-recognition/d872ac5d7f3356fc93f6c428dbee234c180de0ae/Sample Images/Third Set/Eight(3).jpg -------------------------------------------------------------------------------- /Sample Images/Third Set/Five(3).jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/deyjishnu/digit-recognition/d872ac5d7f3356fc93f6c428dbee234c180de0ae/Sample Images/Third Set/Five(3).jpg -------------------------------------------------------------------------------- /Sample Images/Third Set/Four(3).jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deyjishnu/digit-recognition/d872ac5d7f3356fc93f6c428dbee234c180de0ae/Sample Images/Third Set/Four(3).jpg -------------------------------------------------------------------------------- /Sample Images/Third Set/Nine(3).jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deyjishnu/digit-recognition/d872ac5d7f3356fc93f6c428dbee234c180de0ae/Sample Images/Third Set/Nine(3).jpg -------------------------------------------------------------------------------- /Sample Images/Third Set/One(3).jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deyjishnu/digit-recognition/d872ac5d7f3356fc93f6c428dbee234c180de0ae/Sample Images/Third Set/One(3).jpg -------------------------------------------------------------------------------- /Sample Images/Third Set/Seven(3).jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deyjishnu/digit-recognition/d872ac5d7f3356fc93f6c428dbee234c180de0ae/Sample Images/Third Set/Seven(3).jpg -------------------------------------------------------------------------------- /Sample Images/Third Set/Six(3).jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deyjishnu/digit-recognition/d872ac5d7f3356fc93f6c428dbee234c180de0ae/Sample Images/Third Set/Six(3).jpg -------------------------------------------------------------------------------- 
/Sample Images/Third Set/Three(3).jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deyjishnu/digit-recognition/d872ac5d7f3356fc93f6c428dbee234c180de0ae/Sample Images/Third Set/Three(3).jpg -------------------------------------------------------------------------------- /Sample Images/Third Set/Two(3).jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deyjishnu/digit-recognition/d872ac5d7f3356fc93f6c428dbee234c180de0ae/Sample Images/Third Set/Two(3).jpg -------------------------------------------------------------------------------- /Sample Images/Third Set/Zero(3).jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deyjishnu/digit-recognition/d872ac5d7f3356fc93f6c428dbee234c180de0ae/Sample Images/Third Set/Zero(3).jpg --------------------------------------------------------------------------------