├── Lectures ├── Lecture1.pdf ├── Lecture10.pdf ├── Lecture11.pdf ├── Lecture12.pdf ├── Lecture13.pdf ├── Lecture14.pdf ├── Lecture15.pdf ├── Lecture16.pdf ├── Lecture17.pdf ├── Lecture18.pdf ├── Lecture2.pdf ├── Lecture3.pdf ├── Lecture4.pdf ├── Lecture6.pdf ├── Lecture7.pdf ├── Lecture8.pdf └── Lecture9.pdf ├── README.rst ├── ex1.pdf ├── ex1 ├── computeCost.m ├── computeCostMulti.m ├── ex1.m ├── ex1_multi.m ├── ex1data1.txt ├── ex1data2.txt ├── featureNormalize.m ├── gradientDescent.m ├── gradientDescentMulti.m ├── normalEqn.m ├── plotData.m ├── submit.m └── warmUpExercise.m ├── ex2.pdf ├── ex2 ├── costFunction.m ├── costFunctionReg.m ├── ex2.m ├── ex2_reg.m ├── ex2data1.txt ├── ex2data2.txt ├── mapFeature.m ├── plotData.m ├── plotDecisionBoundary.m ├── predict.m ├── sigmoid.m ├── submit.m └── submitWeb.m ├── ex3.pdf ├── ex3 ├── displayData.m ├── ex3.m ├── ex3_nn.m ├── ex3data1.mat ├── ex3weights.mat ├── fmincg.m ├── lrCostFunction.m ├── oneVsAll.m ├── predict.m ├── predictOneVsAll.m ├── sigmoid.m ├── submit.m └── submitWeb.m ├── ex4.pdf ├── ex4 ├── checkNNGradients.m ├── computeNumericalGradient.m ├── debugInitializeWeights.m ├── displayData.m ├── ex4.m ├── ex4data1.mat ├── ex4weights.mat ├── fmincg.m ├── nnCostFunction.m ├── predict.m ├── randInitializeWeights.m ├── sigmoid.m ├── sigmoidGradient.m ├── submit.m └── submitWeb.m ├── ex5.pdf ├── ex5 ├── ex5.m ├── ex5data1.mat ├── featureNormalize.m ├── fmincg.m ├── learningCurve.m ├── linearRegCostFunction.m ├── plotFit.m ├── polyFeatures.m ├── submit.m ├── submitWeb.m ├── trainLinearReg.m └── validationCurve.m ├── ex6.pdf ├── ex6 ├── dataset3Params.m ├── emailFeatures.m ├── emailSample1.txt ├── emailSample2.txt ├── ex6.m ├── ex6_spam.m ├── ex6data1.mat ├── ex6data2.mat ├── ex6data3.mat ├── gaussianKernel.m ├── getVocabList.m ├── linearKernel.m ├── plotData.m ├── porterStemmer.m ├── processEmail.m ├── readFile.m ├── spamSample1.txt ├── spamSample2.txt ├── spamTest.mat ├── spamTrain.mat ├── submit.m ├── submitWeb.m ├── svmPredict.m ├── svmTrain.m ├── visualizeBoundary.m ├── visualizeBoundaryLinear.m └── vocab.txt ├── ex7.pdf ├── ex7 ├── bird_small.mat ├── bird_small.png ├── computeCentroids.m ├── displayData.m ├── drawLine.m ├── ex7.m ├── ex7_pca.m ├── ex7data1.mat ├── ex7data2.mat ├── ex7faces.mat ├── featureNormalize.m ├── findClosestCentroids.m ├── kMeansInitCentroids.m ├── pca.m ├── plotDataPoints.m ├── plotProgresskMeans.m ├── projectData.m ├── recoverData.m ├── runkMeans.m ├── submit.m └── submitWeb.m ├── ex8.pdf ├── ex8 ├── checkCostFunction.m ├── cofiCostFunc.m ├── computeNumericalGradient.m ├── estimateGaussian.m ├── ex8.m ├── ex8_cofi.m ├── ex8_movieParams.mat ├── ex8_movies.mat ├── ex8data1.mat ├── ex8data2.mat ├── fmincg.m ├── loadMovieList.m ├── movie_ids.txt ├── multivariateGaussian.m ├── normalizeRatings.m ├── selectThreshold.m ├── submit.m ├── submitWeb.m └── visualizeFit.m └── octave_tutorial.m /Lectures/Lecture1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/Lectures/Lecture1.pdf -------------------------------------------------------------------------------- /Lectures/Lecture10.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/Lectures/Lecture10.pdf -------------------------------------------------------------------------------- /Lectures/Lecture11.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/Lectures/Lecture11.pdf -------------------------------------------------------------------------------- /Lectures/Lecture12.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/Lectures/Lecture12.pdf -------------------------------------------------------------------------------- /Lectures/Lecture13.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/Lectures/Lecture13.pdf -------------------------------------------------------------------------------- /Lectures/Lecture14.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/Lectures/Lecture14.pdf -------------------------------------------------------------------------------- /Lectures/Lecture15.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/Lectures/Lecture15.pdf -------------------------------------------------------------------------------- /Lectures/Lecture16.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/Lectures/Lecture16.pdf -------------------------------------------------------------------------------- /Lectures/Lecture17.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/Lectures/Lecture17.pdf -------------------------------------------------------------------------------- /Lectures/Lecture18.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/Lectures/Lecture18.pdf -------------------------------------------------------------------------------- /Lectures/Lecture2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/Lectures/Lecture2.pdf -------------------------------------------------------------------------------- /Lectures/Lecture3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/Lectures/Lecture3.pdf -------------------------------------------------------------------------------- /Lectures/Lecture4.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/Lectures/Lecture4.pdf -------------------------------------------------------------------------------- /Lectures/Lecture6.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/Lectures/Lecture6.pdf -------------------------------------------------------------------------------- /Lectures/Lecture7.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/Lectures/Lecture7.pdf -------------------------------------------------------------------------------- /Lectures/Lecture8.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/Lectures/Lecture8.pdf -------------------------------------------------------------------------------- /Lectures/Lecture9.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/Lectures/Lecture9.pdf -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | Introduction 2 | ============ 3 | 4 | These are my solutions to the programming exercises from Stanford's Machine Learning class. Written in `GNU Octave`_. 5 | 6 | .. _GNU Octave: https://www.gnu.org/software/octave/ 7 | 8 | Honor Code 9 | ========== 10 | 11 | This is a quote from the `Course Info`_ page: 12 | 13 | For the programming exercises, you are welcome to discuss them with other 14 | students, discuss specific algorithms, properties of algorithms, etc.; we 15 | ask only that you not look at any source code written by a different 16 | student, nor show your solution code to other students. 17 | 18 | -- Professor Andrew Ng & The ml-class Team 19 | 20 | .. _Course Info: http://www.ml-class.org/course/resources/index?page=course-info 21 | -------------------------------------------------------------------------------- /ex1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/ex1.pdf -------------------------------------------------------------------------------- /ex1/computeCost.m: -------------------------------------------------------------------------------- 1 | function J = computeCost(X, y, theta) 2 | %COMPUTECOST Compute cost for linear regression 3 | % J = COMPUTECOST(X, y, theta) computes the cost of using theta as the 4 | % parameter for linear regression to fit the data points in X and y 5 | 6 | % Initialize some useful values 7 | m = length(y); % number of training examples 8 | 9 | % You need to return the following variables correctly 10 | J = 0; 11 | 12 | % ====================== YOUR CODE HERE ====================== 13 | % Instructions: Compute the cost of a particular choice of theta 14 | % You should set J to the cost. 15 | 16 | % This is a vectorized version of J's computation 17 | % Note that we are using element-wise square ( .^ ) instead of matrix 18 | % multiplication ( ^ ).
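% For reference, the vectorized expression below computes
% J(theta) = 1/(2*m) * sum_i (h_theta(x_i) - y_i)^2 with h_theta(x) = X*theta,
% evaluated for the whole training set at once. A minimal loop-based sketch
% (illustrative only, assuming X already carries the intercept column of ones)
% that produces the same value:
%
%   acc = 0;
%   for i = 1:m
%     acc = acc + (X(i,:)*theta - y(i))^2;
%   end
%   J = acc / (2*m);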
19 | J = 1/(2*m) * sum((X*theta - y) .^ 2); 20 | 21 | % ========================================================================= 22 | 23 | end 24 | -------------------------------------------------------------------------------- /ex1/computeCostMulti.m: -------------------------------------------------------------------------------- 1 | function J = computeCostMulti(X, y, theta) 2 | %COMPUTECOSTMULTI Compute cost for linear regression with multiple variables 3 | % J = COMPUTECOSTMULTI(X, y, theta) computes the cost of using theta as the 4 | % parameter for linear regression to fit the data points in X and y 5 | 6 | J = computeCost(X, y, theta); 7 | 8 | end 9 | -------------------------------------------------------------------------------- /ex1/ex1.m: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env octave 2 | %% Machine Learning Online Class - Exercise 1: Linear Regression 3 | 4 | % Instructions 5 | % ------------ 6 | % 7 | % This file contains code that helps you get started on the 8 | % linear exercise. You will need to complete the following functions 9 | % in this exericse: 10 | % 11 | % warmUpExercise.m 12 | % plotData.m 13 | % gradientDescent.m 14 | % computeCost.m 15 | % gradientDescentMulti.m 16 | % computeCostMulti.m 17 | % featureNormalize.m 18 | % normalEqn.m 19 | % 20 | % For this exercise, you will not need to change any code in this file, 21 | % or any other files other than those mentioned above. 22 | % 23 | % x refers to the population size in 10,000s 24 | % y refers to the profit in $10,000s 25 | % 26 | 27 | %% Initialization 28 | clear all; close all; clc 29 | 30 | %% ==================== Part 1: Basic Function ==================== 31 | % Complete warmUpExercise.m 32 | fprintf('Running warmUpExercise ... \n'); 33 | fprintf('5x5 Identity Matrix: \n'); 34 | warmUpExercise() 35 | 36 | fprintf('Program paused. Press enter to continue.\n'); 37 | pause; 38 | 39 | 40 | %% ======================= Part 2: Plotting ======================= 41 | fprintf('Plotting Data ...\n') 42 | data = csvread('ex1data1.txt'); 43 | X = data(:, 1); y = data(:, 2); 44 | m = length(y); % number of training examples 45 | 46 | % Plot Data 47 | % Note: You have to complete the code in plotData.m 48 | plotData(X, y); 49 | 50 | fprintf('Program paused. Press enter to continue.\n'); 51 | pause; 52 | 53 | %% =================== Part 3: Gradient descent =================== 54 | fprintf('Running Gradient Descent ...\n') 55 | 56 | X = [ones(m, 1), data(:,1)]; % Add a column of ones to x 57 | theta = zeros(2, 1); % initialize fitting parameters 58 | 59 | % Some gradient descent settings 60 | iterations = 1500; 61 | alpha = 0.01; 62 | 63 | % compute and display initial cost 64 | computeCost(X, y, theta) 65 | 66 | % run gradient descent 67 | theta = gradientDescent(X, y, theta, alpha, iterations); 68 | 69 | % print theta to screen 70 | fprintf('Theta found by gradient descent: '); 71 | fprintf('%f %f \n', theta(1), theta(2)); 72 | 73 | % Plot the linear fit 74 | hold on; % keep previous plot visible 75 | plot(X(:,2), X*theta, '-') 76 | legend('Training data', 'Linear regression') 77 | hold off % don't overlay any more plots on this figure 78 | 79 | % Predict values for population sizes of 35,000 and 70,000 80 | predict1 = [1, 3.5] *theta; 81 | fprintf('For population = 35,000, we predict a profit of %f\n',... 82 | predict1*10000); 83 | predict2 = [1, 7] * theta; 84 | fprintf('For population = 70,000, we predict a profit of %f\n',... 
85 | predict2*10000); 86 | 87 | fprintf('Program paused. Press enter to continue.\n'); 88 | pause; 89 | 90 | %% ============= Part 4: Visualizing J(theta_0, theta_1) ============= 91 | fprintf('Visualizing J(theta_0, theta_1) ...\n') 92 | 93 | % Grid over which we will calculate J 94 | theta0_vals = linspace(-10, 10, 100); 95 | theta1_vals = linspace(-1, 4, 100); 96 | 97 | % initialize J_vals to a matrix of 0's 98 | J_vals = zeros(length(theta0_vals), length(theta1_vals)); 99 | 100 | % Fill out J_vals 101 | for i = 1:length(theta0_vals) 102 | for j = 1:length(theta1_vals) 103 | t = [theta0_vals(i); theta1_vals(j)]; 104 | J_vals(i,j) = computeCost(X, y, t); 105 | end 106 | end 107 | 108 | 109 | % Because of the way meshgrids work in the surf command, we need to 110 | % transpose J_vals before calling surf, or else the axes will be flipped 111 | J_vals = J_vals'; 112 | % Surface plot 113 | figure; 114 | surf(theta0_vals, theta1_vals, J_vals) 115 | xlabel('\theta_0'); ylabel('\theta_1'); 116 | 117 | % Contour plot 118 | figure; 119 | % Plot J_vals as 15 contours spaced logarithmically between 0.01 and 100 120 | contour(theta0_vals, theta1_vals, J_vals, logspace(-2, 3, 20)) 121 | xlabel('\theta_0'); ylabel('\theta_1'); 122 | hold on; 123 | plot(theta(1), theta(2), 'rx', 'MarkerSize', 10, 'LineWidth', 2); 124 | 125 | fprintf('Program paused. Press enter to continue.\n'); 126 | pause; 127 | -------------------------------------------------------------------------------- /ex1/ex1_multi.m: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env octave 2 | %% Machine Learning Online Class 3 | % Exercise 1: Linear regression with multiple variables 4 | % 5 | % Instructions 6 | % ------------ 7 | % 8 | % This file contains code that helps you get started on the 9 | % linear regression exercise. 10 | % 11 | % You will need to complete the following functions in this 12 | % exericse: 13 | % 14 | % warmUpExercise.m 15 | % plotData.m 16 | % gradientDescent.m 17 | % computeCost.m 18 | % gradientDescentMulti.m 19 | % computeCostMulti.m 20 | % featureNormalize.m 21 | % normalEqn.m 22 | % 23 | % For this part of the exercise, you will need to change some 24 | % parts of the code below for various experiments (e.g., changing 25 | % learning rates). 26 | % 27 | 28 | %% Clear and Close Figures 29 | clear all; close all; clc 30 | 31 | %% Initialization 32 | % 1650 sq-ft, 3 br house 33 | % Don't know about naming conventions in Ocvate/Matlab, so using nerdCaps 34 | houseToCheck = [1, 1650, 3]; 35 | 36 | %% ================ Part 1: Feature Normalization ================ 37 | 38 | fprintf('Loading data ...\n'); 39 | 40 | %% Load Data 41 | data = csvread('ex1data2.txt'); 42 | X = data(:, 1:2); 43 | y = data(:, 3); 44 | m = length(y); 45 | 46 | % Print out some data points 47 | fprintf('First 10 examples from the dataset: \n'); 48 | fprintf(' x = [%.0f %.0f], y = %.0f \n', [X(1:10,:) y(1:10,:)]'); 49 | 50 | fprintf('Program paused. Press enter to continue.\n'); 51 | pause; 52 | 53 | % Scale features and set them to zero mean 54 | fprintf('Normalizing Features ...\n'); 55 | 56 | [X mu sigma] = featureNormalize(X); 57 | 58 | % Add intercept term to X 59 | X = [ones(m, 1) X]; 60 | 61 | 62 | %% ================ Part 2: Gradient Descent ================ 63 | 64 | % ====================== YOUR CODE HERE ====================== 65 | % Instructions: We have provided you with the following starter 66 | % code that runs gradient descent with a particular 67 | % learning rate (alpha). 
68 | % 69 | % Your task is to first make sure that your functions - 70 | % computeCost and gradientDescent already work with 71 | % this starter code and support multiple variables. 72 | % 73 | % After that, try running gradient descent with 74 | % different values of alpha and see which one gives 75 | % you the best result. 76 | % 77 | % Finally, you should complete the code at the end 78 | % to predict the price of a 1650 sq-ft, 3 br house. 79 | % 80 | % Hint: By using the 'hold on' command, you can plot multiple 81 | % graphs on the same figure. 82 | % 83 | % Hint: At prediction, make sure you do the same feature normalization. 84 | % 85 | 86 | fprintf('Running gradient descent ...\n'); 87 | 88 | % Choose some alpha value 89 | % TODO(SaveTheRbtz@): We should look for some clever way to find an alpha instead 90 | % of manually brute-forcing it. Maybe in the gradientDescent function: if we see 91 | % that the thetas are increasing, divide alpha in half (just like TCP does with 92 | % its window size when a drop happens). 93 | alpha = 1; 94 | num_iters = 100; 95 | 96 | % Init Theta and Run Gradient Descent 97 | theta = zeros(3, 1); 98 | [theta, J_history] = gradientDescentMulti(X, y, theta, alpha, num_iters); 99 | 100 | % Plot the convergence graph 101 | figure; 102 | plot(1:numel(J_history), J_history, '-b', 'LineWidth', 2); 103 | xlabel('Number of iterations'); 104 | ylabel('Cost J'); 105 | 106 | % Display gradient descent's result 107 | fprintf('Theta computed from gradient descent: \n'); 108 | fprintf(' %f \n', theta); 109 | fprintf('\n'); 110 | 111 | fprintf('Program paused. Press enter to continue.\n'); 112 | pause; 113 | 114 | % Estimate the price of a 1650 sq-ft, 3 br house 115 | % ====================== YOUR CODE HERE ====================== 116 | % Recall that the first column of X is all-ones. Thus, it does 117 | % not need to be normalized. 118 | figure; 119 | hold on; 120 | 121 | % Plot data 122 | scatter3(X(:, 2), X(:, 3), y, 'r'); 123 | 124 | xlabel('sq. feet'); 125 | ylabel('bedrooms'); 126 | zlabel('price'); 127 | 128 | % Drawing a linear regression line 129 | % Three std. dev. each direction 130 | limit = 3; 131 | m = length(y); 132 | val = linspace(-limit, limit, m)'; 133 | z_val = [ones(m, 1), val, val] * theta; 134 | plot3(val, val, z_val); 135 | 136 | legend('data', 'gradient descent') 137 | 138 | % Normalizing data 139 | % NB! The first column of X is all-ones 140 | houseToCheckNormalized = [1 ((houseToCheck(2:3) - mu) ./ sigma)] 141 | % Computing price 142 | price = houseToCheckNormalized * theta; 143 | % ============================================================ 144 | 145 | fprintf(['Predicted price of a 1650 sq-ft, 3 br house ' ... 146 | '(using gradient descent):\n $%f\n'], price); 147 | 148 | fprintf('Program paused. Press enter to continue.\n'); 149 | pause; 150 | 151 | %% ================ Part 3: Normal Equations ================ 152 | 153 | fprintf('Solving with normal equations...\n'); 154 | 155 | % ====================== YOUR CODE HERE ====================== 156 | % Instructions: The following code computes the closed form 157 | % solution for linear regression using the normal 158 | % equations. You should complete the code in 159 | % normalEqn.m 160 | % 161 | % After doing so, you should complete this code 162 | % to predict the price of a 1650 sq-ft, 3 br house.
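% For reference, Part 3 below solves the same problem in closed form via
% normalEqn.m, i.e. theta = pinv(X'*X)*X'*y, which needs neither feature
% normalization nor a learning rate. A minimal sketch of the prediction it
% enables (illustrative only; Part 3 reloads the raw data before doing this):
%
%   theta_ne = pinv(X'*X) * X' * y;
%   price_ne = [1, 1650, 3] * theta_ne;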
163 | % 164 | 165 | %% Load Data 166 | data = csvread('ex1data2.txt'); 167 | X = data(:, 1:2); 168 | y = data(:, 3); 169 | m = length(y); 170 | 171 | % Add intercept term to X 172 | X = [ones(m, 1) X]; 173 | 174 | % Calculate the parameters from the normal equation 175 | theta = normalEqn(X, y); 176 | 177 | % Display normal equation's result 178 | fprintf('Theta computed from the normal equations: \n'); 179 | fprintf(' %f \n', theta); 180 | fprintf('\n'); 181 | 182 | % Estimate the price of a 1650 sq-ft, 3 br house 183 | % ====================== YOUR CODE HERE ====================== 184 | price = houseToCheck * theta; 185 | 186 | 187 | % ============================================================ 188 | 189 | fprintf(['Predicted price of a 1650 sq-ft, 3 br house ' ... 190 | '(using normal equations):\n $%f\n'], price); 191 | -------------------------------------------------------------------------------- /ex1/ex1data1.txt: -------------------------------------------------------------------------------- 1 | 6.1101,17.592 2 | 5.5277,9.1302 3 | 8.5186,13.662 4 | 7.0032,11.854 5 | 5.8598,6.8233 6 | 8.3829,11.886 7 | 7.4764,4.3483 8 | 8.5781,12 9 | 6.4862,6.5987 10 | 5.0546,3.8166 11 | 5.7107,3.2522 12 | 14.164,15.505 13 | 5.734,3.1551 14 | 8.4084,7.2258 15 | 5.6407,0.71618 16 | 5.3794,3.5129 17 | 6.3654,5.3048 18 | 5.1301,0.56077 19 | 6.4296,3.6518 20 | 7.0708,5.3893 21 | 6.1891,3.1386 22 | 20.27,21.767 23 | 5.4901,4.263 24 | 6.3261,5.1875 25 | 5.5649,3.0825 26 | 18.945,22.638 27 | 12.828,13.501 28 | 10.957,7.0467 29 | 13.176,14.692 30 | 22.203,24.147 31 | 5.2524,-1.22 32 | 6.5894,5.9966 33 | 9.2482,12.134 34 | 5.8918,1.8495 35 | 8.2111,6.5426 36 | 7.9334,4.5623 37 | 8.0959,4.1164 38 | 5.6063,3.3928 39 | 12.836,10.117 40 | 6.3534,5.4974 41 | 5.4069,0.55657 42 | 6.8825,3.9115 43 | 11.708,5.3854 44 | 5.7737,2.4406 45 | 7.8247,6.7318 46 | 7.0931,1.0463 47 | 5.0702,5.1337 48 | 5.8014,1.844 49 | 11.7,8.0043 50 | 5.5416,1.0179 51 | 7.5402,6.7504 52 | 5.3077,1.8396 53 | 7.4239,4.2885 54 | 7.6031,4.9981 55 | 6.3328,1.4233 56 | 6.3589,-1.4211 57 | 6.2742,2.4756 58 | 5.6397,4.6042 59 | 9.3102,3.9624 60 | 9.4536,5.4141 61 | 8.8254,5.1694 62 | 5.1793,-0.74279 63 | 21.279,17.929 64 | 14.908,12.054 65 | 18.959,17.054 66 | 7.2182,4.8852 67 | 8.2951,5.7442 68 | 10.236,7.7754 69 | 5.4994,1.0173 70 | 20.341,20.992 71 | 10.136,6.6799 72 | 7.3345,4.0259 73 | 6.0062,1.2784 74 | 7.2259,3.3411 75 | 5.0269,-2.6807 76 | 6.5479,0.29678 77 | 7.5386,3.8845 78 | 5.0365,5.7014 79 | 10.274,6.7526 80 | 5.1077,2.0576 81 | 5.7292,0.47953 82 | 5.1884,0.20421 83 | 6.3557,0.67861 84 | 9.7687,7.5435 85 | 6.5159,5.3436 86 | 8.5172,4.2415 87 | 9.1802,6.7981 88 | 6.002,0.92695 89 | 5.5204,0.152 90 | 5.0594,2.8214 91 | 5.7077,1.8451 92 | 7.6366,4.2959 93 | 5.8707,7.2029 94 | 5.3054,1.9869 95 | 8.2934,0.14454 96 | 13.394,9.0551 97 | 5.4369,0.61705 98 | -------------------------------------------------------------------------------- /ex1/ex1data2.txt: -------------------------------------------------------------------------------- 1 | 2104,3,399900 2 | 1600,3,329900 3 | 2400,3,369000 4 | 1416,2,232000 5 | 3000,4,539900 6 | 1985,4,299900 7 | 1534,3,314900 8 | 1427,3,198999 9 | 1380,3,212000 10 | 1494,3,242500 11 | 1940,4,239999 12 | 2000,3,347000 13 | 1890,3,329999 14 | 4478,5,699900 15 | 1268,3,259900 16 | 2300,4,449900 17 | 1320,2,299900 18 | 1236,3,199900 19 | 2609,4,499998 20 | 3031,4,599000 21 | 1767,3,252900 22 | 1888,2,255000 23 | 1604,3,242900 24 | 1962,4,259900 25 | 3890,3,573900 26 | 1100,3,249900 27 | 1458,3,464500 28 | 
2526,3,469000 29 | 2200,3,475000 30 | 2637,3,299900 31 | 1839,2,349900 32 | 1000,1,169900 33 | 2040,4,314900 34 | 3137,3,579900 35 | 1811,4,285900 36 | 1437,3,249900 37 | 1239,3,229900 38 | 2132,4,345000 39 | 4215,4,549000 40 | 2162,4,287000 41 | 1664,2,368500 42 | 2238,3,329900 43 | 2567,4,314000 44 | 1200,3,299000 45 | 852,2,179900 46 | 1852,4,299900 47 | 1203,3,239500 48 | -------------------------------------------------------------------------------- /ex1/featureNormalize.m: -------------------------------------------------------------------------------- 1 | function [X_norm, mu, sigma] = featureNormalize(X) 2 | %FEATURENORMALIZE Normalizes the features in X 3 | % FEATURENORMALIZE(X) returns a normalized version of X where 4 | % the mean value of each feature is 0 and the standard deviation 5 | % is 1. This is often a good preprocessing step to do when 6 | % working with learning algorithms. 7 | 8 | % You need to set these values correctly 9 | X_norm = X; 10 | mu = zeros(1, size(X, 2)); 11 | sigma = zeros(1, size(X, 2)); 12 | 13 | % ====================== YOUR CODE HERE ====================== 14 | % Instructions: First, for each feature dimension, compute the mean 15 | % of the feature and subtract it from the dataset, 16 | % storing the mean value in mu. Next, compute the 17 | % standard deviation of each feature and divide 18 | % each feature by it's standard deviation, storing 19 | % the standard deviation in sigma. 20 | % 21 | % Note that X is a matrix where each column is a 22 | % feature and each row is an example. You need 23 | % to perform the normalization separately for 24 | % each feature. 25 | % 26 | % Hint: You might find the 'mean' and 'std' functions useful. 27 | % 28 | 29 | mu = mean(X); 30 | sigma = std(X); 31 | 32 | % bsxfun applies function element-by-element to two maticies 33 | X_norm = bsxfun(@minus, X, mu); 34 | X_norm = bsxfun(@rdivide, X_norm, sigma); 35 | % ============================================================ 36 | 37 | end 38 | -------------------------------------------------------------------------------- /ex1/gradientDescent.m: -------------------------------------------------------------------------------- 1 | function [theta, J_history] = gradientDescent(X, y, theta, alpha, num_iters) 2 | %GRADIENTDESCENT Performs gradient descent to learn theta 3 | % theta = GRADIENTDESENT(X, y, theta, alpha, num_iters) updates theta by 4 | % taking num_iters gradient steps with learning rate alpha 5 | 6 | % Initialize some useful values 7 | m = length(y); % number of training examples 8 | J_history = zeros(num_iters, 1); 9 | 10 | for iter = 1:num_iters 11 | 12 | % ====================== YOUR CODE HERE ====================== 13 | % Instructions: Perform a single gradient step on the parameter vector 14 | % theta. 15 | % 16 | % Hint: While debugging, it can be useful to print out the values 17 | % of the cost function (computeCost) and gradient here. 
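% For reference, the vectorized update below applies, to all parameters at
% once, theta_j := theta_j - alpha * (1/m) * sum_i (h_theta(x_i) - y_i) * x_ij.
% An equivalent per-parameter sketch (illustrative only):
%
%   grad = zeros(size(theta));
%   for j = 1:length(theta)
%     grad(j) = (1/m) * sum((X*theta - y) .* X(:, j));
%   end
%   theta = theta - alpha * grad;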
18 | % 19 | theta = theta - alpha*(1/m)*(X'*(X*theta - y)); 20 | 21 | % ============================================================ 22 | 23 | % Save the cost J in every iteration 24 | J_history(iter) = computeCost(X, y, theta); 25 | 26 | end 27 | 28 | end 29 | -------------------------------------------------------------------------------- /ex1/gradientDescentMulti.m: -------------------------------------------------------------------------------- 1 | function [theta, J_history] = gradientDescentMulti(X, y, theta, alpha, num_iters) 2 | %GRADIENTDESCENTMULTI Performs gradient descent to learn theta 3 | % theta = GRADIENTDESCENTMULTI(x, y, theta, alpha, num_iters) updates theta by 4 | % taking num_iters gradient steps with learning rate alpha 5 | [theta, J_history] = gradientDescent(X, y, theta, alpha, num_iters); 6 | end 7 | -------------------------------------------------------------------------------- /ex1/normalEqn.m: -------------------------------------------------------------------------------- 1 | function [theta] = normalEqn(X, y) 2 | %NORMALEQN Computes the closed-form solution to linear regression 3 | % NORMALEQN(X,y) computes the closed-form solution to linear 4 | % regression using the normal equations. 5 | 6 | theta = zeros(size(X, 2), 1); 7 | 8 | % ====================== YOUR CODE HERE ====================== 9 | % Instructions: Complete the code to compute the closed form solution 10 | % to linear regression and put the result in theta. 11 | % 12 | 13 | % ---------------------- Sample Solution ---------------------- 14 | theta = pinv(X'*X)*X'*y; 15 | 16 | % ------------------------------------------------------------- 17 | 18 | 19 | % ============================================================ 20 | 21 | end 22 | -------------------------------------------------------------------------------- /ex1/plotData.m: -------------------------------------------------------------------------------- 1 | function plotData(x, y) 2 | %PLOTDATA Plots the data points x and y into a new figure 3 | % PLOTDATA(x,y) plots the data points and gives the figure axes labels of 4 | % population and profit. 5 | 6 | % ====================== YOUR CODE HERE ====================== 7 | % Instructions: Plot the training data into a figure using the 8 | % "figure" and "plot" commands. Set the axes labels using 9 | % the "xlabel" and "ylabel" commands. Assume the 10 | % population and revenue data have been passed in 11 | % as the x and y arguments of this function. 12 | % 13 | % Hint: You can use the 'rx' option with plot to have the markers 14 | % appear as red crosses. 
Furthermore, you can make the 15 | % markers larger by using plot(..., 'rx', 'MarkerSize', 10); 16 | 17 | figure; % open a new figure window 18 | 19 | plot(x, y, 'rx', 'MarkerSize', 10); 20 | xlabel('Population'); 21 | ylabel('Revenue'); 22 | 23 | % ============================================================ 24 | 25 | end 26 | -------------------------------------------------------------------------------- /ex1/warmUpExercise.m: -------------------------------------------------------------------------------- 1 | function A = warmUpExercise() 2 | %WARMUPEXERCISE Example function in octave 3 | % A = WARMUPEXERCISE() is an example function that returns the 5x5 identity matrix 4 | 5 | A = []; 6 | % ============= YOUR CODE HERE ============== 7 | % Instructions: Return the 5x5 identity matrix 8 | % In octave, we return values by defining which variables 9 | % represent the return values (at the top of the file) 10 | % and then set them accordingly. 11 | 12 | A = eye(5); 13 | 14 | % =========================================== 15 | 16 | 17 | end 18 | -------------------------------------------------------------------------------- /ex2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/ex2.pdf -------------------------------------------------------------------------------- /ex2/costFunction.m: -------------------------------------------------------------------------------- 1 | function [J, grad] = costFunction(theta, X, y) 2 | %COSTFUNCTION Compute cost and gradient for logistic regression 3 | % J = COSTFUNCTION(theta, X, y) computes the cost of using theta as the 4 | % parameter for logistic regression and the gradient of the cost 5 | % w.r.t. to the parameters. 6 | 7 | % Initialize some useful values 8 | m = length(y); % number of training examples 9 | 10 | % You need to return the following variables correctly 11 | J = 0; 12 | grad = zeros(size(theta)); 13 | 14 | % ====================== YOUR CODE HERE ====================== 15 | % Instructions: Compute the cost of a particular choice of theta. 16 | % You should set J to the cost. 17 | % Compute the partial derivatives and set grad to the partial 18 | % derivatives of the cost w.r.t. each parameter in theta 19 | % 20 | % Note: grad should have the same dimensions as theta 21 | % 22 | h0 = sigmoid(X*theta); 23 | 24 | J = (1/m)*sum(-y.*log(h0) - (1-y).*log(1-h0)); 25 | grad = (1/m)*(X'*(h0-y)); 26 | % ============================================================= 27 | 28 | end 29 | -------------------------------------------------------------------------------- /ex2/costFunctionReg.m: -------------------------------------------------------------------------------- 1 | function [J, grad] = costFunctionReg(theta, X, y, lambda) 2 | %COSTFUNCTIONREG Compute cost and gradient for logistic regression with regularization 3 | % J = COSTFUNCTIONREG(theta, X, y, lambda) computes the cost of using 4 | % theta as the parameter for regularized logistic regression and the 5 | % gradient of the cost w.r.t. to the parameters. 6 | 7 | % Initialize some useful values 8 | m = length(y); % number of training examples 9 | 10 | % You need to return the following variables correctly 11 | J = 0; 12 | grad = zeros(size(theta)); 13 | 14 | % ====================== YOUR CODE HERE ====================== 15 | % Instructions: Compute the cost of a particular choice of theta. 16 | % You should set J to the cost. 
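% For reference, the implementation below reuses costFunction and then adds
% the regularization term lambda/(2*m) * sum(theta(2:end).^2) to J and
% (lambda/m)*theta(j) to each gradient component with j >= 2; theta(1),
% the intercept parameter, is intentionally left unpenalized.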
17 | % Compute the partial derivatives and set grad to the partial 18 | % derivatives of the cost w.r.t. each parameter in theta 19 | 20 | [J, grad] = costFunction(theta, X, y); 21 | penalize = sum(theta(2:end) .^ 2); 22 | J = J + lambda/(2*m) * penalize; 23 | 24 | grad(2:end) = grad(2:end) + (lambda/m)*theta(2:end); 25 | % ============================================================= 26 | 27 | end 28 | -------------------------------------------------------------------------------- /ex2/ex2.m: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env octave 2 | %% Machine Learning Online Class - Exercise 2: Logistic Regression 3 | % 4 | % Instructions 5 | % ------------ 6 | % 7 | % This file contains code that helps you get started on the logistic 8 | % regression exercise. You will need to complete the following functions 9 | % in this exericse: 10 | % 11 | % sigmoid.m 12 | % costFunction.m 13 | % predict.m 14 | % costFunctionReg.m 15 | % 16 | % For this exercise, you will not need to change any code in this file, 17 | % or any other files other than those mentioned above. 18 | % 19 | 20 | %% Initialization 21 | clear ; close all; clc 22 | 23 | %% Load Data 24 | % The first two columns contains the exam scores and the third column 25 | % contains the label. 26 | 27 | data = load('ex2data1.txt'); 28 | X = data(:, [1, 2]); y = data(:, 3); 29 | 30 | %% ==================== Part 1: Plotting ==================== 31 | % We start the exercise by first plotting the data to understand the 32 | % the problem we are working with. 33 | 34 | fprintf(['Plotting data with + indicating (y = 1) examples and o ' ... 35 | 'indicating (y = 0) examples.\n']); 36 | 37 | plotData(X, y); 38 | 39 | % Put some labels 40 | hold on; 41 | % Labels and Legend 42 | xlabel('Exam 1 score') 43 | ylabel('Exam 2 score') 44 | 45 | % Specified in plot order 46 | legend('Admitted', 'Not admitted') 47 | hold off; 48 | 49 | fprintf('\nProgram paused. Press enter to continue.\n'); 50 | pause; 51 | 52 | 53 | %% ============ Part 2: Compute Cost and Gradient ============ 54 | % In this part of the exercise, you will implement the cost and gradient 55 | % for logistic regression. You neeed to complete the code in 56 | % costFunction.m 57 | 58 | % Setup the data matrix appropriately, and add ones for the intercept term 59 | [m, n] = size(X); 60 | 61 | % Add intercept term to x and X_test 62 | X = [ones(m, 1) X]; 63 | 64 | % Initialize fitting parameters 65 | initial_theta = zeros(n + 1, 1); 66 | 67 | % Compute and display initial cost and gradient 68 | [cost, grad] = costFunction(initial_theta, X, y); 69 | 70 | fprintf('Cost at initial theta (zeros): %f\n', cost); 71 | fprintf('Gradient at initial theta (zeros): \n'); 72 | fprintf(' %f \n', grad); 73 | 74 | fprintf('\nProgram paused. Press enter to continue.\n'); 75 | pause; 76 | 77 | 78 | %% ============= Part 3: Optimizing using fminunc ============= 79 | % In this exercise, you will use a built-in function (fminunc) to find the 80 | % optimal parameters theta. 81 | 82 | % Set options for fminunc 83 | options = optimset('GradObj', 'on', 'MaxIter', 400); 84 | 85 | % Run fminunc to obtain the optimal theta 86 | % This function will return theta and the cost 87 | [theta, cost] = ... 
88 | fminunc(@(t)(costFunction(t, X, y)), initial_theta, options); 89 | 90 | % Print theta to screen 91 | fprintf('Cost at theta found by fminunc: %f\n', cost); 92 | fprintf('theta: \n'); 93 | fprintf(' %f \n', theta); 94 | 95 | % Plot Boundary 96 | plotDecisionBoundary(theta, X, y); 97 | 98 | % Put some labels 99 | hold on; 100 | % Labels and Legend 101 | xlabel('Exam 1 score') 102 | ylabel('Exam 2 score') 103 | 104 | % Specified in plot order 105 | legend('Admitted', 'Not admitted') 106 | hold off; 107 | 108 | fprintf('\nProgram paused. Press enter to continue.\n'); 109 | pause; 110 | 111 | %% ============== Part 4: Predict and Accuracies ============== 112 | % After learning the parameters, you'll like to use it to predict the outcomes 113 | % on unseen data. In this part, you will use the logistic regression model 114 | % to predict the probability that a student with score 20 on exam 1 and 115 | % score 80 on exam 2 will be admitted. 116 | % 117 | % Furthermore, you will compute the training and test set accuracies of 118 | % our model. 119 | % 120 | % Your task is to complete the code in predict.m 121 | 122 | % Predict probability for a student with score 45 on exam 1 123 | % and score 85 on exam 2 124 | 125 | prob = sigmoid([1 45 85] * theta); 126 | fprintf(['For a student with scores 45 and 85, we predict an admission ' ... 127 | 'probability of %f\n\n'], prob); 128 | 129 | % Compute accuracy on our training set 130 | p = predict(theta, X); 131 | 132 | fprintf('Train Accuracy: %f\n', mean(double(p == y)) * 100); 133 | 134 | fprintf('\nProgram paused. Press enter to continue.\n'); 135 | pause; 136 | 137 | -------------------------------------------------------------------------------- /ex2/ex2_reg.m: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env octave 2 | %% Machine Learning Online Class - Exercise 2: Logistic Regression 3 | % 4 | % Instructions 5 | % ------------ 6 | % 7 | % This file contains code that helps you get started on the second part 8 | % of the exercise which covers regularization with logistic regression. 9 | % 10 | % You will need to complete the following functions in this exericse: 11 | % 12 | % sigmoid.m 13 | % costFunction.m 14 | % predict.m 15 | % costFunctionReg.m 16 | % 17 | % For this exercise, you will not need to change any code in this file, 18 | % or any other files other than those mentioned above. 19 | % 20 | 21 | %% Initialization 22 | clear ; close all; clc 23 | 24 | %% Load Data 25 | % The first two columns contains the exam scores and the third column 26 | % contains the label. 27 | 28 | data = load('ex2data2.txt'); 29 | X = data(:, [1, 2]); y = data(:, 3); 30 | 31 | plotData(X, y); 32 | 33 | % Put some labels 34 | hold on; 35 | 36 | % Labels and Legend 37 | xlabel('Microchip Test 1') 38 | ylabel('Microchip Test 2') 39 | 40 | % Specified in plot order 41 | legend('y = 1', 'y = 0') 42 | hold off; 43 | 44 | 45 | %% =========== Part 1: Regularized Logistic Regression ============ 46 | % In this part, you are given a dataset with data points that are not 47 | % linearly separable. However, you would still like to use logistic 48 | % regression to classify the data points. 49 | % 50 | % To do so, you introduce more features to use -- in particular, you add 51 | % polynomial features to our data matrix (similar to polynomial 52 | % regression). 
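% Note: mapFeature.m expands the two raw features into every monomial
% x1^i * x2^j with i + j <= 6, which yields 28 columns (including the
% leading column of ones), so initial_theta below is a 28x1 vector.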
53 | % 54 | 55 | % Add Polynomial Features 56 | 57 | % Note that mapFeature also adds a column of ones for us, so the intercept 58 | % term is handled 59 | X = mapFeature(X(:,1), X(:,2)); 60 | 61 | % Initialize fitting parameters 62 | initial_theta = zeros(size(X, 2), 1); 63 | 64 | % Set regularization parameter lambda to 1 65 | lambda = 1; 66 | 67 | % Compute and display initial cost and gradient for regularized logistic 68 | % regression 69 | [cost, grad] = costFunctionReg(initial_theta, X, y, lambda); 70 | 71 | fprintf('Cost at initial theta (zeros): %f\n', cost); 72 | 73 | fprintf('\nProgram paused. Press enter to continue.\n'); 74 | pause; 75 | 76 | %% ============= Part 2: Regularization and Accuracies ============= 77 | % Optional Exercise: 78 | % In this part, you will get to try different values of lambda and 79 | % see how regularization affects the decision coundart 80 | % 81 | % Try the following values of lambda (0, 1, 10, 100). 82 | % 83 | % How does the decision boundary change when you vary lambda? How does 84 | % the training set accuracy vary? 85 | % 86 | 87 | % Initialize fitting parameters 88 | initial_theta = zeros(size(X, 2), 1); 89 | 90 | % Set regularization parameter lambda to 1 (you should vary this) 91 | lambda = 1; 92 | 93 | % Set Options 94 | options = optimset('GradObj', 'on', 'MaxIter', 400); 95 | 96 | % Optimize 97 | [theta, J, exit_flag] = ... 98 | fminunc(@(t)(costFunctionReg(t, X, y, lambda)), initial_theta, options); 99 | 100 | % Plot Boundary 101 | plotDecisionBoundary(theta, X, y); 102 | hold on; 103 | title(sprintf('lambda = %g', lambda)) 104 | 105 | % Labels and Legend 106 | xlabel('Microchip Test 1') 107 | ylabel('Microchip Test 2') 108 | 109 | legend('y = 1', 'y = 0', 'Decision boundary') 110 | hold off; 111 | 112 | % Compute accuracy on our training set 113 | p = predict(theta, X); 114 | 115 | fprintf('Train Accuracy: %f\n', mean(double(p == y)) * 100); 116 | 117 | pause; 118 | -------------------------------------------------------------------------------- /ex2/ex2data1.txt: -------------------------------------------------------------------------------- 1 | 34.62365962451697,78.0246928153624,0 2 | 30.28671076822607,43.89499752400101,0 3 | 35.84740876993872,72.90219802708364,0 4 | 60.18259938620976,86.30855209546826,1 5 | 79.0327360507101,75.3443764369103,1 6 | 45.08327747668339,56.3163717815305,0 7 | 61.10666453684766,96.51142588489624,1 8 | 75.02474556738889,46.55401354116538,1 9 | 76.09878670226257,87.42056971926803,1 10 | 84.43281996120035,43.53339331072109,1 11 | 95.86155507093572,38.22527805795094,0 12 | 75.01365838958247,30.60326323428011,0 13 | 82.30705337399482,76.48196330235604,1 14 | 69.36458875970939,97.71869196188608,1 15 | 39.53833914367223,76.03681085115882,0 16 | 53.9710521485623,89.20735013750205,1 17 | 69.07014406283025,52.74046973016765,1 18 | 67.94685547711617,46.67857410673128,0 19 | 70.66150955499435,92.92713789364831,1 20 | 76.97878372747498,47.57596364975532,1 21 | 67.37202754570876,42.83843832029179,0 22 | 89.67677575072079,65.79936592745237,1 23 | 50.534788289883,48.85581152764205,0 24 | 34.21206097786789,44.20952859866288,0 25 | 77.9240914545704,68.9723599933059,1 26 | 62.27101367004632,69.95445795447587,1 27 | 80.1901807509566,44.82162893218353,1 28 | 93.114388797442,38.80067033713209,0 29 | 61.83020602312595,50.25610789244621,0 30 | 38.78580379679423,64.99568095539578,0 31 | 61.379289447425,72.80788731317097,1 32 | 85.40451939411645,57.05198397627122,1 33 | 52.10797973193984,63.12762376881715,0 34 | 
52.04540476831827,69.43286012045222,1 35 | 40.23689373545111,71.16774802184875,0 36 | 54.63510555424817,52.21388588061123,0 37 | 33.91550010906887,98.86943574220611,0 38 | 64.17698887494485,80.90806058670817,1 39 | 74.78925295941542,41.57341522824434,0 40 | 34.1836400264419,75.2377203360134,0 41 | 83.90239366249155,56.30804621605327,1 42 | 51.54772026906181,46.85629026349976,0 43 | 94.44336776917852,65.56892160559052,1 44 | 82.36875375713919,40.61825515970618,0 45 | 51.04775177128865,45.82270145776001,0 46 | 62.22267576120188,52.06099194836679,0 47 | 77.19303492601364,70.45820000180959,1 48 | 97.77159928000232,86.7278223300282,1 49 | 62.07306379667647,96.76882412413983,1 50 | 91.56497449807442,88.69629254546599,1 51 | 79.94481794066932,74.16311935043758,1 52 | 99.2725269292572,60.99903099844988,1 53 | 90.54671411399852,43.39060180650027,1 54 | 34.52451385320009,60.39634245837173,0 55 | 50.2864961189907,49.80453881323059,0 56 | 49.58667721632031,59.80895099453265,0 57 | 97.64563396007767,68.86157272420604,1 58 | 32.57720016809309,95.59854761387875,0 59 | 74.24869136721598,69.82457122657193,1 60 | 71.79646205863379,78.45356224515052,1 61 | 75.3956114656803,85.75993667331619,1 62 | 35.28611281526193,47.02051394723416,0 63 | 56.25381749711624,39.26147251058019,0 64 | 30.05882244669796,49.59297386723685,0 65 | 44.66826172480893,66.45008614558913,0 66 | 66.56089447242954,41.09209807936973,0 67 | 40.45755098375164,97.53518548909936,1 68 | 49.07256321908844,51.88321182073966,0 69 | 80.27957401466998,92.11606081344084,1 70 | 66.74671856944039,60.99139402740988,1 71 | 32.72283304060323,43.30717306430063,0 72 | 64.0393204150601,78.03168802018232,1 73 | 72.34649422579923,96.22759296761404,1 74 | 60.45788573918959,73.09499809758037,1 75 | 58.84095621726802,75.85844831279042,1 76 | 99.82785779692128,72.36925193383885,1 77 | 47.26426910848174,88.47586499559782,1 78 | 50.45815980285988,75.80985952982456,1 79 | 60.45555629271532,42.50840943572217,0 80 | 82.22666157785568,42.71987853716458,0 81 | 88.9138964166533,69.80378889835472,1 82 | 94.83450672430196,45.69430680250754,1 83 | 67.31925746917527,66.58935317747915,1 84 | 57.23870631569862,59.51428198012956,1 85 | 80.36675600171273,90.96014789746954,1 86 | 68.46852178591112,85.59430710452014,1 87 | 42.0754545384731,78.84478600148043,0 88 | 75.47770200533905,90.42453899753964,1 89 | 78.63542434898018,96.64742716885644,1 90 | 52.34800398794107,60.76950525602592,0 91 | 94.09433112516793,77.15910509073893,1 92 | 90.44855097096364,87.50879176484702,1 93 | 55.48216114069585,35.57070347228866,0 94 | 74.49269241843041,84.84513684930135,1 95 | 89.84580670720979,45.35828361091658,1 96 | 83.48916274498238,48.38028579728175,1 97 | 42.2617008099817,87.10385094025457,1 98 | 99.31500880510394,68.77540947206617,1 99 | 55.34001756003703,64.9319380069486,1 100 | 74.77589300092767,89.52981289513276,1 101 | -------------------------------------------------------------------------------- /ex2/ex2data2.txt: -------------------------------------------------------------------------------- 1 | 0.051267,0.69956,1 2 | -0.092742,0.68494,1 3 | -0.21371,0.69225,1 4 | -0.375,0.50219,1 5 | -0.51325,0.46564,1 6 | -0.52477,0.2098,1 7 | -0.39804,0.034357,1 8 | -0.30588,-0.19225,1 9 | 0.016705,-0.40424,1 10 | 0.13191,-0.51389,1 11 | 0.38537,-0.56506,1 12 | 0.52938,-0.5212,1 13 | 0.63882,-0.24342,1 14 | 0.73675,-0.18494,1 15 | 0.54666,0.48757,1 16 | 0.322,0.5826,1 17 | 0.16647,0.53874,1 18 | -0.046659,0.81652,1 19 | -0.17339,0.69956,1 20 | -0.47869,0.63377,1 21 | -0.60541,0.59722,1 22 | 
-0.62846,0.33406,1 23 | -0.59389,0.005117,1 24 | -0.42108,-0.27266,1 25 | -0.11578,-0.39693,1 26 | 0.20104,-0.60161,1 27 | 0.46601,-0.53582,1 28 | 0.67339,-0.53582,1 29 | -0.13882,0.54605,1 30 | -0.29435,0.77997,1 31 | -0.26555,0.96272,1 32 | -0.16187,0.8019,1 33 | -0.17339,0.64839,1 34 | -0.28283,0.47295,1 35 | -0.36348,0.31213,1 36 | -0.30012,0.027047,1 37 | -0.23675,-0.21418,1 38 | -0.06394,-0.18494,1 39 | 0.062788,-0.16301,1 40 | 0.22984,-0.41155,1 41 | 0.2932,-0.2288,1 42 | 0.48329,-0.18494,1 43 | 0.64459,-0.14108,1 44 | 0.46025,0.012427,1 45 | 0.6273,0.15863,1 46 | 0.57546,0.26827,1 47 | 0.72523,0.44371,1 48 | 0.22408,0.52412,1 49 | 0.44297,0.67032,1 50 | 0.322,0.69225,1 51 | 0.13767,0.57529,1 52 | -0.0063364,0.39985,1 53 | -0.092742,0.55336,1 54 | -0.20795,0.35599,1 55 | -0.20795,0.17325,1 56 | -0.43836,0.21711,1 57 | -0.21947,-0.016813,1 58 | -0.13882,-0.27266,1 59 | 0.18376,0.93348,0 60 | 0.22408,0.77997,0 61 | 0.29896,0.61915,0 62 | 0.50634,0.75804,0 63 | 0.61578,0.7288,0 64 | 0.60426,0.59722,0 65 | 0.76555,0.50219,0 66 | 0.92684,0.3633,0 67 | 0.82316,0.27558,0 68 | 0.96141,0.085526,0 69 | 0.93836,0.012427,0 70 | 0.86348,-0.082602,0 71 | 0.89804,-0.20687,0 72 | 0.85196,-0.36769,0 73 | 0.82892,-0.5212,0 74 | 0.79435,-0.55775,0 75 | 0.59274,-0.7405,0 76 | 0.51786,-0.5943,0 77 | 0.46601,-0.41886,0 78 | 0.35081,-0.57968,0 79 | 0.28744,-0.76974,0 80 | 0.085829,-0.75512,0 81 | 0.14919,-0.57968,0 82 | -0.13306,-0.4481,0 83 | -0.40956,-0.41155,0 84 | -0.39228,-0.25804,0 85 | -0.74366,-0.25804,0 86 | -0.69758,0.041667,0 87 | -0.75518,0.2902,0 88 | -0.69758,0.68494,0 89 | -0.4038,0.70687,0 90 | -0.38076,0.91886,0 91 | -0.50749,0.90424,0 92 | -0.54781,0.70687,0 93 | 0.10311,0.77997,0 94 | 0.057028,0.91886,0 95 | -0.10426,0.99196,0 96 | -0.081221,1.1089,0 97 | 0.28744,1.087,0 98 | 0.39689,0.82383,0 99 | 0.63882,0.88962,0 100 | 0.82316,0.66301,0 101 | 0.67339,0.64108,0 102 | 1.0709,0.10015,0 103 | -0.046659,-0.57968,0 104 | -0.23675,-0.63816,0 105 | -0.15035,-0.36769,0 106 | -0.49021,-0.3019,0 107 | -0.46717,-0.13377,0 108 | -0.28859,-0.060673,0 109 | -0.61118,-0.067982,0 110 | -0.66302,-0.21418,0 111 | -0.59965,-0.41886,0 112 | -0.72638,-0.082602,0 113 | -0.83007,0.31213,0 114 | -0.72062,0.53874,0 115 | -0.59389,0.49488,0 116 | -0.48445,0.99927,0 117 | -0.0063364,0.99927,0 118 | 0.63265,-0.030612,0 119 | -------------------------------------------------------------------------------- /ex2/mapFeature.m: -------------------------------------------------------------------------------- 1 | function out = mapFeature(X1, X2) 2 | % MAPFEATURE Feature mapping function to polynomial features 3 | % 4 | % MAPFEATURE(X1, X2) maps the two input features 5 | % to quadratic features used in the regularization exercise. 6 | % 7 | % Returns a new feature array with more features, comprising of 8 | % X1, X2, X1.^2, X2.^2, X1*X2, X1*X2.^2, etc.. 9 | % 10 | % Inputs X1, X2 must be the same size 11 | % 12 | 13 | degree = 6; 14 | out = ones(size(X1(:,1))); 15 | for i = 1:degree 16 | for j = 0:i 17 | out(:, end+1) = (X1.^(i-j)).*(X2.^j); 18 | end 19 | end 20 | 21 | end -------------------------------------------------------------------------------- /ex2/plotData.m: -------------------------------------------------------------------------------- 1 | function plotData(X, y) 2 | %PLOTDATA Plots the data points X and y into a new figure 3 | % PLOTDATA(x,y) plots the data points with + for the positive examples 4 | % and o for the negative examples. X is assumed to be a Mx2 matrix. 
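% Example usage (as in ex2.m): plotData(data(:, [1, 2]), data(:, 3));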
5 | 6 | % Create New Figure 7 | figure; hold on; 8 | 9 | % ====================== YOUR CODE HERE ====================== 10 | % Instructions: Plot the positive and negative examples on a 11 | % 2D plot, using the option 'k+' for the positive 12 | % examples and 'ko' for the negative examples. 13 | % 14 | 15 | negative = find(y==0); positive = find(y==1); 16 | plot(X(positive, 1), X(positive, 2), 'k+') 17 | plot(X(negative, 1), X(negative, 2), 'ko') 18 | 19 | % ========================================================================= 20 | 21 | 22 | 23 | hold off; 24 | 25 | end 26 | -------------------------------------------------------------------------------- /ex2/plotDecisionBoundary.m: -------------------------------------------------------------------------------- 1 | function plotDecisionBoundary(theta, X, y) 2 | %PLOTDECISIONBOUNDARY Plots the data points X and y into a new figure with 3 | %the decision boundary defined by theta 4 | % PLOTDECISIONBOUNDARY(theta, X,y) plots the data points with + for the 5 | % positive examples and o for the negative examples. X is assumed to be 6 | % a either 7 | % 1) Mx3 matrix, where the first column is an all-ones column for the 8 | % intercept. 9 | % 2) MxN, N>3 matrix, where the first column is all-ones 10 | 11 | % Plot Data 12 | plotData(X(:,2:3), y); 13 | hold on 14 | 15 | if size(X, 2) <= 3 16 | % Only need 2 points to define a line, so choose two endpoints 17 | plot_x = [min(X(:,2))-2, max(X(:,2))+2]; 18 | 19 | % Calculate the decision boundary line 20 | plot_y = (-1./theta(3)).*(theta(2).*plot_x + theta(1)); 21 | 22 | % Plot, and adjust axes for better viewing 23 | plot(plot_x, plot_y) 24 | 25 | % Legend, specific for the exercise 26 | legend('Admitted', 'Not admitted', 'Decision Boundary') 27 | axis([30, 100, 30, 100]) 28 | else 29 | % Here is the grid range 30 | u = linspace(-1, 1.5, 50); 31 | v = linspace(-1, 1.5, 50); 32 | 33 | z = zeros(length(u), length(v)); 34 | % Evaluate z = theta*x over the grid 35 | for i = 1:length(u) 36 | for j = 1:length(v) 37 | z(i,j) = mapFeature(u(i), v(j))*theta; 38 | end 39 | end 40 | z = z'; % important to transpose z before calling contour 41 | 42 | % Plot z = 0 43 | % Notice you need to specify the range [0, 0] 44 | contour(u, v, z, [0, 0], 'LineWidth', 2) 45 | end 46 | hold off 47 | 48 | end 49 | -------------------------------------------------------------------------------- /ex2/predict.m: -------------------------------------------------------------------------------- 1 | function p = predict(theta, X) 2 | %PREDICT Predict whether the label is 0 or 1 using learned logistic 3 | %regression parameters theta 4 | % p = PREDICT(theta, X) computes the predictions for X using a 5 | % threshold at 0.5 (i.e., if sigmoid(theta'*x) >= 0.5, predict 1) 6 | 7 | m = size(X, 1); % Number of training examples 8 | 9 | % You need to return the following variables correctly 10 | p = zeros(m, 1); 11 | 12 | % ====================== YOUR CODE HERE ====================== 13 | % Instructions: Complete the following code to make predictions using 14 | % your learned logistic regression parameters. 
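% Note: rounding sigmoid(X*theta), as done below, is equivalent to the
% threshold rule p = (sigmoid(X*theta) >= 0.5) described above, because
% round() maps values of at least 0.5 to 1 and smaller values to 0.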
15 | % You should set p to a vector of 0's and 1's 16 | % 17 | 18 | p = round(sigmoid(X*theta)); 19 | % ========================================================================= 20 | 21 | 22 | end 23 | -------------------------------------------------------------------------------- /ex2/sigmoid.m: -------------------------------------------------------------------------------- 1 | function g = sigmoid(z) 2 | %SIGMOID Compute sigmoid functoon 3 | % J = SIGMOID(z) computes the sigmoid of z. 4 | 5 | % You need to return the following variables correctly 6 | g = zeros(size(z)); 7 | 8 | % ====================== YOUR CODE HERE ====================== 9 | % Instructions: Compute the sigmoid of each value of z (z can be a matrix, 10 | % vector or scalar). 11 | g = 1 ./ (1 + e .^ -z); 12 | 13 | % ============================================================= 14 | 15 | end 16 | -------------------------------------------------------------------------------- /ex3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/ex3.pdf -------------------------------------------------------------------------------- /ex3/displayData.m: -------------------------------------------------------------------------------- 1 | function [h, display_array] = displayData(X, example_width) 2 | %DISPLAYDATA Display 2D data in a nice grid 3 | % [h, display_array] = DISPLAYDATA(X, example_width) displays 2D data 4 | % stored in X in a nice grid. It returns the figure handle h and the 5 | % displayed array if requested. 6 | 7 | % Set example_width automatically if not passed in 8 | if ~exist('example_width', 'var') || isempty(example_width) 9 | example_width = round(sqrt(size(X, 2))); 10 | end 11 | 12 | % Gray Image 13 | colormap(gray); 14 | 15 | % Compute rows, cols 16 | [m n] = size(X); 17 | example_height = (n / example_width); 18 | 19 | % Compute number of items to display 20 | display_rows = floor(sqrt(m)); 21 | display_cols = ceil(m / display_rows); 22 | 23 | % Between images padding 24 | pad = 1; 25 | 26 | % Setup blank display 27 | display_array = - ones(pad + display_rows * (example_height + pad), ... 28 | pad + display_cols * (example_width + pad)); 29 | 30 | % Copy each example into a patch on the display array 31 | curr_ex = 1; 32 | for j = 1:display_rows 33 | for i = 1:display_cols 34 | if curr_ex > m, 35 | break; 36 | end 37 | % Copy the patch 38 | 39 | % Get the max value of the patch 40 | max_val = max(abs(X(curr_ex, :))); 41 | display_array(pad + (j - 1) * (example_height + pad) + (1:example_height), ... 42 | pad + (i - 1) * (example_width + pad) + (1:example_width)) = ... 43 | reshape(X(curr_ex, :), example_height, example_width) / max_val; 44 | curr_ex = curr_ex + 1; 45 | end 46 | if curr_ex > m, 47 | break; 48 | end 49 | end 50 | 51 | % Display Image 52 | h = imagesc(display_array, [-1 1]); 53 | 54 | % Do not show axis 55 | axis image off 56 | 57 | drawnow; 58 | 59 | end 60 | -------------------------------------------------------------------------------- /ex3/ex3.m: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env octave 2 | %% Machine Learning Online Class - Exercise 3 | Part 1: One-vs-all 3 | 4 | % Instructions 5 | % ------------ 6 | % 7 | % This file contains code that helps you get started on the 8 | % linear exercise. 
You will need to complete the following functions 9 | % in this exericse: 10 | % 11 | % lrCostFunction.m (logistic regression cost function) 12 | % oneVsAll.m 13 | % predictOneVsAll.m 14 | % predict.m 15 | % 16 | % For this exercise, you will not need to change any code in this file, 17 | % or any other files other than those mentioned above. 18 | % 19 | 20 | %% Initialization 21 | clear ; close all; clc 22 | 23 | %% Setup the parameters you will use for this part of the exercise 24 | input_layer_size = 400; % 20x20 Input Images of Digits 25 | num_labels = 10; % 10 labels, from 1 to 10 26 | % (note that we have mapped "0" to label 10) 27 | 28 | %% =========== Part 1: Loading and Visualizing Data ============= 29 | % We start the exercise by first loading and visualizing the dataset. 30 | % You will be working with a dataset that contains handwritten digits. 31 | % 32 | 33 | % Load Training Data 34 | fprintf('Loading and Visualizing Data ...\n') 35 | 36 | load('ex3data1.mat'); % training data stored in arrays X, y 37 | m = size(X, 1); 38 | 39 | % Randomly select 100 data points to display 40 | rand_indices = randperm(m); 41 | sel = X(rand_indices(1:100), :); 42 | 43 | displayData(sel); 44 | 45 | fprintf('Program paused. Press enter to continue.\n'); 46 | pause; 47 | 48 | %% ============ Part 2: Vectorize Logistic Regression ============ 49 | % In this part of the exercise, you will reuse your logistic regression 50 | % code from the last exercise. You task here is to make sure that your 51 | % regularized logistic regression implementation is vectorized. After 52 | % that, you will implement one-vs-all classification for the handwritten 53 | % digit dataset. 54 | % 55 | 56 | fprintf('\nTraining One-vs-All Logistic Regression...\n') 57 | 58 | lambda = 0.1; 59 | [all_theta] = oneVsAll(X, y, num_labels, lambda); 60 | 61 | fprintf('Program paused. Press enter to continue.\n'); 62 | pause; 63 | 64 | 65 | %% ================ Part 3: Predict for One-Vs-All ================ 66 | % After ... 67 | pred = predictOneVsAll(all_theta, X); 68 | 69 | fprintf('\nTraining Set Accuracy: %f\n', mean(double(pred == y)) * 100); 70 | 71 | pause; 72 | -------------------------------------------------------------------------------- /ex3/ex3_nn.m: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env octave 2 | %% Machine Learning Online Class - Exercise 3 | Part 2: Neural Networks 3 | 4 | % Instructions 5 | % ------------ 6 | % 7 | % This file contains code that helps you get started on the 8 | % linear exercise. You will need to complete the following functions 9 | % in this exericse: 10 | % 11 | % lrCostFunction.m (logistic regression cost function) 12 | % oneVsAll.m 13 | % predictOneVsAll.m 14 | % predict.m 15 | % 16 | % For this exercise, you will not need to change any code in this file, 17 | % or any other files other than those mentioned above. 18 | % 19 | 20 | %% Initialization 21 | clear ; close all; clc 22 | 23 | %% Setup the parameters you will use for this exercise 24 | input_layer_size = 400; % 20x20 Input Images of Digits 25 | hidden_layer_size = 25; % 25 hidden units 26 | num_labels = 10; % 10 labels, from 1 to 10 27 | % (note that we have mapped "0" to label 10) 28 | 29 | %% =========== Part 1: Loading and Visualizing Data ============= 30 | % We start the exercise by first loading and visualizing the dataset. 31 | % You will be working with a dataset that contains handwritten digits. 
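For the one-vs-all training described above, each of the num_labels classifiers is trained against a binary relabeling of y obtained with the comparison y == c (the hint in oneVsAll.m further down spells this out). A small made-up example of that relabeling, not data from ex3data1.mat:

y = [1; 3; 2; 3; 1];              % hypothetical multi-class labels
c = 3;
binary_labels = double(y == c);
disp(binary_labels');              % prints 0 1 0 1 0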
32 | % 33 | 34 | % Load Training Data 35 | fprintf('Loading and Visualizing Data ...\n') 36 | 37 | load('ex3data1.mat'); 38 | m = size(X, 1); 39 | 40 | % Randomly select 100 data points to display 41 | sel = randperm(size(X, 1)); 42 | sel = sel(1:100); 43 | 44 | displayData(X(sel, :)); 45 | 46 | fprintf('Program paused. Press enter to continue.\n'); 47 | pause; 48 | 49 | %% ================ Part 2: Loading Pameters ================ 50 | % In this part of the exercise, we load some pre-initialized 51 | % neural network parameters. 52 | 53 | fprintf('\nLoading Saved Neural Network Parameters ...\n') 54 | 55 | % Load the weights into variables Theta1 and Theta2 56 | load('ex3weights.mat'); 57 | 58 | %% ================= Part 3: Implement Predict ================= 59 | % After training the neural network, we would like to use it to predict 60 | % the labels. You will now implement the "predict" function to use the 61 | % neural network to predict the labels of the training set. This lets 62 | % you compute the training set accuracy. 63 | 64 | pred = predict(Theta1, Theta2, X); 65 | 66 | fprintf('\nTraining Set Accuracy: %f\n', mean(double(pred == y)) * 100); 67 | 68 | fprintf('Program paused. Press enter to continue.\n'); 69 | pause; 70 | 71 | % To give you an idea of the network's output, you can also run 72 | % through the examples one at the a time to see what it is predicting. 73 | 74 | % Randomly permute examples 75 | rp = randperm(m); 76 | 77 | for i = 1:m 78 | % Display 79 | fprintf('\nDisplaying Example Image\n'); 80 | displayData(X(rp(i), :)); 81 | 82 | pred = predict(Theta1, Theta2, X(rp(i),:)); 83 | fprintf('\nNeural Network Prediction: %d (digit %d)\n', pred, mod(pred, 10)); 84 | 85 | % Pause 86 | fprintf('Program paused. Press enter to continue.\n'); 87 | pause; 88 | end 89 | 90 | -------------------------------------------------------------------------------- /ex3/ex3data1.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/ex3/ex3data1.mat -------------------------------------------------------------------------------- /ex3/ex3weights.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/ex3/ex3weights.mat -------------------------------------------------------------------------------- /ex3/fmincg.m: -------------------------------------------------------------------------------- 1 | function [X, fX, i] = fmincg(f, X, options, P1, P2, P3, P4, P5) 2 | % Minimize a continuous differentialble multivariate function. Starting point 3 | % is given by "X" (D by 1), and the function named in the string "f", must 4 | % return a function value and a vector of partial derivatives. The Polack- 5 | % Ribiere flavour of conjugate gradients is used to compute search directions, 6 | % and a line search using quadratic and cubic polynomial approximations and the 7 | % Wolfe-Powell stopping criteria is used together with the slope ratio method 8 | % for guessing initial step sizes. Additionally a bunch of checks are made to 9 | % make sure that exploration is taking place and that extrapolation will not 10 | % be unboundedly large. The "length" gives the length of the run: if it is 11 | % positive, it gives the maximum number of line searches, if negative its 12 | % absolute gives the maximum allowed number of function evaluations. 
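fmincg only needs a handle that returns both the function value and its gradient, so it can be exercised on a toy problem before plugging in lrCostFunction. A minimal standalone sketch; the quadratic below is made up purely for illustration and is not part of the exercise:

quadratic = @(x) deal((x - 3).^2, 2*(x - 3));   % returns [value, gradient]
options = optimset('MaxIter', 25);              % maps to the "length" described above
[x_min, cost_history] = fmincg(quadratic, 0, options);
disp(x_min);                                    % converges close to 3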
You can 13 | % (optionally) give "length" a second component, which will indicate the 14 | % reduction in function value to be expected in the first line-search (defaults 15 | % to 1.0). The function returns when either its length is up, or if no further 16 | % progress can be made (ie, we are at a minimum, or so close that due to 17 | % numerical problems, we cannot get any closer). If the function terminates 18 | % within a few iterations, it could be an indication that the function value 19 | % and derivatives are not consistent (ie, there may be a bug in the 20 | % implementation of your "f" function). The function returns the found 21 | % solution "X", a vector of function values "fX" indicating the progress made 22 | % and "i" the number of iterations (line searches or function evaluations, 23 | % depending on the sign of "length") used. 24 | % 25 | % Usage: [X, fX, i] = fmincg(f, X, options, P1, P2, P3, P4, P5) 26 | % 27 | % See also: checkgrad 28 | % 29 | % Copyright (C) 2001 and 2002 by Carl Edward Rasmussen. Date 2002-02-13 30 | % 31 | % 32 | % (C) Copyright 1999, 2000 & 2001, Carl Edward Rasmussen 33 | % 34 | % Permission is granted for anyone to copy, use, or modify these 35 | % programs and accompanying documents for purposes of research or 36 | % education, provided this copyright notice is retained, and note is 37 | % made of any changes that have been made. 38 | % 39 | % These programs and documents are distributed without any warranty, 40 | % express or implied. As the programs were written for research 41 | % purposes only, they have not been tested to the degree that would be 42 | % advisable in any important application. All use of these programs is 43 | % entirely at the user's own risk. 44 | % 45 | % [ml-class] Changes Made: 46 | % 1) Function name and argument specifications 47 | % 2) Output display 48 | % 49 | 50 | % Read options 51 | if exist('options', 'var') && ~isempty(options) && isfield(options, 'MaxIter') 52 | length = options.MaxIter; 53 | else 54 | length = 100; 55 | end 56 | 57 | 58 | RHO = 0.01; % a bunch of constants for line searches 59 | SIG = 0.5; % RHO and SIG are the constants in the Wolfe-Powell conditions 60 | INT = 0.1; % don't reevaluate within 0.1 of the limit of the current bracket 61 | EXT = 3.0; % extrapolate maximum 3 times the current bracket 62 | MAX = 20; % max 20 function evaluations per line search 63 | RATIO = 100; % maximum allowed slope ratio 64 | 65 | argstr = ['feval(f, X']; % compose string used to call function 66 | for i = 1:(nargin - 3) 67 | argstr = [argstr, ',P', int2str(i)]; 68 | end 69 | argstr = [argstr, ')']; 70 | 71 | if max(size(length)) == 2, red=length(2); length=length(1); else red=1; end 72 | S=['Iteration ']; 73 | 74 | i = 0; % zero the run length counter 75 | ls_failed = 0; % no previous line search has failed 76 | fX = []; 77 | [f1 df1] = eval(argstr); % get function value and gradient 78 | i = i + (length<0); % count epochs?! 79 | s = -df1; % search direction is steepest 80 | d1 = -s'*s; % this is the slope 81 | z1 = red/(1-d1); % initial step is red/(|s|+1) 82 | 83 | while i < abs(length) % while not finished 84 | i = i + (length>0); % count iterations?! 85 | 86 | X0 = X; f0 = f1; df0 = df1; % make a copy of current values 87 | X = X + z1*s; % begin line search 88 | [f2 df2] = eval(argstr); 89 | i = i + (length<0); % count epochs?! 
90 | d2 = df2'*s; 91 | f3 = f1; d3 = d1; z3 = -z1; % initialize point 3 equal to point 1 92 | if length>0, M = MAX; else M = min(MAX, -length-i); end 93 | success = 0; limit = -1; % initialize quanteties 94 | while 1 95 | while ((f2 > f1+z1*RHO*d1) || (d2 > -SIG*d1)) && (M > 0) 96 | limit = z1; % tighten the bracket 97 | if f2 > f1 98 | z2 = z3 - (0.5*d3*z3*z3)/(d3*z3+f2-f3); % quadratic fit 99 | else 100 | A = 6*(f2-f3)/z3+3*(d2+d3); % cubic fit 101 | B = 3*(f3-f2)-z3*(d3+2*d2); 102 | z2 = (sqrt(B*B-A*d2*z3*z3)-B)/A; % numerical error possible - ok! 103 | end 104 | if isnan(z2) || isinf(z2) 105 | z2 = z3/2; % if we had a numerical problem then bisect 106 | end 107 | z2 = max(min(z2, INT*z3),(1-INT)*z3); % don't accept too close to limits 108 | z1 = z1 + z2; % update the step 109 | X = X + z2*s; 110 | [f2 df2] = eval(argstr); 111 | M = M - 1; i = i + (length<0); % count epochs?! 112 | d2 = df2'*s; 113 | z3 = z3-z2; % z3 is now relative to the location of z2 114 | end 115 | if f2 > f1+z1*RHO*d1 || d2 > -SIG*d1 116 | break; % this is a failure 117 | elseif d2 > SIG*d1 118 | success = 1; break; % success 119 | elseif M == 0 120 | break; % failure 121 | end 122 | A = 6*(f2-f3)/z3+3*(d2+d3); % make cubic extrapolation 123 | B = 3*(f3-f2)-z3*(d3+2*d2); 124 | z2 = -d2*z3*z3/(B+sqrt(B*B-A*d2*z3*z3)); % num. error possible - ok! 125 | if ~isreal(z2) || isnan(z2) || isinf(z2) || z2 < 0 % num prob or wrong sign? 126 | if limit < -0.5 % if we have no upper limit 127 | z2 = z1 * (EXT-1); % the extrapolate the maximum amount 128 | else 129 | z2 = (limit-z1)/2; % otherwise bisect 130 | end 131 | elseif (limit > -0.5) && (z2+z1 > limit) % extraplation beyond max? 132 | z2 = (limit-z1)/2; % bisect 133 | elseif (limit < -0.5) && (z2+z1 > z1*EXT) % extrapolation beyond limit 134 | z2 = z1*(EXT-1.0); % set to extrapolation limit 135 | elseif z2 < -z3*INT 136 | z2 = -z3*INT; 137 | elseif (limit > -0.5) && (z2 < (limit-z1)*(1.0-INT)) % too close to limit? 138 | z2 = (limit-z1)*(1.0-INT); 139 | end 140 | f3 = f2; d3 = d2; z3 = -z2; % set point 3 equal to point 2 141 | z1 = z1 + z2; X = X + z2*s; % update current estimates 142 | [f2 df2] = eval(argstr); 143 | M = M - 1; i = i + (length<0); % count epochs?! 
144 | d2 = df2'*s; 145 | end % end of line search 146 | 147 | if success % if line search succeeded 148 | f1 = f2; fX = [fX' f1]'; 149 | fprintf('%s %4i | Cost: %4.6e\r', S, i, f1); 150 | s = (df2'*df2-df1'*df2)/(df1'*df1)*s - df2; % Polack-Ribiere direction 151 | tmp = df1; df1 = df2; df2 = tmp; % swap derivatives 152 | d2 = df1'*s; 153 | if d2 > 0 % new slope must be negative 154 | s = -df1; % otherwise use steepest direction 155 | d2 = -s'*s; 156 | end 157 | z1 = z1 * min(RATIO, d1/(d2-realmin)); % slope ratio but max RATIO 158 | d1 = d2; 159 | ls_failed = 0; % this line search did not fail 160 | else 161 | X = X0; f1 = f0; df1 = df0; % restore point from before failed line search 162 | if ls_failed || i > abs(length) % line search failed twice in a row 163 | break; % or we ran out of time, so we give up 164 | end 165 | tmp = df1; df1 = df2; df2 = tmp; % swap derivatives 166 | s = -df1; % try steepest 167 | d1 = -s'*s; 168 | z1 = 1/(1-d1); 169 | ls_failed = 1; % this line search failed 170 | end 171 | if exist('OCTAVE_VERSION') 172 | fflush(stdout); 173 | end 174 | end 175 | fprintf('\n'); 176 | -------------------------------------------------------------------------------- /ex3/lrCostFunction.m: -------------------------------------------------------------------------------- 1 | function [J, grad] = lrCostFunction(theta, X, y, lambda) 2 | %LRCOSTFUNCTION Compute cost and gradient for logistic regression with 3 | %regularization 4 | % J = LRCOSTFUNCTION(theta, X, y, lambda) computes the cost of using 5 | % theta as the parameter for regularized logistic regression and the 6 | % gradient of the cost w.r.t. to the parameters. 7 | 8 | % Initialize some useful values 9 | m = length(y); % number of training examples 10 | 11 | % You need to return the following variables correctly 12 | J = 0; 13 | grad = zeros(size(theta)); 14 | 15 | % ====================== YOUR CODE HERE ====================== 16 | % Instructions: Compute the cost of a particular choice of theta. 17 | % You should set J to the cost. 18 | % Compute the partial derivatives and set grad to the partial 19 | % derivatives of the cost w.r.t. each parameter in theta 20 | % 21 | % Hint: The computation of the cost function and gradients can be 22 | % efficiently vectorized. For example, consider the computation 23 | % 24 | % sigmoid(X * theta) 25 | % 26 | % Each row of the resulting matrix will contain the value of the 27 | % prediction for that example. You can make use of this to vectorize 28 | % the cost function and gradient computations. 
29 | % 30 | % Hint: When computing the gradient of the regularized cost function, 31 | % there're many possible vectorized solutions, but one solution 32 | % looks like: 33 | % grad = (unregularized gradient for logistic regression) 34 | % temp = theta; 35 | % temp(1) = 0; % because we don't add anything for j = 0 36 | % grad = grad + YOUR_CODE_HERE (using the temp variable) 37 | % 38 | 39 | addpath('../ex2'); 40 | [J, grad] = costFunctionReg(theta, X, y, lambda); 41 | 42 | % ============================================================= 43 | 44 | end 45 | -------------------------------------------------------------------------------- /ex3/oneVsAll.m: -------------------------------------------------------------------------------- 1 | function [all_theta] = oneVsAll(X, y, num_labels, lambda) 2 | %ONEVSALL trains multiple logistic regression classifiers and returns all 3 | %the classifiers in a matrix all_theta, where the i-th row of all_theta 4 | %corresponds to the classifier for label i 5 | % [all_theta] = ONEVSALL(X, y, num_labels, lambda) trains num_labels 6 | % logisitc regression classifiers and returns each of these classifiers 7 | % in a matrix all_theta, where the i-th row of all_theta corresponds 8 | % to the classifier for label i 9 | 10 | % Some useful variables 11 | m = size(X, 1); 12 | n = size(X, 2); 13 | 14 | % You need to return the following variables correctly 15 | all_theta = zeros(num_labels, n + 1); 16 | 17 | % Add ones to the X data matrix 18 | X = [ones(m, 1) X]; 19 | 20 | % ====================== YOUR CODE HERE ====================== 21 | % Instructions: You should complete the following code to train num_labels 22 | % logistic regression classifiers with regularization 23 | % parameter lambda. 24 | % 25 | % Hint: theta(:) will return a column vector. 26 | % 27 | % Hint: You can use y == c to obtain a vector of 1's and 0's that tell use 28 | % whether the ground truth is true/false for this class. 29 | % 30 | % Note: For this assignment, we recommend using fmincg to optimize the cost 31 | % function. It is okay to use a for-loop (for c = 1:num_labels) to 32 | % loop over the different classes. 33 | % 34 | % fmincg works similarly to fminunc, but is more efficient when we 35 | % are dealing with large number of parameters. 36 | % 37 | % Example Code for fmincg: 38 | % 39 | % % Set Initial theta 40 | % initial_theta = zeros(n + 1, 1); 41 | % 42 | % % Set options for fminunc 43 | % options = optimset('GradObj', 'on', 'MaxIter', 50); 44 | % 45 | % % Run fmincg to obtain the optimal theta 46 | % % This function will return theta and the cost 47 | % [theta] = ... 48 | % fmincg (@(t)(lrCostFunction(t, X, (y == c), lambda)), ... 49 | % initial_theta, options); 50 | % 51 | 52 | for c = 1:num_labels 53 | options = optimset('GradObj', 'on', 'MaxIter', 50); 54 | 55 | all_theta(c, :) = fmincg (@(t)(lrCostFunction(t, X, (y == c), lambda)), ... 
56 | zeros(n + 1, 1), options); 57 | 58 | % ========================================================================= 59 | 60 | 61 | end 62 | -------------------------------------------------------------------------------- /ex3/predict.m: -------------------------------------------------------------------------------- 1 | function p = predict(Theta1, Theta2, X) 2 | %PREDICT Predict the label of an input given a trained neural network 3 | % p = PREDICT(Theta1, Theta2, X) outputs the predicted label of X given the 4 | % trained weights of a neural network (Theta1, Theta2) 5 | 6 | % Useful values 7 | m = size(X, 1); 8 | num_labels = size(Theta2, 1); 9 | 10 | % You need to return the following variables correctly 11 | p = zeros(m, 1); 12 | 13 | % ====================== YOUR CODE HERE ====================== 14 | % Instructions: Complete the following code to make predictions using 15 | % your learned neural network. You should set p to a 16 | % vector containing labels between 1 to num_labels. 17 | % 18 | % Hint: The max function might come in useful. In particular, the max 19 | % function can also return the index of the max element, for more 20 | % information see 'help max'. If your examples are in rows, then, you 21 | % can use max(A, [], 2) to obtain the max for each row. 22 | % 23 | 24 | A1 = [ones(1, m); X']; 25 | A2 = [ones(1, m); sigmoid(Theta1*A1)]; 26 | A3 = sigmoid(Theta2*A2); 27 | [value, p] = max(A3', [], 2); 28 | 29 | % ========================================================================= 30 | 31 | 32 | end 33 | -------------------------------------------------------------------------------- /ex3/predictOneVsAll.m: -------------------------------------------------------------------------------- 1 | function p = predictOneVsAll(all_theta, X) 2 | %PREDICT Predict the label for a trained one-vs-all classifier. The labels 3 | %are in the range 1..K, where K = size(all_theta, 1). 4 | % p = PREDICTONEVSALL(all_theta, X) will return a vector of predictions 5 | % for each example in the matrix X. Note that X contains the examples in 6 | % rows. all_theta is a matrix where the i-th row is a trained logistic 7 | % regression theta vector for the i-th class. You should set p to a vector 8 | % of values from 1..K (e.g., p = [1; 3; 1; 2] predicts classes 1, 3, 1, 2 9 | % for 4 examples) 10 | 11 | m = size(X, 1); 12 | num_labels = size(all_theta, 1); 13 | 14 | % You need to return the following variables correctly 15 | p = zeros(size(X, 1), 1); 16 | 17 | % Add ones to the X data matrix 18 | X = [ones(m, 1) X]; 19 | 20 | % ====================== YOUR CODE HERE ====================== 21 | % Instructions: Complete the following code to make predictions using 22 | % your learned logistic regression parameters (one-vs-all). 23 | % You should set p to a vector of predictions (from 1 to 24 | % num_labels). 25 | % 26 | % Hint: This code can be done all vectorized using the max function. 27 | % In particular, the max function can also return the index of the 28 | % max element, for more information see 'help max'. If your examples 29 | % are in rows, then, you can use max(A, [], 2) to obtain the max 30 | % for each row. 
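The max hint above is easiest to see on a tiny matrix: with a second output argument, max returns the index of the winning column in each row, which is exactly the predicted class label. Illustrative values only:

A = [0.2 0.9 0.1;
     0.7 0.3 0.6];
[values, p] = max(A, [], 2);
disp(p');      % prints 2 1: column 2 wins in row 1, column 1 wins in row 2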
31 | % 32 | 33 | [value, p] = max((X*all_theta'), [], 2); 34 | 35 | % ========================================================================= 36 | 37 | 38 | end 39 | -------------------------------------------------------------------------------- /ex3/sigmoid.m: -------------------------------------------------------------------------------- 1 | function g = sigmoid(z) 2 | %SIGMOID Compute sigmoid functoon 3 | % J = SIGMOID(z) computes the sigmoid of z. 4 | 5 | g = 1.0 ./ (1.0 + exp(-z)); 6 | end 7 | -------------------------------------------------------------------------------- /ex4.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/ex4.pdf -------------------------------------------------------------------------------- /ex4/checkNNGradients.m: -------------------------------------------------------------------------------- 1 | function checkNNGradients(lambda) 2 | %CHECKNNGRADIENTS Creates a small neural network to check the 3 | %backpropagation gradients 4 | % CHECKNNGRADIENTS(lambda) Creates a small neural network to check the 5 | % backpropagation gradients, it will output the analytical gradients 6 | % produced by your backprop code and the numerical gradients (computed 7 | % using computeNumericalGradient). These two gradient computations should 8 | % result in very similar values. 9 | % 10 | 11 | if ~exist('lambda', 'var') || isempty(lambda) 12 | lambda = 0; 13 | end 14 | 15 | input_layer_size = 3; 16 | hidden_layer_size = 5; 17 | num_labels = 3; 18 | m = 5; 19 | 20 | % We generate some 'random' test data 21 | Theta1 = debugInitializeWeights(hidden_layer_size, input_layer_size); 22 | Theta2 = debugInitializeWeights(num_labels, hidden_layer_size); 23 | % Reusing debugInitializeWeights to generate X 24 | X = debugInitializeWeights(m, input_layer_size - 1); 25 | y = 1 + mod(1:m, num_labels)'; 26 | 27 | % Unroll parameters 28 | nn_params = [Theta1(:) ; Theta2(:)]; 29 | 30 | % Short hand for cost function 31 | costFunc = @(p) nnCostFunction(p, input_layer_size, hidden_layer_size, ... 32 | num_labels, X, y, lambda); 33 | 34 | [cost, grad] = costFunc(nn_params); 35 | numgrad = computeNumericalGradient(costFunc, nn_params); 36 | 37 | % Visually examine the two gradient computations. The two columns 38 | % you get should be very similar. 39 | disp([numgrad grad]); 40 | fprintf(['The above two columns you get should be very similar.\n' ... 41 | '(Left-Your Numerical Gradient, Right-Analytical Gradient)\n\n']); 42 | 43 | % Evaluate the norm of the difference between two solutions. 44 | % If you have a correct implementation, and assuming you used EPSILON = 0.0001 45 | % in computeNumericalGradient.m, then diff below should be less than 1e-9 46 | diff = norm(numgrad-grad)/norm(numgrad+grad); 47 | 48 | fprintf(['If your backpropagation implementation is correct, then \n' ... 49 | 'the relative difference will be small (less than 1e-9). \n' ... 50 | '\nRelative Difference: %g\n'], diff); 51 | 52 | end 53 | -------------------------------------------------------------------------------- /ex4/computeNumericalGradient.m: -------------------------------------------------------------------------------- 1 | function numgrad = computeNumericalGradient(J, theta) 2 | %COMPUTENUMERICALGRADIENT Computes the gradient using "finite differences" 3 | %and gives us a numerical estimate of the gradient. 
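The numerical estimate mentioned above is the central difference (J(theta + e_i) - J(theta - e_i)) / (2e) applied to one component of theta at a time, which is what the loop below implements. A toy check against a hand-differentiable function (made up for illustration):

J = @(t) t(1)^2 + 3*t(2);                 % analytic gradient is [2*t(1); 3]
theta = [1; 2];  e = 1e-4;
g1 = (J(theta + [e; 0]) - J(theta - [e; 0])) / (2*e);
g2 = (J(theta + [0; e]) - J(theta - [0; e])) / (2*e);
disp([g1 g2]);                            % approximately 2 and 3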
4 | % numgrad = COMPUTENUMERICALGRADIENT(J, theta) computes the numerical 5 | % gradient of the function J around theta. Calling y = J(theta) should 6 | % return the function value at theta. 7 | 8 | % Notes: The following code implements numerical gradient checking, and 9 | % returns the numerical gradient.It sets numgrad(i) to (a numerical 10 | % approximation of) the partial derivative of J with respect to the 11 | % i-th input argument, evaluated at theta. (i.e., numgrad(i) should 12 | % be the (approximately) the partial derivative of J with respect 13 | % to theta(i).) 14 | % 15 | 16 | numgrad = zeros(size(theta)); 17 | perturb = zeros(size(theta)); 18 | e = 1e-4; 19 | for p = 1:numel(theta) 20 | % Set perturbation vector 21 | perturb(p) = e; 22 | loss1 = J(theta - perturb); 23 | loss2 = J(theta + perturb); 24 | % Compute Numerical Gradient 25 | numgrad(p) = (loss2 - loss1) / (2*e); 26 | perturb(p) = 0; 27 | end 28 | 29 | end 30 | -------------------------------------------------------------------------------- /ex4/debugInitializeWeights.m: -------------------------------------------------------------------------------- 1 | function W = debugInitializeWeights(fan_out, fan_in) 2 | %DEBUGINITIALIZEWEIGHTS Initialize the weights of a layer with fan_in 3 | %incoming connections and fan_out outgoing connections using a fixed 4 | %strategy, this will help you later in debugging 5 | % W = DEBUGINITIALIZEWEIGHTS(fan_in, fan_out) initializes the weights 6 | % of a layer with fan_in incoming connections and fan_out outgoing 7 | % connections using a fix set of values 8 | % 9 | % Note that W should be set to a matrix of size(1 + fan_in, fan_out) as 10 | % the first row of W handles the "bias" terms 11 | % 12 | 13 | % Set W to zeros 14 | W = zeros(fan_out, 1 + fan_in); 15 | 16 | % Initialize W using "sin", this ensures that W is always of the same 17 | % values and will be useful for debugging 18 | W = reshape(sin(1:numel(W)), size(W)) / 10; 19 | 20 | % ========================================================================= 21 | 22 | end 23 | -------------------------------------------------------------------------------- /ex4/displayData.m: -------------------------------------------------------------------------------- 1 | function [h, display_array] = displayData(X, example_width) 2 | %DISPLAYDATA Display 2D data in a nice grid 3 | % [h, display_array] = DISPLAYDATA(X, example_width) displays 2D data 4 | % stored in X in a nice grid. It returns the figure handle h and the 5 | % displayed array if requested. 6 | 7 | % Set example_width automatically if not passed in 8 | if ~exist('example_width', 'var') || isempty(example_width) 9 | example_width = round(sqrt(size(X, 2))); 10 | end 11 | 12 | % Gray Image 13 | colormap(gray); 14 | 15 | % Compute rows, cols 16 | [m n] = size(X); 17 | example_height = (n / example_width); 18 | 19 | % Compute number of items to display 20 | display_rows = floor(sqrt(m)); 21 | display_cols = ceil(m / display_rows); 22 | 23 | % Between images padding 24 | pad = 1; 25 | 26 | % Setup blank display 27 | display_array = - ones(pad + display_rows * (example_height + pad), ... 
28 | pad + display_cols * (example_width + pad)); 29 | 30 | % Copy each example into a patch on the display array 31 | curr_ex = 1; 32 | for j = 1:display_rows 33 | for i = 1:display_cols 34 | if curr_ex > m, 35 | break; 36 | end 37 | % Copy the patch 38 | 39 | % Get the max value of the patch 40 | max_val = max(abs(X(curr_ex, :))); 41 | display_array(pad + (j - 1) * (example_height + pad) + (1:example_height), ... 42 | pad + (i - 1) * (example_width + pad) + (1:example_width)) = ... 43 | reshape(X(curr_ex, :), example_height, example_width) / max_val; 44 | curr_ex = curr_ex + 1; 45 | end 46 | if curr_ex > m, 47 | break; 48 | end 49 | end 50 | 51 | % Display Image 52 | h = imagesc(display_array, [-1 1]); 53 | 54 | % Do not show axis 55 | axis image off 56 | 57 | drawnow; 58 | 59 | end 60 | -------------------------------------------------------------------------------- /ex4/ex4.m: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env octave 2 | %% Machine Learning Online Class - Exercise 4 Neural Network Learning 3 | 4 | % Instructions 5 | % ------------ 6 | % 7 | % This file contains code that helps you get started on the 8 | % linear exercise. You will need to complete the following functions 9 | % in this exericse: 10 | % 11 | % sigmoidGradient.m 12 | % randInitializeWeights.m 13 | % nnCostFunction.m 14 | % 15 | % For this exercise, you will not need to change any code in this file, 16 | % or any other files other than those mentioned above. 17 | % 18 | 19 | %% Initialization 20 | clear ; close all; clc 21 | 22 | %% Setup the parameters you will use for this exercise 23 | input_layer_size = 400; % 20x20 Input Images of Digits 24 | hidden_layer_size = 25; % 25 hidden units 25 | num_labels = 10; % 10 labels, from 1 to 10 26 | % (note that we have mapped "0" to label 10) 27 | 28 | %% =========== Part 1: Loading and Visualizing Data ============= 29 | % We start the exercise by first loading and visualizing the dataset. 30 | % You will be working with a dataset that contains handwritten digits. 31 | % 32 | 33 | % Load Training Data 34 | fprintf('Loading and Visualizing Data ...\n') 35 | 36 | load('ex4data1.mat'); 37 | m = size(X, 1); 38 | 39 | % Randomly select 100 data points to display 40 | sel = randperm(size(X, 1)); 41 | sel = sel(1:100); 42 | 43 | displayData(X(sel, :)); 44 | 45 | fprintf('Program paused. Press enter to continue.\n'); 46 | pause; 47 | 48 | 49 | %% ================ Part 2: Loading Pameters ================ 50 | % In this part of the exercise, we load some pre-initialized 51 | % neural network parameters. 52 | 53 | fprintf('\nLoading Saved Neural Network Parameters ...\n') 54 | 55 | % Load the weights into variables Theta1 and Theta2 56 | load('ex4weights.mat'); 57 | 58 | % Unroll parameters 59 | nn_params = [Theta1(:) ; Theta2(:)]; 60 | 61 | %% ================ Part 3: Compute Cost (Feedforward) ================ 62 | % To the neural network, you should first start by implementing the 63 | % feedforward part of the neural network that returns the cost only. You 64 | % should complete the code in nnCostFunction.m to return cost. After 65 | % implementing the feedforward to compute the cost, you can verify that 66 | % your implementation is correct by verifying that you get the same cost 67 | % as us for the fixed debugging parameters. 68 | % 69 | % We suggest implementing the feedforward cost *without* regularization 70 | % first so that it will be easier for you to debug. 
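For the unregularized feedforward cost referred to above, the per-example logistic costs are summed over all K output units and averaged over the m examples. A tiny standalone sketch with a made-up prediction matrix h (K x m) and a one-hot label matrix Y of the same size:

h = [0.9 0.2; 0.1 0.7; 0.3 0.4];          % hypothetical network outputs, K = 3, m = 2
Y = [1 0; 0 1; 0 0];                       % one-hot labels for the two examples
m = size(h, 2);
J = (1/m) * sum(sum(-Y .* log(h) - (1 - Y) .* log(1 - h)));
disp(J);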
Later, in part 4, you 71 | % will get to implement the regularized cost. 72 | % 73 | fprintf('\nFeedforward Using Neural Network ...\n') 74 | 75 | % Weight regularization parameter (we set this to 0 here). 76 | lambda = 0; 77 | 78 | J = nnCostFunction(nn_params, input_layer_size, hidden_layer_size, ... 79 | num_labels, X, y, lambda); 80 | 81 | fprintf(['Cost at parameters (loaded from ex4weights): %f '... 82 | '\n(this value should be about 0.287629)\n'], J); 83 | 84 | fprintf('\nProgram paused. Press enter to continue.\n'); 85 | pause; 86 | 87 | %% =============== Part 4: Implement Regularization =============== 88 | % Once your cost function implementation is correct, you should now 89 | % continue to implement the regularization with the cost. 90 | % 91 | 92 | fprintf('\nChecking Cost Function (w/ Regularization) ... \n') 93 | 94 | % Weight regularization parameter (we set this to 1 here). 95 | lambda = 1; 96 | 97 | J = nnCostFunction(nn_params, input_layer_size, hidden_layer_size, ... 98 | num_labels, X, y, lambda); 99 | 100 | fprintf(['Cost at parameters (loaded from ex4weights): %f '... 101 | '\n(this value should be about 0.383770)\n'], J); 102 | 103 | fprintf('Program paused. Press enter to continue.\n'); 104 | pause; 105 | 106 | 107 | %% ================ Part 5: Sigmoid Gradient ================ 108 | % Before you start implementing the neural network, you will first 109 | % implement the gradient for the sigmoid function. You should complete the 110 | % code in the sigmoidGradient.m file. 111 | % 112 | 113 | fprintf('\nEvaluating sigmoid gradient...\n') 114 | 115 | g = sigmoidGradient([1 -0.5 0 0.5 1]); 116 | fprintf('Sigmoid gradient evaluated at [1 -0.5 0 0.5 1]:\n '); 117 | fprintf('%f ', g); 118 | fprintf('\n\n'); 119 | 120 | fprintf('Program paused. Press enter to continue.\n'); 121 | pause; 122 | 123 | 124 | %% ================ Part 6: Initializing Pameters ================ 125 | % In this part of the exercise, you will be starting to implment a two 126 | % layer neural network that classifies digits. You will start by 127 | % implementing a function to initialize the weights of the neural network 128 | % (randInitializeWeights.m) 129 | 130 | fprintf('\nInitializing Neural Network Parameters ...\n') 131 | 132 | initial_Theta1 = randInitializeWeights(input_layer_size, hidden_layer_size); 133 | initial_Theta2 = randInitializeWeights(hidden_layer_size, num_labels); 134 | 135 | % Unroll parameters 136 | initial_nn_params = [initial_Theta1(:) ; initial_Theta2(:)]; 137 | 138 | 139 | %% =============== Part 7: Implement Backpropagation =============== 140 | % Once your cost matches up with ours, you should proceed to implement the 141 | % backpropagation algorithm for the neural network. You should add to the 142 | % code you've written in nnCostFunction.m to return the partial 143 | % derivatives of the parameters. 144 | % 145 | fprintf('\nChecking Backpropagation... \n'); 146 | 147 | % Check gradients by running checkNNGradients 148 | checkNNGradients; 149 | 150 | fprintf('\nProgram paused. Press enter to continue.\n'); 151 | pause; 152 | 153 | 154 | %% =============== Part 8: Implement Regularization =============== 155 | % Once your backpropagation implementation is correct, you should now 156 | % continue to implement the regularization with the cost and gradient. 157 | % 158 | 159 | fprintf('\nChecking Backpropagation (w/ Regularization) ... 
\n') 160 | 161 | % Check gradients by running checkNNGradients 162 | lambda = 3; 163 | checkNNGradients(lambda); 164 | 165 | % Also output the costFunction debugging values 166 | debug_J = nnCostFunction(nn_params, input_layer_size, ... 167 | hidden_layer_size, num_labels, X, y, lambda); 168 | 169 | fprintf(['\n\nCost at (fixed) debugging parameters (w/ lambda = 10): %f ' ... 170 | '\n(this value should be about 0.576051)\n\n'], debug_J); 171 | 172 | fprintf('Program paused. Press enter to continue.\n'); 173 | pause; 174 | 175 | 176 | %% =================== Part 8: Training NN =================== 177 | % You have now implemented all the code necessary to train a neural 178 | % network. To train your neural network, we will now use "fmincg", which 179 | % is a function which works similarly to "fminunc". Recall that these 180 | % advanced optimizers are able to train our cost functions efficiently as 181 | % long as we provide them with the gradient computations. 182 | % 183 | fprintf('\nTraining Neural Network... \n') 184 | 185 | % After you have completed the assignment, change the MaxIter to a larger 186 | % value to see how more training helps. 187 | options = optimset('MaxIter', 400); 188 | 189 | % You should also try different values of lambda 190 | lambda = 1; 191 | 192 | % Create "short hand" for the cost function to be minimized 193 | costFunction = @(p) nnCostFunction(p, ... 194 | input_layer_size, ... 195 | hidden_layer_size, ... 196 | num_labels, X, y, lambda); 197 | 198 | % Now, costFunction is a function that takes in only one argument (the 199 | % neural network parameters) 200 | [nn_params, cost] = fmincg(costFunction, initial_nn_params, options); 201 | 202 | % Obtain Theta1 and Theta2 back from nn_params 203 | Theta1 = reshape(nn_params(1:hidden_layer_size * (input_layer_size + 1)), ... 204 | hidden_layer_size, (input_layer_size + 1)); 205 | 206 | Theta2 = reshape(nn_params((1 + (hidden_layer_size * (input_layer_size + 1))):end), ... 207 | num_labels, (hidden_layer_size + 1)); 208 | 209 | fprintf('Program paused. Press enter to continue.\n'); 210 | pause; 211 | 212 | 213 | %% ================= Part 9: Visualize Weights ================= 214 | % You can now "visualize" what the neural network is learning by 215 | % displaying the hidden units to see what features they are capturing in 216 | % the data. 217 | 218 | fprintf('\nVisualizing Neural Network... \n') 219 | 220 | displayData(Theta1(:, 2:end)); 221 | 222 | fprintf('\nProgram paused. Press enter to continue.\n'); 223 | pause; 224 | 225 | %% ================= Part 10: Implement Predict ================= 226 | % After training the neural network, we would like to use it to predict 227 | % the labels. You will now implement the "predict" function to use the 228 | % neural network to predict the labels of the training set. This lets 229 | % you compute the training set accuracy. 
230 | 231 | pred = predict(Theta1, Theta2, X); 232 | 233 | fprintf('\nTraining Set Accuracy: %f\n', mean(double(pred == y)) * 100); 234 | 235 | 236 | -------------------------------------------------------------------------------- /ex4/ex4data1.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/ex4/ex4data1.mat -------------------------------------------------------------------------------- /ex4/ex4weights.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/ex4/ex4weights.mat -------------------------------------------------------------------------------- /ex4/fmincg.m: -------------------------------------------------------------------------------- 1 | ../ex3/fmincg.m -------------------------------------------------------------------------------- /ex4/nnCostFunction.m: -------------------------------------------------------------------------------- 1 | function [J grad] = nnCostFunction(nn_params, ... 2 | input_layer_size, ... 3 | hidden_layer_size, ... 4 | num_labels, ... 5 | X, y, lambda) 6 | %NNCOSTFUNCTION Implements the neural network cost function for a two layer 7 | %neural network which performs classification 8 | % [J grad] = NNCOSTFUNCTON(nn_params, hidden_layer_size, num_labels, ... 9 | % X, y, lambda) computes the cost and gradient of the neural network. The 10 | % parameters for the neural network are "unrolled" into the vector 11 | % nn_params and need to be converted back into the weight matrices. 12 | % 13 | % The returned parameter grad should be a "unrolled" vector of the 14 | % partial derivatives of the neural network. 15 | % 16 | 17 | % Reshape nn_params back into the parameters Theta1 and Theta2, the weight matrices 18 | % for our 2 layer neural network 19 | Theta1 = reshape(nn_params(1:hidden_layer_size * (input_layer_size + 1)), ... 20 | hidden_layer_size, (input_layer_size + 1)); 21 | 22 | Theta2 = reshape(nn_params((1 + (hidden_layer_size * (input_layer_size + 1))):end), ... 23 | num_labels, (hidden_layer_size + 1)); 24 | 25 | % Setup some useful variables 26 | m = size(X, 1); 27 | 28 | % You need to return the following variables correctly 29 | J = 0; 30 | Theta1_grad = zeros(size(Theta1)); 31 | Theta2_grad = zeros(size(Theta2)); 32 | 33 | % ====================== YOUR CODE HERE ====================== 34 | % Instructions: You should complete the code by working through the 35 | % following parts. 36 | % 37 | % Part 1: Feedforward the neural network and return the cost in the 38 | % variable J. After implementing Part 1, you can verify that your 39 | % cost function computation is correct by verifying the cost 40 | % computed in ex4.m 41 | % 42 | % Part 2: Implement the backpropagation algorithm to compute the gradients 43 | % Theta1_grad and Theta2_grad. You should return the partial derivatives of 44 | % the cost function with respect to Theta1 and Theta2 in Theta1_grad and 45 | % Theta2_grad, respectively. After implementing Part 2, you can check 46 | % that your implementation is correct by running checkNNGradients 47 | % 48 | % Note: The vector y passed into the function is a vector of labels 49 | % containing values from 1..K. You need to map this vector into a 50 | % binary vector of 1's and 0's to be used with the neural network 51 | % cost function. 
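The label-to-binary-vector mapping described above can also be done without a loop: indexing the columns of an identity matrix by y produces the K x m one-hot matrix directly, and matches the loop version used later in this file. A small illustrative example with hypothetical labels:

y = [3; 1; 2];  num_labels = 3;  m = length(y);
Y = eye(num_labels)(:, y);                % Octave allows indexing the result directly
Y_loop = zeros(num_labels, m);            % equivalent loop, as in the code below
for i = 1:m
  Y_loop(y(i), i) = 1;
end
disp(isequal(Y, Y_loop));                 % prints 1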
52 | % 53 | % Hint: We recommend implementing backpropagation using a for-loop 54 | % over the training examples if you are implementing it for the 55 | % first time. 56 | % 57 | % Part 3: Implement regularization with the cost function and gradients. 58 | % 59 | % Hint: You can implement this around the code for 60 | % backpropagation. That is, you can compute the gradients for 61 | % the regularization separately and then add them to Theta1_grad 62 | % and Theta2_grad from Part 2. 63 | % 64 | 65 | % Convert y to matrix 66 | % XXX(SaveTheRbtz@): Curious how it can be vectorized (Should reread ex3 for 67 | % logical arrays) 68 | number_of_classes = length(unique(y)); 69 | Y = zeros(number_of_classes, m); 70 | for i = 1:m 71 | Y(y(i), i) = 1; 72 | endfor 73 | 74 | % Do forward propagation 75 | % Copy/Paste from ex3 predict.m 76 | % FIXME(SaveTheRbtz@): Move to separate function 77 | A1 = [ones(1, m); X']; 78 | Z2=Theta1*A1; 79 | A2 = [ones(1, m); sigmoid(Z2)]; 80 | Z3=Theta2*A2; 81 | A3 = sigmoid(Z3); 82 | 83 | % A3 here is our h0 84 | h0 = A3; 85 | 86 | % Compute cost function 87 | % XXX(SaveTheRbtz@): Slightly modified version of ex2 costFunction 88 | J = (1/m)*sum(sum(-Y.*log(h0) - (1-Y).*log(1-h0))); 89 | 90 | % Add some regularization 91 | % XXX(SaveTheRbtz@): Also borrowed from ex2 costFunctionReg 92 | penalize = sum(sum(Theta1(:, 2:end) .^ 2)) + sum(sum(Theta2(:, 2:end) .^ 2)); 93 | J = J + (lambda/(2*m)) * penalize; 94 | 95 | % Implement backpropagation 96 | delta_3 = A3 - Y; 97 | delta_2 = (Theta2'*delta_3)(2:end, :) .* sigmoidGradient(Z2); 98 | 99 | % Calculate gradients 100 | Theta1_unreg_grad = (delta_2 * A1')/m; 101 | Theta2_unreg_grad = (delta_3 * A2')/m; 102 | 103 | % Regularize 104 | Theta1_grad = Theta1_unreg_grad + (lambda/m) * Theta1; 105 | Theta2_grad = Theta2_unreg_grad + (lambda/m) * Theta2; 106 | 107 | Theta1_grad(:, 1) = Theta1_unreg_grad(:, 1); 108 | Theta2_grad(:, 1) = Theta2_unreg_grad(:, 1); 109 | 110 | % ------------------------------------------------------------- 111 | 112 | % ========================================================================= 113 | 114 | % Unroll gradients 115 | grad = [Theta1_grad(:) ; Theta2_grad(:)]; 116 | 117 | 118 | end 119 | -------------------------------------------------------------------------------- /ex4/predict.m: -------------------------------------------------------------------------------- 1 | function p = predict(Theta1, Theta2, X) 2 | %PREDICT Predict the label of an input given a trained neural network 3 | % p = PREDICT(Theta1, Theta2, X) outputs the predicted label of X given the 4 | % trained weights of a neural network (Theta1, Theta2) 5 | 6 | % Useful values 7 | m = size(X, 1); 8 | num_labels = size(Theta2, 1); 9 | 10 | % You need to return the following variables correctly 11 | p = zeros(size(X, 1), 1); 12 | 13 | h1 = sigmoid([ones(m, 1) X] * Theta1'); 14 | h2 = sigmoid([ones(m, 1) h1] * Theta2'); 15 | [dummy, p] = max(h2, [], 2); 16 | 17 | % ========================================================================= 18 | 19 | 20 | end 21 | -------------------------------------------------------------------------------- /ex4/randInitializeWeights.m: -------------------------------------------------------------------------------- 1 | function W = randInitializeWeights(L_in, L_out) 2 | %RANDINITIALIZEWEIGHTS Randomly initialize the weights of a layer with L_in 3 | %incoming connections and L_out outgoing connections 4 | % W = RANDINITIALIZEWEIGHTS(L_in, L_out) randomly initializes the weights 5 | % of a layer with 
L_in incoming connections and L_out outgoing 6 | % connections. 7 | % 8 | % Note that W should be set to a matrix of size(L_out, 1 + L_in) as 9 | % the first row of W handles the "bias" terms 10 | % 11 | 12 | % You need to return the following variables correctly 13 | W = zeros(L_out, 1 + L_in); 14 | 15 | % ====================== YOUR CODE HERE ====================== 16 | % Instructions: Initialize W randomly so that we break the symmetry while 17 | % training the neural network. 18 | % 19 | % Note: The first row of W corresponds to the parameters for the bias units 20 | % 21 | 22 | % Randomly initialize the weights to small values 23 | % XXX(SaveTheRbtz@): This one was given in text book 24 | % Book also suggests setting epsilon based on number of nodes in network: 25 | %epsilon_init = (sqrt(6)/(sqrt(L_in + L_out))); 26 | epsilon_init = 0.12; 27 | W = rand(L_out, 1 + L_in) * 2 * epsilon_init - epsilon_init; 28 | 29 | % ========================================================================= 30 | 31 | end 32 | -------------------------------------------------------------------------------- /ex4/sigmoid.m: -------------------------------------------------------------------------------- 1 | function g = sigmoid(z) 2 | %SIGMOID Compute sigmoid functoon 3 | % J = SIGMOID(z) computes the sigmoid of z. 4 | 5 | g = 1.0 ./ (1.0 + exp(-z)); 6 | end 7 | -------------------------------------------------------------------------------- /ex4/sigmoidGradient.m: -------------------------------------------------------------------------------- 1 | function g = sigmoidGradient(z) 2 | %SIGMOIDGRADIENT returns the gradient of the sigmoid function 3 | %evaluated at z 4 | % g = SIGMOIDGRADIENT(z) computes the gradient of the sigmoid function 5 | % evaluated at z. This should work regardless if z is a matrix or a 6 | % vector. In particular, if z is a vector or matrix, you should return 7 | % the gradient for each element. 8 | 9 | g = zeros(size(z)); 10 | 11 | % ====================== YOUR CODE HERE ====================== 12 | % Instructions: Compute the gradient of the sigmoid function evaluated at 13 | % each value of z (z can be a matrix, vector or scalar). 14 | 15 | g = sigmoid(z) .* (1 - sigmoid(z)); 16 | 17 | % ============================================================= 18 | 19 | 20 | 21 | 22 | end 23 | -------------------------------------------------------------------------------- /ex5.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/ex5.pdf -------------------------------------------------------------------------------- /ex5/ex5.m: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env octave 2 | %% Machine Learning Online Class 3 | % Exercise 5 | Regularized Linear Regression and Bias-Variance 4 | % 5 | % Instructions 6 | % ------------ 7 | % 8 | % This file contains code that helps you get started on the 9 | % exercise. You will need to complete the following functions: 10 | % 11 | % linearRegCostFunction.m 12 | % learningCurve.m 13 | % validationCurve.m 14 | % 15 | % For this exercise, you will not need to change any code in this file, 16 | % or any other files other than those mentioned above. 17 | % 18 | 19 | %% Initialization 20 | clear ; close all; clc 21 | 22 | %% =========== Part 1: Loading and Visualizing Data ============= 23 | % We start the exercise by first loading and visualizing the dataset. 
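Before the dataset-specific checks in Parts 2 and 3 below, the regularized linear regression cost and gradient implemented in linearRegCostFunction.m further down are easy to verify on made-up numbers: J = (1/(2m)) * [sum((X*theta - y).^2) + lambda * sum(theta(2:end).^2)], with the intercept term left unregularized. Illustrative values only:

X = [1 1; 1 2; 1 3];  y = [2; 2.5; 3.5];   % toy data, not ex5data1.mat
theta = [1; 1];  lambda = 1;  m = length(y);
h = X * theta;
J = (sum((h - y).^2) + lambda * sum(theta(2:end).^2)) / (2*m);
grad = (1/m) * (X' * (h - y)) + [0; (lambda/m) * theta(2:end)];
disp(J);  disp(grad');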
24 | % The following code will load the dataset into your environment and plot 25 | % the data. 26 | % 27 | 28 | % Load Training Data 29 | fprintf('Loading and Visualizing Data ...\n') 30 | 31 | % Load from ex5data1: 32 | % You will have X, y, Xval, yval, Xtest, ytest in your environment 33 | load ('ex5data1.mat'); 34 | 35 | % m = Number of examples 36 | m = size(X, 1); 37 | 38 | % Plot training data 39 | plot(X, y, 'rx', 'MarkerSize', 10, 'LineWidth', 1.5); 40 | xlabel('Change in water level (x)'); 41 | ylabel('Water flowing out of the dam (y)'); 42 | 43 | fprintf('Program paused. Press enter to continue.\n'); 44 | pause; 45 | 46 | %% =========== Part 2: Regularized Linear Regression Cost ============= 47 | % You should now implement the cost function for regularized linear 48 | % regression. 49 | % 50 | 51 | theta = [1 ; 1]; 52 | J = linearRegCostFunction([ones(m, 1) X], y, theta, 1); 53 | 54 | fprintf(['Cost at theta = [1 ; 1]: %f '... 55 | '\n(this value should be about 303.993192)\n'], J); 56 | 57 | fprintf('Program paused. Press enter to continue.\n'); 58 | pause; 59 | 60 | %% =========== Part 3: Regularized Linear Regression Gradient ============= 61 | % You should now implement the gradient for regularized linear 62 | % regression. 63 | % 64 | 65 | theta = [1 ; 1]; 66 | [J, grad] = linearRegCostFunction([ones(m, 1) X], y, theta, 1); 67 | 68 | fprintf(['Gradient at theta = [1 ; 1]: [%f; %f] '... 69 | '\n(this value should be about [-15.303016; 598.250744])\n'], ... 70 | grad(1), grad(2)); 71 | 72 | fprintf('Program paused. Press enter to continue.\n'); 73 | pause; 74 | 75 | 76 | %% =========== Part 4: Train Linear Regression ============= 77 | % Once you have implemented the cost and gradient correctly, the 78 | % trainLinearReg function will use your cost function to train 79 | % regularized linear regression. 80 | % 81 | % Write Up Note: The data is non-linear, so this will not give a great 82 | % fit. 83 | % 84 | 85 | % Train linear regression with lambda = 0 86 | lambda = 0; 87 | [theta] = trainLinearReg([ones(m, 1) X], y, lambda); 88 | 89 | % Plot fit over the data 90 | plot(X, y, 'rx', 'MarkerSize', 10, 'LineWidth', 1.5); 91 | xlabel('Change in water level (x)'); 92 | ylabel('Water flowing out of the dam (y)'); 93 | hold on; 94 | plot(X, [ones(m, 1) X]*theta, '--', 'LineWidth', 2) 95 | hold off; 96 | 97 | fprintf('Program paused. Press enter to continue.\n'); 98 | pause; 99 | 100 | 101 | %% =========== Part 5: Learning Curve for Linear Regression ============= 102 | % Next, you should implement the learningCurve function. 103 | % 104 | % Write Up Note: Since the model is underfitting the data, we expect to 105 | % see a graph with "high bias" -- slide 8 in ML-advice.pdf 106 | % 107 | 108 | lambda = 0; 109 | [error_train, error_val] = ... 110 | learningCurve([ones(m, 1) X], y, ... 111 | [ones(size(Xval, 1), 1) Xval], yval, ... 112 | lambda); 113 | 114 | plot(1:m, error_train, 1:m, error_val); 115 | title('Learning curve for linear regression') 116 | legend('Train', 'Cross Validation') 117 | xlabel('Number of training examples') 118 | ylabel('Error') 119 | axis([0 13 0 150]) 120 | 121 | fprintf('# Training Examples\tTrain Error\tCross Validation Error\n'); 122 | for i = 1:m 123 | fprintf(' \t%d\t\t%f\t%f\n', i, error_train(i), error_val(i)); 124 | end 125 | 126 | fprintf('Program paused. 
Press enter to continue.\n'); 127 | pause; 128 | 129 | %% =========== Part 6: Feature Mapping for Polynomial Regression ============= 130 | % One solution to this is to use polynomial regression. You should now 131 | % complete polyFeatures to map each example into its powers 132 | % 133 | 134 | p = 8; 135 | 136 | % Map X onto Polynomial Features and Normalize 137 | X_poly = polyFeatures(X, p); 138 | [X_poly, mu, sigma] = featureNormalize(X_poly); % Normalize 139 | X_poly = [ones(m, 1), X_poly]; % Add Ones 140 | 141 | % Map X_poly_test and normalize (using mu and sigma) 142 | X_poly_test = polyFeatures(Xtest, p); 143 | X_poly_test = bsxfun(@minus, X_poly_test, mu); 144 | X_poly_test = bsxfun(@rdivide, X_poly_test, sigma); 145 | X_poly_test = [ones(size(X_poly_test, 1), 1), X_poly_test]; % Add Ones 146 | 147 | % Map X_poly_val and normalize (using mu and sigma) 148 | X_poly_val = polyFeatures(Xval, p); 149 | X_poly_val = bsxfun(@minus, X_poly_val, mu); 150 | X_poly_val = bsxfun(@rdivide, X_poly_val, sigma); 151 | X_poly_val = [ones(size(X_poly_val, 1), 1), X_poly_val]; % Add Ones 152 | 153 | fprintf('Normalized Training Example 1:\n'); 154 | fprintf(' %f \n', X_poly(1, :)); 155 | 156 | fprintf('\nProgram paused. Press enter to continue.\n'); 157 | pause; 158 | 159 | 160 | 161 | %% =========== Part 7: Learning Curve for Polynomial Regression ============= 162 | % Now, you will get to experiment with polynomial regression with multiple 163 | % values of lambda. The code below runs polynomial regression with 164 | % lambda = 0. You should try running the code with different values of 165 | % lambda to see how the fit and learning curve change. 166 | % 167 | 168 | lambda = 0; 169 | [theta] = trainLinearReg(X_poly, y, lambda); 170 | 171 | % Plot training data and fit 172 | figure(1); 173 | plot(X, y, 'rx', 'MarkerSize', 10, 'LineWidth', 1.5); 174 | plotFit(min(X), max(X), mu, sigma, theta, p); 175 | xlabel('Change in water level (x)'); 176 | ylabel('Water flowing out of the dam (y)'); 177 | title (sprintf('Polynomial Regression Fit (lambda = %f)', lambda)); 178 | 179 | figure(2); 180 | [error_train, error_val] = ... 181 | learningCurve(X_poly, y, X_poly_val, yval, lambda); 182 | % Should we use logarithmix scale there? 183 | plot(1:m, error_train, 1:m, error_val); 184 | 185 | title(sprintf('Polynomial Regression Learning Curve (lambda = %f)', lambda)); 186 | xlabel('Number of training examples') 187 | ylabel('Error') 188 | axis([0 13 0 100]) 189 | legend('Train', 'Cross Validation') 190 | 191 | fprintf('Polynomial Regression (lambda = %f)\n\n', lambda); 192 | fprintf('# Training Examples\tTrain Error\tCross Validation Error\n'); 193 | for i = 1:m 194 | fprintf(' \t%d\t\t%f\t%f\n', i, error_train(i), error_val(i)); 195 | end 196 | 197 | fprintf('Program paused. Press enter to continue.\n'); 198 | pause; 199 | 200 | %% =========== Part 8: Validation for Selecting Lambda ============= 201 | % You will now implement validationCurve to test various values of 202 | % lambda on a validation set. You will then use this to select the 203 | % "best" lambda value. 204 | % 205 | 206 | [lambda_vec, error_train, error_val] = ... 207 | validationCurve(X_poly, y, X_poly_val, yval); 208 | 209 | close all; 210 | plot(lambda_vec, error_train, lambda_vec, error_val); 211 | legend('Train', 'Cross Validation'); 212 | xlabel('lambda'); 213 | ylabel('Error'); 214 | 215 | fprintf('lambda\t\tTrain Error\tValidation Error\n'); 216 | for i = 1:length(lambda_vec) 217 | fprintf(' %f\t%f\t%f\n', ... 
218 | lambda_vec(i), error_train(i), error_val(i)); 219 | end 220 | 221 | fprintf('Program paused. Press enter to continue.\n'); 222 | pause; 223 | -------------------------------------------------------------------------------- /ex5/ex5data1.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/ex5/ex5data1.mat -------------------------------------------------------------------------------- /ex5/featureNormalize.m: -------------------------------------------------------------------------------- 1 | ../ex1/featureNormalize.m -------------------------------------------------------------------------------- /ex5/fmincg.m: -------------------------------------------------------------------------------- 1 | ../ex3/fmincg.m -------------------------------------------------------------------------------- /ex5/learningCurve.m: -------------------------------------------------------------------------------- 1 | function [error_train, error_val] = ... 2 | learningCurve(X, y, Xval, yval, lambda) 3 | %LEARNINGCURVE Generates the train and cross validation set errors needed 4 | %to plot a learning curve 5 | % [error_train, error_val] = ... 6 | % LEARNINGCURVE(X, y, Xval, yval, lambda) returns the train and 7 | % cross validation set errors for a learning curve. In particular, 8 | % it returns two vectors of the same length - error_train and 9 | % error_val. Then, error_train(i) contains the training error for 10 | % i examples (and similarly for error_val(i)). 11 | % 12 | % In this function, you will compute the train and test errors for 13 | % dataset sizes from 1 up to m. In practice, when working with larger 14 | % datasets, you might want to do this in larger intervals. 15 | % 16 | 17 | % Number of training examples 18 | m = size(X, 1); 19 | 20 | % You need to return these values correctly 21 | error_train = zeros(m, 1); 22 | error_val = zeros(m, 1); 23 | 24 | % ====================== YOUR CODE HERE ====================== 25 | % Instructions: Fill in this function to return training errors in 26 | % error_train and the cross validation errors in error_val. 27 | % The vector numex_vec contains the number of training 28 | % examples to use for each calculation of training error and 29 | % cross validation error, i.e, error_train(i) and 30 | % error_val(i) should give you the errors 31 | % obtained after training on i examples. 32 | % 33 | % Note: You should evaluate the training error on the first i training 34 | % examples (i.e., X(1:i, :) and y(1:i)). 35 | % 36 | % For the cross-validation error, you should instead evaluate on 37 | % the _entire_ cross validation set (Xval and yval). 38 | % 39 | % Note: If you are using your cost function (linearRegCostFunction) 40 | % to compute the training and cross validation error, you should 41 | % call the function with the lambda argument set to 0. 42 | % Do note that you will still need to use lambda when running 43 | % the training to obtain the theta parameters. 44 | % 45 | % Hint: You can loop over the examples with the following: 46 | % 47 | % for i = 1:m 48 | % % Compute train/cross validation errors using training examples 49 | % % X(1:i, :) and y(1:i), storing the result in 50 | % % error_train(i) and error_val(i) 51 | % .... 
52 | % 53 | % end 54 | % 55 | 56 | % ---------------------- Sample Solution ---------------------- 57 | 58 | for i = 1:m 59 | X_trimmed = X(1:i, :); 60 | y_trimmed = y(1:i); 61 | theta = trainLinearReg(X_trimmed, y_trimmed, lambda); 62 | error_train(i) = linearRegCostFunction(X_trimmed, y_trimmed, theta, 0)(1); 63 | error_val(i) = linearRegCostFunction(Xval, yval, theta, 0)(1); 64 | 65 | % ------------------------------------------------------------- 66 | 67 | % ========================================================================= 68 | 69 | end 70 | -------------------------------------------------------------------------------- /ex5/linearRegCostFunction.m: -------------------------------------------------------------------------------- 1 | function [J, grad] = linearRegCostFunction(X, y, theta, lambda) 2 | %LINEARREGCOSTFUNCTION Compute cost and gradient for regularized linear 3 | %regression with multiple variables 4 | % [J, grad] = LINEARREGCOSTFUNCTION(X, y, theta, lambda) computes the 5 | % cost of using theta as the parameter for linear regression to fit the 6 | % data points in X and y. Returns the cost in J and the gradient in grad 7 | 8 | % Initialize some useful values 9 | m = length(y); % number of training examples 10 | 11 | % You need to return the following variables correctly 12 | J = 0; 13 | grad = zeros(size(theta)); 14 | 15 | % ====================== YOUR CODE HERE ====================== 16 | % Instructions: Compute the cost and gradient of regularized linear 17 | % regression for a particular choice of theta. 18 | % 19 | % You should set J to the cost and grad to the gradient. 20 | % 21 | 22 | % We can reuse ex1's computeCost() but it be messier and slower 23 | h0 = X*theta; 24 | J = (sum((h0 - y) .^ 2) + lambda*sum(theta(2:end) .^ 2))/(2*m); 25 | 26 | grad = (1/m)*(X'*(h0-y)) + [0; (lambda/m)*theta(2:end)]; 27 | 28 | % ========================================================================= 29 | 30 | grad = grad(:); 31 | 32 | end 33 | -------------------------------------------------------------------------------- /ex5/plotFit.m: -------------------------------------------------------------------------------- 1 | function plotFit(min_x, max_x, mu, sigma, theta, p) 2 | %PLOTFIT Plots a learned polynomial regression fit over an existing figure. 3 | %Also works with linear regression. 4 | % PLOTFIT(min_x, max_x, mu, sigma, theta, p) plots the learned polynomial 5 | % fit with power p and feature normalization (mu, sigma). 
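A quick, self-contained sanity check for the regularized cost and gradient implemented in linearRegCostFunction.m above (the *_chk names are only for this illustration):

X_chk = [ones(3, 1) (1:3)'];   % three examples: intercept term plus one feature
y_chk = [1; 2; 3];
theta_chk = [0; 1];            % fits y exactly, so only the regularization term remains
[J_chk, grad_chk] = linearRegCostFunction(X_chk, y_chk, theta_chk, 1);
% expected: J_chk    = lambda * theta(2)^2 / (2*m) = 1/6  (about 0.1667)
%           grad_chk = [0; lambda * theta(2) / m]  = [0; 0.3333]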
6 | 7 | % Hold on to the current figure 8 | hold on; 9 | 10 | % We plot a range slightly bigger than the min and max values to get 11 | % an idea of how the fit will vary outside the range of the data points 12 | x = (min_x - 15: 0.05 : max_x + 25)'; 13 | 14 | % Map the X values 15 | X_poly = polyFeatures(x, p); 16 | X_poly = bsxfun(@minus, X_poly, mu); 17 | X_poly = bsxfun(@rdivide, X_poly, sigma); 18 | 19 | % Add ones 20 | X_poly = [ones(size(x, 1), 1) X_poly]; 21 | 22 | % Plot 23 | plot(x, X_poly * theta, '--', 'LineWidth', 2) 24 | 25 | % Hold off to the current figure 26 | hold off 27 | 28 | end 29 | -------------------------------------------------------------------------------- /ex5/polyFeatures.m: -------------------------------------------------------------------------------- 1 | function [X_poly] = polyFeatures(X, p) 2 | %POLYFEATURES Maps X (1D vector) into the p-th power 3 | % [X_poly] = POLYFEATURES(X, p) takes a data matrix X (size m x 1) and 4 | % maps each example into its polynomial features where 5 | % X_poly(i, :) = [X(i) X(i).^2 X(i).^3 ... X(i).^p]; 6 | % 7 | 8 | 9 | % You need to return the following variables correctly. 10 | X_poly = zeros(numel(X), p); 11 | 12 | % ====================== YOUR CODE HERE ====================== 13 | % Instructions: Given a vector X, return a matrix X_poly where the p-th 14 | % column of X contains the values of X to the p-th power. 15 | % 16 | % 17 | 18 | % Think of some vectorized implementation 19 | for i = 1:numel(X) 20 | for j = 1:p 21 | X_poly(i, j) = X(i)^j; 22 | end 23 | end 24 | 25 | % ========================================================================= 26 | 27 | end 28 | -------------------------------------------------------------------------------- /ex5/trainLinearReg.m: -------------------------------------------------------------------------------- 1 | function [theta] = trainLinearReg(X, y, lambda) 2 | %TRAINLINEARREG Trains linear regression given a dataset (X, y) and a 3 | %regularization parameter lambda 4 | % [theta] = TRAINLINEARREG (X, y, lambda) trains linear regression using 5 | % the dataset (X, y) and regularization parameter lambda. Returns the 6 | % trained parameters theta. 7 | % 8 | 9 | % Initialize Theta 10 | initial_theta = zeros(size(X, 2), 1); 11 | 12 | % Create "short hand" for the cost function to be minimized 13 | costFunction = @(t) linearRegCostFunction(X, y, t, lambda); 14 | 15 | % Now, costFunction is a function that takes in only one argument 16 | options = optimset('MaxIter', 200, 'GradObj', 'on'); 17 | 18 | % Minimize using fmincg 19 | % XXX(SaveTheRbtz@): Disable warnings here? 20 | theta = fmincg(costFunction, initial_theta, options); 21 | 22 | end 23 | -------------------------------------------------------------------------------- /ex5/validationCurve.m: -------------------------------------------------------------------------------- 1 | function [lambda_vec, error_train, error_val] = ... 2 | validationCurve(X, y, Xval, yval) 3 | %VALIDATIONCURVE Generate the train and validation errors needed to 4 | %plot a validation curve that we can use to select lambda 5 | % [lambda_vec, error_train, error_val] = ... 6 | % VALIDATIONCURVE(X, y, Xval, yval) returns the train 7 | % and validation errors (in error_train, error_val) 8 | % for different values of lambda. You are given the training set (X, 9 | % y) and validation set (Xval, yval). 
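On the "Think of some vectorized implementation" note in polyFeatures.m above: one possible vectorized form for a column-vector X is a single bsxfun broadcast (a sketch, not the submitted solution):

X_poly = bsxfun(@power, X(:), 1:p);   % element (i, j) is X(i)^j, giving the m x p power matrix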
10 | % 11 | 12 | % Selected values of lambda (you should not change this) 13 | lambda_vec = [0 0.001 0.003 0.01 0.03 0.1 0.3 1 3 10]'; 14 | 15 | % You need to return these variables correctly. 16 | error_train = zeros(length(lambda_vec), 1); 17 | error_val = zeros(length(lambda_vec), 1); 18 | 19 | % ====================== YOUR CODE HERE ====================== 20 | % Instructions: Fill in this function to return training errors in 21 | % error_train and the validation errors in error_val. The 22 | % vector lambda_vec contains the different lambda parameters 23 | % to use for each calculation of the errors, i.e, 24 | % error_train(i), and error_val(i) should give 25 | % you the errors obtained after training with 26 | % lambda = lambda_vec(i) 27 | % 28 | % Note: You can loop over lambda_vec with the following: 29 | % 30 | % for i = 1:length(lambda_vec) 31 | % lambda = lambda_vec(i); 32 | % % Compute train / val errors when training linear 33 | % % regression with regularization parameter lambda 34 | % % You should store the result in error_train(i) 35 | % % and error_val(i) 36 | % .... 37 | % 38 | % end 39 | % 40 | % 41 | 42 | for i = 1:length(lambda_vec) 43 | lambda = lambda_vec(i); 44 | theta = trainLinearReg(X, y, lambda); 45 | error_train(i) = linearRegCostFunction(X, y, theta, 0)(1); 46 | error_val(i) = linearRegCostFunction(Xval, yval, theta, 0)(1); 47 | 48 | % ========================================================================= 49 | 50 | end 51 | -------------------------------------------------------------------------------- /ex6.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/ex6.pdf -------------------------------------------------------------------------------- /ex6/dataset3Params.m: -------------------------------------------------------------------------------- 1 | function [C, sigma] = dataset3Params(X, y, Xval, yval) 2 | %DATASET3PARAMS returns your choice of C and sigma for Part 3 of the exercise 3 | %where you select the optimal (C, sigma) learning parameters to use for SVM 4 | %with RBF kernel 5 | % [C, sigma] = DATASET3PARAMS(X, y, Xval, yval) returns your choice of C and 6 | % sigma. You should complete this function to return the optimal C and 7 | % sigma based on a cross-validation set. 8 | % 9 | 10 | % You need to return the following variables correctly. 11 | C = 1; 12 | sigma = 0.1; 13 | 14 | % ====================== YOUR CODE HERE ====================== 15 | % Instructions: Fill in this function to return the optimal C and sigma 16 | % learning parameters found using the cross validation set. 17 | % You can use svmPredict to predict the labels on the cross 18 | % validation set. For example, 19 | % predictions = svmPredict(model, Xval); 20 | % will return the predictions on the cross validation set. 
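Once validationCurve.m above has been run (as in Part 8 of ex5.m), the "best" lambda is simply the one with the lowest cross-validation error; a small sketch, assuming error_val and lambda_vec come from that call:

[min_err, best_idx] = min(error_val);
best_lambda = lambda_vec(best_idx);   % candidate value to re-train with on the full training set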
21 | % 22 | % Note: You can compute the prediction error using 23 | % mean(double(predictions ~= yval)) 24 | % 25 | 26 | if(0) 27 | C_vec = [0.01 0.03 0.1 0.3 1 3 10]; 28 | sigma_vec = [0.01 0.03 0.1 0.3 1 3 10]; 29 | 30 | result = []; 31 | minimum = [0 0 0]; 32 | 33 | % XXX(SaveTheRbtz): A lot can be optimized here 34 | for c = 1:length(C_vec) 35 | for s = 1:length(sigma_vec) 36 | model = svmTrain(X, y, C_vec(c), @(x1, x2) gaussianKernel(x1, x2, sigma_vec(s))); 37 | predictions = svmPredict(model, Xval); 38 | result = [ result; mean(double(predictions ~= yval)) C_vec(c) sigma_vec(s) ]; 39 | endfor 40 | endfor 41 | 42 | % MATLAB's unstack would be usefull here =( 43 | minimum = sortrows(result)(1,:); 44 | C = minimum(2); 45 | sigma = minimum(3); 46 | 47 | endif 48 | % ========================================================================= 49 | 50 | end 51 | -------------------------------------------------------------------------------- /ex6/emailFeatures.m: -------------------------------------------------------------------------------- 1 | function x = emailFeatures(word_indices) 2 | %EMAILFEATURES takes in a word_indices vector and produces a feature vector 3 | %from the word indices 4 | % x = EMAILFEATURES(word_indices) takes in a word_indices vector and 5 | % produces a feature vector from the word indices. 6 | 7 | % Total number of words in the dictionary 8 | n = 1899; 9 | 10 | % You need to return the following variables correctly. 11 | x = zeros(n, 1); 12 | 13 | % ====================== YOUR CODE HERE ====================== 14 | % Instructions: Fill in this function to return a feature vector for the 15 | % given email (word_indices). To help make it easier to 16 | % process the emails, we have have already pre-processed each 17 | % email and converted each word in the email into an index in 18 | % a fixed dictionary (of 1899 words). The variable 19 | % word_indices contains the list of indices of the words 20 | % which occur in one email. 21 | % 22 | % Concretely, if an email has the text: 23 | % 24 | % The quick brown fox jumped over the lazy dog. 25 | % 26 | % Then, the word_indices vector for this text might look 27 | % like: 28 | % 29 | % 60 100 33 44 10 53 60 58 5 30 | % 31 | % where, we have mapped each word onto a number, for example: 32 | % 33 | % the -- 60 34 | % quick -- 100 35 | % ... 36 | % 37 | % (note: the above numbers are just an example and are not the 38 | % actual mappings). 39 | % 40 | % Your task is take one such word_indices vector and construct 41 | % a binary feature vector that indicates whether a particular 42 | % word occurs in the email. That is, x(i) = 1 when word i 43 | % is present in the email. Concretely, if the word 'the' (say, 44 | % index 60) appears in the email, then x(60) = 1. The feature 45 | % vector should look like: 46 | % 47 | % x = [ 0 0 0 0 1 0 0 0 ... 0 0 0 0 1 ... 0 0 0 1 0 ..]; 48 | % 49 | % 50 | 51 | % Hope this is not very "clumzy" solution 52 | x = arrayfun(@(i) ~isempty(word_indices(word_indices==i)), 1:n); 53 | 54 | % ========================================================================= 55 | 56 | 57 | end 58 | -------------------------------------------------------------------------------- /ex6/emailSample1.txt: -------------------------------------------------------------------------------- 1 | > Anyone knows how much it costs to host a web portal ? 2 | > 3 | Well, it depends on how many visitors you're expecting. 4 | This can be anywhere from less than 10 bucks a month to a couple of $100. 
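Back in emailFeatures.m above, the arrayfun expression works but yields a 1 x n row; a simpler sketch that also keeps x as the n x 1 column promised by the header is plain indexing:

x = zeros(n, 1);
x(word_indices) = 1;   % x(i) = 1 iff word i occurs; repeated indices are harmless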
5 | You should checkout http://www.rackspace.com/ or perhaps Amazon EC2 6 | if youre running something big.. 7 | 8 | To unsubscribe yourself from this mailing list, send an email to: 9 | groupname-unsubscribe@egroups.com 10 | 11 | -------------------------------------------------------------------------------- /ex6/emailSample2.txt: -------------------------------------------------------------------------------- 1 | Folks, 2 | 3 | my first time posting - have a bit of Unix experience, but am new to Linux. 4 | 5 | 6 | Just got a new PC at home - Dell box with Windows XP. Added a second hard disk 7 | for Linux. Partitioned the disk and have installed Suse 7.2 from CD, which went 8 | fine except it didn't pick up my monitor. 9 | 10 | I have a Dell branded E151FPp 15" LCD flat panel monitor and a nVidia GeForce4 11 | Ti4200 video card, both of which are probably too new to feature in Suse's default 12 | set. I downloaded a driver from the nVidia website and installed it using RPM. 13 | Then I ran Sax2 (as was recommended in some postings I found on the net), but 14 | it still doesn't feature my video card in the available list. What next? 15 | 16 | Another problem. I have a Dell branded keyboard and if I hit Caps-Lock twice, 17 | the whole machine crashes (in Linux, not Windows) - even the on/off switch is 18 | inactive, leaving me to reach for the power cable instead. 19 | 20 | If anyone can help me in any way with these probs., I'd be really grateful - 21 | I've searched the 'net but have run out of ideas. 22 | 23 | Or should I be going for a different version of Linux such as RedHat? Opinions 24 | welcome. 25 | 26 | Thanks a lot, 27 | Peter 28 | 29 | -- 30 | Irish Linux Users' Group: ilug@linux.ie 31 | http://www.linux.ie/mailman/listinfo/ilug for (un)subscription information. 32 | List maintainer: listmaster@linux.ie 33 | 34 | 35 | -------------------------------------------------------------------------------- /ex6/ex6.m: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env octave 2 | %% Machine Learning Online Class 3 | % Exercise 6 | Support Vector Machines 4 | % 5 | % Instructions 6 | % ------------ 7 | % 8 | % This file contains code that helps you get started on the 9 | % exercise. You will need to complete the following functions: 10 | % 11 | % gaussianKernel.m 12 | % dataset3Params.m 13 | % processEmail.m 14 | % emailFeatures.m 15 | % 16 | % For this exercise, you will not need to change any code in this file, 17 | % or any other files other than those mentioned above. 18 | % 19 | 20 | %% Initialization 21 | clear ; close all; clc 22 | 23 | %% =============== Part 1: Loading and Visualizing Data ================ 24 | % We start the exercise by first loading and visualizing the dataset. 25 | % The following code will load the dataset into your environment and plot 26 | % the data. 27 | % 28 | 29 | fprintf('Loading and Visualizing Data ...\n') 30 | 31 | % Load from ex6data1: 32 | % You will have X, y in your environment 33 | load('ex6data1.mat'); 34 | 35 | % Plot training data 36 | plotData(X, y); 37 | 38 | fprintf('Program paused. Press enter to continue.\n'); 39 | pause; 40 | 41 | %% ==================== Part 2: Training Linear SVM ==================== 42 | % The following code will train a linear SVM on the dataset and plot the 43 | % decision boundary learned. 
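For the linear kernel, the boundary learned below is the line w(1)*x1 + w(2)*x2 + b = 0 with w = model.w and b = model.b, which is essentially what visualizeBoundaryLinear.m further down in this repo plots. A minimal sketch, assuming model has been returned by svmTrain:

w = model.w;  b = model.b;
xp = linspace(min(X(:,1)), max(X(:,1)), 100);
yp = -(w(1)*xp + b) / w(2);   % x2 values where the decision function is exactly zero
plot(xp, yp, '-b');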
44 | % 45 | 46 | % Load from ex6data1: 47 | % You will have X, y in your environment 48 | load('ex6data1.mat'); 49 | 50 | fprintf('\nTraining Linear SVM ...\n') 51 | 52 | % You should try to change the C value below and see how the decision 53 | % boundary varies (e.g., try C = 1000) 54 | C = 1; 55 | model = svmTrain(X, y, C, @linearKernel, 1e-3, 20); 56 | visualizeBoundaryLinear(X, y, model); 57 | 58 | fprintf('Program paused. Press enter to continue.\n'); 59 | pause; 60 | 61 | %% =============== Part 3: Implementing Gaussian Kernel =============== 62 | % You will now implement the Gaussian kernel to use 63 | % with the SVM. You should complete the code in gaussianKernel.m 64 | % 65 | fprintf('\nEvaluating the Gaussian Kernel ...\n') 66 | 67 | x1 = [1 2 1]; x2 = [0 4 -1]; sigma = 2; 68 | sim = gaussianKernel(x1, x2, sigma); 69 | 70 | fprintf(['Gaussian Kernel between x1 = [1; 2; 1], x2 = [0; 4; -1], sigma = 0.5 :' ... 71 | '\n\t%f\n(this value should be about 0.324652)\n'], sim); 72 | 73 | fprintf('Program paused. Press enter to continue.\n'); 74 | pause; 75 | 76 | %% =============== Part 4: Visualizing Dataset 2 ================ 77 | % The following code will load the next dataset into your environment and 78 | % plot the data. 79 | % 80 | 81 | fprintf('Loading and Visualizing Data ...\n') 82 | 83 | % Load from ex6data2: 84 | % You will have X, y in your environment 85 | load('ex6data2.mat'); 86 | 87 | % Plot training data 88 | plotData(X, y); 89 | 90 | fprintf('Program paused. Press enter to continue.\n'); 91 | pause; 92 | 93 | %% ========== Part 5: Training SVM with RBF Kernel (Dataset 2) ========== 94 | % After you have implemented the kernel, we can now use it to train the 95 | % SVM classifier. 96 | % 97 | fprintf('\nTraining SVM with RBF Kernel (this may take 1 to 2 minutes) ...\n'); 98 | 99 | % Load from ex6data2: 100 | % You will have X, y in your environment 101 | load('ex6data2.mat'); 102 | 103 | % SVM Parameters 104 | C = 1; sigma = 0.1; 105 | 106 | % We set the tolerance and max_passes lower here so that the code will run 107 | % faster. However, in practice, you will want to run the training to 108 | % convergence. 109 | model= svmTrain(X, y, C, @(x1, x2) gaussianKernel(x1, x2, sigma)); 110 | visualizeBoundary(X, y, model); 111 | 112 | fprintf('Program paused. Press enter to continue.\n'); 113 | pause; 114 | 115 | %% =============== Part 6: Visualizing Dataset 3 ================ 116 | % The following code will load the next dataset into your environment and 117 | % plot the data. 118 | % 119 | 120 | fprintf('Loading and Visualizing Data ...\n') 121 | 122 | % Load from ex6data3: 123 | % You will have X, y in your environment 124 | load('ex6data3.mat'); 125 | 126 | % Plot training data 127 | plotData(X, y); 128 | 129 | fprintf('Program paused. Press enter to continue.\n'); 130 | pause; 131 | 132 | %% ========== Part 7: Training SVM with RBF Kernel (Dataset 3) ========== 133 | 134 | % This is a different dataset that you can use to experiment with. Try 135 | % different values of C and sigma here. 136 | % 137 | 138 | % Load from ex6data3: 139 | % You will have X, y in your environment 140 | load('ex6data3.mat'); 141 | 142 | % Try different SVM Parameters here 143 | [C, sigma] = dataset3Params(X, y, Xval, yval); 144 | 145 | % Train the SVM 146 | model= svmTrain(X, y, C, @(x1, x2) gaussianKernel(x1, x2, sigma)); 147 | visualizeBoundary(X, y, model); 148 | 149 | fprintf('Program paused. 
Press enter to continue.\n'); 150 | pause; 151 | 152 | -------------------------------------------------------------------------------- /ex6/ex6_spam.m: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env octave 2 | %% Machine Learning Online Class 3 | % Exercise 6 | Spam Classification with SVMs 4 | % 5 | % Instructions 6 | % ------------ 7 | % 8 | % This file contains code that helps you get started on the 9 | % exercise. You will need to complete the following functions: 10 | % 11 | % gaussianKernel.m 12 | % dataset3Params.m 13 | % processEmail.m 14 | % emailFeatures.m 15 | % 16 | % For this exercise, you will not need to change any code in this file, 17 | % or any other files other than those mentioned above. 18 | % 19 | 20 | %% Initialization 21 | clear ; close all; clc 22 | 23 | %% ==================== Part 1: Email Preprocessing ==================== 24 | % To use an SVM to classify emails into Spam v.s. Non-Spam, you first need 25 | % to convert each email into a vector of features. In this part, you will 26 | % implement the preprocessing steps for each email. You should 27 | % complete the code in processEmail.m to produce a word indices vector 28 | % for a given email. 29 | 30 | fprintf('\nPreprocessing sample email (emailSample1.txt)\n'); 31 | 32 | % Extract Features 33 | file_contents = readFile('emailSample1.txt'); 34 | word_indices = processEmail(file_contents); 35 | 36 | % Print Stats 37 | fprintf('Word Indices: \n'); 38 | fprintf(' %d', word_indices); 39 | fprintf('\n\n'); 40 | 41 | fprintf('Program paused. Press enter to continue.\n'); 42 | pause; 43 | 44 | %% ==================== Part 2: Feature Extraction ==================== 45 | % Now, you will convert each email into a vector of features in R^n. 46 | % You should complete the code in emailFeatures.m to produce a feature 47 | % vector for a given email. 48 | 49 | fprintf('\nExtracting features from sample email (emailSample1.txt)\n'); 50 | 51 | % Extract Features 52 | file_contents = readFile('emailSample1.txt'); 53 | word_indices = processEmail(file_contents); 54 | features = emailFeatures(word_indices); 55 | 56 | % Print Stats 57 | fprintf('Length of feature vector: %d\n', length(features)); 58 | fprintf('Number of non-zero entries: %d\n', sum(features > 0)); 59 | 60 | fprintf('Program paused. Press enter to continue.\n'); 61 | pause; 62 | 63 | %% =========== Part 3: Train Linear SVM for Spam Classification ======== 64 | % In this section, you will train a linear classifier to determine if an 65 | % email is Spam or Not-Spam. 66 | 67 | % Load the Spam Email dataset 68 | % You will have X, y in your environment 69 | load('spamTrain.mat'); 70 | 71 | fprintf('\nTraining Linear SVM (Spam Classification)\n') 72 | fprintf('(this may take 1 to 2 minutes) ...\n') 73 | 74 | C = 0.1; 75 | model = svmTrain(X, y, C, @linearKernel); 76 | 77 | p = svmPredict(model, X); 78 | 79 | fprintf('Training Accuracy: %f\n', mean(double(p == y)) * 100); 80 | 81 | %% =================== Part 4: Test Spam Classification ================ 82 | % After training the classifier, we can evaluate it on a test set. 
We have 83 | % included a test set in spamTest.mat 84 | 85 | % Load the test dataset 86 | % You will have Xtest, ytest in your environment 87 | load('spamTest.mat'); 88 | 89 | fprintf('\nEvaluating the trained Linear SVM on a test set ...\n') 90 | 91 | p = svmPredict(model, Xtest); 92 | 93 | fprintf('Test Accuracy: %f\n', mean(double(p == ytest)) * 100); 94 | pause; 95 | 96 | 97 | %% ================= Part 5: Top Predictors of Spam ==================== 98 | % Since the model we are training is a linear SVM, we can inspect the 99 | % weights learned by the model to understand better how it is determining 100 | % whether an email is spam or not. The following code finds the words with 101 | % the highest weights in the classifier. Informally, the classifier 102 | % 'thinks' that these words are the most likely indicators of spam. 103 | % 104 | 105 | % Sort the weights and obtain the vocabulary list 106 | [weight, idx] = sort(model.w, 'descend'); 107 | vocabList = getVocabList(); 108 | 109 | fprintf('\nTop predictors of spam: \n'); 110 | for i = 1:15 111 | fprintf(' %-15s (%f) \n', vocabList{idx(i)}, weight(i)); 112 | end 113 | 114 | fprintf('\n\n'); 115 | fprintf('\nProgram paused. Press enter to continue.\n'); 116 | pause; 117 | 118 | %% =================== Part 6: Try Your Own Emails ===================== 119 | % Now that you've trained the spam classifier, you can use it on your own 120 | % emails! In the starter code, we have included spamSample1.txt, 121 | % spamSample2.txt, emailSample1.txt and emailSample2.txt as examples. 122 | % The following code reads in one of these emails and then uses your 123 | % learned SVM classifier to determine whether the email is Spam or 124 | % Not Spam. 125 | 126 | % Set the file to be read in (change this to spamSample2.txt, 127 | % emailSample1.txt or emailSample2.txt to see different predictions on 128 | % different email types). Try your own emails as well!
129 | filename = 'spamSample1.txt'; 130 | 131 | % Read and predict 132 | file_contents = readFile(filename); 133 | word_indices = processEmail(file_contents); 134 | x = emailFeatures(word_indices); 135 | p = svmPredict(model, x); 136 | 137 | fprintf('\nProcessed %s\n\nSpam Classification: %d\n', filename, p); 138 | fprintf('(1 indicates spam, 0 indicates not spam)\n\n'); 139 | 140 | -------------------------------------------------------------------------------- /ex6/ex6data1.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/ex6/ex6data1.mat -------------------------------------------------------------------------------- /ex6/ex6data2.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/ex6/ex6data2.mat -------------------------------------------------------------------------------- /ex6/ex6data3.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/ex6/ex6data3.mat -------------------------------------------------------------------------------- /ex6/gaussianKernel.m: -------------------------------------------------------------------------------- 1 | function sim = gaussianKernel(x1, x2, sigma) 2 | %RBFKERNEL returns a radial basis function kernel between x1 and x2 3 | % sim = gaussianKernel(x1, x2) returns a gaussian kernel between x1 and x2 4 | % and returns the value in sim 5 | 6 | % Ensure that x1 and x2 are column vectors 7 | x1 = x1(:); x2 = x2(:); 8 | 9 | % You need to return the following variables correctly. 10 | sim = 0; 11 | 12 | % ====================== YOUR CODE HERE ====================== 13 | % Instructions: Fill in this function to return the similarity between x1 14 | % and x2 computed using a Gaussian kernel with bandwidth 15 | % sigma 16 | % 17 | % 18 | 19 | % I've used matrix multiplication instead of square and sum 20 | sim = exp(-((x1 - x2)'*(x1-x2))/(2*(sigma^2))); 21 | % Canonical implementation is: 22 | % sim = exp(-sum((x1 - x2) .^ 2)/(2*(sigma^2))); 23 | % TODO(SaveTheRbtz@): Should test that I'm not trying to outsmart the optimizer 24 | 25 | % ============================================================= 26 | 27 | end 28 | -------------------------------------------------------------------------------- /ex6/getVocabList.m: -------------------------------------------------------------------------------- 1 | function vocabList = getVocabList() 2 | %GETVOCABLIST reads the fixed vocabulary list in vocab.txt and returns a 3 | %cell array of the words 4 | % vocabList = GETVOCABLIST() reads the fixed vocabulary list in vocab.txt 5 | % and returns a cell array of the words in vocabList.
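A quick hand check of the value quoted in ex6.m for gaussianKernel.m above: for x1 = [1 2 1] and x2 = [0 4 -1] the squared distance is 1 + 4 + 4 = 9, so with sigma = 2 the kernel is exp(-9/8), about 0.324652 (the "sigma = 0.5" in ex6.m's message appears to be a leftover; the code actually passes sigma = 2):

sim = gaussianKernel([1 2 1], [0 4 -1], 2);
fprintf('%f\n', sim);   % prints roughly 0.324652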
6 | 7 | 8 | %% Read the fixed vocabulary list 9 | fid = fopen('vocab.txt'); 10 | 11 | % Store all dictionary words in cell array vocab{} 12 | n = 1899; % Total number of words in the dictionary 13 | 14 | % For ease of implementation, we use a struct to map the strings => integers 15 | % In practice, you'll want to use some form of hashmap 16 | vocabList = cell(n, 1); 17 | for i = 1:n 18 | % Word Index (can ignore since it will be = i) 19 | fscanf(fid, '%d', 1); 20 | % Actual Word 21 | vocabList{i} = fscanf(fid, '%s', 1); 22 | end 23 | fclose(fid); 24 | 25 | end 26 | -------------------------------------------------------------------------------- /ex6/linearKernel.m: -------------------------------------------------------------------------------- 1 | function sim = linearKernel(x1, x2) 2 | %LINEARKERNEL returns a linear kernel between x1 and x2 3 | % sim = linearKernel(x1, x2) returns a linear kernel between x1 and x2 4 | % and returns the value in sim 5 | 6 | % Ensure that x1 and x2 are column vectors 7 | x1 = x1(:); x2 = x2(:); 8 | 9 | % Compute the kernel 10 | sim = x1' * x2; % dot product 11 | 12 | end -------------------------------------------------------------------------------- /ex6/plotData.m: -------------------------------------------------------------------------------- 1 | function plotData(X, y) 2 | %PLOTDATA Plots the data points X and y into a new figure 3 | % PLOTDATA(x,y) plots the data points with + for the positive examples 4 | % and o for the negative examples. X is assumed to be a Mx2 matrix. 5 | % 6 | % Note: This was slightly modified such that it expects y = 1 or y = 0 7 | 8 | % Find Indices of Positive and Negative Examples 9 | pos = find(y == 1); neg = find(y == 0); 10 | 11 | % Plot Examples 12 | plot(X(pos, 1), X(pos, 2), 'k+','LineWidth', 1, 'MarkerSize', 7) 13 | hold on; 14 | plot(X(neg, 1), X(neg, 2), 'ko', 'MarkerFaceColor', 'y', 'MarkerSize', 7) 15 | hold off; 16 | 17 | end 18 | -------------------------------------------------------------------------------- /ex6/processEmail.m: -------------------------------------------------------------------------------- 1 | function word_indices = processEmail(email_contents) 2 | %PROCESSEMAIL preprocesses a the body of an email and 3 | %returns a list of word_indices 4 | % word_indices = PROCESSEMAIL(email_contents) preprocesses 5 | % the body of an email and returns a list of indices of the 6 | % words contained in the email. 7 | % 8 | 9 | % Load Vocabulary 10 | vocabList = getVocabList(); 11 | 12 | % Init return value 13 | word_indices = []; 14 | 15 | % ========================== Preprocess Email =========================== 16 | 17 | % Find the Headers ( \n\n and remove ) 18 | % Uncomment the following lines if you are working with raw emails with the 19 | % full headers 20 | 21 | % hdrstart = strfind(email_contents, ([char(10) char(10)])); 22 | % email_contents = email_contents(hdrstart(1):end); 23 | 24 | % Lower case 25 | email_contents = lower(email_contents); 26 | 27 | % Strip all HTML 28 | % Looks for any expression that starts with < and ends with > and replace 29 | % and does not have any < or > in the tag it with a space 30 | email_contents = regexprep(email_contents, '<[^<>]+>', ' '); 31 | 32 | % Handle Numbers 33 | % Look for one or more characters between 0-9 34 | email_contents = regexprep(email_contents, '[0-9]+', 'number'); 35 | 36 | % Handle URLS 37 | % Look for strings starting with http:// or https:// 38 | email_contents = regexprep(email_contents, ... 
39 | '(http|https)://[^\s]*', 'httpaddr'); 40 | 41 | % Handle Email Addresses 42 | % Look for strings with @ in the middle 43 | email_contents = regexprep(email_contents, '[^\s]+@[^\s]+', 'emailaddr'); 44 | 45 | % Handle $ sign 46 | email_contents = regexprep(email_contents, '[$]+', 'dollar'); 47 | 48 | 49 | % ========================== Tokenize Email =========================== 50 | 51 | % Output the email to screen as well 52 | fprintf('\n==== Processed Email ====\n\n'); 53 | 54 | % Process file 55 | l = 0; 56 | 57 | while ~isempty(email_contents) 58 | 59 | % Tokenize and also get rid of any punctuation 60 | [str, email_contents] = ... 61 | strtok(email_contents, ... 62 | [' @$/#.-:&*+=[]?!(){},''">_<;%' char(10) char(13)]); 63 | 64 | % Remove any non alphanumeric characters 65 | str = regexprep(str, '[^a-zA-Z0-9]', ''); 66 | 67 | % Stem the word 68 | % (the porterStemmer sometimes has issues, so we use a try catch block) 69 | try str = porterStemmer(strtrim(str)); 70 | catch str = ''; continue; 71 | end; 72 | 73 | % Skip the word if it is too short 74 | if length(str) < 1 75 | continue; 76 | end 77 | 78 | % Look up the word in the dictionary and add to word_indices if 79 | % found 80 | % ====================== YOUR CODE HERE ====================== 81 | % Instructions: Fill in this function to add the index of str to 82 | % word_indices if it is in the vocabulary. At this point 83 | % of the code, you have a stemmed word from the email in 84 | % the variable str. You should look up str in the 85 | % vocabulary list (vocabList). If a match exists, you 86 | % should add the index of the word to the word_indices 87 | % vector. Concretely, if str = 'action', then you should 88 | % look up the vocabulary list to find where in vocabList 89 | % 'action' appears. For example, if vocabList{18} = 90 | % 'action', then, you should add 18 to the word_indices 91 | % vector (e.g., word_indices = [word_indices ; 18]; ). 92 | % 93 | % Note: vocabList{idx} returns a the word with index idx in the 94 | % vocabulary list. 95 | % 96 | % Note: You can use strcmp(str1, str2) to compare two strings (str1 and 97 | % str2). It will return 1 only if the two strings are equivalent. 
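An alternative lookup built only on the strcmp hint above, instead of the strmatch call used in the solution below (a sketch; both find the exact vocabulary match):

idx = find(strcmp(vocabList, str));
if ~isempty(idx)
    word_indices = [word_indices; idx];   % append the vocabulary index of this word
end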
98 | % 99 | 100 | word_indices = [word_indices strmatch(str, vocabList, 'exact')]; 101 | 102 | % ============================================================= 103 | 104 | 105 | % Print to screen, ensuring that the output lines are not too long 106 | if (l + length(str) + 1) > 78 107 | fprintf('\n'); 108 | l = 0; 109 | end 110 | fprintf('%s ', str); 111 | l = l + length(str) + 1; 112 | 113 | end 114 | 115 | % Print footer 116 | fprintf('\n\n=========================\n'); 117 | 118 | end 119 | -------------------------------------------------------------------------------- /ex6/readFile.m: -------------------------------------------------------------------------------- 1 | function file_contents = readFile(filename) 2 | %READFILE reads a file and returns its entire contents 3 | % file_contents = READFILE(filename) reads a file and returns its entire 4 | % contents in file_contents 5 | % 6 | 7 | % Load File 8 | fid = fopen(filename); 9 | if fid 10 | file_contents = fscanf(fid, '%c', inf); 11 | fclose(fid); 12 | else 13 | file_contents = ''; 14 | fprintf('Unable to open %s\n', filename); 15 | end 16 | 17 | end 18 | 19 | -------------------------------------------------------------------------------- /ex6/spamSample1.txt: -------------------------------------------------------------------------------- 1 | Do You Want To Make $1000 Or More Per Week? 2 | 3 | 4 | 5 | If you are a motivated and qualified individual - I 6 | will personally demonstrate to you a system that will 7 | make you $1,000 per week or more! This is NOT mlm. 8 | 9 | 10 | 11 | Call our 24 hour pre-recorded number to get the 12 | details. 13 | 14 | 15 | 16 | 000-456-789 17 | 18 | 19 | 20 | I need people who want to make serious money. Make 21 | the call and get the facts. 22 | 23 | Invest 2 minutes in yourself now! 24 | 25 | 26 | 27 | 000-456-789 28 | 29 | 30 | 31 | Looking forward to your call and I will introduce you 32 | to people like yourself who 33 | are currently making $10,000 plus per week! 34 | 35 | 36 | 37 | 000-456-789 38 | 39 | 40 | 41 | 3484lJGv6-241lEaN9080lRmS6-271WxHo7524qiyT5-438rjUv5615hQcf0-662eiDB9057dMtVl72 42 | 43 | -------------------------------------------------------------------------------- /ex6/spamSample2.txt: -------------------------------------------------------------------------------- 1 | Best Buy Viagra Generic Online 2 | 3 | Viagra 100mg x 60 Pills $125, Free Pills & Reorder Discount, Top Selling 100% Quality & Satisfaction guaranteed! 4 | 5 | We accept VISA, Master & E-Check Payments, 90000+ Satisfied Customers! 
6 | http://medphysitcstech.ru 7 | 8 | 9 | -------------------------------------------------------------------------------- /ex6/spamTest.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/ex6/spamTest.mat -------------------------------------------------------------------------------- /ex6/spamTrain.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/ex6/spamTrain.mat -------------------------------------------------------------------------------- /ex6/submit.m: -------------------------------------------------------------------------------- 1 | function submit(partId) 2 | %SUBMIT Submit your code and output to the ml-class servers 3 | % SUBMIT() will connect to the ml-class server and submit your solution 4 | 5 | fprintf('==\n== [ml-class] Submitting Solutions | Programming Exercise %s\n==\n', ... 6 | homework_id()); 7 | if ~exist('partId', 'var') || isempty(partId) 8 | partId = promptPart(); 9 | end 10 | 11 | % Check valid partId 12 | partNames = validParts(); 13 | if ~isValidPartId(partId) 14 | fprintf('!! Invalid homework part selected.\n'); 15 | fprintf('!! Expected an integer from 1 to %d.\n', numel(partNames) + 1); 16 | fprintf('!! Submission Cancelled\n'); 17 | return 18 | end 19 | 20 | [login password] = loginPrompt(); 21 | if isempty(login) 22 | fprintf('!! Submission Cancelled\n'); 23 | return 24 | end 25 | 26 | fprintf('\n== Connecting to ml-class ... '); 27 | if exist('OCTAVE_VERSION') 28 | fflush(stdout); 29 | end 30 | 31 | % Setup submit list 32 | if partId == numel(partNames) + 1 33 | submitParts = 1:numel(partNames); 34 | else 35 | submitParts = [partId]; 36 | end 37 | 38 | for s = 1:numel(submitParts) 39 | % Submit this part 40 | partId = submitParts(s); 41 | 42 | % Get Challenge 43 | [login, ch, signature] = getChallenge(login); 44 | if isempty(login) || isempty(ch) || isempty(signature) 45 | % Some error occured, error string in first return element. 46 | fprintf('\n!! Error: %s\n\n', login); 47 | return 48 | end 49 | 50 | % Attempt Submission with Challenge 51 | ch_resp = challengeResponse(login, password, ch); 52 | [result, str] = submitSolution(login, ch_resp, partId, output(partId), ... 53 | source(partId), signature); 54 | 55 | fprintf('\n== [ml-class] Submitted Homework %s - Part %d - %s\n', ... 56 | homework_id(), partId, partNames{partId}); 57 | fprintf('== %s\n', strtrim(str)); 58 | if exist('OCTAVE_VERSION') 59 | fflush(stdout); 60 | end 61 | end 62 | 63 | end 64 | 65 | % ================== CONFIGURABLES FOR EACH HOMEWORK ================== 66 | 67 | function id = homework_id() 68 | id = '6'; 69 | end 70 | 71 | function [partNames] = validParts() 72 | partNames = { 'Gaussian Kernel', ... 73 | 'Parameters (C, sigma) for Dataset 3', ... 74 | 'Email Preprocessing' ... 75 | 'Email Feature Extraction' ... 76 | }; 77 | end 78 | 79 | function srcs = sources() 80 | % Separated by part 81 | srcs = { { 'gaussianKernel.m' }, ... 82 | { 'dataset3Params.m' }, ... 83 | { 'processEmail.m' }, ... 
84 | { 'emailFeatures.m' } }; 85 | end 86 | 87 | function out = output(partId) 88 | % Random Test Cases 89 | x1 = sin(1:10)'; 90 | x2 = cos(1:10)'; 91 | ec = 'the quick brown fox jumped over the lazy dog'; 92 | wi = 1 + abs(round(x1 * 1863)); 93 | wi = [wi ; wi]; 94 | if partId == 1 95 | sim = gaussianKernel(x1, x2, 2); 96 | out = sprintf('%0.5f ', sim); 97 | elseif partId == 2 98 | load('ex6data3.mat'); 99 | [C, sigma] = dataset3Params(X, y, Xval, yval); 100 | out = sprintf('%0.5f ', C); 101 | out = [out sprintf('%0.5f ', sigma)]; 102 | elseif partId == 3 103 | word_indices = processEmail(ec); 104 | out = sprintf('%d ', word_indices); 105 | elseif partId == 4 106 | x = emailFeatures(wi); 107 | out = sprintf('%d ', x); 108 | end 109 | end 110 | 111 | function url = challenge_url() 112 | url = 'http://www.ml-class.org/course/homework/challenge'; 113 | end 114 | 115 | function url = submit_url() 116 | url = 'http://www.ml-class.org/course/homework/submit'; 117 | end 118 | 119 | % ========================= CHALLENGE HELPERS ========================= 120 | 121 | function src = source(partId) 122 | src = ''; 123 | src_files = sources(); 124 | if partId <= numel(src_files) 125 | flist = src_files{partId}; 126 | for i = 1:numel(flist) 127 | fid = fopen(flist{i}); 128 | while ~feof(fid) 129 | line = fgets(fid); 130 | src = [src line]; 131 | end 132 | fclose(fid); 133 | src = [src '||||||||']; 134 | end 135 | end 136 | end 137 | 138 | function ret = isValidPartId(partId) 139 | partNames = validParts(); 140 | ret = (~isempty(partId)) && (partId >= 1) && (partId <= numel(partNames) + 1); 141 | end 142 | 143 | function partId = promptPart() 144 | fprintf('== Select which part(s) to submit:\n', ... 145 | homework_id()); 146 | partNames = validParts(); 147 | srcFiles = sources(); 148 | for i = 1:numel(partNames) 149 | fprintf('== %d) %s [', i, partNames{i}); 150 | fprintf(' %s ', srcFiles{i}{:}); 151 | fprintf(']\n'); 152 | end 153 | fprintf('== %d) All of the above \n==\nEnter your choice [1-%d]: ', ... 154 | numel(partNames) + 1, numel(partNames) + 1); 155 | selPart = input('', 's'); 156 | partId = str2num(selPart); 157 | if ~isValidPartId(partId) 158 | partId = -1; 159 | end 160 | end 161 | 162 | function [email,ch,signature] = getChallenge(email) 163 | str = urlread(challenge_url(), 'post', {'email_address', email}); 164 | 165 | str = strtrim(str); 166 | [email, str] = strtok (str, '|'); 167 | [ch, str] = strtok (str, '|'); 168 | [signature, str] = strtok (str, '|'); 169 | end 170 | 171 | 172 | function [result, str] = submitSolution(email, ch_resp, part, output, ... 173 | source, signature) 174 | 175 | params = {'homework', homework_id(), ... 176 | 'part', num2str(part), ... 177 | 'email', email, ... 178 | 'output', output, ... 179 | 'source', source, ... 180 | 'challenge_response', ch_resp, ... 
181 | 'signature', signature}; 182 | 183 | str = urlread(submit_url(), 'post', params); 184 | 185 | % Parse str to read for success / failure 186 | result = 0; 187 | 188 | end 189 | 190 | % =========================== LOGIN HELPERS =========================== 191 | 192 | function [login password] = loginPrompt() 193 | % Prompt for password 194 | [login password] = basicPrompt(); 195 | 196 | if isempty(login) || isempty(password) 197 | login = []; password = []; 198 | end 199 | end 200 | 201 | 202 | function [login password] = basicPrompt() 203 | login = input('Login (Email address): ', 's'); 204 | password = input('Password: ', 's'); 205 | end 206 | 207 | 208 | function [str] = challengeResponse(email, passwd, challenge) 209 | salt = ')~/|]QMB3[!W`?OVt7qC"@+}'; 210 | str = sha1([challenge sha1([salt email passwd])]); 211 | sel = randperm(numel(str)); 212 | sel = sort(sel(1:16)); 213 | str = str(sel); 214 | end 215 | 216 | 217 | % =============================== SHA-1 ================================ 218 | 219 | function hash = sha1(str) 220 | 221 | % Initialize variables 222 | h0 = uint32(1732584193); 223 | h1 = uint32(4023233417); 224 | h2 = uint32(2562383102); 225 | h3 = uint32(271733878); 226 | h4 = uint32(3285377520); 227 | 228 | % Convert to word array 229 | strlen = numel(str); 230 | 231 | % Break string into chars and append the bit 1 to the message 232 | mC = [double(str) 128]; 233 | mC = [mC zeros(1, 4-mod(numel(mC), 4), 'uint8')]; 234 | 235 | numB = strlen * 8; 236 | if exist('idivide') 237 | numC = idivide(uint32(numB + 65), 512, 'ceil'); 238 | else 239 | numC = ceil(double(numB + 65)/512); 240 | end 241 | numW = numC * 16; 242 | mW = zeros(numW, 1, 'uint32'); 243 | 244 | idx = 1; 245 | for i = 1:4:strlen + 1 246 | mW(idx) = bitor(bitor(bitor( ... 247 | bitshift(uint32(mC(i)), 24), ... 248 | bitshift(uint32(mC(i+1)), 16)), ... 249 | bitshift(uint32(mC(i+2)), 8)), ... 
250 | uint32(mC(i+3))); 251 | idx = idx + 1; 252 | end 253 | 254 | % Append length of message 255 | mW(numW - 1) = uint32(bitshift(uint64(numB), -32)); 256 | mW(numW) = uint32(bitshift(bitshift(uint64(numB), 32), -32)); 257 | 258 | % Process the message in successive 512-bit chs 259 | for cId = 1 : double(numC) 260 | cSt = (cId - 1) * 16 + 1; 261 | cEnd = cId * 16; 262 | ch = mW(cSt : cEnd); 263 | 264 | % Extend the sixteen 32-bit words into eighty 32-bit words 265 | for j = 17 : 80 266 | ch(j) = ch(j - 3); 267 | ch(j) = bitxor(ch(j), ch(j - 8)); 268 | ch(j) = bitxor(ch(j), ch(j - 14)); 269 | ch(j) = bitxor(ch(j), ch(j - 16)); 270 | ch(j) = bitrotate(ch(j), 1); 271 | end 272 | 273 | % Initialize hash value for this ch 274 | a = h0; 275 | b = h1; 276 | c = h2; 277 | d = h3; 278 | e = h4; 279 | 280 | % Main loop 281 | for i = 1 : 80 282 | if(i >= 1 && i <= 20) 283 | f = bitor(bitand(b, c), bitand(bitcmp(b), d)); 284 | k = uint32(1518500249); 285 | elseif(i >= 21 && i <= 40) 286 | f = bitxor(bitxor(b, c), d); 287 | k = uint32(1859775393); 288 | elseif(i >= 41 && i <= 60) 289 | f = bitor(bitor(bitand(b, c), bitand(b, d)), bitand(c, d)); 290 | k = uint32(2400959708); 291 | elseif(i >= 61 && i <= 80) 292 | f = bitxor(bitxor(b, c), d); 293 | k = uint32(3395469782); 294 | end 295 | 296 | t = bitrotate(a, 5); 297 | t = bitadd(t, f); 298 | t = bitadd(t, e); 299 | t = bitadd(t, k); 300 | t = bitadd(t, ch(i)); 301 | e = d; 302 | d = c; 303 | c = bitrotate(b, 30); 304 | b = a; 305 | a = t; 306 | 307 | end 308 | h0 = bitadd(h0, a); 309 | h1 = bitadd(h1, b); 310 | h2 = bitadd(h2, c); 311 | h3 = bitadd(h3, d); 312 | h4 = bitadd(h4, e); 313 | 314 | end 315 | 316 | hash = reshape(dec2hex(double([h0 h1 h2 h3 h4]), 8)', [1 40]); 317 | 318 | hash = lower(hash); 319 | 320 | end 321 | 322 | function ret = bitadd(iA, iB) 323 | ret = double(iA) + double(iB); 324 | ret = bitset(ret, 33, 0); 325 | ret = uint32(ret); 326 | end 327 | 328 | function ret = bitrotate(iA, places) 329 | t = bitshift(iA, places - 32); 330 | ret = bitshift(iA, places); 331 | ret = bitor(ret, t); 332 | end 333 | -------------------------------------------------------------------------------- /ex6/svmPredict.m: -------------------------------------------------------------------------------- 1 | function pred = svmPredict(model, X) 2 | %SVMPREDICT returns a vector of predictions using a trained SVM model 3 | %(svmTrain). 4 | % pred = SVMPREDICT(model, X) returns a vector of predictions using a 5 | % trained SVM model (svmTrain). X is a mxn matrix where there each 6 | % example is a row. model is a svm model returned from svmTrain. 7 | % predictions pred is a m x 1 column of predictions of {0, 1} values. 
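For reference, what svmPredict evaluates is the standard SVM decision function f(x) = sum_j alphas(j) * y(j) * K(x, X(j,:)) + b, predicting 1 whenever f(x) >= 0. A direct, unvectorized sketch for a single example x (a row vector), using the model fields saved by svmTrain:

f = model.b;
for j = 1:size(model.X, 1)
    f = f + model.alphas(j) * model.y(j) * model.kernelFunction(x', model.X(j, :)');
end
prediction = double(f >= 0);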
8 | % 9 | 10 | % Check if we are getting a column vector, if so, then assume that we only 11 | % need to do prediction for a single example 12 | if (size(X, 2) == 1) 13 | % Examples should be in rows 14 | X = X'; 15 | end 16 | 17 | % Dataset 18 | m = size(X, 1); 19 | p = zeros(m, 1); 20 | pred = zeros(m, 1); 21 | 22 | if strcmp(func2str(model.kernelFunction), 'linearKernel') 23 | % We can use the weights and bias directly if working with the 24 | % linear kernel 25 | p = X * model.w + model.b; 26 | elseif strfind(func2str(model.kernelFunction), 'gaussianKernel') 27 | % Vectorized RBF Kernel 28 | % This is equivalent to computing the kernel on every pair of examples 29 | X1 = sum(X.^2, 2); 30 | X2 = sum(model.X.^2, 2)'; 31 | K = bsxfun(@plus, X1, bsxfun(@plus, X2, - 2 * X * model.X')); 32 | K = model.kernelFunction(1, 0) .^ K; 33 | K = bsxfun(@times, model.y', K); 34 | K = bsxfun(@times, model.alphas', K); 35 | p = sum(K, 2); 36 | else 37 | % Other Non-linear kernel 38 | for i = 1:m 39 | prediction = 0; 40 | for j = 1:size(model.X, 1) 41 | prediction = prediction + ... 42 | model.alphas(j) * model.y(j) * ... 43 | model.kernelFunction(X(i,:)', model.X(j,:)'); 44 | end 45 | p(i) = prediction + model.b; 46 | end 47 | end 48 | 49 | % Convert predictions into 0 / 1 50 | pred(p >= 0) = 1; 51 | pred(p < 0) = 0; 52 | 53 | end 54 | 55 | -------------------------------------------------------------------------------- /ex6/svmTrain.m: -------------------------------------------------------------------------------- 1 | function [model] = svmTrain(X, Y, C, kernelFunction, ... 2 | tol, max_passes) 3 | %SVMTRAIN Trains an SVM classifier using a simplified version of the SMO 4 | %algorithm. 5 | % [model] = SVMTRAIN(X, Y, C, kernelFunction, tol, max_passes) trains an 6 | % SVM classifier and returns trained model. X is the matrix of training 7 | % examples. Each row is a training example, and the jth column holds the 8 | % jth feature. Y is a column matrix containing 1 for positive examples 9 | % and 0 for negative examples. C is the standard SVM regularization 10 | % parameter. tol is a tolerance value used for determining equality of 11 | % floating point numbers. max_passes controls the number of iterations 12 | % over the dataset (without changes to alpha) before the algorithm quits. 13 | % 14 | % Note: This is a simplified version of the SMO algorithm for training 15 | % SVMs. In practice, if you want to train an SVM classifier, we 16 | % recommend using an optimized package such as: 17 | % 18 | % LIBSVM (http://www.csie.ntu.edu.tw/~cjlin/libsvm/) 19 | % SVMLight (http://svmlight.joachims.org/) 20 | % 21 | % 22 | 23 | if ~exist('tol', 'var') || isempty(tol) 24 | tol = 1e-3; 25 | end 26 | 27 | if ~exist('max_passes', 'var') || isempty(max_passes) 28 | max_passes = 5; 29 | end 30 | 31 | % Data parameters 32 | m = size(X, 1); 33 | n = size(X, 2); 34 | 35 | % Map 0 to -1 36 | Y(Y==0) = -1; 37 | 38 | % Variables 39 | alphas = zeros(m, 1); 40 | b = 0; 41 | E = zeros(m, 1); 42 | passes = 0; 43 | eta = 0; 44 | L = 0; 45 | H = 0; 46 | 47 | % Pre-compute the Kernel Matrix since our dataset is small 48 | % (in practice, optimized SVM packages that handle large datasets 49 | % gracefully will _not_ do this) 50 | % 51 | % We have implemented optimized vectorized version of the Kernels here so 52 | % that the svm training will run faster. 
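A note on the vectorized RBF branch just below: K is first filled with squared distances using ||xi - xj||^2 = ||xi||^2 + ||xj||^2 - 2*xi'*xj, and kernelFunction(1, 0) equals exp(-1/(2*sigma^2)), so raising it elementwise to those distances gives exp(-||xi - xj||^2 / (2*sigma^2)), i.e. the full Gaussian kernel matrix. A tiny check of the distance identity:

xi = [1 2]; xj = [3 5];
d2_expand = sum(xi.^2) + sum(xj.^2) - 2 * (xi * xj');   % 5 + 34 - 26 = 13
d2_direct = sum((xi - xj).^2);                          % also 13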
53 | if strcmp(func2str(kernelFunction), 'linearKernel') 54 | % Vectorized computation for the Linear Kernel 55 | % This is equivalent to computing the kernel on every pair of examples 56 | K = X*X'; 57 | elseif strfind(func2str(kernelFunction), 'gaussianKernel') 58 | % Vectorized RBF Kernel 59 | % This is equivalent to computing the kernel on every pair of examples 60 | X2 = sum(X.^2, 2); 61 | K = bsxfun(@plus, X2, bsxfun(@plus, X2', - 2 * (X * X'))); 62 | K = kernelFunction(1, 0) .^ K; 63 | else 64 | % Pre-compute the Kernel Matrix 65 | % The following can be slow due to the lack of vectorization 66 | K = zeros(m); 67 | for i = 1:m 68 | for j = i:m 69 | K(i,j) = kernelFunction(X(i,:)', X(j,:)'); 70 | K(j,i) = K(i,j); %the matrix is symmetric 71 | end 72 | end 73 | end 74 | 75 | % Train 76 | fprintf('\nTraining ...'); 77 | dots = 12; 78 | while passes < max_passes, 79 | 80 | num_changed_alphas = 0; 81 | for i = 1:m, 82 | 83 | % Calculate Ei = f(x(i)) - y(i) using (2). 84 | % E(i) = b + sum (X(i, :) * (repmat(alphas.*Y,1,n).*X)') - Y(i); 85 | E(i) = b + sum (alphas.*Y.*K(:,i)) - Y(i); 86 | 87 | if ((Y(i)*E(i) < -tol && alphas(i) < C) || (Y(i)*E(i) > tol && alphas(i) > 0)), 88 | 89 | % In practice, there are many heuristics one can use to select 90 | % the i and j. In this simplified code, we select them randomly. 91 | j = ceil(m * rand()); 92 | while j == i, % Make sure i \neq j 93 | j = ceil(m * rand()); 94 | end 95 | 96 | % Calculate Ej = f(x(j)) - y(j) using (2). 97 | E(j) = b + sum (alphas.*Y.*K(:,j)) - Y(j); 98 | 99 | % Save old alphas 100 | alpha_i_old = alphas(i); 101 | alpha_j_old = alphas(j); 102 | 103 | % Compute L and H by (10) or (11). 104 | if (Y(i) == Y(j)), 105 | L = max(0, alphas(j) + alphas(i) - C); 106 | H = min(C, alphas(j) + alphas(i)); 107 | else 108 | L = max(0, alphas(j) - alphas(i)); 109 | H = min(C, C + alphas(j) - alphas(i)); 110 | end 111 | 112 | if (L == H), 113 | % continue to next i. 114 | continue; 115 | end 116 | 117 | % Compute eta by (14). 118 | eta = 2 * K(i,j) - K(i,i) - K(j,j); 119 | if (eta >= 0), 120 | % continue to next i. 121 | continue; 122 | end 123 | 124 | % Compute and clip new value for alpha j using (12) and (15). 125 | alphas(j) = alphas(j) - (Y(j) * (E(i) - E(j))) / eta; 126 | 127 | % Clip 128 | alphas(j) = min (H, alphas(j)); 129 | alphas(j) = max (L, alphas(j)); 130 | 131 | % Check if change in alpha is significant 132 | if (abs(alphas(j) - alpha_j_old) < tol), 133 | % continue to next i. 134 | % replace anyway 135 | alphas(j) = alpha_j_old; 136 | continue; 137 | end 138 | 139 | % Determine value for alpha i using (16). 140 | alphas(i) = alphas(i) + Y(i)*Y(j)*(alpha_j_old - alphas(j)); 141 | 142 | % Compute b1 and b2 using (17) and (18) respectively. 143 | b1 = b - E(i) ... 144 | - Y(i) * (alphas(i) - alpha_i_old) * K(i,j)' ... 145 | - Y(j) * (alphas(j) - alpha_j_old) * K(i,j)'; 146 | b2 = b - E(j) ... 147 | - Y(i) * (alphas(i) - alpha_i_old) * K(i,j)' ... 148 | - Y(j) * (alphas(j) - alpha_j_old) * K(j,j)'; 149 | 150 | % Compute b by (19). 
151 | if (0 < alphas(i) && alphas(i) < C), 152 | b = b1; 153 | elseif (0 < alphas(j) && alphas(j) < C), 154 | b = b2; 155 | else 156 | b = (b1+b2)/2; 157 | end 158 | 159 | num_changed_alphas = num_changed_alphas + 1; 160 | 161 | end 162 | 163 | end 164 | 165 | if (num_changed_alphas == 0), 166 | passes = passes + 1; 167 | else 168 | passes = 0; 169 | end 170 | 171 | fprintf('.'); 172 | dots = dots + 1; 173 | if dots > 78 174 | dots = 0; 175 | fprintf('\n'); 176 | end 177 | if exist('OCTAVE_VERSION') 178 | fflush(stdout); 179 | end 180 | end 181 | fprintf(' Done! \n\n'); 182 | 183 | % Save the model 184 | idx = alphas > 0; 185 | model.X= X(idx,:); 186 | model.y= Y(idx); 187 | model.kernelFunction = kernelFunction; 188 | model.b= b; 189 | model.alphas= alphas(idx); 190 | model.w = ((alphas.*Y)'*X)'; 191 | 192 | end 193 | -------------------------------------------------------------------------------- /ex6/visualizeBoundary.m: -------------------------------------------------------------------------------- 1 | function visualizeBoundary(X, y, model, varargin) 2 | %VISUALIZEBOUNDARY plots a non-linear decision boundary learned by the SVM 3 | % VISUALIZEBOUNDARYLINEAR(X, y, model) plots a non-linear decision 4 | % boundary learned by the SVM and overlays the data on it 5 | 6 | % Plot the training data on top of the boundary 7 | plotData(X, y) 8 | 9 | % Make classification predictions over a grid of values 10 | x1plot = linspace(min(X(:,1)), max(X(:,1)), 100)'; 11 | x2plot = linspace(min(X(:,2)), max(X(:,2)), 100)'; 12 | [X1, X2] = meshgrid(x1plot, x2plot); 13 | vals = zeros(size(X1)); 14 | for i = 1:size(X1, 2) 15 | this_X = [X1(:, i), X2(:, i)]; 16 | vals(:, i) = svmPredict(model, this_X); 17 | end 18 | 19 | % Plot the SVM boundary 20 | hold on 21 | contour(X1, X2, vals, [0 0], 'Color', 'b'); 22 | hold off; 23 | 24 | end 25 | -------------------------------------------------------------------------------- /ex6/visualizeBoundaryLinear.m: -------------------------------------------------------------------------------- 1 | function visualizeBoundaryLinear(X, y, model) 2 | %VISUALIZEBOUNDARYLINEAR plots a linear decision boundary learned by the 3 | %SVM 4 | % VISUALIZEBOUNDARYLINEAR(X, y, model) plots a linear decision boundary 5 | % learned by the SVM and overlays the data on it 6 | 7 | w = model.w; 8 | b = model.b; 9 | xp = linspace(min(X(:,1)), max(X(:,1)), 100); 10 | yp = - (w(1)*xp + b)/w(2); 11 | plotData(X, y); 12 | hold on; 13 | plot(xp, yp, '-b'); 14 | hold off 15 | 16 | end 17 | -------------------------------------------------------------------------------- /ex7.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/ex7.pdf -------------------------------------------------------------------------------- /ex7/bird_small.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/ex7/bird_small.mat -------------------------------------------------------------------------------- /ex7/bird_small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/ex7/bird_small.png -------------------------------------------------------------------------------- /ex7/computeCentroids.m: 
-------------------------------------------------------------------------------- 1 | function centroids = computeCentroids(X, idx, K) 2 | %COMPUTECENTROIDS returs the new centroids by computing the means of the 3 | %data points assigned to each centroid. 4 | % centroids = COMPUTECENTROIDS(X, idx, K) returns the new centroids by 5 | % computing the means of the data points assigned to each centroid. It is 6 | % given a dataset X where each row is a single data point, a vector 7 | % idx of centroid assignments (i.e. each entry in range [1..K]) for each 8 | % example, and K, the number of centroids. You should return a matrix 9 | % centroids, where each row of centroids is the mean of the data points 10 | % assigned to it. 11 | % 12 | 13 | % Useful variables 14 | [m n] = size(X); 15 | 16 | % You need to return the following variables correctly. 17 | centroids = zeros(K, n); 18 | 19 | 20 | % ====================== YOUR CODE HERE ====================== 21 | % Instructions: Go over every centroid and compute mean of all points that 22 | % belong to it. Concretely, the row vector centroids(i, :) 23 | % should contain the mean of the data points assigned to 24 | % centroid i. 25 | % 26 | % Note: You can use a for-loop over the centroids to compute this. 27 | % 28 | 29 | % TODO(SaveTheRbtz@): See if it can be futher vectorized 30 | for k = 1:K 31 | point_indeces = find(idx==k); 32 | centroids(k, :) = sum(X(point_indeces, :)) ./ length(point_indeces); 33 | 34 | % ============================================================= 35 | 36 | 37 | end 38 | 39 | -------------------------------------------------------------------------------- /ex7/displayData.m: -------------------------------------------------------------------------------- 1 | function [h, display_array] = displayData(X, example_width) 2 | %DISPLAYDATA Display 2D data in a nice grid 3 | % [h, display_array] = DISPLAYDATA(X, example_width) displays 2D data 4 | % stored in X in a nice grid. It returns the figure handle h and the 5 | % displayed array if requested. 6 | 7 | % Set example_width automatically if not passed in 8 | if ~exist('example_width', 'var') || isempty(example_width) 9 | example_width = round(sqrt(size(X, 2))); 10 | end 11 | 12 | % Gray Image 13 | colormap(gray); 14 | 15 | % Compute rows, cols 16 | [m n] = size(X); 17 | example_height = (n / example_width); 18 | 19 | % Compute number of items to display 20 | display_rows = floor(sqrt(m)); 21 | display_cols = ceil(m / display_rows); 22 | 23 | % Between images padding 24 | pad = 1; 25 | 26 | % Setup blank display 27 | display_array = - ones(pad + display_rows * (example_height + pad), ... 28 | pad + display_cols * (example_width + pad)); 29 | 30 | % Copy each example into a patch on the display array 31 | curr_ex = 1; 32 | for j = 1:display_rows 33 | for i = 1:display_cols 34 | if curr_ex > m, 35 | break; 36 | end 37 | % Copy the patch 38 | 39 | % Get the max value of the patch 40 | max_val = max(abs(X(curr_ex, :))); 41 | display_array(pad + (j - 1) * (example_height + pad) + (1:example_height), ... 42 | pad + (i - 1) * (example_width + pad) + (1:example_width)) = ... 
43 | reshape(X(curr_ex, :), example_height, example_width) / max_val; 44 | curr_ex = curr_ex + 1; 45 | end 46 | if curr_ex > m, 47 | break; 48 | end 49 | end 50 | 51 | % Display Image 52 | h = imagesc(display_array, [-1 1]); 53 | 54 | % Do not show axis 55 | axis image off 56 | 57 | drawnow; 58 | 59 | end 60 | -------------------------------------------------------------------------------- /ex7/drawLine.m: -------------------------------------------------------------------------------- 1 | function drawLine(p1, p2, varargin) 2 | %DRAWLINE Draws a line from point p1 to point p2 3 | % DRAWLINE(p1, p2) Draws a line from point p1 to point p2 and holds the 4 | % current figure 5 | 6 | plot([p1(1) p2(1)], [p1(2) p2(2)], varargin{:}); 7 | 8 | end -------------------------------------------------------------------------------- /ex7/ex7.m: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env octave 2 | %% Machine Learning Online Class 3 | % Exercise 7 | Principle Component Analysis and K-Means Clustering 4 | % 5 | % Instructions 6 | % ------------ 7 | % 8 | % This file contains code that helps you get started on the 9 | % exercise. You will need to complete the following functions: 10 | % 11 | % pca.m 12 | % projectData.m 13 | % recoverData.m 14 | % computeCentroids.m 15 | % findClosestCentroids.m 16 | % kMeansInitCentroids.m 17 | % 18 | % For this exercise, you will not need to change any code in this file, 19 | % or any other files other than those mentioned above. 20 | % 21 | 22 | %% Initialization 23 | clear ; close all; clc 24 | 25 | %% ================= Part 1: Find Closest Centroids ==================== 26 | % To help you implement K-Means, we have divided the learning algorithm 27 | % into two functions -- findClosestCentroids and computeCentroids. In this 28 | % part, you shoudl complete the code in the findClosestCentroids function. 29 | % 30 | fprintf('Finding closest centroids.\n\n'); 31 | 32 | % Load an example dataset that we will be using 33 | load('ex7data2.mat'); 34 | 35 | % Select an initial set of centroids 36 | K = 3; % 3 Centroids 37 | initial_centroids = [3 3; 6 2; 8 5]; 38 | 39 | % Find the closest centroids for the examples using the 40 | % initial_centroids 41 | idx = findClosestCentroids(X, initial_centroids); 42 | 43 | fprintf('Closest centroids for the first 3 examples: \n') 44 | fprintf(' %d', idx(1:3)); 45 | fprintf('\n(the closest centroids should be 1, 3, 2 respectively)\n'); 46 | 47 | fprintf('Program paused. Press enter to continue.\n'); 48 | pause; 49 | 50 | %% ===================== Part 2: Compute Means ========================= 51 | % After implementing the closest centroids function, you should now 52 | % complete the computeCentroids function. 53 | % 54 | fprintf('\nComputing centroids means.\n\n'); 55 | 56 | % Compute means based on the closest centroids found in the previous part. 57 | centroids = computeCentroids(X, idx, K); 58 | 59 | fprintf('Centroids computed after initial finding of closest centroids: \n') 60 | fprintf(' %f %f \n' , centroids'); 61 | fprintf('\n(the centroids should be\n'); 62 | fprintf(' [ 2.428301 3.157924 ]\n'); 63 | fprintf(' [ 5.813503 2.633656 ]\n'); 64 | fprintf(' [ 7.119387 3.616684 ]\n\n'); 65 | 66 | fprintf('Program paused. 
Press enter to continue.\n'); 67 | pause; 68 | 69 | 70 | %% =================== Part 3: K-Means Clustering ====================== 71 | % After you have completed the two functions computeCentroids and 72 | % findClosestCentroids, you have all the necessary pieces to run the 73 | % kMeans algorithm. In this part, you will run the K-Means algorithm on 74 | % the example dataset we have provided. 75 | % 76 | fprintf('\nRunning K-Means clustering on example dataset.\n\n'); 77 | 78 | % Load an example dataset 79 | load('ex7data2.mat'); 80 | 81 | % Settings for running K-Means 82 | K = 3; 83 | max_iters = 10; 84 | 85 | % For consistency, here we set centroids to specific values 86 | % but in practice you want to generate them automatically, such as by 87 | % setting them to be random examples (as can be seen in 88 | % kMeansInitCentroids). 89 | initial_centroids = [3 3; 6 2; 8 5]; 90 | 91 | % Run K-Means algorithm. The 'true' at the end tells our function to plot 92 | % the progress of K-Means 93 | [centroids, idx] = runkMeans(X, initial_centroids, max_iters, true); 94 | fprintf('\nK-Means Done.\n\n'); 95 | 96 | fprintf('Program paused. Press enter to continue.\n'); 97 | pause; 98 | 99 | %% ============= Part 4: K-Means Clustering on Pixels =============== 100 | % In this exercise, you will use K-Means to compress an image. To do this, 101 | % you will first run K-Means on the colors of the pixels in the image and 102 | % then you will map each pixel onto its closest centroid. 103 | % 104 | % You should now complete the code in kMeansInitCentroids.m 105 | % 106 | 107 | fprintf('\nRunning K-Means clustering on pixels from an image.\n\n'); 108 | 109 | % Load an image of a bird 110 | A = double(imread('bird_small.png')); 111 | 112 | % If imread does not work for you, you can try instead 113 | % load ('bird_small.mat'); 114 | 115 | A = A / 255; % Divide by 255 so that all values are in the range 0 - 1 116 | 117 | % Size of the image 118 | img_size = size(A); 119 | 120 | % Reshape the image into an Nx3 matrix where N = number of pixels. 121 | % Each row will contain the Red, Green and Blue pixel values 122 | % This gives us our dataset matrix X that we will use K-Means on. 123 | X = reshape(A, img_size(1) * img_size(2), 3); 124 | 125 | % Run your K-Means algorithm on this data 126 | % You should try different values of K and max_iters here 127 | K = 16; 128 | max_iters = 10; 129 | 130 | % When using K-Means, it is important to initialize the centroids 131 | % randomly. 132 | % You should complete the code in kMeansInitCentroids.m before proceeding 133 | initial_centroids = kMeansInitCentroids(X, K); 134 | 135 | % Run K-Means 136 | [centroids, idx] = runkMeans(X, initial_centroids, max_iters); 137 | 138 | fprintf('Program paused. Press enter to continue.\n'); 139 | pause; 140 | 141 | 142 | %% ================= Part 5: Image Compression ====================== 143 | % In this part of the exercise, you will use the clusters of K-Means to 144 | % compress an image. To do this, we first find the closest cluster for 145 | % each example. After that, we replace each pixel with the value of the centroid assigned to it. 146 | 147 | fprintf('\nApplying K-Means to compress an image.\n\n'); 148 | 149 | % Find closest cluster members 150 | idx = findClosestCentroids(X, centroids); 151 | 152 | % Essentially, now we have represented the image X in terms of the 153 | % indices in idx.
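% (This is where the compression comes from: the original image stores
% 24 bits per RGB pixel, while the compressed representation keeps only the
% 16 centroid colors (16 x 24 bits) plus log2(16) = 4 bits per pixel to index
% them, roughly a factor of 6 smaller for the 128x128 image used here.)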
154 | 155 | % We can now recover the image from the indices (idx) by mapping each pixel 156 | % (specified by it's index in idx) to the centroid value 157 | X_recovered = centroids(idx,:); 158 | 159 | % Reshape the recovered image into proper dimensions 160 | X_recovered = reshape(X_recovered, img_size(1), img_size(2), 3); 161 | 162 | % Display the original image 163 | subplot(1, 2, 1); 164 | imagesc(A); 165 | title('Original'); 166 | 167 | % Display compressed image side by side 168 | subplot(1, 2, 2); 169 | imagesc(X_recovered) 170 | title(sprintf('Compressed, with %d colors.', K)); 171 | 172 | 173 | fprintf('Program paused. Press enter to continue.\n'); 174 | pause; 175 | 176 | -------------------------------------------------------------------------------- /ex7/ex7_pca.m: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env octave 2 | %% Machine Learning Online Class 3 | % Exercise 7 | Principle Component Analysis and K-Means Clustering 4 | % 5 | % Instructions 6 | % ------------ 7 | % 8 | % This file contains code that helps you get started on the 9 | % exercise. You will need to complete the following functions: 10 | % 11 | % pca.m 12 | % projectData.m 13 | % recoverData.m 14 | % computeCentroids.m 15 | % findClosestCentroids.m 16 | % kMeansInitCentroids.m 17 | % 18 | % For this exercise, you will not need to change any code in this file, 19 | % or any other files other than those mentioned above. 20 | % 21 | 22 | %% Initialization 23 | clear ; close all; clc 24 | 25 | %% ================== Part 1: Load Example Dataset =================== 26 | % We start this exercise by using a small dataset that is easily to 27 | % visualize 28 | % 29 | fprintf('Visualizing example dataset for PCA.\n\n'); 30 | 31 | % The following command loads the dataset. You should now have the 32 | % variable X in your environment 33 | load ('ex7data1.mat'); 34 | 35 | % Visualize the example dataset 36 | plot(X(:, 1), X(:, 2), 'bo'); 37 | axis([0.5 6.5 2 8]); axis square; 38 | 39 | fprintf('Program paused. Press enter to continue.\n'); 40 | pause; 41 | 42 | 43 | %% =============== Part 2: Principal Component Analysis =============== 44 | % You should now implement PCA, a dimension reduction technique. You 45 | % should complete the code in pca.m 46 | % 47 | fprintf('\nRunning PCA on example dataset.\n\n'); 48 | 49 | % Before running PCA, it is important to first normalize X 50 | [X_norm, mu, sigma] = featureNormalize(X); 51 | 52 | % Run PCA 53 | [U, S] = pca(X_norm); 54 | 55 | % Compute mu, the mean of the each feature 56 | 57 | % Draw the eigenvectors centered at mean of data. These lines show the 58 | % directions of maximum variations in the dataset. 59 | hold on; 60 | drawLine(mu, mu + 1.5 * S(1,1) * U(:,1)', '-k', 'LineWidth', 2); 61 | drawLine(mu, mu + 1.5 * S(2,2) * U(:,2)', '-k', 'LineWidth', 2); 62 | hold off; 63 | 64 | fprintf('Top eigenvector: \n'); 65 | fprintf(' U(:,1) = %f %f \n', U(1,1), U(2,1)); 66 | fprintf('\n(you should expect to see -0.707107 -0.707107)\n'); 67 | 68 | fprintf('Program paused. Press enter to continue.\n'); 69 | pause; 70 | 71 | 72 | %% =================== Part 3: Dimension Reduction =================== 73 | % You should now implement the projection step to map the data onto the 74 | % first k eigenvectors. The code will then plot the data in this reduced 75 | % dimensional space. This will show you what the data looks like when 76 | % using only the corresponding eigenvectors to reconstruct it. 
77 | % 78 | % You should complete the code in projectData.m 79 | % 80 | fprintf('\nDimension reduction on example dataset.\n\n'); 81 | 82 | % Plot the normalized dataset (returned from pca) 83 | plot(X_norm(:, 1), X_norm(:, 2), 'bo'); 84 | axis([-4 3 -4 3]); axis square 85 | 86 | % Project the data onto K = 1 dimension 87 | K = 1; 88 | Z = projectData(X_norm, U, K); 89 | fprintf('Projection of the first example: %f\n', Z(1)); 90 | fprintf('\n(this value should be about 1.481274)\n\n'); 91 | 92 | X_rec = recoverData(Z, U, K); 93 | fprintf('Approximation of the first example: %f %f\n', X_rec(1, 1), X_rec(1, 2)); 94 | fprintf('\n(this value should be about -1.047419 -1.047419)\n\n'); 95 | 96 | % Draw lines connecting the projected points to the original points 97 | hold on; 98 | plot(X_rec(:, 1), X_rec(:, 2), 'ro'); 99 | for i = 1:size(X_norm, 1) 100 | drawLine(X_norm(i,:), X_rec(i,:), '--k', 'LineWidth', 1); 101 | end 102 | hold off 103 | 104 | fprintf('Program paused. Press enter to continue.\n'); 105 | pause; 106 | 107 | %% =============== Part 4: Loading and Visualizing Face Data ============= 108 | % We start the exercise by first loading and visualizing the dataset. 109 | % The following code will load the dataset into your environment 110 | % 111 | fprintf('\nLoading face dataset.\n\n'); 112 | 113 | % Load Face dataset 114 | load ('ex7faces.mat') 115 | 116 | % Display the first 100 faces in the dataset 117 | displayData(X(1:100, :)); 118 | 119 | fprintf('Program paused. Press enter to continue.\n'); 120 | pause; 121 | 122 | %% =========== Part 5: PCA on Face Data: Eigenfaces =================== 123 | % Run PCA and visualize the eigenvectors which are in this case eigenfaces 124 | % We display the first 36 eigenfaces. 125 | % 126 | fprintf(['\nRunning PCA on face dataset.\n' ... 127 | '(this mght take a minute or two ...)\n\n']); 128 | 129 | % Before running PCA, it is important to first normalize X by subtracting 130 | % the mean value from each feature 131 | [X_norm, mu, sigma] = featureNormalize(X); 132 | 133 | % Run PCA 134 | [U, S] = pca(X_norm); 135 | 136 | % Visualize the top 36 eigenvectors found 137 | displayData(U(:, 1:36)'); 138 | 139 | fprintf('Program paused. Press enter to continue.\n'); 140 | pause; 141 | 142 | 143 | %% ============= Part 6: Dimension Reduction for Faces ================= 144 | % Project images to the eigen space using the top k eigenvectors 145 | % If you are applying a machine learning algorithm 146 | fprintf('\nDimension reduction for face dataset.\n\n'); 147 | 148 | K = 100; 149 | Z = projectData(X_norm, U, K); 150 | 151 | fprintf('The projected data Z has a size of: ') 152 | fprintf('%d ', size(Z)); 153 | 154 | fprintf('\n\nProgram paused. Press enter to continue.\n'); 155 | pause; 156 | 157 | %% ==== Part 7: Visualization of Faces after PCA Dimension Reduction ==== 158 | % Project images to the eigen space using the top K eigen vectors and 159 | % visualize only using those K dimensions 160 | % Compare to the original input, which is also displayed 161 | 162 | fprintf('\nVisualizing the projected (reduced dimension) faces.\n\n'); 163 | 164 | K = 100; 165 | X_rec = recoverData(Z, U, K); 166 | 167 | % Display normalized data 168 | subplot(1, 2, 1); 169 | displayData(X_norm(1:100,:)); 170 | title('Original faces'); 171 | axis square; 172 | 173 | % Display reconstructed data from only k eigenfaces 174 | subplot(1, 2, 2); 175 | displayData(X_rec(1:100,:)); 176 | title('Recovered faces'); 177 | axis square; 178 | 179 | fprintf('Program paused. 
Press enter to continue.\n'); 180 | pause; 181 | 182 | 183 | %% === Part 8(a): Optional (ungraded) Exercise: PCA for Visualization === 184 | % One useful application of PCA is to use it to visualize high-dimensional 185 | % data. In the last K-Means exercise you ran K-Means on 3-dimensional 186 | % pixel colors of an image. We first visualize this output in 3D, and then 187 | % apply PCA to obtain a visualization in 2D. 188 | 189 | close all; close all; clc 190 | 191 | % Re-load the image from the previous exercise and run K-Means on it 192 | % For this to work, you need to complete the K-Means assignment first 193 | A = double(imread('bird_small.png')); 194 | 195 | % If imread does not work for you, you can try instead 196 | % load ('bird_small.mat'); 197 | 198 | A = A / 255; 199 | img_size = size(A); 200 | X = reshape(A, img_size(1) * img_size(2), 3); 201 | K = 16; 202 | max_iters = 10; 203 | initial_centroids = kMeansInitCentroids(X, K); 204 | [centroids, idx] = runkMeans(X, initial_centroids, max_iters); 205 | 206 | % Sample 1000 random indexes (since working with all the data is 207 | % too expensive. If you have a fast computer, you may increase this. 208 | sel = floor(rand(1000, 1) * size(X, 1)) + 1; 209 | 210 | % Setup Color Palette 211 | palette = hsv(K); 212 | colors = palette(idx(sel), :); 213 | 214 | % Visualize the data and centroid memberships in 3D 215 | figure; 216 | scatter3(X(sel, 1), X(sel, 2), X(sel, 3), 10, colors); 217 | title('Pixel dataset plotted in 3D. Color shows centroid memberships'); 218 | fprintf('Program paused. Press enter to continue.\n'); 219 | pause; 220 | 221 | %% === Part 8(b): Optional (ungraded) Exercise: PCA for Visualization === 222 | % Use PCA to project this cloud to 2D for visualization 223 | 224 | % Subtract the mean to use PCA 225 | [X_norm, mu, sigma] = featureNormalize(X); 226 | 227 | % PCA and project the data to 2D 228 | [U, S] = pca(X_norm); 229 | Z = projectData(X_norm, U, 2); 230 | 231 | % Plot in 2D 232 | figure; 233 | plotDataPoints(Z(sel, :), idx(sel), K); 234 | title('Pixel dataset plotted in 2D, using PCA for dimensionality reduction'); 235 | fprintf('Program paused. 
Press enter to continue.\n'); 236 | pause; 237 | -------------------------------------------------------------------------------- /ex7/ex7data1.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/ex7/ex7data1.mat -------------------------------------------------------------------------------- /ex7/ex7data2.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/ex7/ex7data2.mat -------------------------------------------------------------------------------- /ex7/ex7faces.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/ex7/ex7faces.mat -------------------------------------------------------------------------------- /ex7/featureNormalize.m: -------------------------------------------------------------------------------- 1 | ../ex1/featureNormalize.m -------------------------------------------------------------------------------- /ex7/findClosestCentroids.m: -------------------------------------------------------------------------------- 1 | function idx = findClosestCentroids(X, centroids) 2 | %FINDCLOSESTCENTROIDS computes the centroid memberships for every example 3 | % idx = FINDCLOSESTCENTROIDS (X, centroids) returns the closest centroids 4 | % in idx for a dataset X where each row is a single example. idx = m x 1 5 | % vector of centroid assignments (i.e. each entry in range [1..K]) 6 | % 7 | 8 | % Set K 9 | K = size(centroids, 1); 10 | 11 | % You need to return the following variables correctly. 12 | idx = zeros(size(X,1), 1); 13 | 14 | % ====================== YOUR CODE HERE ====================== 15 | % Instructions: Go over every example, find its closest centroid, and store 16 | % the index inside idx at the appropriate location. 17 | % Concretely, idx(i) should contain the index of the centroid 18 | % closest to example i. Hence, it should be a value in the 19 | % range 1..K 20 | % 21 | % Note: You can use a for-loop over the examples to compute this. 
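% A possible vectorized alternative (just a sketch, not the graded solution):
% build an m x K matrix of squared distances, one column per centroid, and
% take the row-wise minimum:
%   D = zeros(size(X, 1), K);
%   for j = 1:K
%     diff = bsxfun(@minus, X, centroids(j, :));   % subtract centroid j from every example
%     D(:, j) = sum(diff .^ 2, 2);                 % squared distance of each example to centroid j
%   end
%   [dummy, idx] = min(D, [], 2);                  % index of the nearest centroid per example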
22 | % 23 | 24 | % TODO(SaveTheRbtz@): Think how it can be vectorized 25 | for i = 1:length(idx) 26 | distance = zeros(K, 1); 27 | for j = 1:K 28 | % TODO(SaveTheRbtz@): Can be vectorized as diff * diff' 29 | distance(j) = sum(sum((X(i, :) - centroids(j, :)) .^ 2 )); 30 | endfor 31 | [value, idx(i)] = min(distance); 32 | endfor 33 | 34 | % ============================================================= 35 | 36 | end 37 | 38 | -------------------------------------------------------------------------------- /ex7/kMeansInitCentroids.m: -------------------------------------------------------------------------------- 1 | function centroids = kMeansInitCentroids(X, K) 2 | %KMEANSINITCENTROIDS This function initializes K centroids that are to be 3 | %used in K-Means on the dataset X 4 | % centroids = KMEANSINITCENTROIDS(X, K) returns K initial centroids to be 5 | % used with the K-Means on the dataset X 6 | % 7 | 8 | % You should return this values correctly 9 | centroids = zeros(K, size(X, 2)); 10 | 11 | % ====================== YOUR CODE HERE ====================== 12 | % Instructions: You should set centroids to randomly chosen examples from 13 | % the dataset X 14 | % 15 | 16 | % Initialize the centroids to be random examples 17 | % Randomly reorder the indices of examples 18 | randidx = randperm(size(X, 1)); 19 | % Take the first K examples as centroids 20 | centroids = X(randidx(1:K), :); 21 | 22 | % ============================================================= 23 | 24 | end 25 | 26 | -------------------------------------------------------------------------------- /ex7/pca.m: -------------------------------------------------------------------------------- 1 | function [U, S] = pca(X) 2 | %PCA Run principal component analysis on the dataset X 3 | % [U, S, X] = pca(X) computes eigenvectors of the covariance matrix of X 4 | % Returns the eigenvectors U, the eigenvalues (on diagonal) in S 5 | % 6 | 7 | % Useful values 8 | [m, n] = size(X); 9 | 10 | % You need to return the following variables correctly. 11 | U = zeros(n); 12 | S = zeros(n); 13 | 14 | % ====================== YOUR CODE HERE ====================== 15 | % Instructions: You should first compute the covariance matrix. Then, you 16 | % should use the "svd" function to compute the eigenvectors 17 | % and eigenvalues of the covariance matrix. 18 | % 19 | % Note: When computing the covariance matrix, remember to divide by m (the 20 | % number of examples). 
21 | % 22 | 23 | Sigma = (X'*X) ./ m; 24 | [U, S, V] = svd(Sigma); 25 | 26 | 27 | % ========================================================================= 28 | 29 | end 30 | -------------------------------------------------------------------------------- /ex7/plotDataPoints.m: -------------------------------------------------------------------------------- 1 | function plotDataPoints(X, idx, K) 2 | %PLOTDATAPOINTS plots data points in X, coloring them so that those with the same 3 | %index assignments in idx have the same color 4 | % PLOTDATAPOINTS(X, idx, K) plots data points in X, coloring them so that those 5 | % with the same index assignments in idx have the same color 6 | 7 | % Create palette 8 | palette = hsv(K + 1); 9 | colors = palette(idx, :); 10 | 11 | % Plot the data 12 | scatter(X(:,1), X(:,2), 15, colors); 13 | 14 | end 15 | -------------------------------------------------------------------------------- /ex7/plotProgresskMeans.m: -------------------------------------------------------------------------------- 1 | function plotProgresskMeans(X, centroids, previous, idx, K, i) 2 | %PLOTPROGRESSKMEANS is a helper function that displays the progress of 3 | %k-Means as it is running. It is intended for use only with 2D data. 4 | % PLOTPROGRESSKMEANS(X, centroids, previous, idx, K, i) plots the data 5 | % points with colors assigned to each centroid. With the previous 6 | % centroids, it also plots a line between the previous locations and 7 | % current locations of the centroids. 8 | % 9 | 10 | % Plot the examples 11 | plotDataPoints(X, idx, K); 12 | 13 | % Plot the centroids as black x's 14 | plot(centroids(:,1), centroids(:,2), 'x', ... 15 | 'MarkerEdgeColor','k', ... 16 | 'MarkerSize', 10, 'LineWidth', 3); 17 | 18 | % Plot the history of the centroids with lines 19 | for j=1:size(centroids,1) 20 | drawLine(centroids(j, :), previous(j, :)); 21 | end 22 | 23 | % Title 24 | title(sprintf('Iteration number %d', i)) 25 | 26 | end 27 | 28 | -------------------------------------------------------------------------------- /ex7/projectData.m: -------------------------------------------------------------------------------- 1 | function Z = projectData(X, U, K) 2 | %PROJECTDATA Computes the reduced data representation when projecting only 3 | %on to the top k eigenvectors 4 | % Z = projectData(X, U, K) computes the projection of 5 | % the normalized inputs X into the reduced dimensional space spanned by 6 | % the first K columns of U. It returns the projected examples in Z. 7 | % 8 | 9 | % You need to return the following variables correctly. 10 | Z = zeros(size(X, 1), K); 11 | 12 | % ====================== YOUR CODE HERE ====================== 13 | % Instructions: Compute the projection of the data using only the top K 14 | % eigenvectors in U (first K columns). 
15 | % For the i-th example X(i,:), the projection on to the k-th 16 | % eigenvector is given as follows: 17 | % x = X(i, :)'; 18 | % projection_k = x' * U(:, k); 19 | % 20 | 21 | U_reduce = U(:, 1:K); 22 | Z = X * U_reduce; 23 | 24 | % ============================================================= 25 | 26 | end 27 | -------------------------------------------------------------------------------- /ex7/recoverData.m: -------------------------------------------------------------------------------- 1 | function X_rec = recoverData(Z, U, K) 2 | %RECOVERDATA Recovers an approximation of the original data when using the 3 | %projected data 4 | % X_rec = RECOVERDATA(Z, U, K) recovers an approximation the 5 | % original data that has been reduced to K dimensions. It returns the 6 | % approximate reconstruction in X_rec. 7 | % 8 | 9 | % You need to return the following variables correctly. 10 | X_rec = zeros(size(Z, 1), size(U, 1)); 11 | 12 | % ====================== YOUR CODE HERE ====================== 13 | % Instructions: Compute the approximation of the data by projecting back 14 | % onto the original space using the top K eigenvectors in U. 15 | % 16 | % For the i-th example Z(i,:), the (approximate) 17 | % recovered data for dimension j is given as follows: 18 | % v = Z(i, :)'; 19 | % recovered_j = v' * U(j, 1:K)'; 20 | % 21 | % Notice that U(j, 1:K) is a row vector. 22 | % 23 | 24 | U_reduce = U(:, 1:K); 25 | X_rec = Z * U_reduce'; 26 | 27 | % ============================================================= 28 | 29 | end 30 | -------------------------------------------------------------------------------- /ex7/runkMeans.m: -------------------------------------------------------------------------------- 1 | function [centroids, idx] = runkMeans(X, initial_centroids, ... 2 | max_iters, plot_progress) 3 | %RUNKMEANS runs the K-Means algorithm on data matrix X, where each row of X 4 | %is a single example 5 | % [centroids, idx] = RUNKMEANS(X, initial_centroids, max_iters, ... 6 | % plot_progress) runs the K-Means algorithm on data matrix X, where each 7 | % row of X is a single example. It uses initial_centroids used as the 8 | % initial centroids. max_iters specifies the total number of interactions 9 | % of K-Means to execute. plot_progress is a true/false flag that 10 | % indicates if the function should also plot its progress as the 11 | % learning happens. This is set to false by default. runkMeans returns 12 | % centroids, a Kxn matrix of the computed centroids and idx, a m x 1 13 | % vector of centroid assignments (i.e. 
each entry in range [1..K]) 14 | % 15 | 16 | % Set default value for plot progress 17 | if ~exist('plot_progress', 'var') || isempty(plot_progress) 18 | plot_progress = false; 19 | end 20 | 21 | % Plot the data if we are plotting progress 22 | if plot_progress 23 | figure; 24 | hold on; 25 | end 26 | 27 | % Initialize values 28 | [m n] = size(X); 29 | K = size(initial_centroids, 1); 30 | centroids = initial_centroids; 31 | previous_centroids = centroids; 32 | idx = zeros(m, 1); 33 | 34 | % Run K-Means 35 | for i=1:max_iters 36 | 37 | % Output progress 38 | fprintf('K-Means iteration %d/%d...\n', i, max_iters); 39 | if exist('OCTAVE_VERSION') 40 | fflush(stdout); 41 | end 42 | 43 | % For each example in X, assign it to the closest centroid 44 | idx = findClosestCentroids(X, centroids); 45 | 46 | % Optionally, plot progress here 47 | if plot_progress 48 | plotProgresskMeans(X, centroids, previous_centroids, idx, K, i); 49 | previous_centroids = centroids; 50 | fprintf('Press enter to continue.\n'); 51 | pause; 52 | end 53 | 54 | % Given the memberships, compute new centroids 55 | centroids = computeCentroids(X, idx, K); 56 | end 57 | 58 | % Hold off if we are plotting progress 59 | if plot_progress 60 | hold off; 61 | end 62 | 63 | end 64 | 65 | -------------------------------------------------------------------------------- /ex8.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/ex8.pdf -------------------------------------------------------------------------------- /ex8/checkCostFunction.m: -------------------------------------------------------------------------------- 1 | function checkCostFunction(lambda) 2 | %CHECKCOSTFUNCTION Creates a collaborative filering problem 3 | %to check your cost function and gradients 4 | % CHECKCOSTFUNCTION(lambda) Creates a collaborative filering problem 5 | % to check your cost function and gradients, it will output the 6 | % analytical gradients produced by your code and the numerical gradients 7 | % (computed using computeNumericalGradient). These two gradient 8 | % computations should result in very similar values. 9 | 10 | % Set lambda 11 | if ~exist('lambda', 'var') || isempty(lambda) 12 | lambda = 0; 13 | end 14 | 15 | %% Create small problem 16 | X_t = rand(4, 3); 17 | Theta_t = rand(5, 3); 18 | 19 | % Zap out most entries 20 | Y = X_t * Theta_t'; 21 | Y(rand(size(Y)) > 0.5) = 0; 22 | R = zeros(size(Y)); 23 | R(Y ~= 0) = 1; 24 | 25 | %% Run Gradient Checking 26 | X = randn(size(X_t)); 27 | Theta = randn(size(Theta_t)); 28 | num_users = size(Y, 2); 29 | num_movies = size(Y, 1); 30 | num_features = size(Theta_t, 2); 31 | 32 | numgrad = computeNumericalGradient( ... 33 | @(t) cofiCostFunc(t, Y, R, num_users, num_movies, ... 34 | num_features, lambda), [X(:); Theta(:)]); 35 | 36 | [cost, grad] = cofiCostFunc([X(:); Theta(:)], Y, R, num_users, ... 37 | num_movies, num_features, lambda); 38 | 39 | disp([numgrad grad]); 40 | fprintf(['The above two columns you get should be very similar.\n' ... 41 | '(Left-Your Numerical Gradient, Right-Analytical Gradient)\n\n']); 42 | 43 | diff = norm(numgrad-grad)/norm(numgrad+grad); 44 | fprintf(['If your backpropagation implementation is correct, then \n' ... 45 | 'the relative difference will be small (less than 1e-9). \n' ... 
46 | '\nRelative Difference: %g\n'], diff); 47 | 48 | end -------------------------------------------------------------------------------- /ex8/cofiCostFunc.m: -------------------------------------------------------------------------------- 1 | function [J, grad] = cofiCostFunc(params, Y, R, num_users, num_movies, ... 2 | num_features, lambda) 3 | %COFICOSTFUNC Collaborative filtering cost function 4 | % [J, grad] = COFICOSTFUNC(params, Y, R, num_users, num_movies, ... 5 | % num_features, lambda) returns the cost and gradient for the 6 | % collaborative filtering problem. 7 | % 8 | 9 | % Unfold the U and W matrices from params 10 | X = reshape(params(1:num_movies*num_features), num_movies, num_features); 11 | Theta = reshape(params(num_movies*num_features+1:end), ... 12 | num_users, num_features); 13 | 14 | 15 | % You need to return the following values correctly 16 | J = 0; 17 | X_grad = zeros(size(X)); 18 | Theta_grad = zeros(size(Theta)); 19 | 20 | % ====================== YOUR CODE HERE ====================== 21 | % Instructions: Compute the cost function and gradient for collaborative 22 | % filtering. Concretely, you should first implement the cost 23 | % function (without regularization) and make sure it 24 | % matches our costs. After that, you should implement the 25 | % gradient and use the checkCostFunction routine to check 26 | % that the gradient is correct. Finally, you should implement 27 | % regularization. 28 | % 29 | % Notes: X - num_movies x num_features matrix of movie features 30 | % Theta - num_users x num_features matrix of user features 31 | % Y - num_movies x num_users matrix of user ratings of movies 32 | % R - num_movies x num_users matrix, where R(i, j) = 1 if the 33 | % i-th movie was rated by the j-th user 34 | % 35 | % You should set the following variables correctly: 36 | % 37 | % X_grad - num_movies x num_features matrix, containing the 38 | % partial derivatives w.r.t. each element of X 39 | % Theta_grad - num_users x num_features matrix, containing the 40 | % partial derivatives w.r.t. each element of Theta 41 | % 42 | 43 | % FIXME(SaveTheRbtz@): Not optimal: performs calculations on cells with R(i,j) == 0 44 | J = sum(sum((R==1) .* ((X * Theta' - Y) .^ 2))) / 2; 45 | 46 | X_grad = (R==1) .* (X * Theta' - Y) * Theta + lambda * X; 47 | Theta_grad = (R==1)' .* (X * Theta' - Y)' * X + lambda * Theta; 48 | 49 | Regularization = lambda * (sum(sum(Theta .^ 2)) + sum(sum(X .^ 2))) / 2; 50 | J += Regularization; 51 | 52 | % ============================================================= 53 | 54 | grad = [X_grad(:); Theta_grad(:)]; 55 | 56 | end 57 | -------------------------------------------------------------------------------- /ex8/computeNumericalGradient.m: -------------------------------------------------------------------------------- 1 | function numgrad = computeNumericalGradient(J, theta) 2 | %COMPUTENUMERICALGRADIENT Computes the gradient using "finite differences" 3 | %and gives us a numerical estimate of the gradient. 4 | % numgrad = COMPUTENUMERICALGRADIENT(J, theta) computes the numerical 5 | % gradient of the function J around theta. Calling y = J(theta) should 6 | % return the function value at theta. 7 | 8 | % Notes: The following code implements numerical gradient checking, and 9 | % returns the numerical gradient. It sets numgrad(i) to (a numerical 10 | % approximation of) the partial derivative of J with respect to the 11 | % i-th input argument, evaluated at theta.
(i.e., numgrad(i) should 12 | % be the (approximately) the partial derivative of J with respect 13 | % to theta(i).) 14 | % 15 | 16 | numgrad = zeros(size(theta)); 17 | perturb = zeros(size(theta)); 18 | e = 1e-4; 19 | for p = 1:numel(theta) 20 | % Set perturbation vector 21 | perturb(p) = e; 22 | loss1 = J(theta - perturb); 23 | loss2 = J(theta + perturb); 24 | % Compute Numerical Gradient 25 | numgrad(p) = (loss2 - loss1) / (2*e); 26 | perturb(p) = 0; 27 | end 28 | 29 | end 30 | -------------------------------------------------------------------------------- /ex8/estimateGaussian.m: -------------------------------------------------------------------------------- 1 | function [mu sigma2] = estimateGaussian(X) 2 | %ESTIMATEGAUSSIAN This function estimates the parameters of a 3 | %Gaussian distribution using the data in X 4 | % [mu sigma2] = estimateGaussian(X), 5 | % The input X is the dataset with each n-dimensional data point in one row 6 | % The output is an n-dimensional vector mu, the mean of the data set 7 | % and the variances sigma^2, an n x 1 vector 8 | % 9 | 10 | % Useful variables 11 | [m, n] = size(X); 12 | 13 | % You should return these values correctly 14 | mu = zeros(n, 1); 15 | sigma2 = zeros(n, 1); 16 | 17 | % ====================== YOUR CODE HERE ====================== 18 | % Instructions: Compute the mean of the data and the variances 19 | % In particular, mu(i) should contain the mean of 20 | % the data for the i-th feature and sigma2(i) 21 | % should contain variance of the i-th feature. 22 | % 23 | 24 | mu = mean(X); 25 | sigma2 = var(X, 1); 26 | 27 | % ============================================================= 28 | 29 | 30 | end 31 | -------------------------------------------------------------------------------- /ex8/ex8.m: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env octave 2 | %% Machine Learning Online Class 3 | % Exercise 8 | Anomaly Detection and Collaborative Filtering 4 | % 5 | % Instructions 6 | % ------------ 7 | % 8 | % This file contains code that helps you get started on the 9 | % exercise. You will need to complete the following functions: 10 | % 11 | % estimateGaussian.m 12 | % selectThreshold.m 13 | % cofiCostFunc.m 14 | % 15 | % For this exercise, you will not need to change any code in this file, 16 | % or any other files other than those mentioned above. 17 | % 18 | 19 | %% Initialization 20 | clear ; close all; clc 21 | 22 | %% ================== Part 1: Load Example Dataset =================== 23 | % We start this exercise by using a small dataset that is easy to 24 | % visualize. 25 | % 26 | % Our example case consists of 2 network server statistics across 27 | % several machines: the latency and throughput of each machine. 28 | % This exercise will help us find possibly faulty (or very fast) machines. 29 | % 30 | 31 | fprintf('Visualizing example dataset for outlier detection.\n\n'); 32 | 33 | % The following command loads the dataset. You should now have the 34 | % variables X, Xval, yval in your environment 35 | load('ex8data1.mat'); 36 | 37 | % Visualize the example dataset 38 | plot(X(:, 1), X(:, 2), 'bx'); 39 | axis([0 30 0 30]); 40 | xlabel('Latency (ms)'); 41 | ylabel('Throughput (mb/s)'); 42 | 43 | fprintf('Program paused. Press enter to continue.\n'); 44 | pause 45 | 46 | 47 | %% ================== Part 2: Estimate the dataset statistics =================== 48 | % For this exercise, we assume a Gaussian distribution for the dataset. 
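% (With the per-feature variances used here, the assumed density factorizes
% over features; a sketch of the model that estimateGaussian.m fits and
% multivariateGaussian.m evaluates:
%   p(x) = prod_j 1/sqrt(2*pi*sigma2(j)) * exp(-(x(j) - mu(j))^2 / (2*sigma2(j)))
% i.e. an independent Gaussian with mean mu(j) and variance sigma2(j) for each
% feature j.)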
49 | % 50 | % We first estimate the parameters of our assumed Gaussian distribution, 51 | % then compute the probabilities for each of the points and then visualize 52 | % both the overall distribution and where each of the points falls in 53 | % terms of that distribution. 54 | % 55 | fprintf('Visualizing Gaussian fit.\n\n'); 56 | 57 | % Estimate my and sigma2 58 | [mu sigma2] = estimateGaussian(X); 59 | 60 | % Returns the density of the multivariate normal at each data point (row) 61 | % of X 62 | p = multivariateGaussian(X, mu, sigma2); 63 | 64 | % Visualize the fit 65 | visualizeFit(X, mu, sigma2); 66 | xlabel('Latency (ms)'); 67 | ylabel('Throughput (mb/s)'); 68 | 69 | fprintf('Program paused. Press enter to continue.\n'); 70 | pause; 71 | 72 | %% ================== Part 3: Find Outliers =================== 73 | % Now you will find a good epsilon threshold using a cross-validation set 74 | % probabilities given the estimated Gaussian distribution 75 | % 76 | 77 | pval = multivariateGaussian(Xval, mu, sigma2); 78 | 79 | [epsilon F1] = selectThreshold(yval, pval); 80 | fprintf('Best epsilon found using cross-validation: %e\n', epsilon); 81 | fprintf('Best F1 on Cross Validation Set: %f\n', F1); 82 | fprintf(' (you should see a value epsilon of about 8.99e-05)\n\n'); 83 | 84 | % Find the outliers in the training set and plot the 85 | outliers = find(p < epsilon); 86 | 87 | % Draw a red circle around those outliers 88 | hold on 89 | plot(X(outliers, 1), X(outliers, 2), 'ro', 'LineWidth', 2, 'MarkerSize', 10); 90 | hold off 91 | 92 | fprintf('Program paused. Press enter to continue.\n'); 93 | pause; 94 | 95 | %% ================== Part 4: Multidimensional Outliers =================== 96 | % We will now use the code from the previous part and apply it to a 97 | % harder problem in which more features describe each datapoint and only 98 | % some features indicate whether a point is an outlier. 99 | % 100 | 101 | % Loads the second dataset. You should now have the 102 | % variables X, Xval, yval in your environment 103 | load('ex8data2.mat'); 104 | 105 | % Apply the same steps to the larger dataset 106 | [mu sigma2] = estimateGaussian(X); 107 | 108 | % Training set 109 | p = multivariateGaussian(X, mu, sigma2); 110 | 111 | % Cross-validation set 112 | pval = multivariateGaussian(Xval, mu, sigma2); 113 | 114 | % Find the best threshold 115 | [epsilon F1] = selectThreshold(yval, pval); 116 | 117 | fprintf('Best epsilon found using cross-validation: %e\n', epsilon); 118 | fprintf('Best F1 on Cross Validation Set: %f\n', F1); 119 | fprintf('# Outliers found: %d\n', sum(p < epsilon)); 120 | fprintf(' (you should see a value epsilon of about 1.38e-18)\n\n'); 121 | pause 122 | 123 | 124 | 125 | -------------------------------------------------------------------------------- /ex8/ex8_cofi.m: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env octave 2 | %% Machine Learning Online Class 3 | % Exercise 8 | Anomaly Detection and Collaborative Filtering 4 | % 5 | % Instructions 6 | % ------------ 7 | % 8 | % This file contains code that helps you get started on the 9 | % exercise. You will need to complete the following functions: 10 | % 11 | % estimateGaussian.m 12 | % selectThreshold.m 13 | % cofiCostFunc.m 14 | % 15 | % For this exercise, you will not need to change any code in this file, 16 | % or any other files other than those mentioned above. 
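% For reference, the regularized cost that cofiCostFunc.m is expected to
% return (and that Parts 2-5 below check, first with lambda = 0 and then with
% lambda = 1.5) can be written as
%   J = 1/2 * sum over (i,j) with R(i,j) = 1 of (X(i,:) * Theta(j,:)' - Y(i,j))^2
%       + lambda/2 * (sum(sum(Theta.^2)) + sum(sum(X.^2)))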
17 | % 18 | 19 | %% =============== Part 1: Loading movie ratings dataset ================ 20 | % You will start by loading the movie ratings dataset to understand the 21 | % structure of the data. 22 | % 23 | fprintf('Loading movie ratings dataset.\n\n'); 24 | 25 | % Load data 26 | load ('ex8_movies.mat'); 27 | 28 | % Y is a 1682x943 matrix, containing ratings (1-5) of 1682 movies on 29 | % 943 users 30 | % 31 | % R is a 1682x943 matrix, where R(i,j) = 1 if and only if user j gave a 32 | % rating to movie i 33 | 34 | % From the matrix, we can compute statistics like average rating. 35 | fprintf('Average rating for movie 1 (Toy Story): %f / 5\n\n', ... 36 | mean(Y(1, R(1, :)))); 37 | 38 | % We can "visualize" the ratings matrix by plotting it with imagesc 39 | imagesc(Y); 40 | ylabel('Movies'); 41 | xlabel('Users'); 42 | 43 | fprintf('\nProgram paused. Press enter to continue.\n'); 44 | pause; 45 | 46 | %% ============ Part 2: Collaborative Filtering Cost Function =========== 47 | % You will now implement the cost function for collaborative filtering. 48 | % To help you debug your cost function, we have included set of weights 49 | % that we trained on that. Specifically, you should complete the code in 50 | % cofiCostFunc.m to return J. 51 | 52 | % Load pre-trained weights (X, Theta, num_users, num_movies, num_features) 53 | load ('ex8_movieParams.mat'); 54 | 55 | % Reduce the data set size so that this runs faster 56 | num_users = 4; num_movies = 5; num_features = 3; 57 | X = X(1:num_movies, 1:num_features); 58 | Theta = Theta(1:num_users, 1:num_features); 59 | Y = Y(1:num_movies, 1:num_users); 60 | R = R(1:num_movies, 1:num_users); 61 | 62 | % Evaluate cost function 63 | J = cofiCostFunc([X(:) ; Theta(:)], Y, R, num_users, num_movies, ... 64 | num_features, 0); 65 | 66 | fprintf(['Cost at loaded parameters: %f '... 67 | '\n(this value should be about 22.22)\n'], J); 68 | 69 | fprintf('\nProgram paused. Press enter to continue.\n'); 70 | pause; 71 | 72 | 73 | %% ============== Part 3: Collaborative Filtering Gradient ============== 74 | % Once your cost function matches up with ours, you should now implement 75 | % the collaborative filtering gradient function. Specifically, you should 76 | % complete the code in cofiCostFunc.m to return the grad argument. 77 | % 78 | fprintf('\nChecking Gradients (without regularization) ... \n'); 79 | 80 | % Check gradients by running checkNNGradients 81 | checkCostFunction; 82 | 83 | fprintf('\nProgram paused. Press enter to continue.\n'); 84 | pause; 85 | 86 | 87 | %% ========= Part 4: Collaborative Filtering Cost Regularization ======== 88 | % Now, you should implement regularization for the cost function for 89 | % collaborative filtering. You can implement it by adding the cost of 90 | % regularization to the original cost computation. 91 | % 92 | 93 | % Evaluate cost function 94 | J = cofiCostFunc([X(:) ; Theta(:)], Y, R, num_users, num_movies, ... 95 | num_features, 1.5); 96 | 97 | fprintf(['Cost at loaded parameters (lambda = 1.5): %f '... 98 | '\n(this value should be about 31.34)\n'], J); 99 | 100 | fprintf('\nProgram paused. Press enter to continue.\n'); 101 | pause; 102 | 103 | 104 | %% ======= Part 5: Collaborative Filtering Gradient Regularization ====== 105 | % Once your cost matches up with ours, you should proceed to implement 106 | % regularization for the gradient. 107 | % 108 | 109 | % 110 | fprintf('\nChecking Gradients (with regularization) ... 
\n'); 111 | 112 | % Check gradients by running checkNNGradients 113 | checkCostFunction(1.5); 114 | 115 | fprintf('\nProgram paused. Press enter to continue.\n'); 116 | pause; 117 | 118 | 119 | %% ============== Part 6: Entering ratings for a new user =============== 120 | % Before we will train the collaborative filtering model, we will first 121 | % add ratings that correspond to a new user that we just observed. This 122 | % part of the code will also allow you to put in your own ratings for the 123 | % movies in our dataset! 124 | % 125 | movieList = loadMovieList(); 126 | 127 | % Initialize my ratings 128 | my_ratings = zeros(1682, 1); 129 | 130 | % Check the file movie_idx.txt for id of each movie in our dataset 131 | % For example, Toy Story (1995) has ID 1, so to rate it "4", you can set 132 | my_ratings(1) = 4; 133 | 134 | % Or suppose did not enjoy Silence of the Lambs (1991), you can set 135 | my_ratings(98) = 2; 136 | 137 | % We have selected a few movies we liked / did not like and the ratings we 138 | % gave are as follows: 139 | my_ratings(7) = 3; 140 | my_ratings(12)= 5; 141 | my_ratings(54) = 4; 142 | my_ratings(64)= 5; 143 | my_ratings(66)= 3; 144 | my_ratings(69) = 5; 145 | my_ratings(183) = 4; 146 | my_ratings(226) = 5; 147 | my_ratings(355)= 5; 148 | 149 | fprintf('\n\nNew user ratings:\n'); 150 | for i = 1:length(my_ratings) 151 | if my_ratings(i) > 0 152 | fprintf('Rated %d for %s\n', my_ratings(i), ... 153 | movieList{i}); 154 | end 155 | end 156 | 157 | fprintf('\nProgram paused. Press enter to continue.\n'); 158 | pause; 159 | 160 | 161 | %% ================== Part 7: Learning Movie Ratings ==================== 162 | % Now, you will train the collaborative filtering model on a movie rating 163 | % dataset of 1682 movies and 943 users 164 | % 165 | 166 | fprintf('\nTraining collaborative filtering...\n'); 167 | 168 | % Load data 169 | load('ex8_movies.mat'); 170 | 171 | % Y is a 1682x943 matrix, containing ratings (1-5) of 1682 movies by 172 | % 943 users 173 | % 174 | % R is a 1682x943 matrix, where R(i,j) = 1 if and only if user j gave a 175 | % rating to movie i 176 | 177 | % Add our own ratings to the data matrix 178 | Y = [my_ratings Y]; 179 | R = [(my_ratings ~= 0) R]; 180 | 181 | % Normalize Ratings 182 | [Ynorm, Ymean] = normalizeRatings(Y, R); 183 | 184 | % Useful Values 185 | num_users = size(Y, 2); 186 | num_movies = size(Y, 1); 187 | num_features = 10; 188 | 189 | % Set Initial Parameters (Theta, X) 190 | X = randn(num_movies, num_features); 191 | Theta = randn(num_users, num_features); 192 | 193 | initial_parameters = [X(:); Theta(:)]; 194 | 195 | % Set options for fmincg 196 | options = optimset('GradObj', 'on', 'MaxIter', 100); 197 | 198 | % Set Regularization 199 | lambda = 10; 200 | theta = fmincg (@(t)(cofiCostFunc(t, Y, R, num_users, num_movies, ... 201 | num_features, lambda)), ... 202 | initial_parameters, options); 203 | 204 | % Unfold the returned theta back into U and W 205 | X = reshape(theta(1:num_movies*num_features), num_movies, num_features); 206 | Theta = reshape(theta(num_movies*num_features+1:end), ... 207 | num_users, num_features); 208 | 209 | fprintf('Recommender system learning completed.\n'); 210 | 211 | fprintf('\nProgram paused. Press enter to continue.\n'); 212 | pause; 213 | 214 | %% ================== Part 8: Recommendation for you ==================== 215 | % After training the model, you can now make recommendations by computing 216 | % the predictions matrix. 
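% Concretely, p(i, j) = X(i,:) * Theta(j,:)' is the model's predicted rating of
% movie i by user j; below, the first column (the user added in Part 6) is
% extracted and the per-movie mean Ymean from normalizeRatings is added back
% to express the predictions on the original 1-5 rating scale.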
217 | % 218 | 219 | p = X * Theta'; 220 | my_predictions = p(:,1) + Ymean; 221 | 222 | movieList = loadMovieList(); 223 | 224 | [r, ix] = sort(my_predictions, 'descend'); 225 | fprintf('\nTop recommendations for you:\n'); 226 | for i=1:10 227 | j = ix(i); 228 | fprintf('Predicting rating %.1f for movie %s\n', my_predictions(j), ... 229 | movieList{j}); 230 | end 231 | 232 | fprintf('\n\nOriginal ratings provided:\n'); 233 | for i = 1:length(my_ratings) 234 | if my_ratings(i) > 0 235 | fprintf('Rated %d for %s\n', my_ratings(i), ... 236 | movieList{i}); 237 | end 238 | end 239 | -------------------------------------------------------------------------------- /ex8/ex8_movieParams.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/ex8/ex8_movieParams.mat -------------------------------------------------------------------------------- /ex8/ex8_movies.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/ex8/ex8_movies.mat -------------------------------------------------------------------------------- /ex8/ex8data1.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/ex8/ex8data1.mat -------------------------------------------------------------------------------- /ex8/ex8data2.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/ex8/ex8data2.mat -------------------------------------------------------------------------------- /ex8/fmincg.m: -------------------------------------------------------------------------------- 1 | ../ex3/fmincg.m -------------------------------------------------------------------------------- /ex8/loadMovieList.m: -------------------------------------------------------------------------------- 1 | function movieList = loadMovieList() 2 | %GETMOVIELIST reads the fixed movie list in movie.txt and returns a 3 | %cell array of the words 4 | % movieList = GETMOVIELIST() reads the fixed movie list in movie.txt 5 | % and returns a cell array of the words in movieList. 6 | 7 | 8 | %% Read the fixed movieulary list 9 | fid = fopen('movie_ids.txt'); 10 | 11 | % Store all movies in cell array movie{} 12 | n = 1682; % Total number of movies 13 | 14 | movieList = cell(n, 1); 15 | for i = 1:n 16 | % Read line 17 | line = fgets(fid); 18 | % Word Index (can ignore since it will be = i) 19 | [idx, movieName] = strtok(line, ' '); 20 | % Actual Word 21 | movieList{i} = strtrim(movieName); 22 | end 23 | fclose(fid); 24 | 25 | end 26 | -------------------------------------------------------------------------------- /ex8/movie_ids.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/ex8/movie_ids.txt -------------------------------------------------------------------------------- /ex8/multivariateGaussian.m: -------------------------------------------------------------------------------- 1 | function p = multivariateGaussian(X, mu, Sigma2) 2 | %MULTIVARIATEGAUSSIAN Computes the probability density function of the 3 | %multivariate gaussian distribution. 
4 | % p = MULTIVARIATEGAUSSIAN(X, mu, Sigma2) Computes the probability 5 | % density function of the examples X under the multivariate gaussian 6 | % distribution with parameters mu and Sigma2. If Sigma2 is a matrix, it is 7 | % treated as the covariance matrix. If Sigma2 is a vector, it is treated 8 | % as the \sigma^2 values of the variances in each dimension (a diagonal 9 | % covariance matrix) 10 | % 11 | 12 | k = length(mu); 13 | 14 | if (size(Sigma2, 2) == 1) || (size(Sigma2, 1) == 1) 15 | Sigma2 = diag(Sigma2); 16 | end 17 | 18 | X = bsxfun(@minus, X, mu(:)'); 19 | p = (2 * pi) ^ (- k / 2) * det(Sigma2) ^ (-0.5) * ... 20 | exp(-0.5 * sum(bsxfun(@times, X * pinv(Sigma2), X), 2)); 21 | 22 | end -------------------------------------------------------------------------------- /ex8/normalizeRatings.m: -------------------------------------------------------------------------------- 1 | function [Ynorm, Ymean] = normalizeRatings(Y, R) 2 | %NORMALIZERATINGS Preprocess data by subtracting mean rating for every 3 | %movie (every row) 4 | % [Ynorm, Ymean] = NORMALIZERATINGS(Y, R) normalized Y so that each movie 5 | % has a rating of 0 on average, and returns the mean rating in Ymean. 6 | % 7 | 8 | [m, n] = size(Y); 9 | Ymean = zeros(m, 1); 10 | Ynorm = zeros(size(Y)); 11 | for i = 1:m 12 | Ymean(i) = mean(Y(i, R(i, :))); 13 | Ynorm(i, R(i, :)) = Y(i, R(i, :)) - Ymean(i); 14 | end 15 | 16 | end -------------------------------------------------------------------------------- /ex8/selectThreshold.m: -------------------------------------------------------------------------------- 1 | function [bestEpsilon bestF1] = selectThreshold(yval, pval) 2 | %SELECTTHRESHOLD Find the best threshold (epsilon) to use for selecting 3 | %outliers 4 | % [bestEpsilon bestF1] = SELECTTHRESHOLD(yval, pval) finds the best 5 | % threshold to use for selecting outliers based on the results from a 6 | % validation set (pval) and the ground truth (yval). 7 | % 8 | 9 | bestEpsilon = 0; 10 | bestF1 = 0; 11 | F1 = 0; 12 | 13 | stepsize = (max(pval) - min(pval)) / 1000; 14 | for epsilon = min(pval):stepsize:max(pval) 15 | 16 | % ====================== YOUR CODE HERE ====================== 17 | % Instructions: Compute the F1 score of choosing epsilon as the 18 | % threshold and place the value in F1. The code at the 19 | % end of the loop will compare the F1 score for this 20 | % choice of epsilon and set it to be the best epsilon if 21 | % it is better than the current choice of epsilon. 
22 | % 23 | % Note: You can use predictions = (pval < epsilon) to get a binary vector 24 | % of 0's and 1's of the outlier predictions 25 | 26 | predictions = (pval < epsilon); 27 | 28 | % TODO(SaveTheRbtz@): Move F1 score calculation to a separate function 29 | tp = sum((predictions == 1) & (yval == 1)); 30 | fp = sum((predictions == 1) & (yval == 0)); 31 | fn = sum((predictions == 0) & (yval == 1)); 32 | tn = sum((predictions == 0) & (yval == 0)); % XXX: NOT USED 33 | 34 | recall = tp / (tp + fn); 35 | precision = tp / (tp + fp); 36 | 37 | F1 = 2 * precision * recall / (precision + recall); 38 | 39 | % ============================================================= 40 | 41 | if F1 > bestF1 42 | bestF1 = F1; 43 | bestEpsilon = epsilon; 44 | end 45 | end 46 | 47 | end 48 | -------------------------------------------------------------------------------- /ex8/visualizeFit.m: -------------------------------------------------------------------------------- 1 | function visualizeFit(X, mu, sigma2) 2 | %VISUALIZEFIT Visualize the dataset and its estimated distribution. 3 | % VISUALIZEFIT(X, mu, sigma2) This visualization shows you the 4 | % probability density function of the Gaussian distribution. Each example 5 | % has a location (x1, x2) that depends on its feature values. 6 | % 7 | 8 | [X1,X2] = meshgrid(0:.5:35); 9 | Z = multivariateGaussian([X1(:) X2(:)],mu,sigma2); 10 | Z = reshape(Z,size(X1)); 11 | 12 | plot(X(:, 1), X(:, 2),'bx'); 13 | hold on; 14 | % Do not plot if there are infinities 15 | if (sum(isinf(Z)) == 0) 16 | contour(X1, X2, Z, 10.^(-20:3:0)'); 17 | end 18 | hold off; 19 | 20 | end -------------------------------------------------------------------------------- /octave_tutorial.m: -------------------------------------------------------------------------------- 1 | % Machine learning class 2 | % Octave tutorial 3 | 4 | % ======================================================= 5 | % Section 1: Octave Tutorial: Basic operations 6 | 7 | %% Change Octave prompt 8 | PS1('>> '); 9 | 10 | %% elementary operations 11 | 5+6 12 | 3-2 13 | 5*8 14 | 1/2 15 | 2^6 16 | 1 == 2 % false 17 | 1 ~= 2 % true. note, not "!=" 18 | 1 && 0 19 | 1 || 0 20 | xor(1,0) 21 | 22 | 23 | %% variable assignment 24 | a = 3; % semicolon suppresses output 25 | b = 'hi'; 26 | c = 3>=1; 27 | 28 | % Displaying them: 29 | a = pi 30 | disp(sprintf('2 decimals: %0.2f', a)) 31 | disp(sprintf('6 decimals: %0.6f', a)) 32 | format long 33 | a 34 | format short 35 | a 36 | 37 | 38 | %% vectors and matrices 39 | A = [1 2; 3 4; 5 6] 40 | 41 | v = [1 2 3] 42 | v = [1; 2; 3] 43 | v = [1:0.1:2] % from 1 to 2, with stepsize of 0.1.
Useful for plot axes 44 | v = 1:6 % from 1 to 6, assumes stepsize of 1 45 | 46 | C = 2*ones(2,3) % same as C = [2 2 2; 2 2 2] 47 | w = ones(1,3) % 1x3 vector of ones 48 | w = zeros(1,3) 49 | w = rand(1,3) % drawn from a uniform distribution 50 | w = randn(1,3) % drawn from a normal distribution (mean=0, var=1) 51 | w = -6 + sqrt(10)*(randn(1,10000)) % (mean = 1, var = 2) 52 | hist(w) 53 | I = eye(4) % 4x4 identity matrix 54 | 55 | % help function 56 | help eye 57 | help rand 58 | 59 | % ======================================================= 60 | % Section 2: Octave Tutorial: Moving data around 61 | 62 | 63 | %% dimensions 64 | sz = size(A) 65 | size(A,1) % number of rows 66 | size(A,2) % number of cols 67 | length(v) % size of longest dimension 68 | 69 | 70 | %% loading data 71 | pwd % show current directory (current path) 72 | cd 'C:\Users\ang\Octave files' % change directory 73 | ls % list files in current directory 74 | load q1y.dat 75 | load q1x.dat 76 | who % list variables in workspace 77 | whos % list variables in workspace (detailed view) 78 | clear q1y % clear w/ no argt clears all 79 | v = q1x(1:10); 80 | save hello v; % save variable v into file hello.mat 81 | save hello.txt v -ascii; % save as ascii 82 | % fopen, fread, fprintf, fscanf also work [[not needed in class]] 83 | 84 | %% indexing 85 | A(3,2) % indexing is (row,col) 86 | A(2,:) % get the 2nd row. 87 | % ":" means every element along that dimension 88 | A(:,2) % get the 2nd col 89 | A([1 3],:) 90 | 91 | A(:,2) = [10; 11; 12] % change second column 92 | A = [A, [100; 101; 102]]; % append column vec 93 | A(:) % Select all elements as a column vector. 94 | 95 | % Putting data together 96 | A = [A [100; 101; 102]] 97 | B = [11 12; 13 14; 15 16] % same dims as A 98 | [A B] 99 | [A; B] 100 | 101 | 102 | % ======================================================= 103 | % Section 3: Octave Tutorial: Computing on data 104 | 105 | 106 | %% matrix operations 107 | A * C % matrix multiplication 108 | A .* B % element-wise multiplcation 109 | % A .* C or A * B gives error - wrong dimensions 110 | A .^ 2 111 | 1./v 112 | log(v) % functions like this operate element-wise on vecs or matrices 113 | exp(v) % e^4 114 | abs(v) 115 | 116 | -v % -1*v 117 | 118 | v + ones(1,length(v)) 119 | % v + 1 % same 120 | 121 | A' % matrix transpose 122 | 123 | %% misc useful functions 124 | 125 | % max (or min) 126 | a = [1 15 2 0.5] 127 | val = max(a) 128 | [val,ind] = max(a) 129 | 130 | % find 131 | a < 3 132 | find(a < 3) 133 | A = magic(3) 134 | [r,c] = find(A>=7) 135 | 136 | % sum, prod 137 | sum(a) 138 | prod(a) 139 | floor(a) % or ceil(a) 140 | max(rand(3),rand(3)) 141 | max(A,[],1) 142 | min(A,[],2) 143 | A = magic(9) 144 | sum(A,1) 145 | sum(A,2) 146 | sum(sum( A .* eye(9) )) 147 | sum(sum( A .* flipud(eye(9)) )) 148 | 149 | 150 | % Matrix inverse (pseudo-inverse) 151 | pinv(A) % inv(A'*A)*A' 152 | 153 | 154 | % ======================================================= 155 | % Section 4: Octave Tutorial: Plotting 156 | 157 | 158 | %% plotting 159 | t = [0:0.01:0.98]; 160 | y1 = sin(2*pi*4*t); 161 | plot(t,y1); 162 | y2 = cos(2*pi*4*t); 163 | hold on; % "hold off" to turn off 164 | plot(t,y2,'r'); 165 | xlabel('time'); 166 | ylabel('value'); 167 | legend('sin','cos'); 168 | title('my plot'); 169 | print -dpng 'myPlot.png' 170 | close; % or, "close all" to close all figs 171 | 172 | figure(2), clf; % can specify the figure number 173 | subplot(1,2,1); % Divide plot into 1x2 grid, access 1st element 174 | plot(t,y1); 175 | subplot(1,2,2); % Divide 
plot into 1x2 grid, access 2nd element 176 | plot(t,y2); 177 | axis([0.5 1 -1 1]); % change axis scale 178 | 179 | %% display a matrix (or image) 180 | figure; 181 | imagesc(magic(15)), colorbar, colormap gray; 182 | % comma-chaining function calls. 183 | a=1,b=2,c=3 184 | a=1;b=2;c=3; 185 | 186 | 187 | % ======================================================= 188 | % Section 5: Octave Tutorial: For, while, if statements, and functions. 189 | 190 | v = zeros(10,1); 191 | for i=1:10, 192 | v(i) = 2^i; 193 | end 194 | % Can also use "break" and "continue" inside for and while loops to control execution. 195 | 196 | i = 1; 197 | while i <= 5, 198 | v(i) = 100; 199 | i = i+1; 200 | end 201 | 202 | i = 1; 203 | while true, 204 | v(i) = 999; 205 | i = i+1; 206 | if i == 6, 207 | break; 208 | end; 209 | end 210 | 211 | if v(1)==1, 212 | disp('The value is one!'); 213 | elseif v(1)==2, 214 | disp('The value is two!'); 215 | else 216 | disp('The value is not one or two!'); 217 | end 218 | 219 | % exit % quit 220 | 221 | % Functions 222 | 223 | % Create a file called squareThisNumber.m with the following contents (without the %): 224 | % function r = squareThisNumber(x) 225 | % r = x * x; 226 | % end 227 | 228 | squareThisNumber(5); 229 | % If the function is undefined, use "pwd" to check the current directory (path), 230 | % and "cd" to change directories 231 | pwd 232 | cd 'C:\Users\ang\Desktop'; 233 | squareThisNumber(5); 234 | 235 | % Octave search path (advanced/optional) 236 | addpath('C:\Users\ang\Desktop'); 237 | cd 'C:\' 238 | squareThisNumber(5); 239 | 240 | % If you have defined other functions such as costFunctionJ, 241 | % the following code will work too. 242 | 243 | X = [1 1; 1 2; 1 3]; 244 | y = [1;2;3]; 245 | 246 | theta = [0; 1]; 247 | j = costFunctionJ(X, y, theta); 248 | 249 | theta = [0; 0]; 250 | j = costFunctionJ(X, y, theta); 251 | 252 | 253 | 254 | 255 | --------------------------------------------------------------------------------
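% The tutorial above calls costFunctionJ, whose contents are not included in
% this repository. A minimal sketch consistent with how it is called (assuming
% the plain unregularized linear regression cost from the lectures) would be a
% file costFunctionJ.m containing:

function J = costFunctionJ(X, y, theta)
% COSTFUNCTIONJ Unregularized linear regression cost.
%   X is the design matrix (one example per row, including the bias column),
%   y is the vector of targets and theta is the parameter vector.
m = size(X, 1);                      % number of training examples
predictions = X * theta;             % hypothesis evaluated on all examples
sqrErrors = (predictions - y) .^ 2;  % squared residuals
J = 1 / (2 * m) * sum(sqrErrors);    % half the mean squared error
end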