├── Lectures ├── Lecture1.pdf ├── Lecture10.pdf ├── Lecture11.pdf ├── Lecture12.pdf ├── Lecture13.pdf ├── Lecture14.pdf ├── Lecture15.pdf ├── Lecture16.pdf ├── Lecture17.pdf ├── Lecture18.pdf ├── Lecture2.pdf ├── Lecture3.pdf ├── Lecture4.pdf ├── Lecture6.pdf ├── Lecture7.pdf ├── Lecture8.pdf └── Lecture9.pdf ├── README.rst ├── ex1.pdf ├── ex1 ├── computeCost.m ├── computeCostMulti.m ├── ex1.m ├── ex1_multi.m ├── ex1data1.txt ├── ex1data2.txt ├── featureNormalize.m ├── gradientDescent.m ├── gradientDescentMulti.m ├── normalEqn.m ├── plotData.m ├── submit.m └── warmUpExercise.m ├── ex2.pdf ├── ex2 ├── costFunction.m ├── costFunctionReg.m ├── ex2.m ├── ex2_reg.m ├── ex2data1.txt ├── ex2data2.txt ├── mapFeature.m ├── plotData.m ├── plotDecisionBoundary.m ├── predict.m ├── sigmoid.m ├── submit.m └── submitWeb.m ├── ex3.pdf ├── ex3 ├── displayData.m ├── ex3.m ├── ex3_nn.m ├── ex3data1.mat ├── ex3weights.mat ├── fmincg.m ├── lrCostFunction.m ├── oneVsAll.m ├── predict.m ├── predictOneVsAll.m ├── sigmoid.m ├── submit.m └── submitWeb.m ├── ex4.pdf ├── ex4 ├── checkNNGradients.m ├── computeNumericalGradient.m ├── debugInitializeWeights.m ├── displayData.m ├── ex4.m ├── ex4data1.mat ├── ex4weights.mat ├── fmincg.m ├── nnCostFunction.m ├── predict.m ├── randInitializeWeights.m ├── sigmoid.m ├── sigmoidGradient.m ├── submit.m └── submitWeb.m ├── ex5.pdf ├── ex5 ├── ex5.m ├── ex5data1.mat ├── featureNormalize.m ├── fmincg.m ├── learningCurve.m ├── linearRegCostFunction.m ├── plotFit.m ├── polyFeatures.m ├── submit.m ├── submitWeb.m ├── trainLinearReg.m └── validationCurve.m ├── ex6.pdf ├── ex6 ├── dataset3Params.m ├── emailFeatures.m ├── emailSample1.txt ├── emailSample2.txt ├── ex6.m ├── ex6_spam.m ├── ex6data1.mat ├── ex6data2.mat ├── ex6data3.mat ├── gaussianKernel.m ├── getVocabList.m ├── linearKernel.m ├── plotData.m ├── porterStemmer.m ├── processEmail.m ├── readFile.m ├── spamSample1.txt ├── spamSample2.txt ├── spamTest.mat ├── spamTrain.mat ├── submit.m ├── submitWeb.m ├── svmPredict.m ├── svmTrain.m ├── visualizeBoundary.m ├── visualizeBoundaryLinear.m └── vocab.txt ├── ex7.pdf ├── ex7 ├── bird_small.mat ├── bird_small.png ├── computeCentroids.m ├── displayData.m ├── drawLine.m ├── ex7.m ├── ex7_pca.m ├── ex7data1.mat ├── ex7data2.mat ├── ex7faces.mat ├── featureNormalize.m ├── findClosestCentroids.m ├── kMeansInitCentroids.m ├── pca.m ├── plotDataPoints.m ├── plotProgresskMeans.m ├── projectData.m ├── recoverData.m ├── runkMeans.m ├── submit.m └── submitWeb.m ├── ex8.pdf ├── ex8 ├── checkCostFunction.m ├── cofiCostFunc.m ├── computeNumericalGradient.m ├── estimateGaussian.m ├── ex8.m ├── ex8_cofi.m ├── ex8_movieParams.mat ├── ex8_movies.mat ├── ex8data1.mat ├── ex8data2.mat ├── fmincg.m ├── loadMovieList.m ├── movie_ids.txt ├── multivariateGaussian.m ├── normalizeRatings.m ├── selectThreshold.m ├── submit.m ├── submitWeb.m └── visualizeFit.m └── octave_tutorial.m /Lectures/Lecture1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/Lectures/Lecture1.pdf -------------------------------------------------------------------------------- /Lectures/Lecture10.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/Lectures/Lecture10.pdf -------------------------------------------------------------------------------- /Lectures/Lecture11.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/Lectures/Lecture11.pdf -------------------------------------------------------------------------------- /Lectures/Lecture12.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/Lectures/Lecture12.pdf -------------------------------------------------------------------------------- /Lectures/Lecture13.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/Lectures/Lecture13.pdf -------------------------------------------------------------------------------- /Lectures/Lecture14.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/Lectures/Lecture14.pdf -------------------------------------------------------------------------------- /Lectures/Lecture15.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/Lectures/Lecture15.pdf -------------------------------------------------------------------------------- /Lectures/Lecture16.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/Lectures/Lecture16.pdf -------------------------------------------------------------------------------- /Lectures/Lecture17.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/Lectures/Lecture17.pdf -------------------------------------------------------------------------------- /Lectures/Lecture18.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/Lectures/Lecture18.pdf -------------------------------------------------------------------------------- /Lectures/Lecture2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/Lectures/Lecture2.pdf -------------------------------------------------------------------------------- /Lectures/Lecture3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/Lectures/Lecture3.pdf -------------------------------------------------------------------------------- /Lectures/Lecture4.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/Lectures/Lecture4.pdf -------------------------------------------------------------------------------- /Lectures/Lecture6.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/Lectures/Lecture6.pdf -------------------------------------------------------------------------------- /Lectures/Lecture7.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/Lectures/Lecture7.pdf -------------------------------------------------------------------------------- /Lectures/Lecture8.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/Lectures/Lecture8.pdf -------------------------------------------------------------------------------- /Lectures/Lecture9.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/Lectures/Lecture9.pdf -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | Introduction 2 | ============ 3 | 4 | These are my solutions to the programming exercises from Stanford's Machine Learning class. Written in `GNU Octave`_. 5 | 6 | .. _GNU Octave: https://www.gnu.org/software/octave/ 7 | 8 | Honor Code 9 | ========== 10 | 11 | This is a quote from the `Course Info`_ page: 12 | 13 | For the programming exercises, you are welcome to discuss them with other 14 | students, discuss specific algorithms, properties of algorithms, etc.; we 15 | ask only that you not look at any source code written by a different 16 | student, nor show your solution code to other students. 17 | 18 | -- Professor Andrew Ng & The ml-class Team 19 | 20 | .. _Course Info: http://www.ml-class.org/course/resources/index?page=course-info 21 | -------------------------------------------------------------------------------- /ex1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/ex1.pdf -------------------------------------------------------------------------------- /ex1/computeCost.m: -------------------------------------------------------------------------------- 1 | function J = computeCost(X, y, theta) 2 | %COMPUTECOST Compute cost for linear regression 3 | % J = COMPUTECOST(X, y, theta) computes the cost of using theta as the 4 | % parameter for linear regression to fit the data points in X and y 5 | 6 | % Initialize some useful values 7 | m = length(y); % number of training examples 8 | 9 | % You need to return the following variables correctly 10 | J = 0; 11 | 12 | % ====================== YOUR CODE HERE ====================== 13 | % Instructions: Compute the cost of a particular choice of theta 14 | % You should set J to the cost. 15 | 16 | % This is a vectorized version of J's computation 17 | % Note that we are using element-wise square ( .^ ) instead of matrix 18 | % multiplication ( ^ ).
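% For reference, the vectorized expression below computes
% J(theta) = 1/(2*m) * sum_i (h_theta(x_i) - y_i)^2 with h_theta(x) = X*theta,
% evaluated for the whole training set at once. A minimal loop-based sketch
% (illustrative only, assuming X already carries the intercept column of ones)
% that produces the same value:
%
%   acc = 0;
%   for i = 1:m
%     acc = acc + (X(i,:)*theta - y(i))^2;
%   end
%   J = acc / (2*m);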
19 | J = 1/(2*m) * sum((X*theta - y) .^ 2); 20 | 21 | % ========================================================================= 22 | 23 | end 24 | -------------------------------------------------------------------------------- /ex1/computeCostMulti.m: -------------------------------------------------------------------------------- 1 | function J = computeCostMulti(X, y, theta) 2 | %COMPUTECOSTMULTI Compute cost for linear regression with multiple variables 3 | % J = COMPUTECOSTMULTI(X, y, theta) computes the cost of using theta as the 4 | % parameter for linear regression to fit the data points in X and y 5 | 6 | J = computeCost(X, y, theta); 7 | 8 | end 9 | -------------------------------------------------------------------------------- /ex1/ex1.m: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env octave 2 | %% Machine Learning Online Class - Exercise 1: Linear Regression 3 | 4 | % Instructions 5 | % ------------ 6 | % 7 | % This file contains code that helps you get started on the 8 | % linear exercise. You will need to complete the following functions 9 | % in this exericse: 10 | % 11 | % warmUpExercise.m 12 | % plotData.m 13 | % gradientDescent.m 14 | % computeCost.m 15 | % gradientDescentMulti.m 16 | % computeCostMulti.m 17 | % featureNormalize.m 18 | % normalEqn.m 19 | % 20 | % For this exercise, you will not need to change any code in this file, 21 | % or any other files other than those mentioned above. 22 | % 23 | % x refers to the population size in 10,000s 24 | % y refers to the profit in $10,000s 25 | % 26 | 27 | %% Initialization 28 | clear all; close all; clc 29 | 30 | %% ==================== Part 1: Basic Function ==================== 31 | % Complete warmUpExercise.m 32 | fprintf('Running warmUpExercise ... \n'); 33 | fprintf('5x5 Identity Matrix: \n'); 34 | warmUpExercise() 35 | 36 | fprintf('Program paused. Press enter to continue.\n'); 37 | pause; 38 | 39 | 40 | %% ======================= Part 2: Plotting ======================= 41 | fprintf('Plotting Data ...\n') 42 | data = csvread('ex1data1.txt'); 43 | X = data(:, 1); y = data(:, 2); 44 | m = length(y); % number of training examples 45 | 46 | % Plot Data 47 | % Note: You have to complete the code in plotData.m 48 | plotData(X, y); 49 | 50 | fprintf('Program paused. Press enter to continue.\n'); 51 | pause; 52 | 53 | %% =================== Part 3: Gradient descent =================== 54 | fprintf('Running Gradient Descent ...\n') 55 | 56 | X = [ones(m, 1), data(:,1)]; % Add a column of ones to x 57 | theta = zeros(2, 1); % initialize fitting parameters 58 | 59 | % Some gradient descent settings 60 | iterations = 1500; 61 | alpha = 0.01; 62 | 63 | % compute and display initial cost 64 | computeCost(X, y, theta) 65 | 66 | % run gradient descent 67 | theta = gradientDescent(X, y, theta, alpha, iterations); 68 | 69 | % print theta to screen 70 | fprintf('Theta found by gradient descent: '); 71 | fprintf('%f %f \n', theta(1), theta(2)); 72 | 73 | % Plot the linear fit 74 | hold on; % keep previous plot visible 75 | plot(X(:,2), X*theta, '-') 76 | legend('Training data', 'Linear regression') 77 | hold off % don't overlay any more plots on this figure 78 | 79 | % Predict values for population sizes of 35,000 and 70,000 80 | predict1 = [1, 3.5] *theta; 81 | fprintf('For population = 35,000, we predict a profit of %f\n',... 82 | predict1*10000); 83 | predict2 = [1, 7] * theta; 84 | fprintf('For population = 70,000, we predict a profit of %f\n',... 
85 | predict2*10000); 86 | 87 | fprintf('Program paused. Press enter to continue.\n'); 88 | pause; 89 | 90 | %% ============= Part 4: Visualizing J(theta_0, theta_1) ============= 91 | fprintf('Visualizing J(theta_0, theta_1) ...\n') 92 | 93 | % Grid over which we will calculate J 94 | theta0_vals = linspace(-10, 10, 100); 95 | theta1_vals = linspace(-1, 4, 100); 96 | 97 | % initialize J_vals to a matrix of 0's 98 | J_vals = zeros(length(theta0_vals), length(theta1_vals)); 99 | 100 | % Fill out J_vals 101 | for i = 1:length(theta0_vals) 102 | for j = 1:length(theta1_vals) 103 | t = [theta0_vals(i); theta1_vals(j)]; 104 | J_vals(i,j) = computeCost(X, y, t); 105 | end 106 | end 107 | 108 | 109 | % Because of the way meshgrids work in the surf command, we need to 110 | % transpose J_vals before calling surf, or else the axes will be flipped 111 | J_vals = J_vals'; 112 | % Surface plot 113 | figure; 114 | surf(theta0_vals, theta1_vals, J_vals) 115 | xlabel('\theta_0'); ylabel('\theta_1'); 116 | 117 | % Contour plot 118 | figure; 119 | % Plot J_vals as 15 contours spaced logarithmically between 0.01 and 100 120 | contour(theta0_vals, theta1_vals, J_vals, logspace(-2, 3, 20)) 121 | xlabel('\theta_0'); ylabel('\theta_1'); 122 | hold on; 123 | plot(theta(1), theta(2), 'rx', 'MarkerSize', 10, 'LineWidth', 2); 124 | 125 | fprintf('Program paused. Press enter to continue.\n'); 126 | pause; 127 | -------------------------------------------------------------------------------- /ex1/ex1_multi.m: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env octave 2 | %% Machine Learning Online Class 3 | % Exercise 1: Linear regression with multiple variables 4 | % 5 | % Instructions 6 | % ------------ 7 | % 8 | % This file contains code that helps you get started on the 9 | % linear regression exercise. 10 | % 11 | % You will need to complete the following functions in this 12 | % exericse: 13 | % 14 | % warmUpExercise.m 15 | % plotData.m 16 | % gradientDescent.m 17 | % computeCost.m 18 | % gradientDescentMulti.m 19 | % computeCostMulti.m 20 | % featureNormalize.m 21 | % normalEqn.m 22 | % 23 | % For this part of the exercise, you will need to change some 24 | % parts of the code below for various experiments (e.g., changing 25 | % learning rates). 26 | % 27 | 28 | %% Clear and Close Figures 29 | clear all; close all; clc 30 | 31 | %% Initialization 32 | % 1650 sq-ft, 3 br house 33 | % Don't know about naming conventions in Ocvate/Matlab, so using nerdCaps 34 | houseToCheck = [1, 1650, 3]; 35 | 36 | %% ================ Part 1: Feature Normalization ================ 37 | 38 | fprintf('Loading data ...\n'); 39 | 40 | %% Load Data 41 | data = csvread('ex1data2.txt'); 42 | X = data(:, 1:2); 43 | y = data(:, 3); 44 | m = length(y); 45 | 46 | % Print out some data points 47 | fprintf('First 10 examples from the dataset: \n'); 48 | fprintf(' x = [%.0f %.0f], y = %.0f \n', [X(1:10,:) y(1:10,:)]'); 49 | 50 | fprintf('Program paused. Press enter to continue.\n'); 51 | pause; 52 | 53 | % Scale features and set them to zero mean 54 | fprintf('Normalizing Features ...\n'); 55 | 56 | [X mu sigma] = featureNormalize(X); 57 | 58 | % Add intercept term to X 59 | X = [ones(m, 1) X]; 60 | 61 | 62 | %% ================ Part 2: Gradient Descent ================ 63 | 64 | % ====================== YOUR CODE HERE ====================== 65 | % Instructions: We have provided you with the following starter 66 | % code that runs gradient descent with a particular 67 | % learning rate (alpha). 
68 | % 69 | % Your task is to first make sure that your functions - 70 | % computeCost and gradientDescent already work with 71 | % this starter code and support multiple variables. 72 | % 73 | % After that, try running gradient descent with 74 | % different values of alpha and see which one gives 75 | % you the best result. 76 | % 77 | % Finally, you should complete the code at the end 78 | % to predict the price of a 1650 sq-ft, 3 br house. 79 | % 80 | % Hint: By using the 'hold on' command, you can plot multiple 81 | % graphs on the same figure. 82 | % 83 | % Hint: At prediction, make sure you do the same feature normalization. 84 | % 85 | 86 | fprintf('Running gradient descent ...\n'); 87 | 88 | % Choose some alpha value 89 | % TODO(SaveTheRbtz@): We should look for some clever way to find an alpha instead 90 | % of manually brute-forcing it. Maybe in the gradientDescent function: if we see 91 | % that the thetas are increasing, divide alpha in half (just like TCP does with 92 | % its window size when a drop happens). 93 | alpha = 1; 94 | num_iters = 100; 95 | 96 | % Init Theta and Run Gradient Descent 97 | theta = zeros(3, 1); 98 | [theta, J_history] = gradientDescentMulti(X, y, theta, alpha, num_iters); 99 | 100 | % Plot the convergence graph 101 | figure; 102 | plot(1:numel(J_history), J_history, '-b', 'LineWidth', 2); 103 | xlabel('Number of iterations'); 104 | ylabel('Cost J'); 105 | 106 | % Display gradient descent's result 107 | fprintf('Theta computed from gradient descent: \n'); 108 | fprintf(' %f \n', theta); 109 | fprintf('\n'); 110 | 111 | fprintf('Program paused. Press enter to continue.\n'); 112 | pause; 113 | 114 | % Estimate the price of a 1650 sq-ft, 3 br house 115 | % ====================== YOUR CODE HERE ====================== 116 | % Recall that the first column of X is all-ones. Thus, it does 117 | % not need to be normalized. 118 | figure; 119 | hold on; 120 | 121 | % Plot data 122 | scatter3(X(:, 2), X(:, 3), y, 'r'); 123 | 124 | xlabel('sq. feet'); 125 | ylabel('bedrooms'); 126 | zlabel('price'); 127 | 128 | % Drawing a linear regression line 129 | % Three std. dev. each direction 130 | limit = 3; 131 | m = length(y); 132 | val = linspace(-limit, limit, m)'; 133 | z_val = [ones(m, 1), val, val] * theta; 134 | plot3(val, val, z_val); 135 | 136 | legend('data', 'gradient descent') 137 | 138 | % Normalizing data 139 | % NB! The first column of X is all-ones 140 | houseToCheckNormalized = [1 ((houseToCheck(2:3) - mu) ./ sigma)] 141 | % Computing price 142 | price = houseToCheckNormalized * theta; 143 | % ============================================================ 144 | 145 | fprintf(['Predicted price of a 1650 sq-ft, 3 br house ' ... 146 | '(using gradient descent):\n $%f\n'], price); 147 | 148 | fprintf('Program paused. Press enter to continue.\n'); 149 | pause; 150 | 151 | %% ================ Part 3: Normal Equations ================ 152 | 153 | fprintf('Solving with normal equations...\n'); 154 | 155 | % ====================== YOUR CODE HERE ====================== 156 | % Instructions: The following code computes the closed form 157 | % solution for linear regression using the normal 158 | % equations. You should complete the code in 159 | % normalEqn.m 160 | % 161 | % After doing so, you should complete this code 162 | % to predict the price of a 1650 sq-ft, 3 br house.
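% For reference, Part 3 below solves the same problem in closed form via
% normalEqn.m, i.e. theta = pinv(X'*X)*X'*y, which needs neither feature
% normalization nor a learning rate. A minimal sketch of the prediction it
% enables (illustrative only; Part 3 reloads the raw data before doing this):
%
%   theta_ne = pinv(X'*X) * X' * y;
%   price_ne = [1, 1650, 3] * theta_ne;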
163 | % 164 | 165 | %% Load Data 166 | data = csvread('ex1data2.txt'); 167 | X = data(:, 1:2); 168 | y = data(:, 3); 169 | m = length(y); 170 | 171 | % Add intercept term to X 172 | X = [ones(m, 1) X]; 173 | 174 | % Calculate the parameters from the normal equation 175 | theta = normalEqn(X, y); 176 | 177 | % Display normal equation's result 178 | fprintf('Theta computed from the normal equations: \n'); 179 | fprintf(' %f \n', theta); 180 | fprintf('\n'); 181 | 182 | % Estimate the price of a 1650 sq-ft, 3 br house 183 | % ====================== YOUR CODE HERE ====================== 184 | price = houseToCheck * theta; 185 | 186 | 187 | % ============================================================ 188 | 189 | fprintf(['Predicted price of a 1650 sq-ft, 3 br house ' ... 190 | '(using normal equations):\n $%f\n'], price); 191 | -------------------------------------------------------------------------------- /ex1/ex1data1.txt: -------------------------------------------------------------------------------- 1 | 6.1101,17.592 2 | 5.5277,9.1302 3 | 8.5186,13.662 4 | 7.0032,11.854 5 | 5.8598,6.8233 6 | 8.3829,11.886 7 | 7.4764,4.3483 8 | 8.5781,12 9 | 6.4862,6.5987 10 | 5.0546,3.8166 11 | 5.7107,3.2522 12 | 14.164,15.505 13 | 5.734,3.1551 14 | 8.4084,7.2258 15 | 5.6407,0.71618 16 | 5.3794,3.5129 17 | 6.3654,5.3048 18 | 5.1301,0.56077 19 | 6.4296,3.6518 20 | 7.0708,5.3893 21 | 6.1891,3.1386 22 | 20.27,21.767 23 | 5.4901,4.263 24 | 6.3261,5.1875 25 | 5.5649,3.0825 26 | 18.945,22.638 27 | 12.828,13.501 28 | 10.957,7.0467 29 | 13.176,14.692 30 | 22.203,24.147 31 | 5.2524,-1.22 32 | 6.5894,5.9966 33 | 9.2482,12.134 34 | 5.8918,1.8495 35 | 8.2111,6.5426 36 | 7.9334,4.5623 37 | 8.0959,4.1164 38 | 5.6063,3.3928 39 | 12.836,10.117 40 | 6.3534,5.4974 41 | 5.4069,0.55657 42 | 6.8825,3.9115 43 | 11.708,5.3854 44 | 5.7737,2.4406 45 | 7.8247,6.7318 46 | 7.0931,1.0463 47 | 5.0702,5.1337 48 | 5.8014,1.844 49 | 11.7,8.0043 50 | 5.5416,1.0179 51 | 7.5402,6.7504 52 | 5.3077,1.8396 53 | 7.4239,4.2885 54 | 7.6031,4.9981 55 | 6.3328,1.4233 56 | 6.3589,-1.4211 57 | 6.2742,2.4756 58 | 5.6397,4.6042 59 | 9.3102,3.9624 60 | 9.4536,5.4141 61 | 8.8254,5.1694 62 | 5.1793,-0.74279 63 | 21.279,17.929 64 | 14.908,12.054 65 | 18.959,17.054 66 | 7.2182,4.8852 67 | 8.2951,5.7442 68 | 10.236,7.7754 69 | 5.4994,1.0173 70 | 20.341,20.992 71 | 10.136,6.6799 72 | 7.3345,4.0259 73 | 6.0062,1.2784 74 | 7.2259,3.3411 75 | 5.0269,-2.6807 76 | 6.5479,0.29678 77 | 7.5386,3.8845 78 | 5.0365,5.7014 79 | 10.274,6.7526 80 | 5.1077,2.0576 81 | 5.7292,0.47953 82 | 5.1884,0.20421 83 | 6.3557,0.67861 84 | 9.7687,7.5435 85 | 6.5159,5.3436 86 | 8.5172,4.2415 87 | 9.1802,6.7981 88 | 6.002,0.92695 89 | 5.5204,0.152 90 | 5.0594,2.8214 91 | 5.7077,1.8451 92 | 7.6366,4.2959 93 | 5.8707,7.2029 94 | 5.3054,1.9869 95 | 8.2934,0.14454 96 | 13.394,9.0551 97 | 5.4369,0.61705 98 | -------------------------------------------------------------------------------- /ex1/ex1data2.txt: -------------------------------------------------------------------------------- 1 | 2104,3,399900 2 | 1600,3,329900 3 | 2400,3,369000 4 | 1416,2,232000 5 | 3000,4,539900 6 | 1985,4,299900 7 | 1534,3,314900 8 | 1427,3,198999 9 | 1380,3,212000 10 | 1494,3,242500 11 | 1940,4,239999 12 | 2000,3,347000 13 | 1890,3,329999 14 | 4478,5,699900 15 | 1268,3,259900 16 | 2300,4,449900 17 | 1320,2,299900 18 | 1236,3,199900 19 | 2609,4,499998 20 | 3031,4,599000 21 | 1767,3,252900 22 | 1888,2,255000 23 | 1604,3,242900 24 | 1962,4,259900 25 | 3890,3,573900 26 | 1100,3,249900 27 | 1458,3,464500 28 | 
2526,3,469000 29 | 2200,3,475000 30 | 2637,3,299900 31 | 1839,2,349900 32 | 1000,1,169900 33 | 2040,4,314900 34 | 3137,3,579900 35 | 1811,4,285900 36 | 1437,3,249900 37 | 1239,3,229900 38 | 2132,4,345000 39 | 4215,4,549000 40 | 2162,4,287000 41 | 1664,2,368500 42 | 2238,3,329900 43 | 2567,4,314000 44 | 1200,3,299000 45 | 852,2,179900 46 | 1852,4,299900 47 | 1203,3,239500 48 | -------------------------------------------------------------------------------- /ex1/featureNormalize.m: -------------------------------------------------------------------------------- 1 | function [X_norm, mu, sigma] = featureNormalize(X) 2 | %FEATURENORMALIZE Normalizes the features in X 3 | % FEATURENORMALIZE(X) returns a normalized version of X where 4 | % the mean value of each feature is 0 and the standard deviation 5 | % is 1. This is often a good preprocessing step to do when 6 | % working with learning algorithms. 7 | 8 | % You need to set these values correctly 9 | X_norm = X; 10 | mu = zeros(1, size(X, 2)); 11 | sigma = zeros(1, size(X, 2)); 12 | 13 | % ====================== YOUR CODE HERE ====================== 14 | % Instructions: First, for each feature dimension, compute the mean 15 | % of the feature and subtract it from the dataset, 16 | % storing the mean value in mu. Next, compute the 17 | % standard deviation of each feature and divide 18 | % each feature by it's standard deviation, storing 19 | % the standard deviation in sigma. 20 | % 21 | % Note that X is a matrix where each column is a 22 | % feature and each row is an example. You need 23 | % to perform the normalization separately for 24 | % each feature. 25 | % 26 | % Hint: You might find the 'mean' and 'std' functions useful. 27 | % 28 | 29 | mu = mean(X); 30 | sigma = std(X); 31 | 32 | % bsxfun applies function element-by-element to two maticies 33 | X_norm = bsxfun(@minus, X, mu); 34 | X_norm = bsxfun(@rdivide, X_norm, sigma); 35 | % ============================================================ 36 | 37 | end 38 | -------------------------------------------------------------------------------- /ex1/gradientDescent.m: -------------------------------------------------------------------------------- 1 | function [theta, J_history] = gradientDescent(X, y, theta, alpha, num_iters) 2 | %GRADIENTDESCENT Performs gradient descent to learn theta 3 | % theta = GRADIENTDESENT(X, y, theta, alpha, num_iters) updates theta by 4 | % taking num_iters gradient steps with learning rate alpha 5 | 6 | % Initialize some useful values 7 | m = length(y); % number of training examples 8 | J_history = zeros(num_iters, 1); 9 | 10 | for iter = 1:num_iters 11 | 12 | % ====================== YOUR CODE HERE ====================== 13 | % Instructions: Perform a single gradient step on the parameter vector 14 | % theta. 15 | % 16 | % Hint: While debugging, it can be useful to print out the values 17 | % of the cost function (computeCost) and gradient here. 
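% For reference, the vectorized update below applies, to all parameters at
% once, theta_j := theta_j - alpha * (1/m) * sum_i (h_theta(x_i) - y_i) * x_ij.
% An equivalent per-parameter sketch (illustrative only):
%
%   grad = zeros(size(theta));
%   for j = 1:length(theta)
%     grad(j) = (1/m) * sum((X*theta - y) .* X(:, j));
%   end
%   theta = theta - alpha * grad;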
18 | % 19 | theta = theta - alpha*(1/m)*(X'*(X*theta - y)); 20 | 21 | % ============================================================ 22 | 23 | % Save the cost J in every iteration 24 | J_history(iter) = computeCost(X, y, theta); 25 | 26 | end 27 | 28 | end 29 | -------------------------------------------------------------------------------- /ex1/gradientDescentMulti.m: -------------------------------------------------------------------------------- 1 | function [theta, J_history] = gradientDescentMulti(X, y, theta, alpha, num_iters) 2 | %GRADIENTDESCENTMULTI Performs gradient descent to learn theta 3 | % theta = GRADIENTDESCENTMULTI(x, y, theta, alpha, num_iters) updates theta by 4 | % taking num_iters gradient steps with learning rate alpha 5 | [theta, J_history] = gradientDescent(X, y, theta, alpha, num_iters); 6 | end 7 | -------------------------------------------------------------------------------- /ex1/normalEqn.m: -------------------------------------------------------------------------------- 1 | function [theta] = normalEqn(X, y) 2 | %NORMALEQN Computes the closed-form solution to linear regression 3 | % NORMALEQN(X,y) computes the closed-form solution to linear 4 | % regression using the normal equations. 5 | 6 | theta = zeros(size(X, 2), 1); 7 | 8 | % ====================== YOUR CODE HERE ====================== 9 | % Instructions: Complete the code to compute the closed form solution 10 | % to linear regression and put the result in theta. 11 | % 12 | 13 | % ---------------------- Sample Solution ---------------------- 14 | theta = pinv(X'*X)*X'*y; 15 | 16 | % ------------------------------------------------------------- 17 | 18 | 19 | % ============================================================ 20 | 21 | end 22 | -------------------------------------------------------------------------------- /ex1/plotData.m: -------------------------------------------------------------------------------- 1 | function plotData(x, y) 2 | %PLOTDATA Plots the data points x and y into a new figure 3 | % PLOTDATA(x,y) plots the data points and gives the figure axes labels of 4 | % population and profit. 5 | 6 | % ====================== YOUR CODE HERE ====================== 7 | % Instructions: Plot the training data into a figure using the 8 | % "figure" and "plot" commands. Set the axes labels using 9 | % the "xlabel" and "ylabel" commands. Assume the 10 | % population and revenue data have been passed in 11 | % as the x and y arguments of this function. 12 | % 13 | % Hint: You can use the 'rx' option with plot to have the markers 14 | % appear as red crosses. 
Furthermore, you can make the 15 | % markers larger by using plot(..., 'rx', 'MarkerSize', 10); 16 | 17 | figure; % open a new figure window 18 | 19 | plot(x, y, 'rx', 'MarkerSize', 10); 20 | xlabel('Population'); 21 | ylabel('Revenue'); 22 | 23 | % ============================================================ 24 | 25 | end 26 | -------------------------------------------------------------------------------- /ex1/warmUpExercise.m: -------------------------------------------------------------------------------- 1 | function A = warmUpExercise() 2 | %WARMUPEXERCISE Example function in octave 3 | % A = WARMUPEXERCISE() is an example function that returns the 5x5 identity matrix 4 | 5 | A = []; 6 | % ============= YOUR CODE HERE ============== 7 | % Instructions: Return the 5x5 identity matrix 8 | % In octave, we return values by defining which variables 9 | % represent the return values (at the top of the file) 10 | % and then set them accordingly. 11 | 12 | A = eye(5); 13 | 14 | % =========================================== 15 | 16 | 17 | end 18 | -------------------------------------------------------------------------------- /ex2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/ex2.pdf -------------------------------------------------------------------------------- /ex2/costFunction.m: -------------------------------------------------------------------------------- 1 | function [J, grad] = costFunction(theta, X, y) 2 | %COSTFUNCTION Compute cost and gradient for logistic regression 3 | % J = COSTFUNCTION(theta, X, y) computes the cost of using theta as the 4 | % parameter for logistic regression and the gradient of the cost 5 | % w.r.t. to the parameters. 6 | 7 | % Initialize some useful values 8 | m = length(y); % number of training examples 9 | 10 | % You need to return the following variables correctly 11 | J = 0; 12 | grad = zeros(size(theta)); 13 | 14 | % ====================== YOUR CODE HERE ====================== 15 | % Instructions: Compute the cost of a particular choice of theta. 16 | % You should set J to the cost. 17 | % Compute the partial derivatives and set grad to the partial 18 | % derivatives of the cost w.r.t. each parameter in theta 19 | % 20 | % Note: grad should have the same dimensions as theta 21 | % 22 | h0 = sigmoid(X*theta); 23 | 24 | J = (1/m)*sum(-y.*log(h0) - (1-y).*log(1-h0)); 25 | grad = (1/m)*(X'*(h0-y)); 26 | % ============================================================= 27 | 28 | end 29 | -------------------------------------------------------------------------------- /ex2/costFunctionReg.m: -------------------------------------------------------------------------------- 1 | function [J, grad] = costFunctionReg(theta, X, y, lambda) 2 | %COSTFUNCTIONREG Compute cost and gradient for logistic regression with regularization 3 | % J = COSTFUNCTIONREG(theta, X, y, lambda) computes the cost of using 4 | % theta as the parameter for regularized logistic regression and the 5 | % gradient of the cost w.r.t. to the parameters. 6 | 7 | % Initialize some useful values 8 | m = length(y); % number of training examples 9 | 10 | % You need to return the following variables correctly 11 | J = 0; 12 | grad = zeros(size(theta)); 13 | 14 | % ====================== YOUR CODE HERE ====================== 15 | % Instructions: Compute the cost of a particular choice of theta. 16 | % You should set J to the cost. 
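% For reference, the implementation below reuses costFunction and then adds
% the regularization term lambda/(2*m) * sum(theta(2:end).^2) to J and
% (lambda/m)*theta(j) to each gradient component with j >= 2; theta(1),
% the intercept parameter, is intentionally left unpenalized.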
17 | % Compute the partial derivatives and set grad to the partial 18 | % derivatives of the cost w.r.t. each parameter in theta 19 | 20 | [J, grad] = costFunction(theta, X, y); 21 | penalize = sum(theta(2:end) .^ 2); 22 | J = J + lambda/(2*m) * penalize; 23 | 24 | grad(2:end) = grad(2:end) + (lambda/m)*theta(2:end); 25 | % ============================================================= 26 | 27 | end 28 | -------------------------------------------------------------------------------- /ex2/ex2.m: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env octave 2 | %% Machine Learning Online Class - Exercise 2: Logistic Regression 3 | % 4 | % Instructions 5 | % ------------ 6 | % 7 | % This file contains code that helps you get started on the logistic 8 | % regression exercise. You will need to complete the following functions 9 | % in this exericse: 10 | % 11 | % sigmoid.m 12 | % costFunction.m 13 | % predict.m 14 | % costFunctionReg.m 15 | % 16 | % For this exercise, you will not need to change any code in this file, 17 | % or any other files other than those mentioned above. 18 | % 19 | 20 | %% Initialization 21 | clear ; close all; clc 22 | 23 | %% Load Data 24 | % The first two columns contains the exam scores and the third column 25 | % contains the label. 26 | 27 | data = load('ex2data1.txt'); 28 | X = data(:, [1, 2]); y = data(:, 3); 29 | 30 | %% ==================== Part 1: Plotting ==================== 31 | % We start the exercise by first plotting the data to understand the 32 | % the problem we are working with. 33 | 34 | fprintf(['Plotting data with + indicating (y = 1) examples and o ' ... 35 | 'indicating (y = 0) examples.\n']); 36 | 37 | plotData(X, y); 38 | 39 | % Put some labels 40 | hold on; 41 | % Labels and Legend 42 | xlabel('Exam 1 score') 43 | ylabel('Exam 2 score') 44 | 45 | % Specified in plot order 46 | legend('Admitted', 'Not admitted') 47 | hold off; 48 | 49 | fprintf('\nProgram paused. Press enter to continue.\n'); 50 | pause; 51 | 52 | 53 | %% ============ Part 2: Compute Cost and Gradient ============ 54 | % In this part of the exercise, you will implement the cost and gradient 55 | % for logistic regression. You neeed to complete the code in 56 | % costFunction.m 57 | 58 | % Setup the data matrix appropriately, and add ones for the intercept term 59 | [m, n] = size(X); 60 | 61 | % Add intercept term to x and X_test 62 | X = [ones(m, 1) X]; 63 | 64 | % Initialize fitting parameters 65 | initial_theta = zeros(n + 1, 1); 66 | 67 | % Compute and display initial cost and gradient 68 | [cost, grad] = costFunction(initial_theta, X, y); 69 | 70 | fprintf('Cost at initial theta (zeros): %f\n', cost); 71 | fprintf('Gradient at initial theta (zeros): \n'); 72 | fprintf(' %f \n', grad); 73 | 74 | fprintf('\nProgram paused. Press enter to continue.\n'); 75 | pause; 76 | 77 | 78 | %% ============= Part 3: Optimizing using fminunc ============= 79 | % In this exercise, you will use a built-in function (fminunc) to find the 80 | % optimal parameters theta. 81 | 82 | % Set options for fminunc 83 | options = optimset('GradObj', 'on', 'MaxIter', 400); 84 | 85 | % Run fminunc to obtain the optimal theta 86 | % This function will return theta and the cost 87 | [theta, cost] = ... 
88 | fminunc(@(t)(costFunction(t, X, y)), initial_theta, options); 89 | 90 | % Print theta to screen 91 | fprintf('Cost at theta found by fminunc: %f\n', cost); 92 | fprintf('theta: \n'); 93 | fprintf(' %f \n', theta); 94 | 95 | % Plot Boundary 96 | plotDecisionBoundary(theta, X, y); 97 | 98 | % Put some labels 99 | hold on; 100 | % Labels and Legend 101 | xlabel('Exam 1 score') 102 | ylabel('Exam 2 score') 103 | 104 | % Specified in plot order 105 | legend('Admitted', 'Not admitted') 106 | hold off; 107 | 108 | fprintf('\nProgram paused. Press enter to continue.\n'); 109 | pause; 110 | 111 | %% ============== Part 4: Predict and Accuracies ============== 112 | % After learning the parameters, you'll like to use it to predict the outcomes 113 | % on unseen data. In this part, you will use the logistic regression model 114 | % to predict the probability that a student with score 20 on exam 1 and 115 | % score 80 on exam 2 will be admitted. 116 | % 117 | % Furthermore, you will compute the training and test set accuracies of 118 | % our model. 119 | % 120 | % Your task is to complete the code in predict.m 121 | 122 | % Predict probability for a student with score 45 on exam 1 123 | % and score 85 on exam 2 124 | 125 | prob = sigmoid([1 45 85] * theta); 126 | fprintf(['For a student with scores 45 and 85, we predict an admission ' ... 127 | 'probability of %f\n\n'], prob); 128 | 129 | % Compute accuracy on our training set 130 | p = predict(theta, X); 131 | 132 | fprintf('Train Accuracy: %f\n', mean(double(p == y)) * 100); 133 | 134 | fprintf('\nProgram paused. Press enter to continue.\n'); 135 | pause; 136 | 137 | -------------------------------------------------------------------------------- /ex2/ex2_reg.m: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env octave 2 | %% Machine Learning Online Class - Exercise 2: Logistic Regression 3 | % 4 | % Instructions 5 | % ------------ 6 | % 7 | % This file contains code that helps you get started on the second part 8 | % of the exercise which covers regularization with logistic regression. 9 | % 10 | % You will need to complete the following functions in this exericse: 11 | % 12 | % sigmoid.m 13 | % costFunction.m 14 | % predict.m 15 | % costFunctionReg.m 16 | % 17 | % For this exercise, you will not need to change any code in this file, 18 | % or any other files other than those mentioned above. 19 | % 20 | 21 | %% Initialization 22 | clear ; close all; clc 23 | 24 | %% Load Data 25 | % The first two columns contains the exam scores and the third column 26 | % contains the label. 27 | 28 | data = load('ex2data2.txt'); 29 | X = data(:, [1, 2]); y = data(:, 3); 30 | 31 | plotData(X, y); 32 | 33 | % Put some labels 34 | hold on; 35 | 36 | % Labels and Legend 37 | xlabel('Microchip Test 1') 38 | ylabel('Microchip Test 2') 39 | 40 | % Specified in plot order 41 | legend('y = 1', 'y = 0') 42 | hold off; 43 | 44 | 45 | %% =========== Part 1: Regularized Logistic Regression ============ 46 | % In this part, you are given a dataset with data points that are not 47 | % linearly separable. However, you would still like to use logistic 48 | % regression to classify the data points. 49 | % 50 | % To do so, you introduce more features to use -- in particular, you add 51 | % polynomial features to our data matrix (similar to polynomial 52 | % regression). 
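% Note: mapFeature.m expands the two raw features into every monomial
% x1^i * x2^j with i + j <= 6, which yields 28 columns (including the
% leading column of ones), so initial_theta below is a 28x1 vector.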
53 | % 54 | 55 | % Add Polynomial Features 56 | 57 | % Note that mapFeature also adds a column of ones for us, so the intercept 58 | % term is handled 59 | X = mapFeature(X(:,1), X(:,2)); 60 | 61 | % Initialize fitting parameters 62 | initial_theta = zeros(size(X, 2), 1); 63 | 64 | % Set regularization parameter lambda to 1 65 | lambda = 1; 66 | 67 | % Compute and display initial cost and gradient for regularized logistic 68 | % regression 69 | [cost, grad] = costFunctionReg(initial_theta, X, y, lambda); 70 | 71 | fprintf('Cost at initial theta (zeros): %f\n', cost); 72 | 73 | fprintf('\nProgram paused. Press enter to continue.\n'); 74 | pause; 75 | 76 | %% ============= Part 2: Regularization and Accuracies ============= 77 | % Optional Exercise: 78 | % In this part, you will get to try different values of lambda and 79 | % see how regularization affects the decision coundart 80 | % 81 | % Try the following values of lambda (0, 1, 10, 100). 82 | % 83 | % How does the decision boundary change when you vary lambda? How does 84 | % the training set accuracy vary? 85 | % 86 | 87 | % Initialize fitting parameters 88 | initial_theta = zeros(size(X, 2), 1); 89 | 90 | % Set regularization parameter lambda to 1 (you should vary this) 91 | lambda = 1; 92 | 93 | % Set Options 94 | options = optimset('GradObj', 'on', 'MaxIter', 400); 95 | 96 | % Optimize 97 | [theta, J, exit_flag] = ... 98 | fminunc(@(t)(costFunctionReg(t, X, y, lambda)), initial_theta, options); 99 | 100 | % Plot Boundary 101 | plotDecisionBoundary(theta, X, y); 102 | hold on; 103 | title(sprintf('lambda = %g', lambda)) 104 | 105 | % Labels and Legend 106 | xlabel('Microchip Test 1') 107 | ylabel('Microchip Test 2') 108 | 109 | legend('y = 1', 'y = 0', 'Decision boundary') 110 | hold off; 111 | 112 | % Compute accuracy on our training set 113 | p = predict(theta, X); 114 | 115 | fprintf('Train Accuracy: %f\n', mean(double(p == y)) * 100); 116 | 117 | pause; 118 | -------------------------------------------------------------------------------- /ex2/ex2data1.txt: -------------------------------------------------------------------------------- 1 | 34.62365962451697,78.0246928153624,0 2 | 30.28671076822607,43.89499752400101,0 3 | 35.84740876993872,72.90219802708364,0 4 | 60.18259938620976,86.30855209546826,1 5 | 79.0327360507101,75.3443764369103,1 6 | 45.08327747668339,56.3163717815305,0 7 | 61.10666453684766,96.51142588489624,1 8 | 75.02474556738889,46.55401354116538,1 9 | 76.09878670226257,87.42056971926803,1 10 | 84.43281996120035,43.53339331072109,1 11 | 95.86155507093572,38.22527805795094,0 12 | 75.01365838958247,30.60326323428011,0 13 | 82.30705337399482,76.48196330235604,1 14 | 69.36458875970939,97.71869196188608,1 15 | 39.53833914367223,76.03681085115882,0 16 | 53.9710521485623,89.20735013750205,1 17 | 69.07014406283025,52.74046973016765,1 18 | 67.94685547711617,46.67857410673128,0 19 | 70.66150955499435,92.92713789364831,1 20 | 76.97878372747498,47.57596364975532,1 21 | 67.37202754570876,42.83843832029179,0 22 | 89.67677575072079,65.79936592745237,1 23 | 50.534788289883,48.85581152764205,0 24 | 34.21206097786789,44.20952859866288,0 25 | 77.9240914545704,68.9723599933059,1 26 | 62.27101367004632,69.95445795447587,1 27 | 80.1901807509566,44.82162893218353,1 28 | 93.114388797442,38.80067033713209,0 29 | 61.83020602312595,50.25610789244621,0 30 | 38.78580379679423,64.99568095539578,0 31 | 61.379289447425,72.80788731317097,1 32 | 85.40451939411645,57.05198397627122,1 33 | 52.10797973193984,63.12762376881715,0 34 | 
52.04540476831827,69.43286012045222,1 35 | 40.23689373545111,71.16774802184875,0 36 | 54.63510555424817,52.21388588061123,0 37 | 33.91550010906887,98.86943574220611,0 38 | 64.17698887494485,80.90806058670817,1 39 | 74.78925295941542,41.57341522824434,0 40 | 34.1836400264419,75.2377203360134,0 41 | 83.90239366249155,56.30804621605327,1 42 | 51.54772026906181,46.85629026349976,0 43 | 94.44336776917852,65.56892160559052,1 44 | 82.36875375713919,40.61825515970618,0 45 | 51.04775177128865,45.82270145776001,0 46 | 62.22267576120188,52.06099194836679,0 47 | 77.19303492601364,70.45820000180959,1 48 | 97.77159928000232,86.7278223300282,1 49 | 62.07306379667647,96.76882412413983,1 50 | 91.56497449807442,88.69629254546599,1 51 | 79.94481794066932,74.16311935043758,1 52 | 99.2725269292572,60.99903099844988,1 53 | 90.54671411399852,43.39060180650027,1 54 | 34.52451385320009,60.39634245837173,0 55 | 50.2864961189907,49.80453881323059,0 56 | 49.58667721632031,59.80895099453265,0 57 | 97.64563396007767,68.86157272420604,1 58 | 32.57720016809309,95.59854761387875,0 59 | 74.24869136721598,69.82457122657193,1 60 | 71.79646205863379,78.45356224515052,1 61 | 75.3956114656803,85.75993667331619,1 62 | 35.28611281526193,47.02051394723416,0 63 | 56.25381749711624,39.26147251058019,0 64 | 30.05882244669796,49.59297386723685,0 65 | 44.66826172480893,66.45008614558913,0 66 | 66.56089447242954,41.09209807936973,0 67 | 40.45755098375164,97.53518548909936,1 68 | 49.07256321908844,51.88321182073966,0 69 | 80.27957401466998,92.11606081344084,1 70 | 66.74671856944039,60.99139402740988,1 71 | 32.72283304060323,43.30717306430063,0 72 | 64.0393204150601,78.03168802018232,1 73 | 72.34649422579923,96.22759296761404,1 74 | 60.45788573918959,73.09499809758037,1 75 | 58.84095621726802,75.85844831279042,1 76 | 99.82785779692128,72.36925193383885,1 77 | 47.26426910848174,88.47586499559782,1 78 | 50.45815980285988,75.80985952982456,1 79 | 60.45555629271532,42.50840943572217,0 80 | 82.22666157785568,42.71987853716458,0 81 | 88.9138964166533,69.80378889835472,1 82 | 94.83450672430196,45.69430680250754,1 83 | 67.31925746917527,66.58935317747915,1 84 | 57.23870631569862,59.51428198012956,1 85 | 80.36675600171273,90.96014789746954,1 86 | 68.46852178591112,85.59430710452014,1 87 | 42.0754545384731,78.84478600148043,0 88 | 75.47770200533905,90.42453899753964,1 89 | 78.63542434898018,96.64742716885644,1 90 | 52.34800398794107,60.76950525602592,0 91 | 94.09433112516793,77.15910509073893,1 92 | 90.44855097096364,87.50879176484702,1 93 | 55.48216114069585,35.57070347228866,0 94 | 74.49269241843041,84.84513684930135,1 95 | 89.84580670720979,45.35828361091658,1 96 | 83.48916274498238,48.38028579728175,1 97 | 42.2617008099817,87.10385094025457,1 98 | 99.31500880510394,68.77540947206617,1 99 | 55.34001756003703,64.9319380069486,1 100 | 74.77589300092767,89.52981289513276,1 101 | -------------------------------------------------------------------------------- /ex2/ex2data2.txt: -------------------------------------------------------------------------------- 1 | 0.051267,0.69956,1 2 | -0.092742,0.68494,1 3 | -0.21371,0.69225,1 4 | -0.375,0.50219,1 5 | -0.51325,0.46564,1 6 | -0.52477,0.2098,1 7 | -0.39804,0.034357,1 8 | -0.30588,-0.19225,1 9 | 0.016705,-0.40424,1 10 | 0.13191,-0.51389,1 11 | 0.38537,-0.56506,1 12 | 0.52938,-0.5212,1 13 | 0.63882,-0.24342,1 14 | 0.73675,-0.18494,1 15 | 0.54666,0.48757,1 16 | 0.322,0.5826,1 17 | 0.16647,0.53874,1 18 | -0.046659,0.81652,1 19 | -0.17339,0.69956,1 20 | -0.47869,0.63377,1 21 | -0.60541,0.59722,1 22 | 
-0.62846,0.33406,1 23 | -0.59389,0.005117,1 24 | -0.42108,-0.27266,1 25 | -0.11578,-0.39693,1 26 | 0.20104,-0.60161,1 27 | 0.46601,-0.53582,1 28 | 0.67339,-0.53582,1 29 | -0.13882,0.54605,1 30 | -0.29435,0.77997,1 31 | -0.26555,0.96272,1 32 | -0.16187,0.8019,1 33 | -0.17339,0.64839,1 34 | -0.28283,0.47295,1 35 | -0.36348,0.31213,1 36 | -0.30012,0.027047,1 37 | -0.23675,-0.21418,1 38 | -0.06394,-0.18494,1 39 | 0.062788,-0.16301,1 40 | 0.22984,-0.41155,1 41 | 0.2932,-0.2288,1 42 | 0.48329,-0.18494,1 43 | 0.64459,-0.14108,1 44 | 0.46025,0.012427,1 45 | 0.6273,0.15863,1 46 | 0.57546,0.26827,1 47 | 0.72523,0.44371,1 48 | 0.22408,0.52412,1 49 | 0.44297,0.67032,1 50 | 0.322,0.69225,1 51 | 0.13767,0.57529,1 52 | -0.0063364,0.39985,1 53 | -0.092742,0.55336,1 54 | -0.20795,0.35599,1 55 | -0.20795,0.17325,1 56 | -0.43836,0.21711,1 57 | -0.21947,-0.016813,1 58 | -0.13882,-0.27266,1 59 | 0.18376,0.93348,0 60 | 0.22408,0.77997,0 61 | 0.29896,0.61915,0 62 | 0.50634,0.75804,0 63 | 0.61578,0.7288,0 64 | 0.60426,0.59722,0 65 | 0.76555,0.50219,0 66 | 0.92684,0.3633,0 67 | 0.82316,0.27558,0 68 | 0.96141,0.085526,0 69 | 0.93836,0.012427,0 70 | 0.86348,-0.082602,0 71 | 0.89804,-0.20687,0 72 | 0.85196,-0.36769,0 73 | 0.82892,-0.5212,0 74 | 0.79435,-0.55775,0 75 | 0.59274,-0.7405,0 76 | 0.51786,-0.5943,0 77 | 0.46601,-0.41886,0 78 | 0.35081,-0.57968,0 79 | 0.28744,-0.76974,0 80 | 0.085829,-0.75512,0 81 | 0.14919,-0.57968,0 82 | -0.13306,-0.4481,0 83 | -0.40956,-0.41155,0 84 | -0.39228,-0.25804,0 85 | -0.74366,-0.25804,0 86 | -0.69758,0.041667,0 87 | -0.75518,0.2902,0 88 | -0.69758,0.68494,0 89 | -0.4038,0.70687,0 90 | -0.38076,0.91886,0 91 | -0.50749,0.90424,0 92 | -0.54781,0.70687,0 93 | 0.10311,0.77997,0 94 | 0.057028,0.91886,0 95 | -0.10426,0.99196,0 96 | -0.081221,1.1089,0 97 | 0.28744,1.087,0 98 | 0.39689,0.82383,0 99 | 0.63882,0.88962,0 100 | 0.82316,0.66301,0 101 | 0.67339,0.64108,0 102 | 1.0709,0.10015,0 103 | -0.046659,-0.57968,0 104 | -0.23675,-0.63816,0 105 | -0.15035,-0.36769,0 106 | -0.49021,-0.3019,0 107 | -0.46717,-0.13377,0 108 | -0.28859,-0.060673,0 109 | -0.61118,-0.067982,0 110 | -0.66302,-0.21418,0 111 | -0.59965,-0.41886,0 112 | -0.72638,-0.082602,0 113 | -0.83007,0.31213,0 114 | -0.72062,0.53874,0 115 | -0.59389,0.49488,0 116 | -0.48445,0.99927,0 117 | -0.0063364,0.99927,0 118 | 0.63265,-0.030612,0 119 | -------------------------------------------------------------------------------- /ex2/mapFeature.m: -------------------------------------------------------------------------------- 1 | function out = mapFeature(X1, X2) 2 | % MAPFEATURE Feature mapping function to polynomial features 3 | % 4 | % MAPFEATURE(X1, X2) maps the two input features 5 | % to quadratic features used in the regularization exercise. 6 | % 7 | % Returns a new feature array with more features, comprising of 8 | % X1, X2, X1.^2, X2.^2, X1*X2, X1*X2.^2, etc.. 9 | % 10 | % Inputs X1, X2 must be the same size 11 | % 12 | 13 | degree = 6; 14 | out = ones(size(X1(:,1))); 15 | for i = 1:degree 16 | for j = 0:i 17 | out(:, end+1) = (X1.^(i-j)).*(X2.^j); 18 | end 19 | end 20 | 21 | end -------------------------------------------------------------------------------- /ex2/plotData.m: -------------------------------------------------------------------------------- 1 | function plotData(X, y) 2 | %PLOTDATA Plots the data points X and y into a new figure 3 | % PLOTDATA(x,y) plots the data points with + for the positive examples 4 | % and o for the negative examples. X is assumed to be a Mx2 matrix. 
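% Example usage (as in ex2.m): plotData(data(:, [1, 2]), data(:, 3));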
5 | 6 | % Create New Figure 7 | figure; hold on; 8 | 9 | % ====================== YOUR CODE HERE ====================== 10 | % Instructions: Plot the positive and negative examples on a 11 | % 2D plot, using the option 'k+' for the positive 12 | % examples and 'ko' for the negative examples. 13 | % 14 | 15 | negative = find(y==0); positive = find(y==1); 16 | plot(X(positive, 1), X(positive, 2), 'k+') 17 | plot(X(negative, 1), X(negative, 2), 'ko') 18 | 19 | % ========================================================================= 20 | 21 | 22 | 23 | hold off; 24 | 25 | end 26 | -------------------------------------------------------------------------------- /ex2/plotDecisionBoundary.m: -------------------------------------------------------------------------------- 1 | function plotDecisionBoundary(theta, X, y) 2 | %PLOTDECISIONBOUNDARY Plots the data points X and y into a new figure with 3 | %the decision boundary defined by theta 4 | % PLOTDECISIONBOUNDARY(theta, X,y) plots the data points with + for the 5 | % positive examples and o for the negative examples. X is assumed to be 6 | % a either 7 | % 1) Mx3 matrix, where the first column is an all-ones column for the 8 | % intercept. 9 | % 2) MxN, N>3 matrix, where the first column is all-ones 10 | 11 | % Plot Data 12 | plotData(X(:,2:3), y); 13 | hold on 14 | 15 | if size(X, 2) <= 3 16 | % Only need 2 points to define a line, so choose two endpoints 17 | plot_x = [min(X(:,2))-2, max(X(:,2))+2]; 18 | 19 | % Calculate the decision boundary line 20 | plot_y = (-1./theta(3)).*(theta(2).*plot_x + theta(1)); 21 | 22 | % Plot, and adjust axes for better viewing 23 | plot(plot_x, plot_y) 24 | 25 | % Legend, specific for the exercise 26 | legend('Admitted', 'Not admitted', 'Decision Boundary') 27 | axis([30, 100, 30, 100]) 28 | else 29 | % Here is the grid range 30 | u = linspace(-1, 1.5, 50); 31 | v = linspace(-1, 1.5, 50); 32 | 33 | z = zeros(length(u), length(v)); 34 | % Evaluate z = theta*x over the grid 35 | for i = 1:length(u) 36 | for j = 1:length(v) 37 | z(i,j) = mapFeature(u(i), v(j))*theta; 38 | end 39 | end 40 | z = z'; % important to transpose z before calling contour 41 | 42 | % Plot z = 0 43 | % Notice you need to specify the range [0, 0] 44 | contour(u, v, z, [0, 0], 'LineWidth', 2) 45 | end 46 | hold off 47 | 48 | end 49 | -------------------------------------------------------------------------------- /ex2/predict.m: -------------------------------------------------------------------------------- 1 | function p = predict(theta, X) 2 | %PREDICT Predict whether the label is 0 or 1 using learned logistic 3 | %regression parameters theta 4 | % p = PREDICT(theta, X) computes the predictions for X using a 5 | % threshold at 0.5 (i.e., if sigmoid(theta'*x) >= 0.5, predict 1) 6 | 7 | m = size(X, 1); % Number of training examples 8 | 9 | % You need to return the following variables correctly 10 | p = zeros(m, 1); 11 | 12 | % ====================== YOUR CODE HERE ====================== 13 | % Instructions: Complete the following code to make predictions using 14 | % your learned logistic regression parameters. 
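% Note: rounding sigmoid(X*theta), as done below, is equivalent to the
% threshold rule p = (sigmoid(X*theta) >= 0.5) described above, because
% round() maps values of at least 0.5 to 1 and smaller values to 0.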
15 | % You should set p to a vector of 0's and 1's 16 | % 17 | 18 | p = round(sigmoid(X*theta)); 19 | % ========================================================================= 20 | 21 | 22 | end 23 | -------------------------------------------------------------------------------- /ex2/sigmoid.m: -------------------------------------------------------------------------------- 1 | function g = sigmoid(z) 2 | %SIGMOID Compute sigmoid functoon 3 | % J = SIGMOID(z) computes the sigmoid of z. 4 | 5 | % You need to return the following variables correctly 6 | g = zeros(size(z)); 7 | 8 | % ====================== YOUR CODE HERE ====================== 9 | % Instructions: Compute the sigmoid of each value of z (z can be a matrix, 10 | % vector or scalar). 11 | g = 1 ./ (1 + e .^ -z); 12 | 13 | % ============================================================= 14 | 15 | end 16 | -------------------------------------------------------------------------------- /ex3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/ex3.pdf -------------------------------------------------------------------------------- /ex3/displayData.m: -------------------------------------------------------------------------------- 1 | function [h, display_array] = displayData(X, example_width) 2 | %DISPLAYDATA Display 2D data in a nice grid 3 | % [h, display_array] = DISPLAYDATA(X, example_width) displays 2D data 4 | % stored in X in a nice grid. It returns the figure handle h and the 5 | % displayed array if requested. 6 | 7 | % Set example_width automatically if not passed in 8 | if ~exist('example_width', 'var') || isempty(example_width) 9 | example_width = round(sqrt(size(X, 2))); 10 | end 11 | 12 | % Gray Image 13 | colormap(gray); 14 | 15 | % Compute rows, cols 16 | [m n] = size(X); 17 | example_height = (n / example_width); 18 | 19 | % Compute number of items to display 20 | display_rows = floor(sqrt(m)); 21 | display_cols = ceil(m / display_rows); 22 | 23 | % Between images padding 24 | pad = 1; 25 | 26 | % Setup blank display 27 | display_array = - ones(pad + display_rows * (example_height + pad), ... 28 | pad + display_cols * (example_width + pad)); 29 | 30 | % Copy each example into a patch on the display array 31 | curr_ex = 1; 32 | for j = 1:display_rows 33 | for i = 1:display_cols 34 | if curr_ex > m, 35 | break; 36 | end 37 | % Copy the patch 38 | 39 | % Get the max value of the patch 40 | max_val = max(abs(X(curr_ex, :))); 41 | display_array(pad + (j - 1) * (example_height + pad) + (1:example_height), ... 42 | pad + (i - 1) * (example_width + pad) + (1:example_width)) = ... 43 | reshape(X(curr_ex, :), example_height, example_width) / max_val; 44 | curr_ex = curr_ex + 1; 45 | end 46 | if curr_ex > m, 47 | break; 48 | end 49 | end 50 | 51 | % Display Image 52 | h = imagesc(display_array, [-1 1]); 53 | 54 | % Do not show axis 55 | axis image off 56 | 57 | drawnow; 58 | 59 | end 60 | -------------------------------------------------------------------------------- /ex3/ex3.m: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env octave 2 | %% Machine Learning Online Class - Exercise 3 | Part 1: One-vs-all 3 | 4 | % Instructions 5 | % ------------ 6 | % 7 | % This file contains code that helps you get started on the 8 | % linear exercise. 
You will need to complete the following functions 9 | % in this exericse: 10 | % 11 | % lrCostFunction.m (logistic regression cost function) 12 | % oneVsAll.m 13 | % predictOneVsAll.m 14 | % predict.m 15 | % 16 | % For this exercise, you will not need to change any code in this file, 17 | % or any other files other than those mentioned above. 18 | % 19 | 20 | %% Initialization 21 | clear ; close all; clc 22 | 23 | %% Setup the parameters you will use for this part of the exercise 24 | input_layer_size = 400; % 20x20 Input Images of Digits 25 | num_labels = 10; % 10 labels, from 1 to 10 26 | % (note that we have mapped "0" to label 10) 27 | 28 | %% =========== Part 1: Loading and Visualizing Data ============= 29 | % We start the exercise by first loading and visualizing the dataset. 30 | % You will be working with a dataset that contains handwritten digits. 31 | % 32 | 33 | % Load Training Data 34 | fprintf('Loading and Visualizing Data ...\n') 35 | 36 | load('ex3data1.mat'); % training data stored in arrays X, y 37 | m = size(X, 1); 38 | 39 | % Randomly select 100 data points to display 40 | rand_indices = randperm(m); 41 | sel = X(rand_indices(1:100), :); 42 | 43 | displayData(sel); 44 | 45 | fprintf('Program paused. Press enter to continue.\n'); 46 | pause; 47 | 48 | %% ============ Part 2: Vectorize Logistic Regression ============ 49 | % In this part of the exercise, you will reuse your logistic regression 50 | % code from the last exercise. You task here is to make sure that your 51 | % regularized logistic regression implementation is vectorized. After 52 | % that, you will implement one-vs-all classification for the handwritten 53 | % digit dataset. 54 | % 55 | 56 | fprintf('\nTraining One-vs-All Logistic Regression...\n') 57 | 58 | lambda = 0.1; 59 | [all_theta] = oneVsAll(X, y, num_labels, lambda); 60 | 61 | fprintf('Program paused. Press enter to continue.\n'); 62 | pause; 63 | 64 | 65 | %% ================ Part 3: Predict for One-Vs-All ================ 66 | % After ... 67 | pred = predictOneVsAll(all_theta, X); 68 | 69 | fprintf('\nTraining Set Accuracy: %f\n', mean(double(pred == y)) * 100); 70 | 71 | pause; 72 | -------------------------------------------------------------------------------- /ex3/ex3_nn.m: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env octave 2 | %% Machine Learning Online Class - Exercise 3 | Part 2: Neural Networks 3 | 4 | % Instructions 5 | % ------------ 6 | % 7 | % This file contains code that helps you get started on the 8 | % linear exercise. You will need to complete the following functions 9 | % in this exericse: 10 | % 11 | % lrCostFunction.m (logistic regression cost function) 12 | % oneVsAll.m 13 | % predictOneVsAll.m 14 | % predict.m 15 | % 16 | % For this exercise, you will not need to change any code in this file, 17 | % or any other files other than those mentioned above. 18 | % 19 | 20 | %% Initialization 21 | clear ; close all; clc 22 | 23 | %% Setup the parameters you will use for this exercise 24 | input_layer_size = 400; % 20x20 Input Images of Digits 25 | hidden_layer_size = 25; % 25 hidden units 26 | num_labels = 10; % 10 labels, from 1 to 10 27 | % (note that we have mapped "0" to label 10) 28 | 29 | %% =========== Part 1: Loading and Visualizing Data ============= 30 | % We start the exercise by first loading and visualizing the dataset. 31 | % You will be working with a dataset that contains handwritten digits. 
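For the one-vs-all training described above, each of the num_labels classifiers is trained against a binary relabeling of y obtained with the comparison y == c (the hint in oneVsAll.m further down spells this out). A small made-up example of that relabeling, not data from ex3data1.mat:

y = [1; 3; 2; 3; 1];              % hypothetical multi-class labels
c = 3;
binary_labels = double(y == c);
disp(binary_labels');              % prints 0 1 0 1 0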
32 | % 33 | 34 | % Load Training Data 35 | fprintf('Loading and Visualizing Data ...\n') 36 | 37 | load('ex3data1.mat'); 38 | m = size(X, 1); 39 | 40 | % Randomly select 100 data points to display 41 | sel = randperm(size(X, 1)); 42 | sel = sel(1:100); 43 | 44 | displayData(X(sel, :)); 45 | 46 | fprintf('Program paused. Press enter to continue.\n'); 47 | pause; 48 | 49 | %% ================ Part 2: Loading Pameters ================ 50 | % In this part of the exercise, we load some pre-initialized 51 | % neural network parameters. 52 | 53 | fprintf('\nLoading Saved Neural Network Parameters ...\n') 54 | 55 | % Load the weights into variables Theta1 and Theta2 56 | load('ex3weights.mat'); 57 | 58 | %% ================= Part 3: Implement Predict ================= 59 | % After training the neural network, we would like to use it to predict 60 | % the labels. You will now implement the "predict" function to use the 61 | % neural network to predict the labels of the training set. This lets 62 | % you compute the training set accuracy. 63 | 64 | pred = predict(Theta1, Theta2, X); 65 | 66 | fprintf('\nTraining Set Accuracy: %f\n', mean(double(pred == y)) * 100); 67 | 68 | fprintf('Program paused. Press enter to continue.\n'); 69 | pause; 70 | 71 | % To give you an idea of the network's output, you can also run 72 | % through the examples one at the a time to see what it is predicting. 73 | 74 | % Randomly permute examples 75 | rp = randperm(m); 76 | 77 | for i = 1:m 78 | % Display 79 | fprintf('\nDisplaying Example Image\n'); 80 | displayData(X(rp(i), :)); 81 | 82 | pred = predict(Theta1, Theta2, X(rp(i),:)); 83 | fprintf('\nNeural Network Prediction: %d (digit %d)\n', pred, mod(pred, 10)); 84 | 85 | % Pause 86 | fprintf('Program paused. Press enter to continue.\n'); 87 | pause; 88 | end 89 | 90 | -------------------------------------------------------------------------------- /ex3/ex3data1.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/ex3/ex3data1.mat -------------------------------------------------------------------------------- /ex3/ex3weights.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/ex3/ex3weights.mat -------------------------------------------------------------------------------- /ex3/fmincg.m: -------------------------------------------------------------------------------- 1 | function [X, fX, i] = fmincg(f, X, options, P1, P2, P3, P4, P5) 2 | % Minimize a continuous differentialble multivariate function. Starting point 3 | % is given by "X" (D by 1), and the function named in the string "f", must 4 | % return a function value and a vector of partial derivatives. The Polack- 5 | % Ribiere flavour of conjugate gradients is used to compute search directions, 6 | % and a line search using quadratic and cubic polynomial approximations and the 7 | % Wolfe-Powell stopping criteria is used together with the slope ratio method 8 | % for guessing initial step sizes. Additionally a bunch of checks are made to 9 | % make sure that exploration is taking place and that extrapolation will not 10 | % be unboundedly large. The "length" gives the length of the run: if it is 11 | % positive, it gives the maximum number of line searches, if negative its 12 | % absolute gives the maximum allowed number of function evaluations. 
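fmincg only needs a handle that returns both the function value and its gradient, so it can be exercised on a toy problem before plugging in lrCostFunction. A minimal standalone sketch; the quadratic below is made up purely for illustration and is not part of the exercise:

quadratic = @(x) deal((x - 3).^2, 2*(x - 3));   % returns [value, gradient]
options = optimset('MaxIter', 25);              % maps to the "length" described above
[x_min, cost_history] = fmincg(quadratic, 0, options);
disp(x_min);                                    % converges close to 3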
You can 13 | % (optionally) give "length" a second component, which will indicate the 14 | % reduction in function value to be expected in the first line-search (defaults 15 | % to 1.0). The function returns when either its length is up, or if no further 16 | % progress can be made (ie, we are at a minimum, or so close that due to 17 | % numerical problems, we cannot get any closer). If the function terminates 18 | % within a few iterations, it could be an indication that the function value 19 | % and derivatives are not consistent (ie, there may be a bug in the 20 | % implementation of your "f" function). The function returns the found 21 | % solution "X", a vector of function values "fX" indicating the progress made 22 | % and "i" the number of iterations (line searches or function evaluations, 23 | % depending on the sign of "length") used. 24 | % 25 | % Usage: [X, fX, i] = fmincg(f, X, options, P1, P2, P3, P4, P5) 26 | % 27 | % See also: checkgrad 28 | % 29 | % Copyright (C) 2001 and 2002 by Carl Edward Rasmussen. Date 2002-02-13 30 | % 31 | % 32 | % (C) Copyright 1999, 2000 & 2001, Carl Edward Rasmussen 33 | % 34 | % Permission is granted for anyone to copy, use, or modify these 35 | % programs and accompanying documents for purposes of research or 36 | % education, provided this copyright notice is retained, and note is 37 | % made of any changes that have been made. 38 | % 39 | % These programs and documents are distributed without any warranty, 40 | % express or implied. As the programs were written for research 41 | % purposes only, they have not been tested to the degree that would be 42 | % advisable in any important application. All use of these programs is 43 | % entirely at the user's own risk. 44 | % 45 | % [ml-class] Changes Made: 46 | % 1) Function name and argument specifications 47 | % 2) Output display 48 | % 49 | 50 | % Read options 51 | if exist('options', 'var') && ~isempty(options) && isfield(options, 'MaxIter') 52 | length = options.MaxIter; 53 | else 54 | length = 100; 55 | end 56 | 57 | 58 | RHO = 0.01; % a bunch of constants for line searches 59 | SIG = 0.5; % RHO and SIG are the constants in the Wolfe-Powell conditions 60 | INT = 0.1; % don't reevaluate within 0.1 of the limit of the current bracket 61 | EXT = 3.0; % extrapolate maximum 3 times the current bracket 62 | MAX = 20; % max 20 function evaluations per line search 63 | RATIO = 100; % maximum allowed slope ratio 64 | 65 | argstr = ['feval(f, X']; % compose string used to call function 66 | for i = 1:(nargin - 3) 67 | argstr = [argstr, ',P', int2str(i)]; 68 | end 69 | argstr = [argstr, ')']; 70 | 71 | if max(size(length)) == 2, red=length(2); length=length(1); else red=1; end 72 | S=['Iteration ']; 73 | 74 | i = 0; % zero the run length counter 75 | ls_failed = 0; % no previous line search has failed 76 | fX = []; 77 | [f1 df1] = eval(argstr); % get function value and gradient 78 | i = i + (length<0); % count epochs?! 79 | s = -df1; % search direction is steepest 80 | d1 = -s'*s; % this is the slope 81 | z1 = red/(1-d1); % initial step is red/(|s|+1) 82 | 83 | while i < abs(length) % while not finished 84 | i = i + (length>0); % count iterations?! 85 | 86 | X0 = X; f0 = f1; df0 = df1; % make a copy of current values 87 | X = X + z1*s; % begin line search 88 | [f2 df2] = eval(argstr); 89 | i = i + (length<0); % count epochs?! 
90 | d2 = df2'*s; 91 | f3 = f1; d3 = d1; z3 = -z1; % initialize point 3 equal to point 1 92 | if length>0, M = MAX; else M = min(MAX, -length-i); end 93 | success = 0; limit = -1; % initialize quanteties 94 | while 1 95 | while ((f2 > f1+z1*RHO*d1) || (d2 > -SIG*d1)) && (M > 0) 96 | limit = z1; % tighten the bracket 97 | if f2 > f1 98 | z2 = z3 - (0.5*d3*z3*z3)/(d3*z3+f2-f3); % quadratic fit 99 | else 100 | A = 6*(f2-f3)/z3+3*(d2+d3); % cubic fit 101 | B = 3*(f3-f2)-z3*(d3+2*d2); 102 | z2 = (sqrt(B*B-A*d2*z3*z3)-B)/A; % numerical error possible - ok! 103 | end 104 | if isnan(z2) || isinf(z2) 105 | z2 = z3/2; % if we had a numerical problem then bisect 106 | end 107 | z2 = max(min(z2, INT*z3),(1-INT)*z3); % don't accept too close to limits 108 | z1 = z1 + z2; % update the step 109 | X = X + z2*s; 110 | [f2 df2] = eval(argstr); 111 | M = M - 1; i = i + (length<0); % count epochs?! 112 | d2 = df2'*s; 113 | z3 = z3-z2; % z3 is now relative to the location of z2 114 | end 115 | if f2 > f1+z1*RHO*d1 || d2 > -SIG*d1 116 | break; % this is a failure 117 | elseif d2 > SIG*d1 118 | success = 1; break; % success 119 | elseif M == 0 120 | break; % failure 121 | end 122 | A = 6*(f2-f3)/z3+3*(d2+d3); % make cubic extrapolation 123 | B = 3*(f3-f2)-z3*(d3+2*d2); 124 | z2 = -d2*z3*z3/(B+sqrt(B*B-A*d2*z3*z3)); % num. error possible - ok! 125 | if ~isreal(z2) || isnan(z2) || isinf(z2) || z2 < 0 % num prob or wrong sign? 126 | if limit < -0.5 % if we have no upper limit 127 | z2 = z1 * (EXT-1); % the extrapolate the maximum amount 128 | else 129 | z2 = (limit-z1)/2; % otherwise bisect 130 | end 131 | elseif (limit > -0.5) && (z2+z1 > limit) % extraplation beyond max? 132 | z2 = (limit-z1)/2; % bisect 133 | elseif (limit < -0.5) && (z2+z1 > z1*EXT) % extrapolation beyond limit 134 | z2 = z1*(EXT-1.0); % set to extrapolation limit 135 | elseif z2 < -z3*INT 136 | z2 = -z3*INT; 137 | elseif (limit > -0.5) && (z2 < (limit-z1)*(1.0-INT)) % too close to limit? 138 | z2 = (limit-z1)*(1.0-INT); 139 | end 140 | f3 = f2; d3 = d2; z3 = -z2; % set point 3 equal to point 2 141 | z1 = z1 + z2; X = X + z2*s; % update current estimates 142 | [f2 df2] = eval(argstr); 143 | M = M - 1; i = i + (length<0); % count epochs?! 
144 | d2 = df2'*s; 145 | end % end of line search 146 | 147 | if success % if line search succeeded 148 | f1 = f2; fX = [fX' f1]'; 149 | fprintf('%s %4i | Cost: %4.6e\r', S, i, f1); 150 | s = (df2'*df2-df1'*df2)/(df1'*df1)*s - df2; % Polack-Ribiere direction 151 | tmp = df1; df1 = df2; df2 = tmp; % swap derivatives 152 | d2 = df1'*s; 153 | if d2 > 0 % new slope must be negative 154 | s = -df1; % otherwise use steepest direction 155 | d2 = -s'*s; 156 | end 157 | z1 = z1 * min(RATIO, d1/(d2-realmin)); % slope ratio but max RATIO 158 | d1 = d2; 159 | ls_failed = 0; % this line search did not fail 160 | else 161 | X = X0; f1 = f0; df1 = df0; % restore point from before failed line search 162 | if ls_failed || i > abs(length) % line search failed twice in a row 163 | break; % or we ran out of time, so we give up 164 | end 165 | tmp = df1; df1 = df2; df2 = tmp; % swap derivatives 166 | s = -df1; % try steepest 167 | d1 = -s'*s; 168 | z1 = 1/(1-d1); 169 | ls_failed = 1; % this line search failed 170 | end 171 | if exist('OCTAVE_VERSION') 172 | fflush(stdout); 173 | end 174 | end 175 | fprintf('\n'); 176 | -------------------------------------------------------------------------------- /ex3/lrCostFunction.m: -------------------------------------------------------------------------------- 1 | function [J, grad] = lrCostFunction(theta, X, y, lambda) 2 | %LRCOSTFUNCTION Compute cost and gradient for logistic regression with 3 | %regularization 4 | % J = LRCOSTFUNCTION(theta, X, y, lambda) computes the cost of using 5 | % theta as the parameter for regularized logistic regression and the 6 | % gradient of the cost w.r.t. to the parameters. 7 | 8 | % Initialize some useful values 9 | m = length(y); % number of training examples 10 | 11 | % You need to return the following variables correctly 12 | J = 0; 13 | grad = zeros(size(theta)); 14 | 15 | % ====================== YOUR CODE HERE ====================== 16 | % Instructions: Compute the cost of a particular choice of theta. 17 | % You should set J to the cost. 18 | % Compute the partial derivatives and set grad to the partial 19 | % derivatives of the cost w.r.t. each parameter in theta 20 | % 21 | % Hint: The computation of the cost function and gradients can be 22 | % efficiently vectorized. For example, consider the computation 23 | % 24 | % sigmoid(X * theta) 25 | % 26 | % Each row of the resulting matrix will contain the value of the 27 | % prediction for that example. You can make use of this to vectorize 28 | % the cost function and gradient computations. 
29 | % 30 | % Hint: When computing the gradient of the regularized cost function, 31 | % there're many possible vectorized solutions, but one solution 32 | % looks like: 33 | % grad = (unregularized gradient for logistic regression) 34 | % temp = theta; 35 | % temp(1) = 0; % because we don't add anything for j = 0 36 | % grad = grad + YOUR_CODE_HERE (using the temp variable) 37 | % 38 | 39 | addpath('../ex2'); 40 | [J, grad] = costFunctionReg(theta, X, y, lambda); 41 | 42 | % ============================================================= 43 | 44 | end 45 | -------------------------------------------------------------------------------- /ex3/oneVsAll.m: -------------------------------------------------------------------------------- 1 | function [all_theta] = oneVsAll(X, y, num_labels, lambda) 2 | %ONEVSALL trains multiple logistic regression classifiers and returns all 3 | %the classifiers in a matrix all_theta, where the i-th row of all_theta 4 | %corresponds to the classifier for label i 5 | % [all_theta] = ONEVSALL(X, y, num_labels, lambda) trains num_labels 6 | % logisitc regression classifiers and returns each of these classifiers 7 | % in a matrix all_theta, where the i-th row of all_theta corresponds 8 | % to the classifier for label i 9 | 10 | % Some useful variables 11 | m = size(X, 1); 12 | n = size(X, 2); 13 | 14 | % You need to return the following variables correctly 15 | all_theta = zeros(num_labels, n + 1); 16 | 17 | % Add ones to the X data matrix 18 | X = [ones(m, 1) X]; 19 | 20 | % ====================== YOUR CODE HERE ====================== 21 | % Instructions: You should complete the following code to train num_labels 22 | % logistic regression classifiers with regularization 23 | % parameter lambda. 24 | % 25 | % Hint: theta(:) will return a column vector. 26 | % 27 | % Hint: You can use y == c to obtain a vector of 1's and 0's that tell use 28 | % whether the ground truth is true/false for this class. 29 | % 30 | % Note: For this assignment, we recommend using fmincg to optimize the cost 31 | % function. It is okay to use a for-loop (for c = 1:num_labels) to 32 | % loop over the different classes. 33 | % 34 | % fmincg works similarly to fminunc, but is more efficient when we 35 | % are dealing with large number of parameters. 36 | % 37 | % Example Code for fmincg: 38 | % 39 | % % Set Initial theta 40 | % initial_theta = zeros(n + 1, 1); 41 | % 42 | % % Set options for fminunc 43 | % options = optimset('GradObj', 'on', 'MaxIter', 50); 44 | % 45 | % % Run fmincg to obtain the optimal theta 46 | % % This function will return theta and the cost 47 | % [theta] = ... 48 | % fmincg (@(t)(lrCostFunction(t, X, (y == c), lambda)), ... 49 | % initial_theta, options); 50 | % 51 | 52 | for c = 1:num_labels 53 | options = optimset('GradObj', 'on', 'MaxIter', 50); 54 | 55 | all_theta(c, :) = fmincg (@(t)(lrCostFunction(t, X, (y == c), lambda)), ... 
56 | zeros(n + 1, 1), options); 57 | 58 | % ========================================================================= 59 | 60 | 61 | end 62 | -------------------------------------------------------------------------------- /ex3/predict.m: -------------------------------------------------------------------------------- 1 | function p = predict(Theta1, Theta2, X) 2 | %PREDICT Predict the label of an input given a trained neural network 3 | % p = PREDICT(Theta1, Theta2, X) outputs the predicted label of X given the 4 | % trained weights of a neural network (Theta1, Theta2) 5 | 6 | % Useful values 7 | m = size(X, 1); 8 | num_labels = size(Theta2, 1); 9 | 10 | % You need to return the following variables correctly 11 | p = zeros(m, 1); 12 | 13 | % ====================== YOUR CODE HERE ====================== 14 | % Instructions: Complete the following code to make predictions using 15 | % your learned neural network. You should set p to a 16 | % vector containing labels between 1 to num_labels. 17 | % 18 | % Hint: The max function might come in useful. In particular, the max 19 | % function can also return the index of the max element, for more 20 | % information see 'help max'. If your examples are in rows, then, you 21 | % can use max(A, [], 2) to obtain the max for each row. 22 | % 23 | 24 | A1 = [ones(1, m); X']; 25 | A2 = [ones(1, m); sigmoid(Theta1*A1)]; 26 | A3 = sigmoid(Theta2*A2); 27 | [value, p] = max(A3', [], 2); 28 | 29 | % ========================================================================= 30 | 31 | 32 | end 33 | -------------------------------------------------------------------------------- /ex3/predictOneVsAll.m: -------------------------------------------------------------------------------- 1 | function p = predictOneVsAll(all_theta, X) 2 | %PREDICT Predict the label for a trained one-vs-all classifier. The labels 3 | %are in the range 1..K, where K = size(all_theta, 1). 4 | % p = PREDICTONEVSALL(all_theta, X) will return a vector of predictions 5 | % for each example in the matrix X. Note that X contains the examples in 6 | % rows. all_theta is a matrix where the i-th row is a trained logistic 7 | % regression theta vector for the i-th class. You should set p to a vector 8 | % of values from 1..K (e.g., p = [1; 3; 1; 2] predicts classes 1, 3, 1, 2 9 | % for 4 examples) 10 | 11 | m = size(X, 1); 12 | num_labels = size(all_theta, 1); 13 | 14 | % You need to return the following variables correctly 15 | p = zeros(size(X, 1), 1); 16 | 17 | % Add ones to the X data matrix 18 | X = [ones(m, 1) X]; 19 | 20 | % ====================== YOUR CODE HERE ====================== 21 | % Instructions: Complete the following code to make predictions using 22 | % your learned logistic regression parameters (one-vs-all). 23 | % You should set p to a vector of predictions (from 1 to 24 | % num_labels). 25 | % 26 | % Hint: This code can be done all vectorized using the max function. 27 | % In particular, the max function can also return the index of the 28 | % max element, for more information see 'help max'. If your examples 29 | % are in rows, then, you can use max(A, [], 2) to obtain the max 30 | % for each row. 
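The max hint above is easiest to see on a tiny matrix: with a second output argument, max returns the index of the winning column in each row, which is exactly the predicted class label. Illustrative values only:

A = [0.2 0.9 0.1;
     0.7 0.3 0.6];
[values, p] = max(A, [], 2);
disp(p');      % prints 2 1: column 2 wins in row 1, column 1 wins in row 2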
31 | % 32 | 33 | [value, p] = max((X*all_theta'), [], 2); 34 | 35 | % ========================================================================= 36 | 37 | 38 | end 39 | -------------------------------------------------------------------------------- /ex3/sigmoid.m: -------------------------------------------------------------------------------- 1 | function g = sigmoid(z) 2 | %SIGMOID Compute sigmoid functoon 3 | % J = SIGMOID(z) computes the sigmoid of z. 4 | 5 | g = 1.0 ./ (1.0 + exp(-z)); 6 | end 7 | -------------------------------------------------------------------------------- /ex4.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/ex4.pdf -------------------------------------------------------------------------------- /ex4/checkNNGradients.m: -------------------------------------------------------------------------------- 1 | function checkNNGradients(lambda) 2 | %CHECKNNGRADIENTS Creates a small neural network to check the 3 | %backpropagation gradients 4 | % CHECKNNGRADIENTS(lambda) Creates a small neural network to check the 5 | % backpropagation gradients, it will output the analytical gradients 6 | % produced by your backprop code and the numerical gradients (computed 7 | % using computeNumericalGradient). These two gradient computations should 8 | % result in very similar values. 9 | % 10 | 11 | if ~exist('lambda', 'var') || isempty(lambda) 12 | lambda = 0; 13 | end 14 | 15 | input_layer_size = 3; 16 | hidden_layer_size = 5; 17 | num_labels = 3; 18 | m = 5; 19 | 20 | % We generate some 'random' test data 21 | Theta1 = debugInitializeWeights(hidden_layer_size, input_layer_size); 22 | Theta2 = debugInitializeWeights(num_labels, hidden_layer_size); 23 | % Reusing debugInitializeWeights to generate X 24 | X = debugInitializeWeights(m, input_layer_size - 1); 25 | y = 1 + mod(1:m, num_labels)'; 26 | 27 | % Unroll parameters 28 | nn_params = [Theta1(:) ; Theta2(:)]; 29 | 30 | % Short hand for cost function 31 | costFunc = @(p) nnCostFunction(p, input_layer_size, hidden_layer_size, ... 32 | num_labels, X, y, lambda); 33 | 34 | [cost, grad] = costFunc(nn_params); 35 | numgrad = computeNumericalGradient(costFunc, nn_params); 36 | 37 | % Visually examine the two gradient computations. The two columns 38 | % you get should be very similar. 39 | disp([numgrad grad]); 40 | fprintf(['The above two columns you get should be very similar.\n' ... 41 | '(Left-Your Numerical Gradient, Right-Analytical Gradient)\n\n']); 42 | 43 | % Evaluate the norm of the difference between two solutions. 44 | % If you have a correct implementation, and assuming you used EPSILON = 0.0001 45 | % in computeNumericalGradient.m, then diff below should be less than 1e-9 46 | diff = norm(numgrad-grad)/norm(numgrad+grad); 47 | 48 | fprintf(['If your backpropagation implementation is correct, then \n' ... 49 | 'the relative difference will be small (less than 1e-9). \n' ... 50 | '\nRelative Difference: %g\n'], diff); 51 | 52 | end 53 | -------------------------------------------------------------------------------- /ex4/computeNumericalGradient.m: -------------------------------------------------------------------------------- 1 | function numgrad = computeNumericalGradient(J, theta) 2 | %COMPUTENUMERICALGRADIENT Computes the gradient using "finite differences" 3 | %and gives us a numerical estimate of the gradient. 
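The numerical estimate mentioned above is the central difference (J(theta + e_i) - J(theta - e_i)) / (2e) applied to one component of theta at a time, which is what the loop below implements. A toy check against a hand-differentiable function (made up for illustration):

J = @(t) t(1)^2 + 3*t(2);                 % analytic gradient is [2*t(1); 3]
theta = [1; 2];  e = 1e-4;
g1 = (J(theta + [e; 0]) - J(theta - [e; 0])) / (2*e);
g2 = (J(theta + [0; e]) - J(theta - [0; e])) / (2*e);
disp([g1 g2]);                            % approximately 2 and 3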
4 | % numgrad = COMPUTENUMERICALGRADIENT(J, theta) computes the numerical 5 | % gradient of the function J around theta. Calling y = J(theta) should 6 | % return the function value at theta. 7 | 8 | % Notes: The following code implements numerical gradient checking, and 9 | % returns the numerical gradient.It sets numgrad(i) to (a numerical 10 | % approximation of) the partial derivative of J with respect to the 11 | % i-th input argument, evaluated at theta. (i.e., numgrad(i) should 12 | % be the (approximately) the partial derivative of J with respect 13 | % to theta(i).) 14 | % 15 | 16 | numgrad = zeros(size(theta)); 17 | perturb = zeros(size(theta)); 18 | e = 1e-4; 19 | for p = 1:numel(theta) 20 | % Set perturbation vector 21 | perturb(p) = e; 22 | loss1 = J(theta - perturb); 23 | loss2 = J(theta + perturb); 24 | % Compute Numerical Gradient 25 | numgrad(p) = (loss2 - loss1) / (2*e); 26 | perturb(p) = 0; 27 | end 28 | 29 | end 30 | -------------------------------------------------------------------------------- /ex4/debugInitializeWeights.m: -------------------------------------------------------------------------------- 1 | function W = debugInitializeWeights(fan_out, fan_in) 2 | %DEBUGINITIALIZEWEIGHTS Initialize the weights of a layer with fan_in 3 | %incoming connections and fan_out outgoing connections using a fixed 4 | %strategy, this will help you later in debugging 5 | % W = DEBUGINITIALIZEWEIGHTS(fan_in, fan_out) initializes the weights 6 | % of a layer with fan_in incoming connections and fan_out outgoing 7 | % connections using a fix set of values 8 | % 9 | % Note that W should be set to a matrix of size(1 + fan_in, fan_out) as 10 | % the first row of W handles the "bias" terms 11 | % 12 | 13 | % Set W to zeros 14 | W = zeros(fan_out, 1 + fan_in); 15 | 16 | % Initialize W using "sin", this ensures that W is always of the same 17 | % values and will be useful for debugging 18 | W = reshape(sin(1:numel(W)), size(W)) / 10; 19 | 20 | % ========================================================================= 21 | 22 | end 23 | -------------------------------------------------------------------------------- /ex4/displayData.m: -------------------------------------------------------------------------------- 1 | function [h, display_array] = displayData(X, example_width) 2 | %DISPLAYDATA Display 2D data in a nice grid 3 | % [h, display_array] = DISPLAYDATA(X, example_width) displays 2D data 4 | % stored in X in a nice grid. It returns the figure handle h and the 5 | % displayed array if requested. 6 | 7 | % Set example_width automatically if not passed in 8 | if ~exist('example_width', 'var') || isempty(example_width) 9 | example_width = round(sqrt(size(X, 2))); 10 | end 11 | 12 | % Gray Image 13 | colormap(gray); 14 | 15 | % Compute rows, cols 16 | [m n] = size(X); 17 | example_height = (n / example_width); 18 | 19 | % Compute number of items to display 20 | display_rows = floor(sqrt(m)); 21 | display_cols = ceil(m / display_rows); 22 | 23 | % Between images padding 24 | pad = 1; 25 | 26 | % Setup blank display 27 | display_array = - ones(pad + display_rows * (example_height + pad), ... 
28 | pad + display_cols * (example_width + pad)); 29 | 30 | % Copy each example into a patch on the display array 31 | curr_ex = 1; 32 | for j = 1:display_rows 33 | for i = 1:display_cols 34 | if curr_ex > m, 35 | break; 36 | end 37 | % Copy the patch 38 | 39 | % Get the max value of the patch 40 | max_val = max(abs(X(curr_ex, :))); 41 | display_array(pad + (j - 1) * (example_height + pad) + (1:example_height), ... 42 | pad + (i - 1) * (example_width + pad) + (1:example_width)) = ... 43 | reshape(X(curr_ex, :), example_height, example_width) / max_val; 44 | curr_ex = curr_ex + 1; 45 | end 46 | if curr_ex > m, 47 | break; 48 | end 49 | end 50 | 51 | % Display Image 52 | h = imagesc(display_array, [-1 1]); 53 | 54 | % Do not show axis 55 | axis image off 56 | 57 | drawnow; 58 | 59 | end 60 | -------------------------------------------------------------------------------- /ex4/ex4.m: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env octave 2 | %% Machine Learning Online Class - Exercise 4 Neural Network Learning 3 | 4 | % Instructions 5 | % ------------ 6 | % 7 | % This file contains code that helps you get started on the 8 | % linear exercise. You will need to complete the following functions 9 | % in this exericse: 10 | % 11 | % sigmoidGradient.m 12 | % randInitializeWeights.m 13 | % nnCostFunction.m 14 | % 15 | % For this exercise, you will not need to change any code in this file, 16 | % or any other files other than those mentioned above. 17 | % 18 | 19 | %% Initialization 20 | clear ; close all; clc 21 | 22 | %% Setup the parameters you will use for this exercise 23 | input_layer_size = 400; % 20x20 Input Images of Digits 24 | hidden_layer_size = 25; % 25 hidden units 25 | num_labels = 10; % 10 labels, from 1 to 10 26 | % (note that we have mapped "0" to label 10) 27 | 28 | %% =========== Part 1: Loading and Visualizing Data ============= 29 | % We start the exercise by first loading and visualizing the dataset. 30 | % You will be working with a dataset that contains handwritten digits. 31 | % 32 | 33 | % Load Training Data 34 | fprintf('Loading and Visualizing Data ...\n') 35 | 36 | load('ex4data1.mat'); 37 | m = size(X, 1); 38 | 39 | % Randomly select 100 data points to display 40 | sel = randperm(size(X, 1)); 41 | sel = sel(1:100); 42 | 43 | displayData(X(sel, :)); 44 | 45 | fprintf('Program paused. Press enter to continue.\n'); 46 | pause; 47 | 48 | 49 | %% ================ Part 2: Loading Pameters ================ 50 | % In this part of the exercise, we load some pre-initialized 51 | % neural network parameters. 52 | 53 | fprintf('\nLoading Saved Neural Network Parameters ...\n') 54 | 55 | % Load the weights into variables Theta1 and Theta2 56 | load('ex4weights.mat'); 57 | 58 | % Unroll parameters 59 | nn_params = [Theta1(:) ; Theta2(:)]; 60 | 61 | %% ================ Part 3: Compute Cost (Feedforward) ================ 62 | % To the neural network, you should first start by implementing the 63 | % feedforward part of the neural network that returns the cost only. You 64 | % should complete the code in nnCostFunction.m to return cost. After 65 | % implementing the feedforward to compute the cost, you can verify that 66 | % your implementation is correct by verifying that you get the same cost 67 | % as us for the fixed debugging parameters. 68 | % 69 | % We suggest implementing the feedforward cost *without* regularization 70 | % first so that it will be easier for you to debug. 
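For the unregularized feedforward cost referred to above, the per-example logistic costs are summed over all K output units and averaged over the m examples. A tiny standalone sketch with a made-up prediction matrix h (K x m) and a one-hot label matrix Y of the same size:

h = [0.9 0.2; 0.1 0.7; 0.3 0.4];          % hypothetical network outputs, K = 3, m = 2
Y = [1 0; 0 1; 0 0];                       % one-hot labels for the two examples
m = size(h, 2);
J = (1/m) * sum(sum(-Y .* log(h) - (1 - Y) .* log(1 - h)));
disp(J);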
Later, in part 4, you 71 | % will get to implement the regularized cost. 72 | % 73 | fprintf('\nFeedforward Using Neural Network ...\n') 74 | 75 | % Weight regularization parameter (we set this to 0 here). 76 | lambda = 0; 77 | 78 | J = nnCostFunction(nn_params, input_layer_size, hidden_layer_size, ... 79 | num_labels, X, y, lambda); 80 | 81 | fprintf(['Cost at parameters (loaded from ex4weights): %f '... 82 | '\n(this value should be about 0.287629)\n'], J); 83 | 84 | fprintf('\nProgram paused. Press enter to continue.\n'); 85 | pause; 86 | 87 | %% =============== Part 4: Implement Regularization =============== 88 | % Once your cost function implementation is correct, you should now 89 | % continue to implement the regularization with the cost. 90 | % 91 | 92 | fprintf('\nChecking Cost Function (w/ Regularization) ... \n') 93 | 94 | % Weight regularization parameter (we set this to 1 here). 95 | lambda = 1; 96 | 97 | J = nnCostFunction(nn_params, input_layer_size, hidden_layer_size, ... 98 | num_labels, X, y, lambda); 99 | 100 | fprintf(['Cost at parameters (loaded from ex4weights): %f '... 101 | '\n(this value should be about 0.383770)\n'], J); 102 | 103 | fprintf('Program paused. Press enter to continue.\n'); 104 | pause; 105 | 106 | 107 | %% ================ Part 5: Sigmoid Gradient ================ 108 | % Before you start implementing the neural network, you will first 109 | % implement the gradient for the sigmoid function. You should complete the 110 | % code in the sigmoidGradient.m file. 111 | % 112 | 113 | fprintf('\nEvaluating sigmoid gradient...\n') 114 | 115 | g = sigmoidGradient([1 -0.5 0 0.5 1]); 116 | fprintf('Sigmoid gradient evaluated at [1 -0.5 0 0.5 1]:\n '); 117 | fprintf('%f ', g); 118 | fprintf('\n\n'); 119 | 120 | fprintf('Program paused. Press enter to continue.\n'); 121 | pause; 122 | 123 | 124 | %% ================ Part 6: Initializing Pameters ================ 125 | % In this part of the exercise, you will be starting to implment a two 126 | % layer neural network that classifies digits. You will start by 127 | % implementing a function to initialize the weights of the neural network 128 | % (randInitializeWeights.m) 129 | 130 | fprintf('\nInitializing Neural Network Parameters ...\n') 131 | 132 | initial_Theta1 = randInitializeWeights(input_layer_size, hidden_layer_size); 133 | initial_Theta2 = randInitializeWeights(hidden_layer_size, num_labels); 134 | 135 | % Unroll parameters 136 | initial_nn_params = [initial_Theta1(:) ; initial_Theta2(:)]; 137 | 138 | 139 | %% =============== Part 7: Implement Backpropagation =============== 140 | % Once your cost matches up with ours, you should proceed to implement the 141 | % backpropagation algorithm for the neural network. You should add to the 142 | % code you've written in nnCostFunction.m to return the partial 143 | % derivatives of the parameters. 144 | % 145 | fprintf('\nChecking Backpropagation... \n'); 146 | 147 | % Check gradients by running checkNNGradients 148 | checkNNGradients; 149 | 150 | fprintf('\nProgram paused. Press enter to continue.\n'); 151 | pause; 152 | 153 | 154 | %% =============== Part 8: Implement Regularization =============== 155 | % Once your backpropagation implementation is correct, you should now 156 | % continue to implement the regularization with the cost and gradient. 157 | % 158 | 159 | fprintf('\nChecking Backpropagation (w/ Regularization) ... 
\n') 160 | 161 | % Check gradients by running checkNNGradients 162 | lambda = 3; 163 | checkNNGradients(lambda); 164 | 165 | % Also output the costFunction debugging values 166 | debug_J = nnCostFunction(nn_params, input_layer_size, ... 167 | hidden_layer_size, num_labels, X, y, lambda); 168 | 169 | fprintf(['\n\nCost at (fixed) debugging parameters (w/ lambda = 10): %f ' ... 170 | '\n(this value should be about 0.576051)\n\n'], debug_J); 171 | 172 | fprintf('Program paused. Press enter to continue.\n'); 173 | pause; 174 | 175 | 176 | %% =================== Part 8: Training NN =================== 177 | % You have now implemented all the code necessary to train a neural 178 | % network. To train your neural network, we will now use "fmincg", which 179 | % is a function which works similarly to "fminunc". Recall that these 180 | % advanced optimizers are able to train our cost functions efficiently as 181 | % long as we provide them with the gradient computations. 182 | % 183 | fprintf('\nTraining Neural Network... \n') 184 | 185 | % After you have completed the assignment, change the MaxIter to a larger 186 | % value to see how more training helps. 187 | options = optimset('MaxIter', 400); 188 | 189 | % You should also try different values of lambda 190 | lambda = 1; 191 | 192 | % Create "short hand" for the cost function to be minimized 193 | costFunction = @(p) nnCostFunction(p, ... 194 | input_layer_size, ... 195 | hidden_layer_size, ... 196 | num_labels, X, y, lambda); 197 | 198 | % Now, costFunction is a function that takes in only one argument (the 199 | % neural network parameters) 200 | [nn_params, cost] = fmincg(costFunction, initial_nn_params, options); 201 | 202 | % Obtain Theta1 and Theta2 back from nn_params 203 | Theta1 = reshape(nn_params(1:hidden_layer_size * (input_layer_size + 1)), ... 204 | hidden_layer_size, (input_layer_size + 1)); 205 | 206 | Theta2 = reshape(nn_params((1 + (hidden_layer_size * (input_layer_size + 1))):end), ... 207 | num_labels, (hidden_layer_size + 1)); 208 | 209 | fprintf('Program paused. Press enter to continue.\n'); 210 | pause; 211 | 212 | 213 | %% ================= Part 9: Visualize Weights ================= 214 | % You can now "visualize" what the neural network is learning by 215 | % displaying the hidden units to see what features they are capturing in 216 | % the data. 217 | 218 | fprintf('\nVisualizing Neural Network... \n') 219 | 220 | displayData(Theta1(:, 2:end)); 221 | 222 | fprintf('\nProgram paused. Press enter to continue.\n'); 223 | pause; 224 | 225 | %% ================= Part 10: Implement Predict ================= 226 | % After training the neural network, we would like to use it to predict 227 | % the labels. You will now implement the "predict" function to use the 228 | % neural network to predict the labels of the training set. This lets 229 | % you compute the training set accuracy. 
230 | 231 | pred = predict(Theta1, Theta2, X); 232 | 233 | fprintf('\nTraining Set Accuracy: %f\n', mean(double(pred == y)) * 100); 234 | 235 | 236 | -------------------------------------------------------------------------------- /ex4/ex4data1.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/ex4/ex4data1.mat -------------------------------------------------------------------------------- /ex4/ex4weights.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/ex4/ex4weights.mat -------------------------------------------------------------------------------- /ex4/fmincg.m: -------------------------------------------------------------------------------- 1 | ../ex3/fmincg.m -------------------------------------------------------------------------------- /ex4/nnCostFunction.m: -------------------------------------------------------------------------------- 1 | function [J grad] = nnCostFunction(nn_params, ... 2 | input_layer_size, ... 3 | hidden_layer_size, ... 4 | num_labels, ... 5 | X, y, lambda) 6 | %NNCOSTFUNCTION Implements the neural network cost function for a two layer 7 | %neural network which performs classification 8 | % [J grad] = NNCOSTFUNCTON(nn_params, hidden_layer_size, num_labels, ... 9 | % X, y, lambda) computes the cost and gradient of the neural network. The 10 | % parameters for the neural network are "unrolled" into the vector 11 | % nn_params and need to be converted back into the weight matrices. 12 | % 13 | % The returned parameter grad should be a "unrolled" vector of the 14 | % partial derivatives of the neural network. 15 | % 16 | 17 | % Reshape nn_params back into the parameters Theta1 and Theta2, the weight matrices 18 | % for our 2 layer neural network 19 | Theta1 = reshape(nn_params(1:hidden_layer_size * (input_layer_size + 1)), ... 20 | hidden_layer_size, (input_layer_size + 1)); 21 | 22 | Theta2 = reshape(nn_params((1 + (hidden_layer_size * (input_layer_size + 1))):end), ... 23 | num_labels, (hidden_layer_size + 1)); 24 | 25 | % Setup some useful variables 26 | m = size(X, 1); 27 | 28 | % You need to return the following variables correctly 29 | J = 0; 30 | Theta1_grad = zeros(size(Theta1)); 31 | Theta2_grad = zeros(size(Theta2)); 32 | 33 | % ====================== YOUR CODE HERE ====================== 34 | % Instructions: You should complete the code by working through the 35 | % following parts. 36 | % 37 | % Part 1: Feedforward the neural network and return the cost in the 38 | % variable J. After implementing Part 1, you can verify that your 39 | % cost function computation is correct by verifying the cost 40 | % computed in ex4.m 41 | % 42 | % Part 2: Implement the backpropagation algorithm to compute the gradients 43 | % Theta1_grad and Theta2_grad. You should return the partial derivatives of 44 | % the cost function with respect to Theta1 and Theta2 in Theta1_grad and 45 | % Theta2_grad, respectively. After implementing Part 2, you can check 46 | % that your implementation is correct by running checkNNGradients 47 | % 48 | % Note: The vector y passed into the function is a vector of labels 49 | % containing values from 1..K. You need to map this vector into a 50 | % binary vector of 1's and 0's to be used with the neural network 51 | % cost function. 
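The label-to-binary-vector mapping described above can also be done without a loop: indexing the columns of an identity matrix by y produces the K x m one-hot matrix directly, and matches the loop version used later in this file. A small illustrative example with hypothetical labels:

y = [3; 1; 2];  num_labels = 3;  m = length(y);
Y = eye(num_labels)(:, y);                % Octave allows indexing the result directly
Y_loop = zeros(num_labels, m);            % equivalent loop, as in the code below
for i = 1:m
  Y_loop(y(i), i) = 1;
end
disp(isequal(Y, Y_loop));                 % prints 1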
52 | % 53 | % Hint: We recommend implementing backpropagation using a for-loop 54 | % over the training examples if you are implementing it for the 55 | % first time. 56 | % 57 | % Part 3: Implement regularization with the cost function and gradients. 58 | % 59 | % Hint: You can implement this around the code for 60 | % backpropagation. That is, you can compute the gradients for 61 | % the regularization separately and then add them to Theta1_grad 62 | % and Theta2_grad from Part 2. 63 | % 64 | 65 | % Convert y to matrix 66 | % XXX(SaveTheRbtz@): Curious how it can be vectorized (Should reread ex3 for 67 | % logical arrays) 68 | number_of_classes = length(unique(y)); 69 | Y = zeros(number_of_classes, m); 70 | for i = 1:m 71 | Y(y(i), i) = 1; 72 | endfor 73 | 74 | % Do forward propagation 75 | % Copy/Paste from ex3 predict.m 76 | % FIXME(SaveTheRbtz@): Move to separate function 77 | A1 = [ones(1, m); X']; 78 | Z2=Theta1*A1; 79 | A2 = [ones(1, m); sigmoid(Z2)]; 80 | Z3=Theta2*A2; 81 | A3 = sigmoid(Z3); 82 | 83 | % A3 here is our h0 84 | h0 = A3; 85 | 86 | % Compute cost function 87 | % XXX(SaveTheRbtz@): Slightly modified version of ex2 costFunction 88 | J = (1/m)*sum(sum(-Y.*log(h0) - (1-Y).*log(1-h0))); 89 | 90 | % Add some regularization 91 | % XXX(SaveTheRbtz@): Also borrowed from ex2 costFunctionReg 92 | penalize = sum(sum(Theta1(:, 2:end) .^ 2)) + sum(sum(Theta2(:, 2:end) .^ 2)); 93 | J = J + (lambda/(2*m)) * penalize; 94 | 95 | % Implement backpropagation 96 | delta_3 = A3 - Y; 97 | delta_2 = (Theta2'*delta_3)(2:end, :) .* sigmoidGradient(Z2); 98 | 99 | % Calculate gradients 100 | Theta1_unreg_grad = (delta_2 * A1')/m; 101 | Theta2_unreg_grad = (delta_3 * A2')/m; 102 | 103 | % Regularize 104 | Theta1_grad = Theta1_unreg_grad + (lambda/m) * Theta1; 105 | Theta2_grad = Theta2_unreg_grad + (lambda/m) * Theta2; 106 | 107 | Theta1_grad(:, 1) = Theta1_unreg_grad(:, 1); 108 | Theta2_grad(:, 1) = Theta2_unreg_grad(:, 1); 109 | 110 | % ------------------------------------------------------------- 111 | 112 | % ========================================================================= 113 | 114 | % Unroll gradients 115 | grad = [Theta1_grad(:) ; Theta2_grad(:)]; 116 | 117 | 118 | end 119 | -------------------------------------------------------------------------------- /ex4/predict.m: -------------------------------------------------------------------------------- 1 | function p = predict(Theta1, Theta2, X) 2 | %PREDICT Predict the label of an input given a trained neural network 3 | % p = PREDICT(Theta1, Theta2, X) outputs the predicted label of X given the 4 | % trained weights of a neural network (Theta1, Theta2) 5 | 6 | % Useful values 7 | m = size(X, 1); 8 | num_labels = size(Theta2, 1); 9 | 10 | % You need to return the following variables correctly 11 | p = zeros(size(X, 1), 1); 12 | 13 | h1 = sigmoid([ones(m, 1) X] * Theta1'); 14 | h2 = sigmoid([ones(m, 1) h1] * Theta2'); 15 | [dummy, p] = max(h2, [], 2); 16 | 17 | % ========================================================================= 18 | 19 | 20 | end 21 | -------------------------------------------------------------------------------- /ex4/randInitializeWeights.m: -------------------------------------------------------------------------------- 1 | function W = randInitializeWeights(L_in, L_out) 2 | %RANDINITIALIZEWEIGHTS Randomly initialize the weights of a layer with L_in 3 | %incoming connections and L_out outgoing connections 4 | % W = RANDINITIALIZEWEIGHTS(L_in, L_out) randomly initializes the weights 5 | % of a layer with 
L_in incoming connections and L_out outgoing 6 | % connections. 7 | % 8 | % Note that W should be set to a matrix of size(L_out, 1 + L_in) as 9 | % the first row of W handles the "bias" terms 10 | % 11 | 12 | % You need to return the following variables correctly 13 | W = zeros(L_out, 1 + L_in); 14 | 15 | % ====================== YOUR CODE HERE ====================== 16 | % Instructions: Initialize W randomly so that we break the symmetry while 17 | % training the neural network. 18 | % 19 | % Note: The first row of W corresponds to the parameters for the bias units 20 | % 21 | 22 | % Randomly initialize the weights to small values 23 | % XXX(SaveTheRbtz@): This one was given in text book 24 | % Book also suggests setting epsilon based on number of nodes in network: 25 | %epsilon_init = (sqrt(6)/(sqrt(L_in + L_out))); 26 | epsilon_init = 0.12; 27 | W = rand(L_out, 1 + L_in) * 2 * epsilon_init - epsilon_init; 28 | 29 | % ========================================================================= 30 | 31 | end 32 | -------------------------------------------------------------------------------- /ex4/sigmoid.m: -------------------------------------------------------------------------------- 1 | function g = sigmoid(z) 2 | %SIGMOID Compute sigmoid functoon 3 | % J = SIGMOID(z) computes the sigmoid of z. 4 | 5 | g = 1.0 ./ (1.0 + exp(-z)); 6 | end 7 | -------------------------------------------------------------------------------- /ex4/sigmoidGradient.m: -------------------------------------------------------------------------------- 1 | function g = sigmoidGradient(z) 2 | %SIGMOIDGRADIENT returns the gradient of the sigmoid function 3 | %evaluated at z 4 | % g = SIGMOIDGRADIENT(z) computes the gradient of the sigmoid function 5 | % evaluated at z. This should work regardless if z is a matrix or a 6 | % vector. In particular, if z is a vector or matrix, you should return 7 | % the gradient for each element. 8 | 9 | g = zeros(size(z)); 10 | 11 | % ====================== YOUR CODE HERE ====================== 12 | % Instructions: Compute the gradient of the sigmoid function evaluated at 13 | % each value of z (z can be a matrix, vector or scalar). 14 | 15 | g = sigmoid(z) .* (1 - sigmoid(z)); 16 | 17 | % ============================================================= 18 | 19 | 20 | 21 | 22 | end 23 | -------------------------------------------------------------------------------- /ex5.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/ex5.pdf -------------------------------------------------------------------------------- /ex5/ex5.m: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env octave 2 | %% Machine Learning Online Class 3 | % Exercise 5 | Regularized Linear Regression and Bias-Variance 4 | % 5 | % Instructions 6 | % ------------ 7 | % 8 | % This file contains code that helps you get started on the 9 | % exercise. You will need to complete the following functions: 10 | % 11 | % linearRegCostFunction.m 12 | % learningCurve.m 13 | % validationCurve.m 14 | % 15 | % For this exercise, you will not need to change any code in this file, 16 | % or any other files other than those mentioned above. 17 | % 18 | 19 | %% Initialization 20 | clear ; close all; clc 21 | 22 | %% =========== Part 1: Loading and Visualizing Data ============= 23 | % We start the exercise by first loading and visualizing the dataset. 
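Before the dataset-specific checks in Parts 2 and 3 below, the regularized linear regression cost and gradient implemented in linearRegCostFunction.m further down are easy to verify on made-up numbers: J = (1/(2m)) * [sum((X*theta - y).^2) + lambda * sum(theta(2:end).^2)], with the intercept term left unregularized. Illustrative values only:

X = [1 1; 1 2; 1 3];  y = [2; 2.5; 3.5];   % toy data, not ex5data1.mat
theta = [1; 1];  lambda = 1;  m = length(y);
h = X * theta;
J = (sum((h - y).^2) + lambda * sum(theta(2:end).^2)) / (2*m);
grad = (1/m) * (X' * (h - y)) + [0; (lambda/m) * theta(2:end)];
disp(J);  disp(grad');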
24 | % The following code will load the dataset into your environment and plot 25 | % the data. 26 | % 27 | 28 | % Load Training Data 29 | fprintf('Loading and Visualizing Data ...\n') 30 | 31 | % Load from ex5data1: 32 | % You will have X, y, Xval, yval, Xtest, ytest in your environment 33 | load ('ex5data1.mat'); 34 | 35 | % m = Number of examples 36 | m = size(X, 1); 37 | 38 | % Plot training data 39 | plot(X, y, 'rx', 'MarkerSize', 10, 'LineWidth', 1.5); 40 | xlabel('Change in water level (x)'); 41 | ylabel('Water flowing out of the dam (y)'); 42 | 43 | fprintf('Program paused. Press enter to continue.\n'); 44 | pause; 45 | 46 | %% =========== Part 2: Regularized Linear Regression Cost ============= 47 | % You should now implement the cost function for regularized linear 48 | % regression. 49 | % 50 | 51 | theta = [1 ; 1]; 52 | J = linearRegCostFunction([ones(m, 1) X], y, theta, 1); 53 | 54 | fprintf(['Cost at theta = [1 ; 1]: %f '... 55 | '\n(this value should be about 303.993192)\n'], J); 56 | 57 | fprintf('Program paused. Press enter to continue.\n'); 58 | pause; 59 | 60 | %% =========== Part 3: Regularized Linear Regression Gradient ============= 61 | % You should now implement the gradient for regularized linear 62 | % regression. 63 | % 64 | 65 | theta = [1 ; 1]; 66 | [J, grad] = linearRegCostFunction([ones(m, 1) X], y, theta, 1); 67 | 68 | fprintf(['Gradient at theta = [1 ; 1]: [%f; %f] '... 69 | '\n(this value should be about [-15.303016; 598.250744])\n'], ... 70 | grad(1), grad(2)); 71 | 72 | fprintf('Program paused. Press enter to continue.\n'); 73 | pause; 74 | 75 | 76 | %% =========== Part 4: Train Linear Regression ============= 77 | % Once you have implemented the cost and gradient correctly, the 78 | % trainLinearReg function will use your cost function to train 79 | % regularized linear regression. 80 | % 81 | % Write Up Note: The data is non-linear, so this will not give a great 82 | % fit. 83 | % 84 | 85 | % Train linear regression with lambda = 0 86 | lambda = 0; 87 | [theta] = trainLinearReg([ones(m, 1) X], y, lambda); 88 | 89 | % Plot fit over the data 90 | plot(X, y, 'rx', 'MarkerSize', 10, 'LineWidth', 1.5); 91 | xlabel('Change in water level (x)'); 92 | ylabel('Water flowing out of the dam (y)'); 93 | hold on; 94 | plot(X, [ones(m, 1) X]*theta, '--', 'LineWidth', 2) 95 | hold off; 96 | 97 | fprintf('Program paused. Press enter to continue.\n'); 98 | pause; 99 | 100 | 101 | %% =========== Part 5: Learning Curve for Linear Regression ============= 102 | % Next, you should implement the learningCurve function. 103 | % 104 | % Write Up Note: Since the model is underfitting the data, we expect to 105 | % see a graph with "high bias" -- slide 8 in ML-advice.pdf 106 | % 107 | 108 | lambda = 0; 109 | [error_train, error_val] = ... 110 | learningCurve([ones(m, 1) X], y, ... 111 | [ones(size(Xval, 1), 1) Xval], yval, ... 112 | lambda); 113 | 114 | plot(1:m, error_train, 1:m, error_val); 115 | title('Learning curve for linear regression') 116 | legend('Train', 'Cross Validation') 117 | xlabel('Number of training examples') 118 | ylabel('Error') 119 | axis([0 13 0 150]) 120 | 121 | fprintf('# Training Examples\tTrain Error\tCross Validation Error\n'); 122 | for i = 1:m 123 | fprintf(' \t%d\t\t%f\t%f\n', i, error_train(i), error_val(i)); 124 | end 125 | 126 | fprintf('Program paused. 
Press enter to continue.\n'); 127 | pause; 128 | 129 | %% =========== Part 6: Feature Mapping for Polynomial Regression ============= 130 | % One solution to this is to use polynomial regression. You should now 131 | % complete polyFeatures to map each example into its powers 132 | % 133 | 134 | p = 8; 135 | 136 | % Map X onto Polynomial Features and Normalize 137 | X_poly = polyFeatures(X, p); 138 | [X_poly, mu, sigma] = featureNormalize(X_poly); % Normalize 139 | X_poly = [ones(m, 1), X_poly]; % Add Ones 140 | 141 | % Map X_poly_test and normalize (using mu and sigma) 142 | X_poly_test = polyFeatures(Xtest, p); 143 | X_poly_test = bsxfun(@minus, X_poly_test, mu); 144 | X_poly_test = bsxfun(@rdivide, X_poly_test, sigma); 145 | X_poly_test = [ones(size(X_poly_test, 1), 1), X_poly_test]; % Add Ones 146 | 147 | % Map X_poly_val and normalize (using mu and sigma) 148 | X_poly_val = polyFeatures(Xval, p); 149 | X_poly_val = bsxfun(@minus, X_poly_val, mu); 150 | X_poly_val = bsxfun(@rdivide, X_poly_val, sigma); 151 | X_poly_val = [ones(size(X_poly_val, 1), 1), X_poly_val]; % Add Ones 152 | 153 | fprintf('Normalized Training Example 1:\n'); 154 | fprintf(' %f \n', X_poly(1, :)); 155 | 156 | fprintf('\nProgram paused. Press enter to continue.\n'); 157 | pause; 158 | 159 | 160 | 161 | %% =========== Part 7: Learning Curve for Polynomial Regression ============= 162 | % Now, you will get to experiment with polynomial regression with multiple 163 | % values of lambda. The code below runs polynomial regression with 164 | % lambda = 0. You should try running the code with different values of 165 | % lambda to see how the fit and learning curve change. 166 | % 167 | 168 | lambda = 0; 169 | [theta] = trainLinearReg(X_poly, y, lambda); 170 | 171 | % Plot training data and fit 172 | figure(1); 173 | plot(X, y, 'rx', 'MarkerSize', 10, 'LineWidth', 1.5); 174 | plotFit(min(X), max(X), mu, sigma, theta, p); 175 | xlabel('Change in water level (x)'); 176 | ylabel('Water flowing out of the dam (y)'); 177 | title (sprintf('Polynomial Regression Fit (lambda = %f)', lambda)); 178 | 179 | figure(2); 180 | [error_train, error_val] = ... 181 | learningCurve(X_poly, y, X_poly_val, yval, lambda); 182 | % Should we use logarithmix scale there? 183 | plot(1:m, error_train, 1:m, error_val); 184 | 185 | title(sprintf('Polynomial Regression Learning Curve (lambda = %f)', lambda)); 186 | xlabel('Number of training examples') 187 | ylabel('Error') 188 | axis([0 13 0 100]) 189 | legend('Train', 'Cross Validation') 190 | 191 | fprintf('Polynomial Regression (lambda = %f)\n\n', lambda); 192 | fprintf('# Training Examples\tTrain Error\tCross Validation Error\n'); 193 | for i = 1:m 194 | fprintf(' \t%d\t\t%f\t%f\n', i, error_train(i), error_val(i)); 195 | end 196 | 197 | fprintf('Program paused. Press enter to continue.\n'); 198 | pause; 199 | 200 | %% =========== Part 8: Validation for Selecting Lambda ============= 201 | % You will now implement validationCurve to test various values of 202 | % lambda on a validation set. You will then use this to select the 203 | % "best" lambda value. 204 | % 205 | 206 | [lambda_vec, error_train, error_val] = ... 207 | validationCurve(X_poly, y, X_poly_val, yval); 208 | 209 | close all; 210 | plot(lambda_vec, error_train, lambda_vec, error_val); 211 | legend('Train', 'Cross Validation'); 212 | xlabel('lambda'); 213 | ylabel('Error'); 214 | 215 | fprintf('lambda\t\tTrain Error\tValidation Error\n'); 216 | for i = 1:length(lambda_vec) 217 | fprintf(' %f\t%f\t%f\n', ... 
218 | lambda_vec(i), error_train(i), error_val(i)); 219 | end 220 | 221 | fprintf('Program paused. Press enter to continue.\n'); 222 | pause; 223 | -------------------------------------------------------------------------------- /ex5/ex5data1.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/ex5/ex5data1.mat -------------------------------------------------------------------------------- /ex5/featureNormalize.m: -------------------------------------------------------------------------------- 1 | ../ex1/featureNormalize.m -------------------------------------------------------------------------------- /ex5/fmincg.m: -------------------------------------------------------------------------------- 1 | ../ex3/fmincg.m -------------------------------------------------------------------------------- /ex5/learningCurve.m: -------------------------------------------------------------------------------- 1 | function [error_train, error_val] = ... 2 | learningCurve(X, y, Xval, yval, lambda) 3 | %LEARNINGCURVE Generates the train and cross validation set errors needed 4 | %to plot a learning curve 5 | % [error_train, error_val] = ... 6 | % LEARNINGCURVE(X, y, Xval, yval, lambda) returns the train and 7 | % cross validation set errors for a learning curve. In particular, 8 | % it returns two vectors of the same length - error_train and 9 | % error_val. Then, error_train(i) contains the training error for 10 | % i examples (and similarly for error_val(i)). 11 | % 12 | % In this function, you will compute the train and test errors for 13 | % dataset sizes from 1 up to m. In practice, when working with larger 14 | % datasets, you might want to do this in larger intervals. 15 | % 16 | 17 | % Number of training examples 18 | m = size(X, 1); 19 | 20 | % You need to return these values correctly 21 | error_train = zeros(m, 1); 22 | error_val = zeros(m, 1); 23 | 24 | % ====================== YOUR CODE HERE ====================== 25 | % Instructions: Fill in this function to return training errors in 26 | % error_train and the cross validation errors in error_val. 27 | % The vector numex_vec contains the number of training 28 | % examples to use for each calculation of training error and 29 | % cross validation error, i.e, error_train(i) and 30 | % error_val(i) should give you the errors 31 | % obtained after training on i examples. 32 | % 33 | % Note: You should evaluate the training error on the first i training 34 | % examples (i.e., X(1:i, :) and y(1:i)). 35 | % 36 | % For the cross-validation error, you should instead evaluate on 37 | % the _entire_ cross validation set (Xval and yval). 38 | % 39 | % Note: If you are using your cost function (linearRegCostFunction) 40 | % to compute the training and cross validation error, you should 41 | % call the function with the lambda argument set to 0. 42 | % Do note that you will still need to use lambda when running 43 | % the training to obtain the theta parameters. 44 | % 45 | % Hint: You can loop over the examples with the following: 46 | % 47 | % for i = 1:m 48 | % % Compute train/cross validation errors using training examples 49 | % % X(1:i, :) and y(1:i), storing the result in 50 | % % error_train(i) and error_val(i) 51 | % .... 
52 | % 53 | % end 54 | % 55 | 56 | % ---------------------- Sample Solution ---------------------- 57 | 58 | for i = 1:m 59 | X_trimmed = X(1:i, :); 60 | y_trimmed = y(1:i); 61 | theta = trainLinearReg(X_trimmed, y_trimmed, lambda); 62 | error_train(i) = linearRegCostFunction(X_trimmed, y_trimmed, theta, 0)(1); 63 | error_val(i) = linearRegCostFunction(Xval, yval, theta, 0)(1); 64 | 65 | % ------------------------------------------------------------- 66 | 67 | % ========================================================================= 68 | 69 | end 70 | -------------------------------------------------------------------------------- /ex5/linearRegCostFunction.m: -------------------------------------------------------------------------------- 1 | function [J, grad] = linearRegCostFunction(X, y, theta, lambda) 2 | %LINEARREGCOSTFUNCTION Compute cost and gradient for regularized linear 3 | %regression with multiple variables 4 | % [J, grad] = LINEARREGCOSTFUNCTION(X, y, theta, lambda) computes the 5 | % cost of using theta as the parameter for linear regression to fit the 6 | % data points in X and y. Returns the cost in J and the gradient in grad 7 | 8 | % Initialize some useful values 9 | m = length(y); % number of training examples 10 | 11 | % You need to return the following variables correctly 12 | J = 0; 13 | grad = zeros(size(theta)); 14 | 15 | % ====================== YOUR CODE HERE ====================== 16 | % Instructions: Compute the cost and gradient of regularized linear 17 | % regression for a particular choice of theta. 18 | % 19 | % You should set J to the cost and grad to the gradient. 20 | % 21 | 22 | % We can reuse ex1's computeCost() but it be messier and slower 23 | h0 = X*theta; 24 | J = (sum((h0 - y) .^ 2) + lambda*sum(theta(2:end) .^ 2))/(2*m); 25 | 26 | grad = (1/m)*(X'*(h0-y)) + [0; (lambda/m)*theta(2:end)]; 27 | 28 | % ========================================================================= 29 | 30 | grad = grad(:); 31 | 32 | end 33 | -------------------------------------------------------------------------------- /ex5/plotFit.m: -------------------------------------------------------------------------------- 1 | function plotFit(min_x, max_x, mu, sigma, theta, p) 2 | %PLOTFIT Plots a learned polynomial regression fit over an existing figure. 3 | %Also works with linear regression. 4 | % PLOTFIT(min_x, max_x, mu, sigma, theta, p) plots the learned polynomial 5 | % fit with power p and feature normalization (mu, sigma). 
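A quick, self-contained sanity check for the regularized cost and gradient implemented in linearRegCostFunction.m above (the *_chk names are only for this illustration):

X_chk = [ones(3, 1) (1:3)'];   % three examples: intercept term plus one feature
y_chk = [1; 2; 3];
theta_chk = [0; 1];            % fits y exactly, so only the regularization term remains
[J_chk, grad_chk] = linearRegCostFunction(X_chk, y_chk, theta_chk, 1);
% expected: J_chk    = lambda * theta(2)^2 / (2*m) = 1/6  (about 0.1667)
%           grad_chk = [0; lambda * theta(2) / m]  = [0; 0.3333]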
6 | 7 | % Hold on to the current figure 8 | hold on; 9 | 10 | % We plot a range slightly bigger than the min and max values to get 11 | % an idea of how the fit will vary outside the range of the data points 12 | x = (min_x - 15: 0.05 : max_x + 25)'; 13 | 14 | % Map the X values 15 | X_poly = polyFeatures(x, p); 16 | X_poly = bsxfun(@minus, X_poly, mu); 17 | X_poly = bsxfun(@rdivide, X_poly, sigma); 18 | 19 | % Add ones 20 | X_poly = [ones(size(x, 1), 1) X_poly]; 21 | 22 | % Plot 23 | plot(x, X_poly * theta, '--', 'LineWidth', 2) 24 | 25 | % Hold off to the current figure 26 | hold off 27 | 28 | end 29 | -------------------------------------------------------------------------------- /ex5/polyFeatures.m: -------------------------------------------------------------------------------- 1 | function [X_poly] = polyFeatures(X, p) 2 | %POLYFEATURES Maps X (1D vector) into the p-th power 3 | % [X_poly] = POLYFEATURES(X, p) takes a data matrix X (size m x 1) and 4 | % maps each example into its polynomial features where 5 | % X_poly(i, :) = [X(i) X(i).^2 X(i).^3 ... X(i).^p]; 6 | % 7 | 8 | 9 | % You need to return the following variables correctly. 10 | X_poly = zeros(numel(X), p); 11 | 12 | % ====================== YOUR CODE HERE ====================== 13 | % Instructions: Given a vector X, return a matrix X_poly where the p-th 14 | % column of X contains the values of X to the p-th power. 15 | % 16 | % 17 | 18 | % Think of some vectorized implementation 19 | for i = 1:numel(X) 20 | for j = 1:p 21 | X_poly(i, j) = X(i)^j; 22 | end 23 | end 24 | 25 | % ========================================================================= 26 | 27 | end 28 | -------------------------------------------------------------------------------- /ex5/trainLinearReg.m: -------------------------------------------------------------------------------- 1 | function [theta] = trainLinearReg(X, y, lambda) 2 | %TRAINLINEARREG Trains linear regression given a dataset (X, y) and a 3 | %regularization parameter lambda 4 | % [theta] = TRAINLINEARREG (X, y, lambda) trains linear regression using 5 | % the dataset (X, y) and regularization parameter lambda. Returns the 6 | % trained parameters theta. 7 | % 8 | 9 | % Initialize Theta 10 | initial_theta = zeros(size(X, 2), 1); 11 | 12 | % Create "short hand" for the cost function to be minimized 13 | costFunction = @(t) linearRegCostFunction(X, y, t, lambda); 14 | 15 | % Now, costFunction is a function that takes in only one argument 16 | options = optimset('MaxIter', 200, 'GradObj', 'on'); 17 | 18 | % Minimize using fmincg 19 | % XXX(SaveTheRbtz@): Disable warnings here? 20 | theta = fmincg(costFunction, initial_theta, options); 21 | 22 | end 23 | -------------------------------------------------------------------------------- /ex5/validationCurve.m: -------------------------------------------------------------------------------- 1 | function [lambda_vec, error_train, error_val] = ... 2 | validationCurve(X, y, Xval, yval) 3 | %VALIDATIONCURVE Generate the train and validation errors needed to 4 | %plot a validation curve that we can use to select lambda 5 | % [lambda_vec, error_train, error_val] = ... 6 | % VALIDATIONCURVE(X, y, Xval, yval) returns the train 7 | % and validation errors (in error_train, error_val) 8 | % for different values of lambda. You are given the training set (X, 9 | % y) and validation set (Xval, yval). 
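On the "Think of some vectorized implementation" note in polyFeatures.m above: one possible vectorized form for a column-vector X is a single bsxfun broadcast (a sketch, not the submitted solution):

X_poly = bsxfun(@power, X(:), 1:p);   % element (i, j) is X(i)^j, giving the m x p power matrix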
10 | % 11 | 12 | % Selected values of lambda (you should not change this) 13 | lambda_vec = [0 0.001 0.003 0.01 0.03 0.1 0.3 1 3 10]'; 14 | 15 | % You need to return these variables correctly. 16 | error_train = zeros(length(lambda_vec), 1); 17 | error_val = zeros(length(lambda_vec), 1); 18 | 19 | % ====================== YOUR CODE HERE ====================== 20 | % Instructions: Fill in this function to return training errors in 21 | % error_train and the validation errors in error_val. The 22 | % vector lambda_vec contains the different lambda parameters 23 | % to use for each calculation of the errors, i.e, 24 | % error_train(i), and error_val(i) should give 25 | % you the errors obtained after training with 26 | % lambda = lambda_vec(i) 27 | % 28 | % Note: You can loop over lambda_vec with the following: 29 | % 30 | % for i = 1:length(lambda_vec) 31 | % lambda = lambda_vec(i); 32 | % % Compute train / val errors when training linear 33 | % % regression with regularization parameter lambda 34 | % % You should store the result in error_train(i) 35 | % % and error_val(i) 36 | % .... 37 | % 38 | % end 39 | % 40 | % 41 | 42 | for i = 1:length(lambda_vec) 43 | lambda = lambda_vec(i); 44 | theta = trainLinearReg(X, y, lambda); 45 | error_train(i) = linearRegCostFunction(X, y, theta, 0)(1); 46 | error_val(i) = linearRegCostFunction(Xval, yval, theta, 0)(1); 47 | 48 | % ========================================================================= 49 | 50 | end 51 | -------------------------------------------------------------------------------- /ex6.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/ex6.pdf -------------------------------------------------------------------------------- /ex6/dataset3Params.m: -------------------------------------------------------------------------------- 1 | function [C, sigma] = dataset3Params(X, y, Xval, yval) 2 | %DATASET3PARAMS returns your choice of C and sigma for Part 3 of the exercise 3 | %where you select the optimal (C, sigma) learning parameters to use for SVM 4 | %with RBF kernel 5 | % [C, sigma] = DATASET3PARAMS(X, y, Xval, yval) returns your choice of C and 6 | % sigma. You should complete this function to return the optimal C and 7 | % sigma based on a cross-validation set. 8 | % 9 | 10 | % You need to return the following variables correctly. 11 | C = 1; 12 | sigma = 0.1; 13 | 14 | % ====================== YOUR CODE HERE ====================== 15 | % Instructions: Fill in this function to return the optimal C and sigma 16 | % learning parameters found using the cross validation set. 17 | % You can use svmPredict to predict the labels on the cross 18 | % validation set. For example, 19 | % predictions = svmPredict(model, Xval); 20 | % will return the predictions on the cross validation set. 
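Once validationCurve.m above has been run (as in Part 8 of ex5.m), the "best" lambda is simply the one with the lowest cross-validation error; a small sketch, assuming error_val and lambda_vec come from that call:

[min_err, best_idx] = min(error_val);
best_lambda = lambda_vec(best_idx);   % candidate value to re-train with on the full training set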
21 | % 22 | % Note: You can compute the prediction error using 23 | % mean(double(predictions ~= yval)) 24 | % 25 | 26 | if(0) 27 | C_vec = [0.01 0.03 0.1 0.3 1 3 10]; 28 | sigma_vec = [0.01 0.03 0.1 0.3 1 3 10]; 29 | 30 | result = []; 31 | minimum = [0 0 0]; 32 | 33 | % XXX(SaveTheRbtz): A lot can be optimized here 34 | for c = 1:length(C_vec) 35 | for s = 1:length(sigma_vec) 36 | model = svmTrain(X, y, C_vec(c), @(x1, x2) gaussianKernel(x1, x2, sigma_vec(s))); 37 | predictions = svmPredict(model, Xval); 38 | result = [ result; mean(double(predictions ~= yval)) C_vec(c) sigma_vec(s) ]; 39 | endfor 40 | endfor 41 | 42 | % MATLAB's unstack would be usefull here =( 43 | minimum = sortrows(result)(1,:); 44 | C = minimum(2); 45 | sigma = minimum(3); 46 | 47 | endif 48 | % ========================================================================= 49 | 50 | end 51 | -------------------------------------------------------------------------------- /ex6/emailFeatures.m: -------------------------------------------------------------------------------- 1 | function x = emailFeatures(word_indices) 2 | %EMAILFEATURES takes in a word_indices vector and produces a feature vector 3 | %from the word indices 4 | % x = EMAILFEATURES(word_indices) takes in a word_indices vector and 5 | % produces a feature vector from the word indices. 6 | 7 | % Total number of words in the dictionary 8 | n = 1899; 9 | 10 | % You need to return the following variables correctly. 11 | x = zeros(n, 1); 12 | 13 | % ====================== YOUR CODE HERE ====================== 14 | % Instructions: Fill in this function to return a feature vector for the 15 | % given email (word_indices). To help make it easier to 16 | % process the emails, we have have already pre-processed each 17 | % email and converted each word in the email into an index in 18 | % a fixed dictionary (of 1899 words). The variable 19 | % word_indices contains the list of indices of the words 20 | % which occur in one email. 21 | % 22 | % Concretely, if an email has the text: 23 | % 24 | % The quick brown fox jumped over the lazy dog. 25 | % 26 | % Then, the word_indices vector for this text might look 27 | % like: 28 | % 29 | % 60 100 33 44 10 53 60 58 5 30 | % 31 | % where, we have mapped each word onto a number, for example: 32 | % 33 | % the -- 60 34 | % quick -- 100 35 | % ... 36 | % 37 | % (note: the above numbers are just an example and are not the 38 | % actual mappings). 39 | % 40 | % Your task is take one such word_indices vector and construct 41 | % a binary feature vector that indicates whether a particular 42 | % word occurs in the email. That is, x(i) = 1 when word i 43 | % is present in the email. Concretely, if the word 'the' (say, 44 | % index 60) appears in the email, then x(60) = 1. The feature 45 | % vector should look like: 46 | % 47 | % x = [ 0 0 0 0 1 0 0 0 ... 0 0 0 0 1 ... 0 0 0 1 0 ..]; 48 | % 49 | % 50 | 51 | % Hope this is not very "clumzy" solution 52 | x = arrayfun(@(i) ~isempty(word_indices(word_indices==i)), 1:n); 53 | 54 | % ========================================================================= 55 | 56 | 57 | end 58 | -------------------------------------------------------------------------------- /ex6/emailSample1.txt: -------------------------------------------------------------------------------- 1 | > Anyone knows how much it costs to host a web portal ? 2 | > 3 | Well, it depends on how many visitors you're expecting. 4 | This can be anywhere from less than 10 bucks a month to a couple of $100. 
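Back in emailFeatures.m above, the arrayfun expression works but yields a 1 x n row; a simpler sketch that also keeps x as the n x 1 column promised by the header is plain indexing:

x = zeros(n, 1);
x(word_indices) = 1;   % x(i) = 1 iff word i occurs; repeated indices are harmless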
5 | You should checkout http://www.rackspace.com/ or perhaps Amazon EC2 6 | if youre running something big.. 7 | 8 | To unsubscribe yourself from this mailing list, send an email to: 9 | groupname-unsubscribe@egroups.com 10 | 11 | -------------------------------------------------------------------------------- /ex6/emailSample2.txt: -------------------------------------------------------------------------------- 1 | Folks, 2 | 3 | my first time posting - have a bit of Unix experience, but am new to Linux. 4 | 5 | 6 | Just got a new PC at home - Dell box with Windows XP. Added a second hard disk 7 | for Linux. Partitioned the disk and have installed Suse 7.2 from CD, which went 8 | fine except it didn't pick up my monitor. 9 | 10 | I have a Dell branded E151FPp 15" LCD flat panel monitor and a nVidia GeForce4 11 | Ti4200 video card, both of which are probably too new to feature in Suse's default 12 | set. I downloaded a driver from the nVidia website and installed it using RPM. 13 | Then I ran Sax2 (as was recommended in some postings I found on the net), but 14 | it still doesn't feature my video card in the available list. What next? 15 | 16 | Another problem. I have a Dell branded keyboard and if I hit Caps-Lock twice, 17 | the whole machine crashes (in Linux, not Windows) - even the on/off switch is 18 | inactive, leaving me to reach for the power cable instead. 19 | 20 | If anyone can help me in any way with these probs., I'd be really grateful - 21 | I've searched the 'net but have run out of ideas. 22 | 23 | Or should I be going for a different version of Linux such as RedHat? Opinions 24 | welcome. 25 | 26 | Thanks a lot, 27 | Peter 28 | 29 | -- 30 | Irish Linux Users' Group: ilug@linux.ie 31 | http://www.linux.ie/mailman/listinfo/ilug for (un)subscription information. 32 | List maintainer: listmaster@linux.ie 33 | 34 | 35 | -------------------------------------------------------------------------------- /ex6/ex6.m: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env octave 2 | %% Machine Learning Online Class 3 | % Exercise 6 | Support Vector Machines 4 | % 5 | % Instructions 6 | % ------------ 7 | % 8 | % This file contains code that helps you get started on the 9 | % exercise. You will need to complete the following functions: 10 | % 11 | % gaussianKernel.m 12 | % dataset3Params.m 13 | % processEmail.m 14 | % emailFeatures.m 15 | % 16 | % For this exercise, you will not need to change any code in this file, 17 | % or any other files other than those mentioned above. 18 | % 19 | 20 | %% Initialization 21 | clear ; close all; clc 22 | 23 | %% =============== Part 1: Loading and Visualizing Data ================ 24 | % We start the exercise by first loading and visualizing the dataset. 25 | % The following code will load the dataset into your environment and plot 26 | % the data. 27 | % 28 | 29 | fprintf('Loading and Visualizing Data ...\n') 30 | 31 | % Load from ex6data1: 32 | % You will have X, y in your environment 33 | load('ex6data1.mat'); 34 | 35 | % Plot training data 36 | plotData(X, y); 37 | 38 | fprintf('Program paused. Press enter to continue.\n'); 39 | pause; 40 | 41 | %% ==================== Part 2: Training Linear SVM ==================== 42 | % The following code will train a linear SVM on the dataset and plot the 43 | % decision boundary learned. 
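For the linear kernel, the boundary learned below is the line w(1)*x1 + w(2)*x2 + b = 0 with w = model.w and b = model.b, which is essentially what visualizeBoundaryLinear.m further down in this repo plots. A minimal sketch, assuming model has been returned by svmTrain:

w = model.w;  b = model.b;
xp = linspace(min(X(:,1)), max(X(:,1)), 100);
yp = -(w(1)*xp + b) / w(2);   % x2 values where the decision function is exactly zero
plot(xp, yp, '-b');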
44 | % 45 | 46 | % Load from ex6data1: 47 | % You will have X, y in your environment 48 | load('ex6data1.mat'); 49 | 50 | fprintf('\nTraining Linear SVM ...\n') 51 | 52 | % You should try to change the C value below and see how the decision 53 | % boundary varies (e.g., try C = 1000) 54 | C = 1; 55 | model = svmTrain(X, y, C, @linearKernel, 1e-3, 20); 56 | visualizeBoundaryLinear(X, y, model); 57 | 58 | fprintf('Program paused. Press enter to continue.\n'); 59 | pause; 60 | 61 | %% =============== Part 3: Implementing Gaussian Kernel =============== 62 | % You will now implement the Gaussian kernel to use 63 | % with the SVM. You should complete the code in gaussianKernel.m 64 | % 65 | fprintf('\nEvaluating the Gaussian Kernel ...\n') 66 | 67 | x1 = [1 2 1]; x2 = [0 4 -1]; sigma = 2; 68 | sim = gaussianKernel(x1, x2, sigma); 69 | 70 | fprintf(['Gaussian Kernel between x1 = [1; 2; 1], x2 = [0; 4; -1], sigma = 0.5 :' ... 71 | '\n\t%f\n(this value should be about 0.324652)\n'], sim); 72 | 73 | fprintf('Program paused. Press enter to continue.\n'); 74 | pause; 75 | 76 | %% =============== Part 4: Visualizing Dataset 2 ================ 77 | % The following code will load the next dataset into your environment and 78 | % plot the data. 79 | % 80 | 81 | fprintf('Loading and Visualizing Data ...\n') 82 | 83 | % Load from ex6data2: 84 | % You will have X, y in your environment 85 | load('ex6data2.mat'); 86 | 87 | % Plot training data 88 | plotData(X, y); 89 | 90 | fprintf('Program paused. Press enter to continue.\n'); 91 | pause; 92 | 93 | %% ========== Part 5: Training SVM with RBF Kernel (Dataset 2) ========== 94 | % After you have implemented the kernel, we can now use it to train the 95 | % SVM classifier. 96 | % 97 | fprintf('\nTraining SVM with RBF Kernel (this may take 1 to 2 minutes) ...\n'); 98 | 99 | % Load from ex6data2: 100 | % You will have X, y in your environment 101 | load('ex6data2.mat'); 102 | 103 | % SVM Parameters 104 | C = 1; sigma = 0.1; 105 | 106 | % We set the tolerance and max_passes lower here so that the code will run 107 | % faster. However, in practice, you will want to run the training to 108 | % convergence. 109 | model= svmTrain(X, y, C, @(x1, x2) gaussianKernel(x1, x2, sigma)); 110 | visualizeBoundary(X, y, model); 111 | 112 | fprintf('Program paused. Press enter to continue.\n'); 113 | pause; 114 | 115 | %% =============== Part 6: Visualizing Dataset 3 ================ 116 | % The following code will load the next dataset into your environment and 117 | % plot the data. 118 | % 119 | 120 | fprintf('Loading and Visualizing Data ...\n') 121 | 122 | % Load from ex6data3: 123 | % You will have X, y in your environment 124 | load('ex6data3.mat'); 125 | 126 | % Plot training data 127 | plotData(X, y); 128 | 129 | fprintf('Program paused. Press enter to continue.\n'); 130 | pause; 131 | 132 | %% ========== Part 7: Training SVM with RBF Kernel (Dataset 3) ========== 133 | 134 | % This is a different dataset that you can use to experiment with. Try 135 | % different values of C and sigma here. 136 | % 137 | 138 | % Load from ex6data3: 139 | % You will have X, y in your environment 140 | load('ex6data3.mat'); 141 | 142 | % Try different SVM Parameters here 143 | [C, sigma] = dataset3Params(X, y, Xval, yval); 144 | 145 | % Train the SVM 146 | model= svmTrain(X, y, C, @(x1, x2) gaussianKernel(x1, x2, sigma)); 147 | visualizeBoundary(X, y, model); 148 | 149 | fprintf('Program paused. 
Press enter to continue.\n'); 150 | pause; 151 | 152 | -------------------------------------------------------------------------------- /ex6/ex6_spam.m: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env octave 2 | %% Machine Learning Online Class 3 | % Exercise 6 | Spam Classification with SVMs 4 | % 5 | % Instructions 6 | % ------------ 7 | % 8 | % This file contains code that helps you get started on the 9 | % exercise. You will need to complete the following functions: 10 | % 11 | % gaussianKernel.m 12 | % dataset3Params.m 13 | % processEmail.m 14 | % emailFeatures.m 15 | % 16 | % For this exercise, you will not need to change any code in this file, 17 | % or any other files other than those mentioned above. 18 | % 19 | 20 | %% Initialization 21 | clear ; close all; clc 22 | 23 | %% ==================== Part 1: Email Preprocessing ==================== 24 | % To use an SVM to classify emails into Spam v.s. Non-Spam, you first need 25 | % to convert each email into a vector of features. In this part, you will 26 | % implement the preprocessing steps for each email. You should 27 | % complete the code in processEmail.m to produce a word indices vector 28 | % for a given email. 29 | 30 | fprintf('\nPreprocessing sample email (emailSample1.txt)\n'); 31 | 32 | % Extract Features 33 | file_contents = readFile('emailSample1.txt'); 34 | word_indices = processEmail(file_contents); 35 | 36 | % Print Stats 37 | fprintf('Word Indices: \n'); 38 | fprintf(' %d', word_indices); 39 | fprintf('\n\n'); 40 | 41 | fprintf('Program paused. Press enter to continue.\n'); 42 | pause; 43 | 44 | %% ==================== Part 2: Feature Extraction ==================== 45 | % Now, you will convert each email into a vector of features in R^n. 46 | % You should complete the code in emailFeatures.m to produce a feature 47 | % vector for a given email. 48 | 49 | fprintf('\nExtracting features from sample email (emailSample1.txt)\n'); 50 | 51 | % Extract Features 52 | file_contents = readFile('emailSample1.txt'); 53 | word_indices = processEmail(file_contents); 54 | features = emailFeatures(word_indices); 55 | 56 | % Print Stats 57 | fprintf('Length of feature vector: %d\n', length(features)); 58 | fprintf('Number of non-zero entries: %d\n', sum(features > 0)); 59 | 60 | fprintf('Program paused. Press enter to continue.\n'); 61 | pause; 62 | 63 | %% =========== Part 3: Train Linear SVM for Spam Classification ======== 64 | % In this section, you will train a linear classifier to determine if an 65 | % email is Spam or Not-Spam. 66 | 67 | % Load the Spam Email dataset 68 | % You will have X, y in your environment 69 | load('spamTrain.mat'); 70 | 71 | fprintf('\nTraining Linear SVM (Spam Classification)\n') 72 | fprintf('(this may take 1 to 2 minutes) ...\n') 73 | 74 | C = 0.1; 75 | model = svmTrain(X, y, C, @linearKernel); 76 | 77 | p = svmPredict(model, X); 78 | 79 | fprintf('Training Accuracy: %f\n', mean(double(p == y)) * 100); 80 | 81 | %% =================== Part 4: Test Spam Classification ================ 82 | % After training the classifier, we can evaluate it on a test set. 
We have 83 | % included a test set in spamTest.mat 84 | 85 | % Load the test dataset 86 | % You will have Xtest, ytest in your environment 87 | load('spamTest.mat'); 88 | 89 | fprintf('\nEvaluating the trained Linear SVM on a test set ...\n') 90 | 91 | p = svmPredict(model, Xtest); 92 | 93 | fprintf('Test Accuracy: %f\n', mean(double(p == ytest)) * 100); 94 | pause; 95 | 96 | 97 | %% ================= Part 5: Top Predictors of Spam ==================== 98 | % Since the model we are training is a linear SVM, we can inspect the 99 | % weights learned by the model to understand better how it is determining 100 | % whether an email is spam or not. The following code finds the words with 101 | % the highest weights in the classifier. Informally, the classifier 102 | % 'thinks' that these words are the most likely indicators of spam. 103 | % 104 | 105 | % Sort the weights and obtain the vocabulary list 106 | [weight, idx] = sort(model.w, 'descend'); 107 | vocabList = getVocabList(); 108 | 109 | fprintf('\nTop predictors of spam: \n'); 110 | for i = 1:15 111 | fprintf(' %-15s (%f) \n', vocabList{idx(i)}, weight(i)); 112 | end 113 | 114 | fprintf('\n\n'); 115 | fprintf('\nProgram paused. Press enter to continue.\n'); 116 | pause; 117 | 118 | %% =================== Part 6: Try Your Own Emails ===================== 119 | % Now that you've trained the spam classifier, you can use it on your own 120 | % emails! In the starter code, we have included spamSample1.txt, 121 | % spamSample2.txt, emailSample1.txt and emailSample2.txt as examples. 122 | % The following code reads in one of these emails and then uses your 123 | % learned SVM classifier to determine whether the email is Spam or 124 | % Not Spam. 125 | 126 | % Set the file to be read in (change this to spamSample2.txt, 127 | % emailSample1.txt or emailSample2.txt to see different predictions on 128 | % different email types). Try your own emails as well!
129 | filename = 'spamSample1.txt'; 130 | 131 | % Read and predict 132 | file_contents = readFile(filename); 133 | word_indices = processEmail(file_contents); 134 | x = emailFeatures(word_indices); 135 | p = svmPredict(model, x); 136 | 137 | fprintf('\nProcessed %s\n\nSpam Classification: %d\n', filename, p); 138 | fprintf('(1 indicates spam, 0 indicates not spam)\n\n'); 139 | 140 | -------------------------------------------------------------------------------- /ex6/ex6data1.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/ex6/ex6data1.mat -------------------------------------------------------------------------------- /ex6/ex6data2.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/ex6/ex6data2.mat -------------------------------------------------------------------------------- /ex6/ex6data3.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/ex6/ex6data3.mat -------------------------------------------------------------------------------- /ex6/gaussianKernel.m: -------------------------------------------------------------------------------- 1 | function sim = gaussianKernel(x1, x2, sigma) 2 | %RBFKERNEL returns a radial basis function kernel between x1 and x2 3 | % sim = gaussianKernel(x1, x2) returns a gaussian kernel between x1 and x2 4 | % and returns the value in sim 5 | 6 | % Ensure that x1 and x2 are column vectors 7 | x1 = x1(:); x2 = x2(:); 8 | 9 | % You need to return the following variables correctly. 10 | sim = 0; 11 | 12 | % ====================== YOUR CODE HERE ====================== 13 | % Instructions: Fill in this function to return the similarity between x1 14 | % and x2 computed using a Gaussian kernel with bandwidth 15 | % sigma 16 | % 17 | % 18 | 19 | % I've used matrix multiplication instead of square and sum 20 | sim = exp(-((x1 - x2)'*(x1-x2))/(2*(sigma^2))); 21 | % Canonical implementation is: 22 | % sim = exp(-sum((x1 - x2) .^ 2)/(2*(sigma^2))); 23 | % TODO(SaveTheRbtz@): Should test that I'm not trying to outsmart the optimizer 24 | 25 | % ============================================================= 26 | 27 | end 28 | -------------------------------------------------------------------------------- /ex6/getVocabList.m: -------------------------------------------------------------------------------- 1 | function vocabList = getVocabList() 2 | %GETVOCABLIST reads the fixed vocabulary list in vocab.txt and returns a 3 | %cell array of the words 4 | % vocabList = GETVOCABLIST() reads the fixed vocabulary list in vocab.txt 5 | % and returns a cell array of the words in vocabList.
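A quick hand check of the value quoted in ex6.m for gaussianKernel.m above: for x1 = [1 2 1] and x2 = [0 4 -1] the squared distance is 1 + 4 + 4 = 9, so with sigma = 2 the kernel is exp(-9/8), about 0.324652 (the "sigma = 0.5" in ex6.m's message appears to be a leftover; the code actually passes sigma = 2):

sim = gaussianKernel([1 2 1], [0 4 -1], 2);
fprintf('%f\n', sim);   % prints roughly 0.324652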
6 | 7 | 8 | %% Read the fixed vocabulary list 9 | fid = fopen('vocab.txt'); 10 | 11 | % Store all dictionary words in cell array vocab{} 12 | n = 1899; % Total number of words in the dictionary 13 | 14 | % For ease of implementation, we use a struct to map the strings => integers 15 | % In practice, you'll want to use some form of hashmap 16 | vocabList = cell(n, 1); 17 | for i = 1:n 18 | % Word Index (can ignore since it will be = i) 19 | fscanf(fid, '%d', 1); 20 | % Actual Word 21 | vocabList{i} = fscanf(fid, '%s', 1); 22 | end 23 | fclose(fid); 24 | 25 | end 26 | -------------------------------------------------------------------------------- /ex6/linearKernel.m: -------------------------------------------------------------------------------- 1 | function sim = linearKernel(x1, x2) 2 | %LINEARKERNEL returns a linear kernel between x1 and x2 3 | % sim = linearKernel(x1, x2) returns a linear kernel between x1 and x2 4 | % and returns the value in sim 5 | 6 | % Ensure that x1 and x2 are column vectors 7 | x1 = x1(:); x2 = x2(:); 8 | 9 | % Compute the kernel 10 | sim = x1' * x2; % dot product 11 | 12 | end -------------------------------------------------------------------------------- /ex6/plotData.m: -------------------------------------------------------------------------------- 1 | function plotData(X, y) 2 | %PLOTDATA Plots the data points X and y into a new figure 3 | % PLOTDATA(x,y) plots the data points with + for the positive examples 4 | % and o for the negative examples. X is assumed to be a Mx2 matrix. 5 | % 6 | % Note: This was slightly modified such that it expects y = 1 or y = 0 7 | 8 | % Find Indices of Positive and Negative Examples 9 | pos = find(y == 1); neg = find(y == 0); 10 | 11 | % Plot Examples 12 | plot(X(pos, 1), X(pos, 2), 'k+','LineWidth', 1, 'MarkerSize', 7) 13 | hold on; 14 | plot(X(neg, 1), X(neg, 2), 'ko', 'MarkerFaceColor', 'y', 'MarkerSize', 7) 15 | hold off; 16 | 17 | end 18 | -------------------------------------------------------------------------------- /ex6/processEmail.m: -------------------------------------------------------------------------------- 1 | function word_indices = processEmail(email_contents) 2 | %PROCESSEMAIL preprocesses a the body of an email and 3 | %returns a list of word_indices 4 | % word_indices = PROCESSEMAIL(email_contents) preprocesses 5 | % the body of an email and returns a list of indices of the 6 | % words contained in the email. 7 | % 8 | 9 | % Load Vocabulary 10 | vocabList = getVocabList(); 11 | 12 | % Init return value 13 | word_indices = []; 14 | 15 | % ========================== Preprocess Email =========================== 16 | 17 | % Find the Headers ( \n\n and remove ) 18 | % Uncomment the following lines if you are working with raw emails with the 19 | % full headers 20 | 21 | % hdrstart = strfind(email_contents, ([char(10) char(10)])); 22 | % email_contents = email_contents(hdrstart(1):end); 23 | 24 | % Lower case 25 | email_contents = lower(email_contents); 26 | 27 | % Strip all HTML 28 | % Looks for any expression that starts with < and ends with > and replace 29 | % and does not have any < or > in the tag it with a space 30 | email_contents = regexprep(email_contents, '<[^<>]+>', ' '); 31 | 32 | % Handle Numbers 33 | % Look for one or more characters between 0-9 34 | email_contents = regexprep(email_contents, '[0-9]+', 'number'); 35 | 36 | % Handle URLS 37 | % Look for strings starting with http:// or https:// 38 | email_contents = regexprep(email_contents, ... 
39 | '(http|https)://[^\s]*', 'httpaddr'); 40 | 41 | % Handle Email Addresses 42 | % Look for strings with @ in the middle 43 | email_contents = regexprep(email_contents, '[^\s]+@[^\s]+', 'emailaddr'); 44 | 45 | % Handle $ sign 46 | email_contents = regexprep(email_contents, '[$]+', 'dollar'); 47 | 48 | 49 | % ========================== Tokenize Email =========================== 50 | 51 | % Output the email to screen as well 52 | fprintf('\n==== Processed Email ====\n\n'); 53 | 54 | % Process file 55 | l = 0; 56 | 57 | while ~isempty(email_contents) 58 | 59 | % Tokenize and also get rid of any punctuation 60 | [str, email_contents] = ... 61 | strtok(email_contents, ... 62 | [' @$/#.-:&*+=[]?!(){},''">_<;%' char(10) char(13)]); 63 | 64 | % Remove any non alphanumeric characters 65 | str = regexprep(str, '[^a-zA-Z0-9]', ''); 66 | 67 | % Stem the word 68 | % (the porterStemmer sometimes has issues, so we use a try catch block) 69 | try str = porterStemmer(strtrim(str)); 70 | catch str = ''; continue; 71 | end; 72 | 73 | % Skip the word if it is too short 74 | if length(str) < 1 75 | continue; 76 | end 77 | 78 | % Look up the word in the dictionary and add to word_indices if 79 | % found 80 | % ====================== YOUR CODE HERE ====================== 81 | % Instructions: Fill in this function to add the index of str to 82 | % word_indices if it is in the vocabulary. At this point 83 | % of the code, you have a stemmed word from the email in 84 | % the variable str. You should look up str in the 85 | % vocabulary list (vocabList). If a match exists, you 86 | % should add the index of the word to the word_indices 87 | % vector. Concretely, if str = 'action', then you should 88 | % look up the vocabulary list to find where in vocabList 89 | % 'action' appears. For example, if vocabList{18} = 90 | % 'action', then, you should add 18 to the word_indices 91 | % vector (e.g., word_indices = [word_indices ; 18]; ). 92 | % 93 | % Note: vocabList{idx} returns a the word with index idx in the 94 | % vocabulary list. 95 | % 96 | % Note: You can use strcmp(str1, str2) to compare two strings (str1 and 97 | % str2). It will return 1 only if the two strings are equivalent. 
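An alternative lookup built only on the strcmp hint above, instead of the strmatch call used in the solution below (a sketch; both find the exact vocabulary match):

idx = find(strcmp(vocabList, str));
if ~isempty(idx)
    word_indices = [word_indices; idx];   % append the vocabulary index of this word
end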
98 | % 99 | 100 | word_indices = [word_indices strmatch(str, vocabList, 'exact')]; 101 | 102 | % ============================================================= 103 | 104 | 105 | % Print to screen, ensuring that the output lines are not too long 106 | if (l + length(str) + 1) > 78 107 | fprintf('\n'); 108 | l = 0; 109 | end 110 | fprintf('%s ', str); 111 | l = l + length(str) + 1; 112 | 113 | end 114 | 115 | % Print footer 116 | fprintf('\n\n=========================\n'); 117 | 118 | end 119 | -------------------------------------------------------------------------------- /ex6/readFile.m: -------------------------------------------------------------------------------- 1 | function file_contents = readFile(filename) 2 | %READFILE reads a file and returns its entire contents 3 | % file_contents = READFILE(filename) reads a file and returns its entire 4 | % contents in file_contents 5 | % 6 | 7 | % Load File 8 | fid = fopen(filename); 9 | if fid 10 | file_contents = fscanf(fid, '%c', inf); 11 | fclose(fid); 12 | else 13 | file_contents = ''; 14 | fprintf('Unable to open %s\n', filename); 15 | end 16 | 17 | end 18 | 19 | -------------------------------------------------------------------------------- /ex6/spamSample1.txt: -------------------------------------------------------------------------------- 1 | Do You Want To Make $1000 Or More Per Week? 2 | 3 | 4 | 5 | If you are a motivated and qualified individual - I 6 | will personally demonstrate to you a system that will 7 | make you $1,000 per week or more! This is NOT mlm. 8 | 9 | 10 | 11 | Call our 24 hour pre-recorded number to get the 12 | details. 13 | 14 | 15 | 16 | 000-456-789 17 | 18 | 19 | 20 | I need people who want to make serious money. Make 21 | the call and get the facts. 22 | 23 | Invest 2 minutes in yourself now! 24 | 25 | 26 | 27 | 000-456-789 28 | 29 | 30 | 31 | Looking forward to your call and I will introduce you 32 | to people like yourself who 33 | are currently making $10,000 plus per week! 34 | 35 | 36 | 37 | 000-456-789 38 | 39 | 40 | 41 | 3484lJGv6-241lEaN9080lRmS6-271WxHo7524qiyT5-438rjUv5615hQcf0-662eiDB9057dMtVl72 42 | 43 | -------------------------------------------------------------------------------- /ex6/spamSample2.txt: -------------------------------------------------------------------------------- 1 | Best Buy Viagra Generic Online 2 | 3 | Viagra 100mg x 60 Pills $125, Free Pills & Reorder Discount, Top Selling 100% Quality & Satisfaction guaranteed! 4 | 5 | We accept VISA, Master & E-Check Payments, 90000+ Satisfied Customers! 
6 | http://medphysitcstech.ru 7 | 8 | 9 | -------------------------------------------------------------------------------- /ex6/spamTest.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/ex6/spamTest.mat -------------------------------------------------------------------------------- /ex6/spamTrain.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/ex6/spamTrain.mat -------------------------------------------------------------------------------- /ex6/submit.m: -------------------------------------------------------------------------------- 1 | function submit(partId) 2 | %SUBMIT Submit your code and output to the ml-class servers 3 | % SUBMIT() will connect to the ml-class server and submit your solution 4 | 5 | fprintf('==\n== [ml-class] Submitting Solutions | Programming Exercise %s\n==\n', ... 6 | homework_id()); 7 | if ~exist('partId', 'var') || isempty(partId) 8 | partId = promptPart(); 9 | end 10 | 11 | % Check valid partId 12 | partNames = validParts(); 13 | if ~isValidPartId(partId) 14 | fprintf('!! Invalid homework part selected.\n'); 15 | fprintf('!! Expected an integer from 1 to %d.\n', numel(partNames) + 1); 16 | fprintf('!! Submission Cancelled\n'); 17 | return 18 | end 19 | 20 | [login password] = loginPrompt(); 21 | if isempty(login) 22 | fprintf('!! Submission Cancelled\n'); 23 | return 24 | end 25 | 26 | fprintf('\n== Connecting to ml-class ... '); 27 | if exist('OCTAVE_VERSION') 28 | fflush(stdout); 29 | end 30 | 31 | % Setup submit list 32 | if partId == numel(partNames) + 1 33 | submitParts = 1:numel(partNames); 34 | else 35 | submitParts = [partId]; 36 | end 37 | 38 | for s = 1:numel(submitParts) 39 | % Submit this part 40 | partId = submitParts(s); 41 | 42 | % Get Challenge 43 | [login, ch, signature] = getChallenge(login); 44 | if isempty(login) || isempty(ch) || isempty(signature) 45 | % Some error occured, error string in first return element. 46 | fprintf('\n!! Error: %s\n\n', login); 47 | return 48 | end 49 | 50 | % Attempt Submission with Challenge 51 | ch_resp = challengeResponse(login, password, ch); 52 | [result, str] = submitSolution(login, ch_resp, partId, output(partId), ... 53 | source(partId), signature); 54 | 55 | fprintf('\n== [ml-class] Submitted Homework %s - Part %d - %s\n', ... 56 | homework_id(), partId, partNames{partId}); 57 | fprintf('== %s\n', strtrim(str)); 58 | if exist('OCTAVE_VERSION') 59 | fflush(stdout); 60 | end 61 | end 62 | 63 | end 64 | 65 | % ================== CONFIGURABLES FOR EACH HOMEWORK ================== 66 | 67 | function id = homework_id() 68 | id = '6'; 69 | end 70 | 71 | function [partNames] = validParts() 72 | partNames = { 'Gaussian Kernel', ... 73 | 'Parameters (C, sigma) for Dataset 3', ... 74 | 'Email Preprocessing' ... 75 | 'Email Feature Extraction' ... 76 | }; 77 | end 78 | 79 | function srcs = sources() 80 | % Separated by part 81 | srcs = { { 'gaussianKernel.m' }, ... 82 | { 'dataset3Params.m' }, ... 83 | { 'processEmail.m' }, ... 
84 | { 'emailFeatures.m' } }; 85 | end 86 | 87 | function out = output(partId) 88 | % Random Test Cases 89 | x1 = sin(1:10)'; 90 | x2 = cos(1:10)'; 91 | ec = 'the quick brown fox jumped over the lazy dog'; 92 | wi = 1 + abs(round(x1 * 1863)); 93 | wi = [wi ; wi]; 94 | if partId == 1 95 | sim = gaussianKernel(x1, x2, 2); 96 | out = sprintf('%0.5f ', sim); 97 | elseif partId == 2 98 | load('ex6data3.mat'); 99 | [C, sigma] = dataset3Params(X, y, Xval, yval); 100 | out = sprintf('%0.5f ', C); 101 | out = [out sprintf('%0.5f ', sigma)]; 102 | elseif partId == 3 103 | word_indices = processEmail(ec); 104 | out = sprintf('%d ', word_indices); 105 | elseif partId == 4 106 | x = emailFeatures(wi); 107 | out = sprintf('%d ', x); 108 | end 109 | end 110 | 111 | function url = challenge_url() 112 | url = 'http://www.ml-class.org/course/homework/challenge'; 113 | end 114 | 115 | function url = submit_url() 116 | url = 'http://www.ml-class.org/course/homework/submit'; 117 | end 118 | 119 | % ========================= CHALLENGE HELPERS ========================= 120 | 121 | function src = source(partId) 122 | src = ''; 123 | src_files = sources(); 124 | if partId <= numel(src_files) 125 | flist = src_files{partId}; 126 | for i = 1:numel(flist) 127 | fid = fopen(flist{i}); 128 | while ~feof(fid) 129 | line = fgets(fid); 130 | src = [src line]; 131 | end 132 | fclose(fid); 133 | src = [src '||||||||']; 134 | end 135 | end 136 | end 137 | 138 | function ret = isValidPartId(partId) 139 | partNames = validParts(); 140 | ret = (~isempty(partId)) && (partId >= 1) && (partId <= numel(partNames) + 1); 141 | end 142 | 143 | function partId = promptPart() 144 | fprintf('== Select which part(s) to submit:\n', ... 145 | homework_id()); 146 | partNames = validParts(); 147 | srcFiles = sources(); 148 | for i = 1:numel(partNames) 149 | fprintf('== %d) %s [', i, partNames{i}); 150 | fprintf(' %s ', srcFiles{i}{:}); 151 | fprintf(']\n'); 152 | end 153 | fprintf('== %d) All of the above \n==\nEnter your choice [1-%d]: ', ... 154 | numel(partNames) + 1, numel(partNames) + 1); 155 | selPart = input('', 's'); 156 | partId = str2num(selPart); 157 | if ~isValidPartId(partId) 158 | partId = -1; 159 | end 160 | end 161 | 162 | function [email,ch,signature] = getChallenge(email) 163 | str = urlread(challenge_url(), 'post', {'email_address', email}); 164 | 165 | str = strtrim(str); 166 | [email, str] = strtok (str, '|'); 167 | [ch, str] = strtok (str, '|'); 168 | [signature, str] = strtok (str, '|'); 169 | end 170 | 171 | 172 | function [result, str] = submitSolution(email, ch_resp, part, output, ... 173 | source, signature) 174 | 175 | params = {'homework', homework_id(), ... 176 | 'part', num2str(part), ... 177 | 'email', email, ... 178 | 'output', output, ... 179 | 'source', source, ... 180 | 'challenge_response', ch_resp, ... 
181 | 'signature', signature}; 182 | 183 | str = urlread(submit_url(), 'post', params); 184 | 185 | % Parse str to read for success / failure 186 | result = 0; 187 | 188 | end 189 | 190 | % =========================== LOGIN HELPERS =========================== 191 | 192 | function [login password] = loginPrompt() 193 | % Prompt for password 194 | [login password] = basicPrompt(); 195 | 196 | if isempty(login) || isempty(password) 197 | login = []; password = []; 198 | end 199 | end 200 | 201 | 202 | function [login password] = basicPrompt() 203 | login = input('Login (Email address): ', 's'); 204 | password = input('Password: ', 's'); 205 | end 206 | 207 | 208 | function [str] = challengeResponse(email, passwd, challenge) 209 | salt = ')~/|]QMB3[!W`?OVt7qC"@+}'; 210 | str = sha1([challenge sha1([salt email passwd])]); 211 | sel = randperm(numel(str)); 212 | sel = sort(sel(1:16)); 213 | str = str(sel); 214 | end 215 | 216 | 217 | % =============================== SHA-1 ================================ 218 | 219 | function hash = sha1(str) 220 | 221 | % Initialize variables 222 | h0 = uint32(1732584193); 223 | h1 = uint32(4023233417); 224 | h2 = uint32(2562383102); 225 | h3 = uint32(271733878); 226 | h4 = uint32(3285377520); 227 | 228 | % Convert to word array 229 | strlen = numel(str); 230 | 231 | % Break string into chars and append the bit 1 to the message 232 | mC = [double(str) 128]; 233 | mC = [mC zeros(1, 4-mod(numel(mC), 4), 'uint8')]; 234 | 235 | numB = strlen * 8; 236 | if exist('idivide') 237 | numC = idivide(uint32(numB + 65), 512, 'ceil'); 238 | else 239 | numC = ceil(double(numB + 65)/512); 240 | end 241 | numW = numC * 16; 242 | mW = zeros(numW, 1, 'uint32'); 243 | 244 | idx = 1; 245 | for i = 1:4:strlen + 1 246 | mW(idx) = bitor(bitor(bitor( ... 247 | bitshift(uint32(mC(i)), 24), ... 248 | bitshift(uint32(mC(i+1)), 16)), ... 249 | bitshift(uint32(mC(i+2)), 8)), ... 
250 | uint32(mC(i+3))); 251 | idx = idx + 1; 252 | end 253 | 254 | % Append length of message 255 | mW(numW - 1) = uint32(bitshift(uint64(numB), -32)); 256 | mW(numW) = uint32(bitshift(bitshift(uint64(numB), 32), -32)); 257 | 258 | % Process the message in successive 512-bit chs 259 | for cId = 1 : double(numC) 260 | cSt = (cId - 1) * 16 + 1; 261 | cEnd = cId * 16; 262 | ch = mW(cSt : cEnd); 263 | 264 | % Extend the sixteen 32-bit words into eighty 32-bit words 265 | for j = 17 : 80 266 | ch(j) = ch(j - 3); 267 | ch(j) = bitxor(ch(j), ch(j - 8)); 268 | ch(j) = bitxor(ch(j), ch(j - 14)); 269 | ch(j) = bitxor(ch(j), ch(j - 16)); 270 | ch(j) = bitrotate(ch(j), 1); 271 | end 272 | 273 | % Initialize hash value for this ch 274 | a = h0; 275 | b = h1; 276 | c = h2; 277 | d = h3; 278 | e = h4; 279 | 280 | % Main loop 281 | for i = 1 : 80 282 | if(i >= 1 && i <= 20) 283 | f = bitor(bitand(b, c), bitand(bitcmp(b), d)); 284 | k = uint32(1518500249); 285 | elseif(i >= 21 && i <= 40) 286 | f = bitxor(bitxor(b, c), d); 287 | k = uint32(1859775393); 288 | elseif(i >= 41 && i <= 60) 289 | f = bitor(bitor(bitand(b, c), bitand(b, d)), bitand(c, d)); 290 | k = uint32(2400959708); 291 | elseif(i >= 61 && i <= 80) 292 | f = bitxor(bitxor(b, c), d); 293 | k = uint32(3395469782); 294 | end 295 | 296 | t = bitrotate(a, 5); 297 | t = bitadd(t, f); 298 | t = bitadd(t, e); 299 | t = bitadd(t, k); 300 | t = bitadd(t, ch(i)); 301 | e = d; 302 | d = c; 303 | c = bitrotate(b, 30); 304 | b = a; 305 | a = t; 306 | 307 | end 308 | h0 = bitadd(h0, a); 309 | h1 = bitadd(h1, b); 310 | h2 = bitadd(h2, c); 311 | h3 = bitadd(h3, d); 312 | h4 = bitadd(h4, e); 313 | 314 | end 315 | 316 | hash = reshape(dec2hex(double([h0 h1 h2 h3 h4]), 8)', [1 40]); 317 | 318 | hash = lower(hash); 319 | 320 | end 321 | 322 | function ret = bitadd(iA, iB) 323 | ret = double(iA) + double(iB); 324 | ret = bitset(ret, 33, 0); 325 | ret = uint32(ret); 326 | end 327 | 328 | function ret = bitrotate(iA, places) 329 | t = bitshift(iA, places - 32); 330 | ret = bitshift(iA, places); 331 | ret = bitor(ret, t); 332 | end 333 | -------------------------------------------------------------------------------- /ex6/svmPredict.m: -------------------------------------------------------------------------------- 1 | function pred = svmPredict(model, X) 2 | %SVMPREDICT returns a vector of predictions using a trained SVM model 3 | %(svmTrain). 4 | % pred = SVMPREDICT(model, X) returns a vector of predictions using a 5 | % trained SVM model (svmTrain). X is a mxn matrix where there each 6 | % example is a row. model is a svm model returned from svmTrain. 7 | % predictions pred is a m x 1 column of predictions of {0, 1} values. 
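For reference, what svmPredict evaluates is the standard SVM decision function f(x) = sum_j alphas(j) * y(j) * K(x, X(j,:)) + b, predicting 1 whenever f(x) >= 0. A direct, unvectorized sketch for a single example x (a row vector), using the model fields saved by svmTrain:

f = model.b;
for j = 1:size(model.X, 1)
    f = f + model.alphas(j) * model.y(j) * model.kernelFunction(x', model.X(j, :)');
end
prediction = double(f >= 0);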
8 | % 9 | 10 | % Check if we are getting a column vector, if so, then assume that we only 11 | % need to do prediction for a single example 12 | if (size(X, 2) == 1) 13 | % Examples should be in rows 14 | X = X'; 15 | end 16 | 17 | % Dataset 18 | m = size(X, 1); 19 | p = zeros(m, 1); 20 | pred = zeros(m, 1); 21 | 22 | if strcmp(func2str(model.kernelFunction), 'linearKernel') 23 | % We can use the weights and bias directly if working with the 24 | % linear kernel 25 | p = X * model.w + model.b; 26 | elseif strfind(func2str(model.kernelFunction), 'gaussianKernel') 27 | % Vectorized RBF Kernel 28 | % This is equivalent to computing the kernel on every pair of examples 29 | X1 = sum(X.^2, 2); 30 | X2 = sum(model.X.^2, 2)'; 31 | K = bsxfun(@plus, X1, bsxfun(@plus, X2, - 2 * X * model.X')); 32 | K = model.kernelFunction(1, 0) .^ K; 33 | K = bsxfun(@times, model.y', K); 34 | K = bsxfun(@times, model.alphas', K); 35 | p = sum(K, 2); 36 | else 37 | % Other Non-linear kernel 38 | for i = 1:m 39 | prediction = 0; 40 | for j = 1:size(model.X, 1) 41 | prediction = prediction + ... 42 | model.alphas(j) * model.y(j) * ... 43 | model.kernelFunction(X(i,:)', model.X(j,:)'); 44 | end 45 | p(i) = prediction + model.b; 46 | end 47 | end 48 | 49 | % Convert predictions into 0 / 1 50 | pred(p >= 0) = 1; 51 | pred(p < 0) = 0; 52 | 53 | end 54 | 55 | -------------------------------------------------------------------------------- /ex6/svmTrain.m: -------------------------------------------------------------------------------- 1 | function [model] = svmTrain(X, Y, C, kernelFunction, ... 2 | tol, max_passes) 3 | %SVMTRAIN Trains an SVM classifier using a simplified version of the SMO 4 | %algorithm. 5 | % [model] = SVMTRAIN(X, Y, C, kernelFunction, tol, max_passes) trains an 6 | % SVM classifier and returns trained model. X is the matrix of training 7 | % examples. Each row is a training example, and the jth column holds the 8 | % jth feature. Y is a column matrix containing 1 for positive examples 9 | % and 0 for negative examples. C is the standard SVM regularization 10 | % parameter. tol is a tolerance value used for determining equality of 11 | % floating point numbers. max_passes controls the number of iterations 12 | % over the dataset (without changes to alpha) before the algorithm quits. 13 | % 14 | % Note: This is a simplified version of the SMO algorithm for training 15 | % SVMs. In practice, if you want to train an SVM classifier, we 16 | % recommend using an optimized package such as: 17 | % 18 | % LIBSVM (http://www.csie.ntu.edu.tw/~cjlin/libsvm/) 19 | % SVMLight (http://svmlight.joachims.org/) 20 | % 21 | % 22 | 23 | if ~exist('tol', 'var') || isempty(tol) 24 | tol = 1e-3; 25 | end 26 | 27 | if ~exist('max_passes', 'var') || isempty(max_passes) 28 | max_passes = 5; 29 | end 30 | 31 | % Data parameters 32 | m = size(X, 1); 33 | n = size(X, 2); 34 | 35 | % Map 0 to -1 36 | Y(Y==0) = -1; 37 | 38 | % Variables 39 | alphas = zeros(m, 1); 40 | b = 0; 41 | E = zeros(m, 1); 42 | passes = 0; 43 | eta = 0; 44 | L = 0; 45 | H = 0; 46 | 47 | % Pre-compute the Kernel Matrix since our dataset is small 48 | % (in practice, optimized SVM packages that handle large datasets 49 | % gracefully will _not_ do this) 50 | % 51 | % We have implemented optimized vectorized version of the Kernels here so 52 | % that the svm training will run faster. 
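A note on the vectorized RBF branch just below: K is first filled with squared distances using ||xi - xj||^2 = ||xi||^2 + ||xj||^2 - 2*xi'*xj, and kernelFunction(1, 0) equals exp(-1/(2*sigma^2)), so raising it elementwise to those distances gives exp(-||xi - xj||^2 / (2*sigma^2)), i.e. the full Gaussian kernel matrix. A tiny check of the distance identity:

xi = [1 2]; xj = [3 5];
d2_expand = sum(xi.^2) + sum(xj.^2) - 2 * (xi * xj');   % 5 + 34 - 26 = 13
d2_direct = sum((xi - xj).^2);                          % also 13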
53 | if strcmp(func2str(kernelFunction), 'linearKernel') 54 | % Vectorized computation for the Linear Kernel 55 | % This is equivalent to computing the kernel on every pair of examples 56 | K = X*X'; 57 | elseif strfind(func2str(kernelFunction), 'gaussianKernel') 58 | % Vectorized RBF Kernel 59 | % This is equivalent to computing the kernel on every pair of examples 60 | X2 = sum(X.^2, 2); 61 | K = bsxfun(@plus, X2, bsxfun(@plus, X2', - 2 * (X * X'))); 62 | K = kernelFunction(1, 0) .^ K; 63 | else 64 | % Pre-compute the Kernel Matrix 65 | % The following can be slow due to the lack of vectorization 66 | K = zeros(m); 67 | for i = 1:m 68 | for j = i:m 69 | K(i,j) = kernelFunction(X(i,:)', X(j,:)'); 70 | K(j,i) = K(i,j); %the matrix is symmetric 71 | end 72 | end 73 | end 74 | 75 | % Train 76 | fprintf('\nTraining ...'); 77 | dots = 12; 78 | while passes < max_passes, 79 | 80 | num_changed_alphas = 0; 81 | for i = 1:m, 82 | 83 | % Calculate Ei = f(x(i)) - y(i) using (2). 84 | % E(i) = b + sum (X(i, :) * (repmat(alphas.*Y,1,n).*X)') - Y(i); 85 | E(i) = b + sum (alphas.*Y.*K(:,i)) - Y(i); 86 | 87 | if ((Y(i)*E(i) < -tol && alphas(i) < C) || (Y(i)*E(i) > tol && alphas(i) > 0)), 88 | 89 | % In practice, there are many heuristics one can use to select 90 | % the i and j. In this simplified code, we select them randomly. 91 | j = ceil(m * rand()); 92 | while j == i, % Make sure i \neq j 93 | j = ceil(m * rand()); 94 | end 95 | 96 | % Calculate Ej = f(x(j)) - y(j) using (2). 97 | E(j) = b + sum (alphas.*Y.*K(:,j)) - Y(j); 98 | 99 | % Save old alphas 100 | alpha_i_old = alphas(i); 101 | alpha_j_old = alphas(j); 102 | 103 | % Compute L and H by (10) or (11). 104 | if (Y(i) == Y(j)), 105 | L = max(0, alphas(j) + alphas(i) - C); 106 | H = min(C, alphas(j) + alphas(i)); 107 | else 108 | L = max(0, alphas(j) - alphas(i)); 109 | H = min(C, C + alphas(j) - alphas(i)); 110 | end 111 | 112 | if (L == H), 113 | % continue to next i. 114 | continue; 115 | end 116 | 117 | % Compute eta by (14). 118 | eta = 2 * K(i,j) - K(i,i) - K(j,j); 119 | if (eta >= 0), 120 | % continue to next i. 121 | continue; 122 | end 123 | 124 | % Compute and clip new value for alpha j using (12) and (15). 125 | alphas(j) = alphas(j) - (Y(j) * (E(i) - E(j))) / eta; 126 | 127 | % Clip 128 | alphas(j) = min (H, alphas(j)); 129 | alphas(j) = max (L, alphas(j)); 130 | 131 | % Check if change in alpha is significant 132 | if (abs(alphas(j) - alpha_j_old) < tol), 133 | % continue to next i. 134 | % replace anyway 135 | alphas(j) = alpha_j_old; 136 | continue; 137 | end 138 | 139 | % Determine value for alpha i using (16). 140 | alphas(i) = alphas(i) + Y(i)*Y(j)*(alpha_j_old - alphas(j)); 141 | 142 | % Compute b1 and b2 using (17) and (18) respectively. 143 | b1 = b - E(i) ... 144 | - Y(i) * (alphas(i) - alpha_i_old) * K(i,j)' ... 145 | - Y(j) * (alphas(j) - alpha_j_old) * K(i,j)'; 146 | b2 = b - E(j) ... 147 | - Y(i) * (alphas(i) - alpha_i_old) * K(i,j)' ... 148 | - Y(j) * (alphas(j) - alpha_j_old) * K(j,j)'; 149 | 150 | % Compute b by (19). 
151 | if (0 < alphas(i) && alphas(i) < C), 152 | b = b1; 153 | elseif (0 < alphas(j) && alphas(j) < C), 154 | b = b2; 155 | else 156 | b = (b1+b2)/2; 157 | end 158 | 159 | num_changed_alphas = num_changed_alphas + 1; 160 | 161 | end 162 | 163 | end 164 | 165 | if (num_changed_alphas == 0), 166 | passes = passes + 1; 167 | else 168 | passes = 0; 169 | end 170 | 171 | fprintf('.'); 172 | dots = dots + 1; 173 | if dots > 78 174 | dots = 0; 175 | fprintf('\n'); 176 | end 177 | if exist('OCTAVE_VERSION') 178 | fflush(stdout); 179 | end 180 | end 181 | fprintf(' Done! \n\n'); 182 | 183 | % Save the model 184 | idx = alphas > 0; 185 | model.X= X(idx,:); 186 | model.y= Y(idx); 187 | model.kernelFunction = kernelFunction; 188 | model.b= b; 189 | model.alphas= alphas(idx); 190 | model.w = ((alphas.*Y)'*X)'; 191 | 192 | end 193 | -------------------------------------------------------------------------------- /ex6/visualizeBoundary.m: -------------------------------------------------------------------------------- 1 | function visualizeBoundary(X, y, model, varargin) 2 | %VISUALIZEBOUNDARY plots a non-linear decision boundary learned by the SVM 3 | % VISUALIZEBOUNDARYLINEAR(X, y, model) plots a non-linear decision 4 | % boundary learned by the SVM and overlays the data on it 5 | 6 | % Plot the training data on top of the boundary 7 | plotData(X, y) 8 | 9 | % Make classification predictions over a grid of values 10 | x1plot = linspace(min(X(:,1)), max(X(:,1)), 100)'; 11 | x2plot = linspace(min(X(:,2)), max(X(:,2)), 100)'; 12 | [X1, X2] = meshgrid(x1plot, x2plot); 13 | vals = zeros(size(X1)); 14 | for i = 1:size(X1, 2) 15 | this_X = [X1(:, i), X2(:, i)]; 16 | vals(:, i) = svmPredict(model, this_X); 17 | end 18 | 19 | % Plot the SVM boundary 20 | hold on 21 | contour(X1, X2, vals, [0 0], 'Color', 'b'); 22 | hold off; 23 | 24 | end 25 | -------------------------------------------------------------------------------- /ex6/visualizeBoundaryLinear.m: -------------------------------------------------------------------------------- 1 | function visualizeBoundaryLinear(X, y, model) 2 | %VISUALIZEBOUNDARYLINEAR plots a linear decision boundary learned by the 3 | %SVM 4 | % VISUALIZEBOUNDARYLINEAR(X, y, model) plots a linear decision boundary 5 | % learned by the SVM and overlays the data on it 6 | 7 | w = model.w; 8 | b = model.b; 9 | xp = linspace(min(X(:,1)), max(X(:,1)), 100); 10 | yp = - (w(1)*xp + b)/w(2); 11 | plotData(X, y); 12 | hold on; 13 | plot(xp, yp, '-b'); 14 | hold off 15 | 16 | end 17 | -------------------------------------------------------------------------------- /ex7.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/ex7.pdf -------------------------------------------------------------------------------- /ex7/bird_small.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/ex7/bird_small.mat -------------------------------------------------------------------------------- /ex7/bird_small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/ex7/bird_small.png -------------------------------------------------------------------------------- /ex7/computeCentroids.m: 
-------------------------------------------------------------------------------- 1 | function centroids = computeCentroids(X, idx, K) 2 | %COMPUTECENTROIDS returs the new centroids by computing the means of the 3 | %data points assigned to each centroid. 4 | % centroids = COMPUTECENTROIDS(X, idx, K) returns the new centroids by 5 | % computing the means of the data points assigned to each centroid. It is 6 | % given a dataset X where each row is a single data point, a vector 7 | % idx of centroid assignments (i.e. each entry in range [1..K]) for each 8 | % example, and K, the number of centroids. You should return a matrix 9 | % centroids, where each row of centroids is the mean of the data points 10 | % assigned to it. 11 | % 12 | 13 | % Useful variables 14 | [m n] = size(X); 15 | 16 | % You need to return the following variables correctly. 17 | centroids = zeros(K, n); 18 | 19 | 20 | % ====================== YOUR CODE HERE ====================== 21 | % Instructions: Go over every centroid and compute mean of all points that 22 | % belong to it. Concretely, the row vector centroids(i, :) 23 | % should contain the mean of the data points assigned to 24 | % centroid i. 25 | % 26 | % Note: You can use a for-loop over the centroids to compute this. 27 | % 28 | 29 | % TODO(SaveTheRbtz@): See if it can be futher vectorized 30 | for k = 1:K 31 | point_indeces = find(idx==k); 32 | centroids(k, :) = sum(X(point_indeces, :)) ./ length(point_indeces); 33 | 34 | % ============================================================= 35 | 36 | 37 | end 38 | 39 | -------------------------------------------------------------------------------- /ex7/displayData.m: -------------------------------------------------------------------------------- 1 | function [h, display_array] = displayData(X, example_width) 2 | %DISPLAYDATA Display 2D data in a nice grid 3 | % [h, display_array] = DISPLAYDATA(X, example_width) displays 2D data 4 | % stored in X in a nice grid. It returns the figure handle h and the 5 | % displayed array if requested. 6 | 7 | % Set example_width automatically if not passed in 8 | if ~exist('example_width', 'var') || isempty(example_width) 9 | example_width = round(sqrt(size(X, 2))); 10 | end 11 | 12 | % Gray Image 13 | colormap(gray); 14 | 15 | % Compute rows, cols 16 | [m n] = size(X); 17 | example_height = (n / example_width); 18 | 19 | % Compute number of items to display 20 | display_rows = floor(sqrt(m)); 21 | display_cols = ceil(m / display_rows); 22 | 23 | % Between images padding 24 | pad = 1; 25 | 26 | % Setup blank display 27 | display_array = - ones(pad + display_rows * (example_height + pad), ... 28 | pad + display_cols * (example_width + pad)); 29 | 30 | % Copy each example into a patch on the display array 31 | curr_ex = 1; 32 | for j = 1:display_rows 33 | for i = 1:display_cols 34 | if curr_ex > m, 35 | break; 36 | end 37 | % Copy the patch 38 | 39 | % Get the max value of the patch 40 | max_val = max(abs(X(curr_ex, :))); 41 | display_array(pad + (j - 1) * (example_height + pad) + (1:example_height), ... 42 | pad + (i - 1) * (example_width + pad) + (1:example_width)) = ... 
43 | reshape(X(curr_ex, :), example_height, example_width) / max_val; 44 | curr_ex = curr_ex + 1; 45 | end 46 | if curr_ex > m, 47 | break; 48 | end 49 | end 50 | 51 | % Display Image 52 | h = imagesc(display_array, [-1 1]); 53 | 54 | % Do not show axis 55 | axis image off 56 | 57 | drawnow; 58 | 59 | end 60 | -------------------------------------------------------------------------------- /ex7/drawLine.m: -------------------------------------------------------------------------------- 1 | function drawLine(p1, p2, varargin) 2 | %DRAWLINE Draws a line from point p1 to point p2 3 | % DRAWLINE(p1, p2) Draws a line from point p1 to point p2 and holds the 4 | % current figure 5 | 6 | plot([p1(1) p2(1)], [p1(2) p2(2)], varargin{:}); 7 | 8 | end -------------------------------------------------------------------------------- /ex7/ex7.m: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env octave 2 | %% Machine Learning Online Class 3 | % Exercise 7 | Principle Component Analysis and K-Means Clustering 4 | % 5 | % Instructions 6 | % ------------ 7 | % 8 | % This file contains code that helps you get started on the 9 | % exercise. You will need to complete the following functions: 10 | % 11 | % pca.m 12 | % projectData.m 13 | % recoverData.m 14 | % computeCentroids.m 15 | % findClosestCentroids.m 16 | % kMeansInitCentroids.m 17 | % 18 | % For this exercise, you will not need to change any code in this file, 19 | % or any other files other than those mentioned above. 20 | % 21 | 22 | %% Initialization 23 | clear ; close all; clc 24 | 25 | %% ================= Part 1: Find Closest Centroids ==================== 26 | % To help you implement K-Means, we have divided the learning algorithm 27 | % into two functions -- findClosestCentroids and computeCentroids. In this 28 | % part, you shoudl complete the code in the findClosestCentroids function. 29 | % 30 | fprintf('Finding closest centroids.\n\n'); 31 | 32 | % Load an example dataset that we will be using 33 | load('ex7data2.mat'); 34 | 35 | % Select an initial set of centroids 36 | K = 3; % 3 Centroids 37 | initial_centroids = [3 3; 6 2; 8 5]; 38 | 39 | % Find the closest centroids for the examples using the 40 | % initial_centroids 41 | idx = findClosestCentroids(X, initial_centroids); 42 | 43 | fprintf('Closest centroids for the first 3 examples: \n') 44 | fprintf(' %d', idx(1:3)); 45 | fprintf('\n(the closest centroids should be 1, 3, 2 respectively)\n'); 46 | 47 | fprintf('Program paused. Press enter to continue.\n'); 48 | pause; 49 | 50 | %% ===================== Part 2: Compute Means ========================= 51 | % After implementing the closest centroids function, you should now 52 | % complete the computeCentroids function. 53 | % 54 | fprintf('\nComputing centroids means.\n\n'); 55 | 56 | % Compute means based on the closest centroids found in the previous part. 57 | centroids = computeCentroids(X, idx, K); 58 | 59 | fprintf('Centroids computed after initial finding of closest centroids: \n') 60 | fprintf(' %f %f \n' , centroids'); 61 | fprintf('\n(the centroids should be\n'); 62 | fprintf(' [ 2.428301 3.157924 ]\n'); 63 | fprintf(' [ 5.813503 2.633656 ]\n'); 64 | fprintf(' [ 7.119387 3.616684 ]\n\n'); 65 | 66 | fprintf('Program paused. 
Press enter to continue.\n'); 67 | pause; 68 | 69 | 70 | %% =================== Part 3: K-Means Clustering ====================== 71 | % After you have completed the two functions computeCentroids and 72 | % findClosestCentroids, you have all the necessary pieces to run the 73 | % kMeans algorithm. In this part, you will run the K-Means algorithm on 74 | % the example dataset we have provided. 75 | % 76 | fprintf('\nRunning K-Means clustering on example dataset.\n\n'); 77 | 78 | % Load an example dataset 79 | load('ex7data2.mat'); 80 | 81 | % Settings for running K-Means 82 | K = 3; 83 | max_iters = 10; 84 | 85 | % For consistency, here we set centroids to specific values 86 | % but in practice you want to generate them automatically, such as by 87 | % setting them to be random examples (as can be seen in 88 | % kMeansInitCentroids). 89 | initial_centroids = [3 3; 6 2; 8 5]; 90 | 91 | % Run K-Means algorithm. The 'true' at the end tells our function to plot 92 | % the progress of K-Means 93 | [centroids, idx] = runkMeans(X, initial_centroids, max_iters, true); 94 | fprintf('\nK-Means Done.\n\n'); 95 | 96 | fprintf('Program paused. Press enter to continue.\n'); 97 | pause; 98 | 99 | %% ============= Part 4: K-Means Clustering on Pixels =============== 100 | % In this exercise, you will use K-Means to compress an image. To do this, 101 | % you will first run K-Means on the colors of the pixels in the image and 102 | % then you will map each pixel onto its closest centroid. 103 | % 104 | % You should now complete the code in kMeansInitCentroids.m 105 | % 106 | 107 | fprintf('\nRunning K-Means clustering on pixels from an image.\n\n'); 108 | 109 | % Load an image of a bird 110 | A = double(imread('bird_small.png')); 111 | 112 | % If imread does not work for you, you can try instead 113 | % load ('bird_small.mat'); 114 | 115 | A = A / 255; % Divide by 255 so that all values are in the range 0 - 1 116 | 117 | % Size of the image 118 | img_size = size(A); 119 | 120 | % Reshape the image into an Nx3 matrix where N = number of pixels. 121 | % Each row will contain the Red, Green and Blue pixel values 122 | % This gives us our dataset matrix X that we will use K-Means on. 123 | X = reshape(A, img_size(1) * img_size(2), 3); 124 | 125 | % Run your K-Means algorithm on this data 126 | % You should try different values of K and max_iters here 127 | K = 16; 128 | max_iters = 10; 129 | 130 | % When using K-Means, it is important to initialize the centroids 131 | % randomly. 132 | % You should complete the code in kMeansInitCentroids.m before proceeding 133 | initial_centroids = kMeansInitCentroids(X, K); 134 | 135 | % Run K-Means 136 | [centroids, idx] = runkMeans(X, initial_centroids, max_iters); 137 | 138 | fprintf('Program paused. Press enter to continue.\n'); 139 | pause; 140 | 141 | 142 | %% ================= Part 5: Image Compression ====================== 143 | % In this part of the exercise, you will use the clusters of K-Means to 144 | % compress an image. To do this, we first find the closest cluster for 145 | % each example. After that, we replace each pixel with the value of the centroid assigned to it. 146 | 147 | fprintf('\nApplying K-Means to compress an image.\n\n'); 148 | 149 | % Find closest cluster members 150 | idx = findClosestCentroids(X, centroids); 151 | 152 | % Essentially, now we have represented the image X in terms of the 153 | % indices in idx.
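% (This is where the compression comes from: the original image stores
% 24 bits per RGB pixel, while the compressed representation keeps only the
% 16 centroid colors (16 x 24 bits) plus log2(16) = 4 bits per pixel to index
% them, roughly a factor of 6 smaller for the 128x128 image used here.)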
154 | 155 | % We can now recover the image from the indices (idx) by mapping each pixel 156 | % (specified by it's index in idx) to the centroid value 157 | X_recovered = centroids(idx,:); 158 | 159 | % Reshape the recovered image into proper dimensions 160 | X_recovered = reshape(X_recovered, img_size(1), img_size(2), 3); 161 | 162 | % Display the original image 163 | subplot(1, 2, 1); 164 | imagesc(A); 165 | title('Original'); 166 | 167 | % Display compressed image side by side 168 | subplot(1, 2, 2); 169 | imagesc(X_recovered) 170 | title(sprintf('Compressed, with %d colors.', K)); 171 | 172 | 173 | fprintf('Program paused. Press enter to continue.\n'); 174 | pause; 175 | 176 | -------------------------------------------------------------------------------- /ex7/ex7_pca.m: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env octave 2 | %% Machine Learning Online Class 3 | % Exercise 7 | Principle Component Analysis and K-Means Clustering 4 | % 5 | % Instructions 6 | % ------------ 7 | % 8 | % This file contains code that helps you get started on the 9 | % exercise. You will need to complete the following functions: 10 | % 11 | % pca.m 12 | % projectData.m 13 | % recoverData.m 14 | % computeCentroids.m 15 | % findClosestCentroids.m 16 | % kMeansInitCentroids.m 17 | % 18 | % For this exercise, you will not need to change any code in this file, 19 | % or any other files other than those mentioned above. 20 | % 21 | 22 | %% Initialization 23 | clear ; close all; clc 24 | 25 | %% ================== Part 1: Load Example Dataset =================== 26 | % We start this exercise by using a small dataset that is easily to 27 | % visualize 28 | % 29 | fprintf('Visualizing example dataset for PCA.\n\n'); 30 | 31 | % The following command loads the dataset. You should now have the 32 | % variable X in your environment 33 | load ('ex7data1.mat'); 34 | 35 | % Visualize the example dataset 36 | plot(X(:, 1), X(:, 2), 'bo'); 37 | axis([0.5 6.5 2 8]); axis square; 38 | 39 | fprintf('Program paused. Press enter to continue.\n'); 40 | pause; 41 | 42 | 43 | %% =============== Part 2: Principal Component Analysis =============== 44 | % You should now implement PCA, a dimension reduction technique. You 45 | % should complete the code in pca.m 46 | % 47 | fprintf('\nRunning PCA on example dataset.\n\n'); 48 | 49 | % Before running PCA, it is important to first normalize X 50 | [X_norm, mu, sigma] = featureNormalize(X); 51 | 52 | % Run PCA 53 | [U, S] = pca(X_norm); 54 | 55 | % Compute mu, the mean of the each feature 56 | 57 | % Draw the eigenvectors centered at mean of data. These lines show the 58 | % directions of maximum variations in the dataset. 59 | hold on; 60 | drawLine(mu, mu + 1.5 * S(1,1) * U(:,1)', '-k', 'LineWidth', 2); 61 | drawLine(mu, mu + 1.5 * S(2,2) * U(:,2)', '-k', 'LineWidth', 2); 62 | hold off; 63 | 64 | fprintf('Top eigenvector: \n'); 65 | fprintf(' U(:,1) = %f %f \n', U(1,1), U(2,1)); 66 | fprintf('\n(you should expect to see -0.707107 -0.707107)\n'); 67 | 68 | fprintf('Program paused. Press enter to continue.\n'); 69 | pause; 70 | 71 | 72 | %% =================== Part 3: Dimension Reduction =================== 73 | % You should now implement the projection step to map the data onto the 74 | % first k eigenvectors. The code will then plot the data in this reduced 75 | % dimensional space. This will show you what the data looks like when 76 | % using only the corresponding eigenvectors to reconstruct it. 
77 | % 78 | % You should complete the code in projectData.m 79 | % 80 | fprintf('\nDimension reduction on example dataset.\n\n'); 81 | 82 | % Plot the normalized dataset (returned from pca) 83 | plot(X_norm(:, 1), X_norm(:, 2), 'bo'); 84 | axis([-4 3 -4 3]); axis square 85 | 86 | % Project the data onto K = 1 dimension 87 | K = 1; 88 | Z = projectData(X_norm, U, K); 89 | fprintf('Projection of the first example: %f\n', Z(1)); 90 | fprintf('\n(this value should be about 1.481274)\n\n'); 91 | 92 | X_rec = recoverData(Z, U, K); 93 | fprintf('Approximation of the first example: %f %f\n', X_rec(1, 1), X_rec(1, 2)); 94 | fprintf('\n(this value should be about -1.047419 -1.047419)\n\n'); 95 | 96 | % Draw lines connecting the projected points to the original points 97 | hold on; 98 | plot(X_rec(:, 1), X_rec(:, 2), 'ro'); 99 | for i = 1:size(X_norm, 1) 100 | drawLine(X_norm(i,:), X_rec(i,:), '--k', 'LineWidth', 1); 101 | end 102 | hold off 103 | 104 | fprintf('Program paused. Press enter to continue.\n'); 105 | pause; 106 | 107 | %% =============== Part 4: Loading and Visualizing Face Data ============= 108 | % We start the exercise by first loading and visualizing the dataset. 109 | % The following code will load the dataset into your environment 110 | % 111 | fprintf('\nLoading face dataset.\n\n'); 112 | 113 | % Load Face dataset 114 | load ('ex7faces.mat') 115 | 116 | % Display the first 100 faces in the dataset 117 | displayData(X(1:100, :)); 118 | 119 | fprintf('Program paused. Press enter to continue.\n'); 120 | pause; 121 | 122 | %% =========== Part 5: PCA on Face Data: Eigenfaces =================== 123 | % Run PCA and visualize the eigenvectors which are in this case eigenfaces 124 | % We display the first 36 eigenfaces. 125 | % 126 | fprintf(['\nRunning PCA on face dataset.\n' ... 127 | '(this mght take a minute or two ...)\n\n']); 128 | 129 | % Before running PCA, it is important to first normalize X by subtracting 130 | % the mean value from each feature 131 | [X_norm, mu, sigma] = featureNormalize(X); 132 | 133 | % Run PCA 134 | [U, S] = pca(X_norm); 135 | 136 | % Visualize the top 36 eigenvectors found 137 | displayData(U(:, 1:36)'); 138 | 139 | fprintf('Program paused. Press enter to continue.\n'); 140 | pause; 141 | 142 | 143 | %% ============= Part 6: Dimension Reduction for Faces ================= 144 | % Project images to the eigen space using the top k eigenvectors 145 | % If you are applying a machine learning algorithm 146 | fprintf('\nDimension reduction for face dataset.\n\n'); 147 | 148 | K = 100; 149 | Z = projectData(X_norm, U, K); 150 | 151 | fprintf('The projected data Z has a size of: ') 152 | fprintf('%d ', size(Z)); 153 | 154 | fprintf('\n\nProgram paused. Press enter to continue.\n'); 155 | pause; 156 | 157 | %% ==== Part 7: Visualization of Faces after PCA Dimension Reduction ==== 158 | % Project images to the eigen space using the top K eigen vectors and 159 | % visualize only using those K dimensions 160 | % Compare to the original input, which is also displayed 161 | 162 | fprintf('\nVisualizing the projected (reduced dimension) faces.\n\n'); 163 | 164 | K = 100; 165 | X_rec = recoverData(Z, U, K); 166 | 167 | % Display normalized data 168 | subplot(1, 2, 1); 169 | displayData(X_norm(1:100,:)); 170 | title('Original faces'); 171 | axis square; 172 | 173 | % Display reconstructed data from only k eigenfaces 174 | subplot(1, 2, 2); 175 | displayData(X_rec(1:100,:)); 176 | title('Recovered faces'); 177 | axis square; 178 | 179 | fprintf('Program paused. 
Press enter to continue.\n'); 180 | pause; 181 | 182 | 183 | %% === Part 8(a): Optional (ungraded) Exercise: PCA for Visualization === 184 | % One useful application of PCA is to use it to visualize high-dimensional 185 | % data. In the last K-Means exercise you ran K-Means on 3-dimensional 186 | % pixel colors of an image. We first visualize this output in 3D, and then 187 | % apply PCA to obtain a visualization in 2D. 188 | 189 | close all; close all; clc 190 | 191 | % Re-load the image from the previous exercise and run K-Means on it 192 | % For this to work, you need to complete the K-Means assignment first 193 | A = double(imread('bird_small.png')); 194 | 195 | % If imread does not work for you, you can try instead 196 | % load ('bird_small.mat'); 197 | 198 | A = A / 255; 199 | img_size = size(A); 200 | X = reshape(A, img_size(1) * img_size(2), 3); 201 | K = 16; 202 | max_iters = 10; 203 | initial_centroids = kMeansInitCentroids(X, K); 204 | [centroids, idx] = runkMeans(X, initial_centroids, max_iters); 205 | 206 | % Sample 1000 random indexes (since working with all the data is 207 | % too expensive. If you have a fast computer, you may increase this. 208 | sel = floor(rand(1000, 1) * size(X, 1)) + 1; 209 | 210 | % Setup Color Palette 211 | palette = hsv(K); 212 | colors = palette(idx(sel), :); 213 | 214 | % Visualize the data and centroid memberships in 3D 215 | figure; 216 | scatter3(X(sel, 1), X(sel, 2), X(sel, 3), 10, colors); 217 | title('Pixel dataset plotted in 3D. Color shows centroid memberships'); 218 | fprintf('Program paused. Press enter to continue.\n'); 219 | pause; 220 | 221 | %% === Part 8(b): Optional (ungraded) Exercise: PCA for Visualization === 222 | % Use PCA to project this cloud to 2D for visualization 223 | 224 | % Subtract the mean to use PCA 225 | [X_norm, mu, sigma] = featureNormalize(X); 226 | 227 | % PCA and project the data to 2D 228 | [U, S] = pca(X_norm); 229 | Z = projectData(X_norm, U, 2); 230 | 231 | % Plot in 2D 232 | figure; 233 | plotDataPoints(Z(sel, :), idx(sel), K); 234 | title('Pixel dataset plotted in 2D, using PCA for dimensionality reduction'); 235 | fprintf('Program paused. 
Press enter to continue.\n'); 236 | pause; 237 | -------------------------------------------------------------------------------- /ex7/ex7data1.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/ex7/ex7data1.mat -------------------------------------------------------------------------------- /ex7/ex7data2.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/ex7/ex7data2.mat -------------------------------------------------------------------------------- /ex7/ex7faces.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/ex7/ex7faces.mat -------------------------------------------------------------------------------- /ex7/featureNormalize.m: -------------------------------------------------------------------------------- 1 | ../ex1/featureNormalize.m -------------------------------------------------------------------------------- /ex7/findClosestCentroids.m: -------------------------------------------------------------------------------- 1 | function idx = findClosestCentroids(X, centroids) 2 | %FINDCLOSESTCENTROIDS computes the centroid memberships for every example 3 | % idx = FINDCLOSESTCENTROIDS (X, centroids) returns the closest centroids 4 | % in idx for a dataset X where each row is a single example. idx = m x 1 5 | % vector of centroid assignments (i.e. each entry in range [1..K]) 6 | % 7 | 8 | % Set K 9 | K = size(centroids, 1); 10 | 11 | % You need to return the following variables correctly. 12 | idx = zeros(size(X,1), 1); 13 | 14 | % ====================== YOUR CODE HERE ====================== 15 | % Instructions: Go over every example, find its closest centroid, and store 16 | % the index inside idx at the appropriate location. 17 | % Concretely, idx(i) should contain the index of the centroid 18 | % closest to example i. Hence, it should be a value in the 19 | % range 1..K 20 | % 21 | % Note: You can use a for-loop over the examples to compute this. 
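% A possible vectorized alternative (just a sketch, not the graded solution):
% build an m x K matrix of squared distances, one column per centroid, and
% take the row-wise minimum:
%   D = zeros(size(X, 1), K);
%   for j = 1:K
%     diff = bsxfun(@minus, X, centroids(j, :));   % subtract centroid j from every example
%     D(:, j) = sum(diff .^ 2, 2);                 % squared distance of each example to centroid j
%   end
%   [dummy, idx] = min(D, [], 2);                  % index of the nearest centroid per example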
22 | % 23 | 24 | % TODO(SaveTheRbtz@): Think how it can be vectorized 25 | for i = 1:length(idx) 26 | distance = zeros(K, 1); 27 | for j = 1:K 28 | % TODO(SaveTheRbtz@): Can be vectorized as diff * diff' 29 | distance(j) = sum(sum((X(i, :) - centroids(j, :)) .^ 2 )); 30 | endfor 31 | [value, idx(i)] = min(distance); 32 | endfor 33 | 34 | % ============================================================= 35 | 36 | end 37 | 38 | -------------------------------------------------------------------------------- /ex7/kMeansInitCentroids.m: -------------------------------------------------------------------------------- 1 | function centroids = kMeansInitCentroids(X, K) 2 | %KMEANSINITCENTROIDS This function initializes K centroids that are to be 3 | %used in K-Means on the dataset X 4 | % centroids = KMEANSINITCENTROIDS(X, K) returns K initial centroids to be 5 | % used with the K-Means on the dataset X 6 | % 7 | 8 | % You should return this values correctly 9 | centroids = zeros(K, size(X, 2)); 10 | 11 | % ====================== YOUR CODE HERE ====================== 12 | % Instructions: You should set centroids to randomly chosen examples from 13 | % the dataset X 14 | % 15 | 16 | % Initialize the centroids to be random examples 17 | % Randomly reorder the indices of examples 18 | randidx = randperm(size(X, 1)); 19 | % Take the first K examples as centroids 20 | centroids = X(randidx(1:K), :); 21 | 22 | % ============================================================= 23 | 24 | end 25 | 26 | -------------------------------------------------------------------------------- /ex7/pca.m: -------------------------------------------------------------------------------- 1 | function [U, S] = pca(X) 2 | %PCA Run principal component analysis on the dataset X 3 | % [U, S, X] = pca(X) computes eigenvectors of the covariance matrix of X 4 | % Returns the eigenvectors U, the eigenvalues (on diagonal) in S 5 | % 6 | 7 | % Useful values 8 | [m, n] = size(X); 9 | 10 | % You need to return the following variables correctly. 11 | U = zeros(n); 12 | S = zeros(n); 13 | 14 | % ====================== YOUR CODE HERE ====================== 15 | % Instructions: You should first compute the covariance matrix. Then, you 16 | % should use the "svd" function to compute the eigenvectors 17 | % and eigenvalues of the covariance matrix. 18 | % 19 | % Note: When computing the covariance matrix, remember to divide by m (the 20 | % number of examples). 
21 | % 22 | 23 | Sigma = (X'*X) ./ m; 24 | [U, S, V] = svd(Sigma); 25 | 26 | 27 | % ========================================================================= 28 | 29 | end 30 | -------------------------------------------------------------------------------- /ex7/plotDataPoints.m: -------------------------------------------------------------------------------- 1 | function plotDataPoints(X, idx, K) 2 | %PLOTDATAPOINTS plots data points in X, coloring them so that those with the same 3 | %index assignments in idx have the same color 4 | % PLOTDATAPOINTS(X, idx, K) plots data points in X, coloring them so that those 5 | % with the same index assignments in idx have the same color 6 | 7 | % Create palette 8 | palette = hsv(K + 1); 9 | colors = palette(idx, :); 10 | 11 | % Plot the data 12 | scatter(X(:,1), X(:,2), 15, colors); 13 | 14 | end 15 | -------------------------------------------------------------------------------- /ex7/plotProgresskMeans.m: -------------------------------------------------------------------------------- 1 | function plotProgresskMeans(X, centroids, previous, idx, K, i) 2 | %PLOTPROGRESSKMEANS is a helper function that displays the progress of 3 | %k-Means as it is running. It is intended for use only with 2D data. 4 | % PLOTPROGRESSKMEANS(X, centroids, previous, idx, K, i) plots the data 5 | % points with colors assigned to each centroid. With the previous 6 | % centroids, it also plots a line between the previous locations and 7 | % current locations of the centroids. 8 | % 9 | 10 | % Plot the examples 11 | plotDataPoints(X, idx, K); 12 | 13 | % Plot the centroids as black x's 14 | plot(centroids(:,1), centroids(:,2), 'x', ... 15 | 'MarkerEdgeColor','k', ... 16 | 'MarkerSize', 10, 'LineWidth', 3); 17 | 18 | % Plot the history of the centroids with lines 19 | for j=1:size(centroids,1) 20 | drawLine(centroids(j, :), previous(j, :)); 21 | end 22 | 23 | % Title 24 | title(sprintf('Iteration number %d', i)) 25 | 26 | end 27 | 28 | -------------------------------------------------------------------------------- /ex7/projectData.m: -------------------------------------------------------------------------------- 1 | function Z = projectData(X, U, K) 2 | %PROJECTDATA Computes the reduced data representation when projecting only 3 | %on to the top k eigenvectors 4 | % Z = projectData(X, U, K) computes the projection of 5 | % the normalized inputs X into the reduced dimensional space spanned by 6 | % the first K columns of U. It returns the projected examples in Z. 7 | % 8 | 9 | % You need to return the following variables correctly. 10 | Z = zeros(size(X, 1), K); 11 | 12 | % ====================== YOUR CODE HERE ====================== 13 | % Instructions: Compute the projection of the data using only the top K 14 | % eigenvectors in U (first K columns). 
15 | % For the i-th example X(i,:), the projection on to the k-th 16 | % eigenvector is given as follows: 17 | % x = X(i, :)'; 18 | % projection_k = x' * U(:, k); 19 | % 20 | 21 | U_reduce = U(:, 1:K); 22 | Z = X * U_reduce; 23 | 24 | % ============================================================= 25 | 26 | end 27 | -------------------------------------------------------------------------------- /ex7/recoverData.m: -------------------------------------------------------------------------------- 1 | function X_rec = recoverData(Z, U, K) 2 | %RECOVERDATA Recovers an approximation of the original data when using the 3 | %projected data 4 | % X_rec = RECOVERDATA(Z, U, K) recovers an approximation the 5 | % original data that has been reduced to K dimensions. It returns the 6 | % approximate reconstruction in X_rec. 7 | % 8 | 9 | % You need to return the following variables correctly. 10 | X_rec = zeros(size(Z, 1), size(U, 1)); 11 | 12 | % ====================== YOUR CODE HERE ====================== 13 | % Instructions: Compute the approximation of the data by projecting back 14 | % onto the original space using the top K eigenvectors in U. 15 | % 16 | % For the i-th example Z(i,:), the (approximate) 17 | % recovered data for dimension j is given as follows: 18 | % v = Z(i, :)'; 19 | % recovered_j = v' * U(j, 1:K)'; 20 | % 21 | % Notice that U(j, 1:K) is a row vector. 22 | % 23 | 24 | U_reduce = U(:, 1:K); 25 | X_rec = Z * U_reduce'; 26 | 27 | % ============================================================= 28 | 29 | end 30 | -------------------------------------------------------------------------------- /ex7/runkMeans.m: -------------------------------------------------------------------------------- 1 | function [centroids, idx] = runkMeans(X, initial_centroids, ... 2 | max_iters, plot_progress) 3 | %RUNKMEANS runs the K-Means algorithm on data matrix X, where each row of X 4 | %is a single example 5 | % [centroids, idx] = RUNKMEANS(X, initial_centroids, max_iters, ... 6 | % plot_progress) runs the K-Means algorithm on data matrix X, where each 7 | % row of X is a single example. It uses initial_centroids used as the 8 | % initial centroids. max_iters specifies the total number of interactions 9 | % of K-Means to execute. plot_progress is a true/false flag that 10 | % indicates if the function should also plot its progress as the 11 | % learning happens. This is set to false by default. runkMeans returns 12 | % centroids, a Kxn matrix of the computed centroids and idx, a m x 1 13 | % vector of centroid assignments (i.e. 
each entry in range [1..K]) 14 | % 15 | 16 | % Set default value for plot progress 17 | if ~exist('plot_progress', 'var') || isempty(plot_progress) 18 | plot_progress = false; 19 | end 20 | 21 | % Plot the data if we are plotting progress 22 | if plot_progress 23 | figure; 24 | hold on; 25 | end 26 | 27 | % Initialize values 28 | [m n] = size(X); 29 | K = size(initial_centroids, 1); 30 | centroids = initial_centroids; 31 | previous_centroids = centroids; 32 | idx = zeros(m, 1); 33 | 34 | % Run K-Means 35 | for i=1:max_iters 36 | 37 | % Output progress 38 | fprintf('K-Means iteration %d/%d...\n', i, max_iters); 39 | if exist('OCTAVE_VERSION') 40 | fflush(stdout); 41 | end 42 | 43 | % For each example in X, assign it to the closest centroid 44 | idx = findClosestCentroids(X, centroids); 45 | 46 | % Optionally, plot progress here 47 | if plot_progress 48 | plotProgresskMeans(X, centroids, previous_centroids, idx, K, i); 49 | previous_centroids = centroids; 50 | fprintf('Press enter to continue.\n'); 51 | pause; 52 | end 53 | 54 | % Given the memberships, compute new centroids 55 | centroids = computeCentroids(X, idx, K); 56 | end 57 | 58 | % Hold off if we are plotting progress 59 | if plot_progress 60 | hold off; 61 | end 62 | 63 | end 64 | 65 | -------------------------------------------------------------------------------- /ex8.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/ex8.pdf -------------------------------------------------------------------------------- /ex8/checkCostFunction.m: -------------------------------------------------------------------------------- 1 | function checkCostFunction(lambda) 2 | %CHECKCOSTFUNCTION Creates a collaborative filering problem 3 | %to check your cost function and gradients 4 | % CHECKCOSTFUNCTION(lambda) Creates a collaborative filering problem 5 | % to check your cost function and gradients, it will output the 6 | % analytical gradients produced by your code and the numerical gradients 7 | % (computed using computeNumericalGradient). These two gradient 8 | % computations should result in very similar values. 9 | 10 | % Set lambda 11 | if ~exist('lambda', 'var') || isempty(lambda) 12 | lambda = 0; 13 | end 14 | 15 | %% Create small problem 16 | X_t = rand(4, 3); 17 | Theta_t = rand(5, 3); 18 | 19 | % Zap out most entries 20 | Y = X_t * Theta_t'; 21 | Y(rand(size(Y)) > 0.5) = 0; 22 | R = zeros(size(Y)); 23 | R(Y ~= 0) = 1; 24 | 25 | %% Run Gradient Checking 26 | X = randn(size(X_t)); 27 | Theta = randn(size(Theta_t)); 28 | num_users = size(Y, 2); 29 | num_movies = size(Y, 1); 30 | num_features = size(Theta_t, 2); 31 | 32 | numgrad = computeNumericalGradient( ... 33 | @(t) cofiCostFunc(t, Y, R, num_users, num_movies, ... 34 | num_features, lambda), [X(:); Theta(:)]); 35 | 36 | [cost, grad] = cofiCostFunc([X(:); Theta(:)], Y, R, num_users, ... 37 | num_movies, num_features, lambda); 38 | 39 | disp([numgrad grad]); 40 | fprintf(['The above two columns you get should be very similar.\n' ... 41 | '(Left-Your Numerical Gradient, Right-Analytical Gradient)\n\n']); 42 | 43 | diff = norm(numgrad-grad)/norm(numgrad+grad); 44 | fprintf(['If your backpropagation implementation is correct, then \n' ... 45 | 'the relative difference will be small (less than 1e-9). \n' ... 
46 | '\nRelative Difference: %g\n'], diff); 47 | 48 | end -------------------------------------------------------------------------------- /ex8/cofiCostFunc.m: -------------------------------------------------------------------------------- 1 | function [J, grad] = cofiCostFunc(params, Y, R, num_users, num_movies, ... 2 | num_features, lambda) 3 | %COFICOSTFUNC Collaborative filtering cost function 4 | % [J, grad] = COFICOSTFUNC(params, Y, R, num_users, num_movies, ... 5 | % num_features, lambda) returns the cost and gradient for the 6 | % collaborative filtering problem. 7 | % 8 | 9 | % Unfold the U and W matrices from params 10 | X = reshape(params(1:num_movies*num_features), num_movies, num_features); 11 | Theta = reshape(params(num_movies*num_features+1:end), ... 12 | num_users, num_features); 13 | 14 | 15 | % You need to return the following values correctly 16 | J = 0; 17 | X_grad = zeros(size(X)); 18 | Theta_grad = zeros(size(Theta)); 19 | 20 | % ====================== YOUR CODE HERE ====================== 21 | % Instructions: Compute the cost function and gradient for collaborative 22 | % filtering. Concretely, you should first implement the cost 23 | % function (without regularization) and make sure it 24 | % matches our costs. After that, you should implement the 25 | % gradient and use the checkCostFunction routine to check 26 | % that the gradient is correct. Finally, you should implement 27 | % regularization. 28 | % 29 | % Notes: X - num_movies x num_features matrix of movie features 30 | % Theta - num_users x num_features matrix of user features 31 | % Y - num_movies x num_users matrix of user ratings of movies 32 | % R - num_movies x num_users matrix, where R(i, j) = 1 if the 33 | % i-th movie was rated by the j-th user 34 | % 35 | % You should set the following variables correctly: 36 | % 37 | % X_grad - num_movies x num_features matrix, containing the 38 | % partial derivatives w.r.t. each element of X 39 | % Theta_grad - num_users x num_features matrix, containing the 40 | % partial derivatives w.r.t. each element of Theta 41 | % 42 | 43 | % FIXME(SaveTheRbtz@): Not optimal: performs calculations on cells with R(i,j) == 0 44 | J = sum(sum((R==1) .* ((X * Theta' - Y) .^ 2))) / 2; 45 | 46 | X_grad = (R==1) .* (X * Theta' - Y) * Theta + lambda * X; 47 | Theta_grad = (R==1)' .* (X * Theta' - Y)' * X + lambda * Theta; 48 | 49 | Regularization = lambda * (sum(sum(Theta .^ 2)) + sum(sum(X .^ 2))) / 2; 50 | J += Regularization; 51 | 52 | % ============================================================= 53 | 54 | grad = [X_grad(:); Theta_grad(:)]; 55 | 56 | end 57 | -------------------------------------------------------------------------------- /ex8/computeNumericalGradient.m: -------------------------------------------------------------------------------- 1 | function numgrad = computeNumericalGradient(J, theta) 2 | %COMPUTENUMERICALGRADIENT Computes the gradient using "finite differences" 3 | %and gives us a numerical estimate of the gradient. 4 | % numgrad = COMPUTENUMERICALGRADIENT(J, theta) computes the numerical 5 | % gradient of the function J around theta. Calling y = J(theta) should 6 | % return the function value at theta. 7 | 8 | % Notes: The following code implements numerical gradient checking, and 9 | % returns the numerical gradient. It sets numgrad(i) to (a numerical 10 | % approximation of) the partial derivative of J with respect to the 11 | % i-th input argument, evaluated at theta.
(i.e., numgrad(i) should 12 | % be the (approximately) the partial derivative of J with respect 13 | % to theta(i).) 14 | % 15 | 16 | numgrad = zeros(size(theta)); 17 | perturb = zeros(size(theta)); 18 | e = 1e-4; 19 | for p = 1:numel(theta) 20 | % Set perturbation vector 21 | perturb(p) = e; 22 | loss1 = J(theta - perturb); 23 | loss2 = J(theta + perturb); 24 | % Compute Numerical Gradient 25 | numgrad(p) = (loss2 - loss1) / (2*e); 26 | perturb(p) = 0; 27 | end 28 | 29 | end 30 | -------------------------------------------------------------------------------- /ex8/estimateGaussian.m: -------------------------------------------------------------------------------- 1 | function [mu sigma2] = estimateGaussian(X) 2 | %ESTIMATEGAUSSIAN This function estimates the parameters of a 3 | %Gaussian distribution using the data in X 4 | % [mu sigma2] = estimateGaussian(X), 5 | % The input X is the dataset with each n-dimensional data point in one row 6 | % The output is an n-dimensional vector mu, the mean of the data set 7 | % and the variances sigma^2, an n x 1 vector 8 | % 9 | 10 | % Useful variables 11 | [m, n] = size(X); 12 | 13 | % You should return these values correctly 14 | mu = zeros(n, 1); 15 | sigma2 = zeros(n, 1); 16 | 17 | % ====================== YOUR CODE HERE ====================== 18 | % Instructions: Compute the mean of the data and the variances 19 | % In particular, mu(i) should contain the mean of 20 | % the data for the i-th feature and sigma2(i) 21 | % should contain variance of the i-th feature. 22 | % 23 | 24 | mu = mean(X); 25 | sigma2 = var(X, 1); 26 | 27 | % ============================================================= 28 | 29 | 30 | end 31 | -------------------------------------------------------------------------------- /ex8/ex8.m: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env octave 2 | %% Machine Learning Online Class 3 | % Exercise 8 | Anomaly Detection and Collaborative Filtering 4 | % 5 | % Instructions 6 | % ------------ 7 | % 8 | % This file contains code that helps you get started on the 9 | % exercise. You will need to complete the following functions: 10 | % 11 | % estimateGaussian.m 12 | % selectThreshold.m 13 | % cofiCostFunc.m 14 | % 15 | % For this exercise, you will not need to change any code in this file, 16 | % or any other files other than those mentioned above. 17 | % 18 | 19 | %% Initialization 20 | clear ; close all; clc 21 | 22 | %% ================== Part 1: Load Example Dataset =================== 23 | % We start this exercise by using a small dataset that is easy to 24 | % visualize. 25 | % 26 | % Our example case consists of 2 network server statistics across 27 | % several machines: the latency and throughput of each machine. 28 | % This exercise will help us find possibly faulty (or very fast) machines. 29 | % 30 | 31 | fprintf('Visualizing example dataset for outlier detection.\n\n'); 32 | 33 | % The following command loads the dataset. You should now have the 34 | % variables X, Xval, yval in your environment 35 | load('ex8data1.mat'); 36 | 37 | % Visualize the example dataset 38 | plot(X(:, 1), X(:, 2), 'bx'); 39 | axis([0 30 0 30]); 40 | xlabel('Latency (ms)'); 41 | ylabel('Throughput (mb/s)'); 42 | 43 | fprintf('Program paused. Press enter to continue.\n'); 44 | pause 45 | 46 | 47 | %% ================== Part 2: Estimate the dataset statistics =================== 48 | % For this exercise, we assume a Gaussian distribution for the dataset. 
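% (With the per-feature variances used here, the assumed density factorizes
% over features; a sketch of the model that estimateGaussian.m fits and
% multivariateGaussian.m evaluates:
%   p(x) = prod_j 1/sqrt(2*pi*sigma2(j)) * exp(-(x(j) - mu(j))^2 / (2*sigma2(j)))
% i.e. an independent Gaussian with mean mu(j) and variance sigma2(j) for each
% feature j.)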
49 | % 50 | % We first estimate the parameters of our assumed Gaussian distribution, 51 | % then compute the probabilities for each of the points and then visualize 52 | % both the overall distribution and where each of the points falls in 53 | % terms of that distribution. 54 | % 55 | fprintf('Visualizing Gaussian fit.\n\n'); 56 | 57 | % Estimate my and sigma2 58 | [mu sigma2] = estimateGaussian(X); 59 | 60 | % Returns the density of the multivariate normal at each data point (row) 61 | % of X 62 | p = multivariateGaussian(X, mu, sigma2); 63 | 64 | % Visualize the fit 65 | visualizeFit(X, mu, sigma2); 66 | xlabel('Latency (ms)'); 67 | ylabel('Throughput (mb/s)'); 68 | 69 | fprintf('Program paused. Press enter to continue.\n'); 70 | pause; 71 | 72 | %% ================== Part 3: Find Outliers =================== 73 | % Now you will find a good epsilon threshold using a cross-validation set 74 | % probabilities given the estimated Gaussian distribution 75 | % 76 | 77 | pval = multivariateGaussian(Xval, mu, sigma2); 78 | 79 | [epsilon F1] = selectThreshold(yval, pval); 80 | fprintf('Best epsilon found using cross-validation: %e\n', epsilon); 81 | fprintf('Best F1 on Cross Validation Set: %f\n', F1); 82 | fprintf(' (you should see a value epsilon of about 8.99e-05)\n\n'); 83 | 84 | % Find the outliers in the training set and plot the 85 | outliers = find(p < epsilon); 86 | 87 | % Draw a red circle around those outliers 88 | hold on 89 | plot(X(outliers, 1), X(outliers, 2), 'ro', 'LineWidth', 2, 'MarkerSize', 10); 90 | hold off 91 | 92 | fprintf('Program paused. Press enter to continue.\n'); 93 | pause; 94 | 95 | %% ================== Part 4: Multidimensional Outliers =================== 96 | % We will now use the code from the previous part and apply it to a 97 | % harder problem in which more features describe each datapoint and only 98 | % some features indicate whether a point is an outlier. 99 | % 100 | 101 | % Loads the second dataset. You should now have the 102 | % variables X, Xval, yval in your environment 103 | load('ex8data2.mat'); 104 | 105 | % Apply the same steps to the larger dataset 106 | [mu sigma2] = estimateGaussian(X); 107 | 108 | % Training set 109 | p = multivariateGaussian(X, mu, sigma2); 110 | 111 | % Cross-validation set 112 | pval = multivariateGaussian(Xval, mu, sigma2); 113 | 114 | % Find the best threshold 115 | [epsilon F1] = selectThreshold(yval, pval); 116 | 117 | fprintf('Best epsilon found using cross-validation: %e\n', epsilon); 118 | fprintf('Best F1 on Cross Validation Set: %f\n', F1); 119 | fprintf('# Outliers found: %d\n', sum(p < epsilon)); 120 | fprintf(' (you should see a value epsilon of about 1.38e-18)\n\n'); 121 | pause 122 | 123 | 124 | 125 | -------------------------------------------------------------------------------- /ex8/ex8_cofi.m: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env octave 2 | %% Machine Learning Online Class 3 | % Exercise 8 | Anomaly Detection and Collaborative Filtering 4 | % 5 | % Instructions 6 | % ------------ 7 | % 8 | % This file contains code that helps you get started on the 9 | % exercise. You will need to complete the following functions: 10 | % 11 | % estimateGaussian.m 12 | % selectThreshold.m 13 | % cofiCostFunc.m 14 | % 15 | % For this exercise, you will not need to change any code in this file, 16 | % or any other files other than those mentioned above. 
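% For reference, the regularized cost that cofiCostFunc.m is expected to
% return (and that Parts 2-5 below check, first with lambda = 0 and then with
% lambda = 1.5) can be written as
%   J = 1/2 * sum over (i,j) with R(i,j) = 1 of (X(i,:) * Theta(j,:)' - Y(i,j))^2
%       + lambda/2 * (sum(sum(Theta.^2)) + sum(sum(X.^2)))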
17 | % 18 | 19 | %% =============== Part 1: Loading movie ratings dataset ================ 20 | % You will start by loading the movie ratings dataset to understand the 21 | % structure of the data. 22 | % 23 | fprintf('Loading movie ratings dataset.\n\n'); 24 | 25 | % Load data 26 | load ('ex8_movies.mat'); 27 | 28 | % Y is a 1682x943 matrix, containing ratings (1-5) of 1682 movies on 29 | % 943 users 30 | % 31 | % R is a 1682x943 matrix, where R(i,j) = 1 if and only if user j gave a 32 | % rating to movie i 33 | 34 | % From the matrix, we can compute statistics like average rating. 35 | fprintf('Average rating for movie 1 (Toy Story): %f / 5\n\n', ... 36 | mean(Y(1, R(1, :)))); 37 | 38 | % We can "visualize" the ratings matrix by plotting it with imagesc 39 | imagesc(Y); 40 | ylabel('Movies'); 41 | xlabel('Users'); 42 | 43 | fprintf('\nProgram paused. Press enter to continue.\n'); 44 | pause; 45 | 46 | %% ============ Part 2: Collaborative Filtering Cost Function =========== 47 | % You will now implement the cost function for collaborative filtering. 48 | % To help you debug your cost function, we have included set of weights 49 | % that we trained on that. Specifically, you should complete the code in 50 | % cofiCostFunc.m to return J. 51 | 52 | % Load pre-trained weights (X, Theta, num_users, num_movies, num_features) 53 | load ('ex8_movieParams.mat'); 54 | 55 | % Reduce the data set size so that this runs faster 56 | num_users = 4; num_movies = 5; num_features = 3; 57 | X = X(1:num_movies, 1:num_features); 58 | Theta = Theta(1:num_users, 1:num_features); 59 | Y = Y(1:num_movies, 1:num_users); 60 | R = R(1:num_movies, 1:num_users); 61 | 62 | % Evaluate cost function 63 | J = cofiCostFunc([X(:) ; Theta(:)], Y, R, num_users, num_movies, ... 64 | num_features, 0); 65 | 66 | fprintf(['Cost at loaded parameters: %f '... 67 | '\n(this value should be about 22.22)\n'], J); 68 | 69 | fprintf('\nProgram paused. Press enter to continue.\n'); 70 | pause; 71 | 72 | 73 | %% ============== Part 3: Collaborative Filtering Gradient ============== 74 | % Once your cost function matches up with ours, you should now implement 75 | % the collaborative filtering gradient function. Specifically, you should 76 | % complete the code in cofiCostFunc.m to return the grad argument. 77 | % 78 | fprintf('\nChecking Gradients (without regularization) ... \n'); 79 | 80 | % Check gradients by running checkNNGradients 81 | checkCostFunction; 82 | 83 | fprintf('\nProgram paused. Press enter to continue.\n'); 84 | pause; 85 | 86 | 87 | %% ========= Part 4: Collaborative Filtering Cost Regularization ======== 88 | % Now, you should implement regularization for the cost function for 89 | % collaborative filtering. You can implement it by adding the cost of 90 | % regularization to the original cost computation. 91 | % 92 | 93 | % Evaluate cost function 94 | J = cofiCostFunc([X(:) ; Theta(:)], Y, R, num_users, num_movies, ... 95 | num_features, 1.5); 96 | 97 | fprintf(['Cost at loaded parameters (lambda = 1.5): %f '... 98 | '\n(this value should be about 31.34)\n'], J); 99 | 100 | fprintf('\nProgram paused. Press enter to continue.\n'); 101 | pause; 102 | 103 | 104 | %% ======= Part 5: Collaborative Filtering Gradient Regularization ====== 105 | % Once your cost matches up with ours, you should proceed to implement 106 | % regularization for the gradient. 107 | % 108 | 109 | % 110 | fprintf('\nChecking Gradients (with regularization) ... 
\n'); 111 | 112 | % Check gradients by running checkNNGradients 113 | checkCostFunction(1.5); 114 | 115 | fprintf('\nProgram paused. Press enter to continue.\n'); 116 | pause; 117 | 118 | 119 | %% ============== Part 6: Entering ratings for a new user =============== 120 | % Before we will train the collaborative filtering model, we will first 121 | % add ratings that correspond to a new user that we just observed. This 122 | % part of the code will also allow you to put in your own ratings for the 123 | % movies in our dataset! 124 | % 125 | movieList = loadMovieList(); 126 | 127 | % Initialize my ratings 128 | my_ratings = zeros(1682, 1); 129 | 130 | % Check the file movie_idx.txt for id of each movie in our dataset 131 | % For example, Toy Story (1995) has ID 1, so to rate it "4", you can set 132 | my_ratings(1) = 4; 133 | 134 | % Or suppose did not enjoy Silence of the Lambs (1991), you can set 135 | my_ratings(98) = 2; 136 | 137 | % We have selected a few movies we liked / did not like and the ratings we 138 | % gave are as follows: 139 | my_ratings(7) = 3; 140 | my_ratings(12)= 5; 141 | my_ratings(54) = 4; 142 | my_ratings(64)= 5; 143 | my_ratings(66)= 3; 144 | my_ratings(69) = 5; 145 | my_ratings(183) = 4; 146 | my_ratings(226) = 5; 147 | my_ratings(355)= 5; 148 | 149 | fprintf('\n\nNew user ratings:\n'); 150 | for i = 1:length(my_ratings) 151 | if my_ratings(i) > 0 152 | fprintf('Rated %d for %s\n', my_ratings(i), ... 153 | movieList{i}); 154 | end 155 | end 156 | 157 | fprintf('\nProgram paused. Press enter to continue.\n'); 158 | pause; 159 | 160 | 161 | %% ================== Part 7: Learning Movie Ratings ==================== 162 | % Now, you will train the collaborative filtering model on a movie rating 163 | % dataset of 1682 movies and 943 users 164 | % 165 | 166 | fprintf('\nTraining collaborative filtering...\n'); 167 | 168 | % Load data 169 | load('ex8_movies.mat'); 170 | 171 | % Y is a 1682x943 matrix, containing ratings (1-5) of 1682 movies by 172 | % 943 users 173 | % 174 | % R is a 1682x943 matrix, where R(i,j) = 1 if and only if user j gave a 175 | % rating to movie i 176 | 177 | % Add our own ratings to the data matrix 178 | Y = [my_ratings Y]; 179 | R = [(my_ratings ~= 0) R]; 180 | 181 | % Normalize Ratings 182 | [Ynorm, Ymean] = normalizeRatings(Y, R); 183 | 184 | % Useful Values 185 | num_users = size(Y, 2); 186 | num_movies = size(Y, 1); 187 | num_features = 10; 188 | 189 | % Set Initial Parameters (Theta, X) 190 | X = randn(num_movies, num_features); 191 | Theta = randn(num_users, num_features); 192 | 193 | initial_parameters = [X(:); Theta(:)]; 194 | 195 | % Set options for fmincg 196 | options = optimset('GradObj', 'on', 'MaxIter', 100); 197 | 198 | % Set Regularization 199 | lambda = 10; 200 | theta = fmincg (@(t)(cofiCostFunc(t, Y, R, num_users, num_movies, ... 201 | num_features, lambda)), ... 202 | initial_parameters, options); 203 | 204 | % Unfold the returned theta back into U and W 205 | X = reshape(theta(1:num_movies*num_features), num_movies, num_features); 206 | Theta = reshape(theta(num_movies*num_features+1:end), ... 207 | num_users, num_features); 208 | 209 | fprintf('Recommender system learning completed.\n'); 210 | 211 | fprintf('\nProgram paused. Press enter to continue.\n'); 212 | pause; 213 | 214 | %% ================== Part 8: Recommendation for you ==================== 215 | % After training the model, you can now make recommendations by computing 216 | % the predictions matrix. 
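% Concretely, p(i, j) = X(i,:) * Theta(j,:)' is the model's predicted rating of
% movie i by user j; below, the first column (the user added in Part 6) is
% extracted and the per-movie mean Ymean from normalizeRatings is added back
% to express the predictions on the original 1-5 rating scale.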
217 | % 218 | 219 | p = X * Theta'; 220 | my_predictions = p(:,1) + Ymean; 221 | 222 | movieList = loadMovieList(); 223 | 224 | [r, ix] = sort(my_predictions, 'descend'); 225 | fprintf('\nTop recommendations for you:\n'); 226 | for i=1:10 227 | j = ix(i); 228 | fprintf('Predicting rating %.1f for movie %s\n', my_predictions(j), ... 229 | movieList{j}); 230 | end 231 | 232 | fprintf('\n\nOriginal ratings provided:\n'); 233 | for i = 1:length(my_ratings) 234 | if my_ratings(i) > 0 235 | fprintf('Rated %d for %s\n', my_ratings(i), ... 236 | movieList{i}); 237 | end 238 | end 239 | -------------------------------------------------------------------------------- /ex8/ex8_movieParams.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/ex8/ex8_movieParams.mat -------------------------------------------------------------------------------- /ex8/ex8_movies.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/ex8/ex8_movies.mat -------------------------------------------------------------------------------- /ex8/ex8data1.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/ex8/ex8data1.mat -------------------------------------------------------------------------------- /ex8/ex8data2.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/ex8/ex8data2.mat -------------------------------------------------------------------------------- /ex8/fmincg.m: -------------------------------------------------------------------------------- 1 | ../ex3/fmincg.m -------------------------------------------------------------------------------- /ex8/loadMovieList.m: -------------------------------------------------------------------------------- 1 | function movieList = loadMovieList() 2 | %GETMOVIELIST reads the fixed movie list in movie.txt and returns a 3 | %cell array of the words 4 | % movieList = GETMOVIELIST() reads the fixed movie list in movie.txt 5 | % and returns a cell array of the words in movieList. 6 | 7 | 8 | %% Read the fixed movieulary list 9 | fid = fopen('movie_ids.txt'); 10 | 11 | % Store all movies in cell array movie{} 12 | n = 1682; % Total number of movies 13 | 14 | movieList = cell(n, 1); 15 | for i = 1:n 16 | % Read line 17 | line = fgets(fid); 18 | % Word Index (can ignore since it will be = i) 19 | [idx, movieName] = strtok(line, ' '); 20 | % Actual Word 21 | movieList{i} = strtrim(movieName); 22 | end 23 | fclose(fid); 24 | 25 | end 26 | -------------------------------------------------------------------------------- /ex8/movie_ids.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/ml-class/74ce689e21e9f3ca184e60313351b31112e5dd56/ex8/movie_ids.txt -------------------------------------------------------------------------------- /ex8/multivariateGaussian.m: -------------------------------------------------------------------------------- 1 | function p = multivariateGaussian(X, mu, Sigma2) 2 | %MULTIVARIATEGAUSSIAN Computes the probability density function of the 3 | %multivariate gaussian distribution. 
4 | % p = MULTIVARIATEGAUSSIAN(X, mu, Sigma2) Computes the probability 5 | % density function of the examples X under the multivariate gaussian 6 | % distribution with parameters mu and Sigma2. If Sigma2 is a matrix, it is 7 | % treated as the covariance matrix. If Sigma2 is a vector, it is treated 8 | % as the \sigma^2 values of the variances in each dimension (a diagonal 9 | % covariance matrix) 10 | % 11 | 12 | k = length(mu); 13 | 14 | if (size(Sigma2, 2) == 1) || (size(Sigma2, 1) == 1) 15 | Sigma2 = diag(Sigma2); 16 | end 17 | 18 | X = bsxfun(@minus, X, mu(:)'); 19 | p = (2 * pi) ^ (- k / 2) * det(Sigma2) ^ (-0.5) * ... 20 | exp(-0.5 * sum(bsxfun(@times, X * pinv(Sigma2), X), 2)); 21 | 22 | end -------------------------------------------------------------------------------- /ex8/normalizeRatings.m: -------------------------------------------------------------------------------- 1 | function [Ynorm, Ymean] = normalizeRatings(Y, R) 2 | %NORMALIZERATINGS Preprocess data by subtracting mean rating for every 3 | %movie (every row) 4 | % [Ynorm, Ymean] = NORMALIZERATINGS(Y, R) normalized Y so that each movie 5 | % has a rating of 0 on average, and returns the mean rating in Ymean. 6 | % 7 | 8 | [m, n] = size(Y); 9 | Ymean = zeros(m, 1); 10 | Ynorm = zeros(size(Y)); 11 | for i = 1:m 12 | Ymean(i) = mean(Y(i, R(i, :))); 13 | Ynorm(i, R(i, :)) = Y(i, R(i, :)) - Ymean(i); 14 | end 15 | 16 | end -------------------------------------------------------------------------------- /ex8/selectThreshold.m: -------------------------------------------------------------------------------- 1 | function [bestEpsilon bestF1] = selectThreshold(yval, pval) 2 | %SELECTTHRESHOLD Find the best threshold (epsilon) to use for selecting 3 | %outliers 4 | % [bestEpsilon bestF1] = SELECTTHRESHOLD(yval, pval) finds the best 5 | % threshold to use for selecting outliers based on the results from a 6 | % validation set (pval) and the ground truth (yval). 7 | % 8 | 9 | bestEpsilon = 0; 10 | bestF1 = 0; 11 | F1 = 0; 12 | 13 | stepsize = (max(pval) - min(pval)) / 1000; 14 | for epsilon = min(pval):stepsize:max(pval) 15 | 16 | % ====================== YOUR CODE HERE ====================== 17 | % Instructions: Compute the F1 score of choosing epsilon as the 18 | % threshold and place the value in F1. The code at the 19 | % end of the loop will compare the F1 score for this 20 | % choice of epsilon and set it to be the best epsilon if 21 | % it is better than the current choice of epsilon. 
22 | % 23 | % Note: You can use predictions = (pval < epsilon) to get a binary vector 24 | % of 0's and 1's of the outlier predictions 25 | 26 | predictions = (pval < epsilon); 27 | 28 | % TODO(SaveTheRbtz@): Move F1 score calculation to a separate function 29 | tp = sum((predictions == 1) & (yval == 1)); 30 | fp = sum((predictions == 1) & (yval == 0)); 31 | fn = sum((predictions == 0) & (yval == 1)); 32 | tn = sum((predictions == 0) & (yval == 0)); % XXX: NOT USED 33 | 34 | recall = tp / (tp + fn); 35 | precision = tp / (tp + fp); 36 | 37 | F1 = 2 * precision * recall / (precision + recall); 38 | 39 | % ============================================================= 40 | 41 | if F1 > bestF1 42 | bestF1 = F1; 43 | bestEpsilon = epsilon; 44 | end 45 | end 46 | 47 | end 48 | -------------------------------------------------------------------------------- /ex8/visualizeFit.m: -------------------------------------------------------------------------------- 1 | function visualizeFit(X, mu, sigma2) 2 | %VISUALIZEFIT Visualize the dataset and its estimated distribution. 3 | % VISUALIZEFIT(X, mu, sigma2) This visualization shows you the 4 | % probability density function of the Gaussian distribution. Each example 5 | % has a location (x1, x2) that depends on its feature values. 6 | % 7 | 8 | [X1,X2] = meshgrid(0:.5:35); 9 | Z = multivariateGaussian([X1(:) X2(:)],mu,sigma2); 10 | Z = reshape(Z,size(X1)); 11 | 12 | plot(X(:, 1), X(:, 2),'bx'); 13 | hold on; 14 | % Do not plot if there are infinities 15 | if (sum(isinf(Z)) == 0) 16 | contour(X1, X2, Z, 10.^(-20:3:0)'); 17 | end 18 | hold off; 19 | 20 | end -------------------------------------------------------------------------------- /octave_tutorial.m: -------------------------------------------------------------------------------- 1 | % Machine learning class 2 | % Octave tutorial 3 | 4 | % ======================================================= 5 | % Section 1: Octave Tutorial: Basic operations 6 | 7 | %% Change Octave prompt 8 | PS1('>> '); 9 | 10 | %% elementary operations 11 | 5+6 12 | 3-2 13 | 5*8 14 | 1/2 15 | 2^6 16 | 1 == 2 % false 17 | 1 ~= 2 % true. note, not "!=" 18 | 1 && 0 19 | 1 || 0 20 | xor(1,0) 21 | 22 | 23 | %% variable assignment 24 | a = 3; % semicolon suppresses output 25 | b = 'hi'; 26 | c = 3>=1; 27 | 28 | % Displaying them: 29 | a = pi 30 | disp(sprintf('2 decimals: %0.2f', a)) 31 | disp(sprintf('6 decimals: %0.6f', a)) 32 | format long 33 | a 34 | format short 35 | a 36 | 37 | 38 | %% vectors and matrices 39 | A = [1 2; 3 4; 5 6] 40 | 41 | v = [1 2 3] 42 | v = [1; 2; 3] 43 | v = [1:0.1:2] % from 1 to 2, with stepsize of 0.1.
Useful for plot axes 44 | v = 1:6 % from 1 to 6, assumes stepsize of 1 45 | 46 | C = 2*ones(2,3) % same as C = [2 2 2; 2 2 2] 47 | w = ones(1,3) % 1x3 vector of ones 48 | w = zeros(1,3) 49 | w = rand(1,3) % drawn from a uniform distribution 50 | w = randn(1,3) % drawn from a normal distribution (mean=0, var=1) 51 | w = -6 + sqrt(10)*(randn(1,10000)) % (mean = 1, var = 2) 52 | hist(w) 53 | I = eye(4) % 4x4 identity matrix 54 | 55 | % help function 56 | help eye 57 | help rand 58 | 59 | % ======================================================= 60 | % Section 2: Octave Tutorial: Moving data around 61 | 62 | 63 | %% dimensions 64 | sz = size(A) 65 | size(A,1) % number of rows 66 | size(A,2) % number of cols 67 | length(v) % size of longest dimension 68 | 69 | 70 | %% loading data 71 | pwd % show current directory (current path) 72 | cd 'C:\Users\ang\Octave files' % change directory 73 | ls % list files in current directory 74 | load q1y.dat 75 | load q1x.dat 76 | who % list variables in workspace 77 | whos % list variables in workspace (detailed view) 78 | clear q1y % clear w/ no argt clears all 79 | v = q1x(1:10); 80 | save hello v; % save variable v into file hello.mat 81 | save hello.txt v -ascii; % save as ascii 82 | % fopen, fread, fprintf, fscanf also work [[not needed in class]] 83 | 84 | %% indexing 85 | A(3,2) % indexing is (row,col) 86 | A(2,:) % get the 2nd row. 87 | % ":" means every element along that dimension 88 | A(:,2) % get the 2nd col 89 | A([1 3],:) 90 | 91 | A(:,2) = [10; 11; 12] % change second column 92 | A = [A, [100; 101; 102]]; % append column vec 93 | A(:) % Select all elements as a column vector. 94 | 95 | % Putting data together 96 | A = [A [100; 101; 102]] 97 | B = [11 12; 13 14; 15 16] % same dims as A 98 | [A B] 99 | [A; B] 100 | 101 | 102 | % ======================================================= 103 | % Section 3: Octave Tutorial: Computing on data 104 | 105 | 106 | %% matrix operations 107 | A * C % matrix multiplication 108 | A .* B % element-wise multiplcation 109 | % A .* C or A * B gives error - wrong dimensions 110 | A .^ 2 111 | 1./v 112 | log(v) % functions like this operate element-wise on vecs or matrices 113 | exp(v) % e^4 114 | abs(v) 115 | 116 | -v % -1*v 117 | 118 | v + ones(1,length(v)) 119 | % v + 1 % same 120 | 121 | A' % matrix transpose 122 | 123 | %% misc useful functions 124 | 125 | % max (or min) 126 | a = [1 15 2 0.5] 127 | val = max(a) 128 | [val,ind] = max(a) 129 | 130 | % find 131 | a < 3 132 | find(a < 3) 133 | A = magic(3) 134 | [r,c] = find(A>=7) 135 | 136 | % sum, prod 137 | sum(a) 138 | prod(a) 139 | floor(a) % or ceil(a) 140 | max(rand(3),rand(3)) 141 | max(A,[],1) 142 | min(A,[],2) 143 | A = magic(9) 144 | sum(A,1) 145 | sum(A,2) 146 | sum(sum( A .* eye(9) )) 147 | sum(sum( A .* flipud(eye(9)) )) 148 | 149 | 150 | % Matrix inverse (pseudo-inverse) 151 | pinv(A) % inv(A'*A)*A' 152 | 153 | 154 | % ======================================================= 155 | % Section 4: Octave Tutorial: Plotting 156 | 157 | 158 | %% plotting 159 | t = [0:0.01:0.98]; 160 | y1 = sin(2*pi*4*t); 161 | plot(t,y1); 162 | y2 = cos(2*pi*4*t); 163 | hold on; % "hold off" to turn off 164 | plot(t,y2,'r'); 165 | xlabel('time'); 166 | ylabel('value'); 167 | legend('sin','cos'); 168 | title('my plot'); 169 | print -dpng 'myPlot.png' 170 | close; % or, "close all" to close all figs 171 | 172 | figure(2), clf; % can specify the figure number 173 | subplot(1,2,1); % Divide plot into 1x2 grid, access 1st element 174 | plot(t,y1); 175 | subplot(1,2,2); % Divide 
plot into 1x2 grid, access 2nd element 176 | plot(t,y2); 177 | axis([0.5 1 -1 1]); % change axis scale 178 | 179 | %% display a matrix (or image) 180 | figure; 181 | imagesc(magic(15)), colorbar, colormap gray; 182 | % comma-chaining function calls. 183 | a=1,b=2,c=3 184 | a=1;b=2;c=3; 185 | 186 | 187 | % ======================================================= 188 | % Section 5: Octave Tutorial: For, while, if statements, and functions. 189 | 190 | v = zeros(10,1); 191 | for i=1:10, 192 | v(i) = 2^i; 193 | end 194 | % Can also use "break" and "continue" inside for and while loops to control execution. 195 | 196 | i = 1; 197 | while i <= 5, 198 | v(i) = 100; 199 | i = i+1; 200 | end 201 | 202 | i = 1; 203 | while true, 204 | v(i) = 999; 205 | i = i+1; 206 | if i == 6, 207 | break; 208 | end; 209 | end 210 | 211 | if v(1)==1, 212 | disp('The value is one!'); 213 | elseif v(1)==2, 214 | disp('The value is two!'); 215 | else 216 | disp('The value is not one or two!'); 217 | end 218 | 219 | % exit % quit 220 | 221 | % Functions 222 | 223 | % Create a file called squareThisNumber.m with the following contents (without the %): 224 | % function r = squareThisNumber(x) 225 | % r = x * x; 226 | % end 227 | 228 | squareThisNumber(5); 229 | % If the function is undefined, use "pwd" to check the current directory (path), 230 | % and "cd" to change directories 231 | pwd 232 | cd 'C:\Users\ang\Desktop'; 233 | squareThisNumber(5); 234 | 235 | % Octave search path (advanced/optional) 236 | addpath('C:\Users\ang\Desktop'); 237 | cd 'C:\' 238 | squareThisNumber(5); 239 | 240 | % If you have defined other functions such as costFunctionJ, 241 | % the following code will work too. 242 | 243 | X = [1 1; 1 2; 1 3]; 244 | y = [1;2;3]; 245 | 246 | theta = [0; 1]; 247 | j = costFunctionJ(X, y, theta); 248 | 249 | theta = [0; 0]; 250 | j = costFunctionJ(X, y, theta); 251 | 252 | 253 | 254 | 255 | --------------------------------------------------------------------------------
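% The tutorial above calls costFunctionJ, whose contents are not included in
% this repository. A minimal sketch consistent with how it is called (assuming
% the plain unregularized linear regression cost from the lectures) would be a
% file costFunctionJ.m containing:

function J = costFunctionJ(X, y, theta)
% COSTFUNCTIONJ Unregularized linear regression cost.
%   X is the design matrix (one example per row, including the bias column),
%   y is the vector of targets and theta is the parameter vector.
m = size(X, 1);                      % number of training examples
predictions = X * theta;             % hypothesis evaluated on all examples
sqrErrors = (predictions - y) .^ 2;  % squared residuals
J = 1 / (2 * m) * sum(sqrErrors);    % half the mean squared error
end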