├── README.md ├── ex1 ├── computeCost.m ├── computeCostMulti.m ├── ex1.m ├── ex1_multi.m ├── ex1data1.txt ├── ex1data2.txt ├── featureNormalize.m ├── gradientDescent.m ├── gradientDescentMulti.m ├── normalEqn.m ├── plotData.m └── warmUpExercise.m ├── ex2 ├── costFunction.m ├── costFunctionReg.m ├── ex2.m ├── ex2_guide.pdf ├── ex2_reg.m ├── mapFeature.m ├── plotData.m ├── plotDecisionBoundary.m ├── predict.m └── sigmoid.m ├── ex3 ├── displayData.m ├── ex3.m ├── ex3_nn.m ├── lrCostFunction.m ├── oneVsAll.m ├── predict.m ├── predictOneVsAll.m └── sigmoid.m ├── ex4 ├── computeNumericalGradient.m ├── ex4.m ├── nnCostFunction -V2.m ├── nnCostFunction.m ├── randInitializeWeights.m └── sigmoidGradient.m ├── ex5 ├── ex5.m ├── learningCurve.m ├── linearRegCostFunction.m ├── polyFeatures.m ├── trainLinearReg.m └── validationCurve.m ├── ex6 ├── dataset3Params.m ├── emailFeatures.m ├── ex6.m ├── ex6_spam.m ├── gaussianKernel.m └── processEmail.m ├── ex7 ├── computeCentroids.m ├── ex7.m ├── ex7_pca.m ├── findClosestCentroids.m ├── kMeansInitCentroids.m ├── pca.m ├── projectData.m └── recoverData.m ├── ex8 ├── cofiCostFunc.m ├── estimateGaussian.m ├── ex8.m ├── ex8_cofi.m └── selectThreshold.m └── update /README.md: -------------------------------------------------------------------------------- 1 | # Andrew-Ng-Machine-Learning-Assignment 2 | Assignment code for Andrew Ng's Machine Learning course on Coursera 3 | Homework and after-class code for the Coursera course "Machine Learning" by Andrew Ng 4 | -------------------------------------------------------------------------------- /ex1/computeCost.m: -------------------------------------------------------------------------------- 1 | function J = computeCost(X, y, theta) 2 | %COMPUTECOST Compute cost for linear regression 3 | % J = COMPUTECOST(X, y, theta) computes the cost of using theta as the 4 | % parameter for linear regression to fit the data points in X and y 5 | 6 | % Initialize some useful values 7 | m = length(y); % number of training examples 8 | total = 0; % running sum of squared errors (avoids shadowing the built-in 'sum') 9 | 10 | for i = 1:m 11 | cost = ((theta)' * (X(i,:))' - y(i)) ^ 2; 12 | total = total + cost; 13 | end 14 | 15 | J = 1/m * (1/2) * total; 16 | % You need to return the following variables correctly 17 | 18 | % ====================== YOUR CODE HERE ====================== 19 | % Instructions: Compute the cost of a particular choice of theta 20 | % You should set J to the cost. 21 | 22 | 23 | 24 | 25 | 26 | % ========================================================================= 27 | 28 | end 29 | -------------------------------------------------------------------------------- /ex1/computeCostMulti.m: -------------------------------------------------------------------------------- 1 | function J = computeCostMulti(X, y, theta) 2 | %COMPUTECOSTMULTI Compute cost for linear regression with multiple variables 3 | % J = COMPUTECOSTMULTI(X, y, theta) computes the cost of using theta as the 4 | % parameter for linear regression to fit the data points in X and y 5 | 6 | % Initialize some useful values 7 | m = length(y); % number of training examples 8 | 9 | 10 | 11 | cost = (X * theta - y)' * (X * theta - y); 12 | 13 | 14 | J = cost / m * (1/2); 15 | 16 | 17 | 18 | % You need to return the following variables correctly 19 | 20 | % ====================== YOUR CODE HERE ====================== 21 | % Instructions: Compute the cost of a particular choice of theta 22 | % You should set J to the cost.
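% --- Added note (a sketch, not part of the original submission) ---
% Both implementations above compute the same quantity, J = 1/(2m) times the
% sum of squared errors. A minimal vectorized equivalent of the loop in
% computeCost.m, assuming X already carries the column of ones, is:
%
%   err = X * theta - y;            % m x 1 vector of residuals
%   J   = (err' * err) / (2 * m);   % identical value to the summed loop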
23 | 24 | 25 | 26 | 27 | 28 | % ========================================================================= 29 | 30 | end 31 | -------------------------------------------------------------------------------- /ex1/ex1.m: -------------------------------------------------------------------------------- 1 | %% Machine Learning Online Class - Exercise 1: Linear Regression 2 | 3 | % Instructions 4 | % ------------ 5 | % 6 | % This file contains code that helps you get started on the 7 | % linear exercise. You will need to complete the following functions 8 | % in this exercise: 9 | % 10 | % warmUpExercise.m 11 | % plotData.m 12 | % gradientDescent.m 13 | % computeCost.m 14 | % gradientDescentMulti.m 15 | % computeCostMulti.m 16 | % featureNormalize.m 17 | % normalEqn.m 18 | % 19 | % For this exercise, you will not need to change any code in this file, 20 | % or any other files other than those mentioned above. 21 | % 22 | % x refers to the population size in 10,000s 23 | % y refers to the profit in $10,000s 24 | % 25 | 26 | %% Initialization 27 | clear ; close all; clc 28 | 29 | %% ==================== Part 1: Basic Function ==================== 30 | % Complete warmUpExercise.m 31 | fprintf('Running warmUpExercise ... \n'); 32 | fprintf('5x5 Identity Matrix: \n'); 33 | warmUpExercise() 34 | 35 | fprintf('Program paused. Press enter to continue.\n'); 36 | pause; 37 | 38 | 39 | %% ======================= Part 2: Plotting ======================= 40 | fprintf('Plotting Data ...\n') 41 | data = load('ex1data1.txt'); 42 | X = data(:, 1); y = data(:, 2); 43 | m = length(y); % number of training examples 44 | 45 | % Plot Data 46 | % Note: You have to complete the code in plotData.m 47 | plot(X, y,'rx','MarkerSize',10); 48 | ylabel('Profit in $10,000s'); 49 | xlabel('Population of City in 10,000s'); 50 | fprintf('Program paused. Press enter to continue.\n'); 51 | 52 | pause; 53 | 54 | %% =================== Part 3: Cost and Gradient descent =================== 55 | 56 | X = [ones(m, 1), data(:,1)]; % Add a column of ones to x 57 | theta = zeros(2, 1); % initialize fitting parameters 58 | 59 | % Some gradient descent settings 60 | iterations = 1500; 61 | alpha = 0.01; 62 | 63 | fprintf('\nTesting the cost function ...\n') 64 | % compute and display initial cost 65 | J = computeCost(X, y, theta); 66 | fprintf('With theta = [0 ; 0]\nCost computed = %f\n', J); 67 | fprintf('Expected cost value (approx) 32.07\n'); 68 | 69 | % further testing of the cost function 70 | J = computeCost(X, y, [-1 ; 2]); 71 | fprintf('\nWith theta = [-1 ; 2]\nCost computed = %f\n', J); 72 | fprintf('Expected cost value (approx) 54.24\n'); 73 | 74 | fprintf('Program paused. Press enter to continue.\n'); 75 | pause; 76 | 77 | fprintf('\nRunning Gradient Descent ...\n') 78 | % run gradient descent 79 | theta = gradientDescent(X, y, theta, alpha, iterations); 80 | 81 | % print theta to screen 82 | fprintf('Theta found by gradient descent:\n'); 83 | fprintf('%f\n', theta); 84 | fprintf('Expected theta values (approx)\n'); 85 | fprintf(' -3.6303\n 1.1664\n\n'); 86 | 87 | % Plot the linear fit 88 | hold on; % keep previous plot visible 89 | plot(X(:,2), X*theta, '-') 90 | legend('Training data', 'Linear regression') 91 | hold off % don't overlay any more plots on this figure 92 | 93 | % Predict values for population sizes of 35,000 and 70,000 94 | predict1 = [1, 3.5] *theta; 95 | fprintf('For population = 35,000, we predict a profit of %f\n',...
96 | predict1*10000); 97 | predict2 = [1, 7] * theta; 98 | fprintf('For population = 70,000, we predict a profit of %f\n',... 99 | predict2*10000); 100 | 101 | fprintf('Program paused. Press enter to continue.\n'); 102 | pause; 103 | 104 | %% ============= Part 4: Visualizing J(theta_0, theta_1) ============= 105 | fprintf('Visualizing J(theta_0, theta_1) ...\n') 106 | 107 | % Grid over which we will calculate J 108 | theta0_vals = linspace(-10, 10, 100); 109 | theta1_vals = linspace(-1, 4, 100); 110 | 111 | % initialize J_vals to a matrix of 0's 112 | J_vals = zeros(length(theta0_vals), length(theta1_vals)); 113 | 114 | % Fill out J_vals 115 | for i = 1:length(theta0_vals) 116 | for j = 1:length(theta1_vals) 117 | t = [theta0_vals(i); theta1_vals(j)]; 118 | J_vals(i,j) = computeCost(X, y, t); 119 | end 120 | end 121 | 122 | 123 | % Because of the way meshgrids work in the surf command, we need to 124 | % transpose J_vals before calling surf, or else the axes will be flipped 125 | J_vals = J_vals'; 126 | % Surface plot 127 | figure; 128 | surf(theta0_vals, theta1_vals, J_vals) 129 | xlabel('\theta_0'); ylabel('\theta_1'); 130 | 131 | % Contour plot 132 | figure; 133 | % Plot J_vals as 15 contours spaced logarithmically between 0.01 and 100 134 | contour(theta0_vals, theta1_vals, J_vals, logspace(-2, 3, 20)) 135 | xlabel('\theta_0'); ylabel('\theta_1'); 136 | hold on; 137 | plot(theta(1), theta(2), 'rx', 'MarkerSize', 10, 'LineWidth', 2); 138 | -------------------------------------------------------------------------------- /ex1/ex1_multi.m: -------------------------------------------------------------------------------- 1 | %% Machine Learning Online Class 2 | % Exercise 1: Linear regression with multiple variables 3 | % 4 | % Instructions 5 | % ------------ 6 | % 7 | % This file contains code that helps you get started on the 8 | % linear regression exercise. 9 | % 10 | % You will need to complete the following functions in this 11 | % exericse: 12 | % 13 | % warmUpExercise.m 14 | % plotData.m 15 | % gradientDescent.m 16 | % computeCost.m 17 | % gradientDescentMulti.m 18 | % computeCostMulti.m 19 | % featureNormalize.m 20 | % normalEqn.m 21 | % 22 | % For this part of the exercise, you will need to change some 23 | % parts of the code below for various experiments (e.g., changing 24 | % learning rates). 25 | % 26 | 27 | %% Initialization 28 | 29 | %% ================ Part 1: Feature Normalization ================ 30 | 31 | %% Clear and Close Figures 32 | clear ; close all; clc 33 | 34 | fprintf('Loading data ...\n'); 35 | 36 | %% Load Data 37 | data = load('ex1data2.txt'); 38 | X = data(:, 1:2); 39 | y = data(:, 3); 40 | m = length(y); 41 | 42 | % Print out some data points 43 | fprintf('First 10 examples from the dataset: \n'); 44 | fprintf(' x = [%.0f %.0f], y = %.0f \n', [X(1:10,:) y(1:10,:)]'); 45 | 46 | fprintf('Program paused. 
Press enter to continue.\n'); 47 | pause; 48 | 49 | % Scale features and set them to zero mean 50 | fprintf('Normalizing Features ...\n'); 51 | 52 | [X mu sigma] = featureNormalize(X); 53 | 54 | % Add intercept term to X 55 | X = [ones(m, 1), X]; 56 | 57 | 58 | %% ================ Part 2: Gradient Descent ================ 59 | 60 | 61 | % About step choose: 62 | % if alpha = 0.01, then 500 iters required to converg 63 | % if alpha = 0.1, then 150 iters required to converg 64 | % if alpha = 1, then 15 iters required to converg 65 | 66 | fprintf('Running gradient descent ...\n'); 67 | 68 | % Choose some alpha value 69 | alpha = 0.1; 70 | num_iters = 350; 71 | 72 | % Init Theta and Run Gradient Descent 73 | theta = zeros(3, 1); 74 | [theta, J_history] = gradientDescentMulti(X, y, theta, alpha, num_iters); 75 | 76 | % Plot the convergence graph 77 | figure; 78 | plot(1:numel(J_history), J_history, '-b', 'LineWidth', 2); 79 | xlabel('Number of iterations'); 80 | ylabel('Cost J'); 81 | 82 | % Display gradient descent's result 83 | fprintf('Theta computed from gradient descent: \n'); 84 | fprintf(' %f \n', theta); 85 | fprintf('\n'); 86 | 87 | % Estimate the price of a 1650 sq-ft, 3 br house 88 | % ====================== YOUR CODE HERE ====================== 89 | % Recall that the first column of X is all-ones. Thus, it does 90 | % not need to be normalized. 91 | 92 | input = [1650, 3]; 93 | 94 | input = [1,(input - mu) ./ sigma]; 95 | %(1650 - mu)/sigma, 3]; 96 | %[input miu sig] = featureNormalize(input); 97 | price = theta' * input'; % You should change this 98 | 99 | 100 | % ============================================================ 101 | 102 | fprintf(['Predicted price of a 1650 sq-ft, 3 br house ' ... 103 | '(using gradient descent):\n $%f\n'], price); 104 | 105 | fprintf('Program paused. Press enter to continue.\n'); 106 | pause; 107 | 108 | %% ================ Part 3: Normal Equations ================ 109 | 110 | fprintf('Solving with normal equations...\n'); 111 | 112 | % ====================== YOUR CODE HERE ====================== 113 | % Instructions: The following code computes the closed form 114 | % solution for linear regression using the normal 115 | % equations. You should complete the code in 116 | % normalEqn.m 117 | % 118 | % After doing so, you should complete this code 119 | % to predict the price of a 1650 sq-ft, 3 br house. 120 | % 121 | 122 | %% Load Data 123 | data = csvread('ex1data2.txt'); 124 | X = data(:, 1:2); 125 | y = data(:, 3); 126 | m = length(y); 127 | 128 | % Add intercept term to X 129 | X = [ones(m, 1), X]; 130 | 131 | % Calculate the parameters from the normal equation 132 | theta = normalEqn(X, y); 133 | 134 | % Display normal equation's result 135 | fprintf('Theta computed from the normal equations: \n'); 136 | fprintf(' %f \n', theta); 137 | fprintf('\n'); 138 | 139 | 140 | % Estimate the price of a 1650 sq-ft, 3 br house 141 | % ====================== YOUR CODE HERE ====================== 142 | price = theta' * [1; 1650; 3]; % You should change this 143 | 144 | 145 | % ============================================================ 146 | 147 | fprintf(['Predicted price of a 1650 sq-ft, 3 br house ' ... 
148 | '(using normal equations):\n $%f\n'], price); 149 | 150 | -------------------------------------------------------------------------------- /ex1/ex1data1.txt: -------------------------------------------------------------------------------- 1 | 6.1101,17.592 2 | 5.5277,9.1302 3 | 8.5186,13.662 4 | 7.0032,11.854 5 | 5.8598,6.8233 6 | 8.3829,11.886 7 | 7.4764,4.3483 8 | 8.5781,12 9 | 6.4862,6.5987 10 | 5.0546,3.8166 11 | 5.7107,3.2522 12 | 14.164,15.505 13 | 5.734,3.1551 14 | 8.4084,7.2258 15 | 5.6407,0.71618 16 | 5.3794,3.5129 17 | 6.3654,5.3048 18 | 5.1301,0.56077 19 | 6.4296,3.6518 20 | 7.0708,5.3893 21 | 6.1891,3.1386 22 | 20.27,21.767 23 | 5.4901,4.263 24 | 6.3261,5.1875 25 | 5.5649,3.0825 26 | 18.945,22.638 27 | 12.828,13.501 28 | 10.957,7.0467 29 | 13.176,14.692 30 | 22.203,24.147 31 | 5.2524,-1.22 32 | 6.5894,5.9966 33 | 9.2482,12.134 34 | 5.8918,1.8495 35 | 8.2111,6.5426 36 | 7.9334,4.5623 37 | 8.0959,4.1164 38 | 5.6063,3.3928 39 | 12.836,10.117 40 | 6.3534,5.4974 41 | 5.4069,0.55657 42 | 6.8825,3.9115 43 | 11.708,5.3854 44 | 5.7737,2.4406 45 | 7.8247,6.7318 46 | 7.0931,1.0463 47 | 5.0702,5.1337 48 | 5.8014,1.844 49 | 11.7,8.0043 50 | 5.5416,1.0179 51 | 7.5402,6.7504 52 | 5.3077,1.8396 53 | 7.4239,4.2885 54 | 7.6031,4.9981 55 | 6.3328,1.4233 56 | 6.3589,-1.4211 57 | 6.2742,2.4756 58 | 5.6397,4.6042 59 | 9.3102,3.9624 60 | 9.4536,5.4141 61 | 8.8254,5.1694 62 | 5.1793,-0.74279 63 | 21.279,17.929 64 | 14.908,12.054 65 | 18.959,17.054 66 | 7.2182,4.8852 67 | 8.2951,5.7442 68 | 10.236,7.7754 69 | 5.4994,1.0173 70 | 20.341,20.992 71 | 10.136,6.6799 72 | 7.3345,4.0259 73 | 6.0062,1.2784 74 | 7.2259,3.3411 75 | 5.0269,-2.6807 76 | 6.5479,0.29678 77 | 7.5386,3.8845 78 | 5.0365,5.7014 79 | 10.274,6.7526 80 | 5.1077,2.0576 81 | 5.7292,0.47953 82 | 5.1884,0.20421 83 | 6.3557,0.67861 84 | 9.7687,7.5435 85 | 6.5159,5.3436 86 | 8.5172,4.2415 87 | 9.1802,6.7981 88 | 6.002,0.92695 89 | 5.5204,0.152 90 | 5.0594,2.8214 91 | 5.7077,1.8451 92 | 7.6366,4.2959 93 | 5.8707,7.2029 94 | 5.3054,1.9869 95 | 8.2934,0.14454 96 | 13.394,9.0551 97 | 5.4369,0.61705 98 | -------------------------------------------------------------------------------- /ex1/ex1data2.txt: -------------------------------------------------------------------------------- 1 | 2104,3,399900 2 | 1600,3,329900 3 | 2400,3,369000 4 | 1416,2,232000 5 | 3000,4,539900 6 | 1985,4,299900 7 | 1534,3,314900 8 | 1427,3,198999 9 | 1380,3,212000 10 | 1494,3,242500 11 | 1940,4,239999 12 | 2000,3,347000 13 | 1890,3,329999 14 | 4478,5,699900 15 | 1268,3,259900 16 | 2300,4,449900 17 | 1320,2,299900 18 | 1236,3,199900 19 | 2609,4,499998 20 | 3031,4,599000 21 | 1767,3,252900 22 | 1888,2,255000 23 | 1604,3,242900 24 | 1962,4,259900 25 | 3890,3,573900 26 | 1100,3,249900 27 | 1458,3,464500 28 | 2526,3,469000 29 | 2200,3,475000 30 | 2637,3,299900 31 | 1839,2,349900 32 | 1000,1,169900 33 | 2040,4,314900 34 | 3137,3,579900 35 | 1811,4,285900 36 | 1437,3,249900 37 | 1239,3,229900 38 | 2132,4,345000 39 | 4215,4,549000 40 | 2162,4,287000 41 | 1664,2,368500 42 | 2238,3,329900 43 | 2567,4,314000 44 | 1200,3,299000 45 | 852,2,179900 46 | 1852,4,299900 47 | 1203,3,239500 48 | -------------------------------------------------------------------------------- /ex1/featureNormalize.m: -------------------------------------------------------------------------------- 1 | function [X_norm, mu, sigma] = featureNormalize(X) 2 | %FEATURENORMALIZE Normalizes the features in X 3 | % FEATURENORMALIZE(X) returns a normalized version of X where 4 | % the mean value of each feature is 0 
and the standard deviation 5 | % is 1. This is often a good preprocessing step to do when 6 | % working with learning algorithms. 7 | 8 | % You need to set these values correctly 9 | 10 | n = length(X(1,:)); 11 | mu = zeros(1, size(X, 2)); 12 | sigma = zeros(1, size(X, 2)); 13 | 14 | for i=1:n 15 | tmp = std(X(:,i)); 16 | miu = mean(X(:,i)); 17 | X(:,i) = (X(:,i) - miu) / tmp; 18 | 19 | mmu(1,i) = miu; 20 | ssigma(1, i) = tmp; 21 | end 22 | 23 | X_norm = X; 24 | mu = mmu; 25 | sigma = ssigma; 26 | 27 | % ====================== YOUR CODE HERE ====================== 28 | % Instructions: First, for each feature dimension, compute the mean 29 | % of the feature and subtract it from the dataset, 30 | % storing the mean value in mu. Next, compute the 31 | % standard deviation of each feature and divide 32 | % each feature by it's standard deviation, storing 33 | % the standard deviation in sigma. 34 | % 35 | % Note that X is a matrix where each column is a 36 | % feature and each row is an example. You need 37 | % to perform the normalization separately for 38 | % each feature. 39 | % 40 | % Hint: You might find the 'mean' and 'std' functions useful. 41 | % 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | % ============================================================ 52 | 53 | end 54 | -------------------------------------------------------------------------------- /ex1/gradientDescent.m: -------------------------------------------------------------------------------- 1 | function [theta, J_history] = gradientDescent(X, y, theta, alpha, num_iters) 2 | %GRADIENTDESCENT Performs gradient descent to learn theta 3 | % theta = GRADIENTDESCENT(X, y, theta, alpha, num_iters) updates theta by 4 | % taking num_iters gradient steps with learning rate alpha 5 | % Initialize some useful values 6 | 7 | m = length(y); % number of training examples 8 | J_history = zeros(num_iters, 1); 9 | 10 | for iter = 1:num_iters 11 | 12 | % ====================== CODE HERE ====================== 13 | %Notice it has been updated simultaneously otherwise the value 14 | %will has little disparity in theta around [0.006,0.0006] 15 | tmp = theta; 16 | for i = 1:2 17 | k = 1; 18 | sum = 0; 19 | 20 | while( k <= m ) 21 | sum = sum + ((theta)' * (X(k,:))' - y(k)) * X(k, i); 22 | k = k + 1; 23 | end 24 | tmp(i) = tmp(i) - alpha * sum / m; 25 | end 26 | 27 | theta = tmp; 28 | 29 | % ============================================================ 30 | 31 | % Save the cost J in every iteration 32 | J_history(iter) = computeCost(X, y, theta); 33 | fprintf('---%d--- \r\n', J_history(iter)); 34 | %make sure cost function J always goes down 35 | 36 | end 37 | 38 | end 39 | -------------------------------------------------------------------------------- /ex1/gradientDescentMulti.m: -------------------------------------------------------------------------------- 1 | function [theta, J_history] = gradientDescentMulti(X, y, theta, alpha, num_iters) 2 | %GRADIENTDESCENTMULTI Performs gradient descent to learn theta 3 | % theta = GRADIENTDESCENTMULTI(x, y, theta, alpha, num_iters) updates theta by 4 | % taking num_iters gradient steps with learning rate alpha 5 | 6 | % Initialize some useful values 7 | m = length(y); % number of training examples 8 | J_history = zeros(num_iters, 1); 9 | 10 | for iter = 1:num_iters 11 | 12 | % ====================== CODE HERE ====================== 13 | % 14 | tmp = theta; 15 | feature_dim = length(X(1,:)); 16 | for i = 1: feature_dim 17 | k = 1; 18 | sum = 0; 19 | 20 | while( k <= m ) 21 | sum = sum + ((theta)' * 
(X(k,:))' - y(k)) * X(k, i); 22 | k = k + 1; 23 | end 24 | tmp(i) = tmp(i) - alpha * sum / m; 25 | end 26 | 27 | theta = tmp; 28 | 29 | % ============================================================ 30 | 31 | % Save the cost J in every iteration 32 | J_history(iter) = computeCostMulti(X, y, theta); 33 | fprintf('---%d--- \r\n', J_history(iter)); 34 | % make sure cost function J always goes down 35 | 36 | end 37 | 38 | end 39 | -------------------------------------------------------------------------------- /ex1/normalEqn.m: -------------------------------------------------------------------------------- 1 | function [theta] = normalEqn(X, y) 2 | %NORMALEQN Computes the closed-form solution to linear regression 3 | % NORMALEQN(X,y) computes the closed-form solution to linear 4 | % regression using the normal equations. 5 | 6 | theta = zeros(size(X, 2), 1); 7 | 8 | % ====================== YOUR CODE HERE ====================== 9 | 10 | % Compared to gradient descent, the normal equation needs only a single line of code. 11 | solution = pinv((X'* X)) * X'* y; 12 | 13 | % ------------------------------------------------------------- 14 | 15 | theta = solution; 16 | % ============================================================ 17 | 18 | end 19 | -------------------------------------------------------------------------------- /ex1/plotData.m: -------------------------------------------------------------------------------- 1 | function plotData(x, y) 2 | %PLOTDATA Plots the data points x and y into a new figure 3 | % PLOTDATA(x,y) plots the data points and gives the figure axes labels of 4 | % population and profit. 5 | 6 | figure; % open a new figure window 7 | 8 | % ====================== YOUR CODE HERE ====================== 9 | % Instructions: Plot the training data into a figure using the 10 | % "figure" and "plot" commands. Set the axes labels using 11 | % the "xlabel" and "ylabel" commands. Assume the 12 | % population and revenue data have been passed in 13 | % as the x and y arguments of this function. 14 | % 15 | % Hint: You can use the 'rx' option with plot to have the markers 16 | % appear as red crosses. Furthermore, you can make the 17 | % markers larger by using plot(..., 'rx', 'MarkerSize', 10); 18 | 19 | 20 | 21 | 22 | 23 | % ============================================================ 24 | 25 | end 26 | -------------------------------------------------------------------------------- /ex1/warmUpExercise.m: -------------------------------------------------------------------------------- 1 | function A = warmUpExercise() 2 | %WARMUPEXERCISE Example function in octave 3 | % A = WARMUPEXERCISE() is an example function that returns the 5x5 identity matrix 4 | 5 | A = eye(5); 6 | % ============= YOUR CODE HERE ============== 7 | % Instructions: Return the 5x5 identity matrix 8 | % In octave, we return values by defining which variables 9 | % represent the return values (at the top of the file) 10 | % and then set them accordingly. 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | % =========================================== 19 | 20 | 21 | end 22 | -------------------------------------------------------------------------------- /ex2/costFunction.m: -------------------------------------------------------------------------------- 1 | function [J, grad] = costFunction(theta, X, y) 2 | %COSTFUNCTION Compute cost and gradient for logistic regression 3 | % J = COSTFUNCTION(theta, X, y) computes the cost of using theta as the 4 | % parameter for logistic regression and the gradient of the cost 5 | % w.r.t.
to the parameters. 6 | 7 | % Initialize some useful values 8 | m = length(y); % number of training examples 9 | 10 | % You need to return the following variables correctly 11 | J = 0; 12 | grad = zeros(size(theta)); 13 | 14 | [row, col] = size(X); 15 | 16 | for i = 1:row 17 | cost = (-y(i)) * log(sigmoid(theta' * X(i,:)')) ... 18 | -(1 - y(i)) * log(1 - sigmoid(theta' * X(i,:)')); 19 | J = J + cost; 20 | end 21 | 22 | J = 1 / m * J; 23 | 24 | 25 | % for j = 1: length(theta) 26 | % tmp = (sigmoid(X(:,j) * theta(j,:))' - y') * X(:,j); 27 | % grad(j) = 1 / m * tmp; 28 | % end 29 | 30 | sum = zeros(col,1); 31 | 32 | for j = 1: col 33 | for i = 1: m 34 | tmp = (sigmoid(X(i,:) * theta) - y(i)) * X(i,j); 35 | sum(j) = sum(j) + tmp; 36 | 37 | grad = (1 / m) * sum; 38 | end 39 | % ============================================================= 40 | 41 | end 42 | -------------------------------------------------------------------------------- /ex2/costFunctionReg.m: -------------------------------------------------------------------------------- 1 | function [J, grad] = costFunctionReg(theta, X, y, lambda) 2 | %COSTFUNCTIONREG Compute cost and gradient for logistic regression with regularization 3 | % J = COSTFUNCTIONREG(theta, X, y, lambda) computes the cost of using 4 | % theta as the parameter for regularized logistic regression and the 5 | % gradient of the cost w.r.t. to the parameters. 6 | 7 | % Initialize some useful values 8 | m = length(y); % number of training examples 9 | 10 | % You need to return the following variables correctly 11 | J = 0; 12 | grad = zeros(size(theta)); 13 | [row, col] = size(X); 14 | 15 | for i = 1:row 16 | cost = (-y(i)) * log(sigmoid(theta' * X(i,:)')) ... 17 | -(1 - y(i)) * log(1 - sigmoid(theta' * X(i,:)')); 18 | %penal = ((norm(theta))^2); 19 | J = J + cost; 20 | end 21 | thet = theta(2:col,1); 22 | 23 | penal = lambda / 2 * (norm(thet))^2; 24 | 25 | J = 1 / m * (J + penal); 26 | 27 | 28 | % for j = 1: length(theta) 29 | % tmp = (sigmoid(X(:,j) * theta(j,:))' - y') * X(:,j); 30 | % grad(j) = 1 / m * tmp; 31 | % end 32 | 33 | sums = zeros(col,1); 34 | 35 | for j = 1: col 36 | if(j == 1) 37 | for i = 1: m 38 | tmp = (sigmoid(X(i,:) * theta) - y(i)) * X(i,j); 39 | sums(j) = sums(j) + tmp; 40 | end 41 | else 42 | for i = 1: m 43 | tmp = (sigmoid(X(i,:) * theta) - y(i)) * X(i,j); 44 | 45 | sums(j) = sums(j) + tmp; 46 | end 47 | penal = lambda * theta(j); 48 | sums(j) = sums(j) + penal; 49 | 50 | end 51 | 52 | grad = (1 / m) * sums; 53 | 54 | end 55 | 56 | 57 | end 58 | -------------------------------------------------------------------------------- /ex2/ex2.m: -------------------------------------------------------------------------------- 1 | For this exercise, you will not need to change any code in this file, 2 | % or any other files other than those mentioned above. 3 | % 4 | 5 | %% Initialization 6 | clear ; close all; clc 7 | 8 | %% Load Data 9 | % The first two columns contains the exam scores and the third column 10 | % contains the label. 11 | 12 | data = load('ex2data1.txt'); 13 | X = data(:, [1, 2]); y = data(:, 3); 14 | 15 | %% ==================== Part 1: Plotting ==================== 16 | % We start the exercise by first plotting the data to understand the 17 | % the problem we are working with. 18 | 19 | fprintf(['Plotting data with + indicating (y = 1) examples and o ' ... 
20 | 'indicating (y = 0) examples.\n']); 21 | 22 | plotData(X, y); 23 | 24 | % Put some labels 25 | hold on; 26 | % Labels and Legend 27 | xlabel('Exam 1 score') 28 | ylabel('Exam 2 score') 29 | 30 | % Specified in plot order 31 | legend('Admitted', 'Not admitted') 32 | hold off; 33 | 34 | fprintf('\nProgram paused. Press enter to continue.\n'); 35 | pause; 36 | 37 | 38 | %% ============ Part 2: Compute Cost and Gradient ============ 39 | % In this part of the exercise, you will implement the cost and gradient 40 | % for logistic regression. You neeed to complete the code in 41 | % costFunction.m 42 | 43 | % Setup the data matrix appropriately, and add ones for the intercept term 44 | [m, n] = size(X); 45 | 46 | % Add intercept term to x and X_test 47 | X = [ones(m, 1) X]; 48 | 49 | % Initialize fitting parameters 50 | initial_theta = zeros(n + 1, 1); 51 | 52 | % Compute and display initial cost and gradient 53 | [cost, grad] = costFunction(initial_theta, X, y); 54 | 55 | fprintf('Cost at initial theta (zeros): %f\n', cost); 56 | fprintf('Expected cost (approx): 0.693\n'); 57 | fprintf('Gradient at initial theta (zeros): \n'); 58 | fprintf(' %f \n', grad); 59 | fprintf('Expected gradients (approx):\n -0.1000\n -12.0092\n -11.2628\n'); 60 | 61 | % Compute and display cost and gradient with non-zero theta 62 | test_theta = [-24; 0.2; 0.2]; 63 | [cost, grad] = costFunction(test_theta, X, y); 64 | 65 | fprintf('\nCost at test theta: %f\n', cost); 66 | fprintf('Expected cost (approx): 0.218\n'); 67 | fprintf('Gradient at test theta: \n'); 68 | fprintf(' %f \n', grad); 69 | fprintf('Expected gradients (approx):\n 0.043\n 2.566\n 2.647\n'); 70 | 71 | fprintf('\nProgram paused. Press enter to continue.\n'); 72 | pause; 73 | 74 | 75 | %% ============= Part 3: Optimizing using fminunc ============= 76 | % In this exercise, you will use a built-in function (fminunc) to find the 77 | % optimal parameters theta. 78 | 79 | % Set options for fminunc 80 | options = optimset('GradObj', 'on', 'MaxIter', 400); 81 | 82 | % Run fminunc to obtain the optimal theta 83 | % This function will return theta and the cost 84 | [theta, cost] = ... 85 | fminunc(@(t)(costFunction(t, X, y)), initial_theta, options); 86 | 87 | % Print theta to screen 88 | fprintf('Cost at theta found by fminunc: %f\n', cost); 89 | fprintf('Expected cost (approx): 0.203\n'); 90 | fprintf('theta: \n'); 91 | fprintf(' %f \n', theta); 92 | fprintf('Expected theta (approx):\n'); 93 | fprintf(' -25.161\n 0.206\n 0.201\n'); 94 | 95 | % Plot Boundary 96 | plotDecisionBoundary(theta, X, y); 97 | 98 | % Put some labels 99 | hold on; 100 | % Labels and Legend 101 | xlabel('Exam 1 score') 102 | ylabel('Exam 2 score') 103 | 104 | % Specified in plot order 105 | legend('Admitted', 'Not admitted') 106 | hold off; 107 | 108 | fprintf('\nProgram paused. Press enter to continue.\n'); 109 | pause; 110 | 111 | %% ============== Part 4: Predict and Accuracies ============== 112 | % After learning the parameters, you'll like to use it to predict the outcomes 113 | % on unseen data. In this part, you will use the logistic regression model 114 | % to predict the probability that a student with score 45 on exam 1 and 115 | % score 85 on exam 2 will be admitted. 116 | % 117 | % Furthermore, you will compute the training and test set accuracies of 118 | % our model. 
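% --- Added sketch (not from the original file) ---
% One vectorized way to produce the predictions that predict.m (completed
% separately) must return: with the intercept column already in X, classify
% an example as 1 whenever its predicted probability is at least 0.5.
%
%   p = sigmoid(X * theta) >= 0.5;   % m x 1 vector of 0/1 predictions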
119 | % 120 | % Your task is to complete the code in predict.m 121 | 122 | % Predict probability for a student with score 45 on exam 1 123 | % and score 85 on exam 2 124 | 125 | prob = sigmoid([1 45 85] * theta); 126 | fprintf(['For a student with scores 45 and 85, we predict an admission ' ... 127 | 'probability of %f\n'], prob); 128 | fprintf('Expected value: 0.775 +/- 0.002\n\n'); 129 | 130 | % Compute accuracy on our training set 131 | p = predict(theta, X); 132 | 133 | fprintf('Train Accuracy: %f\n', mean(double(p == y)) * 100); 134 | fprintf('Expected accuracy (approx): 89.0\n'); 135 | fprintf('\n'); 136 | 137 | 138 | -------------------------------------------------------------------------------- /ex2/ex2_guide.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Y1ran/Machine-Learning-Coursera-code/6c3cb995660cc27fa471089739570569755ab1d9/ex2/ex2_guide.pdf -------------------------------------------------------------------------------- /ex2/ex2_reg.m: -------------------------------------------------------------------------------- 1 | For this exercise, you will not need to change any code in this file, 2 | % or any other files other than those mentioned above. 3 | % 4 | 5 | %% Initialization 6 | clear ; close all; clc 7 | 8 | %% Load Data 9 | % The first two columns contains the X values and the third column 10 | % contains the label (y). 11 | 12 | data = load('ex2data2.txt'); 13 | X = data(:, [1, 2]); y = data(:, 3); 14 | 15 | plotData(X, y); 16 | 17 | % Put some labels 18 | hold on; 19 | 20 | % Labels and Legend 21 | xlabel('Microchip Test 1') 22 | ylabel('Microchip Test 2') 23 | 24 | % Specified in plot order 25 | legend('y = 1', 'y = 0') 26 | hold off; 27 | 28 | 29 | %% =========== Part 1: Regularized Logistic Regression ============ 30 | % In this part, you are given a dataset with data points that are not 31 | % linearly separable. However, you would still like to use logistic 32 | % regression to classify the data points. 33 | % 34 | % To do so, you introduce more features to use -- in particular, you add 35 | % polynomial features to our data matrix (similar to polynomial 36 | % regression). 37 | % 38 | 39 | % Add Polynomial Features 40 | 41 | % Note that mapFeature also adds a column of ones for us, so the intercept 42 | % term is handled 43 | X = mapFeature(X(:,1), X(:,2)); 44 | 45 | % Initialize fitting parameters 46 | initial_theta = zeros(size(X, 2), 1); 47 | 48 | % Set regularization parameter lambda to 1 49 | lambda = 1; 50 | 51 | % Compute and display initial cost and gradient for regularized logistic 52 | % regression 53 | [cost, grad] = costFunctionReg(initial_theta, X, y, lambda); 54 | 55 | fprintf('Cost at initial theta (zeros): %f\n', cost); 56 | fprintf('Expected cost (approx): 0.693\n'); 57 | fprintf('Gradient at initial theta (zeros) - first five values only:\n'); 58 | fprintf(' %f \n', grad(1:5)); 59 | fprintf('Expected gradients (approx) - first five values only:\n'); 60 | fprintf(' 0.0085\n 0.0188\n 0.0001\n 0.0503\n 0.0115\n'); 61 | 62 | fprintf('\nProgram paused. 
Press enter to continue.\n'); 62 | pause; 63 | 64 | 65 | % Compute and display cost and gradient 66 | % with all-ones theta and lambda = 10 67 | test_theta = ones(size(X,2),1); 68 | [cost, grad] = costFunctionReg(test_theta, X, y, 10); 69 | 70 | fprintf('\nCost at test theta (with lambda = 10): %f\n', cost); 71 | fprintf('Expected cost (approx): 3.16\n'); 72 | fprintf('Gradient at test theta - first five values only:\n'); 73 | fprintf(' %f \n', grad(1:5)); 74 | fprintf('Expected gradients (approx) - first five values only:\n'); 75 | fprintf(' 0.3460\n 0.1614\n 0.1948\n 0.2269\n 0.0922\n'); 76 | 77 | fprintf('\nProgram paused. Press enter to continue.\n'); 78 | pause; 79 | 80 | %% ============= Part 2: Regularization and Accuracies ============= 81 | % Optional Exercise: 82 | % In this part, you will get to try different values of lambda and 83 | % see how regularization affects the decision boundary 84 | % 85 | % Try the following values of lambda (0, 1, 10, 100). 86 | % 87 | % How does the decision boundary change when you vary lambda? How does 88 | % the training set accuracy vary? 89 | % 90 | 91 | % Initialize fitting parameters 92 | initial_theta = zeros(size(X, 2), 1); 93 | 94 | % Set regularization parameter lambda (you should vary this) 95 | lambda = 0.0558; 96 | 97 | % Set Options 98 | options = optimset('GradObj', 'on', 'MaxIter', 400); 99 | 100 | % Optimize 101 | [theta, J, exit_flag] = ... 102 | fminunc(@(t)(costFunctionReg(t, X, y, lambda)), initial_theta, options); 103 | 104 | % Plot Boundary 105 | plotDecisionBoundary(theta, X, y); 106 | hold on; 107 | title(sprintf('lambda = %g', lambda)) 108 | 109 | % Labels and Legend 110 | xlabel('Microchip Test 1') 111 | ylabel('Microchip Test 2') 112 | 113 | legend('y = 1', 'y = 0', 'Decision boundary') 114 | hold off; 115 | 116 | % Compute accuracy on our training set 117 | p = predict(theta, X); 118 | 119 | fprintf('Train Accuracy: %f\n', mean(double(p == y)) * 100); 120 | fprintf('Expected accuracy (with lambda = 1): 83.1 (approx)\n'); 121 | 122 | -------------------------------------------------------------------------------- /ex2/mapFeature.m: -------------------------------------------------------------------------------- 1 | function out = mapFeature(X1, X2) 2 | % MAPFEATURE Feature mapping function to polynomial features 3 | % 4 | % MAPFEATURE(X1, X2) maps the two input features 5 | % to quadratic features used in the regularization exercise. 6 | % 7 | % Returns a new feature array with more features, comprising 8 | % X1, X2, X1.^2, X2.^2, X1*X2, X1*X2.^2, etc.. 9 | % 10 | % Inputs X1, X2 must be the same size 11 | % 12 | 13 | degree = 6; 14 | out = ones(size(X1(:,1))); 15 | for i = 1:degree 16 | for j = 0:i 17 | out(:, end+1) = (X1.^(i-j)).*(X2.^j); 18 | end 19 | end 20 | 21 | end -------------------------------------------------------------------------------- /ex2/plotData.m: -------------------------------------------------------------------------------- 1 | function plotData(X, y) 2 | %PLOTDATA Plots the data points X and y into a new figure 3 | % PLOTDATA(x,y) plots the data points with + for the positive examples 4 | % and o for the negative examples. X is assumed to be a Mx2 matrix. 5 | 6 | % Create New Figure 7 | figure; hold on; 8 | 9 | % ====================== YOUR CODE HERE ====================== 10 | % Instructions: Plot the positive and negative examples on a 11 | % 2D plot, using the option 'k+' for the positive 12 | % examples and 'ko' for the negative examples.
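% --- Added note (an equivalent sketch, not part of the original file) ---
% Logical indexing gives the same result as the find() calls used below;
% either form draws positives as black crosses and negatives as yellow circles:
%
%   plot(X(y == 1, 1), X(y == 1, 2), 'k+', 'LineWidth', 2, 'MarkerSize', 7);
%   plot(X(y == 0, 1), X(y == 0, 2), 'ko', 'MarkerFaceColor', 'y', 'MarkerSize', 7);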
13 | % 14 | 15 | 16 | pos = find(y == 1); 17 | neg = find(y == 0); 18 | 19 | plot(X(pos, 1), X(pos, 2), 'k+', 'LineWidth', ... 20 | 2, 'MarkerSize', 7); 21 | 22 | plot(X(neg, 1), X(neg, 2), 'ko', 'MarkerFaceColor', ... 23 | 'y', 'MarkerSize',7); 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | % ========================================================================= 32 | 33 | 34 | 35 | hold off; 36 | 37 | end 38 | -------------------------------------------------------------------------------- /ex2/plotDecisionBoundary.m: -------------------------------------------------------------------------------- 1 | function plotDecisionBoundary(theta, X, y) 2 | %PLOTDECISIONBOUNDARY Plots the data points X and y into a new figure with 3 | %the decision boundary defined by theta 4 | % PLOTDECISIONBOUNDARY(theta, X,y) plots the data points with + for the 5 | % positive examples and o for the negative examples. X is assumed to be 6 | % a either 7 | % 1) Mx3 matrix, where the first column is an all-ones column for the 8 | % intercept. 9 | % 2) MxN, N>3 matrix, where the first column is all-ones 10 | 11 | % Plot Data 12 | plotData(X(:,2:3), y); 13 | hold on 14 | 15 | if size(X, 2) <= 3 16 | % Only need 2 points to define a line, so choose two endpoints 17 | plot_x = [min(X(:,2))-2, max(X(:,2))+2]; 18 | 19 | % Calculate the decision boundary line 20 | plot_y = (-1./theta(3)).*(theta(2).*plot_x + theta(1)); 21 | 22 | % Plot, and adjust axes for better viewing 23 | plot(plot_x, plot_y) 24 | 25 | % Legend, specific for the exercise 26 | legend('Admitted', 'Not admitted', 'Decision Boundary') 27 | axis([30, 100, 30, 100]) 28 | else 29 | % Here is the grid range 30 | u = linspace(-1, 1.5, 50); 31 | v = linspace(-1, 1.5, 50); 32 | 33 | z = zeros(length(u), length(v)); 34 | % Evaluate z = theta*x over the grid 35 | for i = 1:length(u) 36 | for j = 1:length(v) 37 | z(i,j) = mapFeature(u(i), v(j))*theta; 38 | end 39 | end 40 | z = z'; % important to transpose z before calling contour 41 | 42 | % Plot z = 0 43 | % Notice you need to specify the range [0, 0] 44 | contour(u, v, z, [0, 0], 'LineWidth', 2) 45 | end 46 | hold off 47 | 48 | end 49 | -------------------------------------------------------------------------------- /ex2/predict.m: -------------------------------------------------------------------------------- 1 | function p = predict(theta, X) 2 | %PREDICT Predict whether the label is 0 or 1 using learned logistic 3 | %regression parameters theta 4 | % p = PREDICT(theta, X) computes the predictions for X using a 5 | % threshold at 0.5 (i.e., if sigmoid(theta'*x) >= 0.5, predict 1) 6 | 7 | m = size(X, 1); % Number of training examples 8 | 9 | % You need to return the following variables correctly 10 | p = zeros(m, 1); 11 | 12 | for i = 1:m 13 | if(sigmoid(theta' * X(i,:)')) >= 0.5 14 | p(i) = 1; 15 | else 16 | p(i) = 0; 17 | end 18 | end 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | % ========================================================================= 28 | 29 | 30 | end 31 | -------------------------------------------------------------------------------- /ex2/sigmoid.m: -------------------------------------------------------------------------------- 1 | function g = sigmoid(z) 2 | %SIGMOID Compute sigmoid function 3 | % g = SIGMOID(z) computes the sigmoid of z. 
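% --- Added note (a sketch, not part of the original file) ---
% A single element-wise expression covers scalars, vectors and matrices, so
% no size check is needed; this is the form used in ex3/sigmoid.m:
%
%   g = 1 ./ (1 + exp(-z));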
4 | 5 | % You need to return the following variables correctly 6 | g = zeros(size(z)); 7 | 8 | % ====================== YOUR CODE HERE ====================== 9 | % Instructions: Compute the sigmoid of each value of z (z can be a matrix, 10 | % vector or scalar). 11 | 12 | if(length(g) == 1) 13 | g = 1 / ( 1 + exp(-z)); 14 | else 15 | [col, row] = size(z); 16 | g = 1 ./ (1 + exp(- z)); 17 | 18 | 19 | 20 | 21 | 22 | % ============================================================= 23 | 24 | end -------------------------------------------------------------------------------- /ex3/displayData.m: -------------------------------------------------------------------------------- 1 | function [h, display_array] = displayData(X, example_width) 2 | %DISPLAYDATA Display 2D data in a nice grid 3 | % [h, display_array] = DISPLAYDATA(X, example_width) displays 2D data 4 | % stored in X in a nice grid. It returns the figure handle h and the 5 | % displayed array if requested. 6 | 7 | % Set example_width automatically if not passed in 8 | if ~exist('example_width', 'var') || isempty(example_width) 9 | example_width = round(sqrt(size(X, 2))); 10 | end 11 | 12 | % Gray Image 13 | colormap(gray); 14 | 15 | % Compute rows, cols 16 | [m n] = size(X); 17 | example_height = (n / example_width); 18 | 19 | % Compute number of items to display 20 | display_rows = floor(sqrt(m)); 21 | display_cols = ceil(m / display_rows); 22 | 23 | % Between images padding 24 | pad = 1; 25 | 26 | % Setup blank display 27 | display_array = - ones(pad + display_rows * (example_height + pad), ... 28 | pad + display_cols * (example_width + pad)); 29 | 30 | % Copy each example into a patch on the display array 31 | curr_ex = 1; 32 | for j = 1:display_rows 33 | for i = 1:display_cols 34 | if curr_ex > m, 35 | break; 36 | end 37 | % Copy the patch 38 | 39 | % Get the max value of the patch 40 | max_val = max(abs(X(curr_ex, :))); 41 | display_array(pad + (j - 1) * (example_height + pad) + (1:example_height), ... 42 | pad + (i - 1) * (example_width + pad) + (1:example_width)) = ... 43 | reshape(X(curr_ex, :), example_height, example_width) / max_val; 44 | curr_ex = curr_ex + 1; 45 | end 46 | if curr_ex > m, 47 | break; 48 | end 49 | end 50 | 51 | % Display Image 52 | h = imagesc(display_array, [-1 1]); 53 | 54 | % Do not show axis 55 | axis image off 56 | 57 | drawnow; 58 | 59 | end 60 | -------------------------------------------------------------------------------- /ex3/ex3.m: -------------------------------------------------------------------------------- 1 | %% Machine Learning Online Class - Exercise 3 | Part 1: One-vs-all 2 | 3 | % Instructions 4 | % ------------ 5 | % 6 | % This file contains code that helps you get started on the 7 | % linear exercise. You will need to complete the following functions 8 | % in this exericse: 9 | % 10 | % lrCostFunction.m (logistic regression cost function) 11 | % oneVsAll.m 12 | % predictOneVsAll.m 13 | % predict.m 14 | % 15 | % For this exercise, you will not need to change any code in this file, 16 | % or any other files other than those mentioned above. 17 | % 18 | 19 | %% Initialization 20 | clear ; close all; clc 21 | 22 | %% Setup the parameters you will use for this part of the exercise 23 | input_layer_size = 400; % 20x20 Input Images of Digits 24 | num_labels = 10; % 10 labels, from 1 to 10 25 | % (note that we have mapped "0" to label 10) 26 | 27 | %% =========== Part 1: Loading and Visualizing Data ============= 28 | % We start the exercise by first loading and visualizing the dataset. 
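% --- Added note (an illustrative sketch, not part of the original file) ---
% Each row of X stores one 20x20 grayscale digit unrolled into 400 values
% (input_layer_size = 400). A single example, say row k, can be viewed with
% the same reshape idea used by displayData.m:
%
%   imagesc(reshape(X(k, :), 20, 20)); colormap(gray); axis image off;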
29 | % You will be working with a dataset that contains handwritten digits. 30 | % 31 | 32 | % Load Training Data 33 | fprintf('Loading and Visualizing Data ...\n') 34 | 35 | load('ex3data1.mat'); % training data stored in arrays X, y 36 | m = size(X, 1); 37 | 38 | % Randomly select 100 data points to display 39 | rand_indices = randperm(m); 40 | sel = X(rand_indices(1:100), :); 41 | 42 | displayData(sel); 43 | 44 | fprintf('Program paused. Press enter to continue.\n'); 45 | pause; 46 | 47 | %% ============ Part 2a: Vectorize Logistic Regression ============ 48 | % In this part of the exercise, you will reuse your logistic regression 49 | % code from the last exercise. You task here is to make sure that your 50 | % regularized logistic regression implementation is vectorized. After 51 | % that, you will implement one-vs-all classification for the handwritten 52 | % digit dataset. 53 | % 54 | 55 | % Test case for lrCostFunction 56 | fprintf('\nTesting lrCostFunction() with regularization'); 57 | 58 | theta_t = [-2; -1; 1; 2]; 59 | X_t = [ones(5,1) reshape(1:15,5,3)/10]; 60 | y_t = ([1;0;1;0;1] >= 0.5); 61 | lambda_t = 3; 62 | [J grad] = lrCostFunction(theta_t, X_t, y_t, lambda_t); 63 | 64 | fprintf('\nCost: %f\n', J); 65 | fprintf('Expected cost: 2.534819\n'); 66 | fprintf('Gradients:\n'); 67 | fprintf(' %f \n', grad); 68 | fprintf('Expected gradients:\n'); 69 | fprintf(' 0.146561\n -0.548558\n 0.724722\n 1.398003\n'); 70 | 71 | fprintf('Program paused. Press enter to continue.\n'); 72 | pause; 73 | %% ============ Part 2b: One-vs-All Training ============ 74 | fprintf('\nTraining One-vs-All Logistic Regression...\n') 75 | 76 | lambda = 0.1; 77 | [all_theta] = oneVsAll(X, y, num_labels, lambda); 78 | fprintf('Program paused. Press enter to continue.\n'); 79 | pause; 80 | 81 | 82 | %% ================ Part 3: Predict for One-Vs-All ================ 83 | 84 | pred = predictOneVsAll(all_theta, X); 85 | 86 | y((y==10)) = 0; 87 | 88 | fprintf('\nTraining Set Accuracy: %f\n', mean(double(pred == y)) * 100); 89 | 90 | -------------------------------------------------------------------------------- /ex3/ex3_nn.m: -------------------------------------------------------------------------------- 1 | %% Machine Learning Online Class - Exercise 3 | Part 2: Neural Networks 2 | 3 | % Instructions 4 | % ------------ 5 | % 6 | % This file contains code that helps you get started on the 7 | % linear exercise. You will need to complete the following functions 8 | % in this exericse: 9 | % 10 | % lrCostFunction.m (logistic regression cost function) 11 | % oneVsAll.m 12 | % predictOneVsAll.m 13 | % predict.m 14 | % 15 | % For this exercise, you will not need to change any code in this file, 16 | % or any other files other than those mentioned above. 17 | % 18 | 19 | %% Initialization 20 | clear ; close all; clc 21 | 22 | %% Setup the parameters you will use for this exercise 23 | input_layer_size = 400; % 20x20 Input Images of Digits 24 | hidden_layer_size = 25; % 25 hidden units 25 | num_labels = 10; % 10 labels, from 1 to 10 26 | % (note that we have mapped "0" to label 10) 27 | 28 | %% =========== Part 1: Loading and Visualizing Data ============= 29 | % We start the exercise by first loading and visualizing the dataset. 30 | % You will be working with a dataset that contains handwritten digits. 
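% --- Added sketch (not from the original file) ---
% The prediction implemented later in predict.m is one forward pass: append a
% bias column, apply Theta1 and Theta2 with the sigmoid, then take the index
% of the largest output per row (predict.m additionally remaps label 10 to
% digit 0 to match the relabelled y):
%
%   a1 = [ones(size(X, 1), 1) X];                       % input layer plus bias
%   a2 = [ones(size(X, 1), 1) sigmoid(a1 * Theta1')];   % hidden layer plus bias
%   a3 = sigmoid(a2 * Theta2');                         % one output column per label
%   [~, p] = max(a3, [], 2);                            % predicted label per example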
31 | % 32 | 33 | % Load Training Data 34 | fprintf('Loading and Visualizing Data ...\n') 35 | 36 | load('ex3data1.mat'); 37 | m = size(X, 1); 38 | 39 | % Randomly select 100 data points to display 40 | sel = randperm(size(X, 1)); 41 | sel = sel(1:100); 42 | 43 | displayData(X(sel, :)); 44 | 45 | fprintf('Program paused. Press enter to continue.\n'); 46 | pause; 47 | 48 | %% ================ Part 2: Loading Pameters ================ 49 | % In this part of the exercise, we load some pre-initialized 50 | % neural network parameters. 51 | 52 | fprintf('\nLoading Saved Neural Network Parameters ...\n') 53 | 54 | % Load the weights into variables Theta1 and Theta2 55 | load('ex3weights.mat'); 56 | 57 | %% ================= Part 3: Implement Predict ================= 58 | % After training the neural network, we would like to use it to predict 59 | % the labels. You will now implement the "predict" function to use the 60 | % neural network to predict the labels of the training set. This lets 61 | % you compute the training set accuracy. 62 | 63 | pred = predict(Theta1, Theta2, X); 64 | y(y == 10) = 0; 65 | 66 | 67 | fprintf('\nTraining Set Accuracy: %f\n', mean(double(pred == y)) * 100); 68 | 69 | fprintf('Program paused. Press enter to continue.\n'); 70 | pause; 71 | 72 | % To give you an idea of the network's output, you can also run 73 | % through the examples one at the a time to see what it is predicting. 74 | 75 | % Randomly permute examples 76 | rp = randperm(m); 77 | 78 | for i = 1:m 79 | % Display 80 | fprintf('\nDisplaying Example Image\n'); 81 | displayData(X(rp(i), :)); 82 | 83 | pred = predict(Theta1, Theta2, X(rp(i),:)); 84 | fprintf('\nNeural Network Prediction: %d (digit %d)\n', pred, mod(pred, 10)); 85 | 86 | % Pause with quit option 87 | s = input('Paused - press enter to continue, q to exit:','s'); 88 | if s == 'q' 89 | break 90 | end 91 | end 92 | 93 | -------------------------------------------------------------------------------- /ex3/lrCostFunction.m: -------------------------------------------------------------------------------- 1 | function [J, grad] = lrCostFunction(theta, X, y, lambda) 2 | %LRCOSTFUNCTION Compute cost and gradient for logistic regression with 3 | %regularization 4 | % J = LRCOSTFUNCTION(theta, X, y, lambda) computes the cost of using 5 | % theta as the parameter for regularized logistic regression and the 6 | % gradient of the cost w.r.t. to the parameters. 7 | 8 | % Initialize some useful values 9 | m = length(y); % number of training examples 10 | 11 | % You need to return the following variables correctly 12 | J = 0; 13 | grad = zeros(size(theta)); 14 | 15 | % ====================== YOUR CODE HERE ====================== 16 | % Hint: When computing the gradient of the regularized cost function, 17 | % there're many possible vectorized solutions, but one solution 18 | % looks like: 19 | % grad = (unregularized gradient for logistic regression) 20 | % temp = theta; 21 | % temp(1) = 0; % because we don't add anything for j = 0 22 | % grad = grad + YOUR_CODE_HERE (using the temp variable) 23 | 24 | 25 | [row, col] = size(X); 26 | 27 | 28 | cost = (-1 .* y)' * log(sigmoid( X * theta)) ... 
29 | -(ones(row,1) - y)' * log(ones(row,1) - sigmoid(X * theta)); 30 | %penal = ((norm(theta))^2); 31 | %J = J + cost; 32 | 33 | thet = theta(2:col,1); 34 | 35 | penal = lambda / 2 * ((norm(thet))^2); 36 | 37 | J = 1 / m * (cost + penal); 38 | 39 | 40 | 41 | tmp = X' * (sigmoid(X* theta) - y); 42 | %grad(j) = 1 / m * tmp; 43 | %sums = zeros(col,1); 44 | 45 | theta(1,1) = 0; 46 | pena = lambda .* theta; 47 | grad = tmp + pena; 48 | 49 | 50 | 51 | grad = (1 / m) .* grad(:); 52 | 53 | end 54 | 55 | -------------------------------------------------------------------------------- /ex3/oneVsAll.m: -------------------------------------------------------------------------------- 1 | function [all_theta] = oneVsAll(X, y, num_labels, lambda) 2 | 3 | % [all_theta] = ONEVSALL(X, y, num_labels, lambda) trains num_labels 4 | % logistic regression classifiers and returns each of these classifiers 5 | % in a matrix all_theta, where the i-th row of all_theta corresponds 6 | % to the classifier for label i 7 | 8 | % Some useful variables 9 | m = size(X, 1); 10 | n = size(X, 2); 11 | 12 | % You need to return the following variables correctly 13 | all_theta = zeros(num_labels, n + 1); 14 | 15 | % Add ones to the X data matrix 16 | X = [ones(m, 1) X]; 17 | 18 | % ====================== YOUR CODE HERE ====================== 19 | % Example Code for fmincg: 20 | % 21 | % % Set Initial theta 22 | % initial_theta = zeros(n + 1, 1); 23 | % 24 | % % Set options for fminunc 25 | % options = optimset('GradObj', 'on', 'MaxIter', 50); 26 | % 27 | % % Run fmincg to obtain the optimal theta 28 | % % This function will return theta and the cost 29 | % [theta] = ... 30 | % fmincg (@(t)(lrCostFunction(t, X, (y == c), lambda)), ... 31 | % initial_theta, options); 32 | % 33 | 34 | for i = 1 : num_labels 35 | y_tmp = (y == i); 36 | initial_theta = zeros(n + 1, 1); 37 | 38 | options = optimset('GradObj', 'on', 'MaxIter', 50); 39 | %This function will return theta and the cost 40 | [all_theta(i,:)] = ... 41 | fmincg (@(t)(lrCostFunction(t, X, y_tmp, lambda)), ... 
42 | initial_theta, options); 43 | end 44 | 45 | 46 | 47 | 48 | 49 | % ========================================================================= 50 | 51 | 52 | end 53 | -------------------------------------------------------------------------------- /ex3/predict.m: -------------------------------------------------------------------------------- 1 | function p = predict(Theta1, Theta2, X) 2 | %PREDICT Predict the label of an input given a trained neural network 3 | % p = PREDICT(Theta1, Theta2, X) outputs the predicted label of X given the 4 | % trained weights of a neural network (Theta1, Theta2) 5 | 6 | % Useful values 7 | m = size(X, 1); 8 | num_labels = size(Theta2, 1); 9 | 10 | X = [ones(m,1) X]; 11 | % You need to return the following variables correctly 12 | p = zeros(size(X, 1), 1); 13 | 14 | % ====================== YOUR CODE HERE ====================== 15 | %hidden_act = zeros(size(X, 1), size(Theta1, 2)); 16 | hidden_layer = [ones(size(X, 1),1) sigmoid(X * Theta1')]; 17 | 18 | %hidden_layer = zeros(size(X, 1), size(hidden_act, 2)); 19 | %hidden_layer = sigmoid(hidden_act); 20 | 21 | output_act = hidden_layer * Theta2'; 22 | output_layer = sigmoid(output_act); 23 | 24 | 25 | p = max(output_layer, [], 2); 26 | 27 | for i = 1 : m 28 | for j = 1 : num_labels 29 | if( output_layer(i, j) == p(i, :)) 30 | if( j ~= 10) 31 | p(i,:) = j; 32 | else 33 | p(i,:) = 0; 34 | end 35 | end 36 | end 37 | end 38 | 39 | 40 | % ========================================================================= 41 | 42 | 43 | end 44 | -------------------------------------------------------------------------------- /ex3/predictOneVsAll.m: -------------------------------------------------------------------------------- 1 | function p = predictOneVsAll(all_theta, X) 2 | %PREDICT Predict the label for a trained one-vs-all classifier. The labels 3 | %are in the range 1..K, where K = size(all_theta, 1). 4 | % p = PREDICTONEVSALL(all_theta, X) will return a vector of predictions 5 | % for each example in the matrix X. Note that X contains the examples in 6 | % rows. all_theta is a matrix where the i-th row is a trained logistic 7 | % regression theta vector for the i-th class. You should set p to a vector 8 | % of values from 1..K (e.g., p = [1; 3; 1; 2] predicts classes 1, 3, 1, 2 9 | % for 4 examples) 10 | 11 | m = size(X, 1); 12 | num_labels = size(all_theta, 1); 13 | 14 | % You need to return the following variables correctly 15 | p = zeros(size(X, 1), 1); 16 | 17 | % Add ones to the X data matrix 18 | X = [ones(m, 1) X]; 19 | 20 | % ====================== YOUR CODE HERE ====================== 21 | 22 | % Hint: This code can be done all vectorized using the max function. 23 | % In particular, the max function can also return the index of the 24 | % max element, for more information see 'help max'. If your examples 25 | % are in rows, then, you can use max(A, [], 2) to obtain the max 26 | % for each row. 
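% --- Added sketch (not from the original file) ---
% Following the hint above, the second output of max returns the index of
% the largest score directly, avoiding the matching loops used below (the
% original code then maps label 10 back to digit 0):
%
%   scores = sigmoid(X * all_theta');   % m x num_labels matrix of scores
%   [~, p] = max(scores, [], 2);        % p(i) = label with the highest score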
27 | % 28 | 29 | pred = sigmoid( X * all_theta'); 30 | 31 | p = max(pred, [], 2); 32 | 33 | for i = 1 : m 34 | for j = 1 : num_labels 35 | if( pred(i, j) == p(i, :)) 36 | if( j ~= 10) 37 | p(i,:) = j; 38 | else 39 | p(i,:) = 0; 40 | end 41 | end 42 | end 43 | end 44 | 45 | % ========================================================================= 46 | 47 | 48 | end 49 | -------------------------------------------------------------------------------- /ex3/sigmoid.m: -------------------------------------------------------------------------------- 1 | function g = sigmoid(z) 2 | %SIGMOID Compute sigmoid functoon 3 | % J = SIGMOID(z) computes the sigmoid of z. 4 | 5 | g = 1.0 ./ (1.0 + exp(-z)); 6 | end 7 | -------------------------------------------------------------------------------- /ex4/computeNumericalGradient.m: -------------------------------------------------------------------------------- 1 | function numgrad = computeNumericalGradient(J, theta) 2 | %COMPUTENUMERICALGRADIENT Computes the gradient using "finite differences" 3 | %and gives us a numerical estimate of the gradient. 4 | % numgrad = COMPUTENUMERICALGRADIENT(J, theta) computes the numerical 5 | % gradient of the function J around theta. Calling y = J(theta) should 6 | % return the function value at theta. 7 | 8 | % Notes: The following code implements numerical gradient checking, and 9 | % returns the numerical gradient.It sets numgrad(i) to (a numerical 10 | % approximation of) the partial derivative of J with respect to the 11 | % i-th input argument, evaluated at theta. (i.e., numgrad(i) should 12 | % be the (approximately) the partial derivative of J with respect 13 | % to theta(i).) 14 | % 15 | 16 | numgrad = zeros(size(theta)); 17 | perturb = zeros(size(theta)); 18 | e = 1e-4; 19 | for p = 1:numel(theta) 20 | % Set perturbation vector 21 | perturb(p) = e; 22 | loss1 = J(theta - perturb); 23 | loss2 = J(theta + perturb); 24 | % Compute Numerical Gradient 25 | numgrad(p) = (loss2 - loss1) / (2*e); 26 | perturb(p) = 0; 27 | end 28 | 29 | end 30 | -------------------------------------------------------------------------------- /ex4/ex4.m: -------------------------------------------------------------------------------- 1 | %% Machine Learning Online Class - Exercise 4 Neural Network Learning 2 | 3 | % Instructions 4 | % ------------ 5 | % 6 | % This file contains code that helps you get started on the 7 | % linear exercise. You will need to complete the following functions 8 | % in this exericse: 9 | % 10 | % sigmoidGradient.m 11 | % randInitializeWeights.m 12 | % nnCostFunction.m 13 | % 14 | % For this exercise, you will not need to change any code in this file, 15 | % or any other files other than those mentioned above. 16 | % 17 | 18 | %% Initialization 19 | clear ; close all; clc 20 | 21 | %% Setup the parameters you will use for this exercise 22 | input_layer_size = 400; % 20x20 Input Images of Digits 23 | hidden_layer_size = 25; % 25 hidden units 24 | num_labels = 10; % 10 labels, from 1 to 10 25 | % (note that we have mapped "0" to label 10) 26 | 27 | %% =========== Part 1: Loading and Visualizing Data ============= 28 | % We start the exercise by first loading and visualizing the dataset. 29 | % You will be working with a dataset that contains handwritten digits. 
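% --- Added note (a sketch; sigmoidGradient.m itself is not reproduced above) ---
% The quantity sigmoidGradient.m needs to return is the derivative of the
% sigmoid, which can be written in terms of sigmoid itself and works
% element-wise on scalars, vectors and matrices:
%
%   g = sigmoid(z) .* (1 - sigmoid(z));   % e.g. sigmoidGradient(0) = 0.25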
30 | % 31 | 32 | % Load Training Data 33 | fprintf('Loading and Visualizing Data ...\n') 34 | 35 | load('ex4data1.mat'); 36 | m = size(X, 1); 37 | 38 | % Randomly select 100 data points to display 39 | sel = randperm(size(X, 1)); 40 | sel = sel(1:100); 41 | 42 | displayData(X(sel, :)); 43 | 44 | fprintf('Program paused. Press enter to continue.\n'); 45 | pause; 46 | 47 | 48 | %% ================ Part 2: Loading Parameters ================ 49 | % In this part of the exercise, we load some pre-initialized 50 | % neural network parameters. 51 | 52 | fprintf('\nLoading Saved Neural Network Parameters ...\n') 53 | 54 | % Load the weights into variables Theta1 and Theta2 55 | load('ex4weights.mat'); 56 | 57 | % Unroll parameters 58 | nn_params = [Theta1(:) ; Theta2(:)]; 59 | 60 | %% ================ Part 3: Compute Cost (Feedforward) ================ 61 | % To the neural network, you should first start by implementing the 62 | % feedforward part of the neural network that returns the cost only. You 63 | % should complete the code in nnCostFunction.m to return cost. After 64 | % implementing the feedforward to compute the cost, you can verify that 65 | % your implementation is correct by verifying that you get the same cost 66 | % as us for the fixed debugging parameters. 67 | % 68 | % We suggest implementing the feedforward cost *without* regularization 69 | % first so that it will be easier for you to debug. Later, in part 4, you 70 | % will get to implement the regularized cost. 71 | % 72 | fprintf('\nFeedforward Using Neural Network ...\n') 73 | 74 | % Weight regularization parameter (we set this to 0 here). 75 | lambda = 0; 76 | 77 | J = nnCostFunction(nn_params, input_layer_size, hidden_layer_size, ... 78 | num_labels, X, y, lambda); 79 | 80 | fprintf(['Cost at parameters (loaded from ex4weights): %f '... 81 | '\n(this value should be about 0.287629)\n'], J); 82 | 83 | fprintf('\nProgram paused. Press enter to continue.\n'); 84 | pause; 85 | 86 | %% =============== Part 4: Implement Regularization =============== 87 | % Once your cost function implementation is correct, you should now 88 | % continue to implement the regularization with the cost. 89 | % 90 | 91 | fprintf('\nChecking Cost Function (w/ Regularization) ... \n') 92 | 93 | % Weight regularization parameter (we set this to 1 here). 94 | lambda = 1; 95 | 96 | J = nnCostFunction(nn_params, input_layer_size, hidden_layer_size, ... 97 | num_labels, X, y, lambda); 98 | 99 | fprintf(['Cost at parameters (loaded from ex4weights): %f '... 100 | '\n(this value should be about 0.383770)\n'], J); 101 | 102 | fprintf('Program paused. Press enter to continue.\n'); 103 | pause; 104 | 105 | 106 | %% ================ Part 5: Sigmoid Gradient ================ 107 | % Before you start implementing the neural network, you will first 108 | % implement the gradient for the sigmoid function. You should complete the 109 | % code in the sigmoidGradient.m file. 110 | % 111 | 112 | fprintf('\nEvaluating sigmoid gradient...\n') 113 | 114 | g = sigmoidGradient([-1 -0.5 0 0.5 1]); 115 | fprintf('Sigmoid gradient evaluated at [-1 -0.5 0 0.5 1]:\n '); 116 | fprintf('%f ', g); 117 | fprintf('\n\n'); 118 | 119 | fprintf('Program paused. Press enter to continue.\n'); 120 | pause; 121 | 122 | 123 | %% ================ Part 6: Initializing Pameters ================ 124 | % In this part of the exercise, you will be starting to implment a two 125 | % layer neural network that classifies digits. 
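% Aside (not part of the original file): randInitializeWeights, completed later
% in this folder, breaks the symmetry between hidden units by sampling W
% uniformly from [-epsilon_init, epsilon_init]. One common heuristic ties the
% range to the layer sizes (treat the exact formula as an assumption here; the
% file itself simply uses epsilon_init = 0.12):
%
%   epsilon_init = sqrt(6) / sqrt(L_in + L_out);
%   W = rand(L_out, 1 + L_in) * 2 * epsilon_init - epsilon_init;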
You will start by 126 | % implementing a function to initialize the weights of the neural network 127 | % (randInitializeWeights.m) 128 | 129 | fprintf('\nInitializing Neural Network Parameters ...\n') 130 | 131 | initial_Theta1 = randInitializeWeights(input_layer_size, hidden_layer_size); 132 | initial_Theta2 = randInitializeWeights(hidden_layer_size, num_labels); 133 | 134 | % Unroll parameters 135 | initial_nn_params = [initial_Theta1(:) ; initial_Theta2(:)]; 136 | 137 | 138 | %% =============== Part 7: Implement Backpropagation =============== 139 | % Once your cost matches up with ours, you should proceed to implement the 140 | % backpropagation algorithm for the neural network. You should add to the 141 | % code you've written in nnCostFunction.m to return the partial 142 | % derivatives of the parameters. 143 | % 144 | fprintf('\nChecking Backpropagation... \n'); 145 | 146 | % Check gradients by running checkNNGradients 147 | checkNNGradients; 148 | 149 | fprintf('\nProgram paused. Press enter to continue.\n'); 150 | pause; 151 | 152 | 153 | %% =============== Part 8: Implement Regularization =============== 154 | % Once your backpropagation implementation is correct, you should now 155 | % continue to implement the regularization with the cost and gradient. 156 | % 157 | 158 | fprintf('\nChecking Backpropagation (w/ Regularization) ... \n') 159 | 160 | % Check gradients by running checkNNGradients 161 | lambda = 3; 162 | checkNNGradients(lambda); 163 | 164 | % Also output the costFunction debugging values 165 | debug_J = nnCostFunction(nn_params, input_layer_size, ... 166 | hidden_layer_size, num_labels, X, y, lambda); 167 | 168 | fprintf(['\n\nCost at (fixed) debugging parameters (w/ lambda = %f): %f ' ... 169 | '\n(for lambda = 3, this value should be about 0.576051)\n\n'], lambda, debug_J); 170 | 171 | fprintf('Program paused. Press enter to continue.\n'); 172 | pause; 173 | 174 | 175 | %% =================== Part 8: Training NN =================== 176 | % You have now implemented all the code necessary to train a neural 177 | % network. To train your neural network, we will now use "fmincg", which 178 | % is a function which works similarly to "fminunc". Recall that these 179 | % advanced optimizers are able to train our cost functions efficiently as 180 | % long as we provide them with the gradient computations. 181 | % 182 | fprintf('\nTraining Neural Network... \n') 183 | 184 | % After you have completed the assignment, change the MaxIter to a larger 185 | % value to see how more training helps. 186 | options = optimset('MaxIter', 50); 187 | 188 | % You should also try different values of lambda 189 | lambda = 1; 190 | 191 | % Create "short hand" for the cost function to be minimized 192 | costFunction = @(p) nnCostFunction(p, ... 193 | input_layer_size, ... 194 | hidden_layer_size, ... 195 | num_labels, X, y, lambda); 196 | 197 | % Now, costFunction is a function that takes in only one argument (the 198 | % neural network parameters) 199 | [nn_params, cost] = fmincg(costFunction, initial_nn_params, options); 200 | 201 | % Obtain Theta1 and Theta2 back from nn_params 202 | Theta1 = reshape(nn_params(1:hidden_layer_size * (input_layer_size + 1)), ... 203 | hidden_layer_size, (input_layer_size + 1)); 204 | 205 | Theta2 = reshape(nn_params((1 + (hidden_layer_size * (input_layer_size + 1))):end), ... 206 | num_labels, (hidden_layer_size + 1)); 207 | 208 | fprintf('Program paused. 
Press enter to continue.\n'); 209 | pause; 210 | 211 | 212 | %% ================= Part 9: Visualize Weights ================= 213 | % You can now "visualize" what the neural network is learning by 214 | % displaying the hidden units to see what features they are capturing in 215 | % the data. 216 | 217 | fprintf('\nVisualizing Neural Network... \n') 218 | 219 | displayData(Theta1(:, 2:end)); 220 | 221 | fprintf('\nProgram paused. Press enter to continue.\n'); 222 | pause; 223 | 224 | %% ================= Part 10: Implement Predict ================= 225 | % After training the neural network, we would like to use it to predict 226 | % the labels. You will now implement the "predict" function to use the 227 | % neural network to predict the labels of the training set. This lets 228 | % you compute the training set accuracy. 229 | 230 | pred = predict(Theta1, Theta2, X); 231 | 232 | fprintf('\nTraining Set Accuracy: %f\n', mean(double(pred == y)) * 100); 233 | 234 | 235 | -------------------------------------------------------------------------------- /ex4/nnCostFunction -V2.m: -------------------------------------------------------------------------------- 1 | function [J grad] = nnCostFunction(nn_params, ... 2 | input_layer_size, ... 3 | hidden_layer_size, ... 4 | num_labels, ... 5 | X, y, lambda) 6 | %NNCOSTFUNCTION Implements the neural network cost function for a two layer 7 | %neural network which performs classification 8 | % [J grad] = NNCOSTFUNCTON(nn_params, hidden_layer_size, num_labels, ... 9 | % X, y, lambda) computes the cost and gradient of the neural network. The 10 | % parameters for the neural network are "unrolled" into the vector 11 | % nn_params and need to be converted back into the weight matrices. 12 | % 13 | % The returned parameter grad should be a "unrolled" vector of the 14 | % partial derivatives of the neural network. 15 | % 16 | 17 | % Reshape nn_params back into the parameters Theta1 and Theta2, the weight matrices 18 | % for our 2 layer neural network 19 | Theta1 = reshape(nn_params(1:hidden_layer_size * (input_layer_size + 1)), ... 20 | hidden_layer_size, (input_layer_size + 1)); 21 | 22 | Theta2 = reshape(nn_params((1 + (hidden_layer_size * (input_layer_size + 1))):end), ... 23 | num_labels, (hidden_layer_size + 1)); 24 | 25 | % Setup some useful variables 26 | m = size(X, 1); 27 | X = [ones(m, 1) X]; 28 | 29 | % You need to return the following variables correctly 30 | J = 0; 31 | 32 | y_tmp = zeros(m, num_labels); 33 | total = 0; 34 | 35 | for i = 1: m 36 | row_label = y(i); 37 | y_tmp(i, row_label) = 1; 38 | 39 | output = sigmoid([ones(1,1) sigmoid(X(i,:) * Theta1')]... 40 | * Theta2'); 41 | cost = -y_tmp(i, :) * log(output') - (ones(1, num_labels)... 
42 | - y_tmp(i, :)) * log(ones(num_labels, 1) - output'); 43 | 44 | total = total + cost; 45 | end 46 | 47 | sums_1 = 0; 48 | sums_2 = 0; 49 | 50 | 51 | for i = 1 : hidden_layer_size 52 | for j = 2: (input_layer_size + 1) 53 | tmp_theta1 = (Theta1(i,j)) ^ 2; 54 | sums_1 = sums_1 + tmp_theta1; 55 | end 56 | end 57 | 58 | for i = 1 : num_labels 59 | for j = 2: (hidden_layer_size + 1) 60 | tmp_theta2 = (Theta2(i,j)) ^ 2; 61 | sums_2 = sums_2 + tmp_theta2; 62 | end 63 | end 64 | 65 | penal_sum = lambda / 2 * (sums_1 + sums_2); 66 | 67 | J = 1 / m * (total + penal_sum); 68 | 69 | 70 | %compute the BP algrithm 71 | 72 | delta_total1 = zeros(size(Theta1)); 73 | %delta_total2 = zeros(num_labels, hidden_layer_size); 74 | delta_total2 = zeros(size(Theta2)); 75 | 76 | for i = 1:m 77 | 78 | %compute the layer-wise units 79 | a1 = X(i,:); 80 | z2 = a1 * Theta1'; 81 | a2 = sigmoid(z2); 82 | a2 = [1 a2]; 83 | 84 | z3 = a2 * Theta2'; 85 | a3 = sigmoid(z3); 86 | z2 = [1 z2]; 87 | 88 | %compute the delta within layers 89 | delta3 = a3 - y_tmp(i); 90 | delta_tmp = delta3 * Theta2; 91 | delta2 = delta_tmp .* sigmoidGradient(z2); 92 | 93 | delta2 = delta2(2 : end); 94 | %sum all the delta by formula 95 | delta_total1 = delta_total1 + delta2' * a1; 96 | delta_total2 = delta_total2 + delta3' * a2; 97 | 98 | end 99 | 100 | 101 | 102 | Theta1_grad = (1 / m) .* delta_total1; 103 | Theta2_grad = (1 / m) .* delta_total2; 104 | 105 | % ====================== YOUR CODE HERE ====================== 106 | % Instructions: You should complete the code by working through the 107 | % ------------------------------------------------------------- 108 | 109 | % ========================================================================= 110 | 111 | % Unroll gradients 112 | grad = [Theta1_grad(:) ; Theta2_grad(:)]; 113 | 114 | 115 | end 116 | -------------------------------------------------------------------------------- /ex4/nnCostFunction.m: -------------------------------------------------------------------------------- 1 | function [J grad] = nnCostFunction(nn_params, ... 2 | input_layer_size, ... 3 | hidden_layer_size, ... 4 | num_labels, ... 5 | X, y, lambda) 6 | %NNCOSTFUNCTION Implements the neural network cost function for a two layer 7 | %neural network which performs classification 8 | % [J grad] = NNCOSTFUNCTON(nn_params, hidden_layer_size, num_labels, ... 9 | % X, y, lambda) computes the cost and gradient of the neural network. The 10 | % parameters for the neural network are "unrolled" into the vector 11 | % nn_params and need to be converted back into the weight matrices. 12 | % 13 | % The returned parameter grad should be a "unrolled" vector of the 14 | % partial derivatives of the neural network. 15 | % 16 | 17 | % Reshape nn_params back into the parameters Theta1 and Theta2, the weight matrices 18 | % for our 2 layer neural network 19 | Theta1 = reshape(nn_params(1:hidden_layer_size * (input_layer_size + 1)), ... 20 | hidden_layer_size, (input_layer_size + 1)); 21 | 22 | Theta2 = reshape(nn_params((1 + (hidden_layer_size * (input_layer_size + 1))):end), ... 23 | num_labels, (hidden_layer_size + 1)); 24 | 25 | % Setup some useful variables 26 | m = size(X, 1); 27 | 28 | % You need to return the following variables correctly 29 | J = 0; 30 | Theta1_grad = zeros(size(Theta1)); 31 | Theta2_grad = zeros(size(Theta2)); 32 | 33 | % ====================== YOUR CODE HERE ====================== 34 | % Instructions: You should complete the code by working through the 35 | % following parts. 
36 | % 37 | % Part 1: Feedforward the neural network and return the cost in the 38 | % variable J. After implementing Part 1, you can verify that your 39 | % cost function computation is correct by verifying the cost 40 | % computed in ex4.m 41 | % 42 | % Part 2: Implement the backpropagation algorithm to compute the gradients 43 | % Theta1_grad and Theta2_grad. You should return the partial derivatives of 44 | % the cost function with respect to Theta1 and Theta2 in Theta1_grad and 45 | % Theta2_grad, respectively. After implementing Part 2, you can check 46 | % that your implementation is correct by running checkNNGradients 47 | % 48 | % Note: The vector y passed into the function is a vector of labels 49 | % containing values from 1..K. You need to map this vector into a 50 | % binary vector of 1's and 0's to be used with the neural network 51 | % cost function. 52 | % 53 | % Hint: We recommend implementing backpropagation using a for-loop 54 | % over the training examples if you are implementing it for the 55 | % first time. 56 | % 57 | % Part 3: Implement regularization with the cost function and gradients. 58 | % 59 | % Hint: You can implement this around the code for 60 | % backpropagation. That is, you can compute the gradients for 61 | % the regularization separately and then add them to Theta1_grad 62 | % and Theta2_grad from Part 2. 63 | % 64 | 65 | X = [ones(m,1) X]; 66 | 67 | 68 | % foward propagation 69 | % a1 = X; 70 | a2 = sigmoid(Theta1 * X'); 71 | a2 = [ones(m,1) a2']; 72 | 73 | h_theta = sigmoid(Theta2 * a2'); % h_theta equals z3 74 | 75 | % y(k) - the great trick - we need to recode the labels as vectors containing only values 0 or 1 (page 5 of ex4.pdf) 76 | yk = zeros(num_labels, m); 77 | for i=1:m, 78 | yk(y(i),i)=1; 79 | end 80 | 81 | % follow the form 82 | J = (1/m) * sum ( sum ( (-yk) .* log(h_theta) - (1-yk) .* log(1-h_theta) )); 83 | 84 | 85 | 86 | % Note that you should not be regularizing the terms that correspond to the bias. 87 | % For the matrices Theta1 and Theta2, this corresponds to the first column of each matrix. 
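% Aside (not part of the original file): once the per-example backpropagation
% loop further below is working, the same unregularized gradients can be
% computed without the loop. A sketch under the variable names already defined
% above (X with bias column, yk of size num_labels x m); illustrative only,
% the 1/m factor is included here:
%
%   A2 = [ones(m,1) sigmoid(X * Theta1')];           % m x (hidden + 1)
%   A3 = sigmoid(A2 * Theta2');                      % m x num_labels
%   D3 = A3 - yk';                                   % m x num_labels
%   D2 = (D3 * Theta2(:, 2:end)) .* sigmoidGradient(X * Theta1');
%   Theta1_grad = D2' * X ./ m;
%   Theta2_grad = D3' * A2 ./ m;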
88 | t1 = Theta1(:,2:size(Theta1,2)); 89 | t2 = Theta2(:,2:size(Theta2,2)); 90 | 91 | % regularization formula 92 | Reg = lambda * (sum( sum ( t1.^ 2 )) + sum( sum ( t2.^ 2 ))) / (2*m); 93 | 94 | % cost function + reg 95 | J = J + Reg; 96 | 97 | 98 | % ------------------------------------------------------------- 99 | 100 | % Backprop 101 | 102 | for t=1:m, 103 | 104 | % dummie pass-by-pass 105 | % forward propag 106 | 107 | a1 = X(t,:); % X already have bias 108 | z2 = Theta1 * a1'; 109 | 110 | a2 = sigmoid(z2); 111 | a2 = [1 ; a2]; % add bias 112 | 113 | z3 = Theta2 * a2; 114 | 115 | a3 = sigmoid(z3); % final activation layer a3 == h(theta) 116 | 117 | 118 | % back propag (god bless me) 119 | 120 | z2=[1; z2]; % bias 121 | 122 | delta_3 = a3 - yk(:,t); % y(k) trick - getting columns of t element 123 | delta_2 = (Theta2' * delta_3) .* sigmoidGradient(z2); 124 | 125 | % skipping sigma2(0) 126 | delta_2 = delta_2(2:end); 127 | 128 | Theta2_grad = Theta2_grad + delta_3 * a2'; 129 | Theta1_grad = Theta1_grad + delta_2 * a1; % I don't know why a1 doesn't need to be transpost (brute force try) 130 | 131 | end; 132 | 133 | % Theta1_grad = Theta1_grad ./ m; 134 | % Theta2_grad = Theta2_grad ./ m; 135 | 136 | 137 | % Regularization (here you go) 138 | 139 | 140 | Theta1_grad(:, 1) = Theta1_grad(:, 1) ./ m; 141 | 142 | Theta1_grad(:, 2:end) = Theta1_grad(:, 2:end) ./ m + ((lambda/m) * Theta1(:, 2:end)); 143 | 144 | 145 | Theta2_grad(:, 1) = Theta2_grad(:, 1) ./ m; 146 | 147 | Theta2_grad(:, 2:end) = Theta2_grad(:, 2:end) ./ m + ((lambda/m) * Theta2(:, 2:end)); 148 | 149 | 150 | 151 | 152 | % ========================================================================= 153 | 154 | % Unroll gradients 155 | grad = [Theta1_grad(:) ; Theta2_grad(:)]; 156 | 157 | 158 | 159 | end 160 | -------------------------------------------------------------------------------- /ex4/randInitializeWeights.m: -------------------------------------------------------------------------------- 1 | function W = randInitializeWeights(L_in, L_out) 2 | %RANDINITIALIZEWEIGHTS Randomly initialize the weights of a layer with L_in 3 | %incoming connections and L_out outgoing connections 4 | % W = RANDINITIALIZEWEIGHTS(L_in, L_out) randomly initializes the weights 5 | % of a layer with L_in incoming connections and L_out outgoing 6 | % connections. 7 | % 8 | % Note that W should be set to a matrix of size(L_out, 1 + L_in) as 9 | % the first column of W handles the "bias" terms 10 | % 11 | 12 | % You need to return the following variables correctly 13 | W = zeros(L_out, 1 + L_in); 14 | 15 | % ====================== YOUR CODE HERE ====================== 16 | % Instructions: Initialize W randomly so that we break the symmetry while 17 | % training the neural network. 18 | % 19 | % Note: The first column of W corresponds to the parameters for the bias unit 20 | % 21 | epsilon_init = 0.12; 22 | W = rand(L_out, 1 + L_in) * 2 * epsilon_init - epsilon_init; 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | % ========================================================================= 32 | 33 | end 34 | -------------------------------------------------------------------------------- /ex4/sigmoidGradient.m: -------------------------------------------------------------------------------- 1 | function g = sigmoidGradient(z) 2 | %SIGMOIDGRADIENT returns the gradient of the sigmoid function 3 | %evaluated at z 4 | % g = SIGMOIDGRADIENT(z) computes the gradient of the sigmoid function 5 | % evaluated at z. 
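% Aside (not part of the original file): a quick sanity check for this
% function. At z = 0 the sigmoid equals 0.5, so its gradient is
% 0.5 * (1 - 0.5) = 0.25, which is the middle entry of the vector that ex4.m
% prints for sigmoidGradient([-1 -0.5 0 0.5 1]):
%
%   sigmoidGradient(0)      % expected: 0.2500
%   sigmoidGradient([-1 1]) % expected: [0.1966 0.1966], symmetric about z = 0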
This should work regardless if z is a matrix or a 6 | % vector. In particular, if z is a vector or matrix, you should return 7 | % the gradient for each element. 8 | 9 | g = zeros(size(z)); 10 | 11 | [rows cols] = size(g); 12 | 13 | if cols == 1 && rows == 1 14 | g = sigmoid(z) * (ones(size(g)) - sigmoid(z)); 15 | else 16 | g = sigmoid(z) .* (ones(size(g)) - sigmoid(z)); 17 | 18 | 19 | 20 | % ============================================================= 21 | 22 | 23 | 24 | 25 | end 26 | -------------------------------------------------------------------------------- /ex5/ex5.m: -------------------------------------------------------------------------------- 1 | %% Machine Learning Online Class 2 | % Exercise 5 | Regularized Linear Regression and Bias-Variance 3 | % 4 | % Instructions 5 | % ------------ 6 | % 7 | % This file contains code that helps you get started on the 8 | % exercise. You will need to complete the following functions: 9 | % 10 | % linearRegCostFunction.m 11 | % learningCurve.m 12 | % validationCurve.m 13 | % 14 | % For this exercise, you will not need to change any code in this file, 15 | % or any other files other than those mentioned above. 16 | % 17 | 18 | %% Initialization 19 | clear ; close all; clc 20 | 21 | %% =========== Part 1: Loading and Visualizing Data ============= 22 | % We start the exercise by first loading and visualizing the dataset. 23 | % The following code will load the dataset into your environment and plot 24 | % the data. 25 | % 26 | 27 | % Load Training Data 28 | fprintf('Loading and Visualizing Data ...\n') 29 | 30 | % Load from ex5data1: 31 | % You will have X, y, Xval, yval, Xtest, ytest in your environment 32 | load ('ex5data1.mat'); 33 | 34 | % m = Number of examples 35 | m = size(X, 1); 36 | 37 | % Plot training data 38 | plot(X, y, 'rx', 'MarkerSize', 10, 'LineWidth', 1.5); 39 | xlabel('Change in water level (x)'); 40 | ylabel('Water flowing out of the dam (y)'); 41 | 42 | fprintf('Program paused. Press enter to continue.\n'); 43 | pause; 44 | 45 | %% =========== Part 2: Regularized Linear Regression Cost ============= 46 | % You should now implement the cost function for regularized linear 47 | % regression. 48 | % 49 | 50 | theta = [1 ; 1]; 51 | J = linearRegCostFunction([ones(m, 1) X], y, theta, 1); 52 | 53 | fprintf(['Cost at theta = [1 ; 1]: %f '... 54 | '\n(this value should be about 303.993192)\n'], J); 55 | 56 | fprintf('Program paused. Press enter to continue.\n'); 57 | pause; 58 | 59 | %% =========== Part 3: Regularized Linear Regression Gradient ============= 60 | % You should now implement the gradient for regularized linear 61 | % regression. 62 | % 63 | 64 | theta = [1 ; 1]; 65 | [J, grad] = linearRegCostFunction([ones(m, 1) X], y, theta, 1); 66 | 67 | fprintf(['Gradient at theta = [1 ; 1]: [%f; %f] '... 68 | '\n(this value should be about [-15.303016; 598.250744])\n'], ... 69 | grad(1), grad(2)); 70 | 71 | fprintf('Program paused. Press enter to continue.\n'); 72 | pause; 73 | 74 | 75 | %% =========== Part 4: Train Linear Regression ============= 76 | % Once you have implemented the cost and gradient correctly, the 77 | % trainLinearReg function will use your cost function to train 78 | % regularized linear regression. 79 | % 80 | % Write Up Note: The data is non-linear, so this will not give a great 81 | % fit. 
82 | % 83 | 84 | % Train linear regression with lambda = 0 85 | lambda = 0; 86 | [theta] = trainLinearReg([ones(m, 1) X], y, lambda); 87 | 88 | % Plot fit over the data 89 | plot(X, y, 'rx', 'MarkerSize', 10, 'LineWidth', 1.5); 90 | xlabel('Change in water level (x)'); 91 | ylabel('Water flowing out of the dam (y)'); 92 | hold on; 93 | plot(X, [ones(m, 1) X]*theta, '--', 'LineWidth', 2) 94 | hold off; 95 | 96 | fprintf('Program paused. Press enter to continue.\n'); 97 | pause; 98 | 99 | 100 | %% =========== Part 5: Learning Curve for Linear Regression ============= 101 | % Next, you should implement the learningCurve function. 102 | % 103 | % Write Up Note: Since the model is underfitting the data, we expect to 104 | % see a graph with "high bias" -- Figure 3 in ex5.pdf 105 | % 106 | 107 | lambda = 0; 108 | [error_train, error_val] = ... 109 | learningCurve([ones(m, 1) X], y, ... 110 | [ones(size(Xval, 1), 1) Xval], yval, ... 111 | lambda); 112 | 113 | plot(1:m, error_train, 1:m, error_val); 114 | title('Learning curve for linear regression') 115 | legend('Train', 'Cross Validation') 116 | xlabel('Number of training examples') 117 | ylabel('Error') 118 | axis([0 13 0 150]) 119 | 120 | fprintf('# Training Examples\tTrain Error\tCross Validation Error\n'); 121 | for i = 1:m 122 | fprintf(' \t%d\t\t%f\t%f\n', i, error_train(i), error_val(i)); 123 | end 124 | 125 | fprintf('Program paused. Press enter to continue.\n'); 126 | pause; 127 | 128 | %% =========== Part 6: Feature Mapping for Polynomial Regression ============= 129 | % One solution to this is to use polynomial regression. You should now 130 | % complete polyFeatures to map each example into its powers 131 | % 132 | 133 | p = 8; 134 | 135 | % Map X onto Polynomial Features and Normalize 136 | X_poly = polyFeatures(X, p); 137 | [X_poly, mu, sigma] = featureNormalize(X_poly); % Normalize 138 | X_poly = [ones(m, 1), X_poly]; % Add Ones 139 | 140 | % Map X_poly_test and normalize (using mu and sigma) 141 | X_poly_test = polyFeatures(Xtest, p); 142 | X_poly_test = bsxfun(@minus, X_poly_test, mu); 143 | X_poly_test = bsxfun(@rdivide, X_poly_test, sigma); 144 | X_poly_test = [ones(size(X_poly_test, 1), 1), X_poly_test]; % Add Ones 145 | 146 | % Map X_poly_val and normalize (using mu and sigma) 147 | X_poly_val = polyFeatures(Xval, p); 148 | X_poly_val = bsxfun(@minus, X_poly_val, mu); 149 | X_poly_val = bsxfun(@rdivide, X_poly_val, sigma); 150 | X_poly_val = [ones(size(X_poly_val, 1), 1), X_poly_val]; % Add Ones 151 | 152 | fprintf('Normalized Training Example 1:\n'); 153 | fprintf(' %f \n', X_poly(1, :)); 154 | 155 | fprintf('\nProgram paused. Press enter to continue.\n'); 156 | pause; 157 | 158 | 159 | 160 | %% =========== Part 7: Learning Curve for Polynomial Regression ============= 161 | % Now, you will get to experiment with polynomial regression with multiple 162 | % values of lambda. The code below runs polynomial regression with 163 | % lambda = 0. You should try running the code with different values of 164 | % lambda to see how the fit and learning curve change. 
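% Aside (not part of the original file): the comment above mentions lambda = 0,
% but the code below actually sets lambda = 0.3. To compare fits and learning
% curves quickly, the same calls can be wrapped in a loop over several values
% (a sketch, illustrative only):
%
%   for lambda = [0 0.3 1 100]
%     theta = trainLinearReg(X_poly, y, lambda);
%     [error_train, error_val] = learningCurve(X_poly, y, X_poly_val, yval, lambda);
%   end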
165 | % 166 | 167 | lambda = 0.3; 168 | [theta] = trainLinearReg(X_poly, y, lambda); 169 | 170 | % Plot training data and fit 171 | figure(1); 172 | plot(X, y, 'rx', 'MarkerSize', 10, 'LineWidth', 1.5); 173 | plotFit(min(X), max(X), mu, sigma, theta, p); 174 | xlabel('Change in water level (x)'); 175 | ylabel('Water flowing out of the dam (y)'); 176 | title (sprintf('Polynomial Regression Fit (lambda = %f)', lambda)); 177 | 178 | figure(2); 179 | [error_train, error_val] = ... 180 | learningCurve(X_poly, y, X_poly_val, yval, lambda); 181 | plot(1:m, error_train, 1:m, error_val); 182 | 183 | title(sprintf('Polynomial Regression Learning Curve (lambda = %f)', lambda)); 184 | xlabel('Number of training examples') 185 | ylabel('Error') 186 | axis([0 13 0 100]) 187 | legend('Train', 'Cross Validation') 188 | 189 | fprintf('Polynomial Regression (lambda = %f)\n\n', lambda); 190 | fprintf('# Training Examples\tTrain Error\tCross Validation Error\n'); 191 | for i = 1:m 192 | fprintf(' \t%d\t\t%f\t%f\n', i, error_train(i), error_val(i)); 193 | end 194 | 195 | fprintf('Program paused. Press enter to continue.\n'); 196 | pause; 197 | 198 | %% =========== Part 8: Validation for Selecting Lambda ============= 199 | % You will now implement validationCurve to test various values of 200 | % lambda on a validation set. You will then use this to select the 201 | % "best" lambda value. 202 | % 203 | 204 | [lambda_vec, error_train, error_val] = ... 205 | validationCurve(X_poly, y, X_poly_val, yval); 206 | 207 | close all; 208 | plot(lambda_vec, error_train, lambda_vec, error_val); 209 | legend('Train', 'Cross Validation'); 210 | xlabel('lambda'); 211 | ylabel('Error'); 212 | 213 | fprintf('lambda\t\tTrain Error\tValidation Error\n'); 214 | for i = 1:length(lambda_vec) 215 | fprintf(' %f\t%f\t%f\n', ... 216 | lambda_vec(i), error_train(i), error_val(i)); 217 | end 218 | 219 | fprintf('Program paused. Press enter to continue.\n'); 220 | pause; 221 | -------------------------------------------------------------------------------- /ex5/learningCurve.m: -------------------------------------------------------------------------------- 1 | function [error_train, error_val] = ... 2 | learningCurve(X, y, Xval, yval, lambda) 3 | %LEARNINGCURVE Generates the train and cross validation set errors needed 4 | 5 | % Number of training examples 6 | m = size(X, 1); 7 | %X = [ones(m, 1) X]; 8 | % You need to return these values correctly 9 | error_train = zeros(m, 1); 10 | error_val = zeros(m, 1); 11 | 12 | 13 | %[theta] = trainLinearReg(X, y, lambda); 14 | 15 | % Compute train/cross validation errors 16 | % for i = 1 : m 17 | % [theta] = trainLinearReg(X(1:i,:), y(1:i), lambda); 18 | % error_train(i) = linearRegCostFunction(X... 
19 | % (1:i,:), y(1:i), theta, lambda); 20 | % error_val(i) = linearRegCostFunction(Xval, yval, theta, lambda); 21 | % end 22 | for i = 1:m 23 | X_sub = X(1:i, :); 24 | y_sub = y(1:i); 25 | 26 | theta = trainLinearReg(X_sub, y_sub, lambda); 27 | 28 | error_train(i) = linearRegCostFunction(X_sub, y_sub, theta, 0); 29 | error_val(i) = linearRegCostFunction(Xval, yval, theta, 0); 30 | end 31 | % ====================== YOUR CODE HERE ====================== 32 | 33 | % ------------------------------------------------------------- 34 | 35 | % ========================================================================= 36 | 37 | end 38 | -------------------------------------------------------------------------------- /ex5/linearRegCostFunction.m: -------------------------------------------------------------------------------- 1 | function [J, grad] = linearRegCostFunction(X, y, theta, lambda) 2 | %LINEARREGCOSTFUNCTION Compute cost and gradient for regularized linear 3 | %regression with multiple variables 4 | % [J, grad] = LINEARREGCOSTFUNCTION(X, y, theta, lambda) computes the 5 | % cost of using theta as the parameter for linear regression to fit the 6 | % data points in X and y. Returns the cost in J and the gradient in grad 7 | 8 | % Initialize some useful values 9 | m = length(y); % number of training examples 10 | 11 | % You need to return the following variables correctly 12 | J = 0; 13 | grad = zeros(size(theta)); 14 | 15 | %X = [ones(m,1) X]; 16 | % ====================== YOUR CODE HERE ====================== 17 | % cost = 0; 18 | % total = 0; 19 | % 20 | % for i =1:m 21 | % cost = (theta' * X(i,:)' - y(i,:))^2; 22 | % total = total + cost; 23 | 24 | 25 | % cost = (norm((X * theta - y))) .^2; 26 | % penal = lambda ./ (norm(theta(2:end,1))) .^ 2; 27 | % 28 | 29 | J = (1/(2*m))*sum(power((X*theta - y),2))+ (lambda/(2*m)) * sum(power(theta(2:end),2)); 30 | 31 | G = (lambda/m) .* theta; 32 | G(1) = 0; % this is always 0 33 | 34 | grad = ((1/m) .* X' * (X*theta - y)) + G; 35 | %J = 0.5 / m * (cost + penal); 36 | % J = (1/(2*m))*sum(power((X*theta - y),2))+ (lambda/(2*m)) * sum(power(theta(2:end),2)); 37 | % % h=X*theta; 38 | % % thetas=theta(2:end,1); 39 | % % J=1/(2*m).*sum((h-y).^2)+(lambda/(2.*m)).*sum(thetas.^2); 40 | % 41 | % grad = 1 / m .* ((theta' * X' - y') * X + lambda .* theta')'; 42 | % grad(1,:) = 1 / m * (theta' * X' - y') * X(:,1); 43 | 44 | 45 | % ========================================================================= 46 | 47 | grad = grad(:); 48 | 49 | end 50 | -------------------------------------------------------------------------------- /ex5/polyFeatures.m: -------------------------------------------------------------------------------- 1 | function [X_poly] = polyFeatures(X, p) 2 | %POLYFEATURES Maps X (1D vector) into the p-th power 3 | % [X_poly] = POLYFEATURES(X, p) takes a data matrix X (size m x 1) and 4 | % maps each example into its polynomial features where 5 | % X_poly(i, :) = [X(i) X(i).^2 X(i).^3 ... X(i).^p]; 6 | % 7 | 8 | 9 | % You need to return the following variables correctly. 10 | X_poly = zeros(numel(X), p); 11 | 12 | % ====================== YOUR CODE HERE ====================== 13 | % Instructions: Given a vector X, return a matrix X_poly where the p-th 14 | % column of X contains the values of X to the p-th power. 
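% Aside (not part of the original file): a small worked example of the mapping
% described above, for the column vector X = [2; 3] and p = 3 (illustrative only):
%
%   polyFeatures([2; 3], 3)
%   % ans =
%   %     2    4    8
%   %     3    9   27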
15 | % 16 | % 17 | 18 | for j = 1:p 19 | 20 | X_poly(:,j) = X .^ j; 21 | 22 | end 23 | 24 | 25 | 26 | 27 | % ========================================================================= 28 | 29 | end 30 | -------------------------------------------------------------------------------- /ex5/trainLinearReg.m: -------------------------------------------------------------------------------- 1 | function [theta] = trainLinearReg(X, y, lambda) 2 | %TRAINLINEARREG Trains linear regression given a dataset (X, y) and a 3 | %regularization parameter lambda 4 | % [theta] = TRAINLINEARREG (X, y, lambda) trains linear regression using 5 | % the dataset (X, y) and regularization parameter lambda. Returns the 6 | % trained parameters theta. 7 | % 8 | 9 | % Initialize Theta 10 | initial_theta = zeros(size(X, 2), 1); 11 | 12 | % Create "short hand" for the cost function to be minimized 13 | costFunction = @(t) linearRegCostFunction(X, y, t, lambda); 14 | 15 | % Now, costFunction is a function that takes in only one argument 16 | options = optimset('MaxIter', 200, 'GradObj', 'on'); 17 | 18 | % Minimize using fmincg 19 | theta = fmincg(costFunction, initial_theta, options); 20 | 21 | end 22 | -------------------------------------------------------------------------------- /ex5/validationCurve.m: -------------------------------------------------------------------------------- 1 | function [lambda_vec, error_train, error_val] = ... 2 | validationCurve(X, y, Xval, yval) 3 | %VALIDATIONCURVE Generate the train and validation errors needed to 4 | %plot a validation curve that we can use to select lambda 5 | % [lambda_vec, error_train, error_val] = ... 6 | % VALIDATIONCURVE(X, y, Xval, yval) returns the train 7 | % and validation errors (in error_train, error_val) 8 | % for different values of lambda. You are given the training set (X, 9 | % y) and validation set (Xval, yval). 10 | % 11 | 12 | % Selected values of lambda (you should not change this) 13 | lambda_vec = [0 0.001 0.003 0.01 0.03 0.1 0.3 1 3 10]'; 14 | 15 | % You need to return these variables correctly. 16 | error_train = zeros(length(lambda_vec), 1); 17 | error_val = zeros(length(lambda_vec), 1); 18 | 19 | %X = [ones(length(X),1) X]; 20 | %Xval = [ones(length(Xval),1) Xval]; 21 | 22 | %compute test/train error 23 | for i = 1:length(lambda_vec) 24 | [theta] = trainLinearReg(X,y,lambda_vec(i)); 25 | error_train(i) = linearRegCostFunction(X, y, theta,lambda_vec(i)); 26 | error_val(i) = linearRegCostFunction(Xval, yval, theta,lambda_vec(i)); 27 | end 28 | 29 | 30 | 31 | 32 | 33 | % ========================================================================= 34 | 35 | end 36 | -------------------------------------------------------------------------------- /ex6/dataset3Params.m: -------------------------------------------------------------------------------- 1 | function [C, sigma] = dataset3Params(X, y, Xval, yval) 2 | %DATASET3PARAMS returns your choice of C and sigma for Part 3 of the exercise 3 | %where you select the optimal (C, sigma) learning parameters to use for SVM 4 | %with RBF kernel 5 | % [C, sigma] = DATASET3PARAMS(X, y, Xval, yval) returns your choice of C and 6 | % sigma. You should complete this function to return the optimal C and 7 | % sigma based on a cross-validation set. 8 | % 9 | 10 | % You need to return the following variables correctly. 
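% Aside (not part of the original file): a more compact way to run the same
% 8 x 8 grid search that the code below performs. A sketch, illustrative only;
% it uses svmTrain, svmPredict and gaussianKernel exactly as they are used
% elsewhere in this exercise:
%
%   values = [0.01 0.03 0.1 0.3 1 3 10 30];
%   best_err = Inf;
%   for C_try = values
%     for sigma_try = values
%       model = svmTrain(X, y, C_try, @(x1, x2) gaussianKernel(x1, x2, sigma_try));
%       err = mean(double(svmPredict(model, Xval) ~= yval));
%       if err < best_err
%         best_err = err;
%         C = C_try;
%         sigma = sigma_try;
%       end
%     end
%   end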
11 | % C = 1; 12 | % sigma = 0.3; 13 | 14 | % ====================== YOUR CODE HERE ====================== 15 | 16 | %predictions = svmPredict(model, Xval); 17 | 18 | %svmTrain(X, y, C, @(x1, x2) gaussianKernel(x1, x2, sigma)) 19 | 20 | Para = zeros(62,2); 21 | 22 | sigma = zeros(8,1); 23 | C = zeros(8,1); 24 | 25 | sigma = [0.01; 0.03; 0.1; 0.3; 1; 3;10 ;30]; 26 | C = [0.01; 0.03; 0.1; 0.3; 1; 3;10 ;30]; 27 | 28 | 29 | tmp = 0.01; 30 | 31 | 32 | for i = 1:8 33 | for j = 1:8 34 | Para(8 * (i -1) + j,1) = sigma(i,:); 35 | end 36 | end 37 | 38 | for i = 1:8 39 | for j = 1:8 40 | Para(8 * (i -1) + j,2) = C(j,:); 41 | end 42 | end 43 | 44 | error = zeros(64,1); 45 | for i = 1:64 46 | 47 | model = svmTrain(X, y, Para(i,1), @(x1, x2) gaussianKernel... 48 | (x1, x2, Para(i,2))); 49 | 50 | % Note: You can compute the prediction error using 51 | predictions = svmPredict(model, Xval); 52 | err_tmp = mean(double(predictions ~= yval)); 53 | error(i,:) = err_tmp; 54 | end 55 | 56 | pos = 0; 57 | for j = 1:64 58 | mins = min(error); 59 | if(error(j,:) == mins) 60 | pos = j; 61 | break; 62 | end 63 | end 64 | 65 | C = Para(pos,1); 66 | sigma = Para(pos, 2); 67 | 68 | 69 | 70 | 71 | % ========================================================================= 72 | 73 | end 74 | -------------------------------------------------------------------------------- /ex6/emailFeatures.m: -------------------------------------------------------------------------------- 1 | function x = emailFeatures(word_indices) 2 | %EMAILFEATURES takes in a word_indices vector and produces a feature vector 3 | %from the word indices 4 | % x = EMAILFEATURES(word_indices) takes in a word_indices vector and 5 | % produces a feature vector from the word indices. 6 | 7 | % Total number of words in the dictionary 8 | n = 1899; 9 | 10 | % You need to return the following variables correctly. 11 | x = zeros(n, 1); 12 | 13 | % ====================== YOUR CODE HERE ====================== 14 | len = length(word_indices); 15 | 16 | for i = 1 : n 17 | for j = 1 : len 18 | if( i == word_indices(j,:)) 19 | x(i,1) = 1; 20 | end 21 | end 22 | end 23 | 24 | % % This is the second method to compute 25 | % for i = 1:len 26 | % tmp = word_indices(i,:); 27 | % for j = 1:n 28 | % if(tmp == j) 29 | % x(j,1) = 1; 30 | % end 31 | % end 32 | % % end 33 | 34 | 35 | 36 | 37 | % ========================================================================= 38 | 39 | 40 | end 41 | -------------------------------------------------------------------------------- /ex6/ex6.m: -------------------------------------------------------------------------------- 1 | %% Machine Learning Online Class 2 | % Exercise 6 | Support Vector Machines 3 | % 4 | % Instructions 5 | % ------------ 6 | % 7 | % This file contains code that helps you get started on the 8 | % exercise. You will need to complete the following functions: 9 | % 10 | % gaussianKernel.m 11 | % dataset3Params.m 12 | % processEmail.m 13 | % emailFeatures.m 14 | % 15 | % For this exercise, you will not need to change any code in this file, 16 | % or any other files other than those mentioned above. 17 | % 18 | 19 | %% Initialization 20 | clear ; close all; clc 21 | 22 | %% =============== Part 1: Loading and Visualizing Data ================ 23 | % We start the exercise by first loading and visualizing the dataset. 24 | % The following code will load the dataset into your environment and plot 25 | % the data. 
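% Aside (not part of the original file): the nested loops in emailFeatures.m,
% shown a little further above, can be replaced by direct indexing, since
% word_indices already holds the positions to set (a sketch, illustrative only):
%
%   x = zeros(n, 1);
%   x(word_indices) = 1;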
26 | % 27 | 28 | fprintf('Loading and Visualizing Data ...\n') 29 | 30 | % Load from ex6data1: 31 | % You will have X, y in your environment 32 | load('ex6data1.mat'); 33 | 34 | % Plot training data 35 | plotData(X, y); 36 | 37 | fprintf('Program paused. Press enter to continue.\n'); 38 | pause; 39 | 40 | %% ==================== Part 2: Training Linear SVM ==================== 41 | % The following code will train a linear SVM on the dataset and plot the 42 | % decision boundary learned. 43 | % 44 | 45 | % Load from ex6data1: 46 | % You will have X, y in your environment 47 | load('ex6data1.mat'); 48 | 49 | fprintf('\nTraining Linear SVM ...\n') 50 | 51 | % You should try to change the C value below and see how the decision 52 | % boundary varies (e.g., try C = 1000) 53 | C = 100; 54 | model = svmTrain(X, y, C, @linearKernel, 1e-3, 20); 55 | visualizeBoundaryLinear(X, y, model); 56 | 57 | fprintf('Program paused. Press enter to continue.\n'); 58 | pause; 59 | 60 | %% =============== Part 3: Implementing Gaussian Kernel =============== 61 | % You will now implement the Gaussian kernel to use 62 | % with the SVM. You should complete the code in gaussianKernel.m 63 | % 64 | fprintf('\nEvaluating the Gaussian Kernel ...\n') 65 | 66 | x1 = [1 2 1]; x2 = [0 4 -1]; sigma = 2; 67 | sim = gaussianKernel(x1, x2, sigma); 68 | 69 | fprintf(['Gaussian Kernel between x1 = [1; 2; 1], x2 = [0; 4; -1], sigma = %f :' ... 70 | '\n\t%f\n(for sigma = 2, this value should be about 0.324652)\n'], sigma, sim); 71 | 72 | fprintf('Program paused. Press enter to continue.\n'); 73 | pause; 74 | 75 | %% =============== Part 4: Visualizing Dataset 2 ================ 76 | % The following code will load the next dataset into your environment and 77 | % plot the data. 78 | % 79 | 80 | fprintf('Loading and Visualizing Data ...\n') 81 | 82 | % Load from ex6data2: 83 | % You will have X, y in your environment 84 | load('ex6data2.mat'); 85 | 86 | % Plot training data 87 | plotData(X, y); 88 | 89 | fprintf('Program paused. Press enter to continue.\n'); 90 | pause; 91 | 92 | %% ========== Part 5: Training SVM with RBF Kernel (Dataset 2) ========== 93 | % After you have implemented the kernel, we can now use it to train the 94 | % SVM classifier. 95 | % 96 | fprintf('\nTraining SVM with RBF Kernel (this may take 1 to 2 minutes) ...\n'); 97 | 98 | % Load from ex6data2: 99 | % You will have X, y in your environment 100 | load('ex6data2.mat'); 101 | 102 | % SVM Parameters 103 | C = 1; sigma = 0.1; 104 | 105 | % We set the tolerance and max_passes lower here so that the code will run 106 | % faster. However, in practice, you will want to run the training to 107 | % convergence. 108 | model= svmTrain(X, y, C, @(x1, x2) gaussianKernel(x1, x2, sigma)); 109 | visualizeBoundary(X, y, model); 110 | 111 | fprintf('Program paused. Press enter to continue.\n'); 112 | pause; 113 | 114 | %% =============== Part 6: Visualizing Dataset 3 ================ 115 | % The following code will load the next dataset into your environment and 116 | % plot the data. 117 | % 118 | 119 | fprintf('Loading and Visualizing Data ...\n') 120 | 121 | % Load from ex6data3: 122 | % You will have X, y in your environment 123 | load('ex6data3.mat'); 124 | 125 | % Plot training data 126 | plotData(X, y); 127 | 128 | fprintf('Program paused. Press enter to continue.\n'); 129 | pause; 130 | 131 | %% ========== Part 7: Training SVM with RBF Kernel (Dataset 3) ========== 132 | 133 | % This is a different dataset that you can use to experiment with. 
Try 134 | % different values of C and sigma here. 135 | % 136 | 137 | % Load from ex6data3: 138 | % You will have X, y in your environment 139 | load('ex6data3.mat'); 140 | 141 | % Try different SVM Parameters here 142 | [C, sigma] = dataset3Params(X, y, Xval, yval); 143 | 144 | % Train the SVM 145 | model= svmTrain(X, y, C, @(x1, x2) gaussianKernel(x1, x2, sigma)); 146 | visualizeBoundary(X, y, model); 147 | 148 | fprintf('Program paused. Press enter to continue.\n'); 149 | pause; 150 | 151 | -------------------------------------------------------------------------------- /ex6/ex6_spam.m: -------------------------------------------------------------------------------- 1 | %% Machine Learning Online Class 2 | % Exercise 6 | Spam Classification with SVMs 3 | % 4 | % Instructions 5 | % ------------ 6 | % 7 | % This file contains code that helps you get started on the 8 | % exercise. You will need to complete the following functions: 9 | % 10 | % gaussianKernel.m 11 | % dataset3Params.m 12 | % processEmail.m 13 | % emailFeatures.m 14 | % 15 | % For this exercise, you will not need to change any code in this file, 16 | % or any other files other than those mentioned above. 17 | % 18 | 19 | %% Initialization 20 | clear ; close all; clc 21 | 22 | %% ==================== Part 1: Email Preprocessing ==================== 23 | % To use an SVM to classify emails into Spam v.s. Non-Spam, you first need 24 | % to convert each email into a vector of features. In this part, you will 25 | % implement the preprocessing steps for each email. You should 26 | % complete the code in processEmail.m to produce a word indices vector 27 | % for a given email. 28 | 29 | fprintf('\nPreprocessing sample email (emailSample1.txt)\n'); 30 | 31 | % Extract Features 32 | file_contents = readFile('emailSample1.txt'); 33 | word_indices = processEmail(file_contents); 34 | 35 | % Print Stats 36 | fprintf('Word Indices: \n'); 37 | fprintf(' %d', word_indices); 38 | fprintf('\n\n'); 39 | 40 | fprintf('Program paused. Press enter to continue.\n'); 41 | pause; 42 | 43 | %% ==================== Part 2: Feature Extraction ==================== 44 | % Now, you will convert each email into a vector of features in R^n. 45 | % You should complete the code in emailFeatures.m to produce a feature 46 | % vector for a given email. 47 | 48 | fprintf('\nExtracting features from sample email (emailSample1.txt)\n'); 49 | 50 | % Extract Features 51 | file_contents = readFile('emailSample1.txt'); 52 | word_indices = processEmail(file_contents); 53 | features = emailFeatures(word_indices); 54 | 55 | % Print Stats 56 | fprintf('Length of feature vector: %d\n', length(features)); 57 | fprintf('Number of non-zero entries: %d\n', sum(features > 0)); 58 | 59 | fprintf('Program paused. Press enter to continue.\n'); 60 | pause; 61 | 62 | %% =========== Part 3: Train Linear SVM for Spam Classification ======== 63 | % In this section, you will train a linear classifier to determine if an 64 | % email is Spam or Not-Spam. 
65 | 66 | % Load the Spam Email dataset 67 | % You will have X, y in your environment 68 | load('spamTrain.mat'); 69 | 70 | fprintf('\nTraining Linear SVM (Spam Classification)\n') 71 | fprintf('(this may take 1 to 2 minutes) ...\n') 72 | 73 | C = 0.1; 74 | model = svmTrain(X, y, C, @linearKernel); 75 | 76 | p = svmPredict(model, X); 77 | 78 | fprintf('Training Accuracy: %f\n', mean(double(p == y)) * 100); 79 | 80 | %% =================== Part 4: Test Spam Classification ================ 81 | % After training the classifier, we can evaluate it on a test set. We have 82 | % included a test set in spamTest.mat 83 | 84 | % Load the test dataset 85 | % You will have Xtest, ytest in your environment 86 | load('spamTest.mat'); 87 | 88 | fprintf('\nEvaluating the trained Linear SVM on a test set ...\n') 89 | 90 | p = svmPredict(model, Xtest); 91 | 92 | fprintf('Test Accuracy: %f\n', mean(double(p == ytest)) * 100); 93 | pause; 94 | 95 | 96 | %% ================= Part 5: Top Predictors of Spam ==================== 97 | % Since the model we are training is a linear SVM, we can inspect the 98 | % weights learned by the model to understand better how it is determining 99 | % whether an email is spam or not. The following code finds the words with 100 | % the highest weights in the classifier. Informally, the classifier 101 | % 'thinks' that these words are the most likely indicators of spam. 102 | % 103 | 104 | % Sort the weights and obtin the vocabulary list 105 | [weight, idx] = sort(model.w, 'descend'); 106 | vocabList = getVocabList(); 107 | 108 | fprintf('\nTop predictors of spam: \n'); 109 | for i = 1:15 110 | fprintf(' %-15s (%f) \n', vocabList{idx(i)}, weight(i)); 111 | end 112 | 113 | fprintf('\n\n'); 114 | fprintf('\nProgram paused. Press enter to continue.\n'); 115 | pause; 116 | 117 | %% =================== Part 6: Try Your Own Emails ===================== 118 | % Now that you've trained the spam classifier, you can use it on your own 119 | % emails! In the starter code, we have included spamSample1.txt, 120 | % spamSample2.txt, emailSample1.txt and emailSample2.txt as examples. 121 | % The following code reads in one of these emails and then uses your 122 | % learned SVM classifier to determine whether the email is Spam or 123 | % Not Spam 124 | 125 | % Set the file to be read in (change this to spamSample2.txt, 126 | % emailSample1.txt or emailSample2.txt to see different predictions on 127 | % different emails types). Try your own emails as well! 128 | filename = 'spamSample1.txt'; 129 | 130 | % Read and predict 131 | file_contents = readFile(filename); 132 | word_indices = processEmail(file_contents); 133 | x = emailFeatures(word_indices); 134 | p = svmPredict(model, x); 135 | 136 | fprintf('\nProcessed %s\n\nSpam Classification: %d\n', filename, p); 137 | fprintf('(1 indicates spam, 0 indicates not spam)\n\n'); 138 | 139 | -------------------------------------------------------------------------------- /ex6/gaussianKernel.m: -------------------------------------------------------------------------------- 1 | function sim = gaussianKernel(x1, x2, sigma) 2 | %RBFKERNEL returns a radial basis function kernel between x1 and x2 3 | % sim = gaussianKernel(x1, x2) returns a gaussian kernel between x1 and x2 4 | % and returns the value in sim 5 | 6 | % Ensure that x1 and x2 are column vectors 7 | x1 = x1(:); x2 = x2(:); 8 | 9 | % You need to return the following variables correctly. 
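% Aside (not part of the original file): the RBF (Gaussian) kernel to implement
% below is K(x1, x2) = exp( -||x1 - x2||^2 / (2 * sigma^2) ). For the check in
% ex6.m, x1 = [1 2 1], x2 = [0 4 -1] and sigma = 2, so ||x1 - x2||^2 = 1 + 4 + 4 = 9
% and exp(-9/8) is approximately 0.3247, matching the expected 0.324652.
% A one-line sketch of the same computation:
%
%   exp(-sum((x1 - x2) .^ 2) / (2 * sigma ^ 2))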
10 | sim = 0; 11 | 12 | % ====================== YOUR CODE HERE ====================== 13 | 14 | Gas_kernel = exp(-(norm(x1 - x2)) ^ 2 / (2 * sigma ^ 2)); 15 | 16 | sim = Gas_kernel; 17 | 18 | 19 | 20 | 21 | % ============================================================= 22 | 23 | end 24 | -------------------------------------------------------------------------------- /ex6/processEmail.m: -------------------------------------------------------------------------------- 1 | function word_indices = processEmail(email_contents) 2 | %PROCESSEMAIL preprocesses a the body of an email and 3 | %returns a list of word_indices 4 | % word_indices = PROCESSEMAIL(email_contents) preprocesses 5 | % the body of an email and returns a list of indices of the 6 | % words contained in the email. 7 | % 8 | 9 | % Load Vocabulary 10 | vocabList = getVocabList(); 11 | 12 | % Init return value 13 | word_indices = []; 14 | 15 | % ========================== Preprocess Email =========================== 16 | 17 | % Find the Headers ( \n\n and remove ) 18 | % Uncomment the following lines if you are working with raw emails with the 19 | % full headers 20 | 21 | % hdrstart = strfind(email_contents, ([char(10) char(10)])); 22 | % email_contents = email_contents(hdrstart(1):end); 23 | 24 | % Lower case 25 | email_contents = lower(email_contents); 26 | 27 | % Strip all HTML 28 | % Looks for any expression that starts with < and ends with > and replace 29 | % and does not have any < or > in the tag it with a space 30 | email_contents = regexprep(email_contents, '<[^<>]+>', ' '); 31 | 32 | % Handle Numbers 33 | % Look for one or more characters between 0-9 34 | email_contents = regexprep(email_contents, '[0-9]+', 'number'); 35 | 36 | % Handle URLS 37 | % Look for strings starting with http:// or https:// 38 | email_contents = regexprep(email_contents, ... 39 | '(http|https)://[^\s]*', 'httpaddr'); 40 | 41 | % Handle Email Addresses 42 | % Look for strings with @ in the middle 43 | email_contents = regexprep(email_contents, '[^\s]+@[^\s]+', 'emailaddr'); 44 | 45 | % Handle $ sign 46 | email_contents = regexprep(email_contents, '[$]+', 'dollar'); 47 | 48 | 49 | % ========================== Tokenize Email =========================== 50 | 51 | % Output the email to screen as well 52 | fprintf('\n==== Processed Email ====\n\n'); 53 | 54 | % Process file 55 | l = 0; 56 | 57 | while ~isempty(email_contents) 58 | 59 | % Tokenize and also get rid of any punctuation 60 | [str, email_contents] = ... 61 | strtok(email_contents, ... 62 | [' @$/#.-:&*+=[]?!(){},''">_<;%' char(10) char(13)]); 63 | 64 | % Remove any non alphanumeric characters 65 | str = regexprep(str, '[^a-zA-Z0-9]', ''); 66 | 67 | % Stem the word 68 | % (the porterStemmer sometimes has issues, so we use a try catch block) 69 | try str = porterStemmer(strtrim(str)); 70 | catch str = ''; continue; 71 | end; 72 | 73 | % Skip the word if it is too short 74 | if length(str) < 1 75 | continue; 76 | end 77 | 78 | len = length(vocabList); 79 | 80 | for i = 1: len 81 | if(strcmp(vocabList{i}, str)) 82 | word_indices = [word_indices; i]; 83 | end 84 | end 85 | 86 | % Note: vocabList{idx} returns a the word with index idx in the 87 | % vocabulary list. 88 | % 89 | % Note: You can use strcmp(str1, str2) to compare two strings (str1 and 90 | % str2). It will return 1 only if the two strings are equivalent. 
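% Aside (not part of the original file): a loop-free alternative to the
% vocabulary lookup above, using the fact that strcmp compares a string against
% every cell of a cell array (a sketch, illustrative only):
%
%   idx = find(strcmp(vocabList, str));
%   if ~isempty(idx)
%     word_indices = [word_indices; idx];
%   end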
91 | % 92 | 93 | 94 | 95 | 96 | % ============================================================= 97 | 98 | 99 | % Print to screen, ensuring that the output lines are not too long 100 | if (l + length(str) + 1) > 78 101 | fprintf('\n'); 102 | l = 0; 103 | end 104 | fprintf('%s ', str); 105 | l = l + length(str) + 1; 106 | 107 | end 108 | 109 | % Print footer 110 | fprintf('\n\n=========================\n'); 111 | 112 | end 113 | -------------------------------------------------------------------------------- /ex7/computeCentroids.m: -------------------------------------------------------------------------------- 1 | function centroids = computeCentroids(X, idx, K) 2 | %COMPUTECENTROIDS returns the new centroids by computing the means of the 3 | %data points assigned to each centroid. 4 | 5 | 6 | % Useful variables 7 | [m n] = size(X); 8 | 9 | % You need to return the following variables correctly. 10 | centroids = zeros(K, n); 11 | 12 | 13 | Ck = zeros(K,1); 14 | 15 | for j = 1:K 16 | for i = 1:m 17 | if(idx(i) == j) 18 | Ck(j) = Ck(j) + 1; 19 | end 20 | end 21 | end 22 | 23 | 24 | for j = 1:K 25 | Mu = zeros(K, n); 26 | for i = 1:m 27 | 28 | if (idx(i) == j) 29 | Mu(i,:) = X(i,:); 30 | end 31 | end 32 | 33 | for k = 1:n 34 | centroids(j,k) = 1 / Ck(j) * sum(Mu(:,k)); 35 | 36 | 37 | end 38 | 39 | end 40 | 41 | 42 | 43 | 44 | 45 | 46 | % ============================================================= 47 | 48 | 49 | end 50 | 51 | -------------------------------------------------------------------------------- /ex7/ex7.m: -------------------------------------------------------------------------------- 1 | %% Machine Learning Online Class 2 | % Exercise 7 | Principle Component Analysis and K-Means Clustering 3 | % 4 | % Instructions 5 | % ------------ 6 | % 7 | % This file contains code that helps you get started on the 8 | % exercise. You will need to complete the following functions: 9 | % 10 | % pca.m 11 | % projectData.m 12 | % recoverData.m 13 | % computeCentroids.m 14 | % findClosestCentroids.m 15 | % kMeansInitCentroids.m 16 | % 17 | % For this exercise, you will not need to change any code in this file, 18 | % or any other files other than those mentioned above. 19 | % 20 | 21 | %% Initialization 22 | clear ; close all; clc 23 | 24 | %% ================= Part 1: Find Closest Centroids ==================== 25 | % To help you implement K-Means, we have divided the learning algorithm 26 | % into two functions -- findClosestCentroids and computeCentroids. In this 27 | % part, you should complete the code in the findClosestCentroids function. 28 | % 29 | fprintf('Finding closest centroids.\n\n'); 30 | 31 | % Load an example dataset that we will be using 32 | load('ex7data2.mat'); 33 | 34 | % Select an initial set of centroids 35 | K = 3; % 3 Centroids 36 | initial_centroids = [3 3; 6 2; 8 5]; 37 | 38 | % Find the closest centroids for the examples using the 39 | % initial_centroids 40 | idx = findClosestCentroids(X, initial_centroids); 41 | 42 | fprintf('Closest centroids for the first 3 examples: \n') 43 | fprintf(' %d', idx(1:3)); 44 | fprintf('\n(the closest centroids should be 1, 3, 2 respectively)\n'); 45 | 46 | fprintf('Program paused. Press enter to continue.\n'); 47 | pause; 48 | 49 | %% ===================== Part 2: Compute Means ========================= 50 | % After implementing the closest centroids function, you should now 51 | % complete the computeCentroids function. 
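% Aside (not part of the original file): computeCentroids.m, implemented with
% counting loops above, can also be written with logical indexing, one centroid
% at a time (a sketch, illustrative only; it assumes every centroid has at
% least one assigned point):
%
%   for k = 1:K
%     centroids(k, :) = mean(X(idx == k, :), 1);
%   end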
52 | % 53 | fprintf('\nComputing centroids means.\n\n'); 54 | 55 | % Compute means based on the closest centroids found in the previous part. 56 | centroids = computeCentroids(X, idx, K); 57 | 58 | fprintf('Centroids computed after initial finding of closest centroids: \n') 59 | fprintf(' %f %f \n' , centroids'); 60 | fprintf('\n(the centroids should be\n'); 61 | fprintf(' [ 2.428301 3.157924 ]\n'); 62 | fprintf(' [ 5.813503 2.633656 ]\n'); 63 | fprintf(' [ 7.119387 3.616684 ]\n\n'); 64 | 65 | fprintf('Program paused. Press enter to continue.\n'); 66 | pause; 67 | 68 | 69 | %% =================== Part 3: K-Means Clustering ====================== 70 | % After you have completed the two functions computeCentroids and 71 | % findClosestCentroids, you have all the necessary pieces to run the 72 | % kMeans algorithm. In this part, you will run the K-Means algorithm on 73 | % the example dataset we have provided. 74 | % 75 | fprintf('\nRunning K-Means clustering on example dataset.\n\n'); 76 | 77 | % Load an example dataset 78 | load('ex7data2.mat'); 79 | 80 | % Settings for running K-Means 81 | K = 3; 82 | max_iters = 10; 83 | 84 | % For consistency, here we set centroids to specific values 85 | % but in practice you want to generate them automatically, such as by 86 | % settings them to be random examples (as can be seen in 87 | % kMeansInitCentroids). 88 | initial_centroids = [3 3; 6 2; 8 5]; 89 | 90 | % Run K-Means algorithm. The 'true' at the end tells our function to plot 91 | % the progress of K-Means 92 | [centroids, idx] = runkMeans(X, initial_centroids, max_iters, true); 93 | fprintf('\nK-Means Done.\n\n'); 94 | 95 | fprintf('Program paused. Press enter to continue.\n'); 96 | pause; 97 | 98 | %% ============= Part 4: K-Means Clustering on Pixels =============== 99 | % In this exercise, you will use K-Means to compress an image. To do this, 100 | % you will first run K-Means on the colors of the pixels in the image and 101 | % then you will map each pixel onto its closest centroid. 102 | % 103 | % You should now complete the code in kMeansInitCentroids.m 104 | % 105 | 106 | fprintf('\nRunning K-Means clustering on pixels from an image.\n\n'); 107 | 108 | % Load an image of a bird 109 | A = double(imread('bird_small.png')); 110 | 111 | % If imread does not work for you, you can try instead 112 | % load ('bird_small.mat'); 113 | 114 | A = A / 255; % Divide by 255 so that all values are in the range 0 - 1 115 | 116 | % Size of the image 117 | img_size = size(A); 118 | 119 | % Reshape the image into an Nx3 matrix where N = number of pixels. 120 | % Each row will contain the Red, Green and Blue pixel values 121 | % This gives us our dataset matrix X that we will use K-Means on. 122 | X = reshape(A, img_size(1) * img_size(2), 3); 123 | 124 | % Run your K-Means algorithm on this data 125 | % You should try different values of K and max_iters here 126 | K = 16; 127 | max_iters = 10; 128 | 129 | % When using K-Means, it is important the initialize the centroids 130 | % randomly. 131 | % You should complete the code in kMeansInitCentroids.m before proceeding 132 | initial_centroids = kMeansInitCentroids(X, K); 133 | 134 | % Run K-Means 135 | [centroids, idx] = runkMeans(X, initial_centroids, max_iters); 136 | 137 | fprintf('Program paused. Press enter to continue.\n'); 138 | pause; 139 | 140 | 141 | %% ================= Part 5: Image Compression ====================== 142 | % In this part of the exercise, you will use the clusters of K-Means to 143 | % compress an image. 
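% Aside (not part of the original file): as noted in Part 4 above, the
% centroids should be initialized to randomly chosen training examples. A
% common sketch of that idea, illustrative only; the graded version belongs in
% kMeansInitCentroids.m:
%
%   randidx = randperm(size(X, 1));
%   centroids = X(randidx(1:K), :);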
To do this, we first find the closest clusters for 144 | % each example. After that, we 145 | 146 | fprintf('\nApplying K-Means to compress an image.\n\n'); 147 | 148 | % Find closest cluster members 149 | idx = findClosestCentroids(X, centroids); 150 | 151 | % Essentially, now we have represented the image X as in terms of the 152 | % indices in idx. 153 | 154 | % We can now recover the image from the indices (idx) by mapping each pixel 155 | % (specified by its index in idx) to the centroid value 156 | X_recovered = centroids(idx,:); 157 | 158 | % Reshape the recovered image into proper dimensions 159 | X_recovered = reshape(X_recovered, img_size(1), img_size(2), 3); 160 | 161 | % Display the original image 162 | subplot(1, 2, 1); 163 | imagesc(A); 164 | title('Original'); 165 | 166 | % Display compressed image side by side 167 | subplot(1, 2, 2); 168 | imagesc(X_recovered) 169 | title(sprintf('Compressed, with %d colors.', K)); 170 | 171 | 172 | fprintf('Program paused. Press enter to continue.\n'); 173 | pause; 174 | 175 | -------------------------------------------------------------------------------- /ex7/ex7_pca.m: -------------------------------------------------------------------------------- 1 | %% Machine Learning Online Class 2 | % Exercise 7 | Principle Component Analysis and K-Means Clustering 3 | % 4 | % Instructions 5 | % ------------ 6 | % 7 | % This file contains code that helps you get started on the 8 | % exercise. You will need to complete the following functions: 9 | % 10 | % pca.m 11 | % projectData.m 12 | % recoverData.m 13 | % computeCentroids.m 14 | % findClosestCentroids.m 15 | % kMeansInitCentroids.m 16 | % 17 | % For this exercise, you will not need to change any code in this file, 18 | % or any other files other than those mentioned above. 19 | % 20 | 21 | %% Initialization 22 | clear ; close all; clc 23 | 24 | %% ================== Part 1: Load Example Dataset =================== 25 | % We start this exercise by using a small dataset that is easily to 26 | % visualize 27 | % 28 | fprintf('Visualizing example dataset for PCA.\n\n'); 29 | 30 | % The following command loads the dataset. You should now have the 31 | % variable X in your environment 32 | load ('ex7data1.mat'); 33 | 34 | % Visualize the example dataset 35 | plot(X(:, 1), X(:, 2), 'bo'); 36 | axis([0.5 6.5 2 8]); axis square; 37 | 38 | fprintf('Program paused. Press enter to continue.\n'); 39 | pause; 40 | 41 | 42 | %% =============== Part 2: Principal Component Analysis =============== 43 | % You should now implement PCA, a dimension reduction technique. You 44 | % should complete the code in pca.m 45 | % 46 | fprintf('\nRunning PCA on example dataset.\n\n'); 47 | 48 | % Before running PCA, it is important to first normalize X 49 | [X_norm, mu, sigma] = featureNormalize(X); 50 | 51 | % Run PCA 52 | [U, S] = pca(X_norm); 53 | 54 | % Compute mu, the mean of the each feature 55 | 56 | % Draw the eigenvectors centered at mean of data. These lines show the 57 | % directions of maximum variations in the dataset. 58 | hold on; 59 | drawLine(mu, mu + 1.5 * S(1,1) * U(:,1)', '-k', 'LineWidth', 2); 60 | drawLine(mu, mu + 1.5 * S(2,2) * U(:,2)', '-k', 'LineWidth', 2); 61 | hold off; 62 | 63 | fprintf('Top eigenvector: \n'); 64 | fprintf(' U(:,1) = %f %f \n', U(1,1), U(2,1)); 65 | fprintf('\n(you should expect to see -0.707107 -0.707107)\n'); 66 | 67 | fprintf('Program paused. 
Press enter to continue.\n'); 68 | pause; 69 | 70 | 71 | %% =================== Part 3: Dimension Reduction =================== 72 | % You should now implement the projection step to map the data onto the 73 | % first k eigenvectors. The code will then plot the data in this reduced 74 | % dimensional space. This will show you what the data looks like when 75 | % using only the corresponding eigenvectors to reconstruct it. 76 | % 77 | % You should complete the code in projectData.m 78 | % 79 | fprintf('\nDimension reduction on example dataset.\n\n'); 80 | 81 | % Plot the normalized dataset (returned from pca) 82 | plot(X_norm(:, 1), X_norm(:, 2), 'bo'); 83 | axis([-4 3 -4 3]); axis square 84 | 85 | % Project the data onto K = 1 dimension 86 | K = 1; 87 | Z = projectData(X_norm, U, K); 88 | fprintf('Projection of the first example: %f\n', Z(1)); 89 | fprintf('\n(this value should be about 1.481274)\n\n'); 90 | 91 | X_rec = recoverData(Z, U, K); 92 | fprintf('Approximation of the first example: %f %f\n', X_rec(1, 1), X_rec(1, 2)); 93 | fprintf('\n(this value should be about -1.047419 -1.047419)\n\n'); 94 | 95 | % Draw lines connecting the projected points to the original points 96 | hold on; 97 | plot(X_rec(:, 1), X_rec(:, 2), 'ro'); 98 | for i = 1:size(X_norm, 1) 99 | drawLine(X_norm(i,:), X_rec(i,:), '--k', 'LineWidth', 1); 100 | end 101 | hold off 102 | 103 | fprintf('Program paused. Press enter to continue.\n'); 104 | pause; 105 | 106 | %% =============== Part 4: Loading and Visualizing Face Data ============= 107 | % We start the exercise by first loading and visualizing the dataset. 108 | % The following code will load the dataset into your environment 109 | % 110 | fprintf('\nLoading face dataset.\n\n'); 111 | 112 | % Load Face dataset 113 | load ('ex7faces.mat') 114 | 115 | % Display the first 100 faces in the dataset 116 | displayData(X(1:100, :)); 117 | 118 | fprintf('Program paused. Press enter to continue.\n'); 119 | pause; 120 | 121 | %% =========== Part 5: PCA on Face Data: Eigenfaces =================== 122 | % Run PCA and visualize the eigenvectors which are in this case eigenfaces 123 | % We display the first 36 eigenfaces. 124 | % 125 | fprintf(['\nRunning PCA on face dataset.\n' ... 126 | '(this might take a minute or two ...)\n\n']); 127 | 128 | % Before running PCA, it is important to first normalize X by subtracting 129 | % the mean value from each feature 130 | [X_norm, mu, sigma] = featureNormalize(X); 131 | 132 | % Run PCA 133 | [U, S] = pca(X_norm); 134 | 135 | % Visualize the top 36 eigenvectors found 136 | displayData(U(:, 1:36)'); 137 | 138 | fprintf('Program paused. Press enter to continue.\n'); 139 | pause; 140 | 141 | 142 | %% ============= Part 6: Dimension Reduction for Faces ================= 143 | % Project images to the eigen space using the top k eigenvectors 144 | % If you are applying a machine learning algorithm 145 | fprintf('\nDimension reduction for face dataset.\n\n'); 146 | 147 | K = 100; 148 | Z = projectData(X_norm, U, K); 149 | 150 | fprintf('The projected data Z has a size of: ') 151 | fprintf('%d ', size(Z)); 152 | 153 | fprintf('\n\nProgram paused. 
Press enter to continue.\n'); 154 | pause; 155 | 156 | %% ==== Part 7: Visualization of Faces after PCA Dimension Reduction ==== 157 | % Project images to the eigen space using the top K eigen vectors and 158 | % visualize only using those K dimensions 159 | % Compare to the original input, which is also displayed 160 | 161 | fprintf('\nVisualizing the projected (reduced dimension) faces.\n\n'); 162 | 163 | K = 100; 164 | X_rec = recoverData(Z, U, K); 165 | 166 | % Display normalized data 167 | subplot(1, 2, 1); 168 | displayData(X_norm(1:100,:)); 169 | title('Original faces'); 170 | axis square; 171 | 172 | % Display reconstructed data from only k eigenfaces 173 | subplot(1, 2, 2); 174 | displayData(X_rec(1:100,:)); 175 | title('Recovered faces'); 176 | axis square; 177 | 178 | fprintf('Program paused. Press enter to continue.\n'); 179 | pause; 180 | 181 | 182 | %% === Part 8(a): Optional (ungraded) Exercise: PCA for Visualization === 183 | % One useful application of PCA is to use it to visualize high-dimensional 184 | % data. In the last K-Means exercise you ran K-Means on 3-dimensional 185 | % pixel colors of an image. We first visualize this output in 3D, and then 186 | % apply PCA to obtain a visualization in 2D. 187 | 188 | close all; close all; clc 189 | 190 | % Reload the image from the previous exercise and run K-Means on it 191 | % For this to work, you need to complete the K-Means assignment first 192 | A = double(imread('bird_small.png')); 193 | 194 | % If imread does not work for you, you can try instead 195 | % load ('bird_small.mat'); 196 | 197 | A = A / 255; 198 | img_size = size(A); 199 | X = reshape(A, img_size(1) * img_size(2), 3); 200 | K = 16; 201 | max_iters = 10; 202 | initial_centroids = kMeansInitCentroids(X, K); 203 | [centroids, idx] = runkMeans(X, initial_centroids, max_iters); 204 | 205 | % Sample 1000 random indexes (since working with all the data is 206 | % too expensive. If you have a fast computer, you may increase this. 207 | sel = floor(rand(1000, 1) * size(X, 1)) + 1; 208 | 209 | % Setup Color Palette 210 | palette = hsv(K); 211 | colors = palette(idx(sel), :); 212 | 213 | % Visualize the data and centroid memberships in 3D 214 | figure; 215 | scatter3(X(sel, 1), X(sel, 2), X(sel, 3), 10, colors); 216 | title('Pixel dataset plotted in 3D. Color shows centroid memberships'); 217 | fprintf('Program paused. Press enter to continue.\n'); 218 | pause; 219 | 220 | %% === Part 8(b): Optional (ungraded) Exercise: PCA for Visualization === 221 | % Use PCA to project this cloud to 2D for visualization 222 | 223 | % Subtract the mean to use PCA 224 | [X_norm, mu, sigma] = featureNormalize(X); 225 | 226 | % PCA and project the data to 2D 227 | [U, S] = pca(X_norm); 228 | Z = projectData(X_norm, U, 2); 229 | 230 | % Plot in 2D 231 | figure; 232 | plotDataPoints(Z(sel, :), idx(sel), K); 233 | title('Pixel dataset plotted in 2D, using PCA for dimensionality reduction'); 234 | fprintf('Program paused. Press enter to continue.\n'); 235 | pause; 236 | -------------------------------------------------------------------------------- /ex7/findClosestCentroids.m: -------------------------------------------------------------------------------- 1 | function idx = findClosestCentroids(X, centroids) 2 | %FINDCLOSESTCENTROIDS computes the centroid memberships for every example 3 | % idx = FINDCLOSESTCENTROIDS (X, centroids) returns the closest centroids 4 | % in idx for a dataset X where each row is a single example. idx = m x 1 5 | % vector of centroid assignments (i.e. 
each entry in range [1..K]) 6 | % 7 | 8 | % Set K 9 | K = size(centroids, 1); 10 | 11 | % You need to return the following variables correctly. 12 | idx = zeros(size(X,1), 1); 13 | 14 | % ====================== YOUR CODE HERE ====================== 15 | % Instructions: Go over every example, find its closest centroid, and store 16 | % the index inside idx at the appropriate location. 17 | % Concretely, idx(i) should contain the index of the centroid 18 | % closest to example i. Hence, it should be a value in the 19 | % range 1..K 20 | % 21 | % Note: You can use a for-loop over the examples to compute this. 22 | % 23 | 24 | m = length(X); 25 | dist = zeros(K, 1); 26 | 27 | for i = 1:m 28 | for j = 1:K 29 | 30 | dist(j) = norm(X(i,:) - centroids(j,:)) ^ 2; 31 | 32 | end 33 | 34 | [minV, minI] = min(dist); 35 | 36 | idx(i) = minI; 37 | 38 | 39 | end 40 | 41 | 42 | 43 | 44 | % ============================================================= 45 | 46 | end 47 | 48 | -------------------------------------------------------------------------------- /ex7/kMeansInitCentroids.m: -------------------------------------------------------------------------------- 1 | function centroids = kMeansInitCentroids(X, K) 2 | %KMEANSINITCENTROIDS This function initializes K centroids that are to be 3 | %used in K-Means on the dataset X 4 | % centroids = KMEANSINITCENTROIDS(X, K) returns K initial centroids to be 5 | % used with the K-Means on the dataset X 6 | % 7 | 8 | % You should return this values correctly 9 | centroids = zeros(K, size(X, 2)); 10 | 11 | % ====================== YOUR CODE HERE ====================== 12 | % Instructions: You should set centroids to randomly chosen examples from 13 | % the dataset X 14 | % 15 | 16 | 17 | randidx = randperm(size(X,1)); 18 | 19 | centroids = X(randidx(1:K),:); 20 | 21 | 22 | 23 | 24 | 25 | % ============================================================= 26 | 27 | end 28 | 29 | -------------------------------------------------------------------------------- /ex7/pca.m: -------------------------------------------------------------------------------- 1 | function [U, S] = pca(X) 2 | %PCA Run principal component analysis on the dataset X 3 | % [U, S, X] = pca(X) computes eigenvectors of the covariance matrix of X 4 | % Returns the eigenvectors U, the eigenvalues (on diagonal) in S 5 | % 6 | 7 | % Useful values 8 | [m, n] = size(X); 9 | 10 | % You need to return the following variables correctly. 11 | U = zeros(n); 12 | S = zeros(n); 13 | 14 | % ====================== YOUR CODE HERE ====================== 15 | 16 | sigma = zeros(n); 17 | sigma = 1 / m .* (X' * X); 18 | 19 | 20 | [U,S,V] = svd(sigma); 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | % ========================================================================= 29 | 30 | end 31 | -------------------------------------------------------------------------------- /ex7/projectData.m: -------------------------------------------------------------------------------- 1 | function Z = projectData(X, U, K) 2 | %PROJECTDATA Computes the reduced data representation when projecting only 3 | %on to the top k eigenvectors 4 | % Z = projectData(X, U, K) computes the projection of 5 | % the normalized inputs X into the reduced dimensional space spanned by 6 | % the first K columns of U. It returns the projected examples in Z. 7 | % 8 | 9 | % You need to return the following variables correctly. 
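% (A sketch of what this projection amounts to, for reference: each row x of
%  the normalized data is mapped to z = x * U(:, 1:K), i.e. its coordinates
%  along the first K columns of the U returned by pca.m above. The diagonal of
%  S can also guide the choice of K: with s = diag(S), the smallest K with
%  sum(s(1:K)) / sum(s) >= 0.99 retains about 99% of the variance. That check
%  is not part of the exercise scripts shown here.)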
10 | Z = zeros(size(X, 1), K); 11 | 12 | % ====================== YOUR CODE HERE ====================== 13 | 14 | 15 | U_red = U(:,1:K); 16 | 17 | Z = X * U_red; 18 | 19 | 20 | 21 | 22 | 23 | % ============================================================= 24 | 25 | end 26 | -------------------------------------------------------------------------------- /ex7/recoverData.m: -------------------------------------------------------------------------------- 1 | function X_rec = recoverData(Z, U, K) 2 | %RECOVERDATA Recovers an approximation of the original data when using the 3 | %projected data 4 | % X_rec = RECOVERDATA(Z, U, K) recovers an approximation the 5 | % original data that has been reduced to K dimensions. It returns the 6 | % approximate reconstruction in X_rec. 7 | % 8 | 9 | % You need to return the following variables correctly. 10 | X_rec = zeros(size(Z, 1), size(U, 1)); 11 | 12 | 13 | U = U(:,1:K); 14 | 15 | X_rec = Z * U'; 16 | 17 | 18 | 19 | % ============================================================= 20 | 21 | end 22 | -------------------------------------------------------------------------------- /ex8/cofiCostFunc.m: -------------------------------------------------------------------------------- 1 | function [J, grad] = cofiCostFunc(params, Y, R, num_users, num_movies, ... 2 | num_features, lambda) 3 | %COFICOSTFUNC Collaborative filtering cost function 4 | % [J, grad] = COFICOSTFUNC(params, Y, R, num_users, num_movies, ... 5 | % num_features, lambda) returns the cost and gradient for the 6 | % collaborative filtering problem. 7 | % 8 | 9 | % Unfold the U and W matrices from params 10 | X = reshape(params(1:num_movies*num_features), num_movies, num_features); 11 | Theta = reshape(params(num_movies*num_features+1:end), ... 12 | num_users, num_features); 13 | 14 | 15 | % You need to return the following values correctly 16 | J = 0; 17 | X_grad = zeros(size(X)); 18 | Theta_grad = zeros(size(Theta)); 19 | 20 | % ====================== YOUR CODE HERE ====================== 21 | cost = 0; 22 | tmp_Y = zeros(num_movies, num_users); 23 | 24 | Reg_Theta = 0; 25 | Reg_X = 0; 26 | 27 | for i = 1:num_movies 28 | for k = 1:num_features 29 | 30 | %X_tmp = X(idx,:); 31 | Reg_X = Reg_X + norm(X(i,k)) ^ 2; 32 | 33 | end 34 | end 35 | 36 | for j = 1:num_users 37 | for k = 1:num_features 38 | 39 | %X_tmp = X(idx,:); 40 | Reg_Theta = Reg_Theta + norm(Theta(j,k)) ^ 2; 41 | 42 | end 43 | end 44 | 45 | 46 | 47 | for i = 1:num_movies 48 | for j = 1:num_users 49 | if(R(i,j) == 1) 50 | cost = cost + (Theta(j,:)*X(i,:)' - Y(i,j)) ^ 2; 51 | end 52 | end 53 | end 54 | 55 | J = 1 / 2 * (cost + lambda * (Reg_Theta + Reg_X)); 56 | 57 | 58 | % You should set the following variables correctly: 59 | % 60 | % X_grad - num_movies x num_features matrix, containing the 61 | % partial derivatives w.r.t. to each element of X 62 | % Theta_grad - num_users x num_features matrix, containing the 63 | % partial derivatives w.r.t. to each element of Theta 64 | grad_X = zeros(size(X)); 65 | grad_Theta = zeros(size(Theta)); 66 | 67 | for i = 1:num_movies 68 | 69 | idx = find(R(i,:) == 1); 70 | Theta_tmp = Theta(idx,:); 71 | Y_tmp = Y(i,idx); 72 | 73 | grad_X(i,:) = (X(i,:) * Theta_tmp' - Y_tmp) * Theta_tmp + ... 74 | lambda * X(i,:); 75 | for j = 1:num_users 76 | jdx = find(R(:,j) == 1); 77 | X_tmp = X(jdx,:); 78 | Y_tmp = Y(jdx,j); 79 | grad_Theta(j,:) = (Theta(j,:) * X_tmp' - Y_tmp') * X_tmp + ... 
80 | lambda * Theta(j,:); 81 | end 82 | end 83 | 84 | 85 | 86 | 87 | % ============================================================= 88 | 89 | grad = [grad_X(:); grad_Theta(:)]; 90 | 91 | end 92 | -------------------------------------------------------------------------------- /ex8/estimateGaussian.m: -------------------------------------------------------------------------------- 1 | function [mu sigma2] = estimateGaussian(X) 2 | %ESTIMATEGAUSSIAN This function estimates the parameters of a 3 | %Gaussian distribution using the data in X 4 | % [mu sigma2] = estimateGaussian(X), 5 | % The input X is the dataset with each n-dimensional data point in one row 6 | % The output is an n-dimensional vector mu, the mean of the data set 7 | % and the variances sigma^2, an n x 1 vector 8 | % 9 | 10 | % Useful variables 11 | [m, n] = size(X); 12 | 13 | % You should return these values correctly 14 | mu = zeros(n, 1); 15 | sigma2 = zeros(n, 1); 16 | 17 | % ====================== YOUR CODE HERE ====================== 18 | 19 | for i = 1:n 20 | mu(i,:) = 1 / m * sum(X(:,i)); 21 | sigma2(i,:) = 1 / m * (norm(X(:,i) - mu(i,:)) ^ 2); 22 | end 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | % ============================================================= 34 | 35 | 36 | end 37 | -------------------------------------------------------------------------------- /ex8/ex8.m: -------------------------------------------------------------------------------- 1 | %% Machine Learning Online Class 2 | % Exercise 8 | Anomaly Detection and Collaborative Filtering 3 | % 4 | % Instructions 5 | % ------------ 6 | % 7 | % This file contains code that helps you get started on the 8 | % exercise. You will need to complete the following functions: 9 | % 10 | % estimateGaussian.m 11 | % selectThreshold.m 12 | % cofiCostFunc.m 13 | % 14 | % For this exercise, you will not need to change any code in this file, 15 | % or any other files other than those mentioned above. 16 | % 17 | 18 | %% Initialization 19 | clear ; close all; clc 20 | 21 | %% ================== Part 1: Load Example Dataset =================== 22 | % We start this exercise by using a small dataset that is easy to 23 | % visualize. 24 | % 25 | % Our example case consists of 2 network server statistics across 26 | % several machines: the latency and throughput of each machine. 27 | % This exercise will help us find possibly faulty (or very fast) machines. 28 | % 29 | 30 | fprintf('Visualizing example dataset for outlier detection.\n\n'); 31 | 32 | % The following command loads the dataset. You should now have the 33 | % variables X, Xval, yval in your environment 34 | load('ex8data1.mat'); 35 | 36 | % Visualize the example dataset 37 | plot(X(:, 1), X(:, 2), 'bx'); 38 | axis([0 30 0 30]); 39 | xlabel('Latency (ms)'); 40 | ylabel('Throughput (mb/s)'); 41 | 42 | fprintf('Program paused. Press enter to continue.\n'); 43 | pause 44 | 45 | 46 | %% ================== Part 2: Estimate the dataset statistics =================== 47 | % For this exercise, we assume a Gaussian distribution for the dataset. 48 | % 49 | % We first estimate the parameters of our assumed Gaussian distribution, 50 | % then compute the probabilities for each of the points and then visualize 51 | % both the overall distribution and where each of the points falls in 52 | % terms of that distribution. 
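% --- Equivalent vectorized estimates (a sketch; the loop in estimateGaussian.m
% above computes the same maximum-likelihood values):
mu     = mean(X)';       % n x 1 vector of per-feature means
sigma2 = var(X, 1)';     % n x 1 per-feature variances, normalized by m (not m-1)

% Treating sigma2 as a diagonal covariance, the density of one example x
% (a 1 x n row) factorizes into a product of per-feature Gaussians:
x   = X(1, :);                                                        % e.g. the first example
p_x = prod( exp(-(x - mu').^2 ./ (2 * sigma2')) ./ sqrt(2 * pi * sigma2') );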
53 | % 54 | fprintf('Visualizing Gaussian fit.\n\n'); 55 | 56 | % Estimate my and sigma2 57 | [mu sigma2] = estimateGaussian(X); 58 | 59 | % Returns the density of the multivariate normal at each data point (row) 60 | % of X 61 | p = multivariateGaussian(X, mu, sigma2); 62 | 63 | % Visualize the fit 64 | visualizeFit(X, mu, sigma2); 65 | xlabel('Latency (ms)'); 66 | ylabel('Throughput (mb/s)'); 67 | 68 | fprintf('Program paused. Press enter to continue.\n'); 69 | pause; 70 | 71 | %% ================== Part 3: Find Outliers =================== 72 | % Now you will find a good epsilon threshold using a cross-validation set 73 | % probabilities given the estimated Gaussian distribution 74 | % 75 | 76 | pval = multivariateGaussian(Xval, mu, sigma2); 77 | 78 | [epsilon F1] = selectThreshold(yval, pval); 79 | fprintf('Best epsilon found using cross-validation: %e\n', epsilon); 80 | fprintf('Best F1 on Cross Validation Set: %f\n', F1); 81 | fprintf(' (you should see a value epsilon of about 8.99e-05)\n'); 82 | fprintf(' (you should see a Best F1 value of 0.875000)\n\n'); 83 | 84 | % Find the outliers in the training set and plot the 85 | outliers = find(p < epsilon); 86 | 87 | % Draw a red circle around those outliers 88 | hold on 89 | plot(X(outliers, 1), X(outliers, 2), 'ro', 'LineWidth', 2, 'MarkerSize', 10); 90 | hold off 91 | 92 | fprintf('Program paused. Press enter to continue.\n'); 93 | pause; 94 | 95 | %% ================== Part 4: Multidimensional Outliers =================== 96 | % We will now use the code from the previous part and apply it to a 97 | % harder problem in which more features describe each datapoint and only 98 | % some features indicate whether a point is an outlier. 99 | % 100 | 101 | % Loads the second dataset. You should now have the 102 | % variables X, Xval, yval in your environment 103 | load('ex8data2.mat'); 104 | 105 | % Apply the same steps to the larger dataset 106 | [mu sigma2] = estimateGaussian(X); 107 | 108 | % Training set 109 | p = multivariateGaussian(X, mu, sigma2); 110 | 111 | % Cross-validation set 112 | pval = multivariateGaussian(Xval, mu, sigma2); 113 | 114 | % Find the best threshold 115 | [epsilon F1] = selectThreshold(yval, pval); 116 | 117 | fprintf('Best epsilon found using cross-validation: %e\n', epsilon); 118 | fprintf('Best F1 on Cross Validation Set: %f\n', F1); 119 | fprintf(' (you should see a value epsilon of about 1.38e-18)\n'); 120 | fprintf(' (you should see a Best F1 value of 0.615385)\n'); 121 | fprintf('# Outliers found: %d\n\n', sum(p < epsilon)); 122 | -------------------------------------------------------------------------------- /ex8/ex8_cofi.m: -------------------------------------------------------------------------------- 1 | %% Machine Learning Online Class 2 | % Exercise 8 | Anomaly Detection and Collaborative Filtering 3 | % 4 | % Instructions 5 | % ------------ 6 | % 7 | % This file contains code that helps you get started on the 8 | % exercise. You will need to complete the following functions: 9 | % 10 | % estimateGaussian.m 11 | % selectThreshold.m 12 | % cofiCostFunc.m 13 | % 14 | % For this exercise, you will not need to change any code in this file, 15 | % or any other files other than those mentioned above. 16 | % 17 | 18 | %% =============== Part 1: Loading movie ratings dataset ================ 19 | % You will start by loading the movie ratings dataset to understand the 20 | % structure of the data. 
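% --- Part 2 below evaluates cofiCostFunc.m on a reduced dataset. For reference,
% the cost and gradients implemented there vectorize into a few lines; a sketch
% that should return the same J and gradients as the looped version earlier,
% given the same X, Theta, Y, R and lambda:
E = (X * Theta' - Y) .* R;                  % rating errors, zeroed where R(i,j) == 0
J = (sum(E(:).^2) + lambda * (sum(X(:).^2) + sum(Theta(:).^2))) / 2;
X_grad     = E  * Theta + lambda * X;       % num_movies x num_features
Theta_grad = E' * X     + lambda * Theta;   % num_users  x num_features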
21 | % 22 | fprintf('Loading movie ratings dataset.\n\n'); 23 | 24 | % Load data 25 | load ('ex8_movies.mat'); 26 | 27 | % Y is a 1682x943 matrix, containing ratings (1-5) of 1682 movies on 28 | % 943 users 29 | % 30 | % R is a 1682x943 matrix, where R(i,j) = 1 if and only if user j gave a 31 | % rating to movie i 32 | 33 | % From the matrix, we can compute statistics like average rating. 34 | fprintf('Average rating for movie 1 (Toy Story): %f / 5\n\n', ... 35 | mean(Y(1, R(1, :)))); 36 | 37 | % We can "visualize" the ratings matrix by plotting it with imagesc 38 | imagesc(Y); 39 | ylabel('Movies'); 40 | xlabel('Users'); 41 | 42 | fprintf('\nProgram paused. Press enter to continue.\n'); 43 | pause; 44 | 45 | %% ============ Part 2: Collaborative Filtering Cost Function =========== 46 | % You will now implement the cost function for collaborative filtering. 47 | % To help you debug your cost function, we have included set of weights 48 | % that we trained on that. Specifically, you should complete the code in 49 | % cofiCostFunc.m to return J. 50 | 51 | % Load pre-trained weights (X, Theta, num_users, num_movies, num_features) 52 | load ('ex8_movieParams.mat'); 53 | 54 | % Reduce the data set size so that this runs faster 55 | num_users = 4; num_movies = 5; num_features = 3; 56 | X = X(1:num_movies, 1:num_features); 57 | Theta = Theta(1:num_users, 1:num_features); 58 | Y = Y(1:num_movies, 1:num_users); 59 | R = R(1:num_movies, 1:num_users); 60 | 61 | % Evaluate cost function 62 | J = cofiCostFunc([X(:) ; Theta(:)], Y, R, num_users, num_movies, ... 63 | num_features, 0); 64 | 65 | fprintf(['Cost at loaded parameters: %f '... 66 | '\n(this value should be about 22.22)\n'], J); 67 | 68 | fprintf('\nProgram paused. Press enter to continue.\n'); 69 | pause; 70 | 71 | 72 | %% ============== Part 3: Collaborative Filtering Gradient ============== 73 | % Once your cost function matches up with ours, you should now implement 74 | % the collaborative filtering gradient function. Specifically, you should 75 | % complete the code in cofiCostFunc.m to return the grad argument. 76 | % 77 | fprintf('\nChecking Gradients (without regularization) ... \n'); 78 | 79 | % Check gradients by running checkNNGradients 80 | checkCostFunction; 81 | 82 | fprintf('\nProgram paused. Press enter to continue.\n'); 83 | pause; 84 | 85 | 86 | %% ========= Part 4: Collaborative Filtering Cost Regularization ======== 87 | % Now, you should implement regularization for the cost function for 88 | % collaborative filtering. You can implement it by adding the cost of 89 | % regularization to the original cost computation. 90 | % 91 | 92 | % Evaluate cost function 93 | J = cofiCostFunc([X(:) ; Theta(:)], Y, R, num_users, num_movies, ... 94 | num_features, 1.5); 95 | 96 | fprintf(['Cost at loaded parameters (lambda = 1.5): %f '... 97 | '\n(this value should be about 31.34)\n'], J); 98 | 99 | fprintf('\nProgram paused. Press enter to continue.\n'); 100 | pause; 101 | 102 | 103 | %% ======= Part 5: Collaborative Filtering Gradient Regularization ====== 104 | % Once your cost matches up with ours, you should proceed to implement 105 | % regularization for the gradient. 106 | % 107 | 108 | % 109 | fprintf('\nChecking Gradients (with regularization) ... \n'); 110 | 111 | % Check gradients by running checkNNGradients 112 | checkCostFunction(1.5); 113 | 114 | fprintf('\nProgram paused. 
Press enter to continue.\n'); 115 | pause; 116 | 117 | 118 | %% ============== Part 6: Entering ratings for a new user =============== 119 | % Before we will train the collaborative filtering model, we will first 120 | % add ratings that correspond to a new user that we just observed. This 121 | % part of the code will also allow you to put in your own ratings for the 122 | % movies in our dataset! 123 | % 124 | movieList = loadMovieList(); 125 | 126 | % Initialize my ratings 127 | my_ratings = zeros(1682, 1); 128 | 129 | % Check the file movie_idx.txt for id of each movie in our dataset 130 | % For example, Toy Story (1995) has ID 1, so to rate it "4", you can set 131 | my_ratings(1) = 4; 132 | 133 | % Or suppose did not enjoy Silence of the Lambs (1991), you can set 134 | my_ratings(98) = 2; 135 | 136 | % We have selected a few movies we liked / did not like and the ratings we 137 | % gave are as follows: 138 | my_ratings(7) = 3; 139 | my_ratings(12)= 5; 140 | my_ratings(54) = 4; 141 | my_ratings(64)= 5; 142 | my_ratings(66)= 3; 143 | my_ratings(69) = 5; 144 | my_ratings(183) = 4; 145 | my_ratings(226) = 5; 146 | my_ratings(355)= 5; 147 | 148 | fprintf('\n\nNew user ratings:\n'); 149 | for i = 1:length(my_ratings) 150 | if my_ratings(i) > 0 151 | fprintf('Rated %d for %s\n', my_ratings(i), ... 152 | movieList{i}); 153 | end 154 | end 155 | 156 | fprintf('\nProgram paused. Press enter to continue.\n'); 157 | pause; 158 | 159 | 160 | %% ================== Part 7: Learning Movie Ratings ==================== 161 | % Now, you will train the collaborative filtering model on a movie rating 162 | % dataset of 1682 movies and 943 users 163 | % 164 | 165 | fprintf('\nTraining collaborative filtering...\n'); 166 | 167 | % Load data 168 | load('ex8_movies.mat'); 169 | 170 | % Y is a 1682x943 matrix, containing ratings (1-5) of 1682 movies by 171 | % 943 users 172 | % 173 | % R is a 1682x943 matrix, where R(i,j) = 1 if and only if user j gave a 174 | % rating to movie i 175 | 176 | % Add our own ratings to the data matrix 177 | Y = [my_ratings Y]; 178 | R = [(my_ratings ~= 0) R]; 179 | 180 | % Normalize Ratings 181 | [Ynorm, Ymean] = normalizeRatings(Y, R); 182 | 183 | % Useful Values 184 | num_users = size(Y, 2); 185 | num_movies = size(Y, 1); 186 | num_features = 10; 187 | 188 | % Set Initial Parameters (Theta, X) 189 | X = randn(num_movies, num_features); 190 | Theta = randn(num_users, num_features); 191 | 192 | initial_parameters = [X(:); Theta(:)]; 193 | 194 | % Set options for fmincg 195 | options = optimset('GradObj', 'on', 'MaxIter', 100); 196 | 197 | % Set Regularization 198 | lambda = 10; 199 | theta = fmincg (@(t)(cofiCostFunc(t, Ynorm, R, num_users, num_movies, ... 200 | num_features, lambda)), ... 201 | initial_parameters, options); 202 | 203 | % Unfold the returned theta back into U and W 204 | X = reshape(theta(1:num_movies*num_features), num_movies, num_features); 205 | Theta = reshape(theta(num_movies*num_features+1:end), ... 206 | num_users, num_features); 207 | 208 | fprintf('Recommender system learning completed.\n'); 209 | 210 | fprintf('\nProgram paused. Press enter to continue.\n'); 211 | pause; 212 | 213 | %% ================== Part 8: Recommendation for you ==================== 214 | % After training the model, you can now make recommendations by computing 215 | % the predictions matrix. 
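% --- Each entry of that predictions matrix is a dot product between a movie's
% learned feature vector and a user's learned parameter vector, plus the
% per-movie mean removed by normalizeRatings. A sketch for one pair, using the
% variables defined above (indices chosen arbitrarily; column 1 of Y, and hence
% of the predictions, corresponds to the newly added user):
i = 1;  j = 1;                                   % movie i, user j
pred_ij = X(i, :) * Theta(j, :)' + Ymean(i);     % predicted rating for that pair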
216 | % 217 | 218 | p = X * Theta'; 219 | my_predictions = p(:,1) + Ymean; 220 | 221 | movieList = loadMovieList(); 222 | 223 | [r, ix] = sort(my_predictions, 'descend'); 224 | fprintf('\nTop recommendations for you:\n'); 225 | for i=1:10 226 | j = ix(i); 227 | fprintf('Predicting rating %.1f for movie %s\n', my_predictions(j), ... 228 | movieList{j}); 229 | end 230 | 231 | fprintf('\n\nOriginal ratings provided:\n'); 232 | for i = 1:length(my_ratings) 233 | if my_ratings(i) > 0 234 | fprintf('Rated %d for %s\n', my_ratings(i), ... 235 | movieList{i}); 236 | end 237 | end 238 | -------------------------------------------------------------------------------- /ex8/selectThreshold.m: -------------------------------------------------------------------------------- 1 | function [bestEpsilon bestF1] = selectThreshold(yval, pval) 2 | %SELECTTHRESHOLD Find the best threshold (epsilon) to use for selecting 3 | %outliers 4 | % [bestEpsilon bestF1] = SELECTTHRESHOLD(yval, pval) finds the best 5 | % threshold to use for selecting outliers based on the results from a 6 | % validation set (pval) and the ground truth (yval). 7 | % 8 | 9 | m = length(pval); 10 | 11 | bestEpsilon = 0; 12 | bestF1 = 0; 13 | 14 | stepsize = (max(pval) - min(pval)) / 1000; 15 | %tmp = (max(pval) - min(pval))/ stepsize; 16 | %F1 = zeros(tmp,1); 17 | cvPred = zeros(m, 1); 18 | %count = 1; 19 | 20 | F1 = 0; 21 | 22 | for epsilon = min(pval):stepsize:max(pval) 23 | for i = 1:m 24 | if(pval(i) < epsilon) 25 | cvPred(i) = 1; 26 | end 27 | end 28 | 29 | fp = sum((cvPred == 1) & (yval == 0)); 30 | tp = sum((cvPred == 1) & (yval == 1)); 31 | fn = sum((cvPred == 0) & (yval == 1)); 32 | 33 | prec = tp / (tp + fp); 34 | recall = tp / (tp + fn); 35 | F1 = 2 * prec * recall / (prec + recall); 36 | 37 | if F1 > bestF1 38 | bestF1 = F1; 39 | bestEpsilon = epsilon; 40 | end 41 | end 42 | 43 | % ====================== YOUR CODE HERE ====================== 44 | 45 | 46 | 47 | end 48 | -------------------------------------------------------------------------------- /update: -------------------------------------------------------------------------------- 1 | no update.. 2 | --------------------------------------------------------------------------------