├── .gitignore ├── Angaben ├── ex1.zip ├── ex2.zip ├── ex3.zip ├── ex4.zip ├── ex5.zip ├── ex6.zip ├── ex7.zip └── ex8.zip ├── Exercise 1 ├── ex1.pdf └── ex1 │ ├── computeCost.m │ ├── computeCostMulti.m │ ├── ex1.m │ ├── ex1_multi.m │ ├── ex1data1.txt │ ├── ex1data2.txt │ ├── featureNormalize.m │ ├── gradientDescent.m │ ├── gradientDescentMulti.m │ ├── ml_login_data.mat │ ├── normalEqn.m │ ├── octave-core │ ├── plotData.m │ ├── submit.m │ └── warmUpExercise.m ├── Exercise 2 ├── ex2.pdf └── ex2 │ ├── costFunction.m │ ├── costFunctionReg.m │ ├── ex2.m │ ├── ex2_reg.m │ ├── ex2data1.txt │ ├── ex2data2.txt │ ├── mapFeature.m │ ├── ml_login_data.mat │ ├── plotData.m │ ├── plotDecisionBoundary.m │ ├── predict.m │ ├── sigmoid.m │ ├── submit.m │ └── submitWeb.m ├── Exercise 3 ├── ex3.pdf └── ex3 │ ├── displayData.m │ ├── ex3.m │ ├── ex3_nn.m │ ├── ex3data1.mat │ ├── ex3weights.mat │ ├── fmincg.m │ ├── lrCostFunction.m │ ├── oneVsAll.m │ ├── predict.m │ ├── predictOneVsAll.m │ ├── sigmoid.m │ └── submit.m ├── Exercise 4 ├── ex4.pdf └── ex4 │ ├── checkNNGradients.m │ ├── computeNumericalGradient.m │ ├── debugInitializeWeights.m │ ├── displayData.m │ ├── ex4.m │ ├── ex4data1.mat │ ├── ex4weights.mat │ ├── fmincg.m │ ├── nnCostFunction.m │ ├── predict.m │ ├── randInitializeWeights.m │ ├── sigmoid.m │ ├── sigmoidGradient.m │ ├── submit.m │ └── submitWeb.m ├── Exercise 5 ├── ex5.pdf └── ex5 │ ├── ex5.m │ ├── ex5data1.mat │ ├── featureNormalize.m │ ├── fmincg.m │ ├── learningCurve.m │ ├── linearRegCostFunction.m │ ├── plotFit.m │ ├── polyFeatures.m │ ├── submit.m │ ├── submitWeb.m │ ├── trainLinearReg.m │ └── validationCurve.m ├── Exercise 6 ├── ex6.pdf └── ex6 │ ├── dataset3Params.m │ ├── emailFeatures.m │ ├── emailSample1.txt │ ├── emailSample2.txt │ ├── ex6.m │ ├── ex6_spam.m │ ├── ex6data1.mat │ ├── ex6data2.mat │ ├── ex6data3.mat │ ├── gaussianKernel.m │ ├── getVocabList.m │ ├── linearKernel.m │ ├── plotData.m │ ├── porterStemmer.m │ ├── processEmail.m │ ├── readFile.m │ ├── spamSample1.txt │ ├── spamSample2.txt │ ├── spamTest.mat │ ├── spamTrain.mat │ ├── submit.m │ ├── submitWeb.m │ ├── svmPredict.m │ ├── svmTrain.m │ ├── visualizeBoundary.m │ ├── visualizeBoundaryLinear.m │ └── vocab.txt ├── Exercise 7 ├── ex7.pdf └── ex7 │ ├── bird_small.mat │ ├── bird_small.png │ ├── computeCentroids.m │ ├── displayData.m │ ├── drawLine.m │ ├── ex7.m │ ├── ex7_pca.m │ ├── ex7data1.mat │ ├── ex7data2.mat │ ├── ex7faces.mat │ ├── featureNormalize.m │ ├── findClosestCentroids.m │ ├── kMeansInitCentroids.m │ ├── pca.m │ ├── plotDataPoints.m │ ├── plotProgresskMeans.m │ ├── projectData.m │ ├── recoverData.m │ ├── runkMeans.m │ ├── submit.m │ └── submitWeb.m ├── Exercise 8 ├── ex8.pdf └── ex8 │ ├── checkCostFunction.m │ ├── cofiCostFunc.m │ ├── computeNumericalGradient.m │ ├── estimateGaussian.m │ ├── ex8.m │ ├── ex8_cofi.m │ ├── ex8_movieParams.mat │ ├── ex8_movies.mat │ ├── ex8data1.mat │ ├── ex8data2.mat │ ├── fmincg.m │ ├── loadMovieList.m │ ├── movie_ids.txt │ ├── multivariateGaussian.m │ ├── normalizeRatings.m │ ├── selectThreshold.m │ ├── submit.m │ ├── submitWeb.m │ └── visualizeFit.m └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | ml_login_data.mat 2 | octave-core 3 | -------------------------------------------------------------------------------- /Angaben/ex1.zip: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/rieder91/MachineLearning/f6708f216326cb5c9e9e5c3afc912060bfa10486/Angaben/ex1.zip -------------------------------------------------------------------------------- /Angaben/ex2.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rieder91/MachineLearning/f6708f216326cb5c9e9e5c3afc912060bfa10486/Angaben/ex2.zip -------------------------------------------------------------------------------- /Angaben/ex3.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rieder91/MachineLearning/f6708f216326cb5c9e9e5c3afc912060bfa10486/Angaben/ex3.zip -------------------------------------------------------------------------------- /Angaben/ex4.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rieder91/MachineLearning/f6708f216326cb5c9e9e5c3afc912060bfa10486/Angaben/ex4.zip -------------------------------------------------------------------------------- /Angaben/ex5.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rieder91/MachineLearning/f6708f216326cb5c9e9e5c3afc912060bfa10486/Angaben/ex5.zip -------------------------------------------------------------------------------- /Angaben/ex6.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rieder91/MachineLearning/f6708f216326cb5c9e9e5c3afc912060bfa10486/Angaben/ex6.zip -------------------------------------------------------------------------------- /Angaben/ex7.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rieder91/MachineLearning/f6708f216326cb5c9e9e5c3afc912060bfa10486/Angaben/ex7.zip -------------------------------------------------------------------------------- /Angaben/ex8.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rieder91/MachineLearning/f6708f216326cb5c9e9e5c3afc912060bfa10486/Angaben/ex8.zip -------------------------------------------------------------------------------- /Exercise 1/ex1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rieder91/MachineLearning/f6708f216326cb5c9e9e5c3afc912060bfa10486/Exercise 1/ex1.pdf -------------------------------------------------------------------------------- /Exercise 1/ex1/computeCost.m: -------------------------------------------------------------------------------- 1 | function J = computeCost(X, y, theta) 2 | %COMPUTECOST Compute cost for linear regression 3 | % J = COMPUTECOST(X, y, theta) computes the cost of using theta as the 4 | % parameter for linear regression to fit the data points in X and y 5 | 6 | % Initialize some useful values 7 | m = length(y); % number of training examples 8 | 9 | % You need to return the following variables correctly 10 | % J = 0; 11 | 12 | % ====================== YOUR CODE HERE ====================== 13 | % Instructions: Compute the cost of a particular choice of theta 14 | % You should set J to the cost. 
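% Note (editorial aside, not part of the original starter code): the loop
% that follows computes the unregularized linear regression cost
%   J(theta) = (1 / (2*m)) * sum_i (theta' * x_i - y_i)^2
% one example at a time. An equivalent vectorized form, the same one used
% in computeCostMulti.m, would be:
%   errors = X * theta - y;
%   J = (errors' * errors) / (2 * m);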
15 | 16 | J = 0; 17 | for i = 1:m 18 | J = J + (theta' * X(i,:)' - y(i))^2; 19 | end 20 | 21 | J = J/(2*m); 22 | 23 | 24 | 25 | % ========================================================================= 26 | 27 | end 28 | -------------------------------------------------------------------------------- /Exercise 1/ex1/computeCostMulti.m: -------------------------------------------------------------------------------- 1 | function J = computeCostMulti(X, y, theta) 2 | %COMPUTECOSTMULTI Compute cost for linear regression with multiple variables 3 | % J = COMPUTECOSTMULTI(X, y, theta) computes the cost of using theta as the 4 | % parameter for linear regression to fit the data points in X and y 5 | 6 | % Initialize some useful values 7 | m = length(y); % number of training examples 8 | 9 | % You need to return the following variables correctly 10 | % J = 0; 11 | 12 | % ====================== YOUR CODE HERE ====================== 13 | % Instructions: Compute the cost of a particular choice of theta 14 | % You should set J to the cost. 15 | 16 | 17 | difference = X * theta - y; 18 | vector = difference' * difference; 19 | J = (1 / (2 * m)) * vector; 20 | 21 | 22 | % ========================================================================= 23 | 24 | end 25 | -------------------------------------------------------------------------------- /Exercise 1/ex1/ex1.m: -------------------------------------------------------------------------------- 1 | %% Machine Learning Online Class - Exercise 1: Linear Regression 2 | 3 | % Instructions 4 | % ------------ 5 | % 6 | % This file contains code that helps you get started on the 7 | % linear exercise. You will need to complete the following functions 8 | % in this exericse: 9 | % 10 | % warmUpExercise.m 11 | % plotData.m 12 | % gradientDescent.m 13 | % computeCost.m 14 | % gradientDescentMulti.m 15 | % computeCostMulti.m 16 | % featureNormalize.m 17 | % normalEqn.m 18 | % 19 | % For this exercise, you will not need to change any code in this file, 20 | % or any other files other than those mentioned above. 21 | % 22 | % x refers to the population size in 10,000s 23 | % y refers to the profit in $10,000s 24 | % 25 | 26 | %% Initialization 27 | clear ; close all; clc 28 | 29 | %% ==================== Part 1: Basic Function ==================== 30 | % Complete warmUpExercise.m 31 | fprintf('Running warmUpExercise ... \n'); 32 | fprintf('5x5 Identity Matrix: \n'); 33 | warmUpExercise() 34 | 35 | fprintf('Program paused. Press enter to continue.\n'); 36 | pause; 37 | 38 | 39 | %% ======================= Part 2: Plotting ======================= 40 | fprintf('Plotting Data ...\n') 41 | data = load('ex1data1.txt'); 42 | X = data(:, 1); y = data(:, 2); 43 | m = length(y); % number of training examples 44 | 45 | % Plot Data 46 | % Note: You have to complete the code in plotData.m 47 | plotData(X, y); 48 | 49 | fprintf('Program paused. 
Press enter to continue.\n'); 50 | pause; 51 | 52 | %% =================== Part 3: Gradient descent =================== 53 | fprintf('Running Gradient Descent ...\n') 54 | 55 | X = [ones(m, 1), data(:,1)]; % Add a column of ones to x 56 | theta = zeros(2, 1); % initialize fitting parameters 57 | 58 | % Some gradient descent settings 59 | iterations = 1500; 60 | alpha = 0.01; 61 | 62 | % compute and display initial cost 63 | computeCost(X, y, theta) 64 | 65 | % run gradient descent 66 | theta = gradientDescent(X, y, theta, alpha, iterations); 67 | 68 | % print theta to screen 69 | fprintf('Theta found by gradient descent: '); 70 | fprintf('%f %f \n', theta(1), theta(2)); 71 | 72 | % Plot the linear fit 73 | hold on; % keep previous plot visible 74 | plot(X(:,2), X*theta, '-') 75 | legend('Training data', 'Linear regression') 76 | hold off % don't overlay any more plots on this figure 77 | 78 | % Predict values for population sizes of 35,000 and 70,000 79 | predict1 = [1, 3.5] *theta; 80 | fprintf('For population = 35,000, we predict a profit of %f\n',... 81 | predict1*10000); 82 | predict2 = [1, 7] * theta; 83 | fprintf('For population = 70,000, we predict a profit of %f\n',... 84 | predict2*10000); 85 | 86 | fprintf('Program paused. Press enter to continue.\n'); 87 | pause; 88 | 89 | %% ============= Part 4: Visualizing J(theta_0, theta_1) ============= 90 | fprintf('Visualizing J(theta_0, theta_1) ...\n') 91 | 92 | % Grid over which we will calculate J 93 | theta0_vals = linspace(-10, 10, 100); 94 | theta1_vals = linspace(-1, 4, 100); 95 | 96 | % initialize J_vals to a matrix of 0's 97 | J_vals = zeros(length(theta0_vals), length(theta1_vals)); 98 | 99 | % Fill out J_vals 100 | for i = 1:length(theta0_vals) 101 | for j = 1:length(theta1_vals) 102 | t = [theta0_vals(i); theta1_vals(j)]; 103 | J_vals(i,j) = computeCost(X, y, t); 104 | end 105 | end 106 | 107 | 108 | % Because of the way meshgrids work in the surf command, we need to 109 | % transpose J_vals before calling surf, or else the axes will be flipped 110 | J_vals = J_vals'; 111 | % Surface plot 112 | figure; 113 | surf(theta0_vals, theta1_vals, J_vals) 114 | xlabel('\theta_0'); ylabel('\theta_1'); 115 | 116 | % Contour plot 117 | figure; 118 | % Plot J_vals as 15 contours spaced logarithmically between 0.01 and 100 119 | contour(theta0_vals, theta1_vals, J_vals, logspace(-2, 3, 20)) 120 | xlabel('\theta_0'); ylabel('\theta_1'); 121 | hold on; 122 | plot(theta(1), theta(2), 'rx', 'MarkerSize', 10, 'LineWidth', 2); 123 | -------------------------------------------------------------------------------- /Exercise 1/ex1/ex1_multi.m: -------------------------------------------------------------------------------- 1 | %% Machine Learning Online Class 2 | % Exercise 1: Linear regression with multiple variables 3 | % 4 | % Instructions 5 | % ------------ 6 | % 7 | % This file contains code that helps you get started on the 8 | % linear regression exercise. 9 | % 10 | % You will need to complete the following functions in this 11 | % exericse: 12 | % 13 | % warmUpExercise.m 14 | % plotData.m 15 | % gradientDescent.m 16 | % computeCost.m 17 | % gradientDescentMulti.m 18 | % computeCostMulti.m 19 | % featureNormalize.m 20 | % normalEqn.m 21 | % 22 | % For this part of the exercise, you will need to change some 23 | % parts of the code below for various experiments (e.g., changing 24 | % learning rates). 
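% Note (editorial aside, not part of the original starter code): a common
% rule of thumb from the course is to try learning rates spaced by roughly
% a factor of 3 (e.g. 0.01, 0.03, 0.1, 0.3, 1) and compare the resulting
% J_history convergence plots; the script below settles on alpha = 0.15
% with num_iters = 50.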
25 | % 26 | 27 | %% Initialization 28 | 29 | %% ================ Part 1: Feature Normalization ================ 30 | 31 | %% Clear and Close Figures 32 | clear ; close all; clc 33 | 34 | fprintf('Loading data ...\n'); 35 | 36 | %% Load Data 37 | data = load('ex1data2.txt'); 38 | X = data(:, 1:2); 39 | y = data(:, 3); 40 | m = length(y); 41 | 42 | % Print out some data points 43 | fprintf('First 10 examples from the dataset: \n'); 44 | fprintf(' x = [%.0f %.0f], y = %.0f \n', [X(1:10,:) y(1:10,:)]'); 45 | 46 | fprintf('Program paused. Press enter to continue.\n'); 47 | pause; 48 | 49 | % Scale features and set them to zero mean 50 | fprintf('Normalizing Features ...\n'); 51 | 52 | [X mu sigma] = featureNormalize(X); 53 | 54 | % Add intercept term to X 55 | X = [ones(m, 1) X]; 56 | 57 | 58 | %% ================ Part 2: Gradient Descent ================ 59 | 60 | % ====================== YOUR CODE HERE ====================== 61 | % Instructions: We have provided you with the following starter 62 | % code that runs gradient descent with a particular 63 | % learning rate (alpha). 64 | % 65 | % Your task is to first make sure that your functions - 66 | % computeCost and gradientDescent already work with 67 | % this starter code and support multiple variables. 68 | % 69 | % After that, try running gradient descent with 70 | % different values of alpha and see which one gives 71 | % you the best result. 72 | % 73 | % Finally, you should complete the code at the end 74 | % to predict the price of a 1650 sq-ft, 3 br house. 75 | % 76 | % Hint: By using the 'hold on' command, you can plot multiple 77 | % graphs on the same figure. 78 | % 79 | % Hint: At prediction, make sure you do the same feature normalization. 80 | % 81 | 82 | fprintf('Running gradient descent ...\n'); 83 | 84 | % Choose some alpha value 85 | alpha = 0.15; 86 | num_iters = 50; 87 | 88 | % Init Theta and Run Gradient Descent 89 | theta = zeros(3, 1); 90 | [theta, J_history] = gradientDescentMulti(X, y, theta, alpha, num_iters); 91 | 92 | % Plot the convergence graph 93 | figure; 94 | plot(1:numel(J_history), J_history, '-b', 'LineWidth', 2); 95 | xlabel('Number of iterations'); 96 | ylabel('Cost J'); 97 | 98 | % Display gradient descent's result 99 | fprintf('Theta computed from gradient descent: \n'); 100 | fprintf(' %f \n', theta); 101 | fprintf('\n'); 102 | 103 | % Estimate the price of a 1650 sq-ft, 3 br house 104 | % ====================== YOUR CODE HERE ====================== 105 | % Recall that the first column of X is all-ones. Thus, it does 106 | % not need to be normalized. 107 | % price = 0; % You should change this 108 | prediction = [1 ((1650 - mu(:,1))/sigma(:,1)) ((3 - mu(:,2))/sigma(:,2))]; 109 | price = prediction * theta; 110 | 111 | 112 | % ============================================================ 113 | 114 | fprintf(['Predicted price of a 1650 sq-ft, 3 br house ' ... 115 | '(using gradient descent):\n $%f\n'], price); 116 | 117 | fprintf('Program paused. Press enter to continue.\n'); 118 | pause; 119 | 120 | %% ================ Part 3: Normal Equations ================ 121 | 122 | fprintf('Solving with normal equations...\n'); 123 | 124 | % ====================== YOUR CODE HERE ====================== 125 | % Instructions: The following code computes the closed form 126 | % solution for linear regression using the normal 127 | % equations. You should complete the code in 128 | % normalEqn.m 129 | % 130 | % After doing so, you should complete this code 131 | % to predict the price of a 1650 sq-ft, 3 br house. 
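% Note (editorial aside, not part of the original starter code):
% normalEqn.m computes the closed-form solution
%   theta = pinv(X' * X) * X' * y
% which needs neither feature scaling nor a learning rate; that is why the
% prediction in this part is built from the raw features [1 1650 3] rather
% than from normalized ones.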
132 | % 133 | 134 | %% Load Data 135 | data = csvread('ex1data2.txt'); 136 | X = data(:, 1:2); 137 | y = data(:, 3); 138 | m = length(y); 139 | 140 | % Add intercept term to X 141 | X = [ones(m, 1) X]; 142 | 143 | % Calculate the parameters from the normal equation 144 | theta = normalEqn(X, y); 145 | 146 | % Display normal equation's result 147 | fprintf('Theta computed from the normal equations: \n'); 148 | fprintf(' %f \n', theta); 149 | fprintf('\n'); 150 | 151 | 152 | % Estimate the price of a 1650 sq-ft, 3 br house 153 | % ====================== YOUR CODE HERE ====================== 154 | % price = 0; % You should change this 155 | 156 | prediction = [1 1650 3]; 157 | price = prediction * theta; 158 | 159 | % ============================================================ 160 | 161 | fprintf(['Predicted price of a 1650 sq-ft, 3 br house ' ... 162 | '(using normal equations):\n $%f\n'], price); 163 | 164 | -------------------------------------------------------------------------------- /Exercise 1/ex1/ex1data1.txt: -------------------------------------------------------------------------------- 1 | 6.1101,17.592 2 | 5.5277,9.1302 3 | 8.5186,13.662 4 | 7.0032,11.854 5 | 5.8598,6.8233 6 | 8.3829,11.886 7 | 7.4764,4.3483 8 | 8.5781,12 9 | 6.4862,6.5987 10 | 5.0546,3.8166 11 | 5.7107,3.2522 12 | 14.164,15.505 13 | 5.734,3.1551 14 | 8.4084,7.2258 15 | 5.6407,0.71618 16 | 5.3794,3.5129 17 | 6.3654,5.3048 18 | 5.1301,0.56077 19 | 6.4296,3.6518 20 | 7.0708,5.3893 21 | 6.1891,3.1386 22 | 20.27,21.767 23 | 5.4901,4.263 24 | 6.3261,5.1875 25 | 5.5649,3.0825 26 | 18.945,22.638 27 | 12.828,13.501 28 | 10.957,7.0467 29 | 13.176,14.692 30 | 22.203,24.147 31 | 5.2524,-1.22 32 | 6.5894,5.9966 33 | 9.2482,12.134 34 | 5.8918,1.8495 35 | 8.2111,6.5426 36 | 7.9334,4.5623 37 | 8.0959,4.1164 38 | 5.6063,3.3928 39 | 12.836,10.117 40 | 6.3534,5.4974 41 | 5.4069,0.55657 42 | 6.8825,3.9115 43 | 11.708,5.3854 44 | 5.7737,2.4406 45 | 7.8247,6.7318 46 | 7.0931,1.0463 47 | 5.0702,5.1337 48 | 5.8014,1.844 49 | 11.7,8.0043 50 | 5.5416,1.0179 51 | 7.5402,6.7504 52 | 5.3077,1.8396 53 | 7.4239,4.2885 54 | 7.6031,4.9981 55 | 6.3328,1.4233 56 | 6.3589,-1.4211 57 | 6.2742,2.4756 58 | 5.6397,4.6042 59 | 9.3102,3.9624 60 | 9.4536,5.4141 61 | 8.8254,5.1694 62 | 5.1793,-0.74279 63 | 21.279,17.929 64 | 14.908,12.054 65 | 18.959,17.054 66 | 7.2182,4.8852 67 | 8.2951,5.7442 68 | 10.236,7.7754 69 | 5.4994,1.0173 70 | 20.341,20.992 71 | 10.136,6.6799 72 | 7.3345,4.0259 73 | 6.0062,1.2784 74 | 7.2259,3.3411 75 | 5.0269,-2.6807 76 | 6.5479,0.29678 77 | 7.5386,3.8845 78 | 5.0365,5.7014 79 | 10.274,6.7526 80 | 5.1077,2.0576 81 | 5.7292,0.47953 82 | 5.1884,0.20421 83 | 6.3557,0.67861 84 | 9.7687,7.5435 85 | 6.5159,5.3436 86 | 8.5172,4.2415 87 | 9.1802,6.7981 88 | 6.002,0.92695 89 | 5.5204,0.152 90 | 5.0594,2.8214 91 | 5.7077,1.8451 92 | 7.6366,4.2959 93 | 5.8707,7.2029 94 | 5.3054,1.9869 95 | 8.2934,0.14454 96 | 13.394,9.0551 97 | 5.4369,0.61705 98 | -------------------------------------------------------------------------------- /Exercise 1/ex1/ex1data2.txt: -------------------------------------------------------------------------------- 1 | 2104,3,399900 2 | 1600,3,329900 3 | 2400,3,369000 4 | 1416,2,232000 5 | 3000,4,539900 6 | 1985,4,299900 7 | 1534,3,314900 8 | 1427,3,198999 9 | 1380,3,212000 10 | 1494,3,242500 11 | 1940,4,239999 12 | 2000,3,347000 13 | 1890,3,329999 14 | 4478,5,699900 15 | 1268,3,259900 16 | 2300,4,449900 17 | 1320,2,299900 18 | 1236,3,199900 19 | 2609,4,499998 20 | 3031,4,599000 21 | 1767,3,252900 22 | 1888,2,255000 
23 | 1604,3,242900 24 | 1962,4,259900 25 | 3890,3,573900 26 | 1100,3,249900 27 | 1458,3,464500 28 | 2526,3,469000 29 | 2200,3,475000 30 | 2637,3,299900 31 | 1839,2,349900 32 | 1000,1,169900 33 | 2040,4,314900 34 | 3137,3,579900 35 | 1811,4,285900 36 | 1437,3,249900 37 | 1239,3,229900 38 | 2132,4,345000 39 | 4215,4,549000 40 | 2162,4,287000 41 | 1664,2,368500 42 | 2238,3,329900 43 | 2567,4,314000 44 | 1200,3,299000 45 | 852,2,179900 46 | 1852,4,299900 47 | 1203,3,239500 48 | -------------------------------------------------------------------------------- /Exercise 1/ex1/featureNormalize.m: -------------------------------------------------------------------------------- 1 | function [X_norm, mu, sigma] = featureNormalize(X) 2 | %FEATURENORMALIZE Normalizes the features in X 3 | % FEATURENORMALIZE(X) returns a normalized version of X where 4 | % the mean value of each feature is 0 and the standard deviation 5 | % is 1. This is often a good preprocessing step to do when 6 | % working with learning algorithms. 7 | 8 | % You need to set these values correctly 9 | X_norm = X; 10 | mu = zeros(1, size(X, 2)); 11 | sigma = zeros(1, size(X, 2)); 12 | 13 | % ====================== YOUR CODE HERE ====================== 14 | % Instructions: First, for each feature dimension, compute the mean 15 | % of the feature and subtract it from the dataset, 16 | % storing the mean value in mu. Next, compute the 17 | % standard deviation of each feature and divide 18 | % each feature by it's standard deviation, storing 19 | % the standard deviation in sigma. 20 | % 21 | % Note that X is a matrix where each column is a 22 | % feature and each row is an example. You need 23 | % to perform the normalization separately for 24 | % each feature. 25 | % 26 | % Hint: You might find the 'mean' and 'std' functions useful. 27 | % 28 | 29 | for i = 1:size(X, 2) 30 | mu(:,i) = mean(X_norm(:,i)); 31 | sigma(:,i) = std(X_norm(:,i)); 32 | X_norm(:,i) = (X_norm(:,i) - mu(:,i)) / sigma(:,i); 33 | end 34 | 35 | % ============================================================ 36 | 37 | end 38 | -------------------------------------------------------------------------------- /Exercise 1/ex1/gradientDescent.m: -------------------------------------------------------------------------------- 1 | function [theta, J_history] = gradientDescent(X, y, theta, alpha, num_iters) 2 | %GRADIENTDESCENT Performs gradient descent to learn theta 3 | % theta = GRADIENTDESENT(X, y, theta, alpha, num_iters) updates theta by 4 | % taking num_iters gradient steps with learning rate alpha 5 | 6 | % Initialize some useful values 7 | m = length(y); % number of training examples 8 | J_history = zeros(num_iters, 1); 9 | 10 | for iter = 1:num_iters 11 | 12 | % ====================== YOUR CODE HERE ====================== 13 | % Instructions: Perform a single gradient step on the parameter vector 14 | % theta. 15 | % 16 | % Hint: While debugging, it can be useful to print out the values 17 | % of the cost function (computeCost) and gradient here. 
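% Note (editorial aside, not part of the original starter code): the loop
% below accumulates the two partial derivatives for the single-feature case
% explicitly. For reference, the fully vectorized update used in
% gradientDescentMulti.m,
%   theta = theta - (alpha / m) * X' * (X * theta - y);
% performs the same simultaneous update for any number of features.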
18 | % 19 | temp0 = 0; 20 | temp1 = 0; 21 | 22 | for i = 1:m 23 | temp0 = temp0 + (theta' * X(i,:)' - y(i)); 24 | temp1 = temp1 + (theta' * X(i,:)' - y(i)) * X(i,2); 25 | end 26 | 27 | theta(1) = theta(1) - (alpha/m) * temp0; 28 | theta(2) = theta(2) - (alpha/m) * temp1; 29 | 30 | % ============================================================ 31 | 32 | % Save the cost J in every iteration 33 | J_history(iter) = computeCost(X, y, theta); 34 | 35 | end 36 | 37 | end 38 | -------------------------------------------------------------------------------- /Exercise 1/ex1/gradientDescentMulti.m: -------------------------------------------------------------------------------- 1 | function [theta, J_history] = gradientDescentMulti(X, y, theta, alpha, num_iters) 2 | %GRADIENTDESCENTMULTI Performs gradient descent to learn theta 3 | % theta = GRADIENTDESCENTMULTI(x, y, theta, alpha, num_iters) updates theta by 4 | % taking num_iters gradient steps with learning rate alpha 5 | 6 | % Initialize some useful values 7 | m = length(y); % number of training examples 8 | J_history = zeros(num_iters, 1); 9 | 10 | for iter = 1:num_iters 11 | 12 | % ====================== YOUR CODE HERE ====================== 13 | % Instructions: Perform a single gradient step on the parameter vector 14 | % theta. 15 | % 16 | % Hint: While debugging, it can be useful to print out the values 17 | % of the cost function (computeCostMulti) and gradient here. 18 | % 19 | 20 | difference = X * theta - y; 21 | summe = difference' * X; 22 | theta = theta - (alpha / m) * summe'; 23 | 24 | 25 | % ============================================================ 26 | 27 | % Save the cost J in every iteration 28 | J_history(iter) = computeCostMulti(X, y, theta); 29 | 30 | end 31 | 32 | end 33 | -------------------------------------------------------------------------------- /Exercise 1/ex1/ml_login_data.mat: -------------------------------------------------------------------------------- 1 | # Created by Octave 3.4.0, Sat Jul 07 23:19:43 2012 CEST 2 | # name: login 3 | # type: sq_string 4 | # elements: 1 5 | # length: 13 6 | primal@aon.at 7 | 8 | 9 | # name: password 10 | # type: sq_string 11 | # elements: 1 12 | # length: 10 13 | huBKwC6DRx 14 | 15 | 16 | -------------------------------------------------------------------------------- /Exercise 1/ex1/normalEqn.m: -------------------------------------------------------------------------------- 1 | function [theta] = normalEqn(X, y) 2 | %NORMALEQN Computes the closed-form solution to linear regression 3 | % NORMALEQN(X,y) computes the closed-form solution to linear 4 | % regression using the normal equations. 5 | 6 | theta = zeros(size(X, 2), 1); 7 | 8 | % ====================== YOUR CODE HERE ====================== 9 | % Instructions: Complete the code to compute the closed form solution 10 | % to linear regression and put the result in theta. 
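% Note (editorial aside, not part of the original starter code): the sample
% solution below uses pinv rather than inv, so a least-squares solution is
% still returned even when X' * X is singular or poorly conditioned
% (e.g. redundant or linearly dependent features).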
11 | % 12 | 13 | % ---------------------- Sample Solution ---------------------- 14 | 15 | theta = pinv(X' * X) * X' * y; 16 | 17 | 18 | % ------------------------------------------------------------- 19 | 20 | 21 | % ============================================================ 22 | 23 | end 24 | -------------------------------------------------------------------------------- /Exercise 1/ex1/octave-core: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rieder91/MachineLearning/f6708f216326cb5c9e9e5c3afc912060bfa10486/Exercise 1/ex1/octave-core -------------------------------------------------------------------------------- /Exercise 1/ex1/plotData.m: -------------------------------------------------------------------------------- 1 | function plotData(x, y) 2 | %PLOTDATA Plots the data points x and y into a new figure 3 | % PLOTDATA(x,y) plots the data points and gives the figure axes labels of 4 | % population and profit. 5 | 6 | % ====================== YOUR CODE HERE ====================== 7 | % Instructions: Plot the training data into a figure using the 8 | % "figure" and "plot" commands. Set the axes labels using 9 | % the "xlabel" and "ylabel" commands. Assume the 10 | % population and revenue data have been passed in 11 | % as the x and y arguments of this function. 12 | % 13 | % Hint: You can use the 'rx' option with plot to have the markers 14 | % appear as red crosses. Furthermore, you can make the 15 | % markers larger by using plot(..., 'rx', 'MarkerSize', 10); 16 | 17 | figure; % open a new figure window 18 | 19 | plot(x, y, 'rx', 'MarkerSize', 10); % Plot the data 20 | ylabel('Profit in $10,000s'); % Set the y-axis label 21 | xlabel('Population of City in 10,000s'); % Set the x-axis label 22 | 23 | 24 | 25 | 26 | % ============================================================ 27 | 28 | end 29 | -------------------------------------------------------------------------------- /Exercise 1/ex1/warmUpExercise.m: -------------------------------------------------------------------------------- 1 | function A = warmUpExercise() 2 | %WARMUPEXERCISE Example function in octave 3 | % A = WARMUPEXERCISE() is an example function that returns the 5x5 identity matrix 4 | 5 | A = 0; 6 | % ============= YOUR CODE HERE ============== 7 | % Instructions: Return the 5x5 identity matrix 8 | % In octave, we return values by defining which variables 9 | % represent the return values (at the top of the file) 10 | % and then set them accordingly. 11 | 12 | % Create identity matrix 13 | A = eye(5); 14 | 15 | 16 | % =========================================== 17 | 18 | 19 | end 20 | -------------------------------------------------------------------------------- /Exercise 2/ex2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rieder91/MachineLearning/f6708f216326cb5c9e9e5c3afc912060bfa10486/Exercise 2/ex2.pdf -------------------------------------------------------------------------------- /Exercise 2/ex2/costFunction.m: -------------------------------------------------------------------------------- 1 | function [J, grad] = costFunction(theta, X, y) 2 | %COSTFUNCTION Compute cost and gradient for logistic regression 3 | % J = COSTFUNCTION(theta, X, y) computes the cost of using theta as the 4 | % parameter for logistic regression and the gradient of the cost 5 | % w.r.t. to the parameters. 
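% Note (editorial aside, not part of the original starter code): with
% h = sigmoid(X * theta), the implementation below evaluates
%   J    = -(1/m) * sum( y .* log(h) + (1 - y) .* log(1 - h) )
%   grad =  (1/m) * X' * (h - y)
% so grad has the same dimensions as theta, one partial derivative per
% parameter.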
6 | 7 | % Initialize some useful values 8 | m = length(y); % number of training examples 9 | 10 | % You need to return the following variables correctly 11 | % J = 0; 12 | % grad = zeros(size(theta)); 13 | 14 | % ====================== YOUR CODE HERE ====================== 15 | % Instructions: Compute the cost of a particular choice of theta. 16 | % You should set J to the cost. 17 | % Compute the partial derivatives and set grad to the partial 18 | % derivatives of the cost w.r.t. each parameter in theta 19 | % 20 | % Note: grad should have the same dimensions as theta 21 | % 22 | 23 | temp1 = -1 * (y .* log(sigmoid(X * theta))); 24 | temp2 = (1 - y) .* log(1 - sigmoid(X * theta)); 25 | 26 | J = sum(temp1 - temp2) / m; 27 | 28 | grad = (X' * (sigmoid(X * theta) - y)) * (1/m); 29 | 30 | % ============================================================= 31 | 32 | end 33 | -------------------------------------------------------------------------------- /Exercise 2/ex2/costFunctionReg.m: -------------------------------------------------------------------------------- 1 | function [J, grad] = costFunctionReg(theta, X, y, lambda) 2 | %COSTFUNCTIONREG Compute cost and gradient for logistic regression with regularization 3 | % J = COSTFUNCTIONREG(theta, X, y, lambda) computes the cost of using 4 | % theta as the parameter for regularized logistic regression and the 5 | % gradient of the cost w.r.t. to the parameters. 6 | 7 | % Initialize some useful values 8 | m = length(y); % number of training examples 9 | 10 | % You need to return the following variables correctly 11 | % J = 0; 12 | grad = zeros(size(theta)); 13 | 14 | % ====================== YOUR CODE HERE ====================== 15 | % Instructions: Compute the cost of a particular choice of theta. 16 | % You should set J to the cost. 17 | % Compute the partial derivatives and set grad to the partial 18 | % derivatives of the cost w.r.t. each parameter in theta 19 | 20 | temp1 = -1 * (y .* log(sigmoid(X * theta))); 21 | temp2 = (1 - y) .* log(1 - sigmoid(X * theta)); 22 | 23 | thetaT = theta; 24 | thetaT(1) = 0; 25 | correction = sum(thetaT .^ 2) * (lambda / (2 * m)); 26 | 27 | J = sum(temp1 - temp2) / m + correction; 28 | 29 | grad = (X' * (sigmoid(X * theta) - y)) * (1/m) + thetaT * (lambda / m); 30 | 31 | % ============================================================= 32 | 33 | end 34 | -------------------------------------------------------------------------------- /Exercise 2/ex2/ex2.m: -------------------------------------------------------------------------------- 1 | %% Machine Learning Online Class - Exercise 2: Logistic Regression 2 | % 3 | % Instructions 4 | % ------------ 5 | % 6 | % This file contains code that helps you get started on the logistic 7 | % regression exercise. You will need to complete the following functions 8 | % in this exericse: 9 | % 10 | % sigmoid.m 11 | % costFunction.m 12 | % predict.m 13 | % costFunctionReg.m 14 | % 15 | % For this exercise, you will not need to change any code in this file, 16 | % or any other files other than those mentioned above. 17 | % 18 | 19 | %% Initialization 20 | clear ; close all; clc 21 | 22 | %% Load Data 23 | % The first two columns contains the exam scores and the third column 24 | % contains the label. 25 | 26 | data = load('ex2data1.txt'); 27 | X = data(:, [1, 2]); y = data(:, 3); 28 | 29 | %% ==================== Part 1: Plotting ==================== 30 | % We start the exercise by first plotting the data to understand the 31 | % the problem we are working with. 
32 | 33 | fprintf(['Plotting data with + indicating (y = 1) examples and o ' ... 34 | 'indicating (y = 0) examples.\n']); 35 | 36 | plotData(X, y); 37 | 38 | % Put some labels 39 | hold on; 40 | % Labels and Legend 41 | xlabel('Exam 1 score') 42 | ylabel('Exam 2 score') 43 | 44 | % Specified in plot order 45 | legend('Admitted', 'Not admitted') 46 | hold off; 47 | 48 | fprintf('\nProgram paused. Press enter to continue.\n'); 49 | pause; 50 | 51 | 52 | %% ============ Part 2: Compute Cost and Gradient ============ 53 | % In this part of the exercise, you will implement the cost and gradient 54 | % for logistic regression. You neeed to complete the code in 55 | % costFunction.m 56 | 57 | % Setup the data matrix appropriately, and add ones for the intercept term 58 | [m, n] = size(X); 59 | 60 | % Add intercept term to x and X_test 61 | X = [ones(m, 1) X]; 62 | 63 | % Initialize fitting parameters 64 | initial_theta = zeros(n + 1, 1); 65 | 66 | % Compute and display initial cost and gradient 67 | [cost, grad] = costFunction(initial_theta, X, y); 68 | 69 | fprintf('Cost at initial theta (zeros): %f\n', cost); 70 | fprintf('Gradient at initial theta (zeros): \n'); 71 | fprintf(' %f \n', grad); 72 | 73 | fprintf('\nProgram paused. Press enter to continue.\n'); 74 | pause; 75 | 76 | 77 | %% ============= Part 3: Optimizing using fminunc ============= 78 | % In this exercise, you will use a built-in function (fminunc) to find the 79 | % optimal parameters theta. 80 | 81 | % Set options for fminunc 82 | options = optimset('GradObj', 'on', 'MaxIter', 400); 83 | 84 | % Run fminunc to obtain the optimal theta 85 | % This function will return theta and the cost 86 | [theta, cost] = ... 87 | fminunc(@(t)(costFunction(t, X, y)), initial_theta, options); 88 | 89 | % Print theta to screen 90 | fprintf('Cost at theta found by fminunc: %f\n', cost); 91 | fprintf('theta: \n'); 92 | fprintf(' %f \n', theta); 93 | 94 | % Plot Boundary 95 | plotDecisionBoundary(theta, X, y); 96 | 97 | % Put some labels 98 | hold on; 99 | % Labels and Legend 100 | xlabel('Exam 1 score') 101 | ylabel('Exam 2 score') 102 | 103 | % Specified in plot order 104 | legend('Admitted', 'Not admitted') 105 | hold off; 106 | 107 | fprintf('\nProgram paused. Press enter to continue.\n'); 108 | pause; 109 | 110 | %% ============== Part 4: Predict and Accuracies ============== 111 | % After learning the parameters, you'll like to use it to predict the outcomes 112 | % on unseen data. In this part, you will use the logistic regression model 113 | % to predict the probability that a student with score 45 on exam 1 and 114 | % score 85 on exam 2 will be admitted. 115 | % 116 | % Furthermore, you will compute the training and test set accuracies of 117 | % our model. 118 | % 119 | % Your task is to complete the code in predict.m 120 | 121 | % Predict probability for a student with score 45 on exam 1 122 | % and score 85 on exam 2 123 | 124 | prob = sigmoid([1 45 85] * theta); 125 | fprintf(['For a student with scores 45 and 85, we predict an admission ' ... 126 | 'probability of %f\n\n'], prob); 127 | 128 | % Compute accuracy on our training set 129 | p = predict(theta, X); 130 | 131 | fprintf('Train Accuracy: %f\n', mean(double(p == y)) * 100); 132 | 133 | fprintf('\nProgram paused. 
Press enter to continue.\n'); 134 | pause; 135 | 136 | -------------------------------------------------------------------------------- /Exercise 2/ex2/ex2_reg.m: -------------------------------------------------------------------------------- 1 | %% Machine Learning Online Class - Exercise 2: Logistic Regression 2 | % 3 | % Instructions 4 | % ------------ 5 | % 6 | % This file contains code that helps you get started on the second part 7 | % of the exercise which covers regularization with logistic regression. 8 | % 9 | % You will need to complete the following functions in this exericse: 10 | % 11 | % sigmoid.m 12 | % costFunction.m 13 | % predict.m 14 | % costFunctionReg.m 15 | % 16 | % For this exercise, you will not need to change any code in this file, 17 | % or any other files other than those mentioned above. 18 | % 19 | 20 | %% Initialization 21 | clear ; close all; clc 22 | 23 | %% Load Data 24 | % The first two columns contains the X values and the third column 25 | % contains the label (y). 26 | 27 | data = load('ex2data2.txt'); 28 | X = data(:, [1, 2]); y = data(:, 3); 29 | 30 | plotData(X, y); 31 | 32 | % Put some labels 33 | hold on; 34 | 35 | % Labels and Legend 36 | xlabel('Microchip Test 1') 37 | ylabel('Microchip Test 2') 38 | 39 | % Specified in plot order 40 | legend('y = 1', 'y = 0') 41 | hold off; 42 | 43 | %% =========== Part 1: Regularized Logistic Regression ============ 44 | % In this part, you are given a dataset with data points that are not 45 | % linearly separable. However, you would still like to use logistic 46 | % regression to classify the data points. 47 | % 48 | % To do so, you introduce more features to use -- in particular, you add 49 | % polynomial features to our data matrix (similar to polynomial 50 | % regression). 51 | % 52 | 53 | % Add Polynomial Features 54 | 55 | % Note that mapFeature also adds a column of ones for us, so the intercept 56 | % term is handled 57 | X = mapFeature(X(:,1), X(:,2)); 58 | 59 | % Initialize fitting parameters 60 | initial_theta = zeros(size(X, 2), 1); 61 | 62 | % Set regularization parameter lambda to 1 63 | lambda = 1; 64 | 65 | % Compute and display initial cost and gradient for regularized logistic 66 | % regression 67 | [cost, grad] = costFunctionReg(initial_theta, X, y, lambda); 68 | 69 | fprintf('Cost at initial theta (zeros): %f\n', cost); 70 | 71 | fprintf('\nProgram paused. Press enter to continue.\n'); 72 | pause; 73 | 74 | %% ============= Part 2: Regularization and Accuracies ============= 75 | % Optional Exercise: 76 | % In this part, you will get to try different values of lambda and 77 | % see how regularization affects the decision coundart 78 | % 79 | % Try the following values of lambda (0, 1, 10, 100). 80 | % 81 | % How does the decision boundary change when you vary lambda? How does 82 | % the training set accuracy vary? 83 | % 84 | % Initialize fitting parameters 85 | initial_theta = zeros(size(X, 2), 1); 86 | 87 | % Set regularization parameter lambda to 1 (you should vary this) 88 | lambda = 1; 89 | %lambda = 0; 90 | %lambda = 10; 91 | %lambda = 100; 92 | 93 | % Set Options 94 | options = optimset('GradObj', 'on', 'MaxIter', 400); 95 | 96 | % Optimize 97 | [theta, J, exit_flag] = ... 
98 | fminunc(@(t)(costFunctionReg(t, X, y, lambda)), initial_theta, options); 99 | 100 | % Plot Boundary 101 | plotDecisionBoundary(theta, X, y); 102 | hold on; 103 | title(sprintf('lambda = %g', lambda)) 104 | 105 | % Labels and Legend 106 | xlabel('Microchip Test 1') 107 | ylabel('Microchip Test 2') 108 | 109 | legend('y = 1', 'y = 0', 'Decision boundary') 110 | hold off; 111 | 112 | % Compute accuracy on our training set 113 | p = predict(theta, X); 114 | 115 | fprintf('Train Accuracy: %f\n', mean(double(p == y)) * 100); 116 | 117 | 118 | -------------------------------------------------------------------------------- /Exercise 2/ex2/ex2data1.txt: -------------------------------------------------------------------------------- 1 | 34.62365962451697,78.0246928153624,0 2 | 30.28671076822607,43.89499752400101,0 3 | 35.84740876993872,72.90219802708364,0 4 | 60.18259938620976,86.30855209546826,1 5 | 79.0327360507101,75.3443764369103,1 6 | 45.08327747668339,56.3163717815305,0 7 | 61.10666453684766,96.51142588489624,1 8 | 75.02474556738889,46.55401354116538,1 9 | 76.09878670226257,87.42056971926803,1 10 | 84.43281996120035,43.53339331072109,1 11 | 95.86155507093572,38.22527805795094,0 12 | 75.01365838958247,30.60326323428011,0 13 | 82.30705337399482,76.48196330235604,1 14 | 69.36458875970939,97.71869196188608,1 15 | 39.53833914367223,76.03681085115882,0 16 | 53.9710521485623,89.20735013750205,1 17 | 69.07014406283025,52.74046973016765,1 18 | 67.94685547711617,46.67857410673128,0 19 | 70.66150955499435,92.92713789364831,1 20 | 76.97878372747498,47.57596364975532,1 21 | 67.37202754570876,42.83843832029179,0 22 | 89.67677575072079,65.79936592745237,1 23 | 50.534788289883,48.85581152764205,0 24 | 34.21206097786789,44.20952859866288,0 25 | 77.9240914545704,68.9723599933059,1 26 | 62.27101367004632,69.95445795447587,1 27 | 80.1901807509566,44.82162893218353,1 28 | 93.114388797442,38.80067033713209,0 29 | 61.83020602312595,50.25610789244621,0 30 | 38.78580379679423,64.99568095539578,0 31 | 61.379289447425,72.80788731317097,1 32 | 85.40451939411645,57.05198397627122,1 33 | 52.10797973193984,63.12762376881715,0 34 | 52.04540476831827,69.43286012045222,1 35 | 40.23689373545111,71.16774802184875,0 36 | 54.63510555424817,52.21388588061123,0 37 | 33.91550010906887,98.86943574220611,0 38 | 64.17698887494485,80.90806058670817,1 39 | 74.78925295941542,41.57341522824434,0 40 | 34.1836400264419,75.2377203360134,0 41 | 83.90239366249155,56.30804621605327,1 42 | 51.54772026906181,46.85629026349976,0 43 | 94.44336776917852,65.56892160559052,1 44 | 82.36875375713919,40.61825515970618,0 45 | 51.04775177128865,45.82270145776001,0 46 | 62.22267576120188,52.06099194836679,0 47 | 77.19303492601364,70.45820000180959,1 48 | 97.77159928000232,86.7278223300282,1 49 | 62.07306379667647,96.76882412413983,1 50 | 91.56497449807442,88.69629254546599,1 51 | 79.94481794066932,74.16311935043758,1 52 | 99.2725269292572,60.99903099844988,1 53 | 90.54671411399852,43.39060180650027,1 54 | 34.52451385320009,60.39634245837173,0 55 | 50.2864961189907,49.80453881323059,0 56 | 49.58667721632031,59.80895099453265,0 57 | 97.64563396007767,68.86157272420604,1 58 | 32.57720016809309,95.59854761387875,0 59 | 74.24869136721598,69.82457122657193,1 60 | 71.79646205863379,78.45356224515052,1 61 | 75.3956114656803,85.75993667331619,1 62 | 35.28611281526193,47.02051394723416,0 63 | 56.25381749711624,39.26147251058019,0 64 | 30.05882244669796,49.59297386723685,0 65 | 44.66826172480893,66.45008614558913,0 66 | 66.56089447242954,41.09209807936973,0 67 | 
40.45755098375164,97.53518548909936,1 68 | 49.07256321908844,51.88321182073966,0 69 | 80.27957401466998,92.11606081344084,1 70 | 66.74671856944039,60.99139402740988,1 71 | 32.72283304060323,43.30717306430063,0 72 | 64.0393204150601,78.03168802018232,1 73 | 72.34649422579923,96.22759296761404,1 74 | 60.45788573918959,73.09499809758037,1 75 | 58.84095621726802,75.85844831279042,1 76 | 99.82785779692128,72.36925193383885,1 77 | 47.26426910848174,88.47586499559782,1 78 | 50.45815980285988,75.80985952982456,1 79 | 60.45555629271532,42.50840943572217,0 80 | 82.22666157785568,42.71987853716458,0 81 | 88.9138964166533,69.80378889835472,1 82 | 94.83450672430196,45.69430680250754,1 83 | 67.31925746917527,66.58935317747915,1 84 | 57.23870631569862,59.51428198012956,1 85 | 80.36675600171273,90.96014789746954,1 86 | 68.46852178591112,85.59430710452014,1 87 | 42.0754545384731,78.84478600148043,0 88 | 75.47770200533905,90.42453899753964,1 89 | 78.63542434898018,96.64742716885644,1 90 | 52.34800398794107,60.76950525602592,0 91 | 94.09433112516793,77.15910509073893,1 92 | 90.44855097096364,87.50879176484702,1 93 | 55.48216114069585,35.57070347228866,0 94 | 74.49269241843041,84.84513684930135,1 95 | 89.84580670720979,45.35828361091658,1 96 | 83.48916274498238,48.38028579728175,1 97 | 42.2617008099817,87.10385094025457,1 98 | 99.31500880510394,68.77540947206617,1 99 | 55.34001756003703,64.9319380069486,1 100 | 74.77589300092767,89.52981289513276,1 101 | -------------------------------------------------------------------------------- /Exercise 2/ex2/ex2data2.txt: -------------------------------------------------------------------------------- 1 | 0.051267,0.69956,1 2 | -0.092742,0.68494,1 3 | -0.21371,0.69225,1 4 | -0.375,0.50219,1 5 | -0.51325,0.46564,1 6 | -0.52477,0.2098,1 7 | -0.39804,0.034357,1 8 | -0.30588,-0.19225,1 9 | 0.016705,-0.40424,1 10 | 0.13191,-0.51389,1 11 | 0.38537,-0.56506,1 12 | 0.52938,-0.5212,1 13 | 0.63882,-0.24342,1 14 | 0.73675,-0.18494,1 15 | 0.54666,0.48757,1 16 | 0.322,0.5826,1 17 | 0.16647,0.53874,1 18 | -0.046659,0.81652,1 19 | -0.17339,0.69956,1 20 | -0.47869,0.63377,1 21 | -0.60541,0.59722,1 22 | -0.62846,0.33406,1 23 | -0.59389,0.005117,1 24 | -0.42108,-0.27266,1 25 | -0.11578,-0.39693,1 26 | 0.20104,-0.60161,1 27 | 0.46601,-0.53582,1 28 | 0.67339,-0.53582,1 29 | -0.13882,0.54605,1 30 | -0.29435,0.77997,1 31 | -0.26555,0.96272,1 32 | -0.16187,0.8019,1 33 | -0.17339,0.64839,1 34 | -0.28283,0.47295,1 35 | -0.36348,0.31213,1 36 | -0.30012,0.027047,1 37 | -0.23675,-0.21418,1 38 | -0.06394,-0.18494,1 39 | 0.062788,-0.16301,1 40 | 0.22984,-0.41155,1 41 | 0.2932,-0.2288,1 42 | 0.48329,-0.18494,1 43 | 0.64459,-0.14108,1 44 | 0.46025,0.012427,1 45 | 0.6273,0.15863,1 46 | 0.57546,0.26827,1 47 | 0.72523,0.44371,1 48 | 0.22408,0.52412,1 49 | 0.44297,0.67032,1 50 | 0.322,0.69225,1 51 | 0.13767,0.57529,1 52 | -0.0063364,0.39985,1 53 | -0.092742,0.55336,1 54 | -0.20795,0.35599,1 55 | -0.20795,0.17325,1 56 | -0.43836,0.21711,1 57 | -0.21947,-0.016813,1 58 | -0.13882,-0.27266,1 59 | 0.18376,0.93348,0 60 | 0.22408,0.77997,0 61 | 0.29896,0.61915,0 62 | 0.50634,0.75804,0 63 | 0.61578,0.7288,0 64 | 0.60426,0.59722,0 65 | 0.76555,0.50219,0 66 | 0.92684,0.3633,0 67 | 0.82316,0.27558,0 68 | 0.96141,0.085526,0 69 | 0.93836,0.012427,0 70 | 0.86348,-0.082602,0 71 | 0.89804,-0.20687,0 72 | 0.85196,-0.36769,0 73 | 0.82892,-0.5212,0 74 | 0.79435,-0.55775,0 75 | 0.59274,-0.7405,0 76 | 0.51786,-0.5943,0 77 | 0.46601,-0.41886,0 78 | 0.35081,-0.57968,0 79 | 0.28744,-0.76974,0 80 | 0.085829,-0.75512,0 81 | 
0.14919,-0.57968,0 82 | -0.13306,-0.4481,0 83 | -0.40956,-0.41155,0 84 | -0.39228,-0.25804,0 85 | -0.74366,-0.25804,0 86 | -0.69758,0.041667,0 87 | -0.75518,0.2902,0 88 | -0.69758,0.68494,0 89 | -0.4038,0.70687,0 90 | -0.38076,0.91886,0 91 | -0.50749,0.90424,0 92 | -0.54781,0.70687,0 93 | 0.10311,0.77997,0 94 | 0.057028,0.91886,0 95 | -0.10426,0.99196,0 96 | -0.081221,1.1089,0 97 | 0.28744,1.087,0 98 | 0.39689,0.82383,0 99 | 0.63882,0.88962,0 100 | 0.82316,0.66301,0 101 | 0.67339,0.64108,0 102 | 1.0709,0.10015,0 103 | -0.046659,-0.57968,0 104 | -0.23675,-0.63816,0 105 | -0.15035,-0.36769,0 106 | -0.49021,-0.3019,0 107 | -0.46717,-0.13377,0 108 | -0.28859,-0.060673,0 109 | -0.61118,-0.067982,0 110 | -0.66302,-0.21418,0 111 | -0.59965,-0.41886,0 112 | -0.72638,-0.082602,0 113 | -0.83007,0.31213,0 114 | -0.72062,0.53874,0 115 | -0.59389,0.49488,0 116 | -0.48445,0.99927,0 117 | -0.0063364,0.99927,0 118 | 0.63265,-0.030612,0 119 | -------------------------------------------------------------------------------- /Exercise 2/ex2/mapFeature.m: -------------------------------------------------------------------------------- 1 | function out = mapFeature(X1, X2) 2 | % MAPFEATURE Feature mapping function to polynomial features 3 | % 4 | % MAPFEATURE(X1, X2) maps the two input features 5 | % to quadratic features used in the regularization exercise. 6 | % 7 | % Returns a new feature array with more features, comprising of 8 | % X1, X2, X1.^2, X2.^2, X1*X2, X1*X2.^2, etc.. 9 | % 10 | % Inputs X1, X2 must be the same size 11 | % 12 | 13 | degree = 6; 14 | out = ones(size(X1(:,1))); 15 | for i = 1:degree 16 | for j = 0:i 17 | out(:, end+1) = (X1.^(i-j)).*(X2.^j); 18 | end 19 | end 20 | 21 | end -------------------------------------------------------------------------------- /Exercise 2/ex2/ml_login_data.mat: -------------------------------------------------------------------------------- 1 | # Created by Octave 3.4.0, Sat Jul 07 23:53:08 2012 CEST 2 | # name: login 3 | # type: sq_string 4 | # elements: 1 5 | # length: 13 6 | primal@aon.at 7 | 8 | 9 | # name: password 10 | # type: sq_string 11 | # elements: 1 12 | # length: 10 13 | huBKwC6DRx 14 | 15 | 16 | -------------------------------------------------------------------------------- /Exercise 2/ex2/plotData.m: -------------------------------------------------------------------------------- 1 | function plotData(X, y) 2 | %PLOTDATA Plots the data points X and y into a new figure 3 | % PLOTDATA(x,y) plots the data points with + for the positive examples 4 | % and o for the negative examples. X is assumed to be a Mx2 matrix. 5 | 6 | % Create New Figure 7 | figure; hold on; 8 | 9 | % ====================== YOUR CODE HERE ====================== 10 | % Instructions: Plot the positive and negative examples on a 11 | % 2D plot, using the option 'k+' for the positive 12 | % examples and 'ko' for the negative examples. 
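% Note (editorial aside, not part of the original starter code): the
% implementation below uses find(y == 1) and find(y == 0) to collect the
% row indices of the positive and negative examples, then plots the two
% classes with 'k+' and yellow-filled 'ko' markers respectively.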
13 | % 14 | 15 | positive = find(y == 1); 16 | negative = find(y == 0); 17 | 18 | plot(X(positive, 1), X(positive, 2), 'k+', 'LineWidth', 2, 'MarkerSize', 7); 19 | plot(X(negative, 1), X(negative, 2), 'ko', 'MarkerFaceColor', 'y', 'MarkerSize', 7); 20 | 21 | % ========================================================================= 22 | 23 | 24 | 25 | hold off; 26 | 27 | end 28 | -------------------------------------------------------------------------------- /Exercise 2/ex2/plotDecisionBoundary.m: -------------------------------------------------------------------------------- 1 | function plotDecisionBoundary(theta, X, y) 2 | %PLOTDECISIONBOUNDARY Plots the data points X and y into a new figure with 3 | %the decision boundary defined by theta 4 | % PLOTDECISIONBOUNDARY(theta, X,y) plots the data points with + for the 5 | % positive examples and o for the negative examples. X is assumed to be 6 | % a either 7 | % 1) Mx3 matrix, where the first column is an all-ones column for the 8 | % intercept. 9 | % 2) MxN, N>3 matrix, where the first column is all-ones 10 | 11 | % Plot Data 12 | plotData(X(:,2:3), y); 13 | hold on 14 | 15 | if size(X, 2) <= 3 16 | % Only need 2 points to define a line, so choose two endpoints 17 | plot_x = [min(X(:,2))-2, max(X(:,2))+2]; 18 | 19 | % Calculate the decision boundary line 20 | plot_y = (-1./theta(3)).*(theta(2).*plot_x + theta(1)); 21 | 22 | % Plot, and adjust axes for better viewing 23 | plot(plot_x, plot_y) 24 | 25 | % Legend, specific for the exercise 26 | legend('Admitted', 'Not admitted', 'Decision Boundary') 27 | axis([30, 100, 30, 100]) 28 | else 29 | % Here is the grid range 30 | u = linspace(-1, 1.5, 50); 31 | v = linspace(-1, 1.5, 50); 32 | 33 | z = zeros(length(u), length(v)); 34 | % Evaluate z = theta*x over the grid 35 | for i = 1:length(u) 36 | for j = 1:length(v) 37 | z(i,j) = mapFeature(u(i), v(j))*theta; 38 | end 39 | end 40 | z = z'; % important to transpose z before calling contour 41 | 42 | % Plot z = 0 43 | % Notice you need to specify the range [0, 0] 44 | contour(u, v, z, [0, 0], 'LineWidth', 2) 45 | end 46 | hold off 47 | 48 | end 49 | -------------------------------------------------------------------------------- /Exercise 2/ex2/predict.m: -------------------------------------------------------------------------------- 1 | function p = predict(theta, X) 2 | %PREDICT Predict whether the label is 0 or 1 using learned logistic 3 | %regression parameters theta 4 | % p = PREDICT(theta, X) computes the predictions for X using a 5 | % threshold at 0.5 (i.e., if sigmoid(theta'*x) >= 0.5, predict 1) 6 | 7 | m = size(X, 1); % Number of training examples 8 | 9 | % You need to return the following variables correctly 10 | p = zeros(m, 1); 11 | 12 | % ====================== YOUR CODE HERE ====================== 13 | % Instructions: Complete the following code to make predictions using 14 | % your learned logistic regression parameters. 15 | % You should set p to a vector of 0's and 1's 16 | % 17 | 18 | result = sigmoid(X * theta); 19 | p = round(result); 20 | 21 | % ========================================================================= 22 | 23 | 24 | end 25 | -------------------------------------------------------------------------------- /Exercise 2/ex2/sigmoid.m: -------------------------------------------------------------------------------- 1 | function g = sigmoid(z) 2 | %SIGMOID Compute sigmoid functoon 3 | % J = SIGMOID(z) computes the sigmoid of z. 
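% Note (editorial aside, not part of the original starter code): g is
% computed element-wise as
%   g = 1 ./ (1 + exp(-z));
% so z may be a scalar, a vector or a matrix, as the instructions below
% state.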
4 | 5 | % You need to return the following variables correctly 6 | g = zeros(size(z)); 7 | 8 | % ====================== YOUR CODE HERE ====================== 9 | % Instructions: Compute the sigmoid of each value of z (z can be a matrix, 10 | % vector or scalar). 11 | 12 | denominator = 1 + exp(-1 * z); 13 | g = 1 ./ denominator; 14 | 15 | % ============================================================= 16 | 17 | end 18 | -------------------------------------------------------------------------------- /Exercise 2/ex2/submitWeb.m: -------------------------------------------------------------------------------- 1 | % submitWeb Creates files from your code and output for web submission. 2 | % 3 | % If the submit function does not work for you, use the web-submission mechanism. 4 | % Call this function to produce a file for the part you wish to submit. Then, 5 | % submit the file to the class servers using the "Web Submission" button on the 6 | % Programming Exercises page on the course website. 7 | % 8 | % You should call this function without arguments (submitWeb), to receive 9 | % an interactive prompt for submission; optionally you can call it with the partID 10 | % if you so wish. Make sure your working directory is set to the directory 11 | % containing the submitWeb.m file and your assignment files. 12 | 13 | function submitWeb(partId) 14 | if ~exist('partId', 'var') || isempty(partId) 15 | partId = []; 16 | end 17 | 18 | submit(partId, 1); 19 | end 20 | 21 | -------------------------------------------------------------------------------- /Exercise 3/ex3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rieder91/MachineLearning/f6708f216326cb5c9e9e5c3afc912060bfa10486/Exercise 3/ex3.pdf -------------------------------------------------------------------------------- /Exercise 3/ex3/displayData.m: -------------------------------------------------------------------------------- 1 | function [h, display_array] = displayData(X, example_width) 2 | %DISPLAYDATA Display 2D data in a nice grid 3 | % [h, display_array] = DISPLAYDATA(X, example_width) displays 2D data 4 | % stored in X in a nice grid. It returns the figure handle h and the 5 | % displayed array if requested. 6 | 7 | % Set example_width automatically if not passed in 8 | if ~exist('example_width', 'var') || isempty(example_width) 9 | example_width = round(sqrt(size(X, 2))); 10 | end 11 | 12 | % Gray Image 13 | colormap(gray); 14 | 15 | % Compute rows, cols 16 | [m n] = size(X); 17 | example_height = (n / example_width); 18 | 19 | % Compute number of items to display 20 | display_rows = floor(sqrt(m)); 21 | display_cols = ceil(m / display_rows); 22 | 23 | % Between images padding 24 | pad = 1; 25 | 26 | % Setup blank display 27 | display_array = - ones(pad + display_rows * (example_height + pad), ... 28 | pad + display_cols * (example_width + pad)); 29 | 30 | % Copy each example into a patch on the display array 31 | curr_ex = 1; 32 | for j = 1:display_rows 33 | for i = 1:display_cols 34 | if curr_ex > m, 35 | break; 36 | end 37 | % Copy the patch 38 | 39 | % Get the max value of the patch 40 | max_val = max(abs(X(curr_ex, :))); 41 | display_array(pad + (j - 1) * (example_height + pad) + (1:example_height), ... 42 | pad + (i - 1) * (example_width + pad) + (1:example_width)) = ... 
43 | reshape(X(curr_ex, :), example_height, example_width) / max_val; 44 | curr_ex = curr_ex + 1; 45 | end 46 | if curr_ex > m, 47 | break; 48 | end 49 | end 50 | 51 | % Display Image 52 | h = imagesc(display_array, [-1 1]); 53 | 54 | % Do not show axis 55 | axis image off 56 | 57 | drawnow; 58 | 59 | end 60 | -------------------------------------------------------------------------------- /Exercise 3/ex3/ex3.m: -------------------------------------------------------------------------------- 1 | %% Machine Learning Online Class - Exercise 3 | Part 1: One-vs-all 2 | 3 | % Instructions 4 | % ------------ 5 | % 6 | % This file contains code that helps you get started on the 7 | % linear exercise. You will need to complete the following functions 8 | % in this exericse: 9 | % 10 | % lrCostFunction.m (logistic regression cost function) 11 | % oneVsAll.m 12 | % predictOneVsAll.m 13 | % predict.m 14 | % 15 | % For this exercise, you will not need to change any code in this file, 16 | % or any other files other than those mentioned above. 17 | % 18 | 19 | %% Initialization 20 | clear ; close all; clc 21 | 22 | %% Setup the parameters you will use for this part of the exercise 23 | input_layer_size = 400; % 20x20 Input Images of Digits 24 | num_labels = 10; % 10 labels, from 1 to 10 25 | % (note that we have mapped "0" to label 10) 26 | 27 | %% =========== Part 1: Loading and Visualizing Data ============= 28 | % We start the exercise by first loading and visualizing the dataset. 29 | % You will be working with a dataset that contains handwritten digits. 30 | % 31 | 32 | % Load Training Data 33 | fprintf('Loading and Visualizing Data ...\n') 34 | 35 | load('ex3data1.mat'); % training data stored in arrays X, y 36 | m = size(X, 1); 37 | 38 | % Randomly select 100 data points to display 39 | rand_indices = randperm(m); 40 | sel = X(rand_indices(1:100), :); 41 | 42 | displayData(sel); 43 | 44 | fprintf('Program paused. Press enter to continue.\n'); 45 | pause; 46 | 47 | %% ============ Part 2: Vectorize Logistic Regression ============ 48 | % In this part of the exercise, you will reuse your logistic regression 49 | % code from the last exercise. You task here is to make sure that your 50 | % regularized logistic regression implementation is vectorized. After 51 | % that, you will implement one-vs-all classification for the handwritten 52 | % digit dataset. 53 | % 54 | 55 | fprintf('\nTraining One-vs-All Logistic Regression...\n') 56 | 57 | lambda = 0.1; 58 | [all_theta] = oneVsAll(X, y, num_labels, lambda); 59 | 60 | fprintf('Program paused. Press enter to continue.\n'); 61 | pause; 62 | 63 | 64 | %% ================ Part 3: Predict for One-Vs-All ================ 65 | % After ... 66 | pred = predictOneVsAll(all_theta, X); 67 | 68 | fprintf('\nTraining Set Accuracy: %f\n', mean(double(pred == y)) * 100); 69 | 70 | -------------------------------------------------------------------------------- /Exercise 3/ex3/ex3_nn.m: -------------------------------------------------------------------------------- 1 | %% Machine Learning Online Class - Exercise 3 | Part 2: Neural Networks 2 | 3 | % Instructions 4 | % ------------ 5 | % 6 | % This file contains code that helps you get started on the 7 | % linear exercise. 
You will need to complete the following functions 8 | % in this exericse: 9 | % 10 | % lrCostFunction.m (logistic regression cost function) 11 | % oneVsAll.m 12 | % predictOneVsAll.m 13 | % predict.m 14 | % 15 | % For this exercise, you will not need to change any code in this file, 16 | % or any other files other than those mentioned above. 17 | % 18 | 19 | %% Initialization 20 | clear ; close all; clc 21 | 22 | %% Setup the parameters you will use for this exercise 23 | input_layer_size = 400; % 20x20 Input Images of Digits 24 | hidden_layer_size = 25; % 25 hidden units 25 | num_labels = 10; % 10 labels, from 1 to 10 26 | % (note that we have mapped "0" to label 10) 27 | 28 | %% =========== Part 1: Loading and Visualizing Data ============= 29 | % We start the exercise by first loading and visualizing the dataset. 30 | % You will be working with a dataset that contains handwritten digits. 31 | % 32 | 33 | % Load Training Data 34 | fprintf('Loading and Visualizing Data ...\n') 35 | 36 | load('ex3data1.mat'); 37 | m = size(X, 1); 38 | 39 | % Randomly select 100 data points to display 40 | sel = randperm(size(X, 1)); 41 | sel = sel(1:100); 42 | 43 | displayData(X(sel, :)); 44 | 45 | fprintf('Program paused. Press enter to continue.\n'); 46 | pause; 47 | 48 | %% ================ Part 2: Loading Pameters ================ 49 | % In this part of the exercise, we load some pre-initialized 50 | % neural network parameters. 51 | 52 | fprintf('\nLoading Saved Neural Network Parameters ...\n') 53 | 54 | % Load the weights into variables Theta1 and Theta2 55 | load('ex3weights.mat'); 56 | 57 | %% ================= Part 3: Implement Predict ================= 58 | % After training the neural network, we would like to use it to predict 59 | % the labels. You will now implement the "predict" function to use the 60 | % neural network to predict the labels of the training set. This lets 61 | % you compute the training set accuracy. 62 | 63 | pred = predict(Theta1, Theta2, X); 64 | 65 | fprintf('\nTraining Set Accuracy: %f\n', mean(double(pred == y)) * 100); 66 | 67 | fprintf('Program paused. Press enter to continue.\n'); 68 | pause; 69 | 70 | % To give you an idea of the network's output, you can also run 71 | % through the examples one at the a time to see what it is predicting. 72 | 73 | % Randomly permute examples 74 | rp = randperm(m); 75 | 76 | for i = 1:m 77 | % Display 78 | fprintf('\nDisplaying Example Image\n'); 79 | displayData(X(rp(i), :)); 80 | 81 | pred = predict(Theta1, Theta2, X(rp(i),:)); 82 | fprintf('\nNeural Network Prediction: %d (digit %d)\n', pred, mod(pred, 10)); 83 | 84 | % Pause 85 | fprintf('Program paused. 
Press enter to continue.\n'); 86 | pause; 87 | end 88 | 89 | -------------------------------------------------------------------------------- /Exercise 3/ex3/ex3data1.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rieder91/MachineLearning/f6708f216326cb5c9e9e5c3afc912060bfa10486/Exercise 3/ex3/ex3data1.mat -------------------------------------------------------------------------------- /Exercise 3/ex3/ex3weights.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rieder91/MachineLearning/f6708f216326cb5c9e9e5c3afc912060bfa10486/Exercise 3/ex3/ex3weights.mat -------------------------------------------------------------------------------- /Exercise 3/ex3/fmincg.m: -------------------------------------------------------------------------------- 1 | function [X, fX, i] = fmincg(f, X, options, P1, P2, P3, P4, P5) 2 | % Minimize a continuous differentialble multivariate function. Starting point 3 | % is given by "X" (D by 1), and the function named in the string "f", must 4 | % return a function value and a vector of partial derivatives. The Polack- 5 | % Ribiere flavour of conjugate gradients is used to compute search directions, 6 | % and a line search using quadratic and cubic polynomial approximations and the 7 | % Wolfe-Powell stopping criteria is used together with the slope ratio method 8 | % for guessing initial step sizes. Additionally a bunch of checks are made to 9 | % make sure that exploration is taking place and that extrapolation will not 10 | % be unboundedly large. The "length" gives the length of the run: if it is 11 | % positive, it gives the maximum number of line searches, if negative its 12 | % absolute gives the maximum allowed number of function evaluations. You can 13 | % (optionally) give "length" a second component, which will indicate the 14 | % reduction in function value to be expected in the first line-search (defaults 15 | % to 1.0). The function returns when either its length is up, or if no further 16 | % progress can be made (ie, we are at a minimum, or so close that due to 17 | % numerical problems, we cannot get any closer). If the function terminates 18 | % within a few iterations, it could be an indication that the function value 19 | % and derivatives are not consistent (ie, there may be a bug in the 20 | % implementation of your "f" function). The function returns the found 21 | % solution "X", a vector of function values "fX" indicating the progress made 22 | % and "i" the number of iterations (line searches or function evaluations, 23 | % depending on the sign of "length") used. 24 | % 25 | % Usage: [X, fX, i] = fmincg(f, X, options, P1, P2, P3, P4, P5) 26 | % 27 | % See also: checkgrad 28 | % 29 | % Copyright (C) 2001 and 2002 by Carl Edward Rasmussen. Date 2002-02-13 30 | % 31 | % 32 | % (C) Copyright 1999, 2000 & 2001, Carl Edward Rasmussen 33 | % 34 | % Permission is granted for anyone to copy, use, or modify these 35 | % programs and accompanying documents for purposes of research or 36 | % education, provided this copyright notice is retained, and note is 37 | % made of any changes that have been made. 38 | % 39 | % These programs and documents are distributed without any warranty, 40 | % express or implied. As the programs were written for research 41 | % purposes only, they have not been tested to the degree that would be 42 | % advisable in any important application. 
All use of these programs is 43 | % entirely at the user's own risk. 44 | % 45 | % [ml-class] Changes Made: 46 | % 1) Function name and argument specifications 47 | % 2) Output display 48 | % 49 | 50 | % Read options 51 | if exist('options', 'var') && ~isempty(options) && isfield(options, 'MaxIter') 52 | length = options.MaxIter; 53 | else 54 | length = 100; 55 | end 56 | 57 | 58 | RHO = 0.01; % a bunch of constants for line searches 59 | SIG = 0.5; % RHO and SIG are the constants in the Wolfe-Powell conditions 60 | INT = 0.1; % don't reevaluate within 0.1 of the limit of the current bracket 61 | EXT = 3.0; % extrapolate maximum 3 times the current bracket 62 | MAX = 20; % max 20 function evaluations per line search 63 | RATIO = 100; % maximum allowed slope ratio 64 | 65 | argstr = ['feval(f, X']; % compose string used to call function 66 | for i = 1:(nargin - 3) 67 | argstr = [argstr, ',P', int2str(i)]; 68 | end 69 | argstr = [argstr, ')']; 70 | 71 | if max(size(length)) == 2, red=length(2); length=length(1); else red=1; end 72 | S=['Iteration ']; 73 | 74 | i = 0; % zero the run length counter 75 | ls_failed = 0; % no previous line search has failed 76 | fX = []; 77 | [f1 df1] = eval(argstr); % get function value and gradient 78 | i = i + (length<0); % count epochs?! 79 | s = -df1; % search direction is steepest 80 | d1 = -s'*s; % this is the slope 81 | z1 = red/(1-d1); % initial step is red/(|s|+1) 82 | 83 | while i < abs(length) % while not finished 84 | i = i + (length>0); % count iterations?! 85 | 86 | X0 = X; f0 = f1; df0 = df1; % make a copy of current values 87 | X = X + z1*s; % begin line search 88 | [f2 df2] = eval(argstr); 89 | i = i + (length<0); % count epochs?! 90 | d2 = df2'*s; 91 | f3 = f1; d3 = d1; z3 = -z1; % initialize point 3 equal to point 1 92 | if length>0, M = MAX; else M = min(MAX, -length-i); end 93 | success = 0; limit = -1; % initialize quanteties 94 | while 1 95 | while ((f2 > f1+z1*RHO*d1) | (d2 > -SIG*d1)) & (M > 0) 96 | limit = z1; % tighten the bracket 97 | if f2 > f1 98 | z2 = z3 - (0.5*d3*z3*z3)/(d3*z3+f2-f3); % quadratic fit 99 | else 100 | A = 6*(f2-f3)/z3+3*(d2+d3); % cubic fit 101 | B = 3*(f3-f2)-z3*(d3+2*d2); 102 | z2 = (sqrt(B*B-A*d2*z3*z3)-B)/A; % numerical error possible - ok! 103 | end 104 | if isnan(z2) | isinf(z2) 105 | z2 = z3/2; % if we had a numerical problem then bisect 106 | end 107 | z2 = max(min(z2, INT*z3),(1-INT)*z3); % don't accept too close to limits 108 | z1 = z1 + z2; % update the step 109 | X = X + z2*s; 110 | [f2 df2] = eval(argstr); 111 | M = M - 1; i = i + (length<0); % count epochs?! 112 | d2 = df2'*s; 113 | z3 = z3-z2; % z3 is now relative to the location of z2 114 | end 115 | if f2 > f1+z1*RHO*d1 | d2 > -SIG*d1 116 | break; % this is a failure 117 | elseif d2 > SIG*d1 118 | success = 1; break; % success 119 | elseif M == 0 120 | break; % failure 121 | end 122 | A = 6*(f2-f3)/z3+3*(d2+d3); % make cubic extrapolation 123 | B = 3*(f3-f2)-z3*(d3+2*d2); 124 | z2 = -d2*z3*z3/(B+sqrt(B*B-A*d2*z3*z3)); % num. error possible - ok! 125 | if ~isreal(z2) | isnan(z2) | isinf(z2) | z2 < 0 % num prob or wrong sign? 126 | if limit < -0.5 % if we have no upper limit 127 | z2 = z1 * (EXT-1); % the extrapolate the maximum amount 128 | else 129 | z2 = (limit-z1)/2; % otherwise bisect 130 | end 131 | elseif (limit > -0.5) & (z2+z1 > limit) % extraplation beyond max? 
132 | z2 = (limit-z1)/2; % bisect 133 | elseif (limit < -0.5) & (z2+z1 > z1*EXT) % extrapolation beyond limit 134 | z2 = z1*(EXT-1.0); % set to extrapolation limit 135 | elseif z2 < -z3*INT 136 | z2 = -z3*INT; 137 | elseif (limit > -0.5) & (z2 < (limit-z1)*(1.0-INT)) % too close to limit? 138 | z2 = (limit-z1)*(1.0-INT); 139 | end 140 | f3 = f2; d3 = d2; z3 = -z2; % set point 3 equal to point 2 141 | z1 = z1 + z2; X = X + z2*s; % update current estimates 142 | [f2 df2] = eval(argstr); 143 | M = M - 1; i = i + (length<0); % count epochs?! 144 | d2 = df2'*s; 145 | end % end of line search 146 | 147 | if success % if line search succeeded 148 | f1 = f2; fX = [fX' f1]'; 149 | fprintf('%s %4i | Cost: %4.6e\r', S, i, f1); 150 | s = (df2'*df2-df1'*df2)/(df1'*df1)*s - df2; % Polack-Ribiere direction 151 | tmp = df1; df1 = df2; df2 = tmp; % swap derivatives 152 | d2 = df1'*s; 153 | if d2 > 0 % new slope must be negative 154 | s = -df1; % otherwise use steepest direction 155 | d2 = -s'*s; 156 | end 157 | z1 = z1 * min(RATIO, d1/(d2-realmin)); % slope ratio but max RATIO 158 | d1 = d2; 159 | ls_failed = 0; % this line search did not fail 160 | else 161 | X = X0; f1 = f0; df1 = df0; % restore point from before failed line search 162 | if ls_failed | i > abs(length) % line search failed twice in a row 163 | break; % or we ran out of time, so we give up 164 | end 165 | tmp = df1; df1 = df2; df2 = tmp; % swap derivatives 166 | s = -df1; % try steepest 167 | d1 = -s'*s; 168 | z1 = 1/(1-d1); 169 | ls_failed = 1; % this line search failed 170 | end 171 | if exist('OCTAVE_VERSION') 172 | fflush(stdout); 173 | end 174 | end 175 | fprintf('\n'); 176 | -------------------------------------------------------------------------------- /Exercise 3/ex3/lrCostFunction.m: -------------------------------------------------------------------------------- 1 | function [J, grad] = lrCostFunction(theta, X, y, lambda) 2 | %LRCOSTFUNCTION Compute cost and gradient for logistic regression with 3 | %regularization 4 | % J = LRCOSTFUNCTION(theta, X, y, lambda) computes the cost of using 5 | % theta as the parameter for regularized logistic regression and the 6 | % gradient of the cost w.r.t. to the parameters. 7 | 8 | % Initialize some useful values 9 | m = length(y); % number of training examples 10 | 11 | % You need to return the following variables correctly 12 | J = 0; 13 | grad = zeros(size(theta)); 14 | 15 | % ====================== YOUR CODE HERE ====================== 16 | % Instructions: Compute the cost of a particular choice of theta. 17 | % You should set J to the cost. 18 | % Compute the partial derivatives and set grad to the partial 19 | % derivatives of the cost w.r.t. each parameter in theta 20 | % 21 | % Hint: The computation of the cost function and gradients can be 22 | % efficiently vectorized. For example, consider the computation 23 | % 24 | % sigmoid(X * theta) 25 | % 26 | % Each row of the resulting matrix will contain the value of the 27 | % prediction for that example. You can make use of this to vectorize 28 | % the cost function and gradient computations. 
29 | % 30 | % Hint: When computing the gradient of the regularized cost function, 31 | % there're many possible vectorized solutions, but one solution 32 | % looks like: 33 | % grad = (unregularized gradient for logistic regression) 34 | % temp = theta; 35 | % temp(1) = 0; % because we don't add anything for j = 0 36 | % grad = grad + YOUR_CODE_HERE (using the temp variable) 37 | % 38 | 39 | sig = sigmoid(X * theta); 40 | cost = -y .* log(sig) - (1 - y) .* log(1 - sig); 41 | thetaNoZero = [ [ 0 ]; theta([2:length(theta)]) ]; 42 | J = (1 / m) * sum(cost) + (lambda / (2 * m)) * sum(thetaNoZero .^ 2); 43 | grad = (1 / m) .* (X' * (sig - y)) + (lambda / m) * thetaNoZero; 44 | 45 | % ============================================================= 46 | 47 | grad = grad(:); 48 | 49 | end 50 | -------------------------------------------------------------------------------- /Exercise 3/ex3/oneVsAll.m: -------------------------------------------------------------------------------- 1 | function [all_theta] = oneVsAll(X, y, num_labels, lambda) 2 | %ONEVSALL trains multiple logistic regression classifiers and returns all 3 | %the classifiers in a matrix all_theta, where the i-th row of all_theta 4 | %corresponds to the classifier for label i 5 | % [all_theta] = ONEVSALL(X, y, num_labels, lambda) trains num_labels 6 | % logisitc regression classifiers and returns each of these classifiers 7 | % in a matrix all_theta, where the i-th row of all_theta corresponds 8 | % to the classifier for label i 9 | 10 | % Some useful variables 11 | m = size(X, 1); 12 | n = size(X, 2); 13 | 14 | % You need to return the following variables correctly 15 | all_theta = zeros(num_labels, n + 1); 16 | 17 | % Add ones to the X data matrix 18 | X = [ones(m, 1) X]; 19 | 20 | % ====================== YOUR CODE HERE ====================== 21 | % Instructions: You should complete the following code to train num_labels 22 | % logistic regression classifiers with regularization 23 | % parameter lambda. 24 | % 25 | % Hint: theta(:) will return a column vector. 26 | % 27 | % Hint: You can use y == c to obtain a vector of 1's and 0's that tell use 28 | % whether the ground truth is true/false for this class. 29 | % 30 | % Note: For this assignment, we recommend using fmincg to optimize the cost 31 | % function. It is okay to use a for-loop (for c = 1:num_labels) to 32 | % loop over the different classes. 33 | % 34 | % fmincg works similarly to fminunc, but is more efficient when we 35 | % are dealing with large number of parameters. 36 | % 37 | % Example Code for fmincg: 38 | % 39 | % % Set Initial theta 40 | % initial_theta = zeros(n + 1, 1); 41 | % 42 | % % Set options for fminunc 43 | % options = optimset('GradObj', 'on', 'MaxIter', 50); 44 | % 45 | % % Run fmincg to obtain the optimal theta 46 | % % This function will return theta and the cost 47 | % [theta] = ... 48 | % fmincg (@(t)(lrCostFunction(t, X, (y == c), lambda)), ... 
49 | % initial_theta, options); 50 | % 51 | 52 | for c = 1:num_labels 53 | init_theta = zeros(n + 1, 1); 54 | options = optimset('GradObj', 'on', 'MaxIter', 50); 55 | [theta] = fmincg (@(t)(lrCostFunction(t, X, (y == c), lambda)), init_theta, options); 56 | all_theta(c,:) = theta'; 57 | end; 58 | 59 | % ========================================================================= 60 | 61 | 62 | end 63 | -------------------------------------------------------------------------------- /Exercise 3/ex3/predict.m: -------------------------------------------------------------------------------- 1 | function p = predict(Theta1, Theta2, X) 2 | %PREDICT Predict the label of an input given a trained neural network 3 | % p = PREDICT(Theta1, Theta2, X) outputs the predicted label of X given the 4 | % trained weights of a neural network (Theta1, Theta2) 5 | 6 | % Useful values 7 | m = size(X, 1); 8 | num_labels = size(Theta2, 1); 9 | 10 | % You need to return the following variables correctly 11 | p = zeros(size(X, 1), 1); 12 | 13 | % ====================== YOUR CODE HERE ====================== 14 | % Instructions: Complete the following code to make predictions using 15 | % your learned neural network. You should set p to a 16 | % vector containing labels between 1 to num_labels. 17 | % 18 | % Hint: The max function might come in useful. In particular, the max 19 | % function can also return the index of the max element, for more 20 | % information see 'help max'. If your examples are in rows, then, you 21 | % can use max(A, [], 2) to obtain the max for each row. 22 | % 23 | 24 | temp1 = [ones(m, 1) X]; 25 | temp2 = [ones(m, 1) sigmoid(temp1 * Theta1')]; 26 | temp3 = sigmoid(temp2 * Theta2'); 27 | [maxTemp3, maxTemp3_2] = max(temp3'); 28 | p = maxTemp3_2'; 29 | 30 | % ========================================================================= 31 | 32 | 33 | end 34 | -------------------------------------------------------------------------------- /Exercise 3/ex3/predictOneVsAll.m: -------------------------------------------------------------------------------- 1 | function p = predictOneVsAll(all_theta, X) 2 | %PREDICT Predict the label for a trained one-vs-all classifier. The labels 3 | %are in the range 1..K, where K = size(all_theta, 1). 4 | % p = PREDICTONEVSALL(all_theta, X) will return a vector of predictions 5 | % for each example in the matrix X. Note that X contains the examples in 6 | % rows. all_theta is a matrix where the i-th row is a trained logistic 7 | % regression theta vector for the i-th class. You should set p to a vector 8 | % of values from 1..K (e.g., p = [1; 3; 1; 2] predicts classes 1, 3, 1, 2 9 | % for 4 examples) 10 | 11 | m = size(X, 1); 12 | num_labels = size(all_theta, 1); 13 | 14 | % You need to return the following variables correctly 15 | p = zeros(size(X, 1), 1); 16 | 17 | % Add ones to the X data matrix 18 | X = [ones(m, 1) X]; 19 | 20 | % ====================== YOUR CODE HERE ====================== 21 | % Instructions: Complete the following code to make predictions using 22 | % your learned logistic regression parameters (one-vs-all). 23 | % You should set p to a vector of predictions (from 1 to 24 | % num_labels). 25 | % 26 | % Hint: This code can be done all vectorized using the max function. 27 | % In particular, the max function can also return the index of the 28 | % max element, for more information see 'help max'. If your examples 29 | % are in rows, then, you can use max(A, [], 2) to obtain the max 30 | % for each row. 
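% As a minimal vectorized sketch of the hint above (equivalent to the
% transpose-based solution that follows):
%   probs = sigmoid(X * all_theta');   % m x num_labels matrix of class scores
%   [~, p] = max(probs, [], 2);        % index of the row-wise maximum is the label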
31 | % 32 | 33 | sig = sigmoid(X * all_theta'); 34 | [maxSig, maxSig_2] = max(sig'); 35 | p = maxSig_2'; 36 | 37 | % ========================================================================= 38 | 39 | 40 | end 41 | -------------------------------------------------------------------------------- /Exercise 3/ex3/sigmoid.m: -------------------------------------------------------------------------------- 1 | function g = sigmoid(z) 2 | %SIGMOID Compute sigmoid functoon 3 | % J = SIGMOID(z) computes the sigmoid of z. 4 | 5 | g = 1.0 ./ (1.0 + exp(-z)); 6 | end 7 | -------------------------------------------------------------------------------- /Exercise 4/ex4.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rieder91/MachineLearning/f6708f216326cb5c9e9e5c3afc912060bfa10486/Exercise 4/ex4.pdf -------------------------------------------------------------------------------- /Exercise 4/ex4/checkNNGradients.m: -------------------------------------------------------------------------------- 1 | function checkNNGradients(lambda) 2 | %CHECKNNGRADIENTS Creates a small neural network to check the 3 | %backpropagation gradients 4 | % CHECKNNGRADIENTS(lambda) Creates a small neural network to check the 5 | % backpropagation gradients, it will output the analytical gradients 6 | % produced by your backprop code and the numerical gradients (computed 7 | % using computeNumericalGradient). These two gradient computations should 8 | % result in very similar values. 9 | % 10 | 11 | if ~exist('lambda', 'var') || isempty(lambda) 12 | lambda = 0; 13 | end 14 | 15 | input_layer_size = 3; 16 | hidden_layer_size = 5; 17 | num_labels = 3; 18 | m = 5; 19 | 20 | % We generate some 'random' test data 21 | Theta1 = debugInitializeWeights(hidden_layer_size, input_layer_size); 22 | Theta2 = debugInitializeWeights(num_labels, hidden_layer_size); 23 | % Reusing debugInitializeWeights to generate X 24 | X = debugInitializeWeights(m, input_layer_size - 1); 25 | y = 1 + mod(1:m, num_labels)'; 26 | 27 | % Unroll parameters 28 | nn_params = [Theta1(:) ; Theta2(:)]; 29 | 30 | % Short hand for cost function 31 | costFunc = @(p) nnCostFunction(p, input_layer_size, hidden_layer_size, ... 32 | num_labels, X, y, lambda); 33 | 34 | [cost, grad] = costFunc(nn_params); 35 | numgrad = computeNumericalGradient(costFunc, nn_params); 36 | 37 | % Visually examine the two gradient computations. The two columns 38 | % you get should be very similar. 39 | disp([numgrad grad]); 40 | fprintf(['The above two columns you get should be very similar.\n' ... 41 | '(Left-Your Numerical Gradient, Right-Analytical Gradient)\n\n']); 42 | 43 | % Evaluate the norm of the difference between two solutions. 44 | % If you have a correct implementation, and assuming you used EPSILON = 0.0001 45 | % in computeNumericalGradient.m, then diff below should be less than 1e-9 46 | diff = norm(numgrad-grad)/norm(numgrad+grad); 47 | 48 | fprintf(['If your backpropagation implementation is correct, then \n' ... 49 | 'the relative difference will be small (less than 1e-9). \n' ... 
50 | '\nRelative Difference: %g\n'], diff); 51 | 52 | end 53 | -------------------------------------------------------------------------------- /Exercise 4/ex4/computeNumericalGradient.m: -------------------------------------------------------------------------------- 1 | function numgrad = computeNumericalGradient(J, theta) 2 | %COMPUTENUMERICALGRADIENT Computes the gradient using "finite differences" 3 | %and gives us a numerical estimate of the gradient. 4 | % numgrad = COMPUTENUMERICALGRADIENT(J, theta) computes the numerical 5 | % gradient of the function J around theta. Calling y = J(theta) should 6 | % return the function value at theta. 7 | 8 | % Notes: The following code implements numerical gradient checking, and 9 | % returns the numerical gradient.It sets numgrad(i) to (a numerical 10 | % approximation of) the partial derivative of J with respect to the 11 | % i-th input argument, evaluated at theta. (i.e., numgrad(i) should 12 | % be the (approximately) the partial derivative of J with respect 13 | % to theta(i).) 14 | % 15 | 16 | numgrad = zeros(size(theta)); 17 | perturb = zeros(size(theta)); 18 | e = 1e-4; 19 | for p = 1:numel(theta) 20 | % Set perturbation vector 21 | perturb(p) = e; 22 | loss1 = J(theta - perturb); 23 | loss2 = J(theta + perturb); 24 | % Compute Numerical Gradient 25 | numgrad(p) = (loss2 - loss1) / (2*e); 26 | perturb(p) = 0; 27 | end 28 | 29 | end 30 | -------------------------------------------------------------------------------- /Exercise 4/ex4/debugInitializeWeights.m: -------------------------------------------------------------------------------- 1 | function W = debugInitializeWeights(fan_out, fan_in) 2 | %DEBUGINITIALIZEWEIGHTS Initialize the weights of a layer with fan_in 3 | %incoming connections and fan_out outgoing connections using a fixed 4 | %strategy, this will help you later in debugging 5 | % W = DEBUGINITIALIZEWEIGHTS(fan_in, fan_out) initializes the weights 6 | % of a layer with fan_in incoming connections and fan_out outgoing 7 | % connections using a fix set of values 8 | % 9 | % Note that W should be set to a matrix of size(1 + fan_in, fan_out) as 10 | % the first row of W handles the "bias" terms 11 | % 12 | 13 | % Set W to zeros 14 | W = zeros(fan_out, 1 + fan_in); 15 | 16 | % Initialize W using "sin", this ensures that W is always of the same 17 | % values and will be useful for debugging 18 | W = reshape(sin(1:numel(W)), size(W)) / 10; 19 | 20 | % ========================================================================= 21 | 22 | end 23 | -------------------------------------------------------------------------------- /Exercise 4/ex4/displayData.m: -------------------------------------------------------------------------------- 1 | function [h, display_array] = displayData(X, example_width) 2 | %DISPLAYDATA Display 2D data in a nice grid 3 | % [h, display_array] = DISPLAYDATA(X, example_width) displays 2D data 4 | % stored in X in a nice grid. It returns the figure handle h and the 5 | % displayed array if requested. 
6 | 7 | % Set example_width automatically if not passed in 8 | if ~exist('example_width', 'var') || isempty(example_width) 9 | example_width = round(sqrt(size(X, 2))); 10 | end 11 | 12 | % Gray Image 13 | colormap(gray); 14 | 15 | % Compute rows, cols 16 | [m n] = size(X); 17 | example_height = (n / example_width); 18 | 19 | % Compute number of items to display 20 | display_rows = floor(sqrt(m)); 21 | display_cols = ceil(m / display_rows); 22 | 23 | % Between images padding 24 | pad = 1; 25 | 26 | % Setup blank display 27 | display_array = - ones(pad + display_rows * (example_height + pad), ... 28 | pad + display_cols * (example_width + pad)); 29 | 30 | % Copy each example into a patch on the display array 31 | curr_ex = 1; 32 | for j = 1:display_rows 33 | for i = 1:display_cols 34 | if curr_ex > m, 35 | break; 36 | end 37 | % Copy the patch 38 | 39 | % Get the max value of the patch 40 | max_val = max(abs(X(curr_ex, :))); 41 | display_array(pad + (j - 1) * (example_height + pad) + (1:example_height), ... 42 | pad + (i - 1) * (example_width + pad) + (1:example_width)) = ... 43 | reshape(X(curr_ex, :), example_height, example_width) / max_val; 44 | curr_ex = curr_ex + 1; 45 | end 46 | if curr_ex > m, 47 | break; 48 | end 49 | end 50 | 51 | % Display Image 52 | h = imagesc(display_array, [-1 1]); 53 | 54 | % Do not show axis 55 | axis image off 56 | 57 | drawnow; 58 | 59 | end 60 | -------------------------------------------------------------------------------- /Exercise 4/ex4/ex4.m: -------------------------------------------------------------------------------- 1 | %% Machine Learning Online Class - Exercise 4 Neural Network Learning 2 | 3 | % Instructions 4 | % ------------ 5 | % 6 | % This file contains code that helps you get started on the 7 | % linear exercise. You will need to complete the following functions 8 | % in this exericse: 9 | % 10 | % sigmoidGradient.m 11 | % randInitializeWeights.m 12 | % nnCostFunction.m 13 | % 14 | % For this exercise, you will not need to change any code in this file, 15 | % or any other files other than those mentioned above. 16 | % 17 | 18 | %% Initialization 19 | clear ; close all; clc 20 | 21 | %% Setup the parameters you will use for this exercise 22 | input_layer_size = 400; % 20x20 Input Images of Digits 23 | hidden_layer_size = 25; % 25 hidden units 24 | num_labels = 10; % 10 labels, from 1 to 10 25 | % (note that we have mapped "0" to label 10) 26 | 27 | %% =========== Part 1: Loading and Visualizing Data ============= 28 | % We start the exercise by first loading and visualizing the dataset. 29 | % You will be working with a dataset that contains handwritten digits. 30 | % 31 | 32 | % Load Training Data 33 | fprintf('Loading and Visualizing Data ...\n') 34 | 35 | load('ex4data1.mat'); 36 | m = size(X, 1); 37 | 38 | % Randomly select 100 data points to display 39 | sel = randperm(size(X, 1)); 40 | sel = sel(1:100); 41 | 42 | displayData(X(sel, :)); 43 | 44 | fprintf('Program paused. Press enter to continue.\n'); 45 | pause; 46 | 47 | 48 | %% ================ Part 2: Loading Parameters ================ 49 | % In this part of the exercise, we load some pre-initialized 50 | % neural network parameters. 
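% (Once loaded, Theta1 and Theta2 are "unrolled" into a single column vector,
%  nn_params = [Theta1(:) ; Theta2(:)], which is the form that fmincg and
%  nnCostFunction expect; nnCostFunction reshapes it back into the two matrices.)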
51 | 52 | fprintf('\nLoading Saved Neural Network Parameters ...\n') 53 | 54 | % Load the weights into variables Theta1 and Theta2 55 | load('ex4weights.mat'); 56 | 57 | % Unroll parameters 58 | nn_params = [Theta1(:) ; Theta2(:)]; 59 | 60 | %% ================ Part 3: Compute Cost (Feedforward) ================ 61 | % To the neural network, you should first start by implementing the 62 | % feedforward part of the neural network that returns the cost only. You 63 | % should complete the code in nnCostFunction.m to return cost. After 64 | % implementing the feedforward to compute the cost, you can verify that 65 | % your implementation is correct by verifying that you get the same cost 66 | % as us for the fixed debugging parameters. 67 | % 68 | % We suggest implementing the feedforward cost *without* regularization 69 | % first so that it will be easier for you to debug. Later, in part 4, you 70 | % will get to implement the regularized cost. 71 | % 72 | fprintf('\nFeedforward Using Neural Network ...\n') 73 | 74 | % Weight regularization parameter (we set this to 0 here). 75 | lambda = 0; 76 | 77 | J = nnCostFunction(nn_params, input_layer_size, hidden_layer_size, ... 78 | num_labels, X, y, lambda); 79 | 80 | fprintf(['Cost at parameters (loaded from ex4weights): %f '... 81 | '\n(this value should be about 0.287629)\n'], J); 82 | 83 | fprintf('\nProgram paused. Press enter to continue.\n'); 84 | pause; 85 | 86 | %% =============== Part 4: Implement Regularization =============== 87 | % Once your cost function implementation is correct, you should now 88 | % continue to implement the regularization with the cost. 89 | % 90 | 91 | fprintf('\nChecking Cost Function (w/ Regularization) ... \n') 92 | 93 | % Weight regularization parameter (we set this to 1 here). 94 | lambda = 1; 95 | 96 | J = nnCostFunction(nn_params, input_layer_size, hidden_layer_size, ... 97 | num_labels, X, y, lambda); 98 | 99 | fprintf(['Cost at parameters (loaded from ex4weights): %f '... 100 | '\n(this value should be about 0.383770)\n'], J); 101 | 102 | fprintf('Program paused. Press enter to continue.\n'); 103 | pause; 104 | 105 | 106 | %% ================ Part 5: Sigmoid Gradient ================ 107 | % Before you start implementing the neural network, you will first 108 | % implement the gradient for the sigmoid function. You should complete the 109 | % code in the sigmoidGradient.m file. 110 | % 111 | 112 | fprintf('\nEvaluating sigmoid gradient...\n') 113 | 114 | g = sigmoidGradient([1 -0.5 0 0.5 1]); 115 | fprintf('Sigmoid gradient evaluated at [1 -0.5 0 0.5 1]:\n '); 116 | fprintf('%f ', g); 117 | fprintf('\n\n'); 118 | 119 | fprintf('Program paused. Press enter to continue.\n'); 120 | pause; 121 | 122 | 123 | %% ================ Part 6: Initializing Pameters ================ 124 | % In this part of the exercise, you will be starting to implment a two 125 | % layer neural network that classifies digits. 
You will start by 126 | % implementing a function to initialize the weights of the neural network 127 | % (randInitializeWeights.m) 128 | 129 | fprintf('\nInitializing Neural Network Parameters ...\n') 130 | 131 | initial_Theta1 = randInitializeWeights(input_layer_size, hidden_layer_size); 132 | initial_Theta2 = randInitializeWeights(hidden_layer_size, num_labels); 133 | 134 | % Unroll parameters 135 | initial_nn_params = [initial_Theta1(:) ; initial_Theta2(:)]; 136 | 137 | 138 | %% =============== Part 7: Implement Backpropagation =============== 139 | % Once your cost matches up with ours, you should proceed to implement the 140 | % backpropagation algorithm for the neural network. You should add to the 141 | % code you've written in nnCostFunction.m to return the partial 142 | % derivatives of the parameters. 143 | % 144 | fprintf('\nChecking Backpropagation... \n'); 145 | 146 | % Check gradients by running checkNNGradients 147 | checkNNGradients; 148 | 149 | fprintf('\nProgram paused. Press enter to continue.\n'); 150 | pause; 151 | 152 | 153 | %% =============== Part 8: Implement Regularization =============== 154 | % Once your backpropagation implementation is correct, you should now 155 | % continue to implement the regularization with the cost and gradient. 156 | % 157 | 158 | fprintf('\nChecking Backpropagation (w/ Regularization) ... \n') 159 | 160 | % Check gradients by running checkNNGradients 161 | lambda = 3; 162 | checkNNGradients(lambda); 163 | 164 | % Also output the costFunction debugging values 165 | debug_J = nnCostFunction(nn_params, input_layer_size, ... 166 | hidden_layer_size, num_labels, X, y, lambda); 167 | 168 | fprintf(['\n\nCost at (fixed) debugging parameters (w/ lambda = 10): %f ' ... 169 | '\n(this value should be about 0.576051)\n\n'], debug_J); 170 | 171 | fprintf('Program paused. Press enter to continue.\n'); 172 | pause; 173 | 174 | 175 | %% =================== Part 8: Training NN =================== 176 | % You have now implemented all the code necessary to train a neural 177 | % network. To train your neural network, we will now use "fmincg", which 178 | % is a function which works similarly to "fminunc". Recall that these 179 | % advanced optimizers are able to train our cost functions efficiently as 180 | % long as we provide them with the gradient computations. 181 | % 182 | fprintf('\nTraining Neural Network... \n') 183 | 184 | % After you have completed the assignment, change the MaxIter to a larger 185 | % value to see how more training helps. 186 | options = optimset('MaxIter', 50); 187 | 188 | % You should also try different values of lambda 189 | lambda = 1; 190 | 191 | % Create "short hand" for the cost function to be minimized 192 | costFunction = @(p) nnCostFunction(p, ... 193 | input_layer_size, ... 194 | hidden_layer_size, ... 195 | num_labels, X, y, lambda); 196 | 197 | % Now, costFunction is a function that takes in only one argument (the 198 | % neural network parameters) 199 | [nn_params, cost] = fmincg(costFunction, initial_nn_params, options); 200 | 201 | % Obtain Theta1 and Theta2 back from nn_params 202 | Theta1 = reshape(nn_params(1:hidden_layer_size * (input_layer_size + 1)), ... 203 | hidden_layer_size, (input_layer_size + 1)); 204 | 205 | Theta2 = reshape(nn_params((1 + (hidden_layer_size * (input_layer_size + 1))):end), ... 206 | num_labels, (hidden_layer_size + 1)); 207 | 208 | fprintf('Program paused. 
Press enter to continue.\n'); 209 | pause; 210 | 211 | 212 | %% ================= Part 9: Visualize Weights ================= 213 | % You can now "visualize" what the neural network is learning by 214 | % displaying the hidden units to see what features they are capturing in 215 | % the data. 216 | 217 | fprintf('\nVisualizing Neural Network... \n') 218 | 219 | displayData(Theta1(:, 2:end)); 220 | 221 | fprintf('\nProgram paused. Press enter to continue.\n'); 222 | pause; 223 | 224 | %% ================= Part 10: Implement Predict ================= 225 | % After training the neural network, we would like to use it to predict 226 | % the labels. You will now implement the "predict" function to use the 227 | % neural network to predict the labels of the training set. This lets 228 | % you compute the training set accuracy. 229 | 230 | pred = predict(Theta1, Theta2, X); 231 | 232 | fprintf('\nTraining Set Accuracy: %f\n', mean(double(pred == y)) * 100); 233 | 234 | 235 | -------------------------------------------------------------------------------- /Exercise 4/ex4/ex4data1.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rieder91/MachineLearning/f6708f216326cb5c9e9e5c3afc912060bfa10486/Exercise 4/ex4/ex4data1.mat -------------------------------------------------------------------------------- /Exercise 4/ex4/ex4weights.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rieder91/MachineLearning/f6708f216326cb5c9e9e5c3afc912060bfa10486/Exercise 4/ex4/ex4weights.mat -------------------------------------------------------------------------------- /Exercise 4/ex4/fmincg.m: -------------------------------------------------------------------------------- 1 | function [X, fX, i] = fmincg(f, X, options, P1, P2, P3, P4, P5) 2 | % Minimize a continuous differentialble multivariate function. Starting point 3 | % is given by "X" (D by 1), and the function named in the string "f", must 4 | % return a function value and a vector of partial derivatives. The Polack- 5 | % Ribiere flavour of conjugate gradients is used to compute search directions, 6 | % and a line search using quadratic and cubic polynomial approximations and the 7 | % Wolfe-Powell stopping criteria is used together with the slope ratio method 8 | % for guessing initial step sizes. Additionally a bunch of checks are made to 9 | % make sure that exploration is taking place and that extrapolation will not 10 | % be unboundedly large. The "length" gives the length of the run: if it is 11 | % positive, it gives the maximum number of line searches, if negative its 12 | % absolute gives the maximum allowed number of function evaluations. You can 13 | % (optionally) give "length" a second component, which will indicate the 14 | % reduction in function value to be expected in the first line-search (defaults 15 | % to 1.0). The function returns when either its length is up, or if no further 16 | % progress can be made (ie, we are at a minimum, or so close that due to 17 | % numerical problems, we cannot get any closer). If the function terminates 18 | % within a few iterations, it could be an indication that the function value 19 | % and derivatives are not consistent (ie, there may be a bug in the 20 | % implementation of your "f" function). 
The function returns the found 21 | % solution "X", a vector of function values "fX" indicating the progress made 22 | % and "i" the number of iterations (line searches or function evaluations, 23 | % depending on the sign of "length") used. 24 | % 25 | % Usage: [X, fX, i] = fmincg(f, X, options, P1, P2, P3, P4, P5) 26 | % 27 | % See also: checkgrad 28 | % 29 | % Copyright (C) 2001 and 2002 by Carl Edward Rasmussen. Date 2002-02-13 30 | % 31 | % 32 | % (C) Copyright 1999, 2000 & 2001, Carl Edward Rasmussen 33 | % 34 | % Permission is granted for anyone to copy, use, or modify these 35 | % programs and accompanying documents for purposes of research or 36 | % education, provided this copyright notice is retained, and note is 37 | % made of any changes that have been made. 38 | % 39 | % These programs and documents are distributed without any warranty, 40 | % express or implied. As the programs were written for research 41 | % purposes only, they have not been tested to the degree that would be 42 | % advisable in any important application. All use of these programs is 43 | % entirely at the user's own risk. 44 | % 45 | % [ml-class] Changes Made: 46 | % 1) Function name and argument specifications 47 | % 2) Output display 48 | % 49 | 50 | % Read options 51 | if exist('options', 'var') && ~isempty(options) && isfield(options, 'MaxIter') 52 | length = options.MaxIter; 53 | else 54 | length = 100; 55 | end 56 | 57 | 58 | RHO = 0.01; % a bunch of constants for line searches 59 | SIG = 0.5; % RHO and SIG are the constants in the Wolfe-Powell conditions 60 | INT = 0.1; % don't reevaluate within 0.1 of the limit of the current bracket 61 | EXT = 3.0; % extrapolate maximum 3 times the current bracket 62 | MAX = 20; % max 20 function evaluations per line search 63 | RATIO = 100; % maximum allowed slope ratio 64 | 65 | argstr = ['feval(f, X']; % compose string used to call function 66 | for i = 1:(nargin - 3) 67 | argstr = [argstr, ',P', int2str(i)]; 68 | end 69 | argstr = [argstr, ')']; 70 | 71 | if max(size(length)) == 2, red=length(2); length=length(1); else red=1; end 72 | S=['Iteration ']; 73 | 74 | i = 0; % zero the run length counter 75 | ls_failed = 0; % no previous line search has failed 76 | fX = []; 77 | [f1 df1] = eval(argstr); % get function value and gradient 78 | i = i + (length<0); % count epochs?! 79 | s = -df1; % search direction is steepest 80 | d1 = -s'*s; % this is the slope 81 | z1 = red/(1-d1); % initial step is red/(|s|+1) 82 | 83 | while i < abs(length) % while not finished 84 | i = i + (length>0); % count iterations?! 85 | 86 | X0 = X; f0 = f1; df0 = df1; % make a copy of current values 87 | X = X + z1*s; % begin line search 88 | [f2 df2] = eval(argstr); 89 | i = i + (length<0); % count epochs?! 90 | d2 = df2'*s; 91 | f3 = f1; d3 = d1; z3 = -z1; % initialize point 3 equal to point 1 92 | if length>0, M = MAX; else M = min(MAX, -length-i); end 93 | success = 0; limit = -1; % initialize quanteties 94 | while 1 95 | while ((f2 > f1+z1*RHO*d1) | (d2 > -SIG*d1)) & (M > 0) 96 | limit = z1; % tighten the bracket 97 | if f2 > f1 98 | z2 = z3 - (0.5*d3*z3*z3)/(d3*z3+f2-f3); % quadratic fit 99 | else 100 | A = 6*(f2-f3)/z3+3*(d2+d3); % cubic fit 101 | B = 3*(f3-f2)-z3*(d3+2*d2); 102 | z2 = (sqrt(B*B-A*d2*z3*z3)-B)/A; % numerical error possible - ok! 
103 | end 104 | if isnan(z2) | isinf(z2) 105 | z2 = z3/2; % if we had a numerical problem then bisect 106 | end 107 | z2 = max(min(z2, INT*z3),(1-INT)*z3); % don't accept too close to limits 108 | z1 = z1 + z2; % update the step 109 | X = X + z2*s; 110 | [f2 df2] = eval(argstr); 111 | M = M - 1; i = i + (length<0); % count epochs?! 112 | d2 = df2'*s; 113 | z3 = z3-z2; % z3 is now relative to the location of z2 114 | end 115 | if f2 > f1+z1*RHO*d1 | d2 > -SIG*d1 116 | break; % this is a failure 117 | elseif d2 > SIG*d1 118 | success = 1; break; % success 119 | elseif M == 0 120 | break; % failure 121 | end 122 | A = 6*(f2-f3)/z3+3*(d2+d3); % make cubic extrapolation 123 | B = 3*(f3-f2)-z3*(d3+2*d2); 124 | z2 = -d2*z3*z3/(B+sqrt(B*B-A*d2*z3*z3)); % num. error possible - ok! 125 | if ~isreal(z2) | isnan(z2) | isinf(z2) | z2 < 0 % num prob or wrong sign? 126 | if limit < -0.5 % if we have no upper limit 127 | z2 = z1 * (EXT-1); % the extrapolate the maximum amount 128 | else 129 | z2 = (limit-z1)/2; % otherwise bisect 130 | end 131 | elseif (limit > -0.5) & (z2+z1 > limit) % extraplation beyond max? 132 | z2 = (limit-z1)/2; % bisect 133 | elseif (limit < -0.5) & (z2+z1 > z1*EXT) % extrapolation beyond limit 134 | z2 = z1*(EXT-1.0); % set to extrapolation limit 135 | elseif z2 < -z3*INT 136 | z2 = -z3*INT; 137 | elseif (limit > -0.5) & (z2 < (limit-z1)*(1.0-INT)) % too close to limit? 138 | z2 = (limit-z1)*(1.0-INT); 139 | end 140 | f3 = f2; d3 = d2; z3 = -z2; % set point 3 equal to point 2 141 | z1 = z1 + z2; X = X + z2*s; % update current estimates 142 | [f2 df2] = eval(argstr); 143 | M = M - 1; i = i + (length<0); % count epochs?! 144 | d2 = df2'*s; 145 | end % end of line search 146 | 147 | if success % if line search succeeded 148 | f1 = f2; fX = [fX' f1]'; 149 | fprintf('%s %4i | Cost: %4.6e\r', S, i, f1); 150 | s = (df2'*df2-df1'*df2)/(df1'*df1)*s - df2; % Polack-Ribiere direction 151 | tmp = df1; df1 = df2; df2 = tmp; % swap derivatives 152 | d2 = df1'*s; 153 | if d2 > 0 % new slope must be negative 154 | s = -df1; % otherwise use steepest direction 155 | d2 = -s'*s; 156 | end 157 | z1 = z1 * min(RATIO, d1/(d2-realmin)); % slope ratio but max RATIO 158 | d1 = d2; 159 | ls_failed = 0; % this line search did not fail 160 | else 161 | X = X0; f1 = f0; df1 = df0; % restore point from before failed line search 162 | if ls_failed | i > abs(length) % line search failed twice in a row 163 | break; % or we ran out of time, so we give up 164 | end 165 | tmp = df1; df1 = df2; df2 = tmp; % swap derivatives 166 | s = -df1; % try steepest 167 | d1 = -s'*s; 168 | z1 = 1/(1-d1); 169 | ls_failed = 1; % this line search failed 170 | end 171 | if exist('OCTAVE_VERSION') 172 | fflush(stdout); 173 | end 174 | end 175 | fprintf('\n'); 176 | -------------------------------------------------------------------------------- /Exercise 4/ex4/nnCostFunction.m: -------------------------------------------------------------------------------- 1 | function [J grad] = nnCostFunction(nn_params, ... 2 | input_layer_size, ... 3 | hidden_layer_size, ... 4 | num_labels, ... 5 | X, y, lambda) 6 | %NNCOSTFUNCTION Implements the neural network cost function for a two layer 7 | %neural network which performs classification 8 | % [J grad] = NNCOSTFUNCTON(nn_params, hidden_layer_size, num_labels, ... 9 | % X, y, lambda) computes the cost and gradient of the neural network. The 10 | % parameters for the neural network are "unrolled" into the vector 11 | % nn_params and need to be converted back into the weight matrices. 
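% For the network used in ex4.m (400 inputs, 25 hidden units, 10 output labels)
% this means, concretely:
%   Theta1 = reshape(nn_params(1:25*401),     25, 401);   % 25 x 401
%   Theta2 = reshape(nn_params(25*401+1:end), 10, 26);    % 10 x 26
% which is exactly what the generic reshape calls below compute.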
12 | % 13 | % The returned parameter grad should be a "unrolled" vector of the 14 | % partial derivatives of the neural network. 15 | % 16 | 17 | % Reshape nn_params back into the parameters Theta1 and Theta2, the weight matrices 18 | % for our 2 layer neural network 19 | Theta1 = reshape(nn_params(1:hidden_layer_size * (input_layer_size + 1)), ... 20 | hidden_layer_size, (input_layer_size + 1)); 21 | 22 | Theta2 = reshape(nn_params((1 + (hidden_layer_size * (input_layer_size + 1))):end), ... 23 | num_labels, (hidden_layer_size + 1)); 24 | 25 | % Setup some useful variables 26 | m = size(X, 1); 27 | 28 | % You need to return the following variables correctly 29 | J = 0; 30 | Theta1_grad = zeros(size(Theta1)); 31 | Theta2_grad = zeros(size(Theta2)); 32 | 33 | % ====================== YOUR CODE HERE ====================== 34 | % Instructions: You should complete the code by working through the 35 | % following parts. 36 | % 37 | % Part 1: Feedforward the neural network and return the cost in the 38 | % variable J. After implementing Part 1, you can verify that your 39 | % cost function computation is correct by verifying the cost 40 | % computed in ex4.m 41 | % 42 | % Part 2: Implement the backpropagation algorithm to compute the gradients 43 | % Theta1_grad and Theta2_grad. You should return the partial derivatives of 44 | % the cost function with respect to Theta1 and Theta2 in Theta1_grad and 45 | % Theta2_grad, respectively. After implementing Part 2, you can check 46 | % that your implementation is correct by running checkNNGradients 47 | % 48 | % Note: The vector y passed into the function is a vector of labels 49 | % containing values from 1..K. You need to map this vector into a 50 | % binary vector of 1's and 0's to be used with the neural network 51 | % cost function. 52 | % 53 | % Hint: We recommend implementing backpropagation using a for-loop 54 | % over the training examples if you are implementing it for the 55 | % first time. 56 | % 57 | % Part 3: Implement regularization with the cost function and gradients. 58 | % 59 | % Hint: You can implement this around the code for 60 | % backpropagation. That is, you can compute the gradients for 61 | % the regularization separately and then add them to Theta1_grad 62 | % and Theta2_grad from Part 2. 
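% As a minimal sketch of the label mapping mentioned in the Note above, y
% (with values 1..K) can be expanded into a binary matrix by indexing an
% identity matrix; this is equivalent to the repmat-based comparison used below:
%   I = eye(num_labels);
%   Y = I(y, :);    % m x num_labels, row t is the one-hot encoding of y(t)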
63 | % 64 | 65 | a1 = [ones(m, 1) X]; 66 | a1 = [ones(m, 1) X]; 67 | a2 = [ones(m, 1) sigmoid(a1 * Theta1')]; 68 | sig = sigmoid(a2 * Theta2'); 69 | yVector = repmat([1:num_labels], m, 1) == repmat(y, 1, num_labels); 70 | cost = -yVector .* log(sig) - (1 - yVector) .* log(1 - sig); 71 | 72 | Theta1NoBias = Theta1(:, 2:end); 73 | Theta2NoBias = Theta2(:, 2:end); 74 | J = (1 / m) * sum(sum(cost)) + (lambda / (2 * m)) * (sum(sum(Theta1NoBias .^ 2)) + sum(sum(Theta2NoBias .^ 2))); 75 | 76 | delta1 = zeros(size(Theta1)); 77 | delta2 = zeros(size(Theta2)); 78 | 79 | for t = 1:m, 80 | a1t = a1(t,:)'; 81 | a2t = a2(t,:)'; 82 | sigt = sig(t,:)'; 83 | yVectorT = yVector(t,:)'; 84 | 85 | d3t = sigt - yVectorT; 86 | 87 | z2t = [1; Theta1 * a1t]; 88 | d2t = Theta2' * d3t .* sigmoidGradient(z2t); 89 | 90 | delta1 = delta1 + d2t(2:end) * a1t'; 91 | delta2 = delta2 + d3t * a2t'; 92 | end; 93 | 94 | Theta1ZeroBias = [ zeros(size(Theta1, 1), 1) Theta1NoBias ]; 95 | Theta2ZeroBias = [ zeros(size(Theta2, 1), 1) Theta2NoBias ]; 96 | Theta1_grad = (1 / m) * delta1 + (lambda / m) * Theta1ZeroBias; 97 | Theta2_grad = (1 / m) * delta2 + (lambda / m) * Theta2ZeroBias; 98 | 99 | % ========================================================================= 100 | 101 | % Unroll gradients 102 | grad = [Theta1_grad(:) ; Theta2_grad(:)]; 103 | 104 | 105 | end 106 | -------------------------------------------------------------------------------- /Exercise 4/ex4/predict.m: -------------------------------------------------------------------------------- 1 | function p = predict(Theta1, Theta2, X) 2 | %PREDICT Predict the label of an input given a trained neural network 3 | % p = PREDICT(Theta1, Theta2, X) outputs the predicted label of X given the 4 | % trained weights of a neural network (Theta1, Theta2) 5 | 6 | % Useful values 7 | m = size(X, 1); 8 | num_labels = size(Theta2, 1); 9 | 10 | % You need to return the following variables correctly 11 | p = zeros(size(X, 1), 1); 12 | 13 | h1 = sigmoid([ones(m, 1) X] * Theta1'); 14 | h2 = sigmoid([ones(m, 1) h1] * Theta2'); 15 | [dummy, p] = max(h2, [], 2); 16 | 17 | % ========================================================================= 18 | 19 | 20 | end 21 | -------------------------------------------------------------------------------- /Exercise 4/ex4/randInitializeWeights.m: -------------------------------------------------------------------------------- 1 | function W = randInitializeWeights(L_in, L_out) 2 | %RANDINITIALIZEWEIGHTS Randomly initialize the weights of a layer with L_in 3 | %incoming connections and L_out outgoing connections 4 | % W = RANDINITIALIZEWEIGHTS(L_in, L_out) randomly initializes the weights 5 | % of a layer with L_in incoming connections and L_out outgoing 6 | % connections. 7 | % 8 | % Note that W should be set to a matrix of size(L_out, 1 + L_in) as 9 | % the column row of W handles the "bias" terms 10 | % 11 | 12 | % You need to return the following variables correctly 13 | W = zeros(L_out, 1 + L_in); 14 | 15 | % ====================== YOUR CODE HERE ====================== 16 | % Instructions: Initialize W randomly so that we break the symmetry while 17 | % training the neural network. 
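% One common heuristic is to scale the random range with the layer sizes
% rather than hard-coding it; for the 400-unit input layer this comes out to
% roughly the fixed 0.12 used below:
%   epsilon_init = sqrt(6) / sqrt(L_in + L_out);
%   W = rand(L_out, 1 + L_in) * 2 * epsilon_init - epsilon_init;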
18 | % 19 | % Note: The first column of W corresponds to the parameters for the bias units 20 | % 21 | 22 | epsilon = 0.12; 23 | W = rand(L_out, 1 + L_in) * 2 * epsilon - epsilon; 24 | 25 | % ========================================================================= 26 | 27 | end 28 | -------------------------------------------------------------------------------- /Exercise 4/ex4/sigmoid.m: --------------------------------------------------------------------------------
1 | function g = sigmoid(z) 2 | %SIGMOID Compute sigmoid function 3 | % g = SIGMOID(z) computes the sigmoid of z. 4 | 5 | g = 1.0 ./ (1.0 + exp(-z)); 6 | end 7 | -------------------------------------------------------------------------------- /Exercise 4/ex4/sigmoidGradient.m: --------------------------------------------------------------------------------
1 | function g = sigmoidGradient(z) 2 | %SIGMOIDGRADIENT returns the gradient of the sigmoid function 3 | %evaluated at z 4 | % g = SIGMOIDGRADIENT(z) computes the gradient of the sigmoid function 5 | % evaluated at z. This should work regardless of whether z is a matrix or a 6 | % vector. In particular, if z is a vector or matrix, you should return 7 | % the gradient for each element. 8 | 9 | g = zeros(size(z)); 10 | 11 | % ====================== YOUR CODE HERE ====================== 12 | % Instructions: Compute the gradient of the sigmoid function evaluated at 13 | % each value of z (z can be a matrix, vector or scalar). 14 | 15 | s = sigmoid(z); % use a name other than "sigmoid" so the function is not shadowed 16 | g = s .* (1 - s); 17 | 18 | % ============================================================= 19 | 20 | 21 | 22 | 23 | end 24 | -------------------------------------------------------------------------------- /Exercise 4/ex4/submitWeb.m: --------------------------------------------------------------------------------
1 | % submitWeb Creates files from your code and output for web submission. 2 | % 3 | % If the submit function does not work for you, use the web-submission mechanism. 4 | % Call this function to produce a file for the part you wish to submit. Then, 5 | % submit the file to the class servers using the "Web Submission" button on the 6 | % Programming Exercises page on the course website. 7 | % 8 | % You should call this function without arguments (submitWeb), to receive 9 | % an interactive prompt for submission; optionally you can call it with the partID 10 | % if you so wish. Make sure your working directory is set to the directory 11 | % containing the submitWeb.m file and your assignment files. 12 | 13 | function submitWeb(partId) 14 | if ~exist('partId', 'var') || isempty(partId) 15 | partId = []; 16 | end 17 | 18 | submit(partId, 1); 19 | end 20 | 21 | -------------------------------------------------------------------------------- /Exercise 5/ex5.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rieder91/MachineLearning/f6708f216326cb5c9e9e5c3afc912060bfa10486/Exercise 5/ex5.pdf -------------------------------------------------------------------------------- /Exercise 5/ex5/ex5.m: -------------------------------------------------------------------------------- 1 | %% Machine Learning Online Class 2 | % Exercise 5 | Regularized Linear Regression and Bias-Variance 3 | % 4 | % Instructions 5 | % ------------ 6 | % 7 | % This file contains code that helps you get started on the 8 | % exercise.
You will need to complete the following functions: 9 | % 10 | % linearRegCostFunction.m 11 | % learningCurve.m 12 | % validationCurve.m 13 | % 14 | % For this exercise, you will not need to change any code in this file, 15 | % or any other files other than those mentioned above. 16 | % 17 | 18 | %% Initialization 19 | clear ; close all; clc 20 | 21 | %% =========== Part 1: Loading and Visualizing Data ============= 22 | % We start the exercise by first loading and visualizing the dataset. 23 | % The following code will load the dataset into your environment and plot 24 | % the data. 25 | % 26 | 27 | % Load Training Data 28 | fprintf('Loading and Visualizing Data ...\n') 29 | 30 | % Load from ex5data1: 31 | % You will have X, y, Xval, yval, Xtest, ytest in your environment 32 | load ('ex5data1.mat'); 33 | 34 | % m = Number of examples 35 | m = size(X, 1); 36 | 37 | % Plot training data 38 | plot(X, y, 'rx', 'MarkerSize', 10, 'LineWidth', 1.5); 39 | xlabel('Change in water level (x)'); 40 | ylabel('Water flowing out of the dam (y)'); 41 | 42 | fprintf('Program paused. Press enter to continue.\n'); 43 | pause; 44 | 45 | %% =========== Part 2: Regularized Linear Regression Cost ============= 46 | % You should now implement the cost function for regularized linear 47 | % regression. 48 | % 49 | 50 | theta = [1 ; 1]; 51 | J = linearRegCostFunction([ones(m, 1) X], y, theta, 1); 52 | 53 | fprintf(['Cost at theta = [1 ; 1]: %f '... 54 | '\n(this value should be about 303.993192)\n'], J); 55 | 56 | fprintf('Program paused. Press enter to continue.\n'); 57 | pause; 58 | 59 | %% =========== Part 3: Regularized Linear Regression Gradient ============= 60 | % You should now implement the gradient for regularized linear 61 | % regression. 62 | % 63 | 64 | theta = [1 ; 1]; 65 | [J, grad] = linearRegCostFunction([ones(m, 1) X], y, theta, 1); 66 | 67 | fprintf(['Gradient at theta = [1 ; 1]: [%f; %f] '... 68 | '\n(this value should be about [-15.303016; 598.250744])\n'], ... 69 | grad(1), grad(2)); 70 | 71 | fprintf('Program paused. Press enter to continue.\n'); 72 | pause; 73 | 74 | 75 | %% =========== Part 4: Train Linear Regression ============= 76 | % Once you have implemented the cost and gradient correctly, the 77 | % trainLinearReg function will use your cost function to train 78 | % regularized linear regression. 79 | % 80 | % Write Up Note: The data is non-linear, so this will not give a great 81 | % fit. 82 | % 83 | 84 | % Train linear regression with lambda = 0 85 | lambda = 0; 86 | [theta] = trainLinearReg([ones(m, 1) X], y, lambda); 87 | 88 | % Plot fit over the data 89 | plot(X, y, 'rx', 'MarkerSize', 10, 'LineWidth', 1.5); 90 | xlabel('Change in water level (x)'); 91 | ylabel('Water flowing out of the dam (y)'); 92 | hold on; 93 | plot(X, [ones(m, 1) X]*theta, '--', 'LineWidth', 2) 94 | hold off; 95 | 96 | fprintf('Program paused. Press enter to continue.\n'); 97 | pause; 98 | 99 | 100 | %% =========== Part 5: Learning Curve for Linear Regression ============= 101 | % Next, you should implement the learningCurve function. 102 | % 103 | % Write Up Note: Since the model is underfitting the data, we expect to 104 | % see a graph with "high bias" -- slide 8 in ML-advice.pdf 105 | % 106 | 107 | lambda = 0; 108 | [error_train, error_val] = ... 109 | learningCurve([ones(m, 1) X], y, ... 110 | [ones(size(Xval, 1), 1) Xval], yval, ... 
111 | lambda); 112 | 113 | plot(1:m, error_train, 1:m, error_val); 114 | title('Learning curve for linear regression') 115 | legend('Train', 'Cross Validation') 116 | xlabel('Number of training examples') 117 | ylabel('Error') 118 | axis([0 13 0 150]) 119 | 120 | fprintf('# Training Examples\tTrain Error\tCross Validation Error\n'); 121 | for i = 1:m 122 | fprintf(' \t%d\t\t%f\t%f\n', i, error_train(i), error_val(i)); 123 | end 124 | 125 | fprintf('Program paused. Press enter to continue.\n'); 126 | pause; 127 | 128 | %% =========== Part 6: Feature Mapping for Polynomial Regression ============= 129 | % One solution to this is to use polynomial regression. You should now 130 | % complete polyFeatures to map each example into its powers 131 | % 132 | 133 | p = 8; 134 | 135 | % Map X onto Polynomial Features and Normalize 136 | X_poly = polyFeatures(X, p); 137 | [X_poly, mu, sigma] = featureNormalize(X_poly); % Normalize 138 | X_poly = [ones(m, 1), X_poly]; % Add Ones 139 | 140 | % Map X_poly_test and normalize (using mu and sigma) 141 | X_poly_test = polyFeatures(Xtest, p); 142 | X_poly_test = bsxfun(@minus, X_poly_test, mu); 143 | X_poly_test = bsxfun(@rdivide, X_poly_test, sigma); 144 | X_poly_test = [ones(size(X_poly_test, 1), 1), X_poly_test]; % Add Ones 145 | 146 | % Map X_poly_val and normalize (using mu and sigma) 147 | X_poly_val = polyFeatures(Xval, p); 148 | X_poly_val = bsxfun(@minus, X_poly_val, mu); 149 | X_poly_val = bsxfun(@rdivide, X_poly_val, sigma); 150 | X_poly_val = [ones(size(X_poly_val, 1), 1), X_poly_val]; % Add Ones 151 | 152 | fprintf('Normalized Training Example 1:\n'); 153 | fprintf(' %f \n', X_poly(1, :)); 154 | 155 | fprintf('\nProgram paused. Press enter to continue.\n'); 156 | pause; 157 | 158 | 159 | 160 | %% =========== Part 7: Learning Curve for Polynomial Regression ============= 161 | % Now, you will get to experiment with polynomial regression with multiple 162 | % values of lambda. The code below runs polynomial regression with 163 | % lambda = 0. You should try running the code with different values of 164 | % lambda to see how the fit and learning curve change. 165 | % 166 | 167 | lambda = 0; 168 | [theta] = trainLinearReg(X_poly, y, lambda); 169 | 170 | % Plot training data and fit 171 | figure(1); 172 | plot(X, y, 'rx', 'MarkerSize', 10, 'LineWidth', 1.5); 173 | plotFit(min(X), max(X), mu, sigma, theta, p); 174 | xlabel('Change in water level (x)'); 175 | ylabel('Water flowing out of the dam (y)'); 176 | title (sprintf('Polynomial Regression Fit (lambda = %f)', lambda)); 177 | 178 | figure(2); 179 | [error_train, error_val] = ... 180 | learningCurve(X_poly, y, X_poly_val, yval, lambda); 181 | plot(1:m, error_train, 1:m, error_val); 182 | 183 | title(sprintf('Polynomial Regression Learning Curve (lambda = %f)', lambda)); 184 | xlabel('Number of training examples') 185 | ylabel('Error') 186 | axis([0 13 0 100]) 187 | legend('Train', 'Cross Validation') 188 | 189 | fprintf('Polynomial Regression (lambda = %f)\n\n', lambda); 190 | fprintf('# Training Examples\tTrain Error\tCross Validation Error\n'); 191 | for i = 1:m 192 | fprintf(' \t%d\t\t%f\t%f\n', i, error_train(i), error_val(i)); 193 | end 194 | 195 | fprintf('Program paused. Press enter to continue.\n'); 196 | pause; 197 | 198 | %% =========== Part 8: Validation for Selecting Lambda ============= 199 | % You will now implement validationCurve to test various values of 200 | % lambda on a validation set. You will then use this to select the 201 | % "best" lambda value. 
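% A minimal sketch of how the outputs of validationCurve below are usually
% turned into a concrete choice of lambda (kept as a comment so the flow of
% the original script is unchanged):
%
%     [minErr, bestIdx] = min(error_val);
%     bestLambda = lambda_vec(bestIdx);
%     fprintf('Lowest cross validation error at lambda = %f\n', bestLambda);
%
% i.e. the "best" lambda is simply the one with the smallest cross
% validation error.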
202 | % 203 | 204 | [lambda_vec, error_train, error_val] = ... 205 | validationCurve(X_poly, y, X_poly_val, yval); 206 | 207 | close all; 208 | plot(lambda_vec, error_train, lambda_vec, error_val); 209 | legend('Train', 'Cross Validation'); 210 | xlabel('lambda'); 211 | ylabel('Error'); 212 | 213 | fprintf('lambda\t\tTrain Error\tValidation Error\n'); 214 | for i = 1:length(lambda_vec) 215 | fprintf(' %f\t%f\t%f\n', ... 216 | lambda_vec(i), error_train(i), error_val(i)); 217 | end 218 | 219 | fprintf('Program paused. Press enter to continue.\n'); 220 | pause; 221 | -------------------------------------------------------------------------------- /Exercise 5/ex5/ex5data1.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rieder91/MachineLearning/f6708f216326cb5c9e9e5c3afc912060bfa10486/Exercise 5/ex5/ex5data1.mat -------------------------------------------------------------------------------- /Exercise 5/ex5/featureNormalize.m: -------------------------------------------------------------------------------- 1 | function [X_norm, mu, sigma] = featureNormalize(X) 2 | %FEATURENORMALIZE Normalizes the features in X 3 | % FEATURENORMALIZE(X) returns a normalized version of X where 4 | % the mean value of each feature is 0 and the standard deviation 5 | % is 1. This is often a good preprocessing step to do when 6 | % working with learning algorithms. 7 | 8 | mu = mean(X); 9 | X_norm = bsxfun(@minus, X, mu); 10 | 11 | sigma = std(X_norm); 12 | X_norm = bsxfun(@rdivide, X_norm, sigma); 13 | 14 | 15 | % ============================================================ 16 | 17 | end 18 | -------------------------------------------------------------------------------- /Exercise 5/ex5/learningCurve.m: -------------------------------------------------------------------------------- 1 | function [error_train, error_val] = ... 2 | learningCurve(X, y, Xval, yval, lambda) 3 | %LEARNINGCURVE Generates the train and cross validation set errors needed 4 | %to plot a learning curve 5 | % [error_train, error_val] = ... 6 | % LEARNINGCURVE(X, y, Xval, yval, lambda) returns the train and 7 | % cross validation set errors for a learning curve. In particular, 8 | % it returns two vectors of the same length - error_train and 9 | % error_val. Then, error_train(i) contains the training error for 10 | % i examples (and similarly for error_val(i)). 11 | % 12 | % In this function, you will compute the train and test errors for 13 | % dataset sizes from 1 up to m. In practice, when working with larger 14 | % datasets, you might want to do this in larger intervals. 15 | % 16 | 17 | % Number of training examples 18 | m = size(X, 1); 19 | 20 | % You need to return these values correctly 21 | error_train = zeros(m, 1); 22 | error_val = zeros(m, 1); 23 | 24 | % ====================== YOUR CODE HERE ====================== 25 | % Instructions: Fill in this function to return training errors in 26 | % error_train and the cross validation errors in error_val. 27 | % i.e., error_train(i) and 28 | % error_val(i) should give you the errors 29 | % obtained after training on i examples. 30 | % 31 | % Note: You should evaluate the training error on the first i training 32 | % examples (i.e., X(1:i, :) and y(1:i)). 33 | % 34 | % For the cross-validation error, you should instead evaluate on 35 | % the _entire_ cross validation set (Xval and yval). 
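% In equation form, error_train(i) is the unregularized cost
%
%     J(theta) = 1 / (2 * i) * sum((X(1:i, :) * theta - y(1:i)) .^ 2)
%
% evaluated with the theta obtained by training on those same i examples,
% and error_val(i) is the analogous cost computed over all of Xval / yval.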
36 | % 37 | % Note: If you are using your cost function (linearRegCostFunction) 38 | % to compute the training and cross validation error, you should 39 | % call the function with the lambda argument set to 0. 40 | % Do note that you will still need to use lambda when running 41 | % the training to obtain the theta parameters. 42 | % 43 | % Hint: You can loop over the examples with the following: 44 | % 45 | % for i = 1:m 46 | % % Compute train/cross validation errors using training examples 47 | % % X(1:i, :) and y(1:i), storing the result in 48 | % % error_train(i) and error_val(i) 49 | % .... 50 | % 51 | % end 52 | % 53 | 54 | % ---------------------- Sample Solution ---------------------- 55 | 56 | for i = 1:m, 57 | XSubset = X(1:i, :); 58 | ySubset = y(1:i); 59 | theta = trainLinearReg(XSubset, ySubset, lambda); 60 | error_train(i) = linearRegCostFunction(XSubset, ySubset, theta, 0); 61 | error_val(i) = linearRegCostFunction(Xval, yval, theta, 0); 62 | end; 63 | 64 | % ------------------------------------------------------------- 65 | 66 | % ========================================================================= 67 | 68 | end 69 | -------------------------------------------------------------------------------- /Exercise 5/ex5/linearRegCostFunction.m: -------------------------------------------------------------------------------- 1 | function [J, grad] = linearRegCostFunction(X, y, theta, lambda) 2 | %LINEARREGCOSTFUNCTION Compute cost and gradient for regularized linear 3 | %regression with multiple variables 4 | % [J, grad] = LINEARREGCOSTFUNCTION(X, y, theta, lambda) computes the 5 | % cost of using theta as the parameter for linear regression to fit the 6 | % data points in X and y. Returns the cost in J and the gradient in grad 7 | 8 | % Initialize some useful values 9 | m = length(y); % number of training examples 10 | 11 | % You need to return the following variables correctly 12 | J = 0; 13 | grad = zeros(size(theta)); 14 | 15 | % ====================== YOUR CODE HERE ====================== 16 | % Instructions: Compute the cost and gradient of regularized linear 17 | % regression for a particular choice of theta. 18 | % 19 | % You should set J to the cost and grad to the gradient. 20 | % 21 | 22 | h = X * theta; 23 | squaredErrors = (h - y) .^ 2; 24 | thetaNoZero = [ 0; theta(2:end) ]; 25 | J = (1 / (2 * m)) * sum(squaredErrors) + (lambda / (2 * m)) * sum(thetaNoZero .^ 2); 26 | grad = (1 / m) .* (X' * (h - y)) + (lambda / m) * thetaNoZero; 27 | 28 | % ========================================================================= 29 | 30 | grad = grad(:); 31 | 32 | end 33 | -------------------------------------------------------------------------------- /Exercise 5/ex5/plotFit.m: -------------------------------------------------------------------------------- 1 | function plotFit(min_x, max_x, mu, sigma, theta, p) 2 | %PLOTFIT Plots a learned polynomial regression fit over an existing figure. 3 | %Also works with linear regression. 4 | % PLOTFIT(min_x, max_x, mu, sigma, theta, p) plots the learned polynomial 5 | % fit with power p and feature normalization (mu, sigma). 
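% Example of how this helper is called from ex5.m once a polynomial model
% has been trained:
%
%     plotFit(min(X), max(X), mu, sigma, theta, p);
%
% where mu and sigma are the statistics returned by featureNormalize and p
% is the polynomial degree used in polyFeatures.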
6 | 7 | % Hold on to the current figure 8 | hold on; 9 | 10 | % We plot a range slightly bigger than the min and max values to get 11 | % an idea of how the fit will vary outside the range of the data points 12 | x = (min_x - 15: 0.05 : max_x + 25)'; 13 | 14 | % Map the X values 15 | X_poly = polyFeatures(x, p); 16 | X_poly = bsxfun(@minus, X_poly, mu); 17 | X_poly = bsxfun(@rdivide, X_poly, sigma); 18 | 19 | % Add ones 20 | X_poly = [ones(size(x, 1), 1) X_poly]; 21 | 22 | % Plot 23 | plot(x, X_poly * theta, '--', 'LineWidth', 2) 24 | 25 | % Hold off to the current figure 26 | hold off 27 | 28 | end 29 | -------------------------------------------------------------------------------- /Exercise 5/ex5/polyFeatures.m: -------------------------------------------------------------------------------- 1 | function [X_poly] = polyFeatures(X, p) 2 | %POLYFEATURES Maps X (1D vector) into the p-th power 3 | % [X_poly] = POLYFEATURES(X, p) takes a data matrix X (size m x 1) and 4 | % maps each example into its polynomial features where 5 | % X_poly(i, :) = [X(i) X(i).^2 X(i).^3 ... X(i).^p]; 6 | % 7 | 8 | 9 | % You need to return the following variables correctly. 10 | X_poly = zeros(numel(X), p); 11 | 12 | % ====================== YOUR CODE HERE ====================== 13 | % Instructions: Given a vector X, return a matrix X_poly where the p-th 14 | % column of X contains the values of X to the p-th power. 15 | % 16 | % 17 | 18 | m = length(X); 19 | powers = repmat([1:p], m, 1); 20 | Xrepeated = repmat(X, 1, p); 21 | X_poly = Xrepeated .^ powers; 22 | 23 | % ========================================================================= 24 | 25 | end 26 | -------------------------------------------------------------------------------- /Exercise 5/ex5/submitWeb.m: -------------------------------------------------------------------------------- 1 | % submitWeb Creates files from your code and output for web submission. 2 | % 3 | % If the submit function does not work for you, use the web-submission mechanism. 4 | % Call this function to produce a file for the part you wish to submit. Then, 5 | % submit the file to the class servers using the "Web Submission" button on the 6 | % Programming Exercises page on the course website. 7 | % 8 | % You should call this function without arguments (submitWeb), to receive 9 | % an interactive prompt for submission; optionally you can call it with the partID 10 | % if you so wish. Make sure your working directory is set to the directory 11 | % containing the submitWeb.m file and your assignment files. 12 | 13 | function submitWeb(partId) 14 | if ~exist('partId', 'var') || isempty(partId) 15 | partId = []; 16 | end 17 | 18 | submit(partId, 1); 19 | end 20 | 21 | -------------------------------------------------------------------------------- /Exercise 5/ex5/trainLinearReg.m: -------------------------------------------------------------------------------- 1 | function [theta] = trainLinearReg(X, y, lambda) 2 | %TRAINLINEARREG Trains linear regression given a dataset (X, y) and a 3 | %regularization parameter lambda 4 | % [theta] = TRAINLINEARREG (X, y, lambda) trains linear regression using 5 | % the dataset (X, y) and regularization parameter lambda. Returns the 6 | % trained parameters theta. 
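% Example call, mirroring how ex5.m uses this function on the raw features
% (lambda = 0 disables regularization):
%
%     theta = trainLinearReg([ones(m, 1) X], y, 0);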
7 | % 8 | 9 | % Initialize Theta 10 | initial_theta = zeros(size(X, 2), 1); 11 | 12 | % Create "short hand" for the cost function to be minimized 13 | costFunction = @(t) linearRegCostFunction(X, y, t, lambda); 14 | 15 | % Now, costFunction is a function that takes in only one argument 16 | options = optimset('MaxIter', 200, 'GradObj', 'on'); 17 | 18 | % Minimize using fmincg 19 | theta = fmincg(costFunction, initial_theta, options); 20 | 21 | end 22 | -------------------------------------------------------------------------------- /Exercise 5/ex5/validationCurve.m: -------------------------------------------------------------------------------- 1 | function [lambda_vec, error_train, error_val] = ... 2 | validationCurve(X, y, Xval, yval) 3 | %VALIDATIONCURVE Generate the train and validation errors needed to 4 | %plot a validation curve that we can use to select lambda 5 | % [lambda_vec, error_train, error_val] = ... 6 | % VALIDATIONCURVE(X, y, Xval, yval) returns the train 7 | % and validation errors (in error_train, error_val) 8 | % for different values of lambda. You are given the training set (X, 9 | % y) and validation set (Xval, yval). 10 | % 11 | 12 | % Selected values of lambda (you should not change this) 13 | lambda_vec = [0 0.001 0.003 0.01 0.03 0.1 0.3 1 3 10]'; 14 | 15 | % You need to return these variables correctly. 16 | error_train = zeros(length(lambda_vec), 1); 17 | error_val = zeros(length(lambda_vec), 1); 18 | 19 | % ====================== YOUR CODE HERE ====================== 20 | % Instructions: Fill in this function to return training errors in 21 | % error_train and the validation errors in error_val. The 22 | % vector lambda_vec contains the different lambda parameters 23 | % to use for each calculation of the errors, i.e, 24 | % error_train(i), and error_val(i) should give 25 | % you the errors obtained after training with 26 | % lambda = lambda_vec(i) 27 | % 28 | % Note: You can loop over lambda_vec with the following: 29 | % 30 | % for i = 1:length(lambda_vec) 31 | % lambda = lambda_vec(i); 32 | % % Compute train / val errors when training linear 33 | % % regression with regularization parameter lambda 34 | % % You should store the result in error_train(i) 35 | % % and error_val(i) 36 | % .... 37 | % 38 | % end 39 | % 40 | % 41 | 42 | for i = 1:length(lambda_vec), 43 | lambda = lambda_vec(i); 44 | theta = trainLinearReg(X, y, lambda); 45 | error_train(i) = linearRegCostFunction(X, y, theta, 0); 46 | error_val(i) = linearRegCostFunction(Xval, yval, theta, 0); 47 | end; 48 | 49 | % ========================================================================= 50 | 51 | end 52 | -------------------------------------------------------------------------------- /Exercise 6/ex6.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rieder91/MachineLearning/f6708f216326cb5c9e9e5c3afc912060bfa10486/Exercise 6/ex6.pdf -------------------------------------------------------------------------------- /Exercise 6/ex6/dataset3Params.m: -------------------------------------------------------------------------------- 1 | function [C, sigma] = dataset3Params(X, y, Xval, yval) 2 | %EX6PARAMS returns your choice of C and sigma for Part 3 of the exercise 3 | %where you select the optimal (C, sigma) learning parameters to use for SVM 4 | %with RBF kernel 5 | % [C, sigma] = EX6PARAMS(X, y, Xval, yval) returns your choice of C and 6 | % sigma. 
You should complete this function to return the optimal C and 7 | % sigma based on a cross-validation set. 8 | % 9 | 10 | % You need to return the following variables correctly. 11 | C = 1; 12 | sigma = 0.3; 13 | 14 | % ====================== YOUR CODE HERE ====================== 15 | % Instructions: Fill in this function to return the optimal C and sigma 16 | % learning parameters found using the cross validation set. 17 | % You can use svmPredict to predict the labels on the cross 18 | % validation set. For example, 19 | % predictions = svmPredict(model, Xval); 20 | % will return the predictions on the cross validation set. 21 | % 22 | % Note: You can compute the prediction error using 23 | % mean(double(predictions ~= yval)) 24 | % 25 | 26 | steps = [ 0.01 0.03 0.1 0.3 1 3 10 30 ]; 27 | minError = Inf; 28 | minC = Inf; 29 | minSigma = Inf; 30 | 31 | for i = 1:length(steps) 32 | for j = 1:length(steps) 33 | currentC = steps(i); 34 | currentSigma = steps(j); 35 | model = svmTrain(X, y, currentC, @(x1, x2) gaussianKernel(x1, x2, currentSigma)); 36 | predictions = svmPredict(model, Xval); 37 | error = mean(double(predictions ~= yval)); 38 | 39 | if error < minError 40 | minError = error; 41 | minC = currentC; 42 | minSigma = currentSigma; 43 | end 44 | end 45 | end 46 | 47 | C = minC; 48 | sigma = minSigma; 49 | 50 | % ========================================================================= 51 | 52 | end 53 | -------------------------------------------------------------------------------- /Exercise 6/ex6/emailFeatures.m: -------------------------------------------------------------------------------- 1 | function x = emailFeatures(word_indices) 2 | %EMAILFEATURES takes in a word_indices vector and produces a feature vector 3 | %from the word indices 4 | % x = EMAILFEATURES(word_indices) takes in a word_indices vector and 5 | % produces a feature vector from the word indices. 6 | 7 | % Total number of words in the dictionary 8 | n = 1899; 9 | 10 | % You need to return the following variables correctly. 11 | x = zeros(n, 1); 12 | 13 | % ====================== YOUR CODE HERE ====================== 14 | % Instructions: Fill in this function to return a feature vector for the 15 | % given email (word_indices). To help make it easier to 16 | % process the emails, we have have already pre-processed each 17 | % email and converted each word in the email into an index in 18 | % a fixed dictionary (of 1899 words). The variable 19 | % word_indices contains the list of indices of the words 20 | % which occur in one email. 21 | % 22 | % Concretely, if an email has the text: 23 | % 24 | % The quick brown fox jumped over the lazy dog. 25 | % 26 | % Then, the word_indices vector for this text might look 27 | % like: 28 | % 29 | % 60 100 33 44 10 53 60 58 5 30 | % 31 | % where, we have mapped each word onto a number, for example: 32 | % 33 | % the -- 60 34 | % quick -- 100 35 | % ... 36 | % 37 | % (note: the above numbers are just an example and are not the 38 | % actual mappings). 39 | % 40 | % Your task is take one such word_indices vector and construct 41 | % a binary feature vector that indicates whether a particular 42 | % word occurs in the email. That is, x(i) = 1 when word i 43 | % is present in the email. Concretely, if the word 'the' (say, 44 | % index 60) appears in the email, then x(60) = 1. The feature 45 | % vector should look like: 46 | % 47 | % x = [ 0 0 0 0 1 0 0 0 ... 0 0 0 0 1 ... 
0 0 0 1 0 ..]; 48 | % 49 | % 50 | 51 | for i = 1:length(word_indices) 52 | x(word_indices(i)) = 1; 53 | end 54 | 55 | % ========================================================================= 56 | 57 | 58 | end 59 | -------------------------------------------------------------------------------- /Exercise 6/ex6/emailSample1.txt: -------------------------------------------------------------------------------- 1 | > Anyone knows how much it costs to host a web portal ? 2 | > 3 | Well, it depends on how many visitors you're expecting. 4 | This can be anywhere from less than 10 bucks a month to a couple of $100. 5 | You should checkout http://www.rackspace.com/ or perhaps Amazon EC2 6 | if youre running something big.. 7 | 8 | To unsubscribe yourself from this mailing list, send an email to: 9 | groupname-unsubscribe@egroups.com 10 | 11 | -------------------------------------------------------------------------------- /Exercise 6/ex6/emailSample2.txt: -------------------------------------------------------------------------------- 1 | Folks, 2 | 3 | my first time posting - have a bit of Unix experience, but am new to Linux. 4 | 5 | 6 | Just got a new PC at home - Dell box with Windows XP. Added a second hard disk 7 | for Linux. Partitioned the disk and have installed Suse 7.2 from CD, which went 8 | fine except it didn't pick up my monitor. 9 | 10 | I have a Dell branded E151FPp 15" LCD flat panel monitor and a nVidia GeForce4 11 | Ti4200 video card, both of which are probably too new to feature in Suse's default 12 | set. I downloaded a driver from the nVidia website and installed it using RPM. 13 | Then I ran Sax2 (as was recommended in some postings I found on the net), but 14 | it still doesn't feature my video card in the available list. What next? 15 | 16 | Another problem. I have a Dell branded keyboard and if I hit Caps-Lock twice, 17 | the whole machine crashes (in Linux, not Windows) - even the on/off switch is 18 | inactive, leaving me to reach for the power cable instead. 19 | 20 | If anyone can help me in any way with these probs., I'd be really grateful - 21 | I've searched the 'net but have run out of ideas. 22 | 23 | Or should I be going for a different version of Linux such as RedHat? Opinions 24 | welcome. 25 | 26 | Thanks a lot, 27 | Peter 28 | 29 | -- 30 | Irish Linux Users' Group: ilug@linux.ie 31 | http://www.linux.ie/mailman/listinfo/ilug for (un)subscription information. 32 | List maintainer: listmaster@linux.ie 33 | 34 | 35 | -------------------------------------------------------------------------------- /Exercise 6/ex6/ex6.m: -------------------------------------------------------------------------------- 1 | %% Machine Learning Online Class 2 | % Exercise 6 | Support Vector Machines 3 | % 4 | % Instructions 5 | % ------------ 6 | % 7 | % This file contains code that helps you get started on the 8 | % exercise. You will need to complete the following functions: 9 | % 10 | % gaussianKernel.m 11 | % dataset3Params.m 12 | % processEmail.m 13 | % emailFeatures.m 14 | % 15 | % For this exercise, you will not need to change any code in this file, 16 | % or any other files other than those mentioned above. 17 | % 18 | 19 | %% Initialization 20 | clear ; close all; clc 21 | 22 | %% =============== Part 1: Loading and Visualizing Data ================ 23 | % We start the exercise by first loading and visualizing the dataset. 24 | % The following code will load the dataset into your environment and plot 25 | % the data. 
26 | % 27 | 28 | fprintf('Loading and Visualizing Data ...\n') 29 | 30 | % Load from ex6data1: 31 | % You will have X, y in your environment 32 | load('ex6data1.mat'); 33 | 34 | % Plot training data 35 | plotData(X, y); 36 | 37 | fprintf('Program paused. Press enter to continue.\n'); 38 | pause; 39 | 40 | %% ==================== Part 2: Training Linear SVM ==================== 41 | % The following code will train a linear SVM on the dataset and plot the 42 | % decision boundary learned. 43 | % 44 | 45 | % Load from ex6data1: 46 | % You will have X, y in your environment 47 | load('ex6data1.mat'); 48 | 49 | fprintf('\nTraining Linear SVM ...\n') 50 | 51 | % You should try to change the C value below and see how the decision 52 | % boundary varies (e.g., try C = 1000) 53 | C = 1; 54 | model = svmTrain(X, y, C, @linearKernel, 1e-3, 20); 55 | visualizeBoundaryLinear(X, y, model); 56 | 57 | fprintf('Program paused. Press enter to continue.\n'); 58 | pause; 59 | 60 | %% =============== Part 3: Implementing Gaussian Kernel =============== 61 | % You will now implement the Gaussian kernel to use 62 | % with the SVM. You should complete the code in gaussianKernel.m 63 | % 64 | fprintf('\nEvaluating the Gaussian Kernel ...\n') 65 | 66 | x1 = [1 2 1]; x2 = [0 4 -1]; sigma = 2; 67 | sim = gaussianKernel(x1, x2, sigma); 68 | 69 | fprintf(['Gaussian Kernel between x1 = [1; 2; 1], x2 = [0; 4; -1], sigma = 2 :' ... 70 | '\n\t%f\n(this value should be about 0.324652)\n'], sim); 71 | 72 | fprintf('Program paused. Press enter to continue.\n'); 73 | pause; 74 | 75 | %% =============== Part 4: Visualizing Dataset 2 ================ 76 | % The following code will load the next dataset into your environment and 77 | % plot the data. 78 | % 79 | 80 | fprintf('Loading and Visualizing Data ...\n') 81 | 82 | % Load from ex6data2: 83 | % You will have X, y in your environment 84 | load('ex6data2.mat'); 85 | 86 | % Plot training data 87 | plotData(X, y); 88 | 89 | fprintf('Program paused. Press enter to continue.\n'); 90 | pause; 91 | 92 | %% ========== Part 5: Training SVM with RBF Kernel (Dataset 2) ========== 93 | % After you have implemented the kernel, we can now use it to train the 94 | % SVM classifier. 95 | % 96 | fprintf('\nTraining SVM with RBF Kernel (this may take 1 to 2 minutes) ...\n'); 97 | 98 | % Load from ex6data2: 99 | % You will have X, y in your environment 100 | load('ex6data2.mat'); 101 | 102 | % SVM Parameters 103 | C = 1; sigma = 0.1; 104 | 105 | % We set the tolerance and max_passes lower here so that the code will run 106 | % faster. However, in practice, you will want to run the training to 107 | % convergence. 108 | model= svmTrain(X, y, C, @(x1, x2) gaussianKernel(x1, x2, sigma)); 109 | visualizeBoundary(X, y, model); 110 | 111 | fprintf('Program paused. Press enter to continue.\n'); 112 | pause; 113 | 114 | %% =============== Part 6: Visualizing Dataset 3 ================ 115 | % The following code will load the next dataset into your environment and 116 | % plot the data. 117 | % 118 | 119 | fprintf('Loading and Visualizing Data ...\n') 120 | 121 | % Load from ex6data3: 122 | % You will have X, y in your environment 123 | load('ex6data3.mat'); 124 | 125 | % Plot training data 126 | plotData(X, y); 127 | 128 | fprintf('Program paused. Press enter to continue.\n'); 129 | pause; 130 | 131 | %% ========== Part 7: Training SVM with RBF Kernel (Dataset 3) ========== 132 | 133 | % This is a different dataset that you can use to experiment with.
Try 134 | % different values of C and sigma here. 135 | % 136 | 137 | % Load from ex6data3: 138 | % You will have X, y in your environment 139 | load('ex6data3.mat'); 140 | 141 | % Try different SVM Parameters here 142 | [C, sigma] = dataset3Params(X, y, Xval, yval); 143 | 144 | % Train the SVM 145 | model= svmTrain(X, y, C, @(x1, x2) gaussianKernel(x1, x2, sigma)); 146 | visualizeBoundary(X, y, model); 147 | 148 | fprintf('Program paused. Press enter to continue.\n'); 149 | pause; 150 | 151 | -------------------------------------------------------------------------------- /Exercise 6/ex6/ex6_spam.m: -------------------------------------------------------------------------------- 1 | %% Machine Learning Online Class 2 | % Exercise 6 | Spam Classification with SVMs 3 | % 4 | % Instructions 5 | % ------------ 6 | % 7 | % This file contains code that helps you get started on the 8 | % exercise. You will need to complete the following functions: 9 | % 10 | % gaussianKernel.m 11 | % dataset3Params.m 12 | % processEmail.m 13 | % emailFeatures.m 14 | % 15 | % For this exercise, you will not need to change any code in this file, 16 | % or any other files other than those mentioned above. 17 | % 18 | 19 | %% Initialization 20 | clear ; close all; clc 21 | 22 | %% ==================== Part 1: Email Preprocessing ==================== 23 | % To use an SVM to classify emails into Spam v.s. Non-Spam, you first need 24 | % to convert each email into a vector of features. In this part, you will 25 | % implement the preprocessing steps for each email. You should 26 | % complete the code in processEmail.m to produce a word indices vector 27 | % for a given email. 28 | 29 | fprintf('\nPreprocessing sample email (emailSample1.txt)\n'); 30 | 31 | % Extract Features 32 | file_contents = readFile('emailSample1.txt'); 33 | word_indices = processEmail(file_contents); 34 | 35 | % Print Stats 36 | fprintf('Word Indices: \n'); 37 | fprintf(' %d', word_indices); 38 | fprintf('\n\n'); 39 | 40 | fprintf('Program paused. Press enter to continue.\n'); 41 | pause; 42 | 43 | %% ==================== Part 2: Feature Extraction ==================== 44 | % Now, you will convert each email into a vector of features in R^n. 45 | % You should complete the code in emailFeatures.m to produce a feature 46 | % vector for a given email. 47 | 48 | fprintf('\nExtracting features from sample email (emailSample1.txt)\n'); 49 | 50 | % Extract Features 51 | file_contents = readFile('emailSample1.txt'); 52 | word_indices = processEmail(file_contents); 53 | features = emailFeatures(word_indices); 54 | 55 | % Print Stats 56 | fprintf('Length of feature vector: %d\n', length(features)); 57 | fprintf('Number of non-zero entries: %d\n', sum(features > 0)); 58 | 59 | fprintf('Program paused. Press enter to continue.\n'); 60 | pause; 61 | 62 | %% =========== Part 3: Train Linear SVM for Spam Classification ======== 63 | % In this section, you will train a linear classifier to determine if an 64 | % email is Spam or Not-Spam. 
65 | 66 | % Load the Spam Email dataset 67 | % You will have X, y in your environment 68 | load('spamTrain.mat'); 69 | 70 | fprintf('\nTraining Linear SVM (Spam Classification)\n') 71 | fprintf('(this may take 1 to 2 minutes) ...\n') 72 | 73 | C = 0.1; 74 | model = svmTrain(X, y, C, @linearKernel); 75 | 76 | p = svmPredict(model, X); 77 | 78 | fprintf('Training Accuracy: %f\n', mean(double(p == y)) * 100); 79 | 80 | %% =================== Part 4: Test Spam Classification ================ 81 | % After training the classifier, we can evaluate it on a test set. We have 82 | % included a test set in spamTest.mat 83 | 84 | % Load the test dataset 85 | % You will have Xtest, ytest in your environment 86 | load('spamTest.mat'); 87 | 88 | fprintf('\nEvaluating the trained Linear SVM on a test set ...\n') 89 | 90 | p = svmPredict(model, Xtest); 91 | 92 | fprintf('Test Accuracy: %f\n', mean(double(p == ytest)) * 100); 93 | pause; 94 | 95 | 96 | %% ================= Part 5: Top Predictors of Spam ==================== 97 | % Since the model we are training is a linear SVM, we can inspect the 98 | % weights learned by the model to understand better how it is determining 99 | % whether an email is spam or not. The following code finds the words with 100 | % the highest weights in the classifier. Informally, the classifier 101 | % 'thinks' that these words are the most likely indicators of spam. 102 | % 103 | 104 | % Sort the weights and obtin the vocabulary list 105 | [weight, idx] = sort(model.w, 'descend'); 106 | vocabList = getVocabList(); 107 | 108 | fprintf('\nTop predictors of spam: \n'); 109 | for i = 1:15 110 | fprintf(' %-15s (%f) \n', vocabList{idx(i)}, weight(i)); 111 | end 112 | 113 | fprintf('\n\n'); 114 | fprintf('\nProgram paused. Press enter to continue.\n'); 115 | pause; 116 | 117 | %% =================== Part 6: Try Your Own Emails ===================== 118 | % Now that you've trained the spam classifier, you can use it on your own 119 | % emails! In the starter code, we have included spamSample1.txt, 120 | % spamSample2.txt, emailSample1.txt and emailSample2.txt as examples. 121 | % The following code reads in one of these emails and then uses your 122 | % learned SVM classifier to determine whether the email is Spam or 123 | % Not Spam 124 | 125 | % Set the file to be read in (change this to spamSample2.txt, 126 | % emailSample1.txt or emailSample2.txt to see different predictions on 127 | % different emails types). Try your own emails as well! 
128 | filename = 'spamSample1.txt'; 129 | 130 | % Read and predict 131 | file_contents = readFile(filename); 132 | word_indices = processEmail(file_contents); 133 | x = emailFeatures(word_indices); 134 | p = svmPredict(model, x); 135 | 136 | fprintf('\nProcessed %s\n\nSpam Classification: %d\n', filename, p); 137 | fprintf('(1 indicates spam, 0 indicates not spam)\n\n'); 138 | 139 | -------------------------------------------------------------------------------- /Exercise 6/ex6/ex6data1.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rieder91/MachineLearning/f6708f216326cb5c9e9e5c3afc912060bfa10486/Exercise 6/ex6/ex6data1.mat -------------------------------------------------------------------------------- /Exercise 6/ex6/ex6data2.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rieder91/MachineLearning/f6708f216326cb5c9e9e5c3afc912060bfa10486/Exercise 6/ex6/ex6data2.mat -------------------------------------------------------------------------------- /Exercise 6/ex6/ex6data3.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rieder91/MachineLearning/f6708f216326cb5c9e9e5c3afc912060bfa10486/Exercise 6/ex6/ex6data3.mat -------------------------------------------------------------------------------- /Exercise 6/ex6/gaussianKernel.m: -------------------------------------------------------------------------------- 1 | function sim = gaussianKernel(x1, x2, sigma) 2 | %RBFKERNEL returns a radial basis function kernel between x1 and x2 3 | % sim = gaussianKernel(x1, x2) returns a gaussian kernel between x1 and x2 4 | % and returns the value in sim 5 | 6 | % Ensure that x1 and x2 are column vectors 7 | x1 = x1(:); x2 = x2(:); 8 | 9 | % You need to return the following variables correctly. 10 | % sim = 0; 11 | 12 | % ====================== YOUR CODE HERE ====================== 13 | % Instructions: Fill in this function to return the similarity between x1 14 | % and x2 computed using a Gaussian kernel with bandwidth 15 | % sigma 16 | % 17 | % 18 | 19 | sim = exp(-(norm(x1 - x2) ^ 2) / (2 * (sigma ^ 2))); 20 | 21 | % ============================================================= 22 | 23 | end 24 | -------------------------------------------------------------------------------- /Exercise 6/ex6/getVocabList.m: -------------------------------------------------------------------------------- 1 | function vocabList = getVocabList() 2 | %GETVOCABLIST reads the fixed vocabulary list in vocab.txt and returns a 3 | %cell array of the words 4 | % vocabList = GETVOCABLIST() reads the fixed vocabulary list in vocab.txt 5 | % and returns a cell array of the words in vocabList. 
6 | 7 | 8 | %% Read the fixed vocabulary list 9 | fid = fopen('vocab.txt'); 10 | 11 | % Store all dictionary words in cell array vocab{} 12 | n = 1899; % Total number of words in the dictionary 13 | 14 | % For ease of implementation, we use a struct to map the strings => integers 15 | % In practice, you'll want to use some form of hashmap 16 | vocabList = cell(n, 1); 17 | for i = 1:n 18 | % Word Index (can ignore since it will be = i) 19 | fscanf(fid, '%d', 1); 20 | % Actual Word 21 | vocabList{i} = fscanf(fid, '%s', 1); 22 | end 23 | fclose(fid); 24 | 25 | end 26 | -------------------------------------------------------------------------------- /Exercise 6/ex6/linearKernel.m: -------------------------------------------------------------------------------- 1 | function sim = linearKernel(x1, x2) 2 | %LINEARKERNEL returns a linear kernel between x1 and x2 3 | % sim = linearKernel(x1, x2) returns a linear kernel between x1 and x2 4 | % and returns the value in sim 5 | 6 | % Ensure that x1 and x2 are column vectors 7 | x1 = x1(:); x2 = x2(:); 8 | 9 | % Compute the kernel 10 | sim = x1' * x2; % dot product 11 | 12 | end -------------------------------------------------------------------------------- /Exercise 6/ex6/plotData.m: -------------------------------------------------------------------------------- 1 | function plotData(X, y) 2 | %PLOTDATA Plots the data points X and y into a new figure 3 | % PLOTDATA(x,y) plots the data points with + for the positive examples 4 | % and o for the negative examples. X is assumed to be a Mx2 matrix. 5 | % 6 | % Note: This was slightly modified such that it expects y = 1 or y = 0 7 | 8 | % Find Indices of Positive and Negative Examples 9 | pos = find(y == 1); neg = find(y == 0); 10 | 11 | % Plot Examples 12 | plot(X(pos, 1), X(pos, 2), 'k+','LineWidth', 1, 'MarkerSize', 7) 13 | hold on; 14 | plot(X(neg, 1), X(neg, 2), 'ko', 'MarkerFaceColor', 'y', 'MarkerSize', 7) 15 | hold off; 16 | 17 | end 18 | -------------------------------------------------------------------------------- /Exercise 6/ex6/processEmail.m: -------------------------------------------------------------------------------- 1 | function word_indices = processEmail(email_contents) 2 | %PROCESSEMAIL preprocesses a the body of an email and 3 | %returns a list of word_indices 4 | % word_indices = PROCESSEMAIL(email_contents) preprocesses 5 | % the body of an email and returns a list of indices of the 6 | % words contained in the email. 7 | % 8 | 9 | % Load Vocabulary 10 | vocabList = getVocabList(); 11 | 12 | % Init return value 13 | word_indices = []; 14 | 15 | % ========================== Preprocess Email =========================== 16 | 17 | % Find the Headers ( \n\n and remove ) 18 | % Uncomment the following lines if you are working with raw emails with the 19 | % full headers 20 | 21 | % hdrstart = strfind(email_contents, ([char(10) char(10)])); 22 | % email_contents = email_contents(hdrstart(1):end); 23 | 24 | % Lower case 25 | email_contents = lower(email_contents); 26 | 27 | % Strip all HTML 28 | % Looks for any expression that starts with < and ends with > and replace 29 | % and does not have any < or > in the tag it with a space 30 | email_contents = regexprep(email_contents, '<[^<>]+>', ' '); 31 | 32 | % Handle Numbers 33 | % Look for one or more characters between 0-9 34 | email_contents = regexprep(email_contents, '[0-9]+', 'number'); 35 | 36 | % Handle URLS 37 | % Look for strings starting with http:// or https:// 38 | email_contents = regexprep(email_contents, ... 
39 | '(http|https)://[^\s]*', 'httpaddr'); 40 | 41 | % Handle Email Addresses 42 | % Look for strings with @ in the middle 43 | email_contents = regexprep(email_contents, '[^\s]+@[^\s]+', 'emailaddr'); 44 | 45 | % Handle $ sign 46 | email_contents = regexprep(email_contents, '[$]+', 'dollar'); 47 | 48 | 49 | % ========================== Tokenize Email =========================== 50 | 51 | % Output the email to screen as well 52 | fprintf('\n==== Processed Email ====\n\n'); 53 | 54 | % Process file 55 | l = 0; 56 | 57 | while ~isempty(email_contents) 58 | 59 | % Tokenize and also get rid of any punctuation 60 | [str, email_contents] = ... 61 | strtok(email_contents, ... 62 | [' @$/#.-:&*+=[]?!(){},''">_<;%' char(10) char(13)]); 63 | 64 | % Remove any non alphanumeric characters 65 | str = regexprep(str, '[^a-zA-Z0-9]', ''); 66 | 67 | % Stem the word 68 | % (the porterStemmer sometimes has issues, so we use a try catch block) 69 | try str = porterStemmer(strtrim(str)); 70 | catch str = ''; continue; 71 | end; 72 | 73 | % Skip the word if it is too short 74 | if length(str) < 1 75 | continue; 76 | end 77 | 78 | % Look up the word in the dictionary and add to word_indices if 79 | % found 80 | % ====================== YOUR CODE HERE ====================== 81 | % Instructions: Fill in this function to add the index of str to 82 | % word_indices if it is in the vocabulary. At this point 83 | % of the code, you have a stemmed word from the email in 84 | % the variable str. You should look up str in the 85 | % vocabulary list (vocabList). If a match exists, you 86 | % should add the index of the word to the word_indices 87 | % vector. Concretely, if str = 'action', then you should 88 | % look up the vocabulary list to find where in vocabList 89 | % 'action' appears. For example, if vocabList{18} = 90 | % 'action', then, you should add 18 to the word_indices 91 | % vector (e.g., word_indices = [word_indices ; 18]; ). 92 | % 93 | % Note: vocabList{idx} returns a the word with index idx in the 94 | % vocabulary list. 95 | % 96 | % Note: You can use strcmp(str1, str2) to compare two strings (str1 and 97 | % str2). It will return 1 only if the two strings are equivalent. 
98 | % 99 | 100 | 101 | for i = 1:length(vocabList) 102 | if strcmp(vocabList{i}, str) 103 | word_indices = [word_indices; i]; 104 | break; 105 | end 106 | end 107 | 108 | % ============================================================= 109 | 110 | 111 | % Print to screen, ensuring that the output lines are not too long 112 | if (l + length(str) + 1) > 78 113 | fprintf('\n'); 114 | l = 0; 115 | end 116 | fprintf('%s ', str); 117 | l = l + length(str) + 1; 118 | 119 | end 120 | 121 | % Print footer 122 | fprintf('\n\n=========================\n'); 123 | 124 | end 125 | -------------------------------------------------------------------------------- /Exercise 6/ex6/readFile.m: -------------------------------------------------------------------------------- 1 | function file_contents = readFile(filename) 2 | %READFILE reads a file and returns its entire contents 3 | % file_contents = READFILE(filename) reads a file and returns its entire 4 | % contents in file_contents 5 | % 6 | 7 | % Load File 8 | fid = fopen(filename); 9 | if fid 10 | file_contents = fscanf(fid, '%c', inf); 11 | fclose(fid); 12 | else 13 | file_contents = ''; 14 | fprintf('Unable to open %s\n', filename); 15 | end 16 | 17 | end 18 | 19 | -------------------------------------------------------------------------------- /Exercise 6/ex6/spamSample1.txt: -------------------------------------------------------------------------------- 1 | Do You Want To Make $1000 Or More Per Week? 2 | 3 | 4 | 5 | If you are a motivated and qualified individual - I 6 | will personally demonstrate to you a system that will 7 | make you $1,000 per week or more! This is NOT mlm. 8 | 9 | 10 | 11 | Call our 24 hour pre-recorded number to get the 12 | details. 13 | 14 | 15 | 16 | 000-456-789 17 | 18 | 19 | 20 | I need people who want to make serious money. Make 21 | the call and get the facts. 22 | 23 | Invest 2 minutes in yourself now! 24 | 25 | 26 | 27 | 000-456-789 28 | 29 | 30 | 31 | Looking forward to your call and I will introduce you 32 | to people like yourself who 33 | are currently making $10,000 plus per week! 34 | 35 | 36 | 37 | 000-456-789 38 | 39 | 40 | 41 | 3484lJGv6-241lEaN9080lRmS6-271WxHo7524qiyT5-438rjUv5615hQcf0-662eiDB9057dMtVl72 42 | 43 | -------------------------------------------------------------------------------- /Exercise 6/ex6/spamSample2.txt: -------------------------------------------------------------------------------- 1 | Best Buy Viagra Generic Online 2 | 3 | Viagra 100mg x 60 Pills $125, Free Pills & Reorder Discount, Top Selling 100% Quality & Satisfaction guaranteed! 4 | 5 | We accept VISA, Master & E-Check Payments, 90000+ Satisfied Customers! 
6 | http://medphysitcstech.ru 7 | 8 | 9 | -------------------------------------------------------------------------------- /Exercise 6/ex6/spamTest.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rieder91/MachineLearning/f6708f216326cb5c9e9e5c3afc912060bfa10486/Exercise 6/ex6/spamTest.mat -------------------------------------------------------------------------------- /Exercise 6/ex6/spamTrain.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rieder91/MachineLearning/f6708f216326cb5c9e9e5c3afc912060bfa10486/Exercise 6/ex6/spamTrain.mat -------------------------------------------------------------------------------- /Exercise 6/ex6/submitWeb.m: -------------------------------------------------------------------------------- 1 | % submitWeb Creates files from your code and output for web submission. 2 | % 3 | % If the submit function does not work for you, use the web-submission mechanism. 4 | % Call this function to produce a file for the part you wish to submit. Then, 5 | % submit the file to the class servers using the "Web Submission" button on the 6 | % Programming Exercises page on the course website. 7 | % 8 | % You should call this function without arguments (submitWeb), to receive 9 | % an interactive prompt for submission; optionally you can call it with the partID 10 | % if you so wish. Make sure your working directory is set to the directory 11 | % containing the submitWeb.m file and your assignment files. 12 | 13 | function submitWeb(partId) 14 | if ~exist('partId', 'var') || isempty(partId) 15 | partId = []; 16 | end 17 | 18 | submit(partId, 1); 19 | end 20 | 21 | -------------------------------------------------------------------------------- /Exercise 6/ex6/svmPredict.m: -------------------------------------------------------------------------------- 1 | function pred = svmPredict(model, X) 2 | %SVMPREDICT returns a vector of predictions using a trained SVM model 3 | %(svmTrain). 4 | % pred = SVMPREDICT(model, X) returns a vector of predictions using a 5 | % trained SVM model (svmTrain). X is a mxn matrix where there each 6 | % example is a row. model is a svm model returned from svmTrain. 7 | % predictions pred is a m x 1 column of predictions of {0, 1} values. 8 | % 9 | 10 | % Check if we are getting a column vector, if so, then assume that we only 11 | % need to do prediction for a single example 12 | if (size(X, 2) == 1) 13 | % Examples should be in rows 14 | X = X'; 15 | end 16 | 17 | % Dataset 18 | m = size(X, 1); 19 | p = zeros(m, 1); 20 | pred = zeros(m, 1); 21 | 22 | if strcmp(func2str(model.kernelFunction), 'linearKernel') 23 | % We can use the weights and bias directly if working with the 24 | % linear kernel 25 | p = X * model.w + model.b; 26 | elseif strfind(func2str(model.kernelFunction), 'gaussianKernel') 27 | % Vectorized RBF Kernel 28 | % This is equivalent to computing the kernel on every pair of examples 29 | X1 = sum(X.^2, 2); 30 | X2 = sum(model.X.^2, 2)'; 31 | K = bsxfun(@plus, X1, bsxfun(@plus, X2, - 2 * X * model.X')); 32 | K = model.kernelFunction(1, 0) .^ K; 33 | K = bsxfun(@times, model.y', K); 34 | K = bsxfun(@times, model.alphas', K); 35 | p = sum(K, 2); 36 | else 37 | % Other Non-linear kernel 38 | for i = 1:m 39 | prediction = 0; 40 | for j = 1:size(model.X, 1) 41 | prediction = prediction + ... 42 | model.alphas(j) * model.y(j) * ... 
43 | model.kernelFunction(X(i,:)', model.X(j,:)'); 44 | end 45 | p(i) = prediction + model.b; 46 | end 47 | end 48 | 49 | % Convert predictions into 0 / 1 50 | pred(p >= 0) = 1; 51 | pred(p < 0) = 0; 52 | 53 | end 54 | 55 | -------------------------------------------------------------------------------- /Exercise 6/ex6/svmTrain.m: -------------------------------------------------------------------------------- 1 | function [model] = svmTrain(X, Y, C, kernelFunction, ... 2 | tol, max_passes) 3 | %SVMTRAIN Trains an SVM classifier using a simplified version of the SMO 4 | %algorithm. 5 | % [model] = SVMTRAIN(X, Y, C, kernelFunction, tol, max_passes) trains an 6 | % SVM classifier and returns trained model. X is the matrix of training 7 | % examples. Each row is a training example, and the jth column holds the 8 | % jth feature. Y is a column matrix containing 1 for positive examples 9 | % and 0 for negative examples. C is the standard SVM regularization 10 | % parameter. tol is a tolerance value used for determining equality of 11 | % floating point numbers. max_passes controls the number of iterations 12 | % over the dataset (without changes to alpha) before the algorithm quits. 13 | % 14 | % Note: This is a simplified version of the SMO algorithm for training 15 | % SVMs. In practice, if you want to train an SVM classifier, we 16 | % recommend using an optimized package such as: 17 | % 18 | % LIBSVM (http://www.csie.ntu.edu.tw/~cjlin/libsvm/) 19 | % SVMLight (http://svmlight.joachims.org/) 20 | % 21 | % 22 | 23 | if ~exist('tol', 'var') || isempty(tol) 24 | tol = 1e-3; 25 | end 26 | 27 | if ~exist('max_passes', 'var') || isempty(max_passes) 28 | max_passes = 5; 29 | end 30 | 31 | % Data parameters 32 | m = size(X, 1); 33 | n = size(X, 2); 34 | 35 | % Map 0 to -1 36 | Y(Y==0) = -1; 37 | 38 | % Variables 39 | alphas = zeros(m, 1); 40 | b = 0; 41 | E = zeros(m, 1); 42 | passes = 0; 43 | eta = 0; 44 | L = 0; 45 | H = 0; 46 | 47 | % Pre-compute the Kernel Matrix since our dataset is small 48 | % (in practice, optimized SVM packages that handle large datasets 49 | % gracefully will _not_ do this) 50 | % 51 | % We have implemented optimized vectorized version of the Kernels here so 52 | % that the svm training will run faster. 53 | if strcmp(func2str(kernelFunction), 'linearKernel') 54 | % Vectorized computation for the Linear Kernel 55 | % This is equivalent to computing the kernel on every pair of examples 56 | K = X*X'; 57 | elseif strfind(func2str(kernelFunction), 'gaussianKernel') 58 | % Vectorized RBF Kernel 59 | % This is equivalent to computing the kernel on every pair of examples 60 | X2 = sum(X.^2, 2); 61 | K = bsxfun(@plus, X2, bsxfun(@plus, X2', - 2 * (X * X'))); 62 | K = kernelFunction(1, 0) .^ K; 63 | else 64 | % Pre-compute the Kernel Matrix 65 | % The following can be slow due to the lack of vectorization 66 | K = zeros(m); 67 | for i = 1:m 68 | for j = i:m 69 | K(i,j) = kernelFunction(X(i,:)', X(j,:)'); 70 | K(j,i) = K(i,j); %the matrix is symmetric 71 | end 72 | end 73 | end 74 | 75 | % Train 76 | fprintf('\nTraining ...'); 77 | dots = 12; 78 | while passes < max_passes, 79 | 80 | num_changed_alphas = 0; 81 | for i = 1:m, 82 | 83 | % Calculate Ei = f(x(i)) - y(i) using (2). 84 | % E(i) = b + sum (X(i, :) * (repmat(alphas.*Y,1,n).*X)') - Y(i); 85 | E(i) = b + sum (alphas.*Y.*K(:,i)) - Y(i); 86 | 87 | if ((Y(i)*E(i) < -tol && alphas(i) < C) || (Y(i)*E(i) > tol && alphas(i) > 0)), 88 | 89 | % In practice, there are many heuristics one can use to select 90 | % the i and j. 
In this simplified code, we select them randomly. 91 | j = ceil(m * rand()); 92 | while j == i, % Make sure i \neq j 93 | j = ceil(m * rand()); 94 | end 95 | 96 | % Calculate Ej = f(x(j)) - y(j) using (2). 97 | E(j) = b + sum (alphas.*Y.*K(:,j)) - Y(j); 98 | 99 | % Save old alphas 100 | alpha_i_old = alphas(i); 101 | alpha_j_old = alphas(j); 102 | 103 | % Compute L and H by (10) or (11). 104 | if (Y(i) == Y(j)), 105 | L = max(0, alphas(j) + alphas(i) - C); 106 | H = min(C, alphas(j) + alphas(i)); 107 | else 108 | L = max(0, alphas(j) - alphas(i)); 109 | H = min(C, C + alphas(j) - alphas(i)); 110 | end 111 | 112 | if (L == H), 113 | % continue to next i. 114 | continue; 115 | end 116 | 117 | % Compute eta by (14). 118 | eta = 2 * K(i,j) - K(i,i) - K(j,j); 119 | if (eta >= 0), 120 | % continue to next i. 121 | continue; 122 | end 123 | 124 | % Compute and clip new value for alpha j using (12) and (15). 125 | alphas(j) = alphas(j) - (Y(j) * (E(i) - E(j))) / eta; 126 | 127 | % Clip 128 | alphas(j) = min (H, alphas(j)); 129 | alphas(j) = max (L, alphas(j)); 130 | 131 | % Check if change in alpha is significant 132 | if (abs(alphas(j) - alpha_j_old) < tol), 133 | % continue to next i. 134 | % replace anyway 135 | alphas(j) = alpha_j_old; 136 | continue; 137 | end 138 | 139 | % Determine value for alpha i using (16). 140 | alphas(i) = alphas(i) + Y(i)*Y(j)*(alpha_j_old - alphas(j)); 141 | 142 | % Compute b1 and b2 using (17) and (18) respectively. 143 | b1 = b - E(i) ... 144 | - Y(i) * (alphas(i) - alpha_i_old) * K(i,j)' ... 145 | - Y(j) * (alphas(j) - alpha_j_old) * K(i,j)'; 146 | b2 = b - E(j) ... 147 | - Y(i) * (alphas(i) - alpha_i_old) * K(i,j)' ... 148 | - Y(j) * (alphas(j) - alpha_j_old) * K(j,j)'; 149 | 150 | % Compute b by (19). 151 | if (0 < alphas(i) && alphas(i) < C), 152 | b = b1; 153 | elseif (0 < alphas(j) && alphas(j) < C), 154 | b = b2; 155 | else 156 | b = (b1+b2)/2; 157 | end 158 | 159 | num_changed_alphas = num_changed_alphas + 1; 160 | 161 | end 162 | 163 | end 164 | 165 | if (num_changed_alphas == 0), 166 | passes = passes + 1; 167 | else 168 | passes = 0; 169 | end 170 | 171 | fprintf('.'); 172 | dots = dots + 1; 173 | if dots > 78 174 | dots = 0; 175 | fprintf('\n'); 176 | end 177 | if exist('OCTAVE_VERSION') 178 | fflush(stdout); 179 | end 180 | end 181 | fprintf(' Done! 
\n\n'); 182 | 183 | % Save the model 184 | idx = alphas > 0; 185 | model.X= X(idx,:); 186 | model.y= Y(idx); 187 | model.kernelFunction = kernelFunction; 188 | model.b= b; 189 | model.alphas= alphas(idx); 190 | model.w = ((alphas.*Y)'*X)'; 191 | 192 | end 193 | -------------------------------------------------------------------------------- /Exercise 6/ex6/visualizeBoundary.m: -------------------------------------------------------------------------------- 1 | function visualizeBoundary(X, y, model, varargin) 2 | %VISUALIZEBOUNDARY plots a non-linear decision boundary learned by the SVM 3 | % VISUALIZEBOUNDARYLINEAR(X, y, model) plots a non-linear decision 4 | % boundary learned by the SVM and overlays the data on it 5 | 6 | % Plot the training data on top of the boundary 7 | plotData(X, y) 8 | 9 | % Make classification predictions over a grid of values 10 | x1plot = linspace(min(X(:,1)), max(X(:,1)), 100)'; 11 | x2plot = linspace(min(X(:,2)), max(X(:,2)), 100)'; 12 | [X1, X2] = meshgrid(x1plot, x2plot); 13 | vals = zeros(size(X1)); 14 | for i = 1:size(X1, 2) 15 | this_X = [X1(:, i), X2(:, i)]; 16 | vals(:, i) = svmPredict(model, this_X); 17 | end 18 | 19 | % Plot the SVM boundary 20 | hold on 21 | contour(X1, X2, vals, [0 0], 'Color', 'b'); 22 | hold off; 23 | 24 | end 25 | -------------------------------------------------------------------------------- /Exercise 6/ex6/visualizeBoundaryLinear.m: -------------------------------------------------------------------------------- 1 | function visualizeBoundaryLinear(X, y, model) 2 | %VISUALIZEBOUNDARYLINEAR plots a linear decision boundary learned by the 3 | %SVM 4 | % VISUALIZEBOUNDARYLINEAR(X, y, model) plots a linear decision boundary 5 | % learned by the SVM and overlays the data on it 6 | 7 | w = model.w; 8 | b = model.b; 9 | xp = linspace(min(X(:,1)), max(X(:,1)), 100); 10 | yp = - (w(1)*xp + b)/w(2); 11 | plotData(X, y); 12 | hold on; 13 | plot(xp, yp, '-b'); 14 | hold off 15 | 16 | end 17 | -------------------------------------------------------------------------------- /Exercise 7/ex7.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rieder91/MachineLearning/f6708f216326cb5c9e9e5c3afc912060bfa10486/Exercise 7/ex7.pdf -------------------------------------------------------------------------------- /Exercise 7/ex7/bird_small.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rieder91/MachineLearning/f6708f216326cb5c9e9e5c3afc912060bfa10486/Exercise 7/ex7/bird_small.mat -------------------------------------------------------------------------------- /Exercise 7/ex7/bird_small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rieder91/MachineLearning/f6708f216326cb5c9e9e5c3afc912060bfa10486/Exercise 7/ex7/bird_small.png -------------------------------------------------------------------------------- /Exercise 7/ex7/computeCentroids.m: -------------------------------------------------------------------------------- 1 | function centroids = computeCentroids(X, idx, K) 2 | %COMPUTECENTROIDS returs the new centroids by computing the means of the 3 | %data points assigned to each centroid. 4 | % centroids = COMPUTECENTROIDS(X, idx, K) returns the new centroids by 5 | % computing the means of the data points assigned to each centroid. 
It is 6 | % given a dataset X where each row is a single data point, a vector 7 | % idx of centroid assignments (i.e. each entry in range [1..K]) for each 8 | % example, and K, the number of centroids. You should return a matrix 9 | % centroids, where each row of centroids is the mean of the data points 10 | % assigned to it. 11 | % 12 | 13 | % Useful variables 14 | [m n] = size(X); 15 | 16 | % You need to return the following variables correctly. 17 | centroids = zeros(K, n); 18 | 19 | 20 | % ====================== YOUR CODE HERE ====================== 21 | % Instructions: Go over every centroid and compute mean of all points that 22 | % belong to it. Concretely, the row vector centroids(i, :) 23 | % should contain the mean of the data points assigned to 24 | % centroid i. 25 | % 26 | % Note: You can use a for-loop over the centroids to compute this. 27 | % 28 | 29 | for i = 1:K 30 | count = 0; 31 | for j = 1:m 32 | if idx(j) == i 33 | count = count + 1; 34 | centroids(i,:) = centroids(i,:) + X(j,:); 35 | end 36 | end 37 | 38 | centroids(i,:) = centroids(i,:) / count; 39 | end 40 | 41 | % ============================================================= 42 | 43 | end 44 | 45 | -------------------------------------------------------------------------------- /Exercise 7/ex7/displayData.m: -------------------------------------------------------------------------------- 1 | function [h, display_array] = displayData(X, example_width) 2 | %DISPLAYDATA Display 2D data in a nice grid 3 | % [h, display_array] = DISPLAYDATA(X, example_width) displays 2D data 4 | % stored in X in a nice grid. It returns the figure handle h and the 5 | % displayed array if requested. 6 | 7 | % Set example_width automatically if not passed in 8 | if ~exist('example_width', 'var') || isempty(example_width) 9 | example_width = round(sqrt(size(X, 2))); 10 | end 11 | 12 | % Gray Image 13 | colormap(gray); 14 | 15 | % Compute rows, cols 16 | [m n] = size(X); 17 | example_height = (n / example_width); 18 | 19 | % Compute number of items to display 20 | display_rows = floor(sqrt(m)); 21 | display_cols = ceil(m / display_rows); 22 | 23 | % Between images padding 24 | pad = 1; 25 | 26 | % Setup blank display 27 | display_array = - ones(pad + display_rows * (example_height + pad), ... 28 | pad + display_cols * (example_width + pad)); 29 | 30 | % Copy each example into a patch on the display array 31 | curr_ex = 1; 32 | for j = 1:display_rows 33 | for i = 1:display_cols 34 | if curr_ex > m, 35 | break; 36 | end 37 | % Copy the patch 38 | 39 | % Get the max value of the patch 40 | max_val = max(abs(X(curr_ex, :))); 41 | display_array(pad + (j - 1) * (example_height + pad) + (1:example_height), ... 42 | pad + (i - 1) * (example_width + pad) + (1:example_width)) = ... 
43 | reshape(X(curr_ex, :), example_height, example_width) / max_val; 44 | curr_ex = curr_ex + 1; 45 | end 46 | if curr_ex > m, 47 | break; 48 | end 49 | end 50 | 51 | % Display Image 52 | h = imagesc(display_array, [-1 1]); 53 | 54 | % Do not show axis 55 | axis image off 56 | 57 | drawnow; 58 | 59 | end 60 | -------------------------------------------------------------------------------- /Exercise 7/ex7/drawLine.m: -------------------------------------------------------------------------------- 1 | function drawLine(p1, p2, varargin) 2 | %DRAWLINE Draws a line from point p1 to point p2 3 | % DRAWLINE(p1, p2) Draws a line from point p1 to point p2 and holds the 4 | % current figure 5 | 6 | plot([p1(1) p2(1)], [p1(2) p2(2)], varargin{:}); 7 | 8 | end -------------------------------------------------------------------------------- /Exercise 7/ex7/ex7.m: -------------------------------------------------------------------------------- 1 | %% Machine Learning Online Class 2 | % Exercise 7 | Principle Component Analysis and K-Means Clustering 3 | % 4 | % Instructions 5 | % ------------ 6 | % 7 | % This file contains code that helps you get started on the 8 | % exercise. You will need to complete the following functions: 9 | % 10 | % pca.m 11 | % projectData.m 12 | % recoverData.m 13 | % computeCentroids.m 14 | % findClosestCentroids.m 15 | % kMeansInitCentroids.m 16 | % 17 | % For this exercise, you will not need to change any code in this file, 18 | % or any other files other than those mentioned above. 19 | % 20 | 21 | %% Initialization 22 | clear ; close all; clc 23 | 24 | %% ================= Part 1: Find Closest Centroids ==================== 25 | % To help you implement K-Means, we have divided the learning algorithm 26 | % into two functions -- findClosestCentroids and computeCentroids. In this 27 | % part, you shoudl complete the code in the findClosestCentroids function. 28 | % 29 | fprintf('Finding closest centroids.\n\n'); 30 | 31 | % Load an example dataset that we will be using 32 | load('ex7data2.mat'); 33 | 34 | % Select an initial set of centroids 35 | K = 3; % 3 Centroids 36 | initial_centroids = [3 3; 6 2; 8 5]; 37 | 38 | % Find the closest centroids for the examples using the 39 | % initial_centroids 40 | idx = findClosestCentroids(X, initial_centroids); 41 | 42 | fprintf('Closest centroids for the first 3 examples: \n') 43 | fprintf(' %d', idx(1:3)); 44 | fprintf('\n(the closest centroids should be 1, 3, 2 respectively)\n'); 45 | 46 | fprintf('Program paused. Press enter to continue.\n'); 47 | pause; 48 | 49 | %% ===================== Part 2: Compute Means ========================= 50 | % After implementing the closest centroids function, you should now 51 | % complete the computeCentroids function. 52 | % 53 | fprintf('\nComputing centroids means.\n\n'); 54 | 55 | % Compute means based on the closest centroids found in the previous part. 56 | centroids = computeCentroids(X, idx, K); 57 | 58 | fprintf('Centroids computed after initial finding of closest centroids: \n') 59 | fprintf(' %f %f \n' , centroids'); 60 | fprintf('\n(the centroids should be\n'); 61 | fprintf(' [ 2.428301 3.157924 ]\n'); 62 | fprintf(' [ 5.813503 2.633656 ]\n'); 63 | fprintf(' [ 7.119387 3.616684 ]\n\n'); 64 | 65 | fprintf('Program paused. 
Press enter to continue.\n'); 66 | pause; 67 | 68 | 69 | %% =================== Part 3: K-Means Clustering ====================== 70 | % After you have completed the two functions computeCentroids and 71 | % findClosestCentroids, you have all the necessary pieces to run the 72 | % kMeans algorithm. In this part, you will run the K-Means algorithm on 73 | % the example dataset we have provided. 74 | % 75 | fprintf('\nRunning K-Means clustering on example dataset.\n\n'); 76 | 77 | % Load an example dataset 78 | load('ex7data2.mat'); 79 | 80 | % Settings for running K-Means 81 | K = 3; 82 | max_iters = 10; 83 | 84 | % For consistency, here we set centroids to specific values 85 | % but in practice you want to generate them automatically, such as by 86 | % settings them to be random examples (as can be seen in 87 | % kMeansInitCentroids). 88 | initial_centroids = [3 3; 6 2; 8 5]; 89 | 90 | % Run K-Means algorithm. The 'true' at the end tells our function to plot 91 | % the progress of K-Means 92 | [centroids, idx] = runkMeans(X, initial_centroids, max_iters, true); 93 | fprintf('\nK-Means Done.\n\n'); 94 | 95 | fprintf('Program paused. Press enter to continue.\n'); 96 | pause; 97 | 98 | %% ============= Part 4: K-Means Clustering on Pixels =============== 99 | % In this exercise, you will use K-Means to compress an image. To do this, 100 | % you will first run K-Means on the colors of the pixels in the image and 101 | % then you will map each pixel on to it's closest centroid. 102 | % 103 | % You should now complete the code in kMeansInitCentroids.m 104 | % 105 | 106 | fprintf('\nRunning K-Means clustering on pixels from an image.\n\n'); 107 | 108 | % Load an image of a bird 109 | %A = double(imread('bird_small.png')); 110 | 111 | % If imread does not work for you, you can try instead 112 | load ('bird_small.mat'); 113 | 114 | A = A / 255; % Divide by 255 so that all values are in the range 0 - 1 115 | 116 | % Size of the image 117 | img_size = size(A); 118 | 119 | % Reshape the image into an Nx3 matrix where N = number of pixels. 120 | % Each row will contain the Red, Green and Blue pixel values 121 | % This gives us our dataset matrix X that we will use K-Means on. 122 | X = reshape(A, img_size(1) * img_size(2), 3); 123 | 124 | % Run your K-Means algorithm on this data 125 | % You should try different values of K and max_iters here 126 | K = 16; 127 | max_iters = 10; 128 | 129 | % When using K-Means, it is important the initialize the centroids 130 | % randomly. 131 | % You should complete the code in kMeansInitCentroids.m before proceeding 132 | initial_centroids = kMeansInitCentroids(X, K); 133 | 134 | % Run K-Means 135 | [centroids, idx] = runkMeans(X, initial_centroids, max_iters); 136 | 137 | fprintf('Program paused. Press enter to continue.\n'); 138 | pause; 139 | 140 | 141 | %% ================= Part 5: Image Compression ====================== 142 | % In this part of the exercise, you will use the clusters of K-Means to 143 | % compress an image. To do this, we first find the closest clusters for 144 | % each example. After that, we 145 | 146 | fprintf('\nApplying K-Means to compress an image.\n\n'); 147 | 148 | % Find closest cluster members 149 | idx = findClosestCentroids(X, centroids); 150 | 151 | % Essentially, now we have represented the image X as in terms of the 152 | % indices in idx. 
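% (Editor's note, an aside on the arithmetic behind this compression, under the
%  assumption from the exercise text that bird_small.png is a 128 x 128 image:
%  storing it directly takes 128 * 128 * 24 bits, while the compressed form
%  needs only 16 * 24 bits for the colour palette plus 128 * 128 * 4 bits for
%  the per-pixel indices -- roughly a six-fold reduction.)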
153 | 154 | % We can now recover the image from the indices (idx) by mapping each pixel 155 | % (specified by it's index in idx) to the centroid value 156 | X_recovered = centroids(idx,:); 157 | 158 | % Reshape the recovered image into proper dimensions 159 | X_recovered = reshape(X_recovered, img_size(1), img_size(2), 3); 160 | 161 | % Display the original image 162 | subplot(1, 2, 1); 163 | imagesc(A); 164 | title('Original'); 165 | 166 | % Display compressed image side by side 167 | subplot(1, 2, 2); 168 | imagesc(X_recovered) 169 | title(sprintf('Compressed, with %d colors.', K)); 170 | 171 | 172 | fprintf('Program paused. Press enter to continue.\n'); 173 | pause; 174 | 175 | -------------------------------------------------------------------------------- /Exercise 7/ex7/ex7_pca.m: -------------------------------------------------------------------------------- 1 | %% Machine Learning Online Class 2 | % Exercise 7 | Principle Component Analysis and K-Means Clustering 3 | % 4 | % Instructions 5 | % ------------ 6 | % 7 | % This file contains code that helps you get started on the 8 | % exercise. You will need to complete the following functions: 9 | % 10 | % pca.m 11 | % projectData.m 12 | % recoverData.m 13 | % computeCentroids.m 14 | % findClosestCentroids.m 15 | % kMeansInitCentroids.m 16 | % 17 | % For this exercise, you will not need to change any code in this file, 18 | % or any other files other than those mentioned above. 19 | % 20 | 21 | %% Initialization 22 | clear ; close all; clc 23 | 24 | %% ================== Part 1: Load Example Dataset =================== 25 | % We start this exercise by using a small dataset that is easily to 26 | % visualize 27 | % 28 | fprintf('Visualizing example dataset for PCA.\n\n'); 29 | 30 | % The following command loads the dataset. You should now have the 31 | % variable X in your environment 32 | load ('ex7data1.mat'); 33 | 34 | % Visualize the example dataset 35 | plot(X(:, 1), X(:, 2), 'bo'); 36 | axis([0.5 6.5 2 8]); axis square; 37 | 38 | fprintf('Program paused. Press enter to continue.\n'); 39 | pause; 40 | 41 | 42 | %% =============== Part 2: Principal Component Analysis =============== 43 | % You should now implement PCA, a dimension reduction technique. You 44 | % should complete the code in pca.m 45 | % 46 | fprintf('\nRunning PCA on example dataset.\n\n'); 47 | 48 | % Before running PCA, it is important to first normalize X 49 | [X_norm, mu, sigma] = featureNormalize(X); 50 | 51 | % Run PCA 52 | [U, S] = pca(X_norm); 53 | 54 | % Compute mu, the mean of the each feature 55 | 56 | % Draw the eigenvectors centered at mean of data. These lines show the 57 | % directions of maximum variations in the dataset. 58 | hold on; 59 | drawLine(mu, mu + 1.5 * S(1,1) * U(:,1)', '-k', 'LineWidth', 2); 60 | drawLine(mu, mu + 1.5 * S(2,2) * U(:,2)', '-k', 'LineWidth', 2); 61 | hold off; 62 | 63 | fprintf('Top eigenvector: \n'); 64 | fprintf(' U(:,1) = %f %f \n', U(1,1), U(2,1)); 65 | fprintf('\n(you should expect to see -0.707107 -0.707107)\n'); 66 | 67 | fprintf('Program paused. Press enter to continue.\n'); 68 | pause; 69 | 70 | 71 | %% =================== Part 3: Dimension Reduction =================== 72 | % You should now implement the projection step to map the data onto the 73 | % first k eigenvectors. The code will then plot the data in this reduced 74 | % dimensional space. This will show you what the data looks like when 75 | % using only the corresponding eigenvectors to reconstruct it. 
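% (Editor's note, a minimal sketch of the step described above, assuming X_norm
%  and U are the normalized data and the eigenvector matrix returned by pca.m:
%      Z     = X_norm * U(:, 1:K);     % what projectData.m computes
%      X_rec = Z * U(:, 1:K)';         % what recoverData.m computes
%  i.e. project each example onto the top K eigenvectors, then map the
%  projection back into the original space.)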
76 | % 77 | % You should complete the code in projectData.m 78 | % 79 | fprintf('\nDimension reduction on example dataset.\n\n'); 80 | 81 | % Plot the normalized dataset (returned from pca) 82 | plot(X_norm(:, 1), X_norm(:, 2), 'bo'); 83 | axis([-4 3 -4 3]); axis square 84 | 85 | % Project the data onto K = 1 dimension 86 | K = 1; 87 | Z = projectData(X_norm, U, K); 88 | fprintf('Projection of the first example: %f\n', Z(1)); 89 | fprintf('\n(this value should be about 1.481274)\n\n'); 90 | 91 | X_rec = recoverData(Z, U, K); 92 | fprintf('Approximation of the first example: %f %f\n', X_rec(1, 1), X_rec(1, 2)); 93 | fprintf('\n(this value should be about -1.047419 -1.047419)\n\n'); 94 | 95 | % Draw lines connecting the projected points to the original points 96 | hold on; 97 | plot(X_rec(:, 1), X_rec(:, 2), 'ro'); 98 | for i = 1:size(X_norm, 1) 99 | drawLine(X_norm(i,:), X_rec(i,:), '--k', 'LineWidth', 1); 100 | end 101 | hold off 102 | 103 | fprintf('Program paused. Press enter to continue.\n'); 104 | pause; 105 | 106 | %% =============== Part 4: Loading and Visualizing Face Data ============= 107 | % We start the exercise by first loading and visualizing the dataset. 108 | % The following code will load the dataset into your environment 109 | % 110 | fprintf('\nLoading face dataset.\n\n'); 111 | 112 | % Load Face dataset 113 | load ('ex7faces.mat') 114 | 115 | % Display the first 100 faces in the dataset 116 | displayData(X(1:100, :)); 117 | 118 | fprintf('Program paused. Press enter to continue.\n'); 119 | pause; 120 | 121 | %% =========== Part 5: PCA on Face Data: Eigenfaces =================== 122 | % Run PCA and visualize the eigenvectors which are in this case eigenfaces 123 | % We display the first 36 eigenfaces. 124 | % 125 | fprintf(['\nRunning PCA on face dataset.\n' ... 126 | '(this mght take a minute or two ...)\n\n']); 127 | 128 | % Before running PCA, it is important to first normalize X by subtracting 129 | % the mean value from each feature 130 | [X_norm, mu, sigma] = featureNormalize(X); 131 | 132 | % Run PCA 133 | [U, S] = pca(X_norm); 134 | 135 | % Visualize the top 36 eigenvectors found 136 | displayData(U(:, 1:36)'); 137 | 138 | fprintf('Program paused. Press enter to continue.\n'); 139 | pause; 140 | 141 | 142 | %% ============= Part 6: Dimension Reduction for Faces ================= 143 | % Project images to the eigen space using the top k eigenvectors 144 | % If you are applying a machine learning algorithm 145 | fprintf('\nDimension reduction for face dataset.\n\n'); 146 | 147 | K = 100; 148 | Z = projectData(X_norm, U, K); 149 | 150 | fprintf('The projected data Z has a size of: ') 151 | fprintf('%d ', size(Z)); 152 | 153 | fprintf('\n\nProgram paused. Press enter to continue.\n'); 154 | pause; 155 | 156 | %% ==== Part 7: Visualization of Faces after PCA Dimension Reduction ==== 157 | % Project images to the eigen space using the top K eigen vectors and 158 | % visualize only using those K dimensions 159 | % Compare to the original input, which is also displayed 160 | 161 | fprintf('\nVisualizing the projected (reduced dimension) faces.\n\n'); 162 | 163 | K = 100; 164 | X_rec = recoverData(Z, U, K); 165 | 166 | % Display normalized data 167 | subplot(1, 2, 1); 168 | displayData(X_norm(1:100,:)); 169 | title('Original faces'); 170 | axis square; 171 | 172 | % Display reconstructed data from only k eigenfaces 173 | subplot(1, 2, 2); 174 | displayData(X_rec(1:100,:)); 175 | title('Recovered faces'); 176 | axis square; 177 | 178 | fprintf('Program paused. 
Press enter to continue.\n'); 179 | pause; 180 | 181 | 182 | %% === Part 8(a): Optional (ungraded) Exercise: PCA for Visualization === 183 | % One useful application of PCA is to use it to visualize high-dimensional 184 | % data. In the last K-Means exercise you ran K-Means on 3-dimensional 185 | % pixel colors of an image. We first visualize this output in 3D, and then 186 | % apply PCA to obtain a visualization in 2D. 187 | 188 | close all; close all; clc 189 | 190 | % Re-load the image from the previous exercise and run K-Means on it 191 | % For this to work, you need to complete the K-Means assignment first 192 | A = double(imread('bird_small.png')); 193 | 194 | % If imread does not work for you, you can try instead 195 | % load ('bird_small.mat'); 196 | 197 | A = A / 255; 198 | img_size = size(A); 199 | X = reshape(A, img_size(1) * img_size(2), 3); 200 | K = 16; 201 | max_iters = 10; 202 | initial_centroids = kMeansInitCentroids(X, K); 203 | [centroids, idx] = runkMeans(X, initial_centroids, max_iters); 204 | 205 | % Sample 1000 random indexes (since working with all the data is 206 | % too expensive. If you have a fast computer, you may increase this. 207 | sel = floor(rand(1000, 1) * size(X, 1)) + 1; 208 | 209 | % Setup Color Palette 210 | palette = hsv(K); 211 | colors = palette(idx(sel), :); 212 | 213 | % Visualize the data and centroid memberships in 3D 214 | figure; 215 | scatter3(X(sel, 1), X(sel, 2), X(sel, 3), 10, colors); 216 | title('Pixel dataset plotted in 3D. Color shows centroid memberships'); 217 | fprintf('Program paused. Press enter to continue.\n'); 218 | pause; 219 | 220 | %% === Part 8(b): Optional (ungraded) Exercise: PCA for Visualization === 221 | % Use PCA to project this cloud to 2D for visualization 222 | 223 | % Subtract the mean to use PCA 224 | [X_norm, mu, sigma] = featureNormalize(X); 225 | 226 | % PCA and project the data to 2D 227 | [U, S] = pca(X_norm); 228 | Z = projectData(X_norm, U, 2); 229 | 230 | % Plot in 2D 231 | figure; 232 | plotDataPoints(Z(sel, :), idx(sel), K); 233 | title('Pixel dataset plotted in 2D, using PCA for dimensionality reduction'); 234 | fprintf('Program paused. 
Press enter to continue.\n'); 235 | pause; 236 | -------------------------------------------------------------------------------- /Exercise 7/ex7/ex7data1.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rieder91/MachineLearning/f6708f216326cb5c9e9e5c3afc912060bfa10486/Exercise 7/ex7/ex7data1.mat -------------------------------------------------------------------------------- /Exercise 7/ex7/ex7data2.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rieder91/MachineLearning/f6708f216326cb5c9e9e5c3afc912060bfa10486/Exercise 7/ex7/ex7data2.mat -------------------------------------------------------------------------------- /Exercise 7/ex7/ex7faces.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rieder91/MachineLearning/f6708f216326cb5c9e9e5c3afc912060bfa10486/Exercise 7/ex7/ex7faces.mat -------------------------------------------------------------------------------- /Exercise 7/ex7/featureNormalize.m: -------------------------------------------------------------------------------- 1 | function [X_norm, mu, sigma] = featureNormalize(X) 2 | %FEATURENORMALIZE Normalizes the features in X 3 | % FEATURENORMALIZE(X) returns a normalized version of X where 4 | % the mean value of each feature is 0 and the standard deviation 5 | % is 1. This is often a good preprocessing step to do when 6 | % working with learning algorithms. 7 | 8 | mu = mean(X); 9 | X_norm = bsxfun(@minus, X, mu); 10 | 11 | sigma = std(X_norm); 12 | X_norm = bsxfun(@rdivide, X_norm, sigma); 13 | 14 | 15 | % ============================================================ 16 | 17 | end 18 | -------------------------------------------------------------------------------- /Exercise 7/ex7/findClosestCentroids.m: -------------------------------------------------------------------------------- 1 | function idx = findClosestCentroids(X, centroids) 2 | %FINDCLOSESTCENTROIDS computes the centroid memberships for every example 3 | % idx = FINDCLOSESTCENTROIDS (X, centroids) returns the closest centroids 4 | % in idx for a dataset X where each row is a single example. idx = m x 1 5 | % vector of centroid assignments (i.e. each entry in range [1..K]) 6 | % 7 | 8 | % Set K 9 | K = size(centroids, 1); 10 | 11 | % You need to return the following variables correctly. 12 | idx = zeros(size(X,1), 1); 13 | 14 | % ====================== YOUR CODE HERE ====================== 15 | % Instructions: Go over every example, find its closest centroid, and store 16 | % the index inside idx at the appropriate location. 17 | % Concretely, idx(i) should contain the index of the centroid 18 | % closest to example i. Hence, it should be a value in the 19 | % range 1..K 20 | % 21 | % Note: You can use a for-loop over the examples to compute this. 
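% (Editor's sketch, not part of the original starter code: the nested loop
%  below can also be vectorized. Assuming X is m x n and centroids is K x n,
%      dists = bsxfun(@plus, sum(X .^ 2, 2), sum(centroids .^ 2, 2)') ...
%              - 2 * X * centroids';
%      [~, idx] = min(dists, [], 2);
%  builds the full m x K matrix of squared distances at once and picks the
%  closest centroid for every example in one step.)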
22 | % 23 | 24 | for i = 1:size(X, 1) 25 | minDist = Inf; 26 | for j = 1:K 27 | dist = norm(X(i,:) - centroids(j,:)) ^ 2; 28 | 29 | if dist < minDist 30 | minDist = dist; 31 | idx(i) = j; 32 | end 33 | end 34 | end 35 | 36 | % ============================================================= 37 | 38 | end 39 | 40 | -------------------------------------------------------------------------------- /Exercise 7/ex7/kMeansInitCentroids.m: -------------------------------------------------------------------------------- 1 | function centroids = kMeansInitCentroids(X, K) 2 | %KMEANSINITCENTROIDS This function initializes K centroids that are to be 3 | %used in K-Means on the dataset X 4 | % centroids = KMEANSINITCENTROIDS(X, K) returns K initial centroids to be 5 | % used with the K-Means on the dataset X 6 | % 7 | 8 | % You should return these values correctly 9 | centroids = zeros(K, size(X, 2)); 10 | 11 | % ====================== YOUR CODE HERE ====================== 12 | % Instructions: You should set centroids to randomly chosen examples from 13 | % the dataset X 14 | % 15 | 16 | randidx = randperm(size(X, 1)); 17 | centroids = X(randidx(1:K), :); 18 | 19 | % ============================================================= 20 | 21 | end 22 | 23 | -------------------------------------------------------------------------------- /Exercise 7/ex7/pca.m: -------------------------------------------------------------------------------- 1 | function [U, S] = pca(X) 2 | %PCA Run principal component analysis on the dataset X 3 | % [U, S] = pca(X) computes eigenvectors of the covariance matrix of X 4 | % Returns the eigenvectors U, the eigenvalues (on diagonal) in S 5 | % 6 | 7 | % Useful values 8 | [m, n] = size(X); 9 | 10 | % You need to return the following variables correctly. 11 | U = zeros(n); 12 | S = zeros(n); 13 | 14 | % ====================== YOUR CODE HERE ====================== 15 | % Instructions: You should first compute the covariance matrix. Then, you 16 | % should use the "svd" function to compute the eigenvectors 17 | % and eigenvalues of the covariance matrix. 18 | % 19 | % Note: When computing the covariance matrix, remember to divide by m (the 20 | % number of examples). 21 | % 22 | 23 | Sigma = (X' * X) / m; 24 | [U, S, V] = svd(Sigma); 25 | 26 | % ========================================================================= 27 | 28 | end 29 | -------------------------------------------------------------------------------- /Exercise 7/ex7/plotDataPoints.m: -------------------------------------------------------------------------------- 1 | function plotDataPoints(X, idx, K) 2 | %PLOTDATAPOINTS plots data points in X, coloring them so that those with the same 3 | %index assignments in idx have the same color 4 | % PLOTDATAPOINTS(X, idx, K) plots data points in X, coloring them so that those 5 | % with the same index assignments in idx have the same color 6 | 7 | % Create palette 8 | palette = hsv(K + 1); 9 | colors = palette(idx, :); 10 | 11 | % Plot the data 12 | scatter(X(:,1), X(:,2), 15, colors); 13 | 14 | end 15 | -------------------------------------------------------------------------------- /Exercise 7/ex7/plotProgresskMeans.m: -------------------------------------------------------------------------------- 1 | function plotProgresskMeans(X, centroids, previous, idx, K, i) 2 | %PLOTPROGRESSKMEANS is a helper function that displays the progress of 3 | %k-Means as it is running. It is intended for use only with 2D data.
4 | % PLOTPROGRESSKMEANS(X, centroids, previous, idx, K, i) plots the data 5 | % points with colors assigned to each centroid. With the previous 6 | % centroids, it also plots a line between the previous locations and 7 | % current locations of the centroids. 8 | % 9 | 10 | % Plot the examples 11 | plotDataPoints(X, idx, K); 12 | 13 | % Plot the centroids as black x's 14 | plot(centroids(:,1), centroids(:,2), 'x', ... 15 | 'MarkerEdgeColor','k', ... 16 | 'MarkerSize', 10, 'LineWidth', 3); 17 | 18 | % Plot the history of the centroids with lines 19 | for j=1:size(centroids,1) 20 | drawLine(centroids(j, :), previous(j, :)); 21 | end 22 | 23 | % Title 24 | title(sprintf('Iteration number %d', i)) 25 | 26 | end 27 | 28 | -------------------------------------------------------------------------------- /Exercise 7/ex7/projectData.m: -------------------------------------------------------------------------------- 1 | function Z = projectData(X, U, K) 2 | %PROJECTDATA Computes the reduced data representation when projecting only 3 | %on to the top k eigenvectors 4 | % Z = projectData(X, U, K) computes the projection of 5 | % the normalized inputs X into the reduced dimensional space spanned by 6 | % the first K columns of U. It returns the projected examples in Z. 7 | % 8 | 9 | % You need to return the following variables correctly. 10 | Z = zeros(size(X, 1), K); 11 | 12 | % ====================== YOUR CODE HERE ====================== 13 | % Instructions: Compute the projection of the data using only the top K 14 | % eigenvectors in U (first K columns). 15 | % For the i-th example X(i,:), the projection on to the k-th 16 | % eigenvector is given as follows: 17 | % x = X(i, :)'; 18 | % projection_k = x' * U(:, k); 19 | % 20 | 21 | U_reduced = U(:, 1:K); 22 | Z = X * U_reduced; 23 | 24 | % ============================================================= 25 | 26 | end 27 | -------------------------------------------------------------------------------- /Exercise 7/ex7/recoverData.m: -------------------------------------------------------------------------------- 1 | function X_rec = recoverData(Z, U, K) 2 | %RECOVERDATA Recovers an approximation of the original data when using the 3 | %projected data 4 | % X_rec = RECOVERDATA(Z, U, K) recovers an approximation the 5 | % original data that has been reduced to K dimensions. It returns the 6 | % approximate reconstruction in X_rec. 7 | % 8 | 9 | % You need to return the following variables correctly. 10 | X_rec = zeros(size(Z, 1), size(U, 1)); 11 | 12 | % ====================== YOUR CODE HERE ====================== 13 | % Instructions: Compute the approximation of the data by projecting back 14 | % onto the original space using the top K eigenvectors in U. 15 | % 16 | % For the i-th example Z(i,:), the (approximate) 17 | % recovered data for dimension j is given as follows: 18 | % v = Z(i, :)'; 19 | % recovered_j = v' * U(j, 1:K)'; 20 | % 21 | % Notice that U(j, 1:K) is a row vector. 22 | % 23 | 24 | U_reduced = U(:, 1:K); 25 | X_rec = Z * U_reduced'; 26 | 27 | % ============================================================= 28 | 29 | end 30 | -------------------------------------------------------------------------------- /Exercise 7/ex7/runkMeans.m: -------------------------------------------------------------------------------- 1 | function [centroids, idx] = runkMeans(X, initial_centroids, ... 
2 | max_iters, plot_progress) 3 | %RUNKMEANS runs the K-Means algorithm on data matrix X, where each row of X 4 | %is a single example 5 | % [centroids, idx] = RUNKMEANS(X, initial_centroids, max_iters, ... 6 | % plot_progress) runs the K-Means algorithm on data matrix X, where each 7 | % row of X is a single example. It uses initial_centroids used as the 8 | % initial centroids. max_iters specifies the total number of interactions 9 | % of K-Means to execute. plot_progress is a true/false flag that 10 | % indicates if the function should also plot its progress as the 11 | % learning happens. This is set to false by default. runkMeans returns 12 | % centroids, a Kxn matrix of the computed centroids and idx, a m x 1 13 | % vector of centroid assignments (i.e. each entry in range [1..K]) 14 | % 15 | 16 | % Set default value for plot progress 17 | if ~exist('plot_progress', 'var') || isempty(plot_progress) 18 | plot_progress = false; 19 | end 20 | 21 | % Plot the data if we are plotting progress 22 | if plot_progress 23 | figure; 24 | hold on; 25 | end 26 | 27 | % Initialize values 28 | [m n] = size(X); 29 | K = size(initial_centroids, 1); 30 | centroids = initial_centroids; 31 | previous_centroids = centroids; 32 | idx = zeros(m, 1); 33 | 34 | % Run K-Means 35 | for i=1:max_iters 36 | 37 | % Output progress 38 | fprintf('K-Means iteration %d/%d...\n', i, max_iters); 39 | if exist('OCTAVE_VERSION') 40 | fflush(stdout); 41 | end 42 | 43 | % For each example in X, assign it to the closest centroid 44 | idx = findClosestCentroids(X, centroids); 45 | 46 | % Optionally, plot progress here 47 | if plot_progress 48 | plotProgresskMeans(X, centroids, previous_centroids, idx, K, i); 49 | previous_centroids = centroids; 50 | fprintf('Press enter to continue.\n'); 51 | pause; 52 | end 53 | 54 | % Given the memberships, compute new centroids 55 | centroids = computeCentroids(X, idx, K); 56 | end 57 | 58 | % Hold off if we are plotting progress 59 | if plot_progress 60 | hold off; 61 | end 62 | 63 | end 64 | 65 | -------------------------------------------------------------------------------- /Exercise 7/ex7/submitWeb.m: -------------------------------------------------------------------------------- 1 | % submitWeb Creates files from your code and output for web submission. 2 | % 3 | % If the submit function does not work for you, use the web-submission mechanism. 4 | % Call this function to produce a file for the part you wish to submit. Then, 5 | % submit the file to the class servers using the "Web Submission" button on the 6 | % Programming Exercises page on the course website. 7 | % 8 | % You should call this function without arguments (submitWeb), to receive 9 | % an interactive prompt for submission; optionally you can call it with the partID 10 | % if you so wish. Make sure your working directory is set to the directory 11 | % containing the submitWeb.m file and your assignment files. 
12 | 13 | function submitWeb(partId) 14 | if ~exist('partId', 'var') || isempty(partId) 15 | partId = []; 16 | end 17 | 18 | submit(partId, 1); 19 | end 20 | 21 | -------------------------------------------------------------------------------- /Exercise 8/ex8.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rieder91/MachineLearning/f6708f216326cb5c9e9e5c3afc912060bfa10486/Exercise 8/ex8.pdf -------------------------------------------------------------------------------- /Exercise 8/ex8/checkCostFunction.m: -------------------------------------------------------------------------------- 1 | function checkCostFunction(lambda) 2 | %CHECKCOSTFUNCTION Creates a collaborative filering problem 3 | %to check your cost function and gradients 4 | % CHECKCOSTFUNCTION(lambda) Creates a collaborative filering problem 5 | % to check your cost function and gradients, it will output the 6 | % analytical gradients produced by your code and the numerical gradients 7 | % (computed using computeNumericalGradient). These two gradient 8 | % computations should result in very similar values. 9 | 10 | % Set lambda 11 | if ~exist('lambda', 'var') || isempty(lambda) 12 | lambda = 0; 13 | end 14 | 15 | %% Create small problem 16 | X_t = rand(4, 3); 17 | Theta_t = rand(5, 3); 18 | 19 | % Zap out most entries 20 | Y = X_t * Theta_t'; 21 | Y(rand(size(Y)) > 0.5) = 0; 22 | R = zeros(size(Y)); 23 | R(Y ~= 0) = 1; 24 | 25 | %% Run Gradient Checking 26 | X = randn(size(X_t)); 27 | Theta = randn(size(Theta_t)); 28 | num_users = size(Y, 2); 29 | num_movies = size(Y, 1); 30 | num_features = size(Theta_t, 2); 31 | 32 | numgrad = computeNumericalGradient( ... 33 | @(t) cofiCostFunc(t, Y, R, num_users, num_movies, ... 34 | num_features, lambda), [X(:); Theta(:)]); 35 | 36 | [cost, grad] = cofiCostFunc([X(:); Theta(:)], Y, R, num_users, ... 37 | num_movies, num_features, lambda); 38 | 39 | disp([numgrad grad]); 40 | fprintf(['The above two columns you get should be very similar.\n' ... 41 | '(Left-Your Numerical Gradient, Right-Analytical Gradient)\n\n']); 42 | 43 | diff = norm(numgrad-grad)/norm(numgrad+grad); 44 | fprintf(['If your backpropagation implementation is correct, then \n' ... 45 | 'the relative difference will be small (less than 1e-9). \n' ... 46 | '\nRelative Difference: %g\n'], diff); 47 | 48 | end -------------------------------------------------------------------------------- /Exercise 8/ex8/cofiCostFunc.m: -------------------------------------------------------------------------------- 1 | function [J, grad] = cofiCostFunc(params, Y, R, num_users, num_movies, ... 2 | num_features, lambda) 3 | %COFICOSTFUNC Collaborative filtering cost function 4 | % [J, grad] = COFICOSTFUNC(params, Y, R, num_users, num_movies, ... 5 | % num_features, lambda) returns the cost and gradient for the 6 | % collaborative filtering problem. 7 | % 8 | 9 | % Unfold the U and W matrices from params 10 | X = reshape(params(1:num_movies*num_features), num_movies, num_features); 11 | Theta = reshape(params(num_movies*num_features+1:end), ... 12 | num_users, num_features); 13 | 14 | 15 | % You need to return the following values correctly 16 | J = 0; 17 | X_grad = zeros(size(X)); 18 | Theta_grad = zeros(size(Theta)); 19 | 20 | % ====================== YOUR CODE HERE ====================== 21 | % Instructions: Compute the cost function and gradient for collaborative 22 | % filtering. 
Concretely, you should first implement the cost 23 | % function (without regularization) and make sure it 24 | % matches our costs. After that, you should implement the 25 | % gradient and use the checkCostFunction routine to check 26 | % that the gradient is correct. Finally, you should implement 27 | % regularization. 28 | % 29 | % Notes: X - num_movies x num_features matrix of movie features 30 | % Theta - num_users x num_features matrix of user features 31 | % Y - num_movies x num_users matrix of user ratings of movies 32 | % R - num_movies x num_users matrix, where R(i, j) = 1 if the 33 | % i-th movie was rated by the j-th user 34 | % 35 | % You should set the following variables correctly: 36 | % 37 | % X_grad - num_movies x num_features matrix, containing the 38 | % partial derivatives w.r.t. each element of X 39 | % Theta_grad - num_users x num_features matrix, containing the 40 | % partial derivatives w.r.t. each element of Theta 41 | % 42 | 43 | errors = ((X * Theta' - Y) .* R); 44 | squaredErrors = errors .^ 2; 45 | J = ((1 / 2) * sum(squaredErrors(:))) + ((lambda / 2) * sum(Theta(:) .^ 2)) + ((lambda / 2) * sum(X(:) .^ 2)); 46 | 47 | X_grad = errors * Theta + (lambda .* X); 48 | Theta_grad = errors' * X + (lambda .* Theta); 49 | 50 | % ============================================================= 51 | 52 | grad = [X_grad(:); Theta_grad(:)]; 53 | 54 | end 55 | -------------------------------------------------------------------------------- /Exercise 8/ex8/computeNumericalGradient.m: -------------------------------------------------------------------------------- 1 | function numgrad = computeNumericalGradient(J, theta) 2 | %COMPUTENUMERICALGRADIENT Computes the gradient using "finite differences" 3 | %and gives us a numerical estimate of the gradient. 4 | % numgrad = COMPUTENUMERICALGRADIENT(J, theta) computes the numerical 5 | % gradient of the function J around theta. Calling y = J(theta) should 6 | % return the function value at theta. 7 | 8 | % Notes: The following code implements numerical gradient checking, and 9 | % returns the numerical gradient. It sets numgrad(i) to (a numerical 10 | % approximation of) the partial derivative of J with respect to the 11 | % i-th input argument, evaluated at theta. (i.e., numgrad(i) should 12 | % be (approximately) the partial derivative of J with respect 13 | % to theta(i).)
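% (Editor's note: the loop below implements the central-difference
%  approximation
%      numgrad(i) = (J(theta + e * e_i) - J(theta - e * e_i)) / (2 * e)
%  up to O(e^2) error, where e_i is the i-th unit vector and e = 1e-4;
%  perturb, loss1 and loss2 hold exactly these quantities.)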
14 | % 15 | 16 | numgrad = zeros(size(theta)); 17 | perturb = zeros(size(theta)); 18 | e = 1e-4; 19 | for p = 1:numel(theta) 20 | % Set perturbation vector 21 | perturb(p) = e; 22 | loss1 = J(theta - perturb); 23 | loss2 = J(theta + perturb); 24 | % Compute Numerical Gradient 25 | numgrad(p) = (loss2 - loss1) / (2*e); 26 | perturb(p) = 0; 27 | end 28 | 29 | end 30 | -------------------------------------------------------------------------------- /Exercise 8/ex8/estimateGaussian.m: -------------------------------------------------------------------------------- 1 | function [mu sigma2] = estimateGaussian(X) 2 | %ESTIMATEGAUSSIAN This function estimates the parameters of a 3 | %Gaussian distribution using the data in X 4 | % [mu sigma2] = estimateGaussian(X), 5 | % The input X is the dataset with each n-dimensional data point in one row 6 | % The output is an n-dimensional vector mu, the mean of the data set 7 | % and the variances sigma^2, an n x 1 vector 8 | % 9 | 10 | % Useful variables 11 | [m, n] = size(X); 12 | 13 | % You should return these values correctly 14 | mu = zeros(n, 1); 15 | sigma2 = zeros(n, 1); 16 | 17 | % ====================== YOUR CODE HERE ====================== 18 | % Instructions: Compute the mean of the data and the variances 19 | % In particular, mu(i) should contain the mean of 20 | % the data for the i-th feature and sigma2(i) 21 | % should contain variance of the i-th feature. 22 | % 23 | 24 | mu = mean(X)'; 25 | sigma2 = var(X, 1)'; 26 | 27 | % ============================================================= 28 | 29 | 30 | end 31 | -------------------------------------------------------------------------------- /Exercise 8/ex8/ex8.m: -------------------------------------------------------------------------------- 1 | %% Machine Learning Online Class 2 | % Exercise 8 | Anomaly Detection and Collaborative Filtering 3 | % 4 | % Instructions 5 | % ------------ 6 | % 7 | % This file contains code that helps you get started on the 8 | % exercise. You will need to complete the following functions: 9 | % 10 | % estimateGaussian.m 11 | % selectThreshold.m 12 | % cofiCostFunc.m 13 | % 14 | % For this exercise, you will not need to change any code in this file, 15 | % or any other files other than those mentioned above. 16 | % 17 | 18 | %% Initialization 19 | clear ; close all; clc 20 | 21 | %% ================== Part 1: Load Example Dataset =================== 22 | % We start this exercise by using a small dataset that is easy to 23 | % visualize. 24 | % 25 | % Our example case consists of 2 network server statistics across 26 | % several machines: the latency and throughput of each machine. 27 | % This exercise will help us find possibly faulty (or very fast) machines. 28 | % 29 | 30 | fprintf('Visualizing example dataset for outlier detection.\n\n'); 31 | 32 | % The following command loads the dataset. You should now have the 33 | % variables X, Xval, yval in your environment 34 | load('ex8data1.mat'); 35 | 36 | % Visualize the example dataset 37 | plot(X(:, 1), X(:, 2), 'bx'); 38 | axis([0 30 0 30]); 39 | xlabel('Latency (ms)'); 40 | ylabel('Throughput (mb/s)'); 41 | 42 | fprintf('Program paused. Press enter to continue.\n'); 43 | pause 44 | 45 | 46 | %% ================== Part 2: Estimate the dataset statistics =================== 47 | % For this exercise, we assume a Gaussian distribution for the dataset. 
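% (Editor's note: concretely, each feature j is modelled as a univariate
%  Gaussian N(mu(j), sigma2(j)) with the maximum-likelihood estimates
%      mu(j)     = (1/m) * sum over i of X(i, j)
%      sigma2(j) = (1/m) * sum over i of (X(i, j) - mu(j))^2
%  which estimateGaussian.m above obtains via mean(X) and var(X, 1).)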
48 | % 49 | % We first estimate the parameters of our assumed Gaussian distribution, 50 | % then compute the probabilities for each of the points and then visualize 51 | % both the overall distribution and where each of the points falls in 52 | % terms of that distribution. 53 | % 54 | fprintf('Visualizing Gaussian fit.\n\n'); 55 | 56 | % Estimate my and sigma2 57 | [mu sigma2] = estimateGaussian(X); 58 | 59 | % Returns the density of the multivariate normal at each data point (row) 60 | % of X 61 | p = multivariateGaussian(X, mu, sigma2); 62 | 63 | % Visualize the fit 64 | visualizeFit(X, mu, sigma2); 65 | xlabel('Latency (ms)'); 66 | ylabel('Throughput (mb/s)'); 67 | 68 | fprintf('Program paused. Press enter to continue.\n'); 69 | pause; 70 | 71 | %% ================== Part 3: Find Outliers =================== 72 | % Now you will find a good epsilon threshold using a cross-validation set 73 | % probabilities given the estimated Gaussian distribution 74 | % 75 | 76 | pval = multivariateGaussian(Xval, mu, sigma2); 77 | 78 | [epsilon F1] = selectThreshold(yval, pval); 79 | fprintf('Best epsilon found using cross-validation: %e\n', epsilon); 80 | fprintf('Best F1 on Cross Validation Set: %f\n', F1); 81 | fprintf(' (you should see a value epsilon of about 8.99e-05)\n\n'); 82 | 83 | % Find the outliers in the training set and plot the 84 | outliers = find(p < epsilon); 85 | 86 | % Draw a red circle around those outliers 87 | hold on 88 | plot(X(outliers, 1), X(outliers, 2), 'ro', 'LineWidth', 2, 'MarkerSize', 10); 89 | hold off 90 | 91 | fprintf('Program paused. Press enter to continue.\n'); 92 | pause; 93 | 94 | %% ================== Part 4: Multidimensional Outliers =================== 95 | % We will now use the code from the previous part and apply it to a 96 | % harder problem in which more features describe each datapoint and only 97 | % some features indicate whether a point is an outlier. 98 | % 99 | 100 | % Loads the second dataset. You should now have the 101 | % variables X, Xval, yval in your environment 102 | load('ex8data2.mat'); 103 | 104 | % Apply the same steps to the larger dataset 105 | [mu sigma2] = estimateGaussian(X); 106 | 107 | % Training set 108 | p = multivariateGaussian(X, mu, sigma2); 109 | 110 | % Cross-validation set 111 | pval = multivariateGaussian(Xval, mu, sigma2); 112 | 113 | % Find the best threshold 114 | [epsilon F1] = selectThreshold(yval, pval); 115 | 116 | fprintf('Best epsilon found using cross-validation: %e\n', epsilon); 117 | fprintf('Best F1 on Cross Validation Set: %f\n', F1); 118 | fprintf('# Outliers found: %d\n', sum(p < epsilon)); 119 | fprintf(' (you should see a value epsilon of about 1.38e-18)\n\n'); 120 | pause 121 | 122 | 123 | 124 | -------------------------------------------------------------------------------- /Exercise 8/ex8/ex8_cofi.m: -------------------------------------------------------------------------------- 1 | %% Machine Learning Online Class 2 | % Exercise 8 | Anomaly Detection and Collaborative Filtering 3 | % 4 | % Instructions 5 | % ------------ 6 | % 7 | % This file contains code that helps you get started on the 8 | % exercise. You will need to complete the following functions: 9 | % 10 | % estimateGaussian.m 11 | % selectThreshold.m 12 | % cofiCostFunc.m 13 | % 14 | % For this exercise, you will not need to change any code in this file, 15 | % or any other files other than those mentioned above. 
16 | % 17 | 18 | %% =============== Part 1: Loading movie ratings dataset ================ 19 | % You will start by loading the movie ratings dataset to understand the 20 | % structure of the data. 21 | % 22 | fprintf('Loading movie ratings dataset.\n\n'); 23 | 24 | % Load data 25 | load ('ex8_movies.mat'); 26 | 27 | % Y is a 1682x943 matrix, containing ratings (1-5) of 1682 movies on 28 | % 943 users 29 | % 30 | % R is a 1682x943 matrix, where R(i,j) = 1 if and only if user j gave a 31 | % rating to movie i 32 | 33 | % From the matrix, we can compute statistics like average rating. 34 | fprintf('Average rating for movie 1 (Toy Story): %f / 5\n\n', ... 35 | mean(Y(1, R(1, :)))); 36 | 37 | % We can "visualize" the ratings matrix by plotting it with imagesc 38 | imagesc(Y); 39 | ylabel('Movies'); 40 | xlabel('Users'); 41 | 42 | fprintf('\nProgram paused. Press enter to continue.\n'); 43 | pause; 44 | 45 | %% ============ Part 2: Collaborative Filtering Cost Function =========== 46 | % You will now implement the cost function for collaborative filtering. 47 | % To help you debug your cost function, we have included set of weights 48 | % that we trained on that. Specifically, you should complete the code in 49 | % cofiCostFunc.m to return J. 50 | 51 | % Load pre-trained weights (X, Theta, num_users, num_movies, num_features) 52 | load ('ex8_movieParams.mat'); 53 | 54 | % Reduce the data set size so that this runs faster 55 | num_users = 4; num_movies = 5; num_features = 3; 56 | X = X(1:num_movies, 1:num_features); 57 | Theta = Theta(1:num_users, 1:num_features); 58 | Y = Y(1:num_movies, 1:num_users); 59 | R = R(1:num_movies, 1:num_users); 60 | 61 | % Evaluate cost function 62 | J = cofiCostFunc([X(:) ; Theta(:)], Y, R, num_users, num_movies, ... 63 | num_features, 0); 64 | 65 | fprintf(['Cost at loaded parameters: %f '... 66 | '\n(this value should be about 22.22)\n'], J); 67 | 68 | fprintf('\nProgram paused. Press enter to continue.\n'); 69 | pause; 70 | 71 | 72 | %% ============== Part 3: Collaborative Filtering Gradient ============== 73 | % Once your cost function matches up with ours, you should now implement 74 | % the collaborative filtering gradient function. Specifically, you should 75 | % complete the code in cofiCostFunc.m to return the grad argument. 76 | % 77 | fprintf('\nChecking Gradients (without regularization) ... \n'); 78 | 79 | % Check gradients by running checkNNGradients 80 | checkCostFunction; 81 | 82 | fprintf('\nProgram paused. Press enter to continue.\n'); 83 | pause; 84 | 85 | 86 | %% ========= Part 4: Collaborative Filtering Cost Regularization ======== 87 | % Now, you should implement regularization for the cost function for 88 | % collaborative filtering. You can implement it by adding the cost of 89 | % regularization to the original cost computation. 90 | % 91 | 92 | % Evaluate cost function 93 | J = cofiCostFunc([X(:) ; Theta(:)], Y, R, num_users, num_movies, ... 94 | num_features, 1.5); 95 | 96 | fprintf(['Cost at loaded parameters (lambda = 1.5): %f '... 97 | '\n(this value should be about 31.34)\n'], J); 98 | 99 | fprintf('\nProgram paused. Press enter to continue.\n'); 100 | pause; 101 | 102 | 103 | %% ======= Part 5: Collaborative Filtering Gradient Regularization ====== 104 | % Once your cost matches up with ours, you should proceed to implement 105 | % regularization for the gradient. 106 | % 107 | 108 | % 109 | fprintf('\nChecking Gradients (with regularization) ... 
\n'); 110 | 111 | % Check gradients by running checkNNGradients 112 | checkCostFunction(1.5); 113 | 114 | fprintf('\nProgram paused. Press enter to continue.\n'); 115 | pause; 116 | 117 | 118 | %% ============== Part 6: Entering ratings for a new user =============== 119 | % Before we will train the collaborative filtering model, we will first 120 | % add ratings that correspond to a new user that we just observed. This 121 | % part of the code will also allow you to put in your own ratings for the 122 | % movies in our dataset! 123 | % 124 | movieList = loadMovieList(); 125 | 126 | % Initialize my ratings 127 | my_ratings = zeros(1682, 1); 128 | 129 | % Check the file movie_idx.txt for id of each movie in our dataset 130 | % For example, Toy Story (1995) has ID 1, so to rate it "4", you can set 131 | my_ratings(1) = 4; 132 | 133 | % Or suppose did not enjoy Silence of the Lambs (1991), you can set 134 | my_ratings(98) = 2; 135 | 136 | % We have selected a few movies we liked / did not like and the ratings we 137 | % gave are as follows: 138 | my_ratings(7) = 3; 139 | my_ratings(12)= 5; 140 | my_ratings(54) = 4; 141 | my_ratings(64)= 5; 142 | my_ratings(66)= 3; 143 | my_ratings(69) = 5; 144 | my_ratings(183) = 4; 145 | my_ratings(226) = 5; 146 | my_ratings(355)= 5; 147 | 148 | fprintf('\n\nNew user ratings:\n'); 149 | for i = 1:length(my_ratings) 150 | if my_ratings(i) > 0 151 | fprintf('Rated %d for %s\n', my_ratings(i), ... 152 | movieList{i}); 153 | end 154 | end 155 | 156 | fprintf('\nProgram paused. Press enter to continue.\n'); 157 | pause; 158 | 159 | 160 | %% ================== Part 7: Learning Movie Ratings ==================== 161 | % Now, you will train the collaborative filtering model on a movie rating 162 | % dataset of 1682 movies and 943 users 163 | % 164 | 165 | fprintf('\nTraining collaborative filtering...\n'); 166 | 167 | % Load data 168 | load('ex8_movies.mat'); 169 | 170 | % Y is a 1682x943 matrix, containing ratings (1-5) of 1682 movies by 171 | % 943 users 172 | % 173 | % R is a 1682x943 matrix, where R(i,j) = 1 if and only if user j gave a 174 | % rating to movie i 175 | 176 | % Add our own ratings to the data matrix 177 | Y = [my_ratings Y]; 178 | R = [(my_ratings ~= 0) R]; 179 | 180 | % Normalize Ratings 181 | [Ynorm, Ymean] = normalizeRatings(Y, R); 182 | 183 | % Useful Values 184 | num_users = size(Y, 2); 185 | num_movies = size(Y, 1); 186 | num_features = 10; 187 | 188 | % Set Initial Parameters (Theta, X) 189 | X = randn(num_movies, num_features); 190 | Theta = randn(num_users, num_features); 191 | 192 | initial_parameters = [X(:); Theta(:)]; 193 | 194 | % Set options for fmincg 195 | options = optimset('GradObj', 'on', 'MaxIter', 100); 196 | 197 | % Set Regularization 198 | lambda = 10; 199 | theta = fmincg (@(t)(cofiCostFunc(t, Y, R, num_users, num_movies, ... 200 | num_features, lambda)), ... 201 | initial_parameters, options); 202 | 203 | % Unfold the returned theta back into U and W 204 | X = reshape(theta(1:num_movies*num_features), num_movies, num_features); 205 | Theta = reshape(theta(num_movies*num_features+1:end), ... 206 | num_users, num_features); 207 | 208 | fprintf('Recommender system learning completed.\n'); 209 | 210 | fprintf('\nProgram paused. Press enter to continue.\n'); 211 | pause; 212 | 213 | %% ================== Part 8: Recommendation for you ==================== 214 | % After training the model, you can now make recommendations by computing 215 | % the predictions matrix. 
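% (Editor's note: with the learned factors, the predicted rating of movie i by
%  user j is the dot product X(i, :) * Theta(j, :)', so the whole prediction
%  matrix is simply p = X * Theta'. Since the ratings were mean-normalized
%  before training, Ymean is added back below; column 1 of p belongs to the
%  new user whose ratings were prepended to Y.)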
216 | % 217 | 218 | p = X * Theta'; 219 | my_predictions = p(:,1) + Ymean; 220 | 221 | movieList = loadMovieList(); 222 | 223 | [r, ix] = sort(my_predictions, 'descend'); 224 | fprintf('\nTop recommendations for you:\n'); 225 | for i=1:10 226 | j = ix(i); 227 | fprintf('Predicting rating %.1f for movie %s\n', my_predictions(j), ... 228 | movieList{j}); 229 | end 230 | 231 | fprintf('\n\nOriginal ratings provided:\n'); 232 | for i = 1:length(my_ratings) 233 | if my_ratings(i) > 0 234 | fprintf('Rated %d for %s\n', my_ratings(i), ... 235 | movieList{i}); 236 | end 237 | end 238 | -------------------------------------------------------------------------------- /Exercise 8/ex8/ex8_movieParams.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rieder91/MachineLearning/f6708f216326cb5c9e9e5c3afc912060bfa10486/Exercise 8/ex8/ex8_movieParams.mat -------------------------------------------------------------------------------- /Exercise 8/ex8/ex8_movies.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rieder91/MachineLearning/f6708f216326cb5c9e9e5c3afc912060bfa10486/Exercise 8/ex8/ex8_movies.mat -------------------------------------------------------------------------------- /Exercise 8/ex8/ex8data1.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rieder91/MachineLearning/f6708f216326cb5c9e9e5c3afc912060bfa10486/Exercise 8/ex8/ex8data1.mat -------------------------------------------------------------------------------- /Exercise 8/ex8/ex8data2.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rieder91/MachineLearning/f6708f216326cb5c9e9e5c3afc912060bfa10486/Exercise 8/ex8/ex8data2.mat -------------------------------------------------------------------------------- /Exercise 8/ex8/loadMovieList.m: -------------------------------------------------------------------------------- 1 | function movieList = loadMovieList() 2 | %GETMOVIELIST reads the fixed movie list in movie.txt and returns a 3 | %cell array of the words 4 | % movieList = GETMOVIELIST() reads the fixed movie list in movie.txt 5 | % and returns a cell array of the words in movieList. 6 | 7 | 8 | %% Read the fixed movieulary list 9 | fid = fopen('movie_ids.txt'); 10 | 11 | % Store all movies in cell array movie{} 12 | n = 1682; % Total number of movies 13 | 14 | movieList = cell(n, 1); 15 | for i = 1:n 16 | % Read line 17 | line = fgets(fid); 18 | % Word Index (can ignore since it will be = i) 19 | [idx, movieName] = strtok(line, ' '); 20 | % Actual Word 21 | movieList{i} = strtrim(movieName); 22 | end 23 | fclose(fid); 24 | 25 | end 26 | -------------------------------------------------------------------------------- /Exercise 8/ex8/movie_ids.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rieder91/MachineLearning/f6708f216326cb5c9e9e5c3afc912060bfa10486/Exercise 8/ex8/movie_ids.txt -------------------------------------------------------------------------------- /Exercise 8/ex8/multivariateGaussian.m: -------------------------------------------------------------------------------- 1 | function p = multivariateGaussian(X, mu, Sigma2) 2 | %MULTIVARIATEGAUSSIAN Computes the probability density function of the 3 | %multivariate gaussian distribution. 
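% (Editor's note: the code below evaluates the multivariate Gaussian density
%      p(x) = (2 * pi)^(-k/2) * det(Sigma2)^(-1/2)
%             * exp(-1/2 * (x - mu)' * pinv(Sigma2) * (x - mu))
%  for every row of X, after expanding a vector Sigma2 into a diagonal
%  covariance matrix.)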
4 | % p = MULTIVARIATEGAUSSIAN(X, mu, Sigma2) Computes the probability 5 | % density function of the examples X under the multivariate gaussian 6 | % distribution with parameters mu and Sigma2. If Sigma2 is a matrix, it is 7 | % treated as the covariance matrix. If Sigma2 is a vector, it is treated 8 | % as the \sigma^2 values of the variances in each dimension (a diagonal 9 | % covariance matrix) 10 | % 11 | 12 | k = length(mu); 13 | 14 | if (size(Sigma2, 2) == 1) || (size(Sigma2, 1) == 1) 15 | Sigma2 = diag(Sigma2); 16 | end 17 | 18 | X = bsxfun(@minus, X, mu(:)'); 19 | p = (2 * pi) ^ (- k / 2) * det(Sigma2) ^ (-0.5) * ... 20 | exp(-0.5 * sum(bsxfun(@times, X * pinv(Sigma2), X), 2)); 21 | 22 | end -------------------------------------------------------------------------------- /Exercise 8/ex8/normalizeRatings.m: -------------------------------------------------------------------------------- 1 | function [Ynorm, Ymean] = normalizeRatings(Y, R) 2 | %NORMALIZERATINGS Preprocess data by subtracting mean rating for every 3 | %movie (every row) 4 | % [Ynorm, Ymean] = NORMALIZERATINGS(Y, R) normalized Y so that each movie 5 | % has a rating of 0 on average, and returns the mean rating in Ymean. 6 | % 7 | 8 | [m, n] = size(Y); 9 | Ymean = zeros(m, 1); 10 | Ynorm = zeros(size(Y)); 11 | for i = 1:m 12 | idx = find(R(i, :) == 1); 13 | Ymean(i) = mean(Y(i, idx)); 14 | Ynorm(i, idx) = Y(i, idx) - Ymean(i); 15 | end 16 | 17 | end 18 | -------------------------------------------------------------------------------- /Exercise 8/ex8/selectThreshold.m: -------------------------------------------------------------------------------- 1 | function [bestEpsilon bestF1] = selectThreshold(yval, pval) 2 | %SELECTTHRESHOLD Find the best threshold (epsilon) to use for selecting 3 | %outliers 4 | % [bestEpsilon bestF1] = SELECTTHRESHOLD(yval, pval) finds the best 5 | % threshold to use for selecting outliers based on the results from a 6 | % validation set (pval) and the ground truth (yval). 7 | % 8 | 9 | bestEpsilon = 0; 10 | bestF1 = 0; 11 | F1 = 0; 12 | 13 | stepsize = (max(pval) - min(pval)) / 1000; 14 | for epsilon = min(pval):stepsize:max(pval) 15 | 16 | % ====================== YOUR CODE HERE ====================== 17 | % Instructions: Compute the F1 score of choosing epsilon as the 18 | % threshold and place the value in F1. The code at the 19 | % end of the loop will compare the F1 score for this 20 | % choice of epsilon and set it to be the best epsilon if 21 | % it is better than the current choice of epsilon. 22 | % 23 | % Note: You can use predictions = (pval < epsilon) to get a binary vector 24 | % of 0's and 1's of the outlier predictions 25 | 26 | predictions = (pval < epsilon); 27 | tp = sum((predictions == 1 & yval == 1)); 28 | fp = sum((predictions == 1 & yval == 0)); 29 | fn = sum((predictions == 0 & yval == 1)); 30 | precision = tp / (tp + fp); 31 | recall = tp / (tp + fn); 32 | F1 = (2 * precision * recall) / (precision + recall); 33 | 34 | % ============================================================= 35 | 36 | if F1 > bestF1 37 | bestF1 = F1; 38 | bestEpsilon = epsilon; 39 | end 40 | end 41 | 42 | end 43 | -------------------------------------------------------------------------------- /Exercise 8/ex8/submitWeb.m: -------------------------------------------------------------------------------- 1 | % submitWeb Creates files from your code and output for web submission. 2 | % 3 | % If the submit function does not work for you, use the web-submission mechanism. 
4 | % Call this function to produce a file for the part you wish to submit. Then, 5 | % submit the file to the class servers using the "Web Submission" button on the 6 | % Programming Exercises page on the course website. 7 | % 8 | % You should call this function without arguments (submitWeb), to receive 9 | % an interactive prompt for submission; optionally you can call it with the partID 10 | % if you so wish. Make sure your working directory is set to the directory 11 | % containing the submitWeb.m file and your assignment files. 12 | 13 | function submitWeb(partId) 14 | if ~exist('partId', 'var') || isempty(partId) 15 | partId = []; 16 | end 17 | 18 | submit(partId, 1); 19 | end 20 | 21 | -------------------------------------------------------------------------------- /Exercise 8/ex8/visualizeFit.m: -------------------------------------------------------------------------------- 1 | function visualizeFit(X, mu, sigma2) 2 | %VISUALIZEFIT Visualize the dataset and its estimated distribution. 3 | % VISUALIZEFIT(X, p, mu, sigma2) This visualization shows you the 4 | % probability density function of the Gaussian distribution. Each example 5 | % has a location (x1, x2) that depends on its feature values. 6 | % 7 | 8 | [X1,X2] = meshgrid(0:.5:35); 9 | Z = multivariateGaussian([X1(:) X2(:)],mu,sigma2); 10 | Z = reshape(Z,size(X1)); 11 | 12 | plot(X(:, 1), X(:, 2),'bx'); 13 | hold on; 14 | % Do not plot if there are infinities 15 | if (sum(isinf(Z)) == 0) 16 | contour(X1, X2, Z, 10.^(-20:3:0)'); 17 | end 18 | hold off; 19 | 20 | end -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Machine Learning 2 | _Coursera, taught by Andrew Ng_ 3 | 4 | ### Done 5 | * Linear Regression 6 | * Logistic Regression 7 | * Multi-class Classification and Neural Networks 8 | * Neural Network Learning 9 | * Regularized Linear Regression and Bias/Variance 10 | * Support Vector Machines 11 | * K-Means Clustering and PCA 12 | * Anomaly Detection and Recommender Systems 13 | 14 | 15 | ### TODO 16 | * Nothing 17 | 18 | 19 | ### How to contact me 20 | * via email: thomasrieder _at_ aon _dot_ at 21 | * via twitter: [My Profile](https://twitter.com/#!/thomasrieder) 22 | 23 | 24 | ### Class: 25 | [Machine Learning](https://class.coursera.org/ml "Machine Learning") 26 | --------------------------------------------------------------------------------