├── README.md
├── ex1
    ├── computeCost.m
    ├── computeCostMulti.m
    ├── ex1.m
    ├── ex1_multi.m
    ├── ex1data1.txt
    ├── ex1data2.txt
    ├── featureNormalize.m
    ├── gradientDescent.m
    ├── gradientDescentMulti.m
    ├── normalEqn.m
    ├── plotData.m
    └── warmUpExercise.m
├── ex2
    ├── costFunction.m
    ├── costFunctionReg.m
    ├── ex2.m
    ├── ex2_guide.pdf
    ├── ex2_reg.m
    ├── mapFeature.m
    ├── plotData.m
    ├── plotDecisionBoundary.m
    ├── predict.m
    └── sigmoid.m
├── ex3
    ├── displayData.m
    ├── ex3.m
    ├── ex3_nn.m
    ├── lrCostFunction.m
    ├── oneVsAll.m
    ├── predict.m
    ├── predictOneVsAll.m
    └── sigmoid.m
├── ex4
    ├── computeNumericalGradient.m
    ├── ex4.m
    ├── nnCostFunction -V2.m
    ├── nnCostFunction.m
    ├── randInitializeWeights.m
    └── sigmoidGradient.m
├── ex5
    ├── ex5.m
    ├── learningCurve.m
    ├── linearRegCostFunction.m
    ├── polyFeatures.m
    ├── trainLinearReg.m
    └── validationCurve.m
├── ex6
    ├── dataset3Params.m
    ├── emailFeatures.m
    ├── ex6.m
    ├── ex6_spam.m
    ├── gaussianKernel.m
    └── processEmail.m
├── ex7
    ├── computeCentroids.m
    ├── ex7.m
    ├── ex7_pca.m
    ├── findClosestCentroids.m
    ├── kMeansInitCentroids.m
    ├── pca.m
    ├── projectData.m
    └── recoverData.m
├── ex8
    ├── cofiCostFunc.m
    ├── estimateGaussian.m
    ├── ex8.m
    ├── ex8_cofi.m
    └── selectThreshold.m
└── update


/README.md:
--------------------------------------------------------------------------------
1 | # Andrew-Ng-Machine-Learning-Assignment
2 | The assignment code for Coursera by Ng's ML course
3 | 吴恩达《机器学习》coursera课程的作业、课后代码
4 | 


--------------------------------------------------------------------------------
/ex1/computeCost.m:
--------------------------------------------------------------------------------
 1 | function J = computeCost(X, y, theta)
 2 | %COMPUTECOST Compute cost for linear regression
 3 | %   J = COMPUTECOST(X, y, theta) computes the cost of using theta as the
 4 | %   parameter for linear regression to fit the data points in X and y
 5 | 
 6 | % Initialize some useful values
 7 | m = length(y); % number of training examples
 8 | sum = 0;
 9 | 
10 | for i = 1:m
11 |     cost = ((theta)' * (X(i,:))' - y(i)) ^ 2;
12 |     sum = sum + cost;
13 | end
14 | 
15 | J = 1/m * (1/2) * sum;
16 | % You need to return the following variables correctly 
17 | 
18 | % ====================== YOUR CODE HERE ======================
19 | % Instructions: Compute the cost of a particular choice of theta
20 | %               You should set J to the cost.
21 | 
22 | 
23 | 
24 | 
25 | 
26 | % =========================================================================
27 | 
28 | end
29 | 


--------------------------------------------------------------------------------
/ex1/computeCostMulti.m:
--------------------------------------------------------------------------------
 1 | function J = computeCostMulti(X, y, theta)
 2 | %COMPUTECOSTMULTI Compute cost for linear regression with multiple variables
 3 | %   J = COMPUTECOSTMULTI(X, y, theta) computes the cost of using theta as the
 4 | %   parameter for linear regression to fit the data points in X and y
 5 | 
 6 | % Initialize some useful values
 7 | m = length(y); % number of training examples
 8 | 
 9 | 
10 | 
11 | cost = (X * theta - y)' * (X * theta - y);
12 | 
13 | 
14 | J = cost / m * (1/2);
15 | 
16 | 
17 | 
18 | % You need to return the following variables correctly 
19 | 
20 | % ====================== YOUR CODE HERE ======================
21 | % Instructions: Compute the cost of a particular choice of theta
22 | %               You should set J to the cost.
23 | 
24 | 
25 | 
26 | 
27 | 
28 | % =========================================================================
29 | 
30 | end
31 | 


--------------------------------------------------------------------------------
/ex1/ex1.m:
--------------------------------------------------------------------------------
  1 | %% Machine Learning Online Class - Exercise 1: Linear Regression
  2 | 
  3 | %  Instructions
  4 | %  ------------
  5 | %
  6 | %  This file contains code that helps you get started on the
  7 | %  linear exercise. You will need to complete the following functions
  8 | %  in this exericse:
  9 | %
 10 | %     warmUpExercise.m
 11 | %     plotData.m
 12 | %     gradientDescent.m
 13 | %     computeCost.m
 14 | %     gradientDescentMulti.m
 15 | %     computeCostMulti.m
 16 | %     featureNormalize.m
 17 | %     normalEqn.m
 18 | %
 19 | %  For this exercise, you will not need to change any code in this file,
 20 | %  or any other files other than those mentioned above.
 21 | %
 22 | % x refers to the population size in 10,000s
 23 | % y refers to the profit in $10,000s
 24 | %
 25 | 
 26 | %% Initialization
 27 | clear ; close all; clc
 28 | 
 29 | %% ==================== Part 1: Basic Function ====================
 30 | % Complete warmUpExercise.m
 31 | fprintf('Running warmUpExercise ... \n');
 32 | fprintf('5x5 Identity Matrix: \n');
 33 | warmUpExercise()
 34 | 
 35 | fprintf('Program paused. Press enter to continue.\n');
 36 | pause;
 37 | 
 38 | 
 39 | %% ======================= Part 2: Plotting =======================
 40 | fprintf('Plotting Data ...\n')
 41 | data = load('ex1data1.txt');
 42 | X = data(:, 1); y = data(:, 2);
 43 | m = length(y); % number of training examples
 44 | 
 45 | % Plot Data
 46 | % Note: You have to complete the code in plotData.m
 47 | plot(X, y,'rx','MarkerSize',10);
 48 | ylabel('Profit in $10,000s');
 49 | ylabel('Population of City in 10,000s');
 50 | fprintf('Program paused. Press enter to continue.\n');
 51 | 
 52 | pause;
 53 | 
 54 | %% =================== Part 3: Cost and Gradient descent ===================
 55 | 
 56 | X = [ones(m, 1), data(:,1)]; % Add a column of ones to x
 57 | theta = zeros(2, 1); % initialize fitting parameters
 58 | 
 59 | % Some gradient descent settings
 60 | iterations = 1500;
 61 | alpha = 0.01;
 62 | 
 63 | fprintf('\nTesting the cost function ...\n')
 64 | % compute and display initial cost
 65 | J = computeCost(X, y, theta);
 66 | fprintf('With theta = [0 ; 0]\nCost computed = %f\n', J);
 67 | fprintf('Expected cost value (approx) 32.07\n');
 68 | 
 69 | % further testing of the cost function
 70 | J = computeCost(X, y, [-1 ; 2]);
 71 | fprintf('\nWith theta = [-1 ; 2]\nCost computed = %f\n', J);
 72 | fprintf('Expected cost value (approx) 54.24\n');
 73 | 
 74 | fprintf('Program paused. Press enter to continue.\n');
 75 | pause;
 76 | 
 77 | fprintf('\nRunning Gradient Descent ...\n')
 78 | % run gradient descent
 79 | theta = gradientDescent(X, y, theta, alpha, iterations);
 80 | 
 81 | % print theta to screen
 82 | fprintf('Theta found by gradient descent:\n');
 83 | fprintf('%f\n', theta);
 84 | fprintf('Expected theta values (approx)\n');
 85 | fprintf(' -3.6303\n  1.1664\n\n');
 86 | 
 87 | % Plot the linear fit
 88 | hold on; % keep previous plot visible
 89 | plot(X(:,2), X*theta, '-')
 90 | legend('Training data', 'Linear regression')
 91 | hold off % don't overlay any more plots on this figure
 92 | 
 93 | % Predict values for population sizes of 35,000 and 70,000
 94 | predict1 = [1, 3.5] *theta;
 95 | fprintf('For population = 35,000, we predict a profit of %f\n',...
 96 |     predict1*10000);
 97 | predict2 = [1, 7] * theta;
 98 | fprintf('For population = 70,000, we predict a profit of %f\n',...
 99 |     predict2*10000);
100 | 
101 | fprintf('Program paused. Press enter to continue.\n');
102 | pause;
103 | 
104 | %% ============= Part 4: Visualizing J(theta_0, theta_1) =============
105 | fprintf('Visualizing J(theta_0, theta_1) ...\n')
106 | 
107 | % Grid over which we will calculate J
108 | theta0_vals = linspace(-10, 10, 100);
109 | theta1_vals = linspace(-1, 4, 100);
110 | 
111 | % initialize J_vals to a matrix of 0's
112 | J_vals = zeros(length(theta0_vals), length(theta1_vals));
113 | 
114 | % Fill out J_vals
115 | for i = 1:length(theta0_vals)
116 |     for j = 1:length(theta1_vals)
117 | 	  t = [theta0_vals(i); theta1_vals(j)];
118 | 	  J_vals(i,j) = computeCost(X, y, t);
119 |     end
120 | end
121 | 
122 | 
123 | % Because of the way meshgrids work in the surf command, we need to
124 | % transpose J_vals before calling surf, or else the axes will be flipped
125 | J_vals = J_vals';
126 | % Surface plot
127 | figure;
128 | surf(theta0_vals, theta1_vals, J_vals)
129 | xlabel('\theta_0'); ylabel('\theta_1');
130 | 
131 | % Contour plot
132 | figure;
133 | % Plot J_vals as 15 contours spaced logarithmically between 0.01 and 100
134 | contour(theta0_vals, theta1_vals, J_vals, logspace(-2, 3, 20))
135 | xlabel('\theta_0'); ylabel('\theta_1');
136 | hold on;
137 | plot(theta(1), theta(2), 'rx', 'MarkerSize', 10, 'LineWidth', 2);
138 | 


--------------------------------------------------------------------------------
/ex1/ex1_multi.m:
--------------------------------------------------------------------------------
  1 | %% Machine Learning Online Class
  2 | %  Exercise 1: Linear regression with multiple variables
  3 | %
  4 | %  Instructions
  5 | %  ------------
  6 | % 
  7 | %  This file contains code that helps you get started on the
  8 | %  linear regression exercise. 
  9 | %
 10 | %  You will need to complete the following functions in this 
 11 | %  exericse:
 12 | %
 13 | %     warmUpExercise.m
 14 | %     plotData.m
 15 | %     gradientDescent.m
 16 | %     computeCost.m
 17 | %     gradientDescentMulti.m
 18 | %     computeCostMulti.m
 19 | %     featureNormalize.m
 20 | %     normalEqn.m
 21 | %
 22 | %  For this part of the exercise, you will need to change some
 23 | %  parts of the code below for various experiments (e.g., changing
 24 | %  learning rates).
 25 | %
 26 | 
 27 | %% Initialization
 28 | 
 29 | %% ================ Part 1: Feature Normalization ================
 30 | 
 31 | %% Clear and Close Figures
 32 | clear ; close all; clc
 33 | 
 34 | fprintf('Loading data ...\n');
 35 | 
 36 | %% Load Data
 37 | data = load('ex1data2.txt');
 38 | X = data(:, 1:2);
 39 | y = data(:, 3);
 40 | m = length(y);
 41 | 
 42 | % Print out some data points
 43 | fprintf('First 10 examples from the dataset: \n');
 44 | fprintf(' x = [%.0f %.0f], y = %.0f \n', [X(1:10,:) y(1:10,:)]');
 45 | 
 46 | fprintf('Program paused. Press enter to continue.\n');
 47 | pause;
 48 | 
 49 | % Scale features and set them to zero mean
 50 | fprintf('Normalizing Features ...\n');
 51 | 
 52 | [X mu sigma] = featureNormalize(X);
 53 | 
 54 | % Add intercept term to X
 55 | X = [ones(m, 1), X];
 56 | 
 57 | 
 58 | %% ================ Part 2: Gradient Descent ================
 59 | 
 60 | 
 61 | % About step choose: 
 62 | %    if alpha = 0.01, then 500 iters required to converg
 63 | %    if alpha = 0.1, then 150 iters required to converg
 64 | %    if alpha = 1, then 15 iters required to converg    
 65 | 
 66 | fprintf('Running gradient descent ...\n');
 67 | 
 68 | % Choose some alpha value
 69 | alpha = 0.1;
 70 | num_iters = 350;
 71 | 
 72 | % Init Theta and Run Gradient Descent 
 73 | theta = zeros(3, 1);
 74 | [theta, J_history] = gradientDescentMulti(X, y, theta, alpha, num_iters);
 75 | 
 76 | % Plot the convergence graph
 77 | figure;
 78 | plot(1:numel(J_history), J_history, '-b', 'LineWidth', 2);
 79 | xlabel('Number of iterations');
 80 | ylabel('Cost J');
 81 | 
 82 | % Display gradient descent's result
 83 | fprintf('Theta computed from gradient descent: \n');
 84 | fprintf(' %f \n', theta);
 85 | fprintf('\n');
 86 | 
 87 | % Estimate the price of a 1650 sq-ft, 3 br house
 88 | % ====================== YOUR CODE HERE ======================
 89 | % Recall that the first column of X is all-ones. Thus, it does
 90 | % not need to be normalized.
 91 | 
 92 | input = [1650, 3];
 93 | 
 94 | input = [1,(input - mu) ./ sigma];
 95 | %(1650 - mu)/sigma, 3];
 96 | %[input miu sig] = featureNormalize(input);
 97 | price = theta' * input'; % You should change this
 98 | 
 99 | 
100 | % ============================================================
101 | 
102 | fprintf(['Predicted price of a 1650 sq-ft, 3 br house ' ...
103 |          '(using gradient descent):\n $%f\n'], price);
104 | 
105 | fprintf('Program paused. Press enter to continue.\n');
106 | pause;
107 | 
108 | %% ================ Part 3: Normal Equations ================
109 | 
110 | fprintf('Solving with normal equations...\n');
111 | 
112 | % ====================== YOUR CODE HERE ======================
113 | % Instructions: The following code computes the closed form 
114 | %               solution for linear regression using the normal
115 | %               equations. You should complete the code in 
116 | %               normalEqn.m
117 | %
118 | %               After doing so, you should complete this code 
119 | %               to predict the price of a 1650 sq-ft, 3 br house.
120 | %
121 | 
122 | %% Load Data
123 | data = csvread('ex1data2.txt');
124 | X = data(:, 1:2);
125 | y = data(:, 3);
126 | m = length(y);
127 | 
128 | % Add intercept term to X
129 | X = [ones(m, 1), X];
130 | 
131 | % Calculate the parameters from the normal equation
132 | theta = normalEqn(X, y);
133 | 
134 | % Display normal equation's result
135 | fprintf('Theta computed from the normal equations: \n');
136 | fprintf(' %f \n', theta);
137 | fprintf('\n');
138 | 
139 | 
140 | % Estimate the price of a 1650 sq-ft, 3 br house
141 | % ====================== YOUR CODE HERE ======================
142 | price = theta' * [1; 1650; 3]; % You should change this
143 | 
144 | 
145 | % ============================================================
146 | 
147 | fprintf(['Predicted price of a 1650 sq-ft, 3 br house ' ...
148 |          '(using normal equations):\n $%f\n'], price);
149 | 
150 | 


--------------------------------------------------------------------------------
/ex1/ex1data1.txt:
--------------------------------------------------------------------------------
 1 | 6.1101,17.592
 2 | 5.5277,9.1302
 3 | 8.5186,13.662
 4 | 7.0032,11.854
 5 | 5.8598,6.8233
 6 | 8.3829,11.886
 7 | 7.4764,4.3483
 8 | 8.5781,12
 9 | 6.4862,6.5987
10 | 5.0546,3.8166
11 | 5.7107,3.2522
12 | 14.164,15.505
13 | 5.734,3.1551
14 | 8.4084,7.2258
15 | 5.6407,0.71618
16 | 5.3794,3.5129
17 | 6.3654,5.3048
18 | 5.1301,0.56077
19 | 6.4296,3.6518
20 | 7.0708,5.3893
21 | 6.1891,3.1386
22 | 20.27,21.767
23 | 5.4901,4.263
24 | 6.3261,5.1875
25 | 5.5649,3.0825
26 | 18.945,22.638
27 | 12.828,13.501
28 | 10.957,7.0467
29 | 13.176,14.692
30 | 22.203,24.147
31 | 5.2524,-1.22
32 | 6.5894,5.9966
33 | 9.2482,12.134
34 | 5.8918,1.8495
35 | 8.2111,6.5426
36 | 7.9334,4.5623
37 | 8.0959,4.1164
38 | 5.6063,3.3928
39 | 12.836,10.117
40 | 6.3534,5.4974
41 | 5.4069,0.55657
42 | 6.8825,3.9115
43 | 11.708,5.3854
44 | 5.7737,2.4406
45 | 7.8247,6.7318
46 | 7.0931,1.0463
47 | 5.0702,5.1337
48 | 5.8014,1.844
49 | 11.7,8.0043
50 | 5.5416,1.0179
51 | 7.5402,6.7504
52 | 5.3077,1.8396
53 | 7.4239,4.2885
54 | 7.6031,4.9981
55 | 6.3328,1.4233
56 | 6.3589,-1.4211
57 | 6.2742,2.4756
58 | 5.6397,4.6042
59 | 9.3102,3.9624
60 | 9.4536,5.4141
61 | 8.8254,5.1694
62 | 5.1793,-0.74279
63 | 21.279,17.929
64 | 14.908,12.054
65 | 18.959,17.054
66 | 7.2182,4.8852
67 | 8.2951,5.7442
68 | 10.236,7.7754
69 | 5.4994,1.0173
70 | 20.341,20.992
71 | 10.136,6.6799
72 | 7.3345,4.0259
73 | 6.0062,1.2784
74 | 7.2259,3.3411
75 | 5.0269,-2.6807
76 | 6.5479,0.29678
77 | 7.5386,3.8845
78 | 5.0365,5.7014
79 | 10.274,6.7526
80 | 5.1077,2.0576
81 | 5.7292,0.47953
82 | 5.1884,0.20421
83 | 6.3557,0.67861
84 | 9.7687,7.5435
85 | 6.5159,5.3436
86 | 8.5172,4.2415
87 | 9.1802,6.7981
88 | 6.002,0.92695
89 | 5.5204,0.152
90 | 5.0594,2.8214
91 | 5.7077,1.8451
92 | 7.6366,4.2959
93 | 5.8707,7.2029
94 | 5.3054,1.9869
95 | 8.2934,0.14454
96 | 13.394,9.0551
97 | 5.4369,0.61705
98 | 


--------------------------------------------------------------------------------
/ex1/ex1data2.txt:
--------------------------------------------------------------------------------
 1 | 2104,3,399900
 2 | 1600,3,329900
 3 | 2400,3,369000
 4 | 1416,2,232000
 5 | 3000,4,539900
 6 | 1985,4,299900
 7 | 1534,3,314900
 8 | 1427,3,198999
 9 | 1380,3,212000
10 | 1494,3,242500
11 | 1940,4,239999
12 | 2000,3,347000
13 | 1890,3,329999
14 | 4478,5,699900
15 | 1268,3,259900
16 | 2300,4,449900
17 | 1320,2,299900
18 | 1236,3,199900
19 | 2609,4,499998
20 | 3031,4,599000
21 | 1767,3,252900
22 | 1888,2,255000
23 | 1604,3,242900
24 | 1962,4,259900
25 | 3890,3,573900
26 | 1100,3,249900
27 | 1458,3,464500
28 | 2526,3,469000
29 | 2200,3,475000
30 | 2637,3,299900
31 | 1839,2,349900
32 | 1000,1,169900
33 | 2040,4,314900
34 | 3137,3,579900
35 | 1811,4,285900
36 | 1437,3,249900
37 | 1239,3,229900
38 | 2132,4,345000
39 | 4215,4,549000
40 | 2162,4,287000
41 | 1664,2,368500
42 | 2238,3,329900
43 | 2567,4,314000
44 | 1200,3,299000
45 | 852,2,179900
46 | 1852,4,299900
47 | 1203,3,239500
48 | 


--------------------------------------------------------------------------------
/ex1/featureNormalize.m:
--------------------------------------------------------------------------------
 1 | function [X_norm, mu, sigma] = featureNormalize(X)
 2 | %FEATURENORMALIZE Normalizes the features in X 
 3 | %   FEATURENORMALIZE(X) returns a normalized version of X where
 4 | %   the mean value of each feature is 0 and the standard deviation
 5 | %   is 1. This is often a good preprocessing step to do when
 6 | %   working with learning algorithms.
 7 | 
 8 | % You need to set these values correctly
 9 | 
10 | n = length(X(1,:));
11 | mu = zeros(1, size(X, 2));
12 | sigma = zeros(1, size(X, 2));
13 | 
14 | for i=1:n
15 |     tmp = std(X(:,i));
16 |     miu = mean(X(:,i));
17 |     X(:,i) = (X(:,i) - miu) / tmp;
18 |     
19 |     mmu(1,i) = miu;
20 |     ssigma(1, i) = tmp;
21 | end
22 | 
23 | X_norm = X;
24 | mu = mmu;
25 | sigma = ssigma;
26 | 
27 | % ====================== YOUR CODE HERE ======================
28 | % Instructions: First, for each feature dimension, compute the mean
29 | %               of the feature and subtract it from the dataset,
30 | %               storing the mean value in mu. Next, compute the 
31 | %               standard deviation of each feature and divide
32 | %               each feature by it's standard deviation, storing
33 | %               the standard deviation in sigma. 
34 | %
35 | %               Note that X is a matrix where each column is a 
36 | %               feature and each row is an example. You need 
37 | %               to perform the normalization separately for 
38 | %               each feature. 
39 | %
40 | % Hint: You might find the 'mean' and 'std' functions useful.
41 | %       
42 | 
43 | 
44 | 
45 | 
46 | 
47 | 
48 | 
49 | 
50 | 
51 | % ============================================================
52 | 
53 | end
54 | 


--------------------------------------------------------------------------------
/ex1/gradientDescent.m:
--------------------------------------------------------------------------------
 1 | function [theta, J_history] = gradientDescent(X, y, theta, alpha, num_iters)
 2 | %GRADIENTDESCENT Performs gradient descent to learn theta
 3 | % theta = GRADIENTDESCENT(X, y, theta, alpha, num_iters) updates theta by 
 4 | % taking num_iters gradient steps with learning rate alpha
 5 | % Initialize some useful values
 6 | 
 7 | m = length(y); % number of training examples
 8 | J_history = zeros(num_iters, 1);
 9 | 
10 | for iter = 1:num_iters
11 | 
12 |     % ====================== CODE HERE ======================
13 |     %Notice it has been updated simultaneously otherwise the value
14 |     %will has little disparity in theta around [0.006,0.0006]
15 |     tmp = theta;
16 |     for i = 1:2
17 |         k = 1;
18 |         sum = 0;
19 |       
20 |         while( k <= m )
21 |             sum = sum + ((theta)' * (X(k,:))' - y(k)) * X(k, i);
22 |             k = k + 1;
23 |         end
24 |         tmp(i) = tmp(i) - alpha * sum / m;
25 |     end
26 |     
27 |     theta = tmp;
28 | 
29 |     % ============================================================
30 | 
31 |     % Save the cost J in every iteration    
32 |     J_history(iter) = computeCost(X, y, theta);
33 |     fprintf('---%d--- \r\n', J_history(iter));
34 |     %make sure cost function J always goes down
35 | 
36 | end
37 | 
38 | end
39 | 


--------------------------------------------------------------------------------
/ex1/gradientDescentMulti.m:
--------------------------------------------------------------------------------
 1 | function [theta, J_history] = gradientDescentMulti(X, y, theta, alpha, num_iters)
 2 | %GRADIENTDESCENTMULTI Performs gradient descent to learn theta
 3 | %   theta = GRADIENTDESCENTMULTI(x, y, theta, alpha, num_iters) updates theta by
 4 | %   taking num_iters gradient steps with learning rate alpha
 5 | 
 6 | % Initialize some useful values
 7 | m = length(y); % number of training examples
 8 | J_history = zeros(num_iters, 1);
 9 | 
10 | for iter = 1:num_iters
11 | 
12 |     % ====================== CODE HERE ======================
13 |     %
14 |     tmp = theta;
15 |     feature_dim = length(X(1,:));
16 |     for i = 1: feature_dim
17 |         k = 1;
18 |         sum = 0;
19 |       
20 |         while( k <= m )
21 |             sum = sum + ((theta)' * (X(k,:))' - y(k)) * X(k, i);
22 |             k = k + 1;
23 |         end
24 |         tmp(i) = tmp(i) - alpha * sum / m;
25 |     end
26 |     
27 |     theta = tmp;
28 | 
29 |     % ============================================================
30 | 
31 |     % Save the cost J in every iteration    
32 |     J_history(iter) = computeCostMulti(X, y, theta);
33 |     fprintf('---%d--- \r\n', J_history(iter));
34 |     %make sure cost function J always goes down
35 | 
36 | end
37 | 
38 | end
39 | 


--------------------------------------------------------------------------------
/ex1/normalEqn.m:
--------------------------------------------------------------------------------
 1 | function [theta] = normalEqn(X, y)
 2 | %NORMALEQN Computes the closed-form solution to linear regression 
 3 | %   NORMALEQN(X,y) computes the closed-form solution to linear 
 4 | %   regression using the normal equations.
 5 | 
 6 | theta = zeros(size(X, 2), 1);
 7 | 
 8 | % ====================== YOUR CODE HERE ======================
 9 | 
10 | %compra to Gradient Descent, here only 1 line code! what the f..
11 | solution = pinv((X'* X)) * X'* y;
12 | 
13 | % -------------------------------------------------------------
14 | 
15 | theta = solution
16 | % ============================================================
17 | 
18 | end
19 | 


--------------------------------------------------------------------------------
/ex1/plotData.m:
--------------------------------------------------------------------------------
 1 | function plotData(x, y)
 2 | %PLOTDATA Plots the data points x and y into a new figure 
 3 | %   PLOTDATA(x,y) plots the data points and gives the figure axes labels of
 4 | %   population and profit.
 5 | 
 6 | figure; % open a new figure window
 7 | 
 8 | % ====================== YOUR CODE HERE ======================
 9 | % Instructions: Plot the training data into a figure using the 
10 | %               "figure" and "plot" commands. Set the axes labels using
11 | %               the "xlabel" and "ylabel" commands. Assume the 
12 | %               population and revenue data have been passed in
13 | %               as the x and y arguments of this function.
14 | %
15 | % Hint: You can use the 'rx' option with plot to have the markers
16 | %       appear as red crosses. Furthermore, you can make the
17 | %       markers larger by using plot(..., 'rx', 'MarkerSize', 10);
18 | 
19 | 
20 | 
21 | 
22 | 
23 | % ============================================================
24 | 
25 | end
26 | 


--------------------------------------------------------------------------------
/ex1/warmUpExercise.m:
--------------------------------------------------------------------------------
 1 | function A = warmUpExercise()
 2 | %WARMUPEXERCISE Example function in octave
 3 | %   A = WARMUPEXERCISE() is an example function that returns the 5x5 identity matrix
 4 | 
 5 | A = eye(5);
 6 | % ============= YOUR CODE HERE ==============
 7 | % Instructions: Return the 5x5 identity matrix 
 8 | %               In octave, we return values by defining which variables
 9 | %               represent the return values (at the top of the file)
10 | %               and then set them accordingly. 
11 | 
12 | 
13 | 
14 | 
15 | 
16 | 
17 | 
18 | % ===========================================
19 | 
20 | 
21 | end
22 | 


--------------------------------------------------------------------------------
/ex2/costFunction.m:
--------------------------------------------------------------------------------
 1 | function [J, grad] = costFunction(theta, X, y)
 2 | %COSTFUNCTION Compute cost and gradient for logistic regression
 3 | %   J = COSTFUNCTION(theta, X, y) computes the cost of using theta as the
 4 | %   parameter for logistic regression and the gradient of the cost
 5 | %   w.r.t. to the parameters.
 6 | 
 7 | % Initialize some useful values
 8 | m = length(y); % number of training examples
 9 | 
10 | % You need to return the following variables correctly 
11 | J = 0;
12 | grad = zeros(size(theta));
13 | 
14 | [row, col] = size(X);
15 | 
16 | for i = 1:row
17 |     cost = (-y(i)) * log(sigmoid(theta' * X(i,:)')) ...
18 |         -(1 - y(i)) * log(1 - sigmoid(theta' * X(i,:)'));
19 |     J = J + cost;
20 | end
21 | 
22 | J = 1 / m * J;
23 | 
24 | 
25 | % for j = 1: length(theta)
26 | %     tmp = (sigmoid(X(:,j) * theta(j,:))' - y') * X(:,j);
27 | %     grad(j) = 1 / m * tmp;
28 | % end
29 | 
30 | sum = zeros(col,1);
31 | 
32 | for j = 1: col
33 |     for i = 1: m
34 |         tmp = (sigmoid(X(i,:) * theta) - y(i)) * X(i,j);
35 |         sum(j) = sum(j) +  tmp;
36 | 
37 | grad = (1 / m) * sum;
38 |     end
39 | % =============================================================
40 | 
41 | end
42 | 


--------------------------------------------------------------------------------
/ex2/costFunctionReg.m:
--------------------------------------------------------------------------------
 1 | function [J, grad] = costFunctionReg(theta, X, y, lambda)
 2 | %COSTFUNCTIONREG Compute cost and gradient for logistic regression with regularization
 3 | %   J = COSTFUNCTIONREG(theta, X, y, lambda) computes the cost of using
 4 | %   theta as the parameter for regularized logistic regression and the
 5 | %   gradient of the cost w.r.t. to the parameters. 
 6 | 
 7 | % Initialize some useful values
 8 | m = length(y); % number of training examples
 9 | 
10 | % You need to return the following variables correctly 
11 | J = 0;
12 | grad = zeros(size(theta));
13 | [row, col] = size(X);
14 | 
15 | for i = 1:row
16 |     cost = (-y(i)) * log(sigmoid(theta' * X(i,:)')) ...
17 |         -(1 - y(i)) * log(1 - sigmoid(theta' * X(i,:)'));
18 |     %penal =  ((norm(theta))^2);
19 |     J = J + cost;
20 | end
21 | thet = theta(2:col,1);
22 | 
23 | penal =  lambda / 2 * (norm(thet))^2;
24 | 
25 | J = 1 / m * (J + penal);
26 | 
27 | 
28 | % for j = 1: length(theta)
29 | %     tmp = (sigmoid(X(:,j) * theta(j,:))' - y') * X(:,j);
30 | %     grad(j) = 1 / m * tmp;
31 | % end
32 | 
33 | sums = zeros(col,1);
34 | 
35 | for j = 1: col
36 |     if(j == 1)
37 |         for i = 1: m
38 |             tmp = (sigmoid(X(i,:) * theta) - y(i)) * X(i,j);
39 |             sums(j) = sums(j) +  tmp;
40 |         end
41 |     else
42 |         for i = 1: m
43 |             tmp = (sigmoid(X(i,:) * theta) - y(i)) * X(i,j);
44 |             
45 |             sums(j) = sums(j) +  tmp;
46 |         end
47 |         penal = lambda * theta(j);
48 |         sums(j) = sums(j) + penal;
49 |     
50 |     end
51 | 
52 | grad = (1 / m) * sums;
53 |     
54 | end
55 | 
56 | 
57 | end
58 | 


--------------------------------------------------------------------------------
/ex2/ex2.m:
--------------------------------------------------------------------------------
  1 |  For this exercise, you will not need to change any code in this file,
  2 | %  or any other files other than those mentioned above.
  3 | %
  4 | 
  5 | %% Initialization
  6 | clear ; close all; clc
  7 | 
  8 | %% Load Data
  9 | %  The first two columns contains the exam scores and the third column
 10 | %  contains the label.
 11 | 
 12 | data = load('ex2data1.txt');
 13 | X = data(:, [1, 2]); y = data(:, 3);
 14 | 
 15 | %% ==================== Part 1: Plotting ====================
 16 | %  We start the exercise by first plotting the data to understand the 
 17 | %  the problem we are working with.
 18 | 
 19 | fprintf(['Plotting data with + indicating (y = 1) examples and o ' ...
 20 |          'indicating (y = 0) examples.\n']);
 21 | 
 22 | plotData(X, y);
 23 | 
 24 | % Put some labels 
 25 | hold on;
 26 | % Labels and Legend
 27 | xlabel('Exam 1 score')
 28 | ylabel('Exam 2 score')
 29 | 
 30 | % Specified in plot order
 31 | legend('Admitted', 'Not admitted')
 32 | hold off;
 33 | 
 34 | fprintf('\nProgram paused. Press enter to continue.\n');
 35 | pause;
 36 | 
 37 | 
 38 | %% ============ Part 2: Compute Cost and Gradient ============
 39 | %  In this part of the exercise, you will implement the cost and gradient
 40 | %  for logistic regression. You neeed to complete the code in 
 41 | %  costFunction.m
 42 | 
 43 | %  Setup the data matrix appropriately, and add ones for the intercept term
 44 | [m, n] = size(X);
 45 | 
 46 | % Add intercept term to x and X_test
 47 | X = [ones(m, 1) X];
 48 | 
 49 | % Initialize fitting parameters
 50 | initial_theta = zeros(n + 1, 1);
 51 | 
 52 | % Compute and display initial cost and gradient
 53 | [cost, grad] = costFunction(initial_theta, X, y);
 54 | 
 55 | fprintf('Cost at initial theta (zeros): %f\n', cost);
 56 | fprintf('Expected cost (approx): 0.693\n');
 57 | fprintf('Gradient at initial theta (zeros): \n');
 58 | fprintf(' %f \n', grad);
 59 | fprintf('Expected gradients (approx):\n -0.1000\n -12.0092\n -11.2628\n');
 60 | 
 61 | % Compute and display cost and gradient with non-zero theta
 62 | test_theta = [-24; 0.2; 0.2];
 63 | [cost, grad] = costFunction(test_theta, X, y);
 64 | 
 65 | fprintf('\nCost at test theta: %f\n', cost);
 66 | fprintf('Expected cost (approx): 0.218\n');
 67 | fprintf('Gradient at test theta: \n');
 68 | fprintf(' %f \n', grad);
 69 | fprintf('Expected gradients (approx):\n 0.043\n 2.566\n 2.647\n');
 70 | 
 71 | fprintf('\nProgram paused. Press enter to continue.\n');
 72 | pause;
 73 | 
 74 | 
 75 | %% ============= Part 3: Optimizing using fminunc  =============
 76 | %  In this exercise, you will use a built-in function (fminunc) to find the
 77 | %  optimal parameters theta.
 78 | 
 79 | %  Set options for fminunc
 80 | options = optimset('GradObj', 'on', 'MaxIter', 400);
 81 | 
 82 | %  Run fminunc to obtain the optimal theta
 83 | %  This function will return theta and the cost 
 84 | [theta, cost] = ...
 85 | 	fminunc(@(t)(costFunction(t, X, y)), initial_theta, options);
 86 | 
 87 | % Print theta to screen
 88 | fprintf('Cost at theta found by fminunc: %f\n', cost);
 89 | fprintf('Expected cost (approx): 0.203\n');
 90 | fprintf('theta: \n');
 91 | fprintf(' %f \n', theta);
 92 | fprintf('Expected theta (approx):\n');
 93 | fprintf(' -25.161\n 0.206\n 0.201\n');
 94 | 
 95 | % Plot Boundary
 96 | plotDecisionBoundary(theta, X, y);
 97 | 
 98 | % Put some labels 
 99 | hold on;
100 | % Labels and Legend
101 | xlabel('Exam 1 score')
102 | ylabel('Exam 2 score')
103 | 
104 | % Specified in plot order
105 | legend('Admitted', 'Not admitted')
106 | hold off;
107 | 
108 | fprintf('\nProgram paused. Press enter to continue.\n');
109 | pause;
110 | 
111 | %% ============== Part 4: Predict and Accuracies ==============
112 | %  After learning the parameters, you'll like to use it to predict the outcomes
113 | %  on unseen data. In this part, you will use the logistic regression model
114 | %  to predict the probability that a student with score 45 on exam 1 and 
115 | %  score 85 on exam 2 will be admitted.
116 | %
117 | %  Furthermore, you will compute the training and test set accuracies of 
118 | %  our model.
119 | %
120 | %  Your task is to complete the code in predict.m
121 | 
122 | %  Predict probability for a student with score 45 on exam 1 
123 | %  and score 85 on exam 2 
124 | 
125 | prob = sigmoid([1 45 85] * theta);
126 | fprintf(['For a student with scores 45 and 85, we predict an admission ' ...
127 |          'probability of %f\n'], prob);
128 | fprintf('Expected value: 0.775 +/- 0.002\n\n');
129 | 
130 | % Compute accuracy on our training set
131 | p = predict(theta, X);
132 | 
133 | fprintf('Train Accuracy: %f\n', mean(double(p == y)) * 100);
134 | fprintf('Expected accuracy (approx): 89.0\n');
135 | fprintf('\n');
136 | 
137 | 
138 | 


--------------------------------------------------------------------------------
/ex2/ex2_guide.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Y1ran/Machine-Learning-Coursera-code/6c3cb995660cc27fa471089739570569755ab1d9/ex2/ex2_guide.pdf


--------------------------------------------------------------------------------
/ex2/ex2_reg.m:
--------------------------------------------------------------------------------
  1 | For this exercise, you will not need to change any code in this file,
  2 | %  or any other files other than those mentioned above.
  3 | %
  4 | 
  5 | %% Initialization
  6 | clear ; close all; clc
  7 | 
  8 | %% Load Data
  9 | %  The first two columns contains the X values and the third column
 10 | %  contains the label (y).
 11 | 
 12 | data = load('ex2data2.txt');
 13 | X = data(:, [1, 2]); y = data(:, 3);
 14 | 
 15 | plotData(X, y);
 16 | 
 17 | % Put some labels
 18 | hold on;
 19 | 
 20 | % Labels and Legend
 21 | xlabel('Microchip Test 1')
 22 | ylabel('Microchip Test 2')
 23 | 
 24 | % Specified in plot order
 25 | legend('y = 1', 'y = 0')
 26 | hold off;
 27 | 
 28 | 
 29 | %% =========== Part 1: Regularized Logistic Regression ============
 30 | %  In this part, you are given a dataset with data points that are not
 31 | %  linearly separable. However, you would still like to use logistic
 32 | %  regression to classify the data points.
 33 | %
 34 | %  To do so, you introduce more features to use -- in particular, you add
 35 | %  polynomial features to our data matrix (similar to polynomial
 36 | %  regression).
 37 | %
 38 | 
 39 | % Add Polynomial Features
 40 | 
 41 | % Note that mapFeature also adds a column of ones for us, so the intercept
 42 | % term is handled
 43 | X = mapFeature(X(:,1), X(:,2));
 44 | 
 45 | % Initialize fitting parameters
 46 | initial_theta = zeros(size(X, 2), 1);
 47 | 
 48 | % Set regularization parameter lambda to 1
 49 | lambda = 1;
 50 | 
 51 | % Compute and display initial cost and gradient for regularized logistic
 52 | % regression
 53 | [cost, grad] = costFunctionReg(initial_theta, X, y, lambda);
 54 | 
 55 | fprintf('Cost at initial theta (zeros): %f\n', cost);
 56 | fprintf('Expected cost (approx): 0.693\n');
 57 | fprintf('Gradient at initial theta (zeros) - first five values only:\n');
 58 | fprintf(' %f \n', grad(1:5));
 59 | fprintf('Expected gradients (approx) - first five values only:\n');
 60 | fprintf(' 0.0085\n 0.0188\n 0.0001\n 0.0503\n 0.0115\n');
 61 | 
 62 | fprintf('\nProgram paused. Press enter to continue.\n');
 63 | pause;
 64 | 
 65 | % Compute and display cost and gradient
 66 | % with all-ones theta and lambda = 10
 67 | test_theta = ones(size(X,2),1);
 68 | [cost, grad] = costFunctionReg(test_theta, X, y, 10);
 69 | 
 70 | fprintf('\nCost at test theta (with lambda = 10): %f\n', cost);
 71 | fprintf('Expected cost (approx): 3.16\n');
 72 | fprintf('Gradient at test theta - first five values only:\n');
 73 | fprintf(' %f \n', grad(1:5));
 74 | fprintf('Expected gradients (approx) - first five values only:\n');
 75 | fprintf(' 0.3460\n 0.1614\n 0.1948\n 0.2269\n 0.0922\n');
 76 | 
 77 | fprintf('\nProgram paused. Press enter to continue.\n');
 78 | pause;
 79 | 
 80 | %% ============= Part 2: Regularization and Accuracies =============
 81 | %  Optional Exercise:
 82 | %  In this part, you will get to try different values of lambda and
 83 | %  see how regularization affects the decision coundart
 84 | %
 85 | %  Try the following values of lambda (0, 1, 10, 100).
 86 | %
 87 | %  How does the decision boundary change when you vary lambda? How does
 88 | %  the training set accuracy vary?
 89 | %
 90 | 
 91 | % Initialize fitting parameters
 92 | initial_theta = zeros(size(X, 2), 1);
 93 | 
 94 | % Set regularization parameter lambda to 1 (you should vary this)
 95 | lambda = 0.0558;
 96 | 
 97 | % Set Options
 98 | options = optimset('GradObj', 'on', 'MaxIter', 400);
 99 | 
100 | % Optimize
101 | [theta, J, exit_flag] = ...
102 | 	fminunc(@(t)(costFunctionReg(t, X, y, lambda)), initial_theta, options);
103 | 
104 | % Plot Boundary
105 | plotDecisionBoundary(theta, X, y);
106 | hold on;
107 | title(sprintf('lambda = %g', lambda))
108 | 
109 | % Labels and Legend
110 | xlabel('Microchip Test 1')
111 | ylabel('Microchip Test 2')
112 | 
113 | legend('y = 1', 'y = 0', 'Decision boundary')
114 | hold off;
115 | 
116 | % Compute accuracy on our training set
117 | p = predict(theta, X);
118 | 
119 | fprintf('Train Accuracy: %f\n', mean(double(p == y)) * 100);
120 | fprintf('Expected accuracy (with lambda = 1): 83.1 (approx)\n');
121 | 
122 | 


--------------------------------------------------------------------------------
/ex2/mapFeature.m:
--------------------------------------------------------------------------------
 1 | function out = mapFeature(X1, X2)
 2 | % MAPFEATURE Feature mapping function to polynomial features
 3 | %
 4 | %   MAPFEATURE(X1, X2) maps the two input features
 5 | %   to quadratic features used in the regularization exercise.
 6 | %
 7 | %   Returns a new feature array with more features, comprising of 
 8 | %   X1, X2, X1.^2, X2.^2, X1*X2, X1*X2.^2, etc..
 9 | %
10 | %   Inputs X1, X2 must be the same size
11 | %
12 | 
13 | degree = 6;
14 | out = ones(size(X1(:,1)));
15 | for i = 1:degree
16 |     for j = 0:i
17 |         out(:, end+1) = (X1.^(i-j)).*(X2.^j);
18 |     end
19 | end
20 | 
21 | end


--------------------------------------------------------------------------------
/ex2/plotData.m:
--------------------------------------------------------------------------------
 1 | function plotData(X, y)
 2 | %PLOTDATA Plots the data points X and y into a new figure 
 3 | %   PLOTDATA(x,y) plots the data points with + for the positive examples
 4 | %   and o for the negative examples. X is assumed to be a Mx2 matrix.
 5 | 
 6 | % Create New Figure
 7 | figure; hold on;
 8 | 
 9 | % ====================== YOUR CODE HERE ======================
10 | % Instructions: Plot the positive and negative examples on a
11 | %               2D plot, using the option 'k+' for the positive
12 | %               examples and 'ko' for the negative examples.
13 | %
14 | 
15 | 
16 | pos = find(y == 1);
17 | neg = find(y == 0);
18 | 
19 | plot(X(pos, 1), X(pos, 2), 'k+', 'LineWidth', ...
20 |     2, 'MarkerSize', 7);
21 | 
22 | plot(X(neg, 1), X(neg, 2), 'ko', 'MarkerFaceColor', ...
23 |     'y', 'MarkerSize',7);
24 | 
25 | 
26 | 
27 | 
28 | 
29 | 
30 | 
31 | % =========================================================================
32 | 
33 | 
34 | 
35 | hold off;
36 | 
37 | end
38 | 


--------------------------------------------------------------------------------
/ex2/plotDecisionBoundary.m:
--------------------------------------------------------------------------------
 1 | function plotDecisionBoundary(theta, X, y)
 2 | %PLOTDECISIONBOUNDARY Plots the data points X and y into a new figure with
 3 | %the decision boundary defined by theta
 4 | %   PLOTDECISIONBOUNDARY(theta, X,y) plots the data points with + for the 
 5 | %   positive examples and o for the negative examples. X is assumed to be 
 6 | %   a either 
 7 | %   1) Mx3 matrix, where the first column is an all-ones column for the 
 8 | %      intercept.
 9 | %   2) MxN, N>3 matrix, where the first column is all-ones
10 | 
11 | % Plot Data
12 | plotData(X(:,2:3), y);
13 | hold on
14 | 
15 | if size(X, 2) <= 3
16 |     % Only need 2 points to define a line, so choose two endpoints
17 |     plot_x = [min(X(:,2))-2,  max(X(:,2))+2];
18 | 
19 |     % Calculate the decision boundary line
20 |     plot_y = (-1./theta(3)).*(theta(2).*plot_x + theta(1));
21 | 
22 |     % Plot, and adjust axes for better viewing
23 |     plot(plot_x, plot_y)
24 |     
25 |     % Legend, specific for the exercise
26 |     legend('Admitted', 'Not admitted', 'Decision Boundary')
27 |     axis([30, 100, 30, 100])
28 | else
29 |     % Here is the grid range
30 |     u = linspace(-1, 1.5, 50);
31 |     v = linspace(-1, 1.5, 50);
32 | 
33 |     z = zeros(length(u), length(v));
34 |     % Evaluate z = theta*x over the grid
35 |     for i = 1:length(u)
36 |         for j = 1:length(v)
37 |             z(i,j) = mapFeature(u(i), v(j))*theta;
38 |         end
39 |     end
40 |     z = z'; % important to transpose z before calling contour
41 | 
42 |     % Plot z = 0
43 |     % Notice you need to specify the range [0, 0]
44 |     contour(u, v, z, [0, 0], 'LineWidth', 2)
45 | end
46 | hold off
47 | 
48 | end
49 | 


--------------------------------------------------------------------------------
/ex2/predict.m:
--------------------------------------------------------------------------------
 1 | function p = predict(theta, X)
 2 | %PREDICT Predict whether the label is 0 or 1 using learned logistic 
 3 | %regression parameters theta
 4 | %   p = PREDICT(theta, X) computes the predictions for X using a 
 5 | %   threshold at 0.5 (i.e., if sigmoid(theta'*x) >= 0.5, predict 1)
 6 | 
 7 | m = size(X, 1); % Number of training examples
 8 | 
 9 | % You need to return the following variables correctly
10 | p = zeros(m, 1);
11 | 
12 | for i = 1:m
13 |     if(sigmoid(theta' * X(i,:)')) >= 0.5
14 |         p(i) = 1;
15 |     else
16 |         p(i) = 0;
17 |     end
18 | end
19 | 
20 | 
21 | 
22 | 
23 | 
24 | 
25 | 
26 | 
27 | % =========================================================================
28 | 
29 | 
30 | end
31 | 


--------------------------------------------------------------------------------
/ex2/sigmoid.m:
--------------------------------------------------------------------------------
 1 | function g = sigmoid(z)
 2 | %SIGMOID Compute sigmoid function
 3 | %   g = SIGMOID(z) computes the sigmoid of z.
 4 | 
 5 | % You need to return the following variables correctly 
 6 | g = zeros(size(z));
 7 | 
 8 | % ====================== YOUR CODE HERE ======================
 9 | % Instructions: Compute the sigmoid of each value of z (z can be a matrix,
10 | %               vector or scalar).
11 | 
12 | if(length(g) == 1)
13 |     g = 1 / ( 1 + exp(-z));
14 | else
15 |     [col, row] = size(z);
16 |     g = 1 ./ (1 + exp(- z));
17 | 
18 |     
19 | 
20 | 
21 | 
22 | % =============================================================
23 | 
24 | end


--------------------------------------------------------------------------------
/ex3/displayData.m:
--------------------------------------------------------------------------------
 1 | function [h, display_array] = displayData(X, example_width)
 2 | %DISPLAYDATA Display 2D data in a nice grid
 3 | %   [h, display_array] = DISPLAYDATA(X, example_width) displays 2D data
 4 | %   stored in X in a nice grid. It returns the figure handle h and the 
 5 | %   displayed array if requested.
 6 | 
 7 | % Set example_width automatically if not passed in
 8 | if ~exist('example_width', 'var') || isempty(example_width) 
 9 | 	example_width = round(sqrt(size(X, 2)));
10 | end
11 | 
12 | % Gray Image
13 | colormap(gray);
14 | 
15 | % Compute rows, cols
16 | [m n] = size(X);
17 | example_height = (n / example_width);
18 | 
19 | % Compute number of items to display
20 | display_rows = floor(sqrt(m));
21 | display_cols = ceil(m / display_rows);
22 | 
23 | % Between images padding
24 | pad = 1;
25 | 
26 | % Setup blank display
27 | display_array = - ones(pad + display_rows * (example_height + pad), ...
28 |                        pad + display_cols * (example_width + pad));
29 | 
30 | % Copy each example into a patch on the display array
31 | curr_ex = 1;
32 | for j = 1:display_rows
33 | 	for i = 1:display_cols
34 | 		if curr_ex > m, 
35 | 			break; 
36 | 		end
37 | 		% Copy the patch
38 | 		
39 | 		% Get the max value of the patch
40 | 		max_val = max(abs(X(curr_ex, :)));
41 | 		display_array(pad + (j - 1) * (example_height + pad) + (1:example_height), ...
42 | 		              pad + (i - 1) * (example_width + pad) + (1:example_width)) = ...
43 | 						reshape(X(curr_ex, :), example_height, example_width) / max_val;
44 | 		curr_ex = curr_ex + 1;
45 | 	end
46 | 	if curr_ex > m, 
47 | 		break; 
48 | 	end
49 | end
50 | 
51 | % Display Image
52 | h = imagesc(display_array, [-1 1]);
53 | 
54 | % Do not show axis
55 | axis image off
56 | 
57 | drawnow;
58 | 
59 | end
60 | 


--------------------------------------------------------------------------------
/ex3/ex3.m:
--------------------------------------------------------------------------------
 1 | %% Machine Learning Online Class - Exercise 3 | Part 1: One-vs-all
 2 | 
 3 | %  Instructions
 4 | %  ------------
 5 | %
 6 | %  This file contains code that helps you get started on the
 7 | %  linear exercise. You will need to complete the following functions
 8 | %  in this exericse:
 9 | %
10 | %     lrCostFunction.m (logistic regression cost function)
11 | %     oneVsAll.m
12 | %     predictOneVsAll.m
13 | %     predict.m
14 | %
15 | %  For this exercise, you will not need to change any code in this file,
16 | %  or any other files other than those mentioned above.
17 | %
18 | 
19 | %% Initialization
20 | clear ; close all; clc
21 | 
22 | %% Setup the parameters you will use for this part of the exercise
23 | input_layer_size  = 400;  % 20x20 Input Images of Digits
24 | num_labels = 10;          % 10 labels, from 1 to 10
25 |                           % (note that we have mapped "0" to label 10)
26 | 
27 | %% =========== Part 1: Loading and Visualizing Data =============
28 | %  We start the exercise by first loading and visualizing the dataset.
29 | %  You will be working with a dataset that contains handwritten digits.
30 | %
31 | 
32 | % Load Training Data
33 | fprintf('Loading and Visualizing Data ...\n')
34 | 
35 | load('ex3data1.mat'); % training data stored in arrays X, y
36 | m = size(X, 1);
37 | 
38 | % Randomly select 100 data points to display
39 | rand_indices = randperm(m);
40 | sel = X(rand_indices(1:100), :);
41 | 
42 | displayData(sel);
43 | 
44 | fprintf('Program paused. Press enter to continue.\n');
45 | pause;
46 | 
47 | %% ============ Part 2a: Vectorize Logistic Regression ============
48 | %  In this part of the exercise, you will reuse your logistic regression
49 | %  code from the last exercise. You task here is to make sure that your
50 | %  regularized logistic regression implementation is vectorized. After
51 | %  that, you will implement one-vs-all classification for the handwritten
52 | %  digit dataset.
53 | %
54 | 
55 | % Test case for lrCostFunction
56 | fprintf('\nTesting lrCostFunction() with regularization');
57 | 
58 | theta_t = [-2; -1; 1; 2];
59 | X_t = [ones(5,1) reshape(1:15,5,3)/10];
60 | y_t = ([1;0;1;0;1] >= 0.5);
61 | lambda_t = 3;
62 | [J grad] = lrCostFunction(theta_t, X_t, y_t, lambda_t);
63 | 
64 | fprintf('\nCost: %f\n', J);
65 | fprintf('Expected cost: 2.534819\n');
66 | fprintf('Gradients:\n');
67 | fprintf(' %f \n', grad);
68 | fprintf('Expected gradients:\n');
69 | fprintf(' 0.146561\n -0.548558\n 0.724722\n 1.398003\n');
70 | 
71 | fprintf('Program paused. Press enter to continue.\n');
72 | pause;
73 | %% ============ Part 2b: One-vs-All Training ============
74 | fprintf('\nTraining One-vs-All Logistic Regression...\n')
75 | 
76 | lambda = 0.1;
77 | [all_theta] = oneVsAll(X, y, num_labels, lambda);
78 | fprintf('Program paused. Press enter to continue.\n');
79 | pause;
80 | 
81 | 
82 | %% ================ Part 3: Predict for One-Vs-All ================
83 | 
84 | pred = predictOneVsAll(all_theta, X);
85 | 
86 | y((y==10)) = 0;
87 | 
88 | fprintf('\nTraining Set Accuracy: %f\n', mean(double(pred == y)) * 100);
89 | 
90 | 


--------------------------------------------------------------------------------
/ex3/ex3_nn.m:
--------------------------------------------------------------------------------
 1 | %% Machine Learning Online Class - Exercise 3 | Part 2: Neural Networks
 2 | 
 3 | %  Instructions
 4 | %  ------------
 5 | % 
 6 | %  This file contains code that helps you get started on the
 7 | %  linear exercise. You will need to complete the following functions 
 8 | %  in this exericse:
 9 | %
10 | %     lrCostFunction.m (logistic regression cost function)
11 | %     oneVsAll.m
12 | %     predictOneVsAll.m
13 | %     predict.m
14 | %
15 | %  For this exercise, you will not need to change any code in this file,
16 | %  or any other files other than those mentioned above.
17 | %
18 | 
19 | %% Initialization
20 | clear ; close all; clc
21 | 
22 | %% Setup the parameters you will use for this exercise
23 | input_layer_size  = 400;  % 20x20 Input Images of Digits
24 | hidden_layer_size = 25;   % 25 hidden units
25 | num_labels = 10;          % 10 labels, from 1 to 10   
26 |                           % (note that we have mapped "0" to label 10)
27 | 
28 | %% =========== Part 1: Loading and Visualizing Data =============
29 | %  We start the exercise by first loading and visualizing the dataset. 
30 | %  You will be working with a dataset that contains handwritten digits.
31 | %
32 | 
33 | % Load Training Data
34 | fprintf('Loading and Visualizing Data ...\n')
35 | 
36 | load('ex3data1.mat');
37 | m = size(X, 1);
38 | 
39 | % Randomly select 100 data points to display
40 | sel = randperm(size(X, 1));
41 | sel = sel(1:100);
42 | 
43 | displayData(X(sel, :));
44 | 
45 | fprintf('Program paused. Press enter to continue.\n');
46 | pause;
47 | 
48 | %% ================ Part 2: Loading Pameters ================
49 | % In this part of the exercise, we load some pre-initialized 
50 | % neural network parameters.
51 | 
52 | fprintf('\nLoading Saved Neural Network Parameters ...\n')
53 | 
54 | % Load the weights into variables Theta1 and Theta2
55 | load('ex3weights.mat');
56 | 
57 | %% ================= Part 3: Implement Predict =================
58 | %  After training the neural network, we would like to use it to predict
59 | %  the labels. You will now implement the "predict" function to use the
60 | %  neural network to predict the labels of the training set. This lets
61 | %  you compute the training set accuracy.
62 | 
63 | pred = predict(Theta1, Theta2, X);
64 | y(y == 10) = 0;
65 | 
66 | 
67 | fprintf('\nTraining Set Accuracy: %f\n', mean(double(pred == y)) * 100);
68 | 
69 | fprintf('Program paused. Press enter to continue.\n');
70 | pause;
71 | 
72 | %  To give you an idea of the network's output, you can also run
73 | %  through the examples one at the a time to see what it is predicting.
74 | 
75 | %  Randomly permute examples
76 | rp = randperm(m);
77 | 
78 | for i = 1:m
79 |     % Display 
80 |     fprintf('\nDisplaying Example Image\n');
81 |     displayData(X(rp(i), :));
82 | 
83 |     pred = predict(Theta1, Theta2, X(rp(i),:));
84 |     fprintf('\nNeural Network Prediction: %d (digit %d)\n', pred, mod(pred, 10));
85 |     
86 |     % Pause with quit option
87 |     s = input('Paused - press enter to continue, q to exit:','s');
88 |     if s == 'q'
89 |       break
90 |     end
91 | end
92 | 
93 | 


--------------------------------------------------------------------------------
/ex3/lrCostFunction.m:
--------------------------------------------------------------------------------
 1 | function [J, grad] = lrCostFunction(theta, X, y, lambda)
 2 | %LRCOSTFUNCTION Compute cost and gradient for logistic regression with 
 3 | %regularization
 4 | %   J = LRCOSTFUNCTION(theta, X, y, lambda) computes the cost of using
 5 | %   theta as the parameter for regularized logistic regression and the
 6 | %   gradient of the cost w.r.t. to the parameters. 
 7 | 
 8 | % Initialize some useful values
 9 | m = length(y); % number of training examples
10 | 
11 | % You need to return the following variables correctly 
12 | J = 0;
13 | grad = zeros(size(theta));
14 | 
15 | % ====================== YOUR CODE HERE ======================
16 | % Hint: When computing the gradient of the regularized cost function, 
17 | %       there're many possible vectorized solutions, but one solution
18 | %       looks like:
19 | %           grad = (unregularized gradient for logistic regression)
20 | %           temp = theta; 
21 | %           temp(1) = 0;   % because we don't add anything for j = 0  
22 | %           grad = grad + YOUR_CODE_HERE (using the temp variable)
23 | 
24 | 
25 | [row, col] = size(X);
26 | 
27 | 
28 | cost = (-1 .* y)' * log(sigmoid( X * theta)) ...
29 |        -(ones(row,1) - y)' * log(ones(row,1) - sigmoid(X * theta));
30 | %penal =  ((norm(theta))^2);
31 | %J = J + cost;
32 | 
33 | thet = theta(2:col,1);
34 | 
35 | penal =  lambda / 2 * ((norm(thet))^2);
36 | 
37 | J = 1 / m * (cost + penal);
38 | 
39 | 
40 | 
41 | tmp = X' * (sigmoid(X* theta) - y);
42 | %grad(j) = 1 / m * tmp;
43 | %sums = zeros(col,1);
44 | 
45 | theta(1,1) = 0; 
46 | pena = lambda .* theta;
47 | grad = tmp + pena;
48 | 
49 | 
50 | 
51 | grad = (1 / m) .* grad(:);
52 |     
53 | end
54 | 
55 | 


--------------------------------------------------------------------------------
/ex3/oneVsAll.m:
--------------------------------------------------------------------------------
 1 | function [all_theta] = oneVsAll(X, y, num_labels, lambda)
 2 | 
 3 | %   [all_theta] = ONEVSALL(X, y, num_labels, lambda) trains num_labels
 4 | %   logistic regression classifiers and returns each of these classifiers
 5 | %   in a matrix all_theta, where the i-th row of all_theta corresponds 
 6 | %   to the classifier for label i
 7 | 
 8 | % Some useful variables
 9 | m = size(X, 1);
10 | n = size(X, 2);
11 | 
12 | % You need to return the following variables correctly 
13 | all_theta = zeros(num_labels, n + 1);
14 | 
15 | % Add ones to the X data matrix
16 | X = [ones(m, 1) X];
17 | 
18 | % ====================== YOUR CODE HERE ======================
19 | % Example Code for fmincg:
20 | %
21 | %     % Set Initial theta
22 | %     initial_theta = zeros(n + 1, 1);
23 | %     
24 | %     % Set options for fminunc
25 | %     options = optimset('GradObj', 'on', 'MaxIter', 50);
26 | % 
27 | %     % Run fmincg to obtain the optimal theta
28 | %     % This function will return theta and the cost 
29 | %     [theta] = ...
30 | %         fmincg (@(t)(lrCostFunction(t, X, (y == c), lambda)), ...
31 | %                 initial_theta, options);
32 | %
33 | 
34 | for i = 1 : num_labels
35 |     y_tmp = (y == i);
36 |     initial_theta = zeros(n + 1, 1);
37 |     
38 |     options = optimset('GradObj', 'on', 'MaxIter', 50);
39 |     %This function will return theta and the cost 
40 |     [all_theta(i,:)] = ...
41 |         fmincg (@(t)(lrCostFunction(t, X, y_tmp, lambda)), ...
42 |                 initial_theta, options);
43 | end
44 | 
45 | 
46 | 
47 | 
48 | 
49 | % =========================================================================
50 | 
51 | 
52 | end
53 | 


--------------------------------------------------------------------------------
/ex3/predict.m:
--------------------------------------------------------------------------------
 1 | function p = predict(Theta1, Theta2, X)
 2 | %PREDICT Predict the label of an input given a trained neural network
 3 | %   p = PREDICT(Theta1, Theta2, X) outputs the predicted label of X given the
 4 | %   trained weights of a neural network (Theta1, Theta2)
 5 | 
 6 | % Useful values
 7 | m = size(X, 1);
 8 | num_labels = size(Theta2, 1);
 9 | 
10 | X = [ones(m,1) X];
11 | % You need to return the following variables correctly 
12 | p = zeros(size(X, 1), 1);
13 | 
14 | % ====================== YOUR CODE HERE ======================
15 | %hidden_act = zeros(size(X, 1), size(Theta1, 2));
16 | hidden_layer = [ones(size(X, 1),1) sigmoid(X * Theta1')];
17 | 
18 | %hidden_layer = zeros(size(X, 1), size(hidden_act, 2));
19 | %hidden_layer = sigmoid(hidden_act);
20 | 
21 | output_act = hidden_layer * Theta2';
22 | output_layer = sigmoid(output_act);
23 | 
24 | 
25 | p = max(output_layer, [], 2);
26 | 
27 | for i = 1 : m
28 |     for j = 1 : num_labels
29 |         if( output_layer(i, j) == p(i, :))
30 |             if( j ~= 10)
31 |                 p(i,:) = j;
32 |             else
33 |                 p(i,:) = 0;
34 |             end
35 |         end
36 |     end
37 | end  
38 | 
39 | 
40 | % =========================================================================
41 | 
42 | 
43 | end
44 | 


--------------------------------------------------------------------------------
/ex3/predictOneVsAll.m:
--------------------------------------------------------------------------------
 1 | function p = predictOneVsAll(all_theta, X)
 2 | %PREDICT Predict the label for a trained one-vs-all classifier. The labels 
 3 | %are in the range 1..K, where K = size(all_theta, 1). 
 4 | %  p = PREDICTONEVSALL(all_theta, X) will return a vector of predictions
 5 | %  for each example in the matrix X. Note that X contains the examples in
 6 | %  rows. all_theta is a matrix where the i-th row is a trained logistic
 7 | %  regression theta vector for the i-th class. You should set p to a vector
 8 | %  of values from 1..K (e.g., p = [1; 3; 1; 2] predicts classes 1, 3, 1, 2
 9 | %  for 4 examples) 
10 | 
11 | m = size(X, 1);
12 | num_labels = size(all_theta, 1);
13 | 
14 | % You need to return the following variables correctly 
15 | p = zeros(size(X, 1), 1);
16 | 
17 | % Add ones to the X data matrix
18 | X = [ones(m, 1) X];
19 | 
20 | % ====================== YOUR CODE HERE ======================
21 | 
22 | % Hint: This code can be done all vectorized using the max function.
23 | %       In particular, the max function can also return the index of the 
24 | %       max element, for more information see 'help max'. If your examples 
25 | %       are in rows, then, you can use max(A, [], 2) to obtain the max 
26 | %       for each row.
27 | %       
28 | 
29 | pred = sigmoid( X * all_theta');
30 | 
31 | p = max(pred, [], 2);
32 | 
33 | for i = 1 : m
34 |     for j = 1 : num_labels
35 |         if( pred(i, j) == p(i, :))
36 |             if( j ~= 10)
37 |                 p(i,:) = j;
38 |             else
39 |                 p(i,:) = 0;
40 |             end
41 |         end
42 |     end
43 | end  
44 | 
45 | % =========================================================================
46 | 
47 | 
48 | end
49 | 


--------------------------------------------------------------------------------
/ex3/sigmoid.m:
--------------------------------------------------------------------------------
1 | function g = sigmoid(z)
2 | %SIGMOID Compute sigmoid functoon
3 | %   J = SIGMOID(z) computes the sigmoid of z.
4 | 
5 | g = 1.0 ./ (1.0 + exp(-z));
6 | end
7 | 


--------------------------------------------------------------------------------
/ex4/computeNumericalGradient.m:
--------------------------------------------------------------------------------
 1 | function numgrad = computeNumericalGradient(J, theta)
 2 | %COMPUTENUMERICALGRADIENT Computes the gradient using "finite differences"
 3 | %and gives us a numerical estimate of the gradient.
 4 | %   numgrad = COMPUTENUMERICALGRADIENT(J, theta) computes the numerical
 5 | %   gradient of the function J around theta. Calling y = J(theta) should
 6 | %   return the function value at theta.
 7 | 
 8 | % Notes: The following code implements numerical gradient checking, and 
 9 | %        returns the numerical gradient.It sets numgrad(i) to (a numerical 
10 | %        approximation of) the partial derivative of J with respect to the 
11 | %        i-th input argument, evaluated at theta. (i.e., numgrad(i) should 
12 | %        be the (approximately) the partial derivative of J with respect 
13 | %        to theta(i).)
14 | %                
15 | 
16 | numgrad = zeros(size(theta));
17 | perturb = zeros(size(theta));
18 | e = 1e-4;
19 | for p = 1:numel(theta)
20 |     % Set perturbation vector
21 |     perturb(p) = e;
22 |     loss1 = J(theta - perturb);
23 |     loss2 = J(theta + perturb);
24 |     % Compute Numerical Gradient
25 |     numgrad(p) = (loss2 - loss1) / (2*e);
26 |     perturb(p) = 0;
27 | end
28 | 
29 | end
30 | 


--------------------------------------------------------------------------------
/ex4/ex4.m:
--------------------------------------------------------------------------------
  1 | %% Machine Learning Online Class - Exercise 4 Neural Network Learning
  2 | 
  3 | %  Instructions
  4 | %  ------------
  5 | % 
  6 | %  This file contains code that helps you get started on the
  7 | %  linear exercise. You will need to complete the following functions 
  8 | %  in this exericse:
  9 | %
 10 | %     sigmoidGradient.m
 11 | %     randInitializeWeights.m
 12 | %     nnCostFunction.m
 13 | %
 14 | %  For this exercise, you will not need to change any code in this file,
 15 | %  or any other files other than those mentioned above.
 16 | %
 17 | 
 18 | %% Initialization
 19 | clear ; close all; clc
 20 | 
 21 | %% Setup the parameters you will use for this exercise
 22 | input_layer_size  = 400;  % 20x20 Input Images of Digits
 23 | hidden_layer_size = 25;   % 25 hidden units
 24 | num_labels = 10;          % 10 labels, from 1 to 10   
 25 |                           % (note that we have mapped "0" to label 10)
 26 | 
 27 | %% =========== Part 1: Loading and Visualizing Data =============
 28 | %  We start the exercise by first loading and visualizing the dataset. 
 29 | %  You will be working with a dataset that contains handwritten digits.
 30 | %
 31 | 
 32 | % Load Training Data
 33 | fprintf('Loading and Visualizing Data ...\n')
 34 | 
 35 | load('ex4data1.mat');
 36 | m = size(X, 1);
 37 | 
 38 | % Randomly select 100 data points to display
 39 | sel = randperm(size(X, 1));
 40 | sel = sel(1:100);
 41 | 
 42 | displayData(X(sel, :));
 43 | 
 44 | fprintf('Program paused. Press enter to continue.\n');
 45 | pause;
 46 | 
 47 | 
 48 | %% ================ Part 2: Loading Parameters ================
 49 | % In this part of the exercise, we load some pre-initialized 
 50 | % neural network parameters.
 51 | 
 52 | fprintf('\nLoading Saved Neural Network Parameters ...\n')
 53 | 
 54 | % Load the weights into variables Theta1 and Theta2
 55 | load('ex4weights.mat');
 56 | 
 57 | % Unroll parameters 
 58 | nn_params = [Theta1(:) ; Theta2(:)];
 59 | 
 60 | %% ================ Part 3: Compute Cost (Feedforward) ================
 61 | %  To the neural network, you should first start by implementing the
 62 | %  feedforward part of the neural network that returns the cost only. You
 63 | %  should complete the code in nnCostFunction.m to return cost. After
 64 | %  implementing the feedforward to compute the cost, you can verify that
 65 | %  your implementation is correct by verifying that you get the same cost
 66 | %  as us for the fixed debugging parameters.
 67 | %
 68 | %  We suggest implementing the feedforward cost *without* regularization
 69 | %  first so that it will be easier for you to debug. Later, in part 4, you
 70 | %  will get to implement the regularized cost.
 71 | %
 72 | fprintf('\nFeedforward Using Neural Network ...\n')
 73 | 
 74 | % Weight regularization parameter (we set this to 0 here).
 75 | lambda = 0;
 76 | 
 77 | J = nnCostFunction(nn_params, input_layer_size, hidden_layer_size, ...
 78 |                    num_labels, X, y, lambda);
 79 | 
 80 | fprintf(['Cost at parameters (loaded from ex4weights): %f '...
 81 |          '\n(this value should be about 0.287629)\n'], J);
 82 | 
 83 | fprintf('\nProgram paused. Press enter to continue.\n');
 84 | pause;
 85 | 
 86 | %% =============== Part 4: Implement Regularization ===============
 87 | %  Once your cost function implementation is correct, you should now
 88 | %  continue to implement the regularization with the cost.
 89 | %
 90 | 
 91 | fprintf('\nChecking Cost Function (w/ Regularization) ... \n')
 92 | 
 93 | % Weight regularization parameter (we set this to 1 here).
 94 | lambda = 1;
 95 | 
 96 | J = nnCostFunction(nn_params, input_layer_size, hidden_layer_size, ...
 97 |                    num_labels, X, y, lambda);
 98 | 
 99 | fprintf(['Cost at parameters (loaded from ex4weights): %f '...
100 |          '\n(this value should be about 0.383770)\n'], J);
101 | 
102 | fprintf('Program paused. Press enter to continue.\n');
103 | pause;
104 | 
105 | 
106 | %% ================ Part 5: Sigmoid Gradient  ================
107 | %  Before you start implementing the neural network, you will first
108 | %  implement the gradient for the sigmoid function. You should complete the
109 | %  code in the sigmoidGradient.m file.
110 | %
111 | 
112 | fprintf('\nEvaluating sigmoid gradient...\n')
113 | 
114 | g = sigmoidGradient([-1 -0.5 0 0.5 1]);
115 | fprintf('Sigmoid gradient evaluated at [-1 -0.5 0 0.5 1]:\n  ');
116 | fprintf('%f ', g);
117 | fprintf('\n\n');
118 | 
119 | fprintf('Program paused. Press enter to continue.\n');
120 | pause;
121 | 
122 | 
123 | %% ================ Part 6: Initializing Pameters ================
124 | %  In this part of the exercise, you will be starting to implment a two
125 | %  layer neural network that classifies digits. You will start by
126 | %  implementing a function to initialize the weights of the neural network
127 | %  (randInitializeWeights.m)
128 | 
129 | fprintf('\nInitializing Neural Network Parameters ...\n')
130 | 
131 | initial_Theta1 = randInitializeWeights(input_layer_size, hidden_layer_size);
132 | initial_Theta2 = randInitializeWeights(hidden_layer_size, num_labels);
133 | 
134 | % Unroll parameters
135 | initial_nn_params = [initial_Theta1(:) ; initial_Theta2(:)];
136 | 
137 | 
138 | %% =============== Part 7: Implement Backpropagation ===============
139 | %  Once your cost matches up with ours, you should proceed to implement the
140 | %  backpropagation algorithm for the neural network. You should add to the
141 | %  code you've written in nnCostFunction.m to return the partial
142 | %  derivatives of the parameters.
143 | %
144 | fprintf('\nChecking Backpropagation... \n');
145 | 
146 | %  Check gradients by running checkNNGradients
147 | checkNNGradients;
148 | 
149 | fprintf('\nProgram paused. Press enter to continue.\n');
150 | pause;
151 | 
152 | 
153 | %% =============== Part 8: Implement Regularization ===============
154 | %  Once your backpropagation implementation is correct, you should now
155 | %  continue to implement the regularization with the cost and gradient.
156 | %
157 | 
158 | fprintf('\nChecking Backpropagation (w/ Regularization) ... \n')
159 | 
160 | %  Check gradients by running checkNNGradients
161 | lambda = 3;
162 | checkNNGradients(lambda);
163 | 
164 | % Also output the costFunction debugging values
165 | debug_J  = nnCostFunction(nn_params, input_layer_size, ...
166 |                           hidden_layer_size, num_labels, X, y, lambda);
167 | 
168 | fprintf(['\n\nCost at (fixed) debugging parameters (w/ lambda = %f): %f ' ...
169 |          '\n(for lambda = 3, this value should be about 0.576051)\n\n'], lambda, debug_J);
170 | 
171 | fprintf('Program paused. Press enter to continue.\n');
172 | pause;
173 | 
174 | 
175 | %% =================== Part 8: Training NN ===================
176 | %  You have now implemented all the code necessary to train a neural 
177 | %  network. To train your neural network, we will now use "fmincg", which
178 | %  is a function which works similarly to "fminunc". Recall that these
179 | %  advanced optimizers are able to train our cost functions efficiently as
180 | %  long as we provide them with the gradient computations.
181 | %
182 | fprintf('\nTraining Neural Network... \n')
183 | 
184 | %  After you have completed the assignment, change the MaxIter to a larger
185 | %  value to see how more training helps.
186 | options = optimset('MaxIter', 50);
187 | 
188 | %  You should also try different values of lambda
189 | lambda = 1;
190 | 
191 | % Create "short hand" for the cost function to be minimized
192 | costFunction = @(p) nnCostFunction(p, ...
193 |                                    input_layer_size, ...
194 |                                    hidden_layer_size, ...
195 |                                    num_labels, X, y, lambda);
196 | 
197 | % Now, costFunction is a function that takes in only one argument (the
198 | % neural network parameters)
199 | [nn_params, cost] = fmincg(costFunction, initial_nn_params, options);
200 | 
201 | % Obtain Theta1 and Theta2 back from nn_params
202 | Theta1 = reshape(nn_params(1:hidden_layer_size * (input_layer_size + 1)), ...
203 |                  hidden_layer_size, (input_layer_size + 1));
204 | 
205 | Theta2 = reshape(nn_params((1 + (hidden_layer_size * (input_layer_size + 1))):end), ...
206 |                  num_labels, (hidden_layer_size + 1));
207 | 
208 | fprintf('Program paused. Press enter to continue.\n');
209 | pause;
210 | 
211 | 
212 | %% ================= Part 9: Visualize Weights =================
213 | %  You can now "visualize" what the neural network is learning by 
214 | %  displaying the hidden units to see what features they are capturing in 
215 | %  the data.
216 | 
217 | fprintf('\nVisualizing Neural Network... \n')
218 | 
219 | displayData(Theta1(:, 2:end));
220 | 
221 | fprintf('\nProgram paused. Press enter to continue.\n');
222 | pause;
223 | 
224 | %% ================= Part 10: Implement Predict =================
225 | %  After training the neural network, we would like to use it to predict
226 | %  the labels. You will now implement the "predict" function to use the
227 | %  neural network to predict the labels of the training set. This lets
228 | %  you compute the training set accuracy.
229 | 
230 | pred = predict(Theta1, Theta2, X);
231 | 
232 | fprintf('\nTraining Set Accuracy: %f\n', mean(double(pred == y)) * 100);
233 | 
234 | 
235 | 


--------------------------------------------------------------------------------
/ex4/nnCostFunction -V2.m:
--------------------------------------------------------------------------------
  1 | function [J grad] = nnCostFunction(nn_params, ...
  2 |                                    input_layer_size, ...
  3 |                                    hidden_layer_size, ...
  4 |                                    num_labels, ...
  5 |                                    X, y, lambda)
  6 | %NNCOSTFUNCTION Implements the neural network cost function for a two layer
  7 | %neural network which performs classification
  8 | %   [J grad] = NNCOSTFUNCTON(nn_params, hidden_layer_size, num_labels, ...
  9 | %   X, y, lambda) computes the cost and gradient of the neural network. The
 10 | %   parameters for the neural network are "unrolled" into the vector
 11 | %   nn_params and need to be converted back into the weight matrices. 
 12 | % 
 13 | %   The returned parameter grad should be a "unrolled" vector of the
 14 | %   partial derivatives of the neural network.
 15 | %
 16 | 
 17 | % Reshape nn_params back into the parameters Theta1 and Theta2, the weight matrices
 18 | % for our 2 layer neural network
 19 | Theta1 = reshape(nn_params(1:hidden_layer_size * (input_layer_size + 1)), ...
 20 |                  hidden_layer_size, (input_layer_size + 1));
 21 | 
 22 | Theta2 = reshape(nn_params((1 + (hidden_layer_size * (input_layer_size + 1))):end), ...
 23 |                  num_labels, (hidden_layer_size + 1));
 24 | 
 25 | % Setup some useful variables
 26 | m = size(X, 1);
 27 | X = [ones(m, 1) X];
 28 | 
 29 | % You need to return the following variables correctly 
 30 | J = 0;
 31 | 
 32 | y_tmp = zeros(m, num_labels);
 33 | total = 0;
 34 | 
 35 | for i = 1: m
 36 |     row_label = y(i);
 37 |     y_tmp(i, row_label) = 1;
 38 |     
 39 |     output = sigmoid([ones(1,1) sigmoid(X(i,:) * Theta1')]...
 40 |         * Theta2');
 41 |     cost = -y_tmp(i, :) * log(output') - (ones(1, num_labels)...
 42 |         - y_tmp(i, :)) * log(ones(num_labels, 1) - output');
 43 |     
 44 |     total = total + cost;
 45 | end
 46 | 
 47 | sums_1 = 0;
 48 | sums_2 = 0;
 49 | 
 50 | 
 51 | for i = 1 : hidden_layer_size
 52 |     for j = 2: (input_layer_size + 1)
 53 |         tmp_theta1 = (Theta1(i,j)) ^ 2;
 54 |         sums_1 = sums_1 + tmp_theta1;
 55 |     end
 56 | end
 57 | 
 58 | for i = 1 : num_labels
 59 |     for j = 2: (hidden_layer_size + 1)
 60 |         tmp_theta2 = (Theta2(i,j)) ^ 2;
 61 |         sums_2 = sums_2 + tmp_theta2;
 62 |     end
 63 | end
 64 | 
 65 | penal_sum = lambda / 2 * (sums_1 + sums_2);
 66 |             
 67 | J = 1 / m * (total + penal_sum);
 68 | 
 69 |     
 70 | %compute the BP algrithm
 71 | 
 72 | delta_total1 = zeros(size(Theta1));
 73 | %delta_total2 = zeros(num_labels, hidden_layer_size);
 74 | delta_total2 = zeros(size(Theta2));
 75 | 
 76 | for i = 1:m     
 77 |     
 78 |     %compute the layer-wise units
 79 |     a1 = X(i,:);
 80 |     z2 = a1 * Theta1';
 81 |     a2 = sigmoid(z2);
 82 |     a2 = [1 a2];
 83 |     
 84 |     z3 = a2 * Theta2';
 85 |     a3 = sigmoid(z3);
 86 |     z2 = [1 z2];
 87 |     
 88 |     %compute the delta within layers
 89 |     delta3 = a3 - y_tmp(i);
 90 |     delta_tmp =  delta3 * Theta2; 
 91 |     delta2 =  delta_tmp .* sigmoidGradient(z2);
 92 |         
 93 |     delta2 = delta2(2 : end);
 94 |     %sum all the delta by formula
 95 |     delta_total1 = delta_total1 + delta2' * a1;
 96 |     delta_total2 = delta_total2 + delta3' * a2;
 97 |     
 98 | end
 99 | 
100 | 
101 | 
102 | Theta1_grad = (1 / m) .* delta_total1;
103 | Theta2_grad = (1 / m) .* delta_total2;
104 | 
105 | % ====================== YOUR CODE HERE ======================
106 | % Instructions: You should complete the code by working through the
107 | % -------------------------------------------------------------
108 | 
109 | % =========================================================================
110 | 
111 | % Unroll gradients
112 | grad = [Theta1_grad(:) ; Theta2_grad(:)];
113 | 
114 | 
115 | end
116 | 


--------------------------------------------------------------------------------
/ex4/nnCostFunction.m:
--------------------------------------------------------------------------------
  1 | function [J grad] = nnCostFunction(nn_params, ...
  2 |                                    input_layer_size, ...
  3 |                                    hidden_layer_size, ...
  4 |                                    num_labels, ...
  5 |                                    X, y, lambda)
  6 | %NNCOSTFUNCTION Implements the neural network cost function for a two layer
  7 | %neural network which performs classification
  8 | %   [J grad] = NNCOSTFUNCTON(nn_params, hidden_layer_size, num_labels, ...
  9 | %   X, y, lambda) computes the cost and gradient of the neural network. The
 10 | %   parameters for the neural network are "unrolled" into the vector
 11 | %   nn_params and need to be converted back into the weight matrices. 
 12 | % 
 13 | %   The returned parameter grad should be a "unrolled" vector of the
 14 | %   partial derivatives of the neural network.
 15 | %
 16 | 
 17 | % Reshape nn_params back into the parameters Theta1 and Theta2, the weight matrices
 18 | % for our 2 layer neural network
 19 | Theta1 = reshape(nn_params(1:hidden_layer_size * (input_layer_size + 1)), ...
 20 |                  hidden_layer_size, (input_layer_size + 1));
 21 | 
 22 | Theta2 = reshape(nn_params((1 + (hidden_layer_size * (input_layer_size + 1))):end), ...
 23 |                  num_labels, (hidden_layer_size + 1));
 24 | 
 25 | % Setup some useful variables
 26 | m = size(X, 1);
 27 |          
 28 | % You need to return the following variables correctly 
 29 | J = 0;
 30 | Theta1_grad = zeros(size(Theta1));
 31 | Theta2_grad = zeros(size(Theta2));
 32 | 
 33 | % ====================== YOUR CODE HERE ======================
 34 | % Instructions: You should complete the code by working through the
 35 | %               following parts.
 36 | %
 37 | % Part 1: Feedforward the neural network and return the cost in the
 38 | %         variable J. After implementing Part 1, you can verify that your
 39 | %         cost function computation is correct by verifying the cost
 40 | %         computed in ex4.m
 41 | %
 42 | % Part 2: Implement the backpropagation algorithm to compute the gradients
 43 | %         Theta1_grad and Theta2_grad. You should return the partial derivatives of
 44 | %         the cost function with respect to Theta1 and Theta2 in Theta1_grad and
 45 | %         Theta2_grad, respectively. After implementing Part 2, you can check
 46 | %         that your implementation is correct by running checkNNGradients
 47 | %
 48 | %         Note: The vector y passed into the function is a vector of labels
 49 | %               containing values from 1..K. You need to map this vector into a 
 50 | %               binary vector of 1's and 0's to be used with the neural network
 51 | %               cost function.
 52 | %
 53 | %         Hint: We recommend implementing backpropagation using a for-loop
 54 | %               over the training examples if you are implementing it for the 
 55 | %               first time.
 56 | %
 57 | % Part 3: Implement regularization with the cost function and gradients.
 58 | %
 59 | %         Hint: You can implement this around the code for
 60 | %               backpropagation. That is, you can compute the gradients for
 61 | %               the regularization separately and then add them to Theta1_grad
 62 | %               and Theta2_grad from Part 2.
 63 | %
 64 | 
 65 | X = [ones(m,1) X];
 66 | 
 67 | 
 68 | % foward propagation
 69 | % a1 = X; 
 70 | a2 = sigmoid(Theta1 * X');
 71 | a2 = [ones(m,1) a2'];
 72 | 
 73 | h_theta = sigmoid(Theta2 * a2'); % h_theta equals z3
 74 | 
 75 | % y(k) - the great trick - we need to recode the labels as vectors containing only values 0 or 1 (page 5 of ex4.pdf)
 76 | yk = zeros(num_labels, m); 
 77 | for i=1:m,
 78 |   yk(y(i),i)=1;
 79 | end
 80 | 
 81 | % follow the form
 82 | J = (1/m) * sum ( sum (  (-yk) .* log(h_theta)  -  (1-yk) .* log(1-h_theta) ));
 83 | 
 84 | 
 85 | 
 86 | % Note that you should not be regularizing the terms that correspond to the bias. 
 87 | % For the matrices Theta1 and Theta2, this corresponds to the first column of each matrix.
 88 | t1 = Theta1(:,2:size(Theta1,2));
 89 | t2 = Theta2(:,2:size(Theta2,2));
 90 | 
 91 | % regularization formula
 92 | Reg = lambda  * (sum( sum ( t1.^ 2 )) + sum( sum ( t2.^ 2 ))) / (2*m);
 93 | 
 94 | % cost function + reg
 95 | J = J + Reg;
 96 | 
 97 | 
 98 | % -------------------------------------------------------------
 99 | 
100 | % Backprop
101 | 
102 | for t=1:m,
103 | 
104 | 	% dummie pass-by-pass
105 | 	% forward propag
106 | 
107 | 	a1 = X(t,:); % X already have bias
108 | 	z2 = Theta1 * a1';
109 | 
110 | 	a2 = sigmoid(z2);
111 | 	a2 = [1 ; a2]; % add bias
112 | 
113 | 	z3 = Theta2 * a2;
114 | 
115 | 	a3 = sigmoid(z3); % final activation layer a3 == h(theta)
116 | 
117 | 
118 | 	% back propag (god bless me)	
119 | 
120 | 	z2=[1; z2]; % bias
121 | 
122 | 	delta_3 = a3 - yk(:,t); % y(k) trick - getting columns of t element
123 | 	delta_2 = (Theta2' * delta_3) .* sigmoidGradient(z2);
124 | 
125 | 	% skipping sigma2(0) 
126 | 	delta_2 = delta_2(2:end); 
127 | 
128 | 	Theta2_grad = Theta2_grad + delta_3 * a2';
129 | 	Theta1_grad = Theta1_grad + delta_2 * a1; % I don't know why a1 doesn't need to be transpost (brute force try)
130 | 
131 | end;
132 | 
133 | % Theta1_grad = Theta1_grad ./ m;
134 | % Theta2_grad = Theta2_grad ./ m;
135 | 
136 | 
137 | % Regularization (here you go)
138 | 
139 | 
140 | 	Theta1_grad(:, 1) = Theta1_grad(:, 1) ./ m;
141 | 
142 | 	Theta1_grad(:, 2:end) = Theta1_grad(:, 2:end) ./ m + ((lambda/m) * Theta1(:, 2:end));
143 | 
144 | 
145 | 	Theta2_grad(:, 1) = Theta2_grad(:, 1) ./ m;
146 | 
147 | 	Theta2_grad(:, 2:end) = Theta2_grad(:, 2:end) ./ m + ((lambda/m) * Theta2(:, 2:end));
148 | 
149 | 
150 | 
151 | 
152 | % =========================================================================
153 | 
154 | % Unroll gradients
155 | grad = [Theta1_grad(:) ; Theta2_grad(:)];
156 | 
157 | 
158 | 
159 | end
160 | 


--------------------------------------------------------------------------------
/ex4/randInitializeWeights.m:
--------------------------------------------------------------------------------
 1 | function W = randInitializeWeights(L_in, L_out)
 2 | %RANDINITIALIZEWEIGHTS Randomly initialize the weights of a layer with L_in
 3 | %incoming connections and L_out outgoing connections
 4 | %   W = RANDINITIALIZEWEIGHTS(L_in, L_out) randomly initializes the weights 
 5 | %   of a layer with L_in incoming connections and L_out outgoing 
 6 | %   connections. 
 7 | %
 8 | %   Note that W should be set to a matrix of size(L_out, 1 + L_in) as
 9 | %   the first column of W handles the "bias" terms
10 | %
11 | 
12 | % You need to return the following variables correctly 
13 | W = zeros(L_out, 1 + L_in);
14 | 
15 | % ====================== YOUR CODE HERE ======================
16 | % Instructions: Initialize W randomly so that we break the symmetry while
17 | %               training the neural network.
18 | %
19 | % Note: The first column of W corresponds to the parameters for the bias unit
20 | %
21 | epsilon_init = 0.12;
22 | W = rand(L_out, 1 + L_in) * 2 * epsilon_init - epsilon_init;
23 | 
24 | 
25 | 
26 | 
27 | 
28 | 
29 | 
30 | 
31 | % =========================================================================
32 | 
33 | end
34 | 


--------------------------------------------------------------------------------
/ex4/sigmoidGradient.m:
--------------------------------------------------------------------------------
 1 | function g = sigmoidGradient(z)
 2 | %SIGMOIDGRADIENT returns the gradient of the sigmoid function
 3 | %evaluated at z
 4 | %   g = SIGMOIDGRADIENT(z) computes the gradient of the sigmoid function
 5 | %   evaluated at z. This should work regardless if z is a matrix or a
 6 | %   vector. In particular, if z is a vector or matrix, you should return
 7 | %   the gradient for each element.
 8 | 
 9 | g = zeros(size(z));
10 | 
11 | [rows cols] = size(g);
12 | 
13 | if cols == 1 && rows == 1
14 |     g = sigmoid(z) * (ones(size(g)) - sigmoid(z));
15 | else
16 |     g = sigmoid(z) .* (ones(size(g)) - sigmoid(z));
17 | 
18 | 
19 | 
20 | % =============================================================
21 | 
22 | 
23 | 
24 | 
25 | end
26 | 


--------------------------------------------------------------------------------
/ex5/ex5.m:
--------------------------------------------------------------------------------
  1 | %% Machine Learning Online Class
  2 | %  Exercise 5 | Regularized Linear Regression and Bias-Variance
  3 | %
  4 | %  Instructions
  5 | %  ------------
  6 | % 
  7 | %  This file contains code that helps you get started on the
  8 | %  exercise. You will need to complete the following functions:
  9 | %
 10 | %     linearRegCostFunction.m
 11 | %     learningCurve.m
 12 | %     validationCurve.m
 13 | %
 14 | %  For this exercise, you will not need to change any code in this file,
 15 | %  or any other files other than those mentioned above.
 16 | %
 17 | 
 18 | %% Initialization
 19 | clear ; close all; clc
 20 | 
 21 | %% =========== Part 1: Loading and Visualizing Data =============
 22 | %  We start the exercise by first loading and visualizing the dataset. 
 23 | %  The following code will load the dataset into your environment and plot
 24 | %  the data.
 25 | %
 26 | 
 27 | % Load Training Data
 28 | fprintf('Loading and Visualizing Data ...\n')
 29 | 
 30 | % Load from ex5data1: 
 31 | % You will have X, y, Xval, yval, Xtest, ytest in your environment
 32 | load ('ex5data1.mat');
 33 | 
 34 | % m = Number of examples
 35 | m = size(X, 1);
 36 | 
 37 | % Plot training data
 38 | plot(X, y, 'rx', 'MarkerSize', 10, 'LineWidth', 1.5);
 39 | xlabel('Change in water level (x)');
 40 | ylabel('Water flowing out of the dam (y)');
 41 | 
 42 | fprintf('Program paused. Press enter to continue.\n');
 43 | pause;
 44 | 
 45 | %% =========== Part 2: Regularized Linear Regression Cost =============
 46 | %  You should now implement the cost function for regularized linear 
 47 | %  regression. 
 48 | %
 49 | 
 50 | theta = [1 ; 1];
 51 | J = linearRegCostFunction([ones(m, 1) X], y, theta, 1);
 52 | 
 53 | fprintf(['Cost at theta = [1 ; 1]: %f '...
 54 |          '\n(this value should be about 303.993192)\n'], J);
 55 | 
 56 | fprintf('Program paused. Press enter to continue.\n');
 57 | pause;
 58 | 
 59 | %% =========== Part 3: Regularized Linear Regression Gradient =============
 60 | %  You should now implement the gradient for regularized linear 
 61 | %  regression.
 62 | %
 63 | 
 64 | theta = [1 ; 1];
 65 | [J, grad] = linearRegCostFunction([ones(m, 1) X], y, theta, 1);
 66 | 
 67 | fprintf(['Gradient at theta = [1 ; 1]:  [%f; %f] '...
 68 |          '\n(this value should be about [-15.303016; 598.250744])\n'], ...
 69 |          grad(1), grad(2));
 70 | 
 71 | fprintf('Program paused. Press enter to continue.\n');
 72 | pause;
 73 | 
 74 | 
 75 | %% =========== Part 4: Train Linear Regression =============
 76 | %  Once you have implemented the cost and gradient correctly, the
 77 | %  trainLinearReg function will use your cost function to train 
 78 | %  regularized linear regression.
 79 | % 
 80 | %  Write Up Note: The data is non-linear, so this will not give a great 
 81 | %                 fit.
 82 | %
 83 | 
 84 | %  Train linear regression with lambda = 0
 85 | lambda = 0;
 86 | [theta] = trainLinearReg([ones(m, 1) X], y, lambda);
 87 | 
 88 | %  Plot fit over the data
 89 | plot(X, y, 'rx', 'MarkerSize', 10, 'LineWidth', 1.5);
 90 | xlabel('Change in water level (x)');
 91 | ylabel('Water flowing out of the dam (y)');
 92 | hold on;
 93 | plot(X, [ones(m, 1) X]*theta, '--', 'LineWidth', 2)
 94 | hold off;
 95 | 
 96 | fprintf('Program paused. Press enter to continue.\n');
 97 | pause;
 98 | 
 99 | 
100 | %% =========== Part 5: Learning Curve for Linear Regression =============
101 | %  Next, you should implement the learningCurve function. 
102 | %
103 | %  Write Up Note: Since the model is underfitting the data, we expect to
104 | %                 see a graph with "high bias" -- Figure 3 in ex5.pdf 
105 | %
106 | 
107 | lambda = 0;
108 | [error_train, error_val] = ...
109 |     learningCurve([ones(m, 1) X], y, ...
110 |                   [ones(size(Xval, 1), 1) Xval], yval, ...
111 |                   lambda);
112 | 
113 | plot(1:m, error_train, 1:m, error_val);
114 | title('Learning curve for linear regression')
115 | legend('Train', 'Cross Validation')
116 | xlabel('Number of training examples')
117 | ylabel('Error')
118 | axis([0 13 0 150])
119 | 
120 | fprintf('# Training Examples\tTrain Error\tCross Validation Error\n');
121 | for i = 1:m
122 |     fprintf('  \t%d\t\t%f\t%f\n', i, error_train(i), error_val(i));
123 | end
124 | 
125 | fprintf('Program paused. Press enter to continue.\n');
126 | pause;
127 | 
128 | %% =========== Part 6: Feature Mapping for Polynomial Regression =============
129 | %  One solution to this is to use polynomial regression. You should now
130 | %  complete polyFeatures to map each example into its powers
131 | %
132 | 
133 | p = 8;
134 | 
135 | % Map X onto Polynomial Features and Normalize
136 | X_poly = polyFeatures(X, p);
137 | [X_poly, mu, sigma] = featureNormalize(X_poly);  % Normalize
138 | X_poly = [ones(m, 1), X_poly];                   % Add Ones
139 | 
140 | % Map X_poly_test and normalize (using mu and sigma)
141 | X_poly_test = polyFeatures(Xtest, p);
142 | X_poly_test = bsxfun(@minus, X_poly_test, mu);
143 | X_poly_test = bsxfun(@rdivide, X_poly_test, sigma);
144 | X_poly_test = [ones(size(X_poly_test, 1), 1), X_poly_test];         % Add Ones
145 | 
146 | % Map X_poly_val and normalize (using mu and sigma)
147 | X_poly_val = polyFeatures(Xval, p);
148 | X_poly_val = bsxfun(@minus, X_poly_val, mu);
149 | X_poly_val = bsxfun(@rdivide, X_poly_val, sigma);
150 | X_poly_val = [ones(size(X_poly_val, 1), 1), X_poly_val];           % Add Ones
151 | 
152 | fprintf('Normalized Training Example 1:\n');
153 | fprintf('  %f  \n', X_poly(1, :));
154 | 
155 | fprintf('\nProgram paused. Press enter to continue.\n');
156 | pause;
157 | 
158 | 
159 | 
160 | %% =========== Part 7: Learning Curve for Polynomial Regression =============
161 | %  Now, you will get to experiment with polynomial regression with multiple
162 | %  values of lambda. The code below runs polynomial regression with 
163 | %  lambda = 0. You should try running the code with different values of
164 | %  lambda to see how the fit and learning curve change.
165 | %
166 | 
167 | lambda = 0.3;
168 | [theta] = trainLinearReg(X_poly, y, lambda);
169 | 
170 | % Plot training data and fit
171 | figure(1);
172 | plot(X, y, 'rx', 'MarkerSize', 10, 'LineWidth', 1.5);
173 | plotFit(min(X), max(X), mu, sigma, theta, p);
174 | xlabel('Change in water level (x)');
175 | ylabel('Water flowing out of the dam (y)');
176 | title (sprintf('Polynomial Regression Fit (lambda = %f)', lambda));
177 | 
178 | figure(2);
179 | [error_train, error_val] = ...
180 |     learningCurve(X_poly, y, X_poly_val, yval, lambda);
181 | plot(1:m, error_train, 1:m, error_val);
182 | 
183 | title(sprintf('Polynomial Regression Learning Curve (lambda = %f)', lambda));
184 | xlabel('Number of training examples')
185 | ylabel('Error')
186 | axis([0 13 0 100])
187 | legend('Train', 'Cross Validation')
188 | 
189 | fprintf('Polynomial Regression (lambda = %f)\n\n', lambda);
190 | fprintf('# Training Examples\tTrain Error\tCross Validation Error\n');
191 | for i = 1:m
192 |     fprintf('  \t%d\t\t%f\t%f\n', i, error_train(i), error_val(i));
193 | end
194 | 
195 | fprintf('Program paused. Press enter to continue.\n');
196 | pause;
197 | 
198 | %% =========== Part 8: Validation for Selecting Lambda =============
199 | %  You will now implement validationCurve to test various values of 
200 | %  lambda on a validation set. You will then use this to select the
201 | %  "best" lambda value.
202 | %
203 | 
204 | [lambda_vec, error_train, error_val] = ...
205 |     validationCurve(X_poly, y, X_poly_val, yval);
206 | 
207 | close all;
208 | plot(lambda_vec, error_train, lambda_vec, error_val);
209 | legend('Train', 'Cross Validation');
210 | xlabel('lambda');
211 | ylabel('Error');
212 | 
213 | fprintf('lambda\t\tTrain Error\tValidation Error\n');
214 | for i = 1:length(lambda_vec)
215 | 	fprintf(' %f\t%f\t%f\n', ...
216 |             lambda_vec(i), error_train(i), error_val(i));
217 | end
218 | 
219 | fprintf('Program paused. Press enter to continue.\n');
220 | pause;
221 | 


--------------------------------------------------------------------------------
/ex5/learningCurve.m:
--------------------------------------------------------------------------------
 1 | function [error_train, error_val] = ...
 2 |     learningCurve(X, y, Xval, yval, lambda)
 3 | %LEARNINGCURVE Generates the train and cross validation set errors needed 
 4 | 
 5 | % Number of training examples
 6 | m = size(X, 1);
 7 | %X = [ones(m, 1) X];
 8 | % You need to return these values correctly
 9 | error_train = zeros(m, 1);
10 | error_val   = zeros(m, 1);
11 | 
12 | 
13 | %[theta] = trainLinearReg(X, y, lambda);
14 | 
15 | % Compute train/cross validation errors 
16 | % for i = 1 : m
17 | %     [theta] = trainLinearReg(X(1:i,:), y(1:i), lambda);
18 | %     error_train(i) = linearRegCostFunction(X...
19 | %         (1:i,:), y(1:i), theta, lambda);
20 | %     error_val(i) = linearRegCostFunction(Xval, yval, theta, lambda);
21 | % end
22 | for i = 1:m
23 |     X_sub = X(1:i, :);
24 |     y_sub = y(1:i); 
25 | 
26 |     theta = trainLinearReg(X_sub, y_sub, lambda);
27 | 
28 |     error_train(i) = linearRegCostFunction(X_sub, y_sub, theta, 0);
29 |     error_val(i) = linearRegCostFunction(Xval, yval, theta, 0);
30 | end
31 | % ====================== YOUR CODE HERE ======================
32 | 
33 | % -------------------------------------------------------------
34 | 
35 | % =========================================================================
36 | 
37 | end
38 | 


--------------------------------------------------------------------------------
/ex5/linearRegCostFunction.m:
--------------------------------------------------------------------------------
 1 | function [J, grad] = linearRegCostFunction(X, y, theta, lambda)
 2 | %LINEARREGCOSTFUNCTION Compute cost and gradient for regularized linear 
 3 | %regression with multiple variables
 4 | %   [J, grad] = LINEARREGCOSTFUNCTION(X, y, theta, lambda) computes the 
 5 | %   cost of using theta as the parameter for linear regression to fit the 
 6 | %   data points in X and y. Returns the cost in J and the gradient in grad
 7 | 
 8 | % Initialize some useful values
 9 | m = length(y); % number of training examples
10 | 
11 | % You need to return the following variables correctly 
12 | J = 0;
13 | grad = zeros(size(theta));
14 | 
15 | %X = [ones(m,1) X];
16 | % ====================== YOUR CODE HERE ======================
17 | % cost = 0;
18 | % total = 0;
19 | % 
20 | % for i =1:m
21 | %     cost = (theta' * X(i,:)' - y(i,:))^2;
22 | %     total = total + cost;
23 |     
24 | 
25 | % cost = (norm((X * theta - y))) .^2;
26 | % penal = lambda ./ (norm(theta(2:end,1))) .^ 2;
27 | % 
28 | 
29 | J = (1/(2*m))*sum(power((X*theta - y),2))+ (lambda/(2*m)) * sum(power(theta(2:end),2));
30 | 
31 | G = (lambda/m) .* theta;
32 | G(1) = 0; % this is always 0
33 | 
34 | grad = ((1/m) .* X' * (X*theta - y)) + G;
35 | %J = 0.5 / m * (cost + penal);
36 | % J = (1/(2*m))*sum(power((X*theta - y),2))+ (lambda/(2*m)) * sum(power(theta(2:end),2));
37 | % % h=X*theta;
38 | % % thetas=theta(2:end,1);
39 | % % J=1/(2*m).*sum((h-y).^2)+(lambda/(2.*m)).*sum(thetas.^2);
40 | % 
41 | % grad = 1 / m .* ((theta' * X' - y') * X + lambda .* theta')';
42 | % grad(1,:) = 1 / m * (theta' * X' - y') * X(:,1);
43 | 
44 | 
45 | % =========================================================================
46 | 
47 | grad = grad(:);
48 | 
49 | end
50 | 


--------------------------------------------------------------------------------
/ex5/polyFeatures.m:
--------------------------------------------------------------------------------
 1 | function [X_poly] = polyFeatures(X, p)
 2 | %POLYFEATURES Maps X (1D vector) into the p-th power
 3 | %   [X_poly] = POLYFEATURES(X, p) takes a data matrix X (size m x 1) and
 4 | %   maps each example into its polynomial features where
 5 | %   X_poly(i, :) = [X(i) X(i).^2 X(i).^3 ...  X(i).^p];
 6 | %
 7 | 
 8 | 
 9 | % You need to return the following variables correctly.
10 | X_poly = zeros(numel(X), p);
11 | 
12 | % ====================== YOUR CODE HERE ======================
13 | % Instructions: Given a vector X, return a matrix X_poly where the p-th 
14 | %               column of X contains the values of X to the p-th power.
15 | %
16 | % 
17 | 
18 | for j = 1:p
19 |     
20 |     X_poly(:,j) = X .^ j;
21 |     
22 | end
23 | 
24 | 
25 | 
26 | 
27 | % =========================================================================
28 | 
29 | end
30 | 


--------------------------------------------------------------------------------
/ex5/trainLinearReg.m:
--------------------------------------------------------------------------------
 1 | function [theta] = trainLinearReg(X, y, lambda)
 2 | %TRAINLINEARREG Trains linear regression given a dataset (X, y) and a
 3 | %regularization parameter lambda
 4 | %   [theta] = TRAINLINEARREG (X, y, lambda) trains linear regression using
 5 | %   the dataset (X, y) and regularization parameter lambda. Returns the
 6 | %   trained parameters theta.
 7 | %
 8 | 
 9 | % Initialize Theta
10 | initial_theta = zeros(size(X, 2), 1); 
11 | 
12 | % Create "short hand" for the cost function to be minimized
13 | costFunction = @(t) linearRegCostFunction(X, y, t, lambda);
14 | 
15 | % Now, costFunction is a function that takes in only one argument
16 | options = optimset('MaxIter', 200, 'GradObj', 'on');
17 | 
18 | % Minimize using fmincg
19 | theta = fmincg(costFunction, initial_theta, options);
20 | 
21 | end
22 | 


--------------------------------------------------------------------------------
/ex5/validationCurve.m:
--------------------------------------------------------------------------------
 1 | function [lambda_vec, error_train, error_val] = ...
 2 |     validationCurve(X, y, Xval, yval)
 3 | %VALIDATIONCURVE Generate the train and validation errors needed to
 4 | %plot a validation curve that we can use to select lambda
 5 | %   [lambda_vec, error_train, error_val] = ...
 6 | %       VALIDATIONCURVE(X, y, Xval, yval) returns the train
 7 | %       and validation errors (in error_train, error_val)
 8 | %       for different values of lambda. You are given the training set (X,
 9 | %       y) and validation set (Xval, yval).
10 | %
11 | 
12 | % Selected values of lambda (you should not change this)
13 | lambda_vec = [0 0.001 0.003 0.01 0.03 0.1 0.3 1 3 10]';
14 | 
15 | % You need to return these variables correctly.
16 | error_train = zeros(length(lambda_vec), 1);
17 | error_val = zeros(length(lambda_vec), 1);
18 | 
19 | %X = [ones(length(X),1) X];
20 | %Xval = [ones(length(Xval),1) Xval];
21 | 
22 | %compute test/train error
23 | for i = 1:length(lambda_vec)
24 |     [theta] = trainLinearReg(X,y,lambda_vec(i));
25 |     error_train(i) = linearRegCostFunction(X, y, theta,lambda_vec(i));
26 |     error_val(i) = linearRegCostFunction(Xval, yval, theta,lambda_vec(i));
27 | end
28 | 
29 | 
30 | 
31 | 
32 | 
33 | % =========================================================================
34 | 
35 | end
36 | 


--------------------------------------------------------------------------------
/ex6/dataset3Params.m:
--------------------------------------------------------------------------------
 1 | function [C, sigma] = dataset3Params(X, y, Xval, yval)
 2 | %DATASET3PARAMS returns your choice of C and sigma for Part 3 of the exercise
 3 | %where you select the optimal (C, sigma) learning parameters to use for SVM
 4 | %with RBF kernel
 5 | %   [C, sigma] = DATASET3PARAMS(X, y, Xval, yval) returns your choice of C and 
 6 | %   sigma. You should complete this function to return the optimal C and 
 7 | %   sigma based on a cross-validation set.
 8 | %
 9 | 
10 | % You need to return the following variables correctly.
11 | % C = 1;
12 | % sigma = 0.3;
13 | 
14 | % ====================== YOUR CODE HERE ======================
15 | 
16 | %predictions = svmPredict(model, Xval);
17 | 
18 | %svmTrain(X, y, C, @(x1, x2) gaussianKernel(x1, x2, sigma))
19 | 
20 | Para = zeros(62,2);
21 | 
22 | sigma = zeros(8,1);
23 | C = zeros(8,1);
24 | 
25 | sigma = [0.01; 0.03; 0.1; 0.3; 1; 3;10 ;30];
26 | C = [0.01; 0.03; 0.1; 0.3; 1; 3;10 ;30];
27 | 
28 | 
29 | tmp = 0.01;
30 | 
31 | 
32 | for i = 1:8
33 |     for j = 1:8
34 |     Para(8 * (i -1) + j,1) = sigma(i,:);
35 |     end
36 | end
37 | 
38 | for i = 1:8
39 |     for j = 1:8
40 |     Para(8 * (i -1) + j,2) = C(j,:);
41 |     end
42 | end
43 |        
44 | error = zeros(64,1);
45 | for i = 1:64
46 |     
47 |     model = svmTrain(X, y, Para(i,1), @(x1, x2) gaussianKernel...
48 |         (x1, x2, Para(i,2)));
49 |     
50 |     %  Note: You can compute the prediction error using 
51 |     predictions = svmPredict(model, Xval);
52 |     err_tmp = mean(double(predictions ~= yval));
53 |     error(i,:) = err_tmp;
54 | end
55 | 
56 | pos = 0;
57 | for j = 1:64
58 |     mins = min(error);
59 |     if(error(j,:) == mins)
60 |         pos = j;
61 |         break;
62 |     end
63 | end
64 | 
65 | C = Para(pos,1);
66 | sigma = Para(pos, 2);
67 |     
68 |       
69 | 
70 | 
71 | % =========================================================================
72 | 
73 | end
74 | 


--------------------------------------------------------------------------------
/ex6/emailFeatures.m:
--------------------------------------------------------------------------------
 1 | function x = emailFeatures(word_indices)
 2 | %EMAILFEATURES takes in a word_indices vector and produces a feature vector
 3 | %from the word indices
 4 | %   x = EMAILFEATURES(word_indices) takes in a word_indices vector and 
 5 | %   produces a feature vector from the word indices. 
 6 | 
 7 | % Total number of words in the dictionary
 8 | n = 1899;
 9 | 
10 | % You need to return the following variables correctly.
11 | x = zeros(n, 1);
12 | 
13 | % ====================== YOUR CODE HERE ======================
14 | len = length(word_indices);
15 | 
16 | for i = 1 : n
17 |     for j = 1 : len
18 |         if( i ==  word_indices(j,:))
19 |             x(i,1) = 1;
20 |         end
21 |     end
22 | end
23 | 
24 | % % This is the second method to compute
25 | % for i = 1:len
26 | %     tmp = word_indices(i,:);
27 | %     for j = 1:n
28 | %         if(tmp == j)
29 | %             x(j,1) = 1;
30 | %         end
31 | %     end
32 | % % end
33 | 
34 | 
35 | 
36 | 
37 | % =========================================================================
38 |     
39 | 
40 | end
41 | 


--------------------------------------------------------------------------------
/ex6/ex6.m:
--------------------------------------------------------------------------------
  1 | %% Machine Learning Online Class
  2 | %  Exercise 6 | Support Vector Machines
  3 | %
  4 | %  Instructions
  5 | %  ------------
  6 | % 
  7 | %  This file contains code that helps you get started on the
  8 | %  exercise. You will need to complete the following functions:
  9 | %
 10 | %     gaussianKernel.m
 11 | %     dataset3Params.m
 12 | %     processEmail.m
 13 | %     emailFeatures.m
 14 | %
 15 | %  For this exercise, you will not need to change any code in this file,
 16 | %  or any other files other than those mentioned above.
 17 | %
 18 | 
 19 | %% Initialization
 20 | clear ; close all; clc
 21 | 
 22 | %% =============== Part 1: Loading and Visualizing Data ================
 23 | %  We start the exercise by first loading and visualizing the dataset. 
 24 | %  The following code will load the dataset into your environment and plot
 25 | %  the data.
 26 | %
 27 | 
 28 | fprintf('Loading and Visualizing Data ...\n')
 29 | 
 30 | % Load from ex6data1: 
 31 | % You will have X, y in your environment
 32 | load('ex6data1.mat');
 33 | 
 34 | % Plot training data
 35 | plotData(X, y);
 36 | 
 37 | fprintf('Program paused. Press enter to continue.\n');
 38 | pause;
 39 | 
 40 | %% ==================== Part 2: Training Linear SVM ====================
 41 | %  The following code will train a linear SVM on the dataset and plot the
 42 | %  decision boundary learned.
 43 | %
 44 | 
 45 | % Load from ex6data1: 
 46 | % You will have X, y in your environment
 47 | load('ex6data1.mat');
 48 | 
 49 | fprintf('\nTraining Linear SVM ...\n')
 50 | 
 51 | % You should try to change the C value below and see how the decision
 52 | % boundary varies (e.g., try C = 1000)
 53 | C = 100;
 54 | model = svmTrain(X, y, C, @linearKernel, 1e-3, 20);
 55 | visualizeBoundaryLinear(X, y, model);
 56 | 
 57 | fprintf('Program paused. Press enter to continue.\n');
 58 | pause;
 59 | 
 60 | %% =============== Part 3: Implementing Gaussian Kernel ===============
 61 | %  You will now implement the Gaussian kernel to use
 62 | %  with the SVM. You should complete the code in gaussianKernel.m
 63 | %
 64 | fprintf('\nEvaluating the Gaussian Kernel ...\n')
 65 | 
 66 | x1 = [1 2 1]; x2 = [0 4 -1]; sigma = 2;
 67 | sim = gaussianKernel(x1, x2, sigma);
 68 | 
 69 | fprintf(['Gaussian Kernel between x1 = [1; 2; 1], x2 = [0; 4; -1], sigma = %f :' ...
 70 |          '\n\t%f\n(for sigma = 2, this value should be about 0.324652)\n'], sigma, sim);
 71 | 
 72 | fprintf('Program paused. Press enter to continue.\n');
 73 | pause;
 74 | 
 75 | %% =============== Part 4: Visualizing Dataset 2 ================
 76 | %  The following code will load the next dataset into your environment and 
 77 | %  plot the data. 
 78 | %
 79 | 
 80 | fprintf('Loading and Visualizing Data ...\n')
 81 | 
 82 | % Load from ex6data2: 
 83 | % You will have X, y in your environment
 84 | load('ex6data2.mat');
 85 | 
 86 | % Plot training data
 87 | plotData(X, y);
 88 | 
 89 | fprintf('Program paused. Press enter to continue.\n');
 90 | pause;
 91 | 
 92 | %% ========== Part 5: Training SVM with RBF Kernel (Dataset 2) ==========
 93 | %  After you have implemented the kernel, we can now use it to train the 
 94 | %  SVM classifier.
 95 | % 
 96 | fprintf('\nTraining SVM with RBF Kernel (this may take 1 to 2 minutes) ...\n');
 97 | 
 98 | % Load from ex6data2: 
 99 | % You will have X, y in your environment
100 | load('ex6data2.mat');
101 | 
102 | % SVM Parameters
103 | C = 1; sigma = 0.1;
104 | 
105 | % We set the tolerance and max_passes lower here so that the code will run
106 | % faster. However, in practice, you will want to run the training to
107 | % convergence.
108 | model= svmTrain(X, y, C, @(x1, x2) gaussianKernel(x1, x2, sigma)); 
109 | visualizeBoundary(X, y, model);
110 | 
111 | fprintf('Program paused. Press enter to continue.\n');
112 | pause;
113 | 
114 | %% =============== Part 6: Visualizing Dataset 3 ================
115 | %  The following code will load the next dataset into your environment and 
116 | %  plot the data. 
117 | %
118 | 
119 | fprintf('Loading and Visualizing Data ...\n')
120 | 
121 | % Load from ex6data3: 
122 | % You will have X, y in your environment
123 | load('ex6data3.mat');
124 | 
125 | % Plot training data
126 | plotData(X, y);
127 | 
128 | fprintf('Program paused. Press enter to continue.\n');
129 | pause;
130 | 
131 | %% ========== Part 7: Training SVM with RBF Kernel (Dataset 3) ==========
132 | 
133 | %  This is a different dataset that you can use to experiment with. Try
134 | %  different values of C and sigma here.
135 | % 
136 | 
137 | % Load from ex6data3: 
138 | % You will have X, y in your environment
139 | load('ex6data3.mat');
140 | 
141 | % Try different SVM Parameters here
142 | [C, sigma] = dataset3Params(X, y, Xval, yval);
143 | 
144 | % Train the SVM
145 | model= svmTrain(X, y, C, @(x1, x2) gaussianKernel(x1, x2, sigma));
146 | visualizeBoundary(X, y, model);
147 | 
148 | fprintf('Program paused. Press enter to continue.\n');
149 | pause;
150 | 
151 | 


--------------------------------------------------------------------------------
/ex6/ex6_spam.m:
--------------------------------------------------------------------------------
  1 | %% Machine Learning Online Class
  2 | %  Exercise 6 | Spam Classification with SVMs
  3 | %
  4 | %  Instructions
  5 | %  ------------
  6 | % 
  7 | %  This file contains code that helps you get started on the
  8 | %  exercise. You will need to complete the following functions:
  9 | %
 10 | %     gaussianKernel.m
 11 | %     dataset3Params.m
 12 | %     processEmail.m
 13 | %     emailFeatures.m
 14 | %
 15 | %  For this exercise, you will not need to change any code in this file,
 16 | %  or any other files other than those mentioned above.
 17 | %
 18 | 
 19 | %% Initialization
 20 | clear ; close all; clc
 21 | 
 22 | %% ==================== Part 1: Email Preprocessing ====================
 23 | %  To use an SVM to classify emails into Spam v.s. Non-Spam, you first need
 24 | %  to convert each email into a vector of features. In this part, you will
 25 | %  implement the preprocessing steps for each email. You should
 26 | %  complete the code in processEmail.m to produce a word indices vector
 27 | %  for a given email.
 28 | 
 29 | fprintf('\nPreprocessing sample email (emailSample1.txt)\n');
 30 | 
 31 | % Extract Features
 32 | file_contents = readFile('emailSample1.txt');
 33 | word_indices  = processEmail(file_contents);
 34 | 
 35 | % Print Stats
 36 | fprintf('Word Indices: \n');
 37 | fprintf(' %d', word_indices);
 38 | fprintf('\n\n');
 39 | 
 40 | fprintf('Program paused. Press enter to continue.\n');
 41 | pause;
 42 | 
 43 | %% ==================== Part 2: Feature Extraction ====================
 44 | %  Now, you will convert each email into a vector of features in R^n. 
 45 | %  You should complete the code in emailFeatures.m to produce a feature
 46 | %  vector for a given email.
 47 | 
 48 | fprintf('\nExtracting features from sample email (emailSample1.txt)\n');
 49 | 
 50 | % Extract Features
 51 | file_contents = readFile('emailSample1.txt');
 52 | word_indices  = processEmail(file_contents);
 53 | features      = emailFeatures(word_indices);
 54 | 
 55 | % Print Stats
 56 | fprintf('Length of feature vector: %d\n', length(features));
 57 | fprintf('Number of non-zero entries: %d\n', sum(features > 0));
 58 | 
 59 | fprintf('Program paused. Press enter to continue.\n');
 60 | pause;
 61 | 
 62 | %% =========== Part 3: Train Linear SVM for Spam Classification ========
 63 | %  In this section, you will train a linear classifier to determine if an
 64 | %  email is Spam or Not-Spam.
 65 | 
 66 | % Load the Spam Email dataset
 67 | % You will have X, y in your environment
 68 | load('spamTrain.mat');
 69 | 
 70 | fprintf('\nTraining Linear SVM (Spam Classification)\n')
 71 | fprintf('(this may take 1 to 2 minutes) ...\n')
 72 | 
 73 | C = 0.1;
 74 | model = svmTrain(X, y, C, @linearKernel);
 75 | 
 76 | p = svmPredict(model, X);
 77 | 
 78 | fprintf('Training Accuracy: %f\n', mean(double(p == y)) * 100);
 79 | 
 80 | %% =================== Part 4: Test Spam Classification ================
 81 | %  After training the classifier, we can evaluate it on a test set. We have
 82 | %  included a test set in spamTest.mat
 83 | 
 84 | % Load the test dataset
 85 | % You will have Xtest, ytest in your environment
 86 | load('spamTest.mat');
 87 | 
 88 | fprintf('\nEvaluating the trained Linear SVM on a test set ...\n')
 89 | 
 90 | p = svmPredict(model, Xtest);
 91 | 
 92 | fprintf('Test Accuracy: %f\n', mean(double(p == ytest)) * 100);
 93 | pause;
 94 | 
 95 | 
 96 | %% ================= Part 5: Top Predictors of Spam ====================
 97 | %  Since the model we are training is a linear SVM, we can inspect the
 98 | %  weights learned by the model to understand better how it is determining
 99 | %  whether an email is spam or not. The following code finds the words with
100 | %  the highest weights in the classifier. Informally, the classifier
101 | %  'thinks' that these words are the most likely indicators of spam.
102 | %
103 | 
104 | % Sort the weights and obtin the vocabulary list
105 | [weight, idx] = sort(model.w, 'descend');
106 | vocabList = getVocabList();
107 | 
108 | fprintf('\nTop predictors of spam: \n');
109 | for i = 1:15
110 |     fprintf(' %-15s (%f) \n', vocabList{idx(i)}, weight(i));
111 | end
112 | 
113 | fprintf('\n\n');
114 | fprintf('\nProgram paused. Press enter to continue.\n');
115 | pause;
116 | 
117 | %% =================== Part 6: Try Your Own Emails =====================
118 | %  Now that you've trained the spam classifier, you can use it on your own
119 | %  emails! In the starter code, we have included spamSample1.txt,
120 | %  spamSample2.txt, emailSample1.txt and emailSample2.txt as examples. 
121 | %  The following code reads in one of these emails and then uses your 
122 | %  learned SVM classifier to determine whether the email is Spam or 
123 | %  Not Spam
124 | 
125 | % Set the file to be read in (change this to spamSample2.txt,
126 | % emailSample1.txt or emailSample2.txt to see different predictions on
127 | % different emails types). Try your own emails as well!
128 | filename = 'spamSample1.txt';
129 | 
130 | % Read and predict
131 | file_contents = readFile(filename);
132 | word_indices  = processEmail(file_contents);
133 | x             = emailFeatures(word_indices);
134 | p = svmPredict(model, x);
135 | 
136 | fprintf('\nProcessed %s\n\nSpam Classification: %d\n', filename, p);
137 | fprintf('(1 indicates spam, 0 indicates not spam)\n\n');
138 | 
139 | 


--------------------------------------------------------------------------------
/ex6/gaussianKernel.m:
--------------------------------------------------------------------------------
 1 | function sim = gaussianKernel(x1, x2, sigma)
 2 | %RBFKERNEL returns a radial basis function kernel between x1 and x2
 3 | %   sim = gaussianKernel(x1, x2) returns a gaussian kernel between x1 and x2
 4 | %   and returns the value in sim
 5 | 
 6 | % Ensure that x1 and x2 are column vectors
 7 | x1 = x1(:); x2 = x2(:);
 8 | 
 9 | % You need to return the following variables correctly.
10 | sim = 0;
11 | 
12 | % ====================== YOUR CODE HERE ======================
13 | 
14 | Gas_kernel = exp(-(norm(x1 - x2)) ^ 2 / (2 * sigma ^ 2));
15 | 
16 | sim = Gas_kernel;
17 | 
18 | 
19 | 
20 | 
21 | % =============================================================
22 |     
23 | end
24 | 


--------------------------------------------------------------------------------
/ex6/processEmail.m:
--------------------------------------------------------------------------------
  1 | function word_indices = processEmail(email_contents)
  2 | %PROCESSEMAIL preprocesses a the body of an email and
  3 | %returns a list of word_indices 
  4 | %   word_indices = PROCESSEMAIL(email_contents) preprocesses 
  5 | %   the body of an email and returns a list of indices of the 
  6 | %   words contained in the email. 
  7 | %
  8 | 
  9 | % Load Vocabulary
 10 | vocabList = getVocabList();
 11 | 
 12 | % Init return value
 13 | word_indices = [];
 14 | 
 15 | % ========================== Preprocess Email ===========================
 16 | 
 17 | % Find the Headers ( \n\n and remove )
 18 | % Uncomment the following lines if you are working with raw emails with the
 19 | % full headers
 20 | 
 21 | % hdrstart = strfind(email_contents, ([char(10) char(10)]));
 22 | % email_contents = email_contents(hdrstart(1):end);
 23 | 
 24 | % Lower case
 25 | email_contents = lower(email_contents);
 26 | 
 27 | % Strip all HTML
 28 | % Looks for any expression that starts with < and ends with > and replace
 29 | % and does not have any < or > in the tag it with a space
 30 | email_contents = regexprep(email_contents, '<[^<>]+>', ' ');
 31 | 
 32 | % Handle Numbers
 33 | % Look for one or more characters between 0-9
 34 | email_contents = regexprep(email_contents, '[0-9]+', 'number');
 35 | 
 36 | % Handle URLS
 37 | % Look for strings starting with http:// or https://
 38 | email_contents = regexprep(email_contents, ...
 39 |                            '(http|https)://[^\s]*', 'httpaddr');
 40 | 
 41 | % Handle Email Addresses
 42 | % Look for strings with @ in the middle
 43 | email_contents = regexprep(email_contents, '[^\s]+@[^\s]+', 'emailaddr');
 44 | 
 45 | % Handle $ sign
 46 | email_contents = regexprep(email_contents, '[$]+', 'dollar');
 47 | 
 48 | 
 49 | % ========================== Tokenize Email ===========================
 50 | 
 51 | % Output the email to screen as well
 52 | fprintf('\n==== Processed Email ====\n\n');
 53 | 
 54 | % Process file
 55 | l = 0;
 56 | 
 57 | while ~isempty(email_contents)
 58 | 
 59 |     % Tokenize and also get rid of any punctuation
 60 |     [str, email_contents] = ...
 61 |        strtok(email_contents, ...
 62 |               [' @$/#.-:&*+=[]?!(){},''">_<;%' char(10) char(13)]);
 63 |    
 64 |     % Remove any non alphanumeric characters
 65 |     str = regexprep(str, '[^a-zA-Z0-9]', '');
 66 | 
 67 |     % Stem the word 
 68 |     % (the porterStemmer sometimes has issues, so we use a try catch block)
 69 |     try str = porterStemmer(strtrim(str)); 
 70 |     catch str = ''; continue;
 71 |     end;
 72 | 
 73 |     % Skip the word if it is too short
 74 |     if length(str) < 1
 75 |        continue;
 76 |     end
 77 | 
 78 |     len = length(vocabList);
 79 |     
 80 |     for i = 1: len
 81 |         if(strcmp(vocabList{i}, str))
 82 |             word_indices = [word_indices; i];
 83 |         end
 84 |     end
 85 |     
 86 |     % Note: vocabList{idx} returns a the word with index idx in the
 87 |     %       vocabulary list.
 88 |     % 
 89 |     % Note: You can use strcmp(str1, str2) to compare two strings (str1 and
 90 |     %       str2). It will return 1 only if the two strings are equivalent.
 91 |     %
 92 | 
 93 | 
 94 | 
 95 | 
 96 |     % =============================================================
 97 | 
 98 | 
 99 |     % Print to screen, ensuring that the output lines are not too long
100 |     if (l + length(str) + 1) > 78
101 |         fprintf('\n');
102 |         l = 0;
103 |     end
104 |     fprintf('%s ', str);
105 |     l = l + length(str) + 1;
106 | 
107 | end
108 | 
109 | % Print footer
110 | fprintf('\n\n=========================\n');
111 | 
112 | end
113 | 


--------------------------------------------------------------------------------
/ex7/computeCentroids.m:
--------------------------------------------------------------------------------
 1 | function centroids = computeCentroids(X, idx, K)
 2 | %COMPUTECENTROIDS returns the new centroids by computing the means of the 
 3 | %data points assigned to each centroid.
 4 | 
 5 | 
 6 | % Useful variables
 7 | [m n] = size(X);
 8 | 
 9 | % You need to return the following variables correctly.
10 | centroids = zeros(K, n);
11 | 
12 | 
13 | Ck = zeros(K,1);
14 | 
15 | for j = 1:K
16 |     for i = 1:m
17 |         if(idx(i) == j)
18 |             Ck(j) = Ck(j) + 1;
19 |         end
20 |     end
21 | end
22 | 
23 | 
24 | for j = 1:K
25 |     Mu = zeros(K, n);
26 |     for i = 1:m
27 |         
28 |         if (idx(i) == j)
29 |             Mu(i,:) = X(i,:);
30 |         end
31 |     end
32 |         
33 |     for k = 1:n
34 |         centroids(j,k) = 1 / Ck(j) * sum(Mu(:,k));
35 |             
36 |             
37 |     end
38 |     
39 | end
40 | 
41 | 
42 | 
43 | 
44 | 
45 | 
46 | % =============================================================
47 | 
48 | 
49 | end
50 | 
51 | 


--------------------------------------------------------------------------------
/ex7/ex7.m:
--------------------------------------------------------------------------------
  1 | %% Machine Learning Online Class
  2 | %  Exercise 7 | Principle Component Analysis and K-Means Clustering
  3 | %
  4 | %  Instructions
  5 | %  ------------
  6 | %
  7 | %  This file contains code that helps you get started on the
  8 | %  exercise. You will need to complete the following functions:
  9 | %
 10 | %     pca.m
 11 | %     projectData.m
 12 | %     recoverData.m
 13 | %     computeCentroids.m
 14 | %     findClosestCentroids.m
 15 | %     kMeansInitCentroids.m
 16 | %
 17 | %  For this exercise, you will not need to change any code in this file,
 18 | %  or any other files other than those mentioned above.
 19 | %
 20 | 
 21 | %% Initialization
 22 | clear ; close all; clc
 23 | 
 24 | %% ================= Part 1: Find Closest Centroids ====================
 25 | %  To help you implement K-Means, we have divided the learning algorithm 
 26 | %  into two functions -- findClosestCentroids and computeCentroids. In this
 27 | %  part, you should complete the code in the findClosestCentroids function. 
 28 | %
 29 | fprintf('Finding closest centroids.\n\n');
 30 | 
 31 | % Load an example dataset that we will be using
 32 | load('ex7data2.mat');
 33 | 
 34 | % Select an initial set of centroids
 35 | K = 3; % 3 Centroids
 36 | initial_centroids = [3 3; 6 2; 8 5];
 37 | 
 38 | % Find the closest centroids for the examples using the
 39 | % initial_centroids
 40 | idx = findClosestCentroids(X, initial_centroids);
 41 | 
 42 | fprintf('Closest centroids for the first 3 examples: \n')
 43 | fprintf(' %d', idx(1:3));
 44 | fprintf('\n(the closest centroids should be 1, 3, 2 respectively)\n');
 45 | 
 46 | fprintf('Program paused. Press enter to continue.\n');
 47 | pause;
 48 | 
 49 | %% ===================== Part 2: Compute Means =========================
 50 | %  After implementing the closest centroids function, you should now
 51 | %  complete the computeCentroids function.
 52 | %
 53 | fprintf('\nComputing centroids means.\n\n');
 54 | 
 55 | %  Compute means based on the closest centroids found in the previous part.
 56 | centroids = computeCentroids(X, idx, K);
 57 | 
 58 | fprintf('Centroids computed after initial finding of closest centroids: \n')
 59 | fprintf(' %f %f \n' , centroids');
 60 | fprintf('\n(the centroids should be\n');
 61 | fprintf('   [ 2.428301 3.157924 ]\n');
 62 | fprintf('   [ 5.813503 2.633656 ]\n');
 63 | fprintf('   [ 7.119387 3.616684 ]\n\n');
 64 | 
 65 | fprintf('Program paused. Press enter to continue.\n');
 66 | pause;
 67 | 
 68 | 
 69 | %% =================== Part 3: K-Means Clustering ======================
 70 | %  After you have completed the two functions computeCentroids and
 71 | %  findClosestCentroids, you have all the necessary pieces to run the
 72 | %  kMeans algorithm. In this part, you will run the K-Means algorithm on
 73 | %  the example dataset we have provided. 
 74 | %
 75 | fprintf('\nRunning K-Means clustering on example dataset.\n\n');
 76 | 
 77 | % Load an example dataset
 78 | load('ex7data2.mat');
 79 | 
 80 | % Settings for running K-Means
 81 | K = 3;
 82 | max_iters = 10;
 83 | 
 84 | % For consistency, here we set centroids to specific values
 85 | % but in practice you want to generate them automatically, such as by
 86 | % settings them to be random examples (as can be seen in
 87 | % kMeansInitCentroids).
 88 | initial_centroids = [3 3; 6 2; 8 5];
 89 | 
 90 | % Run K-Means algorithm. The 'true' at the end tells our function to plot
 91 | % the progress of K-Means
 92 | [centroids, idx] = runkMeans(X, initial_centroids, max_iters, true);
 93 | fprintf('\nK-Means Done.\n\n');
 94 | 
 95 | fprintf('Program paused. Press enter to continue.\n');
 96 | pause;
 97 | 
 98 | %% ============= Part 4: K-Means Clustering on Pixels ===============
 99 | %  In this exercise, you will use K-Means to compress an image. To do this,
100 | %  you will first run K-Means on the colors of the pixels in the image and
101 | %  then you will map each pixel onto its closest centroid.
102 | %  
103 | %  You should now complete the code in kMeansInitCentroids.m
104 | %
105 | 
106 | fprintf('\nRunning K-Means clustering on pixels from an image.\n\n');
107 | 
108 | %  Load an image of a bird
109 | A = double(imread('bird_small.png'));
110 | 
111 | % If imread does not work for you, you can try instead
112 | %   load ('bird_small.mat');
113 | 
114 | A = A / 255; % Divide by 255 so that all values are in the range 0 - 1
115 | 
116 | % Size of the image
117 | img_size = size(A);
118 | 
119 | % Reshape the image into an Nx3 matrix where N = number of pixels.
120 | % Each row will contain the Red, Green and Blue pixel values
121 | % This gives us our dataset matrix X that we will use K-Means on.
122 | X = reshape(A, img_size(1) * img_size(2), 3);
123 | 
124 | % Run your K-Means algorithm on this data
125 | % You should try different values of K and max_iters here
126 | K = 16; 
127 | max_iters = 10;
128 | 
129 | % When using K-Means, it is important the initialize the centroids
130 | % randomly. 
131 | % You should complete the code in kMeansInitCentroids.m before proceeding
132 | initial_centroids = kMeansInitCentroids(X, K);
133 | 
134 | % Run K-Means
135 | [centroids, idx] = runkMeans(X, initial_centroids, max_iters);
136 | 
137 | fprintf('Program paused. Press enter to continue.\n');
138 | pause;
139 | 
140 | 
141 | %% ================= Part 5: Image Compression ======================
142 | %  In this part of the exercise, you will use the clusters of K-Means to
143 | %  compress an image. To do this, we first find the closest clusters for
144 | %  each example. After that, we 
145 | 
146 | fprintf('\nApplying K-Means to compress an image.\n\n');
147 | 
148 | % Find closest cluster members
149 | idx = findClosestCentroids(X, centroids);
150 | 
151 | % Essentially, now we have represented the image X as in terms of the
152 | % indices in idx. 
153 | 
154 | % We can now recover the image from the indices (idx) by mapping each pixel
155 | % (specified by its index in idx) to the centroid value
156 | X_recovered = centroids(idx,:);
157 | 
158 | % Reshape the recovered image into proper dimensions
159 | X_recovered = reshape(X_recovered, img_size(1), img_size(2), 3);
160 | 
161 | % Display the original image 
162 | subplot(1, 2, 1);
163 | imagesc(A); 
164 | title('Original');
165 | 
166 | % Display compressed image side by side
167 | subplot(1, 2, 2);
168 | imagesc(X_recovered)
169 | title(sprintf('Compressed, with %d colors.', K));
170 | 
171 | 
172 | fprintf('Program paused. Press enter to continue.\n');
173 | pause;
174 | 
175 | 


--------------------------------------------------------------------------------
/ex7/ex7_pca.m:
--------------------------------------------------------------------------------
  1 | %% Machine Learning Online Class
  2 | %  Exercise 7 | Principle Component Analysis and K-Means Clustering
  3 | %
  4 | %  Instructions
  5 | %  ------------
  6 | %
  7 | %  This file contains code that helps you get started on the
  8 | %  exercise. You will need to complete the following functions:
  9 | %
 10 | %     pca.m
 11 | %     projectData.m
 12 | %     recoverData.m
 13 | %     computeCentroids.m
 14 | %     findClosestCentroids.m
 15 | %     kMeansInitCentroids.m
 16 | %
 17 | %  For this exercise, you will not need to change any code in this file,
 18 | %  or any other files other than those mentioned above.
 19 | %
 20 | 
 21 | %% Initialization
 22 | clear ; close all; clc
 23 | 
 24 | %% ================== Part 1: Load Example Dataset  ===================
 25 | %  We start this exercise by using a small dataset that is easily to
 26 | %  visualize
 27 | %
 28 | fprintf('Visualizing example dataset for PCA.\n\n');
 29 | 
 30 | %  The following command loads the dataset. You should now have the 
 31 | %  variable X in your environment
 32 | load ('ex7data1.mat');
 33 | 
 34 | %  Visualize the example dataset
 35 | plot(X(:, 1), X(:, 2), 'bo');
 36 | axis([0.5 6.5 2 8]); axis square;
 37 | 
 38 | fprintf('Program paused. Press enter to continue.\n');
 39 | pause;
 40 | 
 41 | 
 42 | %% =============== Part 2: Principal Component Analysis ===============
 43 | %  You should now implement PCA, a dimension reduction technique. You
 44 | %  should complete the code in pca.m
 45 | %
 46 | fprintf('\nRunning PCA on example dataset.\n\n');
 47 | 
 48 | %  Before running PCA, it is important to first normalize X
 49 | [X_norm, mu, sigma] = featureNormalize(X);
 50 | 
 51 | %  Run PCA
 52 | [U, S] = pca(X_norm);
 53 | 
 54 | %  Compute mu, the mean of the each feature
 55 | 
 56 | %  Draw the eigenvectors centered at mean of data. These lines show the
 57 | %  directions of maximum variations in the dataset.
 58 | hold on;
 59 | drawLine(mu, mu + 1.5 * S(1,1) * U(:,1)', '-k', 'LineWidth', 2);
 60 | drawLine(mu, mu + 1.5 * S(2,2) * U(:,2)', '-k', 'LineWidth', 2);
 61 | hold off;
 62 | 
 63 | fprintf('Top eigenvector: \n');
 64 | fprintf(' U(:,1) = %f %f \n', U(1,1), U(2,1));
 65 | fprintf('\n(you should expect to see -0.707107 -0.707107)\n');
 66 | 
 67 | fprintf('Program paused. Press enter to continue.\n');
 68 | pause;
 69 | 
 70 | 
 71 | %% =================== Part 3: Dimension Reduction ===================
 72 | %  You should now implement the projection step to map the data onto the 
 73 | %  first k eigenvectors. The code will then plot the data in this reduced 
 74 | %  dimensional space.  This will show you what the data looks like when 
 75 | %  using only the corresponding eigenvectors to reconstruct it.
 76 | %
 77 | %  You should complete the code in projectData.m
 78 | %
 79 | fprintf('\nDimension reduction on example dataset.\n\n');
 80 | 
 81 | %  Plot the normalized dataset (returned from pca)
 82 | plot(X_norm(:, 1), X_norm(:, 2), 'bo');
 83 | axis([-4 3 -4 3]); axis square
 84 | 
 85 | %  Project the data onto K = 1 dimension
 86 | K = 1;
 87 | Z = projectData(X_norm, U, K);
 88 | fprintf('Projection of the first example: %f\n', Z(1));
 89 | fprintf('\n(this value should be about 1.481274)\n\n');
 90 | 
 91 | X_rec  = recoverData(Z, U, K);
 92 | fprintf('Approximation of the first example: %f %f\n', X_rec(1, 1), X_rec(1, 2));
 93 | fprintf('\n(this value should be about  -1.047419 -1.047419)\n\n');
 94 | 
 95 | %  Draw lines connecting the projected points to the original points
 96 | hold on;
 97 | plot(X_rec(:, 1), X_rec(:, 2), 'ro');
 98 | for i = 1:size(X_norm, 1)
 99 |     drawLine(X_norm(i,:), X_rec(i,:), '--k', 'LineWidth', 1);
100 | end
101 | hold off
102 | 
103 | fprintf('Program paused. Press enter to continue.\n');
104 | pause;
105 | 
106 | %% =============== Part 4: Loading and Visualizing Face Data =============
107 | %  We start the exercise by first loading and visualizing the dataset.
108 | %  The following code will load the dataset into your environment
109 | %
110 | fprintf('\nLoading face dataset.\n\n');
111 | 
112 | %  Load Face dataset
113 | load ('ex7faces.mat')
114 | 
115 | %  Display the first 100 faces in the dataset
116 | displayData(X(1:100, :));
117 | 
118 | fprintf('Program paused. Press enter to continue.\n');
119 | pause;
120 | 
121 | %% =========== Part 5: PCA on Face Data: Eigenfaces  ===================
122 | %  Run PCA and visualize the eigenvectors which are in this case eigenfaces
123 | %  We display the first 36 eigenfaces.
124 | %
125 | fprintf(['\nRunning PCA on face dataset.\n' ...
126 |          '(this might take a minute or two ...)\n\n']);
127 | 
128 | %  Before running PCA, it is important to first normalize X by subtracting 
129 | %  the mean value from each feature
130 | [X_norm, mu, sigma] = featureNormalize(X);
131 | 
132 | %  Run PCA
133 | [U, S] = pca(X_norm);
134 | 
135 | %  Visualize the top 36 eigenvectors found
136 | displayData(U(:, 1:36)');
137 | 
138 | fprintf('Program paused. Press enter to continue.\n');
139 | pause;
140 | 
141 | 
142 | %% ============= Part 6: Dimension Reduction for Faces =================
143 | %  Project images to the eigen space using the top k eigenvectors 
144 | %  If you are applying a machine learning algorithm 
145 | fprintf('\nDimension reduction for face dataset.\n\n');
146 | 
147 | K = 100;
148 | Z = projectData(X_norm, U, K);
149 | 
150 | fprintf('The projected data Z has a size of: ')
151 | fprintf('%d ', size(Z));
152 | 
153 | fprintf('\n\nProgram paused. Press enter to continue.\n');
154 | pause;
155 | 
156 | %% ==== Part 7: Visualization of Faces after PCA Dimension Reduction ====
157 | %  Project images to the eigen space using the top K eigen vectors and 
158 | %  visualize only using those K dimensions
159 | %  Compare to the original input, which is also displayed
160 | 
161 | fprintf('\nVisualizing the projected (reduced dimension) faces.\n\n');
162 | 
163 | K = 100;
164 | X_rec  = recoverData(Z, U, K);
165 | 
166 | % Display normalized data
167 | subplot(1, 2, 1);
168 | displayData(X_norm(1:100,:));
169 | title('Original faces');
170 | axis square;
171 | 
172 | % Display reconstructed data from only k eigenfaces
173 | subplot(1, 2, 2);
174 | displayData(X_rec(1:100,:));
175 | title('Recovered faces');
176 | axis square;
177 | 
178 | fprintf('Program paused. Press enter to continue.\n');
179 | pause;
180 | 
181 | 
182 | %% === Part 8(a): Optional (ungraded) Exercise: PCA for Visualization ===
183 | %  One useful application of PCA is to use it to visualize high-dimensional
184 | %  data. In the last K-Means exercise you ran K-Means on 3-dimensional 
185 | %  pixel colors of an image. We first visualize this output in 3D, and then
186 | %  apply PCA to obtain a visualization in 2D.
187 | 
188 | close all; close all; clc
189 | 
190 | % Reload the image from the previous exercise and run K-Means on it
191 | % For this to work, you need to complete the K-Means assignment first
192 | A = double(imread('bird_small.png'));
193 | 
194 | % If imread does not work for you, you can try instead
195 | %   load ('bird_small.mat');
196 | 
197 | A = A / 255;
198 | img_size = size(A);
199 | X = reshape(A, img_size(1) * img_size(2), 3);
200 | K = 16; 
201 | max_iters = 10;
202 | initial_centroids = kMeansInitCentroids(X, K);
203 | [centroids, idx] = runkMeans(X, initial_centroids, max_iters);
204 | 
205 | %  Sample 1000 random indexes (since working with all the data is
206 | %  too expensive. If you have a fast computer, you may increase this.
207 | sel = floor(rand(1000, 1) * size(X, 1)) + 1;
208 | 
209 | %  Setup Color Palette
210 | palette = hsv(K);
211 | colors = palette(idx(sel), :);
212 | 
213 | %  Visualize the data and centroid memberships in 3D
214 | figure;
215 | scatter3(X(sel, 1), X(sel, 2), X(sel, 3), 10, colors);
216 | title('Pixel dataset plotted in 3D. Color shows centroid memberships');
217 | fprintf('Program paused. Press enter to continue.\n');
218 | pause;
219 | 
220 | %% === Part 8(b): Optional (ungraded) Exercise: PCA for Visualization ===
221 | % Use PCA to project this cloud to 2D for visualization
222 | 
223 | % Subtract the mean to use PCA
224 | [X_norm, mu, sigma] = featureNormalize(X);
225 | 
226 | % PCA and project the data to 2D
227 | [U, S] = pca(X_norm);
228 | Z = projectData(X_norm, U, 2);
229 | 
230 | % Plot in 2D
231 | figure;
232 | plotDataPoints(Z(sel, :), idx(sel), K);
233 | title('Pixel dataset plotted in 2D, using PCA for dimensionality reduction');
234 | fprintf('Program paused. Press enter to continue.\n');
235 | pause;
236 | 


--------------------------------------------------------------------------------
/ex7/findClosestCentroids.m:
--------------------------------------------------------------------------------
 1 | function idx = findClosestCentroids(X, centroids)
 2 | %FINDCLOSESTCENTROIDS computes the centroid memberships for every example
 3 | %   idx = FINDCLOSESTCENTROIDS (X, centroids) returns the closest centroids
 4 | %   in idx for a dataset X where each row is a single example. idx = m x 1 
 5 | %   vector of centroid assignments (i.e. each entry in range [1..K])
 6 | %
 7 | 
 8 | % Set K
 9 | K = size(centroids, 1);
10 | 
11 | % You need to return the following variables correctly.
12 | idx = zeros(size(X,1), 1);
13 | 
14 | % ====================== YOUR CODE HERE ======================
15 | % Instructions: Go over every example, find its closest centroid, and store
16 | %               the index inside idx at the appropriate location.
17 | %               Concretely, idx(i) should contain the index of the centroid
18 | %               closest to example i. Hence, it should be a value in the 
19 | %               range 1..K
20 | %
21 | % Note: You can use a for-loop over the examples to compute this.
22 | %
23 | 
24 | m = length(X);
25 | dist = zeros(K, 1);
26 | 
27 | for i = 1:m
28 |     for j = 1:K
29 |         
30 |         dist(j) = norm(X(i,:) - centroids(j,:)) ^ 2;
31 |     
32 |     end
33 |     
34 |     [minV, minI] = min(dist);
35 |     
36 |     idx(i) = minI;
37 |     
38 | 
39 | end
40 |     
41 | 
42 | 
43 | 
44 | % =============================================================
45 | 
46 | end
47 | 
48 | 


--------------------------------------------------------------------------------
/ex7/kMeansInitCentroids.m:
--------------------------------------------------------------------------------
 1 | function centroids = kMeansInitCentroids(X, K)
 2 | %KMEANSINITCENTROIDS This function initializes K centroids that are to be 
 3 | %used in K-Means on the dataset X
 4 | %   centroids = KMEANSINITCENTROIDS(X, K) returns K initial centroids to be
 5 | %   used with the K-Means on the dataset X
 6 | %
 7 | 
 8 | % You should return this values correctly
 9 | centroids = zeros(K, size(X, 2));
10 | 
11 | % ====================== YOUR CODE HERE ======================
12 | % Instructions: You should set centroids to randomly chosen examples from
13 | %               the dataset X
14 | %
15 | 
16 | 
17 | randidx = randperm(size(X,1));
18 | 
19 | centroids = X(randidx(1:K),:);
20 | 
21 | 
22 | 
23 | 
24 | 
25 | % =============================================================
26 | 
27 | end
28 | 
29 | 


--------------------------------------------------------------------------------
/ex7/pca.m:
--------------------------------------------------------------------------------
 1 | function [U, S] = pca(X)
 2 | %PCA Run principal component analysis on the dataset X
 3 | %   [U, S, X] = pca(X) computes eigenvectors of the covariance matrix of X
 4 | %   Returns the eigenvectors U, the eigenvalues (on diagonal) in S
 5 | %
 6 | 
 7 | % Useful values
 8 | [m, n] = size(X);
 9 | 
10 | % You need to return the following variables correctly.
11 | U = zeros(n);
12 | S = zeros(n);
13 | 
14 | % ====================== YOUR CODE HERE ======================
15 | 
16 | sigma = zeros(n);
17 | sigma = 1 / m .* (X' * X);
18 | 
19 | 
20 | [U,S,V] = svd(sigma);
21 | 
22 | 
23 | 
24 | 
25 | 
26 | 
27 | 
28 | % =========================================================================
29 | 
30 | end
31 | 


--------------------------------------------------------------------------------
/ex7/projectData.m:
--------------------------------------------------------------------------------
 1 | function Z = projectData(X, U, K)
 2 | %PROJECTDATA Computes the reduced data representation when projecting only 
 3 | %on to the top k eigenvectors
 4 | %   Z = projectData(X, U, K) computes the projection of 
 5 | %   the normalized inputs X into the reduced dimensional space spanned by
 6 | %   the first K columns of U. It returns the projected examples in Z.
 7 | %
 8 | 
 9 | % You need to return the following variables correctly.
10 | Z = zeros(size(X, 1), K);
11 | 
12 | % ====================== YOUR CODE HERE ======================
13 | 
14 | 
15 | U_red = U(:,1:K);
16 | 
17 | Z = X * U_red;
18 | 
19 | 
20 | 
21 | 
22 | 
23 | % =============================================================
24 | 
25 | end
26 | 


--------------------------------------------------------------------------------
/ex7/recoverData.m:
--------------------------------------------------------------------------------
 1 | function X_rec = recoverData(Z, U, K)
 2 | %RECOVERDATA Recovers an approximation of the original data when using the 
 3 | %projected data
 4 | %   X_rec = RECOVERDATA(Z, U, K) recovers an approximation the 
 5 | %   original data that has been reduced to K dimensions. It returns the
 6 | %   approximate reconstruction in X_rec.
 7 | %
 8 | 
 9 | % You need to return the following variables correctly.
10 | X_rec = zeros(size(Z, 1), size(U, 1));
11 | 
12 | 
13 | U = U(:,1:K);
14 | 
15 | X_rec = Z * U';        
16 | 
17 | 
18 | 
19 | % =============================================================
20 | 
21 | end
22 | 


--------------------------------------------------------------------------------
/ex8/cofiCostFunc.m:
--------------------------------------------------------------------------------
 1 | function [J, grad] = cofiCostFunc(params, Y, R, num_users, num_movies, ...
 2 |                                   num_features, lambda)
 3 | %COFICOSTFUNC Collaborative filtering cost function
 4 | %   [J, grad] = COFICOSTFUNC(params, Y, R, num_users, num_movies, ...
 5 | %   num_features, lambda) returns the cost and gradient for the
 6 | %   collaborative filtering problem.
 7 | %
 8 | 
 9 | % Unfold the U and W matrices from params
10 | X = reshape(params(1:num_movies*num_features), num_movies, num_features);
11 | Theta = reshape(params(num_movies*num_features+1:end), ...
12 |                 num_users, num_features);
13 | 
14 |             
15 | % You need to return the following values correctly
16 | J = 0;
17 | X_grad = zeros(size(X));
18 | Theta_grad = zeros(size(Theta));
19 | 
20 | % ====================== YOUR CODE HERE ======================
21 | cost = 0;
22 | tmp_Y = zeros(num_movies, num_users);
23 | 
24 | Reg_Theta = 0;
25 | Reg_X = 0;
26 | 
27 | for i = 1:num_movies
28 |     for k = 1:num_features
29 |         
30 |     %X_tmp = X(idx,:);
31 |         Reg_X =  Reg_X + norm(X(i,k)) ^ 2;
32 |         
33 |     end
34 | end
35 | 
36 | for j = 1:num_users
37 |     for k = 1:num_features    
38 |         
39 |     %X_tmp = X(idx,:);
40 |         Reg_Theta =  Reg_Theta + norm(Theta(j,k)) ^ 2;
41 |         
42 |     end
43 | end
44 | 
45 | 
46 | 
47 | for i = 1:num_movies
48 |     for j = 1:num_users
49 |         if(R(i,j) == 1)
50 |             cost = cost + (Theta(j,:)*X(i,:)' - Y(i,j)) ^ 2;
51 |         end
52 |     end
53 | end
54 | 
55 | J = 1 / 2 * (cost + lambda * (Reg_Theta + Reg_X));
56 | 
57 | 
58 | % You should set the following variables correctly:
59 | %
60 | %        X_grad - num_movies x num_features matrix, containing the 
61 | %                 partial derivatives w.r.t. to each element of X
62 | %        Theta_grad - num_users x num_features matrix, containing the 
63 | %                     partial derivatives w.r.t. to each element of Theta
64 | grad_X = zeros(size(X));
65 | grad_Theta = zeros(size(Theta));
66 | 
67 | for i = 1:num_movies
68 |     
69 |     idx = find(R(i,:) == 1);
70 |     Theta_tmp = Theta(idx,:);
71 |     Y_tmp = Y(i,idx);
72 |     
73 |     grad_X(i,:) = (X(i,:) * Theta_tmp' - Y_tmp) * Theta_tmp + ...
74 |         lambda * X(i,:);
75 |     for j = 1:num_users
76 |         jdx = find(R(:,j) == 1);
77 |         X_tmp = X(jdx,:);
78 |         Y_tmp = Y(jdx,j);
79 |         grad_Theta(j,:) = (Theta(j,:) * X_tmp' - Y_tmp') * X_tmp + ...
80 |             lambda * Theta(j,:);
81 |     end
82 | end
83 | 
84 | 
85 | 
86 | 
87 | % =============================================================
88 | 
89 | grad = [grad_X(:); grad_Theta(:)];
90 | 
91 | end
92 | 


--------------------------------------------------------------------------------
/ex8/estimateGaussian.m:
--------------------------------------------------------------------------------
 1 | function [mu sigma2] = estimateGaussian(X)
 2 | %ESTIMATEGAUSSIAN This function estimates the parameters of a 
 3 | %Gaussian distribution using the data in X
 4 | %   [mu sigma2] = estimateGaussian(X), 
 5 | %   The input X is the dataset with each n-dimensional data point in one row
 6 | %   The output is an n-dimensional vector mu, the mean of the data set
 7 | %   and the variances sigma^2, an n x 1 vector
 8 | % 
 9 | 
10 | % Useful variables
11 | [m, n] = size(X);
12 | 
13 | % You should return these values correctly
14 | mu = zeros(n, 1);
15 | sigma2 = zeros(n, 1);
16 | 
17 | % ====================== YOUR CODE HERE ======================
18 | 
19 | for i = 1:n
20 |     mu(i,:) = 1 / m * sum(X(:,i));
21 |     sigma2(i,:) = 1 / m * (norm(X(:,i) - mu(i,:)) ^ 2);
22 | end
23 | 
24 | 
25 | 
26 | 
27 | 
28 | 
29 | 
30 | 
31 | 
32 | 
33 | % =============================================================
34 | 
35 | 
36 | end
37 | 


--------------------------------------------------------------------------------
/ex8/ex8.m:
--------------------------------------------------------------------------------
  1 | %% Machine Learning Online Class
  2 | %  Exercise 8 | Anomaly Detection and Collaborative Filtering
  3 | %
  4 | %  Instructions
  5 | %  ------------
  6 | %
  7 | %  This file contains code that helps you get started on the
  8 | %  exercise. You will need to complete the following functions:
  9 | %
 10 | %     estimateGaussian.m
 11 | %     selectThreshold.m
 12 | %     cofiCostFunc.m
 13 | %
 14 | %  For this exercise, you will not need to change any code in this file,
 15 | %  or any other files other than those mentioned above.
 16 | %
 17 | 
 18 | %% Initialization
 19 | clear ; close all; clc
 20 | 
 21 | %% ================== Part 1: Load Example Dataset  ===================
 22 | %  We start this exercise by using a small dataset that is easy to
 23 | %  visualize.
 24 | %
 25 | %  Our example case consists of 2 network server statistics across
 26 | %  several machines: the latency and throughput of each machine.
 27 | %  This exercise will help us find possibly faulty (or very fast) machines.
 28 | %
 29 | 
 30 | fprintf('Visualizing example dataset for outlier detection.\n\n');
 31 | 
 32 | %  The following command loads the dataset. You should now have the
 33 | %  variables X, Xval, yval in your environment
 34 | load('ex8data1.mat');
 35 | 
 36 | %  Visualize the example dataset
 37 | plot(X(:, 1), X(:, 2), 'bx');
 38 | axis([0 30 0 30]);
 39 | xlabel('Latency (ms)');
 40 | ylabel('Throughput (mb/s)');
 41 | 
 42 | fprintf('Program paused. Press enter to continue.\n');
 43 | pause
 44 | 
 45 | 
 46 | %% ================== Part 2: Estimate the dataset statistics ===================
 47 | %  For this exercise, we assume a Gaussian distribution for the dataset.
 48 | %
 49 | %  We first estimate the parameters of our assumed Gaussian distribution, 
 50 | %  then compute the probabilities for each of the points and then visualize 
 51 | %  both the overall distribution and where each of the points falls in 
 52 | %  terms of that distribution.
 53 | %
 54 | fprintf('Visualizing Gaussian fit.\n\n');
 55 | 
 56 | %  Estimate my and sigma2
 57 | [mu sigma2] = estimateGaussian(X);
 58 | 
 59 | %  Returns the density of the multivariate normal at each data point (row) 
 60 | %  of X
 61 | p = multivariateGaussian(X, mu, sigma2);
 62 | 
 63 | %  Visualize the fit
 64 | visualizeFit(X,  mu, sigma2);
 65 | xlabel('Latency (ms)');
 66 | ylabel('Throughput (mb/s)');
 67 | 
 68 | fprintf('Program paused. Press enter to continue.\n');
 69 | pause;
 70 | 
 71 | %% ================== Part 3: Find Outliers ===================
 72 | %  Now you will find a good epsilon threshold using a cross-validation set
 73 | %  probabilities given the estimated Gaussian distribution
 74 | % 
 75 | 
 76 | pval = multivariateGaussian(Xval, mu, sigma2);
 77 | 
 78 | [epsilon F1] = selectThreshold(yval, pval);
 79 | fprintf('Best epsilon found using cross-validation: %e\n', epsilon);
 80 | fprintf('Best F1 on Cross Validation Set:  %f\n', F1);
 81 | fprintf('   (you should see a value epsilon of about 8.99e-05)\n');
 82 | fprintf('   (you should see a Best F1 value of  0.875000)\n\n');
 83 | 
 84 | %  Find the outliers in the training set and plot the
 85 | outliers = find(p < epsilon);
 86 | 
 87 | %  Draw a red circle around those outliers
 88 | hold on
 89 | plot(X(outliers, 1), X(outliers, 2), 'ro', 'LineWidth', 2, 'MarkerSize', 10);
 90 | hold off
 91 | 
 92 | fprintf('Program paused. Press enter to continue.\n');
 93 | pause;
 94 | 
 95 | %% ================== Part 4: Multidimensional Outliers ===================
 96 | %  We will now use the code from the previous part and apply it to a 
 97 | %  harder problem in which more features describe each datapoint and only 
 98 | %  some features indicate whether a point is an outlier.
 99 | %
100 | 
101 | %  Loads the second dataset. You should now have the
102 | %  variables X, Xval, yval in your environment
103 | load('ex8data2.mat');
104 | 
105 | %  Apply the same steps to the larger dataset
106 | [mu sigma2] = estimateGaussian(X);
107 | 
108 | %  Training set 
109 | p = multivariateGaussian(X, mu, sigma2);
110 | 
111 | %  Cross-validation set
112 | pval = multivariateGaussian(Xval, mu, sigma2);
113 | 
114 | %  Find the best threshold
115 | [epsilon F1] = selectThreshold(yval, pval);
116 | 
117 | fprintf('Best epsilon found using cross-validation: %e\n', epsilon);
118 | fprintf('Best F1 on Cross Validation Set:  %f\n', F1);
119 | fprintf('   (you should see a value epsilon of about 1.38e-18)\n');
120 | fprintf('   (you should see a Best F1 value of 0.615385)\n');
121 | fprintf('# Outliers found: %d\n\n', sum(p < epsilon));
122 | 


--------------------------------------------------------------------------------
/ex8/ex8_cofi.m:
--------------------------------------------------------------------------------
  1 | %% Machine Learning Online Class
  2 | %  Exercise 8 | Anomaly Detection and Collaborative Filtering
  3 | %
  4 | %  Instructions
  5 | %  ------------
  6 | %
  7 | %  This file contains code that helps you get started on the
  8 | %  exercise. You will need to complete the following functions:
  9 | %
 10 | %     estimateGaussian.m
 11 | %     selectThreshold.m
 12 | %     cofiCostFunc.m
 13 | %
 14 | %  For this exercise, you will not need to change any code in this file,
 15 | %  or any other files other than those mentioned above.
 16 | %
 17 | 
 18 | %% =============== Part 1: Loading movie ratings dataset ================
 19 | %  You will start by loading the movie ratings dataset to understand the
 20 | %  structure of the data.
 21 | %  
 22 | fprintf('Loading movie ratings dataset.\n\n');
 23 | 
 24 | %  Load data
 25 | load ('ex8_movies.mat');
 26 | 
 27 | %  Y is a 1682x943 matrix, containing ratings (1-5) of 1682 movies on 
 28 | %  943 users
 29 | %
 30 | %  R is a 1682x943 matrix, where R(i,j) = 1 if and only if user j gave a
 31 | %  rating to movie i
 32 | 
 33 | %  From the matrix, we can compute statistics like average rating.
 34 | fprintf('Average rating for movie 1 (Toy Story): %f / 5\n\n', ...
 35 |         mean(Y(1, R(1, :))));
 36 | 
 37 | %  We can "visualize" the ratings matrix by plotting it with imagesc
 38 | imagesc(Y);
 39 | ylabel('Movies');
 40 | xlabel('Users');
 41 | 
 42 | fprintf('\nProgram paused. Press enter to continue.\n');
 43 | pause;
 44 | 
 45 | %% ============ Part 2: Collaborative Filtering Cost Function ===========
 46 | %  You will now implement the cost function for collaborative filtering.
 47 | %  To help you debug your cost function, we have included set of weights
 48 | %  that we trained on that. Specifically, you should complete the code in 
 49 | %  cofiCostFunc.m to return J.
 50 | 
 51 | %  Load pre-trained weights (X, Theta, num_users, num_movies, num_features)
 52 | load ('ex8_movieParams.mat');
 53 | 
 54 | %  Reduce the data set size so that this runs faster
 55 | num_users = 4; num_movies = 5; num_features = 3;
 56 | X = X(1:num_movies, 1:num_features);
 57 | Theta = Theta(1:num_users, 1:num_features);
 58 | Y = Y(1:num_movies, 1:num_users);
 59 | R = R(1:num_movies, 1:num_users);
 60 | 
 61 | %  Evaluate cost function
 62 | J = cofiCostFunc([X(:) ; Theta(:)], Y, R, num_users, num_movies, ...
 63 |                num_features, 0);
 64 |            
 65 | fprintf(['Cost at loaded parameters: %f '...
 66 |          '\n(this value should be about 22.22)\n'], J);
 67 | 
 68 | fprintf('\nProgram paused. Press enter to continue.\n');
 69 | pause;
 70 | 
 71 | 
 72 | %% ============== Part 3: Collaborative Filtering Gradient ==============
 73 | %  Once your cost function matches up with ours, you should now implement 
 74 | %  the collaborative filtering gradient function. Specifically, you should 
 75 | %  complete the code in cofiCostFunc.m to return the grad argument.
 76 | %  
 77 | fprintf('\nChecking Gradients (without regularization) ... \n');
 78 | 
 79 | %  Check gradients by running checkNNGradients
 80 | checkCostFunction;
 81 | 
 82 | fprintf('\nProgram paused. Press enter to continue.\n');
 83 | pause;
 84 | 
 85 | 
 86 | %% ========= Part 4: Collaborative Filtering Cost Regularization ========
 87 | %  Now, you should implement regularization for the cost function for 
 88 | %  collaborative filtering. You can implement it by adding the cost of
 89 | %  regularization to the original cost computation.
 90 | %  
 91 | 
 92 | %  Evaluate cost function
 93 | J = cofiCostFunc([X(:) ; Theta(:)], Y, R, num_users, num_movies, ...
 94 |                num_features, 1.5);
 95 |            
 96 | fprintf(['Cost at loaded parameters (lambda = 1.5): %f '...
 97 |          '\n(this value should be about 31.34)\n'], J);
 98 | 
 99 | fprintf('\nProgram paused. Press enter to continue.\n');
100 | pause;
101 | 
102 | 
103 | %% ======= Part 5: Collaborative Filtering Gradient Regularization ======
104 | %  Once your cost matches up with ours, you should proceed to implement 
105 | %  regularization for the gradient. 
106 | %
107 | 
108 | %  
109 | fprintf('\nChecking Gradients (with regularization) ... \n');
110 | 
111 | %  Check gradients by running checkNNGradients
112 | checkCostFunction(1.5);
113 | 
114 | fprintf('\nProgram paused. Press enter to continue.\n');
115 | pause;
116 | 
117 | 
118 | %% ============== Part 6: Entering ratings for a new user ===============
119 | %  Before we will train the collaborative filtering model, we will first
120 | %  add ratings that correspond to a new user that we just observed. This
121 | %  part of the code will also allow you to put in your own ratings for the
122 | %  movies in our dataset!
123 | %
124 | movieList = loadMovieList();
125 | 
126 | %  Initialize my ratings
127 | my_ratings = zeros(1682, 1);
128 | 
129 | % Check the file movie_idx.txt for id of each movie in our dataset
130 | % For example, Toy Story (1995) has ID 1, so to rate it "4", you can set
131 | my_ratings(1) = 4;
132 | 
133 | % Or suppose did not enjoy Silence of the Lambs (1991), you can set
134 | my_ratings(98) = 2;
135 | 
136 | % We have selected a few movies we liked / did not like and the ratings we
137 | % gave are as follows:
138 | my_ratings(7) = 3;
139 | my_ratings(12)= 5;
140 | my_ratings(54) = 4;
141 | my_ratings(64)= 5;
142 | my_ratings(66)= 3;
143 | my_ratings(69) = 5;
144 | my_ratings(183) = 4;
145 | my_ratings(226) = 5;
146 | my_ratings(355)= 5;
147 | 
148 | fprintf('\n\nNew user ratings:\n');
149 | for i = 1:length(my_ratings)
150 |     if my_ratings(i) > 0 
151 |         fprintf('Rated %d for %s\n', my_ratings(i), ...
152 |                  movieList{i});
153 |     end
154 | end
155 | 
156 | fprintf('\nProgram paused. Press enter to continue.\n');
157 | pause;
158 | 
159 | 
160 | %% ================== Part 7: Learning Movie Ratings ====================
161 | %  Now, you will train the collaborative filtering model on a movie rating 
162 | %  dataset of 1682 movies and 943 users
163 | %
164 | 
165 | fprintf('\nTraining collaborative filtering...\n');
166 | 
167 | %  Load data
168 | load('ex8_movies.mat');
169 | 
170 | %  Y is a 1682x943 matrix, containing ratings (1-5) of 1682 movies by 
171 | %  943 users
172 | %
173 | %  R is a 1682x943 matrix, where R(i,j) = 1 if and only if user j gave a
174 | %  rating to movie i
175 | 
176 | %  Add our own ratings to the data matrix
177 | Y = [my_ratings Y];
178 | R = [(my_ratings ~= 0) R];
179 | 
180 | %  Normalize Ratings
181 | [Ynorm, Ymean] = normalizeRatings(Y, R);
182 | 
183 | %  Useful Values
184 | num_users = size(Y, 2);
185 | num_movies = size(Y, 1);
186 | num_features = 10;
187 | 
188 | % Set Initial Parameters (Theta, X)
189 | X = randn(num_movies, num_features);
190 | Theta = randn(num_users, num_features);
191 | 
192 | initial_parameters = [X(:); Theta(:)];
193 | 
194 | % Set options for fmincg
195 | options = optimset('GradObj', 'on', 'MaxIter', 100);
196 | 
197 | % Set Regularization
198 | lambda = 10;
199 | theta = fmincg (@(t)(cofiCostFunc(t, Ynorm, R, num_users, num_movies, ...
200 |                                 num_features, lambda)), ...
201 |                 initial_parameters, options);
202 | 
203 | % Unfold the returned theta back into U and W
204 | X = reshape(theta(1:num_movies*num_features), num_movies, num_features);
205 | Theta = reshape(theta(num_movies*num_features+1:end), ...
206 |                 num_users, num_features);
207 | 
208 | fprintf('Recommender system learning completed.\n');
209 | 
210 | fprintf('\nProgram paused. Press enter to continue.\n');
211 | pause;
212 | 
213 | %% ================== Part 8: Recommendation for you ====================
214 | %  After training the model, you can now make recommendations by computing
215 | %  the predictions matrix.
216 | %
217 | 
218 | p = X * Theta';
219 | my_predictions = p(:,1) + Ymean;
220 | 
221 | movieList = loadMovieList();
222 | 
223 | [r, ix] = sort(my_predictions, 'descend');
224 | fprintf('\nTop recommendations for you:\n');
225 | for i=1:10
226 |     j = ix(i);
227 |     fprintf('Predicting rating %.1f for movie %s\n', my_predictions(j), ...
228 |             movieList{j});
229 | end
230 | 
231 | fprintf('\n\nOriginal ratings provided:\n');
232 | for i = 1:length(my_ratings)
233 |     if my_ratings(i) > 0 
234 |         fprintf('Rated %d for %s\n', my_ratings(i), ...
235 |                  movieList{i});
236 |     end
237 | end
238 | 


--------------------------------------------------------------------------------
/ex8/selectThreshold.m:
--------------------------------------------------------------------------------
 1 | function [bestEpsilon bestF1] = selectThreshold(yval, pval)
 2 | %SELECTTHRESHOLD Find the best threshold (epsilon) to use for selecting
 3 | %outliers
 4 | %   [bestEpsilon bestF1] = SELECTTHRESHOLD(yval, pval) finds the best
 5 | %   threshold to use for selecting outliers based on the results from a
 6 | %   validation set (pval) and the ground truth (yval).
 7 | %
 8 | 
 9 | m = length(pval);
10 | 
11 | bestEpsilon = 0;
12 | bestF1 = 0;
13 | 
14 | stepsize = (max(pval) - min(pval)) / 1000;
15 | %tmp = (max(pval) - min(pval))/ stepsize;
16 | %F1 = zeros(tmp,1);
17 | cvPred = zeros(m, 1);
18 | %count = 1;
19 | 
20 | F1 = 0;
21 | 
22 | for epsilon = min(pval):stepsize:max(pval)
23 |     for i = 1:m
24 |         if(pval(i) < epsilon)
25 |             cvPred(i) = 1;
26 |         end
27 |     end
28 |     
29 |     fp = sum((cvPred == 1) & (yval == 0));
30 |     tp = sum((cvPred == 1) & (yval == 1));
31 |     fn = sum((cvPred == 0) & (yval == 1));
32 |     
33 |     prec = tp / (tp + fp);
34 |     recall = tp / (tp + fn);
35 |     F1 = 2 * prec * recall / (prec + recall);
36 |       
37 |     if F1 > bestF1
38 |        bestF1 = F1;
39 |        bestEpsilon = epsilon;
40 |     end
41 | end
42 |     
43 |     % ====================== YOUR CODE HERE ======================
44 | 
45 | 
46 | 
47 | end
48 | 


--------------------------------------------------------------------------------
/update:
--------------------------------------------------------------------------------
1 | no update..
2 | 


--------------------------------------------------------------------------------