├── 1 ├── computeCost-solution.r ├── computeCost.r ├── computeCostMulti-solution.r ├── computeCostMulti.r ├── ex1.r ├── ex1.rproj ├── ex1_multi.r ├── ex1data1.txt ├── ex1data2.txt ├── featureNormalize-solution.r ├── featureNormalize.r ├── gradientDescent-solution.r ├── gradientDescent.r ├── gradientDescentMulti-solution.r ├── gradientDescentMulti.r ├── instructions.rmd ├── normalEqn-solution.r ├── normalEqn.r ├── plotData-solution.r ├── plotData.r ├── submit.r ├── warmUpExercise-solution.r └── warmUpExercise.r ├── 2 ├── costFunction-solution.r ├── costFunction.r ├── costFunctionReg-solution.r ├── costFunctionReg.r ├── ex2.r ├── ex2.rproj ├── ex2_reg.r ├── ex2data1.txt ├── ex2data2.txt ├── instructions.rmd ├── mapFeature-solution.r ├── mapFeature.r ├── plotData-solution.r ├── plotData.r ├── plotDecisionBoundary-solution.r ├── plotDecisionBoundary.r ├── predict-solution.r ├── predict.r ├── sigmoid-solution.r ├── sigmoid.r └── submit.r ├── 3 ├── displayData.r ├── ex3.r ├── ex3.rproj ├── ex3_nn.r ├── ex3data1.rda ├── ex3weights.rda ├── instructions.rmd ├── lrCostFunction-solution.r ├── lrCostFunction.r ├── nn.png ├── oneVsAll-solution.r ├── oneVsAll.r ├── predict-solution.r ├── predict.r ├── predictOneVsAll-solution.r ├── predictOneVsAll.r └── submit.r ├── 4 ├── checkNNGradients.r ├── computeNumericalGradient.r ├── debugInitializeWeights.r ├── displayData.r ├── ex4.r ├── ex4.rproj ├── ex4data1.rda ├── ex4weights.rda ├── instructions.rmd ├── nn.png ├── nnCostFunction-solution.r ├── nnCostFunction.r ├── nnbp.png ├── predict.r ├── randInitializeWeights-solution.r ├── randInitializeWeights.r ├── sigmoidGradient-solution.r ├── sigmoidGradient.r └── submit.r ├── 5 ├── ex5.r ├── ex5.rproj ├── ex5data1.rda ├── featureNormalize.r ├── instructions.rmd ├── learningCurve-solution.r ├── learningCurve.r ├── linearRegCostFunction-solution.r ├── linearRegCostFunction.r ├── plotFit.r ├── polyFeatures-solution.r ├── polyFeatures.r ├── submit.r ├── trainLinearReg.r ├── validationCurve-solution.r └── validationCurve.r ├── 6 ├── dataset3Params-solution.r ├── dataset3Params.r ├── emailFeatures-solution.r ├── emailFeatures.r ├── emailSample1.txt ├── emailSample2.txt ├── ex6.r ├── ex6.rproj ├── ex6_spam.r ├── ex6data1.rda ├── ex6data2.rda ├── ex6data3.rda ├── gaussianKernel-solution.r ├── gaussianKernel.r ├── getVocabList.r ├── instructions.rmd ├── linearKernel.r ├── plotData.r ├── processEmail-solution.r ├── processEmail.r ├── spamSample1.txt ├── spamSample2.txt ├── spamTest.rda ├── spamTrain.rda ├── submit.r ├── svmPredict.r ├── svmTrain.r ├── visualizeBoundary.r ├── visualizeBoundaryLinear.r └── vocab.txt ├── 7 ├── bird_small.png ├── bird_small.rda ├── computeCentroids-solution.r ├── computeCentroids.r ├── displayData.r ├── ex7.r ├── ex7.rproj ├── ex7_pca.r ├── ex7data1.rda ├── ex7data2.rda ├── ex7faces.rda ├── featureNormalize.r ├── findClosestCentroids-solution.r ├── findClosestCentroids.r ├── instructions.rmd ├── kMeansInitCentroids-solution.r ├── kMeansInitCentroids.r ├── pca-solution.r ├── pca.r ├── plotDataPoints.r ├── plotProgresskMeans.r ├── projectData-solution.r ├── projectData.r ├── recoverData-solution.r ├── recoverData.r ├── runkMeans.r └── submit.r ├── 8 ├── checkCostFunction.r ├── cofiCostFunc-solution.r ├── cofiCostFunc.r ├── computeNumericalGradient.r ├── estimateGaussian-solution.r ├── estimateGaussian.r ├── ex8-solution.r ├── ex8.r ├── ex8.rproj ├── ex8_cofi.r ├── ex8_movieParams.rda ├── ex8_movies.rda ├── ex8data1.rda ├── ex8data2.rda ├── instructions.rmd ├── loadMovieList.r ├── movie_ids.txt 
├── multivariateGaussian-solution.r ├── multivariateGaussian.r ├── normalizeRatings.r ├── selectThreshold-solution.r ├── selectThreshold.r ├── submit.r ├── visualizeFit-solution.r └── visualizeFit.r ├── .R.Rout ├── figs └── p01.png ├── lib ├── bsxfun.r ├── meshgrid.r ├── pinv.r ├── sigmoid.r └── submitWithConfiguration.r └── readme.md /.R.Rout: -------------------------------------------------------------------------------- 1 | ERROR: option '-f' requires a filename argument 2 | -------------------------------------------------------------------------------- /1/computeCost-solution.r: -------------------------------------------------------------------------------- 1 | computeCost <- function(X, y, theta) { 2 | #COMPUTECOST Compute cost for linear regression 3 | # J <- COMPUTECOST(X, y, theta) computes the cost of using theta as the 4 | # parameter for linear regression to fit the data points in X and y 5 | 6 | # Initialize some useful values 7 | m <- length(y) # number of training examples 8 | 9 | # You need to return the following variables correctly 10 | J <- 0 11 | # 12 | # ---------------------- YOUR CODE HERE ---------------------- 13 | # Instructions: Compute the cost of a particular choice of theta 14 | # You should set J to the cost. 15 | 16 | # vectorized version. 17 | # (exactly the same with multivariate version. ) 18 | 19 | dif <- X %*% theta - y 20 | J <- (t(dif) %*% dif) / (2 * m) 21 | J 22 | 23 | # ------------------------------------------------------------------------- 24 | 25 | } 26 | -------------------------------------------------------------------------------- /1/computeCost.r: -------------------------------------------------------------------------------- 1 | computeCost <- function(X, y, theta) { 2 | #COMPUTECOST Compute cost for linear regression 3 | # J <- COMPUTECOST(X, y, theta) computes the cost of using theta as the 4 | # parameter for linear regression to fit the data points in X and y 5 | 6 | # Initialize some useful values 7 | m <- length(y) # number of training examples 8 | 9 | # You need to return the following variables correctly 10 | J <- 0 11 | # 12 | # ---------------------- YOUR CODE HERE ---------------------- 13 | # Instructions: Compute the cost of a particular choice of theta 14 | # You should set J to the cost. 15 | 16 | # vectorized version. 17 | # (exactly the same with multivariate version. ) 18 | 19 | 20 | # ------------------------------------------------------------------------- 21 | 22 | } 23 | -------------------------------------------------------------------------------- /1/computeCostMulti-solution.r: -------------------------------------------------------------------------------- 1 | computeCostMulti <- function(X, y, theta) { 2 | #COMPUTECOSTMULTI Compute cost for linear regression with multiple variables 3 | # J <- COMPUTECOSTMULTI(X, y, theta) computes the cost of using theta as the 4 | # parameter for linear regression to fit the data points in X and y 5 | 6 | # Initialize some useful values 7 | m <- length(y) # number of training examples 8 | 9 | # You need to return the following variables correctly 10 | J <- 0 11 | 12 | # ---------------------- YOUR CODE HERE ---------------------- 13 | # Instructions: Compute the cost of a particular choice of theta 14 | # You should set J to the cost. 
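# (The vectorized expression used below, J = t(X %*% theta - y) %*% (X %*% theta - y) / (2 * m),
#  is the matrix form of the usual sum of squared errors, sum((X %*% theta - y)^2) / (2 * m).)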
15 | 16 | dif <- X %*% theta - y 17 | J <- (t(dif) %*% dif) / (2 * m) 18 | J 19 | 20 | # ------------------------------------------------------------------------- 21 | 22 | } 23 | -------------------------------------------------------------------------------- /1/computeCostMulti.r: -------------------------------------------------------------------------------- 1 | computeCostMulti <- function(X, y, theta) { 2 | #COMPUTECOSTMULTI Compute cost for linear regression with multiple variables 3 | # J <- COMPUTECOSTMULTI(X, y, theta) computes the cost of using theta as the 4 | # parameter for linear regression to fit the data points in X and y 5 | 6 | # Initialize some useful values 7 | m <- length(y) # number of training examples 8 | 9 | # You need to return the following variables correctly 10 | J <- 0 11 | 12 | # ---------------------- YOUR CODE HERE ---------------------- 13 | # Instructions: Compute the cost of a particular choice of theta 14 | # You should set J to the cost. 15 | 16 | 17 | # ------------------------------------------------------------------------- 18 | 19 | } 20 | -------------------------------------------------------------------------------- /1/ex1.rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | -------------------------------------------------------------------------------- /1/ex1data1.txt: -------------------------------------------------------------------------------- 1 | 6.1101,17.592 2 | 5.5277,9.1302 3 | 8.5186,13.662 4 | 7.0032,11.854 5 | 5.8598,6.8233 6 | 8.3829,11.886 7 | 7.4764,4.3483 8 | 8.5781,12 9 | 6.4862,6.5987 10 | 5.0546,3.8166 11 | 5.7107,3.2522 12 | 14.164,15.505 13 | 5.734,3.1551 14 | 8.4084,7.2258 15 | 5.6407,0.71618 16 | 5.3794,3.5129 17 | 6.3654,5.3048 18 | 5.1301,0.56077 19 | 6.4296,3.6518 20 | 7.0708,5.3893 21 | 6.1891,3.1386 22 | 20.27,21.767 23 | 5.4901,4.263 24 | 6.3261,5.1875 25 | 5.5649,3.0825 26 | 18.945,22.638 27 | 12.828,13.501 28 | 10.957,7.0467 29 | 13.176,14.692 30 | 22.203,24.147 31 | 5.2524,-1.22 32 | 6.5894,5.9966 33 | 9.2482,12.134 34 | 5.8918,1.8495 35 | 8.2111,6.5426 36 | 7.9334,4.5623 37 | 8.0959,4.1164 38 | 5.6063,3.3928 39 | 12.836,10.117 40 | 6.3534,5.4974 41 | 5.4069,0.55657 42 | 6.8825,3.9115 43 | 11.708,5.3854 44 | 5.7737,2.4406 45 | 7.8247,6.7318 46 | 7.0931,1.0463 47 | 5.0702,5.1337 48 | 5.8014,1.844 49 | 11.7,8.0043 50 | 5.5416,1.0179 51 | 7.5402,6.7504 52 | 5.3077,1.8396 53 | 7.4239,4.2885 54 | 7.6031,4.9981 55 | 6.3328,1.4233 56 | 6.3589,-1.4211 57 | 6.2742,2.4756 58 | 5.6397,4.6042 59 | 9.3102,3.9624 60 | 9.4536,5.4141 61 | 8.8254,5.1694 62 | 5.1793,-0.74279 63 | 21.279,17.929 64 | 14.908,12.054 65 | 18.959,17.054 66 | 7.2182,4.8852 67 | 8.2951,5.7442 68 | 10.236,7.7754 69 | 5.4994,1.0173 70 | 20.341,20.992 71 | 10.136,6.6799 72 | 7.3345,4.0259 73 | 6.0062,1.2784 74 | 7.2259,3.3411 75 | 5.0269,-2.6807 76 | 6.5479,0.29678 77 | 7.5386,3.8845 78 | 5.0365,5.7014 79 | 10.274,6.7526 80 | 5.1077,2.0576 81 | 5.7292,0.47953 82 | 5.1884,0.20421 83 | 6.3557,0.67861 84 | 9.7687,7.5435 85 | 6.5159,5.3436 86 | 8.5172,4.2415 87 | 9.1802,6.7981 88 | 6.002,0.92695 89 | 5.5204,0.152 90 | 5.0594,2.8214 91 | 5.7077,1.8451 92 | 7.6366,4.2959 93 | 5.8707,7.2029 94 | 5.3054,1.9869 95 | 8.2934,0.14454 96 | 13.394,9.0551 97 | 5.4369,0.61705 98 
| -------------------------------------------------------------------------------- /1/ex1data2.txt: -------------------------------------------------------------------------------- 1 | 2104,3,399900 2 | 1600,3,329900 3 | 2400,3,369000 4 | 1416,2,232000 5 | 3000,4,539900 6 | 1985,4,299900 7 | 1534,3,314900 8 | 1427,3,198999 9 | 1380,3,212000 10 | 1494,3,242500 11 | 1940,4,239999 12 | 2000,3,347000 13 | 1890,3,329999 14 | 4478,5,699900 15 | 1268,3,259900 16 | 2300,4,449900 17 | 1320,2,299900 18 | 1236,3,199900 19 | 2609,4,499998 20 | 3031,4,599000 21 | 1767,3,252900 22 | 1888,2,255000 23 | 1604,3,242900 24 | 1962,4,259900 25 | 3890,3,573900 26 | 1100,3,249900 27 | 1458,3,464500 28 | 2526,3,469000 29 | 2200,3,475000 30 | 2637,3,299900 31 | 1839,2,349900 32 | 1000,1,169900 33 | 2040,4,314900 34 | 3137,3,579900 35 | 1811,4,285900 36 | 1437,3,249900 37 | 1239,3,229900 38 | 2132,4,345000 39 | 4215,4,549000 40 | 2162,4,287000 41 | 1664,2,368500 42 | 2238,3,329900 43 | 2567,4,314000 44 | 1200,3,299000 45 | 852,2,179900 46 | 1852,4,299900 47 | 1203,3,239500 48 | -------------------------------------------------------------------------------- /1/featureNormalize-solution.r: -------------------------------------------------------------------------------- 1 | featureNormalize <- function(X) { 2 | #FEATURENORMALIZE Normalizes the features in X 3 | # FEATURENORMALIZE(X) returns a normalized version of X where 4 | # the mean value of each feature is 0 and the standard deviation 5 | # is 1. This is often a good preprocessing step to do when 6 | # working with learning algorithms. 7 | 8 | # You need to set these values correctly 9 | X_norm <- X 10 | mu <- rep(0,dim(X)[2]) 11 | sigma <- rep(0,dim(X)[2]) 12 | 13 | # ---------------------- YOUR CODE HERE ---------------------- 14 | # Instructions: First, for each feature dimension, compute the mean 15 | # of the feature and subtract it from the dataset, 16 | # storing the mean value in mu. Next, compute the 17 | # standard deviation of each feature and divide 18 | # each feature by it's standard deviation, storing 19 | # the standard deviation in sigma. 20 | # 21 | # Note that X is a matrix where each column is a 22 | # feature and each row is an example. You need 23 | # to perform the normalization separately for 24 | # each feature. 25 | # 26 | # Hint: You might find the 'mean' and 'sd' functions useful. 27 | # 28 | 29 | # mu 30 | for (p in 1:dim(X)[2]) { 31 | mu[p] <- mean(X[,p]) 32 | } 33 | 34 | # sigma 35 | for (p in 1:dim(X)[2]) { 36 | sigma[p] <- sd(X[,p]) 37 | } 38 | 39 | # X_norm 40 | for (p in 1:dim(X)[2]) { 41 | if (sigma[p] != 0) 42 | for (i in 1:dim(X)[1]) 43 | X_norm[i, p] <- (X[i, p] - mu[p]) / sigma[p] 44 | else 45 | # sigma(p) == 0 <=> forall i, j, X(i, p) == X(j, p) == mu(p) 46 | # In this case, normalized values are all zero. 47 | # (mean is 0, standard deviation is sigma(=0)) 48 | X_norm[, p] <- t(rep(0,dim(X)[1])) 49 | } 50 | 51 | list(X_norm = X_norm, mu = mu, sigma = sigma) 52 | # ------------------------------------------------------------ 53 | 54 | } 55 | -------------------------------------------------------------------------------- /1/featureNormalize.r: -------------------------------------------------------------------------------- 1 | featureNormalize <- function(X) { 2 | #FEATURENORMALIZE Normalizes the features in X 3 | # FEATURENORMALIZE(X) returns a normalized version of X where 4 | # the mean value of each feature is 0 and the standard deviation 5 | # is 1. 
This is often a good preprocessing step to do when 6 | # working with learning algorithms. 7 | 8 | # You need to set these values correctly 9 | X_norm <- X 10 | mu <- rep(0,dim(X)[2]) 11 | sigma <- rep(0,dim(X)[2]) 12 | 13 | # ---------------------- YOUR CODE HERE ---------------------- 14 | # Instructions: First, for each feature dimension, compute the mean 15 | # of the feature and subtract it from the dataset, 16 | # storing the mean value in mu. Next, compute the 17 | # standard deviation of each feature and divide 18 | # each feature by it's standard deviation, storing 19 | # the standard deviation in sigma. 20 | # 21 | # Note that X is a matrix where each column is a 22 | # feature and each row is an example. You need 23 | # to perform the normalization separately for 24 | # each feature. 25 | # 26 | # Hint: You might find the 'mean' and 'sd' functions useful. 27 | # 28 | 29 | 30 | 31 | list(X_norm = X_norm, mu = mu, sigma = sigma) 32 | # ------------------------------------------------------------ 33 | 34 | } 35 | -------------------------------------------------------------------------------- /1/gradientDescent-solution.r: -------------------------------------------------------------------------------- 1 | gradientDescent <- function(X, y, theta, alpha, num_iters) { 2 | #gradientDescent Performs gradient descent to learn theta 3 | # theta <- gradientDescent(X, y, theta, alpha, num_iters) updates theta by 4 | # taking num_iters gradient steps with learning rate alpha 5 | 6 | # Initialize some useful values 7 | m <- length(y) # number of training examples 8 | J_history = rep(0, num_iters + 1) 9 | theta_history = matrix(0, num_iters + 1, length(theta)) 10 | 11 | theta_history[1,] = t(theta) 12 | J_history[1] = computeCost(X, y, theta) 13 | 14 | for (iter in 2:(num_iters + 1)) { 15 | # ---------------------- YOUR CODE HERE ---------------------- 16 | # Instructions: Perform a single gradient step on the parameter vector 17 | # theta. 18 | # 19 | # Hint: While debugging, it can be useful to print out the values 20 | # of the cost function (computeCost) and gradient here. 21 | # 22 | 23 | # create a copy of theta for simultaneous update. 24 | theta_prev = theta 25 | 26 | # number of features. 27 | p = dim(X)[2] 28 | 29 | # simultaneous update theta using theta_prev. 
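# (Each theta_j is updated with the partial derivative
#  dJ/d(theta_j) = (1/m) * sum((X %*% theta_prev - y) * X[, j]),
#  which the loop below evaluates in vectorized form.)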
30 | for (j in 1:p) { 31 | # vectorized version 32 | # (exactly the same with multivariate version) 33 | deriv = (t(X %*% theta_prev - y) %*% X[, j]) / m 34 | 35 | # update theta_j 36 | theta[j] = theta_prev[j] - (alpha * deriv) 37 | } 38 | 39 | # Save the cost J in every iteration 40 | J_history[iter] = computeCost(X, y, theta) 41 | theta_history[iter,] = t(theta) 42 | } 43 | 44 | list(theta = theta, J_history = J_history, theta_history = theta_history) 45 | # ------------------------------------------------------------ 46 | } 47 | -------------------------------------------------------------------------------- /1/gradientDescent.r: -------------------------------------------------------------------------------- 1 | gradientDescent <- function(X, y, theta, alpha, num_iters) { 2 | #GRADIENTDESCENT Performs gradient descent to learn theta 3 | # theta <- GRADIENTDESENT(X, y, theta, alpha, num_iters) updates theta by 4 | # taking num_iters gradient steps with learning rate alpha 5 | 6 | # Initialize some useful values 7 | m <- length(y) # number of training examples 8 | J_history = rep(0,num_iters + 1) 9 | theta_history = matrix(0,num_iters + 1,length(theta)) 10 | 11 | theta_history[1,] = t(theta) 12 | J_history[1] = computeCost(X, y, theta) 13 | 14 | for (iter in 2:(num_iters + 1)) { 15 | # ---------------------- YOUR CODE HERE ---------------------- 16 | # Instructions: Perform a single gradient step on the parameter vector 17 | # theta. 18 | # 19 | # Hint: While debugging, it can be useful to print out the values 20 | # of the cost function (computeCost) and gradient here. 21 | # 22 | 23 | 24 | } 25 | 26 | list(theta = theta, J_history = J_history, theta_history = theta_history) 27 | # ------------------------------------------------------------ 28 | } 29 | -------------------------------------------------------------------------------- /1/gradientDescentMulti-solution.r: -------------------------------------------------------------------------------- 1 | gradientDescentMulti <- function(X, y, theta, alpha, num_iters) { 2 | #GRADIENTDESCENTMULTI Performs gradient descent to learn theta 3 | # theta <- GRADIENTDESCENTMULTI(x, y, theta, alpha, num_iters) updates theta by 4 | # taking num_iters gradient steps with learning rate alpha 5 | 6 | # Initialize some useful values 7 | m <- length(y) # number of training examples 8 | J_history <- rep(0,num_iters) 9 | 10 | for (iter in 1:num_iters) { 11 | # ---------------------- YOUR CODE HERE ---------------------- 12 | # Instructions: Perform a single gradient step on the parameter vector 13 | # theta. 14 | # 15 | # Hint: While debugging, it can be useful to print out the values 16 | # of the cost function (computeCostMulti) and gradient here. 17 | # 18 | 19 | # create a copy of theta for simultaneous update. 20 | theta_prev <- theta 21 | 22 | # number of features. 
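# (Equivalently, the per-feature loop below can be collapsed into a single vectorized update,
#  theta <- theta_prev - (alpha / m) * (t(X) %*% (X %*% theta_prev - y));
#  the loop form mirrors the per-parameter derivation.)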
23 | p <- dim(X)[2] 24 | 25 | for (j in 1:p) { 26 | # calculate dJ/d(theta_j) 27 | deriv <- (t(X %*% theta_prev - y) %*% X[,j]) / m 28 | 29 | # # update theta_j 30 | theta[j] <- theta_prev[j] - (alpha * deriv) 31 | } 32 | 33 | # ------------------------------------------------------------ 34 | 35 | # Save the cost J in every iteration 36 | J_history[iter] <- computeCostMulti(X, y, theta) 37 | 38 | } 39 | list(theta = theta, J_history = J_history) 40 | } 41 | -------------------------------------------------------------------------------- /1/gradientDescentMulti.r: -------------------------------------------------------------------------------- 1 | gradientDescentMulti <- function(X, y, theta, alpha, num_iters) { 2 | #GRADIENTDESCENTMULTI Performs gradient descent to learn theta 3 | # theta <- GRADIENTDESCENTMULTI(x, y, theta, alpha, num_iters) updates theta by 4 | # taking num_iters gradient steps with learning rate alpha 5 | 6 | # Initialize some useful values 7 | m <- length(y) # number of training examples 8 | J_history <- rep(0,num_iters) 9 | 10 | for (iter in 1:num_iters) { 11 | # ---------------------- YOUR CODE HERE ---------------------- 12 | # Instructions: Perform a single gradient step on the parameter vector 13 | # theta. 14 | # 15 | # Hint: While debugging, it can be useful to print out the values 16 | # of the cost function (computeCostMulti) and gradient here. 17 | # 18 | 19 | 20 | } 21 | 22 | # ------------------------------------------------------------ 23 | 24 | # Save the cost J in every iteration 25 | J_history[iter] <- computeCostMulti(X, y, theta) 26 | 27 | list(theta = theta, J_history = J_history) 28 | } 29 | -------------------------------------------------------------------------------- /1/normalEqn-solution.r: -------------------------------------------------------------------------------- 1 | normalEqn <- function(X, y) { 2 | #NORMALEQN Computes the closed-form solution to linear regression 3 | # NORMALEQN(X,y) computes the closed-form solution to linear 4 | # regression using the normal equations. 5 | source("../lib/pinv.r") 6 | theta <- rep(0,length(y)) 7 | 8 | # ---------------------- YOUR CODE HERE ---------------------- 9 | # Instructions: Complete the code to compute the closed form solution 10 | # to linear regression and put the result in theta. 11 | # 12 | theta <- pinv(t(X) %*% X) %*% t(X) %*% y 13 | theta 14 | # ------------------------------------------------------------ 15 | 16 | } 17 | -------------------------------------------------------------------------------- /1/normalEqn.r: -------------------------------------------------------------------------------- 1 | normalEqn <- function(X, y) { 2 | #NORMALEQN Computes the closed-form solution to linear regression 3 | # NORMALEQN(X,y) computes the closed-form solution to linear 4 | # regression using the normal equations. 5 | source("../lib/pinv.r") 6 | theta <- rep(0,length(y)) 7 | 8 | # ---------------------- YOUR CODE HERE ---------------------- 9 | # Instructions: Complete the code to compute the closed form solution 10 | # to linear regression and put the result in theta. 
11 | # 12 | 13 | 14 | theta 15 | # ------------------------------------------------------------ 16 | 17 | } 18 | -------------------------------------------------------------------------------- /1/plotData-solution.r: -------------------------------------------------------------------------------- 1 | plotData <- function (x, y) { 2 | #PLOTDATA Plots the data points x and y into a new figure 3 | # PLOTDATA(x,y) plots the data points and gives the figure axes labels of 4 | # population and profit. 5 | 6 | # ---------------------- YOUR CODE HERE ---------------------- 7 | # Instructions: Plot the training data into a figure using the 8 | # "plot" command. Set the axes labels using 9 | # the "xlab" and "ylab" parameters of plot function. 10 | # Assume the population and revenue data have been passed in 11 | # as the x and y arguments of this function. 12 | # 13 | # Hint: You can use the pch=4 and col="red" options with plot to have the markers 14 | # appear as red crosses. Furthermore, you can make the 15 | # markers larger by using plot(..., cex=1.1). cex stands for character expansion 16 | 17 | plot( 18 | x, y, col = "red", pch = 4,cex = 1.1,lwd = 2, 19 | xlab = 'Profit in $10,000s', 20 | ylab = 'Population of City in 10,000s' 21 | ) 22 | 23 | # ------------------------------------------------------------ 24 | 25 | } 26 | -------------------------------------------------------------------------------- /1/plotData.r: -------------------------------------------------------------------------------- 1 | plotData <- function (x, y) { 2 | #PLOTDATA Plots the data points x and y into a new figure 3 | # PLOTDATA(x,y) plots the data points and gives the figure axes labels of 4 | # population and profit. 5 | 6 | # ---------------------- YOUR CODE HERE ---------------------- 7 | # Instructions: Plot the training data into a figure using the 8 | # "plot" command. Set the axes labels using 9 | # the "xlab" and "ylab" parameters of plot function. 10 | # Assume the population and revenue data have been passed in 11 | # as the x and y arguments of this function. 12 | # 13 | # Hint: You can use the pch=4 and col="red" options with plot to have the markers 14 | # appear as red crosses. Furthermore, you can make the 15 | # markers larger by using plot(..., cex=1.1). 
cex stands for character expansion 16 | 17 | 18 | 19 | # ------------------------------------------------------------ 20 | 21 | } 22 | -------------------------------------------------------------------------------- /1/submit.r: -------------------------------------------------------------------------------- 1 | submit <- function(){ 2 | 3 | source("../lib/submitWithConfiguration.r") 4 | 5 | conf = list() 6 | conf$assignmentSlug = 'linear-regression' 7 | conf$itemName = 'Linear Regression with Multiple Variables' 8 | conf$partArrays = c( 9 | '1', 'warmUpExercise.r' , 'Warm-up Exercise', 10 | '2', 'computeCost.r' , 'Computing Cost (for One Variable)', 11 | '3', 'gradientDescent.r' , 'Gradient Descent (for One Variable)', 12 | '4', 'featureNormalize.r' , 'Feature Normalization', 13 | '5', 'computeCostMulti.r' , 'Computing Cost (for Multiple Variables)', 14 | '6', 'gradientDescentMulti.r' , 'Gradient Descent (for Multiple Variables)', 15 | '7', 'normalEqn.r' , 'Normal Equations') 16 | conf$partArrays = matrix(conf$partArrays,7,3 , byrow = T) 17 | conf$output = output 18 | 19 | submitWithConfiguration(conf) 20 | } 21 | 22 | output <- function(partId){ 23 | source('warmUpExercise.r') 24 | source('computeCost.r') 25 | source('gradientDescent.r') 26 | source('featureNormalize.r') 27 | source('computeCostMulti.r') 28 | source('gradientDescentMulti.r') 29 | source('normalEqn.r') 30 | # Random Test Cases 31 | #X1 = [ones(20,1) (exp(1) + exp(2) * (0.1:0.1:2))'] 32 | X1 = cbind(rep(1,20), exp(1) + exp(2) * seq(.1, 2, .1 ) ) 33 | X1 = round(X1,4) 34 | #Y1 = X1(:,2) + sin(X1(:,1)) + cos(X1(:,2)) 35 | Y1 = X1[,2] + sin(X1[,1]) + cos(X1[,2]) 36 | 37 | #X2 = [X1 X1(:,2).^0.5 X1(:,2).^0.25] 38 | X2 = cbind(X1, X1[,2]^0.5, X1[,2]^0.25) 39 | Y2 = Y1^0.5 + Y1 40 | 41 | if (partId == '1') 42 | out = paste0(sprintf('%0.5f ', warmUpExercise()), collapse = '') 43 | else if (partId == '2') 44 | # different rounding 45 | out = sprintf('%0.5f ', computeCost(X1, Y1, c(0.5, -0.5))) 46 | else if (partId == '3') 47 | out = paste0(sprintf('%0.5f ', gradientDescent(X1, Y1, c(0.5, -0.5), 0.01, 10)$theta), collapse = '') 48 | else if (partId == '4') 49 | out = paste0(sprintf('%0.5f ', featureNormalize(X2[,2:4])$X_norm ), collapse = '') 50 | else if (partId == '5') 51 | #round 52 | out = sprintf('%0.5f ', computeCostMulti(X2, Y2, c(0.1,0.2,0.3,0.4))) 53 | else if (partId == '6') 54 | out = paste0(sprintf('%0.5f ', gradientDescentMulti(X2, Y2, -1 * c(0.1,0.2,0.3,0.4) , 0.01, 10)$theta) 55 | , collapse = '') 56 | else if (partId == '7') 57 | out = paste0(sprintf('%0.5f ', normalEqn(X2, Y2)), collapse = '') 58 | } 59 | -------------------------------------------------------------------------------- /1/warmUpExercise-solution.r: -------------------------------------------------------------------------------- 1 | warmUpExercise <- function() { 2 | #WARMUPEXERCISE Example function in R 3 | # A <- WARMUPEXERCISE() is an example function that returns the 5x5 identity matrix 4 | 5 | 6 | # ------------- YOUR CODE HERE -------------- 7 | # Instructions: Return the 5x5 identity matrix 8 | # In R, the final computed variable 9 | # will be returned as output. 
10 | 11 | A <- diag(5) 12 | A 13 | 14 | # ------------------------------------------- 15 | 16 | 17 | } 18 | -------------------------------------------------------------------------------- /1/warmUpExercise.r: -------------------------------------------------------------------------------- 1 | warmUpExercise <- function() { 2 | #WARMUPEXERCISE Example function in R 3 | # A <- WARMUPEXERCISE() is an example function that returns the 5x5 identity matrix 4 | 5 | 6 | # ------------- YOUR CODE HERE -------------- 7 | # Instructions: Return the 5x5 identity matrix 8 | # In R, the final computed variable 9 | # will be returned as output. 10 | 11 | A 12 | 13 | # ------------------------------------------- 14 | 15 | 16 | } 17 | -------------------------------------------------------------------------------- /2/costFunction-solution.r: -------------------------------------------------------------------------------- 1 | costFunction <- function(X, y) { 2 | #COSTFUNCTION Compute cost for logistic regression 3 | # J <- COSTFUNCTION(theta, X, y) computes the cost of using theta as the 4 | # parameter for logistic regression. 5 | 6 | function(theta) { 7 | # Initialize some useful values 8 | m <- length(y) # number of training examples 9 | 10 | # You need to return the following variable correctly 11 | J <- 0 12 | 13 | # ----------------------- YOUR CODE HERE ----------------------- 14 | # Instructions: Compute the cost of a particular choice of theta. 15 | # You should set J to the cost. 16 | 17 | h <- sigmoid(X %*% theta) 18 | J <- (t(-y) %*% log(h) - t(1 - y) %*% log(1 - h)) / m 19 | J 20 | # ---------------------------------------------------- 21 | } 22 | } 23 | 24 | grad <- function(X, y) { 25 | #COSTFUNCTION Compute gradient for logistic regression 26 | # J <- COSTFUNCTION(theta, X, y) computes the gradient of the cost 27 | # w.r.t. to the parameters. 28 | function(theta) { 29 | 30 | # You need to return the following variable correctly 31 | grad <- matrix(0,dim(as.matrix(theta))) 32 | m <- length(y) 33 | # ----------------------- YOUR CODE HERE ----------------------- 34 | # Instructions: Compute the partial derivatives and set grad to the partial 35 | # derivatives of the cost w.r.t. each parameter in theta 36 | # 37 | # Note: grad should have the same dimensions as theta 38 | # 39 | 40 | h <- sigmoid(X %*% theta) 41 | # calculate grads 42 | grad <- (t(X) %*% (h - y)) / m 43 | 44 | grad 45 | # ---------------------------------------------------- 46 | 47 | } 48 | } -------------------------------------------------------------------------------- /2/costFunction.r: -------------------------------------------------------------------------------- 1 | costFunction <- function(X, y) { 2 | #COSTFUNCTION Compute cost for logistic regression 3 | # J <- COSTFUNCTION(theta, X, y) computes the cost of using theta as the 4 | # parameter for logistic regression. 5 | 6 | function(theta) { 7 | # Initialize some useful values 8 | m <- length(y); # number of training examples 9 | 10 | # You need to return the following variable correctly 11 | J <- 0 12 | 13 | # ----------------------- YOUR CODE HERE ----------------------- 14 | # Instructions: Compute the cost of a particular choice of theta. 15 | # You should set J to the cost. 16 | 17 | J 18 | # ---------------------------------------------------- 19 | } 20 | } 21 | 22 | grad <- function(X, y) { 23 | #COSTFUNCTION Compute gradient for logistic regression 24 | # J <- COSTFUNCTION(theta, X, y) computes the gradient of the cost 25 | # w.r.t. to the parameters. 
26 | function(theta) { 27 | 28 | # You need to return the following variable correctly 29 | grad <- matrix(0,dim(as.matrix(theta))) 30 | m <- length(y) 31 | # ----------------------- YOUR CODE HERE ----------------------- 32 | # Instructions: Compute the partial derivatives and set grad to the partial 33 | # derivatives of the cost w.r.t. each parameter in theta 34 | # 35 | # Note: grad should have the same dimensions as theta 36 | # 37 | 38 | 39 | grad 40 | # ---------------------------------------------------- 41 | 42 | } 43 | } -------------------------------------------------------------------------------- /2/costFunctionReg-solution.r: -------------------------------------------------------------------------------- 1 | costFunctionReg <- function(X, y, lambda) { 2 | #COSTFUNCTIONREG Compute cost for logistic regression with regularization 3 | # J <- COSTFUNCTIONREG(theta, X, y, lambda) computes the cost of using 4 | # theta as the parameter for regularized logistic regression 5 | function(theta) { 6 | # Initialize some useful values 7 | m <- length(y) # number of training examples 8 | 9 | # You need to return the following variables correctly 10 | J <- 0 11 | # ----------------------- YOUR CODE HERE ----------------------- 12 | # Instructions: Compute the cost of a particular choice of theta. 13 | # You should set J to the cost. 14 | 15 | # calculate hypothesis function h(x) 16 | h <- sigmoid(X %*% theta) 17 | # excluded the first theta value 18 | theta1 <- c(0,theta[-1]) 19 | p <- lambda * (t(theta1) %*% theta1) / (2 * m) 20 | J <- (t(-y) %*% log(h) - t(1 - y) %*% log(1 - h)) / m + p 21 | J 22 | # ---------------------------------------------------- 23 | } 24 | } 25 | 26 | gradReg <- function (X, y, lambda) { 27 | #COSTFUNCTIONREG Compute gradient for logistic regression with regularization 28 | # J <- COSTFUNCTIONREG(theta, X, y, lambda) computes the 29 | # gradient of the cost w.r.t. to the parameters. 30 | function(theta) { 31 | # Initialize some useful values 32 | m <- length(y); # number of training examples 33 | 34 | # You need to return the following variables correctly 35 | grad <- rep(0,length(theta)) 36 | 37 | # ----------------------- YOUR CODE HERE ----------------------- 38 | # Instructions: Compute the partial derivatives and set grad to the partial 39 | # derivatives of the cost w.r.t. each parameter in theta 40 | 41 | # calculate hypothesis function 42 | h <- sigmoid(X %*% theta) 43 | # excluded the first theta value 44 | theta1 <- c(0,theta[-1]) 45 | 46 | # calculate grads 47 | grad <- (t(X) %*% (h - y) + lambda * theta1) / m 48 | grad 49 | # ---------------------------------------------------- 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /2/costFunctionReg.r: -------------------------------------------------------------------------------- 1 | costFunctionReg <- function(X, y, lambda) { 2 | #COSTFUNCTIONREG Compute cost for logistic regression with regularization 3 | # J <- COSTFUNCTIONREG(theta, X, y, lambda) computes the cost of using 4 | # theta as the parameter for regularized logistic regression 5 | function(theta) { 6 | # Initialize some useful values 7 | m <- length(y); # number of training examples 8 | 9 | # You need to return the following variables correctly 10 | J <- 0 11 | # ----------------------- YOUR CODE HERE ----------------------- 12 | # Instructions: Compute the cost of a particular choice of theta. 13 | # You should set J to the cost. 
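# (Hint: the regularization term should not include the intercept parameter theta[1];
#  see how costFunctionReg-solution.r builds theta1 <- c(0, theta[-1]) for this purpose.)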
14 | 15 | 16 | J 17 | # ---------------------------------------------------- 18 | } 19 | } 20 | 21 | gradReg <- function (X, y, lambda) { 22 | #COSTFUNCTIONREG Compute gradient for logistic regression with regularization 23 | # J <- COSTFUNCTIONREG(theta, X, y, lambda) computes the 24 | # gradient of the cost w.r.t. to the parameters. 25 | function(theta) { 26 | # Initialize some useful values 27 | m <- length(y); # number of training examples 28 | 29 | # You need to return the following variables correctly 30 | grad <- rep(0,length(theta)) 31 | 32 | # ----------------------- YOUR CODE HERE ----------------------- 33 | # Instructions: Compute the partial derivatives and set grad to the partial 34 | # derivatives of the cost w.r.t. each parameter in theta 35 | 36 | 37 | grad 38 | # ---------------------------------------------------- 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /2/ex2.r: -------------------------------------------------------------------------------- 1 | ## Machine Learning Online Class - Exercise 2: Logistic Regression 2 | # 3 | # Instructions 4 | # ------------ 5 | # 6 | # This file contains code that helps you get started on the logistic 7 | # regression exercise. You will need to complete the following functions 8 | # in this exericse: 9 | # 10 | # sigmoid.r 11 | # costFunction.r 12 | # predict.r 13 | # costFunctionReg.r 14 | # 15 | # For this exercise, you will not need to change any code in this file, 16 | # or any other files other than those mentioned above. 17 | # 18 | 19 | ## Initialization 20 | 21 | ## Load Data 22 | # The first two columns contains the exam scores and the third column 23 | # contains the label. 24 | rm(list=ls()) 25 | sources <- c("costFunction.r","sigmoid.r","plotData.r", 26 | "plotDecisionBoundary.r","mapFeature.r", "predict.r") 27 | # Uncomment the following, if you want to execute the solution scripts. 28 | sources <- paste0(substr(sources,1,nchar(sources)-2), '-solution.r') 29 | for (i in 1:length(sources)) { 30 | cat(paste("Loading ",sources[i],"\n")) 31 | source(sources[i]) 32 | } 33 | 34 | data <- read.table('ex2data1.txt', sep = ',') 35 | X <- data[,c(1,2)]; y <- data[,3] 36 | X <- as.matrix(X) 37 | ## ---------------------- Part 1: Plotting ---------------------- 38 | # We start the exercise by first plotting the data to understand the 39 | # the problem we are working with. 40 | cat(sprintf('Plotting data with + indicating (y = 1) examples and o indicating (y = 0) examples.\n')) 41 | plotData(X, y) 42 | cat(sprintf('\nProgram paused. Press enter to continue.\n')) 43 | 44 | ## ------------ Part 2: Compute Cost and Gradient ------------ 45 | # In this part of the exercise, you will implement the cost and gradient 46 | # for logistic regression. You neeed to complete the code in 47 | # costFunction.r 48 | 49 | # Setup the data matrix appropriately, and add ones for the intercept term 50 | m <- dim(X)[1] 51 | n <- dim(X)[2] 52 | 53 | # Add intercept term to x and X_test 54 | X <- cbind(rep(1,m),X) 55 | # Initialize fitting parameters 56 | initial_theta <- rep(0,n+1) 57 | 58 | # Compute and display initial cost and gradient 59 | cF <- costFunction(X, y)(initial_theta) 60 | cost <- costFunction(X, y)(initial_theta) 61 | grd <- grad(X,y)(initial_theta) 62 | 63 | cat(sprintf('Cost at initial theta (zeros): %f\n', cost)) 64 | cat(sprintf('Gradient at initial theta (zeros): \n')) 65 | cat(sprintf(' %f \n', grd)) 66 | 67 | cat(sprintf('\nProgram paused. 
Press enter to continue.\n')) 68 | line <- readLines(con = stdin(),1) 69 | 70 | ## -------------- Part 3: Optimizing using optim -------------- 71 | # In this exercise, you will use a built-in function (optim) to find the 72 | # optimal parameters theta. 73 | 74 | # Run optim to obtain the optimal theta 75 | # This function will return theta and the cost 76 | # maxit is maximum iteration 77 | 78 | optimRes <- optim(par = initial_theta, fn = costFunction(X,y), gr = grad(X,y), 79 | method="BFGS", control = list(maxit = 400)) 80 | theta <- optimRes$par 81 | cost <- optimRes$value 82 | 83 | # Print theta to screen 84 | cat(sprintf('Cost at theta found by optim: %f\n', cost)) 85 | cat(sprintf('theta: \n')) 86 | cat(sprintf(' %f \n', theta)) 87 | 88 | # Plot Boundary 89 | plotDecisionBoundary(theta, X, y) 90 | 91 | cat(sprintf('\nProgram paused. Press enter to continue.\n')) 92 | line <- readLines(con = stdin(),1) 93 | 94 | ## ---------------- Part 4: Predict and Accuracies ---------------- 95 | # After learning the parameters, you'll like to use it to predict the outcomes 96 | # on unseen data. In this part, you will use the logistic regression model 97 | # to predict the probability that a student with score 20 on exam 1 and 98 | # score 80 on exam 2 will be admitted. 99 | # 100 | # Furthermore, you will compute the training and test set accuracies of 101 | # our model. 102 | # 103 | # Your task is to complete the code in predict.r 104 | 105 | # Predict probability for a student with score 45 on exam 1 106 | # and score 85 on exam 2 107 | 108 | prob <- sigmoid(t(c(1,45,85)) %*% theta) 109 | cat(sprintf('For a student with scores 45 and 85, we predict an admission probability of\n %f\n', prob)) 110 | 111 | # Compute accuracy on our training set 112 | p <- predict(theta, X) 113 | 114 | cat(sprintf('Train Accuracy: %f\n', mean(p == y) * 100)) 115 | 116 | # pause 117 | -------------------------------------------------------------------------------- /2/ex2.rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | -------------------------------------------------------------------------------- /2/ex2_reg.r: -------------------------------------------------------------------------------- 1 | ## Machine Learning Online Class - Exercise 2: Logistic Regression 2 | # 3 | # Instructions 4 | # ------------ 5 | # 6 | # This file contains code that helps you get started on the second part 7 | # of the exercise which covers regularization with logistic regression. 8 | # 9 | # You will need to complete the following functions in this exericse: 10 | # 11 | # sigmoid.r 12 | # costFunction.r 13 | # predict.r 14 | # costFunctionReg.r 15 | # 16 | # For this exercise, you will not need to change any code in this file, 17 | # or any other files other than those mentioned above. 18 | # 19 | 20 | ## Initialization 21 | rm(list=ls()) 22 | sources <- c("costFunctionReg.r","sigmoid.r", "plotData.r","plotDecisionBoundary.r", 23 | "mapFeature.r","predict.r") 24 | # Uncomment the following, if you want to execute the solution scripts. 
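# (Note: as distributed, the next line is active, so the '-solution.r' versions of these
#  scripts are loaded; comment it out to run your own implementations instead.)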
25 | sources <- paste0(substr(sources,1,nchar(sources)-2), '-solution.r') 26 | for (i in 1:length(sources)) { 27 | cat(paste("Loading ",sources[i],"\n")) 28 | source(sources[i]) 29 | } 30 | ## Load Data 31 | # The first two columns contains the exam scores and the third column 32 | # contains the label. 33 | data <- read.table('ex2data2.txt', sep = ',') 34 | X <- data[,c(1,2)]; y <- data[,3] 35 | X <- as.matrix(X) 36 | 37 | plotData(X, y, axLables = c('Microchip Test 1','Microchip Test 2'), 38 | legLabels = c("y = 1","y = 0")) 39 | 40 | ## ------------- Part 1: Regularized Logistic Regression ------------ 41 | # In this part, you are given a dataset with data points that are not 42 | # linearly separable. However, you would still like to use logistic 43 | # regression to classify the data points. 44 | # 45 | # To do so, you introduce more features to use, in particular, you add 46 | # polynomial features to our data matrix (similar to polynomial 47 | # regression). 48 | # 49 | 50 | # Add Polynomial Features 51 | 52 | # Note that mapFeature also adds a column of ones for us, so the intercept 53 | # term is handled 54 | X <- mapFeature(X[,1], X[,2]) 55 | 56 | # Initialize fitting parameters 57 | initial_theta <- rep(0,dim(X)[2]) 58 | 59 | # Set regularization parameter lambda to 1 60 | lambda <- 1 61 | 62 | # Compute and display initial cost and gradient for regularized logistic 63 | # regression 64 | cost <- costFunctionReg(X, y, lambda)(initial_theta) 65 | grd <- gradReg(X,y, lambda)(initial_theta) 66 | cat(sprintf('Cost at initial theta (zeros): %f\n', cost)) 67 | 68 | cat(sprintf('\nProgram paused. Press enter to continue.\n')) 69 | line <- readLines(con = stdin(),1) 70 | 71 | ## -------------- Part 2: Regularization and Accuracies -------------- 72 | # Optional Exercise: 73 | # In this part, you will get to try different values of lambda and 74 | # see how regularization affects the decision coundart 75 | # 76 | # Try the following values of lambda (0, 1, 10, 100). 77 | # 78 | # How does the decision boundary change when you vary lambda? How does 79 | # the training set accuracy vary? 
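# (One way to compare settings is to wrap the optimization below in a loop, e.g.
#  for (lambda in c(0, 1, 10, 100)) { ... }, re-plotting the decision boundary and
#  recomputing the training accuracy for each value; the script as written uses a single lambda.)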
80 | # 81 | 82 | # Initialize fitting parameters 83 | initial_theta <- rep(0, dim(X)[2]) 84 | 85 | # Set regularization parameter lambda to 1 (you should vary this) 86 | lambda <- 0 87 | # try with lambda in (1,0,100,155) 88 | # Optimize 89 | optimRes <- optim(par = initial_theta, 90 | fn = costFunctionReg(X,y,lambda), 91 | gr = gradReg(X,y,lambda), 92 | method='BFGS', 93 | control = list(maxit = 400)) 94 | 95 | theta <- optimRes$par 96 | J <- optimRes$value 97 | 98 | # Plot Boundary 99 | plotDecisionBoundary(theta, X, y, axLables = c('Microchip Test 1','Microchip Test 2'), 100 | legLabels = c('y = 1','y = 0')) 101 | 102 | # Compute accuracy on our training set 103 | p <- predict(theta, X) 104 | 105 | cat(sprintf('Train Accuracy: %f\n', mean(p == y) * 100)) 106 | 107 | -------------------------------------------------------------------------------- /2/ex2data1.txt: -------------------------------------------------------------------------------- 1 | 34.62365962451697,78.0246928153624,0 2 | 30.28671076822607,43.89499752400101,0 3 | 35.84740876993872,72.90219802708364,0 4 | 60.18259938620976,86.30855209546826,1 5 | 79.0327360507101,75.3443764369103,1 6 | 45.08327747668339,56.3163717815305,0 7 | 61.10666453684766,96.51142588489624,1 8 | 75.02474556738889,46.55401354116538,1 9 | 76.09878670226257,87.42056971926803,1 10 | 84.43281996120035,43.53339331072109,1 11 | 95.86155507093572,38.22527805795094,0 12 | 75.01365838958247,30.60326323428011,0 13 | 82.30705337399482,76.48196330235604,1 14 | 69.36458875970939,97.71869196188608,1 15 | 39.53833914367223,76.03681085115882,0 16 | 53.9710521485623,89.20735013750205,1 17 | 69.07014406283025,52.74046973016765,1 18 | 67.94685547711617,46.67857410673128,0 19 | 70.66150955499435,92.92713789364831,1 20 | 76.97878372747498,47.57596364975532,1 21 | 67.37202754570876,42.83843832029179,0 22 | 89.67677575072079,65.79936592745237,1 23 | 50.534788289883,48.85581152764205,0 24 | 34.21206097786789,44.20952859866288,0 25 | 77.9240914545704,68.9723599933059,1 26 | 62.27101367004632,69.95445795447587,1 27 | 80.1901807509566,44.82162893218353,1 28 | 93.114388797442,38.80067033713209,0 29 | 61.83020602312595,50.25610789244621,0 30 | 38.78580379679423,64.99568095539578,0 31 | 61.379289447425,72.80788731317097,1 32 | 85.40451939411645,57.05198397627122,1 33 | 52.10797973193984,63.12762376881715,0 34 | 52.04540476831827,69.43286012045222,1 35 | 40.23689373545111,71.16774802184875,0 36 | 54.63510555424817,52.21388588061123,0 37 | 33.91550010906887,98.86943574220611,0 38 | 64.17698887494485,80.90806058670817,1 39 | 74.78925295941542,41.57341522824434,0 40 | 34.1836400264419,75.2377203360134,0 41 | 83.90239366249155,56.30804621605327,1 42 | 51.54772026906181,46.85629026349976,0 43 | 94.44336776917852,65.56892160559052,1 44 | 82.36875375713919,40.61825515970618,0 45 | 51.04775177128865,45.82270145776001,0 46 | 62.22267576120188,52.06099194836679,0 47 | 77.19303492601364,70.45820000180959,1 48 | 97.77159928000232,86.7278223300282,1 49 | 62.07306379667647,96.76882412413983,1 50 | 91.56497449807442,88.69629254546599,1 51 | 79.94481794066932,74.16311935043758,1 52 | 99.2725269292572,60.99903099844988,1 53 | 90.54671411399852,43.39060180650027,1 54 | 34.52451385320009,60.39634245837173,0 55 | 50.2864961189907,49.80453881323059,0 56 | 49.58667721632031,59.80895099453265,0 57 | 97.64563396007767,68.86157272420604,1 58 | 32.57720016809309,95.59854761387875,0 59 | 74.24869136721598,69.82457122657193,1 60 | 71.79646205863379,78.45356224515052,1 61 | 75.3956114656803,85.75993667331619,1 
62 | 35.28611281526193,47.02051394723416,0 63 | 56.25381749711624,39.26147251058019,0 64 | 30.05882244669796,49.59297386723685,0 65 | 44.66826172480893,66.45008614558913,0 66 | 66.56089447242954,41.09209807936973,0 67 | 40.45755098375164,97.53518548909936,1 68 | 49.07256321908844,51.88321182073966,0 69 | 80.27957401466998,92.11606081344084,1 70 | 66.74671856944039,60.99139402740988,1 71 | 32.72283304060323,43.30717306430063,0 72 | 64.0393204150601,78.03168802018232,1 73 | 72.34649422579923,96.22759296761404,1 74 | 60.45788573918959,73.09499809758037,1 75 | 58.84095621726802,75.85844831279042,1 76 | 99.82785779692128,72.36925193383885,1 77 | 47.26426910848174,88.47586499559782,1 78 | 50.45815980285988,75.80985952982456,1 79 | 60.45555629271532,42.50840943572217,0 80 | 82.22666157785568,42.71987853716458,0 81 | 88.9138964166533,69.80378889835472,1 82 | 94.83450672430196,45.69430680250754,1 83 | 67.31925746917527,66.58935317747915,1 84 | 57.23870631569862,59.51428198012956,1 85 | 80.36675600171273,90.96014789746954,1 86 | 68.46852178591112,85.59430710452014,1 87 | 42.0754545384731,78.84478600148043,0 88 | 75.47770200533905,90.42453899753964,1 89 | 78.63542434898018,96.64742716885644,1 90 | 52.34800398794107,60.76950525602592,0 91 | 94.09433112516793,77.15910509073893,1 92 | 90.44855097096364,87.50879176484702,1 93 | 55.48216114069585,35.57070347228866,0 94 | 74.49269241843041,84.84513684930135,1 95 | 89.84580670720979,45.35828361091658,1 96 | 83.48916274498238,48.38028579728175,1 97 | 42.2617008099817,87.10385094025457,1 98 | 99.31500880510394,68.77540947206617,1 99 | 55.34001756003703,64.9319380069486,1 100 | 74.77589300092767,89.52981289513276,1 101 | -------------------------------------------------------------------------------- /2/ex2data2.txt: -------------------------------------------------------------------------------- 1 | 0.051267,0.69956,1 2 | -0.092742,0.68494,1 3 | -0.21371,0.69225,1 4 | -0.375,0.50219,1 5 | -0.51325,0.46564,1 6 | -0.52477,0.2098,1 7 | -0.39804,0.034357,1 8 | -0.30588,-0.19225,1 9 | 0.016705,-0.40424,1 10 | 0.13191,-0.51389,1 11 | 0.38537,-0.56506,1 12 | 0.52938,-0.5212,1 13 | 0.63882,-0.24342,1 14 | 0.73675,-0.18494,1 15 | 0.54666,0.48757,1 16 | 0.322,0.5826,1 17 | 0.16647,0.53874,1 18 | -0.046659,0.81652,1 19 | -0.17339,0.69956,1 20 | -0.47869,0.63377,1 21 | -0.60541,0.59722,1 22 | -0.62846,0.33406,1 23 | -0.59389,0.005117,1 24 | -0.42108,-0.27266,1 25 | -0.11578,-0.39693,1 26 | 0.20104,-0.60161,1 27 | 0.46601,-0.53582,1 28 | 0.67339,-0.53582,1 29 | -0.13882,0.54605,1 30 | -0.29435,0.77997,1 31 | -0.26555,0.96272,1 32 | -0.16187,0.8019,1 33 | -0.17339,0.64839,1 34 | -0.28283,0.47295,1 35 | -0.36348,0.31213,1 36 | -0.30012,0.027047,1 37 | -0.23675,-0.21418,1 38 | -0.06394,-0.18494,1 39 | 0.062788,-0.16301,1 40 | 0.22984,-0.41155,1 41 | 0.2932,-0.2288,1 42 | 0.48329,-0.18494,1 43 | 0.64459,-0.14108,1 44 | 0.46025,0.012427,1 45 | 0.6273,0.15863,1 46 | 0.57546,0.26827,1 47 | 0.72523,0.44371,1 48 | 0.22408,0.52412,1 49 | 0.44297,0.67032,1 50 | 0.322,0.69225,1 51 | 0.13767,0.57529,1 52 | -0.0063364,0.39985,1 53 | -0.092742,0.55336,1 54 | -0.20795,0.35599,1 55 | -0.20795,0.17325,1 56 | -0.43836,0.21711,1 57 | -0.21947,-0.016813,1 58 | -0.13882,-0.27266,1 59 | 0.18376,0.93348,0 60 | 0.22408,0.77997,0 61 | 0.29896,0.61915,0 62 | 0.50634,0.75804,0 63 | 0.61578,0.7288,0 64 | 0.60426,0.59722,0 65 | 0.76555,0.50219,0 66 | 0.92684,0.3633,0 67 | 0.82316,0.27558,0 68 | 0.96141,0.085526,0 69 | 0.93836,0.012427,0 70 | 0.86348,-0.082602,0 71 | 0.89804,-0.20687,0 72 | 
0.85196,-0.36769,0 73 | 0.82892,-0.5212,0 74 | 0.79435,-0.55775,0 75 | 0.59274,-0.7405,0 76 | 0.51786,-0.5943,0 77 | 0.46601,-0.41886,0 78 | 0.35081,-0.57968,0 79 | 0.28744,-0.76974,0 80 | 0.085829,-0.75512,0 81 | 0.14919,-0.57968,0 82 | -0.13306,-0.4481,0 83 | -0.40956,-0.41155,0 84 | -0.39228,-0.25804,0 85 | -0.74366,-0.25804,0 86 | -0.69758,0.041667,0 87 | -0.75518,0.2902,0 88 | -0.69758,0.68494,0 89 | -0.4038,0.70687,0 90 | -0.38076,0.91886,0 91 | -0.50749,0.90424,0 92 | -0.54781,0.70687,0 93 | 0.10311,0.77997,0 94 | 0.057028,0.91886,0 95 | -0.10426,0.99196,0 96 | -0.081221,1.1089,0 97 | 0.28744,1.087,0 98 | 0.39689,0.82383,0 99 | 0.63882,0.88962,0 100 | 0.82316,0.66301,0 101 | 0.67339,0.64108,0 102 | 1.0709,0.10015,0 103 | -0.046659,-0.57968,0 104 | -0.23675,-0.63816,0 105 | -0.15035,-0.36769,0 106 | -0.49021,-0.3019,0 107 | -0.46717,-0.13377,0 108 | -0.28859,-0.060673,0 109 | -0.61118,-0.067982,0 110 | -0.66302,-0.21418,0 111 | -0.59965,-0.41886,0 112 | -0.72638,-0.082602,0 113 | -0.83007,0.31213,0 114 | -0.72062,0.53874,0 115 | -0.59389,0.49488,0 116 | -0.48445,0.99927,0 117 | -0.0063364,0.99927,0 118 | 0.63265,-0.030612,0 119 | -------------------------------------------------------------------------------- /2/mapFeature-solution.r: -------------------------------------------------------------------------------- 1 | mapFeature <- function(X1, X2) { 2 | # MAPFEATURE Feature mapping function to polynomial features 3 | # 4 | # MAPFEATURE(X1, X2) maps the two input features 5 | # to quadratic features used in the regularization exercise. 6 | # 7 | # Returns a new feature array with more features, comprising of 8 | # X1, X2, X1^2, X2^2, X1*X2, X1*X2^2, etc.. 9 | # 10 | # Inputs X1, X2 must be the same size 11 | # 12 | 13 | degree <- 6 14 | 15 | out <- matrix(1,length(X1),1) 16 | 17 | for (i in 1:degree) 18 | for (j in 0:i) 19 | out <- cbind(out, (X1 ^ (i - j)) * (X2 ^ j)) 20 | 21 | out 22 | } -------------------------------------------------------------------------------- /2/mapFeature.r: -------------------------------------------------------------------------------- 1 | mapFeature <- function(X1, X2) { 2 | # MAPFEATURE Feature mapping function to polynomial features 3 | # 4 | # MAPFEATURE(X1, X2) maps the two input features 5 | # to quadratic features used in the regularization exercise. 6 | # 7 | # Returns a new feature array with more features, comprising of 8 | # X1, X2, X1.^2, X2.^2, X1*X2, X1*X2.^2, etc.. 9 | # 10 | # Inputs X1, X2 must be the same size 11 | # 12 | 13 | degree <- 6 14 | 15 | out <- matrix(1,length(X1),1) 16 | 17 | for (i in 1:degree) 18 | for (j in 0:i) 19 | out <- cbind(out, (X1 ^ (i - j)) * (X2 ^ j)) 20 | 21 | out 22 | } -------------------------------------------------------------------------------- /2/plotData-solution.r: -------------------------------------------------------------------------------- 1 | plotData <- function (X, y, axLables = c("Exam 1 score","Exam 2 score"), 2 | legLabels = c('Admitted', 'Not admitted')) { 3 | #PLOTDATA Plots the data points X and y into a new device 4 | # PLOTDATA(x,y) plots the data points with + for the positive examples 5 | # and o for the negative examples. X is assumed to be a Mx2 matrix. 6 | 7 | # ----------------------- YOUR CODE HERE ----------------------- 8 | # Instructions: Plot the positive and negative examples on a 9 | # 2D plot, using the option pch=3 for the positive (plus) 10 | # examples and pch=21 for the negative (circle) examples. 
11 | # 12 | # plus and empty circle character codes 13 | 14 | symbolss <- c(3,21) 15 | yfac <- factor(y) 16 | plot(X[,1], X[,2], pch = symbolss[yfac], bg = "yellow", lwd = 1.3, 17 | xlab = axLables[1], ylab = axLables[2]) 18 | 19 | legend("topright", legLabels, pch = rev(symbolss), pt.bg = "yellow") 20 | # ---------------------------------------------------- 21 | } 22 | -------------------------------------------------------------------------------- /2/plotData.r: -------------------------------------------------------------------------------- 1 | plotData <- 2 | function (X, y, axLables = c("Exam 1 score","Exam 2 score"), legLabels = 3 | c('Admitted', 'Not admitted')) { 4 | #PLOTDATA Plots the data points X and y into a new device 5 | # PLOTDATA(x,y) plots the data points with + for the positive examples 6 | # and o for the negative examples. X is assumed to be a Mx2 matrix. 7 | 8 | # ----------------------- YOUR CODE HERE ----------------------- 9 | # Instructions: Plot the positive and negative examples on a 10 | # 2D plot, using the option pch=3 for the positive (plus) 11 | # examples and pch=21 for the negative (circle) examples. 12 | # 13 | 14 | 15 | # ---------------------------------------------------- 16 | } -------------------------------------------------------------------------------- /2/plotDecisionBoundary-solution.r: -------------------------------------------------------------------------------- 1 | plotDecisionBoundary <- function (theta, X, y, 2 | axLables = c("Exam 1 score","Exam 2 score"), 3 | legLabels = c('Admitted', 'Not admitted')) { 4 | # PLOTDECISIONBOUNDARY Plots the data points X and y into a new figure with 5 | # the decision boundary defined by theta 6 | # PLOTDECISIONBOUNDARY(theta, X,y) plots the data points with + for the 7 | # positive examples and o for the negative examples. X is assumed to be 8 | # a either 9 | # 1) Mx3 matrix, where the first column is an all-ones column for the 10 | # intercept. 
11 | # 2) MxN, N>3 matrix, where the first column is all-ones 12 | 13 | # Plot Data 14 | plotData(X[,2:3], y, axLables, legLabels) 15 | 16 | if (dim(X)[2] <= 3) { 17 | # Only need 2 points to define a line, so choose two end points 18 | plot_x <- cbind(min(X[,2] - 2), max(X[,2] + 2)) 19 | # Calculate the decision boundary line 20 | plot_y <- -1 / theta[3] * (theta[2] * plot_x + theta[1]) 21 | # Plot, and adjust axes for better viewing 22 | lines(plot_x, plot_y, col = "blue") 23 | } 24 | else { 25 | # the grid range 26 | u <- seq(-1,1.5, length.out = 50) 27 | v <- seq(-1,1.5, length.out = 50) 28 | 29 | z <- matrix(0, length(u), length(v)) 30 | # Evaluate z <- theta*x over the grid 31 | for (i in 1:length(u)) 32 | for (j in 1:length(v)) 33 | z[i,j] <- mapFeature(u[i], v[j]) %*% theta 34 | 35 | # Notice you need to specify the range [0, 0] 36 | contour(u, v, z, xlab = 'Microchip Test 1', ylab = 'Microchip Test 2', 37 | levels = 0, lwd = 2, add = TRUE, drawlabels = FALSE, col = "green") 38 | mtext(paste("lambda = ",lambda), 3) 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /2/plotDecisionBoundary.r: -------------------------------------------------------------------------------- 1 | plotDecisionBoundary <- function (theta, X, y, 2 | axLables = c("Exam 1 score","Exam 2 score"), 3 | legLabels = c('Admitted', 'Not admitted')) { 4 | # PLOTDECISIONBOUNDARY Plots the data points X and y into a new figure with 5 | # the decision boundary defined by theta 6 | # PLOTDECISIONBOUNDARY(theta, X,y) plots the data points with + for the 7 | # positive examples and o for the negative examples. X is assumed to be 8 | # a either 9 | # 1) Mx3 matrix, where the first column is an all-ones column for the 10 | # intercept. 
11 | # 2) MxN, N>3 matrix, where the first column is all-ones 12 | 13 | # Plot Data 14 | plotData(X[,2:3], y,axLables,legLabels) 15 | 16 | if (dim(X)[2] <= 3) 17 | { 18 | # Only need 2 points to define a line, so choose two }points 19 | #plot_x <- [min(X(:,2))-2, max(X(:,2))+2] 20 | plot_x <- cbind(min(X[,2] - 2), max(X[,2] + 2)) 21 | # Calculate the decision boundary line 22 | plot_y <- -1 / theta[3] * (theta[2] * plot_x + theta[1]) 23 | 24 | # Plot, and adjust axes for better viewing 25 | lines(plot_x, plot_y, col = "blue") 26 | 27 | } 28 | else 29 | { 30 | # Here is the grid range 31 | u <- seq(-1,1.5, length.out = 50) 32 | v <- seq(-1,1.5, length.out = 50) 33 | 34 | z <- matrix(0, length(u), length(v)) 35 | # Evaluate z <- theta*x over the grid 36 | for (i in 1:length(u)) 37 | for (j in 1:length(v)) 38 | z[i,j] <- mapFeature(u[i], v[j]) %*% theta 39 | 40 | # Notice you need to specify the range [0, 0] 41 | contour( 42 | u, v, z, xlab = 'Microchip Test 1', ylab = 'Microchip Test 2', 43 | levels = 0, 44 | lwd = 2, add = TRUE, drawlabels = FALSE, col = "green" 45 | ) 46 | mtext(paste("lambda = ",lambda), 3) 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /2/predict-solution.r: -------------------------------------------------------------------------------- 1 | predict <- function(theta, X) { 2 | #PREDICT Predict whether the label is 0 or 1 using learned logistic 3 | #regression parameters theta 4 | # p <- PREDICT(theta, X) computes the predictions for X using a 5 | # threshold at 0.5 (i.e., if sigmoid(theta'*x) >= 0.5, predict 1) 6 | 7 | m <- dim(X)[1] # Number of training examples 8 | # You need to return the following variables correctly 9 | p <- rep(0,m) 10 | # ----------------------- YOUR CODE HERE ----------------------- 11 | # Instructions: Complete the following code to make predictions using 12 | # your learned logistic regression parameters. 13 | # You should set p to a vector of 0's and 1's 14 | # 15 | 16 | p[sigmoid(X %*% theta) >= 0.5] <- 1 17 | p 18 | # ---------------------------------------------------- 19 | } 20 | -------------------------------------------------------------------------------- /2/predict.r: -------------------------------------------------------------------------------- 1 | predict <- function(theta, X) { 2 | #PREDICT Predict whether the label is 0 or 1 using learned logistic 3 | #regression parameters theta 4 | # p <- PREDICT(theta, X) computes the predictions for X using a 5 | # threshold at 0.5 (i.e., if sigmoid(theta'*x) >= 0.5, predict 1) 6 | 7 | m <- dim(X)[1] # Number of training examples 8 | # You need to return the following variables correctly 9 | p <- rep(0,m) 10 | # ----------------------- YOUR CODE HERE ----------------------- 11 | # Instructions: Complete the following code to make predictions using 12 | # your learned logistic regression parameters. 13 | # You should set p to a vector of 0's and 1's 14 | # 15 | 16 | p 17 | # ---------------------------------------------------- 18 | } 19 | -------------------------------------------------------------------------------- /2/sigmoid-solution.r: -------------------------------------------------------------------------------- 1 | sigmoid <- function(z) { 2 | #SIGMOID Compute sigmoid functoon 3 | # J <- SIGMOID(z) computes the sigmoid of z. 
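# The sigmoid itself is g(z) = 1 / (1 + exp(-z)), applied element-wise, so z
# may be a scalar, a vector or a matrix.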
4 | 5 | # You need to return the following variables correctly 6 | g <- matrix(0,dim(as.matrix(z))) 7 | 8 | # ----------------------- YOUR CODE HERE ----------------------- 9 | # Instructions: Compute the sigmoid of each value of z (z can be a matrix, 10 | # vector or scalar). 11 | 12 | g <- 1 / (1 + exp(-1 * z)) 13 | g 14 | # ---------------------------------------------------- 15 | } 16 | -------------------------------------------------------------------------------- /2/sigmoid.r: -------------------------------------------------------------------------------- 1 | sigmoid <- function(z) { 2 | #SIGMOID Compute sigmoid functoon 3 | # J <- SIGMOID(z) computes the sigmoid of z. 4 | 5 | # You need to return the following variables correctly 6 | z <- as.matrix(z) 7 | g <- matrix(0,dim(z)[1],dim(z)[2]) 8 | 9 | # ----------------------- YOUR CODE HERE ----------------------- 10 | # Instructions: Compute the sigmoid of each value of z (z can be a matrix, 11 | # vector or scalar). 12 | 13 | g 14 | # ---------------------------------------------------- 15 | } 16 | -------------------------------------------------------------------------------- /2/submit.r: -------------------------------------------------------------------------------- 1 | submit = function(){ 2 | source("../lib/submitWithConfiguration.r") 3 | 4 | conf <- list() 5 | conf$assignmentSlug = 'logistic-regression'; 6 | conf$itemName = 'Logistic Regression'; 7 | conf$partArrays = c( 8 | '1', 9 | 'sigmoid.r' , 10 | 'Sigmoid Function', 11 | '2', 12 | 'costFunction.r' , 13 | 'Logistic Regression Cost', 14 | '3', 15 | 'costFunction.r' , 16 | 'Logistic Regression Gradient', 17 | '4', 18 | 'predict.r' , 19 | 'Predict', 20 | '5', 21 | 'costFunctionReg.r' , 22 | 'Regularized Logistic Regression Cost', 23 | '6', 24 | 'costFunctionReg.r' , 25 | 'Regularized Logistic Regression Gradient') 26 | 27 | conf$partArrays = matrix(conf$partArrays, ncol = 3 , byrow = T) 28 | 29 | conf$output = output; 30 | 31 | submitWithConfiguration(conf) 32 | } 33 | 34 | output = function(partId, auxstring) { 35 | source("sigmoid.r") 36 | source("costFunction.r") 37 | source("predict.r") 38 | source("costFunctionReg.r") 39 | # Random Test Cases 40 | X = cbind(rep(1,20), exp(1) * sin(seq(1,20,1)), exp(0.5) * cos(seq(1,20,1)) ) 41 | y = (sin(X[,1]+X[,2]) > 0) 42 | theta = c(.25,.5,-.5) 43 | if (partId == '1') 44 | out = paste0(sprintf('%0.5f ', sigmoid(X)), collapse = '') 45 | else if (partId == '2') 46 | out = sprintf('%0.5f ', costFunction( X, y)(theta)) 47 | else if (partId == '3') 48 | out = paste0(sprintf('%0.5f ', grad( X, y)(theta)), collapse = '') 49 | else if (partId == '4') 50 | out = paste0(sprintf('%0.5f ', predict(theta, X) ), collapse = '') 51 | else if (partId == '5') 52 | out = paste0(sprintf('%0.5f ', costFunctionReg(X, y, 0.1)(theta)), collapse = '') 53 | else if (partId == '6') 54 | out = paste0(sprintf('%0.5f ', gradReg(X, y, 0.1)(theta)), collapse = '' ) 55 | } 56 | -------------------------------------------------------------------------------- /3/displayData.r: -------------------------------------------------------------------------------- 1 | displayData <- function(X, example_width = round(sqrt(dim(X)[2]))) { 2 | #DISPLAYDATA Display 2D data in a nice grid 3 | # [h, display_array] <- DISPLAYDATA(X, example_width) displays 2D data 4 | # stored in X in a nice grid. It returns the figure handle h and the 5 | # displayed array if requested. 
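# Each row of X is expected to be one example unrolled into a row vector; for
# the 20x20 digit images used in this exercise ncol(X) is 400, so example_width
# defaults to round(sqrt(400)) = 20 and example_height works out to 20 as well.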
6 | 7 | if (is.vector(X)) 8 | X <- t(X) 9 | 10 | # Compute rows, cols 11 | m <- dim(X)[1] 12 | n <- dim(X)[2] 13 | 14 | example_height <- (n / example_width) #20 15 | 16 | # Compute number of items to display 17 | display_rows <- floor(sqrt(m)) #10 18 | display_cols <- ceiling(m / display_rows) #10 19 | 20 | # Between images padding 21 | pad <- 1 22 | 23 | # Setup blank display 24 | display_array <- -matrix(0, pad + display_rows * (example_height + pad), 25 | pad + display_cols * (example_width + pad)) 26 | 27 | # Copy each example into a patch on the display array 28 | curr_ex <- 1 29 | for (j in 1:display_rows) { 30 | for (i in 1:display_cols) { 31 | if (curr_ex > m) 32 | break 33 | # Copy the patch 34 | 35 | # Get the max value of the patch 36 | max_val <- max(abs(X[curr_ex,])) 37 | display_array[pad + (j - 1) * (example_height + pad) + (1:example_height), 38 | pad + (i - 1) * (example_width + pad) + (1:example_width)] <- 39 | matrix(X[curr_ex,], example_height, example_width) / max_val 40 | curr_ex <- curr_ex + 1 41 | } 42 | if (curr_ex > m) 43 | break 44 | } 45 | # Display Image 46 | op <- par(bg = "gray",mar=rep(.1,4)) 47 | #image draws by row from bottom up, but R indexes matrices by column, top down 48 | dispArr <- t(apply(display_array,2,rev)) 49 | image(z = dispArr,col = gray.colors(100), xaxt = 'n',yaxt = 'n') 50 | grid(nx = display_cols,display_rows,col = 'black',lwd = 2,lty = 1) 51 | box() 52 | par(op) 53 | } 54 | -------------------------------------------------------------------------------- /3/ex3.r: -------------------------------------------------------------------------------- 1 | ## Machine Learning Online Class - Exercise 3 | Part 1: One-vs-all 2 | 3 | # Instructions 4 | # ------------ 5 | # 6 | # This file contains code that helps you get started on the 7 | # linear exercise. You will need to complete the following functions 8 | # in this exericse: 9 | # 10 | # lrCostFunction.r (logistic regression cost function) 11 | # oneVsAll.r 12 | # predictOneVsAll.r 13 | # predict.r 14 | # 15 | # For this exercise, you will not need to change any code in this file, 16 | # or any other files other than those mentioned above. 17 | # 18 | 19 | ## Initialization 20 | rm(list=ls()) 21 | sources <- c("../lib/sigmoid.r","displayData.r","lrCostFunction.r", 22 | "oneVsAll.r","predict.r", "predictOneVsAll.r") 23 | # Uncomment the following, if you want to execute the solution scripts. 24 | sources <- c(sources[1:2], paste0(substr(sources[-c(1,2)],1,nchar(sources[-c(1,2)])-2), '-solution.r')) 25 | for (i in 1:length(sources)) { 26 | cat(paste("Loading ",sources[i],"\n")) 27 | source(sources[i]) 28 | } 29 | 30 | ## Setup the parameters you will use for this part of the exercise 31 | input_layer_size <- 400 # 20x20 Input Images of Digits 32 | num_labels <- 10 # 10 labels, from 1 to 10 33 | # (note that we have mapped "0" to label 10) 34 | 35 | ## ------------- Part 1: Loading and Visualizing Data -------------- 36 | # We start the exercise by first loading and visualizing the dataset. 37 | # You will be working with a dataset that contains handwritten digits. 38 | # 39 | 40 | # Load Training Data 41 | cat(sprintf('Loading and Visualizing Data ...\n')) 42 | load('ex3data1.rda') 43 | 44 | m <- dim(X)[1] 45 | # Randomly select 100 data points to display 46 | rand_indices <- sample(m) 47 | sel <- X[rand_indices[1:100], ] 48 | 49 | displayData(sel) 50 | 51 | cat(sprintf('Program paused. 
Press enter to continue.\n')) 52 | line <- readLines(con = stdin(),1) 53 | 54 | ## ------------ Part 2: Vectorize Logistic Regression ------------ 55 | # In this part of the exercise, you will reuse your logistic regression 56 | # code from the last exercise. Your task here is to make sure that your 57 | # regularized logistic regression implementation is vectorized. After 58 | # that, you will implement one-vs-all classification for the handwritten 59 | # digit dataset. 60 | # 61 | 62 | cat(sprintf('\nTraining One-vs-All Logistic Regression...\n')) 63 | 64 | lambda <- 0.1 65 | all_theta <- oneVsAll(X, y, num_labels, lambda) 66 | 67 | cat(sprintf('Program paused. Press enter to continue.\n')) 68 | line <- readLines(con = stdin(),1) 69 | 70 | 71 | ## ----------------- Part 3: Predict for One-Vs-All ----------------- 72 | pred <- predictOneVsAll(all_theta, X) 73 | 74 | cat(sprintf('\nTraining Set Accuracy: %f\n', mean(pred == y) * 100)) 75 | 76 | -------------------------------------------------------------------------------- /3/ex3.rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | -------------------------------------------------------------------------------- /3/ex3_nn.r: -------------------------------------------------------------------------------- 1 | ## Machine Learning Online Class - Exercise 3 | Part 2: Neural Networks 2 | 3 | # Instructions 4 | # ------------ 5 | # 6 | # This file contains code that helps you get started on the 7 | # linear exercise. You will need to complete the following functions 8 | # in this exericse: 9 | # 10 | # lrCostFunction.r (logistic regression cost function) 11 | # oneVsAll.r 12 | # predictOneVsAll.r 13 | # predict.r 14 | # 15 | # For this exercise, you will not need to change any code in this file, 16 | # or any other files other than those mentioned above. 17 | # 18 | 19 | ## Initialization 20 | rm(list=ls()) 21 | sources <- c("../lib/sigmoid.r","displayData.r","lrCostFunction.r", 22 | "oneVsAll.r","predict.r", "predictOneVsAll.r") 23 | # Uncomment the following, if you want to execute the solution scripts. 24 | sources <- c(sources[1:2], paste0(substr(sources[-c(1,2)],1,nchar(sources[-c(1,2)])-2), '-solution.r')) 25 | for (i in 1:length(sources)) { 26 | cat(paste("Loading ",sources[i],"\n")) 27 | source(sources[i]) 28 | } 29 | 30 | ## Setup the parameters you will use for this exercise 31 | input_layer_size <- 400 # 20x20 Input Images of Digits 32 | hidden_layer_size <- 25 # 25 hidden units 33 | num_labels <- 10 # 10 labels, from 1 to 10 34 | # (note that we have mapped "0" to label 10) 35 | 36 | ## ------------- Part 1: Loading and Visualizing Data -------------- 37 | # We start the exercise by first loading and visualizing the dataset. 38 | # You will be working with a dataset that contains handwritten digits. 39 | # 40 | 41 | # Load Training Data 42 | cat(sprintf('Loading and Visualizing Data ...\n')) 43 | 44 | load('ex3data1.rda') 45 | m <- dim(X)[1] 46 | 47 | # Randomly select 100 data points to display 48 | sel <- sample(m) 49 | sel <- sel[1:100] 50 | 51 | displayData(X[sel,]) 52 | 53 | cat(sprintf('Program paused. 
Press enter to continue.\n')) 54 | line <- readLines(con = stdin(),1) 55 | 56 | ## ----------------- Part 2: Loading Pameters ----------------- 57 | # In this part of the exercise, we load some pre-initialized 58 | # neural network parameters. 59 | 60 | cat(sprintf('\nLoading Saved Neural Network Parameters ...\n')) 61 | 62 | # Load the weights into variables Theta1 and Theta2 63 | load('ex3weights.rda') 64 | 65 | ## ------------------- Part 3: Implement Predict ------------------- 66 | # After training the neural network, we would like to use it to predict 67 | # the labels. You will now implement the "predict" function to use the 68 | # neural network to predict the labels of the training set. This lets 69 | # you compute the training set accuracy. 70 | 71 | pred <- predict(Theta1, Theta2, X) 72 | 73 | cat(sprintf('\nTraining Set Accuracy: %f\n', mean(pred==y) * 100)) 74 | 75 | cat(sprintf('Program paused. Press enter to continue.\n')) 76 | line <- readLines(con = stdin(),1) 77 | 78 | # To give you an idea of the network's output, you can also run 79 | # through the examples one at the a time to see what it is predicting. 80 | 81 | # Randomly permute examples 82 | rp <- sample(m) 83 | # showing 5 random digits 84 | for (i in 1:5){ 85 | cat(sprintf('\nDisplaying Example Image. Press Ctrl-c to End\n')) 86 | displayData(X[rp[i], ]) 87 | pred <- predict(Theta1, Theta2, X[rp[i],]) 88 | cat(sprintf('\nNeural Network Prediction: %d (y %d) (digit %d)\n', pred ,y[rp[i]],pred%%10)) 89 | #cat(sprintf('Program paused. Press enter to continue.\n')) 90 | #line <- readLines(con = stdin(),1) 91 | if(interactive()) Sys.sleep(2) 92 | } 93 | -------------------------------------------------------------------------------- /3/ex3data1.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/faridcher/ml-course/65075c5427725bf2caeb25e5c6809ee34fcfdbe0/3/ex3data1.rda -------------------------------------------------------------------------------- /3/ex3weights.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/faridcher/ml-course/65075c5427725bf2caeb25e5c6809ee34fcfdbe0/3/ex3weights.rda -------------------------------------------------------------------------------- /3/lrCostFunction-solution.r: -------------------------------------------------------------------------------- 1 | lrCostFunction <- function(X, y, lambda) { 2 | #lrCostFunction Compute cost for logistic regression with 3 | #regularization 4 | # J <- lrCostFunction(X, y, lambda)(theta) computes the cost of using 5 | # theta as the parameter for regularized logistic regression. 6 | 7 | function(theta) { 8 | # Initialize some useful values 9 | m <- length(y) # number of training examples 10 | 11 | # You need to return the following variables correctly 12 | J <- 0 13 | 14 | # ----------------------- YOUR CODE HERE ----------------------- 15 | # Instructions: Compute the cost of a particular choice of theta. 16 | # You should set J to the cost. 17 | # Compute the partial derivatives and set grad to the partial 18 | # derivatives of the cost w.r.t. each parameter in theta 19 | # 20 | # Hint: The computation of the cost function and gradients can be 21 | # efficiently vectorized. For example, consider the computation 22 | # 23 | # sigmoid(X %*% theta) 24 | # 25 | # Each row of the resulting matrix will contain the value of the 26 | # prediction for that example. 
You can make use of this to vectorize 27 | # the cost function and gradient computations. 28 | # 29 | 30 | # calculate cost function 31 | h <- sigmoid(X %*% theta) 32 | # calculate penalty 33 | # excluded the first theta value 34 | theta1 <- c(0,c(theta[-1])) 35 | 36 | p <- lambda * (t(theta1) %*% theta1) / (2 * m) 37 | J <- -(t(y) %*% log(h) + t(1 - y) %*% log(1 - h)) / m + p 38 | J 39 | # -------------------------------------------------------------- 40 | } 41 | } 42 | 43 | lrGradFunction <- function(X, y, lambda) { 44 | #lrGradFunction Compute gradient for logistic regression with 45 | #regularization 46 | # J <- lrGradFunction( X, y, lambda)(theta) computes the 47 | # gradient of the cost w.r.t. to the parameters. 48 | function(theta) { 49 | # Initialize some useful values 50 | m <- length(y) # number of training examples 51 | 52 | # You need to return the following variables correctly 53 | 54 | grad <- matrix(0,length(theta)) 55 | 56 | # ----------------------- YOUR CODE HERE ----------------------- 57 | # Instructions: set grad to the partial 58 | # derivatives of the cost w.r.t. each parameter in theta 59 | # 60 | # Hint: The computation of the cost function and gradients can be 61 | # efficiently vectorized. For example, consider the computation 62 | # 63 | # sigmoid(X %*% theta) 64 | # 65 | # Each row of the resulting matrix will contain the value of the 66 | # prediction for that example. You can make use of this to vectorize 67 | # the cost function and gradient computations. 68 | # 69 | # Hint: When computing the gradient of the regularized cost function, 70 | # there're many possible vectorized solutions, but one solution 71 | # looks like: 72 | # grad <- (unregularized gradient for logistic regression) 73 | # temp <- theta 74 | # temp(1) <- 0; # because we don't add anything for j <- 0 75 | # grad <- grad + YOUR_CODE_HERE (using the temp variable) 76 | # 77 | 78 | # calculate cost function 79 | h <- sigmoid(X %*% theta) 80 | # calculate penalty 81 | # excluded the first theta value 82 | theta1 <- c(0,c(theta[-1])) 83 | # calculate grads 84 | grad <- (t(X) %*% (h - y) + lambda * theta1) / m 85 | grad 86 | # -------------------------------------------------------------- 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /3/lrCostFunction.r: -------------------------------------------------------------------------------- 1 | lrCostFunction <- function(X, y, lambda) { 2 | #lrCostFunction Compute cost for logistic regression with 3 | #regularization 4 | # J <- lrCostFunction(X, y, lambda)(theta) computes the cost of using 5 | # theta as the parameter for regularized logistic regression. 6 | 7 | function(theta) { 8 | # Initialize some useful values 9 | m <- length(y) # number of training examples 10 | 11 | # You need to return the following variables correctly 12 | J <- 0 13 | 14 | # ----------------------- YOUR CODE HERE ----------------------- 15 | # Instructions: Compute the cost of a particular choice of theta. 16 | # You should set J to the cost. 17 | # Compute the partial derivatives and set grad to the partial 18 | # derivatives of the cost w.r.t. each parameter in theta 19 | # 20 | # Hint: The computation of the cost function and gradients can be 21 | # efficiently vectorized. For example, consider the computation 22 | # 23 | # sigmoid(X %*% theta) 24 | # 25 | # Each row of the resulting matrix will contain the value of the 26 | # prediction for that example. 
You can make use of this to vectorize 27 | # the cost function and gradient computations. 28 | # 29 | 30 | J 31 | # -------------------------------------------------------------- 32 | } 33 | } 34 | 35 | lrGradFunction <- function(X, y, lambda) { 36 | #lrGradFunction Compute gradient for logistic regression with 37 | #regularization 38 | # J <- lrGradFunction( X, y, lambda)(theta) computes the 39 | # gradient of the cost w.r.t. to the parameters. 40 | function(theta) { 41 | # Initialize some useful values 42 | m <- length(y) # number of training examples 43 | 44 | # You need to return the following variables correctly 45 | 46 | grad <- matrix(0,length(theta)) 47 | 48 | # ----------------------- YOUR CODE HERE ----------------------- 49 | # Instructions: set grad to the partial 50 | # derivatives of the cost w.r.t. each parameter in theta 51 | # 52 | # Hint: The computation of the cost function and gradients can be 53 | # efficiently vectorized. For example, consider the computation 54 | # 55 | # sigmoid(X %*% theta) 56 | # 57 | # Each row of the resulting matrix will contain the value of the 58 | # prediction for that example. You can make use of this to vectorize 59 | # the cost function and gradient computations. 60 | # 61 | # Hint: When computing the gradient of the regularized cost function, 62 | # there're many possible vectorized solutions, but one solution 63 | # looks like: 64 | # grad <- (unregularized gradient for logistic regression) 65 | # temp <- theta 66 | # temp(1) <- 0; # because we don't add anything for j <- 0 67 | # grad <- grad + YOUR_CODE_HERE (using the temp variable) 68 | # 69 | 70 | 71 | 72 | grad 73 | # -------------------------------------------------------------- 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /3/nn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/faridcher/ml-course/65075c5427725bf2caeb25e5c6809ee34fcfdbe0/3/nn.png -------------------------------------------------------------------------------- /3/oneVsAll-solution.r: -------------------------------------------------------------------------------- 1 | oneVsAll <- function(X, y, num_labels, lambda) { 2 | #ONEVSALL trains multiple logistic regression classifiers and returns all 3 | #the classifiers in a matrix all_theta, where the i-th row of all_theta 4 | #corresponds to the classifier for label i 5 | # all_theta <- ONEVSALL(X, y, num_labels, lambda) trains num_labels 6 | # logisitc regression classifiers and returns each of these classifiers 7 | # in a matrix all_theta, where the i-th row of all_theta corresponds 8 | # to the classifier for label i 9 | 10 | # Some useful variables 11 | m <- dim(X)[1] 12 | n <- dim(X)[2] 13 | 14 | # You need to return the following variables correctly 15 | all_theta <- matrix(0,num_labels,n + 1) 16 | 17 | # Add ones to the X data matrix 18 | X <- cbind(rep(1,m),X) 19 | 20 | # ----------------------- YOUR CODE HERE ----------------------- 21 | # Instructions: You should complete the following code to train num_labels 22 | # logistic regression classifiers with regularization 23 | # parameter lambda. 24 | # 25 | # Hint: c(theta) will return a column vector. 26 | # 27 | # Hint: You can use y == c to obtain a vector of 1's and 0's that tell use 28 | # whether the ground truth is true/false for this class. 29 | # 30 | # Note: For this assignment, we recommend using optim to optimize the cost 31 | # function. 
It is okay to use a for-loop (for i in 1:num_labels) to 32 | # loop over the different classes. 33 | # 34 | # 35 | # Example Code for optim: 36 | # 37 | # # Set Initial theta 38 | # initial_theta <- rep(0,n + 1) 39 | # 40 | # # Run optim to obtain the optimal theta 41 | # # This function will return theta and the cost 42 | # opt <- optim( 43 | # initial_theta,lrCostFunction(X,y == i,lambda), 44 | # gr = lrGradFunction(X,y == i,lambda),method = "BFGS" 45 | # ,control = list(maxit = 50)) 46 | # 47 | 48 | for (i in 1:num_labels) 49 | { 50 | initial_theta <- rep(0,n + 1) 51 | opt <- optim( 52 | initial_theta,lrCostFunction(X,y == i,lambda), 53 | gr = lrGradFunction(X,y == i,lambda),method = "BFGS" 54 | ,control = list(maxit = 50) 55 | ) 56 | theta <- opt$par 57 | cat(sprintf("Iteration %d | Min Cost: %f\n",i,opt$value)) 58 | 59 | all_theta[i,] <- t(theta) 60 | } 61 | all_theta 62 | # -------------------------------------------------------------------------- 63 | 64 | } 65 | -------------------------------------------------------------------------------- /3/oneVsAll.r: -------------------------------------------------------------------------------- 1 | oneVsAll <- function(X, y, num_labels, lambda) { 2 | #ONEVSALL trains multiple logistic regression classifiers and returns all 3 | #the classifiers in a matrix all_theta, where the i-th row of all_theta 4 | #corresponds to the classifier for label i 5 | # all_theta <- ONEVSALL(X, y, num_labels, lambda) trains num_labels 6 | # logisitc regression classifiers and returns each of these classifiers 7 | # in a matrix all_theta, where the i-th row of all_theta corresponds 8 | # to the classifier for label i 9 | 10 | # Some useful variables 11 | m <- dim(X)[1] 12 | n <- dim(X)[2] 13 | 14 | # You need to return the following variables correctly 15 | all_theta <- matrix(0,num_labels,n + 1) 16 | 17 | # Add ones to the X data matrix 18 | X <- cbind(rep(1,m),X) 19 | 20 | # ----------------------- YOUR CODE HERE ----------------------- 21 | # Instructions: You should complete the following code to train num_labels 22 | # logistic regression classifiers with regularization 23 | # parameter lambda. 24 | # 25 | # Hint: c(theta) will return a column vector. 26 | # 27 | # Hint: You can use y == c to obtain a vector of 1's and 0's that tell use 28 | # whether the ground truth is true/false for this class. 29 | # 30 | # Note: For this assignment, we recommend using optim to optimize the cost 31 | # function. It is okay to use a for-loop (for i in 1:num_labels) to 32 | # loop over the different classes. 
33 | # 34 | # 35 | # Example Code for optim: 36 | # 37 | # # Set Initial theta 38 | # initial_theta <- rep(0,n + 1) 39 | # 40 | # # Run optim to obtain the optimal theta 41 | # # This function will return theta and the cost 42 | # opt <- optim( 43 | # initial_theta,lrCostFunction(X,y == i,lambda), 44 | # gr = lrGradFunction(X,y == i,lambda),method = "BFGS" 45 | # ,control = list(maxit = 50)) 46 | # 47 | 48 | all_theta 49 | # -------------------------------------------------------------------------- 50 | 51 | } 52 | -------------------------------------------------------------------------------- /3/predict-solution.r: -------------------------------------------------------------------------------- 1 | predict <- function(Theta1, Theta2, X) { 2 | #PREDICT Predict the label of an input given a trained neural network 3 | # p <- PREDICT(Theta1, Theta2, X) outputs the predicted label of X given the 4 | # trained weights of a neural network (Theta1, Theta2) 5 | 6 | # Useful values 7 | if (is.vector(X)) 8 | X <- t(X) 9 | 10 | m <- dim(X)[1] 11 | num_labels <- dim(Theta2)[1] 12 | 13 | # You need to return the following variables correctly 14 | p <- rep(0,m) 15 | 16 | # ----------------------- YOUR CODE HERE ----------------------- 17 | # Instructions: Complete the following code to make predictions using 18 | # your learned neural network. You should set p to a 19 | # vector containing labels between 1 to num_labels. 20 | # 21 | # Hint: The max function might come in useful. In particular, the which.max 22 | # function can return the index of the max element, for more 23 | # information see '?which.max'. If your examples are in rows, then, you 24 | # can use apply(A, 1, max) to obtain the max for each row. 25 | # 26 | 27 | a1 <- cbind(rep(1,m), X) 28 | z2 <- a1 %*% t(Theta1) 29 | a2 <- cbind(rep(1,dim(z2)[1]) ,sigmoid(z2)) 30 | z3 <- a2 %*% t(Theta2) 31 | a3 <- sigmoid(z3) 32 | 33 | p <- apply(a3,1,which.max) 34 | p 35 | # -------------------------------------------------------------------------- 36 | } 37 | -------------------------------------------------------------------------------- /3/predict.r: -------------------------------------------------------------------------------- 1 | predict <- function(Theta1, Theta2, X) { 2 | #PREDICT Predict the label of an input given a trained neural network 3 | # p <- PREDICT(Theta1, Theta2, X) outputs the predicted label of X given the 4 | # trained weights of a neural network (Theta1, Theta2) 5 | 6 | # Useful values 7 | if (is.vector(X)) 8 | X <- t(X) 9 | 10 | m <- dim(X)[1] 11 | num_labels <- dim(Theta2)[1] 12 | 13 | # You need to return the following variables correctly 14 | p <- rep(0,m) 15 | 16 | # ----------------------- YOUR CODE HERE ----------------------- 17 | # Instructions: Complete the following code to make predictions using 18 | # your learned neural network. You should set p to a 19 | # vector containing labels between 1 to num_labels. 20 | # 21 | # Hint: The max function might come in useful. In particular, the which.max 22 | # function can return the index of the max element, for more 23 | # information see '?which.max'. If your examples are in rows, then, you 24 | # can use apply(A, 1, max) to obtain the max for each row. 
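# For the weights supplied in ex3weights.rda, Theta1 is 25 x 401 and Theta2 is
# 10 x 26, so a column of ones (the bias unit) has to be prepended to the input
# of each layer before multiplying by t(Theta1) and t(Theta2).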
25 | # 26 | 27 | p 28 | # -------------------------------------------------------------------------- 29 | } 30 | -------------------------------------------------------------------------------- /3/predictOneVsAll-solution.r: -------------------------------------------------------------------------------- 1 | predictOneVsAll <- function(all_theta, X) { 2 | #PREDICT Predict whether the label is 0 or 1 using learned logistic 3 | #regression parameters all_theta 4 | # p <- PREDICT(all_theta, X) computes the predictions for X using a 5 | # threshold at 0.5 (i.e., if sigmoid(t(all_theta) %*% x) >= 0.5, predict 1) 6 | 7 | m <- dim(X)[1] 8 | num_labels <- dim(all_theta)[1] 9 | 10 | # You need to return the following variables correctly 11 | p <- rep(0,dim(X)[1]) 12 | 13 | # Add ones to the X data matrix 14 | X <- cbind(rep(1,m), X) 15 | 16 | # ----------------------- YOUR CODE HERE ----------------------- 17 | # Instructions: Complete the following code to make predictions using 18 | # your learned logistic regression parameters (one-vs-all). 19 | # You should set p to a vector of predictions (from 1 to 20 | # num_labels). 21 | # 22 | # Hint: This code can be done all vectorized using the which.max function. 23 | # In particular, the which.max function can return the index of the 24 | # max element, for more information see '?which.max'. If your examples 25 | # are in rows, then, you can use apply(A, 1, max) to obtain the max 26 | # for each row. 27 | # 28 | 29 | ps <- sigmoid(X %*% t(all_theta)) 30 | 31 | i_max <- apply(ps,1, which.max) 32 | p <- i_max 33 | 34 | p 35 | # -------------------------------------------------------------------------- 36 | } 37 | -------------------------------------------------------------------------------- /3/predictOneVsAll.r: -------------------------------------------------------------------------------- 1 | predictOneVsAll <- function(all_theta, X) { 2 | #PREDICT Predict whether the label is 0 or 1 using learned logistic 3 | #regression parameters all_theta 4 | # p <- PREDICT(all_theta, X) computes the predictions for X using a 5 | # threshold at 0.5 (i.e., if sigmoid(t(all_theta) %*% x) >= 0.5, predict 1) 6 | 7 | m <- dim(X)[1] 8 | num_labels <- dim(all_theta)[1] 9 | 10 | # You need to return the following variables correctly 11 | p <- rep(0,dim(X)[1]) 12 | 13 | # Add ones to the X data matrix 14 | X <- cbind(rep(1,m), X) 15 | 16 | # ----------------------- YOUR CODE HERE ----------------------- 17 | # Instructions: Complete the following code to make predictions using 18 | # your learned logistic regression parameters (one-vs-all). 19 | # You should set p to a vector of predictions (from 1 to 20 | # num_labels). 21 | # 22 | # Hint: This code can be done all vectorized using the which.max function. 23 | # In particular, the which.max function can return the index of the 24 | # max element, for more information see '?which.max'. If your examples 25 | # are in rows, then, you can use apply(A, 1, max) to obtain the max 26 | # for each row. 
27 | # 28 | 29 | 30 | p 31 | # -------------------------------------------------------------------------- 32 | } 33 | -------------------------------------------------------------------------------- /3/submit.r: -------------------------------------------------------------------------------- 1 | submit = function(){ 2 | source("../lib/submitWithConfiguration.r") 3 | 4 | conf = list() 5 | 6 | conf$assignmentSlug = 'multi-class-classification-and-neural-networks'; 7 | conf$itemName = 'Multi-class Classification and Neural Networks'; 8 | conf$partArrays = c( 9 | '1', 10 | 'lrCostFunction.r' , 11 | 'Regularized Logistic Regression', 12 | '2', 13 | 'oneVsAll.r' , 14 | 'One-vs-All Classifier Training', 15 | '3', 16 | 'predictOneVsAll.r' , 17 | 'One-vs-All Classifier Prediction', 18 | '4', 19 | 'predict.r' , 20 | 'Neural Network Prediction Function') 21 | 22 | conf$partArrays = matrix(conf$partArrays, ncol = 3 , byrow = T) 23 | 24 | conf$output = output; 25 | 26 | submitWithConfiguration(conf); 27 | } 28 | 29 | output = function(partId, auxstring) { 30 | source("lrCostFunction.r") 31 | source("oneVsAll.r") 32 | source("predictOneVsAll.r") 33 | source("predict.r") 34 | source("sigmoid.r") 35 | # Random Test Cases 36 | X = cbind(rep(1,20), exp(1) * sin(seq(1,20,1)), exp(0.5) * cos(seq(1,20,1)) ) 37 | y = (sin(X[,1]+X[,2]) > 0) 38 | 39 | Xm = c(-1, -1 , -1 ,-2 , -2, -1 , -2, -2 , 40 | 1, 1 , 1, 2 , 2, 1 , 2, 2 , 41 | -1, 1 , -1, 2 , -2, 1 , -2, 2 , 42 | 1, -1 , 1, -2 , -2, -1 , -2 ,-2 ) 43 | Xm = matrix(Xm,ncol = 2, byrow = T) 44 | 45 | ym = c(1, 1, 1 ,1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4 ,4, 4 ) 46 | t1 = sin(matrix(seq(1,24,2), 4, 3)) 47 | t2 = cos(matrix(seq(1,40,2), 4, 5)) 48 | theta = c(.25,.5,-.5) 49 | 50 | if (partId == '1'){ 51 | out = sprintf('%0.5f ', lrCostFunction(X, y, 0.1)(theta) ) 52 | out = paste0(c(out,sprintf('%0.5f ',lrGradFunction(X, y, 0.1)(theta))),collapse = '') 53 | }else if (partId == '2') 54 | out = paste0(sprintf('%0.5f ', oneVsAll(Xm, ym, 4, 0.1)), collapse = '') 55 | else if (partId == '3') 56 | out = paste0(sprintf('%0.5f ', predictOneVsAll(t1, Xm)), collapse = '') 57 | else if (partId == '4') 58 | out = paste0(sprintf('%0.5f ', predict(t1, t2, Xm)), collapse = '') 59 | } 60 | -------------------------------------------------------------------------------- /4/checkNNGradients.r: -------------------------------------------------------------------------------- 1 | checkNNGradients <- function (lambda = 0) { 2 | #CHECKNNGRADIENTS Creates a small neural network to check the 3 | #backpropagation gradients 4 | # CHECKNNGRADIENTS(lambda) Creates a small neural network to check the 5 | # backpropagation gradients, it will output the analytical gradients 6 | # produced by your backprop code and the numerical gradients (computed 7 | # using computeNumericalGradient). These two gradient computations should 8 | # result in very similar values. 
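# The check below builds a deliberately tiny 3-input / 5-hidden / 3-output
# network with only 5 training examples, so the numerical gradient stays cheap
# to compute.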
9 | # 10 | 11 | input_layer_size <- 3 12 | hidden_layer_size <- 5 13 | num_labels <- 3 14 | m <- 5 15 | 16 | # We generate some 'random' test data 17 | Theta1 <- debugInitializeWeights(hidden_layer_size, input_layer_size) 18 | Theta2 <- debugInitializeWeights(num_labels, hidden_layer_size) 19 | # Reusing debugInitializeWeights to generate X 20 | X <- debugInitializeWeights(m, input_layer_size - 1) 21 | y <- 1 + t(1:m %% num_labels) 22 | 23 | # Unroll parameters 24 | nn_params <- c(Theta1,Theta2) 25 | 26 | # Short hand for cost function 27 | costFunc <- nnCostFunction(input_layer_size, hidden_layer_size, 28 | num_labels, X, y, lambda) 29 | 30 | cost <- costFunc(nn_params) 31 | grad <- nnGradFunction(input_layer_size, hidden_layer_size, 32 | num_labels, X, y, lambda)(nn_params) 33 | 34 | numgrad <- computeNumericalGradient(costFunc, nn_params) 35 | 36 | # Visually examine the two gradient computations. The two columns 37 | # you get should be very similar. 38 | print(cbind(numgrad, grad)) 39 | cat( 40 | sprintf( 41 | 'The above two columns you get should be very similar. 42 | (Left-Your Numerical Gradient, Right-Analytical Gradient)\n\n' 43 | ) 44 | ) 45 | 46 | # Evaluate the norm of the difference between two solutions. 47 | # If you have a correct implementation, and assuming you used EPSILON <- 0.0001 48 | # in computeNumericalGradient.r, then diff below should be less than 1e-9 49 | diff <- 50 | norm(as.matrix(numgrad - grad)) / norm(as.matrix(numgrad + grad)) 51 | 52 | cat( 53 | sprintf( 54 | 'If your backpropagation implementation is correct, then 55 | the relative difference will be small (less than 1e-9). 56 | Relative Difference: %g', diff 57 | ) 58 | ) 59 | } 60 | -------------------------------------------------------------------------------- /4/computeNumericalGradient.r: -------------------------------------------------------------------------------- 1 | computeNumericalGradient <- function(J, theta) { 2 | #COMPUTENUMERICALGRADIENT Computes the gradient using "finite differences" 3 | #and gives us a numerical estimate of the gradient. 4 | # numgrad <- COMPUTENUMERICALGRADIENT(J, theta) computes the numerical 5 | # gradient of the J around theta. Calling y <- function(theta) should 6 | # return the function value at theta. 7 | 8 | # Notes: The following code implements numerical gradient checking, and 9 | # returns the numerical gradient.It sets numgrad[i] to (a numerical 10 | # approximation of) the partial derivative of J with respect to the 11 | # i-th input argument, evaluated at theta. (i.e., numgrad[i] should 12 | # be the (approximately) the partial derivative of J with respect 13 | # to theta[i].) 
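# Concretely, for each parameter p the loop below forms the central difference
#   numgrad[p] = (J(theta + e * u_p) - J(theta - e * u_p)) / (2 * e),  e = 1e-4,
# where u_p is the p-th standard basis vector (the 'perturb' vector).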
14 | # 15 | 16 | numgrad <- rep(0,length(theta)) 17 | perturb <- rep(0,length(theta)) 18 | e <- 1e-4 19 | for (p in 1:length(theta)) { 20 | # Set perturbation vector 21 | perturb[p] <- e 22 | loss1 <- J(theta - perturb) 23 | loss2 <- J(theta + perturb) 24 | # Compute Numerical Gradient 25 | numgrad[p] <- (loss2 - loss1) / (2 * e) 26 | perturb[p] <- 0 27 | } 28 | 29 | numgrad 30 | } 31 | -------------------------------------------------------------------------------- /4/debugInitializeWeights.r: -------------------------------------------------------------------------------- 1 | debugInitializeWeights <- function(fan_out, fan_in) { 2 | #DEBUGINITIALIZEWEIGHTS Initialize the weights of a layer with fan_in 3 | #incoming connections and fan_out outgoing connections using a fixed 4 | #strategy, this will help you later in debugging 5 | # W <- DEBUGINITIALIZEWEIGHTS(fan_in, fan_out) initializes the weights 6 | # of a layer with fan_in incoming connections and fan_out outgoing 7 | # connections using a fix set of values 8 | # 9 | # Note that W should be set to a matrix of size(1 + fan_in, fan_out) as 10 | # the first row of W handles the "bias" terms 11 | # 12 | 13 | # Set W to zeros 14 | W <- matrix(0,fan_out,1 + fan_in) 15 | 16 | # Initialize W using "sin", this ensures that W is always of the same 17 | # values and will be useful for debugging 18 | W <- matrix(sin(1:length(W)), dim(W)[1],dim(W)[2]) / 10 19 | W 20 | # -------------------------------------------------------------------------- 21 | 22 | } 23 | -------------------------------------------------------------------------------- /4/displayData.r: -------------------------------------------------------------------------------- 1 | displayData <- function(X, example_width = round(sqrt(dim(X)[2]))) { 2 | #DISPLAYDATA Display 2D data in a nice grid 3 | # [h, display_array] <- DISPLAYDATA(X, example_width) displays 2D data 4 | # stored in X in a nice grid. It returns the figure handle h and the 5 | # displayed array if requested. 
6 | 7 | # Set example_width automatically if not passed in 8 | #example_width = round(sqrt(dim(X)[2])) #20 9 | 10 | if (is.vector(X)) 11 | X <- t(X) 12 | 13 | # Compute rows, cols 14 | m <- dim(X)[1] 15 | n <- dim(X)[2] 16 | 17 | example_height <- (n / example_width) #20 18 | 19 | # Compute number of items to display 20 | display_rows <- floor(sqrt(m)) #10 21 | display_cols <- ceiling(m / display_rows) #10 22 | 23 | # Between images padding 24 | pad <- 1 25 | 26 | # Setup blank display 27 | display_array <- -matrix(0,pad + display_rows * (example_height + pad), 28 | pad + display_cols * (example_width + pad)) 29 | 30 | # Copy each example into a patch on the display array 31 | curr_ex <- 1 32 | for (j in 1:display_rows) { 33 | for (i in 1:display_cols) { 34 | if (curr_ex > m) 35 | break 36 | # Copy the patch 37 | 38 | # Get the max value of the patch 39 | max_val <- max(abs(X[curr_ex,])) 40 | display_array[pad + (j - 1) * (example_height + pad) + (1:example_height), 41 | pad + (i - 1) * (example_width + pad) + (1:example_width)] <- 42 | matrix(X[curr_ex,], example_height, example_width) / max_val 43 | curr_ex <- curr_ex + 1 44 | } 45 | if (curr_ex > m) 46 | break 47 | } 48 | # Display Image 49 | #par("bg") 50 | op <- par(bg = "gray",mar=rep(.1,4)) 51 | #It's confusing because it draws by row from bottom up, but R indexes matrices by column, top down 52 | display_array <- t(apply(display_array,2,rev)) 53 | 54 | image(z = display_array,col = gray.colors(100), xaxt = 'n',yaxt = 'n') 55 | grid(nx = display_cols,display_rows,col = 'black',lwd = 2,lty = 1) 56 | box() 57 | par(op) 58 | } 59 | -------------------------------------------------------------------------------- /4/ex4.rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | -------------------------------------------------------------------------------- /4/ex4data1.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/faridcher/ml-course/65075c5427725bf2caeb25e5c6809ee34fcfdbe0/4/ex4data1.rda -------------------------------------------------------------------------------- /4/ex4weights.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/faridcher/ml-course/65075c5427725bf2caeb25e5c6809ee34fcfdbe0/4/ex4weights.rda -------------------------------------------------------------------------------- /4/nn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/faridcher/ml-course/65075c5427725bf2caeb25e5c6809ee34fcfdbe0/4/nn.png -------------------------------------------------------------------------------- /4/nnbp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/faridcher/ml-course/65075c5427725bf2caeb25e5c6809ee34fcfdbe0/4/nnbp.png -------------------------------------------------------------------------------- /4/predict.r: -------------------------------------------------------------------------------- 1 | predict <- function(Theta1, Theta2, X) { 2 | #PREDICT Predict the label of an input given a trained neural network 3 | # p <- PREDICT(Theta1, Theta2, X) outputs the predicted label 
of X given the 4 | # trained weights of a neural network (Theta1, Theta2) 5 | 6 | if (is.vector(X)) 7 | X <- t(X) 8 | 9 | # Useful values 10 | m <- dim(X)[1] 11 | num_labels <- dim(Theta2)[1] 12 | 13 | # You need to return the following variables correctly 14 | p <- rep(0,dim(X)[1]) 15 | 16 | h1 <- sigmoid(cbind(rep(1,m),X) %*% t(Theta1)) 17 | h2 <- sigmoid(cbind(rep(1,m),h1) %*% t(Theta2)) 18 | 19 | p <- apply(h2,1,which.max) 20 | p 21 | # -------------------------------------------------------------------------- 22 | 23 | } 24 | -------------------------------------------------------------------------------- /4/randInitializeWeights-solution.r: -------------------------------------------------------------------------------- 1 | randInitializeWeights <- function(L_in, L_out) { 2 | #RANDINITIALIZEWEIGHTS Randomly initialize the weights of a layer with L_in 3 | #incoming connections and L_out outgoing connections 4 | # W <- RANDINITIALIZEWEIGHTS(L_in, L_out) randomly initializes the weights 5 | # of a layer with L_in incoming connections and L_out outgoing 6 | # connections. 7 | # 8 | # Note that W should be set to a matrix of size (L_out, 1 + L_in) as 9 | # the first row of W handles the "bias" terms 10 | # 11 | 12 | # You need to return the following variables correctly 13 | W <- matrix(0,L_out, 1 + L_in) 14 | 15 | # ----------------------- YOUR CODE HERE ----------------------- 16 | # Instructions: Initialize W randomly so that we break the symmetry while 17 | # training the neural network. 18 | # 19 | # Note: The first row of W corresponds to the parameters for the bias units 20 | # 21 | 22 | epsilon_init <- 0.12 23 | rnd <- runif(L_out * (1 + L_in)) 24 | rnd <- matrix(rnd,L_out,1 + L_in) 25 | W <- rnd * 2 * epsilon_init - epsilon_init 26 | W 27 | # -------------------------------------------------------------------------- 28 | 29 | } 30 | -------------------------------------------------------------------------------- /4/randInitializeWeights.r: -------------------------------------------------------------------------------- 1 | randInitializeWeights <- function(L_in, L_out) { 2 | #RANDINITIALIZEWEIGHTS Randomly initialize the weights of a layer with L_in 3 | #incoming connections and L_out outgoing connections 4 | # W <- RANDINITIALIZEWEIGHTS(L_in, L_out) randomly initializes the weights 5 | # of a layer with L_in incoming connections and L_out outgoing 6 | # connections. 7 | # 8 | # Note that W should be set to a matrix of size (L_out, 1 + L_in) as 9 | # the first row of W handles the "bias" terms 10 | # 11 | 12 | # You need to return the following variables correctly 13 | W <- matrix(0,L_out, 1 + L_in) 14 | 15 | # ----------------------- YOUR CODE HERE ----------------------- 16 | # Instructions: Initialize W randomly so that we break the symmetry while 17 | # training the neural network. 18 | # 19 | # Note: The first row of W corresponds to the parameters for the bias units 20 | # 21 | 22 | epsilon_init <- 0.12 23 | 24 | 25 | W 26 | # -------------------------------------------------------------------------- 27 | 28 | } 29 | -------------------------------------------------------------------------------- /4/sigmoidGradient-solution.r: -------------------------------------------------------------------------------- 1 | sigmoidGradient <- function(z) { 2 | #SIGMOIDGRADIENT returns the gradient of the sigmoid function 3 | #evaluated at z 4 | # g <- SIGMOIDGRADIENT(z) computes the gradient of the sigmoid function 5 | # evaluated at z. 
This should work regardless if z is a matrix or a 6 | # vector. In particular, if z is a vector or matrix, you should return 7 | # the gradient for each element. 8 | 9 | #g <- rep(0,) zeros(size(z)) 10 | 11 | # ----------------------- YOUR CODE HERE ----------------------- 12 | # Instructions: Compute the gradient of the sigmoid function evaluated at 13 | # each value of z (z can be a matrix, vector or scalar). 14 | 15 | g <- sigmoid(z) * (1 - sigmoid(z)) 16 | g 17 | # -------------------------------------------------------------- 18 | } 19 | -------------------------------------------------------------------------------- /4/sigmoidGradient.r: -------------------------------------------------------------------------------- 1 | sigmoidGradient <- function(z) { 2 | #SIGMOIDGRADIENT returns the gradient of the sigmoid function 3 | #evaluated at z 4 | # g <- SIGMOIDGRADIENT(z) computes the gradient of the sigmoid function 5 | # evaluated at z. This should work regardless if z is a matrix or a 6 | # vector. In particular, if z is a vector or matrix, you should return 7 | # the gradient for each element. 8 | 9 | #g <- rep(0,) zeros(size(z)) 10 | 11 | # ----------------------- YOUR CODE HERE ----------------------- 12 | # Instructions: Compute the gradient of the sigmoid function evaluated at 13 | # each value of z (z can be a matrix, vector or scalar). 14 | 15 | g 16 | # -------------------------------------------------------------- 17 | } 18 | -------------------------------------------------------------------------------- /4/submit.r: -------------------------------------------------------------------------------- 1 | submit = function(){ 2 | source("../lib/submitWithConfiguration.r") 3 | 4 | conf = list() 5 | 6 | conf$assignmentSlug = 'neural-network-learning'; 7 | conf$itemName = 'Neural Networks Learning'; 8 | conf$partArrays = c( 9 | '1', 10 | 'nnCostFunction.r' , 11 | 'Feedforward and Cost Function', 12 | '2', 13 | 'nnCostFunction.r' , 14 | 'Regularized Cost Function', 15 | '3', 16 | 'sigmoidGradient.r' , 17 | 'Sigmoid Gradient', 18 | '4', 19 | 'nnCostFunction.r' , 20 | 'Neural Network Gradient (Backpropagation)', 21 | '5', 22 | 'nnCostFunction.r' , 23 | 'Regularized Gradient') 24 | conf$partArrays = matrix(conf$partArrays, ncol = 3 , byrow = T) 25 | 26 | conf$output = output 27 | 28 | submitWithConfiguration(conf); 29 | } 30 | 31 | output = function(partId, auxstring) { 32 | source("nnCostFunction.r") 33 | source("sigmoidGradient.r") 34 | 35 | # Random Test Cases 36 | X = matrix(3 * sin(seq(1,30,1)), 3, 10); 37 | Xm = matrix(sin(seq(1,32,1)), 16, 2) / 5; 38 | ym = 1 + (seq(1,16) %% 4) 39 | t1 = sin(matrix(seq(1,24,2),4,3)) 40 | t2 = cos(matrix(seq(1,40,2),4,5)) 41 | t = c(c(t1),c(t2)) 42 | 43 | if (partId == '1') 44 | out = sprintf('%0.5f ', nnCostFunction(2, 4, 4, Xm, ym, 0)(t)) 45 | else if (partId == '2') 46 | out = sprintf('%0.5f ', nnCostFunction(2, 4, 4, Xm, ym, 1.5)(t)) 47 | else if (partId == '3') 48 | out = paste0(sprintf('%0.5f ', sigmoidGradient(X)), collapse = '') 49 | else if (partId == '4'){ 50 | out = sprintf('%0.5f ', nnCostFunction(2, 4, 4, Xm, ym, 0)(t)) 51 | out = paste0(c(out, sprintf('%0.5f ', nnGradFunction(2, 4, 4, Xm, ym, 0)(t)) ), 52 | collapse = '') 53 | }else if (partId == '5'){ 54 | out = sprintf('%0.5f ', nnCostFunction(2, 4, 4, Xm, ym, 1.5)(t)) 55 | out = paste0(c(out, sprintf('%0.5f ', nnGradFunction(2, 4, 4, Xm, ym, 1.5)(t)) ), 56 | collapse = '') 57 | } 58 | 59 | } 60 | -------------------------------------------------------------------------------- 
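All of the cost and gradient files in these exercises follow the same R idiom: the file defines a factory that closes over the data (X, y, lambda) and returns a function of theta alone, and that closure is what gets handed to optim. Below is a minimal, self-contained sketch of that pattern on a toy least-squares problem; the names quadCost and quadGrad and the toy data are invented for illustration and do not correspond to any file in this repository.

# Factory functions: close over the data and return functions of theta only,
# mirroring the shape of lrCostFunction(X, y, lambda)(theta) used above.
quadCost <- function(X, y) {
  function(theta) {
    d <- X %*% theta - y
    sum(d ^ 2) / (2 * length(y))
  }
}

quadGrad <- function(X, y) {
  function(theta) {
    c(t(X) %*% (X %*% theta - y)) / length(y)
  }
}

X <- cbind(1, 1:5)     # toy design matrix: intercept column plus one feature
y <- 3 + 2 * (1:5)     # toy response generated from theta = c(3, 2)

opt <- optim(c(0, 0), quadCost(X, y), gr = quadGrad(X, y), method = "BFGS")
opt$par                # converges to roughly c(3, 2)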
/5/ex5.rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | -------------------------------------------------------------------------------- /5/ex5data1.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/faridcher/ml-course/65075c5427725bf2caeb25e5c6809ee34fcfdbe0/5/ex5data1.rda -------------------------------------------------------------------------------- /5/featureNormalize.r: -------------------------------------------------------------------------------- 1 | featureNormalize <- function(X) { 2 | #FEATURENORMALIZE Normalizes the features in X 3 | # FEATURENORMALIZE(X) returns a normalized version of X where 4 | # the mean value of each feature is 0 and the standard deviation 5 | # is 1. This is often a good preprocessing step to do when 6 | # working with learning algorithms. 7 | 8 | mu <- colMeans(X) 9 | #vectorized multivariate apply 10 | X_norm <- matrix(mapply(`-`,t(X),mu),dim(X) ,byrow = TRUE) 11 | 12 | sigma <- apply(X,2,sd) 13 | X_norm <- matrix(mapply(`/`,t(X_norm),sigma),dim(X) ,byrow = TRUE) 14 | 15 | list(X_norm=X_norm, mu=mu, sigma=sigma) 16 | } 17 | -------------------------------------------------------------------------------- /5/learningCurve-solution.r: -------------------------------------------------------------------------------- 1 | learningCurve <- function(X, y, Xval, yval, lambda) { 2 | #LEARNINGCURVE Generates the train and cross validation set errors needed 3 | #to plot a learning curve 4 | # error_train_error_val <- ... 5 | # LEARNINGCURVE(X, y, Xval, yval, lambda) returns the train and 6 | # cross validation set errors for a learning curve. In particular, 7 | # it returns two vectors of the same length - error_train and 8 | # error_val. Then, error_train[i] contains the training error for 9 | # i examples (and similarly for error_val[i]). 10 | # 11 | # In this function, you will compute the train and test errors for 12 | # dataset sizes from 1 up to m. In practice, when working with larger 13 | # datasets, you might want to do this in larger intervals. 14 | # 15 | 16 | # Number of training examples 17 | m <- dim(X)[1] 18 | 19 | # You need to return these values correctly as a list 20 | error_train <- rep(0,m) 21 | error_val <- rep(0,m) 22 | 23 | # ---------------------- YOUR CODE HERE ---------------------- 24 | # Instructions: Fill in this function to return training errors in 25 | # error_train and the cross validation errors in error_val. 26 | # The vector numex_vec contains the number of training 27 | # examples to use for each calculation of training error and 28 | # cross validation error, i.e, error_train[i] and 29 | # error_val[i] should give you the errors 30 | # obtained after training on i examples. 31 | # 32 | # Note: You should evaluate the training error on the first i training 33 | # examples (i.e., X[1:i, ] and y[1:i]). 34 | # 35 | # For the cross-validation error, you should instead evaluate on 36 | # the _entire_ cross validation set (Xval and yval). 37 | # 38 | # Note: If you are using your cost function (linearRegCostFunction) 39 | # to compute the training and cross validation error, you should 40 | # call the function with the lambda argument set to 0. 
41 | # Do note that you will still need to use lambda when running 42 | # the training to obtain the theta parameters. 43 | # 44 | # Hint: You can loop over the examples with the following: 45 | # 46 | # for (i in 1:m){ 47 | # # Compute train/cross validation errors using training examples 48 | # # X[1:i, ] and y[1:i], storing the result in 49 | # # error_train[i] and error_val[i] 50 | # .... 51 | # 52 | # } 53 | # 54 | 55 | for (i in 1:m) { 56 | theta <- trainLinearReg(X[1:i,], y[1:i,], lambda) 57 | 58 | #For training set 59 | h <- X[1:i,] %*% theta 60 | error_train[i] <- (1 / (2 * i)) * sum((h - y[1:i,]) ^ 2) #cost 61 | 62 | #For CV set 63 | h <- Xval %*% theta 64 | error_val[i] <- (1 / (2 * m)) * sum((h - yval) ^ 2) 65 | } 66 | 67 | list(error_train = error_train, error_val = error_val) 68 | # -------------------------------------------------------------- 69 | } 70 | -------------------------------------------------------------------------------- /5/learningCurve.r: -------------------------------------------------------------------------------- 1 | learningCurve <- function(X, y, Xval, yval, lambda) { 2 | #LEARNINGCURVE Generates the train and cross validation set errors needed 3 | #to plot a learning curve 4 | # error_train_error_val <- ... 5 | # LEARNINGCURVE(X, y, Xval, yval, lambda) returns the train and 6 | # cross validation set errors for a learning curve. In particular, 7 | # it returns two vectors of the same length - error_train and 8 | # error_val. Then, error_train[i] contains the training error for 9 | # i examples (and similarly for error_val[i]). 10 | # 11 | # In this function, you will compute the train and test errors for 12 | # dataset sizes from 1 up to m. In practice, when working with larger 13 | # datasets, you might want to do this in larger intervals. 14 | # 15 | 16 | # Number of training examples 17 | m <- dim(X)[1] 18 | 19 | # You need to return these values correctly as a list 20 | error_train <- rep(0,m) 21 | error_val <- rep(0,m) 22 | 23 | # ---------------------- YOUR CODE HERE ---------------------- 24 | # Instructions: Fill in this function to return training errors in 25 | # error_train and the cross validation errors in error_val. 26 | # The vector numex_vec contains the number of training 27 | # examples to use for each calculation of training error and 28 | # cross validation error, i.e, error_train[i] and 29 | # error_val[i] should give you the errors 30 | # obtained after training on i examples. 31 | # 32 | # Note: You should evaluate the training error on the first i training 33 | # examples (i.e., X[1:i, ] and y[1:i]). 34 | # 35 | # For the cross-validation error, you should instead evaluate on 36 | # the _entire_ cross validation set (Xval and yval). 37 | # 38 | # Note: If you are using your cost function (linearRegCostFunction) 39 | # to compute the training and cross validation error, you should 40 | # call the function with the lambda argument set to 0. 41 | # Do note that you will still need to use lambda when running 42 | # the training to obtain the theta parameters. 43 | # 44 | # Hint: You can loop over the examples with the following: 45 | # 46 | # for (i in 1:m){ 47 | # # Compute train/cross validation errors using training examples 48 | # # X[1:i, ] and y[1:i], storing the result in 49 | # # error_train[i] and error_val[i] 50 | # .... 
51 | # 52 | # } 53 | # 54 | 55 | 56 | 57 | list(error_train = error_train, error_val = error_val) 58 | # -------------------------------------------------------------- 59 | } 60 | -------------------------------------------------------------------------------- /5/linearRegCostFunction-solution.r: -------------------------------------------------------------------------------- 1 | linearRegCostFunction <- function(X, y, lambda) { 2 | #linearRegCostFunction Compute cost for regularized linear 3 | #regression with multiple variables 4 | # J <- linearRegCostFunction(X, y, lambda)(theta) computes the 5 | # cost of using theta as the parameter for linear regression to fit the 6 | # data points in X and y. Returns the cost in J 7 | 8 | function(theta) { 9 | # Initialize some useful values 10 | m <- length(y) # number of training examples 11 | 12 | # You need to return the following variables correctly 13 | J <- 0 14 | # ----------------------- YOUR CODE HERE ----------------------- 15 | # Instructions: Compute the cost of regularized linear 16 | # regression for a particular choice of theta. 17 | # 18 | # You should set J to the cost. 19 | # 20 | h = X %*% theta 21 | thetas <- theta[-1] 22 | J <- 1 / (2 * m) * sum((h - y) ^ 2) + (lambda / (2 * m)) * sum(thetas ^ 2) 23 | J 24 | # -------------------------------------------------------------- 25 | } 26 | } 27 | 28 | linearRegGradFunction <- function(X, y, lambda) { 29 | #linearRegGradFunction Compute gradient for regularized linear 30 | #regression with multiple variables 31 | # grad <- linearRegGradFunction(X, y, lambda)(theta) computes the 32 | # gradient of using theta as the parameter for linear regression to fit the 33 | # data points in X and y. Returns the gradient in grad 34 | 35 | function(theta) { 36 | # Initialize some useful values 37 | m <- length(y) # number of training examples 38 | 39 | # You need to return the following variables correctly 40 | grad <- rep(0,length(theta)) 41 | # ---------------------- YOUR CODE HERE ---------------------- 42 | # Instructions: Compute the gradient of regularized linear 43 | # regression for a particular choice of theta. 44 | # 45 | # You should set grad to the gradient. 46 | # 47 | h = X %*% theta 48 | 49 | # Calculate gradient 50 | temp <- theta 51 | temp[1] <- 0 52 | grad <- (1 / m) * (t(X) %*% (h - y) + lambda * temp) 53 | grad 54 | # -------------------------------------------------------------- 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /5/linearRegCostFunction.r: -------------------------------------------------------------------------------- 1 | linearRegCostFunction <- function(X, y, lambda) { 2 | #linearRegCostFunction Compute cost for regularized linear 3 | #regression with multiple variables 4 | # J <- linearRegCostFunction(X, y, lambda)(theta) computes the 5 | # cost of using theta as the parameter for linear regression to fit the 6 | # data points in X and y. Returns the cost in J 7 | 8 | function(theta) { 9 | # Initialize some useful values 10 | m <- length(y) # number of training examples 11 | 12 | # You need to return the following variables correctly 13 | J <- 0 14 | # ----------------------- YOUR CODE HERE ----------------------- 15 | # Instructions: Compute the cost of regularized linear 16 | # regression for a particular choice of theta. 17 | # 18 | # You should set J to the cost. 
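# For reference, a sketch of the regularized cost and gradient this pair of
# functions is meant to compute (written in this file's own notation; the
# intercept theta[1] is not regularized):
#   h    <- X %*% theta
#   J    <- sum((h - y)^2) / (2 * m) + lambda * sum(theta[-1]^2) / (2 * m)
#   grad <- (t(X) %*% (h - y)) / m + (lambda / m) * c(0, theta[-1])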
19 | # 20 | h = X %*% theta 21 | thetas <- theta[-1] 22 | J <- 23 | 1 / (2 * m) * sum((h - y) ^ 2) + (lambda / (2 * m)) * sum(theta ^ 2) 24 | J 25 | # -------------------------------------------------------------- 26 | } 27 | } 28 | 29 | linearRegGradFunction <- function(X, y, lambda) { 30 | #linearRegGradFunction Compute gradient for regularized linear 31 | #regression with multiple variables 32 | # grad <- linearRegGradFunction(X, y, lambda)(theta) computes the 33 | # gradient of using theta as the parameter for linear regression to fit the 34 | # data points in X and y. Returns the gradient in grad 35 | 36 | function(theta) { 37 | # Initialize some useful values 38 | m <- length(y) # number of training examples 39 | 40 | # You need to return the following variables correctly 41 | grad <- rep(0,length(theta)) 42 | # ---------------------- YOUR CODE HERE ---------------------- 43 | # Instructions: Compute the gradient of regularized linear 44 | # regression for a particular choice of theta. 45 | # 46 | # You should set grad to the gradient. 47 | # 48 | 49 | 50 | grad 51 | # -------------------------------------------------------------- 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /5/plotFit.r: -------------------------------------------------------------------------------- 1 | plotFit <- function (min_x, max_x, mu, sigma, theta, p) { 2 | #PLOTFIT Plots a learned polynomial regression fit over an existing figure. 3 | #Also works with linear regression. 4 | # PLOTFIT(min_x, max_x, mu, sigma, theta, p) plots the learned polynomial 5 | # fit with power p and feature normalization (mu, sigma). 6 | 7 | # We plot a range slightly bigger than the min and max values to get 8 | # an idea of how the fit will vary outside the range of the data points 9 | x <- seq(min_x - 15, max_x + 25, 0.05) 10 | 11 | # Map the X values 12 | X_poly <- polyFeatures(x, p) 13 | X_poly <- matrix(mapply(`-`,t(X_poly),mu),dim(X_poly) ,byrow = TRUE) 14 | X_poly <- matrix(mapply(`/`,t(X_poly),sigma),dim(X_poly) ,byrow = TRUE) 15 | 16 | # Add ones 17 | X_poly <- cbind(rep(1,dim(X_poly)[1]), X_poly) 18 | plot(x, X_poly %*% theta, lwd = 2,type = 'l', lty = 2, col = "blue", 19 | xlab = "change in water level (x)", ylab = "water flowing out of the dam (y)") 20 | mtext(sprintf('Polynomial Regression Fit\n (lambda = %3.3f)', lambda)) 21 | } 22 | -------------------------------------------------------------------------------- /5/polyFeatures-solution.r: -------------------------------------------------------------------------------- 1 | polyFeatures <- function(X, p) { 2 | #POLYFEATURES Maps X (1D vector) into the p-th power 3 | # [X_poly] <- POLYFEATURES(X, p) takes a data matrix X (size m x 1) and 4 | # maps each example into its polynomial features where 5 | # X_poly[i, ] <- [X[i] X[i].^2 X[i].^3 ... X[i].^p] 6 | # 7 | 8 | 9 | # You need to return the following variables correctly. 10 | X_poly <- matrix(0,length(X), p) 11 | 12 | # ----------------------- YOUR CODE HERE ----------------------- 13 | # Instructions: Given a vector X, return a matrix X_poly where the p-th 14 | # column of X contains the values of X to the p-th power. 
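# A vectorized alternative to the loop below (sketch): outer() raises each
# element of X to every power 1..p in one call and returns the same
# length(X)-by-p matrix:
#   X_poly <- outer(c(X), 1:p, "^")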
15 | # 16 | # 17 | for (i in 1:p) 18 | X_poly[,i] <- X ^ i 19 | 20 | X_poly 21 | # -------------------------------------------------------------- 22 | } -------------------------------------------------------------------------------- /5/polyFeatures.r: -------------------------------------------------------------------------------- 1 | polyFeatures <- function(X, p) { 2 | #POLYFEATURES Maps X (1D vector) into the p-th power 3 | # [X_poly] <- POLYFEATURES(X, p) takes a data matrix X (size m x 1) and 4 | # maps each example into its polynomial features where 5 | # X_poly[i, ] <- [X[i] X[i].^2 X[i].^3 ... X[i].^p] 6 | # 7 | 8 | 9 | # You need to return the following variables correctly. 10 | X_poly <- matrix(0,length(X), p) 11 | 12 | # ----------------------- YOUR CODE HERE ----------------------- 13 | # Instructions: Given a vector X, return a matrix X_poly where the p-th 14 | # column of X contains the values of X to the p-th power. 15 | # 16 | # 17 | 18 | X_poly 19 | # -------------------------------------------------------------- 20 | } -------------------------------------------------------------------------------- /5/submit.r: -------------------------------------------------------------------------------- 1 | submit = function(){ 2 | source("../lib/submitWithConfiguration.r") 3 | 4 | conf = list() 5 | conf$assignmentSlug = 'regularized-linear-regression-and-bias-variance'; 6 | conf$itemName = 'Regularized Linear Regression and Bias/Variance'; 7 | conf$partArrays = c( 8 | '1', 9 | 'linearRegCostFunction.r' , 10 | 'Regularized Linear Regression Cost Function', 11 | '2', 12 | 'linearRegCostFunction.r' , 13 | 'Regularized Linear Regression Gradient', 14 | '3', 15 | 'learningCurve.r' , 16 | 'Learning Curve', 17 | '4', 18 | 'polyFeatures.r' , 19 | 'Polynomial Feature Mapping', 20 | '5', 21 | 'validationCurve.r' , 22 | 'Validation Curve') 23 | conf$partArrays = matrix(conf$partArrays, ncol = 3 , byrow = T) 24 | 25 | conf$output = output; 26 | 27 | submitWithConfiguration(conf); 28 | } 29 | 30 | output = function(partId, auxstring) { 31 | source("linearRegCostFunction.r") 32 | source("learningCurve.r") 33 | source("polyFeatures.r") 34 | source("validationCurve.r") 35 | source("trainLinearReg.r") 36 | 37 | # Random Test Cases 38 | X = cbind(rep(1,10), sin(seq(1,15,1.5)), cos(seq(1,15,1.5)) ) 39 | y = sin(seq(1,30,3)) 40 | Xval = cbind(rep(1,10), sin(seq(0,14,1.5)) , cos(seq(0,14,1.5)) ) 41 | yval = sin(seq(1,10)) 42 | 43 | theta1 = c(0.1, 0.2, 0.3) 44 | if (partId == '1') 45 | out = sprintf('%0.5f ', linearRegCostFunction(X, y, 0.5)(theta1)) 46 | else if (partId == '2') 47 | out = paste0(sprintf('%0.5f ', linearRegGradFunction(X, y, 0.5)(theta1)), collapse = '') 48 | else if (partId == '3'){ 49 | lc = learningCurve(X, as.matrix(y), Xval, as.matrix(yval), 1) 50 | merg = c(lc$error_train, lc$error_val) 51 | out = paste0(sprintf('%0.5f ', merg), collapse = '') 52 | }else if (partId == '4') 53 | out = paste0(sprintf('%0.5f ', c(polyFeatures(X[2,], 8))), collapse = '') 54 | else if (partId == '5'){ 55 | vc = validationCurve(X, y, Xval, yval) 56 | merg = c(vc[[1]],vc[[2]],vc[[3]]) 57 | out = paste0(sprintf('%0.5f ', merg ), collapse = '') 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /5/trainLinearReg.r: -------------------------------------------------------------------------------- 1 | trainLinearReg <- function(X, y, lambda) { 2 | #TRAINLINEARREG Trains linear regression given a dataset (X, y) and a 3 | #regularization parameter lambda 4 | # 
theta <- TRAINLINEARREG (X, y, lambda) trains linear regression using 5 | # the dataset (X, y) and regularization parameter lambda. Returns the 6 | # trained parameters theta. 7 | # 8 | 9 | # Initialize Theta; a single example may arrive as a plain numeric vector, so make it a 1-row matrix first 10 | if (is.vector(X)) 11 | X <- t(X) 12 | 13 | initial_theta <- rep(0,dim(X)[2]) 14 | optimRes <- optim(par = initial_theta, fn = linearRegCostFunction(X,y,lambda), 15 | gr = linearRegGradFunction(X,y,lambda), method = "BFGS") 16 | optimRes$par 17 | } 18 | -------------------------------------------------------------------------------- /5/validationCurve-solution.r: -------------------------------------------------------------------------------- 1 | validationCurve <- function(X, y, Xval, yval) { 2 | #VALIDATIONCURVE Generate the train and validation errors needed to 3 | #plot a validation curve that we can use to select lambda 4 | # [lambda_vec, error_train, error_val] <- ... 5 | # VALIDATIONCURVE(X, y, Xval, yval) returns the train 6 | # and validation errors (in error_train, error_val) 7 | # for different values of lambda. You are given the training set (X, 8 | # y) and validation set (Xval, yval). 9 | # 10 | 11 | # Selected values of lambda (you should not change this) 12 | lambda_vec <- c(0, 0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1, 3, 10) 13 | 14 | # You need to return these variables correctly. 15 | error_train <- rep(0, length(lambda_vec)) 16 | error_val <- rep(0,length(lambda_vec)) 17 | 18 | # ----------------------- YOUR CODE HERE ----------------------- 19 | # Instructions: Fill in this function to return training errors in 20 | # error_train and the validation errors in error_val. The 21 | # vector lambda_vec contains the different lambda parameters 22 | # to use for each calculation of the errors, i.e, 23 | # error_train[i], and error_val[i] should give 24 | # you the errors obtained after training with 25 | # lambda <- lambda_vec[i] 26 | # 27 | # Note: You can loop over lambda_vec with the following: 28 | # 29 | # for (i in 1:length(lambda_vec)) { 30 | # lambda <- lambda_vec[i] 31 | # # Compute train / val errors when training linear 32 | # # regression with regularization parameter lambda 33 | # # You should store the result in error_train[i] 34 | # # and error_val[i] 35 | # .... 36 | # 37 | # } 38 | # 39 | # 40 | 41 | for (i in 1:length(lambda_vec)) { 42 | lambda <- lambda_vec[i] 43 | theta <- trainLinearReg(X, y, lambda) 44 | 45 | # Set lambda to zero when computing the error; theta has already been trained 46 | lambda <- 0 47 | # We compute J as an error here, not as a regularized cost.
So, lambda should not be included when 48 | # calculating error for thetas that have been trained, else it will be biased 49 | # Refer to 2.1 of Exercise 5, error is computed without lambda 50 | e_train <- linearRegCostFunction(X, y, lambda)(theta) 51 | e_val <- linearRegCostFunction(Xval, yval, lambda)(theta) # J over all CV set for new set of theta 52 | 53 | # Accumulating error from i=1:m 54 | if (i == 1) 55 | { 56 | error_train <- e_train 57 | error_val <- e_val 58 | } 59 | else 60 | { 61 | error_train <- rbind(error_train, e_train) 62 | error_val <- rbind(error_val, e_val) 63 | } 64 | } 65 | 66 | list(lambda_vec = lambda_vec, error_train = error_train, error_val = error_val) 67 | # ---------------------------------------------------------------------------- 68 | } 69 | -------------------------------------------------------------------------------- /5/validationCurve.r: -------------------------------------------------------------------------------- 1 | validationCurve <- function(X, y, Xval, yval) { 2 | #VALIDATIONCURVE Generate the train and validation errors needed to 3 | #plot a validation curve that we can use to select lambda 4 | # [lambda_vec, error_train, error_val] <- ... 5 | # VALIDATIONCURVE(X, y, Xval, yval) returns the train 6 | # and validation errors (in error_train, error_val) 7 | # for different values of lambda. You are given the training set (X, 8 | # y) and validation set (Xval, yval). 9 | # 10 | 11 | # Selected values of lambda (you should not change this) 12 | lambda_vec <- c(0, 0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1, 3, 10) 13 | 14 | # You need to return these variables correctly. 15 | error_train <- rep(0, length(lambda_vec)) 16 | error_val <- rep(0,length(lambda_vec)) 17 | 18 | # ----------------------- YOUR CODE HERE ----------------------- 19 | # Instructions: Fill in this function to return training errors in 20 | # error_train and the validation errors in error_val. The 21 | # vector lambda_vec contains the different lambda parameters 22 | # to use for each calculation of the errors, i.e, 23 | # error_train[i], and error_val[i] should give 24 | # you the errors obtained after training with 25 | # lambda <- lambda_vec[i] 26 | # 27 | # Note: You can loop over lambda_vec with the following: 28 | # 29 | # for (i in 1:length(lambda_vec)) { 30 | # lambda <- lambda_vec[i] 31 | # # Compute train / val errors when training linear 32 | # # regression with regularization parameter lambda 33 | # # You should store the result in error_train[i] 34 | # # and error_val[i] 35 | # .... 36 | # 37 | # } 38 | # 39 | # 40 | 41 | 42 | 43 | list(lambda_vec = lambda_vec, error_train = error_train, error_val = error_val) 44 | # ---------------------------------------------------------------------------- 45 | } 46 | -------------------------------------------------------------------------------- /6/dataset3Params-solution.r: -------------------------------------------------------------------------------- 1 | dataset3Params <- function(X, y, Xval, yval) { 2 | #EX6PARAMS returns your choice of C and sigma for Part 3 of the exercise 3 | #where you select the optimal (C, sigma) learning parameters to use for SVM 4 | #with RBF kernel 5 | # C_sigma <- EX6PARAMS(X, y, Xval, yval) returns your choice of C and 6 | # sigma. You should complete this function to return the optimal C and 7 | # sigma based on a cross-validation set. 8 | # 9 | 10 | # You need to return the following variables correctly. 
11 | C <- 1 12 | sigma <- 0.3 13 | 14 | # ----------------------- YOUR CODE HERE ----------------------- 15 | # Instructions: Fill in this function to return the optimal C and sigma 16 | # learning parameters found using the cross validation set. 17 | # You can use svmPredict to predict the labels on the cross 18 | # validation set. For example, 19 | # predictions <- svmPredict(model, Xval) 20 | # will return the predictions on the cross validation set. 21 | # 22 | # Note: You can compute the prediction error using 23 | # mean(predictions != yval) 24 | # 25 | C_try <- c(0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30) 26 | sigma_try <- c(0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30) 27 | error <- rep(0,length(sigma_try)) 28 | 29 | for (i in 1:length(C_try)) { 30 | C <- C_try[i] 31 | for (j in 1:length(sigma_try)) { 32 | sigma <- sigma_try[j] 33 | model <- svmTrain(X, y, C, gaussianKernel(sigma)) 34 | predictions <- svmPredict(model,Xval) 35 | 36 | error[j] <- mean(predictions != yval) 37 | 38 | # Accumulating error from i <- 1:m 39 | if (j == 1) 40 | error_j <- error[j] 41 | else 42 | error_j <- rbind(error_j, error[j]) 43 | } 44 | 45 | if (i == 1) 46 | error_i <- error_j 47 | else 48 | error_i <- cbind(error_i, error_j) 49 | } 50 | 51 | # Display the error_i in matrix 52 | print(error_i) 53 | 54 | # Locate the index of the minimum value in matrix error_i 55 | rowcol <- which(error_i == min(error_i),arr.ind = TRUE) 56 | # Optimum C and sigma are identified from the matrix above 57 | C <- C_try[rowcol[2]] 58 | sigma <- sigma_try[rowcol[1]] 59 | 60 | list(C=C, sigma = sigma) 61 | } 62 | -------------------------------------------------------------------------------- /6/dataset3Params.r: -------------------------------------------------------------------------------- 1 | dataset3Params <- function(X, y, Xval, yval) { 2 | #EX6PARAMS returns your choice of C and sigma for Part 3 of the exercise 3 | #where you select the optimal (C, sigma) learning parameters to use for SVM 4 | #with RBF kernel 5 | # C_sigma <- EX6PARAMS(X, y, Xval, yval) returns your choice of C and 6 | # sigma. You should complete this function to return the optimal C and 7 | # sigma based on a cross-validation set. 8 | # 9 | 10 | # You need to return the following variables correctly. 11 | C <- 1 12 | sigma <- 0.3 13 | 14 | # ----------------------- YOUR CODE HERE ----------------------- 15 | # Instructions: Fill in this function to return the optimal C and sigma 16 | # learning parameters found using the cross validation set. 17 | # You can use svmPredict to predict the labels on the cross 18 | # validation set. For example, 19 | # predictions <- svmPredict(model, Xval) 20 | # will return the predictions on the cross validation set. 21 | # 22 | # Note: You can compute the prediction error using 23 | # mean(predictions != yval) 24 | # 25 | 26 | 27 | list(C=C, sigma = sigma) 28 | # -------------------------------------------------------------------------- 29 | 30 | } 31 | -------------------------------------------------------------------------------- /6/emailFeatures-solution.r: -------------------------------------------------------------------------------- 1 | emailFeatures <- function(word_indices) { 2 | #EMAILFEATURES takes in a word_indices vector and produces a feature vector 3 | #from the word indices 4 | # x <- EMAILFEATURES(word_indices) takes in a word_indices vector and 5 | # produces a feature vector from the word indices. 
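# Usage note (sketch): given the indices produced by processEmail, the loop
# further below is equivalent to the one-liner x[word_indices] <- 1, since
# assigning 1 to a repeated index simply sets that position to 1 again.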
6 | 7 | # Total number of words in the dictionary 8 | n <- 1899 9 | 10 | # You need to return the following variables correctly. 11 | x <- rep(0,n) 12 | # ----------------------- YOUR CODE HERE ----------------------- 13 | # Instructions: Fill in this function to return a feature vector for the 14 | # given email (word_indices). To help make it easier to 15 | # process the emails, we have have already pre-processed each 16 | # email and converted each word in the email into an index in 17 | # a fixed dictionary (of 1899 words). The variable 18 | # word_indices contains the list of indices of the words 19 | # which occur in one email. 20 | # 21 | # Concretely, if an email has the text: 22 | # 23 | # The quick brown fox jumped over the lazy dog. 24 | # 25 | # Then, the word_indices vector for this text might look 26 | # like: 27 | # 28 | # 60 100 33 44 10 53 60 58 5 29 | # 30 | # where, we have mapped each word onto a number, for example: 31 | # 32 | # the -- 60 33 | # quick -- 100 34 | # ... 35 | # 36 | # (note: the above numbers are just an example and are not the 37 | # actual mappings). 38 | # 39 | # Your task is take one such word_indices vector and construct 40 | # a binary feature vector that indicates whether a particular 41 | # word occurs in the email. That is, x[i] <- 1 when word i 42 | # is present in the email. Concretely, if the word 'the' (say, 43 | # index 60) appears in the email, then x[60] <- 1. The feature 44 | # vector should look like: 45 | # 46 | # x <- [ 0 0 0 0 1 0 0 0 ... 0 0 0 0 1 ... 0 0 0 1 0 ..] 47 | # 48 | # 49 | 50 | for (i in 1:length(word_indices)) 51 | x[word_indices[i]] <- 1 52 | 53 | # -------------------------------------------------------------------------- 54 | x 55 | 56 | } 57 | -------------------------------------------------------------------------------- /6/emailFeatures.r: -------------------------------------------------------------------------------- 1 | emailFeatures <- function(word_indices) { 2 | #EMAILFEATURES takes in a word_indices vector and produces a feature vector 3 | #from the word indices 4 | # x <- EMAILFEATURES(word_indices) takes in a word_indices vector and 5 | # produces a feature vector from the word indices. 6 | 7 | # Total number of words in the dictionary 8 | n <- 1899 9 | 10 | # You need to return the following variables correctly. 11 | x <- rep(0,n) 12 | # ----------------------- YOUR CODE HERE ----------------------- 13 | # Instructions: Fill in this function to return a feature vector for the 14 | # given email (word_indices). To help make it easier to 15 | # process the emails, we have have already pre-processed each 16 | # email and converted each word in the email into an index in 17 | # a fixed dictionary (of 1899 words). The variable 18 | # word_indices contains the list of indices of the words 19 | # which occur in one email. 20 | # 21 | # Concretely, if an email has the text: 22 | # 23 | # The quick brown fox jumped over the lazy dog. 24 | # 25 | # Then, the word_indices vector for this text might look 26 | # like: 27 | # 28 | # 60 100 33 44 10 53 60 58 5 29 | # 30 | # where, we have mapped each word onto a number, for example: 31 | # 32 | # the -- 60 33 | # quick -- 100 34 | # ... 35 | # 36 | # (note: the above numbers are just an example and are not the 37 | # actual mappings). 38 | # 39 | # Your task is take one such word_indices vector and construct 40 | # a binary feature vector that indicates whether a particular 41 | # word occurs in the email. That is, x[i] <- 1 when word i 42 | # is present in the email. 
Concretely, if the word 'the' (say, 43 | # index 60) appears in the email, then x[60] <- 1. The feature 44 | # vector should look like: 45 | # 46 | # x <- [ 0 0 0 0 1 0 0 0 ... 0 0 0 0 1 ... 0 0 0 1 0 ..] 47 | # 48 | # 49 | 50 | 51 | # -------------------------------------------------------------------------- 52 | x 53 | 54 | } 55 | -------------------------------------------------------------------------------- /6/emailSample1.txt: -------------------------------------------------------------------------------- 1 | > Anyone knows how much it costs to host a web portal ? 2 | > 3 | Well, it depends on how many visitors you're expecting. 4 | This can be anywhere from less than 10 bucks a month to a couple of $100. 5 | You should checkout http://www.rackspace.com/ or perhaps Amazon EC2 6 | if youre running something big.. 7 | 8 | To unsubscribe yourself from this mailing list, send an email to: 9 | groupname-unsubscribe@egroups.com 10 | 11 | -------------------------------------------------------------------------------- /6/emailSample2.txt: -------------------------------------------------------------------------------- 1 | Folks, 2 | 3 | my first time posting - have a bit of Unix experience, but am new to Linux. 4 | 5 | 6 | Just got a new PC at home - Dell box with Windows XP. Added a second hard disk 7 | for Linux. Partitioned the disk and have installed Suse 7.2 from CD, which went 8 | fine except it didn't pick up my monitor. 9 | 10 | I have a Dell branded E151FPp 15" LCD flat panel monitor and a nVidia GeForce4 11 | Ti4200 video card, both of which are probably too new to feature in Suse's default 12 | set. I downloaded a driver from the nVidia website and installed it using RPM. 13 | Then I ran Sax2 (as was recommended in some postings I found on the net), but 14 | it still doesn't feature my video card in the available list. What next? 15 | 16 | Another problem. I have a Dell branded keyboard and if I hit Caps-Lock twice, 17 | the whole machine crashes (in Linux, not Windows) - even the on/off switch is 18 | inactive, leaving me to reach for the power cable instead. 19 | 20 | If anyone can help me in any way with these probs., I'd be really grateful - 21 | I've searched the 'net but have run out of ideas. 22 | 23 | Or should I be going for a different version of Linux such as RedHat? Opinions 24 | welcome. 25 | 26 | Thanks a lot, 27 | Peter 28 | 29 | -- 30 | Irish Linux Users' Group: ilug@linux.ie 31 | http://www.linux.ie/mailman/listinfo/ilug for (un)subscription information. 
32 | List maintainer: listmaster@linux.ie 33 | 34 | 35 | -------------------------------------------------------------------------------- /6/ex6.rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | -------------------------------------------------------------------------------- /6/ex6data1.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/faridcher/ml-course/65075c5427725bf2caeb25e5c6809ee34fcfdbe0/6/ex6data1.rda -------------------------------------------------------------------------------- /6/ex6data2.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/faridcher/ml-course/65075c5427725bf2caeb25e5c6809ee34fcfdbe0/6/ex6data2.rda -------------------------------------------------------------------------------- /6/ex6data3.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/faridcher/ml-course/65075c5427725bf2caeb25e5c6809ee34fcfdbe0/6/ex6data3.rda -------------------------------------------------------------------------------- /6/gaussianKernel-solution.r: -------------------------------------------------------------------------------- 1 | gaussianKernel <- function(sigma) { 2 | #RBFKERNEL returns a radial basis function kernel between x1 and x2 3 | # sim <- gaussianKernel(sigma) returns a gaussian kernel function between x1 and x2 4 | # and gaussianKernel(sigma)(x1, x2) returns the value in sim 5 | function(x1, x2) { 6 | # Ensure that x1 and x2 are column vectors 7 | x1 <- c(x1) 8 | x2 <- c(x2) 9 | 10 | # You need to return the following variables correctly. 11 | sim <- 0 12 | 13 | # ----------------------- YOUR CODE HERE ----------------------- 14 | # Instructions: Fill in this function to return the similarity between x1 15 | # and x2 computed using a Gaussian kernel with bandwidth 16 | # sigma 17 | # 18 | # 19 | sim <- exp(-(sum((x1 - x2) ^ 2)) / (2 * sigma ^ 2)) 20 | sim 21 | } 22 | # -------------------------------------------------------------- 23 | 24 | } 25 | -------------------------------------------------------------------------------- /6/gaussianKernel.r: -------------------------------------------------------------------------------- 1 | gaussianKernel <- function(sigma) { 2 | #RBFKERNEL returns a radial basis function kernel between x1 and x2 3 | # sim <- gaussianKernel(sigma) returns a gaussian kernel function between x1 and x2 4 | # and gaussianKernel(sigma)(x1, x2) returns the value in sim 5 | function(x1, x2) { 6 | # Ensure that x1 and x2 are column vectors 7 | x1 <- c(x1) 8 | x2 <- c(x2) 9 | 10 | # You need to return the following variables correctly. 
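# The Gaussian (RBF) kernel measures similarity as
#   K(x1, x2) = exp(-sum((x1 - x2)^2) / (2 * sigma^2)),
# which is the value this function should return in sim. Typical usage in
# this exercise (sketch): model <- svmTrain(X, y, C, gaussianKernel(sigma)),
# followed by pred <- svmPredict(model, Xval).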
11 | sim <- 0 12 | 13 | # ----------------------- YOUR CODE HERE ----------------------- 14 | # Instructions: Fill in this function to return the similarity between x1 15 | # and x2 computed using a Gaussian kernel with bandwidth 16 | # sigma 17 | # 18 | # 19 | 20 | 21 | sim 22 | } 23 | # -------------------------------------------------------------- 24 | 25 | } 26 | -------------------------------------------------------------------------------- /6/getVocabList.r: -------------------------------------------------------------------------------- 1 | getVocabList <- function() { 2 | #GETVOCABLIST reads the fixed vocabulary list in vocab.txt and returns a 3 | #cell array of the words 4 | # vocabList <- GETVOCABLIST() reads the fixed vocabulary list in vocab.txt 5 | # and returns a cell array of the words in vocabList. 6 | 7 | ## Read the fixed vocabulary list 8 | vocabList <- read.table('vocab.txt',sep = '\t',stringsAsFactors = FALSE)[,2] 9 | vocabList 10 | } 11 | -------------------------------------------------------------------------------- /6/linearKernel.r: -------------------------------------------------------------------------------- 1 | linearKernel <- function(x1, x2) { 2 | #LINEARKERNEL returns a linear kernel between x1 and x2 3 | # sim <- linearKernel(x1, x2) returns a linear kernel between x1 and x2 4 | # and returns the value in sim 5 | 6 | # Ensure that x1 and x2 are column vectors 7 | x1 <- c(x1) 8 | x2 <- c(x2) 9 | 10 | # Compute the kernel 11 | sim <- t(x1) %*% x2 12 | sim 13 | } -------------------------------------------------------------------------------- /6/plotData.r: -------------------------------------------------------------------------------- 1 | plotData <- function (X, y) { 2 | #PLOTDATA Plots the data points X and y into a new figure 3 | # PLOTDATA(x,y) plots the data points with + for the positive examples 4 | # and o for the negative examples. X is assumed to be a Mx2 matrix. 5 | # 6 | # Note: This was slightly modified such that it expects y <- 1 or y <- 0 7 | 8 | # Find Indices of Positive and Negative Examples 9 | symbolss <- c(21,3) #plus and fillded circle 10 | yfac <- factor(y) 11 | plot(X[,1],X[,2], pch = symbolss[yfac] ,bg = "yellow", lwd = 1.3) 12 | } 13 | -------------------------------------------------------------------------------- /6/processEmail-solution.r: -------------------------------------------------------------------------------- 1 | processEmail <- function(email_contents) { 2 | #PROCESSEMAIL preprocesses a the body of an email and 3 | #returns a list of word_indices 4 | # word_indices <- PROCESSEMAIL(email_contents) preprocesses 5 | # the body of an email and returns a list of indices of the 6 | # words contained in the email. 
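# End-to-end usage (a sketch; it assumes the sample email is first read into
# a single string, which is not shown in this file):
#   email_text   <- paste(readLines('emailSample1.txt'), collapse = '\n')
#   word_indices <- processEmail(email_text)
#   features     <- emailFeatures(word_indices)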
7 | # 8 | 9 | # Load Vocabulary 10 | vocabList <- getVocabList() 11 | 12 | # Init return value 13 | # word_indices 14 | 15 | # ---------------------------- Preprocess Email --------------------------- 16 | 17 | # Find the Headers ( \n\n and remove ) 18 | # Uncomment the following lines if you are working with raw emails with the 19 | # full headers 20 | 21 | # hdrstart <- strfind(email_contents, ([char(10) char(10)])) 22 | # email_contents <- email_contents(hdrstart(1):}) 23 | 24 | # Lower case 25 | email_contents <- tolower(email_contents) 26 | 27 | # Strip all HTML 28 | # Looks for any expression that starts with < and ends with > and replace 29 | # and does not have any < or > in the tag it with a space 30 | email_contents <- gsub('<[^<>]+>', ' ',email_contents) 31 | 32 | # Handle Numbers 33 | # Look for one or more characters between 0-9 34 | email_contents <- gsub('[0-9]+', 'number',email_contents) 35 | 36 | # Handle URLS 37 | # Look for strings starting with http:// or https:// 38 | email_contents <- gsub('(http|https)://[^\\s]*', 'httpaddr',email_contents ,perl = TRUE) 39 | 40 | # Handle Email Addresses 41 | # Look for strings with @ in the middle 42 | email_contents <- gsub('[^\\s]+@[^\\s]+', 'emailaddr',email_contents,perl = TRUE) 43 | 44 | # Handle $ sign 45 | email_contents <- gsub('[$]+', 'dollar',email_contents) 46 | 47 | 48 | # ---------------------------- Tokenize Email --------------------------- 49 | 50 | # Output the email to screen as well 51 | cat(sprintf('\n----- Processed Email -----\n\n')) 52 | 53 | # Process file 54 | l <- 0 55 | 56 | # Tokenize and also get rid of any punctuation 57 | splitters <- paste(" @$/#.-:&*+<-[]?!(){},'\">_<;#", 58 | intToUtf8(10), #newline 59 | intToUtf8(13), #tab 60 | sep = '') 61 | 62 | for (i in 1:nchar(splitters)) { 63 | email_contents <- strsplit(email_contents, substr(splitters,i,i),fixed = TRUE) 64 | sp <- "" 65 | for (i in 1:length(email_contents)) { 66 | sp <- c(sp,email_contents[[i]]) 67 | } 68 | email_contents <- sp 69 | } 70 | # remove empty string 71 | email_contents <- email_contents[email_contents != ""] 72 | 73 | # Remove any non alphanumeric characters 74 | email_contents <- gsub('[^a-zA-Z0-9]', '',email_contents) 75 | 76 | # Stem the word 77 | # (the porterStemmer sometimes has issues, so we use a try catch block) 78 | 79 | # try({str <- porterStemmer(strtrim(str))}, 80 | # finally={str <- ''}) 81 | 82 | email_contents <- SnowballC::wordStem(email_contents) 83 | 84 | # Skip the word if it is too short 85 | #if (nchar(str) < 1) 86 | # next 87 | 88 | # Look up the word in the dictionary and add to word_indices if 89 | # found 90 | # ----------------------- YOUR CODE HERE ----------------------- 91 | # Instructions: Fill in this function to add the index of str to 92 | # word_indices if it is in the vocabulary. At this point 93 | # of the code, you have a stemmed word from the email in 94 | # the variable str. You should look up str in the 95 | # vocabulary list (vocabList). If a match exists, you 96 | # should add the index of the word to the word_indices 97 | # vector. Concretely, if str <- 'action', then you should 98 | # look up the vocabulary list to find where in vocabList 99 | # 'action' appears. For example, if vocabList{18} <- 100 | # 'action', then, you should add 18 to the word_indices 101 | # vector (e.g., word_indices <- [word_indices ; 18]; ). 102 | # 103 | # Note: vocabList{idx} returns a the word with index idx in the 104 | # vocabulary list. 
105 | # 106 | # Note: You can use strcmp(str1, str2) to compare two strings (str1 and 107 | # str2). It will return 1 only if the two strings are equivalent. 108 | # 109 | 110 | word_indices <- match(email_contents, vocabList) 111 | word_indices <- word_indices[!is.na(word_indices)] 112 | 113 | # -------------------------------------------------------------- 114 | 115 | cat(email_contents) 116 | 117 | # Print footer 118 | cat(sprintf('\n\n--------------------------\n')) 119 | 120 | word_indices 121 | } 122 | -------------------------------------------------------------------------------- /6/processEmail.r: -------------------------------------------------------------------------------- 1 | processEmail <- function(email_contents) { 2 | #PROCESSEMAIL preprocesses a the body of an email and 3 | #returns a list of word_indices 4 | # word_indices <- PROCESSEMAIL(email_contents) preprocesses 5 | # the body of an email and returns a list of indices of the 6 | # words contained in the email. 7 | # 8 | 9 | # Load Vocabulary 10 | vocabList <- getVocabList() 11 | 12 | # Init return value 13 | # word_indices 14 | 15 | # ---------------------------- Preprocess Email --------------------------- 16 | 17 | # Find the Headers ( \n\n and remove ) 18 | # Uncomment the following lines if you are working with raw emails with the 19 | # full headers 20 | 21 | # hdrstart <- strfind(email_contents, ([char(10) char(10)])) 22 | # email_contents <- email_contents(hdrstart(1):}) 23 | 24 | # Lower case 25 | email_contents <- tolower(email_contents) 26 | 27 | # Strip all HTML 28 | # Looks for any expression that starts with < and ends with > and replace 29 | # and does not have any < or > in the tag it with a space 30 | email_contents <- gsub('<[^<>]+>', ' ',email_contents) 31 | 32 | # Handle Numbers 33 | # Look for one or more characters between 0-9 34 | email_contents <- gsub('[0-9]+', 'number',email_contents) 35 | 36 | # Handle URLS 37 | # Look for strings starting with http:// or https:// 38 | email_contents <- 39 | gsub('(http|https)://[^\\s]*', 'httpaddr',email_contents ,perl = TRUE) 40 | 41 | # Handle Email Addresses 42 | # Look for strings with @ in the middle 43 | email_contents <- 44 | gsub('[^\\s]+@[^\\s]+', 'emailaddr',email_contents,perl = TRUE) 45 | 46 | # Handle $ sign 47 | email_contents <- gsub('[$]+', 'dollar',email_contents) 48 | 49 | 50 | # ---------------------------- Tokenize Email --------------------------- 51 | 52 | # Output the email to screen as well 53 | cat(sprintf('\n----- Processed Email -----\n\n')) 54 | 55 | # Process file 56 | l <- 0 57 | 58 | # Tokenize and also get rid of any punctuation 59 | splitters <- paste(" @$/#.-:&*+<-[]?!(){},'\">_<;#", 60 | intToUtf8(10), #newline 61 | intToUtf8(13), #tab 62 | sep = '') 63 | 64 | for (i in 1:nchar(splitters)) 65 | { 66 | email_contents <- 67 | strsplit(email_contents, substr(splitters,i,i),fixed = TRUE) 68 | sp <- "" 69 | for (i in 1:length(email_contents)) { 70 | sp <- c(sp,email_contents[[i]]) 71 | } 72 | email_contents <- sp 73 | } 74 | #remove empty string 75 | email_contents <- email_contents[email_contents != ""] 76 | 77 | # Remove any non alphanumeric characters 78 | email_contents <- gsub('[^a-zA-Z0-9]', '',email_contents) 79 | 80 | # Stem the word 81 | # (the porterStemmer sometimes has issues, so we use a try catch block) 82 | 83 | #try({str <- porterStemmer(strtrim(str))}, 84 | #finally={str <- ''}) 85 | 86 | email_contents <- SnowballC::wordStem(email_contents) 87 | 88 | # Skip the word if it is too short 89 | #if (nchar(str) < 
1) 90 | # next 91 | 92 | # Look up the word in the dictionary and add to word_indices if 93 | # found 94 | # ----------------------- YOUR CODE HERE ----------------------- 95 | # Instructions: Fill in this function to add the index of str to 96 | # word_indices if it is in the vocabulary. At this point 97 | # of the code, you have a stemmed word from the email in 98 | # the variable str. You should look up str in the 99 | # vocabulary list (vocabList). If a match exists, you 100 | # should add the index of the word to the word_indices 101 | # vector. Concretely, if str <- 'action', then you should 102 | # look up the vocabulary list to find where in vocabList 103 | # 'action' appears. For example, if vocabList{18} <- 104 | # 'action', then, you should add 18 to the word_indices 105 | # vector (e.g., word_indices <- [word_indices ; 18]; ). 106 | # 107 | # Note: vocabList{idx} returns a the word with index idx in the 108 | # vocabulary list. 109 | # 110 | # Note: You can use strcmp(str1, str2) to compare two strings (str1 and 111 | # str2). It will return 1 only if the two strings are equivalent. 112 | # 113 | 114 | 115 | # -------------------------------------------------------------- 116 | 117 | cat(email_contents) 118 | 119 | # Print footer 120 | cat(sprintf('\n\n--------------------------\n')) 121 | 122 | word_indices 123 | } 124 | -------------------------------------------------------------------------------- /6/spamSample1.txt: -------------------------------------------------------------------------------- 1 | Do You Want To Make $1000 Or More Per Week? 2 | 3 | 4 | 5 | If you are a motivated and qualified individual - I 6 | will personally demonstrate to you a system that will 7 | make you $1,000 per week or more! This is NOT mlm. 8 | 9 | 10 | 11 | Call our 24 hour pre-recorded number to get the 12 | details. 13 | 14 | 15 | 16 | 000-456-789 17 | 18 | 19 | 20 | I need people who want to make serious money. Make 21 | the call and get the facts. 22 | 23 | Invest 2 minutes in yourself now! 24 | 25 | 26 | 27 | 000-456-789 28 | 29 | 30 | 31 | Looking forward to your call and I will introduce you 32 | to people like yourself who 33 | are currently making $10,000 plus per week! 34 | 35 | 36 | 37 | 000-456-789 38 | 39 | 40 | 41 | 3484lJGv6-241lEaN9080lRmS6-271WxHo7524qiyT5-438rjUv5615hQcf0-662eiDB9057dMtVl72 42 | 43 | -------------------------------------------------------------------------------- /6/spamSample2.txt: -------------------------------------------------------------------------------- 1 | Best Buy Viagra Generic Online 2 | 3 | Viagra 100mg x 60 Pills $125, Free Pills & Reorder Discount, Top Selling 100# Quality & Satisfaction guaranteed! 4 | 5 | We accept VISA, Master & E-Check Payments, 90000+ Satisfied Customers! 
6 | http://medphysitcstech.ru 7 | 8 | 9 | -------------------------------------------------------------------------------- /6/spamTest.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/faridcher/ml-course/65075c5427725bf2caeb25e5c6809ee34fcfdbe0/6/spamTest.rda -------------------------------------------------------------------------------- /6/spamTrain.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/faridcher/ml-course/65075c5427725bf2caeb25e5c6809ee34fcfdbe0/6/spamTrain.rda -------------------------------------------------------------------------------- /6/submit.r: -------------------------------------------------------------------------------- 1 | submit = function(){ 2 | source("../lib/submitWithConfiguration.r") 3 | 4 | conf = list() 5 | conf$assignmentSlug = 'support-vector-machines'; 6 | conf$itemName = 'Support Vector Machines'; 7 | conf$partArrays = c( 8 | '1', 9 | 'gaussianKernel.r' , 10 | 'Gaussian Kernel', 11 | '2', 12 | 'dataset3Params.r' , 13 | 'Parameters (C, sigma) for Dataset 3', 14 | '3', 15 | 'processEmail.r' , 16 | 'Email Preprocessing', 17 | '4', 18 | 'emailFeatures.r' , 19 | 'Email Feature Extraction') 20 | conf$partArrays = matrix(conf$partArrays, ncol = 3 , byrow = T) 21 | 22 | conf$output = output; 23 | 24 | submitWithConfiguration(conf); 25 | } 26 | 27 | output = function(partId, auxstring) { 28 | source("gaussianKernel.r") 29 | source("processEmail.r") 30 | source("emailFeatures.r") 31 | source("dataset3Params.r") 32 | source("svmTrain.r") 33 | source("../lib/bsxfun.r") 34 | source("svmPredict.r") 35 | source("getVocabList.r") 36 | # Random Test Cases 37 | x1 = sin(seq(1,10)) 38 | x2 = cos(seq(1,10)) 39 | ec = 'the quick brown fox jumped over the lazy dog'; 40 | wi = 1 + abs(round(x1 * 1863)); 41 | wi = c(wi,wi) 42 | 43 | if (partId == '1') 44 | out = sprintf('%0.5f ', gaussianKernel(2)(x1, x2)) 45 | else if (partId == '2'){ 46 | load('ex6data3.rda') 47 | d3p = dataset3Params(X, y, Xval, yval) 48 | out = sprintf('%0.5f ', d3p$C) 49 | out = paste0(c(out, sprintf('%0.5f ', d3p$sigma)), collapse = '') 50 | }else if (partId == '3'){ 51 | word_indices = processEmail(ec) 52 | out = paste0(sprintf('%d ', word_indices), collapse = '') 53 | }else if (partId == '4'){ 54 | x = emailFeatures(wi) 55 | out = paste0(sprintf('%d ', x), collapse = '') 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /6/svmPredict.r: -------------------------------------------------------------------------------- 1 | svmPredict <- function(model, X) { 2 | #SVMPREDICT returns a vector of predictions using a trained SVM model 3 | #(svmTrain). 4 | # pred <- SVMPREDICT(model, X) returns a vector of predictions using a 5 | # trained SVM model (svmTrain). X is a mxn matrix where there each 6 | # example is a row. model is a svm model returned from svmTrain. 7 | # predictions pred is a m x 1 column of predictions of {0, 1} values. 
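# Note on the Gaussian branch below: it relies on the expansion
#   ||u - v||^2 = ||u||^2 + ||v||^2 - 2 * u . v
# together with gaussianKernel(sigma)(1, 0) = exp(-1 / (2 * sigma^2)), so that
# raising that scalar to the matrix of pairwise squared distances yields
# exp(-dist^2 / (2 * sigma^2)) for every example/support-vector pair at once.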
8 | # 9 | 10 | # Check if we are getting a column vector, if so, then assume that we only 11 | # need to do prediction for a single example 12 | if (is.vector(X)) 13 | X <- as.matrix(X) 14 | 15 | if (dim(X)[2] == 1) 16 | # Examples should be in rows 17 | X <- t(X) 18 | 19 | # Dataset 20 | m <- dim(X)[1] 21 | p <- rep(0,m) 22 | pred <- rep(0,m) 23 | 24 | if (grepl('linearKernel' , model$kernelFunctionName)) 25 | # We can use the weights and bias directly if working with the 26 | # linear kernel 27 | p <- X %*% model$w + c(model$b) 28 | else if (grepl('gaussianKernel' , model$kernelFunctionName)) { 29 | # Vectorized RBF Kernel 30 | # This is equivalent to computing the kernel on every pair of examples 31 | X1 <- apply(X ^ 2,1,sum) #row sum 32 | X2 <- t(apply(model$X ^ 2,1,sum)) 33 | K <- bsxfun("+", X1, bsxfun("+", X2,-2 * X %*% t(model$X))) 34 | K <- model$kernelFunction(1, 0) ^ K 35 | K <- bsxfun("*", t(model$y), K) 36 | K <- bsxfun("*", t(model$alphas), K) 37 | p <- apply(K,1,sum) #row sum 38 | } 39 | else { 40 | # Other Non-linear kernel 41 | for (i in 1:m) { 42 | prediction <- 0 43 | for (j in 1:dim(model$X)[1]) { 44 | prediction <- prediction + model$alphas[j] * model.y[j] * 45 | model$kernelFunction(t(X[i,]), t(model$X[j,])) 46 | } 47 | p[i] <- prediction + model$b 48 | } 49 | } 50 | 51 | # Convert predictions into 0 / 1 52 | pred[p >= 0] <- 1 53 | pred[p < 0] <- 0 54 | pred 55 | } 56 | -------------------------------------------------------------------------------- /6/visualizeBoundary.r: -------------------------------------------------------------------------------- 1 | visualizeBoundary <- function (X, y, model, varargin) { 2 | #VISUALIZEBOUNDARY plots a non-linear decision boundary learned by the SVM 3 | # VISUALIZEBOUNDARYLINEAR(X, y, model) plots a non-linear decision 4 | # boundary learned by the SVM and overlays the data on it 5 | 6 | # Plot the training data on top of the boundary 7 | plotData(X, y) 8 | 9 | # Make classification predictions over a grid of values 10 | x1plot <- t(seq(min(X[,1]) , max(X[,1]), length.out = 100)) 11 | x2plot <- t(seq(min(X[,2]) , max(X[,2]), length.out = 100)) 12 | 13 | mg <- meshgrid(x1plot, x2plot) 14 | X1 <- mg$X 15 | X2 <- mg$Y 16 | vals <- matrix(0,length(x1plot),length(x2plot)) 17 | 18 | for (i in 1:dim(X1)[2]) { 19 | this_X <- cbind(X1[,i], X2[,i]) 20 | vals[,i] <- svmPredict(model, this_X) 21 | } 22 | 23 | # Plot the SVM boundary 24 | contour(x1plot,x2plot, t(vals), 25 | levels = 1, col = "blue",add = TRUE) 26 | } 27 | -------------------------------------------------------------------------------- /6/visualizeBoundaryLinear.r: -------------------------------------------------------------------------------- 1 | visualizeBoundaryLinear <- function (X, y, model) { 2 | #VISUALIZEBOUNDARYLINEAR plots a linear decision boundary learned by the 3 | #SVM 4 | # VISUALIZEBOUNDARYLINEAR(X, y, model) plots a linear decision boundary 5 | # learned by the SVM and overlays the data on it 6 | 7 | w <- model$w 8 | b <- model$b 9 | xp <- seq(min(X[,1]),max(X[,1]),length.out = 100) 10 | 11 | yp <- -(w[1] * xp + c(b)) / w[2] 12 | plotData(X, y) 13 | 14 | lines(xp, yp, col = "blue") 15 | } 16 | -------------------------------------------------------------------------------- /7/bird_small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/faridcher/ml-course/65075c5427725bf2caeb25e5c6809ee34fcfdbe0/7/bird_small.png 
-------------------------------------------------------------------------------- /7/bird_small.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/faridcher/ml-course/65075c5427725bf2caeb25e5c6809ee34fcfdbe0/7/bird_small.rda -------------------------------------------------------------------------------- /7/computeCentroids-solution.r: -------------------------------------------------------------------------------- 1 | computeCentroids <- function(X, idx, K) { 2 | #COMPUTECENTROIDS returs the new centroids by computing the means of the 3 | #data points assigned to each centroid. 4 | # centroids <- COMPUTECENTROIDS(X, idx, K) returns the new centroids by 5 | # computing the means of the data points assigned to each centroid. It is 6 | # given a dataset X where each row is a single data point, a vector 7 | # idx of centroid assignments (i.e. each entry in range [1..K]) for each 8 | # example, and K, the number of centroids. You should return a matrix 9 | # centroids, where each row of centroids is the mean of the data points 10 | # assigned to it. 11 | # 12 | 13 | # Useful variables 14 | m <- dim(X)[1] 15 | n <- dim(X)[2] 16 | # You need to return the following variables correctly. 17 | centroids <- matrix(0,K,n) 18 | 19 | 20 | # ----------------------- YOUR CODE HERE ----------------------- 21 | # Instructions: Go over every centroid and compute mean of all points that 22 | # belong to it. Concretely, the row vector centroids[i, ] 23 | # should contain the mean of the data points assigned to 24 | # centroid i. 25 | # 26 | # Note: You can use a for-loop over the centroids to compute this. 27 | # 28 | 29 | for (k in 1:K) { 30 | num_k <- 0 31 | sum <- rep(0,n) 32 | for (i in 1:m) { 33 | if (idx[i] == k) { 34 | sum <- sum + t(X[i,]) 35 | num_k <- num_k + 1 36 | } 37 | } 38 | centroids[k,] <- t(sum / num_k) 39 | } 40 | 41 | centroids 42 | # -------------------------------------------------------------- 43 | } 44 | -------------------------------------------------------------------------------- /7/computeCentroids.r: -------------------------------------------------------------------------------- 1 | computeCentroids <- function(X, idx, K) { 2 | #COMPUTECENTROIDS returs the new centroids by computing the means of the 3 | #data points assigned to each centroid. 4 | # centroids <- COMPUTECENTROIDS(X, idx, K) returns the new centroids by 5 | # computing the means of the data points assigned to each centroid. It is 6 | # given a dataset X where each row is a single data point, a vector 7 | # idx of centroid assignments (i.e. each entry in range [1..K]) for each 8 | # example, and K, the number of centroids. You should return a matrix 9 | # centroids, where each row of centroids is the mean of the data points 10 | # assigned to it. 11 | # 12 | 13 | # Useful variables 14 | m <- dim(X)[1] 15 | n <- dim(X)[2] 16 | # You need to return the following variables correctly. 17 | centroids <- matrix(0,K,n) 18 | 19 | 20 | # ----------------------- YOUR CODE HERE ----------------------- 21 | # Instructions: Go over every centroid and compute mean of all points that 22 | # belong to it. Concretely, the row vector centroids[i, ] 23 | # should contain the mean of the data points assigned to 24 | # centroid i. 25 | # 26 | # Note: You can use a for-loop over the centroids to compute this. 
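# One possible sketch using colMeans (drop = FALSE keeps a single-member
# cluster as a 1-row matrix):
#   for (k in 1:K)
#     centroids[k, ] <- colMeans(X[idx == k, , drop = FALSE])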
27 | # 28 | 29 | 30 | 31 | centroids 32 | # -------------------------------------------------------------- 33 | } 34 | -------------------------------------------------------------------------------- /7/displayData.r: -------------------------------------------------------------------------------- 1 | displayData <- function(X, example_width = round(sqrt(dim(X)[2]))) { 2 | #DISPLAYDATA Display 2D data in a nice grid 3 | # DISPLAYDATA(X, example_width) displays 2D data 4 | # stored in X in a nice grid. It returns the figure handle h and the 5 | # displayed array if requested. 6 | 7 | # Compute rows, cols 8 | m <- dim(X)[1] 9 | n <- dim(X)[2] 10 | example_height <- (n / example_width) 11 | 12 | # Compute number of items to display 13 | display_rows <- floor(sqrt(m)) 14 | display_cols <- ceiling(m / display_rows) 15 | 16 | # Between images padding 17 | pad <- 1 18 | 19 | # Setup blank display 20 | display_array <- -matrix(1, 21 | pad + display_rows * (example_height + pad), 22 | pad + display_cols * (example_width + pad)) 23 | 24 | # Copy each example into a patch on the display array 25 | curr_ex <- 1 26 | for (j in 1:display_rows) { 27 | for (i in 1:display_cols) { 28 | if (curr_ex > m) 29 | break 30 | # Copy the patch 31 | 32 | # Get the max value of the patch 33 | max_val <- max(abs(X[curr_ex,])) 34 | display_array[pad + (j - 1) * (example_height + pad) + (1:example_height), 35 | pad + (i - 1) * (example_width + pad) + (1:example_width)] <- 36 | matrix(X[curr_ex,], example_height, example_width) / max_val 37 | curr_ex <- curr_ex + 1 38 | } 39 | if (curr_ex > m) 40 | break 41 | } 42 | 43 | # Display Image 44 | op <- par(bg = "gray",mar=c(1,1,1,1)) 45 | a <- apply(display_array,2,rev) 46 | h <- image(t(a), col = grey.colors(100,0,1,1), axes = FALSE) 47 | grid( 48 | nx = display_cols,display_rows,col = 'black',lwd = 1,lty = 1 49 | ) 50 | box() 51 | par(op) 52 | 53 | } 54 | -------------------------------------------------------------------------------- /7/ex7.rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | -------------------------------------------------------------------------------- /7/ex7data1.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/faridcher/ml-course/65075c5427725bf2caeb25e5c6809ee34fcfdbe0/7/ex7data1.rda -------------------------------------------------------------------------------- /7/ex7data2.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/faridcher/ml-course/65075c5427725bf2caeb25e5c6809ee34fcfdbe0/7/ex7data2.rda -------------------------------------------------------------------------------- /7/ex7faces.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/faridcher/ml-course/65075c5427725bf2caeb25e5c6809ee34fcfdbe0/7/ex7faces.rda -------------------------------------------------------------------------------- /7/featureNormalize.r: -------------------------------------------------------------------------------- 1 | featureNormalize <- function(X) { 2 | #FEATURENORMALIZE Normalizes the features in X 3 | # FEATURENORMALIZE(X) returns a normalized version of X where 4 | 
# the mean value of each feature is 0 and the standard deviation 5 | # is 1. This is often a good preprocessing step to do when 6 | # working with learning algorithms. 7 | 8 | mu <- colMeans(X) 9 | 10 | #vectorized multivariate apply 11 | X_norm <- matrix(mapply(`-`,t(X),mu),dim(X) ,byrow = TRUE) 12 | 13 | sigma <- apply(X,2,sd) 14 | X_norm <- matrix(mapply(`/`,t(X_norm),sigma),dim(X) ,byrow = TRUE) 15 | 16 | list(X_norm = X_norm, mu = mu, sigma = sigma) 17 | 18 | } 19 | -------------------------------------------------------------------------------- /7/findClosestCentroids-solution.r: -------------------------------------------------------------------------------- 1 | findClosestCentroids <- function(X, centroids) { 2 | #FINDCLOSESTCENTROIDS computes the centroid memberships for every example 3 | # idx <- FINDCLOSESTCENTROIDS (X, centroids) returns the closest centroids 4 | # in idx for a dataset X where each row is a single example. idx = m x 1 5 | # vector of centroid assignments (i.e. each entry in range [1..K]) 6 | # 7 | 8 | # Set K 9 | K <- dim(centroids)[1] 10 | 11 | # You need to return the following variables correctly. 12 | idx <- rep(0,dim(X)[1]) 13 | 14 | # ----------------------- YOUR CODE HERE ----------------------- 15 | # Instructions: Go over every example, find its closest centroid, and store 16 | # the index inside idx at the appropriate location. 17 | # Concretely, idx[i] should contain the index of the centroid 18 | # closest to example i. Hence, it should be a value in the 19 | # range 1..K 20 | # 21 | # Note: You can use a for-loop over the examples to compute this. 22 | # 23 | N <- dim(X)[2] 24 | m <- dim(X)[1] 25 | 26 | dist <- rep(0,K) 27 | a <- rep(0,N) 28 | 29 | for (i in 1:m) { 30 | for (k in 1:K) { 31 | for (n in 1:N) { 32 | a[n] <- (X[i,n] - centroids[k,n]) ^ 2 # Calc distance from X to centroids (sum to N, not just 2) 33 | } 34 | dist[k] <- sum(a); # Distance from point X to centroids 35 | } 36 | c <- which.min(dist) # Find the index of the min distance btw X and centroids 37 | idx[i] <- c; # Assign for each X which centroid it belongs to 38 | } 39 | idx 40 | 41 | # -------------------------------------------------------------- 42 | } 43 | -------------------------------------------------------------------------------- /7/findClosestCentroids.r: -------------------------------------------------------------------------------- 1 | findClosestCentroids <- function(X, centroids) { 2 | #FINDCLOSESTCENTROIDS computes the centroid memberships for every example 3 | # idx <- FINDCLOSESTCENTROIDS (X, centroids) returns the closest centroids 4 | # in idx for a dataset X where each row is a single example. idx = m x 1 5 | # vector of centroid assignments (i.e. each entry in range [1..K]) 6 | # 7 | 8 | # Set K 9 | K <- dim(centroids)[1] 10 | 11 | # You need to return the following variables correctly. 12 | idx <- rep(0,dim(X)[1]) 13 | 14 | # ----------------------- YOUR CODE HERE ----------------------- 15 | # Instructions: Go over every example, find its closest centroid, and store 16 | # the index inside idx at the appropriate location. 17 | # Concretely, idx[i] should contain the index of the centroid 18 | # closest to example i. Hence, it should be a value in the 19 | # range 1..K 20 | # 21 | # Note: You can use a for-loop over the examples to compute this. 
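# A compact sketch: for each example, compute the squared distance to every
# centroid at once and keep the index of the smallest one:
#   for (i in 1:nrow(X)) {
#     d2 <- rowSums((centroids - matrix(X[i, ], nrow = K, ncol = ncol(X), byrow = TRUE))^2)
#     idx[i] <- which.min(d2)
#   }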
22 | # 23 | 24 | idx 25 | 26 | # -------------------------------------------------------------- 27 | } 28 | -------------------------------------------------------------------------------- /7/kMeansInitCentroids-solution.r: -------------------------------------------------------------------------------- 1 | kMeansInitCentroids <- function(X, K) { 2 | #KMEANSINITCENTROIDS This function initializes K centroids that are to be 3 | #used in K-Means on the dataset X 4 | # centroids <- KMEANSINITCENTROIDS(X, K) returns K initial centroids to be 5 | # used with the K-Means on the dataset X 6 | # 7 | 8 | # You should return these values correctly 9 | centroids <- matrix(0,K,dim(X)[2]) 10 | 11 | # ----------------------- YOUR CODE HERE ----------------------- 12 | # Instructions: You should set centroids to randomly chosen examples from 13 | # the dataset X 14 | # 15 | 16 | # Randomly reorder the indices of examples 17 | randidx <- sample(dim(X)[1]) 18 | # Take the first K examples as centroids 19 | centroids <- X[randidx[1:K],] 20 | centroids 21 | # -------------------------------------------------------------- 22 | 23 | } 24 | -------------------------------------------------------------------------------- /7/kMeansInitCentroids.r: -------------------------------------------------------------------------------- 1 | kMeansInitCentroids <- function(X, K) { 2 | #KMEANSINITCENTROIDS This function initializes K centroids that are to be 3 | #used in K-Means on the dataset X 4 | # centroids <- KMEANSINITCENTROIDS(X, K) returns K initial centroids to be 5 | # used with the K-Means on the dataset X 6 | # 7 | 8 | # You should return these values correctly 9 | centroids <- matrix(0,K,dim(X)[2]) 10 | 11 | # ----------------------- YOUR CODE HERE ----------------------- 12 | # Instructions: You should set centroids to randomly chosen examples from 13 | # the dataset X 14 | # 15 | 16 | centroids 17 | # -------------------------------------------------------------- 18 | 19 | } 20 | -------------------------------------------------------------------------------- /7/pca-solution.r: -------------------------------------------------------------------------------- 1 | pca <- function(X) { 2 | #PCA Run principal component analysis on the dataset X 3 | # U_S_X <- pca(X) computes eigenvectors of the covariance matrix of X 4 | # Returns the eigenvectors U, the eigenvalues (on diagonal) in S as a list 5 | # 6 | 7 | # Useful values 8 | m <- dim(X)[1] 9 | n <- dim(X)[2] 10 | 11 | # You need to return the following variables correctly. 12 | U <- rep(0,n) 13 | S <- rep(0,n) 14 | 15 | # ----------------------- YOUR CODE HERE ----------------------- 16 | # Instructions: You should first compute the covariance matrix. Then, you 17 | # should use the "svd" function to compute the eigenvectors 18 | # and eigenvalues of the covariance matrix. 19 | # 20 | # Note: When computing the covariance matrix, remember to divide by m (the 21 | # number of examples). 
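# An illustrative check of the recipe described above, on made-up data that
# has already been mean-normalized: the eigenvectors from svd() of the
# (1 / m)-scaled covariance matrix agree, up to sign, with the principal
# axes returned by R's built-in prcomp(); only the eigenvalue scale differs.
set.seed(1)
X_demo <- scale(matrix(rnorm(40), 20, 2), center = TRUE, scale = FALSE)
m_demo <- nrow(X_demo)
U_demo <- svd((1 / m_demo) * t(X_demo) %*% X_demo)$u
P_demo <- prcomp(X_demo, center = FALSE)$rotation
max(abs(abs(U_demo) - abs(P_demo)))  # effectively zero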
22 | # 23 | 24 | Sigma <- (1 / m) * (t(X) %*% X) #covariance matrix 25 | USV <- svd(Sigma) 26 | 27 | USV 28 | # ------------------------------------------------------------------------- 29 | 30 | } 31 | -------------------------------------------------------------------------------- /7/pca.r: -------------------------------------------------------------------------------- 1 | pca <- function(X) { 2 | #PCA Run principal component analysis on the dataset X 3 | # U_S_X <- pca(X) computes eigenvectors of the covariance matrix of X 4 | # Returns the eigenvectors U, the eigenvalues (on diagonal) in S as a list 5 | # 6 | 7 | # Useful values 8 | m <- dim(X)[1] 9 | n <- dim(X)[2] 10 | 11 | # You need to return the following variables correctly. 12 | U <- rep(0,n) 13 | S <- rep(0,n) 14 | 15 | # ----------------------- YOUR CODE HERE ----------------------- 16 | # Instructions: You should first compute the covariance matrix. Then, you 17 | # should use the "svd" function to compute the eigenvectors 18 | # and eigenvalues of the covariance matrix. 19 | # 20 | # Note: When computing the covariance matrix, remember to divide by m (the 21 | # number of examples). 22 | # 23 | 24 | 25 | USV 26 | # ------------------------------------------------------------------------- 27 | 28 | } 29 | -------------------------------------------------------------------------------- /7/plotDataPoints.r: -------------------------------------------------------------------------------- 1 | plotDataPoints <- function (X, idx, K, addto = TRUE) { 2 | #PLOTDATAPOINTS plots data points in X, coloring them so that those with the same 3 | #index assignments in idx have the same color 4 | # PLOTDATAPOINTS(X, idx, K) plots data points in X, coloring them so that those 5 | # with the same index assignments in idx have the same color 6 | # 7 | 8 | if (addto) 9 | points(X, col = rainbow(K + 1)[idx], lwd = 1.3) 10 | else{ 11 | par(mar=c(3,3,2,1), mgp=c(1.5,.5,0)) 12 | plot(X, col = rainbow(K + 1)[idx], lwd = 1.3) 13 | } 14 | 15 | } 16 | -------------------------------------------------------------------------------- /7/plotProgresskMeans.r: -------------------------------------------------------------------------------- 1 | plotProgresskMeans <- function (X, centroids, previous, idx, K, i) { 2 | #PLOTPROGRESSKMEANS is a helper function that displays the progress of 3 | #k-Means as it is running. It is intended for use only with 2D data. 4 | # PLOTPROGRESSKMEANS(X, centroids, previous, idx, K, i) plots the data 5 | # points with colors assigned to each centroid. With the previous 6 | # centroids, it also plots a line between the previous locations and 7 | # current locations of the centroids. 
8 | # 9 | 10 | # Setup the plot 11 | plot(X,type = "n") 12 | title(main = sprintf('Iteration number %d',i)) 13 | 14 | # Plot the examples 15 | plotDataPoints(X, idx, K) 16 | 17 | # Plot the centroids as black x's 18 | points(centroids,pch = 4,lwd = 2) 19 | 20 | # Plot the history of the centroids with lines 21 | for (q in 2:dim(previous)[3]) #num of centriods so far 22 | { 23 | if (all(previous[,,q] == 0)) 24 | break 25 | else 26 | for (j in 1:dim(centroids)[1]) 27 | #num of centroids 3 28 | lines(rbind(previous[j,,q - 1], previous[j,,q]),lwd = 1.3) 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /7/projectData-solution.r: -------------------------------------------------------------------------------- 1 | projectData <- function(X, U, K) { 2 | #PROJECTDATA Computes the reduced data representation when projecting only 3 | #on to the top k eigenvectors 4 | # Z <- projectData(X, U, K) computes the projection of 5 | # the normalized inputs X into the reduced dimensional space spanned by 6 | # the first K columns of U. It returns the projected examples in Z. 7 | # 8 | 9 | # You need to return the following variables correctly. 10 | Z <- matrix(0,dim(X)[1],K) 11 | 12 | # ----------------------- YOUR CODE HERE ----------------------- 13 | # Instructions: Compute the projection of the data using only the top K 14 | # eigenvectors in U (first K columns). 15 | # For the i-th example X[i, ], the projection on to the k-th 16 | # eigenvector is given as follows: 17 | # x <- t(X[i,]) 18 | # projection_k <- t(x) %*% U[, k] 19 | # 20 | 21 | Z <- X %*% U[,1:K] 22 | Z 23 | # -------------------------------------------------------------- 24 | 25 | } 26 | -------------------------------------------------------------------------------- /7/projectData.r: -------------------------------------------------------------------------------- 1 | projectData <- function(X, U, K) { 2 | #PROJECTDATA Computes the reduced data representation when projecting only 3 | #on to the top k eigenvectors 4 | # Z <- projectData(X, U, K) computes the projection of 5 | # the normalized inputs X into the reduced dimensional space spanned by 6 | # the first K columns of U. It returns the projected examples in Z. 7 | # 8 | 9 | # You need to return the following variables correctly. 10 | Z <- matrix(0,dim(X)[1],K) 11 | 12 | # ----------------------- YOUR CODE HERE ----------------------- 13 | # Instructions: Compute the projection of the data using only the top K 14 | # eigenvectors in U (first K columns). 15 | # For the i-th example X[i, ], the projection on to the k-th 16 | # eigenvector is given as follows: 17 | # x <- t(X[i,]) 18 | # projection_k <- t(x) %*% U[, k] 19 | # 20 | 21 | 22 | Z 23 | # -------------------------------------------------------------- 24 | 25 | } 26 | -------------------------------------------------------------------------------- /7/recoverData-solution.r: -------------------------------------------------------------------------------- 1 | recoverData <- function(Z, U, K) { 2 | #RECOVERDATA Recovers an approximation of the original data when using the 3 | #projected data 4 | # X_rec <- RECOVERDATA(Z, U, K) recovers an approximation the 5 | # original data that has been reduced to K dimensions. It returns the 6 | # approximate reconstruction in X_rec. 7 | # 8 | 9 | # You need to return the following variables correctly. 
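# A small round-trip sketch (toy numbers, trivially orthonormal U) tying the
# projection above to the recovery implemented below: with K equal to the
# full dimension the original data come back exactly; with smaller K only
# the components along the first K columns of U are kept.
U_toy <- diag(2)
X_toy <- matrix(c(1, 2, 3, 4), 2, 2)
Z_toy <- X_toy %*% U_toy[, 1:2]          # the projectData() step
X_rec_toy <- Z_toy %*% t(U_toy[, 1:2])   # the recoverData() step
all.equal(X_rec_toy, X_toy)              # TRUE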
10 | X_rec <- matrix(0,dim(Z)[1],dim(U)[1]) 11 | 12 | # ----------------------- YOUR CODE HERE ----------------------- 13 | # Instructions: Compute the approximation of the data by projecting back 14 | # onto the original space using the top K eigenvectors in U. 15 | # 16 | # For the i-th example Z[i,], the (approximate) 17 | # recovered data for dimension j is given as follows: 18 | # v <- t(Z[i,]) 19 | # recovered_j <- t(v) %*% t(U[j, 1:K]) 20 | # 21 | # Notice that U[j, 1:K] is a row vector. 22 | # 23 | 24 | X_rec <- Z %*% t(U[,1:K]) 25 | X_rec 26 | 27 | # -------------------------------------------------------------- 28 | 29 | } 30 | -------------------------------------------------------------------------------- /7/recoverData.r: -------------------------------------------------------------------------------- 1 | recoverData <- function(Z, U, K) { 2 | #RECOVERDATA Recovers an approximation of the original data when using the 3 | #projected data 4 | # X_rec <- RECOVERDATA(Z, U, K) recovers an approximation the 5 | # original data that has been reduced to K dimensions. It returns the 6 | # approximate reconstruction in X_rec. 7 | # 8 | 9 | # You need to return the following variables correctly. 10 | X_rec <- matrix(0,dim(Z)[1],dim(U)[1]) 11 | 12 | # ----------------------- YOUR CODE HERE ----------------------- 13 | # Instructions: Compute the approximation of the data by projecting back 14 | # onto the original space using the top K eigenvectors in U. 15 | # 16 | # For the i-th example Z[i,], the (approximate) 17 | # recovered data for dimension j is given as follows: 18 | # v <- t(Z[i,]) 19 | # recovered_j <- t(v) %*% t(U[j, 1:K]) 20 | # 21 | # Notice that U[j, 1:K] is a row vector. 22 | # 23 | 24 | 25 | X_rec 26 | 27 | # -------------------------------------------------------------- 28 | 29 | } 30 | -------------------------------------------------------------------------------- /7/runkMeans.r: -------------------------------------------------------------------------------- 1 | runkMeans <- function(X, initial_centroids, max_iters, plot_progress = FALSE) { 2 | #RUNKMEANS runs the K-Means algorithm on data matrix X, where each row of X 3 | #is a single example 4 | # centroids_idx <- RUNKMEANS(X, initial_centroids, max_iters, ... 5 | # plot_progress) runs the K-Means algorithm on data matrix X, where each 6 | # row of X is a single example. It uses initial_centroids used as the 7 | # initial centroids. max_iters specifies the total number of interactions 8 | # of K-Means to execute. plot_progress is a true/false flag that 9 | # indicates if the function should also plot its progress as the 10 | # learning happens. This is set to false by default. runkMeans returns 11 | # centroids, a Kxn matrix of the computed centroids and idx, a m x 1 12 | # vector of centroid assignments (i.e. 
each entry in range [1..K]) 13 | # 14 | 15 | # Initialize values 16 | m <- dim(X)[1] 17 | n <- dim(X)[2] 18 | K <- dim(initial_centroids)[1] 19 | centroids <- initial_centroids 20 | previous_centroids <- array(0,dim = c(dim(centroids),max_iters + 1)) 21 | previous_centroids[,,1] <- centroids 22 | idx <- rep(0,m) 23 | 24 | # Run K-Means 25 | for (i in 1:max_iters) { 26 | # Output progress 27 | cat(sprintf('K-Means iteration %d/%d...\n', i, max_iters)) 28 | 29 | # For each example in X, assign it to the closest centroid 30 | idx <- findClosestCentroids(X, centroids) 31 | 32 | # Optionally, plot progress here 33 | if (plot_progress) { 34 | plotProgresskMeans(X, centroids, previous_centroids, idx, K, i) 35 | cat(sprintf('Press enter to continue.\n')) 36 | line <- readLines(con = stdin(),1) 37 | 38 | } 39 | 40 | # Given the memberships, compute new centroids 41 | centroids <- computeCentroids(X, idx, K) 42 | previous_centroids[,,i + 1] <- centroids 43 | } 44 | 45 | list(centroids = centroids, idx = idx) 46 | 47 | } 48 | -------------------------------------------------------------------------------- /7/submit.r: -------------------------------------------------------------------------------- 1 | submit = function(){ 2 | source("../lib/submitWithConfiguration.r") 3 | 4 | conf = list() 5 | conf$assignmentSlug = 'k-means-clustering-and-pca'; 6 | conf$itemName = 'K-Means Clustering and PCA'; 7 | conf$partArrays = c( 8 | '1', 9 | 'findClosestCentroids.r' , 10 | 'Find Closest Centroids (k-Means)', 11 | '2', 12 | 'computeCentroids.r' , 13 | 'Compute Centroid Means (k-Means)', 14 | '3', 15 | 'pca.r' , 16 | 'PCA', 17 | '4', 18 | 'projectData.r' , 19 | 'Project Data (PCA)', 20 | '5', 21 | 'recoverData.r' , 22 | 'Recover Data (PCA)') 23 | conf$partArrays = matrix(conf$partArrays, ncol = 3 , byrow = T) 24 | 25 | conf$output = output; 26 | 27 | submitWithConfiguration(conf); 28 | } 29 | 30 | output = function(partId, auxstring) { 31 | source("findClosestCentroids.r") 32 | source("computeCentroids.r") 33 | source("pca.r") 34 | source("projectData.r") 35 | source("recoverData.r") 36 | # Random Test Cases 37 | X = matrix(sin(seq(1,165)), 15, 11) 38 | Z = matrix(cos(seq(1,121)), 11, 11) 39 | C = Z[1:5,] 40 | idx = (1 + (1:15 %% 3)) 41 | if (partId == '1'){ 42 | idx = findClosestCentroids(X, C) 43 | out = paste0(sprintf('%0.5f ', c(idx)), collapse = '') 44 | }else if (partId == '2'){ 45 | centroids = computeCentroids(X, idx, 3) 46 | out = paste0(sprintf('%0.5f ', c(centroids)), collapse = '') 47 | }else if (partId == '3'){ 48 | USV = pca(X) 49 | list2env(USV,.GlobalEnv) 50 | out = paste0(sprintf('%0.5f ', abs(c(u,c(diag(d)) ) ) ), collapse = '') 51 | }else if (partId == '4'){ 52 | X_proj = projectData(X, Z, 5) 53 | out = paste0(sprintf('%0.5f ', c(X_proj)), collapse = '') 54 | }else if (partId == '5'){ 55 | X_rec = recoverData(X[,1:5], Z, 5) 56 | out = paste0(sprintf('%0.5f ', c(X_rec)), collapse = '') 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /8/checkCostFunction.r: -------------------------------------------------------------------------------- 1 | checkCostFunction <- function (lambda = 0) { 2 | #CHECKCOSTFUNCTION Creates a collaborative filering problem 3 | #to check your cost function and gradients 4 | # CHECKCOSTFUNCTION(lambda) Creates a collaborative filtering problem 5 | # to check your cost function and gradients, it will output the 6 | # analytical gradients produced by your code and the numerical gradients 7 | # (computed using 
computeNumericalGradient). These two gradient 8 | # computations should result in very similar values. 9 | 10 | 11 | ## Create small problem 12 | X_t <- matrix(runif(12),4,3) 13 | Theta_t <- matrix(runif(15),5,3) 14 | 15 | dY <- dim(Y) 16 | 17 | # Zap out most entries 18 | Y <- X_t %*% t(Theta_t) 19 | Y[matrix(runif(dY[1] * dY[2]),dY[1],dY[2]) > 0.5] <- 0 20 | 21 | dY <- dim(Y) 22 | R <- matrix(0,dY[1],dY[2]) 23 | R[Y != 0] <- 1 24 | 25 | ## Run Gradient Checking 26 | 27 | X <- matrix(rnorm(dim(X_t)[1] * dim(X_t)[2]),dim(X_t)[1],dim(X_t)[2]) 28 | Theta <- matrix(rnorm(dim(Theta_t)[1] * dim(Theta_t)[2]), 29 | dim(Theta_t)[1],dim(Theta_t)[2]) 30 | num_users <- dim(Y)[2] 31 | num_movies <- dim(Y)[1] 32 | num_features <- dim(Theta_t)[2] 33 | 34 | cF <- cofiCostFunc(Y, R, num_users, num_movies, num_features,lambda) 35 | 36 | numgrad <- 37 | computeNumericalGradient(cF , c(c(X),c(Theta))) 38 | 39 | cost <- cofiCostFunc(Y, R, num_users, 40 | num_movies, num_features, lambda)(c(c(X),c(Theta))) 41 | 42 | grad <- cofiGradFunc(Y, R, num_users, 43 | num_movies, num_features, lambda)(c(c(X),c(Theta))) 44 | 45 | 46 | print(cbind(numgrad, grad)) 47 | cat(sprintf(( 48 | 'The above two columns you get should be very similar. 49 | (Left-Your Numerical Gradient, Right-Analytical Gradient)\n\n' 50 | ) 51 | )) 52 | 53 | diff <- norm(numgrad - grad,"2") / norm(numgrad + grad,"2") 54 | 55 | cat( 56 | sprintf( 57 | 'If your backpropagation implementation is correct, then 58 | the relative difference will be small (less than 1e-9). \n 59 | Relative Difference: %g\n', diff 60 | ) 61 | ) 62 | 63 | } 64 | -------------------------------------------------------------------------------- /8/cofiCostFunc-solution.r: -------------------------------------------------------------------------------- 1 | cofiCostFunc <- function(Y, R, num_users, num_movies, 2 | num_features, lambda = 0) { 3 | #COFICOSTFUNC Collaborative filtering cost function 4 | # J <- COFICOSTFUNC(Y, R, num_users, num_movies, ... 5 | # num_features, lambda)(params) returns the cost for the 6 | # collaborative filtering problem. 7 | # 8 | function(params) { 9 | # Unfold the U and W matrices from params 10 | X <- 11 | matrix(params[1:(num_movies * num_features)], num_movies, num_features) 12 | Theta <- 13 | matrix(params[(num_movies * num_features + 1):length(params)],num_users, num_features) 14 | 15 | 16 | # You need to return the following values correctly 17 | J <- 0 18 | 19 | # ----------------------- YOUR CODE HERE ----------------------- 20 | # Instructions: Compute the cost function for collaborative 21 | # filtering. Concretely, you should first implement the cost 22 | # function (without regularization) and make sure it 23 | # matches our costs. 24 | # 25 | # Notes: X - num_movies x num_features matrix of movie features 26 | # Theta - num_users x num_features matrix of user features 27 | # Y - num_movies x num_users matrix of user ratings of movies 28 | # R - num_movies x num_users matrix, where R(i, j) <- 1 if the 29 | # i-th movie was rated by the j-th user 30 | 31 | J <- (1 / 2) * sum(((X %*% t(Theta)) * R - Y * R) ^ 2) + 32 | (lambda / 2 * sum(Theta ^ 2)) + (lambda / 2 * sum(X ^ 2)) 33 | # Only predict rating X*Theta' if user has rated (i.e. R=1) 34 | J 35 | 36 | # -------------------------------------------------------------- 37 | } 38 | } 39 | 40 | cofiGradFunc <- function(Y, R, num_users, num_movies, 41 | num_features, lambda = 0) { 42 | #cofiGradFunc returns the gradient for the 43 | # collaborative filtering problem. 
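# A tiny worked example of the cost computed above (made-up 2 x 2 ratings,
# lambda = 0, not part of the exercise data): only entries with R == 1
# contribute to the squared error.
X_eg     <- diag(2)                       # movie features (2 movies x 2 features)
Theta_eg <- matrix(c(1, 1, 0, 1), 2, 2)   # user features  (2 users  x 2 features)
Y_eg     <- matrix(c(5, 0, 0, 3), 2, 2)   # ratings; 0 means "not rated"
R_eg     <- (Y_eg != 0) * 1
pred_eg  <- X_eg %*% t(Theta_eg)          # predicted ratings
(1 / 2) * sum((pred_eg * R_eg - Y_eg * R_eg) ^ 2)  # ((5 - 1)^2 + (3 - 1)^2) / 2 = 10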
44 | function(params) { 45 | # Unfold the U and W matrices from params 46 | X <- 47 | matrix(params[1:(num_movies * num_features)], num_movies, num_features) 48 | Theta <- 49 | matrix(params[(num_movies * num_features + 1):length(params)], 50 | num_users, num_features) 51 | 52 | 53 | # You need to return the following values correctly 54 | X_grad <- matrix(0,dim(X)[1],dim(X)[2]) 55 | Theta_grad <- matrix(0, dim(Theta)[1], dim(Theta)[2]) 56 | 57 | # ----------------------- YOUR CODE HERE ----------------------- 58 | # Instructions: Compute the gradient for collaborative 59 | # filtering. you should implement the 60 | # gradient and use the checkCostFunction routine to check 61 | # that the gradient is correct. Finally, you should implement 62 | # regularization. 63 | # 64 | # Notes: X - num_movies x num_features matrix of movie features 65 | # Theta - num_users x num_features matrix of user features 66 | # Y - num_movies x num_users matrix of user ratings of movies 67 | # R - num_movies x num_users matrix, where R[i, j] <- 1 if the 68 | # i-th movie was rated by the j-th user 69 | # 70 | # You should set the following variables correctly: 71 | # 72 | # X_grad - num_movies x num_features matrix, containing the 73 | # partial derivatives w.r.t. to each element of X 74 | # Theta_grad - num_users x num_features matrix, containing the 75 | # partial derivatives w.r.t. to each element of Theta 76 | # 77 | 78 | X_grad <- (((X %*% t(Theta)) * R) %*% Theta - (Y * R) %*% Theta) + lambda * X 79 | Theta_grad <- t((t(X) %*% ((X %*% t(Theta)) * R) - t(X) %*% (Y * R))) + lambda * Theta 80 | 81 | grad <- c(c(X_grad),c(Theta_grad)) 82 | grad 83 | # -------------------------------------------------------------- 84 | } 85 | } -------------------------------------------------------------------------------- /8/cofiCostFunc.r: -------------------------------------------------------------------------------- 1 | cofiCostFunc <- function(Y, R, num_users, num_movies, 2 | num_features, lambda = 0) { 3 | #COFICOSTFUNC Collaborative filtering cost function 4 | # J <- COFICOSTFUNC(Y, R, num_users, num_movies, ... 5 | # num_features, lambda)(params) returns the cost for the 6 | # collaborative filtering problem. 7 | # 8 | function(params) { 9 | # Unfold the U and W matrices from params 10 | X <- 11 | matrix(params[1:(num_movies * num_features)], num_movies, num_features) 12 | Theta <- 13 | matrix(params[(num_movies * num_features + 1):length(params)],num_users, num_features) 14 | 15 | 16 | # You need to return the following values correctly 17 | J <- 0 18 | 19 | # ----------------------- YOUR CODE HERE ----------------------- 20 | # Instructions: Compute the cost function for collaborative 21 | # filtering. Concretely, you should first implement the cost 22 | # function (without regularization) and make sure it 23 | # matches our costs. 24 | # 25 | # Notes: X - num_movies x num_features matrix of movie features 26 | # Theta - num_users x num_features matrix of user features 27 | # Y - num_movies x num_users matrix of user ratings of movies 28 | # R - num_movies x num_users matrix, where R(i, j) <- 1 if the 29 | # i-th movie was rated by the j-th user 30 | 31 | 32 | J 33 | 34 | # -------------------------------------------------------------- 35 | } 36 | } 37 | 38 | cofiGradFunc <- function(Y, R, num_users, num_movies, 39 | num_features, lambda = 0) { 40 | #cofiGradFunc returns the gradient for the 41 | # collaborative filtering problem. 
42 | function(params) { 43 | # Unfold the U and W matrices from params 44 | X <- 45 | matrix(params[1:(num_movies * num_features)], num_movies, num_features) 46 | Theta <- 47 | matrix(params[(num_movies * num_features + 1):length(params)], 48 | num_users, num_features) 49 | 50 | 51 | # You need to return the following values correctly 52 | X_grad <- matrix(0,dim(X)[1],dim(X)[2]) 53 | Theta_grad <- matrix(0, dim(Theta)[1], dim(Theta)[2]) 54 | 55 | # ----------------------- YOUR CODE HERE ----------------------- 56 | # Instructions: Compute the gradient for collaborative 57 | # filtering. you should implement the 58 | # gradient and use the checkCostFunction routine to check 59 | # that the gradient is correct. Finally, you should implement 60 | # regularization. 61 | # 62 | # Notes: X - num_movies x num_features matrix of movie features 63 | # Theta - num_users x num_features matrix of user features 64 | # Y - num_movies x num_users matrix of user ratings of movies 65 | # R - num_movies x num_users matrix, where R[i, j] <- 1 if the 66 | # i-th movie was rated by the j-th user 67 | # 68 | # You should set the following variables correctly: 69 | # 70 | # X_grad - num_movies x num_features matrix, containing the 71 | # partial derivatives w.r.t. to each element of X 72 | # Theta_grad - num_users x num_features matrix, containing the 73 | # partial derivatives w.r.t. to each element of Theta 74 | # 75 | 76 | 77 | grad 78 | # -------------------------------------------------------------- 79 | } 80 | } -------------------------------------------------------------------------------- /8/computeNumericalGradient.r: -------------------------------------------------------------------------------- 1 | computeNumericalGradient <- function(J, theta) { 2 | #COMPUTENUMERICALGRADIENT Computes the gradient using "finite differences" 3 | #and gives us a numerical estimate of the gradient. 4 | # numgrad <- COMPUTENUMERICALGRADIENT(J, theta) computes the numerical 5 | # gradient of the J around theta. Calling y <- function(theta) should end 6 | # return the function value at theta. 7 | 8 | # Notes: The following code implements numerical gradient checking, and 9 | # returns the numerical gradient.It sets numgrad[i] to (a numerical 10 | # approximation of) the partial derivative of J with respect to the 11 | # i-th input argument, evaluated at theta. (i.e., numgrad[i] should 12 | # be the (approximately) the partial derivative of J with respect 13 | # to theta[i].) 
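# A self-contained illustration of the central-difference idea described
# above, with a hypothetical cost J(theta) = sum(theta^2) whose exact
# gradient is 2 * theta; the same step size e = 1e-4 is used below.
J_eg     <- function(theta) sum(theta ^ 2)
theta_eg <- c(1, -2, 3)
e_eg     <- 1e-4
numgrad_eg <- sapply(seq_along(theta_eg), function(p) {
  perturb <- replace(rep(0, length(theta_eg)), p, e_eg)
  (J_eg(theta_eg + perturb) - J_eg(theta_eg - perturb)) / (2 * e_eg)
})
max(abs(numgrad_eg - 2 * theta_eg))  # ~ 0 (limited only by floating-point rounding)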
14 | # 15 | 16 | numgrad <- rep(0,length(theta)) 17 | perturb <- numgrad 18 | e <- 1e-4 19 | for (p in 1:length(theta)) { 20 | # Set perturbation vector 21 | perturb[p] <- e 22 | loss1 <- J(theta - perturb) 23 | loss2 <- J(theta + perturb) 24 | # Compute Numerical Gradient 25 | numgrad[p] <- (loss2 - loss1) / (2 * e) 26 | perturb[p] <- 0 27 | } 28 | numgrad 29 | } 30 | -------------------------------------------------------------------------------- /8/estimateGaussian-solution.r: -------------------------------------------------------------------------------- 1 | estimateGaussian <- function(X) { 2 | #ESTIMATEGAUSSIAN This function estimates the parameters of a 3 | #Gaussian distribution using the data in X 4 | # mu_sigma2 <- estimateGaussian(X), 5 | # The input X is the dataset with each n-dimensional data point in one row 6 | # The output is an n-dimensional vector mu, the mean of the data set 7 | # and the variances sigma^2, an n x 1 vector 8 | # 9 | 10 | # Useful variables 11 | m <- dim(X)[1] 12 | n <- dim(X)[2] 13 | 14 | # You should return these values correctly 15 | mu <- rep(0,n) 16 | sigma2 <- rep(0,n) 17 | 18 | # ----------------------- YOUR CODE HERE ----------------------- 19 | # Instructions: Compute the mean of the data and the variances 20 | # In particular, mu[i] should contain the mean of 21 | # the data for the i-th feature and sigma2[i] 22 | # should contain variance of the i-th feature. 23 | # 24 | 25 | mu <- 1 / m * apply(X,2,sum) 26 | sigma2 <- apply(X,2,var) * (m - 1) / m 27 | 28 | list(mu = mu, sigma2 = sigma2) 29 | # -------------------------------------------------------------- 30 | } 31 | -------------------------------------------------------------------------------- /8/estimateGaussian.r: -------------------------------------------------------------------------------- 1 | estimateGaussian <- function(X) { 2 | #ESTIMATEGAUSSIAN This function estimates the parameters of a 3 | #Gaussian distribution using the data in X 4 | # mu_sigma2 <- estimateGaussian(X), 5 | # The input X is the dataset with each n-dimensional data point in one row 6 | # The output is an n-dimensional vector mu, the mean of the data set 7 | # and the variances sigma^2, an n x 1 vector 8 | # 9 | 10 | # Useful variables 11 | m <- dim(X)[1] 12 | n <- dim(X)[2] 13 | 14 | # You should return these values correctly 15 | mu <- rep(0,n) 16 | sigma2 <- rep(0,n) 17 | 18 | # ----------------------- YOUR CODE HERE ----------------------- 19 | # Instructions: Compute the mean of the data and the variances 20 | # In particular, mu[i] should contain the mean of 21 | # the data for the i-th feature and sigma2[i] 22 | # should contain variance of the i-th feature. 23 | # 24 | 25 | 26 | list(mu = mu, sigma2 = sigma2) 27 | # -------------------------------------------------------------- 28 | } 29 | -------------------------------------------------------------------------------- /8/ex8-solution.r: -------------------------------------------------------------------------------- 1 | ## Machine Learning Online Class 2 | # Exercise 8 | Anomaly Detection and Collaborative Filtering 3 | # 4 | # Instructions 5 | # ------------ 6 | # 7 | # This file contains code that helps you get started on the 8 | # exercise. You will need to complete the following functions: 9 | # 10 | # estimateGaussian.r 11 | # selectThreshold.r 12 | # cofiCostFunc.r 13 | # 14 | # For this exercise, you will not need to change any code in this file, 15 | # or any other files other than those mentioned above. 
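# A quick numeric illustration (made-up numbers) of the variance convention
# used by estimateGaussian above: R's var() divides by (m - 1), while the
# exercise expects the maximum-likelihood estimate that divides by m, hence
# the (m - 1) / m correction in the solution file.
x_eg <- c(2, 4, 6)
var(x_eg)                      # 4      (sample variance, divides by m - 1)
var(x_eg) * (3 - 1) / 3        # 2.6667 (maximum-likelihood variance, divides by m)
mean((x_eg - mean(x_eg)) ^ 2)  # 2.6667 (the same estimate computed directly)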
16 | # 17 | 18 | ## Initialization 19 | rm(list=ls()) 20 | sources <- c("../lib/bsxfun.r","estimateGaussian.r","../lib/meshgrid.r", 21 | "multivariateGaussian.r","../lib/pinv.r","selectThreshold.r", 22 | "visualizeFit.r") 23 | 24 | for (i in 1:length(sources)) { 25 | cat(paste("Loading ",sources[i],"\n")) 26 | source(sources[i]) 27 | } 28 | 29 | ## ------------------ Part 1: Load Example Dataset -------------------- 30 | # We start this exercise by using a small dataset that is easy to 31 | # visualize. 32 | # 33 | # Our example case consists of 2 network server statistics across 34 | # several machines: the latency and throughput of each machine. 35 | # This exercise will help us find possibly faulty (or very fast) machines. 36 | # 37 | 38 | cat(sprintf('Visualizing example dataset for outlier detection.\n\n')) 39 | 40 | # The following command loads the dataset. You should now have the 41 | # variables X, Xval, yval in your environment 42 | load("ex8data1.rda") 43 | list2env(data,.GlobalEnv) 44 | rm(data) 45 | 46 | # Visualize the example dataset 47 | plot(X[,1], X[,2], xlim=c(0,30),ylim=c(0,30), pch=4, col="blue", 48 | xlab='Latency (ms)', ylab='Throughput (mb/s)') 49 | 50 | cat(sprintf('Program paused. Press enter to continue.\n')) 51 | line <- readLines(con = stdin(),1) 52 | 53 | 54 | ## ------------------ Part 2: Estimate the dataset statistics -------------------- 55 | # For this exercise, we assume a Gaussian distribution for the dataset. 56 | # 57 | # We first estimate the parameters of our assumed Gaussian distribution, 58 | # then compute the probabilities for each of the points and then visualize 59 | # both the overall distribution and where each of the points falls in 60 | # terms of that distribution. 61 | # 62 | cat(sprintf('Visualizing Gaussian fit.\n\n')) 63 | 64 | # Estimate mu and sigma2 65 | eG <- estimateGaussian(X) 66 | mu <- eG$mu 67 | sigma2 <- eG$sigma2 68 | 69 | # Returns the density of the multivariate normal at each data point (row) 70 | # of X 71 | p <- multivariateGaussian(X, mu, sigma2) 72 | 73 | # Visualize the fit 74 | visualizeFit(X, mu, sigma2) 75 | 76 | cat(sprintf('Program paused. Press enter to continue.\n')) 77 | line <- readLines(con = stdin(),1) 78 | 79 | ## ------------------ Part 3: Find Outliers -------------------- 80 | # Now you will find a good epsilon threshold using a cross-validation set 81 | # probabilities given the estimated Gaussian distribution 82 | # 83 | 84 | pval <- multivariateGaussian(Xval, mu, sigma2) 85 | 86 | sT <- selectThreshold(yval, pval) 87 | epsilon <- sT$bestEpsilon 88 | F1 <- sT$bestF1 89 | 90 | cat(sprintf('Best epsilon found using cross-validation: %e\n', epsilon)) 91 | cat(sprintf('Best F1 on Cross Validation Set: %f\n', F1)) 92 | cat(sprintf('(you should see a value epsilon of about 8.99e-05)\n\n')) 93 | 94 | # Find the outliers in the training set and plot the 95 | outliers <- which(p < epsilon) 96 | 97 | # Draw a red circle around those outliers 98 | points(X[outliers, 1], X[outliers, 2], col="red", lwd=2, cex=2 ) 99 | 100 | cat(sprintf('Program paused. Press enter to continue.\n')) 101 | line <- readLines(con = stdin(),1) 102 | 103 | ## ------------------ Part 4: Multidimensional Outliers -------------------- 104 | # We will now use the code from the previous part and apply it to a 105 | # harder problem in which more features describe each datapoint and only 106 | # some features indicate whether a point is an outlier. 107 | # 108 | 109 | # Loads the second dataset. 
You should now have the 110 | # variables X, Xval, yval in your environment 111 | load("ex8data2.rda") 112 | list2env(data,.GlobalEnv) 113 | rm(data) 114 | 115 | # Apply the same steps to the larger dataset 116 | eG <- estimateGaussian(X) 117 | mu <- eG$mu 118 | sigma2 <- eG$sigma2 119 | rm(eG) 120 | 121 | # Training set 122 | p <- multivariateGaussian(X, mu, sigma2) 123 | 124 | # Cross-validation set 125 | pval <- multivariateGaussian(Xval, mu, sigma2) 126 | 127 | # Find the best threshold 128 | sT <- selectThreshold(yval, pval) 129 | epsilon <- sT$bestEpsilon 130 | F1 <- sT$bestF1 131 | 132 | cat(sprintf('Best epsilon found using cross-validation: %e\n', epsilon)) 133 | cat(sprintf('Best F1 on Cross Validation Set: %f\n', F1)) 134 | cat(sprintf('# Outliers found: %d\n', sum(p < epsilon))) 135 | cat(sprintf(' (you should see a value epsilon of about 1.38e-18)\n\n')) 136 | line <- readLines(con = stdin(),1) 137 | -------------------------------------------------------------------------------- /8/ex8.rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | -------------------------------------------------------------------------------- /8/ex8_movieParams.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/faridcher/ml-course/65075c5427725bf2caeb25e5c6809ee34fcfdbe0/8/ex8_movieParams.rda -------------------------------------------------------------------------------- /8/ex8_movies.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/faridcher/ml-course/65075c5427725bf2caeb25e5c6809ee34fcfdbe0/8/ex8_movies.rda -------------------------------------------------------------------------------- /8/ex8data1.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/faridcher/ml-course/65075c5427725bf2caeb25e5c6809ee34fcfdbe0/8/ex8data1.rda -------------------------------------------------------------------------------- /8/ex8data2.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/faridcher/ml-course/65075c5427725bf2caeb25e5c6809ee34fcfdbe0/8/ex8data2.rda -------------------------------------------------------------------------------- /8/loadMovieList.r: -------------------------------------------------------------------------------- 1 | loadMovieList <- function() { 2 | #GETMOVIELIST reads the fixed movie list in movie.txt and returns a 3 | #cell array of the words 4 | # movieList <- GETMOVIELIST() reads the fixed movie list in movie.txt 5 | # and returns a cell array of the words in movieList. 
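# A small illustration (made-up lines in the same format as movie_ids.txt)
# of the regular expression used just below to strip the numeric index from
# each entry:
demo_lines <- c("1 Toy Story (1995)", "2 GoldenEye (1995)")
gsub('^[0-9]+ ', '', demo_lines)  # "Toy Story (1995)" "GoldenEye (1995)"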
6 | 7 | 8 | ## Read the fixed movie list 9 | fName <- 'movie_ids.txt' 10 | file_contents <- readChar(fName,file.info(fName)$size) 11 | 12 | movieList <- strsplit(file_contents,split = '\n')[[1]] 13 | movieList <- gsub('^[0-9]+ ','',movieList) 14 | 15 | movieList 16 | 17 | } 18 | -------------------------------------------------------------------------------- /8/movie_ids.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/faridcher/ml-course/65075c5427725bf2caeb25e5c6809ee34fcfdbe0/8/movie_ids.txt -------------------------------------------------------------------------------- /8/multivariateGaussian-solution.r: -------------------------------------------------------------------------------- 1 | multivariateGaussian <- function(X, mu, Sigma2) { 2 | #MULTIVARIATEGAUSSIAN Computes the probability density function of the 3 | #multivariate gaussian distribution. 4 | # p <- MULTIVARIATEGAUSSIAN(X, mu, Sigma2) Computes the probability 5 | # density function of the examples X under the multivariate gaussian 6 | # distribution with parameters mu and Sigma2. If Sigma2 is a matrix, it is 7 | # treated as the covariance matrix. If Sigma2 is a vector, it is treated 8 | # as the sigma^2 values of the variances in each dimension (a diagonal 9 | # covariance matrix) 10 | # 11 | 12 | k <- length(mu) 13 | 14 | if (is.vector(Sigma2)) 15 | Sigma2 <- diag(Sigma2) 16 | 17 | X <- bsxfun('-', X, mu) 18 | 19 | p <- (2 * pi) ^ (-k / 2) * det(Sigma2) ^ (-0.5) * 20 | exp(-0.5 * apply( 21 | bsxfun("*", X %*% pinv(Sigma2), X) 22 | ,1, sum) 23 | ) 24 | #simple example 25 | #[1 2]*[1 2;1 2]*[1;2] 26 | p 27 | } 28 | -------------------------------------------------------------------------------- /8/multivariateGaussian.r: -------------------------------------------------------------------------------- 1 | multivariateGaussian <- function(X, mu, Sigma2) { 2 | #MULTIVARIATEGAUSSIAN Computes the probability density function of the 3 | #multivariate gaussian distribution. 4 | # p <- MULTIVARIATEGAUSSIAN(X, mu, Sigma2) Computes the probability 5 | # density function of the examples X under the multivariate gaussian 6 | # distribution with parameters mu and Sigma2. If Sigma2 is a matrix, it is 7 | # treated as the covariance matrix. If Sigma2 is a vector, it is treated 8 | # as the sigma^2 values of the variances in each dimension (a diagonal 9 | # covariance matrix) 10 | # 11 | 12 | k <- length(mu) 13 | 14 | if (is.vector(Sigma2)) 15 | Sigma2 <- diag(Sigma2) 16 | 17 | X <- bsxfun('-', X, mu) 18 | p <- (2 * pi) ^ (-k / 2) * det(Sigma2) ^ (-0.5) * 19 | exp(-0.5 * apply(bsxfun("*", X %*% pinv(Sigma2), X),1, sum)) 20 | 21 | p 22 | } -------------------------------------------------------------------------------- /8/normalizeRatings.r: -------------------------------------------------------------------------------- 1 | normalizeRatings <- function(Y, R) { 2 | #NORMALIZERATINGS Preprocess data by subtracting mean rating for every 3 | #movie (every row) 4 | # Ynorm_Ymean <- NORMALIZERATINGS(Y, R) normalizes Y so that each movie 5 | # has a rating of 0 on average, and returns the mean rating in Ymean.
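# A tiny worked example (one made-up movie rated by two of three users) of
# the mean-normalization described above: only rated entries (R == 1) define
# the mean, and the normalized rated entries average to zero.
Y_toy <- matrix(c(5, 0, 4), 1, 3)   # ratings; 0 means "not rated"
R_toy <- matrix(c(1, 0, 1), 1, 3)
Ymean_toy <- mean(Y_toy[1, R_toy[1, ] == 1])  # 4.5
Ynorm_toy <- matrix(0, 1, 3)
Ynorm_toy[1, R_toy[1, ] == 1] <- Y_toy[1, R_toy[1, ] == 1] - Ymean_toy
Ynorm_toy  # 0.5 0 -0.5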
6 | # 7 | 8 | m <- dim(Y)[1] 9 | n <- dim(Y)[2] 10 | 11 | Ymean <- rep(0,m) 12 | Ynorm <- matrix(0,m,n) 13 | for (i in 1:m) { 14 | Ymean[i] <- mean(Y[i, R[i,] == 1]) 15 | Ynorm[i, R[i,]] <- Y[i, R[i,] == 1] - Ymean[i] 16 | } 17 | list(Ynorm = Ynorm, Ymean = Ymean) 18 | } -------------------------------------------------------------------------------- /8/selectThreshold-solution.r: -------------------------------------------------------------------------------- 1 | selectThreshold <- function(yval, pval) { 2 | #SELECTTHRESHOLD Find the best threshold (epsilon) to use for selecting 3 | #outliers 4 | # bestEpsilon_bestF1 <- SELECTTHRESHOLD(yval, pval) finds the best 5 | # threshold to use for selecting outliers based on the results from a 6 | # validation set (pval) and the ground truth (yval). 7 | # 8 | 9 | bestEpsilon <- 0 10 | bestF1 <- 0 11 | F1 <- 0 12 | 13 | stepsize <- (max(pval) - min(pval)) / 1000 14 | for (epsilon in seq(min(pval),max(pval),stepsize)) { 15 | # ----------------------- YOUR CODE HERE ----------------------- 16 | # Instructions: Compute the F1 score of choosing epsilon as the 17 | # threshold and place the value in F1. The code at the 18 | # end of the loop will compare the F1 score for this 19 | # choice of epsilon and set it to be the best epsilon if 20 | # it is better than the current choice of epsilon. 21 | # 22 | # Note: You can use predictions <- (pval < epsilon) to get a binary vector 23 | # of 0's and 1's of the outlier predictions 24 | 25 | predictions <- (pval < epsilon) 26 | tp <- sum((yval == 1) & (predictions == 1)) 27 | fp <- sum((yval == 0) & (predictions == 1)) 28 | fn <- sum((yval == 1) & (predictions == 0)) 29 | 30 | prec <- tp / (tp + fp) 31 | rec <- tp / (tp + fn) 32 | 33 | F1 <- 2 * prec * rec / (prec + rec) 34 | 35 | # -------------------------------------------------------------- 36 | 37 | if (!is.na(F1) && !is.na(bestF1) && F1 > bestF1) { 38 | bestF1 <- F1 39 | bestEpsilon <- epsilon 40 | } 41 | } 42 | list(bestEpsilon = bestEpsilon, bestF1 = bestF1) 43 | 44 | } 45 | 46 | # selectThreshold2 <- function(yval, pval) { 47 | # 48 | # function (epsilon){ 49 | # predictions <- (pval < epsilon) 50 | # tp <- sum((yval == 1) & (predictions == 1)) 51 | # fp <- sum((yval == 0) & (predictions == 1)) 52 | # fn <- sum((yval == 1) & (predictions == 0)) 53 | # 54 | # prec <- tp / (tp + fp) 55 | # rec <- tp / (tp + fn) 56 | # 57 | # F1 <- 2 * prec * rec / (prec + rec) 58 | # if(is.na(F1)) 59 | # F1 <- -1000 60 | # F1 61 | # } 62 | # } 63 | # a1 <- selectThreshold(yval, pval) 64 | # sT <- selectThreshold2(yval, pval) 65 | # stepsize <- (max(pval) - min(pval)) / 1000 66 | # op<-optim(par=stepsize,fn = sT ,method = "Brent",lower = stepsize,upper = max(pval), 67 | # control = list(ndeps=stepsize ,fnscale=-1)) 68 | # 69 | # op2 <- optimise(f = sT, interval = c(0, max(pval)),maximum = TRUE) #Brent 70 | # 71 | # op<-optim(par=0,fn = sT ,method = "L-BFGS-B",lower = min(pval),upper = max(pval), 72 | # control = list(fnscale=-1)) 73 | -------------------------------------------------------------------------------- /8/selectThreshold.r: -------------------------------------------------------------------------------- 1 | selectThreshold <- function(yval, pval) { 2 | #SELECTTHRESHOLD Find the best threshold (epsilon) to use for selecting 3 | #outliers 4 | # bestEpsilon_bestF1 <- SELECTTHRESHOLD(yval, pval) finds the best 5 | # threshold to use for selecting outliers based on the results from a 6 | # validation set (pval) and the ground truth (yval). 
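# A tiny worked example (made-up labels and predictions) of the F1 score
# computed above: one true positive, one false positive and one false
# negative give precision = recall = 0.5 and hence F1 = 0.5.
yval_eg <- c(1, 1, 0, 0)
pred_eg <- c(1, 0, 1, 0)                   # would come from (pval < epsilon)
tp_eg <- sum(yval_eg == 1 & pred_eg == 1)  # 1
fp_eg <- sum(yval_eg == 0 & pred_eg == 1)  # 1
fn_eg <- sum(yval_eg == 1 & pred_eg == 0)  # 1
prec_eg <- tp_eg / (tp_eg + fp_eg)
rec_eg  <- tp_eg / (tp_eg + fn_eg)
2 * prec_eg * rec_eg / (prec_eg + rec_eg)  # 0.5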
7 | # 8 | 9 | bestEpsilon <- 0 10 | bestF1 <- 0 11 | F1 <- 0 12 | 13 | stepsize <- (max(pval) - min(pval)) / 1000 14 | for (epsilon in seq(min(pval),max(pval),stepsize)) { 15 | # ----------------------- YOUR CODE HERE ----------------------- 16 | # Instructions: Compute the F1 score of choosing epsilon as the 17 | # threshold and place the value in F1. The code at the 18 | # end of the loop will compare the F1 score for this 19 | # choice of epsilon and set it to be the best epsilon if 20 | # it is better than the current choice of epsilon. 21 | # 22 | # Note: You can use predictions <- (pval < epsilon) to get a binary vector 23 | # of 0's and 1's of the outlier predictions 24 | 25 | 26 | 27 | # -------------------------------------------------------------- 28 | 29 | if (!is.na(F1) && !is.na(bestF1) && F1 > bestF1) { 30 | bestF1 <- F1 31 | bestEpsilon <- epsilon 32 | } 33 | } 34 | list(bestEpsilon = bestEpsilon, bestF1 = bestF1) 35 | 36 | } 37 | -------------------------------------------------------------------------------- /8/submit.r: -------------------------------------------------------------------------------- 1 | submit = function(){ 2 | source("../lib/submitWithConfiguration.r") 3 | 4 | conf = list() 5 | conf$assignmentSlug = 'anomaly-detection-and-recommender-systems' 6 | conf$itemName = 'Anomaly Detection and Recommender Systems' 7 | conf$partArrays = c( 8 | '1', 9 | 'estimateGaussian.r' , 10 | 'Estimate Gaussian Parameters', 11 | '2', 12 | 'selectThreshold.r' , 13 | 'Select Threshold', 14 | '3', 15 | 'cofiCostFunc.r' , 16 | 'Collaborative Filtering Cost', 17 | '4', 18 | 'cofiCostFunc.r' , 19 | 'Collaborative Filtering Gradient', 20 | '5', 21 | 'cofiCostFunc.r' , 22 | 'Regularized Cost', 23 | '6', 24 | 'cofiCostFunc.r' , 25 | 'Regularized Gradient') 26 | 27 | conf$partArrays = matrix(conf$partArrays, ncol = 3 , byrow = T) 28 | 29 | conf$output = output 30 | 31 | submitWithConfiguration(conf) 32 | } 33 | 34 | output = function(partId, auxstring) { 35 | # Random Test Cases 36 | n_u = 3; n_m = 4; n = 5; 37 | X = matrix(sin(seq(1,n_m*n)), n_m, n) 38 | Theta = matrix(cos(seq(1,n_u*n)), n_u, n) 39 | Y = matrix(sin(seq(1,2*n_m*n_u,2)), n_m, n_u) 40 | R = (Y > 0.5) 41 | pval = c(abs(c(Y)),0.001,1) 42 | yval = c(c(R), 1, 0) 43 | params = c(c(X),c(Theta)) 44 | 45 | source("estimateGaussian.r") 46 | source("selectThreshold.r") 47 | source("cofiCostFunc.r") 48 | 49 | if (partId == '1'){ 50 | eg = estimateGaussian(X) 51 | out = paste0(sprintf('%0.5f ', c(eg$mu,eg$sigma2)), collapse = '') 52 | }else if (partId == '2'){ 53 | stt = selectThreshold(yval, pval) 54 | out = paste0(sprintf('%0.5f ', c(stt$bestEpsilon,stt$bestF1)), collapse = '') 55 | }else if (partId == '3'){ 56 | J = cofiCostFunc(Y, R, n_u, n_m, n, 0)(params) 57 | out = sprintf('%0.5f ', c(J)) 58 | }else if (partId == '4'){ 59 | grad = cofiGradFunc(Y, R, n_u, n_m, n, 0)(params) 60 | out = paste0(sprintf('%0.5f ', c(grad)),collapse = '') 61 | }else if (partId == '5'){ 62 | J = cofiCostFunc(Y, R, n_u, n_m, n, 1.5)(params) 63 | out = sprintf('%0.5f ', c(J)) 64 | }else if (partId == '6'){ 65 | grad = cofiGradFunc( Y, R, n_u, n_m, n, 1.5)(params) 66 | out = paste0(sprintf('%0.5f ', c(grad)), collapse = '') 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /8/visualizeFit-solution.r: -------------------------------------------------------------------------------- 1 | visualizeFit <- function (X, mu, sigma2) { 2 | #VISUALIZEFIT Visualize the dataset and its estimated distribution. 
3 | # VISUALIZEFIT(X, p, mu, sigma2) This visualization shows you the 4 | # probability density function of the Gaussian distribution. Each example 5 | # has a location (x1, x2) that depends on its feature values. 6 | # 7 | 8 | #X <- seq(1,5,1) 9 | #Y <- seq(6,11) 10 | XY <- seq(0,35,.5) 11 | 12 | mg <- meshgrid(XY, XY) 13 | 14 | X1 <- c(mg$X) 15 | X2 <- c(mg$Y) 16 | 17 | Z <- multivariateGaussian(cbind(X1,X2),mu,sigma2) 18 | Z <- matrix(Z,length(XY),length(XY)) 19 | 20 | plot( 21 | X[,1], X[,2], xlim = c(0,30),ylim = c(0,30), pch = 4, col = "blue", 22 | xlab = 'Latency (ms)', ylab = 'Throughput (mb/s)' 23 | ) 24 | 25 | # Do not plot if there are infinities 26 | if (sum(is.infinite(Z)) == 0) 27 | contour( 28 | XY, XY, Z, levels = 10 ^ t(seq(-20,0,3)) ,add = TRUE, drawlabels = FALSE 29 | ) 30 | 31 | } -------------------------------------------------------------------------------- /8/visualizeFit.r: -------------------------------------------------------------------------------- 1 | visualizeFit <- function (X, mu, sigma2) { 2 | #VISUALIZEFIT Visualize the dataset and its estimated distribution. 3 | # VISUALIZEFIT(X, p, mu, sigma2) This visualization shows you the 4 | # probability density function of the Gaussian distribution. Each example 5 | # has a location (x1, x2) that depends on its feature values. 6 | # 7 | 8 | XY <- seq(0,35,.5) 9 | 10 | mg <- meshgrid(XY, XY) 11 | X1 <- mg$X 12 | X2 <- mg$Y 13 | 14 | X1 <- matrix(X1, length(X1), 1) 15 | X2 <- matrix(X2, length(X2), 1) 16 | 17 | Z <- multivariateGaussian(cbind(X1,X2),mu,sigma2) 18 | Z <- matrix(Z,length(XY),length(XY)) 19 | 20 | 21 | plot( 22 | X[,1], X[,2], xlim = c(0,30),ylim = c(0,30), pch = 4, col = "blue", 23 | xlab = 'Latency (ms)', ylab = 'Throughput (mb/s)' 24 | ) 25 | 26 | # Do not plot if there are infinities 27 | if (sum(is.infinite(Z)) == 0) 28 | contour( 29 | XY, XY, Z, levels = 10 ^ t(seq(-20,0,3)) ,add = TRUE, drawlabels = FALSE 30 | ) 31 | 32 | } -------------------------------------------------------------------------------- /figs/p01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/faridcher/ml-course/65075c5427725bf2caeb25e5c6809ee34fcfdbe0/figs/p01.png -------------------------------------------------------------------------------- /lib/bsxfun.r: -------------------------------------------------------------------------------- 1 | bsxfun <- function (func, x, y){ 2 | dim_fix <- function(x,y) { 3 | # y is always with the smaller dimension 4 | 5 | d <- dim(x)[!(dim(x) %in% dim(y))] 6 | if (is.na(d[1])) 7 | d <- dim(x)[1] 8 | ifelse(dim(y)[1] == 1, 9 | y <- repmat(y,d,1), 10 | y <- repmat(y,1,d)) 11 | 12 | if (dim(x)[1] != dim(y)[1]) 13 | y <- t(y) 14 | 15 | return(y) 16 | } 17 | 18 | x <- as.matrix(x) 19 | y <- as.matrix(y) 20 | 21 | stopifnot(is.numeric(x), is.numeric(y)) 22 | dx <- dim(x) 23 | dy <- dim(y) 24 | 25 | if (!all(dx == dy)) 26 | ifelse (sum(dx) < sum(dy), x <- dim_fix(y,x), y <- dim_fix(x,y)) 27 | 28 | dx <- dim(x) 29 | dy <- dim(y) 30 | if (is.vector(x) && is.vector(y)) { 31 | z <- mapply(func, x, y) 32 | } 33 | else if (is.array(x) && is.array(y) && all(dx == dy)) { 34 | z <- mapply(func, x, y) 35 | dim(z) <- dx 36 | } 37 | else { 38 | stop("Argument 'x', 'y' must be vectors or arrays of the same size.") 39 | } 40 | return(z) 41 | } 42 | 43 | repmat <- function (a, n, m = n) { 44 | if (length(a) == 0) 45 | return(c()) 46 | if (!is.numeric(a) && !is.complex(a)) 47 | stop("Argument 'a' must be a numeric or complex.") 48 | if 
(is.vector(a)) 49 | a <- matrix(a, nrow = 1, ncol = length(a)) 50 | if (!is.numeric(n) || !is.numeric(m) || length(n) != 1 || 51 | length(m) != 1) 52 | stop("Arguments 'n' and 'm' must be single integers.") 53 | n <- max(floor(n), 0) 54 | m <- max(floor(m), 0) 55 | if (n <= 0 || m <= 0) 56 | return(matrix(0, nrow = n, ncol = m)) 57 | matrix(1, n, m) %x% a 58 | } 59 | -------------------------------------------------------------------------------- /lib/meshgrid.r: -------------------------------------------------------------------------------- 1 | meshgrid <- function (x, y = x) 2 | { 3 | if (!is.numeric(x) || !is.numeric(y)) 4 | stop("Arguments 'x' and 'y' must be numeric vectors.") 5 | x <- c(x) 6 | y <- c(y) 7 | n <- length(x) 8 | m <- length(y) 9 | X <- matrix(rep(x, each = m), nrow = m, ncol = n) 10 | Y <- matrix(rep(y, times = n), nrow = m, ncol = n) 11 | return(list(X = X, Y = Y)) 12 | } 13 | -------------------------------------------------------------------------------- /lib/pinv.r: -------------------------------------------------------------------------------- 1 | pinv <- 2 | function (X, tol = max(dim(X)) * max(X) * .Machine$double.eps) 3 | { 4 | if (length(dim(X)) > 2L || !(is.numeric(X) || is.complex(X))) 5 | stop("'X' must be a numeric or complex matrix") 6 | if (!is.matrix(X)) 7 | X <- as.matrix(X) 8 | Xsvd <- svd(X) 9 | if (is.complex(X)) 10 | Xsvd$u <- Conj(Xsvd$u) 11 | Positive <- any(Xsvd$d > max(tol * Xsvd$d[1L], 0)) 12 | if (Positive) 13 | Xsvd$v %*% (1 / Xsvd$d * t(Xsvd$u)) 14 | else 15 | array(0, dim(X)[2L:1L]) 16 | } -------------------------------------------------------------------------------- /lib/sigmoid.r: -------------------------------------------------------------------------------- 1 | sigmoid <- function(z) { 2 | g <- matrix(0,dim(as.matrix(z))) 3 | 1 / (1 + exp(-1 * z)) 4 | } 5 | -------------------------------------------------------------------------------- /lib/submitWithConfiguration.r: -------------------------------------------------------------------------------- 1 | submitWithConfiguration = function(conf) { 2 | 3 | cat(sprintf('== Submitting solutions | %s... \n', conf$itemName)) 4 | 5 | tokenFile = 'lib/token.rda'; 6 | if (file.exists(tokenFile)){ 7 | load(tokenFile) 8 | ptt = promptToken(ptt$email, ptt$token, tokenFile) 9 | email = ptt$email 10 | token = ptt$token 11 | }else{ 12 | ptt = promptToken('', '', tokenFile) 13 | email = ptt$email 14 | token = ptt$token 15 | } 16 | 17 | if (token==''){ 18 | cat('!! Submission Cancelled') 19 | return 20 | } 21 | 22 | response <<- tryCatch( 23 | { 24 | submitParts(conf, email, token) 25 | }, 26 | error = function(e) 27 | { 28 | cat(sprintf('!! Submission failed: unexpected error: %s\n',e$message)) 29 | cat(sprintf('!! Please try again later.')) 30 | return 31 | } 32 | ) 33 | 34 | if (is.null(response)){ 35 | return 36 | }else if (!is.null(response$errorMessage)){ 37 | cat(sprintf('!! Submission failed: %s \n', response$errorMessage)) 38 | }else{ 39 | showFeedback(conf$partArrays, response) 40 | save(ptt,file=tokenFile ) 41 | } 42 | } 43 | 44 | promptToken <- function (email, existingToken, tokenFile){ 45 | if (email!='' && existingToken!=''){ 46 | cat(sprintf('Use token from last successful submission (%s)? 
(Y/n): ', email)) 47 | 48 | reenter = readLines(con = stdin(),1) 49 | if (reenter=='' || reenter == 'Y' || reenter == 'y'){ 50 | #token = existingToken 51 | return (list(email=email, token=existingToken)) 52 | } else 53 | file.remove(tokenFile) 54 | } 55 | cat('Login (email address): ') 56 | email = readLines(con = stdin(),1) 57 | cat('token:') 58 | token = readLines(con = stdin(),1) 59 | list(email=email, token=token) 60 | } 61 | 62 | submitParts <- function(conf, email, token){ 63 | bodys = makePostBody(conf, email, token) 64 | submissionUrl = submissionUrl() 65 | #params = {'jsonBody', body} 66 | 67 | js = jsonlite::toJSON(bodys) 68 | js = gsub('[\\[\\]]','',js,perl = T) 69 | params = list(jsonBody=js) 70 | r = httr::POST(submissionUrl, body = params, encode = "form") 71 | response=httr::content(r,as = "parsed") 72 | } 73 | 74 | makePostBody = function(conf, email, token){ 75 | bodyStruct = list() 76 | bodyStruct$assignmentSlug = conf$assignmentSlug 77 | bodyStruct$submitterEmail = email 78 | bodyStruct$secret = token 79 | bodyStruct$parts = makePartsStruct(conf) 80 | bodyStruct 81 | } 82 | 83 | makePartsStruct = function(conf){ 84 | parts = list() 85 | for (row in 1:nrow(conf$partArrays)){ 86 | partId = conf$partArrays[row,1] 87 | output1 = conf$output(partId) 88 | parts[[partId]] <- list(output=output1) 89 | } 90 | parts 91 | } 92 | 93 | showFeedback <- function (parts, response){ 94 | cat('== \n') 95 | cat(sprintf('== %43s | %9s | %-s\n', 'Part Name', 'Score', 'Feedback')) 96 | cat(sprintf('== %43s | %9s | %-s\n', '---------', '-----', '--------')) 97 | for (i in 1:nrow(parts)){ 98 | ii <- which(names(response$partFeedbacks) == parts[i,1]) 99 | partFeedback <- response$partFeedbacks[[ii]] 100 | partEvaluation <- response$partEvaluations[[ii]] 101 | score <- sprintf('%d / %3d', partEvaluation$score, partEvaluation$maxScore) 102 | cat(sprintf('== %43s | %9s | %-s\n', parts[i,3], score, partFeedback)) 103 | } 104 | evaluation <- response$evaluation 105 | totalScore <- sprintf('%d / %d', evaluation$score, evaluation$maxScore) 106 | cat(sprintf('== --------------------------------\n')) 107 | cat(sprintf('== %43s | %9s | %-s\n', '', totalScore, '')) 108 | cat('== ') 109 | } 110 | 111 | ############################################################################### 112 | # 113 | # Service configuration 114 | # 115 | ############################################################################### 116 | submissionUrl = function(){ 117 | submissionUrl = 'https://www-origin.coursera.org/api/onDemandProgrammingImmediateFormSubmissions.v1'; 118 | } 119 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | ## Introduction 2 | This is one of the best massive open online courses (MOOCs) on machine learning and is taught by Prof. Andrew NG. However, Prof. NG teaches the course along with MATLAB/Octave and the programming exercises must be done and submitted with either of them. Do you like the course but not the proprietary MATLAB or the sluggish Octave? Or for any reason, would you rather to use the free GNU R to complete the programming exercises? 3 | 4 | To watch the lecture videos and slides please visit the [course original website](https://www.coursera.org/learn/machine-learning). This repository provides the starter code to solve the programming exercises in R statistical software. Simply follow these steps to complete the programming exercises: 5 | 6 | 1. 
Watch the lecture videos 7 | 2. Read the R-compatible version of the instructions, available as [wiki](https://github.com/faridcher/machine-learning-course/wiki) pages 8 | 3. Fill in the parts of the code marked "YOUR CODE HERE" 9 | 4. If you can't solve a part yourself, get help from the accompanying file suffixed with `-solution` in the same directory as the starter code. For example, `1/computeCost.r` has an associated solution file named `1/computeCost-solution.r` 10 | 5. Submit 11 | 12 | ## Dependencies 13 | To produce results and plots similar to those from Octave/MATLAB, you should install a few packages (`install.packages(c('rgl','lbfgsb3c','SnowballC','jsonlite', 'httr'))`): 14 | 15 | - `rgl`: used to produce the 3D scatter plots and surface plots in exercises 1 and 7. 16 | - `lbfgsb3c`: solves the large optimization tasks in exercises 4 and 8. 17 | - `SnowballC`: its `wordStem` function plays the same role as `porterStemmer.m` in exercise 6. 18 | - `jsonlite` and `httr`: needed for submissions. 19 | 20 | Furthermore, the `ginv` (generalized inverse) function in the `MASS` package doesn't produce the same result as MATLAB's `pinv` (pseudo-inverse), so `lib/pinv.r` is a modified version of `MASS::ginv` that matches MATLAB's `pinv`. 21 | 22 | ## Submission 23 | After completing each assignment, run `source("submit.r")` and then `submit()` in your R console. I submitted the solutions to Coursera for testing and the scores were 100%. Please report any problems with submission here. 24 | 25 | ## Licence 26 | This project is released under the MIT licence to the extent it is original. 27 | --------------------------------------------------------------------------------
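As a minimal end-to-end sketch (assuming the repository root is the current working directory and the required functions for the exercise have been completed), a typical session for exercise 7 looks like:

    setwd("7")                     # or open ex7.rproj in RStudio
    source("ex7.r")                # run the exercise script
    source("submit.r"); submit()   # submit; enter your Coursera email and token when prompted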