├── 1.linear_regression ├── ex1.linear_regreesion.ipynb ├── ex1data1.txt └── ex1data2.txt ├── 2.logistic_regression ├── ex2.logistic_regression.ipynb ├── ex2data1.txt ├── ex2data2.txt └── sample_for_scipy.optimize.minimize.ipynb ├── 3.multi-class_classification_and_neural_network ├── ex3.Multi-class_Classification.ipynb ├── ex3.neural_network.ipynb ├── ex3data1.mat ├── ex3weights.mat └── img │ └── nn_model.png ├── 4.nurual_network_back_propagation ├── ex4.nerual_network_backpropagation.ipynb ├── ex4data1.mat ├── ex4weights.mat └── img │ └── nn_model.png ├── 5.bias_vs_variance ├── ex5.bias_vs_variance.ipynb └── ex5data1.mat ├── 6.svm ├── data │ ├── emailSample1.txt │ ├── emailSample2.txt │ ├── ex6data1.mat │ ├── ex6data2.mat │ ├── ex6data3.mat │ ├── spamSample1.txt │ ├── spamSample2.txt │ ├── spamTest.mat │ ├── spamTrain.mat │ └── vocab.txt └── ex6.SVM.ipynb ├── 7.kmeans_and_PCA ├── data │ ├── bird_small.mat │ ├── bird_small.png │ ├── ex7data1.mat │ ├── ex7data2.mat │ └── ex7faces.mat └── ex7.K-means_and_PCA.ipynb ├── 8.anomaly_detection_and_recommendation ├── data │ ├── ex8_movieParams.mat │ ├── ex8_movies.mat │ ├── ex8data1.mat │ ├── ex8data2.mat │ └── movie_ids.txt ├── ex8.Anomaly_Detection_and_Recommendation.ipynb └── img │ ├── rcmd_cost.png │ ├── rcmd_gradient.png │ ├── rcmd_reg_grad.png │ └── rcmd_vectorized_grad.png ├── Machine_Learning_Assignment(ex1-ex8).pdf ├── README.md └── original-machine-learning-MATLAB ├── ex1 ├── computeCost.m ├── computeCostMulti.m ├── ex1.m ├── ex1_multi.m ├── ex1data1.txt ├── ex1data2.txt ├── featureNormalize.m ├── gradientDescent.m ├── gradientDescentMulti.m ├── normalEqn.m ├── plotData.m └── warmUpExercise.m ├── ex2 ├── costFunction.m ├── costFunctionReg.m ├── ex2.m ├── ex2_reg.m ├── ex2data1.txt ├── ex2data2.txt ├── mapFeature.m ├── plotData.m ├── plotDecisionBoundary.m ├── predict.m └── sigmoid.m ├── ex3 ├── displayData.m ├── ex3.m ├── ex3_nn.m ├── ex3data1.mat ├── ex3weights.mat ├── fmincg.m ├── lrCostFunction.m ├── oneVsAll.m ├── predict.m ├── predictOneVsAll.m └── sigmoid.m ├── ex4 ├── 0.png ├── 1.png ├── 3.png ├── 4.png ├── 7.png ├── 8.png ├── checkNNGradients.m ├── computeNumericalGradient.m ├── debugInitializeWeights.m ├── displayData.m ├── ex4.m ├── ex4data1.mat ├── ex4weights.mat ├── fmincg.m ├── nnCostFunction.m ├── predict.m ├── randInitializeWeights.m ├── sigmoid.m └── sigmoidGradient.m ├── ex5 ├── ex5.m ├── ex5data1.mat ├── featureNormalize.m ├── fmincg.m ├── learningCurve.m ├── linearRegCostFunction.m ├── plotFit.m ├── polyFeatures.m ├── trainLinearReg.m └── validationCurve.m ├── ex6 ├── dataset3Params.m ├── emailFeatures.m ├── emailSample1.txt ├── emailSample2.txt ├── ex6.m ├── ex6_spam.m ├── ex6data1.mat ├── ex6data2.mat ├── ex6data3.mat ├── gaussianKernel.m ├── getVocabList.m ├── linearKernel.m ├── plotData.m ├── porterStemmer.m ├── processEmail.m ├── readFile.m ├── spamSample1.txt ├── spamSample2.txt ├── spamTest.mat ├── spamTrain.mat ├── svmPredict.m ├── svmTrain.m ├── visualizeBoundary.m ├── visualizeBoundaryLinear.m └── vocab.txt ├── ex7 ├── bird_small.mat ├── bird_small.png ├── computeCentroids.m ├── displayData.m ├── drawLine.m ├── ex7.m ├── ex7_pca.m ├── ex7data1.mat ├── ex7data2.mat ├── ex7faces.mat ├── featureNormalize.m ├── findClosestCentroids.m ├── kMeansInitCentroids.m ├── pca.m ├── plotDataPoints.m ├── plotProgresskMeans.m ├── projectData.m ├── recoverData.m └── runkMeans.m └── ex8 ├── checkCostFunction.m ├── cofiCostFunc.m ├── computeNumericalGradient.m ├── estimateGaussian.m ├── ex8.m ├── ex8_cofi.m ├── 
ex8_movieParams.mat ├── ex8_movies.mat ├── ex8data1.mat ├── ex8data2.mat ├── fmincg.m ├── loadMovieList.m ├── movie_ids.txt ├── multivariateGaussian.m ├── normalizeRatings.m ├── selectThreshold.m └── visualizeFit.m /1.linear_regression/ex1data1.txt: -------------------------------------------------------------------------------- 1 | 6.1101,17.592 2 | 5.5277,9.1302 3 | 8.5186,13.662 4 | 7.0032,11.854 5 | 5.8598,6.8233 6 | 8.3829,11.886 7 | 7.4764,4.3483 8 | 8.5781,12 9 | 6.4862,6.5987 10 | 5.0546,3.8166 11 | 5.7107,3.2522 12 | 14.164,15.505 13 | 5.734,3.1551 14 | 8.4084,7.2258 15 | 5.6407,0.71618 16 | 5.3794,3.5129 17 | 6.3654,5.3048 18 | 5.1301,0.56077 19 | 6.4296,3.6518 20 | 7.0708,5.3893 21 | 6.1891,3.1386 22 | 20.27,21.767 23 | 5.4901,4.263 24 | 6.3261,5.1875 25 | 5.5649,3.0825 26 | 18.945,22.638 27 | 12.828,13.501 28 | 10.957,7.0467 29 | 13.176,14.692 30 | 22.203,24.147 31 | 5.2524,-1.22 32 | 6.5894,5.9966 33 | 9.2482,12.134 34 | 5.8918,1.8495 35 | 8.2111,6.5426 36 | 7.9334,4.5623 37 | 8.0959,4.1164 38 | 5.6063,3.3928 39 | 12.836,10.117 40 | 6.3534,5.4974 41 | 5.4069,0.55657 42 | 6.8825,3.9115 43 | 11.708,5.3854 44 | 5.7737,2.4406 45 | 7.8247,6.7318 46 | 7.0931,1.0463 47 | 5.0702,5.1337 48 | 5.8014,1.844 49 | 11.7,8.0043 50 | 5.5416,1.0179 51 | 7.5402,6.7504 52 | 5.3077,1.8396 53 | 7.4239,4.2885 54 | 7.6031,4.9981 55 | 6.3328,1.4233 56 | 6.3589,-1.4211 57 | 6.2742,2.4756 58 | 5.6397,4.6042 59 | 9.3102,3.9624 60 | 9.4536,5.4141 61 | 8.8254,5.1694 62 | 5.1793,-0.74279 63 | 21.279,17.929 64 | 14.908,12.054 65 | 18.959,17.054 66 | 7.2182,4.8852 67 | 8.2951,5.7442 68 | 10.236,7.7754 69 | 5.4994,1.0173 70 | 20.341,20.992 71 | 10.136,6.6799 72 | 7.3345,4.0259 73 | 6.0062,1.2784 74 | 7.2259,3.3411 75 | 5.0269,-2.6807 76 | 6.5479,0.29678 77 | 7.5386,3.8845 78 | 5.0365,5.7014 79 | 10.274,6.7526 80 | 5.1077,2.0576 81 | 5.7292,0.47953 82 | 5.1884,0.20421 83 | 6.3557,0.67861 84 | 9.7687,7.5435 85 | 6.5159,5.3436 86 | 8.5172,4.2415 87 | 9.1802,6.7981 88 | 6.002,0.92695 89 | 5.5204,0.152 90 | 5.0594,2.8214 91 | 5.7077,1.8451 92 | 7.6366,4.2959 93 | 5.8707,7.2029 94 | 5.3054,1.9869 95 | 8.2934,0.14454 96 | 13.394,9.0551 97 | 5.4369,0.61705 98 | -------------------------------------------------------------------------------- /1.linear_regression/ex1data2.txt: -------------------------------------------------------------------------------- 1 | 2104,3,399900 2 | 1600,3,329900 3 | 2400,3,369000 4 | 1416,2,232000 5 | 3000,4,539900 6 | 1985,4,299900 7 | 1534,3,314900 8 | 1427,3,198999 9 | 1380,3,212000 10 | 1494,3,242500 11 | 1940,4,239999 12 | 2000,3,347000 13 | 1890,3,329999 14 | 4478,5,699900 15 | 1268,3,259900 16 | 2300,4,449900 17 | 1320,2,299900 18 | 1236,3,199900 19 | 2609,4,499998 20 | 3031,4,599000 21 | 1767,3,252900 22 | 1888,2,255000 23 | 1604,3,242900 24 | 1962,4,259900 25 | 3890,3,573900 26 | 1100,3,249900 27 | 1458,3,464500 28 | 2526,3,469000 29 | 2200,3,475000 30 | 2637,3,299900 31 | 1839,2,349900 32 | 1000,1,169900 33 | 2040,4,314900 34 | 3137,3,579900 35 | 1811,4,285900 36 | 1437,3,249900 37 | 1239,3,229900 38 | 2132,4,345000 39 | 4215,4,549000 40 | 2162,4,287000 41 | 1664,2,368500 42 | 2238,3,329900 43 | 2567,4,314000 44 | 1200,3,299000 45 | 852,2,179900 46 | 1852,4,299900 47 | 1203,3,239500 48 | -------------------------------------------------------------------------------- /2.logistic_regression/ex2data1.txt: -------------------------------------------------------------------------------- 1 | 34.62365962451697,78.0246928153624,0 2 | 30.28671076822607,43.89499752400101,0 3 | 
35.84740876993872,72.90219802708364,0 4 | 60.18259938620976,86.30855209546826,1 5 | 79.0327360507101,75.3443764369103,1 6 | 45.08327747668339,56.3163717815305,0 7 | 61.10666453684766,96.51142588489624,1 8 | 75.02474556738889,46.55401354116538,1 9 | 76.09878670226257,87.42056971926803,1 10 | 84.43281996120035,43.53339331072109,1 11 | 95.86155507093572,38.22527805795094,0 12 | 75.01365838958247,30.60326323428011,0 13 | 82.30705337399482,76.48196330235604,1 14 | 69.36458875970939,97.71869196188608,1 15 | 39.53833914367223,76.03681085115882,0 16 | 53.9710521485623,89.20735013750205,1 17 | 69.07014406283025,52.74046973016765,1 18 | 67.94685547711617,46.67857410673128,0 19 | 70.66150955499435,92.92713789364831,1 20 | 76.97878372747498,47.57596364975532,1 21 | 67.37202754570876,42.83843832029179,0 22 | 89.67677575072079,65.79936592745237,1 23 | 50.534788289883,48.85581152764205,0 24 | 34.21206097786789,44.20952859866288,0 25 | 77.9240914545704,68.9723599933059,1 26 | 62.27101367004632,69.95445795447587,1 27 | 80.1901807509566,44.82162893218353,1 28 | 93.114388797442,38.80067033713209,0 29 | 61.83020602312595,50.25610789244621,0 30 | 38.78580379679423,64.99568095539578,0 31 | 61.379289447425,72.80788731317097,1 32 | 85.40451939411645,57.05198397627122,1 33 | 52.10797973193984,63.12762376881715,0 34 | 52.04540476831827,69.43286012045222,1 35 | 40.23689373545111,71.16774802184875,0 36 | 54.63510555424817,52.21388588061123,0 37 | 33.91550010906887,98.86943574220611,0 38 | 64.17698887494485,80.90806058670817,1 39 | 74.78925295941542,41.57341522824434,0 40 | 34.1836400264419,75.2377203360134,0 41 | 83.90239366249155,56.30804621605327,1 42 | 51.54772026906181,46.85629026349976,0 43 | 94.44336776917852,65.56892160559052,1 44 | 82.36875375713919,40.61825515970618,0 45 | 51.04775177128865,45.82270145776001,0 46 | 62.22267576120188,52.06099194836679,0 47 | 77.19303492601364,70.45820000180959,1 48 | 97.77159928000232,86.7278223300282,1 49 | 62.07306379667647,96.76882412413983,1 50 | 91.56497449807442,88.69629254546599,1 51 | 79.94481794066932,74.16311935043758,1 52 | 99.2725269292572,60.99903099844988,1 53 | 90.54671411399852,43.39060180650027,1 54 | 34.52451385320009,60.39634245837173,0 55 | 50.2864961189907,49.80453881323059,0 56 | 49.58667721632031,59.80895099453265,0 57 | 97.64563396007767,68.86157272420604,1 58 | 32.57720016809309,95.59854761387875,0 59 | 74.24869136721598,69.82457122657193,1 60 | 71.79646205863379,78.45356224515052,1 61 | 75.3956114656803,85.75993667331619,1 62 | 35.28611281526193,47.02051394723416,0 63 | 56.25381749711624,39.26147251058019,0 64 | 30.05882244669796,49.59297386723685,0 65 | 44.66826172480893,66.45008614558913,0 66 | 66.56089447242954,41.09209807936973,0 67 | 40.45755098375164,97.53518548909936,1 68 | 49.07256321908844,51.88321182073966,0 69 | 80.27957401466998,92.11606081344084,1 70 | 66.74671856944039,60.99139402740988,1 71 | 32.72283304060323,43.30717306430063,0 72 | 64.0393204150601,78.03168802018232,1 73 | 72.34649422579923,96.22759296761404,1 74 | 60.45788573918959,73.09499809758037,1 75 | 58.84095621726802,75.85844831279042,1 76 | 99.82785779692128,72.36925193383885,1 77 | 47.26426910848174,88.47586499559782,1 78 | 50.45815980285988,75.80985952982456,1 79 | 60.45555629271532,42.50840943572217,0 80 | 82.22666157785568,42.71987853716458,0 81 | 88.9138964166533,69.80378889835472,1 82 | 94.83450672430196,45.69430680250754,1 83 | 67.31925746917527,66.58935317747915,1 84 | 57.23870631569862,59.51428198012956,1 85 | 80.36675600171273,90.96014789746954,1 86 | 
68.46852178591112,85.59430710452014,1 87 | 42.0754545384731,78.84478600148043,0 88 | 75.47770200533905,90.42453899753964,1 89 | 78.63542434898018,96.64742716885644,1 90 | 52.34800398794107,60.76950525602592,0 91 | 94.09433112516793,77.15910509073893,1 92 | 90.44855097096364,87.50879176484702,1 93 | 55.48216114069585,35.57070347228866,0 94 | 74.49269241843041,84.84513684930135,1 95 | 89.84580670720979,45.35828361091658,1 96 | 83.48916274498238,48.38028579728175,1 97 | 42.2617008099817,87.10385094025457,1 98 | 99.31500880510394,68.77540947206617,1 99 | 55.34001756003703,64.9319380069486,1 100 | 74.77589300092767,89.52981289513276,1 101 | -------------------------------------------------------------------------------- /2.logistic_regression/ex2data2.txt: -------------------------------------------------------------------------------- 1 | 0.051267,0.69956,1 2 | -0.092742,0.68494,1 3 | -0.21371,0.69225,1 4 | -0.375,0.50219,1 5 | -0.51325,0.46564,1 6 | -0.52477,0.2098,1 7 | -0.39804,0.034357,1 8 | -0.30588,-0.19225,1 9 | 0.016705,-0.40424,1 10 | 0.13191,-0.51389,1 11 | 0.38537,-0.56506,1 12 | 0.52938,-0.5212,1 13 | 0.63882,-0.24342,1 14 | 0.73675,-0.18494,1 15 | 0.54666,0.48757,1 16 | 0.322,0.5826,1 17 | 0.16647,0.53874,1 18 | -0.046659,0.81652,1 19 | -0.17339,0.69956,1 20 | -0.47869,0.63377,1 21 | -0.60541,0.59722,1 22 | -0.62846,0.33406,1 23 | -0.59389,0.005117,1 24 | -0.42108,-0.27266,1 25 | -0.11578,-0.39693,1 26 | 0.20104,-0.60161,1 27 | 0.46601,-0.53582,1 28 | 0.67339,-0.53582,1 29 | -0.13882,0.54605,1 30 | -0.29435,0.77997,1 31 | -0.26555,0.96272,1 32 | -0.16187,0.8019,1 33 | -0.17339,0.64839,1 34 | -0.28283,0.47295,1 35 | -0.36348,0.31213,1 36 | -0.30012,0.027047,1 37 | -0.23675,-0.21418,1 38 | -0.06394,-0.18494,1 39 | 0.062788,-0.16301,1 40 | 0.22984,-0.41155,1 41 | 0.2932,-0.2288,1 42 | 0.48329,-0.18494,1 43 | 0.64459,-0.14108,1 44 | 0.46025,0.012427,1 45 | 0.6273,0.15863,1 46 | 0.57546,0.26827,1 47 | 0.72523,0.44371,1 48 | 0.22408,0.52412,1 49 | 0.44297,0.67032,1 50 | 0.322,0.69225,1 51 | 0.13767,0.57529,1 52 | -0.0063364,0.39985,1 53 | -0.092742,0.55336,1 54 | -0.20795,0.35599,1 55 | -0.20795,0.17325,1 56 | -0.43836,0.21711,1 57 | -0.21947,-0.016813,1 58 | -0.13882,-0.27266,1 59 | 0.18376,0.93348,0 60 | 0.22408,0.77997,0 61 | 0.29896,0.61915,0 62 | 0.50634,0.75804,0 63 | 0.61578,0.7288,0 64 | 0.60426,0.59722,0 65 | 0.76555,0.50219,0 66 | 0.92684,0.3633,0 67 | 0.82316,0.27558,0 68 | 0.96141,0.085526,0 69 | 0.93836,0.012427,0 70 | 0.86348,-0.082602,0 71 | 0.89804,-0.20687,0 72 | 0.85196,-0.36769,0 73 | 0.82892,-0.5212,0 74 | 0.79435,-0.55775,0 75 | 0.59274,-0.7405,0 76 | 0.51786,-0.5943,0 77 | 0.46601,-0.41886,0 78 | 0.35081,-0.57968,0 79 | 0.28744,-0.76974,0 80 | 0.085829,-0.75512,0 81 | 0.14919,-0.57968,0 82 | -0.13306,-0.4481,0 83 | -0.40956,-0.41155,0 84 | -0.39228,-0.25804,0 85 | -0.74366,-0.25804,0 86 | -0.69758,0.041667,0 87 | -0.75518,0.2902,0 88 | -0.69758,0.68494,0 89 | -0.4038,0.70687,0 90 | -0.38076,0.91886,0 91 | -0.50749,0.90424,0 92 | -0.54781,0.70687,0 93 | 0.10311,0.77997,0 94 | 0.057028,0.91886,0 95 | -0.10426,0.99196,0 96 | -0.081221,1.1089,0 97 | 0.28744,1.087,0 98 | 0.39689,0.82383,0 99 | 0.63882,0.88962,0 100 | 0.82316,0.66301,0 101 | 0.67339,0.64108,0 102 | 1.0709,0.10015,0 103 | -0.046659,-0.57968,0 104 | -0.23675,-0.63816,0 105 | -0.15035,-0.36769,0 106 | -0.49021,-0.3019,0 107 | -0.46717,-0.13377,0 108 | -0.28859,-0.060673,0 109 | -0.61118,-0.067982,0 110 | -0.66302,-0.21418,0 111 | -0.59965,-0.41886,0 112 | -0.72638,-0.082602,0 113 | -0.83007,0.31213,0 114 | 
-0.72062,0.53874,0 115 | -0.59389,0.49488,0 116 | -0.48445,0.99927,0 117 | -0.0063364,0.99927,0 118 | 0.63265,-0.030612,0 119 | -------------------------------------------------------------------------------- /3.multi-class_classification_and_neural_network/ex3data1.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/loveunk/machine-learning-excercise-notebook-python/b94f6c07388234c086a40de0132550653aa17cc3/3.multi-class_classification_and_neural_network/ex3data1.mat -------------------------------------------------------------------------------- /3.multi-class_classification_and_neural_network/ex3weights.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/loveunk/machine-learning-excercise-notebook-python/b94f6c07388234c086a40de0132550653aa17cc3/3.multi-class_classification_and_neural_network/ex3weights.mat -------------------------------------------------------------------------------- /3.multi-class_classification_and_neural_network/img/nn_model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/loveunk/machine-learning-excercise-notebook-python/b94f6c07388234c086a40de0132550653aa17cc3/3.multi-class_classification_and_neural_network/img/nn_model.png -------------------------------------------------------------------------------- /4.nurual_network_back_propagation/ex4data1.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/loveunk/machine-learning-excercise-notebook-python/b94f6c07388234c086a40de0132550653aa17cc3/4.nurual_network_back_propagation/ex4data1.mat -------------------------------------------------------------------------------- /4.nurual_network_back_propagation/ex4weights.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/loveunk/machine-learning-excercise-notebook-python/b94f6c07388234c086a40de0132550653aa17cc3/4.nurual_network_back_propagation/ex4weights.mat -------------------------------------------------------------------------------- /4.nurual_network_back_propagation/img/nn_model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/loveunk/machine-learning-excercise-notebook-python/b94f6c07388234c086a40de0132550653aa17cc3/4.nurual_network_back_propagation/img/nn_model.png -------------------------------------------------------------------------------- /5.bias_vs_variance/ex5data1.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/loveunk/machine-learning-excercise-notebook-python/b94f6c07388234c086a40de0132550653aa17cc3/5.bias_vs_variance/ex5data1.mat -------------------------------------------------------------------------------- /6.svm/data/emailSample1.txt: -------------------------------------------------------------------------------- 1 | > Anyone knows how much it costs to host a web portal ? 2 | > 3 | Well, it depends on how many visitors you're expecting. 4 | This can be anywhere from less than 10 bucks a month to a couple of $100. 5 | You should checkout http://www.rackspace.com/ or perhaps Amazon EC2 6 | if youre running something big.. 
7 | 8 | To unsubscribe yourself from this mailing list, send an email to: 9 | groupname-unsubscribe@egroups.com 10 | 11 | -------------------------------------------------------------------------------- /6.svm/data/emailSample2.txt: -------------------------------------------------------------------------------- 1 | Folks, 2 | 3 | my first time posting - have a bit of Unix experience, but am new to Linux. 4 | 5 | 6 | Just got a new PC at home - Dell box with Windows XP. Added a second hard disk 7 | for Linux. Partitioned the disk and have installed Suse 7.2 from CD, which went 8 | fine except it didn't pick up my monitor. 9 | 10 | I have a Dell branded E151FPp 15" LCD flat panel monitor and a nVidia GeForce4 11 | Ti4200 video card, both of which are probably too new to feature in Suse's default 12 | set. I downloaded a driver from the nVidia website and installed it using RPM. 13 | Then I ran Sax2 (as was recommended in some postings I found on the net), but 14 | it still doesn't feature my video card in the available list. What next? 15 | 16 | Another problem. I have a Dell branded keyboard and if I hit Caps-Lock twice, 17 | the whole machine crashes (in Linux, not Windows) - even the on/off switch is 18 | inactive, leaving me to reach for the power cable instead. 19 | 20 | If anyone can help me in any way with these probs., I'd be really grateful - 21 | I've searched the 'net but have run out of ideas. 22 | 23 | Or should I be going for a different version of Linux such as RedHat? Opinions 24 | welcome. 25 | 26 | Thanks a lot, 27 | Peter 28 | 29 | -- 30 | Irish Linux Users' Group: ilug@linux.ie 31 | http://www.linux.ie/mailman/listinfo/ilug for (un)subscription information. 32 | List maintainer: listmaster@linux.ie 33 | 34 | 35 | -------------------------------------------------------------------------------- /6.svm/data/ex6data1.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/loveunk/machine-learning-excercise-notebook-python/b94f6c07388234c086a40de0132550653aa17cc3/6.svm/data/ex6data1.mat -------------------------------------------------------------------------------- /6.svm/data/ex6data2.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/loveunk/machine-learning-excercise-notebook-python/b94f6c07388234c086a40de0132550653aa17cc3/6.svm/data/ex6data2.mat -------------------------------------------------------------------------------- /6.svm/data/ex6data3.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/loveunk/machine-learning-excercise-notebook-python/b94f6c07388234c086a40de0132550653aa17cc3/6.svm/data/ex6data3.mat -------------------------------------------------------------------------------- /6.svm/data/spamSample1.txt: -------------------------------------------------------------------------------- 1 | Do You Want To Make $1000 Or More Per Week? 2 | 3 | 4 | 5 | If you are a motivated and qualified individual - I 6 | will personally demonstrate to you a system that will 7 | make you $1,000 per week or more! This is NOT mlm. 8 | 9 | 10 | 11 | Call our 24 hour pre-recorded number to get the 12 | details. 13 | 14 | 15 | 16 | 000-456-789 17 | 18 | 19 | 20 | I need people who want to make serious money. Make 21 | the call and get the facts. 22 | 23 | Invest 2 minutes in yourself now! 
24 | 25 | 26 | 27 | 000-456-789 28 | 29 | 30 | 31 | Looking forward to your call and I will introduce you 32 | to people like yourself who 33 | are currently making $10,000 plus per week! 34 | 35 | 36 | 37 | 000-456-789 38 | 39 | 40 | 41 | 3484lJGv6-241lEaN9080lRmS6-271WxHo7524qiyT5-438rjUv5615hQcf0-662eiDB9057dMtVl72 42 | 43 | -------------------------------------------------------------------------------- /6.svm/data/spamSample2.txt: -------------------------------------------------------------------------------- 1 | Best Buy Viagra Generic Online 2 | 3 | Viagra 100mg x 60 Pills $125, Free Pills & Reorder Discount, Top Selling 100% Quality & Satisfaction guaranteed! 4 | 5 | We accept VISA, Master & E-Check Payments, 90000+ Satisfied Customers! 6 | http://medphysitcstech.ru 7 | 8 | 9 | -------------------------------------------------------------------------------- /6.svm/data/spamTest.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/loveunk/machine-learning-excercise-notebook-python/b94f6c07388234c086a40de0132550653aa17cc3/6.svm/data/spamTest.mat -------------------------------------------------------------------------------- /6.svm/data/spamTrain.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/loveunk/machine-learning-excercise-notebook-python/b94f6c07388234c086a40de0132550653aa17cc3/6.svm/data/spamTrain.mat -------------------------------------------------------------------------------- /7.kmeans_and_PCA/data/bird_small.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/loveunk/machine-learning-excercise-notebook-python/b94f6c07388234c086a40de0132550653aa17cc3/7.kmeans_and_PCA/data/bird_small.mat -------------------------------------------------------------------------------- /7.kmeans_and_PCA/data/bird_small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/loveunk/machine-learning-excercise-notebook-python/b94f6c07388234c086a40de0132550653aa17cc3/7.kmeans_and_PCA/data/bird_small.png -------------------------------------------------------------------------------- /7.kmeans_and_PCA/data/ex7data1.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/loveunk/machine-learning-excercise-notebook-python/b94f6c07388234c086a40de0132550653aa17cc3/7.kmeans_and_PCA/data/ex7data1.mat -------------------------------------------------------------------------------- /7.kmeans_and_PCA/data/ex7data2.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/loveunk/machine-learning-excercise-notebook-python/b94f6c07388234c086a40de0132550653aa17cc3/7.kmeans_and_PCA/data/ex7data2.mat -------------------------------------------------------------------------------- /7.kmeans_and_PCA/data/ex7faces.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/loveunk/machine-learning-excercise-notebook-python/b94f6c07388234c086a40de0132550653aa17cc3/7.kmeans_and_PCA/data/ex7faces.mat -------------------------------------------------------------------------------- /8.anomaly_detection_and_recommendation/data/ex8_movieParams.mat: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/loveunk/machine-learning-excercise-notebook-python/b94f6c07388234c086a40de0132550653aa17cc3/8.anomaly_detection_and_recommendation/data/ex8_movieParams.mat -------------------------------------------------------------------------------- /8.anomaly_detection_and_recommendation/data/ex8_movies.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/loveunk/machine-learning-excercise-notebook-python/b94f6c07388234c086a40de0132550653aa17cc3/8.anomaly_detection_and_recommendation/data/ex8_movies.mat -------------------------------------------------------------------------------- /8.anomaly_detection_and_recommendation/data/ex8data1.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/loveunk/machine-learning-excercise-notebook-python/b94f6c07388234c086a40de0132550653aa17cc3/8.anomaly_detection_and_recommendation/data/ex8data1.mat -------------------------------------------------------------------------------- /8.anomaly_detection_and_recommendation/data/ex8data2.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/loveunk/machine-learning-excercise-notebook-python/b94f6c07388234c086a40de0132550653aa17cc3/8.anomaly_detection_and_recommendation/data/ex8data2.mat -------------------------------------------------------------------------------- /8.anomaly_detection_and_recommendation/data/movie_ids.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/loveunk/machine-learning-excercise-notebook-python/b94f6c07388234c086a40de0132550653aa17cc3/8.anomaly_detection_and_recommendation/data/movie_ids.txt -------------------------------------------------------------------------------- /8.anomaly_detection_and_recommendation/img/rcmd_cost.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/loveunk/machine-learning-excercise-notebook-python/b94f6c07388234c086a40de0132550653aa17cc3/8.anomaly_detection_and_recommendation/img/rcmd_cost.png -------------------------------------------------------------------------------- /8.anomaly_detection_and_recommendation/img/rcmd_gradient.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/loveunk/machine-learning-excercise-notebook-python/b94f6c07388234c086a40de0132550653aa17cc3/8.anomaly_detection_and_recommendation/img/rcmd_gradient.png -------------------------------------------------------------------------------- /8.anomaly_detection_and_recommendation/img/rcmd_reg_grad.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/loveunk/machine-learning-excercise-notebook-python/b94f6c07388234c086a40de0132550653aa17cc3/8.anomaly_detection_and_recommendation/img/rcmd_reg_grad.png -------------------------------------------------------------------------------- /8.anomaly_detection_and_recommendation/img/rcmd_vectorized_grad.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/loveunk/machine-learning-excercise-notebook-python/b94f6c07388234c086a40de0132550653aa17cc3/8.anomaly_detection_and_recommendation/img/rcmd_vectorized_grad.png -------------------------------------------------------------------------------- 
/Machine_Learning_Assignment(ex1-ex8).pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/loveunk/machine-learning-excercise-notebook-python/b94f6c07388234c086a40de0132550653aa17cc3/Machine_Learning_Assignment(ex1-ex8).pdf -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Machine Learning Jupyter Notebooks 2 | 3 | Andrew Ng's Machine Learning course is an excellent starting point for machine learning beginners. 4 | 5 | Its assignments, however, are written in Matlab/Octave, which no longer reflects how machine learning and deep learning are practiced today. 6 | 7 | This repository ports every course assignment to Python, presented as Jupyter Notebooks so the material is easy to read and experiment with. 8 | 9 | ## Jupyter Notebook (Python) 10 | 11 | * [1.linear_regression](1.linear_regression) 12 | * [2.logistic_regression](2.logistic_regression) 13 | * [3.multi-class_classification_and_neural_network](3.multi-class_classification_and_neural_network) 14 | * [4.nurual_network_back_propagation](4.nurual_network_back_propagation) 15 | * [5.bias_vs_variance](5.bias_vs_variance) 16 | * [6.svm](6.svm) 17 | * [7.kmeans_and_PCA](7.kmeans_and_PCA) 18 | * [8.anomaly_detection_and_recommendation](8.anomaly_detection_and_recommendation) 19 | 20 | ## Original MATLAB version 21 | * [MATLAB version of the original assignments](original-machine-learning-MATLAB) 22 | 23 | ## Assignment descriptions (PDF) 24 | * [Written descriptions of the original assignments](Machine_Learning_Assignment(ex1-ex8).pdf) (combined into a single PDF) 25 | 26 | 27 | If you have any questions, please submit an issue or a pull request. Thank you! 28 | 29 | ## More 30 | 31 | * [Deep learning Jupyter Notebooks](https://github.com/loveunk/deep-learning-exercise-notebook) 32 | * [Study roadmap and notes for machine learning and deep learning](https://github.com/loveunk/machine-learning-deep-learning-notes) -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex1/computeCost.m: -------------------------------------------------------------------------------- 1 | function J = computeCost(X, y, theta) 2 | %COMPUTECOST Compute cost for linear regression 3 | % J = COMPUTECOST(X, y, theta) computes the cost of using theta as the 4 | % parameter for linear regression to fit the data points in X and y 5 | 6 | % Initialize some useful values 7 | m = length(y); % number of training examples 8 | 9 | % You need to return the following variables correctly 10 | J = 0; 11 | 12 | % ====================== YOUR CODE HERE ====================== 13 | % Instructions: Compute the cost of a particular choice of theta 14 | % You should set J to the cost. 15 | 16 | predictions = X * theta; 17 | J = (1 / (2 * m)) * sum((predictions - y).^2); 18 | 19 | 20 | % ========================================================================= 21 | 22 | end 23 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex1/computeCostMulti.m: -------------------------------------------------------------------------------- 1 | function J = computeCostMulti(X, y, theta) 2 | %COMPUTECOSTMULTI Compute cost for linear regression with multiple variables 3 | % J = COMPUTECOSTMULTI(X, y, theta) computes the cost of using theta as the 4 | % parameter for linear regression to fit the data points in X and y 5 | 6 | % Initialize some useful values 7 | m = length(y); % number of training examples 8 | 9 | % You need to return the following variables correctly 10 | J = 0; 11 | 12 | % ====================== YOUR CODE HERE ====================== 13 | % Instructions: Compute the cost of a particular choice of theta 14 | % You should set J to the cost.
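% Note (editor's addition, not part of the original starter file): the
% multivariate case reuses exactly the same vectorized cost as computeCost.m,
%   J(theta) = 1/(2*m) * sum((X*theta - y).^2)
% An equivalent pure matrix form, handy as a sanity check, would be
%   J = (1 / (2 * m)) * (X * theta - y)' * (X * theta - y);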
15 | 16 | 17 | predictions = X * theta; 18 | J = (1 / (2 * m)) * sum((predictions - y).^2); 19 | 20 | 21 | % ========================================================================= 22 | 23 | end 24 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex1/ex1.m: -------------------------------------------------------------------------------- 1 | %% Machine Learning Online Class - Exercise 1: Linear Regression 2 | 3 | % Instructions 4 | % ------------ 5 | % 6 | % This file contains code that helps you get started on the 7 | % linear exercise. You will need to complete the following functions 8 | % in this exericse: 9 | % 10 | % warmUpExercise.m 11 | % plotData.m 12 | % gradientDescent.m 13 | % computeCost.m 14 | % gradientDescentMulti.m 15 | % computeCostMulti.m 16 | % featureNormalize.m 17 | % normalEqn.m 18 | % 19 | % For this exercise, you will not need to change any code in this file, 20 | % or any other files other than those mentioned above. 21 | % 22 | % x refers to the population size in 10,000s 23 | % y refers to the profit in $10,000s 24 | % 25 | 26 | %% Initialization 27 | clear ; close all; clc 28 | 29 | %% ==================== Part 1: Basic Function ==================== 30 | % Complete warmUpExercise.m 31 | fprintf('Running warmUpExercise ... \n'); 32 | fprintf('5x5 Identity Matrix: \n'); 33 | warmUpExercise() 34 | 35 | fprintf('Program paused. Press enter to continue.\n'); 36 | pause; 37 | 38 | 39 | %% ======================= Part 2: Plotting ======================= 40 | fprintf('Plotting Data ...\n') 41 | data = load('ex1data1.txt'); 42 | X = data(:, 1); y = data(:, 2); 43 | m = length(y); % number of training examples 44 | 45 | % Plot Data 46 | % Note: You have to complete the code in plotData.m 47 | plotData(X, y); 48 | 49 | fprintf('Program paused. Press enter to continue.\n'); 50 | pause; 51 | 52 | %% =================== Part 3: Cost and Gradient descent =================== 53 | 54 | X = [ones(m, 1), data(:,1)]; % Add a column of ones to x 55 | theta = zeros(2, 1); % initialize fitting parameters 56 | 57 | % Some gradient descent settings 58 | iterations = 1500; 59 | alpha = 0.01; 60 | 61 | fprintf('\nTesting the cost function ...\n') 62 | % compute and display initial cost 63 | J = computeCost(X, y, theta); 64 | fprintf('With theta = [0 ; 0]\nCost computed = %f\n', J); 65 | fprintf('Expected cost value (approx) 32.07\n'); 66 | 67 | % further testing of the cost function 68 | J = computeCost(X, y, [-1 ; 2]); 69 | fprintf('\nWith theta = [-1 ; 2]\nCost computed = %f\n', J); 70 | fprintf('Expected cost value (approx) 54.24\n'); 71 | 72 | fprintf('Program paused. Press enter to continue.\n'); 73 | pause; 74 | 75 | fprintf('\nRunning Gradient Descent ...\n') 76 | % run gradient descent 77 | theta = gradientDescent(X, y, theta, alpha, iterations); 78 | 79 | % print theta to screen 80 | fprintf('Theta found by gradient descent:\n'); 81 | fprintf('%f\n', theta); 82 | fprintf('Expected theta values (approx)\n'); 83 | fprintf(' -3.6303\n 1.1664\n\n'); 84 | 85 | % Plot the linear fit 86 | hold on; % keep previous plot visible 87 | plot(X(:,2), X*theta, '-') 88 | legend('Training data', 'Linear regression') 89 | hold off % don't overlay any more plots on this figure 90 | 91 | % Predict values for population sizes of 35,000 and 70,000 92 | predict1 = [1, 3.5] *theta; 93 | fprintf('For population = 35,000, we predict a profit of %f\n',... 
94 | predict1*10000); 95 | predict2 = [1, 7] * theta; 96 | fprintf('For population = 70,000, we predict a profit of %f\n',... 97 | predict2*10000); 98 | 99 | fprintf('Program paused. Press enter to continue.\n'); 100 | pause; 101 | 102 | %% ============= Part 4: Visualizing J(theta_0, theta_1) ============= 103 | fprintf('Visualizing J(theta_0, theta_1) ...\n') 104 | 105 | % Grid over which we will calculate J 106 | theta0_vals = linspace(-10, 10, 100); 107 | theta1_vals = linspace(-1, 4, 100); 108 | 109 | % initialize J_vals to a matrix of 0's 110 | J_vals = zeros(length(theta0_vals), length(theta1_vals)); 111 | 112 | % Fill out J_vals 113 | for i = 1:length(theta0_vals) 114 | for j = 1:length(theta1_vals) 115 | t = [theta0_vals(i); theta1_vals(j)]; 116 | J_vals(i,j) = computeCost(X, y, t); 117 | end 118 | end 119 | 120 | 121 | % Because of the way meshgrids work in the surf command, we need to 122 | % transpose J_vals before calling surf, or else the axes will be flipped 123 | J_vals = J_vals'; 124 | % Surface plot 125 | figure; 126 | surf(theta0_vals, theta1_vals, J_vals) 127 | xlabel('\theta_0'); ylabel('\theta_1'); 128 | 129 | % Contour plot 130 | figure; 131 | % Plot J_vals as 15 contours spaced logarithmically between 0.01 and 100 132 | contour(theta0_vals, theta1_vals, J_vals, logspace(-2, 3, 20)) 133 | xlabel('\theta_0'); ylabel('\theta_1'); 134 | hold on; 135 | plot(theta(1), theta(2), 'rx', 'MarkerSize', 10, 'LineWidth', 2); 136 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex1/ex1_multi.m: -------------------------------------------------------------------------------- 1 | %% Machine Learning Online Class 2 | % Exercise 1: Linear regression with multiple variables 3 | % 4 | % Instructions 5 | % ------------ 6 | % 7 | % This file contains code that helps you get started on the 8 | % linear regression exercise. 9 | % 10 | % You will need to complete the following functions in this 11 | % exericse: 12 | % 13 | % warmUpExercise.m 14 | % plotData.m 15 | % gradientDescent.m 16 | % computeCost.m 17 | % gradientDescentMulti.m 18 | % computeCostMulti.m 19 | % featureNormalize.m 20 | % normalEqn.m 21 | % 22 | % For this part of the exercise, you will need to change some 23 | % parts of the code below for various experiments (e.g., changing 24 | % learning rates). 25 | % UfCo2GyyeCNRlEnZ 26 | 27 | %% Initialization 28 | 29 | %% ================ Part 1: Feature Normalization ================ 30 | 31 | %% Clear and Close Figures 32 | clear ; close all; clc 33 | 34 | fprintf('Loading data ...\n'); 35 | 36 | %% Load Data 37 | data = load('ex1data2.txt'); 38 | X = data(:, 1:2); 39 | y = data(:, 3); 40 | m = length(y); 41 | 42 | % Print out some data points 43 | fprintf('First 10 examples from the dataset: \n'); 44 | fprintf(' x = [%.0f %.0f], y = %.0f \n', [X(1:10,:) y(1:10,:)]'); 45 | 46 | fprintf('Program paused. Press enter to continue.\n'); 47 | pause; 48 | 49 | % Scale features and set them to zero mean 50 | fprintf('Normalizing Features ...\n'); 51 | 52 | [X mu sigma] = featureNormalize(X); 53 | 54 | % Add intercept term to X 55 | X = [ones(m, 1) X]; 56 | 57 | %% ================ Part 2: Gradient Descent ================ 58 | 59 | % ====================== YOUR CODE HERE ====================== 60 | % Instructions: We have provided you with the following starter 61 | % code that runs gradient descent with a particular 62 | % learning rate (alpha). 
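% Note (editor's addition, not in the original starter code): gradient descent
% in this script runs on the normalized features returned by featureNormalize,
% so theta is only valid for normalized inputs. Any later prediction must apply
% the same training-set mu and sigma before multiplying by theta, e.g.
%   price = [1, ([1650 3] - mu) ./ sigma] * theta;
% which is what the prediction code further below does.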
63 | % 64 | % Your task is to first make sure that your functions - 65 | % computeCost and gradientDescent already work with 66 | % this starter code and support multiple variables. 67 | % 68 | % After that, try running gradient descent with 69 | % different values of alpha and see which one gives 70 | % you the best result. 71 | % 72 | % Finally, you should complete the code at the end 73 | % to predict the price of a 1650 sq-ft, 3 br house. 74 | % 75 | % Hint: By using the 'hold on' command, you can plot multiple 76 | % graphs on the same figure. 77 | % 78 | % Hint: At prediction, make sure you do the same feature normalization. 79 | % 80 | 81 | fprintf('Running gradient descent ...\n'); 82 | 83 | % Choose some alpha value 84 | alpha = 0.1; 85 | num_iters = 100; 86 | 87 | % Init Theta and Run Gradient Descent 88 | theta = zeros(3, 1); 89 | [theta, J_history] = gradientDescentMulti(X, y, theta, alpha, num_iters); 90 | 91 | % Plot the convergence graph 92 | figure; 93 | plot(1:numel(J_history), J_history, '-b', 'LineWidth', 2); 94 | xlabel('Number of iterations'); 95 | ylabel('Cost J'); 96 | 97 | % Display gradient descent's result 98 | fprintf('Theta computed from gradient descent: \n'); 99 | fprintf(' %f \n', theta); 100 | fprintf('\n'); 101 | 102 | % Estimate the price of a 1650 sq-ft, 3 br house 103 | % ====================== YOUR CODE HERE ====================== 104 | % Recall that the first column of X is all-ones. Thus, it does 105 | % not need to be normalized. 106 | X = [1650 3]; 107 | X = (X - mu) ./ sigma; 108 | price = [1 X] * theta; % You should change this 109 | 110 | 111 | % ============================================================ 112 | 113 | fprintf(['Predicted price of a 1650 sq-ft, 3 br house ' ... 114 | '(using gradient descent):\n $%f\n'], price); 115 | 116 | fprintf('Program paused. Press enter to continue.\n'); 117 | pause; 118 | 119 | %% ================ Part 3: Normal Equations ================ 120 | 121 | fprintf('Solving with normal equations...\n'); 122 | 123 | % ====================== YOUR CODE HERE ====================== 124 | % Instructions: The following code computes the closed form 125 | % solution for linear regression using the normal 126 | % equations. You should complete the code in 127 | % normalEqn.m 128 | % 129 | % After doing so, you should complete this code 130 | % to predict the price of a 1650 sq-ft, 3 br house. 131 | % 132 | 133 | %% Load Data 134 | data = csvread('ex1data2.txt'); 135 | X = data(:, 1:2); 136 | y = data(:, 3); 137 | m = length(y); 138 | 139 | % Add intercept term to X 140 | X = [ones(m, 1) X]; 141 | 142 | % Calculate the parameters from the normal equation 143 | theta = normalEqn(X, y); 144 | 145 | % Display normal equation's result 146 | fprintf('Theta computed from the normal equations: \n'); 147 | fprintf(' %f \n', theta); 148 | fprintf('\n'); 149 | 150 | 151 | % Estimate the price of a 1650 sq-ft, 3 br house 152 | % ====================== YOUR CODE HERE ====================== 153 | price = [1 1650 3] * theta;; % You should change this 154 | 155 | 156 | % ============================================================ 157 | 158 | fprintf(['Predicted price of a 1650 sq-ft, 3 br house ' ... 
159 | '(using normal equations):\n $%f\n'], price); 160 | 161 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex1/ex1data1.txt: -------------------------------------------------------------------------------- 1 | 6.1101,17.592 2 | 5.5277,9.1302 3 | 8.5186,13.662 4 | 7.0032,11.854 5 | 5.8598,6.8233 6 | 8.3829,11.886 7 | 7.4764,4.3483 8 | 8.5781,12 9 | 6.4862,6.5987 10 | 5.0546,3.8166 11 | 5.7107,3.2522 12 | 14.164,15.505 13 | 5.734,3.1551 14 | 8.4084,7.2258 15 | 5.6407,0.71618 16 | 5.3794,3.5129 17 | 6.3654,5.3048 18 | 5.1301,0.56077 19 | 6.4296,3.6518 20 | 7.0708,5.3893 21 | 6.1891,3.1386 22 | 20.27,21.767 23 | 5.4901,4.263 24 | 6.3261,5.1875 25 | 5.5649,3.0825 26 | 18.945,22.638 27 | 12.828,13.501 28 | 10.957,7.0467 29 | 13.176,14.692 30 | 22.203,24.147 31 | 5.2524,-1.22 32 | 6.5894,5.9966 33 | 9.2482,12.134 34 | 5.8918,1.8495 35 | 8.2111,6.5426 36 | 7.9334,4.5623 37 | 8.0959,4.1164 38 | 5.6063,3.3928 39 | 12.836,10.117 40 | 6.3534,5.4974 41 | 5.4069,0.55657 42 | 6.8825,3.9115 43 | 11.708,5.3854 44 | 5.7737,2.4406 45 | 7.8247,6.7318 46 | 7.0931,1.0463 47 | 5.0702,5.1337 48 | 5.8014,1.844 49 | 11.7,8.0043 50 | 5.5416,1.0179 51 | 7.5402,6.7504 52 | 5.3077,1.8396 53 | 7.4239,4.2885 54 | 7.6031,4.9981 55 | 6.3328,1.4233 56 | 6.3589,-1.4211 57 | 6.2742,2.4756 58 | 5.6397,4.6042 59 | 9.3102,3.9624 60 | 9.4536,5.4141 61 | 8.8254,5.1694 62 | 5.1793,-0.74279 63 | 21.279,17.929 64 | 14.908,12.054 65 | 18.959,17.054 66 | 7.2182,4.8852 67 | 8.2951,5.7442 68 | 10.236,7.7754 69 | 5.4994,1.0173 70 | 20.341,20.992 71 | 10.136,6.6799 72 | 7.3345,4.0259 73 | 6.0062,1.2784 74 | 7.2259,3.3411 75 | 5.0269,-2.6807 76 | 6.5479,0.29678 77 | 7.5386,3.8845 78 | 5.0365,5.7014 79 | 10.274,6.7526 80 | 5.1077,2.0576 81 | 5.7292,0.47953 82 | 5.1884,0.20421 83 | 6.3557,0.67861 84 | 9.7687,7.5435 85 | 6.5159,5.3436 86 | 8.5172,4.2415 87 | 9.1802,6.7981 88 | 6.002,0.92695 89 | 5.5204,0.152 90 | 5.0594,2.8214 91 | 5.7077,1.8451 92 | 7.6366,4.2959 93 | 5.8707,7.2029 94 | 5.3054,1.9869 95 | 8.2934,0.14454 96 | 13.394,9.0551 97 | 5.4369,0.61705 98 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex1/ex1data2.txt: -------------------------------------------------------------------------------- 1 | 2104,3,399900 2 | 1600,3,329900 3 | 2400,3,369000 4 | 1416,2,232000 5 | 3000,4,539900 6 | 1985,4,299900 7 | 1534,3,314900 8 | 1427,3,198999 9 | 1380,3,212000 10 | 1494,3,242500 11 | 1940,4,239999 12 | 2000,3,347000 13 | 1890,3,329999 14 | 4478,5,699900 15 | 1268,3,259900 16 | 2300,4,449900 17 | 1320,2,299900 18 | 1236,3,199900 19 | 2609,4,499998 20 | 3031,4,599000 21 | 1767,3,252900 22 | 1888,2,255000 23 | 1604,3,242900 24 | 1962,4,259900 25 | 3890,3,573900 26 | 1100,3,249900 27 | 1458,3,464500 28 | 2526,3,469000 29 | 2200,3,475000 30 | 2637,3,299900 31 | 1839,2,349900 32 | 1000,1,169900 33 | 2040,4,314900 34 | 3137,3,579900 35 | 1811,4,285900 36 | 1437,3,249900 37 | 1239,3,229900 38 | 2132,4,345000 39 | 4215,4,549000 40 | 2162,4,287000 41 | 1664,2,368500 42 | 2238,3,329900 43 | 2567,4,314000 44 | 1200,3,299000 45 | 852,2,179900 46 | 1852,4,299900 47 | 1203,3,239500 48 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex1/featureNormalize.m: -------------------------------------------------------------------------------- 1 | function [X_norm, mu, sigma] = featureNormalize(X) 2 | %FEATURENORMALIZE Normalizes the features in X 3 | % 
FEATURENORMALIZE(X) returns a normalized version of X where 4 | % the mean value of each feature is 0 and the standard deviation 5 | % is 1. This is often a good preprocessing step to do when 6 | % working with learning algorithms. 7 | 8 | % You need to set these values correctly 9 | X_norm = X; 10 | mu = zeros(1, size(X, 2)); 11 | sigma = zeros(1, size(X, 2)); 12 | 13 | % ====================== YOUR CODE HERE ====================== 14 | % Instructions: First, for each feature dimension, compute the mean 15 | % of the feature and subtract it from the dataset, 16 | % storing the mean value in mu. Next, compute the 17 | % standard deviation of each feature and divide 18 | % each feature by it's standard deviation, storing 19 | % the standard deviation in sigma. 20 | % 21 | % Note that X is a matrix where each column is a 22 | % feature and each row is an example. You need 23 | % to perform the normalization separately for 24 | % each feature. 25 | % 26 | % Hint: You might find the 'mean' and 'std' functions useful. 27 | % 28 | 29 | mu = mean(X, 1); 30 | sigma = std(X, 1); 31 | 32 | 33 | X_norm = (X - mu) ./ sigma; 34 | 35 | 36 | % ============================================================ 37 | 38 | end 39 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex1/gradientDescent.m: -------------------------------------------------------------------------------- 1 | function [theta, J_history] = gradientDescent(X, y, theta, alpha, num_iters) 2 | %GRADIENTDESCENT Performs gradient descent to learn theta 3 | % theta = GRADIENTDESCENT(X, y, theta, alpha, num_iters) updates theta by 4 | % taking num_iters gradient steps with learning rate alpha 5 | 6 | % Initialize some useful values 7 | m = length(y); % number of training examples 8 | J_history = zeros(num_iters, 1); 9 | 10 | for iter = 1:num_iters 11 | 12 | % ====================== YOUR CODE HERE ====================== 13 | % Instructions: Perform a single gradient step on the parameter vector 14 | % theta. 15 | % 16 | % Hint: While debugging, it can be useful to print out the values 17 | % of the cost function (computeCost) and gradient here. 18 | % 19 | theta = theta - (alpha / m) * (X' * (X * theta - y)); 20 | 21 | % ============================================================ 22 | 23 | % Save the cost J in every iteration 24 | J_history(iter) = computeCost(X, y, theta); 25 | 26 | end 27 | 28 | end 29 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex1/gradientDescentMulti.m: -------------------------------------------------------------------------------- 1 | function [theta, J_history] = gradientDescentMulti(X, y, theta, alpha, num_iters) 2 | %GRADIENTDESCENTMULTI Performs gradient descent to learn theta 3 | % theta = GRADIENTDESCENTMULTI(x, y, theta, alpha, num_iters) updates theta by 4 | % taking num_iters gradient steps with learning rate alpha 5 | 6 | % Initialize some useful values 7 | m = length(y); % number of training examples 8 | J_history = zeros(num_iters, 1); 9 | 10 | for iter = 1:num_iters 11 | 12 | % ====================== YOUR CODE HERE ====================== 13 | % Instructions: Perform a single gradient step on the parameter vector 14 | % theta. 15 | % 16 | % Hint: While debugging, it can be useful to print out the values 17 | % of the cost function (computeCostMulti) and gradient here. 
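% Note (editor's addition, not part of the original starter file): the update
% below is the standard batch gradient descent step,
%   theta_j := theta_j - alpha * (1/m) * sum_i (h_theta(x^(i)) - y^(i)) * x_j^(i),
% written in vectorized form so every component of theta is updated
% simultaneously from the same residual vector (X*theta - y).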
18 | % 19 | 20 | theta = theta - (alpha / m) * (X' * (X * theta - y)); 21 | 22 | % ============================================================ 23 | 24 | % Save the cost J in every iteration 25 | J_history(iter) = computeCostMulti(X, y, theta); 26 | 27 | end 28 | 29 | end 30 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex1/normalEqn.m: -------------------------------------------------------------------------------- 1 | function [theta] = normalEqn(X, y) 2 | %NORMALEQN Computes the closed-form solution to linear regression 3 | % NORMALEQN(X,y) computes the closed-form solution to linear 4 | % regression using the normal equations. 5 | 6 | theta = zeros(size(X, 2), 1); 7 | 8 | % ====================== YOUR CODE HERE ====================== 9 | % Instructions: Complete the code to compute the closed form solution 10 | % to linear regression and put the result in theta. 11 | % 12 | 13 | % ---------------------- Sample Solution ---------------------- 14 | 15 | 16 | theta = pinv(X' * X) * X' * y; 17 | 18 | % ------------------------------------------------------------- 19 | 20 | 21 | % ============================================================ 22 | 23 | end 24 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex1/plotData.m: -------------------------------------------------------------------------------- 1 | function plotData(x, y) 2 | %PLOTDATA Plots the data points x and y into a new figure 3 | % PLOTDATA(x,y) plots the data points and gives the figure axes labels of 4 | % population and profit. 5 | 6 | figure; % open a new figure window 7 | 8 | % ====================== YOUR CODE HERE ====================== 9 | % Instructions: Plot the training data into a figure using the 10 | % "figure" and "plot" commands. Set the axes labels using 11 | % the "xlabel" and "ylabel" commands. Assume the 12 | % population and revenue data have been passed in 13 | % as the x and y arguments of this function. 14 | % 15 | % Hint: You can use the 'rx' option with plot to have the markers 16 | % appear as red crosses. Furthermore, you can make the 17 | % markers larger by using plot(..., 'rx', 'MarkerSize', 10); 18 | 19 | plot(x, y, 'rx', 'MarkerSize', 10); 20 | ylabel('Proft in $10,000s'); 21 | xlabel('Population of City in 10,000s'); 22 | 23 | 24 | 25 | % ============================================================ 26 | 27 | end 28 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex1/warmUpExercise.m: -------------------------------------------------------------------------------- 1 | function A = warmUpExercise() 2 | %WARMUPEXERCISE Example function in octave 3 | % A = WARMUPEXERCISE() is an example function that returns the 5x5 identity matrix 4 | 5 | A = []; 6 | % ============= YOUR CODE HERE ============== 7 | % Instructions: Return the 5x5 identity matrix 8 | % In octave, we return values by defining which variables 9 | % represent the return values (at the top of the file) 10 | % and then set them accordingly. 
11 | 12 | 13 | A = eye(5); 14 | 15 | 16 | 17 | % =========================================== 18 | 19 | 20 | end 21 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex2/costFunction.m: -------------------------------------------------------------------------------- 1 | function [J, grad] = costFunction(theta, X, y) 2 | %COSTFUNCTION Compute cost and gradient for logistic regression 3 | % J = COSTFUNCTION(theta, X, y) computes the cost of using theta as the 4 | % parameter for logistic regression and the gradient of the cost 5 | % w.r.t. to the parameters. 6 | 7 | % Initialize some useful values 8 | m = length(y); % number of training examples 9 | 10 | % You need to return the following variables correctly 11 | J = 0; 12 | grad = zeros(size(theta)); 13 | 14 | % ====================== YOUR CODE HERE ====================== 15 | % Instructions: Compute the cost of a particular choice of theta. 16 | % You should set J to the cost. 17 | % Compute the partial derivatives and set grad to the partial 18 | % derivatives of the cost w.r.t. each parameter in theta 19 | % 20 | % Note: grad should have the same dimensions as theta 21 | % 22 | 23 | hypothesis = sigmoid(X * theta); 24 | 25 | J = (1/m) * (-y'*log(hypothesis) - (1-y')*log(1 - hypothesis)); 26 | grad = (1/m) * ((hypothesis - y)' * X)'; 27 | 28 | % ============================================================= 29 | 30 | end 31 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex2/costFunctionReg.m: -------------------------------------------------------------------------------- 1 | function [J, grad] = costFunctionReg(theta, X, y, lambda) 2 | %COSTFUNCTIONREG Compute cost and gradient for logistic regression with regularization 3 | % J = COSTFUNCTIONREG(theta, X, y, lambda) computes the cost of using 4 | % theta as the parameter for regularized logistic regression and the 5 | % gradient of the cost w.r.t. to the parameters. 6 | 7 | % Initialize some useful values 8 | m = length(y); % number of training examples 9 | 10 | % You need to return the following variables correctly 11 | J = 0; 12 | grad = zeros(size(theta)); 13 | 14 | % ====================== YOUR CODE HERE ====================== 15 | % Instructions: Compute the cost of a particular choice of theta. 16 | % You should set J to the cost. 17 | % Compute the partial derivatives and set grad to the partial 18 | % derivatives of the cost w.r.t. each parameter in theta 19 | 20 | hypothesis = sigmoid(X * theta); 21 | 22 | thetaTmp = [0; theta(2:end)]; 23 | J = (1/m) * (-y'*log(hypothesis) - (1-y')*log(1 - hypothesis)) ... 24 | + (lambda / (2*m)) * (thetaTmp' * thetaTmp); 25 | 26 | %grad(1) = (1/m) * ((hypothesis - y)' * X(:, 1))'; 27 | %grad(2:end) = (1/m) * ((hypothesis - y)' * X(:, 2:end))' ... 28 | % + (lambda / m) * theta(2:end); 29 | 30 | grad = (1/m) * ((hypothesis - y)' * X)' + (lambda / m) * thetaTmp; 31 | 32 | % ============================================================= 33 | 34 | end 35 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex2/ex2.m: -------------------------------------------------------------------------------- 1 | %% Machine Learning Online Class - Exercise 2: Logistic Regression 2 | % 3 | % Instructions 4 | % ------------ 5 | % 6 | % This file contains code that helps you get started on the logistic 7 | % regression exercise. 
You will need to complete the following functions 8 | % in this exericse: 9 | % 10 | % sigmoid.m 11 | % costFunction.m 12 | % predict.m 13 | % costFunctionReg.m 14 | % 15 | % For this exercise, you will not need to change any code in this file, 16 | % or any other files other than those mentioned above. 17 | % 18 | 19 | %% Initialization 20 | clear ; close all; clc 21 | 22 | %% Load Data 23 | % The first two columns contains the exam scores and the third column 24 | % contains the label. 25 | 26 | data = load('ex2data1.txt'); 27 | X = data(:, [1, 2]); y = data(:, 3); 28 | 29 | %% ==================== Part 1: Plotting ==================== 30 | % We start the exercise by first plotting the data to understand the 31 | % the problem we are working with. 32 | 33 | fprintf(['Plotting data with + indicating (y = 1) examples and o ' ... 34 | 'indicating (y = 0) examples.\n']); 35 | 36 | plotData(X, y); 37 | 38 | % Put some labels 39 | hold on; 40 | % Labels and Legend 41 | xlabel('Exam 1 score') 42 | ylabel('Exam 2 score') 43 | 44 | % Specified in plot order 45 | legend('Admitted', 'Not admitted') 46 | hold off; 47 | 48 | fprintf('\nProgram paused. Press enter to continue.\n'); 49 | pause; 50 | 51 | 52 | %% ============ Part 2: Compute Cost and Gradient ============ 53 | % In this part of the exercise, you will implement the cost and gradient 54 | % for logistic regression. You neeed to complete the code in 55 | % costFunction.m 56 | 57 | % Setup the data matrix appropriately, and add ones for the intercept term 58 | [m, n] = size(X); 59 | 60 | % Add intercept term to x and X_test 61 | X = [ones(m, 1) X]; 62 | 63 | % Initialize fitting parameters 64 | initial_theta = zeros(n + 1, 1); 65 | 66 | % Compute and display initial cost and gradient 67 | [cost, grad] = costFunction(initial_theta, X, y); 68 | 69 | fprintf('Cost at initial theta (zeros): %f\n', cost); 70 | fprintf('Expected cost (approx): 0.693\n'); 71 | fprintf('Gradient at initial theta (zeros): \n'); 72 | fprintf(' %f \n', grad); 73 | fprintf('Expected gradients (approx):\n -0.1000\n -12.0092\n -11.2628\n'); 74 | 75 | % Compute and display cost and gradient with non-zero theta 76 | test_theta = [-24; 0.2; 0.2]; 77 | [cost, grad] = costFunction(test_theta, X, y); 78 | 79 | fprintf('\nCost at test theta: %f\n', cost); 80 | fprintf('Expected cost (approx): 0.218\n'); 81 | fprintf('Gradient at test theta: \n'); 82 | fprintf(' %f \n', grad); 83 | fprintf('Expected gradients (approx):\n 0.043\n 2.566\n 2.647\n'); 84 | 85 | fprintf('\nProgram paused. Press enter to continue.\n'); 86 | pause; 87 | 88 | 89 | %% ============= Part 3: Optimizing using fminunc ============= 90 | % In this exercise, you will use a built-in function (fminunc) to find the 91 | % optimal parameters theta. 92 | 93 | % Set options for fminunc 94 | options = optimset('GradObj', 'on', 'MaxIter', 400); 95 | 96 | % Run fminunc to obtain the optimal theta 97 | % This function will return theta and the cost 98 | [theta, cost] = ... 
99 | fminunc(@(t)(costFunction(t, X, y)), initial_theta, options); 100 | 101 | % Print theta to screen 102 | fprintf('Cost at theta found by fminunc: %f\n', cost); 103 | fprintf('Expected cost (approx): 0.203\n'); 104 | fprintf('theta: \n'); 105 | fprintf(' %f \n', theta); 106 | fprintf('Expected theta (approx):\n'); 107 | fprintf(' -25.161\n 0.206\n 0.201\n'); 108 | 109 | % Plot Boundary 110 | plotDecisionBoundary(theta, X, y); 111 | 112 | % Put some labels 113 | hold on; 114 | % Labels and Legend 115 | xlabel('Exam 1 score') 116 | ylabel('Exam 2 score') 117 | 118 | % Specified in plot order 119 | legend('Admitted', 'Not admitted') 120 | hold off; 121 | 122 | fprintf('\nProgram paused. Press enter to continue.\n'); 123 | pause; 124 | 125 | %% ============== Part 4: Predict and Accuracies ============== 126 | % After learning the parameters, you'll like to use it to predict the outcomes 127 | % on unseen data. In this part, you will use the logistic regression model 128 | % to predict the probability that a student with score 45 on exam 1 and 129 | % score 85 on exam 2 will be admitted. 130 | % 131 | % Furthermore, you will compute the training and test set accuracies of 132 | % our model. 133 | % 134 | % Your task is to complete the code in predict.m 135 | 136 | % Predict probability for a student with score 45 on exam 1 137 | % and score 85 on exam 2 138 | 139 | prob = sigmoid([1 45 85] * theta); 140 | fprintf(['For a student with scores 45 and 85, we predict an admission ' ... 141 | 'probability of %f\n'], prob); 142 | fprintf('Expected value: 0.775 +/- 0.002\n\n'); 143 | 144 | % Compute accuracy on our training set 145 | p = predict(theta, X); 146 | 147 | fprintf('Train Accuracy: %f\n', mean(double(p == y)) * 100); 148 | fprintf('Expected accuracy (approx): 89.0\n'); 149 | fprintf('\n'); 150 | 151 | 152 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex2/ex2_reg.m: -------------------------------------------------------------------------------- 1 | %% Machine Learning Online Class - Exercise 2: Logistic Regression 2 | % 3 | % Instructions 4 | % ------------ 5 | % 6 | % This file contains code that helps you get started on the second part 7 | % of the exercise which covers regularization with logistic regression. 8 | % 9 | % You will need to complete the following functions in this exericse: 10 | % 11 | % sigmoid.m 12 | % costFunction.m 13 | % predict.m 14 | % costFunctionReg.m 15 | % 16 | % For this exercise, you will not need to change any code in this file, 17 | % or any other files other than those mentioned above. 18 | % 19 | 20 | %% Initialization 21 | clear ; close all; clc 22 | 23 | %% Load Data 24 | % The first two columns contains the X values and the third column 25 | % contains the label (y). 26 | 27 | data = load('ex2data2.txt'); 28 | X = data(:, [1, 2]); y = data(:, 3); 29 | 30 | plotData(X, y); 31 | 32 | % Put some labels 33 | hold on; 34 | 35 | % Labels and Legend 36 | xlabel('Microchip Test 1') 37 | ylabel('Microchip Test 2') 38 | 39 | % Specified in plot order 40 | legend('y = 1', 'y = 0') 41 | hold off; 42 | 43 | 44 | %% =========== Part 1: Regularized Logistic Regression ============ 45 | % In this part, you are given a dataset with data points that are not 46 | % linearly separable. However, you would still like to use logistic 47 | % regression to classify the data points. 
48 | % 49 | % To do so, you introduce more features to use -- in particular, you add 50 | % polynomial features to our data matrix (similar to polynomial 51 | % regression). 52 | % 53 | 54 | % Add Polynomial Features 55 | 56 | % Note that mapFeature also adds a column of ones for us, so the intercept 57 | % term is handled 58 | X = mapFeature(X(:,1), X(:,2)); 59 | 60 | % Initialize fitting parameters 61 | initial_theta = zeros(size(X, 2), 1); 62 | 63 | % Set regularization parameter lambda to 1 64 | lambda = 1; 65 | 66 | % Compute and display initial cost and gradient for regularized logistic 67 | % regression 68 | [cost, grad] = costFunctionReg(initial_theta, X, y, lambda); 69 | 70 | fprintf('Cost at initial theta (zeros): %f\n', cost); 71 | fprintf('Expected cost (approx): 0.693\n'); 72 | fprintf('Gradient at initial theta (zeros) - first five values only:\n'); 73 | fprintf(' %f \n', grad(1:5)); 74 | fprintf('Expected gradients (approx) - first five values only:\n'); 75 | fprintf(' 0.0085\n 0.0188\n 0.0001\n 0.0503\n 0.0115\n'); 76 | 77 | fprintf('\nProgram paused. Press enter to continue.\n'); 78 | pause; 79 | 80 | % Compute and display cost and gradient 81 | % with all-ones theta and lambda = 10 82 | test_theta = ones(size(X,2),1); 83 | [cost, grad] = costFunctionReg(test_theta, X, y, 10); 84 | 85 | fprintf('\nCost at test theta (with lambda = 10): %f\n', cost); 86 | fprintf('Expected cost (approx): 3.16\n'); 87 | fprintf('Gradient at test theta - first five values only:\n'); 88 | fprintf(' %f \n', grad(1:5)); 89 | fprintf('Expected gradients (approx) - first five values only:\n'); 90 | fprintf(' 0.3460\n 0.1614\n 0.1948\n 0.2269\n 0.0922\n'); 91 | 92 | fprintf('\nProgram paused. Press enter to continue.\n'); 93 | pause; 94 | 95 | %% ============= Part 2: Regularization and Accuracies ============= 96 | % Optional Exercise: 97 | % In this part, you will get to try different values of lambda and 98 | % see how regularization affects the decision coundart 99 | % 100 | % Try the following values of lambda (0, 1, 10, 100). 101 | % 102 | % How does the decision boundary change when you vary lambda? How does 103 | % the training set accuracy vary? 104 | % 105 | 106 | % Initialize fitting parameters 107 | initial_theta = zeros(size(X, 2), 1); 108 | 109 | % Set regularization parameter lambda to 1 (you should vary this) 110 | lambda = 1; 111 | 112 | % Set Options 113 | options = optimset('GradObj', 'on', 'MaxIter', 400); 114 | 115 | % Optimize 116 | [theta, J, exit_flag] = ... 
117 | fminunc(@(t)(costFunctionReg(t, X, y, lambda)), initial_theta, options); 118 | 119 | % Plot Boundary 120 | plotDecisionBoundary(theta, X, y); 121 | hold on; 122 | title(sprintf('lambda = %g', lambda)) 123 | 124 | % Labels and Legend 125 | xlabel('Microchip Test 1') 126 | ylabel('Microchip Test 2') 127 | 128 | legend('y = 1', 'y = 0', 'Decision boundary') 129 | hold off; 130 | 131 | % Compute accuracy on our training set 132 | p = predict(theta, X); 133 | 134 | fprintf('Train Accuracy: %f\n', mean(double(p == y)) * 100); 135 | fprintf('Expected accuracy (with lambda = 1): 83.1 (approx)\n'); 136 | 137 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex2/ex2data1.txt: -------------------------------------------------------------------------------- 1 | 34.62365962451697,78.0246928153624,0 2 | 30.28671076822607,43.89499752400101,0 3 | 35.84740876993872,72.90219802708364,0 4 | 60.18259938620976,86.30855209546826,1 5 | 79.0327360507101,75.3443764369103,1 6 | 45.08327747668339,56.3163717815305,0 7 | 61.10666453684766,96.51142588489624,1 8 | 75.02474556738889,46.55401354116538,1 9 | 76.09878670226257,87.42056971926803,1 10 | 84.43281996120035,43.53339331072109,1 11 | 95.86155507093572,38.22527805795094,0 12 | 75.01365838958247,30.60326323428011,0 13 | 82.30705337399482,76.48196330235604,1 14 | 69.36458875970939,97.71869196188608,1 15 | 39.53833914367223,76.03681085115882,0 16 | 53.9710521485623,89.20735013750205,1 17 | 69.07014406283025,52.74046973016765,1 18 | 67.94685547711617,46.67857410673128,0 19 | 70.66150955499435,92.92713789364831,1 20 | 76.97878372747498,47.57596364975532,1 21 | 67.37202754570876,42.83843832029179,0 22 | 89.67677575072079,65.79936592745237,1 23 | 50.534788289883,48.85581152764205,0 24 | 34.21206097786789,44.20952859866288,0 25 | 77.9240914545704,68.9723599933059,1 26 | 62.27101367004632,69.95445795447587,1 27 | 80.1901807509566,44.82162893218353,1 28 | 93.114388797442,38.80067033713209,0 29 | 61.83020602312595,50.25610789244621,0 30 | 38.78580379679423,64.99568095539578,0 31 | 61.379289447425,72.80788731317097,1 32 | 85.40451939411645,57.05198397627122,1 33 | 52.10797973193984,63.12762376881715,0 34 | 52.04540476831827,69.43286012045222,1 35 | 40.23689373545111,71.16774802184875,0 36 | 54.63510555424817,52.21388588061123,0 37 | 33.91550010906887,98.86943574220611,0 38 | 64.17698887494485,80.90806058670817,1 39 | 74.78925295941542,41.57341522824434,0 40 | 34.1836400264419,75.2377203360134,0 41 | 83.90239366249155,56.30804621605327,1 42 | 51.54772026906181,46.85629026349976,0 43 | 94.44336776917852,65.56892160559052,1 44 | 82.36875375713919,40.61825515970618,0 45 | 51.04775177128865,45.82270145776001,0 46 | 62.22267576120188,52.06099194836679,0 47 | 77.19303492601364,70.45820000180959,1 48 | 97.77159928000232,86.7278223300282,1 49 | 62.07306379667647,96.76882412413983,1 50 | 91.56497449807442,88.69629254546599,1 51 | 79.94481794066932,74.16311935043758,1 52 | 99.2725269292572,60.99903099844988,1 53 | 90.54671411399852,43.39060180650027,1 54 | 34.52451385320009,60.39634245837173,0 55 | 50.2864961189907,49.80453881323059,0 56 | 49.58667721632031,59.80895099453265,0 57 | 97.64563396007767,68.86157272420604,1 58 | 32.57720016809309,95.59854761387875,0 59 | 74.24869136721598,69.82457122657193,1 60 | 71.79646205863379,78.45356224515052,1 61 | 75.3956114656803,85.75993667331619,1 62 | 35.28611281526193,47.02051394723416,0 63 | 56.25381749711624,39.26147251058019,0 64 | 30.05882244669796,49.59297386723685,0 65 
| 44.66826172480893,66.45008614558913,0 66 | 66.56089447242954,41.09209807936973,0 67 | 40.45755098375164,97.53518548909936,1 68 | 49.07256321908844,51.88321182073966,0 69 | 80.27957401466998,92.11606081344084,1 70 | 66.74671856944039,60.99139402740988,1 71 | 32.72283304060323,43.30717306430063,0 72 | 64.0393204150601,78.03168802018232,1 73 | 72.34649422579923,96.22759296761404,1 74 | 60.45788573918959,73.09499809758037,1 75 | 58.84095621726802,75.85844831279042,1 76 | 99.82785779692128,72.36925193383885,1 77 | 47.26426910848174,88.47586499559782,1 78 | 50.45815980285988,75.80985952982456,1 79 | 60.45555629271532,42.50840943572217,0 80 | 82.22666157785568,42.71987853716458,0 81 | 88.9138964166533,69.80378889835472,1 82 | 94.83450672430196,45.69430680250754,1 83 | 67.31925746917527,66.58935317747915,1 84 | 57.23870631569862,59.51428198012956,1 85 | 80.36675600171273,90.96014789746954,1 86 | 68.46852178591112,85.59430710452014,1 87 | 42.0754545384731,78.84478600148043,0 88 | 75.47770200533905,90.42453899753964,1 89 | 78.63542434898018,96.64742716885644,1 90 | 52.34800398794107,60.76950525602592,0 91 | 94.09433112516793,77.15910509073893,1 92 | 90.44855097096364,87.50879176484702,1 93 | 55.48216114069585,35.57070347228866,0 94 | 74.49269241843041,84.84513684930135,1 95 | 89.84580670720979,45.35828361091658,1 96 | 83.48916274498238,48.38028579728175,1 97 | 42.2617008099817,87.10385094025457,1 98 | 99.31500880510394,68.77540947206617,1 99 | 55.34001756003703,64.9319380069486,1 100 | 74.77589300092767,89.52981289513276,1 101 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex2/ex2data2.txt: -------------------------------------------------------------------------------- 1 | 0.051267,0.69956,1 2 | -0.092742,0.68494,1 3 | -0.21371,0.69225,1 4 | -0.375,0.50219,1 5 | -0.51325,0.46564,1 6 | -0.52477,0.2098,1 7 | -0.39804,0.034357,1 8 | -0.30588,-0.19225,1 9 | 0.016705,-0.40424,1 10 | 0.13191,-0.51389,1 11 | 0.38537,-0.56506,1 12 | 0.52938,-0.5212,1 13 | 0.63882,-0.24342,1 14 | 0.73675,-0.18494,1 15 | 0.54666,0.48757,1 16 | 0.322,0.5826,1 17 | 0.16647,0.53874,1 18 | -0.046659,0.81652,1 19 | -0.17339,0.69956,1 20 | -0.47869,0.63377,1 21 | -0.60541,0.59722,1 22 | -0.62846,0.33406,1 23 | -0.59389,0.005117,1 24 | -0.42108,-0.27266,1 25 | -0.11578,-0.39693,1 26 | 0.20104,-0.60161,1 27 | 0.46601,-0.53582,1 28 | 0.67339,-0.53582,1 29 | -0.13882,0.54605,1 30 | -0.29435,0.77997,1 31 | -0.26555,0.96272,1 32 | -0.16187,0.8019,1 33 | -0.17339,0.64839,1 34 | -0.28283,0.47295,1 35 | -0.36348,0.31213,1 36 | -0.30012,0.027047,1 37 | -0.23675,-0.21418,1 38 | -0.06394,-0.18494,1 39 | 0.062788,-0.16301,1 40 | 0.22984,-0.41155,1 41 | 0.2932,-0.2288,1 42 | 0.48329,-0.18494,1 43 | 0.64459,-0.14108,1 44 | 0.46025,0.012427,1 45 | 0.6273,0.15863,1 46 | 0.57546,0.26827,1 47 | 0.72523,0.44371,1 48 | 0.22408,0.52412,1 49 | 0.44297,0.67032,1 50 | 0.322,0.69225,1 51 | 0.13767,0.57529,1 52 | -0.0063364,0.39985,1 53 | -0.092742,0.55336,1 54 | -0.20795,0.35599,1 55 | -0.20795,0.17325,1 56 | -0.43836,0.21711,1 57 | -0.21947,-0.016813,1 58 | -0.13882,-0.27266,1 59 | 0.18376,0.93348,0 60 | 0.22408,0.77997,0 61 | 0.29896,0.61915,0 62 | 0.50634,0.75804,0 63 | 0.61578,0.7288,0 64 | 0.60426,0.59722,0 65 | 0.76555,0.50219,0 66 | 0.92684,0.3633,0 67 | 0.82316,0.27558,0 68 | 0.96141,0.085526,0 69 | 0.93836,0.012427,0 70 | 0.86348,-0.082602,0 71 | 0.89804,-0.20687,0 72 | 0.85196,-0.36769,0 73 | 0.82892,-0.5212,0 74 | 0.79435,-0.55775,0 75 | 0.59274,-0.7405,0 76 | 
0.51786,-0.5943,0 77 | 0.46601,-0.41886,0 78 | 0.35081,-0.57968,0 79 | 0.28744,-0.76974,0 80 | 0.085829,-0.75512,0 81 | 0.14919,-0.57968,0 82 | -0.13306,-0.4481,0 83 | -0.40956,-0.41155,0 84 | -0.39228,-0.25804,0 85 | -0.74366,-0.25804,0 86 | -0.69758,0.041667,0 87 | -0.75518,0.2902,0 88 | -0.69758,0.68494,0 89 | -0.4038,0.70687,0 90 | -0.38076,0.91886,0 91 | -0.50749,0.90424,0 92 | -0.54781,0.70687,0 93 | 0.10311,0.77997,0 94 | 0.057028,0.91886,0 95 | -0.10426,0.99196,0 96 | -0.081221,1.1089,0 97 | 0.28744,1.087,0 98 | 0.39689,0.82383,0 99 | 0.63882,0.88962,0 100 | 0.82316,0.66301,0 101 | 0.67339,0.64108,0 102 | 1.0709,0.10015,0 103 | -0.046659,-0.57968,0 104 | -0.23675,-0.63816,0 105 | -0.15035,-0.36769,0 106 | -0.49021,-0.3019,0 107 | -0.46717,-0.13377,0 108 | -0.28859,-0.060673,0 109 | -0.61118,-0.067982,0 110 | -0.66302,-0.21418,0 111 | -0.59965,-0.41886,0 112 | -0.72638,-0.082602,0 113 | -0.83007,0.31213,0 114 | -0.72062,0.53874,0 115 | -0.59389,0.49488,0 116 | -0.48445,0.99927,0 117 | -0.0063364,0.99927,0 118 | 0.63265,-0.030612,0 119 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex2/mapFeature.m: -------------------------------------------------------------------------------- 1 | function out = mapFeature(X1, X2) 2 | % MAPFEATURE Feature mapping function to polynomial features 3 | % 4 | % MAPFEATURE(X1, X2) maps the two input features 5 | % to quadratic features used in the regularization exercise. 6 | % 7 | % Returns a new feature array with more features, comprising of 8 | % X1, X2, X1.^2, X2.^2, X1*X2, X1*X2.^2, etc.. 9 | % 10 | % Inputs X1, X2 must be the same size 11 | % 12 | 13 | degree = 6; 14 | out = ones(size(X1(:,1))); 15 | for i = 1:degree 16 | for j = 0:i 17 | out(:, end+1) = (X1.^(i-j)).*(X2.^j); 18 | end 19 | end 20 | 21 | end -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex2/plotData.m: -------------------------------------------------------------------------------- 1 | function plotData(X, y) 2 | %PLOTDATA Plots the data points X and y into a new figure 3 | % PLOTDATA(x,y) plots the data points with + for the positive examples 4 | % and o for the negative examples. X is assumed to be a Mx2 matrix. 5 | 6 | % Create New Figure 7 | figure; hold on; 8 | 9 | % ====================== YOUR CODE HERE ====================== 10 | % Instructions: Plot the positive and negative examples on a 11 | % 2D plot, using the option 'k+' for the positive 12 | % examples and 'ko' for the negative examples. 13 | % 14 | 15 | pos = find(y==1); 16 | neg = find(y==0); 17 | 18 | plot(X(pos, 1), X(pos, 2), 'k+', 'LineWidth', 2, 'MarkerSize', 7); 19 | plot(X(neg, 1), X(neg, 2), 'ko', 'MarkerFaceColor', 'y', 'MarkerSize', 7); 20 | 21 | 22 | % ========================================================================= 23 | 24 | 25 | hold off; 26 | 27 | end 28 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex2/plotDecisionBoundary.m: -------------------------------------------------------------------------------- 1 | function plotDecisionBoundary(theta, X, y) 2 | %PLOTDECISIONBOUNDARY Plots the data points X and y into a new figure with 3 | %the decision boundary defined by theta 4 | % PLOTDECISIONBOUNDARY(theta, X,y) plots the data points with + for the 5 | % positive examples and o for the negative examples. 
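% Note (reference sketch): mapFeature above expands two input features into
% all monomials X1.^(i-j) .* X2.^j for i = 1..6, j = 0..i, plus the leading
% column of ones, i.e. 1 + (2+3+4+5+6+7) = 28 columns in total. For example
% (toy values chosen only for illustration):
%   out = mapFeature([0.5; -0.2], [1.0; 0.3]);
%   size(out)      % expected 2 x 28, matching initial_theta in ex2_reg.m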
X is assumed to be 6 | % a either 7 | % 1) Mx3 matrix, where the first column is an all-ones column for the 8 | % intercept. 9 | % 2) MxN, N>3 matrix, where the first column is all-ones 10 | 11 | % Plot Data 12 | plotData(X(:,2:3), y); 13 | hold on 14 | 15 | if size(X, 2) <= 3 16 | % Only need 2 points to define a line, so choose two endpoints 17 | plot_x = [min(X(:,2))-2, max(X(:,2))+2]; 18 | 19 | % Calculate the decision boundary line 20 | plot_y = (-1./theta(3)).*(theta(2).*plot_x + theta(1)); 21 | 22 | % Plot, and adjust axes for better viewing 23 | plot(plot_x, plot_y) 24 | 25 | % Legend, specific for the exercise 26 | legend('Admitted', 'Not admitted', 'Decision Boundary') 27 | axis([30, 100, 30, 100]) 28 | else 29 | % Here is the grid range 30 | u = linspace(-1, 1.5, 50); 31 | v = linspace(-1, 1.5, 50); 32 | 33 | z = zeros(length(u), length(v)); 34 | % Evaluate z = theta*x over the grid 35 | for i = 1:length(u) 36 | for j = 1:length(v) 37 | z(i,j) = mapFeature(u(i), v(j))*theta; 38 | end 39 | end 40 | z = z'; % important to transpose z before calling contour 41 | 42 | % Plot z = 0 43 | % Notice you need to specify the range [0, 0] 44 | contour(u, v, z, [0, 0], 'LineWidth', 2) 45 | end 46 | hold off 47 | 48 | end 49 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex2/predict.m: -------------------------------------------------------------------------------- 1 | function p = predict(theta, X) 2 | %PREDICT Predict whether the label is 0 or 1 using learned logistic 3 | %regression parameters theta 4 | % p = PREDICT(theta, X) computes the predictions for X using a 5 | % threshold at 0.5 (i.e., if sigmoid(theta'*x) >= 0.5, predict 1) 6 | 7 | m = size(X, 1); % Number of training examples 8 | 9 | % You need to return the following variables correctly 10 | p = zeros(m, 1); 11 | 12 | % ====================== YOUR CODE HERE ====================== 13 | % Instructions: Complete the following code to make predictions using 14 | % your learned logistic regression parameters. 15 | % You should set p to a vector of 0's and 1's 16 | % 17 | 18 | p = sigmoid(X * theta) >= 0.5; 19 | 20 | % ========================================================================= 21 | 22 | 23 | end 24 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex2/sigmoid.m: -------------------------------------------------------------------------------- 1 | function g = sigmoid(z) 2 | %SIGMOID Compute sigmoid function 3 | % g = SIGMOID(z) computes the sigmoid of z. 4 | 5 | % You need to return the following variables correctly 6 | g = zeros(size(z)); 7 | 8 | % ====================== YOUR CODE HERE ====================== 9 | % Instructions: Compute the sigmoid of each value of z (z can be a matrix, 10 | % vector or scalar). 11 | 12 | g = 1 ./ (1 + exp(-z)); 13 | 14 | 15 | % ============================================================= 16 | 17 | end 18 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex3/displayData.m: -------------------------------------------------------------------------------- 1 | function [h, display_array] = displayData(X, example_width) 2 | %DISPLAYDATA Display 2D data in a nice grid 3 | % [h, display_array] = DISPLAYDATA(X, example_width) displays 2D data 4 | % stored in X in a nice grid. It returns the figure handle h and the 5 | % displayed array if requested. 
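% Note (reference sketch): the straight boundary drawn by plotDecisionBoundary.m
% above comes from solving theta(1) + theta(2)*x1 + theta(3)*x2 = 0 for x2:
%   x2 = -(theta(1) + theta(2)*x1) / theta(3)
% which is exactly the plot_y expression in that file. predict.m then
% thresholds at sigmoid(X*theta) >= 0.5, i.e. at X*theta >= 0, so points on
% either side of this line receive labels 1 and 0 respectively.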
6 | 7 | % Set example_width automatically if not passed in 8 | if ~exist('example_width', 'var') || isempty(example_width) 9 | example_width = round(sqrt(size(X, 2))); 10 | end 11 | 12 | % Gray Image 13 | colormap(gray); 14 | 15 | % Compute rows, cols 16 | [m n] = size(X); 17 | example_height = (n / example_width); 18 | 19 | % Compute number of items to display 20 | display_rows = floor(sqrt(m)); 21 | display_cols = ceil(m / display_rows); 22 | 23 | % Between images padding 24 | pad = 1; 25 | 26 | % Setup blank display 27 | display_array = - ones(pad + display_rows * (example_height + pad), ... 28 | pad + display_cols * (example_width + pad)); 29 | 30 | % Copy each example into a patch on the display array 31 | curr_ex = 1; 32 | for j = 1:display_rows 33 | for i = 1:display_cols 34 | if curr_ex > m, 35 | break; 36 | end 37 | % Copy the patch 38 | 39 | % Get the max value of the patch 40 | max_val = max(abs(X(curr_ex, :))); 41 | display_array(pad + (j - 1) * (example_height + pad) + (1:example_height), ... 42 | pad + (i - 1) * (example_width + pad) + (1:example_width)) = ... 43 | reshape(X(curr_ex, :), example_height, example_width) / max_val; 44 | curr_ex = curr_ex + 1; 45 | end 46 | if curr_ex > m, 47 | break; 48 | end 49 | end 50 | 51 | % Display Image 52 | h = imagesc(display_array, [-1 1]); 53 | 54 | % Do not show axis 55 | axis image off 56 | 57 | drawnow; 58 | 59 | end 60 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex3/ex3.m: -------------------------------------------------------------------------------- 1 | %% Machine Learning Online Class - Exercise 3 | Part 1: One-vs-all 2 | 3 | % Instructions 4 | % ------------ 5 | % 6 | % This file contains code that helps you get started on the 7 | % linear exercise. You will need to complete the following functions 8 | % in this exericse: 9 | % 10 | % lrCostFunction.m (logistic regression cost function) 11 | % oneVsAll.m 12 | % predictOneVsAll.m 13 | % predict.m 14 | % 15 | % For this exercise, you will not need to change any code in this file, 16 | % or any other files other than those mentioned above. 17 | % 18 | 19 | %% Initialization 20 | clear ; close all; clc 21 | 22 | %% Setup the parameters you will use for this part of the exercise 23 | input_layer_size = 400; % 20x20 Input Images of Digits 24 | num_labels = 10; % 10 labels, from 1 to 10 25 | % (note that we have mapped "0" to label 10) 26 | 27 | %% =========== Part 1: Loading and Visualizing Data ============= 28 | % We start the exercise by first loading and visualizing the dataset. 29 | % You will be working with a dataset that contains handwritten digits. 30 | % 31 | 32 | % Load Training Data 33 | fprintf('Loading and Visualizing Data ...\n') 34 | 35 | load('ex3data1.mat'); % training data stored in arrays X, y 36 | m = size(X, 1); 37 | 38 | % Randomly select 100 data points to display 39 | rand_indices = randperm(m); 40 | sel = X(rand_indices(1:100), :); 41 | 42 | displayData(sel); 43 | 44 | fprintf('Program paused. Press enter to continue.\n'); 45 | %pause; 46 | 47 | %% ============ Part 2a: Vectorize Logistic Regression ============ 48 | % In this part of the exercise, you will reuse your logistic regression 49 | % code from the last exercise. You task here is to make sure that your 50 | % regularized logistic regression implementation is vectorized. After 51 | % that, you will implement one-vs-all classification for the handwritten 52 | % digit dataset. 
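% Note (reference sketch of the one-vs-all scheme used in this part): for each
% class c = 1..num_labels a separate regularized logistic regression is trained
% on the relabelled targets (y == c), its parameters become row c of all_theta,
% and a new example x (a column of features) is assigned to the class with the
% largest score, conceptually:
%   [~, label] = max(sigmoid([1 x'] * all_theta'));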
53 | % 54 | 55 | % Test case for lrCostFunction 56 | fprintf('\nTesting lrCostFunction() with regularization'); 57 | 58 | theta_t = [-2; -1; 1; 2]; 59 | X_t = [ones(5,1) reshape(1:15,5,3)/10]; 60 | y_t = ([1;0;1;0;1] >= 0.5); 61 | lambda_t = 3; 62 | [J grad] = lrCostFunction(theta_t, X_t, y_t, lambda_t); 63 | 64 | fprintf('\nCost: %f\n', J); 65 | fprintf('Expected cost: 2.534819\n'); 66 | fprintf('Gradients:\n'); 67 | fprintf(' %f \n', grad); 68 | fprintf('Expected gradients:\n'); 69 | fprintf(' 0.146561\n -0.548558\n 0.724722\n 1.398003\n'); 70 | 71 | fprintf('Program paused. Press enter to continue.\n'); 72 | %pause; 73 | %% ============ Part 2b: One-vs-All Training ============ 74 | fprintf('\nTraining One-vs-All Logistic Regression...\n') 75 | 76 | lambda = 0.1; 77 | [all_theta] = oneVsAll(X, y, num_labels, lambda); 78 | 79 | fprintf('Program paused. Press enter to continue.\n'); 80 | %pause; 81 | 82 | 83 | %% ================ Part 3: Predict for One-Vs-All ================ 84 | 85 | pred = predictOneVsAll(all_theta, X); 86 | 87 | fprintf('\nTraining Set Accuracy: %f\n', mean(double(pred == y)) * 100); 88 | 89 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex3/ex3_nn.m: -------------------------------------------------------------------------------- 1 | %% Machine Learning Online Class - Exercise 3 | Part 2: Neural Networks 2 | 3 | % Instructions 4 | % ------------ 5 | % 6 | % This file contains code that helps you get started on the 7 | % linear exercise. You will need to complete the following functions 8 | % in this exericse: 9 | % 10 | % lrCostFunction.m (logistic regression cost function) 11 | % oneVsAll.m 12 | % predictOneVsAll.m 13 | % predict.m 14 | % 15 | % For this exercise, you will not need to change any code in this file, 16 | % or any other files other than those mentioned above. 17 | % 18 | 19 | %% Initialization 20 | clear ; close all; clc 21 | 22 | %% Setup the parameters you will use for this exercise 23 | input_layer_size = 400; % 20x20 Input Images of Digits 24 | hidden_layer_size = 25; % 25 hidden units 25 | num_labels = 10; % 10 labels, from 1 to 10 26 | % (note that we have mapped "0" to label 10) 27 | 28 | %% =========== Part 1: Loading and Visualizing Data ============= 29 | % We start the exercise by first loading and visualizing the dataset. 30 | % You will be working with a dataset that contains handwritten digits. 31 | % 32 | 33 | % Load Training Data 34 | fprintf('Loading and Visualizing Data ...\n') 35 | 36 | load('ex3data1.mat'); 37 | m = size(X, 1); 38 | 39 | % Randomly select 100 data points to display 40 | sel = randperm(size(X, 1)); 41 | sel = sel(1:100); 42 | 43 | displayData(X(sel, :)); 44 | 45 | fprintf('Program paused. Press enter to continue.\n'); 46 | pause; 47 | 48 | %% ================ Part 2: Loading Pameters ================ 49 | % In this part of the exercise, we load some pre-initialized 50 | % neural network parameters. 51 | 52 | fprintf('\nLoading Saved Neural Network Parameters ...\n') 53 | 54 | % Load the weights into variables Theta1 and Theta2 55 | load('ex3weights.mat'); 56 | 57 | %% ================= Part 3: Implement Predict ================= 58 | % After training the neural network, we would like to use it to predict 59 | % the labels. You will now implement the "predict" function to use the 60 | % neural network to predict the labels of the training set. This lets 61 | % you compute the training set accuracy. 
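% Note (reference sketch): with the sizes set above (400 inputs, 25 hidden
% units, 10 labels), the weights loaded from ex3weights.mat have shapes
%   Theta1 : 25 x 401   (hidden units x (inputs + bias))
%   Theta2 : 10 x 26    (labels x (hidden units + bias))
% and the feedforward pass in predict.m amounts to
%   a2 = sigmoid([ones(m,1) X ] * Theta1');
%   a3 = sigmoid([ones(m,1) a2] * Theta2');   % predicted label = argmax per row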
62 | 63 | pred = predict(Theta1, Theta2, X); 64 | 65 | fprintf('\nTraining Set Accuracy: %f\n', mean(double(pred == y)) * 100); 66 | 67 | fprintf('Program paused. Press enter to continue.\n'); 68 | pause; 69 | 70 | % To give you an idea of the network's output, you can also run 71 | % through the examples one at the a time to see what it is predicting. 72 | 73 | % Randomly permute examples 74 | rp = randperm(m); 75 | 76 | for i = 1:m 77 | % Display 78 | fprintf('\nDisplaying Example Image\n'); 79 | displayData(X(rp(i), :)); 80 | 81 | pred = predict(Theta1, Theta2, X(rp(i),:)); 82 | fprintf('\nNeural Network Prediction: %d (digit %d)\n', pred, mod(pred, 10)); 83 | 84 | % Pause with quit option 85 | s = input('Paused - press enter to continue, q to exit:','s'); 86 | if s == 'q' 87 | break 88 | end 89 | end 90 | 91 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex3/ex3data1.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/loveunk/machine-learning-excercise-notebook-python/b94f6c07388234c086a40de0132550653aa17cc3/original-machine-learning-MATLAB/ex3/ex3data1.mat -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex3/ex3weights.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/loveunk/machine-learning-excercise-notebook-python/b94f6c07388234c086a40de0132550653aa17cc3/original-machine-learning-MATLAB/ex3/ex3weights.mat -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex3/lrCostFunction.m: -------------------------------------------------------------------------------- 1 | function [J, grad] = lrCostFunction(theta, X, y, lambda) 2 | %LRCOSTFUNCTION Compute cost and gradient for logistic regression with 3 | %regularization 4 | % J = LRCOSTFUNCTION(theta, X, y, lambda) computes the cost of using 5 | % theta as the parameter for regularized logistic regression and the 6 | % gradient of the cost w.r.t. to the parameters. 7 | 8 | % Initialize some useful values 9 | m = length(y); % number of training examples 10 | 11 | % You need to return the following variables correctly 12 | J = 0; 13 | grad = zeros(size(theta)); 14 | 15 | % ====================== YOUR CODE HERE ====================== 16 | % Instructions: Compute the cost of a particular choice of theta. 17 | % You should set J to the cost. 18 | % Compute the partial derivatives and set grad to the partial 19 | % derivatives of the cost w.r.t. each parameter in theta 20 | % 21 | % Hint: The computation of the cost function and gradients can be 22 | % efficiently vectorized. For example, consider the computation 23 | % 24 | % sigmoid(X * theta) 25 | % 26 | % Each row of the resulting matrix will contain the value of the 27 | % prediction for that example. You can make use of this to vectorize 28 | % the cost function and gradient computations. 
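% Note (reference sketch, dimension check for the vectorization hint above):
%   X         : m x (n+1)
%   theta     : (n+1) x 1
%   X * theta : m x 1, so sigmoid(X * theta) holds one prediction per example
% and the whole cost collapses to inner products of y with log(h) and of
% (1 - y) with log(1 - h), as in the implementation a few lines below.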
29 | % 30 | % Hint: When computing the gradient of the regularized cost function, 31 | % there're many possible vectorized solutions, but one solution 32 | % looks like: 33 | % grad = (unregularized gradient for logistic regression) 34 | % temp = theta; 35 | % temp(1) = 0; % because we don't add anything for j = 0 36 | % grad = grad + YOUR_CODE_HERE (using the temp variable) 37 | % 38 | 39 | hypothesis = sigmoid(X * theta); 40 | 41 | thetaTmp = zeros(size(theta)); 42 | thetaTmp(2:end) = theta(2:end); 43 | 44 | J = (1/m) * (-y'*log(hypothesis) - (1-y')*log(1 - hypothesis)) ... 45 | + (lambda / (2*m)) * (thetaTmp' * thetaTmp); 46 | 47 | grad = (1/m) * ((hypothesis - y)' * X)' + (lambda / m) * thetaTmp; 48 | 49 | % ============================================================= 50 | 51 | grad = grad(:); 52 | 53 | end 54 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex3/oneVsAll.m: -------------------------------------------------------------------------------- 1 | function [all_theta] = oneVsAll(X, y, num_labels, lambda) 2 | %ONEVSALL trains multiple logistic regression classifiers and returns all 3 | %the classifiers in a matrix all_theta, where the i-th row of all_theta 4 | %corresponds to the classifier for label i 5 | % [all_theta] = ONEVSALL(X, y, num_labels, lambda) trains num_labels 6 | % logistic regression classifiers and returns each of these classifiers 7 | % in a matrix all_theta, where the i-th row of all_theta corresponds 8 | % to the classifier for label i 9 | 10 | % Some useful variables 11 | m = size(X, 1); 12 | n = size(X, 2); 13 | 14 | % You need to return the following variables correctly 15 | all_theta = zeros(num_labels, n + 1); 16 | 17 | % Add ones to the X data matrix 18 | X = [ones(m, 1) X]; 19 | 20 | % ====================== YOUR CODE HERE ====================== 21 | % Instructions: You should complete the following code to train num_labels 22 | % logistic regression classifiers with regularization 23 | % parameter lambda. 24 | % 25 | % Hint: theta(:) will return a column vector. 26 | % 27 | % Hint: You can use y == c to obtain a vector of 1's and 0's that tell you 28 | % whether the ground truth is true/false for this class. 29 | % 30 | % Note: For this assignment, we recommend using fmincg to optimize the cost 31 | % function. It is okay to use a for-loop (for c = 1:num_labels) to 32 | % loop over the different classes. 33 | % 34 | % fmincg works similarly to fminunc, but is more efficient when we 35 | % are dealing with large number of parameters. 36 | % 37 | % Example Code for fmincg: 38 | % 39 | % % Set Initial theta 40 | % initial_theta = zeros(n + 1, 1); 41 | % 42 | % % Set options for fminunc 43 | % options = optimset('GradObj', 'on', 'MaxIter', 50); 44 | % 45 | % % Run fmincg to obtain the optimal theta 46 | % % This function will return theta and the cost 47 | % [theta] = ... 48 | % fmincg (@(t)(lrCostFunction(t, X, (y == c), lambda)), ... 
49 | % initial_theta, options); 50 | % 51 | 52 | options = optimset('GradObj', 'on', 'MaxIter', 50); 53 | 54 | for c = 1:num_labels 55 | initial_theta = zeros(n + 1, 1); 56 | theta_i = fmincg(@(t)lrCostFunction(t, X, (y == c), lambda), initial_theta, options); 57 | all_theta(c,:) = theta_i'; 58 | end 59 | 60 | % ========================================================================= 61 | 62 | 63 | end 64 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex3/predict.m: -------------------------------------------------------------------------------- 1 | function p = predict(Theta1, Theta2, X) 2 | %PREDICT Predict the label of an input given a trained neural network 3 | % p = PREDICT(Theta1, Theta2, X) outputs the predicted label of X given the 4 | % trained weights of a neural network (Theta1, Theta2) 5 | 6 | % Useful values 7 | m = size(X, 1); 8 | num_labels = size(Theta2, 1); 9 | 10 | % You need to return the following variables correctly 11 | p = zeros(size(X, 1), 1); 12 | 13 | % ====================== YOUR CODE HERE ====================== 14 | % Instructions: Complete the following code to make predictions using 15 | % your learned neural network. You should set p to a 16 | % vector containing labels between 1 to num_labels. 17 | % 18 | % Hint: The max function might come in useful. In particular, the max 19 | % function can also return the index of the max element, for more 20 | % information see 'help max'. If your examples are in rows, then, you 21 | % can use max(A, [], 2) to obtain the max for each row. 22 | % 23 | 24 | a1 = [ones(m, 1) X]'; 25 | 26 | z2 = Theta1 * a1; 27 | a2 = sigmoid(z2); 28 | 29 | a2 = [ones(m, 1)'; a2]; 30 | z3 = Theta2 * a2; 31 | a3 = sigmoid(z3); 32 | 33 | h_theta_x = a3'; 34 | 35 | [Y p] = max(h_theta_x, [], 2); 36 | 37 | % ========================================================================= 38 | 39 | 40 | end 41 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex3/predictOneVsAll.m: -------------------------------------------------------------------------------- 1 | function p = predictOneVsAll(all_theta, X) 2 | %PREDICT Predict the label for a trained one-vs-all classifier. The labels 3 | %are in the range 1..K, where K = size(all_theta, 1). 4 | % p = PREDICTONEVSALL(all_theta, X) will return a vector of predictions 5 | % for each example in the matrix X. Note that X contains the examples in 6 | % rows. all_theta is a matrix where the i-th row is a trained logistic 7 | % regression theta vector for the i-th class. You should set p to a vector 8 | % of values from 1..K (e.g., p = [1; 3; 1; 2] predicts classes 1, 3, 1, 2 9 | % for 4 examples) 10 | 11 | m = size(X, 1); 12 | num_labels = size(all_theta, 1); 13 | 14 | % You need to return the following variables correctly 15 | p = zeros(size(X, 1), 1); 16 | 17 | % Add ones to the X data matrix 18 | X = [ones(m, 1) X]; 19 | 20 | % ====================== YOUR CODE HERE ====================== 21 | % Instructions: Complete the following code to make predictions using 22 | % your learned logistic regression parameters (one-vs-all). 23 | % You should set p to a vector of predictions (from 1 to 24 | % num_labels). 25 | % 26 | % Hint: This code can be done all vectorized using the max function. 27 | % In particular, the max function can also return the index of the 28 | % max element, for more information see 'help max'. 
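% Note (reference sketch of the two-output form of max referred to in the hint;
% the values are made up for illustration):
%   A = [0.1 0.9 0.3; 0.7 0.2 0.6];
%   [vals, idx] = max(A, [], 2);   % vals = [0.9; 0.7], idx = [2; 1]
% idx is the per-row index of the largest entry, which becomes the predicted
% class below.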
If your examples 29 | % are in rows, then, you can use max(A, [], 2) to obtain the max 30 | % for each row. 31 | % 32 | 33 | predictions = X * all_theta'; 34 | [Y p] = max(predictions, [], 2); 35 | 36 | % ========================================================================= 37 | 38 | 39 | end 40 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex3/sigmoid.m: -------------------------------------------------------------------------------- 1 | function g = sigmoid(z) 2 | %SIGMOID Compute sigmoid functoon 3 | % J = SIGMOID(z) computes the sigmoid of z. 4 | 5 | g = 1.0 ./ (1.0 + exp(-z)); 6 | end 7 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex4/0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/loveunk/machine-learning-excercise-notebook-python/b94f6c07388234c086a40de0132550653aa17cc3/original-machine-learning-MATLAB/ex4/0.png -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex4/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/loveunk/machine-learning-excercise-notebook-python/b94f6c07388234c086a40de0132550653aa17cc3/original-machine-learning-MATLAB/ex4/1.png -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex4/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/loveunk/machine-learning-excercise-notebook-python/b94f6c07388234c086a40de0132550653aa17cc3/original-machine-learning-MATLAB/ex4/3.png -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex4/4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/loveunk/machine-learning-excercise-notebook-python/b94f6c07388234c086a40de0132550653aa17cc3/original-machine-learning-MATLAB/ex4/4.png -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex4/7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/loveunk/machine-learning-excercise-notebook-python/b94f6c07388234c086a40de0132550653aa17cc3/original-machine-learning-MATLAB/ex4/7.png -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex4/8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/loveunk/machine-learning-excercise-notebook-python/b94f6c07388234c086a40de0132550653aa17cc3/original-machine-learning-MATLAB/ex4/8.png -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex4/checkNNGradients.m: -------------------------------------------------------------------------------- 1 | function checkNNGradients(lambda) 2 | %CHECKNNGRADIENTS Creates a small neural network to check the 3 | %backpropagation gradients 4 | % CHECKNNGRADIENTS(lambda) Creates a small neural network to check the 5 | % backpropagation gradients, it will output the analytical gradients 6 | % produced by your backprop code and the numerical gradients (computed 7 | % using computeNumericalGradient). 
These two gradient computations should 8 | % result in very similar values. 9 | % 10 | 11 | if ~exist('lambda', 'var') || isempty(lambda) 12 | lambda = 0; 13 | end 14 | 15 | input_layer_size = 3; 16 | hidden_layer_size = 5; 17 | num_labels = 3; 18 | m = 5; 19 | 20 | % We generate some 'random' test data 21 | Theta1 = debugInitializeWeights(hidden_layer_size, input_layer_size); 22 | Theta2 = debugInitializeWeights(num_labels, hidden_layer_size); 23 | % Reusing debugInitializeWeights to generate X 24 | X = debugInitializeWeights(m, input_layer_size - 1); 25 | y = 1 + mod(1:m, num_labels)'; 26 | 27 | % Unroll parameters 28 | nn_params = [Theta1(:) ; Theta2(:)]; 29 | 30 | % Short hand for cost function 31 | costFunc = @(p) nnCostFunction(p, input_layer_size, hidden_layer_size, ... 32 | num_labels, X, y, lambda); 33 | 34 | [cost, grad] = costFunc(nn_params); 35 | numgrad = computeNumericalGradient(costFunc, nn_params); 36 | 37 | % Visually examine the two gradient computations. The two columns 38 | % you get should be very similar. 39 | disp([numgrad grad]); 40 | fprintf(['The above two columns you get should be very similar.\n' ... 41 | '(Left-Your Numerical Gradient, Right-Analytical Gradient)\n\n']); 42 | 43 | % Evaluate the norm of the difference between two solutions. 44 | % If you have a correct implementation, and assuming you used EPSILON = 0.0001 45 | % in computeNumericalGradient.m, then diff below should be less than 1e-9 46 | diff = norm(numgrad-grad)/norm(numgrad+grad); 47 | 48 | fprintf(['If your backpropagation implementation is correct, then \n' ... 49 | 'the relative difference will be small (less than 1e-9). \n' ... 50 | '\nRelative Difference: %g\n'], diff); 51 | 52 | end 53 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex4/computeNumericalGradient.m: -------------------------------------------------------------------------------- 1 | function numgrad = computeNumericalGradient(J, theta) 2 | %COMPUTENUMERICALGRADIENT Computes the gradient using "finite differences" 3 | %and gives us a numerical estimate of the gradient. 4 | % numgrad = COMPUTENUMERICALGRADIENT(J, theta) computes the numerical 5 | % gradient of the function J around theta. Calling y = J(theta) should 6 | % return the function value at theta. 7 | 8 | % Notes: The following code implements numerical gradient checking, and 9 | % returns the numerical gradient.It sets numgrad(i) to (a numerical 10 | % approximation of) the partial derivative of J with respect to the 11 | % i-th input argument, evaluated at theta. (i.e., numgrad(i) should 12 | % be the (approximately) the partial derivative of J with respect 13 | % to theta(i).) 
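% Note (reference sketch): the loop below implements the central difference
%   numgrad(i) ~ ( J(theta + e*p_i) - J(theta - e*p_i) ) / (2*e)
% where p_i is the i-th unit vector (the "perturb" vector) and e = 1e-4. The
% O(e^2) accuracy of this two-sided estimate is why checkNNGradients.m above
% can expect a relative difference below 1e-9 from a correct backpropagation.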
14 | % 15 | 16 | numgrad = zeros(size(theta)); 17 | perturb = zeros(size(theta)); 18 | e = 1e-4; 19 | for p = 1:numel(theta) 20 | % Set perturbation vector 21 | perturb(p) = e; 22 | loss1 = J(theta - perturb); 23 | loss2 = J(theta + perturb); 24 | % Compute Numerical Gradient 25 | numgrad(p) = (loss2 - loss1) / (2*e); 26 | perturb(p) = 0; 27 | end 28 | 29 | end 30 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex4/debugInitializeWeights.m: -------------------------------------------------------------------------------- 1 | function W = debugInitializeWeights(fan_out, fan_in) 2 | %DEBUGINITIALIZEWEIGHTS Initialize the weights of a layer with fan_in 3 | %incoming connections and fan_out outgoing connections using a fixed 4 | %strategy, this will help you later in debugging 5 | % W = DEBUGINITIALIZEWEIGHTS(fan_in, fan_out) initializes the weights 6 | % of a layer with fan_in incoming connections and fan_out outgoing 7 | % connections using a fix set of values 8 | % 9 | % Note that W should be set to a matrix of size(1 + fan_in, fan_out) as 10 | % the first row of W handles the "bias" terms 11 | % 12 | 13 | % Set W to zeros 14 | W = zeros(fan_out, 1 + fan_in); 15 | 16 | % Initialize W using "sin", this ensures that W is always of the same 17 | % values and will be useful for debugging 18 | W = reshape(sin(1:numel(W)), size(W)) / 10; 19 | 20 | % ========================================================================= 21 | 22 | end 23 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex4/displayData.m: -------------------------------------------------------------------------------- 1 | function [h, display_array] = displayData(X, example_width) 2 | %DISPLAYDATA Display 2D data in a nice grid 3 | % [h, display_array] = DISPLAYDATA(X, example_width) displays 2D data 4 | % stored in X in a nice grid. It returns the figure handle h and the 5 | % displayed array if requested. 6 | 7 | % Set example_width automatically if not passed in 8 | if ~exist('example_width', 'var') || isempty(example_width) 9 | example_width = round(sqrt(size(X, 2))); 10 | end 11 | 12 | % Gray Image 13 | colormap(gray); 14 | 15 | % Compute rows, cols 16 | [m n] = size(X); 17 | example_height = (n / example_width); 18 | 19 | % Compute number of items to display 20 | display_rows = floor(sqrt(m)); 21 | display_cols = ceil(m / display_rows); 22 | 23 | % Between images padding 24 | pad = 1; 25 | 26 | % Setup blank display 27 | display_array = - ones(pad + display_rows * (example_height + pad), ... 28 | pad + display_cols * (example_width + pad)); 29 | 30 | % Copy each example into a patch on the display array 31 | curr_ex = 1; 32 | for j = 1:display_rows 33 | for i = 1:display_cols 34 | if curr_ex > m, 35 | break; 36 | end 37 | % Copy the patch 38 | 39 | % Get the max value of the patch 40 | max_val = max(abs(X(curr_ex, :))); 41 | display_array(pad + (j - 1) * (example_height + pad) + (1:example_height), ... 42 | pad + (i - 1) * (example_width + pad) + (1:example_width)) = ... 
43 | reshape(X(curr_ex, :), example_height, example_width) / max_val; 44 | curr_ex = curr_ex + 1; 45 | end 46 | if curr_ex > m, 47 | break; 48 | end 49 | end 50 | 51 | % Display Image 52 | h = imagesc(display_array, [-1 1]); 53 | 54 | % Do not show axis 55 | axis image off 56 | 57 | drawnow; 58 | 59 | end 60 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex4/ex4data1.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/loveunk/machine-learning-excercise-notebook-python/b94f6c07388234c086a40de0132550653aa17cc3/original-machine-learning-MATLAB/ex4/ex4data1.mat -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex4/ex4weights.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/loveunk/machine-learning-excercise-notebook-python/b94f6c07388234c086a40de0132550653aa17cc3/original-machine-learning-MATLAB/ex4/ex4weights.mat -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex4/nnCostFunction.m: -------------------------------------------------------------------------------- 1 | function [J grad] = nnCostFunction(nn_params, ... 2 | input_layer_size, ... 3 | hidden_layer_size, ... 4 | num_labels, ... 5 | X, y, lambda) 6 | %NNCOSTFUNCTION Implements the neural network cost function for a two layer 7 | %neural network which performs classification 8 | % [J grad] = NNCOSTFUNCTON(nn_params, hidden_layer_size, num_labels, ... 9 | % X, y, lambda) computes the cost and gradient of the neural network. The 10 | % parameters for the neural network are "unrolled" into the vector 11 | % nn_params and need to be converted back into the weight matrices. 12 | % 13 | % The returned parameter grad should be a "unrolled" vector of the 14 | % partial derivatives of the neural network. 15 | % 16 | 17 | % Reshape nn_params back into the parameters Theta1 and Theta2, the weight matrices 18 | % for our 2 layer neural network 19 | Theta1 = reshape(nn_params(1:hidden_layer_size * (input_layer_size + 1)), ... 20 | hidden_layer_size, (input_layer_size + 1)); 21 | 22 | Theta2 = reshape(nn_params((1 + (hidden_layer_size * (input_layer_size + 1))):end), ... 23 | num_labels, (hidden_layer_size + 1)); 24 | 25 | % Setup some useful variables 26 | m = size(X, 1); 27 | 28 | % You need to return the following variables correctly 29 | J = 0; 30 | Theta1_grad = zeros(size(Theta1)); 31 | Theta2_grad = zeros(size(Theta2)); 32 | 33 | % ====================== YOUR CODE HERE ====================== 34 | % Instructions: You should complete the code by working through the 35 | % following parts. 36 | % 37 | % Part 1: Feedforward the neural network and return the cost in the 38 | % variable J. After implementing Part 1, you can verify that your 39 | % cost function computation is correct by verifying the cost 40 | % computed in ex4.m 41 | % 42 | a1 = [ones(m,1) X]; 43 | z2 = a1 * Theta1'; 44 | a2 = [ones(m,1) sigmoid(z2)]; 45 | h_theta = sigmoid(a2*Theta2'); 46 | 47 | yk = zeros(m, num_labels); 48 | for i = 1:m 49 | yk(i,y(i)) = 1; 50 | end 51 | 52 | J = (1/m) * sum(sum(-yk.*log(h_theta) - (1-yk).*log(1-h_theta))) + ... 
53 | (lambda / (2*m)) * (sum(sum(Theta1(:,2:end).^2)) + sum(sum(Theta2(:,2:end).^2))); 54 | 55 | % Part 2: Implement the backpropagation algorithm to compute the gradients 56 | % Theta1_grad and Theta2_grad. You should return the partial derivatives of 57 | % the cost function with respect to Theta1 and Theta2 in Theta1_grad and 58 | % Theta2_grad, respectively. After implementing Part 2, you can check 59 | % that your implementation is correct by running checkNNGradients 60 | % 61 | % Note: The vector y passed into the function is a vector of labels 62 | % containing values from 1..K. You need to map this vector into a 63 | % binary vector of 1's and 0's to be used with the neural network 64 | % cost function. 65 | % 66 | % Hint: We recommend implementing backpropagation using a for-loop 67 | % over the training examples if you are implementing it for the 68 | % first time. 69 | 70 | delta3 = h_theta - yk; 71 | delta2 = delta3 * Theta2 .* [ones(m,1) sigmoidGradient(z2)]; 72 | 73 | theta1_copy = Theta1; 74 | theta2_copy = Theta2; 75 | 76 | theta1_copy(:, 1) = 0; 77 | theta2_copy(:, 1) = 0; 78 | 79 | Theta1_grad = (1/m) * (Theta1_grad + delta2(:,2:end)' * a1) + (lambda/m)*theta1_copy; 80 | Theta2_grad = (1/m) * (Theta2_grad + delta3' * a2) + (lambda/m)*theta2_copy; 81 | 82 | % Part 3: Implement regularization with the cost function and gradients. 83 | % 84 | % Hint: You can implement this around the code for 85 | % backpropagation. That is, you can compute the gradients for 86 | % the regularization separately and then add them to Theta1_grad 87 | % and Theta2_grad from Part 2. 88 | % 89 | 90 | % ------------------------------------------------------------- 91 | 92 | % ========================================================================= 93 | 94 | % Unroll gradients 95 | grad = [Theta1_grad(:) ; Theta2_grad(:)]; 96 | 97 | end 98 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex4/predict.m: -------------------------------------------------------------------------------- 1 | function p = predict(Theta1, Theta2, X) 2 | %PREDICT Predict the label of an input given a trained neural network 3 | % p = PREDICT(Theta1, Theta2, X) outputs the predicted label of X given the 4 | % trained weights of a neural network (Theta1, Theta2) 5 | 6 | % Useful values 7 | m = size(X, 1); 8 | num_labels = size(Theta2, 1); 9 | 10 | % You need to return the following variables correctly 11 | p = zeros(size(X, 1), 1); 12 | 13 | h1 = sigmoid([ones(m, 1) X] * Theta1'); 14 | h2 = sigmoid([ones(m, 1) h1] * Theta2'); 15 | [dummy, p] = max(h2, [], 2); 16 | 17 | % ========================================================================= 18 | 19 | 20 | end 21 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex4/randInitializeWeights.m: -------------------------------------------------------------------------------- 1 | function W = randInitializeWeights(L_in, L_out) 2 | %RANDINITIALIZEWEIGHTS Randomly initialize the weights of a layer with L_in 3 | %incoming connections and L_out outgoing connections 4 | % W = RANDINITIALIZEWEIGHTS(L_in, L_out) randomly initializes the weights 5 | % of a layer with L_in incoming connections and L_out outgoing 6 | % connections. 
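% Note (reference sketch): the backpropagation step implemented in
% nnCostFunction.m above, restated compactly:
%   delta3 = a3 - Y                               % m x K output-layer error
%   delta2 = (delta3 * Theta2) .* [1 g'(z2)]      % its first (bias) column is dropped next
%   Theta1_grad = delta2(:,2:end)' * a1 / m + (lambda/m) * Theta1   (bias column zeroed)
%   Theta2_grad = delta3' * a2 / m          + (lambda/m) * Theta2   (bias column zeroed)
% with g'(z) = sigmoid(z) .* (1 - sigmoid(z)), i.e. sigmoidGradient below.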
7 | % 8 | % Note that W should be set to a matrix of size(L_out, 1 + L_in) as 9 | % the first column of W handles the "bias" terms 10 | % 11 | 12 | % You need to return the following variables correctly 13 | W = zeros(L_out, 1 + L_in); 14 | 15 | % ====================== YOUR CODE HERE ====================== 16 | % Instructions: Initialize W randomly so that we break the symmetry while 17 | % training the neural network. 18 | % 19 | % Note: The first column of W corresponds to the parameters for the bias unit 20 | % 21 | 22 | epsilon_init = 0.12; 23 | 24 | W = rand(L_out, L_in + 1) * 2 * epsilon_init - epsilon_init; 25 | 26 | % ========================================================================= 27 | 28 | end 29 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex4/sigmoid.m: -------------------------------------------------------------------------------- 1 | function g = sigmoid(z) 2 | %SIGMOID Compute sigmoid functoon 3 | % J = SIGMOID(z) computes the sigmoid of z. 4 | 5 | g = 1.0 ./ (1.0 + exp(-z)); 6 | end 7 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex4/sigmoidGradient.m: -------------------------------------------------------------------------------- 1 | function g = sigmoidGradient(z) 2 | %SIGMOIDGRADIENT returns the gradient of the sigmoid function 3 | %evaluated at z 4 | % g = SIGMOIDGRADIENT(z) computes the gradient of the sigmoid function 5 | % evaluated at z. This should work regardless if z is a matrix or a 6 | % vector. In particular, if z is a vector or matrix, you should return 7 | % the gradient for each element. 8 | 9 | g = zeros(size(z)); 10 | 11 | % ====================== YOUR CODE HERE ====================== 12 | % Instructions: Compute the gradient of the sigmoid function evaluated at 13 | % each value of z (z can be a matrix, vector or scalar). 14 | 15 | g = sigmoid(z) .* (1 - sigmoid(z)); 16 | 17 | % ============================================================= 18 | 19 | end 20 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex5/ex5.m: -------------------------------------------------------------------------------- 1 | %% Machine Learning Online Class 2 | % Exercise 5 | Regularized Linear Regression and Bias-Variance 3 | % 4 | % Instructions 5 | % ------------ 6 | % 7 | % This file contains code that helps you get started on the 8 | % exercise. You will need to complete the following functions: 9 | % 10 | % linearRegCostFunction.m 11 | % learningCurve.m 12 | % validationCurve.m 13 | % 14 | % For this exercise, you will not need to change any code in this file, 15 | % or any other files other than those mentioned above. 16 | % 17 | 18 | %% Initialization 19 | clear ; close all; clc 20 | 21 | %% =========== Part 1: Loading and Visualizing Data ============= 22 | % We start the exercise by first loading and visualizing the dataset. 23 | % The following code will load the dataset into your environment and plot 24 | % the data. 
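% Note (reference sketch on the two ex4 helpers above): randInitializeWeights
% draws every weight uniformly from (-epsilon_init, epsilon_init) with
% epsilon_init = 0.12, which breaks the symmetry between hidden units, and
% sigmoidGradient peaks at z = 0:
%   sigmoidGradient([-1 0 1])    % approximately [0.1966  0.2500  0.1966]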
25 | % 26 | 27 | % Load Training Data 28 | fprintf('Loading and Visualizing Data ...\n') 29 | 30 | % Load from ex5data1: 31 | % You will have X, y, Xval, yval, Xtest, ytest in your environment 32 | load ('ex5data1.mat'); 33 | 34 | % m = Number of examples 35 | m = size(X, 1); 36 | 37 | % Plot training data 38 | plot(X, y, 'rx', 'MarkerSize', 10, 'LineWidth', 1.5); 39 | xlabel('Change in water level (x)'); 40 | ylabel('Water flowing out of the dam (y)'); 41 | 42 | fprintf('Program paused. Press enter to continue.\n'); 43 | %pause; 44 | 45 | %% =========== Part 2: Regularized Linear Regression Cost ============= 46 | % You should now implement the cost function for regularized linear 47 | % regression. 48 | % 49 | 50 | theta = [1 ; 1]; 51 | J = linearRegCostFunction([ones(m, 1) X], y, theta, 1); 52 | 53 | fprintf(['Cost at theta = [1 ; 1]: %f '... 54 | '\n(this value should be about 303.993192)\n'], J); 55 | 56 | fprintf('Program paused. Press enter to continue.\n'); 57 | %pause; 58 | 59 | %% =========== Part 3: Regularized Linear Regression Gradient ============= 60 | % You should now implement the gradient for regularized linear 61 | % regression. 62 | % 63 | 64 | theta = [1 ; 1]; 65 | [J, grad] = linearRegCostFunction([ones(m, 1) X], y, theta, 1); 66 | 67 | fprintf(['Gradient at theta = [1 ; 1]: [%f; %f] '... 68 | '\n(this value should be about [-15.303016; 598.250744])\n'], ... 69 | grad(1), grad(2)); 70 | 71 | fprintf('Program paused. Press enter to continue.\n'); 72 | %pause; 73 | 74 | 75 | %% =========== Part 4: Train Linear Regression ============= 76 | % Once you have implemented the cost and gradient correctly, the 77 | % trainLinearReg function will use your cost function to train 78 | % regularized linear regression. 79 | % 80 | % Write Up Note: The data is non-linear, so this will not give a great 81 | % fit. 82 | % 83 | 84 | % Train linear regression with lambda = 0 85 | lambda = 0; 86 | [theta] = trainLinearReg([ones(m, 1) X], y, lambda); 87 | 88 | % Plot fit over the data 89 | plot(X, y, 'rx', 'MarkerSize', 10, 'LineWidth', 1.5); 90 | xlabel('Change in water level (x)'); 91 | ylabel('Water flowing out of the dam (y)'); 92 | hold on; 93 | plot(X, [ones(m, 1) X]*theta, '--', 'LineWidth', 2) 94 | hold off; 95 | 96 | fprintf('Program paused. Press enter to continue.\n'); 97 | %pause; 98 | 99 | 100 | %% =========== Part 5: Learning Curve for Linear Regression ============= 101 | % Next, you should implement the learningCurve function. 102 | % 103 | % Write Up Note: Since the model is underfitting the data, we expect to 104 | % see a graph with "high bias" -- Figure 3 in ex5.pdf 105 | % 106 | 107 | lambda = 0; 108 | [error_train, error_val] = ... 109 | learningCurve([ones(m, 1) X], y, ... 110 | [ones(size(Xval, 1), 1) Xval], yval, ... 111 | lambda); 112 | 113 | plot(1:m, error_train, 1:m, error_val); 114 | title('Learning curve for linear regression') 115 | legend('Train', 'Cross Validation') 116 | xlabel('Number of training examples') 117 | ylabel('Error') 118 | axis([0 13 0 150]) 119 | 120 | fprintf('# Training Examples\tTrain Error\tCross Validation Error\n'); 121 | for i = 1:m 122 | fprintf(' \t%d\t\t%f\t%f\n', i, error_train(i), error_val(i)); 123 | end 124 | 125 | fprintf('Program paused. Press enter to continue.\n'); 126 | pause; 127 | 128 | %% =========== Part 6: Feature Mapping for Polynomial Regression ============= 129 | % One solution to this is to use polynomial regression. 
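% Note (reference sketch): the cost checked in Parts 2-3 above is regularized
% linear regression,
%   J(theta) = (1/(2*m)) * sum((X*theta - y).^2) + (lambda/(2*m)) * sum(theta(2:end).^2)
%   grad     = (1/m) * X' * (X*theta - y) + (lambda/m) * [0; theta(2:end)]
% which reproduces the quoted values 303.993192 and [-15.303016; 598.250744]
% at theta = [1; 1] with lambda = 1.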
You should now 130 | % complete polyFeatures to map each example into its powers 131 | % 132 | 133 | p = 8; 134 | 135 | % Map X onto Polynomial Features and Normalize 136 | X_poly = polyFeatures(X, p); 137 | [X_poly, mu, sigma] = featureNormalize(X_poly); % Normalize 138 | X_poly = [ones(m, 1), X_poly]; % Add Ones 139 | 140 | % Map X_poly_test and normalize (using mu and sigma) 141 | X_poly_test = polyFeatures(Xtest, p); 142 | X_poly_test = bsxfun(@minus, X_poly_test, mu); 143 | X_poly_test = bsxfun(@rdivide, X_poly_test, sigma); 144 | X_poly_test = [ones(size(X_poly_test, 1), 1), X_poly_test]; % Add Ones 145 | 146 | % Map X_poly_val and normalize (using mu and sigma) 147 | X_poly_val = polyFeatures(Xval, p); 148 | X_poly_val = bsxfun(@minus, X_poly_val, mu); 149 | X_poly_val = bsxfun(@rdivide, X_poly_val, sigma); 150 | X_poly_val = [ones(size(X_poly_val, 1), 1), X_poly_val]; % Add Ones 151 | 152 | fprintf('Normalized Training Example 1:\n'); 153 | fprintf(' %f \n', X_poly(1, :)); 154 | 155 | fprintf('\nProgram paused. Press enter to continue.\n'); 156 | %pause; 157 | 158 | 159 | 160 | %% =========== Part 7: Learning Curve for Polynomial Regression ============= 161 | % Now, you will get to experiment with polynomial regression with multiple 162 | % values of lambda. The code below runs polynomial regression with 163 | % lambda = 0. You should try running the code with different values of 164 | % lambda to see how the fit and learning curve change. 165 | % 166 | 167 | lambda = 3; 168 | [theta] = trainLinearReg(X_poly, y, lambda); 169 | 170 | % Plot training data and fit 171 | figure(1); 172 | plot(X, y, 'rx', 'MarkerSize', 10, 'LineWidth', 1.5); 173 | plotFit(min(X), max(X), mu, sigma, theta, p); 174 | xlabel('Change in water level (x)'); 175 | ylabel('Water flowing out of the dam (y)'); 176 | title (sprintf('Polynomial Regression Fit (lambda = %f)', lambda)); 177 | 178 | figure(2); 179 | [error_train, error_val] = ... 180 | learningCurve(X_poly, y, X_poly_val, yval, lambda); 181 | plot(1:m, error_train, 1:m, error_val); 182 | 183 | title(sprintf('Polynomial Regression Learning Curve (lambda = %f)', lambda)); 184 | xlabel('Number of training examples') 185 | ylabel('Error') 186 | axis([0 13 0 100]) 187 | legend('Train', 'Cross Validation') 188 | 189 | fprintf('Polynomial Regression (lambda = %f)\n\n', lambda); 190 | fprintf('# Training Examples\tTrain Error\tCross Validation Error\n'); 191 | for i = 1:m 192 | fprintf(' \t%d\t\t%f\t%f\n', i, error_train(i), error_val(i)); 193 | end 194 | 195 | fprintf('Program paused. Press enter to continue.\n'); 196 | %pause; 197 | 198 | %% =========== Part 8: Validation for Selecting Lambda ============= 199 | % You will now implement validationCurve to test various values of 200 | % lambda on a validation set. You will then use this to select the 201 | % "best" lambda value. 202 | % 203 | 204 | [lambda_vec, error_train, error_val] = ... 205 | validationCurve(X_poly, y, X_poly_val, yval); 206 | 207 | close all; 208 | plot(lambda_vec, error_train, lambda_vec, error_val); 209 | legend('Train', 'Cross Validation'); 210 | xlabel('lambda'); 211 | ylabel('Error'); 212 | 213 | fprintf('lambda\t\tTrain Error\tValidation Error\n'); 214 | for i = 1:length(lambda_vec) 215 | fprintf(' %f\t%f\t%f\n', ... 216 | lambda_vec(i), error_train(i), error_val(i)); 217 | end 218 | 219 | fprintf('Program paused. 
Press enter to continue.\n'); 220 | pause; 221 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex5/ex5data1.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/loveunk/machine-learning-excercise-notebook-python/b94f6c07388234c086a40de0132550653aa17cc3/original-machine-learning-MATLAB/ex5/ex5data1.mat -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex5/featureNormalize.m: -------------------------------------------------------------------------------- 1 | function [X_norm, mu, sigma] = featureNormalize(X) 2 | %FEATURENORMALIZE Normalizes the features in X 3 | % FEATURENORMALIZE(X) returns a normalized version of X where 4 | % the mean value of each feature is 0 and the standard deviation 5 | % is 1. This is often a good preprocessing step to do when 6 | % working with learning algorithms. 7 | 8 | mu = mean(X); 9 | X_norm = bsxfun(@minus, X, mu); 10 | 11 | sigma = std(X_norm); 12 | X_norm = bsxfun(@rdivide, X_norm, sigma); 13 | 14 | 15 | % ============================================================ 16 | 17 | end 18 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex5/learningCurve.m: -------------------------------------------------------------------------------- 1 | function [error_train, error_val] = ... 2 | learningCurve(X, y, Xval, yval, lambda) 3 | %LEARNINGCURVE Generates the train and cross validation set errors needed 4 | %to plot a learning curve 5 | % [error_train, error_val] = ... 6 | % LEARNINGCURVE(X, y, Xval, yval, lambda) returns the train and 7 | % cross validation set errors for a learning curve. In particular, 8 | % it returns two vectors of the same length - error_train and 9 | % error_val. Then, error_train(i) contains the training error for 10 | % i examples (and similarly for error_val(i)). 11 | % 12 | % In this function, you will compute the train and test errors for 13 | % dataset sizes from 1 up to m. In practice, when working with larger 14 | % datasets, you might want to do this in larger intervals. 15 | % 16 | 17 | % Number of training examples 18 | m = size(X, 1); 19 | 20 | % You need to return these values correctly 21 | error_train = zeros(m, 1); 22 | error_val = zeros(m, 1); 23 | 24 | % ====================== YOUR CODE HERE ====================== 25 | % Instructions: Fill in this function to return training errors in 26 | % error_train and the cross validation errors in error_val. 27 | % i.e., error_train(i) and 28 | % error_val(i) should give you the errors 29 | % obtained after training on i examples. 30 | % 31 | % Note: You should evaluate the training error on the first i training 32 | % examples (i.e., X(1:i, :) and y(1:i)). 33 | % 34 | % For the cross-validation error, you should instead evaluate on 35 | % the _entire_ cross validation set (Xval and yval). 36 | % 37 | % Note: If you are using your cost function (linearRegCostFunction) 38 | % to compute the training and cross validation error, you should 39 | % call the function with the lambda argument set to 0. 40 | % Do note that you will still need to use lambda when running 41 | % the training to obtain the theta parameters. 
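% (Concretely, the sample solution below reports the unregularized errors
%    error_train(i) = 1/(2*i)     * sum((X(1:i,:)*theta - y(1:i)).^2)
%    error_val(i)   = 1/(2*m_val) * sum((Xval*theta - yval).^2)
%  where theta is trained on the first i examples with the given lambda and
%  m_val denotes the number of cross validation examples.)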
42 | % 43 | % Hint: You can loop over the examples with the following: 44 | % 45 | % for i = 1:m 46 | % % Compute train/cross validation errors using training examples 47 | % % X(1:i, :) and y(1:i), storing the result in 48 | % % error_train(i) and error_val(i) 49 | % .... 50 | % 51 | % end 52 | % 53 | 54 | % ---------------------- Sample Solution ---------------------- 55 | 56 | for i = 1:m 57 | X_sub = X(1:i, :); % X is passed in with the intercept column already added (see ex5.m) 58 | y_sub = y(1:i); 59 | 60 | [theta] = trainLinearReg(X_sub, y_sub, lambda); 61 | 62 | predictions_train = X_sub * theta; 63 | error_train(i) = (1/(2*i)) * sum((predictions_train - y_sub).^2); 64 | 65 | predictions_val = Xval * theta; % Xval likewise already contains the intercept column 66 | error_val(i) = (1/(2*size(Xval,1))) * sum((predictions_val - yval).^2); 67 | end 68 | 69 | % ------------------------------------------------------------- 70 | 71 | % ========================================================================= 72 | 73 | end 74 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex5/linearRegCostFunction.m: -------------------------------------------------------------------------------- 1 | function [J, grad] = linearRegCostFunction(X, y, theta, lambda) 2 | %LINEARREGCOSTFUNCTION Compute cost and gradient for regularized linear 3 | %regression with multiple variables 4 | % [J, grad] = LINEARREGCOSTFUNCTION(X, y, theta, lambda) computes the 5 | % cost of using theta as the parameter for linear regression to fit the 6 | % data points in X and y. Returns the cost in J and the gradient in grad 7 | 8 | % Initialize some useful values 9 | m = length(y); % number of training examples 10 | 11 | % You need to return the following variables correctly 12 | J = 0; 13 | grad = zeros(size(theta)); 14 | 15 | % ====================== YOUR CODE HERE ====================== 16 | % Instructions: Compute the cost and gradient of regularized linear 17 | % regression for a particular choice of theta. 18 | % 19 | % You should set J to the cost and grad to the gradient. 20 | % 21 | 22 | predictions = X * theta; 23 | J = (1/(2*m)) * sum((predictions - y).^2) + (lambda/(2*m)) * sum(theta(2:end).^2); 24 | 25 | theta4Calc = zeros(size(theta)); 26 | theta4Calc(2:end) = theta(2:end); 27 | grad = (1/m) * X' *(predictions - y) + (lambda/m) * theta4Calc; 28 | 29 | % ========================================================================= 30 | 31 | grad = grad(:); 32 | 33 | end 34 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex5/plotFit.m: -------------------------------------------------------------------------------- 1 | function plotFit(min_x, max_x, mu, sigma, theta, p) 2 | %PLOTFIT Plots a learned polynomial regression fit over an existing figure. 3 | %Also works with linear regression. 4 | % PLOTFIT(min_x, max_x, mu, sigma, theta, p) plots the learned polynomial 5 | % fit with power p and feature normalization (mu, sigma).
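% Typical usage, as in Part 7 of ex5.m:
%   plotFit(min(X), max(X), mu, sigma, theta, p);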
6 | 7 | % Hold on to the current figure 8 | hold on; 9 | 10 | % We plot a range slightly bigger than the min and max values to get 11 | % an idea of how the fit will vary outside the range of the data points 12 | x = (min_x - 15: 0.05 : max_x + 25)'; 13 | 14 | % Map the X values 15 | X_poly = polyFeatures(x, p); 16 | X_poly = bsxfun(@minus, X_poly, mu); 17 | X_poly = bsxfun(@rdivide, X_poly, sigma); 18 | 19 | % Add ones 20 | X_poly = [ones(size(x, 1), 1) X_poly]; 21 | 22 | % Plot 23 | plot(x, X_poly * theta, '--', 'LineWidth', 2) 24 | 25 | % Hold off to the current figure 26 | hold off 27 | 28 | end 29 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex5/polyFeatures.m: -------------------------------------------------------------------------------- 1 | function [X_poly] = polyFeatures(X, p) 2 | %POLYFEATURES Maps X (1D vector) into the p-th power 3 | % [X_poly] = POLYFEATURES(X, p) takes a data matrix X (size m x 1) and 4 | % maps each example into its polynomial features where 5 | % X_poly(i, :) = [X(i) X(i).^2 X(i).^3 ... X(i).^p]; 6 | % 7 | 8 | 9 | % You need to return the following variables correctly. 10 | X_poly = zeros(numel(X), p); 11 | 12 | % ====================== YOUR CODE HERE ====================== 13 | % Instructions: Given a vector X, return a matrix X_poly where the p-th 14 | % column of X contains the values of X to the p-th power. 15 | % 16 | % 17 | 18 | for i=1:p 19 | X_poly(:, i) = X.^i; 20 | end 21 | 22 | % ========================================================================= 23 | 24 | end 25 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex5/trainLinearReg.m: -------------------------------------------------------------------------------- 1 | function [theta] = trainLinearReg(X, y, lambda) 2 | %TRAINLINEARREG Trains linear regression given a dataset (X, y) and a 3 | %regularization parameter lambda 4 | % [theta] = TRAINLINEARREG (X, y, lambda) trains linear regression using 5 | % the dataset (X, y) and regularization parameter lambda. Returns the 6 | % trained parameters theta. 7 | % 8 | 9 | % Initialize Theta 10 | initial_theta = zeros(size(X, 2), 1); 11 | 12 | % Create "short hand" for the cost function to be minimized 13 | costFunction = @(t) linearRegCostFunction(X, y, t, lambda); 14 | 15 | % Now, costFunction is a function that takes in only one argument 16 | options = optimset('MaxIter', 200, 'GradObj', 'on'); 17 | 18 | % Minimize using fmincg 19 | theta = fmincg(costFunction, initial_theta, options); 20 | 21 | end 22 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex5/validationCurve.m: -------------------------------------------------------------------------------- 1 | function [lambda_vec, error_train, error_val] = ... 2 | validationCurve(X, y, Xval, yval) 3 | %VALIDATIONCURVE Generate the train and validation errors needed to 4 | %plot a validation curve that we can use to select lambda 5 | % [lambda_vec, error_train, error_val] = ... 6 | % VALIDATIONCURVE(X, y, Xval, yval) returns the train 7 | % and validation errors (in error_train, error_val) 8 | % for different values of lambda. You are given the training set (X, 9 | % y) and validation set (Xval, yval). 10 | % 11 | 12 | % Selected values of lambda (you should not change this) 13 | lambda_vec = [0 0.001 0.003 0.01 0.03 0.1 0.3 1 3 10]'; 14 | 15 | % You need to return these variables correctly. 
16 | error_train = zeros(length(lambda_vec), 1); 17 | error_val = zeros(length(lambda_vec), 1); 18 | 19 | % ====================== YOUR CODE HERE ====================== 20 | % Instructions: Fill in this function to return training errors in 21 | % error_train and the validation errors in error_val. The 22 | % vector lambda_vec contains the different lambda parameters 23 | % to use for each calculation of the errors, i.e, 24 | % error_train(i), and error_val(i) should give 25 | % you the errors obtained after training with 26 | % lambda = lambda_vec(i) 27 | % 28 | % Note: You can loop over lambda_vec with the following: 29 | % 30 | % for i = 1:length(lambda_vec) 31 | % lambda = lambda_vec(i); 32 | % % Compute train / val errors when training linear 33 | % % regression with regularization parameter lambda 34 | % % You should store the result in error_train(i) 35 | % % and error_val(i) 36 | % .... 37 | % 38 | % end 39 | % 40 | % 41 | 42 | m_X = size(X,1); 43 | m_Xval = size(Xval, 1); 44 | for i = 1:length(lambda_vec) 45 | lambda = lambda_vec(i); 46 | 47 | %X = [ones(size(X,1), 1) X]; 48 | [theta] = trainLinearReg(X, y, lambda); 49 | 50 | predictions_train = X * theta; 51 | error_train(i) = (1/(2*m_X)) * sum((predictions_train - y).^2); 52 | 53 | %Xval = [ones(size(Xval,1), 1) Xval] 54 | predictions_val = Xval * theta; 55 | error_val(i) = (1/(2*m_Xval)) * sum((predictions_val - yval).^2); 56 | end 57 | 58 | % ========================================================================= 59 | 60 | end 61 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex6/dataset3Params.m: -------------------------------------------------------------------------------- 1 | function [C, sigma] = dataset3Params(X, y, Xval, yval) 2 | %DATASET3PARAMS returns your choice of C and sigma for Part 3 of the exercise 3 | %where you select the optimal (C, sigma) learning parameters to use for SVM 4 | %with RBF kernel 5 | % [C, sigma] = DATASET3PARAMS(X, y, Xval, yval) returns your choice of C and 6 | % sigma. You should complete this function to return the optimal C and 7 | % sigma based on a cross-validation set. 8 | % 9 | 10 | % You need to return the following variables correctly. 11 | C = 1; 12 | sigma = 0.3; 13 | 14 | % ====================== YOUR CODE HERE ====================== 15 | % Instructions: Fill in this function to return the optimal C and sigma 16 | % learning parameters found using the cross validation set. 17 | % You can use svmPredict to predict the labels on the cross 18 | % validation set. For example, 19 | % predictions = svmPredict(model, Xval); 20 | % will return the predictions on the cross validation set. 
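% The sample code below does a simple grid search: it tries every pair of
% candidate values for C and sigma (8 x 8 = 64 combinations), measures the
% misclassification rate on the cross validation set for each pair, and
% keeps the pair with the lowest error.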
21 | % 22 | % Note: You can compute the prediction error using 23 | % mean(double(predictions ~= yval)) 24 | % 25 | 26 | param_list = [0.01 0.03 0.1 0.3 1 3 10 30]; 27 | prediction_errors = zeros(length(param_list), length(param_list)); 28 | 29 | for i = 1:length(param_list) 30 | for j = 1:length(param_list) 31 | C_val = param_list(i); 32 | sigma_val = param_list(j); 33 | %fprintf('%f, %f\r\n', C_val, sigma_val); 34 | 35 | model= svmTrain(X, y, C_val, @(x1, x2) gaussianKernel(x1, x2, sigma_val)); 36 | predictions = svmPredict(model, Xval); 37 | 38 | prediction_errors(i,j) = mean(double(predictions ~= yval)); 39 | end 40 | end 41 | 42 | prediction_min = min(min(prediction_errors)); 43 | [min_i, min_j] = find(prediction_errors == prediction_min, 1); % take the first minimum in case of ties 44 | 45 | C = param_list(min_i); 46 | sigma = param_list(min_j); 47 | 48 | % ========================================================================= 49 | 50 | end 51 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex6/emailFeatures.m: -------------------------------------------------------------------------------- 1 | function x = emailFeatures(word_indices) 2 | %EMAILFEATURES takes in a word_indices vector and produces a feature vector 3 | %from the word indices 4 | % x = EMAILFEATURES(word_indices) takes in a word_indices vector and 5 | % produces a feature vector from the word indices. 6 | 7 | % Total number of words in the dictionary 8 | n = 1899; 9 | 10 | % You need to return the following variables correctly. 11 | x = zeros(n, 1); 12 | 13 | % ====================== YOUR CODE HERE ====================== 14 | % Instructions: Fill in this function to return a feature vector for the 15 | % given email (word_indices). To help make it easier to 16 | % process the emails, we have already pre-processed each 17 | % email and converted each word in the email into an index in 18 | % a fixed dictionary (of 1899 words). The variable 19 | % word_indices contains the list of indices of the words 20 | % which occur in one email. 21 | % 22 | % Concretely, if an email has the text: 23 | % 24 | % The quick brown fox jumped over the lazy dog. 25 | % 26 | % Then, the word_indices vector for this text might look 27 | % like: 28 | % 29 | % 60 100 33 44 10 53 60 58 5 30 | % 31 | % where we have mapped each word onto a number, for example: 32 | % 33 | % the -- 60 34 | % quick -- 100 35 | % ... 36 | % 37 | % (note: the above numbers are just an example and are not the 38 | % actual mappings). 39 | % 40 | % Your task is to take one such word_indices vector and construct 41 | % a binary feature vector that indicates whether a particular 42 | % word occurs in the email. That is, x(i) = 1 when word i 43 | % is present in the email. Concretely, if the word 'the' (say, 44 | % index 60) appears in the email, then x(60) = 1. The feature 45 | % vector should look like: 46 | % 47 | % x = [ 0 0 0 0 1 0 0 0 ... 0 0 0 0 1 ... 0 0 0 1 0 ..]; 48 | % 49 | % 50 | 51 | for i = 1:length(word_indices) 52 | x(word_indices(i)) = 1; 53 | end 54 | 55 | % ========================================================================= 56 | 57 | 58 | end 59 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex6/emailSample1.txt: -------------------------------------------------------------------------------- 1 | > Anyone knows how much it costs to host a web portal ? 2 | > 3 | Well, it depends on how many visitors you're expecting.
4 | This can be anywhere from less than 10 bucks a month to a couple of $100. 5 | You should checkout http://www.rackspace.com/ or perhaps Amazon EC2 6 | if youre running something big.. 7 | 8 | To unsubscribe yourself from this mailing list, send an email to: 9 | groupname-unsubscribe@egroups.com 10 | 11 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex6/emailSample2.txt: -------------------------------------------------------------------------------- 1 | Folks, 2 | 3 | my first time posting - have a bit of Unix experience, but am new to Linux. 4 | 5 | 6 | Just got a new PC at home - Dell box with Windows XP. Added a second hard disk 7 | for Linux. Partitioned the disk and have installed Suse 7.2 from CD, which went 8 | fine except it didn't pick up my monitor. 9 | 10 | I have a Dell branded E151FPp 15" LCD flat panel monitor and a nVidia GeForce4 11 | Ti4200 video card, both of which are probably too new to feature in Suse's default 12 | set. I downloaded a driver from the nVidia website and installed it using RPM. 13 | Then I ran Sax2 (as was recommended in some postings I found on the net), but 14 | it still doesn't feature my video card in the available list. What next? 15 | 16 | Another problem. I have a Dell branded keyboard and if I hit Caps-Lock twice, 17 | the whole machine crashes (in Linux, not Windows) - even the on/off switch is 18 | inactive, leaving me to reach for the power cable instead. 19 | 20 | If anyone can help me in any way with these probs., I'd be really grateful - 21 | I've searched the 'net but have run out of ideas. 22 | 23 | Or should I be going for a different version of Linux such as RedHat? Opinions 24 | welcome. 25 | 26 | Thanks a lot, 27 | Peter 28 | 29 | -- 30 | Irish Linux Users' Group: ilug@linux.ie 31 | http://www.linux.ie/mailman/listinfo/ilug for (un)subscription information. 32 | List maintainer: listmaster@linux.ie 33 | 34 | 35 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex6/ex6.m: -------------------------------------------------------------------------------- 1 | %% Machine Learning Online Class 2 | % Exercise 6 | Support Vector Machines 3 | % 4 | % Instructions 5 | % ------------ 6 | % 7 | % This file contains code that helps you get started on the 8 | % exercise. You will need to complete the following functions: 9 | % 10 | % gaussianKernel.m 11 | % dataset3Params.m 12 | % processEmail.m 13 | % emailFeatures.m 14 | % 15 | % For this exercise, you will not need to change any code in this file, 16 | % or any other files other than those mentioned above. 17 | % 18 | 19 | %% Initialization 20 | clear ; close all; clc 21 | 22 | %% =============== Part 1: Loading and Visualizing Data ================ 23 | % We start the exercise by first loading and visualizing the dataset. 24 | % The following code will load the dataset into your environment and plot 25 | % the data. 26 | % 27 | 28 | fprintf('Loading and Visualizing Data ...\n') 29 | 30 | % Load from ex6data1: 31 | % You will have X, y in your environment 32 | load('ex6data1.mat'); 33 | 34 | % Plot training data 35 | plotData(X, y); 36 | 37 | fprintf('Program paused. Press enter to continue.\n'); 38 | %pause; 39 | 40 | %% ==================== Part 2: Training Linear SVM ==================== 41 | % The following code will train a linear SVM on the dataset and plot the 42 | % decision boundary learned. 
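% (Recall that C behaves roughly like 1/lambda: a large C penalizes
% misclassified training examples more heavily, so the boundary bends to fit
% outliers, while a small C gives a smoother, more regularized boundary.)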
43 | % 44 | 45 | % Load from ex6data1: 46 | % You will have X, y in your environment 47 | load('ex6data1.mat'); 48 | 49 | fprintf('\nTraining Linear SVM ...\n') 50 | 51 | % You should try to change the C value below and see how the decision 52 | % boundary varies (e.g., try C = 1000) 53 | C = 1; 54 | model = svmTrain(X, y, C, @linearKernel, 1e-3, 20); 55 | visualizeBoundaryLinear(X, y, model); 56 | 57 | fprintf('Program paused. Press enter to continue.\n'); 58 | %pause; 59 | 60 | %% =============== Part 3: Implementing Gaussian Kernel =============== 61 | % You will now implement the Gaussian kernel to use 62 | % with the SVM. You should complete the code in gaussianKernel.m 63 | % 64 | fprintf('\nEvaluating the Gaussian Kernel ...\n') 65 | 66 | x1 = [1 2 1]; x2 = [0 4 -1]; sigma = 2; 67 | sim = gaussianKernel(x1, x2, sigma); 68 | 69 | fprintf(['Gaussian Kernel between x1 = [1; 2; 1], x2 = [0; 4; -1], sigma = %f :' ... 70 | '\n\t%f\n(for sigma = 2, this value should be about 0.324652)\n'], sigma, sim); 71 | 72 | fprintf('Program paused. Press enter to continue.\n'); 73 | %pause; 74 | 75 | %% =============== Part 4: Visualizing Dataset 2 ================ 76 | % The following code will load the next dataset into your environment and 77 | % plot the data. 78 | % 79 | 80 | fprintf('Loading and Visualizing Data ...\n') 81 | 82 | % Load from ex6data2: 83 | % You will have X, y in your environment 84 | load('ex6data2.mat'); 85 | 86 | % Plot training data 87 | plotData(X, y); 88 | 89 | fprintf('Program paused. Press enter to continue.\n'); 90 | %pause; 91 | 92 | %% ========== Part 5: Training SVM with RBF Kernel (Dataset 2) ========== 93 | % After you have implemented the kernel, we can now use it to train the 94 | % SVM classifier. 95 | % 96 | fprintf('\nTraining SVM with RBF Kernel (this may take 1 to 2 minutes) ...\n'); 97 | 98 | % Load from ex6data2: 99 | % You will have X, y in your environment 100 | load('ex6data2.mat'); 101 | 102 | % SVM Parameters 103 | C = 1; sigma = 0.1; 104 | 105 | % We set the tolerance and max_passes lower here so that the code will run 106 | % faster. However, in practice, you will want to run the training to 107 | % convergence. 108 | model= svmTrain(X, y, C, @(x1, x2) gaussianKernel(x1, x2, sigma)); 109 | visualizeBoundary(X, y, model); 110 | 111 | fprintf('Program paused. Press enter to continue.\n'); 112 | %pause; 113 | 114 | %% =============== Part 6: Visualizing Dataset 3 ================ 115 | % The following code will load the next dataset into your environment and 116 | % plot the data. 117 | % 118 | 119 | fprintf('Loading and Visualizing Data ...\n') 120 | 121 | % Load from ex6data3: 122 | % You will have X, y in your environment 123 | load('ex6data3.mat'); 124 | 125 | % Plot training data 126 | plotData(X, y); 127 | 128 | fprintf('Program paused. Press enter to continue.\n'); 129 | %pause; 130 | 131 | %% ========== Part 7: Training SVM with RBF Kernel (Dataset 3) ========== 132 | 133 | % This is a different dataset that you can use to experiment with. Try 134 | % different values of C and sigma here. 135 | % 136 | 137 | % Load from ex6data3: 138 | % You will have X, y in your environment 139 | load('ex6data3.mat'); 140 | 141 | % Try different SVM Parameters here 142 | [C, sigma] = dataset3Params(X, y, Xval, yval); 143 | 144 | % Train the SVM 145 | model= svmTrain(X, y, C, @(x1, x2) gaussianKernel(x1, x2, sigma)); 146 | visualizeBoundary(X, y, model); 147 | 148 | fprintf('Program paused. 
Press enter to continue.\n'); 149 | pause; 150 | 151 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex6/ex6_spam.m: -------------------------------------------------------------------------------- 1 | %% Machine Learning Online Class 2 | % Exercise 6 | Spam Classification with SVMs 3 | % 4 | % Instructions 5 | % ------------ 6 | % 7 | % This file contains code that helps you get started on the 8 | % exercise. You will need to complete the following functions: 9 | % 10 | % gaussianKernel.m 11 | % dataset3Params.m 12 | % processEmail.m 13 | % emailFeatures.m 14 | % 15 | % For this exercise, you will not need to change any code in this file, 16 | % or any other files other than those mentioned above. 17 | % 18 | 19 | %% Initialization 20 | clear ; close all; clc 21 | 22 | %% ==================== Part 1: Email Preprocessing ==================== 23 | % To use an SVM to classify emails into Spam v.s. Non-Spam, you first need 24 | % to convert each email into a vector of features. In this part, you will 25 | % implement the preprocessing steps for each email. You should 26 | % complete the code in processEmail.m to produce a word indices vector 27 | % for a given email. 28 | 29 | fprintf('\nPreprocessing sample email (emailSample1.txt)\n'); 30 | 31 | % Extract Features 32 | file_contents = readFile('emailSample1.txt'); 33 | word_indices = processEmail(file_contents); 34 | 35 | % Print Stats 36 | fprintf('Word Indices: \n'); 37 | fprintf(' %d', word_indices); 38 | fprintf('\n\n'); 39 | 40 | fprintf('Program paused. Press enter to continue.\n'); 41 | %pause; 42 | 43 | %% ==================== Part 2: Feature Extraction ==================== 44 | % Now, you will convert each email into a vector of features in R^n. 45 | % You should complete the code in emailFeatures.m to produce a feature 46 | % vector for a given email. 47 | 48 | fprintf('\nExtracting features from sample email (emailSample1.txt)\n'); 49 | 50 | % Extract Features 51 | file_contents = readFile('emailSample1.txt'); 52 | word_indices = processEmail(file_contents); 53 | features = emailFeatures(word_indices); 54 | 55 | % Print Stats 56 | fprintf('Length of feature vector: %d\n', length(features)); 57 | fprintf('Number of non-zero entries: %d\n', sum(features > 0)); 58 | 59 | fprintf('Program paused. Press enter to continue.\n'); 60 | %pause; 61 | 62 | %% =========== Part 3: Train Linear SVM for Spam Classification ======== 63 | % In this section, you will train a linear classifier to determine if an 64 | % email is Spam or Not-Spam. 65 | 66 | % Load the Spam Email dataset 67 | % You will have X, y in your environment 68 | load('spamTrain.mat'); 69 | 70 | fprintf('\nTraining Linear SVM (Spam Classification)\n') 71 | fprintf('(this may take 1 to 2 minutes) ...\n') 72 | 73 | C = 0.1; 74 | model = svmTrain(X, y, C, @linearKernel); 75 | 76 | p = svmPredict(model, X); 77 | 78 | fprintf('Training Accuracy: %f\n', mean(double(p == y)) * 100); 79 | 80 | %% =================== Part 4: Test Spam Classification ================ 81 | % After training the classifier, we can evaluate it on a test set. 
We have 82 | % included a test set in spamTest.mat 83 | 84 | % Load the test dataset 85 | % You will have Xtest, ytest in your environment 86 | load('spamTest.mat'); 87 | 88 | fprintf('\nEvaluating the trained Linear SVM on a test set ...\n') 89 | 90 | p = svmPredict(model, Xtest); 91 | 92 | fprintf('Test Accuracy: %f\n', mean(double(p == ytest)) * 100); 93 | pause; 94 | 95 | 96 | %% ================= Part 5: Top Predictors of Spam ==================== 97 | % Since the model we are training is a linear SVM, we can inspect the 98 | % weights learned by the model to understand better how it is determining 99 | % whether an email is spam or not. The following code finds the words with 100 | % the highest weights in the classifier. Informally, the classifier 101 | % 'thinks' that these words are the most likely indicators of spam. 102 | % 103 | 104 | % Sort the weights and obtain the vocabulary list 105 | [weight, idx] = sort(model.w, 'descend'); 106 | vocabList = getVocabList(); 107 | 108 | fprintf('\nTop predictors of spam: \n'); 109 | for i = 1:15 110 | fprintf(' %-15s (%f) \n', vocabList{idx(i)}, weight(i)); 111 | end 112 | 113 | fprintf('\n\n'); 114 | fprintf('\nProgram paused. Press enter to continue.\n'); 115 | pause; 116 | 117 | %% =================== Part 6: Try Your Own Emails ===================== 118 | % Now that you've trained the spam classifier, you can use it on your own 119 | % emails! In the starter code, we have included spamSample1.txt, 120 | % spamSample2.txt, emailSample1.txt and emailSample2.txt as examples. 121 | % The following code reads in one of these emails and then uses your 122 | % learned SVM classifier to determine whether the email is Spam or 123 | % Not Spam. 124 | 125 | % Set the file to be read in (change this to spamSample2.txt, 126 | % emailSample1.txt or emailSample2.txt to see different predictions on 127 | % different email types). Try your own emails as well!
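% The prediction pipeline below is: readFile (raw text) -> processEmail
% (stemmed word indices) -> emailFeatures (binary vector of length 1899) ->
% svmPredict (1 = spam, 0 = not spam).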
128 | filename = 'spamSample2.txt'; 129 | 130 | % Read and predict 131 | file_contents = readFile(filename); 132 | word_indices = processEmail(file_contents); 133 | x = emailFeatures(word_indices); 134 | p = svmPredict(model, x); 135 | 136 | fprintf('\nProcessed %s\n\nSpam Classification: %d\n', filename, p); 137 | fprintf('(1 indicates spam, 0 indicates not spam)\n\n'); 138 | 139 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex6/ex6data1.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/loveunk/machine-learning-excercise-notebook-python/b94f6c07388234c086a40de0132550653aa17cc3/original-machine-learning-MATLAB/ex6/ex6data1.mat -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex6/ex6data2.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/loveunk/machine-learning-excercise-notebook-python/b94f6c07388234c086a40de0132550653aa17cc3/original-machine-learning-MATLAB/ex6/ex6data2.mat -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex6/ex6data3.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/loveunk/machine-learning-excercise-notebook-python/b94f6c07388234c086a40de0132550653aa17cc3/original-machine-learning-MATLAB/ex6/ex6data3.mat -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex6/gaussianKernel.m: -------------------------------------------------------------------------------- 1 | function sim = gaussianKernel(x1, x2, sigma) 2 | %RBFKERNEL returns a radial basis function kernel between x1 and x2 3 | % sim = gaussianKernel(x1, x2) returns a gaussian kernel between x1 and x2 4 | % and returns the value in sim 5 | 6 | % Ensure that x1 and x2 are column vectors 7 | x1 = x1(:); x2 = x2(:); 8 | 9 | % You need to return the following variables correctly. 10 | sim = 0; 11 | 12 | % ====================== YOUR CODE HERE ====================== 13 | % Instructions: Fill in this function to return the similarity between x1 14 | % and x2 computed using a Gaussian kernel with bandwidth 15 | % sigma 16 | % 17 | % 18 | 19 | 20 | gaussianExp = -sum((x1-x2).^2)/(2*(sigma^2)); 21 | sim = exp(gaussianExp); 22 | 23 | 24 | % ============================================================= 25 | 26 | end 27 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex6/getVocabList.m: -------------------------------------------------------------------------------- 1 | function vocabList = getVocabList() 2 | %GETVOCABLIST reads the fixed vocabulary list in vocab.txt and returns a 3 | %cell array of the words 4 | % vocabList = GETVOCABLIST() reads the fixed vocabulary list in vocab.txt 5 | % and returns a cell array of the words in vocabList. 
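% Each line of vocab.txt has the form '<index> <word>'; the loop below reads
% and discards the index (it always equals i) and keeps the word, so
% vocabList{i} is simply the i-th word of the file.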
6 | 7 | 8 | %% Read the fixed vocabulary list 9 | fid = fopen('vocab.txt'); 10 | 11 | % Store all dictionary words in cell array vocab{} 12 | n = 1899; % Total number of words in the dictionary 13 | 14 | % For ease of implementation, we use a cell array to map the indices => words 15 | % In practice, you'll want to use some form of hashmap 16 | vocabList = cell(n, 1); 17 | for i = 1:n 18 | % Word Index (can ignore since it will be = i) 19 | fscanf(fid, '%d', 1); 20 | % Actual Word 21 | vocabList{i} = fscanf(fid, '%s', 1); 22 | end 23 | fclose(fid); 24 | 25 | end 26 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex6/linearKernel.m: -------------------------------------------------------------------------------- 1 | function sim = linearKernel(x1, x2) 2 | %LINEARKERNEL returns a linear kernel between x1 and x2 3 | % sim = linearKernel(x1, x2) returns a linear kernel between x1 and x2 4 | % and returns the value in sim 5 | 6 | % Ensure that x1 and x2 are column vectors 7 | x1 = x1(:); x2 = x2(:); 8 | 9 | % Compute the kernel 10 | sim = x1' * x2; % dot product 11 | 12 | end -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex6/plotData.m: -------------------------------------------------------------------------------- 1 | function plotData(X, y) 2 | %PLOTDATA Plots the data points X and y into a new figure 3 | % PLOTDATA(x,y) plots the data points with + for the positive examples 4 | % and o for the negative examples. X is assumed to be a Mx2 matrix. 5 | % 6 | % Note: This was slightly modified such that it expects y = 1 or y = 0 7 | 8 | % Find Indices of Positive and Negative Examples 9 | pos = find(y == 1); neg = find(y == 0); 10 | 11 | % Plot Examples 12 | plot(X(pos, 1), X(pos, 2), 'k+','LineWidth', 1, 'MarkerSize', 7) 13 | hold on; 14 | plot(X(neg, 1), X(neg, 2), 'ko', 'MarkerFaceColor', 'y', 'MarkerSize', 7) 15 | hold off; 16 | 17 | end 18 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex6/processEmail.m: -------------------------------------------------------------------------------- 1 | function word_indices = processEmail(email_contents) 2 | %PROCESSEMAIL preprocesses the body of an email and 3 | %returns a list of word_indices 4 | % word_indices = PROCESSEMAIL(email_contents) preprocesses 5 | % the body of an email and returns a list of indices of the 6 | % words contained in the email.
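% The preprocessing below lower-cases the text, strips HTML tags, and
% replaces numbers, URLs, email addresses and dollar signs with the
% placeholder tokens 'number', 'httpaddr', 'emailaddr' and 'dollar'; each
% remaining token is then stemmed with porterStemmer and looked up in the
% vocabulary list.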
7 | % 8 | 9 | % Load Vocabulary 10 | vocabList = getVocabList(); 11 | 12 | % Init return value 13 | word_indices = []; 14 | 15 | % ========================== Preprocess Email =========================== 16 | 17 | % Find the Headers ( \n\n and remove ) 18 | % Uncomment the following lines if you are working with raw emails with the 19 | % full headers 20 | 21 | % hdrstart = strfind(email_contents, ([char(10) char(10)])); 22 | % email_contents = email_contents(hdrstart(1):end); 23 | 24 | % Lower case 25 | email_contents = lower(email_contents); 26 | 27 | % Strip all HTML 28 | % Looks for any expression that starts with < and ends with >, does not 29 | % contain any < or > inside the tag, and replaces it with a space 30 | email_contents = regexprep(email_contents, '<[^<>]+>', ' '); 31 | 32 | % Handle Numbers 33 | % Look for one or more characters between 0-9 34 | email_contents = regexprep(email_contents, '[0-9]+', 'number'); 35 | 36 | % Handle URLS 37 | % Look for strings starting with http:// or https:// 38 | email_contents = regexprep(email_contents, ... 39 | '(http|https)://[^\s]*', 'httpaddr'); 40 | 41 | % Handle Email Addresses 42 | % Look for strings with @ in the middle 43 | email_contents = regexprep(email_contents, '[^\s]+@[^\s]+', 'emailaddr'); 44 | 45 | % Handle $ sign 46 | email_contents = regexprep(email_contents, '[$]+', 'dollar'); 47 | 48 | 49 | % ========================== Tokenize Email =========================== 50 | 51 | % Output the email to screen as well 52 | fprintf('\n==== Processed Email ====\n\n'); 53 | 54 | % Process file 55 | l = 0; 56 | 57 | while ~isempty(email_contents) 58 | 59 | % Tokenize and also get rid of any punctuation 60 | [str, email_contents] = ... 61 | strtok(email_contents, ... 62 | [' @$/#.-:&*+=[]?!(){},''">_<;%' char(10) char(13)]); 63 | 64 | % Remove any non alphanumeric characters 65 | str = regexprep(str, '[^a-zA-Z0-9]', ''); 66 | 67 | % Stem the word 68 | % (the porterStemmer sometimes has issues, so we use a try catch block) 69 | try str = porterStemmer(strtrim(str)); 70 | catch str = ''; continue; 71 | end; 72 | 73 | % Skip the word if it is too short 74 | if length(str) < 1 75 | continue; 76 | end 77 | 78 | % Look up the word in the dictionary and add to word_indices if 79 | % found 80 | % ====================== YOUR CODE HERE ====================== 81 | % Instructions: Fill in this function to add the index of str to 82 | % word_indices if it is in the vocabulary. At this point 83 | % of the code, you have a stemmed word from the email in 84 | % the variable str. You should look up str in the 85 | % vocabulary list (vocabList). If a match exists, you 86 | % should add the index of the word to the word_indices 87 | % vector. Concretely, if str = 'action', then you should 88 | % look up the vocabulary list to find where in vocabList 89 | % 'action' appears. For example, if vocabList{18} = 90 | % 'action', then you should add 18 to the word_indices 91 | % vector (e.g., word_indices = [word_indices ; 18]; ). 92 | % 93 | % Note: vocabList{idx} returns the word with index idx in the 94 | % vocabulary list. 95 | % 96 | % Note: You can use strcmp(str1, str2) to compare two strings (str1 and 97 | % str2). It will return 1 only if the two strings are equivalent.
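% The straightforward approach used below is a linear scan over vocabList;
% with only 1899 words this is fast enough here, though a hash map would be
% preferable for a much larger vocabulary.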
98 | % 99 | 100 | for i = 1:length(vocabList) 101 | if strcmp(vocabList{i}, str) == 1 102 | word_indices = [word_indices ; i]; 103 | break; 104 | end 105 | end 106 | % ============================================================= 107 | 108 | 109 | % Print to screen, ensuring that the output lines are not too long 110 | if (l + length(str) + 1) > 78 111 | fprintf('\n'); 112 | l = 0; 113 | end 114 | fprintf('%s ', str); 115 | l = l + length(str) + 1; 116 | 117 | end 118 | 119 | % Print footer 120 | fprintf('\n\n=========================\n'); 121 | 122 | end 123 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex6/readFile.m: -------------------------------------------------------------------------------- 1 | function file_contents = readFile(filename) 2 | %READFILE reads a file and returns its entire contents 3 | % file_contents = READFILE(filename) reads a file and returns its entire 4 | % contents in file_contents 5 | % 6 | 7 | % Load File 8 | fid = fopen(filename); 9 | if fid 10 | file_contents = fscanf(fid, '%c', inf); 11 | fclose(fid); 12 | else 13 | file_contents = ''; 14 | fprintf('Unable to open %s\n', filename); 15 | end 16 | 17 | end 18 | 19 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex6/spamSample1.txt: -------------------------------------------------------------------------------- 1 | Do You Want To Make $1000 Or More Per Week? 2 | 3 | 4 | 5 | If you are a motivated and qualified individual - I 6 | will personally demonstrate to you a system that will 7 | make you $1,000 per week or more! This is NOT mlm. 8 | 9 | 10 | 11 | Call our 24 hour pre-recorded number to get the 12 | details. 13 | 14 | 15 | 16 | 000-456-789 17 | 18 | 19 | 20 | I need people who want to make serious money. Make 21 | the call and get the facts. 22 | 23 | Invest 2 minutes in yourself now! 24 | 25 | 26 | 27 | 000-456-789 28 | 29 | 30 | 31 | Looking forward to your call and I will introduce you 32 | to people like yourself who 33 | are currently making $10,000 plus per week! 34 | 35 | 36 | 37 | 000-456-789 38 | 39 | 40 | 41 | 3484lJGv6-241lEaN9080lRmS6-271WxHo7524qiyT5-438rjUv5615hQcf0-662eiDB9057dMtVl72 42 | 43 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex6/spamSample2.txt: -------------------------------------------------------------------------------- 1 | Dear Tenants, 2 | 3 | Incubation Team is going to organise a free seminar on Intellectual Property (IP) Protection on December 17, 2018 at idea Lab. 4 | 5 | Details are as follows: 6 | 7 | 8 | IP: Protecting and enhancing your current and future success 9 | 10 | Venue: 11 | 12 | idea Lab, 1/F, No. 5 Science Park West Avenue, Hong Kong Science Park 13 | 14 | Date: 15 | 16 | December 17, 2018 17 | 18 | Time: 19 | 20 | 3:00 pm – 5:00 pm 21 | 22 | Registration: 23 | 24 | 2:30 pm 25 | 26 | 27 | Why Protect? 28 | 29 | All companies succeed on the basis of what they know, create, design or invent. This is all valuable intellectual property (IP) that is, for many, their most prized asset. 30 | 31 | In particular, start ups and small businesses want to cost effectively protect their valuable intellectual assets. 
32 | 33 | An unprecedented opportunity to meet world class expertise 34 | 35 | For the first time, members of the Science & Technology Park community will be able to meet and talk to: 36 | 37 | A creator of successful, world class processes to protect the valuable IP of a major corporation (TSMC) 38 | 39 | A key driver of IP and innovation policies at the US Patent and Trademark Office 40 | 41 | 42 | 43 | These distinguished leaders in the IP field will talk about the importance of IP today, cost effectively protecting IP and why companies must now pay special attention to the most valuable and often overlooked IP - trade secrets and databases. 44 | 45 | Register now 46 | 47 | Seating is on a first come first registered basis. We have limited seating. 48 | 49 | Register at: 50 | 51 | https://goo.gl/forms/P8ePu3Vog8WfgVXf1 52 | 53 | 54 | 55 | For more details, please click HERE. 56 | 57 | 58 | For enquiries, please call Christine Hui at 2629 6757 or by email christine.hui@hkstp.org . 59 | 60 | Best regards, 61 | Incubation Team -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex6/spamTest.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/loveunk/machine-learning-excercise-notebook-python/b94f6c07388234c086a40de0132550653aa17cc3/original-machine-learning-MATLAB/ex6/spamTest.mat -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex6/spamTrain.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/loveunk/machine-learning-excercise-notebook-python/b94f6c07388234c086a40de0132550653aa17cc3/original-machine-learning-MATLAB/ex6/spamTrain.mat -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex6/svmPredict.m: -------------------------------------------------------------------------------- 1 | function pred = svmPredict(model, X) 2 | %SVMPREDICT returns a vector of predictions using a trained SVM model 3 | %(svmTrain). 4 | % pred = SVMPREDICT(model, X) returns a vector of predictions using a 5 | % trained SVM model (svmTrain). X is a mxn matrix where there each 6 | % example is a row. model is a svm model returned from svmTrain. 7 | % predictions pred is a m x 1 column of predictions of {0, 1} values. 8 | % 9 | 10 | % Check if we are getting a column vector, if so, then assume that we only 11 | % need to do prediction for a single example 12 | if (size(X, 2) == 1) 13 | % Examples should be in rows 14 | X = X'; 15 | end 16 | 17 | % Dataset 18 | m = size(X, 1); 19 | p = zeros(m, 1); 20 | pred = zeros(m, 1); 21 | 22 | if strcmp(func2str(model.kernelFunction), 'linearKernel') 23 | % We can use the weights and bias directly if working with the 24 | % linear kernel 25 | p = X * model.w + model.b; 26 | elseif strfind(func2str(model.kernelFunction), 'gaussianKernel') 27 | % Vectorized RBF Kernel 28 | % This is equivalent to computing the kernel on every pair of examples 29 | X1 = sum(X.^2, 2); 30 | X2 = sum(model.X.^2, 2)'; 31 | K = bsxfun(@plus, X1, bsxfun(@plus, X2, - 2 * X * model.X')); 32 | K = model.kernelFunction(1, 0) .^ K; 33 | K = bsxfun(@times, model.y', K); 34 | K = bsxfun(@times, model.alphas', K); 35 | p = sum(K, 2); 36 | else 37 | % Other Non-linear kernel 38 | for i = 1:m 39 | prediction = 0; 40 | for j = 1:size(model.X, 1) 41 | prediction = prediction + ... 
42 | model.alphas(j) * model.y(j) * ... 43 | model.kernelFunction(X(i,:)', model.X(j,:)'); 44 | end 45 | p(i) = prediction + model.b; 46 | end 47 | end 48 | 49 | % Convert predictions into 0 / 1 50 | pred(p >= 0) = 1; 51 | pred(p < 0) = 0; 52 | 53 | end 54 | 55 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex6/svmTrain.m: -------------------------------------------------------------------------------- 1 | function [model] = svmTrain(X, Y, C, kernelFunction, ... 2 | tol, max_passes) 3 | %SVMTRAIN Trains an SVM classifier using a simplified version of the SMO 4 | %algorithm. 5 | % [model] = SVMTRAIN(X, Y, C, kernelFunction, tol, max_passes) trains an 6 | % SVM classifier and returns trained model. X is the matrix of training 7 | % examples. Each row is a training example, and the jth column holds the 8 | % jth feature. Y is a column matrix containing 1 for positive examples 9 | % and 0 for negative examples. C is the standard SVM regularization 10 | % parameter. tol is a tolerance value used for determining equality of 11 | % floating point numbers. max_passes controls the number of iterations 12 | % over the dataset (without changes to alpha) before the algorithm quits. 13 | % 14 | % Note: This is a simplified version of the SMO algorithm for training 15 | % SVMs. In practice, if you want to train an SVM classifier, we 16 | % recommend using an optimized package such as: 17 | % 18 | % LIBSVM (http://www.csie.ntu.edu.tw/~cjlin/libsvm/) 19 | % SVMLight (http://svmlight.joachims.org/) 20 | % 21 | % 22 | 23 | if ~exist('tol', 'var') || isempty(tol) 24 | tol = 1e-3; 25 | end 26 | 27 | if ~exist('max_passes', 'var') || isempty(max_passes) 28 | max_passes = 5; 29 | end 30 | 31 | % Data parameters 32 | m = size(X, 1); 33 | n = size(X, 2); 34 | 35 | % Map 0 to -1 36 | Y(Y==0) = -1; 37 | 38 | % Variables 39 | alphas = zeros(m, 1); 40 | b = 0; 41 | E = zeros(m, 1); 42 | passes = 0; 43 | eta = 0; 44 | L = 0; 45 | H = 0; 46 | 47 | % Pre-compute the Kernel Matrix since our dataset is small 48 | % (in practice, optimized SVM packages that handle large datasets 49 | % gracefully will _not_ do this) 50 | % 51 | % We have implemented optimized vectorized version of the Kernels here so 52 | % that the svm training will run faster. 53 | if strcmp(func2str(kernelFunction), 'linearKernel') 54 | % Vectorized computation for the Linear Kernel 55 | % This is equivalent to computing the kernel on every pair of examples 56 | K = X*X'; 57 | elseif strfind(func2str(kernelFunction), 'gaussianKernel') 58 | % Vectorized RBF Kernel 59 | % This is equivalent to computing the kernel on every pair of examples 60 | X2 = sum(X.^2, 2); 61 | K = bsxfun(@plus, X2, bsxfun(@plus, X2', - 2 * (X * X'))); 62 | K = kernelFunction(1, 0) .^ K; 63 | else 64 | % Pre-compute the Kernel Matrix 65 | % The following can be slow due to the lack of vectorization 66 | K = zeros(m); 67 | for i = 1:m 68 | for j = i:m 69 | K(i,j) = kernelFunction(X(i,:)', X(j,:)'); 70 | K(j,i) = K(i,j); %the matrix is symmetric 71 | end 72 | end 73 | end 74 | 75 | % Train 76 | fprintf('\nTraining ...'); 77 | dots = 12; 78 | while passes < max_passes, 79 | 80 | num_changed_alphas = 0; 81 | for i = 1:m, 82 | 83 | % Calculate Ei = f(x(i)) - y(i) using (2). 
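% Here f(x(i)) is the SVM decision function evaluated on training example i,
% f(x(i)) = b + sum_j alphas(j) * Y(j) * K(j, i), so E(i) is the current
% prediction error on that example.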
84 | % E(i) = b + sum (X(i, :) * (repmat(alphas.*Y,1,n).*X)') - Y(i); 85 | E(i) = b + sum (alphas.*Y.*K(:,i)) - Y(i); 86 | 87 | if ((Y(i)*E(i) < -tol && alphas(i) < C) || (Y(i)*E(i) > tol && alphas(i) > 0)), 88 | 89 | % In practice, there are many heuristics one can use to select 90 | % the i and j. In this simplified code, we select them randomly. 91 | j = ceil(m * rand()); 92 | while j == i, % Make sure i \neq j 93 | j = ceil(m * rand()); 94 | end 95 | 96 | % Calculate Ej = f(x(j)) - y(j) using (2). 97 | E(j) = b + sum (alphas.*Y.*K(:,j)) - Y(j); 98 | 99 | % Save old alphas 100 | alpha_i_old = alphas(i); 101 | alpha_j_old = alphas(j); 102 | 103 | % Compute L and H by (10) or (11). 104 | if (Y(i) == Y(j)), 105 | L = max(0, alphas(j) + alphas(i) - C); 106 | H = min(C, alphas(j) + alphas(i)); 107 | else 108 | L = max(0, alphas(j) - alphas(i)); 109 | H = min(C, C + alphas(j) - alphas(i)); 110 | end 111 | 112 | if (L == H), 113 | % continue to next i. 114 | continue; 115 | end 116 | 117 | % Compute eta by (14). 118 | eta = 2 * K(i,j) - K(i,i) - K(j,j); 119 | if (eta >= 0), 120 | % continue to next i. 121 | continue; 122 | end 123 | 124 | % Compute and clip new value for alpha j using (12) and (15). 125 | alphas(j) = alphas(j) - (Y(j) * (E(i) - E(j))) / eta; 126 | 127 | % Clip 128 | alphas(j) = min (H, alphas(j)); 129 | alphas(j) = max (L, alphas(j)); 130 | 131 | % Check if change in alpha is significant 132 | if (abs(alphas(j) - alpha_j_old) < tol), 133 | % continue to next i. 134 | % replace anyway 135 | alphas(j) = alpha_j_old; 136 | continue; 137 | end 138 | 139 | % Determine value for alpha i using (16). 140 | alphas(i) = alphas(i) + Y(i)*Y(j)*(alpha_j_old - alphas(j)); 141 | 142 | % Compute b1 and b2 using (17) and (18) respectively. 143 | b1 = b - E(i) ... 144 | - Y(i) * (alphas(i) - alpha_i_old) * K(i,j)' ... 145 | - Y(j) * (alphas(j) - alpha_j_old) * K(i,j)'; 146 | b2 = b - E(j) ... 147 | - Y(i) * (alphas(i) - alpha_i_old) * K(i,j)' ... 148 | - Y(j) * (alphas(j) - alpha_j_old) * K(j,j)'; 149 | 150 | % Compute b by (19). 151 | if (0 < alphas(i) && alphas(i) < C), 152 | b = b1; 153 | elseif (0 < alphas(j) && alphas(j) < C), 154 | b = b2; 155 | else 156 | b = (b1+b2)/2; 157 | end 158 | 159 | num_changed_alphas = num_changed_alphas + 1; 160 | 161 | end 162 | 163 | end 164 | 165 | if (num_changed_alphas == 0), 166 | passes = passes + 1; 167 | else 168 | passes = 0; 169 | end 170 | 171 | fprintf('.'); 172 | dots = dots + 1; 173 | if dots > 78 174 | dots = 0; 175 | fprintf('\n'); 176 | end 177 | if exist('OCTAVE_VERSION') 178 | fflush(stdout); 179 | end 180 | end 181 | fprintf(' Done! 
\n\n'); 182 | 183 | % Save the model 184 | idx = alphas > 0; 185 | model.X= X(idx,:); 186 | model.y= Y(idx); 187 | model.kernelFunction = kernelFunction; 188 | model.b= b; 189 | model.alphas= alphas(idx); 190 | model.w = ((alphas.*Y)'*X)'; 191 | 192 | end 193 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex6/visualizeBoundary.m: -------------------------------------------------------------------------------- 1 | function visualizeBoundary(X, y, model, varargin) 2 | %VISUALIZEBOUNDARY plots a non-linear decision boundary learned by the SVM 3 | % VISUALIZEBOUNDARYLINEAR(X, y, model) plots a non-linear decision 4 | % boundary learned by the SVM and overlays the data on it 5 | 6 | % Plot the training data on top of the boundary 7 | plotData(X, y) 8 | 9 | % Make classification predictions over a grid of values 10 | x1plot = linspace(min(X(:,1)), max(X(:,1)), 100)'; 11 | x2plot = linspace(min(X(:,2)), max(X(:,2)), 100)'; 12 | [X1, X2] = meshgrid(x1plot, x2plot); 13 | vals = zeros(size(X1)); 14 | for i = 1:size(X1, 2) 15 | this_X = [X1(:, i), X2(:, i)]; 16 | vals(:, i) = svmPredict(model, this_X); 17 | end 18 | 19 | % Plot the SVM boundary 20 | hold on 21 | contour(X1, X2, vals, [0.5 0.5], 'b'); 22 | hold off; 23 | 24 | end 25 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex6/visualizeBoundaryLinear.m: -------------------------------------------------------------------------------- 1 | function visualizeBoundaryLinear(X, y, model) 2 | %VISUALIZEBOUNDARYLINEAR plots a linear decision boundary learned by the 3 | %SVM 4 | % VISUALIZEBOUNDARYLINEAR(X, y, model) plots a linear decision boundary 5 | % learned by the SVM and overlays the data on it 6 | 7 | w = model.w; 8 | b = model.b; 9 | xp = linspace(min(X(:,1)), max(X(:,1)), 100); 10 | yp = - (w(1)*xp + b)/w(2); 11 | plotData(X, y); 12 | hold on; 13 | plot(xp, yp, '-b'); 14 | hold off 15 | 16 | end 17 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex7/bird_small.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/loveunk/machine-learning-excercise-notebook-python/b94f6c07388234c086a40de0132550653aa17cc3/original-machine-learning-MATLAB/ex7/bird_small.mat -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex7/bird_small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/loveunk/machine-learning-excercise-notebook-python/b94f6c07388234c086a40de0132550653aa17cc3/original-machine-learning-MATLAB/ex7/bird_small.png -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex7/computeCentroids.m: -------------------------------------------------------------------------------- 1 | function centroids = computeCentroids(X, idx, K) 2 | %COMPUTECENTROIDS returns the new centroids by computing the means of the 3 | %data points assigned to each centroid. 4 | % centroids = COMPUTECENTROIDS(X, idx, K) returns the new centroids by 5 | % computing the means of the data points assigned to each centroid. It is 6 | % given a dataset X where each row is a single data point, a vector 7 | % idx of centroid assignments (i.e. each entry in range [1..K]) for each 8 | % example, and K, the number of centroids. 
You should return a matrix 9 | % centroids, where each row of centroids is the mean of the data points 10 | % assigned to it. 11 | % 12 | 13 | % Useful variables 14 | [m n] = size(X); 15 | 16 | % You need to return the following variables correctly. 17 | centroids = zeros(K, n); 18 | 19 | 20 | % ====================== YOUR CODE HERE ====================== 21 | % Instructions: Go over every centroid and compute mean of all points that 22 | % belong to it. Concretely, the row vector centroids(i, :) 23 | % should contain the mean of the data points assigned to 24 | % centroid i. 25 | % 26 | % Note: You can use a for-loop over the centroids to compute this. 27 | % 28 | 29 | for i = 1:K 30 | X_idx = (idx == i); 31 | centroids(i,:) = (X_idx' * X) / sum(X_idx); 32 | end 33 | 34 | % ============================================================= 35 | 36 | 37 | end 38 | 39 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex7/displayData.m: -------------------------------------------------------------------------------- 1 | function [h, display_array] = displayData(X, example_width) 2 | %DISPLAYDATA Display 2D data in a nice grid 3 | % [h, display_array] = DISPLAYDATA(X, example_width) displays 2D data 4 | % stored in X in a nice grid. It returns the figure handle h and the 5 | % displayed array if requested. 6 | 7 | % Set example_width automatically if not passed in 8 | if ~exist('example_width', 'var') || isempty(example_width) 9 | example_width = round(sqrt(size(X, 2))); 10 | end 11 | 12 | % Gray Image 13 | colormap(gray); 14 | 15 | % Compute rows, cols 16 | [m n] = size(X); 17 | example_height = (n / example_width); 18 | 19 | % Compute number of items to display 20 | display_rows = floor(sqrt(m)); 21 | display_cols = ceil(m / display_rows); 22 | 23 | % Between images padding 24 | pad = 1; 25 | 26 | % Setup blank display 27 | display_array = - ones(pad + display_rows * (example_height + pad), ... 28 | pad + display_cols * (example_width + pad)); 29 | 30 | % Copy each example into a patch on the display array 31 | curr_ex = 1; 32 | for j = 1:display_rows 33 | for i = 1:display_cols 34 | if curr_ex > m, 35 | break; 36 | end 37 | % Copy the patch 38 | 39 | % Get the max value of the patch 40 | max_val = max(abs(X(curr_ex, :))); 41 | display_array(pad + (j - 1) * (example_height + pad) + (1:example_height), ... 42 | pad + (i - 1) * (example_width + pad) + (1:example_width)) = ... 
43 | reshape(X(curr_ex, :), example_height, example_width) / max_val; 44 | curr_ex = curr_ex + 1; 45 | end 46 | if curr_ex > m, 47 | break; 48 | end 49 | end 50 | 51 | % Display Image 52 | h = imagesc(display_array, [-1 1]); 53 | 54 | % Do not show axis 55 | axis image off 56 | 57 | drawnow; 58 | 59 | end 60 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex7/drawLine.m: -------------------------------------------------------------------------------- 1 | function drawLine(p1, p2, varargin) 2 | %DRAWLINE Draws a line from point p1 to point p2 3 | % DRAWLINE(p1, p2) Draws a line from point p1 to point p2 and holds the 4 | % current figure 5 | 6 | plot([p1(1) p2(1)], [p1(2) p2(2)], varargin{:}); 7 | 8 | end -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex7/ex7.m: -------------------------------------------------------------------------------- 1 | %% Machine Learning Online Class 2 | % Exercise 7 | Principle Component Analysis and K-Means Clustering 3 | % 4 | % Instructions 5 | % ------------ 6 | % 7 | % This file contains code that helps you get started on the 8 | % exercise. You will need to complete the following functions: 9 | % 10 | % pca.m 11 | % projectData.m 12 | % recoverData.m 13 | % computeCentroids.m 14 | % findClosestCentroids.m 15 | % kMeansInitCentroids.m 16 | % 17 | % For this exercise, you will not need to change any code in this file, 18 | % or any other files other than those mentioned above. 19 | % 20 | 21 | %% Initialization 22 | clear ; close all; clc 23 | 24 | %% ================= Part 1: Find Closest Centroids ==================== 25 | % To help you implement K-Means, we have divided the learning algorithm 26 | % into two functions -- findClosestCentroids and computeCentroids. In this 27 | % part, you should complete the code in the findClosestCentroids function. 28 | % 29 | fprintf('Finding closest centroids.\n\n'); 30 | 31 | % Load an example dataset that we will be using 32 | load('ex7data2.mat'); 33 | 34 | % Select an initial set of centroids 35 | K = 3; % 3 Centroids 36 | initial_centroids = [3 3; 6 2; 8 5]; 37 | 38 | % Find the closest centroids for the examples using the 39 | % initial_centroids 40 | idx = findClosestCentroids(X, initial_centroids); 41 | 42 | fprintf('Closest centroids for the first 3 examples: \n') 43 | fprintf(' %d', idx(1:3)); 44 | fprintf('\n(the closest centroids should be 1, 3, 2 respectively)\n'); 45 | 46 | fprintf('Program paused. Press enter to continue.\n'); 47 | %pause; 48 | 49 | %% ===================== Part 2: Compute Means ========================= 50 | % After implementing the closest centroids function, you should now 51 | % complete the computeCentroids function. 52 | % 53 | fprintf('\nComputing centroids means.\n\n'); 54 | 55 | % Compute means based on the closest centroids found in the previous part. 56 | centroids = computeCentroids(X, idx, K); 57 | 58 | fprintf('Centroids computed after initial finding of closest centroids: \n') 59 | fprintf(' %f %f \n' , centroids'); 60 | fprintf('\n(the centroids should be\n'); 61 | fprintf(' [ 2.428301 3.157924 ]\n'); 62 | fprintf(' [ 5.813503 2.633656 ]\n'); 63 | fprintf(' [ 7.119387 3.616684 ]\n\n'); 64 | 65 | fprintf('Program paused. 
Press enter to continue.\n'); 66 | %pause; 67 | 68 | 69 | %% =================== Part 3: K-Means Clustering ====================== 70 | % After you have completed the two functions computeCentroids and 71 | % findClosestCentroids, you have all the necessary pieces to run the 72 | % kMeans algorithm. In this part, you will run the K-Means algorithm on 73 | % the example dataset we have provided. 74 | % 75 | fprintf('\nRunning K-Means clustering on example dataset.\n\n'); 76 | 77 | % Load an example dataset 78 | load('ex7data2.mat'); 79 | 80 | % Settings for running K-Means 81 | K = 3; 82 | max_iters = 10; 83 | 84 | % For consistency, here we set centroids to specific values 85 | % but in practice you want to generate them automatically, such as by 86 | % setting them to be random examples (as can be seen in 87 | % kMeansInitCentroids). 88 | initial_centroids = [3 3; 6 2; 8 5;];% 1 1; 2 2; 5 5; 8 8; 7 7; 6 6;]; 89 | 90 | %initial_centroids = kMeansInitCentroids(initial_centroids, 3); 91 | 92 | % Run K-Means algorithm. The 'true' at the end tells our function to plot 93 | % the progress of K-Means 94 | [centroids, idx] = runkMeans(X, initial_centroids, max_iters, true); 95 | fprintf('\nK-Means Done.\n\n'); 96 | 97 | fprintf('Program paused. Press enter to continue.\n'); 98 | pause; 99 | 100 | %% ============= Part 4: K-Means Clustering on Pixels =============== 101 | % In this exercise, you will use K-Means to compress an image. To do this, 102 | % you will first run K-Means on the colors of the pixels in the image and 103 | % then you will map each pixel onto its closest centroid. 104 | % 105 | % You should now complete the code in kMeansInitCentroids.m 106 | % 107 | 108 | fprintf('\nRunning K-Means clustering on pixels from an image.\n\n'); 109 | 110 | % Load an image of a bird 111 | A = double(imread('bird_small.png')); 112 | 113 | % If imread does not work for you, you can try instead 114 | % load ('bird_small.mat'); 115 | 116 | A = A / 255; % Divide by 255 so that all values are in the range 0 - 1 117 | 118 | % Size of the image 119 | img_size = size(A); 120 | 121 | % Reshape the image into an Nx3 matrix where N = number of pixels. 122 | % Each row will contain the Red, Green and Blue pixel values 123 | % This gives us our dataset matrix X that we will use K-Means on. 124 | X = reshape(A, img_size(1) * img_size(2), 3); 125 | 126 | % Run your K-Means algorithm on this data 127 | % You should try different values of K and max_iters here 128 | K = 16; 129 | max_iters = 10; 130 | 131 | % When using K-Means, it is important to initialize the centroids 132 | % randomly. 133 | % You should complete the code in kMeansInitCentroids.m before proceeding 134 | initial_centroids = kMeansInitCentroids(X, K); 135 | 136 | % Run K-Means 137 | [centroids, idx] = runkMeans(X, initial_centroids, max_iters, true); 138 | 139 | fprintf('Program paused. Press enter to continue.\n'); 140 | pause; 141 | 142 | 143 | %% ================= Part 5: Image Compression ====================== 144 | % In this part of the exercise, you will use the clusters of K-Means to 145 | % compress an image. To do this, we first find the closest clusters for 146 | % each example. After that, we replace each pixel with the value of its closest centroid. 147 | 148 | fprintf('\nApplying K-Means to compress an image.\n\n'); 149 | 150 | % Find closest cluster members 151 | idx = findClosestCentroids(X, centroids); 152 | 153 | % Essentially, now we have represented the image X in terms of the 154 | % indices in idx.
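% As a rough sense of the savings (assuming the 128 x 128 bird_small.png used
% above): the raw image needs 24 bits per pixel, i.e. 128*128*24 = 393,216 bits,
% while the compressed form stores only a 4-bit centroid index per pixel plus a
% 16-entry RGB palette, i.e. 128*128*4 + 16*24 = 65,920 bits -- roughly 6x smaller.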
155 | 156 | % We can now recover the image from the indices (idx) by mapping each pixel 157 | % (specified by its index in idx) to the centroid value 158 | X_recovered = centroids(idx,:); 159 | 160 | % Reshape the recovered image into proper dimensions 161 | X_recovered = reshape(X_recovered, img_size(1), img_size(2), 3); 162 | 163 | % Display the original image 164 | subplot(1, 2, 1); 165 | imagesc(A); 166 | title('Original'); 167 | 168 | % Display compressed image side by side 169 | subplot(1, 2, 2); 170 | imagesc(X_recovered) 171 | title(sprintf('Compressed, with %d colors.', K)); 172 | 173 | 174 | fprintf('Program paused. Press enter to continue.\n'); 175 | pause; 176 | 177 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex7/ex7_pca.m: -------------------------------------------------------------------------------- 1 | %% Machine Learning Online Class 2 | % Exercise 7 | Principal Component Analysis and K-Means Clustering 3 | % 4 | % Instructions 5 | % ------------ 6 | % 7 | % This file contains code that helps you get started on the 8 | % exercise. You will need to complete the following functions: 9 | % 10 | % pca.m 11 | % projectData.m 12 | % recoverData.m 13 | % computeCentroids.m 14 | % findClosestCentroids.m 15 | % kMeansInitCentroids.m 16 | % 17 | % For this exercise, you will not need to change any code in this file, 18 | % or any other files other than those mentioned above. 19 | % 20 | 21 | %% Initialization 22 | clear ; close all; clc 23 | 24 | %% ================== Part 1: Load Example Dataset =================== 25 | % We start this exercise by using a small dataset that is easy to 26 | % visualize 27 | % 28 | fprintf('Visualizing example dataset for PCA.\n\n'); 29 | 30 | % The following command loads the dataset. You should now have the 31 | % variable X in your environment 32 | load ('ex7data1.mat'); 33 | 34 | % Visualize the example dataset 35 | plot(X(:, 1), X(:, 2), 'bo'); 36 | axis([0.5 6.5 2 8]); axis square; 37 | 38 | fprintf('Program paused. Press enter to continue.\n'); 39 | %pause; 40 | 41 | 42 | %% =============== Part 2: Principal Component Analysis =============== 43 | % You should now implement PCA, a dimension reduction technique. You 44 | % should complete the code in pca.m 45 | % 46 | fprintf('\nRunning PCA on example dataset.\n\n'); 47 | 48 | % Before running PCA, it is important to first normalize X 49 | [X_norm, mu, sigma] = featureNormalize(X); 50 | 51 | % Run PCA 52 | [U, S] = pca(X_norm); 53 | 54 | % Compute mu, the mean of each feature 55 | 56 | % Draw the eigenvectors centered at mean of data. These lines show the 57 | % directions of maximum variations in the dataset. 58 | hold on; 59 | drawLine(mu, mu + 1.5 * S(1,1) * U(:,1)', '-k', 'LineWidth', 2); 60 | drawLine(mu, mu + 1.5 * S(2,2) * U(:,2)', '-k', 'LineWidth', 2); 61 | hold off; 62 | 63 | fprintf('Top eigenvector: \n'); 64 | fprintf(' U(:,1) = %f %f \n', U(1,1), U(2,1)); 65 | fprintf('\n(you should expect to see -0.707107 -0.707107)\n'); 66 | 67 | fprintf('Program paused. Press enter to continue.\n'); 68 | %pause; 69 | 70 | 71 | %% =================== Part 3: Dimension Reduction =================== 72 | % You should now implement the projection step to map the data onto the 73 | % first k eigenvectors. The code will then plot the data in this reduced 74 | % dimensional space. This will show you what the data looks like when 75 | % using only the corresponding eigenvectors to reconstruct it.
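% For reference (a sketch matching projectData.m and recoverData.m further
% down in this dump), with U_reduce = U(:, 1:K) the two steps are simply:
%   Z     = X_norm * U_reduce;    % m x K reduced representation
%   X_rec = Z * U_reduce';        % m x n approximate reconstruction of X_norm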
76 | % 77 | % You should complete the code in projectData.m 78 | % 79 | fprintf('\nDimension reduction on example dataset.\n\n'); 80 | 81 | % Plot the normalized dataset (returned from pca) 82 | plot(X_norm(:, 1), X_norm(:, 2), 'bo'); 83 | axis([-4 3 -4 3]); axis square 84 | 85 | % Project the data onto K = 1 dimension 86 | K = 1; 87 | Z = projectData(X_norm, U, K); 88 | fprintf('Projection of the first example: %f\n', Z(1)); 89 | fprintf('\n(this value should be about 1.481274)\n\n'); 90 | 91 | X_rec = recoverData(Z, U, K); 92 | fprintf('Approximation of the first example: %f %f\n', X_rec(1, 1), X_rec(1, 2)); 93 | fprintf('\n(this value should be about -1.047419 -1.047419)\n\n'); 94 | 95 | % Draw lines connecting the projected points to the original points 96 | hold on; 97 | plot(X_rec(:, 1), X_rec(:, 2), 'ro'); 98 | for i = 1:size(X_norm, 1) 99 | drawLine(X_norm(i,:), X_rec(i,:), '--k', 'LineWidth', 1); 100 | end 101 | hold off 102 | 103 | fprintf('Program paused. Press enter to continue.\n'); 104 | %pause; 105 | 106 | %% =============== Part 4: Loading and Visualizing Face Data ============= 107 | % We start the exercise by first loading and visualizing the dataset. 108 | % The following code will load the dataset into your environment 109 | % 110 | fprintf('\nLoading face dataset.\n\n'); 111 | 112 | % Load Face dataset 113 | load ('ex7faces.mat') 114 | 115 | % Display the first 100 faces in the dataset 116 | displayData(X(1:end, :)); 117 | 118 | fprintf('Program paused. Press enter to continue.\n'); 119 | %pause; 120 | 121 | %% =========== Part 5: PCA on Face Data: Eigenfaces =================== 122 | % Run PCA and visualize the eigenvectors which are in this case eigenfaces 123 | % We display the first 36 eigenfaces. 124 | % 125 | fprintf(['\nRunning PCA on face dataset.\n' ... 126 | '(this might take a minute or two ...)\n\n']); 127 | 128 | % Before running PCA, it is important to first normalize X by subtracting 129 | % the mean value from each feature 130 | [X_norm, mu, sigma] = featureNormalize(X); 131 | 132 | displayData(X_norm(1:100, :)); 133 | 134 | % Run PCA 135 | [U, S] = pca(X_norm); 136 | 137 | % Visualize the top 36 eigenvectors found 138 | displayData(U(:, 1:36)'); 139 | 140 | fprintf('Program paused. Press enter to continue.\n'); 141 | %pause; 142 | 143 | 144 | %% ============= Part 6: Dimension Reduction for Faces ================= 145 | % Project images to the eigen space using the top k eigenvectors 146 | % If you are applying a machine learning algorithm 147 | fprintf('\nDimension reduction for face dataset.\n\n'); 148 | 149 | K = 100; 150 | Z = projectData(X_norm, U, K); 151 | 152 | fprintf('The projected data Z has a size of: ') 153 | fprintf('%d ', size(Z)); 154 | 155 | fprintf('\n\nProgram paused. 
Press enter to continue.\n'); 156 | %pause; 157 | 158 | %% ==== Part 7: Visualization of Faces after PCA Dimension Reduction ==== 159 | % Project images to the eigen space using the top K eigen vectors and 160 | % visualize only using those K dimensions 161 | % Compare to the original input, which is also displayed 162 | 163 | fprintf('\nVisualizing the projected (reduced dimension) faces.\n\n'); 164 | 165 | K = 100; 166 | X_rec = recoverData(Z, U, K); 167 | 168 | % Display normalized data 169 | subplot(1, 2, 1); 170 | displayData(X_norm(1:100,:)); 171 | title('Original faces'); 172 | axis square; 173 | 174 | % Display reconstructed data from only k eigenfaces 175 | subplot(1, 2, 2); 176 | displayData(X_rec(1:100,:)); 177 | title('Recovered faces'); 178 | axis square; 179 | 180 | fprintf('Program paused. Press enter to continue.\n'); 181 | %pause; 182 | 183 | 184 | %% === Part 8(a): Optional (ungraded) Exercise: PCA for Visualization === 185 | % One useful application of PCA is to use it to visualize high-dimensional 186 | % data. In the last K-Means exercise you ran K-Means on 3-dimensional 187 | % pixel colors of an image. We first visualize this output in 3D, and then 188 | % apply PCA to obtain a visualization in 2D. 189 | 190 | close all; close all; clc 191 | 192 | % Reload the image from the previous exercise and run K-Means on it 193 | % For this to work, you need to complete the K-Means assignment first 194 | A = double(imread('bird_small.png')); 195 | 196 | % If imread does not work for you, you can try instead 197 | % load ('bird_small.mat'); 198 | 199 | A = A / 255; 200 | img_size = size(A); 201 | X = reshape(A, img_size(1) * img_size(2), 3); 202 | K = 16; 203 | max_iters = 10; 204 | initial_centroids = kMeansInitCentroids(X, K); 205 | [centroids, idx] = runkMeans(X, initial_centroids, max_iters); 206 | 207 | % Sample 1000 random indexes (since working with all the data is 208 | % too expensive. If you have a fast computer, you may increase this. 209 | sel = floor(rand(1000, 1) * size(X, 1)) + 1; 210 | 211 | % Setup Color Palette 212 | palette = hsv(K); 213 | colors = palette(idx(sel), :); 214 | 215 | % Visualize the data and centroid memberships in 3D 216 | figure; 217 | scatter3(X(sel, 1), X(sel, 2), X(sel, 3), 10, colors); 218 | title('Pixel dataset plotted in 3D. Color shows centroid memberships'); 219 | fprintf('Program paused. Press enter to continue.\n'); 220 | %pause; 221 | 222 | %% === Part 8(b): Optional (ungraded) Exercise: PCA for Visualization === 223 | % Use PCA to project this cloud to 2D for visualization 224 | 225 | % Subtract the mean to use PCA 226 | [X_norm, mu, sigma] = featureNormalize(X); 227 | 228 | % PCA and project the data to 2D 229 | [U, S] = pca(X_norm); 230 | Z = projectData(X_norm, U, 2); 231 | 232 | % Plot in 2D 233 | figure; 234 | plotDataPoints(Z(sel, :), idx(sel), K); 235 | title('Pixel dataset plotted in 2D, using PCA for dimensionality reduction'); 236 | fprintf('Program paused. 
Press enter to continue.\n'); 237 | %pause; 238 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex7/ex7data1.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/loveunk/machine-learning-excercise-notebook-python/b94f6c07388234c086a40de0132550653aa17cc3/original-machine-learning-MATLAB/ex7/ex7data1.mat -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex7/ex7data2.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/loveunk/machine-learning-excercise-notebook-python/b94f6c07388234c086a40de0132550653aa17cc3/original-machine-learning-MATLAB/ex7/ex7data2.mat -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex7/ex7faces.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/loveunk/machine-learning-excercise-notebook-python/b94f6c07388234c086a40de0132550653aa17cc3/original-machine-learning-MATLAB/ex7/ex7faces.mat -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex7/featureNormalize.m: -------------------------------------------------------------------------------- 1 | function [X_norm, mu, sigma] = featureNormalize(X) 2 | %FEATURENORMALIZE Normalizes the features in X 3 | % FEATURENORMALIZE(X) returns a normalized version of X where 4 | % the mean value of each feature is 0 and the standard deviation 5 | % is 1. This is often a good preprocessing step to do when 6 | % working with learning algorithms. 7 | 8 | mu = mean(X); 9 | X_norm = bsxfun(@minus, X, mu); 10 | 11 | sigma = std(X_norm); 12 | X_norm = bsxfun(@rdivide, X_norm, sigma); 13 | 14 | 15 | % ============================================================ 16 | 17 | end 18 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex7/findClosestCentroids.m: -------------------------------------------------------------------------------- 1 | function idx = findClosestCentroids(X, centroids) 2 | %FINDCLOSESTCENTROIDS computes the centroid memberships for every example 3 | % idx = FINDCLOSESTCENTROIDS (X, centroids) returns the closest centroids 4 | % in idx for a dataset X where each row is a single example. idx = m x 1 5 | % vector of centroid assignments (i.e. each entry in range [1..K]) 6 | % 7 | 8 | % Set K 9 | K = size(centroids, 1); 10 | 11 | % You need to return the following variables correctly. 12 | idx = zeros(size(X,1), 1); 13 | 14 | % ====================== YOUR CODE HERE ====================== 15 | % Instructions: Go over every example, find its closest centroid, and store 16 | % the index inside idx at the appropriate location. 17 | % Concretely, idx(i) should contain the index of the centroid 18 | % closest to example i. Hence, it should be a value in the 19 | % range 1..K 20 | % 21 | % Note: You can use a for-loop over the examples to compute this. 
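% One possible fully vectorized alternative (a sketch using only base
% MATLAB/Octave functions), in case you prefer to avoid the explicit loop:
%   D = bsxfun(@plus, sum(X.^2, 2), sum(centroids.^2, 2)') - 2 * X * centroids';
%   [~, idx] = min(D, [], 2);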
22 | % 23 | 24 | for sample_i = 1:size(X,1) 25 | distance = sum((repmat(X(sample_i,:), K, 1) - centroids).^2, 2); 26 | [Y, I] = min(distance); 27 | idx(sample_i) = I; 28 | end 29 | 30 | % ============================================================= 31 | 32 | end 33 | 34 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex7/kMeansInitCentroids.m: -------------------------------------------------------------------------------- 1 | function centroids = kMeansInitCentroids(X, K) 2 | %KMEANSINITCENTROIDS This function initializes K centroids that are to be 3 | %used in K-Means on the dataset X 4 | % centroids = KMEANSINITCENTROIDS(X, K) returns K initial centroids to be 5 | % used with the K-Means on the dataset X 6 | % 7 | 8 | % You should return this values correctly 9 | centroids = zeros(K, size(X, 2)); 10 | 11 | % ====================== YOUR CODE HERE ====================== 12 | % Instructions: You should set centroids to randomly chosen examples from 13 | % the dataset X 14 | % 15 | 16 | randidx = randperm(size(X,1)); 17 | 18 | centroids = X(randidx(1:K), :); 19 | 20 | % ============================================================= 21 | 22 | end 23 | 24 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex7/pca.m: -------------------------------------------------------------------------------- 1 | function [U, S] = pca(X) 2 | %PCA Run principal component analysis on the dataset X 3 | % [U, S, X] = pca(X) computes eigenvectors of the covariance matrix of X 4 | % Returns the eigenvectors U, the eigenvalues (on diagonal) in S 5 | % 6 | 7 | % Useful values 8 | [m, n] = size(X); 9 | 10 | % You need to return the following variables correctly. 11 | U = zeros(n); 12 | S = zeros(n); 13 | 14 | % ====================== YOUR CODE HERE ====================== 15 | % Instructions: You should first compute the covariance matrix. Then, you 16 | % should use the "svd" function to compute the eigenvectors 17 | % and eigenvalues of the covariance matrix. 18 | % 19 | % Note: When computing the covariance matrix, remember to divide by m (the 20 | % number of examples). 21 | % 22 | covariance = (1/m) * X' * X; 23 | 24 | [U, S, V] = svd(covariance); 25 | 26 | % ========================================================================= 27 | 28 | end 29 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex7/plotDataPoints.m: -------------------------------------------------------------------------------- 1 | function plotDataPoints(X, idx, K) 2 | %PLOTDATAPOINTS plots data points in X, coloring them so that those with the same 3 | %index assignments in idx have the same color 4 | % PLOTDATAPOINTS(X, idx, K) plots data points in X, coloring them so that those 5 | % with the same index assignments in idx have the same color 6 | 7 | % Create palette 8 | palette = hsv(K + 1); 9 | colors = palette(idx, :); 10 | 11 | % Plot the data 12 | scatter(X(:,1), X(:,2), 15, colors); 13 | 14 | end 15 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex7/plotProgresskMeans.m: -------------------------------------------------------------------------------- 1 | function plotProgresskMeans(X, centroids, previous, idx, K, i) 2 | %PLOTPROGRESSKMEANS is a helper function that displays the progress of 3 | %k-Means as it is running. It is intended for use only with 2D data. 
4 | % PLOTPROGRESSKMEANS(X, centroids, previous, idx, K, i) plots the data 5 | % points with colors assigned to each centroid. With the previous 6 | % centroids, it also plots a line between the previous locations and 7 | % current locations of the centroids. 8 | % 9 | 10 | % Plot the examples 11 | plotDataPoints(X, idx, K); 12 | 13 | % Plot the centroids as black x's 14 | plot(centroids(:,1), centroids(:,2), 'x', ... 15 | 'MarkerEdgeColor','k', ... 16 | 'MarkerSize', 10, 'LineWidth', 3); 17 | 18 | % Plot the history of the centroids with lines 19 | for j=1:size(centroids,1) 20 | drawLine(centroids(j, :), previous(j, :)); 21 | end 22 | 23 | % Title 24 | title(sprintf('Iteration number %d', i)) 25 | 26 | end 27 | 28 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex7/projectData.m: -------------------------------------------------------------------------------- 1 | function Z = projectData(X, U, K) 2 | %PROJECTDATA Computes the reduced data representation when projecting only 3 | %on to the top k eigenvectors 4 | % Z = projectData(X, U, K) computes the projection of 5 | % the normalized inputs X into the reduced dimensional space spanned by 6 | % the first K columns of U. It returns the projected examples in Z. 7 | % 8 | 9 | % You need to return the following variables correctly. 10 | Z = zeros(size(X, 1), K); 11 | 12 | % ====================== YOUR CODE HERE ====================== 13 | % Instructions: Compute the projection of the data using only the top K 14 | % eigenvectors in U (first K columns). 15 | % For the i-th example X(i,:), the projection on to the k-th 16 | % eigenvector is given as follows: 17 | % x = X(i, :)'; 18 | % projection_k = x' * U(:, k); 19 | % 20 | U_reduce = U(:, 1:K); 21 | 22 | Z = X * U_reduce; 23 | 24 | % ============================================================= 25 | 26 | end 27 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex7/recoverData.m: -------------------------------------------------------------------------------- 1 | function X_rec = recoverData(Z, U, K) 2 | %RECOVERDATA Recovers an approximation of the original data when using the 3 | %projected data 4 | % X_rec = RECOVERDATA(Z, U, K) recovers an approximation the 5 | % original data that has been reduced to K dimensions. It returns the 6 | % approximate reconstruction in X_rec. 7 | % 8 | 9 | % You need to return the following variables correctly. 10 | X_rec = zeros(size(Z, 1), size(U, 1)); 11 | 12 | % ====================== YOUR CODE HERE ====================== 13 | % Instructions: Compute the approximation of the data by projecting back 14 | % onto the original space using the top K eigenvectors in U. 15 | % 16 | % For the i-th example Z(i,:), the (approximate) 17 | % recovered data for dimension j is given as follows: 18 | % v = Z(i, :)'; 19 | % recovered_j = v' * U(j, 1:K)'; 20 | % 21 | % Notice that U(j, 1:K) is a row vector. 22 | % 23 | 24 | X_rec = Z * U(:, 1:K)'; 25 | 26 | % ============================================================= 27 | 28 | end 29 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex7/runkMeans.m: -------------------------------------------------------------------------------- 1 | function [centroids, idx] = runkMeans(X, initial_centroids, ... 
2 | max_iters, plot_progress) 3 | %RUNKMEANS runs the K-Means algorithm on data matrix X, where each row of X 4 | %is a single example 5 | % [centroids, idx] = RUNKMEANS(X, initial_centroids, max_iters, ... 6 | % plot_progress) runs the K-Means algorithm on data matrix X, where each 7 | % row of X is a single example. It uses initial_centroids as the 8 | % initial centroids. max_iters specifies the total number of iterations 9 | % of K-Means to execute. plot_progress is a true/false flag that 10 | % indicates if the function should also plot its progress as the 11 | % learning happens. This is set to false by default. runkMeans returns 12 | % centroids, a Kxn matrix of the computed centroids and idx, a m x 1 13 | % vector of centroid assignments (i.e. each entry in range [1..K]) 14 | % 15 | 16 | % Set default value for plot progress 17 | if ~exist('plot_progress', 'var') || isempty(plot_progress) 18 | plot_progress = false; 19 | end 20 | 21 | % Plot the data if we are plotting progress 22 | if plot_progress 23 | figure; 24 | hold on; 25 | end 26 | 27 | % Initialize values 28 | [m n] = size(X); 29 | K = size(initial_centroids, 1); 30 | centroids = initial_centroids; 31 | previous_centroids = centroids; 32 | idx = zeros(m, 1); 33 | 34 | % Run K-Means 35 | for i=1:max_iters 36 | 37 | % Output progress 38 | fprintf('K-Means iteration %d/%d...\n', i, max_iters); 39 | if exist('OCTAVE_VERSION') 40 | fflush(stdout); 41 | end 42 | 43 | % For each example in X, assign it to the closest centroid 44 | idx = findClosestCentroids(X, centroids); 45 | 46 | % Optionally, plot progress here 47 | if plot_progress 48 | plotProgresskMeans(X, centroids, previous_centroids, idx, K, i); 49 | previous_centroids = centroids; 50 | fprintf('Press enter to continue.\n'); 51 | pause; 52 | end 53 | 54 | % Given the memberships, compute new centroids 55 | centroids = computeCentroids(X, idx, K); 56 | end 57 | 58 | % Hold off if we are plotting progress 59 | if plot_progress 60 | hold off; 61 | end 62 | 63 | end 64 | 65 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex8/checkCostFunction.m: -------------------------------------------------------------------------------- 1 | function checkCostFunction(lambda) 2 | %CHECKCOSTFUNCTION Creates a collaborative filtering problem 3 | %to check your cost function and gradients 4 | % CHECKCOSTFUNCTION(lambda) Creates a collaborative filtering problem 5 | % to check your cost function and gradients; it will output the 6 | % analytical gradients produced by your code and the numerical gradients 7 | % (computed using computeNumericalGradient). These two gradient 8 | % computations should result in very similar values. 9 | 10 | % Set lambda 11 | if ~exist('lambda', 'var') || isempty(lambda) 12 | lambda = 0; 13 | end 14 | 15 | %% Create small problem 16 | X_t = rand(4, 3); 17 | Theta_t = rand(5, 3); 18 | 19 | % Zap out most entries 20 | Y = X_t * Theta_t'; 21 | Y(rand(size(Y)) > 0.5) = 0; 22 | R = zeros(size(Y)); 23 | R(Y ~= 0) = 1; 24 | 25 | %% Run Gradient Checking 26 | X = randn(size(X_t)); 27 | Theta = randn(size(Theta_t)); 28 | num_users = size(Y, 2); 29 | num_movies = size(Y, 1); 30 | num_features = size(Theta_t, 2); 31 | 32 | numgrad = computeNumericalGradient( ... 33 | @(t) cofiCostFunc(t, Y, R, num_users, num_movies, ... 34 | num_features, lambda), [X(:); Theta(:)]); 35 | 36 | [cost, grad] = cofiCostFunc([X(:); Theta(:)], Y, R, num_users, ...
37 | num_movies, num_features, lambda); 38 | 39 | disp([numgrad grad]); 40 | fprintf(['The above two columns you get should be very similar.\n' ... 41 | '(Left-Your Numerical Gradient, Right-Analytical Gradient)\n\n']); 42 | 43 | diff = norm(numgrad-grad)/norm(numgrad+grad); 44 | fprintf(['If your cost function implementation is correct, then \n' ... 45 | 'the relative difference will be small (less than 1e-9). \n' ... 46 | '\nRelative Difference: %g\n'], diff); 47 | 48 | end -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex8/cofiCostFunc.m: -------------------------------------------------------------------------------- 1 | function [J, grad] = cofiCostFunc(params, Y, R, num_users, num_movies, ... 2 | num_features, lambda) 3 | %COFICOSTFUNC Collaborative filtering cost function 4 | % [J, grad] = COFICOSTFUNC(params, Y, R, num_users, num_movies, ... 5 | % num_features, lambda) returns the cost and gradient for the 6 | % collaborative filtering problem. 7 | % 8 | 9 | % Unfold the U and W matrices from params 10 | X = reshape(params(1:num_movies*num_features), num_movies, num_features); 11 | Theta = reshape(params(num_movies*num_features+1:end), ... 12 | num_users, num_features); 13 | 14 | 15 | % You need to return the following values correctly 16 | J = 0; 17 | X_grad = zeros(size(X)); 18 | Theta_grad = zeros(size(Theta)); 19 | 20 | % ====================== YOUR CODE HERE ====================== 21 | % Instructions: Compute the cost function and gradient for collaborative 22 | % filtering. Concretely, you should first implement the cost 23 | % function (without regularization) and make sure it is 24 | % matches our costs. After that, you should implement the 25 | % gradient and use the checkCostFunction routine to check 26 | % that the gradient is correct. Finally, you should implement 27 | % regularization. 28 | % 29 | % Notes: X - num_movies x num_features matrix of movie features 30 | % Theta - num_users x num_features matrix of user features 31 | % Y - num_movies x num_users matrix of user ratings of movies 32 | % R - num_movies x num_users matrix, where R(i, j) = 1 if the 33 | % i-th movie was rated by the j-th user 34 | % 35 | % You should set the following variables correctly: 36 | % 37 | % X_grad - num_movies x num_features matrix, containing the 38 | % partial derivatives w.r.t. to each element of X 39 | % Theta_grad - num_users x num_features matrix, containing the 40 | % partial derivatives w.r.t. to each element of Theta 41 | % 42 | 43 | J = 0.5 * sum(sum(((X * Theta' - Y) .* R).^2)) + ... 44 | (lambda/2) * (sum(sum(Theta.^2)) + sum(sum(X.^2))); 45 | 46 | X_grad = (X * Theta' - Y) .* R * Theta + lambda * X; 47 | Theta_grad = ((X * Theta' - Y) .* R)' * X + lambda * Theta; 48 | 49 | % ============================================================= 50 | 51 | grad = [X_grad(:); Theta_grad(:)]; 52 | 53 | end 54 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex8/computeNumericalGradient.m: -------------------------------------------------------------------------------- 1 | function numgrad = computeNumericalGradient(J, theta) 2 | %COMPUTENUMERICALGRADIENT Computes the gradient using "finite differences" 3 | %and gives us a numerical estimate of the gradient. 4 | % numgrad = COMPUTENUMERICALGRADIENT(J, theta) computes the numerical 5 | % gradient of the function J around theta. Calling y = J(theta) should 6 | % return the function value at theta. 
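% In other words, the loop below forms the standard central-difference estimate
% with e = 1e-4:
%   numgrad(i) = ( J(theta + e*e_i) - J(theta - e*e_i) ) / (2*e)
% where e_i is the i-th unit vector; the truncation error is O(e^2).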
7 | 8 | % Notes: The following code implements numerical gradient checking, and 9 | % returns the numerical gradient.It sets numgrad(i) to (a numerical 10 | % approximation of) the partial derivative of J with respect to the 11 | % i-th input argument, evaluated at theta. (i.e., numgrad(i) should 12 | % be the (approximately) the partial derivative of J with respect 13 | % to theta(i).) 14 | % 15 | 16 | numgrad = zeros(size(theta)); 17 | perturb = zeros(size(theta)); 18 | e = 1e-4; 19 | for p = 1:numel(theta) 20 | % Set perturbation vector 21 | perturb(p) = e; 22 | loss1 = J(theta - perturb); 23 | loss2 = J(theta + perturb); 24 | % Compute Numerical Gradient 25 | numgrad(p) = (loss2 - loss1) / (2*e); 26 | perturb(p) = 0; 27 | end 28 | 29 | end 30 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex8/estimateGaussian.m: -------------------------------------------------------------------------------- 1 | function [mu sigma2] = estimateGaussian(X) 2 | %ESTIMATEGAUSSIAN This function estimates the parameters of a 3 | %Gaussian distribution using the data in X 4 | % [mu sigma2] = estimateGaussian(X), 5 | % The input X is the dataset with each n-dimensional data point in one row 6 | % The output is an n-dimensional vector mu, the mean of the data set 7 | % and the variances sigma^2, an n x 1 vector 8 | % 9 | 10 | % Useful variables 11 | [m, n] = size(X); 12 | 13 | % You should return these values correctly 14 | mu = zeros(n, 1); 15 | sigma2 = zeros(n, 1); 16 | 17 | % ====================== YOUR CODE HERE ====================== 18 | % Instructions: Compute the mean of the data and the variances 19 | % In particular, mu(i) should contain the mean of 20 | % the data for the i-th feature and sigma2(i) 21 | % should contain variance of the i-th feature. 22 | % 23 | 24 | 25 | mu = mean(X); 26 | sigma2 = ((m-1)/m)*var(X); 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | % ============================================================= 35 | 36 | 37 | end 38 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex8/ex8.m: -------------------------------------------------------------------------------- 1 | %% Machine Learning Online Class 2 | % Exercise 8 | Anomaly Detection and Collaborative Filtering 3 | % 4 | % Instructions 5 | % ------------ 6 | % 7 | % This file contains code that helps you get started on the 8 | % exercise. You will need to complete the following functions: 9 | % 10 | % estimateGaussian.m 11 | % selectThreshold.m 12 | % cofiCostFunc.m 13 | % 14 | % For this exercise, you will not need to change any code in this file, 15 | % or any other files other than those mentioned above. 16 | % 17 | 18 | %% Initialization 19 | clear ; close all; clc 20 | 21 | %% ================== Part 1: Load Example Dataset =================== 22 | % We start this exercise by using a small dataset that is easy to 23 | % visualize. 24 | % 25 | % Our example case consists of 2 network server statistics across 26 | % several machines: the latency and throughput of each machine. 27 | % This exercise will help us find possibly faulty (or very fast) machines. 28 | % 29 | 30 | fprintf('Visualizing example dataset for outlier detection.\n\n'); 31 | 32 | % The following command loads the dataset. 
You should now have the 33 | % variables X, Xval, yval in your environment 34 | load('ex8data1.mat'); 35 | 36 | % Visualize the example dataset 37 | plot(X(:, 1), X(:, 2), 'bx'); 38 | axis([0 30 0 30]); 39 | xlabel('Latency (ms)'); 40 | ylabel('Throughput (mb/s)'); 41 | 42 | fprintf('Program paused. Press enter to continue.\n'); 43 | pause 44 | 45 | 46 | %% ================== Part 2: Estimate the dataset statistics =================== 47 | % For this exercise, we assume a Gaussian distribution for the dataset. 48 | % 49 | % We first estimate the parameters of our assumed Gaussian distribution, 50 | % then compute the probabilities for each of the points and then visualize 51 | % both the overall distribution and where each of the points falls in 52 | % terms of that distribution. 53 | % 54 | fprintf('Visualizing Gaussian fit.\n\n'); 55 | 56 | % Estimate mu and sigma2 57 | [mu sigma2] = estimateGaussian(X); 58 | 59 | % Returns the density of the multivariate normal at each data point (row) 60 | % of X 61 | p = multivariateGaussian(X, mu, sigma2); 62 | 63 | % Visualize the fit 64 | visualizeFit(X, mu, sigma2); 65 | xlabel('Latency (ms)'); 66 | ylabel('Throughput (mb/s)'); 67 | 68 | fprintf('Program paused. Press enter to continue.\n'); 69 | pause; 70 | 71 | %% ================== Part 3: Find Outliers =================== 72 | % Now you will find a good epsilon threshold using the cross-validation set 73 | % probabilities given the estimated Gaussian distribution 74 | % 75 | 76 | pval = multivariateGaussian(Xval, mu, sigma2); 77 | 78 | [epsilon F1] = selectThreshold(yval, pval); 79 | fprintf('Best epsilon found using cross-validation: %e\n', epsilon); 80 | fprintf('Best F1 on Cross Validation Set: %f\n', F1); 81 | fprintf(' (you should see a value epsilon of about 8.99e-05)\n'); 82 | fprintf(' (you should see a Best F1 value of 0.875000)\n\n'); 83 | 84 | % Find the outliers in the training set and plot them 85 | outliers = find(p < epsilon); 86 | 87 | % Draw a red circle around those outliers 88 | hold on 89 | plot(X(outliers, 1), X(outliers, 2), 'ro', 'LineWidth', 2, 'MarkerSize', 10); 90 | hold off 91 | 92 | fprintf('Program paused. Press enter to continue.\n'); 93 | %pause; 94 | 95 | %% ================== Part 4: Multidimensional Outliers =================== 96 | % We will now use the code from the previous part and apply it to a 97 | % harder problem in which more features describe each datapoint and only 98 | % some features indicate whether a point is an outlier. 99 | % 100 | 101 | % Loads the second dataset.
You should now have the 102 | % variables X, Xval, yval in your environment 103 | load('ex8data2.mat'); 104 | 105 | % Apply the same steps to the larger dataset 106 | [mu sigma2] = estimateGaussian(X); 107 | 108 | % Training set 109 | p = multivariateGaussian(X, mu, sigma2); 110 | 111 | % Cross-validation set 112 | pval = multivariateGaussian(Xval, mu, sigma2); 113 | 114 | % Find the best threshold 115 | [epsilon F1] = selectThreshold(yval, pval); 116 | 117 | fprintf('Best epsilon found using cross-validation: %e\n', epsilon); 118 | fprintf('Best F1 on Cross Validation Set: %f\n', F1); 119 | fprintf(' (you should see a value epsilon of about 1.38e-18)\n'); 120 | fprintf(' (you should see a Best F1 value of 0.615385)\n'); 121 | fprintf('# Outliers found: %d\n\n', sum(p < epsilon)); 122 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex8/ex8_cofi.m: -------------------------------------------------------------------------------- 1 | %% Machine Learning Online Class 2 | % Exercise 8 | Anomaly Detection and Collaborative Filtering 3 | % 4 | % Instructions 5 | % ------------ 6 | % 7 | % This file contains code that helps you get started on the 8 | % exercise. You will need to complete the following functions: 9 | % 10 | % estimateGaussian.m 11 | % selectThreshold.m 12 | % cofiCostFunc.m 13 | % 14 | % For this exercise, you will not need to change any code in this file, 15 | % or any other files other than those mentioned above. 16 | % 17 | 18 | %% =============== Part 1: Loading movie ratings dataset ================ 19 | % You will start by loading the movie ratings dataset to understand the 20 | % structure of the data. 21 | % 22 | fprintf('Loading movie ratings dataset.\n\n'); 23 | 24 | % Load data 25 | load ('ex8_movies.mat'); 26 | 27 | % Y is a 1682x943 matrix, containing ratings (1-5) of 1682 movies on 28 | % 943 users 29 | % 30 | % R is a 1682x943 matrix, where R(i,j) = 1 if and only if user j gave a 31 | % rating to movie i 32 | 33 | % From the matrix, we can compute statistics like average rating. 34 | fprintf('Average rating for movie 1 (Toy Story): %f / 5\n\n', ... 35 | mean(Y(1, R(1, :)))); 36 | 37 | % We can "visualize" the ratings matrix by plotting it with imagesc 38 | imagesc(Y); 39 | ylabel('Movies'); 40 | xlabel('Users'); 41 | 42 | fprintf('\nProgram paused. Press enter to continue.\n'); 43 | pause; 44 | 45 | %% ============ Part 2: Collaborative Filtering Cost Function =========== 46 | % You will now implement the cost function for collaborative filtering. 47 | % To help you debug your cost function, we have included set of weights 48 | % that we trained on that. Specifically, you should complete the code in 49 | % cofiCostFunc.m to return J. 50 | 51 | % Load pre-trained weights (X, Theta, num_users, num_movies, num_features) 52 | load ('ex8_movieParams.mat'); 53 | 54 | % Reduce the data set size so that this runs faster 55 | num_users = 4; num_movies = 5; num_features = 3; 56 | X = X(1:num_movies, 1:num_features); 57 | Theta = Theta(1:num_users, 1:num_features); 58 | Y = Y(1:num_movies, 1:num_users); 59 | R = R(1:num_movies, 1:num_users); 60 | 61 | % Evaluate cost function 62 | J = cofiCostFunc([X(:) ; Theta(:)], Y, R, num_users, num_movies, ... 63 | num_features, 0); 64 | 65 | fprintf(['Cost at loaded parameters: %f '... 66 | '\n(this value should be about 22.22)\n'], J); 67 | 68 | fprintf('\nProgram paused. 
Press enter to continue.\n'); 69 | pause; 70 | 71 | 72 | %% ============== Part 3: Collaborative Filtering Gradient ============== 73 | % Once your cost function matches up with ours, you should now implement 74 | % the collaborative filtering gradient function. Specifically, you should 75 | % complete the code in cofiCostFunc.m to return the grad argument. 76 | % 77 | fprintf('\nChecking Gradients (without regularization) ... \n'); 78 | 79 | % Check gradients by running checkCostFunction 80 | checkCostFunction; 81 | 82 | fprintf('\nProgram paused. Press enter to continue.\n'); 83 | pause; 84 | 85 | 86 | %% ========= Part 4: Collaborative Filtering Cost Regularization ======== 87 | % Now, you should implement regularization for the cost function for 88 | % collaborative filtering. You can implement it by adding the cost of 89 | % regularization to the original cost computation. 90 | % 91 | 92 | % Evaluate cost function 93 | J = cofiCostFunc([X(:) ; Theta(:)], Y, R, num_users, num_movies, ... 94 | num_features, 1.5); 95 | 96 | fprintf(['Cost at loaded parameters (lambda = 1.5): %f '... 97 | '\n(this value should be about 31.34)\n'], J); 98 | 99 | fprintf('\nProgram paused. Press enter to continue.\n'); 100 | pause; 101 | 102 | 103 | %% ======= Part 5: Collaborative Filtering Gradient Regularization ====== 104 | % Once your cost matches up with ours, you should proceed to implement 105 | % regularization for the gradient. 106 | % 107 | 108 | % 109 | fprintf('\nChecking Gradients (with regularization) ... \n'); 110 | 111 | % Check gradients by running checkCostFunction 112 | checkCostFunction(1.5); 113 | 114 | fprintf('\nProgram paused. Press enter to continue.\n'); 115 | pause; 116 | 117 | 118 | %% ============== Part 6: Entering ratings for a new user =============== 119 | % Before we train the collaborative filtering model, we will first 120 | % add ratings that correspond to a new user that we just observed. This 121 | % part of the code will also allow you to put in your own ratings for the 122 | % movies in our dataset! 123 | % 124 | movieList = loadMovieList(); 125 | 126 | % Initialize my ratings 127 | my_ratings = zeros(1682, 1); 128 | 129 | % Check the file movie_ids.txt for the id of each movie in our dataset 130 | % For example, Toy Story (1995) has ID 1, so to rate it "4", you can set 131 | my_ratings(1) = 4; 132 | 133 | % Or suppose you did not enjoy Silence of the Lambs (1991), you can set 134 | my_ratings(98) = 2; 135 | 136 | % We have selected a few movies we liked / did not like and the ratings we 137 | % gave are as follows: 138 | my_ratings(7) = 3; 139 | my_ratings(12)= 5; 140 | my_ratings(54) = 4; 141 | my_ratings(64)= 5; 142 | my_ratings(66)= 3; 143 | my_ratings(69) = 5; 144 | my_ratings(183) = 4; 145 | my_ratings(226) = 5; 146 | my_ratings(355)= 5; 147 | 148 | fprintf('\n\nNew user ratings:\n'); 149 | for i = 1:length(my_ratings) 150 | if my_ratings(i) > 0 151 | fprintf('Rated %d for %s\n', my_ratings(i), ... 152 | movieList{i}); 153 | end 154 | end 155 | 156 | fprintf('\nProgram paused.
Press enter to continue.\n'); 157 | pause; 158 | 159 | 160 | %% ================== Part 7: Learning Movie Ratings ==================== 161 | % Now, you will train the collaborative filtering model on a movie rating 162 | % dataset of 1682 movies and 943 users 163 | % 164 | 165 | fprintf('\nTraining collaborative filtering...\n'); 166 | 167 | % Load data 168 | load('ex8_movies.mat'); 169 | 170 | % Y is a 1682x943 matrix, containing ratings (1-5) of 1682 movies by 171 | % 943 users 172 | % 173 | % R is a 1682x943 matrix, where R(i,j) = 1 if and only if user j gave a 174 | % rating to movie i 175 | 176 | % Add our own ratings to the data matrix 177 | Y = [my_ratings Y]; 178 | R = [(my_ratings ~= 0) R]; 179 | 180 | % Normalize Ratings 181 | [Ynorm, Ymean] = normalizeRatings(Y, R); 182 | 183 | % Useful Values 184 | num_users = size(Y, 2); 185 | num_movies = size(Y, 1); 186 | num_features = 10; 187 | 188 | % Set Initial Parameters (Theta, X) 189 | X = randn(num_movies, num_features); 190 | Theta = randn(num_users, num_features); 191 | 192 | initial_parameters = [X(:); Theta(:)]; 193 | 194 | % Set options for fmincg 195 | options = optimset('GradObj', 'on', 'MaxIter', 100); 196 | 197 | % Set Regularization 198 | lambda = 10; 199 | theta = fmincg (@(t)(cofiCostFunc(t, Ynorm, R, num_users, num_movies, ... 200 | num_features, lambda)), ... 201 | initial_parameters, options); 202 | 203 | % Unfold the returned theta back into U and W 204 | X = reshape(theta(1:num_movies*num_features), num_movies, num_features); 205 | Theta = reshape(theta(num_movies*num_features+1:end), ... 206 | num_users, num_features); 207 | 208 | fprintf('Recommender system learning completed.\n'); 209 | 210 | fprintf('\nProgram paused. Press enter to continue.\n'); 211 | pause; 212 | 213 | %% ================== Part 8: Recommendation for you ==================== 214 | % After training the model, you can now make recommendations by computing 215 | % the predictions matrix. 216 | % 217 | 218 | p = X * Theta'; 219 | my_predictions = p(:,1) + Ymean; 220 | 221 | movieList = loadMovieList(); 222 | 223 | [r, ix] = sort(my_predictions, 'descend'); 224 | fprintf('\nTop recommendations for you:\n'); 225 | for i=1:10 226 | j = ix(i); 227 | fprintf('Predicting rating %.1f for movie %s\n', my_predictions(j), ... 228 | movieList{j}); 229 | end 230 | 231 | fprintf('\n\nOriginal ratings provided:\n'); 232 | for i = 1:length(my_ratings) 233 | if my_ratings(i) > 0 234 | fprintf('Rated %d for %s\n', my_ratings(i), ... 
235 | movieList{i}); 236 | end 237 | end 238 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex8/ex8_movieParams.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/loveunk/machine-learning-excercise-notebook-python/b94f6c07388234c086a40de0132550653aa17cc3/original-machine-learning-MATLAB/ex8/ex8_movieParams.mat -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex8/ex8_movies.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/loveunk/machine-learning-excercise-notebook-python/b94f6c07388234c086a40de0132550653aa17cc3/original-machine-learning-MATLAB/ex8/ex8_movies.mat -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex8/ex8data1.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/loveunk/machine-learning-excercise-notebook-python/b94f6c07388234c086a40de0132550653aa17cc3/original-machine-learning-MATLAB/ex8/ex8data1.mat -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex8/ex8data2.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/loveunk/machine-learning-excercise-notebook-python/b94f6c07388234c086a40de0132550653aa17cc3/original-machine-learning-MATLAB/ex8/ex8data2.mat -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex8/loadMovieList.m: -------------------------------------------------------------------------------- 1 | function movieList = loadMovieList() 2 | %LOADMOVIELIST reads the fixed movie list in movie_ids.txt and returns a 3 | %cell array of the movie names 4 | % movieList = LOADMOVIELIST() reads the fixed movie list in movie_ids.txt 5 | % and returns a cell array of the movie names in movieList. 6 | 7 | 8 | %% Read the fixed movie list 9 | fid = fopen('movie_ids.txt'); 10 | 11 | % Store all movies in cell array movie{} 12 | n = 1682; % Total number of movies 13 | 14 | movieList = cell(n, 1); 15 | for i = 1:n 16 | % Read line 17 | line = fgets(fid); 18 | % Movie index (can ignore since it will be = i) 19 | [idx, movieName] = strtok(line, ' '); 20 | % Actual movie name 21 | movieList{i} = strtrim(movieName); 22 | end 23 | fclose(fid); 24 | 25 | end 26 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex8/movie_ids.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/loveunk/machine-learning-excercise-notebook-python/b94f6c07388234c086a40de0132550653aa17cc3/original-machine-learning-MATLAB/ex8/movie_ids.txt -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex8/multivariateGaussian.m: -------------------------------------------------------------------------------- 1 | function p = multivariateGaussian(X, mu, Sigma2) 2 | %MULTIVARIATEGAUSSIAN Computes the probability density function of the 3 | %multivariate gaussian distribution.
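% For reference, the expression at the bottom of this file evaluates, row by
% row over the examples in X,
%   p(x) = (2*pi)^(-k/2) * det(Sigma2)^(-1/2)
%          * exp( -0.5 * (x - mu)' * pinv(Sigma2) * (x - mu) )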
4 | % p = MULTIVARIATEGAUSSIAN(X, mu, Sigma2) Computes the probability 5 | % density function of the examples X under the multivariate gaussian 6 | % distribution with parameters mu and Sigma2. If Sigma2 is a matrix, it is 7 | % treated as the covariance matrix. If Sigma2 is a vector, it is treated 8 | % as the \sigma^2 values of the variances in each dimension (a diagonal 9 | % covariance matrix) 10 | % 11 | 12 | k = length(mu); 13 | 14 | if (size(Sigma2, 2) == 1) || (size(Sigma2, 1) == 1) 15 | Sigma2 = diag(Sigma2); 16 | end 17 | 18 | X = bsxfun(@minus, X, mu(:)'); 19 | p = (2 * pi) ^ (- k / 2) * det(Sigma2) ^ (-0.5) * ... 20 | exp(-0.5 * sum(bsxfun(@times, X * pinv(Sigma2), X), 2)); 21 | 22 | end -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex8/normalizeRatings.m: -------------------------------------------------------------------------------- 1 | function [Ynorm, Ymean] = normalizeRatings(Y, R) 2 | %NORMALIZERATINGS Preprocess data by subtracting mean rating for every 3 | %movie (every row) 4 | % [Ynorm, Ymean] = NORMALIZERATINGS(Y, R) normalized Y so that each movie 5 | % has a rating of 0 on average, and returns the mean rating in Ymean. 6 | % 7 | 8 | [m, n] = size(Y); 9 | Ymean = zeros(m, 1); 10 | Ynorm = zeros(size(Y)); 11 | for i = 1:m 12 | idx = find(R(i, :) == 1); 13 | Ymean(i) = mean(Y(i, idx)); 14 | Ynorm(i, idx) = Y(i, idx) - Ymean(i); 15 | end 16 | 17 | end 18 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex8/selectThreshold.m: -------------------------------------------------------------------------------- 1 | function [bestEpsilon bestF1] = selectThreshold(yval, pval) 2 | %SELECTTHRESHOLD Find the best threshold (epsilon) to use for selecting 3 | %outliers 4 | % [bestEpsilon bestF1] = SELECTTHRESHOLD(yval, pval) finds the best 5 | % threshold to use for selecting outliers based on the results from a 6 | % validation set (pval) and the ground truth (yval). 7 | % 8 | 9 | bestEpsilon = 0; 10 | bestF1 = 0; 11 | F1 = 0; 12 | 13 | stepsize = (max(pval) - min(pval)) / 1000; 14 | for epsilon = min(pval):stepsize:max(pval) 15 | 16 | % ====================== YOUR CODE HERE ====================== 17 | % Instructions: Compute the F1 score of choosing epsilon as the 18 | % threshold and place the value in F1. The code at the 19 | % end of the loop will compare the F1 score for this 20 | % choice of epsilon and set it to be the best epsilon if 21 | % it is better than the current choice of epsilon. 22 | % 23 | % Note: You can use predictions = (pval < epsilon) to get a binary vector 24 | % of 0's and 1's of the outlier predictions 25 | predictions = (pval < epsilon); 26 | tp = sum(predictions(yval == 1)); 27 | fp = sum(predictions(yval == 0)); 28 | fn = sum(yval(predictions == 0)); 29 | 30 | prec = tp / (tp + fp); 31 | rec = tp / (tp + fn); 32 | F1 = 2 * prec * rec / (prec + rec); 33 | 34 | % ============================================================= 35 | 36 | if F1 > bestF1 37 | bestF1 = F1; 38 | bestEpsilon = epsilon; 39 | end 40 | end 41 | 42 | end 43 | -------------------------------------------------------------------------------- /original-machine-learning-MATLAB/ex8/visualizeFit.m: -------------------------------------------------------------------------------- 1 | function visualizeFit(X, mu, sigma2) 2 | %VISUALIZEFIT Visualize the dataset and its estimated distribution. 
3 | % VISUALIZEFIT(X, p, mu, sigma2) This visualization shows you the 4 | % probability density function of the Gaussian distribution. Each example 5 | % has a location (x1, x2) that depends on its feature values. 6 | % 7 | 8 | [X1,X2] = meshgrid(0:.5:35); 9 | Z = multivariateGaussian([X1(:) X2(:)],mu,sigma2); 10 | Z = reshape(Z,size(X1)); 11 | 12 | plot(X(:, 1), X(:, 2),'bx'); 13 | hold on; 14 | % Do not plot if there are infinities 15 | if (sum(isinf(Z)) == 0) 16 | contour(X1, X2, Z, 10.^(-20:3:0)'); 17 | end 18 | hold off; 19 | 20 | end --------------------------------------------------------------------------------