├── README.md
├── ex1
│   ├── ex1.pdf
│   ├── ex1.py
│   ├── ex1data1.txt
│   ├── ex1data2.txt
│   └── token.mat
├── ex2
│   ├── ex2.pdf
│   ├── ex2data1.txt
│   ├── ex2data2.txt
│   ├── ex2part1.py
│   ├── ex2part2.py
│   ├── i.txt
│   └── token.mat
├── ex3
│   ├── ex3.pdf
│   ├── ex3.py
│   ├── ex3data1.mat
│   └── ex3weights.mat
├── ex4
│   ├── ex4.pdf
│   ├── ex4.py
│   ├── ex4data1.mat
│   ├── ex4weights.mat
│   └── token.mat
├── ex5
│   ├── ex5.pdf
│   ├── ex5.py
│   ├── ex5data1.mat
│   └── token.mat
├── ex6
│   ├── emailSample1.txt
│   ├── emailSample2.txt
│   ├── ex6.pdf
│   ├── ex6data1.mat
│   ├── ex6data2.mat
│   ├── ex6data3.mat
│   ├── ex6spam.py
│   ├── ex6svm.py
│   ├── spamSample1.txt
│   ├── spamSample2.txt
│   ├── spamTest.mat
│   ├── spamTrain.mat
│   ├── token.mat
│   └── vocab.txt
├── ex7
│   ├── bird_small.mat
│   ├── bird_small.png
│   ├── ex7.pdf
│   ├── ex7data1.mat
│   ├── ex7data2.mat
│   ├── ex7faces.mat
│   ├── ex7kmeans.py
│   ├── ex7pca.py
│   └── token.mat
└── ex8
    ├── ex8_movieParams.mat
    ├── ex8_movies.mat
    ├── ex8anomaly_detection.py
    ├── ex8cofi.py
    ├── ex8data1.mat
    ├── ex8data2.mat
    ├── i.txt
    ├── movie_ids.txt
    └── token.mat

/README.md:
--------------------------------------------------------------------------------
# ml-andrewng-python
Python implementations of the programming assignments from Andrew Ng's online machine-learning course.

Contents:

Programming Assignment #1: Linear Regression (Week 2)

Programming Assignment #2: Logistic Regression (Week 3)

Programming Assignment #3: Multi-class Classification and Neural Networks (Week 4)

Programming Assignment #4: Neural Network Learning (Week 5)

Programming Assignment #5: Regularized Linear Regression and Bias/Variance (Week 6)

Programming Assignment #6: Support Vector Machines (Week 7)

Programming Assignment #7: K-Means Clustering and PCA (Week 8)

Programming Assignment #8: Anomaly Detection and Recommender Systems (Week 9)
--------------------------------------------------------------------------------
/ex1/ex1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deyachatterjee/ml-andrewng-python/03cf16e3d26cd65e791dc6a9e1b49d01ff0b70f3/ex1/ex1.pdf
--------------------------------------------------------------------------------
/ex1/ex1.py:
--------------------------------------------------------------------------------
import numpy as np
import pandas as pd
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
import matplotlib.pyplot as plt

def warmUpExercise():
    return np.eye(5)


def plotData(x, y):

    fig, ax = plt.subplots() # create empty figure
    ax.plot(x,y,'rx',markersize=10)
    ax.set_xlabel("Population of City in 10,000s")
    ax.set_ylabel("Profit in $10,000s")

    return fig


def normalEqn(X,y):

    return np.dot((np.linalg.inv(np.dot(X.T,X))),np.dot(X.T,y))


def gradientDescentMulti(X, y, theta, alpha, num_iters):

    m = len(y) # number of training examples
    J_history = np.zeros(num_iters)

    for i in range(num_iters):
        theta = theta - (alpha/m)*np.sum((np.dot(X,theta)-y)[:,None]*X,axis=0)
        J_history[i] = computeCost(X, y, theta)
        print('Cost function: ', J_history[i])

    return (theta, J_history)

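# Added note (not part of the original file): both gradient-descent routines
# in this script implement the same batch update. With X of shape (m, n) and
# 1-d theta and y, an equivalent, slightly more direct form of the update
# line is
#
#     theta = theta - (alpha/m) * np.dot(X.T, np.dot(X, theta) - y)
#
# i.e. theta := theta - (alpha/m) * X'(X*theta - y).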
def gradientDescent(X, y, theta, alpha, num_iters):

    m = len(y) # number of training examples
    J_history = np.zeros(num_iters)

    for i in range(num_iters):
        theta = theta - (alpha/m)*np.sum((np.dot(X,theta)-y)[:,None]*X,axis=0)
        J_history[i] = computeCost(X, y, theta)
        print('Cost function: ',J_history[i])

    return (theta, J_history)


def featureNormalize(X):
    return np.divide((X - np.mean(X,axis=0)),np.std(X,axis=0))


def computeCost(X, y, theta):
    m = len(y)
    J = (np.sum((np.dot(X,theta) - y)**2))/(2*m)
    return J

print('Running warmUpExercise ... \n')
print('5x5 Identity Matrix: \n')

print(warmUpExercise())
input('Program paused. Press enter to continue.\n')

print('Plotting Data ...\n')
data = pd.read_csv("ex1data1.txt",names=["X","y"])
x = np.array(data.X)[:,None] # population in 10,000s
y = np.array(data.y) # profit for a food truck
m = len(y)
fig = plotData(x,y)
fig.show()
input('Program paused. Press enter to continue.\n')
print('Running Gradient Descent ...\n')
ones = np.ones_like(x) # an array of ones of the same dimension as x
X = np.hstack((ones,x)) # Add a column of ones to x. hstack stacks horizontally, i.e. columnwise
theta = np.zeros(2) # initialize
iterations = 1500
alpha = 0.01
computeCost(X, y, theta)
theta, hist = gradientDescent(X, y, theta, alpha, iterations)
print('Theta found by gradient descent: ')
print(theta[0],"\n", theta[1])

# Plot the linear fit
plt.plot(x,y,'rx',x,np.dot(X,theta),'b-')
plt.legend(['Training Data','Linear Regression'])
plt.show()

# Predict values for population sizes of 35,000 and 70,000
predict1 = np.dot([1, 3.5],theta) # takes the inner product to get y_bar
print('For population = 35,000, we predict a profit of ', predict1*10000)

predict2 = np.dot([1, 7],theta)
print('For population = 70,000, we predict a profit of ', predict2*10000)
input('Program paused. Press enter to continue.\n')
print('Visualizing J(theta_0, theta_1) ...\n')

# Grid over which we will calculate J
theta0_vals = np.linspace(-10, 10, 100)
theta1_vals = np.linspace(-1, 4, 100)
J_vals = np.zeros((len(theta0_vals),len(theta1_vals)))

for i in range(len(theta0_vals)):
    for j in range(len(theta1_vals)):
        t = np.array([theta0_vals[i],theta1_vals[j]])
        J_vals[i][j] = computeCost(X,y,t)
"""
# Surface plot using J_vals
fig = plt.figure()
ax = plt.subplot(111,projection='3d')
Axes3D.plot_surface(ax,theta0_vals,theta1_vals,J_vals,cmap=cm.coolwarm)
plt.show()

fig = plt.figure()
ax = plt.subplot(111)
plt.contour(theta0_vals,theta1_vals,J_vals)
"""
print('Loading data ...','\n')
print('Plotting Data ...','\n')
data = pd.read_csv("ex1data2.txt",names=["size","bedrooms","price"])
s = np.array(data["size"]) # use [] indexing: data.size is the DataFrame's size attribute, not this column
r = np.array(data.bedrooms)
p = np.array(data.price)
m = len(r)
s = np.vstack(s)
r = np.vstack(r)
X = np.hstack((s,r))
print('First 10 examples from the dataset: \n')
print(" size = ", s[:10],"\n"," bedrooms = ", r[:10], "\n")
input('Program paused. 
Press enter to continue.\n') 133 | print('Normalizing Features ...\n') 134 | X = featureNormalize(X) 135 | X = np.hstack((np.ones_like(s),X)) 136 | 137 | print('Running gradient descent ...\n') 138 | alpha = 0.05 139 | num_iters = 400 140 | theta = np.zeros(3) 141 | 142 | # Multiple Dimension Gradient Descent 143 | theta, hist = gradientDescent(X, p, theta, alpha, num_iters) 144 | 145 | # Plot convergence graph 146 | fig = plt.figure() 147 | ax = plt.subplot(111) 148 | plt.plot(np.arange(len(hist)),hist ,'-b') 149 | plt.xlabel('Number of iterations') 150 | plt.ylabel('Cost J') 151 | plt.show() 152 | 153 | 154 | print('Theta computed from gradient descent: \n') 155 | print(theta,'\n') 156 | 157 | # Estimate the price of a 1650 sq-ft, 3 br house 158 | #the first column of X is all-ones.it doesnot need to be normalized. 159 | normalized_specs = np.array([1,((1650-s.mean())/s.std()),((3-r.mean())/r.std())]) 160 | price = np.dot(normalized_specs,theta) 161 | print('Predicted price of a 1650 sq-ft, 3 br house (using gradient descent):\n ', 162 | price) 163 | input('Program paused. Press enter to continue.\n') 164 | 165 | print('Solving with normal equations...\n') 166 | 167 | data = pd.read_csv("ex1data2.txt",names=["sz","bed","price"]) 168 | s = np.array(data.sz) 169 | r = np.array(data.bed) 170 | p = np.array(data.price) 171 | m = len(r) 172 | s = np.vstack(s) 173 | r = np.vstack(r) 174 | X = np.hstack((s,r)) 175 | X = np.hstack((np.ones_like(s),X)) 176 | 177 | theta = normalEqn(X, p) 178 | 179 | print('Theta computed from the normal equations: \n') 180 | print(theta) 181 | print('\n') 182 | 183 | # Estimate the price of a 1650 sq-ft, 3 br house 184 | price = np.dot([1,1650,3],theta) 185 | 186 | 187 | print('Predicted price of a 1650 sq-ft, 3 br house (using normal equations): \n', 188 | price) 189 | -------------------------------------------------------------------------------- /ex1/ex1data1.txt: -------------------------------------------------------------------------------- 1 | 6.1101,17.592 2 | 5.5277,9.1302 3 | 8.5186,13.662 4 | 7.0032,11.854 5 | 5.8598,6.8233 6 | 8.3829,11.886 7 | 7.4764,4.3483 8 | 8.5781,12 9 | 6.4862,6.5987 10 | 5.0546,3.8166 11 | 5.7107,3.2522 12 | 14.164,15.505 13 | 5.734,3.1551 14 | 8.4084,7.2258 15 | 5.6407,0.71618 16 | 5.3794,3.5129 17 | 6.3654,5.3048 18 | 5.1301,0.56077 19 | 6.4296,3.6518 20 | 7.0708,5.3893 21 | 6.1891,3.1386 22 | 20.27,21.767 23 | 5.4901,4.263 24 | 6.3261,5.1875 25 | 5.5649,3.0825 26 | 18.945,22.638 27 | 12.828,13.501 28 | 10.957,7.0467 29 | 13.176,14.692 30 | 22.203,24.147 31 | 5.2524,-1.22 32 | 6.5894,5.9966 33 | 9.2482,12.134 34 | 5.8918,1.8495 35 | 8.2111,6.5426 36 | 7.9334,4.5623 37 | 8.0959,4.1164 38 | 5.6063,3.3928 39 | 12.836,10.117 40 | 6.3534,5.4974 41 | 5.4069,0.55657 42 | 6.8825,3.9115 43 | 11.708,5.3854 44 | 5.7737,2.4406 45 | 7.8247,6.7318 46 | 7.0931,1.0463 47 | 5.0702,5.1337 48 | 5.8014,1.844 49 | 11.7,8.0043 50 | 5.5416,1.0179 51 | 7.5402,6.7504 52 | 5.3077,1.8396 53 | 7.4239,4.2885 54 | 7.6031,4.9981 55 | 6.3328,1.4233 56 | 6.3589,-1.4211 57 | 6.2742,2.4756 58 | 5.6397,4.6042 59 | 9.3102,3.9624 60 | 9.4536,5.4141 61 | 8.8254,5.1694 62 | 5.1793,-0.74279 63 | 21.279,17.929 64 | 14.908,12.054 65 | 18.959,17.054 66 | 7.2182,4.8852 67 | 8.2951,5.7442 68 | 10.236,7.7754 69 | 5.4994,1.0173 70 | 20.341,20.992 71 | 10.136,6.6799 72 | 7.3345,4.0259 73 | 6.0062,1.2784 74 | 7.2259,3.3411 75 | 5.0269,-2.6807 76 | 6.5479,0.29678 77 | 7.5386,3.8845 78 | 5.0365,5.7014 79 | 10.274,6.7526 80 | 5.1077,2.0576 81 | 5.7292,0.47953 82 | 
5.1884,0.20421 83 | 6.3557,0.67861 84 | 9.7687,7.5435 85 | 6.5159,5.3436 86 | 8.5172,4.2415 87 | 9.1802,6.7981 88 | 6.002,0.92695 89 | 5.5204,0.152 90 | 5.0594,2.8214 91 | 5.7077,1.8451 92 | 7.6366,4.2959 93 | 5.8707,7.2029 94 | 5.3054,1.9869 95 | 8.2934,0.14454 96 | 13.394,9.0551 97 | 5.4369,0.61705 98 | -------------------------------------------------------------------------------- /ex1/ex1data2.txt: -------------------------------------------------------------------------------- 1 | 2104,3,399900 2 | 1600,3,329900 3 | 2400,3,369000 4 | 1416,2,232000 5 | 3000,4,539900 6 | 1985,4,299900 7 | 1534,3,314900 8 | 1427,3,198999 9 | 1380,3,212000 10 | 1494,3,242500 11 | 1940,4,239999 12 | 2000,3,347000 13 | 1890,3,329999 14 | 4478,5,699900 15 | 1268,3,259900 16 | 2300,4,449900 17 | 1320,2,299900 18 | 1236,3,199900 19 | 2609,4,499998 20 | 3031,4,599000 21 | 1767,3,252900 22 | 1888,2,255000 23 | 1604,3,242900 24 | 1962,4,259900 25 | 3890,3,573900 26 | 1100,3,249900 27 | 1458,3,464500 28 | 2526,3,469000 29 | 2200,3,475000 30 | 2637,3,299900 31 | 1839,2,349900 32 | 1000,1,169900 33 | 2040,4,314900 34 | 3137,3,579900 35 | 1811,4,285900 36 | 1437,3,249900 37 | 1239,3,229900 38 | 2132,4,345000 39 | 4215,4,549000 40 | 2162,4,287000 41 | 1664,2,368500 42 | 2238,3,329900 43 | 2567,4,314000 44 | 1200,3,299000 45 | 852,2,179900 46 | 1852,4,299900 47 | 1203,3,239500 48 | -------------------------------------------------------------------------------- /ex1/token.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deyachatterjee/ml-andrewng-python/03cf16e3d26cd65e791dc6a9e1b49d01ff0b70f3/ex1/token.mat -------------------------------------------------------------------------------- /ex2/ex2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deyachatterjee/ml-andrewng-python/03cf16e3d26cd65e791dc6a9e1b49d01ff0b70f3/ex2/ex2.pdf -------------------------------------------------------------------------------- /ex2/ex2data1.txt: -------------------------------------------------------------------------------- 1 | 34.62365962451697,78.0246928153624,0 2 | 30.28671076822607,43.89499752400101,0 3 | 35.84740876993872,72.90219802708364,0 4 | 60.18259938620976,86.30855209546826,1 5 | 79.0327360507101,75.3443764369103,1 6 | 45.08327747668339,56.3163717815305,0 7 | 61.10666453684766,96.51142588489624,1 8 | 75.02474556738889,46.55401354116538,1 9 | 76.09878670226257,87.42056971926803,1 10 | 84.43281996120035,43.53339331072109,1 11 | 95.86155507093572,38.22527805795094,0 12 | 75.01365838958247,30.60326323428011,0 13 | 82.30705337399482,76.48196330235604,1 14 | 69.36458875970939,97.71869196188608,1 15 | 39.53833914367223,76.03681085115882,0 16 | 53.9710521485623,89.20735013750205,1 17 | 69.07014406283025,52.74046973016765,1 18 | 67.94685547711617,46.67857410673128,0 19 | 70.66150955499435,92.92713789364831,1 20 | 76.97878372747498,47.57596364975532,1 21 | 67.37202754570876,42.83843832029179,0 22 | 89.67677575072079,65.79936592745237,1 23 | 50.534788289883,48.85581152764205,0 24 | 34.21206097786789,44.20952859866288,0 25 | 77.9240914545704,68.9723599933059,1 26 | 62.27101367004632,69.95445795447587,1 27 | 80.1901807509566,44.82162893218353,1 28 | 93.114388797442,38.80067033713209,0 29 | 61.83020602312595,50.25610789244621,0 30 | 38.78580379679423,64.99568095539578,0 31 | 61.379289447425,72.80788731317097,1 32 | 85.40451939411645,57.05198397627122,1 33 | 52.10797973193984,63.12762376881715,0 34 
| 52.04540476831827,69.43286012045222,1 35 | 40.23689373545111,71.16774802184875,0 36 | 54.63510555424817,52.21388588061123,0 37 | 33.91550010906887,98.86943574220611,0 38 | 64.17698887494485,80.90806058670817,1 39 | 74.78925295941542,41.57341522824434,0 40 | 34.1836400264419,75.2377203360134,0 41 | 83.90239366249155,56.30804621605327,1 42 | 51.54772026906181,46.85629026349976,0 43 | 94.44336776917852,65.56892160559052,1 44 | 82.36875375713919,40.61825515970618,0 45 | 51.04775177128865,45.82270145776001,0 46 | 62.22267576120188,52.06099194836679,0 47 | 77.19303492601364,70.45820000180959,1 48 | 97.77159928000232,86.7278223300282,1 49 | 62.07306379667647,96.76882412413983,1 50 | 91.56497449807442,88.69629254546599,1 51 | 79.94481794066932,74.16311935043758,1 52 | 99.2725269292572,60.99903099844988,1 53 | 90.54671411399852,43.39060180650027,1 54 | 34.52451385320009,60.39634245837173,0 55 | 50.2864961189907,49.80453881323059,0 56 | 49.58667721632031,59.80895099453265,0 57 | 97.64563396007767,68.86157272420604,1 58 | 32.57720016809309,95.59854761387875,0 59 | 74.24869136721598,69.82457122657193,1 60 | 71.79646205863379,78.45356224515052,1 61 | 75.3956114656803,85.75993667331619,1 62 | 35.28611281526193,47.02051394723416,0 63 | 56.25381749711624,39.26147251058019,0 64 | 30.05882244669796,49.59297386723685,0 65 | 44.66826172480893,66.45008614558913,0 66 | 66.56089447242954,41.09209807936973,0 67 | 40.45755098375164,97.53518548909936,1 68 | 49.07256321908844,51.88321182073966,0 69 | 80.27957401466998,92.11606081344084,1 70 | 66.74671856944039,60.99139402740988,1 71 | 32.72283304060323,43.30717306430063,0 72 | 64.0393204150601,78.03168802018232,1 73 | 72.34649422579923,96.22759296761404,1 74 | 60.45788573918959,73.09499809758037,1 75 | 58.84095621726802,75.85844831279042,1 76 | 99.82785779692128,72.36925193383885,1 77 | 47.26426910848174,88.47586499559782,1 78 | 50.45815980285988,75.80985952982456,1 79 | 60.45555629271532,42.50840943572217,0 80 | 82.22666157785568,42.71987853716458,0 81 | 88.9138964166533,69.80378889835472,1 82 | 94.83450672430196,45.69430680250754,1 83 | 67.31925746917527,66.58935317747915,1 84 | 57.23870631569862,59.51428198012956,1 85 | 80.36675600171273,90.96014789746954,1 86 | 68.46852178591112,85.59430710452014,1 87 | 42.0754545384731,78.84478600148043,0 88 | 75.47770200533905,90.42453899753964,1 89 | 78.63542434898018,96.64742716885644,1 90 | 52.34800398794107,60.76950525602592,0 91 | 94.09433112516793,77.15910509073893,1 92 | 90.44855097096364,87.50879176484702,1 93 | 55.48216114069585,35.57070347228866,0 94 | 74.49269241843041,84.84513684930135,1 95 | 89.84580670720979,45.35828361091658,1 96 | 83.48916274498238,48.38028579728175,1 97 | 42.2617008099817,87.10385094025457,1 98 | 99.31500880510394,68.77540947206617,1 99 | 55.34001756003703,64.9319380069486,1 100 | 74.77589300092767,89.52981289513276,1 101 | -------------------------------------------------------------------------------- /ex2/ex2data2.txt: -------------------------------------------------------------------------------- 1 | 0.051267,0.69956,1 2 | -0.092742,0.68494,1 3 | -0.21371,0.69225,1 4 | -0.375,0.50219,1 5 | -0.51325,0.46564,1 6 | -0.52477,0.2098,1 7 | -0.39804,0.034357,1 8 | -0.30588,-0.19225,1 9 | 0.016705,-0.40424,1 10 | 0.13191,-0.51389,1 11 | 0.38537,-0.56506,1 12 | 0.52938,-0.5212,1 13 | 0.63882,-0.24342,1 14 | 0.73675,-0.18494,1 15 | 0.54666,0.48757,1 16 | 0.322,0.5826,1 17 | 0.16647,0.53874,1 18 | -0.046659,0.81652,1 19 | -0.17339,0.69956,1 20 | -0.47869,0.63377,1 21 | -0.60541,0.59722,1 22 | 
-0.62846,0.33406,1 23 | -0.59389,0.005117,1 24 | -0.42108,-0.27266,1 25 | -0.11578,-0.39693,1 26 | 0.20104,-0.60161,1 27 | 0.46601,-0.53582,1 28 | 0.67339,-0.53582,1 29 | -0.13882,0.54605,1 30 | -0.29435,0.77997,1 31 | -0.26555,0.96272,1 32 | -0.16187,0.8019,1 33 | -0.17339,0.64839,1 34 | -0.28283,0.47295,1 35 | -0.36348,0.31213,1 36 | -0.30012,0.027047,1 37 | -0.23675,-0.21418,1 38 | -0.06394,-0.18494,1 39 | 0.062788,-0.16301,1 40 | 0.22984,-0.41155,1 41 | 0.2932,-0.2288,1 42 | 0.48329,-0.18494,1 43 | 0.64459,-0.14108,1 44 | 0.46025,0.012427,1 45 | 0.6273,0.15863,1 46 | 0.57546,0.26827,1 47 | 0.72523,0.44371,1 48 | 0.22408,0.52412,1 49 | 0.44297,0.67032,1 50 | 0.322,0.69225,1 51 | 0.13767,0.57529,1 52 | -0.0063364,0.39985,1 53 | -0.092742,0.55336,1 54 | -0.20795,0.35599,1 55 | -0.20795,0.17325,1 56 | -0.43836,0.21711,1 57 | -0.21947,-0.016813,1 58 | -0.13882,-0.27266,1 59 | 0.18376,0.93348,0 60 | 0.22408,0.77997,0 61 | 0.29896,0.61915,0 62 | 0.50634,0.75804,0 63 | 0.61578,0.7288,0 64 | 0.60426,0.59722,0 65 | 0.76555,0.50219,0 66 | 0.92684,0.3633,0 67 | 0.82316,0.27558,0 68 | 0.96141,0.085526,0 69 | 0.93836,0.012427,0 70 | 0.86348,-0.082602,0 71 | 0.89804,-0.20687,0 72 | 0.85196,-0.36769,0 73 | 0.82892,-0.5212,0 74 | 0.79435,-0.55775,0 75 | 0.59274,-0.7405,0 76 | 0.51786,-0.5943,0 77 | 0.46601,-0.41886,0 78 | 0.35081,-0.57968,0 79 | 0.28744,-0.76974,0 80 | 0.085829,-0.75512,0 81 | 0.14919,-0.57968,0 82 | -0.13306,-0.4481,0 83 | -0.40956,-0.41155,0 84 | -0.39228,-0.25804,0 85 | -0.74366,-0.25804,0 86 | -0.69758,0.041667,0 87 | -0.75518,0.2902,0 88 | -0.69758,0.68494,0 89 | -0.4038,0.70687,0 90 | -0.38076,0.91886,0 91 | -0.50749,0.90424,0 92 | -0.54781,0.70687,0 93 | 0.10311,0.77997,0 94 | 0.057028,0.91886,0 95 | -0.10426,0.99196,0 96 | -0.081221,1.1089,0 97 | 0.28744,1.087,0 98 | 0.39689,0.82383,0 99 | 0.63882,0.88962,0 100 | 0.82316,0.66301,0 101 | 0.67339,0.64108,0 102 | 1.0709,0.10015,0 103 | -0.046659,-0.57968,0 104 | -0.23675,-0.63816,0 105 | -0.15035,-0.36769,0 106 | -0.49021,-0.3019,0 107 | -0.46717,-0.13377,0 108 | -0.28859,-0.060673,0 109 | -0.61118,-0.067982,0 110 | -0.66302,-0.21418,0 111 | -0.59965,-0.41886,0 112 | -0.72638,-0.082602,0 113 | -0.83007,0.31213,0 114 | -0.72062,0.53874,0 115 | -0.59389,0.49488,0 116 | -0.48445,0.99927,0 117 | -0.0063364,0.99927,0 118 | 0.63265,-0.030612,0 119 | -------------------------------------------------------------------------------- /ex2/ex2part1.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | import pandas as pd 4 | from scipy.optimize import minimize 5 | 6 | 7 | def plotData(X, y): 8 | pos = X[np.where(y==1)] 9 | neg = X[np.where(y==0)] 10 | fig, ax = plt.subplots() 11 | ax.plot(pos[:,0],pos[:,1],"k+",neg[:,0],neg[:,1],"yo") 12 | return (fig, ax) 13 | 14 | def costFunction(theta,X,y): 15 | m = len(y) 16 | J =(np.sum(-y*np.log(sigmoid(np.dot(X,theta)))- 17 | (1-y)*(np.log(1-sigmoid(np.dot(X,theta)))))/m) 18 | grad = (np.sum((sigmoid(np.dot(X,theta))-y)[:,None]*X,axis=0)/m) 19 | return (J, grad) 20 | 21 | def sigmoid(z): 22 | return 1.0/(1 + np.e**(-z)) 23 | 24 | 25 | def predict(theta,X): 26 | """ 27 | Given a vector of parameter results and training set X, 28 | returns the model prediction for admission. If predicted 29 | probability of admission is greater than .5, predict will 30 | return a value of 1. 
31 | """ 32 | return np.where(np.dot(X,theta) > 5.,1,0) 33 | 34 | def mapFeatureVector(X1,X2): 35 | """ 36 | Feature mapping function to polynomial features. Maps the two features 37 | X1,X2 to quadratic features used in the regularization exercise. X1, X2 38 | must be the same size.returns new feature array with interactions and quadratic terms 39 | """ 40 | 41 | degree = 6 42 | output_feature_vec = np.ones(len(X1))[:,None] 43 | 44 | for i in range(1,7): 45 | for j in range(i+1): 46 | new_feature = np.array(X1**(i-j)*X2**j)[:,None] 47 | output_feature_vec = np.hstack((output_feature_vec,new_feature)) 48 | 49 | return output_feature_vec 50 | 51 | 52 | def costFunctionReg(theta,X,y,reg_param): 53 | m = len(y) 54 | J =((np.sum(-y*np.log(sigmoid(np.dot(X,theta)))- 55 | (1-y)*(np.log(1-sigmoid(np.dot(X,theta))))))/m + 56 | (reg_param/m)*np.sum(theta**2)) 57 | 58 | # Non-regularized 59 | grad_0 = (np.sum((sigmoid(np.dot(X,theta))-y)[:,None]*X,axis=0)/m) 60 | 61 | # Regularized 62 | grad_reg = grad_0 + (reg_param/m)*theta 63 | # Replace gradient for theta_0 with non-regularized gradient 64 | grad_reg[0] = grad_0[0] 65 | 66 | return J 67 | 68 | 69 | def plotDecisionBoundary(theta,X,y): 70 | """X is asssumed to be either: 71 | 1) Mx3 matrix where the first column is all ones for the intercept 72 | 2) MxN with N>3, where the first column is all ones 73 | """ 74 | fig, ax = plotData(X[:,1:],y) 75 | """ 76 | if len(X[0]<=3): 77 | # Choose two endpoints and plot the line between them 78 | plot_x = np.array([min(X[:,1])-2,max(X[:,2])+2]) 79 | ax.plot(plot_x,plot_y) 80 | ax.legend(['Admitted','Fail','Pass']) 81 | ax.set_xbound(30,100) 82 | ax.set_ybound(30,100) 83 | else: 84 | """ 85 | 86 | # Create grid space 87 | u = np.linspace(-1,1.5,50) 88 | v = np.linspace(-1,1.5,50) 89 | z = np.zeros((len(u),len(v))) 90 | 91 | # Evaluate z = theta*x over values in the gridspace 92 | for i in range(len(u)): 93 | for j in range(len(v)): 94 | z[i][j] = np.dot(mapFeatureVector(np.array([u[i]]), 95 | np.array([v[j]])),theta) 96 | 97 | # Plot contour 98 | ax.contour(u,v,z,levels=[0]) 99 | 100 | return (fig,ax) 101 | 102 | ## Load Data 103 | # The first two columns contains the exam scores and the third column 104 | # contains the label. 105 | 106 | data = pd.read_csv('ex2data1.txt', names=['x1','x2','y']) 107 | X = np.asarray(data[["x1","x2"]]) 108 | y = np.asarray(data["y"]) 109 | 110 | print("Plotting data with + indicating (y = 1) examples and o indicating", 111 | " (y =0) examples.") 112 | fig, ax = plotData(X, y) 113 | ax.legend(['Admitted', 'Not admitted']) 114 | fig.show() 115 | input('\nProgram paused. Press enter to continue.\n') 116 | 117 | # Add intercept term to x and X_test 118 | X = np.hstack((np.ones_like(y)[:,None],X)) 119 | initial_theta = np.zeros(3) 120 | cost, grad = costFunction(initial_theta, X, y) 121 | 122 | print('Cost at initial theta (zeros): \n', cost) 123 | print('Gradient at initial theta (zeros): \n',grad) 124 | 125 | input('\nProgram paused. Press enter to continue.') 126 | 127 | res = minimize(costFunction, 128 | initial_theta, 129 | method='Newton-CG', 130 | args=(X,y), 131 | jac=True, 132 | options={'maxiter':400, 133 | 'disp':True}) 134 | 135 | theta = res.x 136 | print('Cost at theta found by minimize: \n', res.fun) 137 | print('theta: \n', theta) 138 | plotDecisionBoundary(theta, X, y) 139 | input('\nProgram paused. 
def costFunctionReg(theta,X,y,reg_param):
    m = len(y)
    J = ((np.sum(-y*np.log(sigmoid(np.dot(X,theta)))-
        (1-y)*(np.log(1-sigmoid(np.dot(X,theta))))))/m +
        (reg_param/(2*m))*np.sum(theta[1:]**2)) # the intercept is not regularized

    # Non-regularized
    grad_0 = (np.sum((sigmoid(np.dot(X,theta))-y)[:,None]*X,axis=0)/m)

    # Regularized
    grad_reg = grad_0 + (reg_param/m)*theta
    # Replace gradient for theta_0 with non-regularized gradient
    grad_reg[0] = grad_0[0]

    return (J, grad_reg)


def plotDecisionBoundary(theta,X,y):
    """X is assumed to be either:
    1) Mx3 matrix where the first column is all ones for the intercept
    2) MxN with N>3, where the first column is all ones
    """
    fig, ax = plotData(X[:,1:],y)

    if len(X[0]) <= 3:
        # Choose two endpoints and plot the line between them; the boundary
        # theta0 + theta1*x1 + theta2*x2 = 0 gives x2 = -(theta0 + theta1*x1)/theta2
        plot_x = np.array([min(X[:,1])-2,max(X[:,1])+2])
        plot_y = (-1.0/theta[2])*(theta[0] + theta[1]*plot_x)
        ax.plot(plot_x,plot_y)
        ax.legend(['Admitted','Not admitted','Decision Boundary'])
        ax.set_xbound(30,100)
        ax.set_ybound(30,100)
    else:
        # Create grid space
        u = np.linspace(-1,1.5,50)
        v = np.linspace(-1,1.5,50)
        z = np.zeros((len(u),len(v)))

        # Evaluate z = theta*x over values in the gridspace
        for i in range(len(u)):
            for j in range(len(v)):
                z[i][j] = np.dot(mapFeatureVector(np.array([u[i]]),
                                                  np.array([v[j]])),theta)

        # Plot contour (transpose z so that rows index v, the y-axis)
        ax.contour(u,v,z.T,levels=[0])

    return (fig,ax)

## Load Data
# The first two columns contain the exam scores and the third column
# contains the label.

data = pd.read_csv('ex2data1.txt', names=['x1','x2','y'])
X = np.asarray(data[["x1","x2"]])
y = np.asarray(data["y"])

print("Plotting data with + indicating (y = 1) examples and o indicating",
      " (y = 0) examples.")
fig, ax = plotData(X, y)
ax.legend(['Admitted', 'Not admitted'])
fig.show()
input('\nProgram paused. Press enter to continue.\n')

# Add intercept term to X
X = np.hstack((np.ones_like(y)[:,None],X))
initial_theta = np.zeros(3)
cost, grad = costFunction(initial_theta, X, y)

print('Cost at initial theta (zeros): \n', cost)
print('Gradient at initial theta (zeros): \n',grad)

input('\nProgram paused. Press enter to continue.')

res = minimize(costFunction,
               initial_theta,
               method='Newton-CG',
               args=(X,y),
               jac=True,
               options={'maxiter':400,
                        'disp':True})

theta = res.x
print('Cost at theta found by minimize: \n', res.fun)
print('theta: \n', theta)
plotDecisionBoundary(theta, X, y)
input('\nProgram paused. Press enter to continue.\n')

# In this part, you will use the logistic regression model
# to predict the probability that a student with score 45 on exam 1 and
# score 85 on exam 2 will be admitted.
# Furthermore, you will compute the training and test set accuracies of
# our model.

prob = sigmoid(np.dot([1,45,85],theta))
print('For a student with scores 45 and 85, we predict an ',
      'admission probability of ', prob)

# Compute accuracy on our training set
p = predict(theta, X)

print('Train Accuracy: \n', np.mean(p==y)*100)

input('Program paused. Press enter to continue.\n')
--------------------------------------------------------------------------------
/ex2/ex2part2.py:
--------------------------------------------------------------------------------
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from scipy.optimize import minimize


def plotData(X, y):
    pos = X[np.where(y==1)]
    neg = X[np.where(y==0)]
    fig, ax = plt.subplots()
    ax.plot(pos[:,0],pos[:,1],"k+",neg[:,0],neg[:,1],"yo")
    return (fig, ax)

def costFunction(theta,X,y):

    m = len(y)
    J = (np.sum(-y*np.log(sigmoid(np.dot(X,theta)))-
        (1-y)*(np.log(1-sigmoid(np.dot(X,theta)))))/m)
    grad = (np.sum((sigmoid(np.dot(X,theta))-y)[:,None]*X,axis=0)/m)
    return (J, grad)

def sigmoid(z):
    return 1.0/(1 + np.e**(-z))


def predict(theta,X):
    return np.where(sigmoid(np.dot(X,theta)) >= 0.5, 1, 0) # probability threshold of 0.5

def mapFeatureVector(X1,X2):
    degree = 6
    output_feature_vec = np.ones(len(X1))[:,None]
    for i in range(1,degree+1):
        for j in range(i+1):
            new_feature = np.array(X1**(i-j)*X2**j)[:,None]
            output_feature_vec = np.hstack((output_feature_vec,new_feature))

    return output_feature_vec


def costFunctionReg(theta,X,y,reg_param):
    m = len(y)
    J = ((np.sum(-y*np.log(sigmoid(np.dot(X,theta)))-
        (1-y)*(np.log(1-sigmoid(np.dot(X,theta))))))/m +
        (reg_param/(2*m))*np.sum(theta[1:]**2)) # the intercept is not regularized
    # Non-regularized
    grad_0 = (np.sum((sigmoid(np.dot(X,theta))-y)[:,None]*X,axis=0)/m)
    # Regularized
    grad_reg = grad_0 + (reg_param/m)*theta
    grad_reg[0] = grad_0[0]

    return (J, grad_reg)
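# Added sketch (not part of the original script): a quick central-difference
# check that the analytic gradient above matches a numerical estimate. The
# helper name and the step size eps are our own choices.
def numericalGradCheck(J, theta, eps=1e-4):
    # dJ/dtheta_i ~ (J(theta + eps*e_i) - J(theta - eps*e_i)) / (2*eps)
    grad = np.zeros_like(theta)
    for i in range(len(theta)):
        step = np.zeros_like(theta)
        step[i] = eps
        grad[i] = (J(theta + step) - J(theta - step)) / (2 * eps)
    return grad
# e.g. compare costFunctionReg(t, X, y, 1.0)[1] against
# numericalGradCheck(lambda t: costFunctionReg(t, X, y, 1.0)[0], t)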
def plotDecisionBoundary(theta,X,y):
    fig, ax = plotData(X[:,1:],y)

    if len(X[0]) <= 3:
        # Choose two endpoints and plot the line between them
        plot_x = np.array([min(X[:,1])-2,max(X[:,1])+2])
        # Calculate the decision boundary line and adjust the axes
        plot_y = (-1.0/theta[2])*(theta[0] + theta[1]*plot_x)
        ax.plot(plot_x,plot_y)
        ax.legend(['Pass','Fail','Decision Boundary'])
        ax.set_xbound(30,100)
        ax.set_ybound(30,100)
    else:
        u = np.linspace(-1,1.5,50)
        v = np.linspace(-1,1.5,50)
        z = np.zeros((len(u),len(v)))
        for i in range(len(u)):
            for j in range(len(v)):
                z[i][j] = np.dot(mapFeatureVector(np.array([u[i]]),
                                                  np.array([v[j]])),theta)

        # transpose z so that rows index v, the y-axis
        ax.contour(u,v,z.T,levels=[0])

    return (fig,ax)

## Load Data
data = pd.read_csv('ex2data2.txt', names=['x1','x2','y'])
X = np.asarray(data[["x1","x2"]])
y = np.asarray(data["y"])
fig, ax = plotData(X, y)

ax.legend(['Pass', 'Fail'])

# Labels
ax.set_xlabel('Microchip test 1')
ax.set_ylabel('Microchip test 2')
fig.show()

input('\nProgram paused. Press enter to continue.\n')

## Part 1 -- Regularized Logistic Regression
X = mapFeatureVector(X[:,0],X[:,1])
initial_theta = np.zeros(len(X[0,:]))

# Set regularization parameter to 1
reg_param = 1.0

# Optimize for theta, letting scipy choose the method
res = minimize(costFunctionReg,
               initial_theta,
               args=(X,y,reg_param),
               jac=True,
               tol=1e-6,
               options={'maxiter':400,
                        'disp':True})


theta = res.x
fig.clear()
fig, ax = plotDecisionBoundary(theta,X,y)

ax.legend(['Pass', 'Fail','Decision Boundary'])

# Labels
ax.set_xlabel('Microchip test 1')
ax.set_ylabel('Microchip test 2')
ax.set_title('Lambda = 1')

fig.show()

input('\nProgram paused. Press enter to continue.\n')
--------------------------------------------------------------------------------
/ex2/i.txt:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/ex2/token.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deyachatterjee/ml-andrewng-python/03cf16e3d26cd65e791dc6a9e1b49d01ff0b70f3/ex2/token.mat
--------------------------------------------------------------------------------
/ex3/ex3.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deyachatterjee/ml-andrewng-python/03cf16e3d26cd65e791dc6a9e1b49d01ff0b70f3/ex3/ex3.pdf
--------------------------------------------------------------------------------
/ex3/ex3.py:
--------------------------------------------------------------------------------
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import minimize
import scipy.io

def displayData(X):
    fig, ax = plt.subplots(10,10,sharex=True,sharey=True)
    img_num = 0
    for i in range(10):
        for j in range(10):
            # Convert column vector into 20x20 pixel matrix
            # You have to transpose to display correctly
            img = X[img_num,:].reshape(20,20).T
            ax[i][j].imshow(img,cmap='gray')
            img_num += 1

    return (fig, ax)

def displayImage(im):
    """
    Displays a single image stored as a column vector
    """
    fig2, ax2 = plt.subplots()
    image = im.reshape(20,20).T
    ax2.imshow(image,cmap='gray')
    return (fig2, ax2)

def sigmoid(z):
    return 1.0/(1 + np.e**(-z))

def lrCostFunction(theta,X,y,reg_param):
    m = len(y)
    J = ((np.sum(-y*np.log(sigmoid(np.dot(X,theta)))-
        (1-y)*(np.log(1-sigmoid(np.dot(X,theta))))))/m +
        (reg_param/(2*m))*np.sum(theta[1:]**2)) # the intercept is not regularized
    # Gradient
    # Non-regularized
    grad_0 = (np.sum((sigmoid(np.dot(X,theta))-y)[:,None]*X,axis=0)/m)
    # Regularized
    grad_reg = grad_0 + (reg_param/m)*theta
    grad_reg[0] = grad_0[0]
    return (J,grad_reg)
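# Added note: one-vs-all trains num_labels independent regularized logistic
# regressions, one per class k on the binary labels (y == k); prediction then
# picks the class whose score x @ theta_k is largest (the argmax in
# predictOneVsAllAccuracy below).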
def oneVsAll(X, y, num_labels, reg_param):
    # Calculates one set of logistic-regression parameters per class
    n = np.size(X,1)
    theta = np.zeros((n,num_labels))
    # Function to find parameters for a single logit
    def findOptParam(p_num):
        outcome = np.array(y == p_num).astype(int)
        initial_theta = theta[:,p_num]
        results = minimize(lrCostFunction,
                           initial_theta,
                           method='Newton-CG',
                           args=(X,outcome,reg_param),
                           jac=True,
                           tol=1e-6,
                           options={'maxiter':400,
                                    'disp':True})
        theta[:,p_num] = results.x


    for digit in range(num_labels):
        findOptParam(digit)

    return theta


def predictOneVsAllAccuracy(est_theta,X):
    """
    Classifies each observation by using the
    highest predicted probability from the possible classifications.
    """

    probs = np.dot(X,est_theta)
    predict = np.argmax(probs,axis=1)

    return predict


def predict(theta1,theta2,X):
    m = len(X)
    if np.ndim(X) == 1:
        X = X.reshape((-1,1)) # handle one-dimensional input
    D1 = np.hstack((np.ones((m,1)),X))

    # hidden layer from theta1 parameters
    hidden_pred = np.dot(D1,theta1.T) # (5000 x 401) x (401 x 25) = 5000 x 25
    ones = np.ones((len(hidden_pred),1)) # 5000 x 1
    hidden_pred = sigmoid(hidden_pred)
    hidden_pred = np.hstack((ones,hidden_pred)) # 5000 x 26

    # output layer from new design matrix
    output_pred = np.dot(hidden_pred,theta2.T) # (5000 x 26) x (26 x 10)
    output_pred = sigmoid(output_pred)
    # Get predictions
    p = np.argmax(output_pred,axis=1)

    return p

# parameters
input_layer_size = 400
num_labels = 10

print("Loading training data...")
raw_mat = scipy.io.loadmat("ex3data1.mat")
X = raw_mat.get("X")
y = raw_mat.get("y").flatten()
y[y == 10] = 0

X_design = np.hstack((np.ones((len(y),1)),X)) # add a column of ones
# Randomly select 100 datapoints to display
rand_indices = np.random.randint(0,len(X_design),100)
sel = X[rand_indices,:]

# Display
digit_grid, ax = displayData(sel)
digit_grid.show()

input("Program paused, press enter to continue...")

# Vectorized logistic regression
reg_param = 1.0
theta = oneVsAll(X_design,y,num_labels,reg_param)

predictions = predictOneVsAllAccuracy(theta,X_design)
accuracy = np.mean(y == predictions) * 100
print("Training Accuracy with logit: ", accuracy, "%")
input("Program paused, press enter to continue...")

# Neural Network
# Load pre-estimated weights
print("Loading saved neural network parameters...")
raw_params = scipy.io.loadmat("ex3weights.mat")
theta1 = raw_params.get("Theta1") # 25 x 401
theta2 = raw_params.get("Theta2") # 10 x 26
# The saved weights follow MATLAB's 1-indexed classes (digit '0' is stored as
# class 10), so add 1 to the 0-based argmax and take mod 10: class 10 becomes
# digit 0 and every other class maps to its own digit.
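# For example, output unit 9 (MATLAB class 10, i.e. the digit '0') maps to
# (9 + 1) % 10 == 0, while unit 0 (class 1, the digit '1') maps to 1.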

predictions = (predict(theta1,theta2,X) + 1) % 10
accuracy = np.mean(y == predictions) * 100
print("Training Accuracy with neural network: ", accuracy, "%")
--------------------------------------------------------------------------------
/ex3/ex3data1.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deyachatterjee/ml-andrewng-python/03cf16e3d26cd65e791dc6a9e1b49d01ff0b70f3/ex3/ex3data1.mat
--------------------------------------------------------------------------------
/ex3/ex3weights.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deyachatterjee/ml-andrewng-python/03cf16e3d26cd65e791dc6a9e1b49d01ff0b70f3/ex3/ex3weights.mat
--------------------------------------------------------------------------------
/ex4/ex4.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deyachatterjee/ml-andrewng-python/03cf16e3d26cd65e791dc6a9e1b49d01ff0b70f3/ex4/ex4.pdf
--------------------------------------------------------------------------------
/ex4/ex4.py:
--------------------------------------------------------------------------------
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import minimize
import scipy.io


def displayData(X):

    num_plots = int(np.size(X,0)**.5)
    fig, ax = plt.subplots(num_plots,num_plots,sharex=True,sharey=True)
    img_num = 0
    for i in range(num_plots):
        for j in range(num_plots):
            # Convert column vector into 20x20 pixel matrix
            # transpose
            img = X[img_num,:].reshape(20,20).T
            ax[i][j].imshow(img,cmap='gray')
            img_num += 1

    return (fig, ax)

def displayImage(im):
    fig2, ax2 = plt.subplots()
    image = im.reshape(20,20).T
    ax2.imshow(image,cmap='gray')
    return (fig2, ax2)

def sigmoid(z):
    return 1.0/(1 + np.e**(-z))

def sigmoidGradient(z):
    return sigmoid(z)*(1-sigmoid(z))

def predict(theta1,theta2,X):
    m = len(X)

    if np.ndim(X) == 1:
        X = X.reshape((-1,1))

    D1 = np.hstack((np.ones((m,1)),X)) # add column of ones

    # hidden layer
    hidden_pred = np.dot(D1,theta1.T) # (5000 x 401) x (401 x 25) = 5000 x 25

    # Add column of ones
    ones = np.ones((len(hidden_pred),1)) # 5000 x 1
    hidden_pred = sigmoid(hidden_pred)
    hidden_pred = np.hstack((ones,hidden_pred)) # 5000 x 26

    # output layer
    output_pred = np.dot(hidden_pred,theta2.T) # (5000 x 26) x (26 x 10)
    output_pred = sigmoid(output_pred)
    # Get predictions
    p = np.argmax(output_pred,axis=1)

    return p

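# Added note: nnCostFunction below implements the regularized cross-entropy
# cost from the exercise,
#
#   J = (1/m) * sum_i sum_k [ -y_ik*log(h_ik) - (1 - y_ik)*log(1 - h_ik) ]
#       + (lambda/(2m)) * (sum(Theta1[:,1:]**2) + sum(Theta2[:,1:]**2)),
#
# and returns its gradient computed by backpropagation.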
64 | """ 65 | m = len(y) 66 | # Reshape nn_params back into neural network 67 | theta1 = nn_params[:(hidden_layer_size * 68 | (input_layer_size + 1))].reshape((hidden_layer_size, 69 | input_layer_size +1)) 1)) 70 | 71 | theta2 = nn_params[-((hidden_layer_size + 1) * 72 | num_labels):].reshape((num_labels, 73 | hidden_layer_size + 1)) 74 | 75 | # Turn scalar y values into a matrix of binary 76 | init_y = np.zeros((m,num_labels)) # 5000 x 10 77 | 78 | for i in range(m): 79 | init_y[i][y[i]] = 1 80 | 81 | # Add column of ones to X 82 | ones = np.ones((m,1)) 83 | d = np.hstack((ones,X))# add column of ones 84 | 85 | # forward propogation with theta1 and theta2 86 | cost = [0]*m 87 | # Initalize gradient vector 88 | D1 = np.zeros_like(theta1) 89 | D2 = np.zeros_like(theta2) 90 | for i in range(m): 91 | 92 | a1 = d[i][:,None] # 401 x 1 93 | z2 = np.dot(theta1,a1) # 25 x 1 94 | a2 = sigmoid(z2) # 25 x 1 95 | a2 = np.vstack((np.ones(1),a2)) # 26 x 1 96 | z3 = np.dot(theta2,a2) #10 x 1 97 | h = sigmoid(z3) # 10 x 1 98 | a3 = h # 10 x 1 99 | cost[i] = (np.sum((-init_y[i][:,None])*(np.log(h)) - 100 | (1-init_y[i][:,None])*(np.log(1-h))))/m 101 | 102 | # Calculate Gradient 103 | d3 = a3 - init_y[i][:,None] 104 | d2 = np.dot(theta2.T,d3)[1:]*(sigmoidGradient(z2)) 105 | 106 | # Accumulate errors for gradient calculation 107 | D1 = D1 + np.dot(d2,a1.T) # 25 x 401 (matches theta0) 108 | D2 = D2 + np.dot(d3,a2.T) # 10 x 26 (matches theta1) 109 | 110 | # regularization 111 | reg = (reg_param/(2*m))*((np.sum(theta1[:,1:]**2)) + 112 | (np.sum(theta2[:,1:]**2))) 113 | 114 | # Compute final gradient with regularization 115 | grad1 = (1.0/m)*D1 + (reg_param/m)*theta1 116 | grad1[0] = grad1[0] - (reg_param/m)*theta1[0] 117 | 118 | grad2 = (1.0/m)*D2 + (reg_param/m)*theta2 119 | grad2[0] = grad2[0] - (reg_param/m)*theta2[0] 120 | 121 | # Append and unroll gradient 122 | grad = np.append(grad1,grad2).reshape(-1) 123 | final_cost = sum(cost) + reg 124 | 125 | return (final_cost, grad) 126 | 127 | 128 | 129 | def randInitializeWeights(L_in,L_out): 130 | """ 131 | Randomly initalize the weights of a layer with L_in incoming 132 | connections and L_out outgoing connections. Avoids symmetry 133 | problems when training the neural network. 134 | """ 135 | randWeights = np.random.uniform(low=-.12,high=.12, 136 | size=(L_in,L_out)) 137 | return randWeights 138 | 139 | def debugInitializeWeights(fan_in, fan_out): 140 | """ 141 | Initializes the weights of a layer with fan_in incoming connections and 142 | fan_out outgoing connections using a fixed set of values. 143 | """ 144 | 145 | # Set W to zero matrix 146 | W = np.zeros((fan_out,fan_in + 1)) 147 | 148 | # Initialize W using "sin". This ensures that W is always of the same 149 | # values and will be useful in debugging. 150 | W = np.array([np.sin(w) for w in 151 | range(np.size(W))]).reshape((np.size(W,0),np.size(W,1))) 152 | 153 | return W 154 | 155 | def computeNumericalGradient(J,theta): 156 | """ 157 | Computes the gradient of J around theta using finite differences and 158 | yields a numerical estimate of the gradient. 
159 | """ 160 | 161 | numgrad = np.zeros_like(theta) 162 | perturb = np.zeros_like(theta) 163 | tol = 1e-4 164 | 165 | for p in range(len(theta)): 166 | # Set perturbation vector 167 | perturb[p] = tol 168 | loss1 = J(theta - perturb) 169 | loss2 = J(theta + perturb) 170 | 171 | # Compute numerical gradient 172 | numgrad[p] = (loss2 - loss1)/(2 * tol) 173 | perturb[p] = 0 174 | 175 | 176 | return numgrad 177 | 178 | def checkNNGradients(reg_param): 179 | """ 180 | Creates a small neural network to check the back propogation gradients. 181 | Outputs the analytical gradients produced by the back prop code and the 182 | numerical gradients computed using the computeNumericalGradient function. 183 | These should result in very similar values. 184 | """ 185 | # Set up small NN 186 | input_layer_size = 3 187 | hidden_layer_size = 5 188 | num_labels = 3 189 | m = 5 190 | 191 | # Generate some random test data 192 | Theta1 = debugInitializeWeights(hidden_layer_size,input_layer_size) 193 | Theta2 = debugInitializeWeights(num_labels,hidden_layer_size) 194 | 195 | # Reusing debugInitializeWeights to get random X 196 | X = debugInitializeWeights(input_layer_size - 1, m) 197 | 198 | # Set each element of y to be in [0,num_labels] 199 | y = [(i % num_labels) for i in range(m)] 200 | 201 | # Unroll parameters 202 | nn_params = np.append(Theta1,Theta2).reshape(-1) 203 | 204 | # Compute Cost 205 | cost, grad = nnCostFunction(nn_params, 206 | input_layer_size, 207 | hidden_layer_size, 208 | num_labels, 209 | X, y, reg_param) 210 | 211 | def reduced_cost_func(p): 212 | """ Cheaply decorated nnCostFunction """ 213 | return nnCostFunction(p,input_layer_size,hidden_layer_size,num_labels, 214 | X,y,reg_param)[0] 215 | 216 | numgrad = computeNumericalGradient(reduced_cost_func,nn_params) 217 | 218 | # Check two gradients 219 | np.testing.assert_almost_equal(grad, numgrad) 220 | 221 | return 222 | 223 | 224 | input_layer_size = 400 225 | hidden_layer_size = 25 226 | num_labels = 10 227 | 228 | print("Loading training data...") 229 | 230 | raw_mat = scipy.io.loadmat("ex4data1.mat") 231 | X = raw_mat.get("X") 232 | y = raw_mat.get("y").flatten() 233 | y = (y - 1) % 10 # ex3 way of converting MATLAB 1-indexing 234 | 235 | # Randomly select 100 datapoints to display 236 | rand_indices = np.random.randint(0,len(X),100) 237 | sel = X[rand_indices,:] 238 | 239 | digit_grid, ax = displayData(sel) 240 | digit_grid.show() 241 | 242 | print("Loading neural network parameters \n") 243 | 244 | raw_params = scipy.io.loadmat("ex4weights.mat") 245 | theta1 = raw_params.get("Theta1") # 25 x 401 246 | theta2 = raw_params.get("Theta2") # 10 x 26 247 | 248 | # Unroll Parameters 249 | nn_params = np.append(theta1,theta2).reshape(-1) 250 | 251 | print("Checking cost function without regularization...") 252 | reg_param = 0.0 253 | cost, g = nnCostFunction(nn_params,input_layer_size,hidden_layer_size,num_labels, 254 | X,y,reg_param) 255 | 256 | # Test for correct cost 257 | np.testing.assert_almost_equal(0.287629,cost,decimal=6, err_msg="Cost incorrect.") 258 | 259 | # Regularized 260 | print("Checking cost function with regularization...") 261 | reg_param = 1.0 262 | reg_cost, g = nnCostFunction(nn_params,input_layer_size,hidden_layer_size,num_labels, 263 | X,y,reg_param) 264 | np.testing.assert_almost_equal(0.383770,reg_cost,decimal=6, 265 | err_msg="Regularized Cost incorrect.") 266 | 267 | print("Checking sigmoid gradient...") 268 | vals = np.array([1,-0.5,0,0.5,1]) 269 | g = sigmoidGradient(vals) 270 | 
np.testing.assert_almost_equal(0.25, g[2],decimal=2, err_msg="Sigmoid gradient incorrect")

# Initialize neural network parameters
print("Initializing neural network parameters...")
# the shapes are unrolled immediately below, so only the element count matters
initial_theta1 = randInitializeWeights(input_layer_size+1,hidden_layer_size)
initial_theta2 = randInitializeWeights(hidden_layer_size+1,num_labels)

# Unroll
initial_nn_params = np.append(initial_theta1,initial_theta2).reshape(-1)

reg_param = 0.0
initial_cost, g = nnCostFunction(initial_nn_params,input_layer_size,
                                 hidden_layer_size,num_labels,X,y,reg_param)

print("The initial cost after random initialization: ", initial_cost)

# Check gradients without regularization
checkNNGradients(0)

# Check gradients with a non-zero regularization parameter
checkNNGradients(3.0)

# # Debugging value of the cost function
# reg_param = 10
# debug_J = nnCostFunction(initial_nn_params,input_layer_size,
#                          hidden_layer_size,num_labels,X,y,reg_param)[0]
# np.testing.assert_almost_equal(debug_J, 0.576051)


# Train NN parameters
reg_param = 3.0
def reduced_cost_func(p):

    return nnCostFunction(p,input_layer_size,hidden_layer_size,num_labels,
                          X,y,reg_param)

results = minimize(reduced_cost_func,
                   initial_nn_params,
                   method="CG",
                   jac=True,
                   options={'maxiter':50, "disp":True})

fitted_params = results.x
# Reshape fitted_params back into neural network
theta1 = fitted_params[:(hidden_layer_size *
                         (input_layer_size + 1))].reshape((hidden_layer_size,
                                                           input_layer_size + 1))

theta2 = fitted_params[-((hidden_layer_size + 1) *
                         num_labels):].reshape((num_labels,
                                                hidden_layer_size + 1))

predictions = predict(theta1, theta2, X)
accuracy = np.mean(y == predictions) * 100
print("Training Accuracy with neural network: ", accuracy, "%")

# Display the hidden layer
digit_grid, ax = displayData(theta1[:,1:])
digit_grid.show()
--------------------------------------------------------------------------------
/ex4/ex4data1.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deyachatterjee/ml-andrewng-python/03cf16e3d26cd65e791dc6a9e1b49d01ff0b70f3/ex4/ex4data1.mat
--------------------------------------------------------------------------------
/ex4/ex4weights.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deyachatterjee/ml-andrewng-python/03cf16e3d26cd65e791dc6a9e1b49d01ff0b70f3/ex4/ex4weights.mat
--------------------------------------------------------------------------------
/ex4/token.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deyachatterjee/ml-andrewng-python/03cf16e3d26cd65e791dc6a9e1b49d01ff0b70f3/ex4/token.mat
--------------------------------------------------------------------------------
/ex5/ex5.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deyachatterjee/ml-andrewng-python/03cf16e3d26cd65e791dc6a9e1b49d01ff0b70f3/ex5/ex5.pdf
--------------------------------------------------------------------------------
/ex5/ex5.py:
--------------------------------------------------------------------------------
import numpy as np
import matplotlib.pyplot as plt
import scipy.io # Used to load the OCTAVE *.mat files
from scipy.optimize import minimize

def plotData():
    plt.figure(figsize=(8,5))
    plt.ylabel('Water flowing out of the dam (y)')
    plt.xlabel('Change in water level (x)')
    plt.plot(X[:,1],y,'rx')
    plt.grid(True)

datafile = 'ex5data1.mat'
mat = scipy.io.loadmat( datafile )
X = mat.get("X")
y = mat.get("y")
ytest = mat.get("ytest")
yval = mat.get("yval")
Xtest = mat.get("Xtest")
Xval = mat.get("Xval")

def linearRegCostFunction(init_theta,X,y,reg):
    m = len(y)
    # Make theta 2-d to get cost and gradient
    theta = init_theta[:,None]
    cost = ((np.sum((np.dot(X,theta) - y)**2))/(2*m) +
            (reg/(2*m))*np.sum(theta[1:]**2)) # the intercept is not regularized

    # Regularized gradient
    gradients = ((np.sum((np.dot(X,theta)-y)*X,axis=0)/m) +
                 (reg/m)*theta.flatten())

    # Replace gradient for theta_0 with the non-regularized gradient
    gradients[0] = gradients[0] - (reg/m)*theta[0,0]

    return (cost, gradients)

def trainLinearReg(X,y,reg):
    initial_theta = np.zeros(np.size(X,1))

    res = minimize(linearRegCostFunction,
                   initial_theta,
                   args=(X,y,reg),
                   jac=True,
                   options={'maxiter':400,'disp':True})

    return res.x

def learningCurve(X,y,Xval,yval,reg):
    m_train = len(X)
    m_val = len(Xval)
    error_train = np.zeros(m_train)
    error_val = np.zeros(m_train)
    for i in range(1,m_train+1):
        est_theta = trainLinearReg(X[0:i],y[0:i],reg)
        # flatten y so the residuals stay 1-d
        error_train[i-1] = (np.sum((np.dot(X[0:i],est_theta)-y[0:i].flatten())**2))/(2.0*i)
        error_val[i-1] = (np.sum((np.dot(Xval,est_theta)-yval.flatten())**2))/(2.0*m_val)

    return (error_train, error_val)

def polyFeatures(X,p):
    X_poly = np.zeros((len(X),p))
    X = X.flatten()
    for i in range(1,p+1):
        X_poly[:,i-1] = X**i
    return X_poly

def featureNormalize(X):
    mu = np.mean(X,axis=0)
    sigma = np.std(X,axis=0)
    normalized_X = np.divide(X - mu,sigma)

    return (normalized_X, mu, sigma)

def plotFit(min_x, max_x, mu, sigma, theta, p):
    x = np.arange(min_x - 15, max_x + 25, 0.05).reshape((-1,1))
    X_poly = polyFeatures(x,p)
    X_poly = np.divide(X_poly - mu, sigma)
    X_poly = np.hstack((np.ones(len(X_poly)).reshape((-1,1)),X_poly))
    plt.plot(x,np.dot(X_poly,theta),'b--',linewidth=2)
    return

def validationCurve(X, y, Xval, yval):
    lambda_vec = np.array([0, 0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1, 3, 10])
    error_train = np.zeros(len(lambda_vec))
    error_val = np.zeros(len(lambda_vec))

    for i in range(len(lambda_vec)):
        m_train = len(X)
        m_val = len(Xval)
        reg_param = lambda_vec[i]
        est_theta = trainLinearReg(X,y,reg_param)
        error_train[i] = (np.sum((np.dot(X,est_theta)-y.flatten())**2))/(2.0*m_train)
        error_val[i] = (np.sum((np.dot(Xval,est_theta)-yval.flatten())**2))/(2.0*m_val)

    return (lambda_vec, error_train, error_val)




plt.plot(X, y, 'rx', markersize=10, linewidth=1.5)
plt.xlabel('Change in water level (x)')
plt.ylabel('Water flowing out of the dam (y)')
plt.show()

# Regularized cost
full_X = np.hstack((np.ones_like(y), X))
theta = np.array([1,1])
J, g = linearRegCostFunction(theta,full_X,y,0.0)

# Regularized gradient
J, g = linearRegCostFunction(theta,full_X,y,1.0)
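# Added check (expected values from the course PDF): with theta = [1, 1] and
# lambda = 1, the cost should be about 303.993 and the gradient about
# [-15.30, 598.25].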

# Train linear regression
reg_param = 0
est_theta = trainLinearReg(full_X,y,reg_param)

# Plot linear fit
plt.plot(X, y, 'rx', markersize=10, linewidth=1.5)
plt.plot(X,np.dot(full_X,est_theta),'b-',linewidth=2)
plt.xlabel('Change in water level (x)')
plt.ylabel('Water flowing out of the dam (y)')
plt.show()

# Learning curve
reg_param = 0.0
full_Xval = np.hstack((np.ones_like(yval),Xval))
error_train, error_val = learningCurve(full_X,y,full_Xval,yval,reg_param)

plt.plot(range(len(X)), error_train, range(len(X)), error_val)
plt.title('Learning curve for linear regression')
plt.legend(['Train', 'Cross Validation'])
plt.xlabel('Number of training examples')
plt.ylabel('Error')
plt.show()

# Feature mapping for polynomial regression
p = 8
X_poly = polyFeatures(X,p)
X_poly, mu, sigma = featureNormalize(X_poly)
X_poly = np.hstack((np.ones_like(y),X_poly))

X_poly_test = polyFeatures(Xtest,p)
X_poly_test = np.divide(X_poly_test - mu, sigma)
X_poly_test = np.hstack((np.ones_like(ytest),X_poly_test))

X_poly_val = polyFeatures(Xval,p)
X_poly_val = np.divide(X_poly_val - mu, sigma)
X_poly_val = np.hstack((np.ones_like(yval),X_poly_val))

# Learning curve for polynomial regression
reg_param = 1.0
est_theta = trainLinearReg(X_poly,y,reg_param)
plt.plot(X, y, 'rx', markersize=10, linewidth=1.5)
plotFit(np.min(X), np.max(X), mu, sigma, est_theta, p)
plt.xlabel('Change in water level (x)')
plt.ylabel('Water flowing out of the dam (y)')
plt.show()

error_train, error_val = learningCurve(X_poly,y,X_poly_val,yval,reg_param)

plt.plot(range(len(X)), error_train, range(len(X)), error_val)
plt.title('Learning curve for polynomial regression')
plt.legend(['Train', 'Cross Validation'])
plt.xlabel('Number of training examples')
plt.ylabel('Error')
plt.show()

# Validation curve for selecting lambda (using the polynomial features,
# as in the exercise)
lambda_vec, error_train, error_val = validationCurve(X_poly,y,X_poly_val,yval)

plt.plot(lambda_vec, error_train, lambda_vec, error_val)
plt.title('Selecting lambda using a cross validation set')
plt.legend(['Train', 'Cross Validation'])
plt.xlabel('lambda')
plt.ylabel('Error')
plt.show()
--------------------------------------------------------------------------------
/ex5/ex5data1.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deyachatterjee/ml-andrewng-python/03cf16e3d26cd65e791dc6a9e1b49d01ff0b70f3/ex5/ex5data1.mat
--------------------------------------------------------------------------------
/ex5/token.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deyachatterjee/ml-andrewng-python/03cf16e3d26cd65e791dc6a9e1b49d01ff0b70f3/ex5/token.mat
--------------------------------------------------------------------------------
/ex6/emailSample1.txt:
--------------------------------------------------------------------------------
> Anyone knows how much it costs to host a web portal ?
>
Well, it depends on how many visitors you're expecting.
This can be anywhere from less than 10 bucks a month to a couple of $100. 
5 | You should checkout http://www.rackspace.com/ or perhaps Amazon EC2 6 | if youre running something big.. 7 | 8 | To unsubscribe yourself from this mailing list, send an email to: 9 | groupname-unsubscribe@egroups.com 10 | 11 | -------------------------------------------------------------------------------- /ex6/emailSample2.txt: -------------------------------------------------------------------------------- 1 | Folks, 2 | 3 | my first time posting - have a bit of Unix experience, but am new to Linux. 4 | 5 | 6 | Just got a new PC at home - Dell box with Windows XP. Added a second hard disk 7 | for Linux. Partitioned the disk and have installed Suse 7.2 from CD, which went 8 | fine except it didn't pick up my monitor. 9 | 10 | I have a Dell branded E151FPp 15" LCD flat panel monitor and a nVidia GeForce4 11 | Ti4200 video card, both of which are probably too new to feature in Suse's default 12 | set. I downloaded a driver from the nVidia website and installed it using RPM. 13 | Then I ran Sax2 (as was recommended in some postings I found on the net), but 14 | it still doesn't feature my video card in the available list. What next? 15 | 16 | Another problem. I have a Dell branded keyboard and if I hit Caps-Lock twice, 17 | the whole machine crashes (in Linux, not Windows) - even the on/off switch is 18 | inactive, leaving me to reach for the power cable instead. 19 | 20 | If anyone can help me in any way with these probs., I'd be really grateful - 21 | I've searched the 'net but have run out of ideas. 22 | 23 | Or should I be going for a different version of Linux such as RedHat? Opinions 24 | welcome. 25 | 26 | Thanks a lot, 27 | Peter 28 | 29 | -- 30 | Irish Linux Users' Group: ilug@linux.ie 31 | http://www.linux.ie/mailman/listinfo/ilug for (un)subscription information. 
List maintainer: listmaster@linux.ie


--------------------------------------------------------------------------------
/ex6/ex6.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deyachatterjee/ml-andrewng-python/03cf16e3d26cd65e791dc6a9e1b49d01ff0b70f3/ex6/ex6.pdf
--------------------------------------------------------------------------------
/ex6/ex6data1.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deyachatterjee/ml-andrewng-python/03cf16e3d26cd65e791dc6a9e1b49d01ff0b70f3/ex6/ex6data1.mat
--------------------------------------------------------------------------------
/ex6/ex6data2.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deyachatterjee/ml-andrewng-python/03cf16e3d26cd65e791dc6a9e1b49d01ff0b70f3/ex6/ex6data2.mat
--------------------------------------------------------------------------------
/ex6/ex6data3.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deyachatterjee/ml-andrewng-python/03cf16e3d26cd65e791dc6a9e1b49d01ff0b70f3/ex6/ex6data3.mat
--------------------------------------------------------------------------------
/ex6/ex6spam.py:
--------------------------------------------------------------------------------
import numpy as np
import matplotlib.pyplot as plt
import scipy.io
from sklearn import svm
import re
import nltk, nltk.stem.porter

print("emailSample1.txt:")
print(open('emailSample1.txt').read()) # the sample files sit next to this script

"""
Anyone knows how much it costs to host a web portal ?
>
Well, it depends on how many visitors you're expecting.
This can be anywhere from less than 10 bucks a month to a couple of $100.
You should checkout http://www.rackspace.com/ or perhaps Amazon EC2
if youre running something big..

To unsubscribe yourself from this mailing list, send an email to:
groupname-unsubscribe@egroups.com
"""

def preProcess( email ):
    email = email.lower()
    # Strip html tags, replace with a space
    email = re.sub('<[^<>]+>', ' ', email)
    # Any numbers get replaced with the string 'number'
    email = re.sub('[0-9]+', 'number', email)
    # Anything starting with http:// or https:// gets replaced with 'httpaddr'
    email = re.sub(r'(http|https)://[^\s]*', 'httpaddr', email)
    # Strings with "@" in the middle are considered emails --> 'emailaddr'
    email = re.sub(r'[^\s]+@[^\s]+', 'emailaddr', email)
    # The '$' sign gets replaced with 'dollar'
    email = re.sub('[$]+', 'dollar', email)
    return email
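# Added example (illustrative input, not from the course data): preProcess
# applies the substitutions above in order, so
#
#     preProcess("Visit http://example.com to win $100!")
#     # -> 'visit httpaddr to win dollarnumber!'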
38 | def email2TokenList( raw_email ):
39 |     """
40 |     Function that takes in a preprocessed (simplified) email, tokenizes it,
41 |     stems each word, and returns an (ordered) list of tokens in the e-mail
42 |     """
43 | 
44 |     stemmer = nltk.stem.porter.PorterStemmer()
45 |     email = preProcess( raw_email )
46 | 
47 |     # Split the e-mail into individual words (tokens).
48 |     # Splitting on many delimiters at once is easiest with re.split()
49 |     tokens = re.split('[ \@\$\/\#\.\-\:\&\*\+\=\[\]\?\!\(\)\{\}\,\'\"\>\_\<\;\%]', email)
50 | 
51 |     # Loop over each token: strip non-alphanumeric characters, stem it, and keep it if non-empty
52 |     tokenlist = []
53 |     for token in tokens:
54 | 
55 |         token = re.sub('[^a-zA-Z0-9]', '', token)
56 |         # Throw out empty tokens
57 |         if not len(token): continue
58 |         # Store the stemmed version of the token
59 |         stemmed = stemmer.stem( token )
60 |         tokenlist.append(stemmed)
61 | 
62 |     return tokenlist
63 | 
64 | def getVocabDict(reverse=False):
65 |     """
66 |     Function to read in the supplied vocab list text file into a dictionary.
67 |     Dictionary key is the stemmed word, value is the (1-based) index in the text file.
68 |     If "reverse", the keys and values are switched.
69 |     """
70 |     vocab_dict = {}
71 |     with open("vocab.txt") as f:
72 |         for line in f:
73 |             (val, key) = line.split()
74 |             if not reverse:
75 |                 vocab_dict[key] = int(val)
76 |             else:
77 |                 vocab_dict[int(val)] = key
78 | 
79 |     return vocab_dict
80 | 
81 | 
82 | def email2VocabIndices( raw_email, vocab_dict ):
83 |     # returns a list of indices corresponding to the location in vocab_dict for each stemmed word
84 |     tokenlist = email2TokenList( raw_email )
85 |     index_list = [ vocab_dict[token] for token in tokenlist if token in vocab_dict ]
86 |     return index_list
87 | 
88 | # feature extraction
89 | 
90 | def email2FeatureVector( raw_email, vocab_dict ):
91 |     # returns a vector of shape (n,1) where n is the size of the vocab_dict.
92 |     # The first element in this vector is 1 if the vocab word with index == 1 is in raw_email, else 0
93 |     n = len(vocab_dict)
94 |     result = np.zeros((n,1))
95 |     vocab_indices = email2VocabIndices( raw_email, vocab_dict )
96 |     for idx in vocab_indices:
97 |         result[idx-1] = 1  # vocab indices are 1-based
98 |     return result
99 | 
100 | # For emailSample1.txt, the feature vector has length 1899 and 45 non-zero entries.
101 | 
102 | vocab_dict = getVocabDict()
103 | email_contents = open( 'emailSample1.txt', 'r' ).read()
104 | test_fv = email2FeatureVector( email_contents, vocab_dict )
105 | 
106 | print("Length of feature vector is %d" % len(test_fv))
107 | print("Number of non-zero entries is: %d" % int((test_fv == 1).sum()))
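# Added illustration (not part of the original script): the same pipeline applies
# to the bundled spam samples as well, e.g.:
spam_contents = open( 'spamSample1.txt', 'r' ).read()
spam_fv = email2FeatureVector( spam_contents, vocab_dict )
print("Non-zero entries for spamSample1.txt: %d" % int((spam_fv == 1).sum()))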
108 | 
109 | 
110 | 
111 | # svm for spam classification
112 | datafile = 'spamTrain.mat'
113 | mat = scipy.io.loadmat( datafile )
114 | X, y = mat['X'], mat['y']
115 | # Test set
116 | datafile = 'spamTest.mat'
117 | mat = scipy.io.loadmat( datafile )
118 | Xtest, ytest = mat['Xtest'], mat['ytest']
119 | pos = np.array([X[i] for i in range(X.shape[0]) if y[i] == 1])
120 | neg = np.array([X[i] for i in range(X.shape[0]) if y[i] == 0])
121 | print('Total number of training emails = ', X.shape[0])
122 | print('Number of training spam emails = ', pos.shape[0])
123 | print('Number of training nonspam emails = ', neg.shape[0])
124 | 
125 | # First we make an instance of an SVM with C=0.1 and 'linear' kernel
126 | linear_svm = svm.SVC(C=0.1, kernel='linear')
127 | 
128 | # Now we fit the SVM to our X matrix, given the labels y
129 | linear_svm.fit( X, y.flatten() )
130 | 
131 | 
132 | # expect a training accuracy of about 99.8% and a test accuracy of about 98.5%
133 | 
134 | train_predictions = linear_svm.predict(X).reshape((y.shape[0],1))
135 | train_acc = 100. * float(sum(train_predictions == y))/y.shape[0]
136 | print('Training accuracy = %0.2f%%' % train_acc)
137 | 
138 | test_predictions = linear_svm.predict(Xtest).reshape((ytest.shape[0],1))
139 | test_acc = 100. * float(sum(test_predictions == ytest))/ytest.shape[0]
140 | print('Test set accuracy = %0.2f%%' % test_acc)
141 | 
142 | # Determine the words most likely to indicate that an e-mail is spam.
143 | # From the trained SVM we can get a list of the weight coefficients for each
144 | # word (technically, each word index)
145 | 
146 | vocab_dict_flipped = getVocabDict(reverse=True)
147 | 
148 | # Sort indices from most important to least important (high to low weight)
149 | sorted_indices = np.argsort( linear_svm.coef_, axis=None )[::-1]
150 | print("The 15 most important words to classify a spam e-mail are:")
151 | print([ vocab_dict_flipped[x+1] for x in sorted_indices[:15] ])  # +1: vocab indices are 1-based
152 | print()
153 | print("The 15 least important words to classify a spam e-mail are:")
154 | print([ vocab_dict_flipped[x+1] for x in sorted_indices[-15:] ])
155 | print()
156 | 
157 | # Most common word (mostly to debug); column 1190 is hard-coded from the original run
158 | most_common_word = vocab_dict_flipped[sorted_indices[0]+1]
159 | print('# of spam containing "%s" = %d/%d = %0.2f%%' %
160 |       (most_common_word, sum(pos[:,1190]), pos.shape[0],
161 |        100.*float(sum(pos[:,1190]))/pos.shape[0]))
162 | print('# of NON spam containing "%s" = %d/%d = %0.2f%%' %
163 |       (most_common_word, sum(neg[:,1190]), neg.shape[0],
164 |        100.*float(sum(neg[:,1190]))/neg.shape[0]))
165 | 
--------------------------------------------------------------------------------
/ex6/ex6svm.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import matplotlib.pyplot as plt
3 | import scipy.io as sio
4 | from sklearn.svm import SVC
5 | from sklearn.metrics import accuracy_score
6 | # gaussian_kernel and dataset3_params are defined later in this file,
7 | # so no separate helper module is imported
8 | 
9 | 
10 | # use the cross validation set Xval, yval to determine the best C and σ
11 | 
12 | def dataset3_params(X, y, Xval, yval):
13 |     C_vec = [0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30]
14 |     sigma_vec = [0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30]
15 |     scores = np.zeros((len(C_vec), len(sigma_vec)))
16 | 
17 |     for i in range(len(C_vec)):
18 |         for j in range(len(sigma_vec)):
19 |             svm = SVC(kernel='rbf', C=C_vec[i], gamma=sigma_vec[j])  # "sigma" values are passed directly as sklearn's gamma
20 |             svm.fit(X, y.ravel())
21 |             scores[i, j] = accuracy_score(yval, svm.predict(Xval))
22 | 
23 |     max_c_index, max_s_index = np.unravel_index(scores.argmax(), scores.shape)
24 |     return (C_vec[max_c_index], sigma_vec[max_s_index])
25 | 
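# Added note (not part of the original script): to search over the course's sigma
# values rather than raw gamma values, convert with gamma = 1 / (2 * sigma**2),
# since exp(-||x1-x2||**2 / (2*sigma**2)) == exp(-gamma * ||x1-x2||**2):
def sigma_to_gamma(sigma):
    # e.g. sigma = 0.1  ->  gamma = 50
    return 1.0 / (2.0 * sigma ** 2)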
26 | print('Loading and Visualizing Data ...\n')
27 | 
28 | data = sio.loadmat('ex6data1.mat')
29 | X = data['X'] # 51 x 2 matrix
30 | y = data['y'] # 51 x 1 matrix
31 | 
32 | pos = (y == 1).ravel() # flattens, i.e. makes a 1d array
33 | neg = (y == 0).ravel() # alternative code: neg = np.array([X[i] for i in range(X.shape[0]) if y[i] == 0])
34 | plt.scatter(X[pos, 0], X[pos, 1], color='black', marker='+')
35 | plt.scatter(
36 |     X[neg, 0], X[neg, 1], color='yellow', edgecolors='black', marker='o')
37 | plt.xlim(0, 4.5)
38 | plt.ylim(1.5, 5)
39 | plt.show()
40 | 
41 | input('Program paused. Press enter to continue.\n')
42 | plt.close()
43 | print('Training Linear SVM ...\n')
44 | C = 1 # default
45 | svm = SVC(kernel='linear', C=C)
46 | svm.fit(X, y.ravel())
47 | weights = svm.coef_[0]
48 | intercept = svm.intercept_[0]
49 | # draw the svm decision boundary
50 | xp = np.linspace(X.min(), X.max(), 100)
51 | yp = - (weights[0] * xp + intercept) / weights[1]
52 | 
53 | pos = (y == 1).ravel()
54 | neg = (y == 0).ravel()
55 | plt.scatter(X[pos, 0], X[pos, 1], color='black', marker='+')
56 | plt.scatter(
57 |     X[neg, 0], X[neg, 1], color='yellow', edgecolors='black', marker='o')
58 | plt.plot(xp, yp)
59 | plt.xlim(0, 4.5)
60 | plt.ylim(1.5, 5)
61 | plt.show()
62 | 
63 | input('Program paused. Press enter to continue.\n')
64 | plt.close()
65 | 
66 | # Gaussian Kernel
67 | print('Evaluating the Gaussian Kernel ...\n')
68 | 
69 | # np.linalg.norm computes the Euclidean (L2) norm of the difference by default
70 | def gaussian_kernel(x1, x2, sigma):
71 |     return np.exp(- (np.linalg.norm(x1 - x2) ** 2) / (2 * (sigma ** 2)))
72 | 
73 | 
74 | x1 = np.array([1, 2, 1])
75 | x2 = np.array([0, 4, -1])
76 | sigma = 2
77 | sim = gaussian_kernel(x1, x2, sigma)
78 | 
79 | 
80 | 
81 | print(
82 |     'Gaussian Kernel between x1 = [1; 2; 1], x2 = [0; 4; -1], sigma = {0} :\n'
83 |     .format(sigma),
84 |     '\t{0:.6f}\n(for sigma = 2, this value should be about 0.324652)'
85 |     .format(sim))
86 | 
87 | input('Program paused. Press enter to continue.\n')
88 | plt.close()
89 | 
90 | # Visualizing Dataset 2
91 | data = sio.loadmat('ex6data2.mat')
92 | X = data['X'] # 863 x 2 matrix
93 | y = data['y'] # 863 x 1 matrix
94 | 
95 | pos = (y == 1).ravel()
96 | neg = (y == 0).ravel()
97 | plt.scatter(X[pos, 0], X[pos, 1], color='black', marker='+')
98 | plt.scatter(
99 |     X[neg, 0], X[neg, 1], color='yellow', edgecolors='black', marker='o')
100 | plt.xlim(0, 1)
101 | plt.ylim(0.4, 1)
102 | plt.show()
103 | 
104 | input('Program paused. Press enter to continue.\n')
105 | plt.close()
106 | 
107 | # RBF Kernel (Dataset 2)
108 | print('Training SVM with RBF Kernel ...\n')
109 | 
110 | C = 30     # sklearn's C, used directly
111 | sigma = 30 # passed to sklearn as gamma (the course's sigma=0.1 would be gamma=50)
112 | 
113 | svm = SVC(kernel='rbf', C=C, gamma=sigma)
114 | svm.fit(X, y.ravel())
115 | 
116 | x1 = np.linspace(X[:, 0].min(), X[:, 0].max(), 100)
117 | x2 = np.linspace(X[:, 1].min(), X[:, 1].max(), 100)
118 | x1, x2 = np.meshgrid(x1, x2)
119 | yp = svm.predict(np.array([x1.ravel(), x2.ravel()]).T).reshape(x1.shape)
120 | 
121 | pos = (y == 1).ravel()
122 | neg = (y == 0).ravel()
123 | plt.scatter(X[pos, 0], X[pos, 1], color='black', marker='+')
124 | plt.scatter(
125 |     X[neg, 0], X[neg, 1], color='yellow', edgecolors='black', marker='o')
126 | plt.xlim(0, 1)
127 | plt.ylim(0.4, 1)
128 | plt.contour(x1, x2, yp)
129 | plt.show()
130 | 
131 | input('Program paused. Press enter to continue.\n')
132 | plt.close()
133 | 
134 | # Visualizing Dataset 3
135 | data = sio.loadmat('ex6data3.mat')
136 | X = data['X'] # 211 x 2 matrix
137 | y = data['y'] # 211 x 1 matrix
138 | Xval = data['Xval'] # 200 x 2 matrix
139 | yval = data['yval'] # 200 x 1 matrix
140 | 
141 | pos = (y == 1).ravel()
142 | neg = (y == 0).ravel()
143 | plt.scatter(X[pos, 0], X[pos, 1], color='black', marker='+')
144 | plt.scatter(
145 |     X[neg, 0], X[neg, 1], color='yellow', edgecolors='black', marker='o')
146 | plt.xlim(-0.6, 0.3)
147 | plt.ylim(-0.8, 0.6)
148 | plt.show()
149 | 
150 | input('Program paused. 
Press enter to continue.\n') 151 | plt.close() 152 | 153 | # RBF Kernel (Dataset 3) 154 | C, sigma = dataset3_params(X, y, Xval, yval) 155 | 156 | svm = SVC(kernel='rbf', C=C, gamma=sigma) 157 | svm.fit(X, y.ravel()) 158 | 159 | x1 = np.linspace(X[:, 0].min(), X[:, 0].max(), 100) 160 | x2 = np.linspace(X[:, 1].min(), X[:, 1].max(), 100) 161 | x1, x2 = np.meshgrid(x1, x2) 162 | yp = svm.predict(np.array([x1.ravel(), x2.ravel()]).T).reshape(x1.shape) 163 | 164 | pos = (y == 1).ravel() 165 | neg = (y == 0).ravel() 166 | plt.scatter(X[pos, 0], X[pos, 1], color='black', marker='+') 167 | plt.scatter( 168 | X[neg, 0], X[neg, 1], color='yellow', edgecolors='black', marker='o') 169 | plt.xlim(-0.6, 0.3) 170 | plt.ylim(-0.8, 0.6) 171 | plt.contour(x1, x2, yp) 172 | plt.show() 173 | 174 | input('Program paused. Press enter to continue.\n') 175 | plt.close() 176 | -------------------------------------------------------------------------------- /ex6/spamSample1.txt: -------------------------------------------------------------------------------- 1 | Do You Want To Make $1000 Or More Per Week? 2 | 3 | 4 | 5 | If you are a motivated and qualified individual - I 6 | will personally demonstrate to you a system that will 7 | make you $1,000 per week or more! This is NOT mlm. 8 | 9 | 10 | 11 | Call our 24 hour pre-recorded number to get the 12 | details. 13 | 14 | 15 | 16 | 000-456-789 17 | 18 | 19 | 20 | I need people who want to make serious money. Make 21 | the call and get the facts. 22 | 23 | Invest 2 minutes in yourself now! 24 | 25 | 26 | 27 | 000-456-789 28 | 29 | 30 | 31 | Looking forward to your call and I will introduce you 32 | to people like yourself who 33 | are currently making $10,000 plus per week! 34 | 35 | 36 | 37 | 000-456-789 38 | 39 | 40 | 41 | 3484lJGv6-241lEaN9080lRmS6-271WxHo7524qiyT5-438rjUv5615hQcf0-662eiDB9057dMtVl72 42 | 43 | -------------------------------------------------------------------------------- /ex6/spamSample2.txt: -------------------------------------------------------------------------------- 1 | Best Buy Viagra Generic Online 2 | 3 | Viagra 100mg x 60 Pills $125, Free Pills & Reorder Discount, Top Selling 100% Quality & Satisfaction guaranteed! 4 | 5 | We accept VISA, Master & E-Check Payments, 90000+ Satisfied Customers! 
6 | http://medphysitcstech.ru 7 | 8 | 9 | -------------------------------------------------------------------------------- /ex6/spamTest.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deyachatterjee/ml-andrewng-python/03cf16e3d26cd65e791dc6a9e1b49d01ff0b70f3/ex6/spamTest.mat -------------------------------------------------------------------------------- /ex6/spamTrain.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deyachatterjee/ml-andrewng-python/03cf16e3d26cd65e791dc6a9e1b49d01ff0b70f3/ex6/spamTrain.mat -------------------------------------------------------------------------------- /ex6/token.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deyachatterjee/ml-andrewng-python/03cf16e3d26cd65e791dc6a9e1b49d01ff0b70f3/ex6/token.mat -------------------------------------------------------------------------------- /ex6/vocab.txt: -------------------------------------------------------------------------------- 1 | 1 aa 2 | 2 ab 3 | 3 abil 4 | 4 abl 5 | 5 about 6 | 6 abov 7 | 7 absolut 8 | 8 abus 9 | 9 ac 10 | 10 accept 11 | 11 access 12 | 12 accord 13 | 13 account 14 | 14 achiev 15 | 15 acquir 16 | 16 across 17 | 17 act 18 | 18 action 19 | 19 activ 20 | 20 actual 21 | 21 ad 22 | 22 adam 23 | 23 add 24 | 24 addit 25 | 25 address 26 | 26 administr 27 | 27 adult 28 | 28 advanc 29 | 29 advantag 30 | 30 advertis 31 | 31 advic 32 | 32 advis 33 | 33 ae 34 | 34 af 35 | 35 affect 36 | 36 affili 37 | 37 afford 38 | 38 africa 39 | 39 after 40 | 40 ag 41 | 41 again 42 | 42 against 43 | 43 agenc 44 | 44 agent 45 | 45 ago 46 | 46 agre 47 | 47 agreement 48 | 48 aid 49 | 49 air 50 | 50 al 51 | 51 alb 52 | 52 align 53 | 53 all 54 | 54 allow 55 | 55 almost 56 | 56 alon 57 | 57 along 58 | 58 alreadi 59 | 59 alsa 60 | 60 also 61 | 61 altern 62 | 62 although 63 | 63 alwai 64 | 64 am 65 | 65 amaz 66 | 66 america 67 | 67 american 68 | 68 among 69 | 69 amount 70 | 70 amp 71 | 71 an 72 | 72 analysi 73 | 73 analyst 74 | 74 and 75 | 75 ani 76 | 76 anim 77 | 77 announc 78 | 78 annual 79 | 79 annuiti 80 | 80 anoth 81 | 81 answer 82 | 82 anti 83 | 83 anumb 84 | 84 anybodi 85 | 85 anymor 86 | 86 anyon 87 | 87 anyth 88 | 88 anywai 89 | 89 anywher 90 | 90 aol 91 | 91 ap 92 | 92 apolog 93 | 93 app 94 | 94 appar 95 | 95 appear 96 | 96 appl 97 | 97 appli 98 | 98 applic 99 | 99 appreci 100 | 100 approach 101 | 101 approv 102 | 102 apt 103 | 103 ar 104 | 104 archiv 105 | 105 area 106 | 106 aren 107 | 107 argument 108 | 108 arial 109 | 109 arm 110 | 110 around 111 | 111 arrai 112 | 112 arriv 113 | 113 art 114 | 114 articl 115 | 115 artist 116 | 116 as 117 | 117 ascii 118 | 118 ask 119 | 119 asset 120 | 120 assist 121 | 121 associ 122 | 122 assum 123 | 123 assur 124 | 124 at 125 | 125 atol 126 | 126 attach 127 | 127 attack 128 | 128 attempt 129 | 129 attent 130 | 130 attornei 131 | 131 attract 132 | 132 audio 133 | 133 aug 134 | 134 august 135 | 135 author 136 | 136 auto 137 | 137 autom 138 | 138 automat 139 | 139 avail 140 | 140 averag 141 | 141 avoid 142 | 142 awai 143 | 143 awar 144 | 144 award 145 | 145 ba 146 | 146 babi 147 | 147 back 148 | 148 background 149 | 149 backup 150 | 150 bad 151 | 151 balanc 152 | 152 ban 153 | 153 bank 154 | 154 bar 155 | 155 base 156 | 156 basenumb 157 | 157 basi 158 | 158 basic 159 | 159 bb 160 | 160 bc 161 | 161 bd 162 | 162 be 163 | 163 beat 164 | 164 beberg 165 | 165 becaus 166 | 
166 becom 167 | 167 been 168 | 168 befor 169 | 169 begin 170 | 170 behalf 171 | 171 behavior 172 | 172 behind 173 | 173 believ 174 | 174 below 175 | 175 benefit 176 | 176 best 177 | 177 beta 178 | 178 better 179 | 179 between 180 | 180 bf 181 | 181 big 182 | 182 bill 183 | 183 billion 184 | 184 bin 185 | 185 binari 186 | 186 bit 187 | 187 black 188 | 188 blank 189 | 189 block 190 | 190 blog 191 | 191 blood 192 | 192 blue 193 | 193 bnumber 194 | 194 board 195 | 195 bodi 196 | 196 boi 197 | 197 bonu 198 | 198 book 199 | 199 boot 200 | 200 border 201 | 201 boss 202 | 202 boston 203 | 203 botan 204 | 204 both 205 | 205 bottl 206 | 206 bottom 207 | 207 boundari 208 | 208 box 209 | 209 brain 210 | 210 brand 211 | 211 break 212 | 212 brian 213 | 213 bring 214 | 214 broadcast 215 | 215 broker 216 | 216 browser 217 | 217 bug 218 | 218 bui 219 | 219 build 220 | 220 built 221 | 221 bulk 222 | 222 burn 223 | 223 bush 224 | 224 busi 225 | 225 but 226 | 226 button 227 | 227 by 228 | 228 byte 229 | 229 ca 230 | 230 cabl 231 | 231 cach 232 | 232 calcul 233 | 233 california 234 | 234 call 235 | 235 came 236 | 236 camera 237 | 237 campaign 238 | 238 can 239 | 239 canada 240 | 240 cannot 241 | 241 canon 242 | 242 capabl 243 | 243 capillari 244 | 244 capit 245 | 245 car 246 | 246 card 247 | 247 care 248 | 248 career 249 | 249 carri 250 | 250 cartridg 251 | 251 case 252 | 252 cash 253 | 253 cat 254 | 254 catch 255 | 255 categori 256 | 256 caus 257 | 257 cb 258 | 258 cc 259 | 259 cd 260 | 260 ce 261 | 261 cell 262 | 262 cent 263 | 263 center 264 | 264 central 265 | 265 centuri 266 | 266 ceo 267 | 267 certain 268 | 268 certainli 269 | 269 cf 270 | 270 challeng 271 | 271 chanc 272 | 272 chang 273 | 273 channel 274 | 274 char 275 | 275 charact 276 | 276 charg 277 | 277 charset 278 | 278 chat 279 | 279 cheap 280 | 280 check 281 | 281 cheer 282 | 282 chief 283 | 283 children 284 | 284 china 285 | 285 chip 286 | 286 choic 287 | 287 choos 288 | 288 chri 289 | 289 citi 290 | 290 citizen 291 | 291 civil 292 | 292 claim 293 | 293 class 294 | 294 classifi 295 | 295 clean 296 | 296 clear 297 | 297 clearli 298 | 298 click 299 | 299 client 300 | 300 close 301 | 301 clue 302 | 302 cnet 303 | 303 cnumber 304 | 304 co 305 | 305 code 306 | 306 collect 307 | 307 colleg 308 | 308 color 309 | 309 com 310 | 310 combin 311 | 311 come 312 | 312 comfort 313 | 313 command 314 | 314 comment 315 | 315 commentari 316 | 316 commerci 317 | 317 commiss 318 | 318 commit 319 | 319 common 320 | 320 commun 321 | 321 compani 322 | 322 compar 323 | 323 comparison 324 | 324 compat 325 | 325 compet 326 | 326 competit 327 | 327 compil 328 | 328 complet 329 | 329 comprehens 330 | 330 comput 331 | 331 concentr 332 | 332 concept 333 | 333 concern 334 | 334 condit 335 | 335 conf 336 | 336 confer 337 | 337 confid 338 | 338 confidenti 339 | 339 config 340 | 340 configur 341 | 341 confirm 342 | 342 conflict 343 | 343 confus 344 | 344 congress 345 | 345 connect 346 | 346 consid 347 | 347 consolid 348 | 348 constitut 349 | 349 construct 350 | 350 consult 351 | 351 consum 352 | 352 contact 353 | 353 contain 354 | 354 content 355 | 355 continu 356 | 356 contract 357 | 357 contribut 358 | 358 control 359 | 359 conveni 360 | 360 convers 361 | 361 convert 362 | 362 cool 363 | 363 cooper 364 | 364 copi 365 | 365 copyright 366 | 366 core 367 | 367 corpor 368 | 368 correct 369 | 369 correspond 370 | 370 cost 371 | 371 could 372 | 372 couldn 373 | 373 count 374 | 374 countri 375 | 375 coupl 376 | 376 cours 377 | 377 court 378 | 378 cover 379 | 379 coverag 380 | 380 
crash 381 | 381 creat 382 | 382 creativ 383 | 383 credit 384 | 384 critic 385 | 385 cross 386 | 386 cultur 387 | 387 current 388 | 388 custom 389 | 389 cut 390 | 390 cv 391 | 391 da 392 | 392 dagga 393 | 393 dai 394 | 394 daili 395 | 395 dan 396 | 396 danger 397 | 397 dark 398 | 398 data 399 | 399 databas 400 | 400 datapow 401 | 401 date 402 | 402 dave 403 | 403 david 404 | 404 dc 405 | 405 de 406 | 406 dead 407 | 407 deal 408 | 408 dear 409 | 409 death 410 | 410 debt 411 | 411 decad 412 | 412 decid 413 | 413 decis 414 | 414 declar 415 | 415 declin 416 | 416 decor 417 | 417 default 418 | 418 defend 419 | 419 defens 420 | 420 defin 421 | 421 definit 422 | 422 degre 423 | 423 delai 424 | 424 delet 425 | 425 deliv 426 | 426 deliveri 427 | 427 dell 428 | 428 demand 429 | 429 democrat 430 | 430 depart 431 | 431 depend 432 | 432 deposit 433 | 433 describ 434 | 434 descript 435 | 435 deserv 436 | 436 design 437 | 437 desir 438 | 438 desktop 439 | 439 despit 440 | 440 detail 441 | 441 detect 442 | 442 determin 443 | 443 dev 444 | 444 devel 445 | 445 develop 446 | 446 devic 447 | 447 di 448 | 448 dial 449 | 449 did 450 | 450 didn 451 | 451 diet 452 | 452 differ 453 | 453 difficult 454 | 454 digit 455 | 455 direct 456 | 456 directli 457 | 457 director 458 | 458 directori 459 | 459 disabl 460 | 460 discount 461 | 461 discov 462 | 462 discoveri 463 | 463 discuss 464 | 464 disk 465 | 465 displai 466 | 466 disposit 467 | 467 distanc 468 | 468 distribut 469 | 469 dn 470 | 470 dnumber 471 | 471 do 472 | 472 doc 473 | 473 document 474 | 474 doe 475 | 475 doer 476 | 476 doesn 477 | 477 dollar 478 | 478 dollarac 479 | 479 dollarnumb 480 | 480 domain 481 | 481 don 482 | 482 done 483 | 483 dont 484 | 484 doubl 485 | 485 doubt 486 | 486 down 487 | 487 download 488 | 488 dr 489 | 489 draw 490 | 490 dream 491 | 491 drive 492 | 492 driver 493 | 493 drop 494 | 494 drug 495 | 495 due 496 | 496 dure 497 | 497 dvd 498 | 498 dw 499 | 499 dynam 500 | 500 ea 501 | 501 each 502 | 502 earli 503 | 503 earlier 504 | 504 earn 505 | 505 earth 506 | 506 easi 507 | 507 easier 508 | 508 easili 509 | 509 eat 510 | 510 eb 511 | 511 ebai 512 | 512 ec 513 | 513 echo 514 | 514 econom 515 | 515 economi 516 | 516 ed 517 | 517 edg 518 | 518 edit 519 | 519 editor 520 | 520 educ 521 | 521 eff 522 | 522 effect 523 | 523 effici 524 | 524 effort 525 | 525 either 526 | 526 el 527 | 527 electron 528 | 528 elimin 529 | 529 els 530 | 530 email 531 | 531 emailaddr 532 | 532 emerg 533 | 533 empir 534 | 534 employ 535 | 535 employe 536 | 536 en 537 | 537 enabl 538 | 538 encod 539 | 539 encourag 540 | 540 end 541 | 541 enemi 542 | 542 enenkio 543 | 543 energi 544 | 544 engin 545 | 545 english 546 | 546 enhanc 547 | 547 enjoi 548 | 548 enough 549 | 549 ensur 550 | 550 enter 551 | 551 enterpris 552 | 552 entertain 553 | 553 entir 554 | 554 entri 555 | 555 enumb 556 | 556 environ 557 | 557 equal 558 | 558 equip 559 | 559 equival 560 | 560 error 561 | 561 especi 562 | 562 essenti 563 | 563 establish 564 | 564 estat 565 | 565 estim 566 | 566 et 567 | 567 etc 568 | 568 euro 569 | 569 europ 570 | 570 european 571 | 571 even 572 | 572 event 573 | 573 eventu 574 | 574 ever 575 | 575 everi 576 | 576 everyon 577 | 577 everyth 578 | 578 evid 579 | 579 evil 580 | 580 exactli 581 | 581 exampl 582 | 582 excel 583 | 583 except 584 | 584 exchang 585 | 585 excit 586 | 586 exclus 587 | 587 execut 588 | 588 exercis 589 | 589 exist 590 | 590 exmh 591 | 591 expand 592 | 592 expect 593 | 593 expens 594 | 594 experi 595 | 595 expert 596 | 596 expir 597 | 597 explain 598 | 
598 explor 599 | 599 express 600 | 600 extend 601 | 601 extens 602 | 602 extra 603 | 603 extract 604 | 604 extrem 605 | 605 ey 606 | 606 fa 607 | 607 face 608 | 608 fact 609 | 609 factor 610 | 610 fail 611 | 611 fair 612 | 612 fall 613 | 613 fals 614 | 614 famili 615 | 615 faq 616 | 616 far 617 | 617 fast 618 | 618 faster 619 | 619 fastest 620 | 620 fat 621 | 621 father 622 | 622 favorit 623 | 623 fax 624 | 624 fb 625 | 625 fd 626 | 626 featur 627 | 627 feder 628 | 628 fee 629 | 629 feed 630 | 630 feedback 631 | 631 feel 632 | 632 femal 633 | 633 few 634 | 634 ffffff 635 | 635 ffnumber 636 | 636 field 637 | 637 fight 638 | 638 figur 639 | 639 file 640 | 640 fill 641 | 641 film 642 | 642 filter 643 | 643 final 644 | 644 financ 645 | 645 financi 646 | 646 find 647 | 647 fine 648 | 648 finish 649 | 649 fire 650 | 650 firewal 651 | 651 firm 652 | 652 first 653 | 653 fit 654 | 654 five 655 | 655 fix 656 | 656 flag 657 | 657 flash 658 | 658 flow 659 | 659 fnumber 660 | 660 focu 661 | 661 folder 662 | 662 folk 663 | 663 follow 664 | 664 font 665 | 665 food 666 | 666 for 667 | 667 forc 668 | 668 foreign 669 | 669 forev 670 | 670 forget 671 | 671 fork 672 | 672 form 673 | 673 format 674 | 674 former 675 | 675 fortun 676 | 676 forward 677 | 677 found 678 | 678 foundat 679 | 679 four 680 | 680 franc 681 | 681 free 682 | 682 freedom 683 | 683 french 684 | 684 freshrpm 685 | 685 fri 686 | 686 fridai 687 | 687 friend 688 | 688 from 689 | 689 front 690 | 690 ftoc 691 | 691 ftp 692 | 692 full 693 | 693 fulli 694 | 694 fun 695 | 695 function 696 | 696 fund 697 | 697 further 698 | 698 futur 699 | 699 ga 700 | 700 gain 701 | 701 game 702 | 702 gari 703 | 703 garrigu 704 | 704 gave 705 | 705 gcc 706 | 706 geek 707 | 707 gener 708 | 708 get 709 | 709 gif 710 | 710 gift 711 | 711 girl 712 | 712 give 713 | 713 given 714 | 714 global 715 | 715 gnome 716 | 716 gnu 717 | 717 gnupg 718 | 718 go 719 | 719 goal 720 | 720 god 721 | 721 goe 722 | 722 gold 723 | 723 gone 724 | 724 good 725 | 725 googl 726 | 726 got 727 | 727 govern 728 | 728 gpl 729 | 729 grand 730 | 730 grant 731 | 731 graphic 732 | 732 great 733 | 733 greater 734 | 734 ground 735 | 735 group 736 | 736 grow 737 | 737 growth 738 | 738 gt 739 | 739 guarante 740 | 740 guess 741 | 741 gui 742 | 742 guid 743 | 743 ha 744 | 744 hack 745 | 745 had 746 | 746 half 747 | 747 ham 748 | 748 hand 749 | 749 handl 750 | 750 happen 751 | 751 happi 752 | 752 hard 753 | 753 hardwar 754 | 754 hat 755 | 755 hate 756 | 756 have 757 | 757 haven 758 | 758 he 759 | 759 head 760 | 760 header 761 | 761 headlin 762 | 762 health 763 | 763 hear 764 | 764 heard 765 | 765 heart 766 | 766 heaven 767 | 767 hei 768 | 768 height 769 | 769 held 770 | 770 hello 771 | 771 help 772 | 772 helvetica 773 | 773 her 774 | 774 herba 775 | 775 here 776 | 776 hermio 777 | 777 hettinga 778 | 778 hi 779 | 779 high 780 | 780 higher 781 | 781 highli 782 | 782 highlight 783 | 783 him 784 | 784 histori 785 | 785 hit 786 | 786 hold 787 | 787 home 788 | 788 honor 789 | 789 hope 790 | 790 host 791 | 791 hot 792 | 792 hour 793 | 793 hous 794 | 794 how 795 | 795 howev 796 | 796 hp 797 | 797 html 798 | 798 http 799 | 799 httpaddr 800 | 800 huge 801 | 801 human 802 | 802 hundr 803 | 803 ibm 804 | 804 id 805 | 805 idea 806 | 806 ident 807 | 807 identifi 808 | 808 idnumb 809 | 809 ie 810 | 810 if 811 | 811 ignor 812 | 812 ii 813 | 813 iii 814 | 814 iiiiiiihnumberjnumberhnumberjnumberhnumb 815 | 815 illeg 816 | 816 im 817 | 817 imag 818 | 818 imagin 819 | 819 immedi 820 | 820 impact 821 | 821 implement 822 | 822 
import 823 | 823 impress 824 | 824 improv 825 | 825 in 826 | 826 inc 827 | 827 includ 828 | 828 incom 829 | 829 increas 830 | 830 incred 831 | 831 inde 832 | 832 independ 833 | 833 index 834 | 834 india 835 | 835 indian 836 | 836 indic 837 | 837 individu 838 | 838 industri 839 | 839 info 840 | 840 inform 841 | 841 initi 842 | 842 inlin 843 | 843 innov 844 | 844 input 845 | 845 insert 846 | 846 insid 847 | 847 instal 848 | 848 instanc 849 | 849 instant 850 | 850 instead 851 | 851 institut 852 | 852 instruct 853 | 853 insur 854 | 854 int 855 | 855 integr 856 | 856 intel 857 | 857 intellig 858 | 858 intend 859 | 859 interact 860 | 860 interest 861 | 861 interfac 862 | 862 intern 863 | 863 internet 864 | 864 interview 865 | 865 into 866 | 866 intro 867 | 867 introduc 868 | 868 inumb 869 | 869 invest 870 | 870 investig 871 | 871 investor 872 | 872 invok 873 | 873 involv 874 | 874 ip 875 | 875 ireland 876 | 876 irish 877 | 877 is 878 | 878 island 879 | 879 isn 880 | 880 iso 881 | 881 isp 882 | 882 issu 883 | 883 it 884 | 884 item 885 | 885 itself 886 | 886 jabber 887 | 887 jame 888 | 888 java 889 | 889 jim 890 | 890 jnumberiiiiiiihepihepihf 891 | 891 job 892 | 892 joe 893 | 893 john 894 | 894 join 895 | 895 journal 896 | 896 judg 897 | 897 judgment 898 | 898 jul 899 | 899 juli 900 | 900 jump 901 | 901 june 902 | 902 just 903 | 903 justin 904 | 904 keep 905 | 905 kei 906 | 906 kept 907 | 907 kernel 908 | 908 kevin 909 | 909 keyboard 910 | 910 kid 911 | 911 kill 912 | 912 kind 913 | 913 king 914 | 914 kingdom 915 | 915 knew 916 | 916 know 917 | 917 knowledg 918 | 918 known 919 | 919 la 920 | 920 lack 921 | 921 land 922 | 922 languag 923 | 923 laptop 924 | 924 larg 925 | 925 larger 926 | 926 largest 927 | 927 laser 928 | 928 last 929 | 929 late 930 | 930 later 931 | 931 latest 932 | 932 launch 933 | 933 law 934 | 934 lawrenc 935 | 935 le 936 | 936 lead 937 | 937 leader 938 | 938 learn 939 | 939 least 940 | 940 leav 941 | 941 left 942 | 942 legal 943 | 943 lender 944 | 944 length 945 | 945 less 946 | 946 lesson 947 | 947 let 948 | 948 letter 949 | 949 level 950 | 950 lib 951 | 951 librari 952 | 952 licens 953 | 953 life 954 | 954 lifetim 955 | 955 light 956 | 956 like 957 | 957 limit 958 | 958 line 959 | 959 link 960 | 960 linux 961 | 961 list 962 | 962 listen 963 | 963 littl 964 | 964 live 965 | 965 ll 966 | 966 lo 967 | 967 load 968 | 968 loan 969 | 969 local 970 | 970 locat 971 | 971 lock 972 | 972 lockergnom 973 | 973 log 974 | 974 long 975 | 975 longer 976 | 976 look 977 | 977 lose 978 | 978 loss 979 | 979 lost 980 | 980 lot 981 | 981 love 982 | 982 low 983 | 983 lower 984 | 984 lowest 985 | 985 lt 986 | 986 ma 987 | 987 mac 988 | 988 machin 989 | 989 made 990 | 990 magazin 991 | 991 mai 992 | 992 mail 993 | 993 mailer 994 | 994 main 995 | 995 maintain 996 | 996 major 997 | 997 make 998 | 998 maker 999 | 999 male 1000 | 1000 man 1001 | 1001 manag 1002 | 1002 mani 1003 | 1003 manual 1004 | 1004 manufactur 1005 | 1005 map 1006 | 1006 march 1007 | 1007 margin 1008 | 1008 mark 1009 | 1009 market 1010 | 1010 marshal 1011 | 1011 mass 1012 | 1012 master 1013 | 1013 match 1014 | 1014 materi 1015 | 1015 matter 1016 | 1016 matthia 1017 | 1017 mayb 1018 | 1018 me 1019 | 1019 mean 1020 | 1020 measur 1021 | 1021 mechan 1022 | 1022 media 1023 | 1023 medic 1024 | 1024 meet 1025 | 1025 member 1026 | 1026 membership 1027 | 1027 memori 1028 | 1028 men 1029 | 1029 mention 1030 | 1030 menu 1031 | 1031 merchant 1032 | 1032 messag 1033 | 1033 method 1034 | 1034 mh 1035 | 1035 michael 1036 | 1036 microsoft 1037 | 
1037 middl 1038 | 1038 might 1039 | 1039 mike 1040 | 1040 mile 1041 | 1041 militari 1042 | 1042 million 1043 | 1043 mime 1044 | 1044 mind 1045 | 1045 mine 1046 | 1046 mini 1047 | 1047 minimum 1048 | 1048 minut 1049 | 1049 miss 1050 | 1050 mistak 1051 | 1051 mobil 1052 | 1052 mode 1053 | 1053 model 1054 | 1054 modem 1055 | 1055 modifi 1056 | 1056 modul 1057 | 1057 moment 1058 | 1058 mon 1059 | 1059 mondai 1060 | 1060 monei 1061 | 1061 monitor 1062 | 1062 month 1063 | 1063 monthli 1064 | 1064 more 1065 | 1065 morn 1066 | 1066 mortgag 1067 | 1067 most 1068 | 1068 mostli 1069 | 1069 mother 1070 | 1070 motiv 1071 | 1071 move 1072 | 1072 movi 1073 | 1073 mpnumber 1074 | 1074 mr 1075 | 1075 ms 1076 | 1076 msg 1077 | 1077 much 1078 | 1078 multi 1079 | 1079 multipart 1080 | 1080 multipl 1081 | 1081 murphi 1082 | 1082 music 1083 | 1083 must 1084 | 1084 my 1085 | 1085 myself 1086 | 1086 name 1087 | 1087 nation 1088 | 1088 natur 1089 | 1089 nbsp 1090 | 1090 near 1091 | 1091 nearli 1092 | 1092 necessari 1093 | 1093 need 1094 | 1094 neg 1095 | 1095 net 1096 | 1096 netscap 1097 | 1097 network 1098 | 1098 never 1099 | 1099 new 1100 | 1100 newslett 1101 | 1101 next 1102 | 1102 nextpart 1103 | 1103 nice 1104 | 1104 nigeria 1105 | 1105 night 1106 | 1106 no 1107 | 1107 nobodi 1108 | 1108 non 1109 | 1109 none 1110 | 1110 nor 1111 | 1111 normal 1112 | 1112 north 1113 | 1113 not 1114 | 1114 note 1115 | 1115 noth 1116 | 1116 notic 1117 | 1117 now 1118 | 1118 nt 1119 | 1119 null 1120 | 1120 number 1121 | 1121 numbera 1122 | 1122 numberam 1123 | 1123 numberanumb 1124 | 1124 numberb 1125 | 1125 numberbit 1126 | 1126 numberc 1127 | 1127 numbercb 1128 | 1128 numbercbr 1129 | 1129 numbercfont 1130 | 1130 numbercli 1131 | 1131 numbercnumb 1132 | 1132 numbercp 1133 | 1133 numberctd 1134 | 1134 numberd 1135 | 1135 numberdari 1136 | 1136 numberdnumb 1137 | 1137 numberenumb 1138 | 1138 numberf 1139 | 1139 numberfb 1140 | 1140 numberff 1141 | 1141 numberffont 1142 | 1142 numberfp 1143 | 1143 numberftd 1144 | 1144 numberk 1145 | 1145 numberm 1146 | 1146 numbermb 1147 | 1147 numberp 1148 | 1148 numberpd 1149 | 1149 numberpm 1150 | 1150 numberpx 1151 | 1151 numberst 1152 | 1152 numberth 1153 | 1153 numbertnumb 1154 | 1154 numberx 1155 | 1155 object 1156 | 1156 oblig 1157 | 1157 obtain 1158 | 1158 obvious 1159 | 1159 occur 1160 | 1160 oct 1161 | 1161 octob 1162 | 1162 of 1163 | 1163 off 1164 | 1164 offer 1165 | 1165 offic 1166 | 1166 offici 1167 | 1167 often 1168 | 1168 oh 1169 | 1169 ok 1170 | 1170 old 1171 | 1171 on 1172 | 1172 onc 1173 | 1173 onli 1174 | 1174 onlin 1175 | 1175 open 1176 | 1176 oper 1177 | 1177 opinion 1178 | 1178 opportun 1179 | 1179 opt 1180 | 1180 optim 1181 | 1181 option 1182 | 1182 or 1183 | 1183 order 1184 | 1184 org 1185 | 1185 organ 1186 | 1186 origin 1187 | 1187 os 1188 | 1188 osdn 1189 | 1189 other 1190 | 1190 otherwis 1191 | 1191 our 1192 | 1192 out 1193 | 1193 outlook 1194 | 1194 output 1195 | 1195 outsid 1196 | 1196 over 1197 | 1197 own 1198 | 1198 owner 1199 | 1199 oz 1200 | 1200 pacif 1201 | 1201 pack 1202 | 1202 packag 1203 | 1203 page 1204 | 1204 pai 1205 | 1205 paid 1206 | 1206 pain 1207 | 1207 palm 1208 | 1208 panel 1209 | 1209 paper 1210 | 1210 paragraph 1211 | 1211 parent 1212 | 1212 part 1213 | 1213 parti 1214 | 1214 particip 1215 | 1215 particular 1216 | 1216 particularli 1217 | 1217 partit 1218 | 1218 partner 1219 | 1219 pass 1220 | 1220 password 1221 | 1221 past 1222 | 1222 patch 1223 | 1223 patent 1224 | 1224 path 1225 | 1225 pattern 1226 | 1226 paul 1227 | 1227 payment 1228 | 1228 pc 
1229 | 1229 peac 1230 | 1230 peopl 1231 | 1231 per 1232 | 1232 percent 1233 | 1233 percentag 1234 | 1234 perfect 1235 | 1235 perfectli 1236 | 1236 perform 1237 | 1237 perhap 1238 | 1238 period 1239 | 1239 perl 1240 | 1240 perman 1241 | 1241 permiss 1242 | 1242 person 1243 | 1243 pgp 1244 | 1244 phone 1245 | 1245 photo 1246 | 1246 php 1247 | 1247 phrase 1248 | 1248 physic 1249 | 1249 pick 1250 | 1250 pictur 1251 | 1251 piec 1252 | 1252 piiiiiiii 1253 | 1253 pipe 1254 | 1254 pjnumber 1255 | 1255 place 1256 | 1256 plai 1257 | 1257 plain 1258 | 1258 plan 1259 | 1259 planet 1260 | 1260 plant 1261 | 1261 planta 1262 | 1262 platform 1263 | 1263 player 1264 | 1264 pleas 1265 | 1265 plu 1266 | 1266 plug 1267 | 1267 pm 1268 | 1268 pocket 1269 | 1269 point 1270 | 1270 polic 1271 | 1271 polici 1272 | 1272 polit 1273 | 1273 poor 1274 | 1274 pop 1275 | 1275 popul 1276 | 1276 popular 1277 | 1277 port 1278 | 1278 posit 1279 | 1279 possibl 1280 | 1280 post 1281 | 1281 potenti 1282 | 1282 pound 1283 | 1283 powel 1284 | 1284 power 1285 | 1285 powershot 1286 | 1286 practic 1287 | 1287 pre 1288 | 1288 predict 1289 | 1289 prefer 1290 | 1290 premium 1291 | 1291 prepar 1292 | 1292 present 1293 | 1293 presid 1294 | 1294 press 1295 | 1295 pretti 1296 | 1296 prevent 1297 | 1297 previou 1298 | 1298 previous 1299 | 1299 price 1300 | 1300 principl 1301 | 1301 print 1302 | 1302 printabl 1303 | 1303 printer 1304 | 1304 privaci 1305 | 1305 privat 1306 | 1306 prize 1307 | 1307 pro 1308 | 1308 probabl 1309 | 1309 problem 1310 | 1310 procedur 1311 | 1311 process 1312 | 1312 processor 1313 | 1313 procmail 1314 | 1314 produc 1315 | 1315 product 1316 | 1316 profession 1317 | 1317 profil 1318 | 1318 profit 1319 | 1319 program 1320 | 1320 programm 1321 | 1321 progress 1322 | 1322 project 1323 | 1323 promis 1324 | 1324 promot 1325 | 1325 prompt 1326 | 1326 properti 1327 | 1327 propos 1328 | 1328 proprietari 1329 | 1329 prospect 1330 | 1330 protect 1331 | 1331 protocol 1332 | 1332 prove 1333 | 1333 proven 1334 | 1334 provid 1335 | 1335 proxi 1336 | 1336 pub 1337 | 1337 public 1338 | 1338 publish 1339 | 1339 pudg 1340 | 1340 pull 1341 | 1341 purchas 1342 | 1342 purpos 1343 | 1343 put 1344 | 1344 python 1345 | 1345 qnumber 1346 | 1346 qualifi 1347 | 1347 qualiti 1348 | 1348 quarter 1349 | 1349 question 1350 | 1350 quick 1351 | 1351 quickli 1352 | 1352 quit 1353 | 1353 quot 1354 | 1354 radio 1355 | 1355 ragga 1356 | 1356 rais 1357 | 1357 random 1358 | 1358 rang 1359 | 1359 rate 1360 | 1360 rather 1361 | 1361 ratio 1362 | 1362 razor 1363 | 1363 razornumb 1364 | 1364 re 1365 | 1365 reach 1366 | 1366 read 1367 | 1367 reader 1368 | 1368 readi 1369 | 1369 real 1370 | 1370 realiz 1371 | 1371 realli 1372 | 1372 reason 1373 | 1373 receiv 1374 | 1374 recent 1375 | 1375 recipi 1376 | 1376 recommend 1377 | 1377 record 1378 | 1378 red 1379 | 1379 redhat 1380 | 1380 reduc 1381 | 1381 refer 1382 | 1382 refin 1383 | 1383 reg 1384 | 1384 regard 1385 | 1385 region 1386 | 1386 regist 1387 | 1387 regul 1388 | 1388 regular 1389 | 1389 rel 1390 | 1390 relat 1391 | 1391 relationship 1392 | 1392 releas 1393 | 1393 relev 1394 | 1394 reliabl 1395 | 1395 remain 1396 | 1396 rememb 1397 | 1397 remot 1398 | 1398 remov 1399 | 1399 replac 1400 | 1400 repli 1401 | 1401 report 1402 | 1402 repositori 1403 | 1403 repres 1404 | 1404 republ 1405 | 1405 request 1406 | 1406 requir 1407 | 1407 research 1408 | 1408 reserv 1409 | 1409 resid 1410 | 1410 resourc 1411 | 1411 respect 1412 | 1412 respond 1413 | 1413 respons 1414 | 1414 rest 1415 | 1415 result 1416 | 1416 retail 
1417 | 1417 return 1418 | 1418 reveal 1419 | 1419 revenu 1420 | 1420 revers 1421 | 1421 review 1422 | 1422 revok 1423 | 1423 rh 1424 | 1424 rich 1425 | 1425 right 1426 | 1426 risk 1427 | 1427 road 1428 | 1428 robert 1429 | 1429 rock 1430 | 1430 role 1431 | 1431 roll 1432 | 1432 rom 1433 | 1433 roman 1434 | 1434 room 1435 | 1435 root 1436 | 1436 round 1437 | 1437 rpm 1438 | 1438 rss 1439 | 1439 rule 1440 | 1440 run 1441 | 1441 sa 1442 | 1442 safe 1443 | 1443 sai 1444 | 1444 said 1445 | 1445 sale 1446 | 1446 same 1447 | 1447 sampl 1448 | 1448 san 1449 | 1449 saou 1450 | 1450 sat 1451 | 1451 satellit 1452 | 1452 save 1453 | 1453 saw 1454 | 1454 scan 1455 | 1455 schedul 1456 | 1456 school 1457 | 1457 scienc 1458 | 1458 score 1459 | 1459 screen 1460 | 1460 script 1461 | 1461 se 1462 | 1462 search 1463 | 1463 season 1464 | 1464 second 1465 | 1465 secret 1466 | 1466 section 1467 | 1467 secur 1468 | 1468 see 1469 | 1469 seed 1470 | 1470 seek 1471 | 1471 seem 1472 | 1472 seen 1473 | 1473 select 1474 | 1474 self 1475 | 1475 sell 1476 | 1476 seminar 1477 | 1477 send 1478 | 1478 sender 1479 | 1479 sendmail 1480 | 1480 senior 1481 | 1481 sens 1482 | 1482 sensit 1483 | 1483 sent 1484 | 1484 sep 1485 | 1485 separ 1486 | 1486 septemb 1487 | 1487 sequenc 1488 | 1488 seri 1489 | 1489 serif 1490 | 1490 seriou 1491 | 1491 serv 1492 | 1492 server 1493 | 1493 servic 1494 | 1494 set 1495 | 1495 setup 1496 | 1496 seven 1497 | 1497 seventh 1498 | 1498 sever 1499 | 1499 sex 1500 | 1500 sexual 1501 | 1501 sf 1502 | 1502 shape 1503 | 1503 share 1504 | 1504 she 1505 | 1505 shell 1506 | 1506 ship 1507 | 1507 shop 1508 | 1508 short 1509 | 1509 shot 1510 | 1510 should 1511 | 1511 show 1512 | 1512 side 1513 | 1513 sign 1514 | 1514 signatur 1515 | 1515 signific 1516 | 1516 similar 1517 | 1517 simpl 1518 | 1518 simpli 1519 | 1519 sinc 1520 | 1520 sincer 1521 | 1521 singl 1522 | 1522 sit 1523 | 1523 site 1524 | 1524 situat 1525 | 1525 six 1526 | 1526 size 1527 | 1527 skeptic 1528 | 1528 skill 1529 | 1529 skin 1530 | 1530 skip 1531 | 1531 sleep 1532 | 1532 slow 1533 | 1533 small 1534 | 1534 smart 1535 | 1535 smoke 1536 | 1536 smtp 1537 | 1537 snumber 1538 | 1538 so 1539 | 1539 social 1540 | 1540 societi 1541 | 1541 softwar 1542 | 1542 sold 1543 | 1543 solut 1544 | 1544 solv 1545 | 1545 some 1546 | 1546 someon 1547 | 1547 someth 1548 | 1548 sometim 1549 | 1549 son 1550 | 1550 song 1551 | 1551 soni 1552 | 1552 soon 1553 | 1553 sorri 1554 | 1554 sort 1555 | 1555 sound 1556 | 1556 sourc 1557 | 1557 south 1558 | 1558 space 1559 | 1559 spain 1560 | 1560 spam 1561 | 1561 spamassassin 1562 | 1562 spamd 1563 | 1563 spammer 1564 | 1564 speak 1565 | 1565 spec 1566 | 1566 special 1567 | 1567 specif 1568 | 1568 specifi 1569 | 1569 speech 1570 | 1570 speed 1571 | 1571 spend 1572 | 1572 sponsor 1573 | 1573 sport 1574 | 1574 spot 1575 | 1575 src 1576 | 1576 ssh 1577 | 1577 st 1578 | 1578 stabl 1579 | 1579 staff 1580 | 1580 stai 1581 | 1581 stand 1582 | 1582 standard 1583 | 1583 star 1584 | 1584 start 1585 | 1585 state 1586 | 1586 statement 1587 | 1587 statu 1588 | 1588 step 1589 | 1589 steve 1590 | 1590 still 1591 | 1591 stock 1592 | 1592 stop 1593 | 1593 storag 1594 | 1594 store 1595 | 1595 stori 1596 | 1596 strategi 1597 | 1597 stream 1598 | 1598 street 1599 | 1599 string 1600 | 1600 strip 1601 | 1601 strong 1602 | 1602 structur 1603 | 1603 studi 1604 | 1604 stuff 1605 | 1605 stupid 1606 | 1606 style 1607 | 1607 subject 1608 | 1608 submit 1609 | 1609 subscrib 1610 | 1610 subscript 1611 | 1611 substanti 1612 | 1612 success 1613 | 1613 such 
1614 | 1614 suffer 1615 | 1615 suggest 1616 | 1616 suit 1617 | 1617 sum 1618 | 1618 summari 1619 | 1619 summer 1620 | 1620 sun 1621 | 1621 super 1622 | 1622 suppli 1623 | 1623 support 1624 | 1624 suppos 1625 | 1625 sure 1626 | 1626 surpris 1627 | 1627 suse 1628 | 1628 suspect 1629 | 1629 sweet 1630 | 1630 switch 1631 | 1631 system 1632 | 1632 tab 1633 | 1633 tabl 1634 | 1634 tablet 1635 | 1635 tag 1636 | 1636 take 1637 | 1637 taken 1638 | 1638 talk 1639 | 1639 tape 1640 | 1640 target 1641 | 1641 task 1642 | 1642 tax 1643 | 1643 teach 1644 | 1644 team 1645 | 1645 tech 1646 | 1646 technic 1647 | 1647 techniqu 1648 | 1648 technolog 1649 | 1649 tel 1650 | 1650 telecom 1651 | 1651 telephon 1652 | 1652 tell 1653 | 1653 temperatur 1654 | 1654 templ 1655 | 1655 ten 1656 | 1656 term 1657 | 1657 termin 1658 | 1658 terror 1659 | 1659 terrorist 1660 | 1660 test 1661 | 1661 texa 1662 | 1662 text 1663 | 1663 than 1664 | 1664 thank 1665 | 1665 that 1666 | 1666 the 1667 | 1667 thei 1668 | 1668 their 1669 | 1669 them 1670 | 1670 themselv 1671 | 1671 then 1672 | 1672 theori 1673 | 1673 there 1674 | 1674 therefor 1675 | 1675 these 1676 | 1676 thi 1677 | 1677 thing 1678 | 1678 think 1679 | 1679 thinkgeek 1680 | 1680 third 1681 | 1681 those 1682 | 1682 though 1683 | 1683 thought 1684 | 1684 thousand 1685 | 1685 thread 1686 | 1686 threat 1687 | 1687 three 1688 | 1688 through 1689 | 1689 thu 1690 | 1690 thursdai 1691 | 1691 ti 1692 | 1692 ticket 1693 | 1693 tim 1694 | 1694 time 1695 | 1695 tip 1696 | 1696 tire 1697 | 1697 titl 1698 | 1698 tm 1699 | 1699 to 1700 | 1700 todai 1701 | 1701 togeth 1702 | 1702 token 1703 | 1703 told 1704 | 1704 toll 1705 | 1705 tom 1706 | 1706 toner 1707 | 1707 toni 1708 | 1708 too 1709 | 1709 took 1710 | 1710 tool 1711 | 1711 top 1712 | 1712 topic 1713 | 1713 total 1714 | 1714 touch 1715 | 1715 toward 1716 | 1716 track 1717 | 1717 trade 1718 | 1718 tradit 1719 | 1719 traffic 1720 | 1720 train 1721 | 1721 transact 1722 | 1722 transfer 1723 | 1723 travel 1724 | 1724 treat 1725 | 1725 tree 1726 | 1726 tri 1727 | 1727 trial 1728 | 1728 trick 1729 | 1729 trip 1730 | 1730 troubl 1731 | 1731 true 1732 | 1732 truli 1733 | 1733 trust 1734 | 1734 truth 1735 | 1735 try 1736 | 1736 tue 1737 | 1737 tuesdai 1738 | 1738 turn 1739 | 1739 tv 1740 | 1740 two 1741 | 1741 type 1742 | 1742 uk 1743 | 1743 ultim 1744 | 1744 un 1745 | 1745 under 1746 | 1746 understand 1747 | 1747 unfortun 1748 | 1748 uniqu 1749 | 1749 unison 1750 | 1750 unit 1751 | 1751 univers 1752 | 1752 unix 1753 | 1753 unless 1754 | 1754 unlik 1755 | 1755 unlimit 1756 | 1756 unseen 1757 | 1757 unsolicit 1758 | 1758 unsubscrib 1759 | 1759 until 1760 | 1760 up 1761 | 1761 updat 1762 | 1762 upgrad 1763 | 1763 upon 1764 | 1764 urgent 1765 | 1765 url 1766 | 1766 us 1767 | 1767 usa 1768 | 1768 usag 1769 | 1769 usb 1770 | 1770 usd 1771 | 1771 usdollarnumb 1772 | 1772 useless 1773 | 1773 user 1774 | 1774 usr 1775 | 1775 usual 1776 | 1776 util 1777 | 1777 vacat 1778 | 1778 valid 1779 | 1779 valu 1780 | 1780 valuabl 1781 | 1781 var 1782 | 1782 variabl 1783 | 1783 varieti 1784 | 1784 variou 1785 | 1785 ve 1786 | 1786 vendor 1787 | 1787 ventur 1788 | 1788 veri 1789 | 1789 verifi 1790 | 1790 version 1791 | 1791 via 1792 | 1792 video 1793 | 1793 view 1794 | 1794 virtual 1795 | 1795 visa 1796 | 1796 visit 1797 | 1797 visual 1798 | 1798 vnumber 1799 | 1799 voic 1800 | 1800 vote 1801 | 1801 vs 1802 | 1802 vulner 1803 | 1803 wa 1804 | 1804 wai 1805 | 1805 wait 1806 | 1806 wake 1807 | 1807 walk 1808 | 1808 wall 1809 | 1809 want 1810 | 1810 war 1811 | 1811 
warm 1812 | 1812 warn 1813 | 1813 warranti 1814 | 1814 washington 1815 | 1815 wasn 1816 | 1816 wast 1817 | 1817 watch 1818 | 1818 water 1819 | 1819 we 1820 | 1820 wealth 1821 | 1821 weapon 1822 | 1822 web 1823 | 1823 weblog 1824 | 1824 websit 1825 | 1825 wed 1826 | 1826 wednesdai 1827 | 1827 week 1828 | 1828 weekli 1829 | 1829 weight 1830 | 1830 welcom 1831 | 1831 well 1832 | 1832 went 1833 | 1833 were 1834 | 1834 west 1835 | 1835 what 1836 | 1836 whatev 1837 | 1837 when 1838 | 1838 where 1839 | 1839 whether 1840 | 1840 which 1841 | 1841 while 1842 | 1842 white 1843 | 1843 whitelist 1844 | 1844 who 1845 | 1845 whole 1846 | 1846 whose 1847 | 1847 why 1848 | 1848 wi 1849 | 1849 wide 1850 | 1850 width 1851 | 1851 wife 1852 | 1852 will 1853 | 1853 william 1854 | 1854 win 1855 | 1855 window 1856 | 1856 wing 1857 | 1857 winner 1858 | 1858 wireless 1859 | 1859 wish 1860 | 1860 with 1861 | 1861 within 1862 | 1862 without 1863 | 1863 wnumberp 1864 | 1864 woman 1865 | 1865 women 1866 | 1866 won 1867 | 1867 wonder 1868 | 1868 word 1869 | 1869 work 1870 | 1870 worker 1871 | 1871 world 1872 | 1872 worldwid 1873 | 1873 worri 1874 | 1874 worst 1875 | 1875 worth 1876 | 1876 would 1877 | 1877 wouldn 1878 | 1878 write 1879 | 1879 written 1880 | 1880 wrong 1881 | 1881 wrote 1882 | 1882 www 1883 | 1883 ximian 1884 | 1884 xml 1885 | 1885 xp 1886 | 1886 yahoo 1887 | 1887 ye 1888 | 1888 yeah 1889 | 1889 year 1890 | 1890 yesterdai 1891 | 1891 yet 1892 | 1892 york 1893 | 1893 you 1894 | 1894 young 1895 | 1895 your 1896 | 1896 yourself 1897 | 1897 zdnet 1898 | 1898 zero 1899 | 1899 zip 1900 | -------------------------------------------------------------------------------- /ex7/bird_small.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deyachatterjee/ml-andrewng-python/03cf16e3d26cd65e791dc6a9e1b49d01ff0b70f3/ex7/bird_small.mat -------------------------------------------------------------------------------- /ex7/bird_small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deyachatterjee/ml-andrewng-python/03cf16e3d26cd65e791dc6a9e1b49d01ff0b70f3/ex7/bird_small.png -------------------------------------------------------------------------------- /ex7/ex7.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deyachatterjee/ml-andrewng-python/03cf16e3d26cd65e791dc6a9e1b49d01ff0b70f3/ex7/ex7.pdf -------------------------------------------------------------------------------- /ex7/ex7data1.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deyachatterjee/ml-andrewng-python/03cf16e3d26cd65e791dc6a9e1b49d01ff0b70f3/ex7/ex7data1.mat -------------------------------------------------------------------------------- /ex7/ex7data2.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deyachatterjee/ml-andrewng-python/03cf16e3d26cd65e791dc6a9e1b49d01ff0b70f3/ex7/ex7data2.mat -------------------------------------------------------------------------------- /ex7/ex7faces.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deyachatterjee/ml-andrewng-python/03cf16e3d26cd65e791dc6a9e1b49d01ff0b70f3/ex7/ex7faces.mat -------------------------------------------------------------------------------- /ex7/ex7kmeans.py: 
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 | from scipy.optimize import minimize
4 | import scipy.io
5 | import matplotlib.pyplot as plt
6 | 
7 | def findClosestCentroids(X, centroids):
8 |     K = np.size(centroids, 0)  # number of centroids (rows)
9 |     idx = []
10 | 
11 |     for i in range(len(X)):
12 |         norm = np.sum(((X[i] - centroids)**2), axis=1)
13 |         idx.append(norm.argmin())
14 | 
15 |     return idx
16 | 
17 | def computeCentroids(X, idx, K):
18 |     centroid = np.zeros((K,np.size(X,1)))
19 |     aug_X = np.hstack((np.array(idx)[:,None],X))
20 |     for i in range(K):
21 |         centroid[i] = np.mean(X[aug_X[:,0] == i], axis=0)
22 | 
23 |     return centroid
24 | 
25 | def runKMeans(X, initial_centroids, max_iters, plot_progress=False):
26 |     K = np.size(initial_centroids, 0)
27 |     centroids = initial_centroids
28 |     previous_centroids = centroids
29 | 
30 |     for i in range(max_iters):
31 |         # Centroid assignment
32 |         idx = findClosestCentroids(X, centroids)
33 | 
34 |         if plot_progress:
35 |             plt.plot(X[:,0],X[:,1], 'bo')
36 |             plt.plot(centroids[:,0], centroids[:,1], 'rx')
37 |             plt.plot(previous_centroids[:,0], previous_centroids[:,1], 'gx')
38 |             plt.show()
39 | 
40 |         previous_centroids = centroids
41 |         centroids = computeCentroids(X, idx, K)
42 | 
43 |     return (centroids, idx)
44 | 
45 | def displayData(X):
46 | 
47 |     num_images = len(X)
48 |     rows = int(num_images**.5)
49 |     cols = int(num_images**.5)
50 |     fig, ax = plt.subplots(rows,cols,sharex=True,sharey=True)
51 |     img_num = 0
52 | 
53 |     for i in range(rows):
54 |         for j in range(cols):
55 |             # Convert column vector into a 32x32 pixel matrix;
56 |             # transpose to display correctly
57 |             img = X[img_num,:].reshape(32,32).T
58 |             ax[i][j].imshow(img,cmap='gray')
59 |             img_num += 1
60 | 
61 |     return (fig, ax)
62 | 
63 | def kMeansInitCentroids(X, K):
64 |     return X[np.random.choice(X.shape[0], K)]
65 | 
66 | # Find Closest Centroids
67 | raw_mat = scipy.io.loadmat("ex7data2.mat")
68 | X = raw_mat.get("X")
69 | 
70 | # Select an initial set of centroids
71 | K = 3
72 | initial_centroids = np.array([[3, 3], [6, 2], [8, 5]])
73 | idx = findClosestCentroids(X, initial_centroids)
74 | 
75 | # Compute Means
76 | centroids = computeCentroids(X, idx, K)
77 | 
78 | # K-means Clustering
79 | max_iters = 10
80 | initial_centroids = np.array([[3, 3], [6, 2], [8, 5]])
81 | centroids, idx = runKMeans(X, initial_centroids, max_iters, plot_progress=True)
82 | 
83 | # K-means Clustering on Pixels
84 | A = plt.imread("bird_small.png")
85 | plt.imshow(A)
86 | plt.show()
87 | 
88 | original_shape = np.shape(A)
89 | 
90 | # Reshape A to get R, G, B values for each pixel
91 | X = A.reshape((np.size(A, 0)*np.size(A, 1), 3))
92 | K = 16
93 | max_iters = 10
94 | 
95 | # Initialize centroids
96 | initial_centroids = kMeansInitCentroids(X, K)
97 | centroids, idx = runKMeans(X, initial_centroids, max_iters, plot_progress=False)
98 | 
99 | # Image Compression: map every pixel to its closest learned centroid color
100 | idx = findClosestCentroids(X, centroids)
101 | X_recovered = centroids[idx,:]
102 | X_recovered = X_recovered.reshape(original_shape)
103 | 
104 | # Display the original next to the compressed image
105 | f, (ax1, ax2) = plt.subplots(2, sharex=True, sharey=True)
106 | ax1.imshow(A)
107 | ax2.imshow(X_recovered)
108 | plt.show()
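# Added sketch (not part of the original script): an equivalent, vectorized
# version of the assignment step above, using NumPy broadcasting instead of a loop:
def findClosestCentroidsVectorized(X, centroids):
    # (m,1,n) - (K,n) broadcasts to (m,K,n); reduce to squared distances (m,K)
    dists = ((X[:, None, :] - centroids[None, :, :]) ** 2).sum(axis=2)
    return dists.argmin(axis=1)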
--------------------------------------------------------------------------------
/ex7/ex7pca.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 | from scipy.optimize import minimize
4 | import matplotlib.pyplot as plt
5 | import scipy.io
6 | 
7 | def featureNormalize(X):
8 |     mu = np.mean(X,axis=0)
9 |     sigma = np.std(X,axis=0)
10 |     normalized_X = np.divide(X - mu,sigma)
11 | 
12 |     return (normalized_X, mu, sigma)
13 | 
14 | def pca(X):
15 |     covar = np.dot(X.T,X) / len(X)  # covariance matrix (X is assumed normalized)
16 |     U, S, V = np.linalg.svd(covar)
17 |     return (U, S)
18 | 
19 | def projectData(X, U, K):
20 |     U_reduce = U[:, 0:K]
21 |     Z = np.zeros((len(X), K))
22 |     for i in range(len(X)):
23 |         x = X[i,:]
24 |         projection_k = np.dot(x, U_reduce)
25 |         Z[i] = projection_k
26 |     return Z
27 | 
28 | def recoverData(Z, U, K):
29 |     X_rec = np.zeros((len(Z), len(U)))
30 |     for i in range(len(Z)):
31 |         v = Z[i,:]
32 |         for j in range(np.size(U,1)):
33 |             recovered_j = np.dot(v.T,U[j,0:K])
34 |             X_rec[i][j] = recovered_j
35 |     return X_rec
36 | 
37 | def displayData(X):
38 | 
39 |     num_images = len(X)
40 |     rows = int(num_images**.5)
41 |     cols = int(num_images**.5)
42 |     fig, ax = plt.subplots(rows,cols,sharex=True,sharey=True)
43 |     img_num = 0
44 | 
45 |     for i in range(rows):
46 |         for j in range(cols):
47 |             # Convert column vector into a 32x32 pixel matrix;
48 |             # transpose to display correctly
49 |             img = X[img_num,:].reshape(32,32).T
50 |             ax[i][j].imshow(img,cmap='gray')
51 |             img_num += 1
52 | 
53 |     return (fig, ax)
54 | 
55 | raw_mat = scipy.io.loadmat("ex7data1.mat")
56 | X = raw_mat.get("X")
57 | plt.cla()
58 | plt.plot(X[:,0], X[:,1], 'bo')
59 | plt.show()
60 | 
61 | X_norm, mu, sigma = featureNormalize(X)
62 | U, S = pca(X_norm)
63 | 
64 | plt.cla()
65 | plt.plot(X_norm[:,0], X_norm[:,1], 'bo')
66 | plt.show()
67 | 
68 | K = 1
69 | Z = projectData(X_norm, U, K)
70 | X_rec = recoverData(Z, U, K)
71 | 
72 | plt.cla()
73 | plt.plot(X_norm[:,0], X_norm[:,1], 'bo')
74 | plt.plot(X_rec[:,0], X_rec[:,1], 'rx')
75 | plt.show()
76 | 
77 | # Loading and Visualizing Face Data
78 | raw_mat = scipy.io.loadmat("ex7faces.mat")
79 | X = raw_mat.get("X")
80 | face_grid, ax = displayData(X[:100, :])
81 | face_grid.show()
82 | 
83 | X_norm, mu, sigma = featureNormalize(X)
84 | U, S = pca(X_norm)
85 | 
86 | face_grid, ax = displayData(U[:,:36].T)
87 | face_grid.show()
88 | 
89 | # Dimension Reduction on Faces
90 | K = 100
91 | Z = projectData(X_norm, U, K)
92 | 
93 | # Visualization of Faces after PCA
94 | K = 100
95 | X_rec = recoverData(Z, U, K)
96 | 
97 | plt.close()
98 | plt.cla()
99 | # displayData builds its own figure, so no extra plt.subplots call is needed here
100 | f, ax1 = displayData(X_norm[:100,:])
101 | f, ax2 = displayData(X_rec[:100,:])
102 | f.show()
103 | 
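# Added sketch (not part of the original script): the singular values S returned
# by pca() can be used to pick the smallest K that retains a desired share of
# the variance, e.g. 99%:
def chooseK(S, retain=0.99):
    frac = np.cumsum(S) / np.sum(S)        # variance retained by the first k components
    return int(np.argmax(frac >= retain)) + 1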
--------------------------------------------------------------------------------
/ex7/token.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deyachatterjee/ml-andrewng-python/03cf16e3d26cd65e791dc6a9e1b49d01ff0b70f3/ex7/token.mat
--------------------------------------------------------------------------------
/ex8/ex8_movieParams.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deyachatterjee/ml-andrewng-python/03cf16e3d26cd65e791dc6a9e1b49d01ff0b70f3/ex8/ex8_movieParams.mat
--------------------------------------------------------------------------------
/ex8/ex8_movies.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deyachatterjee/ml-andrewng-python/03cf16e3d26cd65e791dc6a9e1b49d01ff0b70f3/ex8/ex8_movies.mat
--------------------------------------------------------------------------------
/ex8/ex8anomaly_detection.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 | from scipy.optimize import minimize
4 | # (all helper functions used below are defined in this file)
5 | import scipy.io
6 | import matplotlib.pyplot as plt
7 | 
8 | 
9 | 
10 | def estimateGaussian(X):
11 |     mu = np.mean(X, axis=0, keepdims=True)
12 |     sigma2 = np.var(X, axis=0, keepdims=True)
13 | 
14 |     return (mu, sigma2)
15 | 
16 | def multivariateGaussian(X, mu, sigma2):
17 |     k = np.size(mu,1)
18 |     if ((np.size(sigma2,0) == 1) | (np.size(sigma2,1) == 1)):
19 |         sigma2 = np.diagflat(sigma2)
20 |     # De-mean
21 |     X = X - mu
22 |     # Calculate p-values with the multivariate normal density
23 |     p = ((2 * np.pi) ** (-k / 2) * np.linalg.det(sigma2) ** (-.5) *
24 |          np.exp(-.5 * np.sum(np.dot(X, np.linalg.inv(sigma2)) * X, 1)))
25 | 
26 |     return p
27 | 
28 | def visualizeFit(X, mu, sigma2):
29 |     meshvals = np.arange(0, 35, .5)
30 |     X1, X2 = np.meshgrid(meshvals, meshvals)
31 |     Z = np.hstack((X1.reshape((-1,1)), X2.reshape((-1,1))))
32 |     Z = multivariateGaussian(Z, mu, sigma2).reshape(np.shape(X1))
33 | 
34 |     mylevels = np.array([10**i for i in np.arange(-20,0,3)])
35 |     fig, ax = plt.subplots(1)
36 |     ax.plot(X[:, 0], X[:, 1], 'bx')
37 |     ax.contour(X1, X2, Z, mylevels)
38 | 
39 |     return fig, ax
40 | 
41 | def selectThreshold(yval, pval):
42 |     bestEpsilon = 0
43 |     bestF1 = 0
44 |     F1 = 0
45 | 
46 |     stepsize = (np.max(pval) - np.min(pval)) / 1000
47 |     evals = np.arange(np.min(pval), np.max(pval), stepsize)
48 |     for epsilon in evals:
49 |         predictions = (pval < epsilon).reshape((-1,1))
50 |         X = np.hstack((predictions, yval))
51 |         fp = np.sum((X[:,0] == 1) & (X[:,1] == 0))
52 |         tp = np.sum((X[:,0] == 1) & (X[:,1] == 1))
53 |         fn = np.sum((X[:,0] == 0) & (X[:,1] == 1))
54 |         prec = tp / (tp + fp) if tp + fp else 0   # guard against 0/0 at extreme thresholds
55 |         rec = tp / (tp + fn) if tp + fn else 0
56 |         F1 = (2 * prec * rec) / (prec + rec) if prec + rec else 0
57 | 
58 |         if F1 > bestF1:
59 |             bestF1 = F1
60 |             bestEpsilon = epsilon
61 | 
62 |     return (bestEpsilon, bestF1)
63 | 
64 | 
65 | raw_mat = scipy.io.loadmat("ex8data1.mat")
66 | X = raw_mat.get("X")
67 | Xval = raw_mat.get("Xval")
68 | yval = raw_mat.get("yval")
69 | 
70 | plt.plot(X[:, 0], X[:, 1], 'bx')
71 | plt.xlabel('Latency (ms)')
72 | plt.ylabel('Throughput (mb/s)')
73 | plt.show()
74 | 
75 | mu, sigma2 = estimateGaussian(X) # returns 1 x n row vectors
76 | 
77 | # Density of the data under the multivariate normal distribution
78 | p = multivariateGaussian(X, mu, sigma2)
79 | fig, ax = visualizeFit(X, mu, sigma2)
80 | fig.show()
81 | 
82 | # Find Outliers
83 | pval = multivariateGaussian(Xval, mu, sigma2)
84 | epsilon, F1 = selectThreshold(yval, pval)
85 | 
86 | outliers = np.where(p < epsilon)
87 | fig, ax = visualizeFit(X, mu, sigma2)
88 | ax.plot(X[outliers, 0], X[outliers, 1], 'ro', linewidth=2, markersize=10)
89 | fig.show()
90 | 
91 | # Multi-Dimensional Outliers
92 | raw_mat2 = scipy.io.loadmat("ex8data2.mat")
93 | X = raw_mat2.get("X")
94 | Xval = raw_mat2.get("Xval")
95 | yval = raw_mat2.get("yval")
96 | 
97 | mu, sigma2 = estimateGaussian(X)
98 | p = multivariateGaussian(X, mu, sigma2)
99 | pval = multivariateGaussian(Xval, mu, sigma2)
100 | epsilon, F1 = selectThreshold(yval, pval)
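# Added illustration (not part of the original script): a tiny hand-checkable
# call to selectThreshold. With pval = [0.1, 0.2, 0.8, 0.9] and yval flagging
# the two low-density points as anomalies, any threshold in (0.2, 0.8] gives F1 = 1:
demo_pval = np.array([0.1, 0.2, 0.8, 0.9])
demo_yval = np.array([[1], [1], [0], [0]])
print(selectThreshold(demo_yval, demo_pval))  # epsilon just above 0.2, F1 = 1.0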
--------------------------------------------------------------------------------
/ex8/ex8cofi.py:
--------------------------------------------------------------------------------
import pandas as pd
import numpy as np
from scipy.optimize import minimize
import scipy.io
import matplotlib.pyplot as plt


def cofiCostFunc(params, Y, R, num_users, num_movies, num_features, reg):
    # Unfold the X and Theta matrices from the flattened params vector
    X = params[:num_movies * num_features].reshape((num_movies, num_features))
    Theta = params[num_movies * num_features:].reshape((num_users, num_features))

    # Regularized cost
    J = (.5 * np.sum(((np.dot(Theta, X.T).T - Y) * R)**2) +
         ((reg / 2) * np.sum(Theta**2)) +
         ((reg / 2) * np.sum(X**2)))

    # Gradients
    X_grad = np.zeros_like(X)
    for i in range(num_movies):
        idx = np.where(R[i, :] == 1)[0]   # users who have rated movie i
        temp_theta = Theta[idx, :]        # parameter vectors for those users
        temp_Y = Y[i, idx]                # ratings given to movie i
        X_grad[i, :] = (np.dot(np.dot(temp_theta, X[i, :]) - temp_Y,
                               temp_theta) + reg * X[i, :])

    Theta_grad = np.zeros_like(Theta)
    for j in range(num_users):
        idx = np.where(R[:, j] == 1)[0]   # movies rated by user j
        temp_X = X[idx, :]
        temp_Y = Y[idx, j]
        Theta_grad[j, :] = (np.dot(np.dot(temp_X, Theta[j]) - temp_Y,
                                   temp_X) + reg * Theta[j])
    grad = np.append(X_grad.flatten(), Theta_grad.flatten())

    return (J, grad)

def computeNumericalGradient(J, theta):
    # Two-sided finite-difference approximation of the gradient of J at theta
    numgrad = np.zeros_like(theta)
    perturb = np.zeros_like(theta)
    eps = 1e-4

    for p in range(len(theta)):
        perturb[p] = eps
        loss1 = J(theta - perturb)
        loss2 = J(theta + perturb)
        numgrad[p] = (loss2 - loss1) / (2 * eps)
        perturb[p] = 0

    return numgrad

def checkCostFunction(reg):
    # Create a small random problem
    X_t = np.random.random((4, 3))
    Theta_t = np.random.random((5, 3))

    # Zap out most entries
    Y = np.dot(Theta_t, X_t.T)
    Y[(np.random.random(np.shape(Y)) > .5)] = 0
    R = np.zeros_like(Y)
    R[Y != 0] = 1

    # Gradient checking
    X = np.random.random(np.shape(X_t))
    Theta = np.random.random(np.shape(Theta_t))
    num_users = np.size(Y, 1)
    num_movies = np.size(Y, 0)
    num_features = np.size(Theta_t, 1)

    params = np.append(X.flatten(), Theta.flatten())

    def reducedCofiCostFunc(p):
        return cofiCostFunc(p, Y, R, num_users, num_movies, num_features, reg)[0]

    numgrad = computeNumericalGradient(reducedCofiCostFunc, params)
    J, grad = cofiCostFunc(params, Y, R, num_users, num_movies, num_features, reg)
    # The analytical and numerical gradients should agree closely
    np.testing.assert_almost_equal(grad, numgrad)
    return

def normalizeRatings(Y, R):
    # Shift each movie's rated entries so that its mean rating is zero
    m, n = np.shape(Y)
    Ymean = np.zeros((m, 1))
    Ynorm = np.zeros_like(Y)
    for i in range(m):
        idx = (R[i] == 1)
        Ymean[i] = np.mean(Y[i, idx])
        Ynorm[i, idx] = Y[i, idx] - Ymean[i]
    return (Ynorm, Ymean)
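# --- Added toy example (hedged; not part of the original script) ---
# What normalizeRatings does: each movie's rated entries are shifted so its
# mean rating becomes 0, while unrated entries (R == 0) stay 0. The _Y and _R
# arrays below are made up purely for illustration.
_Y = np.array([[5.0, 0.0, 1.0],
               [4.0, 2.0, 0.0]])
_R = np.array([[1, 0, 1],
               [1, 1, 0]])
_Ynorm, _Ymean = normalizeRatings(_Y, _R)
print(_Ymean.flatten())  # [3. 3.]  per-movie means over rated entries only
print(_Ynorm)            # [[ 2.  0. -2.]  [ 1. -1.  0.]]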
raw_mat = scipy.io.loadmat("ex8_movies.mat")
R = raw_mat.get("R")  # num movies x num users indicator matrix
Y = raw_mat.get("Y")  # num movies x num users ratings matrix

# Visualize the ratings matrix
plt.matshow(Y)
plt.xlabel("Users")
plt.ylabel("Movies")
plt.show()

# Collaborative Filtering Cost Function
raw_mat2 = scipy.io.loadmat("ex8_movieParams.mat")
X = raw_mat2.get("X")          # row i is the feature vector of the ith movie
Theta = raw_mat2.get("Theta")  # row j is the parameter vector of the jth user

# Reduce the data size to have it run faster
num_users = 4
num_movies = 5
num_features = 3

X = X[:num_movies, :num_features]
Theta = Theta[:num_users, :num_features]
Y = Y[:num_movies, :num_users]
R = R[:num_movies, :num_users]

# Evaluate cost
params = np.append(X.flatten(), Theta.flatten())
J, grad = cofiCostFunc(params, Y, R, num_users, num_movies, num_features, 0)
np.testing.assert_almost_equal(22.22, J, decimal=2,
                               err_msg="Incorrect unregularized cost")

# Gradient
checkCostFunction(0)

# Regularization
J, grad = cofiCostFunc(params, Y, R, num_users, num_movies, num_features, 1.5)
np.testing.assert_almost_equal(31.34, J, decimal=2,
                               err_msg="Incorrect regularized cost")

checkCostFunction(1.5)

# Entering ratings for a new user
movieList = pd.read_table("movie_ids.txt", encoding='latin-1', names=["Movie"])
movies = movieList.Movie.tolist()
my_ratings = [0] * len(movies)

# Check the file movie_ids.txt for the ID of each movie in our dataset.
# For example, Toy Story (1995) has ID 1, so to rate it "4", set
my_ratings[0] = 4

# Or suppose you did not enjoy Silence of the Lambs (1991); set
my_ratings[97] = 2

# A few more movies we liked / did not like
my_ratings[6] = 3
my_ratings[11] = 5
my_ratings[53] = 4
my_ratings[63] = 5
my_ratings[65] = 3
my_ratings[68] = 5
my_ratings[182] = 4
my_ratings[225] = 5
my_ratings[354] = 5

for i in range(len(movies)):
    if my_ratings[i] > 0:
        print("User rated " + str(movies[i]) + ": " + str(my_ratings[i]))

# Learning
raw_mat = scipy.io.loadmat("ex8_movies.mat")
R = raw_mat.get("R")  # num movies x num users indicator matrix
Y = raw_mat.get("Y")  # num movies x num users ratings matrix

# Add our own ratings to Y
ratings_col = np.array(my_ratings).reshape((-1, 1))
Y = np.hstack((ratings_col, Y))

# Add indicators to R
R = np.hstack((ratings_col != 0, R))

# Normalize
Ynorm, Ymean = normalizeRatings(Y, R)

# Useful values
num_users = np.size(Y, 1)
num_movies = np.size(Y, 0)
num_features = 10

# Set initial parameters
X = np.random.normal(size=(num_movies, num_features))
Theta = np.random.normal(size=(num_users, num_features))

initial_parameters = np.append(X.flatten(), Theta.flatten())
reg = 10

def reducedCofiCostFunc(p):
    # Train on the mean-normalized ratings; Ymean is added back when predicting
    return cofiCostFunc(p, Ynorm, R, num_users, num_movies, num_features, reg)

results = minimize(reducedCofiCostFunc,
                   initial_parameters,
                   method="CG",
                   jac=True,
                   options={'maxiter': 100, "disp": True})

out_params = results.x

# Unfold the returned parameters back into X and Theta
X = np.reshape(out_params[:num_movies * num_features],
               (num_movies, num_features))
Theta = np.reshape(out_params[num_movies * num_features:],
                   (num_users, num_features))
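# --- Added check (hedged; not part of the original script) ---
# Report the regularized cost at the learned parameters; it should match the
# final objective value minimize() stores in results.fun.
final_J, _ = cofiCostFunc(out_params, Ynorm, R, num_users, num_movies,
                          num_features, reg)
print("Cost at learned parameters:", final_J)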
# Recommendation
p = np.dot(X, Theta.T)
my_predictions = p[:, 0] + Ymean.flatten()
sorted_ix = my_predictions.argsort()

print("\nTop recommendations for you:\n")
for i in range(1, 11):
    j = sorted_ix[-i]  # indices of the highest predictions, best first
    print("Predicting rating " + str(my_predictions[j]) +
          " for movie " + str(movies[j]))

print("\nOriginal ratings provided:\n")
for i in range(len(my_ratings)):
    if my_ratings[i] > 0:
        print("Rated " + str(my_ratings[i]) + " for " + str(movies[i]))
--------------------------------------------------------------------------------
/ex8/ex8data1.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deyachatterjee/ml-andrewng-python/03cf16e3d26cd65e791dc6a9e1b49d01ff0b70f3/ex8/ex8data1.mat
--------------------------------------------------------------------------------
/ex8/ex8data2.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deyachatterjee/ml-andrewng-python/03cf16e3d26cd65e791dc6a9e1b49d01ff0b70f3/ex8/ex8data2.mat
--------------------------------------------------------------------------------
/ex8/i.txt:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/ex8/movie_ids.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deyachatterjee/ml-andrewng-python/03cf16e3d26cd65e791dc6a9e1b49d01ff0b70f3/ex8/movie_ids.txt
--------------------------------------------------------------------------------
/ex8/token.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deyachatterjee/ml-andrewng-python/03cf16e3d26cd65e791dc6a9e1b49d01ff0b70f3/ex8/token.mat
--------------------------------------------------------------------------------