├── README.md
├── ex1
│   ├── ex1.pdf
│   ├── ex1.py
│   ├── ex1data1.txt
│   ├── ex1data2.txt
│   └── token.mat
├── ex2
│   ├── ex2.pdf
│   ├── ex2data1.txt
│   ├── ex2data2.txt
│   ├── ex2part1.py
│   ├── ex2part2.py
│   ├── i.txt
│   └── token.mat
├── ex3
│   ├── ex3.pdf
│   ├── ex3.py
│   ├── ex3data1.mat
│   └── ex3weights.mat
├── ex4
│   ├── ex4.pdf
│   ├── ex4.py
│   ├── ex4data1.mat
│   ├── ex4weights.mat
│   └── token.mat
├── ex5
│   ├── ex5.pdf
│   ├── ex5.py
│   ├── ex5data1.mat
│   └── token.mat
├── ex6
│   ├── emailSample1.txt
│   ├── emailSample2.txt
│   ├── ex6.pdf
│   ├── ex6data1.mat
│   ├── ex6data2.mat
│   ├── ex6data3.mat
│   ├── ex6spam.py
│   ├── ex6svm.py
│   ├── spamSample1.txt
│   ├── spamSample2.txt
│   ├── spamTest.mat
│   ├── spamTrain.mat
│   ├── token.mat
│   └── vocab.txt
├── ex7
│   ├── bird_small.mat
│   ├── bird_small.png
│   ├── ex7.pdf
│   ├── ex7data1.mat
│   ├── ex7data2.mat
│   ├── ex7faces.mat
│   ├── ex7kmeans.py
│   ├── ex7pca.py
│   └── token.mat
└── ex8
    ├── ex8_movieParams.mat
    ├── ex8_movies.mat
    ├── ex8anomaly_detection.py
    ├── ex8cofi.py
    ├── ex8data1.mat
    ├── ex8data2.mat
    ├── i.txt
    ├── movie_ids.txt
    └── token.mat

/README.md:
--------------------------------------------------------------------------------
# ml-andrewng-python
Python implementations of the programming assignments from Andrew Ng's online machine-learning course.

Contents:

Programming Assignment #1: Linear Regression (Week 2)

Programming Assignment #2: Logistic Regression (Week 3)

Programming Assignment #3: Multi-class Classification and Neural Networks (Week 4)

Programming Assignment #4: Neural Network Learning (Week 5)

Programming Assignment #5: Regularized Linear Regression and Bias/Variance (Week 6)

Programming Assignment #6: Support Vector Machines (Week 7)

Programming Assignment #7: K-Means Clustering and PCA (Week 8)

Programming Assignment #8: Anomaly Detection and Recommender Systems (Week 9)
--------------------------------------------------------------------------------
/ex1/ex1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deyachatterjee/ml-andrewng-python/03cf16e3d26cd65e791dc6a9e1b49d01ff0b70f3/ex1/ex1.pdf
--------------------------------------------------------------------------------
/ex1/ex1.py:
--------------------------------------------------------------------------------
import numpy as np
import pandas as pd
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
import matplotlib.pyplot as plt

def warmUpExercise():
    return np.eye(5)


def plotData(x, y):

    fig, ax = plt.subplots() # create empty figure
    ax.plot(x,y,'rx',markersize=10)
    ax.set_xlabel("Population of City in 10,000s")
    ax.set_ylabel("Profit in $10,000s")

    return fig


def normalEqn(X,y):

    return np.dot((np.linalg.inv(np.dot(X.T,X))),np.dot(X.T,y))


def gradientDescentMulti(X, y, theta, alpha, num_iters):

    m = len(y) # number of training examples
    J_history = np.zeros(num_iters)

    for i in range(num_iters):
        theta = theta - (alpha/m)*np.sum((np.dot(X,theta)-y)[:,None]*X,axis=0)
        J_history[i] = computeCost(X, y, theta)
        print('Cost function: ', J_history[i])

    return (theta, J_history)

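# Added note (not part of the original file): both gradient-descent routines
# in this script implement the same batch update. With X of shape (m, n) and
# 1-d theta and y, an equivalent, slightly more direct form of the update
# line is
#
#     theta = theta - (alpha/m) * np.dot(X.T, np.dot(X, theta) - y)
#
# i.e. theta := theta - (alpha/m) * X'(X*theta - y).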
def gradientDescent(X, y, theta, alpha, num_iters):

    m = len(y) # number of training examples
    J_history = np.zeros(num_iters)

    for i in range(num_iters):
        theta = theta - (alpha/m)*np.sum((np.dot(X,theta)-y)[:,None]*X,axis=0)
        J_history[i] = computeCost(X, y, theta)
        print('Cost function: ',J_history[i])

    return (theta, J_history)


def featureNormalize(X):
    return np.divide((X - np.mean(X,axis=0)),np.std(X,axis=0))


def computeCost(X, y, theta):
    m = len(y)
    J = (np.sum((np.dot(X,theta) - y)**2))/(2*m)
    return J

print('Running warmUpExercise ... \n')
print('5x5 Identity Matrix: \n')

print(warmUpExercise())
input('Program paused. Press enter to continue.\n')

print('Plotting Data ...\n')
data = pd.read_csv("ex1data1.txt",names=["X","y"])
x = np.array(data.X)[:,None] # population in 10,000s
y = np.array(data.y) # profit for a food truck
m = len(y)
fig = plotData(x,y)
fig.show()
input('Program paused. Press enter to continue.\n')
print('Running Gradient Descent ...\n')
ones = np.ones_like(x) # an array of ones of the same dimension as x
X = np.hstack((ones,x)) # Add a column of ones to x. hstack stacks horizontally, i.e. columnwise
theta = np.zeros(2) # initialize
iterations = 1500
alpha = 0.01
computeCost(X, y, theta)
theta, hist = gradientDescent(X, y, theta, alpha, iterations)
print('Theta found by gradient descent: ')
print(theta[0],"\n", theta[1])

# Plot the linear fit
plt.plot(x,y,'rx',x,np.dot(X,theta),'b-')
plt.legend(['Training Data','Linear Regression'])
plt.show()

# Predict values for population sizes of 35,000 and 70,000
predict1 = np.dot([1, 3.5],theta) # takes the inner product to get y_bar
print('For population = 35,000, we predict a profit of ', predict1*10000)

predict2 = np.dot([1, 7],theta)
print('For population = 70,000, we predict a profit of ', predict2*10000)
input('Program paused. Press enter to continue.\n')
print('Visualizing J(theta_0, theta_1) ...\n')

# Grid over which we will calculate J
theta0_vals = np.linspace(-10, 10, 100)
theta1_vals = np.linspace(-1, 4, 100)
J_vals = np.zeros((len(theta0_vals),len(theta1_vals)))

for i in range(len(theta0_vals)):
    for j in range(len(theta1_vals)):
        t = np.array([theta0_vals[i],theta1_vals[j]])
        J_vals[i][j] = computeCost(X,y,t)
"""
# Surface plot using J_vals
fig = plt.figure()
ax = plt.subplot(111,projection='3d')
Axes3D.plot_surface(ax,theta0_vals,theta1_vals,J_vals,cmap=cm.coolwarm)
plt.show()

fig = plt.figure()
ax = plt.subplot(111)
plt.contour(theta0_vals,theta1_vals,J_vals)
"""
print('Loading data ...','\n')
print('Plotting Data ...','\n')
data = pd.read_csv("ex1data2.txt",names=["size","bedrooms","price"])
s = np.array(data["size"]) # use [] indexing: data.size is the DataFrame's size attribute, not this column
r = np.array(data.bedrooms)
p = np.array(data.price)
m = len(r)
s = np.vstack(s)
r = np.vstack(r)
X = np.hstack((s,r))
print('First 10 examples from the dataset: \n')
print(" size = ", s[:10],"\n"," bedrooms = ", r[:10], "\n")
input('Program paused. 
Press enter to continue.\n') 133 | print('Normalizing Features ...\n') 134 | X = featureNormalize(X) 135 | X = np.hstack((np.ones_like(s),X)) 136 | 137 | print('Running gradient descent ...\n') 138 | alpha = 0.05 139 | num_iters = 400 140 | theta = np.zeros(3) 141 | 142 | # Multiple Dimension Gradient Descent 143 | theta, hist = gradientDescent(X, p, theta, alpha, num_iters) 144 | 145 | # Plot convergence graph 146 | fig = plt.figure() 147 | ax = plt.subplot(111) 148 | plt.plot(np.arange(len(hist)),hist ,'-b') 149 | plt.xlabel('Number of iterations') 150 | plt.ylabel('Cost J') 151 | plt.show() 152 | 153 | 154 | print('Theta computed from gradient descent: \n') 155 | print(theta,'\n') 156 | 157 | # Estimate the price of a 1650 sq-ft, 3 br house 158 | #the first column of X is all-ones.it doesnot need to be normalized. 159 | normalized_specs = np.array([1,((1650-s.mean())/s.std()),((3-r.mean())/r.std())]) 160 | price = np.dot(normalized_specs,theta) 161 | print('Predicted price of a 1650 sq-ft, 3 br house (using gradient descent):\n ', 162 | price) 163 | input('Program paused. Press enter to continue.\n') 164 | 165 | print('Solving with normal equations...\n') 166 | 167 | data = pd.read_csv("ex1data2.txt",names=["sz","bed","price"]) 168 | s = np.array(data.sz) 169 | r = np.array(data.bed) 170 | p = np.array(data.price) 171 | m = len(r) 172 | s = np.vstack(s) 173 | r = np.vstack(r) 174 | X = np.hstack((s,r)) 175 | X = np.hstack((np.ones_like(s),X)) 176 | 177 | theta = normalEqn(X, p) 178 | 179 | print('Theta computed from the normal equations: \n') 180 | print(theta) 181 | print('\n') 182 | 183 | # Estimate the price of a 1650 sq-ft, 3 br house 184 | price = np.dot([1,1650,3],theta) 185 | 186 | 187 | print('Predicted price of a 1650 sq-ft, 3 br house (using normal equations): \n', 188 | price) 189 | -------------------------------------------------------------------------------- /ex1/ex1data1.txt: -------------------------------------------------------------------------------- 1 | 6.1101,17.592 2 | 5.5277,9.1302 3 | 8.5186,13.662 4 | 7.0032,11.854 5 | 5.8598,6.8233 6 | 8.3829,11.886 7 | 7.4764,4.3483 8 | 8.5781,12 9 | 6.4862,6.5987 10 | 5.0546,3.8166 11 | 5.7107,3.2522 12 | 14.164,15.505 13 | 5.734,3.1551 14 | 8.4084,7.2258 15 | 5.6407,0.71618 16 | 5.3794,3.5129 17 | 6.3654,5.3048 18 | 5.1301,0.56077 19 | 6.4296,3.6518 20 | 7.0708,5.3893 21 | 6.1891,3.1386 22 | 20.27,21.767 23 | 5.4901,4.263 24 | 6.3261,5.1875 25 | 5.5649,3.0825 26 | 18.945,22.638 27 | 12.828,13.501 28 | 10.957,7.0467 29 | 13.176,14.692 30 | 22.203,24.147 31 | 5.2524,-1.22 32 | 6.5894,5.9966 33 | 9.2482,12.134 34 | 5.8918,1.8495 35 | 8.2111,6.5426 36 | 7.9334,4.5623 37 | 8.0959,4.1164 38 | 5.6063,3.3928 39 | 12.836,10.117 40 | 6.3534,5.4974 41 | 5.4069,0.55657 42 | 6.8825,3.9115 43 | 11.708,5.3854 44 | 5.7737,2.4406 45 | 7.8247,6.7318 46 | 7.0931,1.0463 47 | 5.0702,5.1337 48 | 5.8014,1.844 49 | 11.7,8.0043 50 | 5.5416,1.0179 51 | 7.5402,6.7504 52 | 5.3077,1.8396 53 | 7.4239,4.2885 54 | 7.6031,4.9981 55 | 6.3328,1.4233 56 | 6.3589,-1.4211 57 | 6.2742,2.4756 58 | 5.6397,4.6042 59 | 9.3102,3.9624 60 | 9.4536,5.4141 61 | 8.8254,5.1694 62 | 5.1793,-0.74279 63 | 21.279,17.929 64 | 14.908,12.054 65 | 18.959,17.054 66 | 7.2182,4.8852 67 | 8.2951,5.7442 68 | 10.236,7.7754 69 | 5.4994,1.0173 70 | 20.341,20.992 71 | 10.136,6.6799 72 | 7.3345,4.0259 73 | 6.0062,1.2784 74 | 7.2259,3.3411 75 | 5.0269,-2.6807 76 | 6.5479,0.29678 77 | 7.5386,3.8845 78 | 5.0365,5.7014 79 | 10.274,6.7526 80 | 5.1077,2.0576 81 | 5.7292,0.47953 82 | 
5.1884,0.20421 83 | 6.3557,0.67861 84 | 9.7687,7.5435 85 | 6.5159,5.3436 86 | 8.5172,4.2415 87 | 9.1802,6.7981 88 | 6.002,0.92695 89 | 5.5204,0.152 90 | 5.0594,2.8214 91 | 5.7077,1.8451 92 | 7.6366,4.2959 93 | 5.8707,7.2029 94 | 5.3054,1.9869 95 | 8.2934,0.14454 96 | 13.394,9.0551 97 | 5.4369,0.61705 98 | -------------------------------------------------------------------------------- /ex1/ex1data2.txt: -------------------------------------------------------------------------------- 1 | 2104,3,399900 2 | 1600,3,329900 3 | 2400,3,369000 4 | 1416,2,232000 5 | 3000,4,539900 6 | 1985,4,299900 7 | 1534,3,314900 8 | 1427,3,198999 9 | 1380,3,212000 10 | 1494,3,242500 11 | 1940,4,239999 12 | 2000,3,347000 13 | 1890,3,329999 14 | 4478,5,699900 15 | 1268,3,259900 16 | 2300,4,449900 17 | 1320,2,299900 18 | 1236,3,199900 19 | 2609,4,499998 20 | 3031,4,599000 21 | 1767,3,252900 22 | 1888,2,255000 23 | 1604,3,242900 24 | 1962,4,259900 25 | 3890,3,573900 26 | 1100,3,249900 27 | 1458,3,464500 28 | 2526,3,469000 29 | 2200,3,475000 30 | 2637,3,299900 31 | 1839,2,349900 32 | 1000,1,169900 33 | 2040,4,314900 34 | 3137,3,579900 35 | 1811,4,285900 36 | 1437,3,249900 37 | 1239,3,229900 38 | 2132,4,345000 39 | 4215,4,549000 40 | 2162,4,287000 41 | 1664,2,368500 42 | 2238,3,329900 43 | 2567,4,314000 44 | 1200,3,299000 45 | 852,2,179900 46 | 1852,4,299900 47 | 1203,3,239500 48 | -------------------------------------------------------------------------------- /ex1/token.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deyachatterjee/ml-andrewng-python/03cf16e3d26cd65e791dc6a9e1b49d01ff0b70f3/ex1/token.mat -------------------------------------------------------------------------------- /ex2/ex2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deyachatterjee/ml-andrewng-python/03cf16e3d26cd65e791dc6a9e1b49d01ff0b70f3/ex2/ex2.pdf -------------------------------------------------------------------------------- /ex2/ex2data1.txt: -------------------------------------------------------------------------------- 1 | 34.62365962451697,78.0246928153624,0 2 | 30.28671076822607,43.89499752400101,0 3 | 35.84740876993872,72.90219802708364,0 4 | 60.18259938620976,86.30855209546826,1 5 | 79.0327360507101,75.3443764369103,1 6 | 45.08327747668339,56.3163717815305,0 7 | 61.10666453684766,96.51142588489624,1 8 | 75.02474556738889,46.55401354116538,1 9 | 76.09878670226257,87.42056971926803,1 10 | 84.43281996120035,43.53339331072109,1 11 | 95.86155507093572,38.22527805795094,0 12 | 75.01365838958247,30.60326323428011,0 13 | 82.30705337399482,76.48196330235604,1 14 | 69.36458875970939,97.71869196188608,1 15 | 39.53833914367223,76.03681085115882,0 16 | 53.9710521485623,89.20735013750205,1 17 | 69.07014406283025,52.74046973016765,1 18 | 67.94685547711617,46.67857410673128,0 19 | 70.66150955499435,92.92713789364831,1 20 | 76.97878372747498,47.57596364975532,1 21 | 67.37202754570876,42.83843832029179,0 22 | 89.67677575072079,65.79936592745237,1 23 | 50.534788289883,48.85581152764205,0 24 | 34.21206097786789,44.20952859866288,0 25 | 77.9240914545704,68.9723599933059,1 26 | 62.27101367004632,69.95445795447587,1 27 | 80.1901807509566,44.82162893218353,1 28 | 93.114388797442,38.80067033713209,0 29 | 61.83020602312595,50.25610789244621,0 30 | 38.78580379679423,64.99568095539578,0 31 | 61.379289447425,72.80788731317097,1 32 | 85.40451939411645,57.05198397627122,1 33 | 52.10797973193984,63.12762376881715,0 34 
| 52.04540476831827,69.43286012045222,1 35 | 40.23689373545111,71.16774802184875,0 36 | 54.63510555424817,52.21388588061123,0 37 | 33.91550010906887,98.86943574220611,0 38 | 64.17698887494485,80.90806058670817,1 39 | 74.78925295941542,41.57341522824434,0 40 | 34.1836400264419,75.2377203360134,0 41 | 83.90239366249155,56.30804621605327,1 42 | 51.54772026906181,46.85629026349976,0 43 | 94.44336776917852,65.56892160559052,1 44 | 82.36875375713919,40.61825515970618,0 45 | 51.04775177128865,45.82270145776001,0 46 | 62.22267576120188,52.06099194836679,0 47 | 77.19303492601364,70.45820000180959,1 48 | 97.77159928000232,86.7278223300282,1 49 | 62.07306379667647,96.76882412413983,1 50 | 91.56497449807442,88.69629254546599,1 51 | 79.94481794066932,74.16311935043758,1 52 | 99.2725269292572,60.99903099844988,1 53 | 90.54671411399852,43.39060180650027,1 54 | 34.52451385320009,60.39634245837173,0 55 | 50.2864961189907,49.80453881323059,0 56 | 49.58667721632031,59.80895099453265,0 57 | 97.64563396007767,68.86157272420604,1 58 | 32.57720016809309,95.59854761387875,0 59 | 74.24869136721598,69.82457122657193,1 60 | 71.79646205863379,78.45356224515052,1 61 | 75.3956114656803,85.75993667331619,1 62 | 35.28611281526193,47.02051394723416,0 63 | 56.25381749711624,39.26147251058019,0 64 | 30.05882244669796,49.59297386723685,0 65 | 44.66826172480893,66.45008614558913,0 66 | 66.56089447242954,41.09209807936973,0 67 | 40.45755098375164,97.53518548909936,1 68 | 49.07256321908844,51.88321182073966,0 69 | 80.27957401466998,92.11606081344084,1 70 | 66.74671856944039,60.99139402740988,1 71 | 32.72283304060323,43.30717306430063,0 72 | 64.0393204150601,78.03168802018232,1 73 | 72.34649422579923,96.22759296761404,1 74 | 60.45788573918959,73.09499809758037,1 75 | 58.84095621726802,75.85844831279042,1 76 | 99.82785779692128,72.36925193383885,1 77 | 47.26426910848174,88.47586499559782,1 78 | 50.45815980285988,75.80985952982456,1 79 | 60.45555629271532,42.50840943572217,0 80 | 82.22666157785568,42.71987853716458,0 81 | 88.9138964166533,69.80378889835472,1 82 | 94.83450672430196,45.69430680250754,1 83 | 67.31925746917527,66.58935317747915,1 84 | 57.23870631569862,59.51428198012956,1 85 | 80.36675600171273,90.96014789746954,1 86 | 68.46852178591112,85.59430710452014,1 87 | 42.0754545384731,78.84478600148043,0 88 | 75.47770200533905,90.42453899753964,1 89 | 78.63542434898018,96.64742716885644,1 90 | 52.34800398794107,60.76950525602592,0 91 | 94.09433112516793,77.15910509073893,1 92 | 90.44855097096364,87.50879176484702,1 93 | 55.48216114069585,35.57070347228866,0 94 | 74.49269241843041,84.84513684930135,1 95 | 89.84580670720979,45.35828361091658,1 96 | 83.48916274498238,48.38028579728175,1 97 | 42.2617008099817,87.10385094025457,1 98 | 99.31500880510394,68.77540947206617,1 99 | 55.34001756003703,64.9319380069486,1 100 | 74.77589300092767,89.52981289513276,1 101 | -------------------------------------------------------------------------------- /ex2/ex2data2.txt: -------------------------------------------------------------------------------- 1 | 0.051267,0.69956,1 2 | -0.092742,0.68494,1 3 | -0.21371,0.69225,1 4 | -0.375,0.50219,1 5 | -0.51325,0.46564,1 6 | -0.52477,0.2098,1 7 | -0.39804,0.034357,1 8 | -0.30588,-0.19225,1 9 | 0.016705,-0.40424,1 10 | 0.13191,-0.51389,1 11 | 0.38537,-0.56506,1 12 | 0.52938,-0.5212,1 13 | 0.63882,-0.24342,1 14 | 0.73675,-0.18494,1 15 | 0.54666,0.48757,1 16 | 0.322,0.5826,1 17 | 0.16647,0.53874,1 18 | -0.046659,0.81652,1 19 | -0.17339,0.69956,1 20 | -0.47869,0.63377,1 21 | -0.60541,0.59722,1 22 | 
-0.62846,0.33406,1 23 | -0.59389,0.005117,1 24 | -0.42108,-0.27266,1 25 | -0.11578,-0.39693,1 26 | 0.20104,-0.60161,1 27 | 0.46601,-0.53582,1 28 | 0.67339,-0.53582,1 29 | -0.13882,0.54605,1 30 | -0.29435,0.77997,1 31 | -0.26555,0.96272,1 32 | -0.16187,0.8019,1 33 | -0.17339,0.64839,1 34 | -0.28283,0.47295,1 35 | -0.36348,0.31213,1 36 | -0.30012,0.027047,1 37 | -0.23675,-0.21418,1 38 | -0.06394,-0.18494,1 39 | 0.062788,-0.16301,1 40 | 0.22984,-0.41155,1 41 | 0.2932,-0.2288,1 42 | 0.48329,-0.18494,1 43 | 0.64459,-0.14108,1 44 | 0.46025,0.012427,1 45 | 0.6273,0.15863,1 46 | 0.57546,0.26827,1 47 | 0.72523,0.44371,1 48 | 0.22408,0.52412,1 49 | 0.44297,0.67032,1 50 | 0.322,0.69225,1 51 | 0.13767,0.57529,1 52 | -0.0063364,0.39985,1 53 | -0.092742,0.55336,1 54 | -0.20795,0.35599,1 55 | -0.20795,0.17325,1 56 | -0.43836,0.21711,1 57 | -0.21947,-0.016813,1 58 | -0.13882,-0.27266,1 59 | 0.18376,0.93348,0 60 | 0.22408,0.77997,0 61 | 0.29896,0.61915,0 62 | 0.50634,0.75804,0 63 | 0.61578,0.7288,0 64 | 0.60426,0.59722,0 65 | 0.76555,0.50219,0 66 | 0.92684,0.3633,0 67 | 0.82316,0.27558,0 68 | 0.96141,0.085526,0 69 | 0.93836,0.012427,0 70 | 0.86348,-0.082602,0 71 | 0.89804,-0.20687,0 72 | 0.85196,-0.36769,0 73 | 0.82892,-0.5212,0 74 | 0.79435,-0.55775,0 75 | 0.59274,-0.7405,0 76 | 0.51786,-0.5943,0 77 | 0.46601,-0.41886,0 78 | 0.35081,-0.57968,0 79 | 0.28744,-0.76974,0 80 | 0.085829,-0.75512,0 81 | 0.14919,-0.57968,0 82 | -0.13306,-0.4481,0 83 | -0.40956,-0.41155,0 84 | -0.39228,-0.25804,0 85 | -0.74366,-0.25804,0 86 | -0.69758,0.041667,0 87 | -0.75518,0.2902,0 88 | -0.69758,0.68494,0 89 | -0.4038,0.70687,0 90 | -0.38076,0.91886,0 91 | -0.50749,0.90424,0 92 | -0.54781,0.70687,0 93 | 0.10311,0.77997,0 94 | 0.057028,0.91886,0 95 | -0.10426,0.99196,0 96 | -0.081221,1.1089,0 97 | 0.28744,1.087,0 98 | 0.39689,0.82383,0 99 | 0.63882,0.88962,0 100 | 0.82316,0.66301,0 101 | 0.67339,0.64108,0 102 | 1.0709,0.10015,0 103 | -0.046659,-0.57968,0 104 | -0.23675,-0.63816,0 105 | -0.15035,-0.36769,0 106 | -0.49021,-0.3019,0 107 | -0.46717,-0.13377,0 108 | -0.28859,-0.060673,0 109 | -0.61118,-0.067982,0 110 | -0.66302,-0.21418,0 111 | -0.59965,-0.41886,0 112 | -0.72638,-0.082602,0 113 | -0.83007,0.31213,0 114 | -0.72062,0.53874,0 115 | -0.59389,0.49488,0 116 | -0.48445,0.99927,0 117 | -0.0063364,0.99927,0 118 | 0.63265,-0.030612,0 119 | -------------------------------------------------------------------------------- /ex2/ex2part1.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | import pandas as pd 4 | from scipy.optimize import minimize 5 | 6 | 7 | def plotData(X, y): 8 | pos = X[np.where(y==1)] 9 | neg = X[np.where(y==0)] 10 | fig, ax = plt.subplots() 11 | ax.plot(pos[:,0],pos[:,1],"k+",neg[:,0],neg[:,1],"yo") 12 | return (fig, ax) 13 | 14 | def costFunction(theta,X,y): 15 | m = len(y) 16 | J =(np.sum(-y*np.log(sigmoid(np.dot(X,theta)))- 17 | (1-y)*(np.log(1-sigmoid(np.dot(X,theta)))))/m) 18 | grad = (np.sum((sigmoid(np.dot(X,theta))-y)[:,None]*X,axis=0)/m) 19 | return (J, grad) 20 | 21 | def sigmoid(z): 22 | return 1.0/(1 + np.e**(-z)) 23 | 24 | 25 | def predict(theta,X): 26 | """ 27 | Given a vector of parameter results and training set X, 28 | returns the model prediction for admission. If predicted 29 | probability of admission is greater than .5, predict will 30 | return a value of 1. 
31 | """ 32 | return np.where(np.dot(X,theta) > 5.,1,0) 33 | 34 | def mapFeatureVector(X1,X2): 35 | """ 36 | Feature mapping function to polynomial features. Maps the two features 37 | X1,X2 to quadratic features used in the regularization exercise. X1, X2 38 | must be the same size.returns new feature array with interactions and quadratic terms 39 | """ 40 | 41 | degree = 6 42 | output_feature_vec = np.ones(len(X1))[:,None] 43 | 44 | for i in range(1,7): 45 | for j in range(i+1): 46 | new_feature = np.array(X1**(i-j)*X2**j)[:,None] 47 | output_feature_vec = np.hstack((output_feature_vec,new_feature)) 48 | 49 | return output_feature_vec 50 | 51 | 52 | def costFunctionReg(theta,X,y,reg_param): 53 | m = len(y) 54 | J =((np.sum(-y*np.log(sigmoid(np.dot(X,theta)))- 55 | (1-y)*(np.log(1-sigmoid(np.dot(X,theta))))))/m + 56 | (reg_param/m)*np.sum(theta**2)) 57 | 58 | # Non-regularized 59 | grad_0 = (np.sum((sigmoid(np.dot(X,theta))-y)[:,None]*X,axis=0)/m) 60 | 61 | # Regularized 62 | grad_reg = grad_0 + (reg_param/m)*theta 63 | # Replace gradient for theta_0 with non-regularized gradient 64 | grad_reg[0] = grad_0[0] 65 | 66 | return J 67 | 68 | 69 | def plotDecisionBoundary(theta,X,y): 70 | """X is asssumed to be either: 71 | 1) Mx3 matrix where the first column is all ones for the intercept 72 | 2) MxN with N>3, where the first column is all ones 73 | """ 74 | fig, ax = plotData(X[:,1:],y) 75 | """ 76 | if len(X[0]<=3): 77 | # Choose two endpoints and plot the line between them 78 | plot_x = np.array([min(X[:,1])-2,max(X[:,2])+2]) 79 | ax.plot(plot_x,plot_y) 80 | ax.legend(['Admitted','Fail','Pass']) 81 | ax.set_xbound(30,100) 82 | ax.set_ybound(30,100) 83 | else: 84 | """ 85 | 86 | # Create grid space 87 | u = np.linspace(-1,1.5,50) 88 | v = np.linspace(-1,1.5,50) 89 | z = np.zeros((len(u),len(v))) 90 | 91 | # Evaluate z = theta*x over values in the gridspace 92 | for i in range(len(u)): 93 | for j in range(len(v)): 94 | z[i][j] = np.dot(mapFeatureVector(np.array([u[i]]), 95 | np.array([v[j]])),theta) 96 | 97 | # Plot contour 98 | ax.contour(u,v,z,levels=[0]) 99 | 100 | return (fig,ax) 101 | 102 | ## Load Data 103 | # The first two columns contains the exam scores and the third column 104 | # contains the label. 105 | 106 | data = pd.read_csv('ex2data1.txt', names=['x1','x2','y']) 107 | X = np.asarray(data[["x1","x2"]]) 108 | y = np.asarray(data["y"]) 109 | 110 | print("Plotting data with + indicating (y = 1) examples and o indicating", 111 | " (y =0) examples.") 112 | fig, ax = plotData(X, y) 113 | ax.legend(['Admitted', 'Not admitted']) 114 | fig.show() 115 | input('\nProgram paused. Press enter to continue.\n') 116 | 117 | # Add intercept term to x and X_test 118 | X = np.hstack((np.ones_like(y)[:,None],X)) 119 | initial_theta = np.zeros(3) 120 | cost, grad = costFunction(initial_theta, X, y) 121 | 122 | print('Cost at initial theta (zeros): \n', cost) 123 | print('Gradient at initial theta (zeros): \n',grad) 124 | 125 | input('\nProgram paused. Press enter to continue.') 126 | 127 | res = minimize(costFunction, 128 | initial_theta, 129 | method='Newton-CG', 130 | args=(X,y), 131 | jac=True, 132 | options={'maxiter':400, 133 | 'disp':True}) 134 | 135 | theta = res.x 136 | print('Cost at theta found by minimize: \n', res.fun) 137 | print('theta: \n', theta) 138 | plotDecisionBoundary(theta, X, y) 139 | input('\nProgram paused. 
def costFunctionReg(theta,X,y,reg_param):
    m = len(y)
    J = ((np.sum(-y*np.log(sigmoid(np.dot(X,theta)))-
        (1-y)*(np.log(1-sigmoid(np.dot(X,theta))))))/m +
        (reg_param/(2*m))*np.sum(theta[1:]**2)) # the intercept is not regularized

    # Non-regularized
    grad_0 = (np.sum((sigmoid(np.dot(X,theta))-y)[:,None]*X,axis=0)/m)

    # Regularized
    grad_reg = grad_0 + (reg_param/m)*theta
    # Replace gradient for theta_0 with non-regularized gradient
    grad_reg[0] = grad_0[0]

    return (J, grad_reg)


def plotDecisionBoundary(theta,X,y):
    """X is assumed to be either:
    1) Mx3 matrix where the first column is all ones for the intercept
    2) MxN with N>3, where the first column is all ones
    """
    fig, ax = plotData(X[:,1:],y)

    if len(X[0]) <= 3:
        # Choose two endpoints and plot the line between them; the boundary
        # theta0 + theta1*x1 + theta2*x2 = 0 gives x2 = -(theta0 + theta1*x1)/theta2
        plot_x = np.array([min(X[:,1])-2,max(X[:,1])+2])
        plot_y = (-1.0/theta[2])*(theta[0] + theta[1]*plot_x)
        ax.plot(plot_x,plot_y)
        ax.legend(['Admitted','Not admitted','Decision Boundary'])
        ax.set_xbound(30,100)
        ax.set_ybound(30,100)
    else:
        # Create grid space
        u = np.linspace(-1,1.5,50)
        v = np.linspace(-1,1.5,50)
        z = np.zeros((len(u),len(v)))

        # Evaluate z = theta*x over values in the gridspace
        for i in range(len(u)):
            for j in range(len(v)):
                z[i][j] = np.dot(mapFeatureVector(np.array([u[i]]),
                                                  np.array([v[j]])),theta)

        # Plot contour (transpose z so that rows index v, the y-axis)
        ax.contour(u,v,z.T,levels=[0])

    return (fig,ax)

## Load Data
# The first two columns contain the exam scores and the third column
# contains the label.

data = pd.read_csv('ex2data1.txt', names=['x1','x2','y'])
X = np.asarray(data[["x1","x2"]])
y = np.asarray(data["y"])

print("Plotting data with + indicating (y = 1) examples and o indicating",
      " (y = 0) examples.")
fig, ax = plotData(X, y)
ax.legend(['Admitted', 'Not admitted'])
fig.show()
input('\nProgram paused. Press enter to continue.\n')

# Add intercept term to X
X = np.hstack((np.ones_like(y)[:,None],X))
initial_theta = np.zeros(3)
cost, grad = costFunction(initial_theta, X, y)

print('Cost at initial theta (zeros): \n', cost)
print('Gradient at initial theta (zeros): \n',grad)

input('\nProgram paused. Press enter to continue.')

res = minimize(costFunction,
               initial_theta,
               method='Newton-CG',
               args=(X,y),
               jac=True,
               options={'maxiter':400,
                        'disp':True})

theta = res.x
print('Cost at theta found by minimize: \n', res.fun)
print('theta: \n', theta)
plotDecisionBoundary(theta, X, y)
input('\nProgram paused. Press enter to continue.\n')

# In this part, you will use the logistic regression model
# to predict the probability that a student with score 45 on exam 1 and
# score 85 on exam 2 will be admitted.
# Furthermore, you will compute the training and test set accuracies of
# our model.

prob = sigmoid(np.dot([1,45,85],theta))
print('For a student with scores 45 and 85, we predict an ',
      'admission probability of ', prob)

# Compute accuracy on our training set
p = predict(theta, X)

print('Train Accuracy: \n', np.mean(p==y)*100)

input('Program paused. Press enter to continue.\n')
--------------------------------------------------------------------------------
/ex2/ex2part2.py:
--------------------------------------------------------------------------------
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from scipy.optimize import minimize


def plotData(X, y):
    pos = X[np.where(y==1)]
    neg = X[np.where(y==0)]
    fig, ax = plt.subplots()
    ax.plot(pos[:,0],pos[:,1],"k+",neg[:,0],neg[:,1],"yo")
    return (fig, ax)

def costFunction(theta,X,y):

    m = len(y)
    J = (np.sum(-y*np.log(sigmoid(np.dot(X,theta)))-
        (1-y)*(np.log(1-sigmoid(np.dot(X,theta)))))/m)
    grad = (np.sum((sigmoid(np.dot(X,theta))-y)[:,None]*X,axis=0)/m)
    return (J, grad)

def sigmoid(z):
    return 1.0/(1 + np.e**(-z))


def predict(theta,X):
    return np.where(sigmoid(np.dot(X,theta)) >= 0.5, 1, 0) # probability threshold of 0.5

def mapFeatureVector(X1,X2):
    degree = 6
    output_feature_vec = np.ones(len(X1))[:,None]
    for i in range(1,degree+1):
        for j in range(i+1):
            new_feature = np.array(X1**(i-j)*X2**j)[:,None]
            output_feature_vec = np.hstack((output_feature_vec,new_feature))

    return output_feature_vec


def costFunctionReg(theta,X,y,reg_param):
    m = len(y)
    J = ((np.sum(-y*np.log(sigmoid(np.dot(X,theta)))-
        (1-y)*(np.log(1-sigmoid(np.dot(X,theta))))))/m +
        (reg_param/(2*m))*np.sum(theta[1:]**2)) # the intercept is not regularized
    # Non-regularized
    grad_0 = (np.sum((sigmoid(np.dot(X,theta))-y)[:,None]*X,axis=0)/m)
    # Regularized
    grad_reg = grad_0 + (reg_param/m)*theta
    grad_reg[0] = grad_0[0]

    return (J, grad_reg)
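# Added sketch (not part of the original script): a quick central-difference
# check that the analytic gradient above matches a numerical estimate. The
# helper name and the step size eps are our own choices.
def numericalGradCheck(J, theta, eps=1e-4):
    # dJ/dtheta_i ~ (J(theta + eps*e_i) - J(theta - eps*e_i)) / (2*eps)
    grad = np.zeros_like(theta)
    for i in range(len(theta)):
        step = np.zeros_like(theta)
        step[i] = eps
        grad[i] = (J(theta + step) - J(theta - step)) / (2 * eps)
    return grad
# e.g. compare costFunctionReg(t, X, y, 1.0)[1] against
# numericalGradCheck(lambda t: costFunctionReg(t, X, y, 1.0)[0], t)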
def plotDecisionBoundary(theta,X,y):
    fig, ax = plotData(X[:,1:],y)

    if len(X[0]) <= 3:
        # Choose two endpoints and plot the line between them
        plot_x = np.array([min(X[:,1])-2,max(X[:,1])+2])
        # Calculate the decision boundary line and adjust the axes
        plot_y = (-1.0/theta[2])*(theta[0] + theta[1]*plot_x)
        ax.plot(plot_x,plot_y)
        ax.legend(['Pass','Fail','Decision Boundary'])
        ax.set_xbound(30,100)
        ax.set_ybound(30,100)
    else:
        u = np.linspace(-1,1.5,50)
        v = np.linspace(-1,1.5,50)
        z = np.zeros((len(u),len(v)))
        for i in range(len(u)):
            for j in range(len(v)):
                z[i][j] = np.dot(mapFeatureVector(np.array([u[i]]),
                                                  np.array([v[j]])),theta)

        # transpose z so that rows index v, the y-axis
        ax.contour(u,v,z.T,levels=[0])

    return (fig,ax)

## Load Data
data = pd.read_csv('ex2data2.txt', names=['x1','x2','y'])
X = np.asarray(data[["x1","x2"]])
y = np.asarray(data["y"])
fig, ax = plotData(X, y)

ax.legend(['Pass', 'Fail'])

# Labels
ax.set_xlabel('Microchip test 1')
ax.set_ylabel('Microchip test 2')
fig.show()

input('\nProgram paused. Press enter to continue.\n')

## Part 1 -- Regularized Logistic Regression
X = mapFeatureVector(X[:,0],X[:,1])
initial_theta = np.zeros(len(X[0,:]))

# Set regularization parameter to 1
reg_param = 1.0

# Optimize for theta, letting scipy choose the method
res = minimize(costFunctionReg,
               initial_theta,
               args=(X,y,reg_param),
               jac=True,
               tol=1e-6,
               options={'maxiter':400,
                        'disp':True})


theta = res.x
fig.clear()
fig, ax = plotDecisionBoundary(theta,X,y)

ax.legend(['Pass', 'Fail','Decision Boundary'])

# Labels
ax.set_xlabel('Microchip test 1')
ax.set_ylabel('Microchip test 2')
ax.set_title('Lambda = 1')

fig.show()

input('\nProgram paused. Press enter to continue.\n')
--------------------------------------------------------------------------------
/ex2/i.txt:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/ex2/token.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deyachatterjee/ml-andrewng-python/03cf16e3d26cd65e791dc6a9e1b49d01ff0b70f3/ex2/token.mat
--------------------------------------------------------------------------------
/ex3/ex3.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deyachatterjee/ml-andrewng-python/03cf16e3d26cd65e791dc6a9e1b49d01ff0b70f3/ex3/ex3.pdf
--------------------------------------------------------------------------------
/ex3/ex3.py:
--------------------------------------------------------------------------------
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import minimize
import scipy.io

def displayData(X):
    fig, ax = plt.subplots(10,10,sharex=True,sharey=True)
    img_num = 0
    for i in range(10):
        for j in range(10):
            # Convert column vector into 20x20 pixel matrix
            # You have to transpose to display correctly
            img = X[img_num,:].reshape(20,20).T
            ax[i][j].imshow(img,cmap='gray')
            img_num += 1

    return (fig, ax)

def displayImage(im):
    """
    Displays a single image stored as a column vector
    """
    fig2, ax2 = plt.subplots()
    image = im.reshape(20,20).T
    ax2.imshow(image,cmap='gray')
    return (fig2, ax2)

def sigmoid(z):
    return 1.0/(1 + np.e**(-z))

def lrCostFunction(theta,X,y,reg_param):
    m = len(y)
    J = ((np.sum(-y*np.log(sigmoid(np.dot(X,theta)))-
        (1-y)*(np.log(1-sigmoid(np.dot(X,theta))))))/m +
        (reg_param/(2*m))*np.sum(theta[1:]**2)) # the intercept is not regularized
    # Gradient
    # Non-regularized
    grad_0 = (np.sum((sigmoid(np.dot(X,theta))-y)[:,None]*X,axis=0)/m)
    # Regularized
    grad_reg = grad_0 + (reg_param/m)*theta
    grad_reg[0] = grad_0[0]
    return (J,grad_reg)
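# Added note: one-vs-all trains num_labels independent regularized logistic
# regressions, one per class k on the binary labels (y == k); prediction then
# picks the class whose score x @ theta_k is largest (the argmax in
# predictOneVsAllAccuracy below).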
def oneVsAll(X, y, num_labels, reg_param):
    # Calculates one set of logistic-regression parameters per class
    n = np.size(X,1)
    theta = np.zeros((n,num_labels))
    # Function to find parameters for a single logit
    def findOptParam(p_num):
        outcome = np.array(y == p_num).astype(int)
        initial_theta = theta[:,p_num]
        results = minimize(lrCostFunction,
                           initial_theta,
                           method='Newton-CG',
                           args=(X,outcome,reg_param),
                           jac=True,
                           tol=1e-6,
                           options={'maxiter':400,
                                    'disp':True})
        theta[:,p_num] = results.x


    for digit in range(num_labels):
        findOptParam(digit)

    return theta


def predictOneVsAllAccuracy(est_theta,X):
    """
    Classifies each observation by using the
    highest predicted probability from the possible classifications.
    """

    probs = np.dot(X,est_theta)
    predict = np.argmax(probs,axis=1)

    return predict


def predict(theta1,theta2,X):
    m = len(X)
    if np.ndim(X) == 1:
        X = X.reshape((-1,1)) # handle one-dimensional input
    D1 = np.hstack((np.ones((m,1)),X))

    # hidden layer from theta1 parameters
    hidden_pred = np.dot(D1,theta1.T) # (5000 x 401) x (401 x 25) = 5000 x 25
    ones = np.ones((len(hidden_pred),1)) # 5000 x 1
    hidden_pred = sigmoid(hidden_pred)
    hidden_pred = np.hstack((ones,hidden_pred)) # 5000 x 26

    # output layer from new design matrix
    output_pred = np.dot(hidden_pred,theta2.T) # (5000 x 26) x (26 x 10)
    output_pred = sigmoid(output_pred)
    # Get predictions
    p = np.argmax(output_pred,axis=1)

    return p

# parameters
input_layer_size = 400
num_labels = 10

print("Loading training data...")
raw_mat = scipy.io.loadmat("ex3data1.mat")
X = raw_mat.get("X")
y = raw_mat.get("y").flatten()
y[y == 10] = 0

X_design = np.hstack((np.ones((len(y),1)),X)) # add a column of ones
# Randomly select 100 datapoints to display
rand_indices = np.random.randint(0,len(X_design),100)
sel = X[rand_indices,:]

# Display
digit_grid, ax = displayData(sel)
digit_grid.show()

input("Program paused, press enter to continue...")

# Vectorized logistic regression
reg_param = 1.0
theta = oneVsAll(X_design,y,num_labels,reg_param)

predictions = predictOneVsAllAccuracy(theta,X_design)
accuracy = np.mean(y == predictions) * 100
print("Training Accuracy with logit: ", accuracy, "%")
input("Program paused, press enter to continue...")

# Neural Network
# Load pre-estimated weights
print("Loading saved neural network parameters...")
raw_params = scipy.io.loadmat("ex3weights.mat")
theta1 = raw_params.get("Theta1") # 25 x 401
theta2 = raw_params.get("Theta2") # 10 x 26
# The saved weights follow MATLAB's 1-indexed classes (digit '0' is stored as
# class 10), so add 1 to the 0-based argmax and take mod 10: class 10 becomes
# digit 0 and every other class maps to its own digit.
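# For example, output unit 9 (MATLAB class 10, i.e. the digit '0') maps to
# (9 + 1) % 10 == 0, while unit 0 (class 1, the digit '1') maps to 1.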

predictions = (predict(theta1,theta2,X) + 1) % 10
accuracy = np.mean(y == predictions) * 100
print("Training Accuracy with neural network: ", accuracy, "%")
--------------------------------------------------------------------------------
/ex3/ex3data1.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deyachatterjee/ml-andrewng-python/03cf16e3d26cd65e791dc6a9e1b49d01ff0b70f3/ex3/ex3data1.mat
--------------------------------------------------------------------------------
/ex3/ex3weights.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deyachatterjee/ml-andrewng-python/03cf16e3d26cd65e791dc6a9e1b49d01ff0b70f3/ex3/ex3weights.mat
--------------------------------------------------------------------------------
/ex4/ex4.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deyachatterjee/ml-andrewng-python/03cf16e3d26cd65e791dc6a9e1b49d01ff0b70f3/ex4/ex4.pdf
--------------------------------------------------------------------------------
/ex4/ex4.py:
--------------------------------------------------------------------------------
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import minimize
import scipy.io


def displayData(X):

    num_plots = int(np.size(X,0)**.5)
    fig, ax = plt.subplots(num_plots,num_plots,sharex=True,sharey=True)
    img_num = 0
    for i in range(num_plots):
        for j in range(num_plots):
            # Convert column vector into 20x20 pixel matrix
            # transpose
            img = X[img_num,:].reshape(20,20).T
            ax[i][j].imshow(img,cmap='gray')
            img_num += 1

    return (fig, ax)

def displayImage(im):
    fig2, ax2 = plt.subplots()
    image = im.reshape(20,20).T
    ax2.imshow(image,cmap='gray')
    return (fig2, ax2)

def sigmoid(z):
    return 1.0/(1 + np.e**(-z))

def sigmoidGradient(z):
    return sigmoid(z)*(1-sigmoid(z))

def predict(theta1,theta2,X):
    m = len(X)

    if np.ndim(X) == 1:
        X = X.reshape((-1,1))

    D1 = np.hstack((np.ones((m,1)),X)) # add column of ones

    # hidden layer
    hidden_pred = np.dot(D1,theta1.T) # (5000 x 401) x (401 x 25) = 5000 x 25

    # Add column of ones
    ones = np.ones((len(hidden_pred),1)) # 5000 x 1
    hidden_pred = sigmoid(hidden_pred)
    hidden_pred = np.hstack((ones,hidden_pred)) # 5000 x 26

    # output layer
    output_pred = np.dot(hidden_pred,theta2.T) # (5000 x 26) x (26 x 10)
    output_pred = sigmoid(output_pred)
    # Get predictions
    p = np.argmax(output_pred,axis=1)

    return p

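# Added note: nnCostFunction below implements the regularized cross-entropy
# cost from the exercise,
#
#   J = (1/m) * sum_i sum_k [ -y_ik*log(h_ik) - (1 - y_ik)*log(1 - h_ik) ]
#       + (lambda/(2m)) * (sum(Theta1[:,1:]**2) + sum(Theta2[:,1:]**2)),
#
# and returns its gradient computed by backpropagation.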
64 | """ 65 | m = len(y) 66 | # Reshape nn_params back into neural network 67 | theta1 = nn_params[:(hidden_layer_size * 68 | (input_layer_size + 1))].reshape((hidden_layer_size, 69 | input_layer_size +1)) 1)) 70 | 71 | theta2 = nn_params[-((hidden_layer_size + 1) * 72 | num_labels):].reshape((num_labels, 73 | hidden_layer_size + 1)) 74 | 75 | # Turn scalar y values into a matrix of binary 76 | init_y = np.zeros((m,num_labels)) # 5000 x 10 77 | 78 | for i in range(m): 79 | init_y[i][y[i]] = 1 80 | 81 | # Add column of ones to X 82 | ones = np.ones((m,1)) 83 | d = np.hstack((ones,X))# add column of ones 84 | 85 | # forward propogation with theta1 and theta2 86 | cost = [0]*m 87 | # Initalize gradient vector 88 | D1 = np.zeros_like(theta1) 89 | D2 = np.zeros_like(theta2) 90 | for i in range(m): 91 | 92 | a1 = d[i][:,None] # 401 x 1 93 | z2 = np.dot(theta1,a1) # 25 x 1 94 | a2 = sigmoid(z2) # 25 x 1 95 | a2 = np.vstack((np.ones(1),a2)) # 26 x 1 96 | z3 = np.dot(theta2,a2) #10 x 1 97 | h = sigmoid(z3) # 10 x 1 98 | a3 = h # 10 x 1 99 | cost[i] = (np.sum((-init_y[i][:,None])*(np.log(h)) - 100 | (1-init_y[i][:,None])*(np.log(1-h))))/m 101 | 102 | # Calculate Gradient 103 | d3 = a3 - init_y[i][:,None] 104 | d2 = np.dot(theta2.T,d3)[1:]*(sigmoidGradient(z2)) 105 | 106 | # Accumulate errors for gradient calculation 107 | D1 = D1 + np.dot(d2,a1.T) # 25 x 401 (matches theta0) 108 | D2 = D2 + np.dot(d3,a2.T) # 10 x 26 (matches theta1) 109 | 110 | # regularization 111 | reg = (reg_param/(2*m))*((np.sum(theta1[:,1:]**2)) + 112 | (np.sum(theta2[:,1:]**2))) 113 | 114 | # Compute final gradient with regularization 115 | grad1 = (1.0/m)*D1 + (reg_param/m)*theta1 116 | grad1[0] = grad1[0] - (reg_param/m)*theta1[0] 117 | 118 | grad2 = (1.0/m)*D2 + (reg_param/m)*theta2 119 | grad2[0] = grad2[0] - (reg_param/m)*theta2[0] 120 | 121 | # Append and unroll gradient 122 | grad = np.append(grad1,grad2).reshape(-1) 123 | final_cost = sum(cost) + reg 124 | 125 | return (final_cost, grad) 126 | 127 | 128 | 129 | def randInitializeWeights(L_in,L_out): 130 | """ 131 | Randomly initalize the weights of a layer with L_in incoming 132 | connections and L_out outgoing connections. Avoids symmetry 133 | problems when training the neural network. 134 | """ 135 | randWeights = np.random.uniform(low=-.12,high=.12, 136 | size=(L_in,L_out)) 137 | return randWeights 138 | 139 | def debugInitializeWeights(fan_in, fan_out): 140 | """ 141 | Initializes the weights of a layer with fan_in incoming connections and 142 | fan_out outgoing connections using a fixed set of values. 143 | """ 144 | 145 | # Set W to zero matrix 146 | W = np.zeros((fan_out,fan_in + 1)) 147 | 148 | # Initialize W using "sin". This ensures that W is always of the same 149 | # values and will be useful in debugging. 150 | W = np.array([np.sin(w) for w in 151 | range(np.size(W))]).reshape((np.size(W,0),np.size(W,1))) 152 | 153 | return W 154 | 155 | def computeNumericalGradient(J,theta): 156 | """ 157 | Computes the gradient of J around theta using finite differences and 158 | yields a numerical estimate of the gradient. 
159 | """ 160 | 161 | numgrad = np.zeros_like(theta) 162 | perturb = np.zeros_like(theta) 163 | tol = 1e-4 164 | 165 | for p in range(len(theta)): 166 | # Set perturbation vector 167 | perturb[p] = tol 168 | loss1 = J(theta - perturb) 169 | loss2 = J(theta + perturb) 170 | 171 | # Compute numerical gradient 172 | numgrad[p] = (loss2 - loss1)/(2 * tol) 173 | perturb[p] = 0 174 | 175 | 176 | return numgrad 177 | 178 | def checkNNGradients(reg_param): 179 | """ 180 | Creates a small neural network to check the back propogation gradients. 181 | Outputs the analytical gradients produced by the back prop code and the 182 | numerical gradients computed using the computeNumericalGradient function. 183 | These should result in very similar values. 184 | """ 185 | # Set up small NN 186 | input_layer_size = 3 187 | hidden_layer_size = 5 188 | num_labels = 3 189 | m = 5 190 | 191 | # Generate some random test data 192 | Theta1 = debugInitializeWeights(hidden_layer_size,input_layer_size) 193 | Theta2 = debugInitializeWeights(num_labels,hidden_layer_size) 194 | 195 | # Reusing debugInitializeWeights to get random X 196 | X = debugInitializeWeights(input_layer_size - 1, m) 197 | 198 | # Set each element of y to be in [0,num_labels] 199 | y = [(i % num_labels) for i in range(m)] 200 | 201 | # Unroll parameters 202 | nn_params = np.append(Theta1,Theta2).reshape(-1) 203 | 204 | # Compute Cost 205 | cost, grad = nnCostFunction(nn_params, 206 | input_layer_size, 207 | hidden_layer_size, 208 | num_labels, 209 | X, y, reg_param) 210 | 211 | def reduced_cost_func(p): 212 | """ Cheaply decorated nnCostFunction """ 213 | return nnCostFunction(p,input_layer_size,hidden_layer_size,num_labels, 214 | X,y,reg_param)[0] 215 | 216 | numgrad = computeNumericalGradient(reduced_cost_func,nn_params) 217 | 218 | # Check two gradients 219 | np.testing.assert_almost_equal(grad, numgrad) 220 | 221 | return 222 | 223 | 224 | input_layer_size = 400 225 | hidden_layer_size = 25 226 | num_labels = 10 227 | 228 | print("Loading training data...") 229 | 230 | raw_mat = scipy.io.loadmat("ex4data1.mat") 231 | X = raw_mat.get("X") 232 | y = raw_mat.get("y").flatten() 233 | y = (y - 1) % 10 # ex3 way of converting MATLAB 1-indexing 234 | 235 | # Randomly select 100 datapoints to display 236 | rand_indices = np.random.randint(0,len(X),100) 237 | sel = X[rand_indices,:] 238 | 239 | digit_grid, ax = displayData(sel) 240 | digit_grid.show() 241 | 242 | print("Loading neural network parameters \n") 243 | 244 | raw_params = scipy.io.loadmat("ex4weights.mat") 245 | theta1 = raw_params.get("Theta1") # 25 x 401 246 | theta2 = raw_params.get("Theta2") # 10 x 26 247 | 248 | # Unroll Parameters 249 | nn_params = np.append(theta1,theta2).reshape(-1) 250 | 251 | print("Checking cost function without regularization...") 252 | reg_param = 0.0 253 | cost, g = nnCostFunction(nn_params,input_layer_size,hidden_layer_size,num_labels, 254 | X,y,reg_param) 255 | 256 | # Test for correct cost 257 | np.testing.assert_almost_equal(0.287629,cost,decimal=6, err_msg="Cost incorrect.") 258 | 259 | # Regularized 260 | print("Checking cost function with regularization...") 261 | reg_param = 1.0 262 | reg_cost, g = nnCostFunction(nn_params,input_layer_size,hidden_layer_size,num_labels, 263 | X,y,reg_param) 264 | np.testing.assert_almost_equal(0.383770,reg_cost,decimal=6, 265 | err_msg="Regularized Cost incorrect.") 266 | 267 | print("Checking sigmoid gradient...") 268 | vals = np.array([1,-0.5,0,0.5,1]) 269 | g = sigmoidGradient(vals) 270 | 
np.testing.assert_almost_equal(0.25, g[2],decimal=2, err_msg="Sigmoid gradient incorrect")

# Initialize neural network parameters
print("Initializing neural network parameters...")
# the shapes are unrolled immediately below, so only the element count matters
initial_theta1 = randInitializeWeights(input_layer_size+1,hidden_layer_size)
initial_theta2 = randInitializeWeights(hidden_layer_size+1,num_labels)

# Unroll
initial_nn_params = np.append(initial_theta1,initial_theta2).reshape(-1)

reg_param = 0.0
initial_cost, g = nnCostFunction(initial_nn_params,input_layer_size,
                                 hidden_layer_size,num_labels,X,y,reg_param)

print("The initial cost after random initialization: ", initial_cost)

# Check gradients without regularization
checkNNGradients(0)

# Check gradients with a non-zero regularization parameter
checkNNGradients(3.0)

# # Debugging value of the cost function
# reg_param = 10
# debug_J = nnCostFunction(initial_nn_params,input_layer_size,
#                          hidden_layer_size,num_labels,X,y,reg_param)[0]
# np.testing.assert_almost_equal(debug_J, 0.576051)


# Train NN parameters
reg_param = 3.0
def reduced_cost_func(p):

    return nnCostFunction(p,input_layer_size,hidden_layer_size,num_labels,
                          X,y,reg_param)

results = minimize(reduced_cost_func,
                   initial_nn_params,
                   method="CG",
                   jac=True,
                   options={'maxiter':50, "disp":True})

fitted_params = results.x
# Reshape fitted_params back into neural network
theta1 = fitted_params[:(hidden_layer_size *
                         (input_layer_size + 1))].reshape((hidden_layer_size,
                                                           input_layer_size + 1))

theta2 = fitted_params[-((hidden_layer_size + 1) *
                         num_labels):].reshape((num_labels,
                                                hidden_layer_size + 1))

predictions = predict(theta1, theta2, X)
accuracy = np.mean(y == predictions) * 100
print("Training Accuracy with neural network: ", accuracy, "%")

# Display the hidden layer
digit_grid, ax = displayData(theta1[:,1:])
digit_grid.show()
--------------------------------------------------------------------------------
/ex4/ex4data1.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deyachatterjee/ml-andrewng-python/03cf16e3d26cd65e791dc6a9e1b49d01ff0b70f3/ex4/ex4data1.mat
--------------------------------------------------------------------------------
/ex4/ex4weights.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deyachatterjee/ml-andrewng-python/03cf16e3d26cd65e791dc6a9e1b49d01ff0b70f3/ex4/ex4weights.mat
--------------------------------------------------------------------------------
/ex4/token.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deyachatterjee/ml-andrewng-python/03cf16e3d26cd65e791dc6a9e1b49d01ff0b70f3/ex4/token.mat
--------------------------------------------------------------------------------
/ex5/ex5.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deyachatterjee/ml-andrewng-python/03cf16e3d26cd65e791dc6a9e1b49d01ff0b70f3/ex5/ex5.pdf
--------------------------------------------------------------------------------
/ex5/ex5.py:
--------------------------------------------------------------------------------
import numpy as np
import matplotlib.pyplot as plt
import scipy.io # Used to load the OCTAVE *.mat files
from scipy.optimize import minimize

def plotData():
    plt.figure(figsize=(8,5))
    plt.ylabel('Water flowing out of the dam (y)')
    plt.xlabel('Change in water level (x)')
    plt.plot(X[:,1],y,'rx')
    plt.grid(True)

datafile = 'ex5data1.mat'
mat = scipy.io.loadmat( datafile )
X = mat.get("X")
y = mat.get("y")
ytest = mat.get("ytest")
yval = mat.get("yval")
Xtest = mat.get("Xtest")
Xval = mat.get("Xval")

def linearRegCostFunction(init_theta,X,y,reg):
    m = len(y)
    # Make theta 2-d to get cost and gradient
    theta = init_theta[:,None]
    cost = ((np.sum((np.dot(X,theta) - y)**2))/(2*m) +
            (reg/(2*m))*np.sum(theta[1:]**2)) # the intercept is not regularized

    # Regularized gradient
    gradients = ((np.sum((np.dot(X,theta)-y)*X,axis=0)/m) +
                 (reg/m)*theta.flatten())

    # Replace gradient for theta_0 with the non-regularized gradient
    gradients[0] = gradients[0] - (reg/m)*theta[0,0]

    return (cost, gradients)

def trainLinearReg(X,y,reg):
    initial_theta = np.zeros(np.size(X,1))

    res = minimize(linearRegCostFunction,
                   initial_theta,
                   args=(X,y,reg),
                   jac=True,
                   options={'maxiter':400,'disp':True})

    return res.x

def learningCurve(X,y,Xval,yval,reg):
    m_train = len(X)
    m_val = len(Xval)
    error_train = np.zeros(m_train)
    error_val = np.zeros(m_train)
    for i in range(1,m_train+1):
        est_theta = trainLinearReg(X[0:i],y[0:i],reg)
        # flatten y so the residuals stay 1-d
        error_train[i-1] = (np.sum((np.dot(X[0:i],est_theta)-y[0:i].flatten())**2))/(2.0*i)
        error_val[i-1] = (np.sum((np.dot(Xval,est_theta)-yval.flatten())**2))/(2.0*m_val)

    return (error_train, error_val)

def polyFeatures(X,p):
    X_poly = np.zeros((len(X),p))
    X = X.flatten()
    for i in range(1,p+1):
        X_poly[:,i-1] = X**i
    return X_poly

def featureNormalize(X):
    mu = np.mean(X,axis=0)
    sigma = np.std(X,axis=0)
    normalized_X = np.divide(X - mu,sigma)

    return (normalized_X, mu, sigma)

def plotFit(min_x, max_x, mu, sigma, theta, p):
    x = np.arange(min_x - 15, max_x + 25, 0.05).reshape((-1,1))
    X_poly = polyFeatures(x,p)
    X_poly = np.divide(X_poly - mu, sigma)
    X_poly = np.hstack((np.ones(len(X_poly)).reshape((-1,1)),X_poly))
    plt.plot(x,np.dot(X_poly,theta),'b--',linewidth=2)
    return

def validationCurve(X, y, Xval, yval):
    lambda_vec = np.array([0, 0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1, 3, 10])
    error_train = np.zeros(len(lambda_vec))
    error_val = np.zeros(len(lambda_vec))

    for i in range(len(lambda_vec)):
        m_train = len(X)
        m_val = len(Xval)
        reg_param = lambda_vec[i]
        est_theta = trainLinearReg(X,y,reg_param)
        error_train[i] = (np.sum((np.dot(X,est_theta)-y.flatten())**2))/(2.0*m_train)
        error_val[i] = (np.sum((np.dot(Xval,est_theta)-yval.flatten())**2))/(2.0*m_val)

    return (lambda_vec, error_train, error_val)




plt.plot(X, y, 'rx', markersize=10, linewidth=1.5)
plt.xlabel('Change in water level (x)')
plt.ylabel('Water flowing out of the dam (y)')
plt.show()

# Regularized cost
full_X = np.hstack((np.ones_like(y), X))
theta = np.array([1,1])
J, g = linearRegCostFunction(theta,full_X,y,0.0)

# Regularized gradient
J, g = linearRegCostFunction(theta,full_X,y,1.0)
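# Added check (expected values from the course PDF): with theta = [1, 1] and
# lambda = 1, the cost should be about 303.993 and the gradient about
# [-15.30, 598.25].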

# Train linear regression
reg_param = 0
est_theta = trainLinearReg(full_X,y,reg_param)

# Plot linear fit
plt.plot(X, y, 'rx', markersize=10, linewidth=1.5)
plt.plot(X,np.dot(full_X,est_theta),'b-',linewidth=2)
plt.xlabel('Change in water level (x)')
plt.ylabel('Water flowing out of the dam (y)')
plt.show()

# Learning curve
reg_param = 0.0
full_Xval = np.hstack((np.ones_like(yval),Xval))
error_train, error_val = learningCurve(full_X,y,full_Xval,yval,reg_param)

plt.plot(range(len(X)), error_train, range(len(X)), error_val)
plt.title('Learning curve for linear regression')
plt.legend(['Train', 'Cross Validation'])
plt.xlabel('Number of training examples')
plt.ylabel('Error')
plt.show()

# Feature mapping for polynomial regression
p = 8
X_poly = polyFeatures(X,p)
X_poly, mu, sigma = featureNormalize(X_poly)
X_poly = np.hstack((np.ones_like(y),X_poly))

X_poly_test = polyFeatures(Xtest,p)
X_poly_test = np.divide(X_poly_test - mu, sigma)
X_poly_test = np.hstack((np.ones_like(ytest),X_poly_test))

X_poly_val = polyFeatures(Xval,p)
X_poly_val = np.divide(X_poly_val - mu, sigma)
X_poly_val = np.hstack((np.ones_like(yval),X_poly_val))

# Learning curve for polynomial regression
reg_param = 1.0
est_theta = trainLinearReg(X_poly,y,reg_param)
plt.plot(X, y, 'rx', markersize=10, linewidth=1.5)
plotFit(np.min(X), np.max(X), mu, sigma, est_theta, p)
plt.xlabel('Change in water level (x)')
plt.ylabel('Water flowing out of the dam (y)')
plt.show()

error_train, error_val = learningCurve(X_poly,y,X_poly_val,yval,reg_param)

plt.plot(range(len(X)), error_train, range(len(X)), error_val)
plt.title('Learning curve for polynomial regression')
plt.legend(['Train', 'Cross Validation'])
plt.xlabel('Number of training examples')
plt.ylabel('Error')
plt.show()

# Validation curve for selecting lambda (using the polynomial features,
# as in the exercise)
lambda_vec, error_train, error_val = validationCurve(X_poly,y,X_poly_val,yval)

plt.plot(lambda_vec, error_train, lambda_vec, error_val)
plt.title('Selecting lambda using a cross validation set')
plt.legend(['Train', 'Cross Validation'])
plt.xlabel('lambda')
plt.ylabel('Error')
plt.show()
--------------------------------------------------------------------------------
/ex5/ex5data1.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deyachatterjee/ml-andrewng-python/03cf16e3d26cd65e791dc6a9e1b49d01ff0b70f3/ex5/ex5data1.mat
--------------------------------------------------------------------------------
/ex5/token.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deyachatterjee/ml-andrewng-python/03cf16e3d26cd65e791dc6a9e1b49d01ff0b70f3/ex5/token.mat
--------------------------------------------------------------------------------
/ex6/emailSample1.txt:
--------------------------------------------------------------------------------
> Anyone knows how much it costs to host a web portal ?
>
Well, it depends on how many visitors you're expecting.
This can be anywhere from less than 10 bucks a month to a couple of $100. 
5 | You should checkout http://www.rackspace.com/ or perhaps Amazon EC2 6 | if youre running something big.. 7 | 8 | To unsubscribe yourself from this mailing list, send an email to: 9 | groupname-unsubscribe@egroups.com 10 | 11 | -------------------------------------------------------------------------------- /ex6/emailSample2.txt: -------------------------------------------------------------------------------- 1 | Folks, 2 | 3 | my first time posting - have a bit of Unix experience, but am new to Linux. 4 | 5 | 6 | Just got a new PC at home - Dell box with Windows XP. Added a second hard disk 7 | for Linux. Partitioned the disk and have installed Suse 7.2 from CD, which went 8 | fine except it didn't pick up my monitor. 9 | 10 | I have a Dell branded E151FPp 15" LCD flat panel monitor and a nVidia GeForce4 11 | Ti4200 video card, both of which are probably too new to feature in Suse's default 12 | set. I downloaded a driver from the nVidia website and installed it using RPM. 13 | Then I ran Sax2 (as was recommended in some postings I found on the net), but 14 | it still doesn't feature my video card in the available list. What next? 15 | 16 | Another problem. I have a Dell branded keyboard and if I hit Caps-Lock twice, 17 | the whole machine crashes (in Linux, not Windows) - even the on/off switch is 18 | inactive, leaving me to reach for the power cable instead. 19 | 20 | If anyone can help me in any way with these probs., I'd be really grateful - 21 | I've searched the 'net but have run out of ideas. 22 | 23 | Or should I be going for a different version of Linux such as RedHat? Opinions 24 | welcome. 25 | 26 | Thanks a lot, 27 | Peter 28 | 29 | -- 30 | Irish Linux Users' Group: ilug@linux.ie 31 | http://www.linux.ie/mailman/listinfo/ilug for (un)subscription information. 
List maintainer: listmaster@linux.ie


--------------------------------------------------------------------------------
/ex6/ex6.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deyachatterjee/ml-andrewng-python/03cf16e3d26cd65e791dc6a9e1b49d01ff0b70f3/ex6/ex6.pdf
--------------------------------------------------------------------------------
/ex6/ex6data1.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deyachatterjee/ml-andrewng-python/03cf16e3d26cd65e791dc6a9e1b49d01ff0b70f3/ex6/ex6data1.mat
--------------------------------------------------------------------------------
/ex6/ex6data2.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deyachatterjee/ml-andrewng-python/03cf16e3d26cd65e791dc6a9e1b49d01ff0b70f3/ex6/ex6data2.mat
--------------------------------------------------------------------------------
/ex6/ex6data3.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deyachatterjee/ml-andrewng-python/03cf16e3d26cd65e791dc6a9e1b49d01ff0b70f3/ex6/ex6data3.mat
--------------------------------------------------------------------------------
/ex6/ex6spam.py:
--------------------------------------------------------------------------------
import numpy as np
import matplotlib.pyplot as plt
import scipy.io
from sklearn import svm
import re
import nltk, nltk.stem.porter

print("emailSample1.txt:")
print(open('emailSample1.txt').read()) # the sample files sit next to this script

"""
Anyone knows how much it costs to host a web portal ?
>
Well, it depends on how many visitors you're expecting.
This can be anywhere from less than 10 bucks a month to a couple of $100.
You should checkout http://www.rackspace.com/ or perhaps Amazon EC2
if youre running something big..

To unsubscribe yourself from this mailing list, send an email to:
groupname-unsubscribe@egroups.com
"""

def preProcess( email ):
    email = email.lower()
    # Strip html tags, replace with a space
    email = re.sub('<[^<>]+>', ' ', email)
    # Any numbers get replaced with the string 'number'
    email = re.sub('[0-9]+', 'number', email)
    # Anything starting with http:// or https:// gets replaced with 'httpaddr'
    email = re.sub(r'(http|https)://[^\s]*', 'httpaddr', email)
    # Strings with "@" in the middle are considered emails --> 'emailaddr'
    email = re.sub(r'[^\s]+@[^\s]+', 'emailaddr', email)
    # The '$' sign gets replaced with 'dollar'
    email = re.sub('[$]+', 'dollar', email)
    return email
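# Added example (illustrative input, not from the course data): preProcess
# applies the substitutions above in order, so
#
#     preProcess("Visit http://example.com to win $100!")
#     # -> 'visit httpaddr to win dollarnumber!'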
38 | def email2TokenList( raw_email ):
39 |     """
40 |     Function that takes in a preprocessed (simplified) email, tokenizes it,
41 |     stems each word, and returns an (ordered) list of tokens in the e-mail
42 |     """
43 | 
44 |     stemmer = nltk.stem.porter.PorterStemmer()
45 |     email = preProcess( raw_email )
46 | 
47 |     # Split the e-mail into individual words (tokens).
48 |     # Splitting on many delimiters at once is easiest with re.split()
49 |     tokens = re.split('[ \@\$\/\#\.\-\:\&\*\+\=\[\]\?\!\(\)\{\}\,\'\"\>\_\<\;\%]', email)
50 | 
51 |     # Loop over each token: strip non-alphanumeric characters, stem it, and keep it if non-empty
52 |     tokenlist = []
53 |     for token in tokens:
54 | 
55 |         token = re.sub('[^a-zA-Z0-9]', '', token)
56 |         # Throw out empty tokens
57 |         if not len(token): continue
58 |         # Store the stemmed version of the token
59 |         stemmed = stemmer.stem( token )
60 |         tokenlist.append(stemmed)
61 | 
62 |     return tokenlist
63 | 
64 | def getVocabDict(reverse=False):
65 |     """
66 |     Function to read in the supplied vocab list text file into a dictionary.
67 |     Dictionary key is the stemmed word, value is the (1-based) index in the text file.
68 |     If "reverse", the keys and values are switched.
69 |     """
70 |     vocab_dict = {}
71 |     with open("vocab.txt") as f:
72 |         for line in f:
73 |             (val, key) = line.split()
74 |             if not reverse:
75 |                 vocab_dict[key] = int(val)
76 |             else:
77 |                 vocab_dict[int(val)] = key
78 | 
79 |     return vocab_dict
80 | 
81 | 
82 | def email2VocabIndices( raw_email, vocab_dict ):
83 |     # returns a list of indices corresponding to the location in vocab_dict for each stemmed word
84 |     tokenlist = email2TokenList( raw_email )
85 |     index_list = [ vocab_dict[token] for token in tokenlist if token in vocab_dict ]
86 |     return index_list
87 | 
88 | # feature extraction
89 | 
90 | def email2FeatureVector( raw_email, vocab_dict ):
91 |     # returns a vector of shape (n,1) where n is the size of the vocab_dict.
92 |     # The first element in this vector is 1 if the vocab word with index == 1 is in raw_email, else 0
93 |     n = len(vocab_dict)
94 |     result = np.zeros((n,1))
95 |     vocab_indices = email2VocabIndices( raw_email, vocab_dict )
96 |     for idx in vocab_indices:
97 |         result[idx-1] = 1  # vocab indices are 1-based
98 |     return result
99 | 
100 | # For emailSample1.txt, the feature vector has length 1899 and 45 non-zero entries.
101 | 
102 | vocab_dict = getVocabDict()
103 | email_contents = open( 'emailSample1.txt', 'r' ).read()
104 | test_fv = email2FeatureVector( email_contents, vocab_dict )
105 | 
106 | print("Length of feature vector is %d" % len(test_fv))
107 | print("Number of non-zero entries is: %d" % int((test_fv == 1).sum()))
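# Added illustration (not part of the original script): the same pipeline applies
# to the bundled spam samples as well, e.g.:
spam_contents = open( 'spamSample1.txt', 'r' ).read()
spam_fv = email2FeatureVector( spam_contents, vocab_dict )
print("Non-zero entries for spamSample1.txt: %d" % int((spam_fv == 1).sum()))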
108 | 
109 | 
110 | 
111 | # svm for spam classification
112 | datafile = 'spamTrain.mat'
113 | mat = scipy.io.loadmat( datafile )
114 | X, y = mat['X'], mat['y']
115 | # Test set
116 | datafile = 'spamTest.mat'
117 | mat = scipy.io.loadmat( datafile )
118 | Xtest, ytest = mat['Xtest'], mat['ytest']
119 | pos = np.array([X[i] for i in range(X.shape[0]) if y[i] == 1])
120 | neg = np.array([X[i] for i in range(X.shape[0]) if y[i] == 0])
121 | print('Total number of training emails = ', X.shape[0])
122 | print('Number of training spam emails = ', pos.shape[0])
123 | print('Number of training nonspam emails = ', neg.shape[0])
124 | 
125 | # First we make an instance of an SVM with C=0.1 and 'linear' kernel
126 | linear_svm = svm.SVC(C=0.1, kernel='linear')
127 | 
128 | # Now we fit the SVM to our X matrix, given the labels y
129 | linear_svm.fit( X, y.flatten() )
130 | 
131 | 
132 | # expect a training accuracy of about 99.8% and a test accuracy of about 98.5%
133 | 
134 | train_predictions = linear_svm.predict(X).reshape((y.shape[0],1))
135 | train_acc = 100. * float(sum(train_predictions == y))/y.shape[0]
136 | print('Training accuracy = %0.2f%%' % train_acc)
137 | 
138 | test_predictions = linear_svm.predict(Xtest).reshape((ytest.shape[0],1))
139 | test_acc = 100. * float(sum(test_predictions == ytest))/ytest.shape[0]
140 | print('Test set accuracy = %0.2f%%' % test_acc)
141 | 
142 | # Determine the words most likely to indicate that an e-mail is spam.
143 | # From the trained SVM we can get a list of the weight coefficients for each
144 | # word (technically, each word index)
145 | 
146 | vocab_dict_flipped = getVocabDict(reverse=True)
147 | 
148 | # Sort indices from most important to least important (high to low weight)
149 | sorted_indices = np.argsort( linear_svm.coef_, axis=None )[::-1]
150 | print("The 15 most important words to classify a spam e-mail are:")
151 | print([ vocab_dict_flipped[x+1] for x in sorted_indices[:15] ])  # +1: vocab indices are 1-based
152 | print()
153 | print("The 15 least important words to classify a spam e-mail are:")
154 | print([ vocab_dict_flipped[x+1] for x in sorted_indices[-15:] ])
155 | print()
156 | 
157 | # Most common word (mostly to debug); column 1190 is hard-coded from the original run
158 | most_common_word = vocab_dict_flipped[sorted_indices[0]+1]
159 | print('# of spam containing "%s" = %d/%d = %0.2f%%' %
160 |       (most_common_word, sum(pos[:,1190]), pos.shape[0],
161 |        100.*float(sum(pos[:,1190]))/pos.shape[0]))
162 | print('# of NON spam containing "%s" = %d/%d = %0.2f%%' %
163 |       (most_common_word, sum(neg[:,1190]), neg.shape[0],
164 |        100.*float(sum(neg[:,1190]))/neg.shape[0]))
165 | 
--------------------------------------------------------------------------------
/ex6/ex6svm.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import matplotlib.pyplot as plt
3 | import scipy.io as sio
4 | from sklearn.svm import SVC
5 | from sklearn.metrics import accuracy_score
6 | # gaussian_kernel and dataset3_params are defined later in this file,
7 | # so no separate helper module is imported
8 | 
9 | 
10 | # use the cross validation set Xval, yval to determine the best C and σ
11 | 
12 | def dataset3_params(X, y, Xval, yval):
13 |     C_vec = [0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30]
14 |     sigma_vec = [0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30]
15 |     scores = np.zeros((len(C_vec), len(sigma_vec)))
16 | 
17 |     for i in range(len(C_vec)):
18 |         for j in range(len(sigma_vec)):
19 |             svm = SVC(kernel='rbf', C=C_vec[i], gamma=sigma_vec[j])  # "sigma" values are passed directly as sklearn's gamma
20 |             svm.fit(X, y.ravel())
21 |             scores[i, j] = accuracy_score(yval, svm.predict(Xval))
22 | 
23 |     max_c_index, max_s_index = np.unravel_index(scores.argmax(), scores.shape)
24 |     return (C_vec[max_c_index], sigma_vec[max_s_index])
25 | 
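# Added note (not part of the original script): to search over the course's sigma
# values rather than raw gamma values, convert with gamma = 1 / (2 * sigma**2),
# since exp(-||x1-x2||**2 / (2*sigma**2)) == exp(-gamma * ||x1-x2||**2):
def sigma_to_gamma(sigma):
    # e.g. sigma = 0.1  ->  gamma = 50
    return 1.0 / (2.0 * sigma ** 2)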
26 | print('Loading and Visualizing Data ...\n')
27 | 
28 | data = sio.loadmat('ex6data1.mat')
29 | X = data['X'] # 51 x 2 matrix
30 | y = data['y'] # 51 x 1 matrix
31 | 
32 | pos = (y == 1).ravel() # flattens, i.e. makes a 1d array
33 | neg = (y == 0).ravel() # alternative code: neg = np.array([X[i] for i in range(X.shape[0]) if y[i] == 0])
34 | plt.scatter(X[pos, 0], X[pos, 1], color='black', marker='+')
35 | plt.scatter(
36 |     X[neg, 0], X[neg, 1], color='yellow', edgecolors='black', marker='o')
37 | plt.xlim(0, 4.5)
38 | plt.ylim(1.5, 5)
39 | plt.show()
40 | 
41 | input('Program paused. Press enter to continue.\n')
42 | plt.close()
43 | print('Training Linear SVM ...\n')
44 | C = 1 # default
45 | svm = SVC(kernel='linear', C=C)
46 | svm.fit(X, y.ravel())
47 | weights = svm.coef_[0]
48 | intercept = svm.intercept_[0]
49 | # draw the svm decision boundary
50 | xp = np.linspace(X.min(), X.max(), 100)
51 | yp = - (weights[0] * xp + intercept) / weights[1]
52 | 
53 | pos = (y == 1).ravel()
54 | neg = (y == 0).ravel()
55 | plt.scatter(X[pos, 0], X[pos, 1], color='black', marker='+')
56 | plt.scatter(
57 |     X[neg, 0], X[neg, 1], color='yellow', edgecolors='black', marker='o')
58 | plt.plot(xp, yp)
59 | plt.xlim(0, 4.5)
60 | plt.ylim(1.5, 5)
61 | plt.show()
62 | 
63 | input('Program paused. Press enter to continue.\n')
64 | plt.close()
65 | 
66 | # Gaussian Kernel
67 | print('Evaluating the Gaussian Kernel ...\n')
68 | 
69 | # np.linalg.norm computes the Euclidean (L2) norm of the difference by default
70 | def gaussian_kernel(x1, x2, sigma):
71 |     return np.exp(- (np.linalg.norm(x1 - x2) ** 2) / (2 * (sigma ** 2)))
72 | 
73 | 
74 | x1 = np.array([1, 2, 1])
75 | x2 = np.array([0, 4, -1])
76 | sigma = 2
77 | sim = gaussian_kernel(x1, x2, sigma)
78 | 
79 | 
80 | 
81 | print(
82 |     'Gaussian Kernel between x1 = [1; 2; 1], x2 = [0; 4; -1], sigma = {0} :\n'
83 |     .format(sigma),
84 |     '\t{0:.6f}\n(for sigma = 2, this value should be about 0.324652)'
85 |     .format(sim))
86 | 
87 | input('Program paused. Press enter to continue.\n')
88 | plt.close()
89 | 
90 | # Visualizing Dataset 2
91 | data = sio.loadmat('ex6data2.mat')
92 | X = data['X'] # 863 x 2 matrix
93 | y = data['y'] # 863 x 1 matrix
94 | 
95 | pos = (y == 1).ravel()
96 | neg = (y == 0).ravel()
97 | plt.scatter(X[pos, 0], X[pos, 1], color='black', marker='+')
98 | plt.scatter(
99 |     X[neg, 0], X[neg, 1], color='yellow', edgecolors='black', marker='o')
100 | plt.xlim(0, 1)
101 | plt.ylim(0.4, 1)
102 | plt.show()
103 | 
104 | input('Program paused. Press enter to continue.\n')
105 | plt.close()
106 | 
107 | # RBF Kernel (Dataset 2)
108 | print('Training SVM with RBF Kernel ...\n')
109 | 
110 | C = 30     # sklearn's C, used directly
111 | sigma = 30 # passed to sklearn as gamma (the course's sigma=0.1 would be gamma=50)
112 | 
113 | svm = SVC(kernel='rbf', C=C, gamma=sigma)
114 | svm.fit(X, y.ravel())
115 | 
116 | x1 = np.linspace(X[:, 0].min(), X[:, 0].max(), 100)
117 | x2 = np.linspace(X[:, 1].min(), X[:, 1].max(), 100)
118 | x1, x2 = np.meshgrid(x1, x2)
119 | yp = svm.predict(np.array([x1.ravel(), x2.ravel()]).T).reshape(x1.shape)
120 | 
121 | pos = (y == 1).ravel()
122 | neg = (y == 0).ravel()
123 | plt.scatter(X[pos, 0], X[pos, 1], color='black', marker='+')
124 | plt.scatter(
125 |     X[neg, 0], X[neg, 1], color='yellow', edgecolors='black', marker='o')
126 | plt.xlim(0, 1)
127 | plt.ylim(0.4, 1)
128 | plt.contour(x1, x2, yp)
129 | plt.show()
130 | 
131 | input('Program paused. Press enter to continue.\n')
132 | plt.close()
133 | 
134 | # Visualizing Dataset 3
135 | data = sio.loadmat('ex6data3.mat')
136 | X = data['X'] # 211 x 2 matrix
137 | y = data['y'] # 211 x 1 matrix
138 | Xval = data['Xval'] # 200 x 2 matrix
139 | yval = data['yval'] # 200 x 1 matrix
140 | 
141 | pos = (y == 1).ravel()
142 | neg = (y == 0).ravel()
143 | plt.scatter(X[pos, 0], X[pos, 1], color='black', marker='+')
144 | plt.scatter(
145 |     X[neg, 0], X[neg, 1], color='yellow', edgecolors='black', marker='o')
146 | plt.xlim(-0.6, 0.3)
147 | plt.ylim(-0.8, 0.6)
148 | plt.show()
149 | 
150 | input('Program paused. 
Press enter to continue.\n') 151 | plt.close() 152 | 153 | # RBF Kernel (Dataset 3) 154 | C, sigma = dataset3_params(X, y, Xval, yval) 155 | 156 | svm = SVC(kernel='rbf', C=C, gamma=sigma) 157 | svm.fit(X, y.ravel()) 158 | 159 | x1 = np.linspace(X[:, 0].min(), X[:, 0].max(), 100) 160 | x2 = np.linspace(X[:, 1].min(), X[:, 1].max(), 100) 161 | x1, x2 = np.meshgrid(x1, x2) 162 | yp = svm.predict(np.array([x1.ravel(), x2.ravel()]).T).reshape(x1.shape) 163 | 164 | pos = (y == 1).ravel() 165 | neg = (y == 0).ravel() 166 | plt.scatter(X[pos, 0], X[pos, 1], color='black', marker='+') 167 | plt.scatter( 168 | X[neg, 0], X[neg, 1], color='yellow', edgecolors='black', marker='o') 169 | plt.xlim(-0.6, 0.3) 170 | plt.ylim(-0.8, 0.6) 171 | plt.contour(x1, x2, yp) 172 | plt.show() 173 | 174 | input('Program paused. Press enter to continue.\n') 175 | plt.close() 176 | -------------------------------------------------------------------------------- /ex6/spamSample1.txt: -------------------------------------------------------------------------------- 1 | Do You Want To Make $1000 Or More Per Week? 2 | 3 | 4 | 5 | If you are a motivated and qualified individual - I 6 | will personally demonstrate to you a system that will 7 | make you $1,000 per week or more! This is NOT mlm. 8 | 9 | 10 | 11 | Call our 24 hour pre-recorded number to get the 12 | details. 13 | 14 | 15 | 16 | 000-456-789 17 | 18 | 19 | 20 | I need people who want to make serious money. Make 21 | the call and get the facts. 22 | 23 | Invest 2 minutes in yourself now! 24 | 25 | 26 | 27 | 000-456-789 28 | 29 | 30 | 31 | Looking forward to your call and I will introduce you 32 | to people like yourself who 33 | are currently making $10,000 plus per week! 34 | 35 | 36 | 37 | 000-456-789 38 | 39 | 40 | 41 | 3484lJGv6-241lEaN9080lRmS6-271WxHo7524qiyT5-438rjUv5615hQcf0-662eiDB9057dMtVl72 42 | 43 | -------------------------------------------------------------------------------- /ex6/spamSample2.txt: -------------------------------------------------------------------------------- 1 | Best Buy Viagra Generic Online 2 | 3 | Viagra 100mg x 60 Pills $125, Free Pills & Reorder Discount, Top Selling 100% Quality & Satisfaction guaranteed! 4 | 5 | We accept VISA, Master & E-Check Payments, 90000+ Satisfied Customers! 
6 | http://medphysitcstech.ru 7 | 8 | 9 | -------------------------------------------------------------------------------- /ex6/spamTest.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deyachatterjee/ml-andrewng-python/03cf16e3d26cd65e791dc6a9e1b49d01ff0b70f3/ex6/spamTest.mat -------------------------------------------------------------------------------- /ex6/spamTrain.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deyachatterjee/ml-andrewng-python/03cf16e3d26cd65e791dc6a9e1b49d01ff0b70f3/ex6/spamTrain.mat -------------------------------------------------------------------------------- /ex6/token.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deyachatterjee/ml-andrewng-python/03cf16e3d26cd65e791dc6a9e1b49d01ff0b70f3/ex6/token.mat -------------------------------------------------------------------------------- /ex6/vocab.txt: -------------------------------------------------------------------------------- 1 | 1 aa 2 | 2 ab 3 | 3 abil 4 | 4 abl 5 | 5 about 6 | 6 abov 7 | 7 absolut 8 | 8 abus 9 | 9 ac 10 | 10 accept 11 | 11 access 12 | 12 accord 13 | 13 account 14 | 14 achiev 15 | 15 acquir 16 | 16 across 17 | 17 act 18 | 18 action 19 | 19 activ 20 | 20 actual 21 | 21 ad 22 | 22 adam 23 | 23 add 24 | 24 addit 25 | 25 address 26 | 26 administr 27 | 27 adult 28 | 28 advanc 29 | 29 advantag 30 | 30 advertis 31 | 31 advic 32 | 32 advis 33 | 33 ae 34 | 34 af 35 | 35 affect 36 | 36 affili 37 | 37 afford 38 | 38 africa 39 | 39 after 40 | 40 ag 41 | 41 again 42 | 42 against 43 | 43 agenc 44 | 44 agent 45 | 45 ago 46 | 46 agre 47 | 47 agreement 48 | 48 aid 49 | 49 air 50 | 50 al 51 | 51 alb 52 | 52 align 53 | 53 all 54 | 54 allow 55 | 55 almost 56 | 56 alon 57 | 57 along 58 | 58 alreadi 59 | 59 alsa 60 | 60 also 61 | 61 altern 62 | 62 although 63 | 63 alwai 64 | 64 am 65 | 65 amaz 66 | 66 america 67 | 67 american 68 | 68 among 69 | 69 amount 70 | 70 amp 71 | 71 an 72 | 72 analysi 73 | 73 analyst 74 | 74 and 75 | 75 ani 76 | 76 anim 77 | 77 announc 78 | 78 annual 79 | 79 annuiti 80 | 80 anoth 81 | 81 answer 82 | 82 anti 83 | 83 anumb 84 | 84 anybodi 85 | 85 anymor 86 | 86 anyon 87 | 87 anyth 88 | 88 anywai 89 | 89 anywher 90 | 90 aol 91 | 91 ap 92 | 92 apolog 93 | 93 app 94 | 94 appar 95 | 95 appear 96 | 96 appl 97 | 97 appli 98 | 98 applic 99 | 99 appreci 100 | 100 approach 101 | 101 approv 102 | 102 apt 103 | 103 ar 104 | 104 archiv 105 | 105 area 106 | 106 aren 107 | 107 argument 108 | 108 arial 109 | 109 arm 110 | 110 around 111 | 111 arrai 112 | 112 arriv 113 | 113 art 114 | 114 articl 115 | 115 artist 116 | 116 as 117 | 117 ascii 118 | 118 ask 119 | 119 asset 120 | 120 assist 121 | 121 associ 122 | 122 assum 123 | 123 assur 124 | 124 at 125 | 125 atol 126 | 126 attach 127 | 127 attack 128 | 128 attempt 129 | 129 attent 130 | 130 attornei 131 | 131 attract 132 | 132 audio 133 | 133 aug 134 | 134 august 135 | 135 author 136 | 136 auto 137 | 137 autom 138 | 138 automat 139 | 139 avail 140 | 140 averag 141 | 141 avoid 142 | 142 awai 143 | 143 awar 144 | 144 award 145 | 145 ba 146 | 146 babi 147 | 147 back 148 | 148 background 149 | 149 backup 150 | 150 bad 151 | 151 balanc 152 | 152 ban 153 | 153 bank 154 | 154 bar 155 | 155 base 156 | 156 basenumb 157 | 157 basi 158 | 158 basic 159 | 159 bb 160 | 160 bc 161 | 161 bd 162 | 162 be 163 | 163 beat 164 | 164 beberg 165 | 165 becaus 166 | 
166 becom 167 | 167 been 168 | 168 befor 169 | 169 begin 170 | 170 behalf 171 | 171 behavior 172 | 172 behind 173 | 173 believ 174 | 174 below 175 | 175 benefit 176 | 176 best 177 | 177 beta 178 | 178 better 179 | 179 between 180 | 180 bf 181 | 181 big 182 | 182 bill 183 | 183 billion 184 | 184 bin 185 | 185 binari 186 | 186 bit 187 | 187 black 188 | 188 blank 189 | 189 block 190 | 190 blog 191 | 191 blood 192 | 192 blue 193 | 193 bnumber 194 | 194 board 195 | 195 bodi 196 | 196 boi 197 | 197 bonu 198 | 198 book 199 | 199 boot 200 | 200 border 201 | 201 boss 202 | 202 boston 203 | 203 botan 204 | 204 both 205 | 205 bottl 206 | 206 bottom 207 | 207 boundari 208 | 208 box 209 | 209 brain 210 | 210 brand 211 | 211 break 212 | 212 brian 213 | 213 bring 214 | 214 broadcast 215 | 215 broker 216 | 216 browser 217 | 217 bug 218 | 218 bui 219 | 219 build 220 | 220 built 221 | 221 bulk 222 | 222 burn 223 | 223 bush 224 | 224 busi 225 | 225 but 226 | 226 button 227 | 227 by 228 | 228 byte 229 | 229 ca 230 | 230 cabl 231 | 231 cach 232 | 232 calcul 233 | 233 california 234 | 234 call 235 | 235 came 236 | 236 camera 237 | 237 campaign 238 | 238 can 239 | 239 canada 240 | 240 cannot 241 | 241 canon 242 | 242 capabl 243 | 243 capillari 244 | 244 capit 245 | 245 car 246 | 246 card 247 | 247 care 248 | 248 career 249 | 249 carri 250 | 250 cartridg 251 | 251 case 252 | 252 cash 253 | 253 cat 254 | 254 catch 255 | 255 categori 256 | 256 caus 257 | 257 cb 258 | 258 cc 259 | 259 cd 260 | 260 ce 261 | 261 cell 262 | 262 cent 263 | 263 center 264 | 264 central 265 | 265 centuri 266 | 266 ceo 267 | 267 certain 268 | 268 certainli 269 | 269 cf 270 | 270 challeng 271 | 271 chanc 272 | 272 chang 273 | 273 channel 274 | 274 char 275 | 275 charact 276 | 276 charg 277 | 277 charset 278 | 278 chat 279 | 279 cheap 280 | 280 check 281 | 281 cheer 282 | 282 chief 283 | 283 children 284 | 284 china 285 | 285 chip 286 | 286 choic 287 | 287 choos 288 | 288 chri 289 | 289 citi 290 | 290 citizen 291 | 291 civil 292 | 292 claim 293 | 293 class 294 | 294 classifi 295 | 295 clean 296 | 296 clear 297 | 297 clearli 298 | 298 click 299 | 299 client 300 | 300 close 301 | 301 clue 302 | 302 cnet 303 | 303 cnumber 304 | 304 co 305 | 305 code 306 | 306 collect 307 | 307 colleg 308 | 308 color 309 | 309 com 310 | 310 combin 311 | 311 come 312 | 312 comfort 313 | 313 command 314 | 314 comment 315 | 315 commentari 316 | 316 commerci 317 | 317 commiss 318 | 318 commit 319 | 319 common 320 | 320 commun 321 | 321 compani 322 | 322 compar 323 | 323 comparison 324 | 324 compat 325 | 325 compet 326 | 326 competit 327 | 327 compil 328 | 328 complet 329 | 329 comprehens 330 | 330 comput 331 | 331 concentr 332 | 332 concept 333 | 333 concern 334 | 334 condit 335 | 335 conf 336 | 336 confer 337 | 337 confid 338 | 338 confidenti 339 | 339 config 340 | 340 configur 341 | 341 confirm 342 | 342 conflict 343 | 343 confus 344 | 344 congress 345 | 345 connect 346 | 346 consid 347 | 347 consolid 348 | 348 constitut 349 | 349 construct 350 | 350 consult 351 | 351 consum 352 | 352 contact 353 | 353 contain 354 | 354 content 355 | 355 continu 356 | 356 contract 357 | 357 contribut 358 | 358 control 359 | 359 conveni 360 | 360 convers 361 | 361 convert 362 | 362 cool 363 | 363 cooper 364 | 364 copi 365 | 365 copyright 366 | 366 core 367 | 367 corpor 368 | 368 correct 369 | 369 correspond 370 | 370 cost 371 | 371 could 372 | 372 couldn 373 | 373 count 374 | 374 countri 375 | 375 coupl 376 | 376 cours 377 | 377 court 378 | 378 cover 379 | 379 coverag 380 | 380 
crash 381 | 381 creat 382 | 382 creativ 383 | 383 credit 384 | 384 critic 385 | 385 cross 386 | 386 cultur 387 | 387 current 388 | 388 custom 389 | 389 cut 390 | 390 cv 391 | 391 da 392 | 392 dagga 393 | 393 dai 394 | 394 daili 395 | 395 dan 396 | 396 danger 397 | 397 dark 398 | 398 data 399 | 399 databas 400 | 400 datapow 401 | 401 date 402 | 402 dave 403 | 403 david 404 | 404 dc 405 | 405 de 406 | 406 dead 407 | 407 deal 408 | 408 dear 409 | 409 death 410 | 410 debt 411 | 411 decad 412 | 412 decid 413 | 413 decis 414 | 414 declar 415 | 415 declin 416 | 416 decor 417 | 417 default 418 | 418 defend 419 | 419 defens 420 | 420 defin 421 | 421 definit 422 | 422 degre 423 | 423 delai 424 | 424 delet 425 | 425 deliv 426 | 426 deliveri 427 | 427 dell 428 | 428 demand 429 | 429 democrat 430 | 430 depart 431 | 431 depend 432 | 432 deposit 433 | 433 describ 434 | 434 descript 435 | 435 deserv 436 | 436 design 437 | 437 desir 438 | 438 desktop 439 | 439 despit 440 | 440 detail 441 | 441 detect 442 | 442 determin 443 | 443 dev 444 | 444 devel 445 | 445 develop 446 | 446 devic 447 | 447 di 448 | 448 dial 449 | 449 did 450 | 450 didn 451 | 451 diet 452 | 452 differ 453 | 453 difficult 454 | 454 digit 455 | 455 direct 456 | 456 directli 457 | 457 director 458 | 458 directori 459 | 459 disabl 460 | 460 discount 461 | 461 discov 462 | 462 discoveri 463 | 463 discuss 464 | 464 disk 465 | 465 displai 466 | 466 disposit 467 | 467 distanc 468 | 468 distribut 469 | 469 dn 470 | 470 dnumber 471 | 471 do 472 | 472 doc 473 | 473 document 474 | 474 doe 475 | 475 doer 476 | 476 doesn 477 | 477 dollar 478 | 478 dollarac 479 | 479 dollarnumb 480 | 480 domain 481 | 481 don 482 | 482 done 483 | 483 dont 484 | 484 doubl 485 | 485 doubt 486 | 486 down 487 | 487 download 488 | 488 dr 489 | 489 draw 490 | 490 dream 491 | 491 drive 492 | 492 driver 493 | 493 drop 494 | 494 drug 495 | 495 due 496 | 496 dure 497 | 497 dvd 498 | 498 dw 499 | 499 dynam 500 | 500 ea 501 | 501 each 502 | 502 earli 503 | 503 earlier 504 | 504 earn 505 | 505 earth 506 | 506 easi 507 | 507 easier 508 | 508 easili 509 | 509 eat 510 | 510 eb 511 | 511 ebai 512 | 512 ec 513 | 513 echo 514 | 514 econom 515 | 515 economi 516 | 516 ed 517 | 517 edg 518 | 518 edit 519 | 519 editor 520 | 520 educ 521 | 521 eff 522 | 522 effect 523 | 523 effici 524 | 524 effort 525 | 525 either 526 | 526 el 527 | 527 electron 528 | 528 elimin 529 | 529 els 530 | 530 email 531 | 531 emailaddr 532 | 532 emerg 533 | 533 empir 534 | 534 employ 535 | 535 employe 536 | 536 en 537 | 537 enabl 538 | 538 encod 539 | 539 encourag 540 | 540 end 541 | 541 enemi 542 | 542 enenkio 543 | 543 energi 544 | 544 engin 545 | 545 english 546 | 546 enhanc 547 | 547 enjoi 548 | 548 enough 549 | 549 ensur 550 | 550 enter 551 | 551 enterpris 552 | 552 entertain 553 | 553 entir 554 | 554 entri 555 | 555 enumb 556 | 556 environ 557 | 557 equal 558 | 558 equip 559 | 559 equival 560 | 560 error 561 | 561 especi 562 | 562 essenti 563 | 563 establish 564 | 564 estat 565 | 565 estim 566 | 566 et 567 | 567 etc 568 | 568 euro 569 | 569 europ 570 | 570 european 571 | 571 even 572 | 572 event 573 | 573 eventu 574 | 574 ever 575 | 575 everi 576 | 576 everyon 577 | 577 everyth 578 | 578 evid 579 | 579 evil 580 | 580 exactli 581 | 581 exampl 582 | 582 excel 583 | 583 except 584 | 584 exchang 585 | 585 excit 586 | 586 exclus 587 | 587 execut 588 | 588 exercis 589 | 589 exist 590 | 590 exmh 591 | 591 expand 592 | 592 expect 593 | 593 expens 594 | 594 experi 595 | 595 expert 596 | 596 expir 597 | 597 explain 598 | 
598 explor 599 | 599 express 600 | 600 extend 601 | 601 extens 602 | 602 extra 603 | 603 extract 604 | 604 extrem 605 | 605 ey 606 | 606 fa 607 | 607 face 608 | 608 fact 609 | 609 factor 610 | 610 fail 611 | 611 fair 612 | 612 fall 613 | 613 fals 614 | 614 famili 615 | 615 faq 616 | 616 far 617 | 617 fast 618 | 618 faster 619 | 619 fastest 620 | 620 fat 621 | 621 father 622 | 622 favorit 623 | 623 fax 624 | 624 fb 625 | 625 fd 626 | 626 featur 627 | 627 feder 628 | 628 fee 629 | 629 feed 630 | 630 feedback 631 | 631 feel 632 | 632 femal 633 | 633 few 634 | 634 ffffff 635 | 635 ffnumber 636 | 636 field 637 | 637 fight 638 | 638 figur 639 | 639 file 640 | 640 fill 641 | 641 film 642 | 642 filter 643 | 643 final 644 | 644 financ 645 | 645 financi 646 | 646 find 647 | 647 fine 648 | 648 finish 649 | 649 fire 650 | 650 firewal 651 | 651 firm 652 | 652 first 653 | 653 fit 654 | 654 five 655 | 655 fix 656 | 656 flag 657 | 657 flash 658 | 658 flow 659 | 659 fnumber 660 | 660 focu 661 | 661 folder 662 | 662 folk 663 | 663 follow 664 | 664 font 665 | 665 food 666 | 666 for 667 | 667 forc 668 | 668 foreign 669 | 669 forev 670 | 670 forget 671 | 671 fork 672 | 672 form 673 | 673 format 674 | 674 former 675 | 675 fortun 676 | 676 forward 677 | 677 found 678 | 678 foundat 679 | 679 four 680 | 680 franc 681 | 681 free 682 | 682 freedom 683 | 683 french 684 | 684 freshrpm 685 | 685 fri 686 | 686 fridai 687 | 687 friend 688 | 688 from 689 | 689 front 690 | 690 ftoc 691 | 691 ftp 692 | 692 full 693 | 693 fulli 694 | 694 fun 695 | 695 function 696 | 696 fund 697 | 697 further 698 | 698 futur 699 | 699 ga 700 | 700 gain 701 | 701 game 702 | 702 gari 703 | 703 garrigu 704 | 704 gave 705 | 705 gcc 706 | 706 geek 707 | 707 gener 708 | 708 get 709 | 709 gif 710 | 710 gift 711 | 711 girl 712 | 712 give 713 | 713 given 714 | 714 global 715 | 715 gnome 716 | 716 gnu 717 | 717 gnupg 718 | 718 go 719 | 719 goal 720 | 720 god 721 | 721 goe 722 | 722 gold 723 | 723 gone 724 | 724 good 725 | 725 googl 726 | 726 got 727 | 727 govern 728 | 728 gpl 729 | 729 grand 730 | 730 grant 731 | 731 graphic 732 | 732 great 733 | 733 greater 734 | 734 ground 735 | 735 group 736 | 736 grow 737 | 737 growth 738 | 738 gt 739 | 739 guarante 740 | 740 guess 741 | 741 gui 742 | 742 guid 743 | 743 ha 744 | 744 hack 745 | 745 had 746 | 746 half 747 | 747 ham 748 | 748 hand 749 | 749 handl 750 | 750 happen 751 | 751 happi 752 | 752 hard 753 | 753 hardwar 754 | 754 hat 755 | 755 hate 756 | 756 have 757 | 757 haven 758 | 758 he 759 | 759 head 760 | 760 header 761 | 761 headlin 762 | 762 health 763 | 763 hear 764 | 764 heard 765 | 765 heart 766 | 766 heaven 767 | 767 hei 768 | 768 height 769 | 769 held 770 | 770 hello 771 | 771 help 772 | 772 helvetica 773 | 773 her 774 | 774 herba 775 | 775 here 776 | 776 hermio 777 | 777 hettinga 778 | 778 hi 779 | 779 high 780 | 780 higher 781 | 781 highli 782 | 782 highlight 783 | 783 him 784 | 784 histori 785 | 785 hit 786 | 786 hold 787 | 787 home 788 | 788 honor 789 | 789 hope 790 | 790 host 791 | 791 hot 792 | 792 hour 793 | 793 hous 794 | 794 how 795 | 795 howev 796 | 796 hp 797 | 797 html 798 | 798 http 799 | 799 httpaddr 800 | 800 huge 801 | 801 human 802 | 802 hundr 803 | 803 ibm 804 | 804 id 805 | 805 idea 806 | 806 ident 807 | 807 identifi 808 | 808 idnumb 809 | 809 ie 810 | 810 if 811 | 811 ignor 812 | 812 ii 813 | 813 iii 814 | 814 iiiiiiihnumberjnumberhnumberjnumberhnumb 815 | 815 illeg 816 | 816 im 817 | 817 imag 818 | 818 imagin 819 | 819 immedi 820 | 820 impact 821 | 821 implement 822 | 822 
import 823 | 823 impress 824 | 824 improv 825 | 825 in 826 | 826 inc 827 | 827 includ 828 | 828 incom 829 | 829 increas 830 | 830 incred 831 | 831 inde 832 | 832 independ 833 | 833 index 834 | 834 india 835 | 835 indian 836 | 836 indic 837 | 837 individu 838 | 838 industri 839 | 839 info 840 | 840 inform 841 | 841 initi 842 | 842 inlin 843 | 843 innov 844 | 844 input 845 | 845 insert 846 | 846 insid 847 | 847 instal 848 | 848 instanc 849 | 849 instant 850 | 850 instead 851 | 851 institut 852 | 852 instruct 853 | 853 insur 854 | 854 int 855 | 855 integr 856 | 856 intel 857 | 857 intellig 858 | 858 intend 859 | 859 interact 860 | 860 interest 861 | 861 interfac 862 | 862 intern 863 | 863 internet 864 | 864 interview 865 | 865 into 866 | 866 intro 867 | 867 introduc 868 | 868 inumb 869 | 869 invest 870 | 870 investig 871 | 871 investor 872 | 872 invok 873 | 873 involv 874 | 874 ip 875 | 875 ireland 876 | 876 irish 877 | 877 is 878 | 878 island 879 | 879 isn 880 | 880 iso 881 | 881 isp 882 | 882 issu 883 | 883 it 884 | 884 item 885 | 885 itself 886 | 886 jabber 887 | 887 jame 888 | 888 java 889 | 889 jim 890 | 890 jnumberiiiiiiihepihepihf 891 | 891 job 892 | 892 joe 893 | 893 john 894 | 894 join 895 | 895 journal 896 | 896 judg 897 | 897 judgment 898 | 898 jul 899 | 899 juli 900 | 900 jump 901 | 901 june 902 | 902 just 903 | 903 justin 904 | 904 keep 905 | 905 kei 906 | 906 kept 907 | 907 kernel 908 | 908 kevin 909 | 909 keyboard 910 | 910 kid 911 | 911 kill 912 | 912 kind 913 | 913 king 914 | 914 kingdom 915 | 915 knew 916 | 916 know 917 | 917 knowledg 918 | 918 known 919 | 919 la 920 | 920 lack 921 | 921 land 922 | 922 languag 923 | 923 laptop 924 | 924 larg 925 | 925 larger 926 | 926 largest 927 | 927 laser 928 | 928 last 929 | 929 late 930 | 930 later 931 | 931 latest 932 | 932 launch 933 | 933 law 934 | 934 lawrenc 935 | 935 le 936 | 936 lead 937 | 937 leader 938 | 938 learn 939 | 939 least 940 | 940 leav 941 | 941 left 942 | 942 legal 943 | 943 lender 944 | 944 length 945 | 945 less 946 | 946 lesson 947 | 947 let 948 | 948 letter 949 | 949 level 950 | 950 lib 951 | 951 librari 952 | 952 licens 953 | 953 life 954 | 954 lifetim 955 | 955 light 956 | 956 like 957 | 957 limit 958 | 958 line 959 | 959 link 960 | 960 linux 961 | 961 list 962 | 962 listen 963 | 963 littl 964 | 964 live 965 | 965 ll 966 | 966 lo 967 | 967 load 968 | 968 loan 969 | 969 local 970 | 970 locat 971 | 971 lock 972 | 972 lockergnom 973 | 973 log 974 | 974 long 975 | 975 longer 976 | 976 look 977 | 977 lose 978 | 978 loss 979 | 979 lost 980 | 980 lot 981 | 981 love 982 | 982 low 983 | 983 lower 984 | 984 lowest 985 | 985 lt 986 | 986 ma 987 | 987 mac 988 | 988 machin 989 | 989 made 990 | 990 magazin 991 | 991 mai 992 | 992 mail 993 | 993 mailer 994 | 994 main 995 | 995 maintain 996 | 996 major 997 | 997 make 998 | 998 maker 999 | 999 male 1000 | 1000 man 1001 | 1001 manag 1002 | 1002 mani 1003 | 1003 manual 1004 | 1004 manufactur 1005 | 1005 map 1006 | 1006 march 1007 | 1007 margin 1008 | 1008 mark 1009 | 1009 market 1010 | 1010 marshal 1011 | 1011 mass 1012 | 1012 master 1013 | 1013 match 1014 | 1014 materi 1015 | 1015 matter 1016 | 1016 matthia 1017 | 1017 mayb 1018 | 1018 me 1019 | 1019 mean 1020 | 1020 measur 1021 | 1021 mechan 1022 | 1022 media 1023 | 1023 medic 1024 | 1024 meet 1025 | 1025 member 1026 | 1026 membership 1027 | 1027 memori 1028 | 1028 men 1029 | 1029 mention 1030 | 1030 menu 1031 | 1031 merchant 1032 | 1032 messag 1033 | 1033 method 1034 | 1034 mh 1035 | 1035 michael 1036 | 1036 microsoft 1037 | 
1037 middl 1038 | 1038 might 1039 | 1039 mike 1040 | 1040 mile 1041 | 1041 militari 1042 | 1042 million 1043 | 1043 mime 1044 | 1044 mind 1045 | 1045 mine 1046 | 1046 mini 1047 | 1047 minimum 1048 | 1048 minut 1049 | 1049 miss 1050 | 1050 mistak 1051 | 1051 mobil 1052 | 1052 mode 1053 | 1053 model 1054 | 1054 modem 1055 | 1055 modifi 1056 | 1056 modul 1057 | 1057 moment 1058 | 1058 mon 1059 | 1059 mondai 1060 | 1060 monei 1061 | 1061 monitor 1062 | 1062 month 1063 | 1063 monthli 1064 | 1064 more 1065 | 1065 morn 1066 | 1066 mortgag 1067 | 1067 most 1068 | 1068 mostli 1069 | 1069 mother 1070 | 1070 motiv 1071 | 1071 move 1072 | 1072 movi 1073 | 1073 mpnumber 1074 | 1074 mr 1075 | 1075 ms 1076 | 1076 msg 1077 | 1077 much 1078 | 1078 multi 1079 | 1079 multipart 1080 | 1080 multipl 1081 | 1081 murphi 1082 | 1082 music 1083 | 1083 must 1084 | 1084 my 1085 | 1085 myself 1086 | 1086 name 1087 | 1087 nation 1088 | 1088 natur 1089 | 1089 nbsp 1090 | 1090 near 1091 | 1091 nearli 1092 | 1092 necessari 1093 | 1093 need 1094 | 1094 neg 1095 | 1095 net 1096 | 1096 netscap 1097 | 1097 network 1098 | 1098 never 1099 | 1099 new 1100 | 1100 newslett 1101 | 1101 next 1102 | 1102 nextpart 1103 | 1103 nice 1104 | 1104 nigeria 1105 | 1105 night 1106 | 1106 no 1107 | 1107 nobodi 1108 | 1108 non 1109 | 1109 none 1110 | 1110 nor 1111 | 1111 normal 1112 | 1112 north 1113 | 1113 not 1114 | 1114 note 1115 | 1115 noth 1116 | 1116 notic 1117 | 1117 now 1118 | 1118 nt 1119 | 1119 null 1120 | 1120 number 1121 | 1121 numbera 1122 | 1122 numberam 1123 | 1123 numberanumb 1124 | 1124 numberb 1125 | 1125 numberbit 1126 | 1126 numberc 1127 | 1127 numbercb 1128 | 1128 numbercbr 1129 | 1129 numbercfont 1130 | 1130 numbercli 1131 | 1131 numbercnumb 1132 | 1132 numbercp 1133 | 1133 numberctd 1134 | 1134 numberd 1135 | 1135 numberdari 1136 | 1136 numberdnumb 1137 | 1137 numberenumb 1138 | 1138 numberf 1139 | 1139 numberfb 1140 | 1140 numberff 1141 | 1141 numberffont 1142 | 1142 numberfp 1143 | 1143 numberftd 1144 | 1144 numberk 1145 | 1145 numberm 1146 | 1146 numbermb 1147 | 1147 numberp 1148 | 1148 numberpd 1149 | 1149 numberpm 1150 | 1150 numberpx 1151 | 1151 numberst 1152 | 1152 numberth 1153 | 1153 numbertnumb 1154 | 1154 numberx 1155 | 1155 object 1156 | 1156 oblig 1157 | 1157 obtain 1158 | 1158 obvious 1159 | 1159 occur 1160 | 1160 oct 1161 | 1161 octob 1162 | 1162 of 1163 | 1163 off 1164 | 1164 offer 1165 | 1165 offic 1166 | 1166 offici 1167 | 1167 often 1168 | 1168 oh 1169 | 1169 ok 1170 | 1170 old 1171 | 1171 on 1172 | 1172 onc 1173 | 1173 onli 1174 | 1174 onlin 1175 | 1175 open 1176 | 1176 oper 1177 | 1177 opinion 1178 | 1178 opportun 1179 | 1179 opt 1180 | 1180 optim 1181 | 1181 option 1182 | 1182 or 1183 | 1183 order 1184 | 1184 org 1185 | 1185 organ 1186 | 1186 origin 1187 | 1187 os 1188 | 1188 osdn 1189 | 1189 other 1190 | 1190 otherwis 1191 | 1191 our 1192 | 1192 out 1193 | 1193 outlook 1194 | 1194 output 1195 | 1195 outsid 1196 | 1196 over 1197 | 1197 own 1198 | 1198 owner 1199 | 1199 oz 1200 | 1200 pacif 1201 | 1201 pack 1202 | 1202 packag 1203 | 1203 page 1204 | 1204 pai 1205 | 1205 paid 1206 | 1206 pain 1207 | 1207 palm 1208 | 1208 panel 1209 | 1209 paper 1210 | 1210 paragraph 1211 | 1211 parent 1212 | 1212 part 1213 | 1213 parti 1214 | 1214 particip 1215 | 1215 particular 1216 | 1216 particularli 1217 | 1217 partit 1218 | 1218 partner 1219 | 1219 pass 1220 | 1220 password 1221 | 1221 past 1222 | 1222 patch 1223 | 1223 patent 1224 | 1224 path 1225 | 1225 pattern 1226 | 1226 paul 1227 | 1227 payment 1228 | 1228 pc 
1229 | 1229 peac 1230 | 1230 peopl 1231 | 1231 per 1232 | 1232 percent 1233 | 1233 percentag 1234 | 1234 perfect 1235 | 1235 perfectli 1236 | 1236 perform 1237 | 1237 perhap 1238 | 1238 period 1239 | 1239 perl 1240 | 1240 perman 1241 | 1241 permiss 1242 | 1242 person 1243 | 1243 pgp 1244 | 1244 phone 1245 | 1245 photo 1246 | 1246 php 1247 | 1247 phrase 1248 | 1248 physic 1249 | 1249 pick 1250 | 1250 pictur 1251 | 1251 piec 1252 | 1252 piiiiiiii 1253 | 1253 pipe 1254 | 1254 pjnumber 1255 | 1255 place 1256 | 1256 plai 1257 | 1257 plain 1258 | 1258 plan 1259 | 1259 planet 1260 | 1260 plant 1261 | 1261 planta 1262 | 1262 platform 1263 | 1263 player 1264 | 1264 pleas 1265 | 1265 plu 1266 | 1266 plug 1267 | 1267 pm 1268 | 1268 pocket 1269 | 1269 point 1270 | 1270 polic 1271 | 1271 polici 1272 | 1272 polit 1273 | 1273 poor 1274 | 1274 pop 1275 | 1275 popul 1276 | 1276 popular 1277 | 1277 port 1278 | 1278 posit 1279 | 1279 possibl 1280 | 1280 post 1281 | 1281 potenti 1282 | 1282 pound 1283 | 1283 powel 1284 | 1284 power 1285 | 1285 powershot 1286 | 1286 practic 1287 | 1287 pre 1288 | 1288 predict 1289 | 1289 prefer 1290 | 1290 premium 1291 | 1291 prepar 1292 | 1292 present 1293 | 1293 presid 1294 | 1294 press 1295 | 1295 pretti 1296 | 1296 prevent 1297 | 1297 previou 1298 | 1298 previous 1299 | 1299 price 1300 | 1300 principl 1301 | 1301 print 1302 | 1302 printabl 1303 | 1303 printer 1304 | 1304 privaci 1305 | 1305 privat 1306 | 1306 prize 1307 | 1307 pro 1308 | 1308 probabl 1309 | 1309 problem 1310 | 1310 procedur 1311 | 1311 process 1312 | 1312 processor 1313 | 1313 procmail 1314 | 1314 produc 1315 | 1315 product 1316 | 1316 profession 1317 | 1317 profil 1318 | 1318 profit 1319 | 1319 program 1320 | 1320 programm 1321 | 1321 progress 1322 | 1322 project 1323 | 1323 promis 1324 | 1324 promot 1325 | 1325 prompt 1326 | 1326 properti 1327 | 1327 propos 1328 | 1328 proprietari 1329 | 1329 prospect 1330 | 1330 protect 1331 | 1331 protocol 1332 | 1332 prove 1333 | 1333 proven 1334 | 1334 provid 1335 | 1335 proxi 1336 | 1336 pub 1337 | 1337 public 1338 | 1338 publish 1339 | 1339 pudg 1340 | 1340 pull 1341 | 1341 purchas 1342 | 1342 purpos 1343 | 1343 put 1344 | 1344 python 1345 | 1345 qnumber 1346 | 1346 qualifi 1347 | 1347 qualiti 1348 | 1348 quarter 1349 | 1349 question 1350 | 1350 quick 1351 | 1351 quickli 1352 | 1352 quit 1353 | 1353 quot 1354 | 1354 radio 1355 | 1355 ragga 1356 | 1356 rais 1357 | 1357 random 1358 | 1358 rang 1359 | 1359 rate 1360 | 1360 rather 1361 | 1361 ratio 1362 | 1362 razor 1363 | 1363 razornumb 1364 | 1364 re 1365 | 1365 reach 1366 | 1366 read 1367 | 1367 reader 1368 | 1368 readi 1369 | 1369 real 1370 | 1370 realiz 1371 | 1371 realli 1372 | 1372 reason 1373 | 1373 receiv 1374 | 1374 recent 1375 | 1375 recipi 1376 | 1376 recommend 1377 | 1377 record 1378 | 1378 red 1379 | 1379 redhat 1380 | 1380 reduc 1381 | 1381 refer 1382 | 1382 refin 1383 | 1383 reg 1384 | 1384 regard 1385 | 1385 region 1386 | 1386 regist 1387 | 1387 regul 1388 | 1388 regular 1389 | 1389 rel 1390 | 1390 relat 1391 | 1391 relationship 1392 | 1392 releas 1393 | 1393 relev 1394 | 1394 reliabl 1395 | 1395 remain 1396 | 1396 rememb 1397 | 1397 remot 1398 | 1398 remov 1399 | 1399 replac 1400 | 1400 repli 1401 | 1401 report 1402 | 1402 repositori 1403 | 1403 repres 1404 | 1404 republ 1405 | 1405 request 1406 | 1406 requir 1407 | 1407 research 1408 | 1408 reserv 1409 | 1409 resid 1410 | 1410 resourc 1411 | 1411 respect 1412 | 1412 respond 1413 | 1413 respons 1414 | 1414 rest 1415 | 1415 result 1416 | 1416 retail 
1417 | 1417 return 1418 | 1418 reveal 1419 | 1419 revenu 1420 | 1420 revers 1421 | 1421 review 1422 | 1422 revok 1423 | 1423 rh 1424 | 1424 rich 1425 | 1425 right 1426 | 1426 risk 1427 | 1427 road 1428 | 1428 robert 1429 | 1429 rock 1430 | 1430 role 1431 | 1431 roll 1432 | 1432 rom 1433 | 1433 roman 1434 | 1434 room 1435 | 1435 root 1436 | 1436 round 1437 | 1437 rpm 1438 | 1438 rss 1439 | 1439 rule 1440 | 1440 run 1441 | 1441 sa 1442 | 1442 safe 1443 | 1443 sai 1444 | 1444 said 1445 | 1445 sale 1446 | 1446 same 1447 | 1447 sampl 1448 | 1448 san 1449 | 1449 saou 1450 | 1450 sat 1451 | 1451 satellit 1452 | 1452 save 1453 | 1453 saw 1454 | 1454 scan 1455 | 1455 schedul 1456 | 1456 school 1457 | 1457 scienc 1458 | 1458 score 1459 | 1459 screen 1460 | 1460 script 1461 | 1461 se 1462 | 1462 search 1463 | 1463 season 1464 | 1464 second 1465 | 1465 secret 1466 | 1466 section 1467 | 1467 secur 1468 | 1468 see 1469 | 1469 seed 1470 | 1470 seek 1471 | 1471 seem 1472 | 1472 seen 1473 | 1473 select 1474 | 1474 self 1475 | 1475 sell 1476 | 1476 seminar 1477 | 1477 send 1478 | 1478 sender 1479 | 1479 sendmail 1480 | 1480 senior 1481 | 1481 sens 1482 | 1482 sensit 1483 | 1483 sent 1484 | 1484 sep 1485 | 1485 separ 1486 | 1486 septemb 1487 | 1487 sequenc 1488 | 1488 seri 1489 | 1489 serif 1490 | 1490 seriou 1491 | 1491 serv 1492 | 1492 server 1493 | 1493 servic 1494 | 1494 set 1495 | 1495 setup 1496 | 1496 seven 1497 | 1497 seventh 1498 | 1498 sever 1499 | 1499 sex 1500 | 1500 sexual 1501 | 1501 sf 1502 | 1502 shape 1503 | 1503 share 1504 | 1504 she 1505 | 1505 shell 1506 | 1506 ship 1507 | 1507 shop 1508 | 1508 short 1509 | 1509 shot 1510 | 1510 should 1511 | 1511 show 1512 | 1512 side 1513 | 1513 sign 1514 | 1514 signatur 1515 | 1515 signific 1516 | 1516 similar 1517 | 1517 simpl 1518 | 1518 simpli 1519 | 1519 sinc 1520 | 1520 sincer 1521 | 1521 singl 1522 | 1522 sit 1523 | 1523 site 1524 | 1524 situat 1525 | 1525 six 1526 | 1526 size 1527 | 1527 skeptic 1528 | 1528 skill 1529 | 1529 skin 1530 | 1530 skip 1531 | 1531 sleep 1532 | 1532 slow 1533 | 1533 small 1534 | 1534 smart 1535 | 1535 smoke 1536 | 1536 smtp 1537 | 1537 snumber 1538 | 1538 so 1539 | 1539 social 1540 | 1540 societi 1541 | 1541 softwar 1542 | 1542 sold 1543 | 1543 solut 1544 | 1544 solv 1545 | 1545 some 1546 | 1546 someon 1547 | 1547 someth 1548 | 1548 sometim 1549 | 1549 son 1550 | 1550 song 1551 | 1551 soni 1552 | 1552 soon 1553 | 1553 sorri 1554 | 1554 sort 1555 | 1555 sound 1556 | 1556 sourc 1557 | 1557 south 1558 | 1558 space 1559 | 1559 spain 1560 | 1560 spam 1561 | 1561 spamassassin 1562 | 1562 spamd 1563 | 1563 spammer 1564 | 1564 speak 1565 | 1565 spec 1566 | 1566 special 1567 | 1567 specif 1568 | 1568 specifi 1569 | 1569 speech 1570 | 1570 speed 1571 | 1571 spend 1572 | 1572 sponsor 1573 | 1573 sport 1574 | 1574 spot 1575 | 1575 src 1576 | 1576 ssh 1577 | 1577 st 1578 | 1578 stabl 1579 | 1579 staff 1580 | 1580 stai 1581 | 1581 stand 1582 | 1582 standard 1583 | 1583 star 1584 | 1584 start 1585 | 1585 state 1586 | 1586 statement 1587 | 1587 statu 1588 | 1588 step 1589 | 1589 steve 1590 | 1590 still 1591 | 1591 stock 1592 | 1592 stop 1593 | 1593 storag 1594 | 1594 store 1595 | 1595 stori 1596 | 1596 strategi 1597 | 1597 stream 1598 | 1598 street 1599 | 1599 string 1600 | 1600 strip 1601 | 1601 strong 1602 | 1602 structur 1603 | 1603 studi 1604 | 1604 stuff 1605 | 1605 stupid 1606 | 1606 style 1607 | 1607 subject 1608 | 1608 submit 1609 | 1609 subscrib 1610 | 1610 subscript 1611 | 1611 substanti 1612 | 1612 success 1613 | 1613 such 
1614 | 1614 suffer 1615 | 1615 suggest 1616 | 1616 suit 1617 | 1617 sum 1618 | 1618 summari 1619 | 1619 summer 1620 | 1620 sun 1621 | 1621 super 1622 | 1622 suppli 1623 | 1623 support 1624 | 1624 suppos 1625 | 1625 sure 1626 | 1626 surpris 1627 | 1627 suse 1628 | 1628 suspect 1629 | 1629 sweet 1630 | 1630 switch 1631 | 1631 system 1632 | 1632 tab 1633 | 1633 tabl 1634 | 1634 tablet 1635 | 1635 tag 1636 | 1636 take 1637 | 1637 taken 1638 | 1638 talk 1639 | 1639 tape 1640 | 1640 target 1641 | 1641 task 1642 | 1642 tax 1643 | 1643 teach 1644 | 1644 team 1645 | 1645 tech 1646 | 1646 technic 1647 | 1647 techniqu 1648 | 1648 technolog 1649 | 1649 tel 1650 | 1650 telecom 1651 | 1651 telephon 1652 | 1652 tell 1653 | 1653 temperatur 1654 | 1654 templ 1655 | 1655 ten 1656 | 1656 term 1657 | 1657 termin 1658 | 1658 terror 1659 | 1659 terrorist 1660 | 1660 test 1661 | 1661 texa 1662 | 1662 text 1663 | 1663 than 1664 | 1664 thank 1665 | 1665 that 1666 | 1666 the 1667 | 1667 thei 1668 | 1668 their 1669 | 1669 them 1670 | 1670 themselv 1671 | 1671 then 1672 | 1672 theori 1673 | 1673 there 1674 | 1674 therefor 1675 | 1675 these 1676 | 1676 thi 1677 | 1677 thing 1678 | 1678 think 1679 | 1679 thinkgeek 1680 | 1680 third 1681 | 1681 those 1682 | 1682 though 1683 | 1683 thought 1684 | 1684 thousand 1685 | 1685 thread 1686 | 1686 threat 1687 | 1687 three 1688 | 1688 through 1689 | 1689 thu 1690 | 1690 thursdai 1691 | 1691 ti 1692 | 1692 ticket 1693 | 1693 tim 1694 | 1694 time 1695 | 1695 tip 1696 | 1696 tire 1697 | 1697 titl 1698 | 1698 tm 1699 | 1699 to 1700 | 1700 todai 1701 | 1701 togeth 1702 | 1702 token 1703 | 1703 told 1704 | 1704 toll 1705 | 1705 tom 1706 | 1706 toner 1707 | 1707 toni 1708 | 1708 too 1709 | 1709 took 1710 | 1710 tool 1711 | 1711 top 1712 | 1712 topic 1713 | 1713 total 1714 | 1714 touch 1715 | 1715 toward 1716 | 1716 track 1717 | 1717 trade 1718 | 1718 tradit 1719 | 1719 traffic 1720 | 1720 train 1721 | 1721 transact 1722 | 1722 transfer 1723 | 1723 travel 1724 | 1724 treat 1725 | 1725 tree 1726 | 1726 tri 1727 | 1727 trial 1728 | 1728 trick 1729 | 1729 trip 1730 | 1730 troubl 1731 | 1731 true 1732 | 1732 truli 1733 | 1733 trust 1734 | 1734 truth 1735 | 1735 try 1736 | 1736 tue 1737 | 1737 tuesdai 1738 | 1738 turn 1739 | 1739 tv 1740 | 1740 two 1741 | 1741 type 1742 | 1742 uk 1743 | 1743 ultim 1744 | 1744 un 1745 | 1745 under 1746 | 1746 understand 1747 | 1747 unfortun 1748 | 1748 uniqu 1749 | 1749 unison 1750 | 1750 unit 1751 | 1751 univers 1752 | 1752 unix 1753 | 1753 unless 1754 | 1754 unlik 1755 | 1755 unlimit 1756 | 1756 unseen 1757 | 1757 unsolicit 1758 | 1758 unsubscrib 1759 | 1759 until 1760 | 1760 up 1761 | 1761 updat 1762 | 1762 upgrad 1763 | 1763 upon 1764 | 1764 urgent 1765 | 1765 url 1766 | 1766 us 1767 | 1767 usa 1768 | 1768 usag 1769 | 1769 usb 1770 | 1770 usd 1771 | 1771 usdollarnumb 1772 | 1772 useless 1773 | 1773 user 1774 | 1774 usr 1775 | 1775 usual 1776 | 1776 util 1777 | 1777 vacat 1778 | 1778 valid 1779 | 1779 valu 1780 | 1780 valuabl 1781 | 1781 var 1782 | 1782 variabl 1783 | 1783 varieti 1784 | 1784 variou 1785 | 1785 ve 1786 | 1786 vendor 1787 | 1787 ventur 1788 | 1788 veri 1789 | 1789 verifi 1790 | 1790 version 1791 | 1791 via 1792 | 1792 video 1793 | 1793 view 1794 | 1794 virtual 1795 | 1795 visa 1796 | 1796 visit 1797 | 1797 visual 1798 | 1798 vnumber 1799 | 1799 voic 1800 | 1800 vote 1801 | 1801 vs 1802 | 1802 vulner 1803 | 1803 wa 1804 | 1804 wai 1805 | 1805 wait 1806 | 1806 wake 1807 | 1807 walk 1808 | 1808 wall 1809 | 1809 want 1810 | 1810 war 1811 | 1811 
warm 1812 | 1812 warn 1813 | 1813 warranti 1814 | 1814 washington 1815 | 1815 wasn 1816 | 1816 wast 1817 | 1817 watch 1818 | 1818 water 1819 | 1819 we 1820 | 1820 wealth 1821 | 1821 weapon 1822 | 1822 web 1823 | 1823 weblog 1824 | 1824 websit 1825 | 1825 wed 1826 | 1826 wednesdai 1827 | 1827 week 1828 | 1828 weekli 1829 | 1829 weight 1830 | 1830 welcom 1831 | 1831 well 1832 | 1832 went 1833 | 1833 were 1834 | 1834 west 1835 | 1835 what 1836 | 1836 whatev 1837 | 1837 when 1838 | 1838 where 1839 | 1839 whether 1840 | 1840 which 1841 | 1841 while 1842 | 1842 white 1843 | 1843 whitelist 1844 | 1844 who 1845 | 1845 whole 1846 | 1846 whose 1847 | 1847 why 1848 | 1848 wi 1849 | 1849 wide 1850 | 1850 width 1851 | 1851 wife 1852 | 1852 will 1853 | 1853 william 1854 | 1854 win 1855 | 1855 window 1856 | 1856 wing 1857 | 1857 winner 1858 | 1858 wireless 1859 | 1859 wish 1860 | 1860 with 1861 | 1861 within 1862 | 1862 without 1863 | 1863 wnumberp 1864 | 1864 woman 1865 | 1865 women 1866 | 1866 won 1867 | 1867 wonder 1868 | 1868 word 1869 | 1869 work 1870 | 1870 worker 1871 | 1871 world 1872 | 1872 worldwid 1873 | 1873 worri 1874 | 1874 worst 1875 | 1875 worth 1876 | 1876 would 1877 | 1877 wouldn 1878 | 1878 write 1879 | 1879 written 1880 | 1880 wrong 1881 | 1881 wrote 1882 | 1882 www 1883 | 1883 ximian 1884 | 1884 xml 1885 | 1885 xp 1886 | 1886 yahoo 1887 | 1887 ye 1888 | 1888 yeah 1889 | 1889 year 1890 | 1890 yesterdai 1891 | 1891 yet 1892 | 1892 york 1893 | 1893 you 1894 | 1894 young 1895 | 1895 your 1896 | 1896 yourself 1897 | 1897 zdnet 1898 | 1898 zero 1899 | 1899 zip 1900 | -------------------------------------------------------------------------------- /ex7/bird_small.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deyachatterjee/ml-andrewng-python/03cf16e3d26cd65e791dc6a9e1b49d01ff0b70f3/ex7/bird_small.mat -------------------------------------------------------------------------------- /ex7/bird_small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deyachatterjee/ml-andrewng-python/03cf16e3d26cd65e791dc6a9e1b49d01ff0b70f3/ex7/bird_small.png -------------------------------------------------------------------------------- /ex7/ex7.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deyachatterjee/ml-andrewng-python/03cf16e3d26cd65e791dc6a9e1b49d01ff0b70f3/ex7/ex7.pdf -------------------------------------------------------------------------------- /ex7/ex7data1.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deyachatterjee/ml-andrewng-python/03cf16e3d26cd65e791dc6a9e1b49d01ff0b70f3/ex7/ex7data1.mat -------------------------------------------------------------------------------- /ex7/ex7data2.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deyachatterjee/ml-andrewng-python/03cf16e3d26cd65e791dc6a9e1b49d01ff0b70f3/ex7/ex7data2.mat -------------------------------------------------------------------------------- /ex7/ex7faces.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deyachatterjee/ml-andrewng-python/03cf16e3d26cd65e791dc6a9e1b49d01ff0b70f3/ex7/ex7faces.mat -------------------------------------------------------------------------------- /ex7/ex7kmeans.py: 
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 | from scipy.optimize import minimize
4 | import scipy.io
5 | import matplotlib.pyplot as plt
6 | 
7 | def findClosestCentroids(X, centroids):
8 |     K = np.size(centroids, 0)  # number of centroids (rows)
9 |     idx = []
10 | 
11 |     for i in range(len(X)):
12 |         norm = np.sum(((X[i] - centroids)**2), axis=1)
13 |         idx.append(norm.argmin())
14 | 
15 |     return idx
16 | 
17 | def computeCentroids(X, idx, K):
18 |     centroid = np.zeros((K,np.size(X,1)))
19 |     aug_X = np.hstack((np.array(idx)[:,None],X))
20 |     for i in range(K):
21 |         centroid[i] = np.mean(X[aug_X[:,0] == i], axis=0)
22 | 
23 |     return centroid
24 | 
25 | def runKMeans(X, initial_centroids, max_iters, plot_progress=False):
26 |     K = np.size(initial_centroids, 0)
27 |     centroids = initial_centroids
28 |     previous_centroids = centroids
29 | 
30 |     for i in range(max_iters):
31 |         # Centroid assignment
32 |         idx = findClosestCentroids(X, centroids)
33 | 
34 |         if plot_progress:
35 |             plt.plot(X[:,0],X[:,1], 'bo')
36 |             plt.plot(centroids[:,0], centroids[:,1], 'rx')
37 |             plt.plot(previous_centroids[:,0], previous_centroids[:,1], 'gx')
38 |             plt.show()
39 | 
40 |         previous_centroids = centroids
41 |         centroids = computeCentroids(X, idx, K)
42 | 
43 |     return (centroids, idx)
44 | 
45 | def displayData(X):
46 | 
47 |     num_images = len(X)
48 |     rows = int(num_images**.5)
49 |     cols = int(num_images**.5)
50 |     fig, ax = plt.subplots(rows,cols,sharex=True,sharey=True)
51 |     img_num = 0
52 | 
53 |     for i in range(rows):
54 |         for j in range(cols):
55 |             # Convert column vector into a 32x32 pixel matrix;
56 |             # transpose to display correctly
57 |             img = X[img_num,:].reshape(32,32).T
58 |             ax[i][j].imshow(img,cmap='gray')
59 |             img_num += 1
60 | 
61 |     return (fig, ax)
62 | 
63 | def kMeansInitCentroids(X, K):
64 |     return X[np.random.choice(X.shape[0], K)]
65 | 
66 | # Find Closest Centroids
67 | raw_mat = scipy.io.loadmat("ex7data2.mat")
68 | X = raw_mat.get("X")
69 | 
70 | # Select an initial set of centroids
71 | K = 3
72 | initial_centroids = np.array([[3, 3], [6, 2], [8, 5]])
73 | idx = findClosestCentroids(X, initial_centroids)
74 | 
75 | # Compute Means
76 | centroids = computeCentroids(X, idx, K)
77 | 
78 | # K-means Clustering
79 | max_iters = 10
80 | initial_centroids = np.array([[3, 3], [6, 2], [8, 5]])
81 | centroids, idx = runKMeans(X, initial_centroids, max_iters, plot_progress=True)
82 | 
83 | # K-means Clustering on Pixels
84 | A = plt.imread("bird_small.png")
85 | plt.imshow(A)
86 | plt.show()
87 | 
88 | original_shape = np.shape(A)
89 | 
90 | # Reshape A to get R, G, B values for each pixel
91 | X = A.reshape((np.size(A, 0)*np.size(A, 1), 3))
92 | K = 16
93 | max_iters = 10
94 | 
95 | # Initialize centroids
96 | initial_centroids = kMeansInitCentroids(X, K)
97 | centroids, idx = runKMeans(X, initial_centroids, max_iters, plot_progress=False)
98 | 
99 | # Image Compression: map every pixel to its closest learned centroid color
100 | idx = findClosestCentroids(X, centroids)
101 | X_recovered = centroids[idx,:]
102 | X_recovered = X_recovered.reshape(original_shape)
103 | 
104 | # Display the original next to the compressed image
105 | f, (ax1, ax2) = plt.subplots(2, sharex=True, sharey=True)
106 | ax1.imshow(A)
107 | ax2.imshow(X_recovered)
108 | plt.show()
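# Added sketch (not part of the original script): an equivalent, vectorized
# version of the assignment step above, using NumPy broadcasting instead of a loop:
def findClosestCentroidsVectorized(X, centroids):
    # (m,1,n) - (K,n) broadcasts to (m,K,n); reduce to squared distances (m,K)
    dists = ((X[:, None, :] - centroids[None, :, :]) ** 2).sum(axis=2)
    return dists.argmin(axis=1)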
--------------------------------------------------------------------------------
/ex7/ex7pca.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 | from scipy.optimize import minimize
4 | import matplotlib.pyplot as plt
5 | import scipy.io
6 | 
7 | def featureNormalize(X):
8 |     mu = np.mean(X,axis=0)
9 |     sigma = np.std(X,axis=0)
10 |     normalized_X = np.divide(X - mu,sigma)
11 | 
12 |     return (normalized_X, mu, sigma)
13 | 
14 | def pca(X):
15 |     covar = np.dot(X.T,X) / len(X)  # covariance matrix (X is assumed normalized)
16 |     U, S, V = np.linalg.svd(covar)
17 |     return (U, S)
18 | 
19 | def projectData(X, U, K):
20 |     U_reduce = U[:, 0:K]
21 |     Z = np.zeros((len(X), K))
22 |     for i in range(len(X)):
23 |         x = X[i,:]
24 |         projection_k = np.dot(x, U_reduce)
25 |         Z[i] = projection_k
26 |     return Z
27 | 
28 | def recoverData(Z, U, K):
29 |     X_rec = np.zeros((len(Z), len(U)))
30 |     for i in range(len(Z)):
31 |         v = Z[i,:]
32 |         for j in range(np.size(U,1)):
33 |             recovered_j = np.dot(v.T,U[j,0:K])
34 |             X_rec[i][j] = recovered_j
35 |     return X_rec
36 | 
37 | def displayData(X):
38 | 
39 |     num_images = len(X)
40 |     rows = int(num_images**.5)
41 |     cols = int(num_images**.5)
42 |     fig, ax = plt.subplots(rows,cols,sharex=True,sharey=True)
43 |     img_num = 0
44 | 
45 |     for i in range(rows):
46 |         for j in range(cols):
47 |             # Convert column vector into a 32x32 pixel matrix;
48 |             # transpose to display correctly
49 |             img = X[img_num,:].reshape(32,32).T
50 |             ax[i][j].imshow(img,cmap='gray')
51 |             img_num += 1
52 | 
53 |     return (fig, ax)
54 | 
55 | raw_mat = scipy.io.loadmat("ex7data1.mat")
56 | X = raw_mat.get("X")
57 | plt.cla()
58 | plt.plot(X[:,0], X[:,1], 'bo')
59 | plt.show()
60 | 
61 | X_norm, mu, sigma = featureNormalize(X)
62 | U, S = pca(X_norm)
63 | 
64 | plt.cla()
65 | plt.plot(X_norm[:,0], X_norm[:,1], 'bo')
66 | plt.show()
67 | 
68 | K = 1
69 | Z = projectData(X_norm, U, K)
70 | X_rec = recoverData(Z, U, K)
71 | 
72 | plt.cla()
73 | plt.plot(X_norm[:,0], X_norm[:,1], 'bo')
74 | plt.plot(X_rec[:,0], X_rec[:,1], 'rx')
75 | plt.show()
76 | 
77 | # Loading and Visualizing Face Data
78 | raw_mat = scipy.io.loadmat("ex7faces.mat")
79 | X = raw_mat.get("X")
80 | face_grid, ax = displayData(X[:100, :])
81 | face_grid.show()
82 | 
83 | X_norm, mu, sigma = featureNormalize(X)
84 | U, S = pca(X_norm)
85 | 
86 | face_grid, ax = displayData(U[:,:36].T)
87 | face_grid.show()
88 | 
89 | # Dimension Reduction on Faces
90 | K = 100
91 | Z = projectData(X_norm, U, K)
92 | 
93 | # Visualization of Faces after PCA
94 | K = 100
95 | X_rec = recoverData(Z, U, K)
96 | 
97 | plt.close()
98 | plt.cla()
99 | # displayData builds its own figure, so no extra plt.subplots call is needed here
100 | f, ax1 = displayData(X_norm[:100,:])
101 | f, ax2 = displayData(X_rec[:100,:])
102 | f.show()
103 | 
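# Added sketch (not part of the original script): the singular values S returned
# by pca() can be used to pick the smallest K that retains a desired share of
# the variance, e.g. 99%:
def chooseK(S, retain=0.99):
    frac = np.cumsum(S) / np.sum(S)        # variance retained by the first k components
    return int(np.argmax(frac >= retain)) + 1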
--------------------------------------------------------------------------------
/ex7/token.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deyachatterjee/ml-andrewng-python/03cf16e3d26cd65e791dc6a9e1b49d01ff0b70f3/ex7/token.mat
--------------------------------------------------------------------------------
/ex8/ex8_movieParams.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deyachatterjee/ml-andrewng-python/03cf16e3d26cd65e791dc6a9e1b49d01ff0b70f3/ex8/ex8_movieParams.mat
--------------------------------------------------------------------------------
/ex8/ex8_movies.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deyachatterjee/ml-andrewng-python/03cf16e3d26cd65e791dc6a9e1b49d01ff0b70f3/ex8/ex8_movies.mat
--------------------------------------------------------------------------------
/ex8/ex8anomaly_detection.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 | from scipy.optimize import minimize
4 | # (all helper functions used below are defined in this file)
5 | import scipy.io
6 | import matplotlib.pyplot as plt
7 | 
8 | 
9 | 
10 | def estimateGaussian(X):
11 |     mu = np.mean(X, axis=0, keepdims=True)
12 |     sigma2 = np.var(X, axis=0, keepdims=True)
13 | 
14 |     return (mu, sigma2)
15 | 
16 | def multivariateGaussian(X, mu, sigma2):
17 |     k = np.size(mu,1)
18 |     if ((np.size(sigma2,0) == 1) | (np.size(sigma2,1) == 1)):
19 |         sigma2 = np.diagflat(sigma2)
20 |     # De-mean
21 |     X = X - mu
22 |     # Calculate p-values with the multivariate normal density
23 |     p = ((2 * np.pi) ** (-k / 2) * np.linalg.det(sigma2) ** (-.5) *
24 |          np.exp(-.5 * np.sum(np.dot(X, np.linalg.inv(sigma2)) * X, 1)))
25 | 
26 |     return p
27 | 
28 | def visualizeFit(X, mu, sigma2):
29 |     meshvals = np.arange(0, 35, .5)
30 |     X1, X2 = np.meshgrid(meshvals, meshvals)
31 |     Z = np.hstack((X1.reshape((-1,1)), X2.reshape((-1,1))))
32 |     Z = multivariateGaussian(Z, mu, sigma2).reshape(np.shape(X1))
33 | 
34 |     mylevels = np.array([10**i for i in np.arange(-20,0,3)])
35 |     fig, ax = plt.subplots(1)
36 |     ax.plot(X[:, 0], X[:, 1], 'bx')
37 |     ax.contour(X1, X2, Z, mylevels)
38 | 
39 |     return fig, ax
40 | 
41 | def selectThreshold(yval, pval):
42 |     bestEpsilon = 0
43 |     bestF1 = 0
44 |     F1 = 0
45 | 
46 |     stepsize = (np.max(pval) - np.min(pval)) / 1000
47 |     evals = np.arange(np.min(pval), np.max(pval), stepsize)
48 |     for epsilon in evals:
49 |         predictions = (pval < epsilon).reshape((-1,1))
50 |         X = np.hstack((predictions, yval))
51 |         fp = np.sum((X[:,0] == 1) & (X[:,1] == 0))
52 |         tp = np.sum((X[:,0] == 1) & (X[:,1] == 1))
53 |         fn = np.sum((X[:,0] == 0) & (X[:,1] == 1))
54 |         prec = tp / (tp + fp) if tp + fp else 0   # guard against 0/0 at extreme thresholds
55 |         rec = tp / (tp + fn) if tp + fn else 0
56 |         F1 = (2 * prec * rec) / (prec + rec) if prec + rec else 0
57 | 
58 |         if F1 > bestF1:
59 |             bestF1 = F1
60 |             bestEpsilon = epsilon
61 | 
62 |     return (bestEpsilon, bestF1)
63 | 
64 | 
65 | raw_mat = scipy.io.loadmat("ex8data1.mat")
66 | X = raw_mat.get("X")
67 | Xval = raw_mat.get("Xval")
68 | yval = raw_mat.get("yval")
69 | 
70 | plt.plot(X[:, 0], X[:, 1], 'bx')
71 | plt.xlabel('Latency (ms)')
72 | plt.ylabel('Throughput (mb/s)')
73 | plt.show()
74 | 
75 | mu, sigma2 = estimateGaussian(X) # returns 1 x n row vectors
76 | 
77 | # Density of the data under the multivariate normal distribution
78 | p = multivariateGaussian(X, mu, sigma2)
79 | fig, ax = visualizeFit(X, mu, sigma2)
80 | fig.show()
81 | 
82 | # Find Outliers
83 | pval = multivariateGaussian(Xval, mu, sigma2)
84 | epsilon, F1 = selectThreshold(yval, pval)
85 | 
86 | outliers = np.where(p < epsilon)
87 | fig, ax = visualizeFit(X, mu, sigma2)
88 | ax.plot(X[outliers, 0], X[outliers, 1], 'ro', linewidth=2, markersize=10)
89 | fig.show()
90 | 
91 | # Multi-Dimensional Outliers
92 | raw_mat2 = scipy.io.loadmat("ex8data2.mat")
93 | X = raw_mat2.get("X")
94 | Xval = raw_mat2.get("Xval")
95 | yval = raw_mat2.get("yval")
96 | 
97 | mu, sigma2 = estimateGaussian(X)
98 | p = multivariateGaussian(X, mu, sigma2)
99 | pval = multivariateGaussian(Xval, mu, sigma2)
100 | epsilon, F1 = selectThreshold(yval, pval)
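# Added illustration (not part of the original script): a tiny hand-checkable
# call to selectThreshold. With pval = [0.1, 0.2, 0.8, 0.9] and yval flagging
# the two low-density points as anomalies, any threshold in (0.2, 0.8] gives F1 = 1:
demo_pval = np.array([0.1, 0.2, 0.8, 0.9])
demo_yval = np.array([[1], [1], [0], [0]])
print(selectThreshold(demo_yval, demo_pval))  # epsilon just above 0.2, F1 = 1.0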
--------------------------------------------------------------------------------
/ex8/ex8cofi.py:
--------------------------------------------------------------------------------
import pandas as pd
import numpy as np
from scipy.optimize import minimize
import scipy.io
import matplotlib.pyplot as plt


def cofiCostFunc(params, Y, R, num_users, num_movies, num_features, reg):
    # Unfold the X and Theta matrices from the flattened params vector
    X = params[:num_movies * num_features].reshape((num_movies, num_features))
    Theta = params[num_movies * num_features:].reshape((num_users, num_features))

    # Regularized cost
    J = (.5 * np.sum(((np.dot(Theta, X.T).T - Y) * R)**2) +
         ((reg / 2) * np.sum(Theta**2)) +
         ((reg / 2) * np.sum(X**2)))

    # Gradients
    X_grad = np.zeros_like(X)
    for i in range(num_movies):
        idx = np.where(R[i, :] == 1)[0]   # users who have rated movie i
        temp_theta = Theta[idx, :]        # parameter vectors for those users
        temp_Y = Y[i, idx]                # ratings given to movie i
        X_grad[i, :] = (np.dot(np.dot(temp_theta, X[i, :]) - temp_Y,
                               temp_theta) + reg * X[i, :])

    Theta_grad = np.zeros_like(Theta)
    for j in range(num_users):
        idx = np.where(R[:, j] == 1)[0]   # movies rated by user j
        temp_X = X[idx, :]
        temp_Y = Y[idx, j]
        Theta_grad[j, :] = (np.dot(np.dot(temp_X, Theta[j]) - temp_Y,
                                   temp_X) + reg * Theta[j])
    grad = np.append(X_grad.flatten(), Theta_grad.flatten())

    return (J, grad)

def computeNumericalGradient(J, theta):
    # Two-sided finite-difference approximation of the gradient of J at theta
    numgrad = np.zeros_like(theta)
    perturb = np.zeros_like(theta)
    eps = 1e-4

    for p in range(len(theta)):
        perturb[p] = eps
        loss1 = J(theta - perturb)
        loss2 = J(theta + perturb)
        numgrad[p] = (loss2 - loss1) / (2 * eps)
        perturb[p] = 0

    return numgrad

def checkCostFunction(reg):
    # Create a small random problem
    X_t = np.random.random((4, 3))
    Theta_t = np.random.random((5, 3))

    # Zap out most entries
    Y = np.dot(Theta_t, X_t.T)
    Y[(np.random.random(np.shape(Y)) > .5)] = 0
    R = np.zeros_like(Y)
    R[Y != 0] = 1

    # Gradient checking
    X = np.random.random(np.shape(X_t))
    Theta = np.random.random(np.shape(Theta_t))
    num_users = np.size(Y, 1)
    num_movies = np.size(Y, 0)
    num_features = np.size(Theta_t, 1)

    params = np.append(X.flatten(), Theta.flatten())

    def reducedCofiCostFunc(p):
        return cofiCostFunc(p, Y, R, num_users, num_movies, num_features, reg)[0]

    numgrad = computeNumericalGradient(reducedCofiCostFunc, params)
    J, grad = cofiCostFunc(params, Y, R, num_users, num_movies, num_features, reg)
    # The analytical and numerical gradients should agree closely
    np.testing.assert_almost_equal(grad, numgrad)
    return

def normalizeRatings(Y, R):
    # Shift each movie's rated entries so that its mean rating is zero
    m, n = np.shape(Y)
    Ymean = np.zeros((m, 1))
    Ynorm = np.zeros_like(Y)
    for i in range(m):
        idx = (R[i] == 1)
        Ymean[i] = np.mean(Y[i, idx])
        Ynorm[i, idx] = Y[i, idx] - Ymean[i]
    return (Ynorm, Ymean)
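# --- Added toy example (hedged; not part of the original script) ---
# What normalizeRatings does: each movie's rated entries are shifted so its
# mean rating becomes 0, while unrated entries (R == 0) stay 0. The _Y and _R
# arrays below are made up purely for illustration.
_Y = np.array([[5.0, 0.0, 1.0],
               [4.0, 2.0, 0.0]])
_R = np.array([[1, 0, 1],
               [1, 1, 0]])
_Ynorm, _Ymean = normalizeRatings(_Y, _R)
print(_Ymean.flatten())  # [3. 3.]  per-movie means over rated entries only
print(_Ynorm)            # [[ 2.  0. -2.]  [ 1. -1.  0.]]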
raw_mat = scipy.io.loadmat("ex8_movies.mat")
R = raw_mat.get("R")  # num movies x num users indicator matrix
Y = raw_mat.get("Y")  # num movies x num users ratings matrix

# Visualize the ratings matrix
plt.matshow(Y)
plt.xlabel("Users")
plt.ylabel("Movies")
plt.show()

# Collaborative Filtering Cost Function
raw_mat2 = scipy.io.loadmat("ex8_movieParams.mat")
X = raw_mat2.get("X")          # row i is the feature vector of the ith movie
Theta = raw_mat2.get("Theta")  # row j is the parameter vector of the jth user

# Reduce the data size to have it run faster
num_users = 4
num_movies = 5
num_features = 3

X = X[:num_movies, :num_features]
Theta = Theta[:num_users, :num_features]
Y = Y[:num_movies, :num_users]
R = R[:num_movies, :num_users]

# Evaluate cost
params = np.append(X.flatten(), Theta.flatten())
J, grad = cofiCostFunc(params, Y, R, num_users, num_movies, num_features, 0)
np.testing.assert_almost_equal(22.22, J, decimal=2,
                               err_msg="Incorrect unregularized cost")

# Gradient
checkCostFunction(0)

# Regularization
J, grad = cofiCostFunc(params, Y, R, num_users, num_movies, num_features, 1.5)
np.testing.assert_almost_equal(31.34, J, decimal=2,
                               err_msg="Incorrect regularized cost")

checkCostFunction(1.5)

# Entering ratings for a new user
movieList = pd.read_table("movie_ids.txt", encoding='latin-1', names=["Movie"])
movies = movieList.Movie.tolist()
my_ratings = [0] * len(movies)

# Check the file movie_ids.txt for the ID of each movie in our dataset.
# For example, Toy Story (1995) has ID 1, so to rate it "4", set
my_ratings[0] = 4

# Or suppose you did not enjoy Silence of the Lambs (1991); set
my_ratings[97] = 2

# A few more movies we liked / did not like
my_ratings[6] = 3
my_ratings[11] = 5
my_ratings[53] = 4
my_ratings[63] = 5
my_ratings[65] = 3
my_ratings[68] = 5
my_ratings[182] = 4
my_ratings[225] = 5
my_ratings[354] = 5

for i in range(len(movies)):
    if my_ratings[i] > 0:
        print("User rated " + str(movies[i]) + ": " + str(my_ratings[i]))

# Learning
raw_mat = scipy.io.loadmat("ex8_movies.mat")
R = raw_mat.get("R")  # num movies x num users indicator matrix
Y = raw_mat.get("Y")  # num movies x num users ratings matrix

# Add our own ratings to Y
ratings_col = np.array(my_ratings).reshape((-1, 1))
Y = np.hstack((ratings_col, Y))

# Add indicators to R
R = np.hstack((ratings_col != 0, R))

# Normalize
Ynorm, Ymean = normalizeRatings(Y, R)

# Useful values
num_users = np.size(Y, 1)
num_movies = np.size(Y, 0)
num_features = 10

# Set initial parameters
X = np.random.normal(size=(num_movies, num_features))
Theta = np.random.normal(size=(num_users, num_features))

initial_parameters = np.append(X.flatten(), Theta.flatten())
reg = 10

def reducedCofiCostFunc(p):
    # Train on the mean-normalized ratings; Ymean is added back when predicting
    return cofiCostFunc(p, Ynorm, R, num_users, num_movies, num_features, reg)

results = minimize(reducedCofiCostFunc,
                   initial_parameters,
                   method="CG",
                   jac=True,
                   options={'maxiter': 100, "disp": True})

out_params = results.x

# Unfold the returned parameters back into X and Theta
X = np.reshape(out_params[:num_movies * num_features],
               (num_movies, num_features))
Theta = np.reshape(out_params[num_movies * num_features:],
                   (num_users, num_features))
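# --- Added check (hedged; not part of the original script) ---
# Report the regularized cost at the learned parameters; it should match the
# final objective value minimize() stores in results.fun.
final_J, _ = cofiCostFunc(out_params, Ynorm, R, num_users, num_movies,
                          num_features, reg)
print("Cost at learned parameters:", final_J)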
# Recommendation
p = np.dot(X, Theta.T)
my_predictions = p[:, 0] + Ymean.flatten()
sorted_ix = my_predictions.argsort()

print("\nTop recommendations for you:\n")
for i in range(1, 11):
    j = sorted_ix[-i]  # indices of the highest predictions, best first
    print("Predicting rating " + str(my_predictions[j]) +
          " for movie " + str(movies[j]))

print("\nOriginal ratings provided:\n")
for i in range(len(my_ratings)):
    if my_ratings[i] > 0:
        print("Rated " + str(my_ratings[i]) + " for " + str(movies[i]))
--------------------------------------------------------------------------------
/ex8/ex8data1.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deyachatterjee/ml-andrewng-python/03cf16e3d26cd65e791dc6a9e1b49d01ff0b70f3/ex8/ex8data1.mat
--------------------------------------------------------------------------------
/ex8/ex8data2.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deyachatterjee/ml-andrewng-python/03cf16e3d26cd65e791dc6a9e1b49d01ff0b70f3/ex8/ex8data2.mat
--------------------------------------------------------------------------------
/ex8/i.txt:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/ex8/movie_ids.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deyachatterjee/ml-andrewng-python/03cf16e3d26cd65e791dc6a9e1b49d01ff0b70f3/ex8/movie_ids.txt
--------------------------------------------------------------------------------
/ex8/token.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deyachatterjee/ml-andrewng-python/03cf16e3d26cd65e791dc6a9e1b49d01ff0b70f3/ex8/token.mat
--------------------------------------------------------------------------------