├── LICENSE.md
├── README.md
├── ex1
│   ├── ex1data1.txt
│   ├── ex1data2.txt
│   ├── ml-ex1-multivariate.ipynb
│   └── ml-ex1.ipynb
├── ex2
│   ├── ex2data1.txt
│   ├── ex2data2.txt
│   └── ml-ex2.ipynb
├── ex3
│   ├── ex3data1.mat
│   ├── ex3weights.mat
│   ├── ml-ex3-2.ipynb
│   └── ml-ex3-onevsall.ipynb
├── ex4
│   ├── ex4data1.mat
│   ├── ex4weights.mat
│   └── ml-ex4.ipynb
├── ex5
│   ├── ex5data1.mat
│   └── ml-ex5.ipynb
├── ex6
│   ├── ex6data1.mat
│   ├── ex6data2.mat
│   ├── ex6data3.mat
│   ├── ml-ex6.ipynb
│   ├── spamSample1.txt
│   ├── spamSample2.txt
│   ├── spamTest.mat
│   ├── spamTrain.mat
│   └── vocab.txt
├── ex7
│   ├── bird_small.mat
│   ├── bird_small.png
│   ├── ex7data1.mat
│   ├── ex7data2.mat
│   ├── ex7faces.mat
│   ├── ml-ex7-kmeans.ipynb
│   └── ml-ex7-pca.ipynb
└── ex8
    ├── ex8_movieParams.mat
    ├── ex8_movies.mat
    ├── ex8data1.mat
    ├── ex8data2.mat
    ├── iterate.dat
    ├── ml-ex8-anomaly-detection.ipynb
    ├── ml-ex8-recommender-systems.ipynb
    └── movie_ids.txt
/LICENSE.md: --------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 | 
3 | Copyright (c) 2015 Noam Mor
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
-------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------
1 | # coursera-machinelearning-python
2 | Skeleton code for the exercises of the Coursera Machine Learning course, in the form of Jupyter (IPython) notebooks.
3 | 
4 | # What you will need to run this
5 | 
6 | * Python 3
7 | * numpy
8 | * scipy
9 | * matplotlib
10 | * pandas
11 | * scikit-learn
12 | * Pillow
13 | 
14 | The most convenient way to get these is via the Anaconda scientific Python distribution.
15 | 
16 | This code was written for Python 3. If you wish to work with Python 2, have no fear: you will likely need to change a few print statements, but that is just about it.
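# Checking your environment

A quick way to confirm the dependencies are importable (a minimal sketch; the versions printed will differ on your machine):

```python
import numpy, scipy, matplotlib, pandas, sklearn
import PIL  # provided by the Pillow package

for module in (numpy, scipy, matplotlib, pandas, sklearn):
    print(module.__name__, module.__version__)
```

If these imports succeed, you should be able to run the notebooks.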
17 | -------------------------------------------------------------------------------- /ex1/ex1data1.txt: -------------------------------------------------------------------------------- 1 | 6.1101,17.592 2 | 5.5277,9.1302 3 | 8.5186,13.662 4 | 7.0032,11.854 5 | 5.8598,6.8233 6 | 8.3829,11.886 7 | 7.4764,4.3483 8 | 8.5781,12 9 | 6.4862,6.5987 10 | 5.0546,3.8166 11 | 5.7107,3.2522 12 | 14.164,15.505 13 | 5.734,3.1551 14 | 8.4084,7.2258 15 | 5.6407,0.71618 16 | 5.3794,3.5129 17 | 6.3654,5.3048 18 | 5.1301,0.56077 19 | 6.4296,3.6518 20 | 7.0708,5.3893 21 | 6.1891,3.1386 22 | 20.27,21.767 23 | 5.4901,4.263 24 | 6.3261,5.1875 25 | 5.5649,3.0825 26 | 18.945,22.638 27 | 12.828,13.501 28 | 10.957,7.0467 29 | 13.176,14.692 30 | 22.203,24.147 31 | 5.2524,-1.22 32 | 6.5894,5.9966 33 | 9.2482,12.134 34 | 5.8918,1.8495 35 | 8.2111,6.5426 36 | 7.9334,4.5623 37 | 8.0959,4.1164 38 | 5.6063,3.3928 39 | 12.836,10.117 40 | 6.3534,5.4974 41 | 5.4069,0.55657 42 | 6.8825,3.9115 43 | 11.708,5.3854 44 | 5.7737,2.4406 45 | 7.8247,6.7318 46 | 7.0931,1.0463 47 | 5.0702,5.1337 48 | 5.8014,1.844 49 | 11.7,8.0043 50 | 5.5416,1.0179 51 | 7.5402,6.7504 52 | 5.3077,1.8396 53 | 7.4239,4.2885 54 | 7.6031,4.9981 55 | 6.3328,1.4233 56 | 6.3589,-1.4211 57 | 6.2742,2.4756 58 | 5.6397,4.6042 59 | 9.3102,3.9624 60 | 9.4536,5.4141 61 | 8.8254,5.1694 62 | 5.1793,-0.74279 63 | 21.279,17.929 64 | 14.908,12.054 65 | 18.959,17.054 66 | 7.2182,4.8852 67 | 8.2951,5.7442 68 | 10.236,7.7754 69 | 5.4994,1.0173 70 | 20.341,20.992 71 | 10.136,6.6799 72 | 7.3345,4.0259 73 | 6.0062,1.2784 74 | 7.2259,3.3411 75 | 5.0269,-2.6807 76 | 6.5479,0.29678 77 | 7.5386,3.8845 78 | 5.0365,5.7014 79 | 10.274,6.7526 80 | 5.1077,2.0576 81 | 5.7292,0.47953 82 | 5.1884,0.20421 83 | 6.3557,0.67861 84 | 9.7687,7.5435 85 | 6.5159,5.3436 86 | 8.5172,4.2415 87 | 9.1802,6.7981 88 | 6.002,0.92695 89 | 5.5204,0.152 90 | 5.0594,2.8214 91 | 5.7077,1.8451 92 | 7.6366,4.2959 93 | 5.8707,7.2029 94 | 5.3054,1.9869 95 | 8.2934,0.14454 96 | 13.394,9.0551 97 | 5.4369,0.61705 98 | -------------------------------------------------------------------------------- /ex1/ex1data2.txt: -------------------------------------------------------------------------------- 1 | 2104,3,399900 2 | 1600,3,329900 3 | 2400,3,369000 4 | 1416,2,232000 5 | 3000,4,539900 6 | 1985,4,299900 7 | 1534,3,314900 8 | 1427,3,198999 9 | 1380,3,212000 10 | 1494,3,242500 11 | 1940,4,239999 12 | 2000,3,347000 13 | 1890,3,329999 14 | 4478,5,699900 15 | 1268,3,259900 16 | 2300,4,449900 17 | 1320,2,299900 18 | 1236,3,199900 19 | 2609,4,499998 20 | 3031,4,599000 21 | 1767,3,252900 22 | 1888,2,255000 23 | 1604,3,242900 24 | 1962,4,259900 25 | 3890,3,573900 26 | 1100,3,249900 27 | 1458,3,464500 28 | 2526,3,469000 29 | 2200,3,475000 30 | 2637,3,299900 31 | 1839,2,349900 32 | 1000,1,169900 33 | 2040,4,314900 34 | 3137,3,579900 35 | 1811,4,285900 36 | 1437,3,249900 37 | 1239,3,229900 38 | 2132,4,345000 39 | 4215,4,549000 40 | 2162,4,287000 41 | 1664,2,368500 42 | 2238,3,329900 43 | 2567,4,314000 44 | 1200,3,299000 45 | 852,2,179900 46 | 1852,4,299900 47 | 1203,3,239500 48 | -------------------------------------------------------------------------------- /ex1/ml-ex1-multivariate.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Exercise 1: Linear regression with multiple variables" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": { 14 | 
"collapsed": true 15 | }, 16 | "outputs": [], 17 | "source": [ 18 | "import pandas\n", 19 | "import numpy as np\n", 20 | "import matplotlib.pyplot as plt\n", 21 | "%matplotlib inline" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": {}, 27 | "source": [ 28 | "Load and explore data:" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "metadata": { 35 | "collapsed": true 36 | }, 37 | "outputs": [], 38 | "source": [ 39 | "data = pandas.read_csv('ex1data2.txt', header=None, names=['x1', 'x2', 'y'])" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": null, 45 | "metadata": { 46 | "collapsed": false 47 | }, 48 | "outputs": [], 49 | "source": [ 50 | "data.head()" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": null, 56 | "metadata": { 57 | "collapsed": false 58 | }, 59 | "outputs": [], 60 | "source": [ 61 | "data.shape" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": null, 67 | "metadata": { 68 | "collapsed": true 69 | }, 70 | "outputs": [], 71 | "source": [ 72 | "X = data[['x1', 'x2']].values\n", 73 | "Y = data['y'].values\n", 74 | "m = len(data)" 75 | ] 76 | }, 77 | { 78 | "cell_type": "markdown", 79 | "metadata": {}, 80 | "source": [ 81 | "## Part 1: Feature Normalization " 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": null, 87 | "metadata": { 88 | "collapsed": false 89 | }, 90 | "outputs": [], 91 | "source": [ 92 | "def feature_normalize(X):\n", 93 | " # FEATURENORMALIZE Normalizes the features in X \n", 94 | " # FEATURENORMALIZE(X) returns a normalized version of X where\n", 95 | " # the mean value of each feature is 0 and the standard deviation\n", 96 | " # is 1. This is often a good preprocessing step to do when\n", 97 | " # working with learning algorithms.\n", 98 | "\n", 99 | " # You need to set these values correctly\n", 100 | " X_norm = X\n", 101 | " mu = np.zeros(X.shape[1])\n", 102 | " sigma = np.zeros(X.shape[1])\n", 103 | " \n", 104 | " # ====================== YOUR CODE HERE ======================\n", 105 | " # Instructions: First, for each feature dimension, compute the mean\n", 106 | " # of the feature and subtract it from the dataset,\n", 107 | " # storing the mean value in mu. Next, compute the \n", 108 | " # standard deviation of each feature and divide\n", 109 | " # each feature by it's standard deviation, storing\n", 110 | " # the standard deviation in sigma. \n", 111 | " #\n", 112 | " # Note that X is a matrix where each column is a \n", 113 | " # feature and each row is an example. You need \n", 114 | " # to perform the normalization separately for \n", 115 | " # each feature. 
\n", 116 | " #\n", 117 | " # Hint: You might find the 'np.mean' and 'np.std' functions useful.\n", 118 | " # \n", 119 | " \n", 120 | "\n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " # ============================================================\n", 125 | " \n", 126 | " return X_norm, mu, sigma" 127 | ] 128 | }, 129 | { 130 | "cell_type": "markdown", 131 | "metadata": {}, 132 | "source": [ 133 | "Scale features and set them to zero mean:" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": null, 139 | "metadata": { 140 | "collapsed": true 141 | }, 142 | "outputs": [], 143 | "source": [ 144 | "X_norm, mu, sigma = feature_normalize(X)" 145 | ] 146 | }, 147 | { 148 | "cell_type": "markdown", 149 | "metadata": {}, 150 | "source": [ 151 | "Add intercept term to X:" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": null, 157 | "metadata": { 158 | "collapsed": false 159 | }, 160 | "outputs": [], 161 | "source": [ 162 | "X_norm = np.insert(X_norm, 0, 1, 1)\n", 163 | "X_norm[:2]" 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": null, 169 | "metadata": { 170 | "collapsed": true 171 | }, 172 | "outputs": [], 173 | "source": [ 174 | "# choose some alpha value\n", 175 | "alpha = 0.01\n", 176 | "# Init Theta\n", 177 | "theta = np.zeros(3)\n", 178 | "\n", 179 | "iterations = 400" 180 | ] 181 | }, 182 | { 183 | "cell_type": "markdown", 184 | "metadata": {}, 185 | "source": [ 186 | "## Part 2: Gradient Descent " 187 | ] 188 | }, 189 | { 190 | "cell_type": "markdown", 191 | "metadata": {}, 192 | "source": [ 193 | "Make sure your implementations of compute_cost and gradient_descent work when X has more than 2 columns!" 194 | ] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "execution_count": null, 199 | "metadata": { 200 | "collapsed": true 201 | }, 202 | "outputs": [], 203 | "source": [ 204 | "def compute_cost_multi(X, y, theta):\n", 205 | " # COMPUTECOSTMULTI Compute cost for linear regression\n", 206 | " # J = COMPUTECOSTMULTI(X, y, theta) computes the cost of using theta as the\n", 207 | " # parameter for linear regression to fit the data points in X and y\n", 208 | " \n", 209 | " # some useful values\n", 210 | " m = len(X)\n", 211 | " \n", 212 | " # You need to return this value correctly:\n", 213 | " J = 0\n", 214 | " \n", 215 | " # ====================== YOUR CODE HERE ======================\n", 216 | " # Instructions: Compute the cost of a particular choice of theta\n", 217 | " # You should set J to the cost.\n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " # ============================================================\n", 222 | " return J" 223 | ] 224 | }, 225 | { 226 | "cell_type": "markdown", 227 | "metadata": {}, 228 | "source": [ 229 | "Cost at initial theta:" 230 | ] 231 | }, 232 | { 233 | "cell_type": "code", 234 | "execution_count": null, 235 | "metadata": { 236 | "collapsed": false 237 | }, 238 | "outputs": [], 239 | "source": [ 240 | "compute_cost_multi(X_norm, Y, theta)" 241 | ] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "execution_count": null, 246 | "metadata": { 247 | "collapsed": true 248 | }, 249 | "outputs": [], 250 | "source": [ 251 | "def gradient_descent_multi(X, y, theta, alpha, num_iters):\n", 252 | " # GRADIENTDESCENT Performs gradient descent to learn theta\n", 253 | " # theta = GRADIENTDESCENT(X, y, theta, alpha, num_iters) updates theta by \n", 254 | " # taking num_iters gradient steps with learning rate alpha\n", 255 | " \n", 256 | " # Initialize\n", 257 | " J_history = 
np.zeros(num_iters)\n",
258 | " T_history = np.zeros((num_iters,X.shape[1]))\n",
259 | " \n",
260 | " for i in range(num_iters):\n",
261 | " T_history[i] = theta\n",
262 | "\n",
263 | " ### ========= YOUR CODE HERE ============\n",
264 | " # Instructions: Perform a single gradient step on the parameter vector theta.\n",
265 | " \n",
266 | " \n",
267 | " \n",
268 | " \n",
269 | " ### =====================================\n",
270 | " \n",
271 | " J_history[i] = compute_cost_multi(X, y, theta)\n",
272 | " return theta, J_history, T_history"
273 | ]
274 | },
275 | {
276 | "cell_type": "markdown",
277 | "metadata": {},
278 | "source": [
279 | "Run gradient descent:"
280 | ]
281 | },
282 | {
283 | "cell_type": "code",
284 | "execution_count": null,
285 | "metadata": {
286 | "collapsed": false
287 | },
288 | "outputs": [],
289 | "source": [
290 | "theta, J_history, T_history = gradient_descent_multi(X_norm, Y, theta, alpha, iterations)"
291 | ]
292 | },
293 | {
294 | "cell_type": "markdown",
295 | "metadata": {},
296 | "source": [
297 | "The theta values found by gradient descent should be [ 340412.65957447, 109447.79646964, -6578.35485416]."
298 | ]
299 | },
300 | {
301 | "cell_type": "code",
302 | "execution_count": null,
303 | "metadata": {
304 | "collapsed": false,
305 | "scrolled": true
306 | },
307 | "outputs": [],
308 | "source": [
309 | "theta"
310 | ]
311 | },
312 | {
313 | "cell_type": "markdown",
314 | "metadata": {},
315 | "source": [
316 | "Convergence graph:"
317 | ]
318 | },
319 | {
320 | "cell_type": "code",
321 | "execution_count": null,
322 | "metadata": {
323 | "collapsed": false
324 | },
325 | "outputs": [],
326 | "source": [
327 | "pandas.Series(J_history).plot()"
328 | ]
329 | },
330 | {
331 | "cell_type": "markdown",
332 | "metadata": {},
333 | "source": [
334 | "Estimate the price of a 1650 sq-ft, 3-bedroom house:"
335 | ]
336 | },
337 | {
338 | "cell_type": "code",
339 | "execution_count": null,
340 | "metadata": {
341 | "collapsed": false
342 | },
343 | "outputs": [],
344 | "source": [
345 | "# Estimate the price of a 1650 sq-ft, 3 br house\n",
346 | "# ====================== YOUR CODE HERE ======================\n",
347 | "# Recall that the first column of X is all-ones. Thus, it does\n",
348 | "# not need to be normalized.\n",
349 | "\n",
350 | "price = 0\n",
351 | "\n",
352 | "# ============================================================\n",
353 | "\n",
354 | "price"
355 | ]
356 | },
357 | {
358 | "cell_type": "markdown",
359 | "metadata": {},
360 | "source": [
361 | "## Part 3: Normal Equations \n",
362 | "The following code computes the closed form \n",
363 | "solution for linear regression using the normal\n",
364 | "equations. You should complete the code in \n",
365 | "normal_eqn().\n",
366 | "\n",
367 | "After doing so, you should complete this code \n",
368 | "to predict the price of a 1650 sq-ft, 3 br house.\n"
369 | ]
370 | },
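{
 "cell_type": "markdown",
 "metadata": {},
 "source": [
  "For reference, the closed-form solution from the lectures is $\\theta = (X^TX)^{-1}X^Ty$.\n",
  "One possible NumPy one-liner (a sketch, not the only valid form):\n",
  "`np.linalg.pinv(X.T.dot(X)).dot(X.T).dot(y)`"
 ]
},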
371 | {
372 | "cell_type": "code",
373 | "execution_count": null,
374 | "metadata": {
375 | "collapsed": false
376 | },
377 | "outputs": [],
378 | "source": [
379 | "data = pandas.read_csv('ex1data2.txt', header=None, names=['x1', 'x2', 'y'])\n",
380 | "X = data[['x1', 'x2']].values\n",
381 | "Y = data['y'].values\n",
382 | "X = np.insert(X, 0, 1, 1)"
383 | ]
384 | },
385 | {
386 | "cell_type": "code",
387 | "execution_count": null,
388 | "metadata": {
389 | "collapsed": true
390 | },
391 | "outputs": [],
392 | "source": [
393 | "def normal_eqn(X, y):\n",
394 | " #NORMALEQN Computes the closed-form solution to linear regression \n",
395 | " # NORMALEQN(X,y) computes the closed-form solution to linear \n",
396 | " # regression using the normal equations.\n",
397 | "\n",
398 | " theta = np.zeros(X.shape[1])\n",
399 | "\n",
400 | " # ====================== YOUR CODE HERE ======================\n",
401 | " # Instructions: Complete the code to compute the closed form solution\n",
402 | " # to linear regression and put the result in theta.\n",
403 | " #\n",
404 | "\n",
405 | "\n",
406 | "\n",
407 | " # ============================================================\n",
408 | " return theta"
409 | ]
410 | },
411 | {
412 | "cell_type": "code",
413 | "execution_count": null,
414 | "metadata": {
415 | "collapsed": false
416 | },
417 | "outputs": [],
418 | "source": [
419 | "theta = normal_eqn(X, Y)"
420 | ]
421 | },
422 | {
423 | "cell_type": "markdown",
424 | "metadata": {},
425 | "source": [
426 | "Theta found using the normal equations:"
427 | ]
428 | },
429 | {
430 | "cell_type": "code",
431 | "execution_count": null,
432 | "metadata": {
433 | "collapsed": false
434 | },
435 | "outputs": [],
436 | "source": [
437 | "theta"
438 | ]
439 | },
440 | {
441 | "cell_type": "markdown",
442 | "metadata": {},
443 | "source": [
444 | "Price estimation of a 1650 sq-ft house with 3 bedrooms, using theta from the normal equations:"
445 | ]
446 | },
447 | {
448 | "cell_type": "code",
449 | "execution_count": null,
450 | "metadata": {
451 | "collapsed": false
452 | },
453 | "outputs": [],
454 | "source": [
455 | "# ====================== YOUR CODE HERE ======================\n",
456 | "0\n",
457 | "# ============================================================"
458 | ]
459 | }
460 | ],
461 | "metadata": {
462 | "kernelspec": {
463 | "display_name": "Python 3",
464 | "language": "python",
465 | "name": "python3"
466 | },
467 | "language_info": {
468 | "codemirror_mode": {
469 | "name": "ipython",
470 | "version": 3
471 | },
472 | "file_extension": ".py",
473 | "mimetype": "text/x-python",
474 | "name": "python",
475 | "nbconvert_exporter": "python",
476 | "pygments_lexer": "ipython3",
477 | "version": "3.4.1"
478 | }
479 | },
480 | "nbformat": 4,
481 | "nbformat_minor": 0
482 | }
483 | 
-------------------------------------------------------------------------------- /ex1/ml-ex1.ipynb: --------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Exercise 1: Linear Regression"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": null,
13 | "metadata": {
14 | "collapsed": true
15 | },
16 | "outputs": [],
17 | "source": [
18 | "import pandas\n",
19 | "import numpy as np\n",
20 | 
"import matplotlib.pyplot as plt\n", 21 | "import os\n", 22 | "%matplotlib inline" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "## Part 1: Basic Function \n", 30 | " Complete warm_up_exercise. \n" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": { 37 | "collapsed": false 38 | }, 39 | "outputs": [], 40 | "source": [ 41 | "def warm_up_exercise():\n", 42 | " #WARMUPEXERCISE Example function in Python\n", 43 | " # A = WARMUPEXERCISE() is an example function that returns the 5x5 identity matrix\n", 44 | "\n", 45 | " # ============= YOUR CODE HERE ==============\n", 46 | " # Instructions: Return the 5x5 identity matrix \n", 47 | "\n", 48 | " A = np.zeros((5,5))\n", 49 | "\n", 50 | " # ===========================================\n", 51 | "\n", 52 | " \n", 53 | " return A" 54 | ] 55 | }, 56 | { 57 | "cell_type": "markdown", 58 | "metadata": {}, 59 | "source": [ 60 | "A 5x5 identity matrix:" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": null, 66 | "metadata": { 67 | "collapsed": false 68 | }, 69 | "outputs": [], 70 | "source": [ 71 | "warm_up_exercise()" 72 | ] 73 | }, 74 | { 75 | "cell_type": "markdown", 76 | "metadata": {}, 77 | "source": [ 78 | "## Part 2: Plotting \n" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": null, 84 | "metadata": { 85 | "collapsed": true 86 | }, 87 | "outputs": [], 88 | "source": [ 89 | "data = pandas.read_csv('ex1data1.txt', header=None, names=['population', 'profit'])" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": null, 95 | "metadata": { 96 | "collapsed": false 97 | }, 98 | "outputs": [], 99 | "source": [ 100 | "data.head()" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": null, 106 | "metadata": { 107 | "collapsed": false 108 | }, 109 | "outputs": [], 110 | "source": [ 111 | "data.shape" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": null, 117 | "metadata": { 118 | "collapsed": false 119 | }, 120 | "outputs": [], 121 | "source": [ 122 | "data.plot(x='population', y='profit', kind='scatter')" 123 | ] 124 | }, 125 | { 126 | "cell_type": "markdown", 127 | "metadata": {}, 128 | "source": [ 129 | "## Part 3: Gradient descent" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": null, 135 | "metadata": { 136 | "collapsed": false 137 | }, 138 | "outputs": [], 139 | "source": [ 140 | "# Adding a column of ones to the dataset\n", 141 | "data.insert(0, 'ones', 1.)\n", 142 | "data.head()" 143 | ] 144 | }, 145 | { 146 | "cell_type": "markdown", 147 | "metadata": {}, 148 | "source": [ 149 | "Initialize fitting parameters:" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": null, 155 | "metadata": { 156 | "collapsed": true 157 | }, 158 | "outputs": [], 159 | "source": [ 160 | "theta = np.zeros(2)" 161 | ] 162 | }, 163 | { 164 | "cell_type": "markdown", 165 | "metadata": {}, 166 | "source": [ 167 | "Gradient descent settings:" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": null, 173 | "metadata": { 174 | "collapsed": true 175 | }, 176 | "outputs": [], 177 | "source": [ 178 | "iterations = 1500\n", 179 | "alpha = 0.01" 180 | ] 181 | }, 182 | { 183 | "cell_type": "markdown", 184 | "metadata": {}, 185 | "source": [ 186 | "Getting the pandas data as raw numpy arrays:" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": null, 192 | "metadata": { 193 | 
"collapsed": true 194 | }, 195 | "outputs": [], 196 | "source": [ 197 | "X = data[['ones', 'population']].values\n", 198 | "Y = data['profit'].values\n", 199 | "m = len(data)" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": null, 205 | "metadata": { 206 | "collapsed": true 207 | }, 208 | "outputs": [], 209 | "source": [ 210 | "def compute_cost(X, y, theta):\n", 211 | " # COMPUTECOST Compute cost for linear regression\n", 212 | " # J = COMPUTECOST(X, y, theta) computes the cost of using theta as the\n", 213 | " # parameter for linear regression to fit the data points in X and y\n", 214 | " \n", 215 | " # some useful values\n", 216 | " m = len(X)\n", 217 | " \n", 218 | " # You need to return this value correctly:\n", 219 | " J = 0\n", 220 | " \n", 221 | " # ====================== YOUR CODE HERE ======================\n", 222 | " # Instructions: Compute the cost of a particular choice of theta\n", 223 | " # You should set J to the cost.\n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " # ============================================================\n", 228 | " return J" 229 | ] 230 | }, 231 | { 232 | "cell_type": "markdown", 233 | "metadata": {}, 234 | "source": [ 235 | "Cost at initial theta:" 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": null, 241 | "metadata": { 242 | "collapsed": false 243 | }, 244 | "outputs": [], 245 | "source": [ 246 | "compute_cost(X, Y, theta)" 247 | ] 248 | }, 249 | { 250 | "cell_type": "code", 251 | "execution_count": null, 252 | "metadata": { 253 | "collapsed": false 254 | }, 255 | "outputs": [], 256 | "source": [ 257 | "def gradient_descent(X, y, theta, alpha, num_iters):\n", 258 | " # GRADIENTDESCENT Performs gradient descent to learn theta\n", 259 | " # theta = GRADIENTDESCENT(X, y, theta, alpha, num_iters) updates theta by \n", 260 | " # taking num_iters gradient steps with learning rate alpha\n", 261 | " \n", 262 | " # Initialize\n", 263 | " J_history = np.zeros((num_iters,))\n", 264 | " T_history = np.zeros((num_iters,2))\n", 265 | " \n", 266 | " for i in range(num_iters):\n", 267 | " T_history[i] = theta\n", 268 | "\n", 269 | " ### ========= YOUR CODE HERE ============\n", 270 | " # Instructions: Perform a single gradient step on the parameter vector theta.\n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " ### =====================================\n", 275 | " \n", 276 | " J_history[i] = compute_cost(X, y, theta)\n", 277 | " return theta, J_history, T_history" 278 | ] 279 | }, 280 | { 281 | "cell_type": "markdown", 282 | "metadata": {}, 283 | "source": [ 284 | "Run gradient descent:" 285 | ] 286 | }, 287 | { 288 | "cell_type": "code", 289 | "execution_count": null, 290 | "metadata": { 291 | "collapsed": true 292 | }, 293 | "outputs": [], 294 | "source": [ 295 | "theta, J_history, T_history = gradient_descent(X, Y, theta, alpha, iterations)" 296 | ] 297 | }, 298 | { 299 | "cell_type": "markdown", 300 | "metadata": {}, 301 | "source": [ 302 | "Theta found by gradient descent:" 303 | ] 304 | }, 305 | { 306 | "cell_type": "code", 307 | "execution_count": null, 308 | "metadata": { 309 | "collapsed": false 310 | }, 311 | "outputs": [], 312 | "source": [ 313 | "theta" 314 | ] 315 | }, 316 | { 317 | "cell_type": "code", 318 | "execution_count": null, 319 | "metadata": { 320 | "collapsed": false 321 | }, 322 | "outputs": [], 323 | "source": [ 324 | "pandas.Series(J_history).plot()" 325 | ] 326 | }, 327 | { 328 | "cell_type": "markdown", 329 | "metadata": {}, 330 | "source": [ 331 | "Predicting profit for 
population size 35000 and 70000:" 332 | ] 333 | }, 334 | { 335 | "cell_type": "code", 336 | "execution_count": null, 337 | "metadata": { 338 | "collapsed": false 339 | }, 340 | "outputs": [], 341 | "source": [ 342 | "np.array([1, 3.5]).dot(theta) * 10000" 343 | ] 344 | }, 345 | { 346 | "cell_type": "code", 347 | "execution_count": null, 348 | "metadata": { 349 | "collapsed": false 350 | }, 351 | "outputs": [], 352 | "source": [ 353 | "np.array([1, 7]).dot(theta) * 10000" 354 | ] 355 | }, 356 | { 357 | "cell_type": "markdown", 358 | "metadata": {}, 359 | "source": [ 360 | "## Part 4: Visualizing J(theta_0, theta_1) " 361 | ] 362 | }, 363 | { 364 | "cell_type": "code", 365 | "execution_count": null, 366 | "metadata": { 367 | "collapsed": true 368 | }, 369 | "outputs": [], 370 | "source": [ 371 | "xx, yy = np.meshgrid(np.linspace(-10, 10, 100), np.linspace(-1, 4, 100))" 372 | ] 373 | }, 374 | { 375 | "cell_type": "code", 376 | "execution_count": null, 377 | "metadata": { 378 | "collapsed": true 379 | }, 380 | "outputs": [], 381 | "source": [ 382 | "J_vals = np.zeros(xx.shape)" 383 | ] 384 | }, 385 | { 386 | "cell_type": "code", 387 | "execution_count": null, 388 | "metadata": { 389 | "collapsed": true 390 | }, 391 | "outputs": [], 392 | "source": [ 393 | "for i in range(xx.shape[0]):\n", 394 | " for j in range(xx.shape[1]):\n", 395 | " J_vals[i, j] = compute_cost(X, Y, np.array([xx[i,j], yy[i,j]]))" 396 | ] 397 | }, 398 | { 399 | "cell_type": "code", 400 | "execution_count": null, 401 | "metadata": { 402 | "collapsed": false 403 | }, 404 | "outputs": [], 405 | "source": [ 406 | "from mpl_toolkits.mplot3d import Axes3D\n", 407 | "fig = plt.figure()\n", 408 | "ax = fig.add_subplot(111, projection='3d')\n", 409 | "ax.plot_surface(xx, yy, J_vals)" 410 | ] 411 | }, 412 | { 413 | "cell_type": "code", 414 | "execution_count": null, 415 | "metadata": { 416 | "collapsed": false 417 | }, 418 | "outputs": [], 419 | "source": [ 420 | "fig = plt.figure()\n", 421 | "ax = fig.add_subplot(111)\n", 422 | "ax.contour(xx, yy, J_vals, levels=np.logspace(-2, 3, 15))\n", 423 | "ax.plot(theta[0], theta[1], 'r+', markersize=10)" 424 | ] 425 | }, 426 | { 427 | "cell_type": "code", 428 | "execution_count": null, 429 | "metadata": { 430 | "collapsed": false 431 | }, 432 | "outputs": [], 433 | "source": [ 434 | "fig = plt.figure()\n", 435 | "ax = fig.add_subplot(111)\n", 436 | "ax.scatter(data.population, data.profit)\n", 437 | "Xs = np.linspace(0,25,100)\n", 438 | "def Ys(theta):\n", 439 | " return theta[0] + Xs*theta[1]\n", 440 | "ax.plot(Xs, Ys(theta))" 441 | ] 442 | }, 443 | { 444 | "cell_type": "code", 445 | "execution_count": null, 446 | "metadata": { 447 | "collapsed": false 448 | }, 449 | "outputs": [], 450 | "source": [ 451 | "T_history" 452 | ] 453 | }, 454 | { 455 | "cell_type": "code", 456 | "execution_count": null, 457 | "metadata": { 458 | "collapsed": false 459 | }, 460 | "outputs": [], 461 | "source": [ 462 | "fig = plt.figure(figsize=(10,15))\n", 463 | "for i, t in enumerate(T_history[:10]):\n", 464 | " ax = fig.add_subplot(5,2,i+1)\n", 465 | " ax.autoscale(tight=True)\n", 466 | " ax.scatter(data.population, data.profit)\n", 467 | " ax.plot(Xs, Ys(t))" 468 | ] 469 | } 470 | ], 471 | "metadata": { 472 | "kernelspec": { 473 | "display_name": "Python 3", 474 | "language": "python", 475 | "name": "python3" 476 | }, 477 | "language_info": { 478 | "codemirror_mode": { 479 | "name": "ipython", 480 | "version": 3 481 | }, 482 | "file_extension": ".py", 483 | "mimetype": "text/x-python", 484 | "name": "python", 
485 | "nbconvert_exporter": "python", 486 | "pygments_lexer": "ipython3", 487 | "version": "3.4.1" 488 | } 489 | }, 490 | "nbformat": 4, 491 | "nbformat_minor": 0 492 | } 493 | -------------------------------------------------------------------------------- /ex2/ex2data1.txt: -------------------------------------------------------------------------------- 1 | 34.62365962451697,78.0246928153624,0 2 | 30.28671076822607,43.89499752400101,0 3 | 35.84740876993872,72.90219802708364,0 4 | 60.18259938620976,86.30855209546826,1 5 | 79.0327360507101,75.3443764369103,1 6 | 45.08327747668339,56.3163717815305,0 7 | 61.10666453684766,96.51142588489624,1 8 | 75.02474556738889,46.55401354116538,1 9 | 76.09878670226257,87.42056971926803,1 10 | 84.43281996120035,43.53339331072109,1 11 | 95.86155507093572,38.22527805795094,0 12 | 75.01365838958247,30.60326323428011,0 13 | 82.30705337399482,76.48196330235604,1 14 | 69.36458875970939,97.71869196188608,1 15 | 39.53833914367223,76.03681085115882,0 16 | 53.9710521485623,89.20735013750205,1 17 | 69.07014406283025,52.74046973016765,1 18 | 67.94685547711617,46.67857410673128,0 19 | 70.66150955499435,92.92713789364831,1 20 | 76.97878372747498,47.57596364975532,1 21 | 67.37202754570876,42.83843832029179,0 22 | 89.67677575072079,65.79936592745237,1 23 | 50.534788289883,48.85581152764205,0 24 | 34.21206097786789,44.20952859866288,0 25 | 77.9240914545704,68.9723599933059,1 26 | 62.27101367004632,69.95445795447587,1 27 | 80.1901807509566,44.82162893218353,1 28 | 93.114388797442,38.80067033713209,0 29 | 61.83020602312595,50.25610789244621,0 30 | 38.78580379679423,64.99568095539578,0 31 | 61.379289447425,72.80788731317097,1 32 | 85.40451939411645,57.05198397627122,1 33 | 52.10797973193984,63.12762376881715,0 34 | 52.04540476831827,69.43286012045222,1 35 | 40.23689373545111,71.16774802184875,0 36 | 54.63510555424817,52.21388588061123,0 37 | 33.91550010906887,98.86943574220611,0 38 | 64.17698887494485,80.90806058670817,1 39 | 74.78925295941542,41.57341522824434,0 40 | 34.1836400264419,75.2377203360134,0 41 | 83.90239366249155,56.30804621605327,1 42 | 51.54772026906181,46.85629026349976,0 43 | 94.44336776917852,65.56892160559052,1 44 | 82.36875375713919,40.61825515970618,0 45 | 51.04775177128865,45.82270145776001,0 46 | 62.22267576120188,52.06099194836679,0 47 | 77.19303492601364,70.45820000180959,1 48 | 97.77159928000232,86.7278223300282,1 49 | 62.07306379667647,96.76882412413983,1 50 | 91.56497449807442,88.69629254546599,1 51 | 79.94481794066932,74.16311935043758,1 52 | 99.2725269292572,60.99903099844988,1 53 | 90.54671411399852,43.39060180650027,1 54 | 34.52451385320009,60.39634245837173,0 55 | 50.2864961189907,49.80453881323059,0 56 | 49.58667721632031,59.80895099453265,0 57 | 97.64563396007767,68.86157272420604,1 58 | 32.57720016809309,95.59854761387875,0 59 | 74.24869136721598,69.82457122657193,1 60 | 71.79646205863379,78.45356224515052,1 61 | 75.3956114656803,85.75993667331619,1 62 | 35.28611281526193,47.02051394723416,0 63 | 56.25381749711624,39.26147251058019,0 64 | 30.05882244669796,49.59297386723685,0 65 | 44.66826172480893,66.45008614558913,0 66 | 66.56089447242954,41.09209807936973,0 67 | 40.45755098375164,97.53518548909936,1 68 | 49.07256321908844,51.88321182073966,0 69 | 80.27957401466998,92.11606081344084,1 70 | 66.74671856944039,60.99139402740988,1 71 | 32.72283304060323,43.30717306430063,0 72 | 64.0393204150601,78.03168802018232,1 73 | 72.34649422579923,96.22759296761404,1 74 | 60.45788573918959,73.09499809758037,1 75 | 58.84095621726802,75.85844831279042,1 
76 | 99.82785779692128,72.36925193383885,1 77 | 47.26426910848174,88.47586499559782,1 78 | 50.45815980285988,75.80985952982456,1 79 | 60.45555629271532,42.50840943572217,0 80 | 82.22666157785568,42.71987853716458,0 81 | 88.9138964166533,69.80378889835472,1 82 | 94.83450672430196,45.69430680250754,1 83 | 67.31925746917527,66.58935317747915,1 84 | 57.23870631569862,59.51428198012956,1 85 | 80.36675600171273,90.96014789746954,1 86 | 68.46852178591112,85.59430710452014,1 87 | 42.0754545384731,78.84478600148043,0 88 | 75.47770200533905,90.42453899753964,1 89 | 78.63542434898018,96.64742716885644,1 90 | 52.34800398794107,60.76950525602592,0 91 | 94.09433112516793,77.15910509073893,1 92 | 90.44855097096364,87.50879176484702,1 93 | 55.48216114069585,35.57070347228866,0 94 | 74.49269241843041,84.84513684930135,1 95 | 89.84580670720979,45.35828361091658,1 96 | 83.48916274498238,48.38028579728175,1 97 | 42.2617008099817,87.10385094025457,1 98 | 99.31500880510394,68.77540947206617,1 99 | 55.34001756003703,64.9319380069486,1 100 | 74.77589300092767,89.52981289513276,1 101 | -------------------------------------------------------------------------------- /ex2/ex2data2.txt: -------------------------------------------------------------------------------- 1 | 0.051267,0.69956,1 2 | -0.092742,0.68494,1 3 | -0.21371,0.69225,1 4 | -0.375,0.50219,1 5 | -0.51325,0.46564,1 6 | -0.52477,0.2098,1 7 | -0.39804,0.034357,1 8 | -0.30588,-0.19225,1 9 | 0.016705,-0.40424,1 10 | 0.13191,-0.51389,1 11 | 0.38537,-0.56506,1 12 | 0.52938,-0.5212,1 13 | 0.63882,-0.24342,1 14 | 0.73675,-0.18494,1 15 | 0.54666,0.48757,1 16 | 0.322,0.5826,1 17 | 0.16647,0.53874,1 18 | -0.046659,0.81652,1 19 | -0.17339,0.69956,1 20 | -0.47869,0.63377,1 21 | -0.60541,0.59722,1 22 | -0.62846,0.33406,1 23 | -0.59389,0.005117,1 24 | -0.42108,-0.27266,1 25 | -0.11578,-0.39693,1 26 | 0.20104,-0.60161,1 27 | 0.46601,-0.53582,1 28 | 0.67339,-0.53582,1 29 | -0.13882,0.54605,1 30 | -0.29435,0.77997,1 31 | -0.26555,0.96272,1 32 | -0.16187,0.8019,1 33 | -0.17339,0.64839,1 34 | -0.28283,0.47295,1 35 | -0.36348,0.31213,1 36 | -0.30012,0.027047,1 37 | -0.23675,-0.21418,1 38 | -0.06394,-0.18494,1 39 | 0.062788,-0.16301,1 40 | 0.22984,-0.41155,1 41 | 0.2932,-0.2288,1 42 | 0.48329,-0.18494,1 43 | 0.64459,-0.14108,1 44 | 0.46025,0.012427,1 45 | 0.6273,0.15863,1 46 | 0.57546,0.26827,1 47 | 0.72523,0.44371,1 48 | 0.22408,0.52412,1 49 | 0.44297,0.67032,1 50 | 0.322,0.69225,1 51 | 0.13767,0.57529,1 52 | -0.0063364,0.39985,1 53 | -0.092742,0.55336,1 54 | -0.20795,0.35599,1 55 | -0.20795,0.17325,1 56 | -0.43836,0.21711,1 57 | -0.21947,-0.016813,1 58 | -0.13882,-0.27266,1 59 | 0.18376,0.93348,0 60 | 0.22408,0.77997,0 61 | 0.29896,0.61915,0 62 | 0.50634,0.75804,0 63 | 0.61578,0.7288,0 64 | 0.60426,0.59722,0 65 | 0.76555,0.50219,0 66 | 0.92684,0.3633,0 67 | 0.82316,0.27558,0 68 | 0.96141,0.085526,0 69 | 0.93836,0.012427,0 70 | 0.86348,-0.082602,0 71 | 0.89804,-0.20687,0 72 | 0.85196,-0.36769,0 73 | 0.82892,-0.5212,0 74 | 0.79435,-0.55775,0 75 | 0.59274,-0.7405,0 76 | 0.51786,-0.5943,0 77 | 0.46601,-0.41886,0 78 | 0.35081,-0.57968,0 79 | 0.28744,-0.76974,0 80 | 0.085829,-0.75512,0 81 | 0.14919,-0.57968,0 82 | -0.13306,-0.4481,0 83 | -0.40956,-0.41155,0 84 | -0.39228,-0.25804,0 85 | -0.74366,-0.25804,0 86 | -0.69758,0.041667,0 87 | -0.75518,0.2902,0 88 | -0.69758,0.68494,0 89 | -0.4038,0.70687,0 90 | -0.38076,0.91886,0 91 | -0.50749,0.90424,0 92 | -0.54781,0.70687,0 93 | 0.10311,0.77997,0 94 | 0.057028,0.91886,0 95 | -0.10426,0.99196,0 96 | -0.081221,1.1089,0 97 | 
0.28744,1.087,0
98 | 0.39689,0.82383,0
99 | 0.63882,0.88962,0
100 | 0.82316,0.66301,0
101 | 0.67339,0.64108,0
102 | 1.0709,0.10015,0
103 | -0.046659,-0.57968,0
104 | -0.23675,-0.63816,0
105 | -0.15035,-0.36769,0
106 | -0.49021,-0.3019,0
107 | -0.46717,-0.13377,0
108 | -0.28859,-0.060673,0
109 | -0.61118,-0.067982,0
110 | -0.66302,-0.21418,0
111 | -0.59965,-0.41886,0
112 | -0.72638,-0.082602,0
113 | -0.83007,0.31213,0
114 | -0.72062,0.53874,0
115 | -0.59389,0.49488,0
116 | -0.48445,0.99927,0
117 | -0.0063364,0.99927,0
118 | 0.63265,-0.030612,0
119 | 
-------------------------------------------------------------------------------- /ex2/ml-ex2.ipynb: --------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Exercise 2: Logistic Regression"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": null,
13 | "metadata": {
14 | "collapsed": false
15 | },
16 | "outputs": [],
17 | "source": [
18 | "import pandas\n",
19 | "import numpy as np\n",
20 | "import scipy.optimize\n",
21 | "import matplotlib.pyplot as plt\n",
22 | "%matplotlib inline"
23 | ]
24 | },
25 | {
26 | "cell_type": "markdown",
27 | "metadata": {},
28 | "source": [
29 | "## Part 1: Plotting \n",
30 | " We start the exercise by first plotting the data to understand\n",
31 | " the problem we are working with.\n"
32 | ]
33 | },
34 | {
35 | "cell_type": "code",
36 | "execution_count": null,
37 | "metadata": {
38 | "collapsed": false
39 | },
40 | "outputs": [],
41 | "source": [
42 | "data1 = pandas.read_csv(\"ex2data1.txt\", header=None, names=['test1', 'test2', 'accepted'])\n",
43 | "data1.head()"
44 | ]
45 | },
46 | {
47 | "cell_type": "markdown",
48 | "metadata": {},
49 | "source": [
50 | "Plotting data with + indicating (y = 1) examples and o \n",
51 | " indicating (y = 0) examples."
52 | ]
53 | },
54 | {
55 | "cell_type": "code",
56 | "execution_count": null,
57 | "metadata": {
58 | "collapsed": false
59 | },
60 | "outputs": [],
61 | "source": [
62 | "def plotData(data):\n",
63 | " fig, ax = plt.subplots()\n",
64 | " results_accepted = data[data.accepted == 1]\n",
65 | " results_rejected = data[data.accepted == 0]\n",
66 | " ax.scatter(results_accepted.test1, results_accepted.test2, marker='+', c='b', s=40)\n",
67 | " ax.scatter(results_rejected.test1, results_rejected.test2, marker='o', c='r', s=30)\n",
68 | " return ax\n",
69 | "\n",
70 | "ax = plotData(data1)\n",
71 | "ax.set_ylim([20, 130])\n",
72 | "ax.legend(['Admitted', 'Not admitted'], loc='best')\n",
73 | "ax.set_xlabel('Exam 1 score')\n",
74 | "ax.set_ylabel('Exam 2 score')"
75 | ]
76 | },
77 | {
78 | "cell_type": "code",
79 | "execution_count": null,
80 | "metadata": {
81 | "collapsed": false
82 | },
83 | "outputs": [],
84 | "source": [
85 | "X = data1[['test1', 'test2']].values\n",
86 | "y = data1.accepted.values\n",
87 | "m, n = X.shape\n",
88 | "X = np.insert(X, 0, np.ones(len(X)), 1)\n",
89 | "m, n"
90 | ]
91 | },
92 | {
93 | "cell_type": "markdown",
94 | "metadata": {},
95 | "source": [
96 | "## Part 2: Compute Cost and Gradient \n",
97 | " In this part of the exercise, you will implement the cost and gradient\n",
98 | " for logistic regression. You need to complete the code in \n",
99 | " the function `cost`.\n",
100 | " \n",
101 | " "
102 | ]
103 | },
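{
 "cell_type": "markdown",
 "metadata": {},
 "source": [
  "For reference, the (unregularized) cost function from the lectures is\n",
  "\n",
  "$$J(\\theta) = \\frac{1}{m}\\sum_{i=1}^{m}\\left[-y^{(i)}\\log(h_\\theta(x^{(i)})) - (1-y^{(i)})\\log(1-h_\\theta(x^{(i)}))\\right]$$\n",
  "\n",
  "where $h_\\theta(x) = g(\\theta^Tx)$ and $g$ is the sigmoid function."
 ]
},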
104 | {
105 | "cell_type": "code",
106 | "execution_count": null,
107 | "metadata": {
108 | "collapsed": true
109 | },
110 | "outputs": [],
111 | "source": [
112 | "def sigmoid(z):\n",
113 | " #SIGMOID Compute sigmoid function\n",
114 | " # g = SIGMOID(z) computes the sigmoid of z.\n",
115 | " \n",
116 | " # You need to return the following variables correctly \n",
117 | " g = np.zeros(z.shape)\n",
118 | "\n",
119 | " # ====================== YOUR CODE HERE ======================\n",
120 | " # Instructions: Compute the sigmoid of each value of z (z can be a matrix,\n",
121 | " # vector or scalar).\n",
122 | "\n",
123 | " \n",
124 | " # =============================================================\n",
125 | " \n",
126 | " return g"
127 | ]
128 | },
129 | {
130 | "cell_type": "code",
131 | "execution_count": null,
132 | "metadata": {
133 | "collapsed": true
134 | },
135 | "outputs": [],
136 | "source": [
137 | "def cost(X, y, theta, lambda_=0):\n",
138 | " #COSTFUNCTION Compute cost and gradient for logistic regression\n",
139 | " # J = COSTFUNCTION(X, y, theta) computes the cost of using theta as the\n",
140 | " # parameter for logistic regression and the gradient of the cost\n",
141 | " # w.r.t. the parameters.\n",
142 | "\n",
143 | " # Initialize some useful values\n",
144 | " m = len(y)\n",
145 | " \n",
146 | " # You need to return the following variables correctly\n",
147 | " J = 0\n",
148 | " \n",
149 | " \n",
150 | " # ====================== YOUR CODE HERE ======================\n",
151 | " # Instructions: Compute the cost of a particular choice of theta.\n",
152 | " # You should set J to the cost.\n",
153 | " # (The partial derivatives are computed in the separate\n",
154 | " # `gradient` function below.)\n",
155 | " #\n",
156 | "\n",
157 | " \n",
158 | " \n",
159 | " # =============================================================\n",
160 | " \n",
161 | " return J"
162 | ]
163 | },
164 | {
165 | "cell_type": "code",
166 | "execution_count": null,
167 | "metadata": {
168 | "collapsed": false
169 | },
170 | "outputs": [],
171 | "source": [
172 | "def gradient(X, y, theta, lambda_=0):\n",
173 | " # Initialize some useful values\n",
174 | " m = len(y)\n",
175 | " \n",
176 | " # You need to return the following variables correctly\n",
177 | " grad = np.zeros(theta.shape)\n",
178 | " \n",
179 | " # ====================== YOUR CODE HERE ======================\n",
180 | " # Instructions: Set grad to the partial derivatives of the cost\n",
181 | " # w.r.t. each parameter in theta.\n",
182 | " \n",
183 | " \n",
184 | " \n",
185 | " # =============================================================\n",
186 | " \n",
187 | " return grad"
188 | ]
189 | },
190 | {
191 | "cell_type": "code",
192 | "execution_count": null,
193 | "metadata": {
194 | "collapsed": false
195 | },
196 | "outputs": [],
197 | "source": [
198 | "initial_theta = np.zeros(n + 1)\n",
199 | "initial_theta.shape"
200 | ]
201 | },
202 | {
203 | "cell_type": "markdown",
204 | "metadata": {},
205 | "source": [
206 | "The cost at initial theta (zeros) should be about `0.693`."
207 | ]
208 | },
209 | {
210 | "cell_type": "code",
211 | "execution_count": null,
212 | "metadata": {
213 | "collapsed": false
214 | },
215 | "outputs": [],
216 | "source": [
217 | "cost(X, y, np.array(initial_theta))"
218 | ]
219 | },
220 | {
221 | "cell_type": "markdown",
222 | "metadata": {},
223 | "source": [
224 | "The gradient at initial theta should be `[-0.1, -12.01, -11.26]`."
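,
  "\n",
  "\n",
  "A fully vectorized sketch of both quantities (one possibility -- any equivalent form works):\n",
  "\n",
  "```python\n",
  "h = sigmoid(X.dot(theta))\n",
  "J = (-y.dot(np.log(h)) - (1 - y).dot(np.log(1 - h))) / m\n",
  "grad = X.T.dot(h - y) / m\n",
  "```"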
225 | ]
226 | },
227 | {
228 | "cell_type": "code",
229 | "execution_count": null,
230 | "metadata": {
231 | "collapsed": false
232 | },
233 | "outputs": [],
234 | "source": [
235 | "gradient(X, y, np.array([0,0,0]))"
236 | ]
237 | },
238 | {
239 | "cell_type": "markdown",
240 | "metadata": {},
241 | "source": [
242 | "## Part 3: Optimizing using scipy.optimize\n",
243 | " In this exercise, you will use a built-in function (scipy.optimize.fmin_ncg) to find the\n",
244 | " optimal parameters theta.\n"
245 | ]
246 | },
247 | {
248 | "cell_type": "code",
249 | "execution_count": null,
250 | "metadata": {
251 | "collapsed": false
252 | },
253 | "outputs": [],
254 | "source": [
255 | "def mycost(t):\n",
256 | " return cost(X, y, t)\n",
257 | "\n",
258 | "def mygrad(t):\n",
259 | " return gradient(X, y, t)\n",
260 | "\n",
261 | "optimal_theta = scipy.optimize.fmin_ncg(mycost,\n",
262 | " initial_theta,\n",
263 | " fprime=mygrad)"
264 | ]
265 | },
266 | {
267 | "cell_type": "markdown",
268 | "metadata": {},
269 | "source": [
270 | "Value of theta that minimizes the cost function:"
271 | ]
272 | },
273 | {
274 | "cell_type": "code",
275 | "execution_count": null,
276 | "metadata": {
277 | "collapsed": false
278 | },
279 | "outputs": [],
280 | "source": [
281 | "optimal_theta"
282 | ]
283 | },
284 | {
285 | "cell_type": "markdown",
286 | "metadata": {},
287 | "source": [
288 | "We plot the decision boundary."
289 | ]
290 | },
291 | {
292 | "cell_type": "code",
293 | "execution_count": null,
294 | "metadata": {
295 | "collapsed": false
296 | },
297 | "outputs": [],
298 | "source": [
299 | "ax = plotData(data1)\n",
300 | "x_plot = np.array([np.max(X[:, 1]), np.min(X[:,1])])\n",
301 | "y_plot = (-optimal_theta[0] - optimal_theta[1]*x_plot) / (optimal_theta[2])\n",
302 | "ax.plot(x_plot, y_plot)"
303 | ]
304 | },
305 | {
306 | "cell_type": "markdown",
307 | "metadata": {},
308 | "source": [
309 | "## Part 4: Predict and Accuracies \n",
310 | " After learning the parameters, you'd like to use the model to predict the outcomes\n",
311 | " on unseen data. In this part, you will use the logistic regression model\n",
312 | " to predict the probability that a student with score 45 on exam 1 and \n",
313 | " score 85 on exam 2 will be admitted.\n",
314 | "\n",
315 | " Furthermore, you will compute the training set accuracy of \n",
316 | " our model.\n",
317 | "\n",
318 | " Your task is to complete the code in `predict`.\n"
319 | ]
320 | },
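{
 "cell_type": "markdown",
 "metadata": {},
 "source": [
  "(A sketch of the thresholding step, assuming the `sigmoid` above: `p = (sigmoid(x.dot(t)) >= 0.5).astype(int)`.)"
 ]
},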
\n", 343 | " # You should set p to a vector of 0's and 1's\n", 344 | " #\n", 345 | "\n", 346 | "\n", 347 | " \n", 348 | " \n", 349 | " # =========================================================================\n", 350 | " \n", 351 | " return p" 352 | ] 353 | }, 354 | { 355 | "cell_type": "markdown", 356 | "metadata": {}, 357 | "source": [ 358 | "Let's predict the admission probably of a student with scores 45 and 85:" 359 | ] 360 | }, 361 | { 362 | "cell_type": "code", 363 | "execution_count": null, 364 | "metadata": { 365 | "collapsed": false 366 | }, 367 | "outputs": [], 368 | "source": [ 369 | "0" 370 | ] 371 | }, 372 | { 373 | "cell_type": "markdown", 374 | "metadata": {}, 375 | "source": [ 376 | "Training set accuracy:" 377 | ] 378 | }, 379 | { 380 | "cell_type": "code", 381 | "execution_count": null, 382 | "metadata": { 383 | "collapsed": false, 384 | "scrolled": true 385 | }, 386 | "outputs": [], 387 | "source": [ 388 | "np.mean(predict(optimal_theta, X) == y)" 389 | ] 390 | }, 391 | { 392 | "cell_type": "markdown", 393 | "metadata": {}, 394 | "source": [ 395 | "# Part 2: Regularized logistic regression\n", 396 | "\n", 397 | "In this part, you are given a dataset with data points that are not\n", 398 | "linearly separable. However, you would still like to use logistic \n", 399 | "regression to classify the data points. \n", 400 | "\n", 401 | "To do so, you introduce more features to use -- in particular, you add\n", 402 | "polynomial features to our data matrix (similar to polynomial\n", 403 | "regression).\n", 404 | "\n", 405 | "You're expected to modify the cost and gradient functions you've already written so that they take the regularization constant into account and perform regularization." 406 | ] 407 | }, 408 | { 409 | "cell_type": "code", 410 | "execution_count": null, 411 | "metadata": { 412 | "collapsed": false 413 | }, 414 | "outputs": [], 415 | "source": [ 416 | "data2 = pandas.read_csv(\"./ex2data2.txt\", header=None, names=['test1', 'test2', 'accepted'])\n", 417 | "data2.head()" 418 | ] 419 | }, 420 | { 421 | "cell_type": "code", 422 | "execution_count": null, 423 | "metadata": { 424 | "collapsed": false 425 | }, 426 | "outputs": [], 427 | "source": [ 428 | "ax = plotData(data2)\n", 429 | "ax.legend(['y = 1', 'y = 0'], loc='best')\n", 430 | "ax.set_xlabel('Microchip test 1')\n", 431 | "ax.set_ylabel('Microchip test 2')" 432 | ] 433 | }, 434 | { 435 | "cell_type": "code", 436 | "execution_count": null, 437 | "metadata": { 438 | "collapsed": false 439 | }, 440 | "outputs": [], 441 | "source": [ 442 | "def mapFeature(x1, x2):\n", 443 | " ret = np.array([x1**(i-j) * x2**j \n", 444 | " for i in range(1,7) for j in range(i+1)\n", 445 | " ])\n", 446 | " return np.insert(ret, 0, np.ones(len(x1)), 0).T\n", 447 | "\n", 448 | "mapFeature(np.array([2,3]),np.array([3,2]))[:, :10]" 449 | ] 450 | }, 451 | { 452 | "cell_type": "markdown", 453 | "metadata": {}, 454 | "source": [ 455 | " Note that mapFeature also adds a column of ones for us, so the intercept\n", 456 | " term is handled." 
457 | ]
458 | },
459 | {
460 | "cell_type": "code",
461 | "execution_count": null,
462 | "metadata": {
463 | "collapsed": false,
464 | "scrolled": true
465 | },
466 | "outputs": [],
467 | "source": [
468 | "X = mapFeature(data2.test1, data2.test2)\n",
469 | "y = data2.accepted.values\n",
470 | "initial_theta = np.zeros(X.shape[1])\n",
471 | "X.shape, y.shape, initial_theta.shape"
472 | ]
473 | },
474 | {
475 | "cell_type": "markdown",
476 | "metadata": {},
477 | "source": [
478 | "The cost at the initial theta (using `lambda_ = 1`) is:"
479 | ]
480 | },
481 | {
482 | "cell_type": "code",
483 | "execution_count": null,
484 | "metadata": {
485 | "collapsed": false
486 | },
487 | "outputs": [],
488 | "source": [
489 | "lambda_ = 1\n", "cost(X, y, initial_theta, lambda_)"
490 | ]
491 | },
492 | {
493 | "cell_type": "markdown",
494 | "metadata": {},
495 | "source": [
496 | "## Part 2: Regularization and Accuracies \n",
497 | " Optional Exercise:\n",
498 | " In this part, you will get to try different values of lambda and \n",
499 | " see how regularization affects the decision boundary.\n",
500 | "\n",
501 | " Try the following values of lambda (0, 1, 10, 100).\n",
502 | "\n",
503 | " How does the decision boundary change when you vary lambda? How does\n",
504 | " the training set accuracy vary?\n",
505 | "\n"
506 | ]
507 | },
508 | {
509 | "cell_type": "code",
510 | "execution_count": null,
511 | "metadata": {
512 | "collapsed": false
513 | },
514 | "outputs": [],
515 | "source": [
516 | "lambda_ = 0"
517 | ]
518 | },
519 | {
520 | "cell_type": "code",
521 | "execution_count": null,
522 | "metadata": {
523 | "collapsed": false
524 | },
525 | "outputs": [],
526 | "source": [
527 | "optimal_theta = scipy.optimize.fmin_bfgs(lambda t: cost(X, y, t, lambda_),\n",
528 | " initial_theta,\n",
529 | " lambda t: gradient(X, y, t, lambda_))\n"
530 | ]
531 | },
532 | {
533 | "cell_type": "markdown",
534 | "metadata": {},
535 | "source": [
536 | "At the optimal theta value, the accuracy is:"
537 | ]
538 | },
539 | {
540 | "cell_type": "code",
541 | "execution_count": null,
542 | "metadata": {
543 | "collapsed": false
544 | },
545 | "outputs": [],
546 | "source": [
547 | "np.mean(predict(optimal_theta, X) == y)"
548 | ]
549 | },
550 | {
551 | "cell_type": "code",
552 | "execution_count": null,
553 | "metadata": {
554 | "collapsed": false,
555 | "scrolled": true
556 | },
557 | "outputs": [],
558 | "source": [
559 | "optimal_theta"
560 | ]
561 | },
562 | {
563 | "cell_type": "markdown",
564 | "metadata": {},
565 | "source": [
566 | "The decision boundary:"
567 | ]
568 | },
569 | {
570 | "cell_type": "code",
571 | "execution_count": null,
572 | "metadata": {
573 | "collapsed": false,
574 | "scrolled": true
575 | },
576 | "outputs": [],
577 | "source": [
578 | "contour_x = np.linspace(-1, 1.5)\n",
579 | "contour_y = np.linspace(-1, 1.5)\n",
580 | "def calc_z(x, y):\n",
581 | " return mapFeature(np.array([x]), np.array([y])).dot(optimal_theta)\n",
582 | "\n",
583 | "z = np.zeros((len(contour_x), len(contour_y)))\n",
584 | "for i, c_x in enumerate(contour_x):\n",
585 | " for j, c_y in enumerate(contour_y):\n",
586 | " z[i,j] = calc_z(c_x, c_y)[0]\n",
587 | " \n",
588 | "ax = plotData(data2)\n",
589 | "ax.contour(contour_x, contour_y, z, levels=[0])"
590 | ]
591 | }
592 | ],
593 | "metadata": {
594 | "kernelspec": {
595 | "display_name": "Python 3",
596 | "language": "python",
597 | "name": "python3"
598 | },
599 | "language_info": {
600 | "codemirror_mode": {
601 | "name": "ipython",
602 | "version": 3
603 | },
604 | "file_extension": ".py",
605 | 
"mimetype": "text/x-python", 606 | "name": "python", 607 | "nbconvert_exporter": "python", 608 | "pygments_lexer": "ipython3", 609 | "version": "3.4.1" 610 | } 611 | }, 612 | "nbformat": 4, 613 | "nbformat_minor": 0 614 | } 615 | -------------------------------------------------------------------------------- /ex3/ex3data1.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/noammor/coursera-machinelearning-python/454abd5a4ad315dc8a6391b4bb0ce4c280a394bd/ex3/ex3data1.mat -------------------------------------------------------------------------------- /ex3/ex3weights.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/noammor/coursera-machinelearning-python/454abd5a4ad315dc8a6391b4bb0ce4c280a394bd/ex3/ex3weights.mat -------------------------------------------------------------------------------- /ex3/ml-ex3-2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Part 2: Neural Networks" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": { 14 | "collapsed": false 15 | }, 16 | "outputs": [], 17 | "source": [ 18 | "import numpy as np\n", 19 | "import scipy.io\n", 20 | "import matplotlib.pyplot as plt\n", 21 | "%matplotlib inline" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": { 28 | "collapsed": false 29 | }, 30 | "outputs": [], 31 | "source": [ 32 | "ex3data1 = scipy.io.loadmat(\"./ex3data1.mat\")\n", 33 | "X = ex3data1['X']\n", 34 | "y = ex3data1['y'][:,0]\n", 35 | "y[y==10] = 0\n", 36 | "m, n = X.shape\n", 37 | "m, n" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": null, 43 | "metadata": { 44 | "collapsed": false 45 | }, 46 | "outputs": [], 47 | "source": [ 48 | "## Setup the parameters you will use for this exercise\n", 49 | "input_layer_size = n # 20x20 Input Images of Digits\n", 50 | "hidden_layer_size = 25 # 25 hidden units\n", 51 | "num_labels = 10 # 10 labels, from 0 to 9" 52 | ] 53 | }, 54 | { 55 | "cell_type": "markdown", 56 | "metadata": {}, 57 | "source": [ 58 | "We have supplied pre-calculated neural network parameters. 
We load them:" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": null, 64 | "metadata": { 65 | "collapsed": false 66 | }, 67 | "outputs": [], 68 | "source": [ 69 | "ex3weights = scipy.io.loadmat('./ex3weights.mat')\n", 70 | "Theta1 = ex3weights['Theta1']\n", 71 | "Theta2 = ex3weights['Theta2']\n", 72 | "Theta1.shape, Theta2.shape" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": null, 78 | "metadata": { 79 | "collapsed": false 80 | }, 81 | "outputs": [], 82 | "source": [ 83 | "def sigmoid(z):\n", 84 | " return 1 / (1+np.exp(-z))" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": null, 90 | "metadata": { 91 | "collapsed": true 92 | }, 93 | "outputs": [], 94 | "source": [ 95 | "def predict(Theta1, Theta2, X):\n", 96 | " #PREDICT Predict the label of an input given a trained neural network\n", 97 | " # p = PREDICT(Theta1, Theta2, X) outputs the predicted label of X given the\n", 98 | " # trained weights of a neural network (Theta1, Theta2)\n", 99 | " \n", 100 | " \n", 101 | " # You need to return the following variables correctly \n", 102 | " p = np.zeros(X.shape[0])\n", 103 | "\n", 104 | " \n", 105 | " # ====================== YOUR CODE HERE ======================\n", 106 | " # Instructions: Complete the following code to make predictions using\n", 107 | " # your learned neural network. You should set p to a \n", 108 | " # vector containing labels between 1 to num_labels.\n", 109 | " #\n", 110 | " # Hint: The np.argmax might come in useful.\n", 111 | "\n", 112 | "\n", 113 | "\n", 114 | "\n", 115 | " # =========================================================================\n", 116 | " \n", 117 | " return p" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": null, 123 | "metadata": { 124 | "collapsed": false, 125 | "scrolled": true 126 | }, 127 | "outputs": [], 128 | "source": [ 129 | "predictions = predict(Theta1, Theta2, X)\n", 130 | "# The supplied parameters assume label ordering of 1,2,3,4,5,6,7,8,9,0\n", 131 | "norm_predictions = (predictions + 1) % 10\n", 132 | "plt.scatter(range(m),norm_predictions, s=1)" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": null, 138 | "metadata": { 139 | "collapsed": false 140 | }, 141 | "outputs": [], 142 | "source": [ 143 | "(y == norm_predictions).mean()" 144 | ] 145 | } 146 | ], 147 | "metadata": { 148 | "kernelspec": { 149 | "display_name": "Python 3", 150 | "language": "python", 151 | "name": "python3" 152 | }, 153 | "language_info": { 154 | "codemirror_mode": { 155 | "name": "ipython", 156 | "version": 3 157 | }, 158 | "file_extension": ".py", 159 | "mimetype": "text/x-python", 160 | "name": "python", 161 | "nbconvert_exporter": "python", 162 | "pygments_lexer": "ipython3", 163 | "version": "3.4.3" 164 | } 165 | }, 166 | "nbformat": 4, 167 | "nbformat_minor": 0 168 | } 169 | -------------------------------------------------------------------------------- /ex3/ml-ex3-onevsall.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# One vs All" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": { 14 | "collapsed": false 15 | }, 16 | "outputs": [], 17 | "source": [ 18 | "import pandas\n", 19 | "import numpy as np\n", 20 | "import scipy.io\n", 21 | "import scipy.optimize\n", 22 | "import functools\n", 23 | "import matplotlib.pyplot as plt\n", 24 | "%matplotlib 
inline" 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "metadata": {}, 30 | "source": [ 31 | "## Loading and visualizing training data\n", 32 | "\n", 33 | "The training data is 5000 digit images of digits of size 20x20. We will display a random selection of 25 of them." 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "metadata": { 40 | "collapsed": false 41 | }, 42 | "outputs": [], 43 | "source": [ 44 | "ex3data1 = scipy.io.loadmat(\"./ex3data1.mat\")\n", 45 | "X = ex3data1['X']\n", 46 | "y = ex3data1['y'][:,0]\n", 47 | "y[y==10] = 0" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": null, 53 | "metadata": { 54 | "collapsed": false 55 | }, 56 | "outputs": [], 57 | "source": [ 58 | "m, n = X.shape\n", 59 | "m, n" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": null, 65 | "metadata": { 66 | "collapsed": false 67 | }, 68 | "outputs": [], 69 | "source": [ 70 | "fig = plt.figure(figsize=(5,5))\n", 71 | "fig.subplots_adjust(wspace=0.05, hspace=0.15)\n", 72 | "\n", 73 | "import random\n", 74 | "\n", 75 | "display_rows, display_cols = (5, 5)\n", 76 | "\n", 77 | "for i in range(display_rows * display_cols):\n", 78 | " ax = fig.add_subplot(display_rows, display_cols, i+1)\n", 79 | " ax.set_axis_off()\n", 80 | " image = X[random.randint(0, m-1)].reshape(20, 20).T\n", 81 | " image /= np.max(image)\n", 82 | " ax.imshow(image, cmap=plt.cm.Greys_r)" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": null, 88 | "metadata": { 89 | "collapsed": false 90 | }, 91 | "outputs": [], 92 | "source": [ 93 | "X = np.insert(X, 0, np.ones(m), 1)" 94 | ] 95 | }, 96 | { 97 | "cell_type": "markdown", 98 | "metadata": { 99 | "collapsed": false 100 | }, 101 | "source": [ 102 | "## Part 2: Vectorize Logistic Regression\n", 103 | "In this part of the exercise, you will reuse your logistic regression\n", 104 | "code from the last exercise. You task here is to make sure that your\n", 105 | "regularized logistic regression implementation is vectorized. After\n", 106 | "that, you will implement one-vs-all classification for the handwritten\n", 107 | "digit dataset." 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": null, 113 | "metadata": { 114 | "collapsed": true 115 | }, 116 | "outputs": [], 117 | "source": [ 118 | "def sigmoid(z):\n", 119 | " return 1 / (1 + np.exp(-z))" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": null, 125 | "metadata": { 126 | "collapsed": false 127 | }, 128 | "outputs": [], 129 | "source": [ 130 | "def h(theta, x):\n", 131 | " return sigmoid(x.dot(theta))\n", 132 | "\n", 133 | "#LRCOSTFUNCTION Compute cost and gradient for logistic regression with \n", 134 | "#regularization\n", 135 | "# J = LRCOSTFUNCTION(theta, X, y, lambda) computes the cost of using\n", 136 | "# theta as the parameter for regularized logistic regression and the\n", 137 | "# gradient of the cost w.r.t. to the parameters. \n", 138 | "\n", 139 | "def cost(X, y, theta, lambda_=None):\n", 140 | " # You need to return the following variables correctly \n", 141 | " J = 0\n", 142 | " \n", 143 | " # ====================== YOUR CODE HERE ======================\n", 144 | " # Instructions: Compute the cost of a particular choice of theta.\n", 145 | " # You should set J to the cost.\n", 146 | " # Compute the partial derivatives and set grad to the partial\n", 147 | " # derivatives of the cost w.r.t. 
each parameter in theta.)\n", 148 | "    #\n", 149 | "    # Hint: The computation of the cost function and gradients can be\n", 150 | "    #       efficiently vectorized. For example, consider the computation\n", 151 | "    #\n", 152 | "    #           sigmoid(X.dot(theta))\n", 153 | "    #\n", 154 | "    #       Each row of the resulting matrix will contain the value of the\n", 155 | "    #       prediction for that example. You can make use of this to vectorize\n", 156 | "    #       the cost function and gradient computations. \n", 157 | "    #\n", 158 | "\n", 159 | "    \n", 160 | "    \n", 161 | "    # =============================================================\n", 162 | "    \n", 163 | "    return J\n", 164 | "\n", 165 | "def gradient(X, y, theta, lambda_=None):\n", 166 | "    # You need to return the following variables correctly \n", 167 | "    grad = np.zeros(theta.shape)\n", 168 | "    \n", 169 | "    # ====================== YOUR CODE HERE ======================\n", 170 | "    # Hint: When computing the gradient of the regularized cost function, \n", 171 | "    #       there are many possible vectorized solutions, but one solution\n", 172 | "    #       looks like:\n", 173 | "    #           grad = (unregularized gradient for logistic regression)\n", 174 | "    #           temp = theta.copy()  # copy, so theta itself is not modified\n", 175 | "    #           temp[0] = 0          # because we don't add anything for j = 0 \n", 176 | "    #           grad = grad + YOUR_CODE_HERE (using the temp variable)\n", 177 | "    \n", 178 | "    \n", 179 | "    \n", 180 | "    # =============================================================\n", 181 | "    \n", 182 | "    return grad" 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": null, 188 | "metadata": { 189 | "collapsed": false 190 | }, 191 | "outputs": [], 192 | "source": [ 193 | "initial_theta = np.zeros(n + 1)\n", 194 | "lambda_ = 0.1\n", 195 | "cost(X, y, initial_theta, lambda_)" 196 | ] 197 | }, 198 | { 199 | "cell_type": "code", 200 | "execution_count": null, 201 | "metadata": { 202 | "collapsed": false 203 | }, 204 | "outputs": [], 205 | "source": [ 206 | "gradient(X, y, initial_theta, lambda_).shape" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": null, 212 | "metadata": { 213 | "collapsed": false 214 | }, 215 | "outputs": [], 216 | "source": [ 217 | "def one_vs_all(X, y, num_labels, lambda_):\n", 218 | "    #ONEVSALL trains multiple logistic regression classifiers and returns all\n", 219 | "    #the classifiers in a list all_theta, where the i-th item of all_theta \n", 220 | "    #corresponds to the classifier for label i\n", 221 | "    #   [all_theta] = ONEVSALL(X, y, num_labels, lambda) trains num_labels\n", 222 | "    #   logistic regression classifiers and returns each of these classifiers\n", 223 | "    #   in a list all_theta, where the i-th item of all_theta corresponds \n", 224 | "    #   to the classifier for label i\n", 225 | "    \n", 226 | "    # You need to return the following variables correctly \n", 227 | "    all_theta = [None] * num_labels\n", 228 | "    \n", 229 | "    # ====================== YOUR CODE HERE ======================\n", 230 | "    # Instructions: You should complete the following code to train num_labels\n", 231 | "    #               logistic regression classifiers with regularization\n", 232 | "    #               parameter lambda. 
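A sketch of one possible vectorized completion of `cost()` and `gradient()` above. It assumes `X` already includes the bias column, that `theta[0]` is the unregularized intercept, and reuses the notebook's `sigmoid`; it is not the only valid implementation.

```python
import numpy as np

def cost(X, y, theta, lambda_=0.0):
    m = len(y)
    h = sigmoid(X.dot(theta))                        # predictions for all examples
    J = (-y.dot(np.log(h)) - (1 - y).dot(np.log(1 - h))) / m
    J += lambda_ / (2 * m) * np.sum(theta[1:] ** 2)  # regularization, skipping theta[0]
    return J

def gradient(X, y, theta, lambda_=0.0):
    m = len(y)
    h = sigmoid(X.dot(theta))
    grad = X.T.dot(h - y) / m                        # unregularized gradient
    temp = theta.copy()
    temp[0] = 0                                      # don't penalize the intercept
    return grad + (lambda_ / m) * temp
```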
\n", 233 | " #\n", 234 | " # Hint: You can use y == c to obtain a vector of True's and False's\n", 235 | " #\n", 236 | " # Note: For this assignment, we recommend using scipy.optimize.minimize with method='L-BFGS-B'\n", 237 | " # to optimize the cost function.\n", 238 | " # It is okay to use a for-loop (for i in range(num_labels)) to\n", 239 | " # loop over the different classes.\n", 240 | " #\n", 241 | " # Example Code for scipy.optimize.minimize:\n", 242 | " #\n", 243 | " # result = scipy.optimize.minimize(lambda t: cost(X, y==digit, t, lambda_),\n", 244 | " # initial_theta,\n", 245 | " # jac=lambda t: gradient(X, y==digit, t, lambda_),\n", 246 | " # method='L-BFGS-B')\n", 247 | " # theta = result.x\n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " # =========================================================================" 254 | ] 255 | }, 256 | { 257 | "cell_type": "code", 258 | "execution_count": null, 259 | "metadata": { 260 | "collapsed": false 261 | }, 262 | "outputs": [], 263 | "source": [ 264 | "num_labels = 10\n", 265 | "thetas = one_vs_all(X, y, num_labels, lambda_)" 266 | ] 267 | }, 268 | { 269 | "cell_type": "code", 270 | "execution_count": null, 271 | "metadata": { 272 | "collapsed": false 273 | }, 274 | "outputs": [], 275 | "source": [ 276 | "fig = plt.figure(figsize=(10,10))\n", 277 | "for d in range(10):\n", 278 | " ax = fig.add_subplot(5, 2, d+1)\n", 279 | " ax.scatter(range(m), h(thetas[d], X), s=1)" 280 | ] 281 | }, 282 | { 283 | "cell_type": "code", 284 | "execution_count": null, 285 | "metadata": { 286 | "collapsed": true 287 | }, 288 | "outputs": [], 289 | "source": [ 290 | "def predict_one_vs_all(X, thetas):\n", 291 | " #PREDICT Predict the label for a trained one-vs-all classifier. The labels \n", 292 | " #are in the range 1..K, where K = len(thetas)\n", 293 | " # p = PREDICTONEVSALL(all_theta, X) will return a vector of predictions\n", 294 | " # for each example in the matrix X. Note that X contains the examples in\n", 295 | " # rows. all_theta is a list where the i-th entry is a trained logistic\n", 296 | " # regression theta vector for the i-th class. You should set p to a vector\n", 297 | " # of values from 1..K (e.g., p = [1; 3; 1; 2] predicts classes 1, 3, 1, 2\n", 298 | " # for 4 examples) \n", 299 | " \n", 300 | " \n", 301 | " # You need to return the following variables correctly \n", 302 | " p = np.zeros(X.shape[0]);\n", 303 | " \n", 304 | " # ====================== YOUR CODE HERE ======================\n", 305 | " # Instructions: Complete the following code to make predictions using\n", 306 | " # your learned logistic regression parameters (one-vs-all).\n", 307 | " # You should set p to a vector of predictions (from 1 to\n", 308 | " # num_labels).\n", 309 | " #\n", 310 | " # Hint: This code can be done all vectorized using the max function.\n", 311 | " # In particular, the max function can also return the index of the \n", 312 | " # max element, for more information see 'help max'. 
If your examples \n", 313 | "    #       are in rows, you can use np.argmax(A, axis=1) to obtain the index of the max \n", 314 | "    #       for each row.\n", 315 | "    #       \n", 316 | "\n", 317 | "\n", 318 | "    \n", 319 | "    \n", 320 | "    \n", 321 | "    \n", 322 | "    \n", 323 | "    \n", 324 | "    # =========================================================================\n", 325 | "    \n", 326 | "    return p" 327 | ] 328 | }, 329 | { 330 | "cell_type": "code", 331 | "execution_count": null, 332 | "metadata": { 333 | "collapsed": false 334 | }, 335 | "outputs": [], 336 | "source": [ 337 | "predictions = predict_one_vs_all(X, thetas)\n", 338 | "\n", 339 | "plt.scatter(range(m), predictions, s=1)" 340 | ] 341 | }, 342 | { 343 | "cell_type": "markdown", 344 | "metadata": {}, 345 | "source": [ 346 | "Training set accuracy:" 347 | ] 348 | }, 349 | { 350 | "cell_type": "code", 351 | "execution_count": null, 352 | "metadata": { 353 | "collapsed": false 354 | }, 355 | "outputs": [], 356 | "source": [ 357 | "(predictions == y).mean()" 358 | ] 359 | } 360 | ], 361 | "metadata": { 362 | "kernelspec": { 363 | "display_name": "Python 3", 364 | "language": "python", 365 | "name": "python3" 366 | } 367 | }, 368 | "nbformat": 4, 369 | "nbformat_minor": 0 370 | } 371 | -------------------------------------------------------------------------------- /ex4/ex4data1.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/noammor/coursera-machinelearning-python/454abd5a4ad315dc8a6391b4bb0ce4c280a394bd/ex4/ex4data1.mat -------------------------------------------------------------------------------- /ex4/ex4weights.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/noammor/coursera-machinelearning-python/454abd5a4ad315dc8a6391b4bb0ce4c280a394bd/ex4/ex4weights.mat -------------------------------------------------------------------------------- /ex4/ml-ex4.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Exercise 4: Neural Network Learning" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": { 14 | "collapsed": false 15 | }, 16 | "outputs": [], 17 | "source": [ 18 | "import numpy as np\n", 19 | "import scipy.io\n", 20 | "import scipy.optimize\n", 21 | "import matplotlib.pyplot as plt\n", 22 | "%matplotlib inline" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": { 29 | "collapsed": true 30 | }, 31 | "outputs": [], 32 | "source": [ 33 | "# uncomment for console - useful for debugging\n", 34 | "# %qtconsole" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "metadata": { 41 | "collapsed": false 42 | }, 43 | "outputs": [], 44 | "source": [ 45 | "ex4data1 = scipy.io.loadmat(\"./ex4data1.mat\")\n", 46 | "X = ex4data1['X']\n", 47 | "y = ex4data1['y'][:,0]\n", 48 | "m, n = X.shape\n", 49 | "m, n" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": null, 55 | "metadata": { 56 | "collapsed": false 57 | }, 58 | "outputs": [], 59 | "source": [ 60 | "input_layer_size  = n      # 20x20 Input Images of Digits\n", 61 | "hidden_layer_size = 25     # 25 hidden units\n", 62 | "num_labels = 10            # 10 labels, from 1 to 10\n", 63 | "                           # (note that we have mapped \"0\" to label 10)\n", 64 | "lambda_ = 1" 65 | ] 66 | }, 67 | { 68 | "cell_type": "markdown", 69 | "metadata": {}, 70 | "source": [ 71 
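The two skeletons above might be completed as follows. This builds on the cost/gradient sketch earlier and is one option among several; class scores are compared via `X.dot(theta)`, which ranks identically to its sigmoid since the sigmoid is monotonic.

```python
import numpy as np
import scipy.optimize

def one_vs_all(X, y, num_labels, lambda_):
    n = X.shape[1]
    all_theta = [None] * num_labels
    for digit in range(num_labels):
        # Train one binary classifier per class, as the hint suggests.
        result = scipy.optimize.minimize(
            lambda t: cost(X, y == digit, t, lambda_),
            np.zeros(n),
            jac=lambda t: gradient(X, y == digit, t, lambda_),
            method='L-BFGS-B')
        all_theta[digit] = result.x
    return all_theta

def predict_one_vs_all(X, thetas):
    scores = np.column_stack([X.dot(theta) for theta in thetas])
    return np.argmax(scores, axis=1)   # label whose classifier scores highest
```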
| "## Part 1: Loading and Visualizing Data\n", 72 | "We start the exercise by first loading and visualizing the dataset. You will be working with a dataset that contains handwritten digits.\n" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": null, 78 | "metadata": { 79 | "collapsed": false, 80 | "scrolled": true 81 | }, 82 | "outputs": [], 83 | "source": [ 84 | "def display(X, display_rows=5, display_cols=5, figsize=(4,4), random_x=False):\n", 85 | " m = X.shape[0]\n", 86 | " fig, axes = plt.subplots(display_rows, display_cols, figsize=figsize)\n", 87 | " fig.subplots_adjust(wspace=0.1, hspace=0.1)\n", 88 | "\n", 89 | " import random\n", 90 | "\n", 91 | " for i, ax in enumerate(axes.flat):\n", 92 | " ax.set_axis_off()\n", 93 | " x = None\n", 94 | " if random_x:\n", 95 | " x = random.randint(0, m-1)\n", 96 | " else:\n", 97 | " x = i\n", 98 | " image = X[x].reshape(20, 20).T\n", 99 | " image = image / np.max(image)\n", 100 | " ax.imshow(image, cmap=plt.cm.Greys_r)\n", 101 | "\n", 102 | "display(X, random_x=True)" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": null, 108 | "metadata": { 109 | "collapsed": true 110 | }, 111 | "outputs": [], 112 | "source": [ 113 | "def add_ones_column(array):\n", 114 | " return np.insert(array, 0, 1, axis=1)" 115 | ] 116 | }, 117 | { 118 | "cell_type": "markdown", 119 | "metadata": {}, 120 | "source": [ 121 | "## Part 2: Loading Parameters \n", 122 | "In this part of the exercise, we load some pre-initialized \n", 123 | "neural network parameters." 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": null, 129 | "metadata": { 130 | "collapsed": false 131 | }, 132 | "outputs": [], 133 | "source": [ 134 | "ex4weights = scipy.io.loadmat('./ex4weights.mat')\n", 135 | "Theta1 = ex4weights['Theta1']\n", 136 | "Theta2 = ex4weights['Theta2']\n", 137 | "print(Theta1.shape, Theta2.shape)" 138 | ] 139 | }, 140 | { 141 | "cell_type": "markdown", 142 | "metadata": {}, 143 | "source": [ 144 | "Unrolling the parameters into one vector:" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": null, 150 | "metadata": { 151 | "collapsed": false 152 | }, 153 | "outputs": [], 154 | "source": [ 155 | "nn_params = np.concatenate((Theta1.flat, Theta2.flat))\n", 156 | "nn_params.shape" 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": null, 162 | "metadata": { 163 | "collapsed": true 164 | }, 165 | "outputs": [], 166 | "source": [ 167 | "def sigmoid(z):\n", 168 | " return 1 / (1+np.exp(-z))" 169 | ] 170 | }, 171 | { 172 | "cell_type": "markdown", 173 | "metadata": { 174 | "collapsed": false 175 | }, 176 | "source": [ 177 | "## Part 3: Compute Cost (Feedforward) \n", 178 | " To the neural network, you should first start by implementing the\n", 179 | " feedforward part of the neural network that returns the cost only. You\n", 180 | " should complete the code in nn_cost_function() to return cost. After\n", 181 | " implementing the feedforward to compute the cost, you can verify that\n", 182 | " your implementation is correct by verifying that you get the same cost\n", 183 | " as us for the fixed debugging parameters.\n", 184 | "\n", 185 | " We suggest implementing the feedforward cost *without* regularization\n", 186 | " first so that it will be easier for you to debug. Later, in part 4, you\n", 187 | " will get to implement the regularized cost." 
188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": null, 193 | "metadata": { 194 | "collapsed": true 195 | }, 196 | "outputs": [], 197 | "source": [ 198 | "def nn_cost_function(nn_params, input_layer_size, hidden_layer_size,\n", 199 | " num_labels, X, y, lambda_):\n", 200 | " #NNCOSTFUNCTION Implements the neural network cost function for a two layer\n", 201 | " #neural network which performs classification\n", 202 | " # [J grad] = NNCOSTFUNCTON(nn_params, hidden_layer_size, num_labels, ...\n", 203 | " # X, y, lambda) computes the cost and gradient of the neural network. The\n", 204 | " # parameters for the neural network are \"unrolled\" into the vector\n", 205 | " # nn_params and need to be converted back into the weight matrices. \n", 206 | " # \n", 207 | " # The returned parameter grad should be a \"unrolled\" vector of the\n", 208 | " # partial derivatives of the neural network.\n", 209 | " #\n", 210 | "\n", 211 | " # Reshape nn_params back into the parameters Theta1 and Theta2, the weight matrices\n", 212 | " # for our 2 layer neural network\n", 213 | " t1_len = (input_layer_size+1)*hidden_layer_size\n", 214 | " Theta1 = nn_params[:t1_len].reshape(hidden_layer_size, input_layer_size+1)\n", 215 | " Theta2 = nn_params[t1_len:].reshape(num_labels, hidden_layer_size+1)\n", 216 | " m = X.shape[0]\n", 217 | " \n", 218 | " # You need to return the following variables correctly \n", 219 | " J = 0;\n", 220 | " Theta1_grad = np.zeros(Theta1.shape);\n", 221 | " Theta2_grad = np.zeros(Theta2.shape);\n", 222 | " \n", 223 | " # ====================== YOUR CODE HERE ======================\n", 224 | " # Instructions: You should complete the code by working through the\n", 225 | " # following parts.\n", 226 | " #\n", 227 | " # Part 1: Feedforward the neural network and return the cost in the\n", 228 | " # variable J. After implementing Part 1, you can verify that your\n", 229 | " # cost function computation is correct by verifying the cost\n", 230 | " # computed for lambda == 0.\n", 231 | " #\n", 232 | " # Part 2: Implement the backpropagation algorithm to compute the gradients\n", 233 | " # Theta1_grad and Theta2_grad. You should return the partial derivatives of\n", 234 | " # the cost function with respect to Theta1 and Theta2 in Theta1_grad and\n", 235 | " # Theta2_grad, respectively. After implementing Part 2, you can check\n", 236 | " # that your implementation is correct by running checkNNGradients\n", 237 | " #\n", 238 | " # Note: The vector y passed into the function is a vector of labels\n", 239 | " # containing values from 1..K. You need to map this vector into a \n", 240 | " # binary vector of 1's and 0's to be used with the neural network\n", 241 | " # cost function.\n", 242 | " #\n", 243 | " # Hint: We recommend implementing backpropagation using a for-loop\n", 244 | " # over the training examples if you are implementing it for the \n", 245 | " # first time.\n", 246 | " #\n", 247 | " # Part 3: Implement regularization with the cost function and gradients.\n", 248 | " #\n", 249 | " # Hint: You can implement this around the code for\n", 250 | " # backpropagation. 
That is, you can compute the gradients for\n", 251 | " # the regularization separately and then add them to Theta1_grad\n", 252 | " # and Theta2_grad from Part 2.\n", 253 | " #\n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " # =========================================================================\n", 261 | "\n", 262 | " # Unroll gradients\n", 263 | " gradient = np.concatenate((Theta1_grad.flat, Theta2_grad.flat))\n", 264 | "\n", 265 | " return J, gradient" 266 | ] 267 | }, 268 | { 269 | "cell_type": "markdown", 270 | "metadata": {}, 271 | "source": [ 272 | "The cost at the given parameters should be about `0.287629`." 273 | ] 274 | }, 275 | { 276 | "cell_type": "code", 277 | "execution_count": null, 278 | "metadata": { 279 | "collapsed": false 280 | }, 281 | "outputs": [], 282 | "source": [ 283 | "lambda_ = 0 # No regularization\n", 284 | "nn_cost_function(nn_params, input_layer_size, hidden_layer_size, num_labels, X, y, lambda_)" 285 | ] 286 | }, 287 | { 288 | "cell_type": "markdown", 289 | "metadata": {}, 290 | "source": [ 291 | "The cost at the given parameters and a regularization factor of 1 should be about `0.38377`." 292 | ] 293 | }, 294 | { 295 | "cell_type": "markdown", 296 | "metadata": {}, 297 | "source": [ 298 | "## Part 4: Implement Regularization \n", 299 | " Once your cost function implementation is correct, you should now\n", 300 | " continue to implement the regularization with the cost.\n" 301 | ] 302 | }, 303 | { 304 | "cell_type": "code", 305 | "execution_count": null, 306 | "metadata": { 307 | "collapsed": false, 308 | "scrolled": true 309 | }, 310 | "outputs": [], 311 | "source": [ 312 | "lambda_ = 1\n", 313 | "nn_cost_function(nn_params, input_layer_size, hidden_layer_size, num_labels, X, y, lambda_)" 314 | ] 315 | }, 316 | { 317 | "cell_type": "markdown", 318 | "metadata": {}, 319 | "source": [ 320 | "## Part 5: Sigmoid Gradient \n", 321 | " Before you start implementing the neural network, you will first\n", 322 | " implement the gradient for the sigmoid function. You should complete the\n", 323 | " code in sigmoid_gradient." 324 | ] 325 | }, 326 | { 327 | "cell_type": "code", 328 | "execution_count": null, 329 | "metadata": { 330 | "collapsed": false 331 | }, 332 | "outputs": [], 333 | "source": [ 334 | "def sigmoid_gradient(z):\n", 335 | " #SIGMOIDGRADIENT returns the gradient of the sigmoid function\n", 336 | " #evaluated at z\n", 337 | " # g = SIGMOIDGRADIENT(z) computes the gradient of the sigmoid function\n", 338 | " # evaluated at z. This should work regardless if z is a matrix or a\n", 339 | " # vector. 
In particular, if z is a vector or matrix, you should return\n", 340 | "    # the gradient for each element.\n", 341 | "\n", 342 | "    g = np.zeros(z.shape)\n", 343 | "\n", 344 | "    \n", 345 | "    # ====================== YOUR CODE HERE ======================\n", 346 | "    # Instructions: Compute the gradient of the sigmoid function evaluated at\n", 347 | "    #               each value of z (z can be a matrix, vector or scalar).\n", 348 | "\n", 349 | "\n", 350 | "    \n", 351 | "    \n", 352 | "    \n", 353 | "    # =============================================================\n", 354 | "\n", 355 | "    \n", 356 | "    return g" 357 | ] 358 | }, 359 | { 360 | "cell_type": "code", 361 | "execution_count": null, 362 | "metadata": { 363 | "collapsed": false, 364 | "scrolled": true 365 | }, 366 | "outputs": [], 367 | "source": [ 368 | "sigmoid_gradient(np.array([1, -0.5, 0, 0.5, 1]))" 369 | ] 370 | }, 371 | { 372 | "cell_type": "markdown", 373 | "metadata": {}, 374 | "source": [ 375 | "## Part 6: Initializing Parameters \n", 376 | " In this part of the exercise, you will be starting to implement a two\n", 377 | " layer neural network that classifies digits. You will start by\n", 378 | " implementing a function to initialize the weights of the neural network." 379 | ] 380 | }, 381 | { 382 | "cell_type": "code", 383 | "execution_count": null, 384 | "metadata": { 385 | "collapsed": true 386 | }, 387 | "outputs": [], 388 | "source": [ 389 | "def rand_initialize_weight(L_in, L_out):\n", 390 | "    #RANDINITIALIZEWEIGHTS Randomly initialize the weights of a layer of the\n", 391 | "    #network, returning a matrix of shape (L_in, L_out)\n", 392 | "    #   W = RANDINITIALIZEWEIGHTS(L_in, L_out) randomly initializes the weights \n", 393 | "    #   of a layer and returns a matrix W of shape \n", 394 | "    #   (L_in, L_out). \n", 395 | "    #\n", 396 | "    #   Note that the callers below pass the bias-augmented column count as\n", 397 | "    #   L_out, so the first column of W handles the \"bias\" terms\n", 398 | "    #\n", 399 | "    \n", 400 | "    # You need to return the following variables correctly \n", 401 | "    W = np.zeros((L_in, L_out))\n", 402 | "    \n", 403 | "    # ====================== YOUR CODE HERE ======================\n", 404 | "    # Instructions: Initialize W randomly so that we break the symmetry while\n", 405 | "    #               training the neural network.\n", 406 | "    #\n", 407 | "    # Note: The first column of W corresponds to the parameters for the bias units\n", 408 | "    #\n", 409 | "    \n", 410 | "    \n", 411 | "    # =========================================================================\n", 412 | "    return W" 413 | ] 414 | }, 415 | { 416 | "cell_type": "markdown", 417 | "metadata": {}, 418 | "source": [ 419 | "## Part 7: Implement Backpropagation \n", 420 | " Once your cost matches up with ours, you should proceed to implement the\n", 421 | " backpropagation algorithm for the neural network. You should add to the\n", 422 | " code you've written in nn_cost_function to return the partial\n", 423 | " derivatives of the parameters." 
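Sketches for the two skeletons above. The first follows from $\sigma'(z)=\sigma(z)\,(1-\sigma(z))$; the epsilon value in the second is an assumption (the small range commonly suggested for this architecture), and any sufficiently small symmetric range would do.

```python
import numpy as np

def sigmoid_gradient(z):
    s = 1 / (1 + np.exp(-z))
    return s * (1 - s)          # peaks at 0.25 when z == 0

def rand_initialize_weight(L_in, L_out, epsilon_init=0.12):
    # Uniform values in [-epsilon_init, epsilon_init] break the symmetry
    # between hidden units; the shape matches the callers below, which
    # pass the bias-augmented column count as L_out.
    return np.random.uniform(-epsilon_init, epsilon_init, (L_in, L_out))
```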
424 | ] 425 | }, 426 | { 427 | "cell_type": "code", 428 | "execution_count": null, 429 | "metadata": { 430 | "collapsed": true 431 | }, 432 | "outputs": [], 433 | "source": [ 434 | "def numerical_gradient(f, x, dx=1e-6):\n", 435 | " perturb = np.zeros(x.size)\n", 436 | " result = np.zeros(x.size)\n", 437 | " for i in range(x.size):\n", 438 | " perturb[i] = dx\n", 439 | " result[i] = (f(x+perturb) - f(x-perturb)) / (2*dx)\n", 440 | " perturb[i] = 0\n", 441 | " return result\n", 442 | "\n", 443 | "def check_NN_gradients(lambda_=0):\n", 444 | " input_layer_size = 3\n", 445 | " hidden_layer_size = 5\n", 446 | " num_labels = 3\n", 447 | " m = 5\n", 448 | "\n", 449 | " def debug_matrix(fan_out, fan_in):\n", 450 | " W = np.sin(np.arange(fan_out * (fan_in+1))+1) / 10\n", 451 | " return W.reshape(fan_out, fan_in+1)\n", 452 | "\n", 453 | " Theta1 = debug_matrix(hidden_layer_size, input_layer_size)\n", 454 | " Theta2 = debug_matrix(num_labels, hidden_layer_size)\n", 455 | "\n", 456 | " X = debug_matrix(m, input_layer_size - 1)\n", 457 | " y = 1 + ((1 + np.arange(m)) % num_labels)\n", 458 | " \n", 459 | " nn_params = np.concatenate([Theta1.flat, Theta2.flat])\n", 460 | "\n", 461 | " cost, grad = nn_cost_function(nn_params, input_layer_size, hidden_layer_size, num_labels, X, y, lambda_)\n", 462 | " def just_cost(nn_params):\n", 463 | " cost, grad = nn_cost_function(nn_params, input_layer_size, hidden_layer_size, num_labels, X, y, lambda_)\n", 464 | " return cost\n", 465 | " \n", 466 | " return np.sum(np.abs(grad - numerical_gradient(just_cost, nn_params))) / grad.size" 467 | ] 468 | }, 469 | { 470 | "cell_type": "markdown", 471 | "metadata": {}, 472 | "source": [ 473 | "If your backpropagation implementation is correct, then the relative difference will be small (less than 1e-9)." 
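For reference, a condensed sketch of the nn_cost_function body that check_NN_gradients exercises. `nn_cost_body` is a hypothetical helper name used only for this illustration; it assumes labels `y` in 1..K (as the docstring states), one-hot encodes them with `np.eye`, and reuses `sigmoid` and `sigmoid_gradient` from the sketches above. It is one common vectorized formulation, not the only correct one.

```python
import numpy as np

def nn_cost_body(Theta1, Theta2, X, y, num_labels, lambda_):
    m = X.shape[0]
    Y = np.eye(num_labels)[y - 1]                   # one-hot encode labels 1..K

    a1 = np.hstack([np.ones((m, 1)), X])            # forward pass
    z2 = a1.dot(Theta1.T)
    a2 = np.hstack([np.ones((m, 1)), sigmoid(z2)])
    a3 = sigmoid(a2.dot(Theta2.T))                  # h_theta(x), shape (m, K)

    J = np.sum(-Y * np.log(a3) - (1 - Y) * np.log(1 - a3)) / m
    J += lambda_ / (2 * m) * (np.sum(Theta1[:, 1:] ** 2) + np.sum(Theta2[:, 1:] ** 2))

    d3 = a3 - Y                                     # backpropagate the errors
    d2 = d3.dot(Theta2[:, 1:]) * sigmoid_gradient(z2)
    Theta1_grad = d2.T.dot(a1) / m
    Theta2_grad = d3.T.dot(a2) / m
    Theta1_grad[:, 1:] += lambda_ / m * Theta1[:, 1:]   # regularize non-bias terms
    Theta2_grad[:, 1:] += lambda_ / m * Theta2[:, 1:]
    return J, Theta1_grad, Theta2_grad
```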
474 | ] 475 | }, 476 | { 477 | "cell_type": "code", 478 | "execution_count": null, 479 | "metadata": { 480 | "collapsed": false, 481 | "scrolled": true 482 | }, 483 | "outputs": [], 484 | "source": [ 485 | "check_NN_gradients()" 486 | ] 487 | }, 488 | { 489 | "cell_type": "code", 490 | "execution_count": null, 491 | "metadata": { 492 | "collapsed": true 493 | }, 494 | "outputs": [], 495 | "source": [ 496 | "initial_Theta1 = rand_initialize_weight(hidden_layer_size, input_layer_size+1)\n", 497 | "initial_Theta2 = rand_initialize_weight(num_labels, hidden_layer_size+1)" 498 | ] 499 | }, 500 | { 501 | "cell_type": "markdown", 502 | "metadata": {}, 503 | "source": [ 504 | "## Part 8: Implement Regularization \n", 505 | " Once your backpropagation implementation is correct, you should now\n", 506 | " continue to implement the regularization with the cost and gradient.\n" 507 | ] 508 | }, 509 | { 510 | "cell_type": "code", 511 | "execution_count": null, 512 | "metadata": { 513 | "collapsed": false 514 | }, 515 | "outputs": [], 516 | "source": [ 517 | "def cost_fun(nn_params):\n", 518 | "    return nn_cost_function(nn_params, input_layer_size, hidden_layer_size, num_labels, X, y, lambda_)\n", 519 | "\n", 520 | "lambda_ = 3\n", 521 | "nn_params = np.concatenate((initial_Theta1.flat, initial_Theta2.flat))\n", 522 | "res = scipy.optimize.minimize(cost_fun, nn_params, jac=True, method='L-BFGS-B', \n", 523 | "                              options=dict(maxiter=200, disp=True))" 524 | ] 525 | }, 526 | { 527 | "cell_type": "code", 528 | "execution_count": null, 529 | "metadata": { 530 | "collapsed": false, 531 | "scrolled": true 532 | }, 533 | "outputs": [], 534 | "source": [ 535 | "res" 536 | ] 537 | }, 538 | { 539 | "cell_type": "markdown", 540 | "metadata": {}, 541 | "source": [ 542 | "The cost at lambda = 3 should be about 0.57." 543 | ] 544 | }, 545 | { 546 | "cell_type": "code", 547 | "execution_count": null, 548 | "metadata": { 549 | "collapsed": false 550 | }, 551 | "outputs": [], 552 | "source": [ 553 | "res.fun" 554 | ] 555 | }, 556 | { 557 | "cell_type": "markdown", 558 | "metadata": {}, 559 | "source": [ 560 | "## Part 9: Training NN \n", 561 | " You have now implemented all the code necessary to train a neural \n", 562 | " network. To train your neural network, we will use scipy.optimize.minimize. \n", 563 | " \n", 564 | " Recall that these\n", 565 | " advanced optimizers are able to train our cost functions efficiently as\n", 566 | " long as we provide them with the gradient computations.\n", 567 | "\n", 568 | " After you have completed the assignment, change maxiter to a larger\n", 569 | " value to see how more training helps. You should also try different values of lambda." 
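One way to follow the "try different values" suggestion above is to re-run the optimizer for a few regularization strengths and compare the final costs. A sketch only: `cost_fun` reads the global `lambda_`, so reassigning it changes the objective between runs.

```python
for trial in (0, 0.3, 1, 3):
    lambda_ = trial               # cost_fun closes over the global lambda_
    res = scipy.optimize.minimize(cost_fun, nn_params, jac=True,
                                  method='L-BFGS-B',
                                  options=dict(maxiter=400))
    print('lambda =', trial, '-> final cost', res.fun)
```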
570 | ] 571 | }, 572 | { 573 | "cell_type": "code", 574 | "execution_count": null, 575 | "metadata": { 576 | "collapsed": true 577 | }, 578 | "outputs": [], 579 | "source": [ 580 | "lambda_ = 1\n", 581 | "nn_params = np.concatenate((initial_Theta1.flat, initial_Theta2.flat))\n", 582 | "res = scipy.optimize.minimize(cost_fun, nn_params, jac=True, method='L-BFGS-B', \n", 583 | "                              options=dict(maxiter=200, disp=True))\n", 584 | "nn_params = res.x" 585 | ] 586 | }, 587 | { 588 | "cell_type": "markdown", 589 | "metadata": {}, 590 | "source": [ 591 | "Obtain Theta1 and Theta2 back from nn_params:" 592 | ] 593 | }, 594 | { 595 | "cell_type": "code", 596 | "execution_count": null, 597 | "metadata": { 598 | "collapsed": true 599 | }, 600 | "outputs": [], 601 | "source": [ 602 | "t1_len = (input_layer_size+1)*hidden_layer_size\n", 603 | "Theta1 = nn_params[:t1_len].reshape(hidden_layer_size, input_layer_size+1)\n", 604 | "Theta2 = nn_params[t1_len:].reshape(num_labels, hidden_layer_size+1)" 605 | ] 606 | }, 607 | { 608 | "cell_type": "markdown", 609 | "metadata": {}, 610 | "source": [ 611 | "## Part 10: Visualize Weights \n", 612 | " You can now \"visualize\" what the neural network is learning by \n", 613 | " displaying the hidden units to see what features they are capturing in \n", 614 | " the data.\n" 615 | ] 616 | }, 617 | { 618 | "cell_type": "code", 619 | "execution_count": null, 620 | "metadata": { 621 | "collapsed": false, 622 | "scrolled": true 623 | }, 624 | "outputs": [], 625 | "source": [ 626 | "display(Theta1[:,1:], figsize=(6,6))" 627 | ] 628 | }, 629 | { 630 | "cell_type": "code", 631 | "execution_count": null, 632 | "metadata": { 633 | "collapsed": false 634 | }, 635 | "outputs": [], 636 | "source": [ 637 | "def predict(Theta1, Theta2, X):\n", 638 | "    #PREDICT Predict the label of an input given a trained neural network\n", 639 | "    #   p = PREDICT(Theta1, Theta2, X) outputs the predicted label of X given the\n", 640 | "    #   trained weights of a neural network (Theta1, Theta2)\n", 641 | "\n", 642 | "    m = X.shape[0]\n", 643 | "    num_labels = Theta2.shape[0]\n", 644 | "    \n", 645 | "    # You need to return the following variables correctly. 
Remember that \n", 646 | " # the given data labels go from 1..10, with 10 representing the digit 0!\n", 647 | " p = np.zeros(X.shape[0])\n", 648 | "\n", 649 | " \n", 650 | " # ====================== YOUR CODE HERE ======================\n", 651 | " \n", 652 | " \n", 653 | " \n", 654 | " \n", 655 | " # ============================================================\n", 656 | " \n", 657 | " \n", 658 | " return p\n", 659 | "\n", 660 | "predictions = predict(Theta1, Theta2, X)\n", 661 | "np.mean(predictions == y)" 662 | ] 663 | } 664 | ], 665 | "metadata": { 666 | "kernelspec": { 667 | "display_name": "Python 3", 668 | "language": "python", 669 | "name": "python3" 670 | }, 671 | "language_info": { 672 | "codemirror_mode": { 673 | "name": "ipython", 674 | "version": 3 675 | }, 676 | "file_extension": ".py", 677 | "mimetype": "text/x-python", 678 | "name": "python", 679 | "nbconvert_exporter": "python", 680 | "pygments_lexer": "ipython3", 681 | "version": "3.4.3" 682 | } 683 | }, 684 | "nbformat": 4, 685 | "nbformat_minor": 0 686 | } 687 | -------------------------------------------------------------------------------- /ex5/ex5data1.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/noammor/coursera-machinelearning-python/454abd5a4ad315dc8a6391b4bb0ce4c280a394bd/ex5/ex5data1.mat -------------------------------------------------------------------------------- /ex5/ml-ex5.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Exercise 5 | Regularized Linear Regression and Bias-Variance" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": { 14 | "collapsed": false 15 | }, 16 | "outputs": [], 17 | "source": [ 18 | "import numpy as np\n", 19 | "import matplotlib.pyplot as plt\n", 20 | "import scipy.optimize\n", 21 | "import scipy.io" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": { 28 | "collapsed": false 29 | }, 30 | "outputs": [], 31 | "source": [ 32 | "%matplotlib inline" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [ 39 | "## Part 1: Loading and Visualizing Data \n", 40 | " We start the exercise by first loading and visualizing the dataset. 
\n", 41 | " The following code will load the dataset into your environment and plot\n", 42 | " the data.\n" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": { 49 | "collapsed": false 50 | }, 51 | "outputs": [], 52 | "source": [ 53 | "ex5data1 = scipy.io.loadmat('ex5data1.mat')\n", 54 | "X = ex5data1['X']\n", 55 | "y = ex5data1['y'][:,0]\n", 56 | "Xtest = ex5data1['Xtest']\n", 57 | "ytest = ex5data1['ytest'][:,0]\n", 58 | "Xval = ex5data1['Xval']\n", 59 | "yval = ex5data1['yval'][:,0]\n", 60 | "print(X.shape, Xtest.shape, Xval.shape)" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": null, 66 | "metadata": { 67 | "collapsed": false 68 | }, 69 | "outputs": [], 70 | "source": [ 71 | "m = X.shape[0]" 72 | ] 73 | }, 74 | { 75 | "cell_type": "markdown", 76 | "metadata": {}, 77 | "source": [ 78 | "Plot training data:" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": null, 84 | "metadata": { 85 | "collapsed": false 86 | }, 87 | "outputs": [], 88 | "source": [ 89 | "fig, ax = plt.subplots(figsize=(5,5))\n", 90 | "ax.scatter(X, y, marker='x', c='r')\n", 91 | "ax.set_xlabel('Change in water level')\n", 92 | "ax.set_ylabel('Water flowing out of the dam')" 93 | ] 94 | }, 95 | { 96 | "cell_type": "markdown", 97 | "metadata": {}, 98 | "source": [ 99 | "## Part 2: Regularized Linear Regression Cost \n", 100 | " You should now implement the cost function for regularized linear \n", 101 | " regression. \n" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": null, 107 | "metadata": { 108 | "collapsed": false 109 | }, 110 | "outputs": [], 111 | "source": [ 112 | "def linearRegCostFunction(X, y, theta, lambda_):\n", 113 | " #LINEARREGCOSTFUNCTION Compute cost and gradient for regularized linear \n", 114 | " #regression with multiple variables\n", 115 | " # [J, grad] = LINEARREGCOSTFUNCTION(X, y, theta, lambda) computes the \n", 116 | " # cost of using theta as the parameter for linear regression to fit the \n", 117 | " # data points in X and y. Returns the cost in J and the gradient in grad\n", 118 | " \n", 119 | " m = len(y) # number of training examples\n", 120 | " \n", 121 | " # You need to return the following variables correctly \n", 122 | " J = 0\n", 123 | " grad = np.zeros(theta.shape)\n", 124 | " \n", 125 | " \n", 126 | " # ====================== YOUR CODE HERE ======================\n", 127 | " # Instructions: Compute the cost and gradient of regularized linear \n", 128 | " # regression for a particular choice of theta.\n", 129 | " #\n", 130 | " # You should set J to the cost and grad to the gradient.\n", 131 | " #\n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " # ============================================================\n", 136 | " \n", 137 | " return J, grad" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": null, 143 | "metadata": { 144 | "collapsed": true 145 | }, 146 | "outputs": [], 147 | "source": [ 148 | "theta = np.array([1, 1])\n", 149 | "J, grad = linearRegCostFunction(np.hstack([np.ones((m, 1)), X]), y, theta, 1)" 150 | ] 151 | }, 152 | { 153 | "cell_type": "markdown", 154 | "metadata": {}, 155 | "source": [ 156 | "The cost at theta = `[1, 1]` should be about 303.993192." 157 | ] 158 | }, 159 | { 160 | "cell_type": "markdown", 161 | "metadata": {}, 162 | "source": [ 163 | "The gradient at theta = `[1, 1]` should be about `[-15.303016; 598.250744]`." 
164 | ] 165 | }, 166 | { 167 | "cell_type": "markdown", 168 | "metadata": {}, 169 | "source": [ 170 | "## Part 3: Regularized Linear Regression Gradient \n", 171 | " You should now implement the gradient for regularized linear \n", 172 | " regression.\n" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": null, 178 | "metadata": { 179 | "collapsed": false, 180 | "scrolled": true 181 | }, 182 | "outputs": [], 183 | "source": [ 184 | "J" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": null, 190 | "metadata": { 191 | "collapsed": false, 192 | "scrolled": true 193 | }, 194 | "outputs": [], 195 | "source": [ 196 | "grad" 197 | ] 198 | }, 199 | { 200 | "cell_type": "markdown", 201 | "metadata": {}, 202 | "source": [ 203 | "## Part 4: Train Linear Regression \n", 204 | " Once you have implemented the cost and gradient correctly, the\n", 205 | " trainLinearRegression function will use your cost function to train \n", 206 | " regularized linear regression.\n", 207 | " \n", 208 | " Write Up Note: The data is non-linear, so this will not give a great \n", 209 | " fit.\n" 210 | ] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "execution_count": null, 215 | "metadata": { 216 | "collapsed": false 217 | }, 218 | "outputs": [], 219 | "source": [ 220 | "def trainLinearRegression(X, y, lambda_):\n", 221 | " initial_theta = np.zeros(X.shape[1])\n", 222 | " \n", 223 | " def costFunction(t):\n", 224 | " return linearRegCostFunction(X, y, t, lambda_)\n", 225 | " \n", 226 | " res = scipy.optimize.minimize(costFunction, initial_theta, jac=True, method='L-BFGS-B',\n", 227 | " options=dict(maxiter=200))\n", 228 | " return res" 229 | ] 230 | }, 231 | { 232 | "cell_type": "markdown", 233 | "metadata": {}, 234 | "source": [ 235 | "Train linear regression:" 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": null, 241 | "metadata": { 242 | "collapsed": false 243 | }, 244 | "outputs": [], 245 | "source": [ 246 | "res = trainLinearRegression(np.hstack([np.ones((m, 1)), X]), y, 0)\n", 247 | "print(res)\n", 248 | "optimal_theta = res.x" 249 | ] 250 | }, 251 | { 252 | "cell_type": "markdown", 253 | "metadata": {}, 254 | "source": [ 255 | "Plot fit over the data:" 256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": null, 261 | "metadata": { 262 | "collapsed": false, 263 | "scrolled": true 264 | }, 265 | "outputs": [], 266 | "source": [ 267 | "fig, ax = plt.subplots(figsize=(5,5))\n", 268 | "ax.scatter(X, y, marker='x', c='r')\n", 269 | "ax.set_xlabel('Change in water level')\n", 270 | "ax.set_ylabel('Water flowing out of the dam')\n", 271 | "ax.plot(X, np.hstack([np.ones((m, 1)), X]).dot(optimal_theta))" 272 | ] 273 | }, 274 | { 275 | "cell_type": "markdown", 276 | "metadata": {}, 277 | "source": [ 278 | "## =========== Part 5: Learning Curve for Linear Regression =============\n", 279 | " Next, you should implement the learning_curve function. 
\n", 280 | "\n", 281 | " Write Up Note: Since the model is underfitting the data, we expect to\n", 282 | " see a graph with \"high bias\" -- slide 8 in ML-advice.pdf \n", 283 | "\n" 284 | ] 285 | }, 286 | { 287 | "cell_type": "code", 288 | "execution_count": null, 289 | "metadata": { 290 | "collapsed": false 291 | }, 292 | "outputs": [], 293 | "source": [ 294 | "def learning_curve(X, y, Xval, yval, lambda_):\n", 295 | " #LEARNINGCURVE Generates the train and cross validation set errors needed \n", 296 | " #to plot a learning curve\n", 297 | " # [error_train, error_val] = ...\n", 298 | " # LEARNINGCURVE(X, y, Xval, yval, lambda) returns the train and\n", 299 | " # cross validation set errors for a learning curve. In particular, \n", 300 | " # it returns two vectors of the same length - error_train and \n", 301 | " # error_val. Then, error_train(i) contains the training error for\n", 302 | " # i examples (and similarly for error_val(i)).\n", 303 | " #\n", 304 | " # In this function, you will compute the train and test errors for\n", 305 | " # dataset sizes from 1 up to m. In practice, when working with larger\n", 306 | " # datasets, you might want to do this in larger intervals.\n", 307 | " #\n", 308 | " \n", 309 | " # Number of training examples\n", 310 | " m = X.shape[0]\n", 311 | " \n", 312 | " # You need to return these values correctly\n", 313 | " error_train = np.zeros(m-1)\n", 314 | " error_val = np.zeros(m-1)\n", 315 | " \n", 316 | " \n", 317 | " # ====================== YOUR CODE HERE ======================\n", 318 | " # Instructions: Fill in this function to return training errors in \n", 319 | " # error_train and the cross validation errors in error_val. \n", 320 | " # i.e., error_train[i] and \n", 321 | " # error_val[i] should give you the errors\n", 322 | " # obtained after training on i+1 examples.\n", 323 | " #\n", 324 | " # Note: You should evaluate the training error on the first i training\n", 325 | " # examples (i.e., X[:i, :] and y[:i]).\n", 326 | " #\n", 327 | " # For the cross-validation error, you should instead evaluate on\n", 328 | " # the _entire_ cross validation set (Xval and yval).\n", 329 | " #\n", 330 | " # Note: If you are using your cost function (linearRegCostFunction)\n", 331 | " # to compute the training and cross validation error, you should \n", 332 | " # call the function with the lambda argument set to 0. 
\n", 333 | " # Do note that you will still need to use lambda when running\n", 334 | " # the training to obtain the theta parameters.\n", 335 | " #\n", 336 | " # Hint: You can loop over the examples with the following:\n", 337 | " #\n", 338 | " # for i = range(1,m):\n", 339 | " # # Compute train/cross validation errors using training examples \n", 340 | " # # X[:i, :] and y[:i], storing the result in \n", 341 | " # # error_train[i-1] and error_val[i-1]\n", 342 | " # ....\n", 343 | " # \n", 344 | " # end\n", 345 | " #\n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " # =========================================================================\n", 351 | " \n", 352 | " return error_train, error_val\n", 353 | " " 354 | ] 355 | }, 356 | { 357 | "cell_type": "code", 358 | "execution_count": null, 359 | "metadata": { 360 | "collapsed": false 361 | }, 362 | "outputs": [], 363 | "source": [ 364 | "error_train, error_val = learning_curve(np.hstack([np.ones((m, 1)), X]), y, \n", 365 | " np.hstack([np.ones((Xval.shape[0], 1)), Xval]), yval, \n", 366 | " 0)" 367 | ] 368 | }, 369 | { 370 | "cell_type": "code", 371 | "execution_count": null, 372 | "metadata": { 373 | "collapsed": false 374 | }, 375 | "outputs": [], 376 | "source": [ 377 | "print('\\n'.join(str(x) for x in zip(error_train, error_val)))" 378 | ] 379 | }, 380 | { 381 | "cell_type": "code", 382 | "execution_count": null, 383 | "metadata": { 384 | "collapsed": false, 385 | "scrolled": true 386 | }, 387 | "outputs": [], 388 | "source": [ 389 | "plt.plot(range(m-1), error_train, range(m-1), error_val)" 390 | ] 391 | }, 392 | { 393 | "cell_type": "markdown", 394 | "metadata": {}, 395 | "source": [ 396 | "## Part 6: Feature Mapping for Polynomial Regression \n", 397 | " One solution to this is to use polynomial regression. You should now\n", 398 | " complete poly_features to map each example into its powers.\n" 399 | ] 400 | }, 401 | { 402 | "cell_type": "code", 403 | "execution_count": null, 404 | "metadata": { 405 | "collapsed": false 406 | }, 407 | "outputs": [], 408 | "source": [ 409 | "def poly_features(X_orig, p):\n", 410 | " #POLYFEATURES Maps X (1D vector) into the p-th power\n", 411 | " # [X_poly] = POLYFEATURES(X, p) takes a vector X (size m) and\n", 412 | " # maps each example into its polynomial features where\n", 413 | " # X_poly[i, :] = [X[i] X[i]**2 X[i]**3 ... 
X[i]**p]\n", 414 | "    #\n", 415 | "    \n", 416 | "    # You need to return the following variables correctly.\n", 417 | "    X_poly = np.zeros((len(X_orig), p))\n", 418 | "    \n", 419 | "    \n", 420 | "    # ====================== YOUR CODE HERE ======================\n", 421 | "    # Instructions: Given a vector X, return a matrix X_poly where the p-th \n", 422 | "    #               column of X_poly contains the values of X to the p-th power.\n", 423 | "    #\n", 424 | "    # \n", 425 | "    \n", 426 | "    \n", 427 | "    \n", 428 | "    \n", 429 | "    # =============================================================\n", 430 | "    \n", 431 | "    \n", 432 | "    return X_poly" 433 | ] 434 | }, 435 | { 436 | "cell_type": "markdown", 437 | "metadata": {}, 438 | "source": [ 439 | "Map X onto Polynomial Features:" 440 | ] 441 | }, 442 | { 443 | "cell_type": "code", 444 | "execution_count": null, 445 | "metadata": { 446 | "collapsed": false 447 | }, 448 | "outputs": [], 449 | "source": [ 450 | "p = 8\n", 451 | "X_poly = poly_features(X, p)\n", 452 | "X_poly.shape" 453 | ] 454 | }, 455 | { 456 | "cell_type": "code", 457 | "execution_count": null, 458 | "metadata": { 459 | "collapsed": false 460 | }, 461 | "outputs": [], 462 | "source": [ 463 | "def feature_normalize(X):\n", 464 | "    mu = np.mean(X, axis=0)\n", 465 | "    X_norm = X - mu\n", 466 | "    sigma = np.std(X_norm, axis=0)\n", 467 | "    sigma[sigma == 0] = 1\n", 468 | "    X_norm = X_norm / sigma\n", 469 | "    \n", 470 | "    return X_norm, mu, sigma " 471 | ] 472 | }, 473 | { 474 | "cell_type": "markdown", 475 | "metadata": {}, 476 | "source": [ 477 | "Normalize X:" 478 | ] 479 | }, 480 | { 481 | "cell_type": "code", 482 | "execution_count": null, 483 | "metadata": { 484 | "collapsed": false, 485 | "scrolled": true 486 | }, 487 | "outputs": [], 488 | "source": [ 489 | "X_poly, mu, sigma = feature_normalize(X_poly)\n", 490 | "X_poly.shape" 491 | ] 492 | }, 493 | { 494 | "cell_type": "markdown", 495 | "metadata": {}, 496 | "source": [ 497 | "Add ones column:" 498 | ] 499 | }, 500 | { 501 | "cell_type": "code", 502 | "execution_count": null, 503 | "metadata": { 504 | "collapsed": false 505 | }, 506 | "outputs": [], 507 | "source": [ 508 | "X_poly = np.hstack([np.ones((m, 1)), X_poly])" 509 | ] 510 | }, 511 | { 512 | "cell_type": "markdown", 513 | "metadata": {}, 514 | "source": [ 515 | "Map and normalize X_test and X_val:" 516 | ] 517 | }, 518 | { 519 | "cell_type": "code", 520 | "execution_count": null, 521 | "metadata": { 522 | "collapsed": false 523 | }, 524 | "outputs": [], 525 | "source": [ 526 | "X_poly_test = (poly_features(Xtest, p) - mu) / sigma\n", 527 | "X_poly_test = np.hstack([np.ones((X_poly_test.shape[0], 1)), X_poly_test])\n", 528 | "X_poly_test.shape" 529 | ] 530 | }, 531 | { 532 | "cell_type": "code", 533 | "execution_count": null, 534 | "metadata": { 535 | "collapsed": false 536 | }, 537 | "outputs": [], 538 | "source": [ 539 | "X_poly_val = (poly_features(Xval, p) - mu) / sigma\n", 540 | "X_poly_val = np.hstack([np.ones((X_poly_val.shape[0], 1)), X_poly_val])\n", 541 | "X_poly_val.shape" 542 | ] 543 | }, 544 | { 545 | "cell_type": "markdown", 546 | "metadata": {}, 547 | "source": [ 548 | "## Part 7: Learning Curve for Polynomial Regression \n", 549 | " Now, you will get to experiment with polynomial regression with multiple\n", 550 | " values of lambda. The code below runs polynomial regression with \n", 551 | " lambda = 0. 
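Sketches for the learning_curve and poly_features skeletons above, assuming `trainLinearRegression` and `linearRegCostFunction` as defined earlier in this notebook. Per the notes, errors are evaluated with the lambda argument set to 0, even though training uses the given lambda.

```python
import numpy as np

def learning_curve(X, y, Xval, yval, lambda_):
    m = X.shape[0]
    error_train = np.zeros(m - 1)
    error_val = np.zeros(m - 1)
    for i in range(1, m):
        theta = trainLinearRegression(X[:i], y[:i], lambda_).x
        error_train[i - 1], _ = linearRegCostFunction(X[:i], y[:i], theta, 0)
        error_val[i - 1], _ = linearRegCostFunction(Xval, yval, theta, 0)
    return error_train, error_val

def poly_features(X_orig, p):
    x = np.asarray(X_orig).ravel()
    # Column j holds x raised to the power j + 1.
    return np.column_stack([x ** (j + 1) for j in range(p)])
```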
You should try running the code with different values of\n", 553 | " lambda to see how the fit and learning curve change.\n" 554 | ] 555 | }, 556 | { 557 | "cell_type": "code", 558 | "execution_count": null, 559 | "metadata": { 560 | "collapsed": true 561 | }, 562 | "outputs": [], 563 | "source": [ 564 | "def plot_fit(ax, min_x, max_x, mu, sigma, theta, p):\n", 565 | " x = np.linspace(min_x - 15, max_x + 15)\n", 566 | " X_poly = (poly_features(x, p) - mu) / sigma\n", 567 | " X_poly = np.c_[np.ones(len(x)), X_poly]\n", 568 | " ax.plot(x, X_poly.dot(theta))" 569 | ] 570 | }, 571 | { 572 | "cell_type": "markdown", 573 | "metadata": {}, 574 | "source": [ 575 | "Plot training data and fit:" 576 | ] 577 | }, 578 | { 579 | "cell_type": "code", 580 | "execution_count": null, 581 | "metadata": { 582 | "collapsed": false, 583 | "scrolled": true 584 | }, 585 | "outputs": [], 586 | "source": [ 587 | "lambda_ = 0\n", 588 | "res = trainLinearRegression(X_poly, y, lambda_)\n", 589 | "print(res)\n", 590 | "theta = res.x\n", 591 | "\n", 592 | "fig, ax = plt.subplots(figsize=(5,5))\n", 593 | "ax.scatter(X, y, marker='x', c='r')\n", 594 | "ax.set_xlabel('Change in water level')\n", 595 | "ax.set_ylabel('Water flowing out of the dam')\n", 596 | "plot_fit(ax, np.min(X), np.max(X), mu, sigma, theta, p)" 597 | ] 598 | }, 599 | { 600 | "cell_type": "markdown", 601 | "metadata": {}, 602 | "source": [ 603 | "Polynomial Regression Learning Curve:" 604 | ] 605 | }, 606 | { 607 | "cell_type": "code", 608 | "execution_count": null, 609 | "metadata": { 610 | "collapsed": false, 611 | "scrolled": false 612 | }, 613 | "outputs": [], 614 | "source": [ 615 | "error_train, error_val = learning_curve(X_poly, y, X_poly_val, yval, lambda_)\n", 616 | "plt.plot(range(m-1), error_train)\n", 617 | "plt.plot(range(m-1), error_val)\n", 618 | "plt.legend(['Train', 'Cross Validation'])" 619 | ] 620 | }, 621 | { 622 | "cell_type": "markdown", 623 | "metadata": {}, 624 | "source": [ 625 | "## Part 8: Validation for Selecting Lambda \n", 626 | " You will now implement validation_curve to test various values of \n", 627 | " lambda on a validation set. You will then use this to select the\n", 628 | " \"best\" lambda value.\n" 629 | ] 630 | }, 631 | { 632 | "cell_type": "code", 633 | "execution_count": null, 634 | "metadata": { 635 | "collapsed": true 636 | }, 637 | "outputs": [], 638 | "source": [ 639 | "def validation_curve(X, y, Xval, yval):\n", 640 | " #VALIDATIONCURVE Generate the train and validation errors needed to\n", 641 | " #plot a validation curve that we can use to select lambda\n", 642 | " # [lambda_vec, error_train, error_val] = ...\n", 643 | " # VALIDATIONCURVE(X, y, Xval, yval) returns the train\n", 644 | " # and validation errors (in error_train, error_val)\n", 645 | " # for different values of lambda. You are given the training set (X,\n", 646 | " # y) and validation set (Xval, yval).\n", 647 | " #\n", 648 | "\n", 649 | " # Selected values of lambda (you should not change this)\n", 650 | " lambda_vec = [0, 0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1, 3, 10]\n", 651 | " \n", 652 | " # You need to return these variables correctly.\n", 653 | " error_train = np.zeros(len(lambda_vec))\n", 654 | " error_val = np.zeros(len(lambda_vec))\n", 655 | " \n", 656 | " \n", 657 | " # ====================== YOUR CODE HERE ======================\n", 658 | " # Instructions: Fill in this function to return training errors in \n", 659 | " # error_train and the validation errors in error_val. 
The \n", 660 | " # vector lambda_vec contains the different lambda parameters \n", 661 | " # to use for each calculation of the errors, i.e, \n", 662 | " # error_train(i), and error_val(i) should give \n", 663 | " # you the errors obtained after training with \n", 664 | " # lambda = lambda_vec(i)\n", 665 | " #\n", 666 | " # Note: You can loop over lambda_vec with the following:\n", 667 | " #\n", 668 | " # for i in range(len(lambda_vec)):\n", 669 | " # lambda = lambda_vec[i];\n", 670 | " # # Compute train / val errors when training linear \n", 671 | " # # regression with regularization parameter lambda\n", 672 | " # # You should store the result in error_train[i]\n", 673 | " # # and error_val[i]\n", 674 | " # ....\n", 675 | " # \n", 676 | " # end\n", 677 | " #\n", 678 | " #\n", 679 | " \n", 680 | " \n", 681 | " \n", 682 | " \n", 683 | " \n", 684 | " # =========================================================================\n", 685 | " \n", 686 | " return lambda_vec, error_train, error_val" 687 | ] 688 | }, 689 | { 690 | "cell_type": "code", 691 | "execution_count": null, 692 | "metadata": { 693 | "collapsed": false, 694 | "scrolled": false 695 | }, 696 | "outputs": [], 697 | "source": [ 698 | "lambda_vec, error_train, error_val = validation_curve(X_poly, y, X_poly_val, yval)\n", 699 | "plt.plot(lambda_vec, error_train, lambda_vec, error_val)\n", 700 | "plt.ylim([0, 20])\n", 701 | "plt.legend(['Training error', 'Cross validation error'])" 702 | ] 703 | }, 704 | { 705 | "cell_type": "code", 706 | "execution_count": null, 707 | "metadata": { 708 | "collapsed": false 709 | }, 710 | "outputs": [], 711 | "source": [ 712 | "lambda_ = 3\n", 713 | "res = trainLinearRegression(X_poly, y, lambda_)\n", 714 | "theta = res.x\n", 715 | "\n", 716 | "fig, ax = plt.subplots(figsize=(5,5))\n", 717 | "ax.scatter(X, y, marker='x', c='r')\n", 718 | "ax.set_xlabel('Change in water level')\n", 719 | "ax.set_ylabel('Water flowing out of the dam')\n", 720 | "plot_fit(ax, np.min(X), np.max(X), mu, sigma, theta, p)" 721 | ] 722 | } 723 | ], 724 | "metadata": { 725 | "kernelspec": { 726 | "display_name": "Python 3", 727 | "language": "python", 728 | "name": "python3" 729 | }, 730 | "language_info": { 731 | "codemirror_mode": { 732 | "name": "ipython", 733 | "version": 3 734 | }, 735 | "file_extension": ".py", 736 | "mimetype": "text/x-python", 737 | "name": "python", 738 | "nbconvert_exporter": "python", 739 | "pygments_lexer": "ipython3", 740 | "version": "3.4.3" 741 | } 742 | }, 743 | "nbformat": 4, 744 | "nbformat_minor": 0 745 | } 746 | -------------------------------------------------------------------------------- /ex6/ex6data1.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/noammor/coursera-machinelearning-python/454abd5a4ad315dc8a6391b4bb0ce4c280a394bd/ex6/ex6data1.mat -------------------------------------------------------------------------------- /ex6/ex6data2.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/noammor/coursera-machinelearning-python/454abd5a4ad315dc8a6391b4bb0ce4c280a394bd/ex6/ex6data2.mat -------------------------------------------------------------------------------- /ex6/ex6data3.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/noammor/coursera-machinelearning-python/454abd5a4ad315dc8a6391b4bb0ce4c280a394bd/ex6/ex6data3.mat 
-------------------------------------------------------------------------------- /ex6/ml-ex6.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Exercise 6 | Support Vector Machines" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": { 14 | "collapsed": false 15 | }, 16 | "outputs": [], 17 | "source": [ 18 | "import numpy as np\n", 19 | "import matplotlib.pyplot as plt\n", 20 | "import scipy.io\n", 21 | "import sklearn.svm" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": { 28 | "collapsed": true 29 | }, 30 | "outputs": [], 31 | "source": [ 32 | "%matplotlib inline" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [ 39 | "## Part 1: Loading and Visualizing Data\n", 40 | " We start the exercise by first loading and visualizing the dataset. \n", 41 | " The following code will load the dataset into your environment and plot\n", 42 | " the data.\n", 43 | "\n" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": null, 49 | "metadata": { 50 | "collapsed": false 51 | }, 52 | "outputs": [], 53 | "source": [ 54 | "ex6data1 = scipy.io.loadmat('ex6data1.mat')\n", 55 | "X = ex6data1['X']\n", 56 | "y = ex6data1['y'][:, 0]" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": null, 62 | "metadata": { 63 | "collapsed": false, 64 | "scrolled": true 65 | }, 66 | "outputs": [], 67 | "source": [ 68 | "def plot_data(X, y, ax=None):\n", 69 | " if ax == None:\n", 70 | " fig, ax = plt.subplots(figsize=(7,5))\n", 71 | " pos = y==1\n", 72 | " neg = y==0\n", 73 | " ax.scatter(X[pos,0], X[pos,1], marker='+', color='b')\n", 74 | " ax.scatter(X[neg,0], X[neg,1], marker='o', color='r', s=5)\n", 75 | "plot_data(X, y)" 76 | ] 77 | }, 78 | { 79 | "cell_type": "markdown", 80 | "metadata": {}, 81 | "source": [ 82 | "## Part 2: Training Linear SVM \n", 83 | " The following code will train a linear SVM on the dataset and plot the\n", 84 | " decision boundary learned.\n", 85 | " \n", 86 | " You should try to change the C value below and see how the decision\n", 87 | " boundary varies (e.g., try C = 1000)\n", 88 | "\n" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": null, 94 | "metadata": { 95 | "collapsed": false 96 | }, 97 | "outputs": [], 98 | "source": [ 99 | "svm = sklearn.svm.SVC(C=1, kernel='linear')\n", 100 | "svm.fit(X, y)\n", 101 | "np.mean(svm.predict(X) == y)" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": null, 107 | "metadata": { 108 | "collapsed": false 109 | }, 110 | "outputs": [], 111 | "source": [ 112 | "svm.coef_" 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": null, 118 | "metadata": { 119 | "collapsed": false, 120 | "scrolled": false 121 | }, 122 | "outputs": [], 123 | "source": [ 124 | "fig, ax = plt.subplots(figsize=(7,5))\n", 125 | "\n", 126 | "def draw_contour(X, model):\n", 127 | " x1 = np.linspace(np.min(X[:,0]), np.max(X[:,0]), 200)\n", 128 | " x2 = np.linspace(np.min(X[:,1]), np.max(X[:,1]), 200)\n", 129 | "\n", 130 | " xx1, xx2 = np.meshgrid(x1, x2)\n", 131 | " yy = model.predict(np.c_[xx1.flat, xx2.flat]).reshape(xx1.shape)\n", 132 | " ax.contour(x1, x2, yy, levels=[0.5])\n", 133 | " \n", 134 | "plot_data(X, y, ax)\n", 135 | "draw_contour(X, svm)" 136 | ] 137 | }, 138 | { 139 | "cell_type": "markdown", 140 | "metadata": 
{}, 141 | "source": [ 142 | "## Part 3: Implementing Gaussian Kernel \n", 143 | " You will now implement the Gaussian kernel to use\n", 144 | " with the SVM. You should complete the code in gaussianKernel. This notebook will not use it, however. An sklearn custom kernel should return a matrix of all kernel values. Feel free to implement gaussianKernel in the sklearn way, and later call svm.SVC(kernel=gaussianKernel).\n" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": null, 150 | "metadata": { 151 | "collapsed": false 152 | }, 153 | "outputs": [], 154 | "source": [ 155 | "def gaussianKernel(x1, x2, sigma):\n", 156 | " # ====================== YOUR CODE HERE ======================\n", 157 | " # Instructions: Fill in this function to return the similarity between x1\n", 158 | " # and x2 computed using a Gaussian kernel with bandwidth\n", 159 | " # sigma\n", 160 | " #\n", 161 | " #\n", 162 | " \n", 163 | " return 0\n", 164 | " \n", 165 | " # =============================================================\n", 166 | " " 167 | ] 168 | }, 169 | { 170 | "cell_type": "markdown", 171 | "metadata": {}, 172 | "source": [ 173 | "The Gaussian Kernel between `x1 = [1; 2; 1]`, `x2 = [0; 4; -1]`, `sigma = 2` should be about `0.324652`." 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": null, 179 | "metadata": { 180 | "collapsed": false, 181 | "scrolled": true 182 | }, 183 | "outputs": [], 184 | "source": [ 185 | "gaussianKernel(x1=np.array([1, 2, 1]), x2=np.array([0, 4, -1]), sigma=2)" 186 | ] 187 | }, 188 | { 189 | "cell_type": "markdown", 190 | "metadata": {}, 191 | "source": [ 192 | "## Part 4: Visualizing Dataset 2 \n", 193 | " The following code will load the next dataset into your environment and \n", 194 | " plot the data. 
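A possible fill-in for the gaussianKernel skeleton above -- a sketch of the element-wise similarity, which reproduces the expected value quoted in the notebook (squared distance between the two vectors is 9, so exp(-9/8) ~ 0.324652):

def gaussianKernel(x1, x2, sigma):
    # Similarity = exp(-||x1 - x2||^2 / (2 * sigma^2)).
    diff = x1 - x2
    return np.exp(-diff.dot(diff) / (2 * sigma**2))

An sklearn-style custom kernel would instead take two matrices and return the full pairwise kernel matrix; one hypothetical wrapper is np.exp(-cdist(A, B, 'sqeuclidean') / (2 * sigma**2)) with cdist from scipy.spatial.distance.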
\n" 195 |    ] 196 |   }, 197 |   { 198 |    "cell_type": "code", 199 |    "execution_count": null, 200 |    "metadata": { 201 |     "collapsed": false 202 |    }, 203 |    "outputs": [], 204 |    "source": [ 205 |     "ex6data2 = scipy.io.loadmat('ex6data2.mat')\n", 206 |     "X = ex6data2['X']\n", 207 |     "y = ex6data2['y'][:,0]\n", 208 |     "print(X.shape, y.shape)" 209 |    ] 210 |   }, 211 |   { 212 |    "cell_type": "code", 213 |    "execution_count": null, 214 |    "metadata": { 215 |     "collapsed": false, 216 |     "scrolled": true 217 |    }, 218 |    "outputs": [], 219 |    "source": [ 220 |     "plot_data(X, y)" 221 |    ] 222 |   }, 223 |   { 224 |    "cell_type": "markdown", 225 |    "metadata": {}, 226 |    "source": [ 227 |     "## Part 5: Training SVM with RBF Kernel (Dataset 2) \n", 228 |     " After you have implemented the kernel, we can now use it to train the \n", 229 |     " SVM classifier.\n", 230 |     " \n", 231 |     " Note that this cell does not actually use your gaussianKernel implementation; it simply uses the built-in Gaussian (RBF) kernel in sklearn.\n", 232 |     " " 233 |    ] 234 |   }, 235 |   { 236 |    "cell_type": "code", 237 |    "execution_count": null, 238 |    "metadata": { 239 |     "collapsed": false 240 |    }, 241 |    "outputs": [], 242 |    "source": [ 243 |     "model = sklearn.svm.SVC(C=1, gamma=100, kernel='rbf')\n", 244 |     "model.fit(X, y)\n", 245 |     "np.mean((model.predict(X) == y))" 246 |    ] 247 |   }, 248 |   { 249 |    "cell_type": "code", 250 |    "execution_count": null, 251 |    "metadata": { 252 |     "collapsed": false, 253 |     "scrolled": true 254 |    }, 255 |    "outputs": [], 256 |    "source": [ 257 |     "fig, ax = plt.subplots()\n", 258 |     "plot_data(X, y, ax)\n", 259 |     "draw_contour(X, model)" 260 |    ] 261 |   }, 262 |   { 263 |    "cell_type": "markdown", 264 |    "metadata": {}, 265 |    "source": [ 266 |     "## Part 6: Visualizing Dataset 3 \n", 267 |     " The following code will load the next dataset into your environment and \n", 268 |     " plot the data. \n" 269 |    ] 270 |   }, 271 |   { 272 |    "cell_type": "code", 273 |    "execution_count": null, 274 |    "metadata": { 275 |     "collapsed": false 276 |    }, 277 |    "outputs": [], 278 |    "source": [ 279 |     "ex6data3 = scipy.io.loadmat('ex6data3.mat')\n", 280 |     "X = ex6data3['X']\n", 281 |     "y = ex6data3['y'][:, 0]\n", 282 |     "Xval = ex6data3['Xval']\n", 283 |     "yval = ex6data3['yval'][:, 0]\n", 284 |     "\n", 285 |     "print(X.shape, y.shape, Xval.shape, yval.shape)" 286 |    ] 287 |   }, 288 |   { 289 |    "cell_type": "code", 290 |    "execution_count": null, 291 |    "metadata": { 292 |     "collapsed": false 293 |    }, 294 |    "outputs": [], 295 |    "source": [ 296 |     "plot_data(X, y)" 297 |    ] 298 |   }, 299 |   { 300 |    "cell_type": "code", 301 |    "execution_count": null, 302 |    "metadata": { 303 |     "collapsed": false, 304 |     "scrolled": true 305 |    }, 306 |    "outputs": [], 307 |    "source": [ 308 |     "plot_data(Xval, yval)" 309 |    ] 310 |   }, 311 |   { 312 |    "cell_type": "markdown", 313 |    "metadata": {}, 314 |    "source": [ 315 |     "## Part 7: Training SVM with RBF Kernel (Dataset 3) \n", 316 |     "\n", 317 |     " This is a different dataset that you can use to experiment with.
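Two notes before Part 7. First, sklearn parameterizes the RBF kernel as exp(-gamma * ||x - x'||^2), so the course material's sigma corresponds to gamma = 1 / (2 * sigma^2). Second, here is one possible way to fill in the Part 7 grid-search cell that follows; it is a sketch that reuses that cell's names (possible_C, possible_gamma, cv_errors, itertools), and the choice of np.mean(predictions != yval) as the validation error is an assumption consistent with the accuracy computations earlier in this notebook:

# sigma -> gamma conversion: e.g. the course's sigma = 0.1 is gamma = 50.
sigma = 0.1
gamma = 1 / (2 * sigma**2)   # ~ 50

# Fit an RBF SVM for every (C, gamma) pair and record its validation error.
for i, j in itertools.product(range(len(possible_C)), range(len(possible_gamma))):
    candidate = sklearn.svm.SVC(C=possible_C[i], gamma=possible_gamma[j], kernel='rbf')
    candidate.fit(X, y)
    cv_errors[i, j] = np.mean(candidate.predict(Xval) != yval)

# Keep the pair that minimizes the cross validation error.
best_i, best_j = np.unravel_index(np.argmin(cv_errors), cv_errors.shape)
C, gamma = possible_C[best_i], possible_gamma[best_j]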
Try\n", 318 |     " different values of C and gamma here: train a classifier on your training data, measure the cross validation error, and find the values of C and gamma that minimize it.\n", 319 |     " \n" 320 |    ] 321 |   }, 322 |   { 323 |    "cell_type": "code", 324 |    "execution_count": null, 325 |    "metadata": { 326 |     "collapsed": false 327 |    }, 328 |    "outputs": [], 329 |    "source": [ 330 |     "import itertools\n", 331 |     "\n", 332 |     "possible_C = [0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30, 100, 300, 1000]\n", 333 |     "possible_gamma = [0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30, 100, 300, 1000]\n", 334 |     "cv_errors = np.zeros((len(possible_C), len(possible_gamma)))\n", 335 |     "\n", 336 |     "# YOUR CODE GOES HERE\n", 337 |     "\n", 338 |     "C = 7\n", 339 |     "gamma = 7\n", 340 |     "\n", 341 |     "\n", 342 |     "# ==================" 343 |    ] 344 |   }, 345 |   { 346 |    "cell_type": "code", 347 |    "execution_count": null, 348 |    "metadata": { 349 |     "collapsed": false 350 |    }, 351 |    "outputs": [], 352 |    "source": [ 353 |     "model = sklearn.svm.SVC(C=C, gamma=gamma, kernel='rbf')\n", 354 |     "model.fit(X, y)\n", 355 |     "fig, ax = plt.subplots()\n", 356 |     "plot_data(X, y, ax)\n", 357 |     "draw_contour(X, model)" 358 |    ] 359 |   } 360 |  ], 361 |  "metadata": { 362 |   "kernelspec": { 363 |    "display_name": "Python 3", 364 |    "language": "python", 365 |    "name": "python3" 366 |   }, 367 |   "language_info": { 368 |    "codemirror_mode": { 369 |     "name": "ipython", 370 |     "version": 3 371 |    }, 372 |    "file_extension": ".py", 373 |    "mimetype": "text/x-python", 374 |    "name": "python", 375 |    "nbconvert_exporter": "python", 376 |    "pygments_lexer": "ipython3", 377 |    "version": "3.4.3" 378 |   } 379 |  }, 380 |  "nbformat": 4, 381 |  "nbformat_minor": 0 382 | } 383 | -------------------------------------------------------------------------------- /ex6/spamSample1.txt: -------------------------------------------------------------------------------- 1 | Do You Want To Make $1000 Or More Per Week? 2 | 3 | 4 | 5 | If you are a motivated and qualified individual - I 6 | will personally demonstrate to you a system that will 7 | make you $1,000 per week or more! This is NOT mlm. 8 | 9 | 10 | 11 | Call our 24 hour pre-recorded number to get the 12 | details. 13 | 14 | 15 | 16 | 000-456-789 17 | 18 | 19 | 20 | I need people who want to make serious money. Make 21 | the call and get the facts. 22 | 23 | Invest 2 minutes in yourself now! 24 | 25 | 26 | 27 | 000-456-789 28 | 29 | 30 | 31 | Looking forward to your call and I will introduce you 32 | to people like yourself who 33 | are currently making $10,000 plus per week! 34 | 35 | 36 | 37 | 000-456-789 38 | 39 | 40 | 41 | 3484lJGv6-241lEaN9080lRmS6-271WxHo7524qiyT5-438rjUv5615hQcf0-662eiDB9057dMtVl72 42 | 43 | -------------------------------------------------------------------------------- /ex6/spamSample2.txt: -------------------------------------------------------------------------------- 1 | Best Buy Viagra Generic Online 2 | 3 | Viagra 100mg x 60 Pills $125, Free Pills & Reorder Discount, Top Selling 100% Quality & Satisfaction guaranteed! 4 | 5 | We accept VISA, Master & E-Check Payments, 90000+ Satisfied Customers!
6 | http://medphysitcstech.ru 7 | 8 | 9 | -------------------------------------------------------------------------------- /ex6/spamTest.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/noammor/coursera-machinelearning-python/454abd5a4ad315dc8a6391b4bb0ce4c280a394bd/ex6/spamTest.mat -------------------------------------------------------------------------------- /ex6/spamTrain.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/noammor/coursera-machinelearning-python/454abd5a4ad315dc8a6391b4bb0ce4c280a394bd/ex6/spamTrain.mat -------------------------------------------------------------------------------- /ex6/vocab.txt: -------------------------------------------------------------------------------- 1 | 1 aa 2 | 2 ab 3 | 3 abil 4 | 4 abl 5 | 5 about 6 | 6 abov 7 | 7 absolut 8 | 8 abus 9 | 9 ac 10 | 10 accept 11 | 11 access 12 | 12 accord 13 | 13 account 14 | 14 achiev 15 | 15 acquir 16 | 16 across 17 | 17 act 18 | 18 action 19 | 19 activ 20 | 20 actual 21 | 21 ad 22 | 22 adam 23 | 23 add 24 | 24 addit 25 | 25 address 26 | 26 administr 27 | 27 adult 28 | 28 advanc 29 | 29 advantag 30 | 30 advertis 31 | 31 advic 32 | 32 advis 33 | 33 ae 34 | 34 af 35 | 35 affect 36 | 36 affili 37 | 37 afford 38 | 38 africa 39 | 39 after 40 | 40 ag 41 | 41 again 42 | 42 against 43 | 43 agenc 44 | 44 agent 45 | 45 ago 46 | 46 agre 47 | 47 agreement 48 | 48 aid 49 | 49 air 50 | 50 al 51 | 51 alb 52 | 52 align 53 | 53 all 54 | 54 allow 55 | 55 almost 56 | 56 alon 57 | 57 along 58 | 58 alreadi 59 | 59 alsa 60 | 60 also 61 | 61 altern 62 | 62 although 63 | 63 alwai 64 | 64 am 65 | 65 amaz 66 | 66 america 67 | 67 american 68 | 68 among 69 | 69 amount 70 | 70 amp 71 | 71 an 72 | 72 analysi 73 | 73 analyst 74 | 74 and 75 | 75 ani 76 | 76 anim 77 | 77 announc 78 | 78 annual 79 | 79 annuiti 80 | 80 anoth 81 | 81 answer 82 | 82 anti 83 | 83 anumb 84 | 84 anybodi 85 | 85 anymor 86 | 86 anyon 87 | 87 anyth 88 | 88 anywai 89 | 89 anywher 90 | 90 aol 91 | 91 ap 92 | 92 apolog 93 | 93 app 94 | 94 appar 95 | 95 appear 96 | 96 appl 97 | 97 appli 98 | 98 applic 99 | 99 appreci 100 | 100 approach 101 | 101 approv 102 | 102 apt 103 | 103 ar 104 | 104 archiv 105 | 105 area 106 | 106 aren 107 | 107 argument 108 | 108 arial 109 | 109 arm 110 | 110 around 111 | 111 arrai 112 | 112 arriv 113 | 113 art 114 | 114 articl 115 | 115 artist 116 | 116 as 117 | 117 ascii 118 | 118 ask 119 | 119 asset 120 | 120 assist 121 | 121 associ 122 | 122 assum 123 | 123 assur 124 | 124 at 125 | 125 atol 126 | 126 attach 127 | 127 attack 128 | 128 attempt 129 | 129 attent 130 | 130 attornei 131 | 131 attract 132 | 132 audio 133 | 133 aug 134 | 134 august 135 | 135 author 136 | 136 auto 137 | 137 autom 138 | 138 automat 139 | 139 avail 140 | 140 averag 141 | 141 avoid 142 | 142 awai 143 | 143 awar 144 | 144 award 145 | 145 ba 146 | 146 babi 147 | 147 back 148 | 148 background 149 | 149 backup 150 | 150 bad 151 | 151 balanc 152 | 152 ban 153 | 153 bank 154 | 154 bar 155 | 155 base 156 | 156 basenumb 157 | 157 basi 158 | 158 basic 159 | 159 bb 160 | 160 bc 161 | 161 bd 162 | 162 be 163 | 163 beat 164 | 164 beberg 165 | 165 becaus 166 | 166 becom 167 | 167 been 168 | 168 befor 169 | 169 begin 170 | 170 behalf 171 | 171 behavior 172 | 172 behind 173 | 173 believ 174 | 174 below 175 | 175 benefit 176 | 176 best 177 | 177 beta 178 | 178 better 179 | 179 between 180 | 180 bf 181 | 181 big 182 | 182 bill 183 | 183 billion 184 
| 184 bin 185 | 185 binari 186 | 186 bit 187 | 187 black 188 | 188 blank 189 | 189 block 190 | 190 blog 191 | 191 blood 192 | 192 blue 193 | 193 bnumber 194 | 194 board 195 | 195 bodi 196 | 196 boi 197 | 197 bonu 198 | 198 book 199 | 199 boot 200 | 200 border 201 | 201 boss 202 | 202 boston 203 | 203 botan 204 | 204 both 205 | 205 bottl 206 | 206 bottom 207 | 207 boundari 208 | 208 box 209 | 209 brain 210 | 210 brand 211 | 211 break 212 | 212 brian 213 | 213 bring 214 | 214 broadcast 215 | 215 broker 216 | 216 browser 217 | 217 bug 218 | 218 bui 219 | 219 build 220 | 220 built 221 | 221 bulk 222 | 222 burn 223 | 223 bush 224 | 224 busi 225 | 225 but 226 | 226 button 227 | 227 by 228 | 228 byte 229 | 229 ca 230 | 230 cabl 231 | 231 cach 232 | 232 calcul 233 | 233 california 234 | 234 call 235 | 235 came 236 | 236 camera 237 | 237 campaign 238 | 238 can 239 | 239 canada 240 | 240 cannot 241 | 241 canon 242 | 242 capabl 243 | 243 capillari 244 | 244 capit 245 | 245 car 246 | 246 card 247 | 247 care 248 | 248 career 249 | 249 carri 250 | 250 cartridg 251 | 251 case 252 | 252 cash 253 | 253 cat 254 | 254 catch 255 | 255 categori 256 | 256 caus 257 | 257 cb 258 | 258 cc 259 | 259 cd 260 | 260 ce 261 | 261 cell 262 | 262 cent 263 | 263 center 264 | 264 central 265 | 265 centuri 266 | 266 ceo 267 | 267 certain 268 | 268 certainli 269 | 269 cf 270 | 270 challeng 271 | 271 chanc 272 | 272 chang 273 | 273 channel 274 | 274 char 275 | 275 charact 276 | 276 charg 277 | 277 charset 278 | 278 chat 279 | 279 cheap 280 | 280 check 281 | 281 cheer 282 | 282 chief 283 | 283 children 284 | 284 china 285 | 285 chip 286 | 286 choic 287 | 287 choos 288 | 288 chri 289 | 289 citi 290 | 290 citizen 291 | 291 civil 292 | 292 claim 293 | 293 class 294 | 294 classifi 295 | 295 clean 296 | 296 clear 297 | 297 clearli 298 | 298 click 299 | 299 client 300 | 300 close 301 | 301 clue 302 | 302 cnet 303 | 303 cnumber 304 | 304 co 305 | 305 code 306 | 306 collect 307 | 307 colleg 308 | 308 color 309 | 309 com 310 | 310 combin 311 | 311 come 312 | 312 comfort 313 | 313 command 314 | 314 comment 315 | 315 commentari 316 | 316 commerci 317 | 317 commiss 318 | 318 commit 319 | 319 common 320 | 320 commun 321 | 321 compani 322 | 322 compar 323 | 323 comparison 324 | 324 compat 325 | 325 compet 326 | 326 competit 327 | 327 compil 328 | 328 complet 329 | 329 comprehens 330 | 330 comput 331 | 331 concentr 332 | 332 concept 333 | 333 concern 334 | 334 condit 335 | 335 conf 336 | 336 confer 337 | 337 confid 338 | 338 confidenti 339 | 339 config 340 | 340 configur 341 | 341 confirm 342 | 342 conflict 343 | 343 confus 344 | 344 congress 345 | 345 connect 346 | 346 consid 347 | 347 consolid 348 | 348 constitut 349 | 349 construct 350 | 350 consult 351 | 351 consum 352 | 352 contact 353 | 353 contain 354 | 354 content 355 | 355 continu 356 | 356 contract 357 | 357 contribut 358 | 358 control 359 | 359 conveni 360 | 360 convers 361 | 361 convert 362 | 362 cool 363 | 363 cooper 364 | 364 copi 365 | 365 copyright 366 | 366 core 367 | 367 corpor 368 | 368 correct 369 | 369 correspond 370 | 370 cost 371 | 371 could 372 | 372 couldn 373 | 373 count 374 | 374 countri 375 | 375 coupl 376 | 376 cours 377 | 377 court 378 | 378 cover 379 | 379 coverag 380 | 380 crash 381 | 381 creat 382 | 382 creativ 383 | 383 credit 384 | 384 critic 385 | 385 cross 386 | 386 cultur 387 | 387 current 388 | 388 custom 389 | 389 cut 390 | 390 cv 391 | 391 da 392 | 392 dagga 393 | 393 dai 394 | 394 daili 395 | 395 dan 396 | 396 danger 397 | 397 dark 398 | 398 data 399 
| 399 databas 400 | 400 datapow 401 | 401 date 402 | 402 dave 403 | 403 david 404 | 404 dc 405 | 405 de 406 | 406 dead 407 | 407 deal 408 | 408 dear 409 | 409 death 410 | 410 debt 411 | 411 decad 412 | 412 decid 413 | 413 decis 414 | 414 declar 415 | 415 declin 416 | 416 decor 417 | 417 default 418 | 418 defend 419 | 419 defens 420 | 420 defin 421 | 421 definit 422 | 422 degre 423 | 423 delai 424 | 424 delet 425 | 425 deliv 426 | 426 deliveri 427 | 427 dell 428 | 428 demand 429 | 429 democrat 430 | 430 depart 431 | 431 depend 432 | 432 deposit 433 | 433 describ 434 | 434 descript 435 | 435 deserv 436 | 436 design 437 | 437 desir 438 | 438 desktop 439 | 439 despit 440 | 440 detail 441 | 441 detect 442 | 442 determin 443 | 443 dev 444 | 444 devel 445 | 445 develop 446 | 446 devic 447 | 447 di 448 | 448 dial 449 | 449 did 450 | 450 didn 451 | 451 diet 452 | 452 differ 453 | 453 difficult 454 | 454 digit 455 | 455 direct 456 | 456 directli 457 | 457 director 458 | 458 directori 459 | 459 disabl 460 | 460 discount 461 | 461 discov 462 | 462 discoveri 463 | 463 discuss 464 | 464 disk 465 | 465 displai 466 | 466 disposit 467 | 467 distanc 468 | 468 distribut 469 | 469 dn 470 | 470 dnumber 471 | 471 do 472 | 472 doc 473 | 473 document 474 | 474 doe 475 | 475 doer 476 | 476 doesn 477 | 477 dollar 478 | 478 dollarac 479 | 479 dollarnumb 480 | 480 domain 481 | 481 don 482 | 482 done 483 | 483 dont 484 | 484 doubl 485 | 485 doubt 486 | 486 down 487 | 487 download 488 | 488 dr 489 | 489 draw 490 | 490 dream 491 | 491 drive 492 | 492 driver 493 | 493 drop 494 | 494 drug 495 | 495 due 496 | 496 dure 497 | 497 dvd 498 | 498 dw 499 | 499 dynam 500 | 500 ea 501 | 501 each 502 | 502 earli 503 | 503 earlier 504 | 504 earn 505 | 505 earth 506 | 506 easi 507 | 507 easier 508 | 508 easili 509 | 509 eat 510 | 510 eb 511 | 511 ebai 512 | 512 ec 513 | 513 echo 514 | 514 econom 515 | 515 economi 516 | 516 ed 517 | 517 edg 518 | 518 edit 519 | 519 editor 520 | 520 educ 521 | 521 eff 522 | 522 effect 523 | 523 effici 524 | 524 effort 525 | 525 either 526 | 526 el 527 | 527 electron 528 | 528 elimin 529 | 529 els 530 | 530 email 531 | 531 emailaddr 532 | 532 emerg 533 | 533 empir 534 | 534 employ 535 | 535 employe 536 | 536 en 537 | 537 enabl 538 | 538 encod 539 | 539 encourag 540 | 540 end 541 | 541 enemi 542 | 542 enenkio 543 | 543 energi 544 | 544 engin 545 | 545 english 546 | 546 enhanc 547 | 547 enjoi 548 | 548 enough 549 | 549 ensur 550 | 550 enter 551 | 551 enterpris 552 | 552 entertain 553 | 553 entir 554 | 554 entri 555 | 555 enumb 556 | 556 environ 557 | 557 equal 558 | 558 equip 559 | 559 equival 560 | 560 error 561 | 561 especi 562 | 562 essenti 563 | 563 establish 564 | 564 estat 565 | 565 estim 566 | 566 et 567 | 567 etc 568 | 568 euro 569 | 569 europ 570 | 570 european 571 | 571 even 572 | 572 event 573 | 573 eventu 574 | 574 ever 575 | 575 everi 576 | 576 everyon 577 | 577 everyth 578 | 578 evid 579 | 579 evil 580 | 580 exactli 581 | 581 exampl 582 | 582 excel 583 | 583 except 584 | 584 exchang 585 | 585 excit 586 | 586 exclus 587 | 587 execut 588 | 588 exercis 589 | 589 exist 590 | 590 exmh 591 | 591 expand 592 | 592 expect 593 | 593 expens 594 | 594 experi 595 | 595 expert 596 | 596 expir 597 | 597 explain 598 | 598 explor 599 | 599 express 600 | 600 extend 601 | 601 extens 602 | 602 extra 603 | 603 extract 604 | 604 extrem 605 | 605 ey 606 | 606 fa 607 | 607 face 608 | 608 fact 609 | 609 factor 610 | 610 fail 611 | 611 fair 612 | 612 fall 613 | 613 fals 614 | 614 famili 615 | 615 faq 616 | 616 far 
617 | 617 fast 618 | 618 faster 619 | 619 fastest 620 | 620 fat 621 | 621 father 622 | 622 favorit 623 | 623 fax 624 | 624 fb 625 | 625 fd 626 | 626 featur 627 | 627 feder 628 | 628 fee 629 | 629 feed 630 | 630 feedback 631 | 631 feel 632 | 632 femal 633 | 633 few 634 | 634 ffffff 635 | 635 ffnumber 636 | 636 field 637 | 637 fight 638 | 638 figur 639 | 639 file 640 | 640 fill 641 | 641 film 642 | 642 filter 643 | 643 final 644 | 644 financ 645 | 645 financi 646 | 646 find 647 | 647 fine 648 | 648 finish 649 | 649 fire 650 | 650 firewal 651 | 651 firm 652 | 652 first 653 | 653 fit 654 | 654 five 655 | 655 fix 656 | 656 flag 657 | 657 flash 658 | 658 flow 659 | 659 fnumber 660 | 660 focu 661 | 661 folder 662 | 662 folk 663 | 663 follow 664 | 664 font 665 | 665 food 666 | 666 for 667 | 667 forc 668 | 668 foreign 669 | 669 forev 670 | 670 forget 671 | 671 fork 672 | 672 form 673 | 673 format 674 | 674 former 675 | 675 fortun 676 | 676 forward 677 | 677 found 678 | 678 foundat 679 | 679 four 680 | 680 franc 681 | 681 free 682 | 682 freedom 683 | 683 french 684 | 684 freshrpm 685 | 685 fri 686 | 686 fridai 687 | 687 friend 688 | 688 from 689 | 689 front 690 | 690 ftoc 691 | 691 ftp 692 | 692 full 693 | 693 fulli 694 | 694 fun 695 | 695 function 696 | 696 fund 697 | 697 further 698 | 698 futur 699 | 699 ga 700 | 700 gain 701 | 701 game 702 | 702 gari 703 | 703 garrigu 704 | 704 gave 705 | 705 gcc 706 | 706 geek 707 | 707 gener 708 | 708 get 709 | 709 gif 710 | 710 gift 711 | 711 girl 712 | 712 give 713 | 713 given 714 | 714 global 715 | 715 gnome 716 | 716 gnu 717 | 717 gnupg 718 | 718 go 719 | 719 goal 720 | 720 god 721 | 721 goe 722 | 722 gold 723 | 723 gone 724 | 724 good 725 | 725 googl 726 | 726 got 727 | 727 govern 728 | 728 gpl 729 | 729 grand 730 | 730 grant 731 | 731 graphic 732 | 732 great 733 | 733 greater 734 | 734 ground 735 | 735 group 736 | 736 grow 737 | 737 growth 738 | 738 gt 739 | 739 guarante 740 | 740 guess 741 | 741 gui 742 | 742 guid 743 | 743 ha 744 | 744 hack 745 | 745 had 746 | 746 half 747 | 747 ham 748 | 748 hand 749 | 749 handl 750 | 750 happen 751 | 751 happi 752 | 752 hard 753 | 753 hardwar 754 | 754 hat 755 | 755 hate 756 | 756 have 757 | 757 haven 758 | 758 he 759 | 759 head 760 | 760 header 761 | 761 headlin 762 | 762 health 763 | 763 hear 764 | 764 heard 765 | 765 heart 766 | 766 heaven 767 | 767 hei 768 | 768 height 769 | 769 held 770 | 770 hello 771 | 771 help 772 | 772 helvetica 773 | 773 her 774 | 774 herba 775 | 775 here 776 | 776 hermio 777 | 777 hettinga 778 | 778 hi 779 | 779 high 780 | 780 higher 781 | 781 highli 782 | 782 highlight 783 | 783 him 784 | 784 histori 785 | 785 hit 786 | 786 hold 787 | 787 home 788 | 788 honor 789 | 789 hope 790 | 790 host 791 | 791 hot 792 | 792 hour 793 | 793 hous 794 | 794 how 795 | 795 howev 796 | 796 hp 797 | 797 html 798 | 798 http 799 | 799 httpaddr 800 | 800 huge 801 | 801 human 802 | 802 hundr 803 | 803 ibm 804 | 804 id 805 | 805 idea 806 | 806 ident 807 | 807 identifi 808 | 808 idnumb 809 | 809 ie 810 | 810 if 811 | 811 ignor 812 | 812 ii 813 | 813 iii 814 | 814 iiiiiiihnumberjnumberhnumberjnumberhnumb 815 | 815 illeg 816 | 816 im 817 | 817 imag 818 | 818 imagin 819 | 819 immedi 820 | 820 impact 821 | 821 implement 822 | 822 import 823 | 823 impress 824 | 824 improv 825 | 825 in 826 | 826 inc 827 | 827 includ 828 | 828 incom 829 | 829 increas 830 | 830 incred 831 | 831 inde 832 | 832 independ 833 | 833 index 834 | 834 india 835 | 835 indian 836 | 836 indic 837 | 837 individu 838 | 838 industri 839 | 839 info 840 | 
840 inform 841 | 841 initi 842 | 842 inlin 843 | 843 innov 844 | 844 input 845 | 845 insert 846 | 846 insid 847 | 847 instal 848 | 848 instanc 849 | 849 instant 850 | 850 instead 851 | 851 institut 852 | 852 instruct 853 | 853 insur 854 | 854 int 855 | 855 integr 856 | 856 intel 857 | 857 intellig 858 | 858 intend 859 | 859 interact 860 | 860 interest 861 | 861 interfac 862 | 862 intern 863 | 863 internet 864 | 864 interview 865 | 865 into 866 | 866 intro 867 | 867 introduc 868 | 868 inumb 869 | 869 invest 870 | 870 investig 871 | 871 investor 872 | 872 invok 873 | 873 involv 874 | 874 ip 875 | 875 ireland 876 | 876 irish 877 | 877 is 878 | 878 island 879 | 879 isn 880 | 880 iso 881 | 881 isp 882 | 882 issu 883 | 883 it 884 | 884 item 885 | 885 itself 886 | 886 jabber 887 | 887 jame 888 | 888 java 889 | 889 jim 890 | 890 jnumberiiiiiiihepihepihf 891 | 891 job 892 | 892 joe 893 | 893 john 894 | 894 join 895 | 895 journal 896 | 896 judg 897 | 897 judgment 898 | 898 jul 899 | 899 juli 900 | 900 jump 901 | 901 june 902 | 902 just 903 | 903 justin 904 | 904 keep 905 | 905 kei 906 | 906 kept 907 | 907 kernel 908 | 908 kevin 909 | 909 keyboard 910 | 910 kid 911 | 911 kill 912 | 912 kind 913 | 913 king 914 | 914 kingdom 915 | 915 knew 916 | 916 know 917 | 917 knowledg 918 | 918 known 919 | 919 la 920 | 920 lack 921 | 921 land 922 | 922 languag 923 | 923 laptop 924 | 924 larg 925 | 925 larger 926 | 926 largest 927 | 927 laser 928 | 928 last 929 | 929 late 930 | 930 later 931 | 931 latest 932 | 932 launch 933 | 933 law 934 | 934 lawrenc 935 | 935 le 936 | 936 lead 937 | 937 leader 938 | 938 learn 939 | 939 least 940 | 940 leav 941 | 941 left 942 | 942 legal 943 | 943 lender 944 | 944 length 945 | 945 less 946 | 946 lesson 947 | 947 let 948 | 948 letter 949 | 949 level 950 | 950 lib 951 | 951 librari 952 | 952 licens 953 | 953 life 954 | 954 lifetim 955 | 955 light 956 | 956 like 957 | 957 limit 958 | 958 line 959 | 959 link 960 | 960 linux 961 | 961 list 962 | 962 listen 963 | 963 littl 964 | 964 live 965 | 965 ll 966 | 966 lo 967 | 967 load 968 | 968 loan 969 | 969 local 970 | 970 locat 971 | 971 lock 972 | 972 lockergnom 973 | 973 log 974 | 974 long 975 | 975 longer 976 | 976 look 977 | 977 lose 978 | 978 loss 979 | 979 lost 980 | 980 lot 981 | 981 love 982 | 982 low 983 | 983 lower 984 | 984 lowest 985 | 985 lt 986 | 986 ma 987 | 987 mac 988 | 988 machin 989 | 989 made 990 | 990 magazin 991 | 991 mai 992 | 992 mail 993 | 993 mailer 994 | 994 main 995 | 995 maintain 996 | 996 major 997 | 997 make 998 | 998 maker 999 | 999 male 1000 | 1000 man 1001 | 1001 manag 1002 | 1002 mani 1003 | 1003 manual 1004 | 1004 manufactur 1005 | 1005 map 1006 | 1006 march 1007 | 1007 margin 1008 | 1008 mark 1009 | 1009 market 1010 | 1010 marshal 1011 | 1011 mass 1012 | 1012 master 1013 | 1013 match 1014 | 1014 materi 1015 | 1015 matter 1016 | 1016 matthia 1017 | 1017 mayb 1018 | 1018 me 1019 | 1019 mean 1020 | 1020 measur 1021 | 1021 mechan 1022 | 1022 media 1023 | 1023 medic 1024 | 1024 meet 1025 | 1025 member 1026 | 1026 membership 1027 | 1027 memori 1028 | 1028 men 1029 | 1029 mention 1030 | 1030 menu 1031 | 1031 merchant 1032 | 1032 messag 1033 | 1033 method 1034 | 1034 mh 1035 | 1035 michael 1036 | 1036 microsoft 1037 | 1037 middl 1038 | 1038 might 1039 | 1039 mike 1040 | 1040 mile 1041 | 1041 militari 1042 | 1042 million 1043 | 1043 mime 1044 | 1044 mind 1045 | 1045 mine 1046 | 1046 mini 1047 | 1047 minimum 1048 | 1048 minut 1049 | 1049 miss 1050 | 1050 mistak 1051 | 1051 mobil 1052 | 1052 mode 1053 | 1053 model 
1054 | 1054 modem 1055 | 1055 modifi 1056 | 1056 modul 1057 | 1057 moment 1058 | 1058 mon 1059 | 1059 mondai 1060 | 1060 monei 1061 | 1061 monitor 1062 | 1062 month 1063 | 1063 monthli 1064 | 1064 more 1065 | 1065 morn 1066 | 1066 mortgag 1067 | 1067 most 1068 | 1068 mostli 1069 | 1069 mother 1070 | 1070 motiv 1071 | 1071 move 1072 | 1072 movi 1073 | 1073 mpnumber 1074 | 1074 mr 1075 | 1075 ms 1076 | 1076 msg 1077 | 1077 much 1078 | 1078 multi 1079 | 1079 multipart 1080 | 1080 multipl 1081 | 1081 murphi 1082 | 1082 music 1083 | 1083 must 1084 | 1084 my 1085 | 1085 myself 1086 | 1086 name 1087 | 1087 nation 1088 | 1088 natur 1089 | 1089 nbsp 1090 | 1090 near 1091 | 1091 nearli 1092 | 1092 necessari 1093 | 1093 need 1094 | 1094 neg 1095 | 1095 net 1096 | 1096 netscap 1097 | 1097 network 1098 | 1098 never 1099 | 1099 new 1100 | 1100 newslett 1101 | 1101 next 1102 | 1102 nextpart 1103 | 1103 nice 1104 | 1104 nigeria 1105 | 1105 night 1106 | 1106 no 1107 | 1107 nobodi 1108 | 1108 non 1109 | 1109 none 1110 | 1110 nor 1111 | 1111 normal 1112 | 1112 north 1113 | 1113 not 1114 | 1114 note 1115 | 1115 noth 1116 | 1116 notic 1117 | 1117 now 1118 | 1118 nt 1119 | 1119 null 1120 | 1120 number 1121 | 1121 numbera 1122 | 1122 numberam 1123 | 1123 numberanumb 1124 | 1124 numberb 1125 | 1125 numberbit 1126 | 1126 numberc 1127 | 1127 numbercb 1128 | 1128 numbercbr 1129 | 1129 numbercfont 1130 | 1130 numbercli 1131 | 1131 numbercnumb 1132 | 1132 numbercp 1133 | 1133 numberctd 1134 | 1134 numberd 1135 | 1135 numberdari 1136 | 1136 numberdnumb 1137 | 1137 numberenumb 1138 | 1138 numberf 1139 | 1139 numberfb 1140 | 1140 numberff 1141 | 1141 numberffont 1142 | 1142 numberfp 1143 | 1143 numberftd 1144 | 1144 numberk 1145 | 1145 numberm 1146 | 1146 numbermb 1147 | 1147 numberp 1148 | 1148 numberpd 1149 | 1149 numberpm 1150 | 1150 numberpx 1151 | 1151 numberst 1152 | 1152 numberth 1153 | 1153 numbertnumb 1154 | 1154 numberx 1155 | 1155 object 1156 | 1156 oblig 1157 | 1157 obtain 1158 | 1158 obvious 1159 | 1159 occur 1160 | 1160 oct 1161 | 1161 octob 1162 | 1162 of 1163 | 1163 off 1164 | 1164 offer 1165 | 1165 offic 1166 | 1166 offici 1167 | 1167 often 1168 | 1168 oh 1169 | 1169 ok 1170 | 1170 old 1171 | 1171 on 1172 | 1172 onc 1173 | 1173 onli 1174 | 1174 onlin 1175 | 1175 open 1176 | 1176 oper 1177 | 1177 opinion 1178 | 1178 opportun 1179 | 1179 opt 1180 | 1180 optim 1181 | 1181 option 1182 | 1182 or 1183 | 1183 order 1184 | 1184 org 1185 | 1185 organ 1186 | 1186 origin 1187 | 1187 os 1188 | 1188 osdn 1189 | 1189 other 1190 | 1190 otherwis 1191 | 1191 our 1192 | 1192 out 1193 | 1193 outlook 1194 | 1194 output 1195 | 1195 outsid 1196 | 1196 over 1197 | 1197 own 1198 | 1198 owner 1199 | 1199 oz 1200 | 1200 pacif 1201 | 1201 pack 1202 | 1202 packag 1203 | 1203 page 1204 | 1204 pai 1205 | 1205 paid 1206 | 1206 pain 1207 | 1207 palm 1208 | 1208 panel 1209 | 1209 paper 1210 | 1210 paragraph 1211 | 1211 parent 1212 | 1212 part 1213 | 1213 parti 1214 | 1214 particip 1215 | 1215 particular 1216 | 1216 particularli 1217 | 1217 partit 1218 | 1218 partner 1219 | 1219 pass 1220 | 1220 password 1221 | 1221 past 1222 | 1222 patch 1223 | 1223 patent 1224 | 1224 path 1225 | 1225 pattern 1226 | 1226 paul 1227 | 1227 payment 1228 | 1228 pc 1229 | 1229 peac 1230 | 1230 peopl 1231 | 1231 per 1232 | 1232 percent 1233 | 1233 percentag 1234 | 1234 perfect 1235 | 1235 perfectli 1236 | 1236 perform 1237 | 1237 perhap 1238 | 1238 period 1239 | 1239 perl 1240 | 1240 perman 1241 | 1241 permiss 1242 | 1242 person 1243 | 1243 pgp 1244 | 1244 
phone 1245 | 1245 photo 1246 | 1246 php 1247 | 1247 phrase 1248 | 1248 physic 1249 | 1249 pick 1250 | 1250 pictur 1251 | 1251 piec 1252 | 1252 piiiiiiii 1253 | 1253 pipe 1254 | 1254 pjnumber 1255 | 1255 place 1256 | 1256 plai 1257 | 1257 plain 1258 | 1258 plan 1259 | 1259 planet 1260 | 1260 plant 1261 | 1261 planta 1262 | 1262 platform 1263 | 1263 player 1264 | 1264 pleas 1265 | 1265 plu 1266 | 1266 plug 1267 | 1267 pm 1268 | 1268 pocket 1269 | 1269 point 1270 | 1270 polic 1271 | 1271 polici 1272 | 1272 polit 1273 | 1273 poor 1274 | 1274 pop 1275 | 1275 popul 1276 | 1276 popular 1277 | 1277 port 1278 | 1278 posit 1279 | 1279 possibl 1280 | 1280 post 1281 | 1281 potenti 1282 | 1282 pound 1283 | 1283 powel 1284 | 1284 power 1285 | 1285 powershot 1286 | 1286 practic 1287 | 1287 pre 1288 | 1288 predict 1289 | 1289 prefer 1290 | 1290 premium 1291 | 1291 prepar 1292 | 1292 present 1293 | 1293 presid 1294 | 1294 press 1295 | 1295 pretti 1296 | 1296 prevent 1297 | 1297 previou 1298 | 1298 previous 1299 | 1299 price 1300 | 1300 principl 1301 | 1301 print 1302 | 1302 printabl 1303 | 1303 printer 1304 | 1304 privaci 1305 | 1305 privat 1306 | 1306 prize 1307 | 1307 pro 1308 | 1308 probabl 1309 | 1309 problem 1310 | 1310 procedur 1311 | 1311 process 1312 | 1312 processor 1313 | 1313 procmail 1314 | 1314 produc 1315 | 1315 product 1316 | 1316 profession 1317 | 1317 profil 1318 | 1318 profit 1319 | 1319 program 1320 | 1320 programm 1321 | 1321 progress 1322 | 1322 project 1323 | 1323 promis 1324 | 1324 promot 1325 | 1325 prompt 1326 | 1326 properti 1327 | 1327 propos 1328 | 1328 proprietari 1329 | 1329 prospect 1330 | 1330 protect 1331 | 1331 protocol 1332 | 1332 prove 1333 | 1333 proven 1334 | 1334 provid 1335 | 1335 proxi 1336 | 1336 pub 1337 | 1337 public 1338 | 1338 publish 1339 | 1339 pudg 1340 | 1340 pull 1341 | 1341 purchas 1342 | 1342 purpos 1343 | 1343 put 1344 | 1344 python 1345 | 1345 qnumber 1346 | 1346 qualifi 1347 | 1347 qualiti 1348 | 1348 quarter 1349 | 1349 question 1350 | 1350 quick 1351 | 1351 quickli 1352 | 1352 quit 1353 | 1353 quot 1354 | 1354 radio 1355 | 1355 ragga 1356 | 1356 rais 1357 | 1357 random 1358 | 1358 rang 1359 | 1359 rate 1360 | 1360 rather 1361 | 1361 ratio 1362 | 1362 razor 1363 | 1363 razornumb 1364 | 1364 re 1365 | 1365 reach 1366 | 1366 read 1367 | 1367 reader 1368 | 1368 readi 1369 | 1369 real 1370 | 1370 realiz 1371 | 1371 realli 1372 | 1372 reason 1373 | 1373 receiv 1374 | 1374 recent 1375 | 1375 recipi 1376 | 1376 recommend 1377 | 1377 record 1378 | 1378 red 1379 | 1379 redhat 1380 | 1380 reduc 1381 | 1381 refer 1382 | 1382 refin 1383 | 1383 reg 1384 | 1384 regard 1385 | 1385 region 1386 | 1386 regist 1387 | 1387 regul 1388 | 1388 regular 1389 | 1389 rel 1390 | 1390 relat 1391 | 1391 relationship 1392 | 1392 releas 1393 | 1393 relev 1394 | 1394 reliabl 1395 | 1395 remain 1396 | 1396 rememb 1397 | 1397 remot 1398 | 1398 remov 1399 | 1399 replac 1400 | 1400 repli 1401 | 1401 report 1402 | 1402 repositori 1403 | 1403 repres 1404 | 1404 republ 1405 | 1405 request 1406 | 1406 requir 1407 | 1407 research 1408 | 1408 reserv 1409 | 1409 resid 1410 | 1410 resourc 1411 | 1411 respect 1412 | 1412 respond 1413 | 1413 respons 1414 | 1414 rest 1415 | 1415 result 1416 | 1416 retail 1417 | 1417 return 1418 | 1418 reveal 1419 | 1419 revenu 1420 | 1420 revers 1421 | 1421 review 1422 | 1422 revok 1423 | 1423 rh 1424 | 1424 rich 1425 | 1425 right 1426 | 1426 risk 1427 | 1427 road 1428 | 1428 robert 1429 | 1429 rock 1430 | 1430 role 1431 | 1431 roll 1432 | 1432 rom 1433 | 1433 
roman 1434 | 1434 room 1435 | 1435 root 1436 | 1436 round 1437 | 1437 rpm 1438 | 1438 rss 1439 | 1439 rule 1440 | 1440 run 1441 | 1441 sa 1442 | 1442 safe 1443 | 1443 sai 1444 | 1444 said 1445 | 1445 sale 1446 | 1446 same 1447 | 1447 sampl 1448 | 1448 san 1449 | 1449 saou 1450 | 1450 sat 1451 | 1451 satellit 1452 | 1452 save 1453 | 1453 saw 1454 | 1454 scan 1455 | 1455 schedul 1456 | 1456 school 1457 | 1457 scienc 1458 | 1458 score 1459 | 1459 screen 1460 | 1460 script 1461 | 1461 se 1462 | 1462 search 1463 | 1463 season 1464 | 1464 second 1465 | 1465 secret 1466 | 1466 section 1467 | 1467 secur 1468 | 1468 see 1469 | 1469 seed 1470 | 1470 seek 1471 | 1471 seem 1472 | 1472 seen 1473 | 1473 select 1474 | 1474 self 1475 | 1475 sell 1476 | 1476 seminar 1477 | 1477 send 1478 | 1478 sender 1479 | 1479 sendmail 1480 | 1480 senior 1481 | 1481 sens 1482 | 1482 sensit 1483 | 1483 sent 1484 | 1484 sep 1485 | 1485 separ 1486 | 1486 septemb 1487 | 1487 sequenc 1488 | 1488 seri 1489 | 1489 serif 1490 | 1490 seriou 1491 | 1491 serv 1492 | 1492 server 1493 | 1493 servic 1494 | 1494 set 1495 | 1495 setup 1496 | 1496 seven 1497 | 1497 seventh 1498 | 1498 sever 1499 | 1499 sex 1500 | 1500 sexual 1501 | 1501 sf 1502 | 1502 shape 1503 | 1503 share 1504 | 1504 she 1505 | 1505 shell 1506 | 1506 ship 1507 | 1507 shop 1508 | 1508 short 1509 | 1509 shot 1510 | 1510 should 1511 | 1511 show 1512 | 1512 side 1513 | 1513 sign 1514 | 1514 signatur 1515 | 1515 signific 1516 | 1516 similar 1517 | 1517 simpl 1518 | 1518 simpli 1519 | 1519 sinc 1520 | 1520 sincer 1521 | 1521 singl 1522 | 1522 sit 1523 | 1523 site 1524 | 1524 situat 1525 | 1525 six 1526 | 1526 size 1527 | 1527 skeptic 1528 | 1528 skill 1529 | 1529 skin 1530 | 1530 skip 1531 | 1531 sleep 1532 | 1532 slow 1533 | 1533 small 1534 | 1534 smart 1535 | 1535 smoke 1536 | 1536 smtp 1537 | 1537 snumber 1538 | 1538 so 1539 | 1539 social 1540 | 1540 societi 1541 | 1541 softwar 1542 | 1542 sold 1543 | 1543 solut 1544 | 1544 solv 1545 | 1545 some 1546 | 1546 someon 1547 | 1547 someth 1548 | 1548 sometim 1549 | 1549 son 1550 | 1550 song 1551 | 1551 soni 1552 | 1552 soon 1553 | 1553 sorri 1554 | 1554 sort 1555 | 1555 sound 1556 | 1556 sourc 1557 | 1557 south 1558 | 1558 space 1559 | 1559 spain 1560 | 1560 spam 1561 | 1561 spamassassin 1562 | 1562 spamd 1563 | 1563 spammer 1564 | 1564 speak 1565 | 1565 spec 1566 | 1566 special 1567 | 1567 specif 1568 | 1568 specifi 1569 | 1569 speech 1570 | 1570 speed 1571 | 1571 spend 1572 | 1572 sponsor 1573 | 1573 sport 1574 | 1574 spot 1575 | 1575 src 1576 | 1576 ssh 1577 | 1577 st 1578 | 1578 stabl 1579 | 1579 staff 1580 | 1580 stai 1581 | 1581 stand 1582 | 1582 standard 1583 | 1583 star 1584 | 1584 start 1585 | 1585 state 1586 | 1586 statement 1587 | 1587 statu 1588 | 1588 step 1589 | 1589 steve 1590 | 1590 still 1591 | 1591 stock 1592 | 1592 stop 1593 | 1593 storag 1594 | 1594 store 1595 | 1595 stori 1596 | 1596 strategi 1597 | 1597 stream 1598 | 1598 street 1599 | 1599 string 1600 | 1600 strip 1601 | 1601 strong 1602 | 1602 structur 1603 | 1603 studi 1604 | 1604 stuff 1605 | 1605 stupid 1606 | 1606 style 1607 | 1607 subject 1608 | 1608 submit 1609 | 1609 subscrib 1610 | 1610 subscript 1611 | 1611 substanti 1612 | 1612 success 1613 | 1613 such 1614 | 1614 suffer 1615 | 1615 suggest 1616 | 1616 suit 1617 | 1617 sum 1618 | 1618 summari 1619 | 1619 summer 1620 | 1620 sun 1621 | 1621 super 1622 | 1622 suppli 1623 | 1623 support 1624 | 1624 suppos 1625 | 1625 sure 1626 | 1626 surpris 1627 | 1627 suse 1628 | 1628 suspect 1629 | 1629 sweet 
1630 | 1630 switch 1631 | 1631 system 1632 | 1632 tab 1633 | 1633 tabl 1634 | 1634 tablet 1635 | 1635 tag 1636 | 1636 take 1637 | 1637 taken 1638 | 1638 talk 1639 | 1639 tape 1640 | 1640 target 1641 | 1641 task 1642 | 1642 tax 1643 | 1643 teach 1644 | 1644 team 1645 | 1645 tech 1646 | 1646 technic 1647 | 1647 techniqu 1648 | 1648 technolog 1649 | 1649 tel 1650 | 1650 telecom 1651 | 1651 telephon 1652 | 1652 tell 1653 | 1653 temperatur 1654 | 1654 templ 1655 | 1655 ten 1656 | 1656 term 1657 | 1657 termin 1658 | 1658 terror 1659 | 1659 terrorist 1660 | 1660 test 1661 | 1661 texa 1662 | 1662 text 1663 | 1663 than 1664 | 1664 thank 1665 | 1665 that 1666 | 1666 the 1667 | 1667 thei 1668 | 1668 their 1669 | 1669 them 1670 | 1670 themselv 1671 | 1671 then 1672 | 1672 theori 1673 | 1673 there 1674 | 1674 therefor 1675 | 1675 these 1676 | 1676 thi 1677 | 1677 thing 1678 | 1678 think 1679 | 1679 thinkgeek 1680 | 1680 third 1681 | 1681 those 1682 | 1682 though 1683 | 1683 thought 1684 | 1684 thousand 1685 | 1685 thread 1686 | 1686 threat 1687 | 1687 three 1688 | 1688 through 1689 | 1689 thu 1690 | 1690 thursdai 1691 | 1691 ti 1692 | 1692 ticket 1693 | 1693 tim 1694 | 1694 time 1695 | 1695 tip 1696 | 1696 tire 1697 | 1697 titl 1698 | 1698 tm 1699 | 1699 to 1700 | 1700 todai 1701 | 1701 togeth 1702 | 1702 token 1703 | 1703 told 1704 | 1704 toll 1705 | 1705 tom 1706 | 1706 toner 1707 | 1707 toni 1708 | 1708 too 1709 | 1709 took 1710 | 1710 tool 1711 | 1711 top 1712 | 1712 topic 1713 | 1713 total 1714 | 1714 touch 1715 | 1715 toward 1716 | 1716 track 1717 | 1717 trade 1718 | 1718 tradit 1719 | 1719 traffic 1720 | 1720 train 1721 | 1721 transact 1722 | 1722 transfer 1723 | 1723 travel 1724 | 1724 treat 1725 | 1725 tree 1726 | 1726 tri 1727 | 1727 trial 1728 | 1728 trick 1729 | 1729 trip 1730 | 1730 troubl 1731 | 1731 true 1732 | 1732 truli 1733 | 1733 trust 1734 | 1734 truth 1735 | 1735 try 1736 | 1736 tue 1737 | 1737 tuesdai 1738 | 1738 turn 1739 | 1739 tv 1740 | 1740 two 1741 | 1741 type 1742 | 1742 uk 1743 | 1743 ultim 1744 | 1744 un 1745 | 1745 under 1746 | 1746 understand 1747 | 1747 unfortun 1748 | 1748 uniqu 1749 | 1749 unison 1750 | 1750 unit 1751 | 1751 univers 1752 | 1752 unix 1753 | 1753 unless 1754 | 1754 unlik 1755 | 1755 unlimit 1756 | 1756 unseen 1757 | 1757 unsolicit 1758 | 1758 unsubscrib 1759 | 1759 until 1760 | 1760 up 1761 | 1761 updat 1762 | 1762 upgrad 1763 | 1763 upon 1764 | 1764 urgent 1765 | 1765 url 1766 | 1766 us 1767 | 1767 usa 1768 | 1768 usag 1769 | 1769 usb 1770 | 1770 usd 1771 | 1771 usdollarnumb 1772 | 1772 useless 1773 | 1773 user 1774 | 1774 usr 1775 | 1775 usual 1776 | 1776 util 1777 | 1777 vacat 1778 | 1778 valid 1779 | 1779 valu 1780 | 1780 valuabl 1781 | 1781 var 1782 | 1782 variabl 1783 | 1783 varieti 1784 | 1784 variou 1785 | 1785 ve 1786 | 1786 vendor 1787 | 1787 ventur 1788 | 1788 veri 1789 | 1789 verifi 1790 | 1790 version 1791 | 1791 via 1792 | 1792 video 1793 | 1793 view 1794 | 1794 virtual 1795 | 1795 visa 1796 | 1796 visit 1797 | 1797 visual 1798 | 1798 vnumber 1799 | 1799 voic 1800 | 1800 vote 1801 | 1801 vs 1802 | 1802 vulner 1803 | 1803 wa 1804 | 1804 wai 1805 | 1805 wait 1806 | 1806 wake 1807 | 1807 walk 1808 | 1808 wall 1809 | 1809 want 1810 | 1810 war 1811 | 1811 warm 1812 | 1812 warn 1813 | 1813 warranti 1814 | 1814 washington 1815 | 1815 wasn 1816 | 1816 wast 1817 | 1817 watch 1818 | 1818 water 1819 | 1819 we 1820 | 1820 wealth 1821 | 1821 weapon 1822 | 1822 web 1823 | 1823 weblog 1824 | 1824 websit 1825 | 1825 wed 1826 | 1826 wednesdai 1827 | 1827 
week 1828 | 1828 weekli 1829 | 1829 weight 1830 | 1830 welcom 1831 | 1831 well 1832 | 1832 went 1833 | 1833 were 1834 | 1834 west 1835 | 1835 what 1836 | 1836 whatev 1837 | 1837 when 1838 | 1838 where 1839 | 1839 whether 1840 | 1840 which 1841 | 1841 while 1842 | 1842 white 1843 | 1843 whitelist 1844 | 1844 who 1845 | 1845 whole 1846 | 1846 whose 1847 | 1847 why 1848 | 1848 wi 1849 | 1849 wide 1850 | 1850 width 1851 | 1851 wife 1852 | 1852 will 1853 | 1853 william 1854 | 1854 win 1855 | 1855 window 1856 | 1856 wing 1857 | 1857 winner 1858 | 1858 wireless 1859 | 1859 wish 1860 | 1860 with 1861 | 1861 within 1862 | 1862 without 1863 | 1863 wnumberp 1864 | 1864 woman 1865 | 1865 women 1866 | 1866 won 1867 | 1867 wonder 1868 | 1868 word 1869 | 1869 work 1870 | 1870 worker 1871 | 1871 world 1872 | 1872 worldwid 1873 | 1873 worri 1874 | 1874 worst 1875 | 1875 worth 1876 | 1876 would 1877 | 1877 wouldn 1878 | 1878 write 1879 | 1879 written 1880 | 1880 wrong 1881 | 1881 wrote 1882 | 1882 www 1883 | 1883 ximian 1884 | 1884 xml 1885 | 1885 xp 1886 | 1886 yahoo 1887 | 1887 ye 1888 | 1888 yeah 1889 | 1889 year 1890 | 1890 yesterdai 1891 | 1891 yet 1892 | 1892 york 1893 | 1893 you 1894 | 1894 young 1895 | 1895 your 1896 | 1896 yourself 1897 | 1897 zdnet 1898 | 1898 zero 1899 | 1899 zip 1900 | -------------------------------------------------------------------------------- /ex7/bird_small.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/noammor/coursera-machinelearning-python/454abd5a4ad315dc8a6391b4bb0ce4c280a394bd/ex7/bird_small.mat -------------------------------------------------------------------------------- /ex7/bird_small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/noammor/coursera-machinelearning-python/454abd5a4ad315dc8a6391b4bb0ce4c280a394bd/ex7/bird_small.png -------------------------------------------------------------------------------- /ex7/ex7data1.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/noammor/coursera-machinelearning-python/454abd5a4ad315dc8a6391b4bb0ce4c280a394bd/ex7/ex7data1.mat -------------------------------------------------------------------------------- /ex7/ex7data2.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/noammor/coursera-machinelearning-python/454abd5a4ad315dc8a6391b4bb0ce4c280a394bd/ex7/ex7data2.mat -------------------------------------------------------------------------------- /ex7/ex7faces.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/noammor/coursera-machinelearning-python/454abd5a4ad315dc8a6391b4bb0ce4c280a394bd/ex7/ex7faces.mat -------------------------------------------------------------------------------- /ex7/ml-ex7-kmeans.ipynb: -------------------------------------------------------------------------------- 1 | { 2 |  "cells": [ 3 |   { 4 |    "cell_type": "markdown", 5 |    "metadata": {}, 6 |    "source": [ 7 |     "# Exercise 7 | Principal Component Analysis and K-Means Clustering" 8 |    ] 9 |   }, 10 |   { 11 |    "cell_type": "code", 12 |    "execution_count": null, 13 |    "metadata": { 14 |     "collapsed": false 15 |    }, 16 |    "outputs": [], 17 |    "source": [ 18 |     "import random\n", 19 |     "import numpy as np\n", 20 |     "import matplotlib.pyplot as plt\n", 21 |     "import scipy.io\n", 22 |     "from PIL import Image" 23 |    ] 24 |
}, 25 |   { 26 |    "cell_type": "code", 27 |    "execution_count": null, 28 |    "metadata": { 29 |     "collapsed": true 30 |    }, 31 |    "outputs": [], 32 |    "source": [ 33 |     "%matplotlib inline" 34 |    ] 35 |   }, 36 |   { 37 |    "cell_type": "code", 38 |    "execution_count": null, 39 |    "metadata": { 40 |     "collapsed": false 41 |    }, 42 |    "outputs": [], 43 |    "source": [ 44 |     "ex7data2 = scipy.io.loadmat('ex7data2.mat')\n", 45 |     "X = ex7data2['X']" 46 |    ] 47 |   }, 48 |   { 49 |    "cell_type": "markdown", 50 |    "metadata": {}, 51 |    "source": [ 52 |     "## Part 1: Find Closest Centroids \n", 53 |     " To help you implement K-Means, we have divided the learning algorithm \n", 54 |     " into two functions -- findClosestCentroids and computeCentroids. In this\n", 55 |     " part, you should complete the code in the findClosestCentroids function. \n" 56 |    ] 57 |   }, 58 |   { 59 |    "cell_type": "code", 60 |    "execution_count": null, 61 |    "metadata": { 62 |     "collapsed": false 63 |    }, 64 |    "outputs": [], 65 |    "source": [ 66 |     "def find_closest_centroids(X, centroids):\n", 67 |     "    #FINDCLOSESTCENTROIDS computes the centroid memberships for every example\n", 68 |     "    #   idx = FINDCLOSESTCENTROIDS (X, centroids) returns the closest centroids\n", 69 |     "    #   in idx for a dataset X where each row is a single example. idx = m x 1 \n", 70 |     "    #   vector of centroid assignments (i.e. each entry in range [0..K-1])\n", 71 |     "    #\n", 72 |     "    K = centroids.shape[0]\n", 73 |     "    \n", 74 |     "    # You need to return the following variables correctly.\n", 75 |     "    idx = np.zeros(X.shape[0], dtype='int')\n", 76 |     "\n", 77 |     "    # ====================== YOUR CODE HERE ======================\n", 78 |     "    # Instructions: Go over every example, find its closest centroid, and store\n", 79 |     "    #               the index inside idx at the appropriate location.\n", 80 |     "    #               Concretely, idx[i] should contain the index of the centroid\n", 81 |     "    #               closest to example i. Hence, it should be a value in the \n", 82 |     "    #               range 0..K-1\n", 83 |     "    #\n", 84 |     "    # Note: You can use a for-loop over the examples to compute this.\n", 85 |     "    #\n", 86 |     "    \n", 87 |     "    \n", 88 |     "    # =============================================================\n", 89 |     "\n", 90 |     "    return idx" 91 |    ] 92 |   }, 93 |   { 94 |    "cell_type": "markdown", 95 |    "metadata": {}, 96 |    "source": [ 97 |     "The closest centroids to the first 3 examples should be `[0, 2, 1]` respectively." 98 |    ] 99 |   }, 100 |   { 101 |    "cell_type": "code", 102 |    "execution_count": null, 103 |    "metadata": { 104 |     "collapsed": false 105 |    }, 106 |    "outputs": [], 107 |    "source": [ 108 |     "K = 3\n", 109 |     "initial_centroids = np.array([[3, 3], [6, 2], [8, 5]])\n", 110 |     "idx = find_closest_centroids(X, initial_centroids)\n", 111 |     "idx[:3]" 112 |    ] 113 |   }, 114 |   { 115 |    "cell_type": "markdown", 116 |    "metadata": {}, 117 |    "source": [ 118 |     "## Part 2: Compute Means \n", 119 |     " After implementing the closest centroids function, you should now\n", 120 |     " complete the compute_centroids function.\n" 121 |    ] 122 |   }, 123 |   { 124 |    "cell_type": "code", 125 |    "execution_count": null, 126 |    "metadata": { 127 |     "collapsed": false 128 |    }, 129 |    "outputs": [], 130 |    "source": [ 131 |     "def compute_centroids(X, idx, K):\n", 132 |     "    #COMPUTECENTROIDS returns the new centroids by computing the means of the \n", 133 |     "    #data points assigned to each centroid.\n", 134 |     "    #   centroids = COMPUTECENTROIDS(X, idx, K) returns the new centroids by \n", 135 |     "    #   computing the means of the data points assigned to each centroid.
It is\n", 136 |     "    #   given a dataset X where each row is a single data point, a vector\n", 137 |     "    #   idx of centroid assignments (i.e. each entry in range [0..K-1]) for each\n", 138 |     "    #   example, and K, the number of centroids. You should return a matrix\n", 139 |     "    #   centroids, where each row of centroids is the mean of the data points\n", 140 |     "    #   assigned to it.\n", 141 |     "    #\n", 142 |     "    m, n = X.shape\n", 143 |     "    \n", 144 |     "    # You need to return the following variables correctly.\n", 145 |     "    centroids = np.zeros((K, n)) \n", 146 |     "    \n", 147 |     "    # ====================== YOUR CODE HERE ======================\n", 148 |     "    # Instructions: Go over every centroid and compute mean of all points that\n", 149 |     "    #               belong to it. Concretely, the row vector centroids[i, :]\n", 150 |     "    #               should contain the mean of the data points assigned to\n", 151 |     "    #               centroid i.\n", 152 |     "    #\n", 153 |     "    # Note: You can use a for-loop over the centroids to compute this.\n", 154 |     "    #\n", 155 |     "\n", 156 |     "\n", 157 |     "    \n", 158 |     "    \n", 159 |     "    # =============================================================\n", 160 |     "    \n", 161 |     "    return centroids" 162 |    ] 163 |   }, 164 |   { 165 |    "cell_type": "markdown", 166 |    "metadata": {}, 167 |    "source": [ 168 |     "Centroids computed after initial finding of closest centroids:" 169 |    ] 170 |   }, 171 |   { 172 |    "cell_type": "code", 173 |    "execution_count": null, 174 |    "metadata": { 175 |     "collapsed": false 176 |    }, 177 |    "outputs": [], 178 |    "source": [ 179 |     "compute_centroids(X, idx, K)" 180 |    ] 181 |   }, 182 |   { 183 |    "cell_type": "markdown", 184 |    "metadata": {}, 185 |    "source": [ 186 |     "The centroids should be:\n", 187 |     "\n", 188 |     "`array([[ 2.42830111,  3.15792418],\n", 189 |     "       [ 5.81350331,  2.63365645],\n", 190 |     "       [ 7.11938687,  3.6166844 ]])`\n" 191 |    ] 192 |   }, 193 |   { 194 |    "cell_type": "code", 195 |    "execution_count": null, 196 |    "metadata": { 197 |     "collapsed": false 198 |    }, 199 |    "outputs": [], 200 |    "source": [ 201 |     "def plot_data_points(X, idx, K, ax):\n", 202 |     "    palette = plt.get_cmap('hsv', np.max(idx) + 2)\n", 203 |     "    colors = palette(idx)\n", 204 |     "    ax.scatter(X[:, 0], X[:, 1], c=colors)" 205 |    ] 206 |   }, 207 |   { 208 |    "cell_type": "code", 209 |    "execution_count": null, 210 |    "metadata": { 211 |     "collapsed": false 212 |    }, 213 |    "outputs": [], 214 |    "source": [ 215 |     "def plot_kmeans_progress(X, centroids, previous_centroids, idx, K, iteration_number, ax):\n", 216 |     "    plot_data_points(X, idx, K, ax)\n", 217 |     "    \n", 218 |     "    ax.scatter(centroids[:, 0], centroids[:, 1], c='black', marker='x', s=50, linewidths=4)\n", 219 |     "    if previous_centroids is not None:\n", 220 |     "        for c, pc in zip(centroids, previous_centroids):\n", 221 |     "            ax.plot([c[0], pc[0]], [c[1], pc[1]], 'b-')\n", 222 |     "    \n", 223 |     "    ax.set_title('Iteration {}'.format(iteration_number))" 224 |    ] 225 |   }, 226 |   { 227 |    "cell_type": "markdown", 228 |    "metadata": {}, 229 |    "source": [ 230 |     "## Part 3: K-Means Clustering \n", 231 |     " After you have completed the two functions computeCentroids and\n", 232 |     " findClosestCentroids, you have all the necessary pieces to run the\n", 233 |     " kMeans algorithm. In this part, you will run the K-Means algorithm on\n", 234 |     " the example dataset we have provided.
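Possible fill-ins for the two skeletons above. These are sketches, not the only valid answers: the loop-based versions suggested in the comments work equally well, and compute_centroids below assumes every centroid keeps at least one assigned point (true on this dataset; an empty cluster would produce a NaN row).

def find_closest_centroids(X, centroids):
    # Squared distances from every example to every centroid, via
    # broadcasting to shape (m, K, n), then a row-wise argmin
    # (0-based, matching the expected output [0, 2, 1]).
    dists = ((X[:, np.newaxis, :] - centroids[np.newaxis, :, :]) ** 2).sum(axis=2)
    return np.argmin(dists, axis=1)

def compute_centroids(X, idx, K):
    m, n = X.shape
    centroids = np.zeros((K, n))
    for k in range(K):
        # Mean of all examples currently assigned to centroid k.
        centroids[k] = X[idx == k].mean(axis=0)
    return centroids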
\n" 235 |    ] 236 |   }, 237 |   { 238 |    "cell_type": "code", 239 |    "execution_count": null, 240 |    "metadata": { 241 |     "collapsed": true 242 |    }, 243 |    "outputs": [], 244 |    "source": [ 245 |     "K = 3\n", 246 |     "initial_centroids = np.array([[3, 3], [6, 2], [8, 5]])\n", 247 |     "max_iters = 10" 248 |    ] 249 |   }, 250 |   { 251 |    "cell_type": "code", 252 |    "execution_count": null, 253 |    "metadata": { 254 |     "collapsed": false 255 |    }, 256 |    "outputs": [], 257 |    "source": [ 258 |     "def run_kmeans(X, initial_centroids, max_iters, plot_progress=False):\n", 259 |     "    if plot_progress:\n", 260 |     "        fig, ax = plt.subplots(figsize=(6, 6))\n", 261 |     "    m, n = X.shape\n", 262 |     "    K = initial_centroids.shape[0]\n", 263 |     "    centroids = initial_centroids\n", 264 |     "    previous_centroids = None\n", 265 |     "\n", 266 |     "    for i in range(max_iters):\n", 267 |     "        idx = find_closest_centroids(X, centroids)\n", 268 |     "        if plot_progress:\n", 269 |     "            plot_kmeans_progress(X, centroids, previous_centroids, idx, K, i+1, ax)\n", 270 |     "        previous_centroids = centroids\n", 271 |     "        centroids = compute_centroids(X, idx, K)\n", 272 |     "\n", 273 |     "    return centroids, idx" 274 |    ] 275 |   }, 276 |   { 277 |    "cell_type": "code", 278 |    "execution_count": null, 279 |    "metadata": { 280 |     "collapsed": false, 281 |     "scrolled": false 282 |    }, 283 |    "outputs": [], 284 |    "source": [ 285 |     "_, __ = run_kmeans(X, initial_centroids, 10, True)" 286 |    ] 287 |   }, 288 |   { 289 |    "cell_type": "markdown", 290 |    "metadata": {}, 291 |    "source": [ 292 |     "## Part 4: K-Means Clustering on Pixels \n", 293 |     " In this exercise, you will use K-Means to compress an image. To do this,\n", 294 |     " you will first run K-Means on the colors of the pixels in the image and\n", 295 |     " then you will map each pixel onto its closest centroid.\n" 296 |    ] 297 |   }, 298 |   { 299 |    "cell_type": "code", 300 |    "execution_count": null, 301 |    "metadata": { 302 |     "collapsed": false 303 |    }, 304 |    "outputs": [], 305 |    "source": [ 306 |     "# Load an image of a bird\n", 307 |     "im = Image.open('bird_small.png')\n", 308 |     "X = np.array(im)\n", 309 |     "X = X/255 # Divide by 255 so that all values are in the range [0, 1]" 310 |    ] 311 |   }, 312 |   { 313 |    "cell_type": "markdown", 314 |    "metadata": {}, 315 |    "source": [ 316 |     " Reshape the image into an Nx3 matrix where N = number of pixels.\n", 317 |     " Each row will contain the Red, Green and Blue pixel values\n", 318 |     " This gives us our dataset matrix X that we will use K-Means on." 319 |    ] 320 |   }, 321 |   { 322 |    "cell_type": "code", 323 |    "execution_count": null, 324 |    "metadata": { 325 |     "collapsed": false 326 |    }, 327 |    "outputs": [], 328 |    "source": [ 329 |     "X = X.reshape((128*128, 3))\n", 330 |     "img_size = X.shape\n", 331 |     "img_size, X.dtype" 332 |    ] 333 |   }, 334 |   { 335 |    "cell_type": "markdown", 336 |    "metadata": {}, 337 |    "source": [ 338 |     " You should now complete the code in kmeans_init_centroids."
339 |    ] 340 |   }, 341 |   { 342 |    "cell_type": "code", 343 |    "execution_count": null, 344 |    "metadata": { 345 |     "collapsed": true 346 |    }, 347 |    "outputs": [], 348 |    "source": [ 349 |     "def kmeans_init_centroids(X, K):\n", 350 |     "    #KMEANSINITCENTROIDS This function initializes K centroids that are to be \n", 351 |     "    #used in K-Means on the dataset X\n", 352 |     "    #   centroids = KMEANSINITCENTROIDS(X, K) returns K initial centroids to be\n", 353 |     "    #   used with the K-Means on the dataset X\n", 354 |     "    #\n", 355 |     "    \n", 356 |     "    centroids = np.zeros((K, X.shape[1]))\n", 357 |     "    \n", 358 |     "    # ====================== YOUR CODE HERE ======================\n", 359 |     "    # Instructions: You should set centroids to randomly chosen examples from\n", 360 |     "    #               the dataset X\n", 361 |     "    #\n", 362 |     "    \n", 363 |     "    \n", 364 |     "    \n", 365 |     "    \n", 366 |     "    # =============================================================\n", 367 |     "\n", 368 |     "    return centroids" 369 |    ] 370 |   }, 371 |   { 372 |    "cell_type": "markdown", 373 |    "metadata": {}, 374 |    "source": [ 375 |     " Run your K-Means algorithm on this data.\n", 376 |     " You should try different values of K and max_iters here" 377 |    ] 378 |   }, 379 |   { 380 |    "cell_type": "code", 381 |    "execution_count": null, 382 |    "metadata": { 383 |     "collapsed": true 384 |    }, 385 |    "outputs": [], 386 |    "source": [ 387 |     "K = 16\n", 388 |     "max_iters = 10" 389 |    ] 390 |   }, 391 |   { 392 |    "cell_type": "markdown", 393 |    "metadata": {}, 394 |    "source": [ 395 |     " When using K-Means, it is important to initialize the centroids\n", 396 |     " randomly. \n", 397 |     " You should complete the code in kmeans_init_centroids before proceeding" 398 |    ] 399 |   }, 400 |   { 401 |    "cell_type": "code", 402 |    "execution_count": null, 403 |    "metadata": { 404 |     "collapsed": false 405 |    }, 406 |    "outputs": [], 407 |    "source": [ 408 |     "initial_centroids = kmeans_init_centroids(X, K)" 409 |    ] 410 |   }, 411 |   { 412 |    "cell_type": "code", 413 |    "execution_count": null, 414 |    "metadata": { 415 |     "collapsed": true 416 |    }, 417 |    "outputs": [], 418 |    "source": [ 419 |     "centroids, idx = run_kmeans(X, initial_centroids, max_iters)" 420 |    ] 421 |   }, 422 |   { 423 |    "cell_type": "markdown", 424 |    "metadata": {}, 425 |    "source": [ 426 |     "## Part 5: Image Compression \n", 427 |     " In this part of the exercise, you will use the clusters of K-Means to\n", 428 |     " compress an image. To do this, we first find the closest clusters for\n", 429 |     " each example. \n" 430 |    ] 431 |   }, 432 |   { 433 |    "cell_type": "code", 434 |    "execution_count": null, 435 |    "metadata": { 436 |     "collapsed": false 437 |    }, 438 |    "outputs": [], 439 |    "source": [ 440 |     "idx = find_closest_centroids(X, centroids)" 441 |    ] 442 |   }, 443 |   { 444 |    "cell_type": "markdown", 445 |    "metadata": {}, 446 |    "source": [ 447 |     " Essentially, now we have represented the image X in terms of the\n", 448 |     " indices in idx.
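Looking back at the kmeans_init_centroids skeleton above, a minimal fill-in consistent with its instructions: permute the example indices and take the first K, so no example is picked twice.

def kmeans_init_centroids(X, K):
    # Choose K distinct examples at random as the initial centroids.
    rand_indices = np.random.permutation(X.shape[0])[:K]
    return X[rand_indices]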
\n", 450 |     " \n", 451 |     " We can now recover the image from the indices (idx) by mapping each pixel\n", 452 |     " (specified by its index in idx) to the centroid value" 453 |    ] 454 |   }, 455 |   { 456 |    "cell_type": "code", 457 |    "execution_count": null, 458 |    "metadata": { 459 |     "collapsed": false, 460 |     "scrolled": true 461 |    }, 462 |    "outputs": [], 463 |    "source": [ 464 |     "X_recovered = centroids[idx,:]\n", 465 |     "X_recovered = X_recovered.reshape([128, 128, 3])\n", 466 |     "X_recovered *= 255\n", 467 |     "X_recovered = np.array(X_recovered, dtype='uint8')\n", 468 |     "X_recovered.shape" 469 |    ] 470 |   }, 471 |   { 472 |    "cell_type": "markdown", 473 |    "metadata": {}, 474 |    "source": [ 475 |     "Here are the centroid colors:" 476 |    ] 477 |   }, 478 |   { 479 |    "cell_type": "code", 480 |    "execution_count": null, 481 |    "metadata": { 482 |     "collapsed": false, 483 |     "scrolled": false 484 |    }, 485 |    "outputs": [], 486 |    "source": [ 487 |     "fig, axes = plt.subplots(nrows=4, ncols=4)\n", 488 |     "axes = axes.flat\n", 489 |     "for centroid, ax in zip(centroids, axes):\n", 490 |     "    c = np.array(centroid)\n", 491 |     "    ax.set_axis_off()\n", 492 |     "    ax.scatter(1,1,c=c,s=1000)" 493 |    ] 494 |   }, 495 |   { 496 |    "cell_type": "markdown", 497 |    "metadata": {}, 498 |    "source": [ 499 |     "And the images, original and compressed." 500 |    ] 501 |   }, 502 |   { 503 |    "cell_type": "code", 504 |    "execution_count": null, 505 |    "metadata": { 506 |     "collapsed": false, 507 |     "scrolled": false 508 |    }, 509 |    "outputs": [], 510 |    "source": [ 511 |     "fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(8,10))\n", 512 |     "axes[0].imshow(X_recovered)\n", 513 |     "axes[0].set_title('Compressed')\n", 514 |     "axes[1].imshow(np.array(Image.open('bird_small.png')))\n", 515 |     "axes[1].set_title('Original')\n", 516 |     "for ax in axes:\n", 517 |     "    ax.set_axis_off()" 518 |    ] 519 |   } 520 |  ], 521 |  "metadata": { 522 |   "kernelspec": { 523 |    "display_name": "Python 3", 524 |    "language": "python", 525 |    "name": "python3" 526 |   }, 527 |   "language_info": { 528 |    "codemirror_mode": { 529 |     "name": "ipython", 530 |     "version": 3 531 |    }, 532 |    "file_extension": ".py", 533 |    "mimetype": "text/x-python", 534 |    "name": "python", 535 |    "nbconvert_exporter": "python", 536 |    "pygments_lexer": "ipython3", 537 |    "version": "3.4.3" 538 |   } 539 |  }, 540 |  "nbformat": 4, 541 |  "nbformat_minor": 0 542 | } 543 | -------------------------------------------------------------------------------- /ex8/ex8_movieParams.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/noammor/coursera-machinelearning-python/454abd5a4ad315dc8a6391b4bb0ce4c280a394bd/ex8/ex8_movieParams.mat -------------------------------------------------------------------------------- /ex8/ex8_movies.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/noammor/coursera-machinelearning-python/454abd5a4ad315dc8a6391b4bb0ce4c280a394bd/ex8/ex8_movies.mat -------------------------------------------------------------------------------- /ex8/ex8data1.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/noammor/coursera-machinelearning-python/454abd5a4ad315dc8a6391b4bb0ce4c280a394bd/ex8/ex8data1.mat -------------------------------------------------------------------------------- /ex8/ex8data2.mat: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/noammor/coursera-machinelearning-python/454abd5a4ad315dc8a6391b4bb0ce4c280a394bd/ex8/ex8data2.mat -------------------------------------------------------------------------------- /ex8/iterate.dat: -------------------------------------------------------------------------------- 1 | RUNNING THE L-BFGS-B CODE 2 | 3 | it = iteration number 4 | nf = number of function evaluations 5 | nseg = number of segments explored during the Cauchy search 6 | nact = number of active bounds at the generalized Cauchy point 7 | sub = manner in which the subspace minimization terminated: 8 | con = converged, bnd = a bound was reached 9 | itls = number of iterations performed in the line search 10 | stepl = step length used 11 | tstep = norm of the displacement (total step) 12 | projg = norm of the projected gradient 13 | f = function value 14 | 15 | * * * 16 | 17 | Machine precision = 2.220D-16 18 | N = 26260 M = 10 19 | 20 | it nf nseg nact sub itls stepl tstep projg f 21 | 0 1 - - - - - - 0.000D+00 0.000D+00 22 | 23 | CONVERGENCE: NORM_OF_PROJECTED_GRADIENT_<=_PGTOL 24 | 25 | Total User time 0.000E+00 seconds. 26 | 27 | -------------------------------------------------------------------------------- /ex8/ml-ex8-anomaly-detection.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Exercise 8 | Anomaly Detection " 8 | ] 9 | },
10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": { 14 | "collapsed": false 15 | }, 16 | "outputs": [], 17 | "source": [ 18 | "import numpy as np\n", 19 | "import matplotlib.pyplot as plt\n", 20 | "import scipy.io\n", 21 | "from scipy.stats import multivariate_normal" 22 | ] 23 | },
24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": { 28 | "collapsed": true 29 | }, 30 | "outputs": [], 31 | "source": [ 32 | "%matplotlib inline\n", 33 | "#%qtconsole" 34 | ] 35 | },
36 | { 37 | "cell_type": "markdown", 38 | "metadata": {}, 39 | "source": [ 40 | "## Part 1: Load Example Dataset \n", 41 | " We start this exercise by using a small dataset that is easy to\n", 42 | " visualize.\n", 43 | "\n", 44 | " Our example case consists of two network server statistics across\n", 45 | " several machines: the latency and throughput of each machine.\n", 46 | " This exercise will help us find possibly faulty (or very fast) machines.\n" 47 | ] 48 | },
49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": { 53 | "collapsed": false 54 | }, 55 | "outputs": [], 56 | "source": [ 57 | "ex8data1 = scipy.io.loadmat('ex8data1.mat')\n", 58 | "X = ex8data1['X']\n", 59 | "Xval = ex8data1['Xval']\n", 60 | "yval = ex8data1['yval'][:,0]" 61 | ] 62 | },
63 | { 64 | "cell_type": "code", 65 | "execution_count": null, 66 | "metadata": { 67 | "collapsed": false, 68 | "scrolled": true 69 | }, 70 | "outputs": [], 71 | "source": [ 72 | "def plot_data(X, ax):\n", 73 | "    ax.set_xlabel('Latency')\n", 74 | "    ax.set_ylabel('Throughput')\n", 75 | "    ax.plot(X[:,0], X[:,1], 'bx')\n", 76 | "    \n", 77 | "fig, ax = plt.subplots()\n", 78 | "plot_data(X, ax)" 79 | ] 80 | },
81 | { 82 | "cell_type": "code", 83 | "execution_count": null, 84 | "metadata": { 85 | "collapsed": true 86 | }, 87 | "outputs": [], 88 | "source": [ 89 | "def multivariate_gaussian(X, mu, sigma2):\n", 90 | "    if np.ndim(sigma2) == 1:  # a vector of variances is the covariance diagonal\n", 91 | "        sigma2 = np.diag(sigma2)\n", 92 | "    return multivariate_normal(mean=mu, cov=sigma2).pdf(X)" 93 | ] 94 | },
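{ "cell_type": "markdown", "metadata": {}, "source": [ "A quick sanity check of the helper (a sketch, not part of the original exercise): a vector of\n", "variances and the equivalent diagonal covariance matrix should give the same density, and the\n", "standard bivariate normal has density 1/(2*pi), roughly 0.159, at its mean." ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# Both calls describe the same standard bivariate normal,\n", "# so both should print approximately 0.1592.\n", "print(multivariate_gaussian(np.zeros((1, 2)), np.zeros(2), np.ones(2)))\n", "print(multivariate_gaussian(np.zeros((1, 2)), np.zeros(2), np.eye(2)))" ] },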
95 | { 96 | "cell_type": "markdown", 97 | "metadata": {}, 98 | "source": [ 99 | "## Part 2: Estimate the dataset statistics \n", 100 | " For this exercise, we assume a Gaussian distribution for the dataset.\n", 101 | "\n", 102 | " We first estimate the parameters of our assumed Gaussian distribution,\n", 103 | " then compute the probability of each point, and then visualize\n", 104 | " both the overall distribution and where each point falls in\n", 105 | " terms of that distribution.\n" 106 | ] 107 | },
108 | { 109 | "cell_type": "code", 110 | "execution_count": null, 111 | "metadata": { 112 | "collapsed": true 113 | }, 114 | "outputs": [], 115 | "source": [ 116 | "def estimate_gaussian(X):\n", 117 | "    #ESTIMATE_GAUSSIAN This function estimates the parameters of a\n", 118 | "    #Gaussian distribution using the data in X\n", 119 | "    #   mu, sigma2 = estimate_gaussian(X)\n", 120 | "    #   The input X is the dataset with each n-dimensional data point in one row.\n", 121 | "    #   The output is an n-dimensional vector mu, the mean of the data set,\n", 122 | "    #   and an n-dimensional vector sigma2 of per-feature variances.\n", 123 | "    #\n", 124 | "    m, n = X.shape\n", 125 | "    \n", 126 | "    # You should return these values correctly\n", 127 | "    mu = np.zeros(n)\n", 128 | "    sigma2 = np.ones(n)\n", 129 | "    \n", 130 | "    # ====================== YOUR CODE HERE ======================\n", 131 | "    # Instructions: Compute the mean of the data and the variances.\n", 132 | "    #               In particular, mu[i] should contain the mean of\n", 133 | "    #               the data for the i-th feature and sigma2[i]\n", 134 | "    #               should contain the variance of the i-th feature.\n", 135 | "    #               (One possible implementation is sketched below.)\n", 136 | "    \n", 137 | "    \n", 138 | "    \n", 139 | "    \n", 140 | "    # =============================================================\n", 141 | "    \n", 142 | "    return mu, sigma2" 143 | ] 144 | },
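{ "cell_type": "markdown", "metadata": {}, "source": [ "A minimal way to complete estimate_gaussian (a sketch, assuming numpy as np): the exercise\n", "uses the maximum-likelihood variance, which divides by m rather than m - 1; that is numpy's\n", "default (ddof=0)." ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# Reference sketch; the _sketch name avoids overwriting the skeleton above.\n", "def estimate_gaussian_sketch(X):\n", "    mu = X.mean(axis=0)  # per-feature mean\n", "    sigma2 = X.var(axis=0)  # ddof=0 divides by m, as the exercise expects\n", "    return mu, sigma2" ] },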
145 | { 146 | "cell_type": "code", 147 | "execution_count": null, 148 | "metadata": { 149 | "collapsed": false 150 | }, 151 | "outputs": [], 152 | "source": [ 153 | "mu, sigma2 = estimate_gaussian(X)\n", 154 | "p = multivariate_gaussian(X, mu, sigma2)" 155 | ] 156 | },
157 | { 158 | "cell_type": "markdown", 159 | "metadata": {}, 160 | "source": [ 161 | "Visualize the fit." 162 | ] 163 | },
164 | { 165 | "cell_type": "code", 166 | "execution_count": null, 167 | "metadata": { 168 | "collapsed": false, 169 | "scrolled": true 170 | }, 171 | "outputs": [], 172 | "source": [ 173 | "x1, x2 = np.meshgrid(np.linspace(0, 35), np.linspace(0, 35))\n", 174 | "Z = multivariate_gaussian(np.c_[x1.reshape(-1), x2.reshape(-1)], mu, sigma2).reshape(x1.shape)\n", 175 | "fig, ax = plt.subplots(figsize=(5,5))\n", 176 | "plot_data(X, ax)\n", 177 | "ax.contour(x1, x2, Z, levels=np.logspace(-20, 1, 7))" 178 | ] 179 | },
180 | { 181 | "cell_type": "markdown", 182 | "metadata": {}, 183 | "source": [ 184 | "## Part 3: Find Outliers \n", 185 | " Now you will find a good epsilon threshold using a cross-validation set,\n", 186 | " based on the probabilities of the cross-validation examples under the estimated Gaussian distribution.\n" 187 | ] 188 | },
189 | { 190 | "cell_type": "code", 191 | "execution_count": null, 192 | "metadata": { 193 | "collapsed": true 194 | }, 195 | "outputs": [], 196 | "source": [ 197 | "def select_threshold(yval, pval):\n", 198 | "    #SELECT_THRESHOLD Find the best threshold (epsilon) to use for selecting\n", 199 | "    #outliers\n", 200 | "    #   best_epsilon, best_f1 = select_threshold(yval, pval) finds the best\n", 201 | "    #   threshold to use for selecting outliers based on the results from a\n", 202 | "    #   validation set (pval) and the ground truth (yval).\n", 203 | "    #\n", 204 | "    best_epsilon = 0\n", 205 | "    best_f1 = 0\n", 206 | "    f1 = 0\n", 207 | "    \n", 208 | "    for epsilon in np.linspace(min(pval), max(pval), 1000):\n", 209 | "        \n", 210 | "        # ====================== YOUR CODE HERE ======================\n", 211 | "        # Instructions: Compute the F1 score of choosing epsilon as the\n", 212 | "        #               threshold and place the value in f1. The code at the\n", 213 | "        #               end of the loop will compare the F1 score for this\n", 214 | "        #               choice of epsilon and keep it if it is better than\n", 215 | "        #               the best epsilon found so far.\n", 216 | "        #\n", 217 | "        # Note: You can use predictions = pval < epsilon to get a binary vector\n", 218 | "        #       of 0's and 1's of the outlier predictions.\n", 219 | "        #       (A helper for computing F1 is sketched after this cell.)\n", 220 | "\n", 221 | "        \n", 222 | "        # =============================================================\n", 223 | "        \n", 224 | "        if f1 > best_f1:\n", 225 | "            best_epsilon = epsilon\n", 226 | "            best_f1 = f1\n", 227 | "    \n", 228 | "    return best_epsilon, best_f1" 229 | ] 230 | },
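{ "cell_type": "markdown", "metadata": {}, "source": [ "The F1 score balances precision P = tp / (tp + fp) and recall R = tp / (tp + fn) as\n", "F1 = 2PR / (P + R). A helper along these lines (a minimal sketch, assuming numpy as np and\n", "0/1 ground-truth labels in yval) could be called inside the loop above:" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def f1_for_threshold(yval, pval, epsilon):\n", "    predictions = (pval < epsilon)\n", "    tp = np.sum((predictions == 1) & (yval == 1))  # true positives\n", "    fp = np.sum((predictions == 1) & (yval == 0))  # false positives\n", "    fn = np.sum((predictions == 0) & (yval == 1))  # false negatives\n", "    if tp == 0:  # avoid division by zero when no outlier is correctly flagged\n", "        return 0.0\n", "    precision = tp / (tp + fp)\n", "    recall = tp / (tp + fn)\n", "    return 2 * precision * recall / (precision + recall)" ] },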
231 | { 232 | "cell_type": "markdown", 233 | "metadata": {}, 234 | "source": [ 235 | "Best epsilon and F1 found using cross-validation (epsilon should be about 8.99e-05 and F1 about 0.875):" 236 | ] 237 | },
238 | { 239 | "cell_type": "code", 240 | "execution_count": null, 241 | "metadata": { 242 | "collapsed": false 243 | }, 244 | "outputs": [], 245 | "source": [ 246 | "pval = multivariate_gaussian(Xval, mu, sigma2)\n", 247 | "epsilon, F1 = select_threshold(yval, pval)\n", 248 | "print(epsilon, F1)" 249 | ] 250 | },
251 | { 252 | "cell_type": "code", 253 | "execution_count": null, 254 | "metadata": { 255 | "collapsed": false, 256 | "scrolled": true 257 | }, 258 | "outputs": [], 259 | "source": [ 260 | "outliers = p < epsilon\n", 261 | "fig, ax = plt.subplots(figsize=(5,5))\n", 262 | "plot_data(X, ax)\n", 263 | "ax.scatter(X[outliers, 0], X[outliers, 1], marker='o', facecolors='none', edgecolors='r', s=100)" 264 | ] 265 | },
266 | { 267 | "cell_type": "markdown", 268 | "metadata": {}, 269 | "source": [ 270 | "## Part 4: Multidimensional Outliers \n", 271 | " We will now use the code from the previous part and apply it to a\n", 272 | " harder problem in which more features describe each datapoint and only\n", 273 | " some features indicate whether a point is an outlier.\n" 274 | ] 275 | },
276 | { 277 | "cell_type": "code", 278 | "execution_count": null, 279 | "metadata": { 280 | "collapsed": true 281 | }, 282 | "outputs": [], 283 | "source": [ 284 | "ex8data2 = scipy.io.loadmat('ex8data2.mat')\n", 285 | "X = ex8data2['X']\n", 286 | "Xval = ex8data2['Xval']\n", 287 | "yval = ex8data2['yval'][:,0]" 288 | ] 289 | },
290 | { 291 | "cell_type": "code", 292 | "execution_count": null, 293 | "metadata": { 294 | "collapsed": true 295 | }, 296 | "outputs": [], 297 | "source": [ 298 | "mu, sigma2 = estimate_gaussian(X)\n", 299 | "p = multivariate_gaussian(X, mu, sigma2)\n", 300 | "pval = multivariate_gaussian(Xval, mu, sigma2)\n", 301 | "epsilon, F1 = select_threshold(yval, pval)" 302 | ] 303 | },
304 | { 305 | "cell_type": "markdown", 306 | "metadata": {}, 307 | "source": [ 308 | "Best epsilon found (should be about 1.38e-18):" 309 | ] 310 | },
311 | { 312 | "cell_type": "code", 313 | "execution_count": null, 314 | "metadata": { 315 | "collapsed": false 316 | }, 317 | "outputs": [], 318 | "source": [ 319 | "epsilon" 320 | ] 321 | },
322 | { 323 | "cell_type": "markdown", 324 | "metadata": {}, 325 | "source": [ 326 | "Best F1 score (should be about 0.615):" 327 | ] 328 | },
329 | { 330 | "cell_type": "code", 331 | "execution_count": null, 332 | "metadata": { 333 | "collapsed": false 334 | }, 335 | "outputs": [], 336 | "source": [ 337 | "F1" 338 | ] 339 | },
340 | { 341 | "cell_type": "markdown", 342 | "metadata": {}, 343 | "source": [ 344 | "Number of outliers found (should be 117):" 345 | ] 346 | },
347 | { 348 | "cell_type": "code", 349 | "execution_count": null, 350 | "metadata": { 351 | "collapsed": false 352 | }, 353 | "outputs": [], 354 | "source": [ 355 | "sum(p < epsilon)" 356 | ] 357 | } 358 | ], 359 | "metadata": { 360 | "kernelspec": { 361 | "display_name": "Python 3", 362 | "language": "python", 363 | "name": "python3" 364 | }, 365 | "language_info": { 366 | "codemirror_mode": { 367 | "name": "ipython", 368 | "version": 3 369 | }, 370 | "file_extension": ".py", 371 | "mimetype": "text/x-python", 372 | "name": "python", 373 | "nbconvert_exporter": "python", 374 | "pygments_lexer": "ipython3", 375 | "version": "3.4.3" 376 | } 377 | }, 378 | "nbformat": 4, 379 | "nbformat_minor": 0 380 | } 381 |
-------------------------------------------------------------------------------- /ex8/movie_ids.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/noammor/coursera-machinelearning-python/454abd5a4ad315dc8a6391b4bb0ce4c280a394bd/ex8/movie_ids.txt --------------------------------------------------------------------------------