├── .gitignore ├── ex1 ├── data │ ├── ex1data1.txt │ └── ex1data2.txt ├── ex1.ipynb └── ex1.pdf ├── ex2 ├── data │ ├── ex2data1.txt │ └── ex2data2.txt ├── ex2.ipynb └── ex2.pdf ├── ex3 ├── data │ ├── ex3data1.mat │ └── ex3weights.mat ├── ex3.ipynb └── ex3.pdf ├── ex4 ├── data │ ├── ex4data1.mat │ └── ex4weights.mat ├── ex4.ipynb └── ex4.pdf ├── ex5 ├── data │ └── ex5data1.mat ├── ex5.ipynb └── ex5.pdf ├── ex6 ├── data │ ├── emailSample1.txt │ ├── emailSample2.txt │ ├── ex6data1.mat │ ├── ex6data2.mat │ ├── ex6data3.mat │ ├── spamSample1.txt │ ├── spamSample2.txt │ ├── spamTest.mat │ ├── spamTrain.mat │ └── vocab.txt ├── ex6.ipynb ├── ex6.pdf └── ex6_spam.ipynb ├── ex7 ├── data │ ├── bird_small.mat │ ├── bird_small.png │ ├── ex7data1.mat │ ├── ex7data2.mat │ └── ex7faces.mat ├── ex7.ipynb └── ex7.pdf └── ex8 ├── data ├── ex8_movieParams.mat ├── ex8_movies.mat ├── ex8data1.mat ├── ex8data2.mat └── movie_ids.txt ├── ex8.ipynb └── ex8.pdf /.gitignore: -------------------------------------------------------------------------------- 1 | .ipynb_checkpoints -------------------------------------------------------------------------------- /ex1/data/ex1data1.txt: -------------------------------------------------------------------------------- 1 | 6.1101,17.592 2 | 5.5277,9.1302 3 | 8.5186,13.662 4 | 7.0032,11.854 5 | 5.8598,6.8233 6 | 8.3829,11.886 7 | 7.4764,4.3483 8 | 8.5781,12 9 | 6.4862,6.5987 10 | 5.0546,3.8166 11 | 5.7107,3.2522 12 | 14.164,15.505 13 | 5.734,3.1551 14 | 8.4084,7.2258 15 | 5.6407,0.71618 16 | 5.3794,3.5129 17 | 6.3654,5.3048 18 | 5.1301,0.56077 19 | 6.4296,3.6518 20 | 7.0708,5.3893 21 | 6.1891,3.1386 22 | 20.27,21.767 23 | 5.4901,4.263 24 | 6.3261,5.1875 25 | 5.5649,3.0825 26 | 18.945,22.638 27 | 12.828,13.501 28 | 10.957,7.0467 29 | 13.176,14.692 30 | 22.203,24.147 31 | 5.2524,-1.22 32 | 6.5894,5.9966 33 | 9.2482,12.134 34 | 5.8918,1.8495 35 | 8.2111,6.5426 36 | 7.9334,4.5623 37 | 8.0959,4.1164 38 | 5.6063,3.3928 39 | 12.836,10.117 40 | 6.3534,5.4974 41 
| 5.4069,0.55657 42 | 6.8825,3.9115 43 | 11.708,5.3854 44 | 5.7737,2.4406 45 | 7.8247,6.7318 46 | 7.0931,1.0463 47 | 5.0702,5.1337 48 | 5.8014,1.844 49 | 11.7,8.0043 50 | 5.5416,1.0179 51 | 7.5402,6.7504 52 | 5.3077,1.8396 53 | 7.4239,4.2885 54 | 7.6031,4.9981 55 | 6.3328,1.4233 56 | 6.3589,-1.4211 57 | 6.2742,2.4756 58 | 5.6397,4.6042 59 | 9.3102,3.9624 60 | 9.4536,5.4141 61 | 8.8254,5.1694 62 | 5.1793,-0.74279 63 | 21.279,17.929 64 | 14.908,12.054 65 | 18.959,17.054 66 | 7.2182,4.8852 67 | 8.2951,5.7442 68 | 10.236,7.7754 69 | 5.4994,1.0173 70 | 20.341,20.992 71 | 10.136,6.6799 72 | 7.3345,4.0259 73 | 6.0062,1.2784 74 | 7.2259,3.3411 75 | 5.0269,-2.6807 76 | 6.5479,0.29678 77 | 7.5386,3.8845 78 | 5.0365,5.7014 79 | 10.274,6.7526 80 | 5.1077,2.0576 81 | 5.7292,0.47953 82 | 5.1884,0.20421 83 | 6.3557,0.67861 84 | 9.7687,7.5435 85 | 6.5159,5.3436 86 | 8.5172,4.2415 87 | 9.1802,6.7981 88 | 6.002,0.92695 89 | 5.5204,0.152 90 | 5.0594,2.8214 91 | 5.7077,1.8451 92 | 7.6366,4.2959 93 | 5.8707,7.2029 94 | 5.3054,1.9869 95 | 8.2934,0.14454 96 | 13.394,9.0551 97 | 5.4369,0.61705 98 | -------------------------------------------------------------------------------- /ex1/data/ex1data2.txt: -------------------------------------------------------------------------------- 1 | 2104,3,399900 2 | 1600,3,329900 3 | 2400,3,369000 4 | 1416,2,232000 5 | 3000,4,539900 6 | 1985,4,299900 7 | 1534,3,314900 8 | 1427,3,198999 9 | 1380,3,212000 10 | 1494,3,242500 11 | 1940,4,239999 12 | 2000,3,347000 13 | 1890,3,329999 14 | 4478,5,699900 15 | 1268,3,259900 16 | 2300,4,449900 17 | 1320,2,299900 18 | 1236,3,199900 19 | 2609,4,499998 20 | 3031,4,599000 21 | 1767,3,252900 22 | 1888,2,255000 23 | 1604,3,242900 24 | 1962,4,259900 25 | 3890,3,573900 26 | 1100,3,249900 27 | 1458,3,464500 28 | 2526,3,469000 29 | 2200,3,475000 30 | 2637,3,299900 31 | 1839,2,349900 32 | 1000,1,169900 33 | 2040,4,314900 34 | 3137,3,579900 35 | 1811,4,285900 36 | 1437,3,249900 37 | 1239,3,229900 38 | 2132,4,345000 39 | 
4215,4,549000 40 | 2162,4,287000 41 | 1664,2,368500 42 | 2238,3,329900 43 | 2567,4,314000 44 | 1200,3,299000 45 | 852,2,179900 46 | 1852,4,299900 47 | 1203,3,239500 48 | -------------------------------------------------------------------------------- /ex1/ex1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datasciencescoop/CourseraML/bb2ba970443b417ab70f2b6345a7dc46c163fd54/ex1/ex1.pdf -------------------------------------------------------------------------------- /ex2/data/ex2data1.txt: -------------------------------------------------------------------------------- 1 | 34.62365962451697,78.0246928153624,0 2 | 30.28671076822607,43.89499752400101,0 3 | 35.84740876993872,72.90219802708364,0 4 | 60.18259938620976,86.30855209546826,1 5 | 79.0327360507101,75.3443764369103,1 6 | 45.08327747668339,56.3163717815305,0 7 | 61.10666453684766,96.51142588489624,1 8 | 75.02474556738889,46.55401354116538,1 9 | 76.09878670226257,87.42056971926803,1 10 | 84.43281996120035,43.53339331072109,1 11 | 95.86155507093572,38.22527805795094,0 12 | 75.01365838958247,30.60326323428011,0 13 | 82.30705337399482,76.48196330235604,1 14 | 69.36458875970939,97.71869196188608,1 15 | 39.53833914367223,76.03681085115882,0 16 | 53.9710521485623,89.20735013750205,1 17 | 69.07014406283025,52.74046973016765,1 18 | 67.94685547711617,46.67857410673128,0 19 | 70.66150955499435,92.92713789364831,1 20 | 76.97878372747498,47.57596364975532,1 21 | 67.37202754570876,42.83843832029179,0 22 | 89.67677575072079,65.79936592745237,1 23 | 50.534788289883,48.85581152764205,0 24 | 34.21206097786789,44.20952859866288,0 25 | 77.9240914545704,68.9723599933059,1 26 | 62.27101367004632,69.95445795447587,1 27 | 80.1901807509566,44.82162893218353,1 28 | 93.114388797442,38.80067033713209,0 29 | 61.83020602312595,50.25610789244621,0 30 | 38.78580379679423,64.99568095539578,0 31 | 61.379289447425,72.80788731317097,1 32 | 85.40451939411645,57.05198397627122,1 
33 | 52.10797973193984,63.12762376881715,0 34 | 52.04540476831827,69.43286012045222,1 35 | 40.23689373545111,71.16774802184875,0 36 | 54.63510555424817,52.21388588061123,0 37 | 33.91550010906887,98.86943574220611,0 38 | 64.17698887494485,80.90806058670817,1 39 | 74.78925295941542,41.57341522824434,0 40 | 34.1836400264419,75.2377203360134,0 41 | 83.90239366249155,56.30804621605327,1 42 | 51.54772026906181,46.85629026349976,0 43 | 94.44336776917852,65.56892160559052,1 44 | 82.36875375713919,40.61825515970618,0 45 | 51.04775177128865,45.82270145776001,0 46 | 62.22267576120188,52.06099194836679,0 47 | 77.19303492601364,70.45820000180959,1 48 | 97.77159928000232,86.7278223300282,1 49 | 62.07306379667647,96.76882412413983,1 50 | 91.56497449807442,88.69629254546599,1 51 | 79.94481794066932,74.16311935043758,1 52 | 99.2725269292572,60.99903099844988,1 53 | 90.54671411399852,43.39060180650027,1 54 | 34.52451385320009,60.39634245837173,0 55 | 50.2864961189907,49.80453881323059,0 56 | 49.58667721632031,59.80895099453265,0 57 | 97.64563396007767,68.86157272420604,1 58 | 32.57720016809309,95.59854761387875,0 59 | 74.24869136721598,69.82457122657193,1 60 | 71.79646205863379,78.45356224515052,1 61 | 75.3956114656803,85.75993667331619,1 62 | 35.28611281526193,47.02051394723416,0 63 | 56.25381749711624,39.26147251058019,0 64 | 30.05882244669796,49.59297386723685,0 65 | 44.66826172480893,66.45008614558913,0 66 | 66.56089447242954,41.09209807936973,0 67 | 40.45755098375164,97.53518548909936,1 68 | 49.07256321908844,51.88321182073966,0 69 | 80.27957401466998,92.11606081344084,1 70 | 66.74671856944039,60.99139402740988,1 71 | 32.72283304060323,43.30717306430063,0 72 | 64.0393204150601,78.03168802018232,1 73 | 72.34649422579923,96.22759296761404,1 74 | 60.45788573918959,73.09499809758037,1 75 | 58.84095621726802,75.85844831279042,1 76 | 99.82785779692128,72.36925193383885,1 77 | 47.26426910848174,88.47586499559782,1 78 | 50.45815980285988,75.80985952982456,1 79 | 
60.45555629271532,42.50840943572217,0 80 | 82.22666157785568,42.71987853716458,0 81 | 88.9138964166533,69.80378889835472,1 82 | 94.83450672430196,45.69430680250754,1 83 | 67.31925746917527,66.58935317747915,1 84 | 57.23870631569862,59.51428198012956,1 85 | 80.36675600171273,90.96014789746954,1 86 | 68.46852178591112,85.59430710452014,1 87 | 42.0754545384731,78.84478600148043,0 88 | 75.47770200533905,90.42453899753964,1 89 | 78.63542434898018,96.64742716885644,1 90 | 52.34800398794107,60.76950525602592,0 91 | 94.09433112516793,77.15910509073893,1 92 | 90.44855097096364,87.50879176484702,1 93 | 55.48216114069585,35.57070347228866,0 94 | 74.49269241843041,84.84513684930135,1 95 | 89.84580670720979,45.35828361091658,1 96 | 83.48916274498238,48.38028579728175,1 97 | 42.2617008099817,87.10385094025457,1 98 | 99.31500880510394,68.77540947206617,1 99 | 55.34001756003703,64.9319380069486,1 100 | 74.77589300092767,89.52981289513276,1 101 | -------------------------------------------------------------------------------- /ex2/data/ex2data2.txt: -------------------------------------------------------------------------------- 1 | 0.051267,0.69956,1 2 | -0.092742,0.68494,1 3 | -0.21371,0.69225,1 4 | -0.375,0.50219,1 5 | -0.51325,0.46564,1 6 | -0.52477,0.2098,1 7 | -0.39804,0.034357,1 8 | -0.30588,-0.19225,1 9 | 0.016705,-0.40424,1 10 | 0.13191,-0.51389,1 11 | 0.38537,-0.56506,1 12 | 0.52938,-0.5212,1 13 | 0.63882,-0.24342,1 14 | 0.73675,-0.18494,1 15 | 0.54666,0.48757,1 16 | 0.322,0.5826,1 17 | 0.16647,0.53874,1 18 | -0.046659,0.81652,1 19 | -0.17339,0.69956,1 20 | -0.47869,0.63377,1 21 | -0.60541,0.59722,1 22 | -0.62846,0.33406,1 23 | -0.59389,0.005117,1 24 | -0.42108,-0.27266,1 25 | -0.11578,-0.39693,1 26 | 0.20104,-0.60161,1 27 | 0.46601,-0.53582,1 28 | 0.67339,-0.53582,1 29 | -0.13882,0.54605,1 30 | -0.29435,0.77997,1 31 | -0.26555,0.96272,1 32 | -0.16187,0.8019,1 33 | -0.17339,0.64839,1 34 | -0.28283,0.47295,1 35 | -0.36348,0.31213,1 36 | -0.30012,0.027047,1 37 | 
-0.23675,-0.21418,1 38 | -0.06394,-0.18494,1 39 | 0.062788,-0.16301,1 40 | 0.22984,-0.41155,1 41 | 0.2932,-0.2288,1 42 | 0.48329,-0.18494,1 43 | 0.64459,-0.14108,1 44 | 0.46025,0.012427,1 45 | 0.6273,0.15863,1 46 | 0.57546,0.26827,1 47 | 0.72523,0.44371,1 48 | 0.22408,0.52412,1 49 | 0.44297,0.67032,1 50 | 0.322,0.69225,1 51 | 0.13767,0.57529,1 52 | -0.0063364,0.39985,1 53 | -0.092742,0.55336,1 54 | -0.20795,0.35599,1 55 | -0.20795,0.17325,1 56 | -0.43836,0.21711,1 57 | -0.21947,-0.016813,1 58 | -0.13882,-0.27266,1 59 | 0.18376,0.93348,0 60 | 0.22408,0.77997,0 61 | 0.29896,0.61915,0 62 | 0.50634,0.75804,0 63 | 0.61578,0.7288,0 64 | 0.60426,0.59722,0 65 | 0.76555,0.50219,0 66 | 0.92684,0.3633,0 67 | 0.82316,0.27558,0 68 | 0.96141,0.085526,0 69 | 0.93836,0.012427,0 70 | 0.86348,-0.082602,0 71 | 0.89804,-0.20687,0 72 | 0.85196,-0.36769,0 73 | 0.82892,-0.5212,0 74 | 0.79435,-0.55775,0 75 | 0.59274,-0.7405,0 76 | 0.51786,-0.5943,0 77 | 0.46601,-0.41886,0 78 | 0.35081,-0.57968,0 79 | 0.28744,-0.76974,0 80 | 0.085829,-0.75512,0 81 | 0.14919,-0.57968,0 82 | -0.13306,-0.4481,0 83 | -0.40956,-0.41155,0 84 | -0.39228,-0.25804,0 85 | -0.74366,-0.25804,0 86 | -0.69758,0.041667,0 87 | -0.75518,0.2902,0 88 | -0.69758,0.68494,0 89 | -0.4038,0.70687,0 90 | -0.38076,0.91886,0 91 | -0.50749,0.90424,0 92 | -0.54781,0.70687,0 93 | 0.10311,0.77997,0 94 | 0.057028,0.91886,0 95 | -0.10426,0.99196,0 96 | -0.081221,1.1089,0 97 | 0.28744,1.087,0 98 | 0.39689,0.82383,0 99 | 0.63882,0.88962,0 100 | 0.82316,0.66301,0 101 | 0.67339,0.64108,0 102 | 1.0709,0.10015,0 103 | -0.046659,-0.57968,0 104 | -0.23675,-0.63816,0 105 | -0.15035,-0.36769,0 106 | -0.49021,-0.3019,0 107 | -0.46717,-0.13377,0 108 | -0.28859,-0.060673,0 109 | -0.61118,-0.067982,0 110 | -0.66302,-0.21418,0 111 | -0.59965,-0.41886,0 112 | -0.72638,-0.082602,0 113 | -0.83007,0.31213,0 114 | -0.72062,0.53874,0 115 | -0.59389,0.49488,0 116 | -0.48445,0.99927,0 117 | -0.0063364,0.99927,0 118 | 0.63265,-0.030612,0 119 | 
-------------------------------------------------------------------------------- /ex2/ex2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datasciencescoop/CourseraML/bb2ba970443b417ab70f2b6345a7dc46c163fd54/ex2/ex2.pdf -------------------------------------------------------------------------------- /ex3/data/ex3data1.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datasciencescoop/CourseraML/bb2ba970443b417ab70f2b6345a7dc46c163fd54/ex3/data/ex3data1.mat -------------------------------------------------------------------------------- /ex3/data/ex3weights.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datasciencescoop/CourseraML/bb2ba970443b417ab70f2b6345a7dc46c163fd54/ex3/data/ex3weights.mat -------------------------------------------------------------------------------- /ex3/ex3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datasciencescoop/CourseraML/bb2ba970443b417ab70f2b6345a7dc46c163fd54/ex3/ex3.pdf -------------------------------------------------------------------------------- /ex4/data/ex4data1.mat: -------------------------------------------------------------------------------- 1 | ../../ex3/data/ex3data1.mat -------------------------------------------------------------------------------- /ex4/data/ex4weights.mat: -------------------------------------------------------------------------------- 1 | ../../ex3/data/ex3weights.mat -------------------------------------------------------------------------------- /ex4/ex4.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datasciencescoop/CourseraML/bb2ba970443b417ab70f2b6345a7dc46c163fd54/ex4/ex4.pdf 
-------------------------------------------------------------------------------- /ex5/data/ex5data1.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datasciencescoop/CourseraML/bb2ba970443b417ab70f2b6345a7dc46c163fd54/ex5/data/ex5data1.mat -------------------------------------------------------------------------------- /ex5/ex5.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datasciencescoop/CourseraML/bb2ba970443b417ab70f2b6345a7dc46c163fd54/ex5/ex5.pdf -------------------------------------------------------------------------------- /ex6/data/emailSample1.txt: -------------------------------------------------------------------------------- 1 | > Anyone knows how much it costs to host a web portal ? 2 | > 3 | Well, it depends on how many visitors you're expecting. 4 | This can be anywhere from less than 10 bucks a month to a couple of $100. 5 | You should checkout http://www.rackspace.com/ or perhaps Amazon EC2 6 | if youre running something big.. 7 | 8 | To unsubscribe yourself from this mailing list, send an email to: 9 | groupname-unsubscribe@egroups.com 10 | 11 | -------------------------------------------------------------------------------- /ex6/data/emailSample2.txt: -------------------------------------------------------------------------------- 1 | Folks, 2 | 3 | my first time posting - have a bit of Unix experience, but am new to Linux. 4 | 5 | 6 | Just got a new PC at home - Dell box with Windows XP. Added a second hard disk 7 | for Linux. Partitioned the disk and have installed Suse 7.2 from CD, which went 8 | fine except it didn't pick up my monitor. 9 | 10 | I have a Dell branded E151FPp 15" LCD flat panel monitor and a nVidia GeForce4 11 | Ti4200 video card, both of which are probably too new to feature in Suse's default 12 | set. I downloaded a driver from the nVidia website and installed it using RPM. 
13 | Then I ran Sax2 (as was recommended in some postings I found on the net), but 14 | it still doesn't feature my video card in the available list. What next? 15 | 16 | Another problem. I have a Dell branded keyboard and if I hit Caps-Lock twice, 17 | the whole machine crashes (in Linux, not Windows) - even the on/off switch is 18 | inactive, leaving me to reach for the power cable instead. 19 | 20 | If anyone can help me in any way with these probs., I'd be really grateful - 21 | I've searched the 'net but have run out of ideas. 22 | 23 | Or should I be going for a different version of Linux such as RedHat? Opinions 24 | welcome. 25 | 26 | Thanks a lot, 27 | Peter 28 | 29 | -- 30 | Irish Linux Users' Group: ilug@linux.ie 31 | http://www.linux.ie/mailman/listinfo/ilug for (un)subscription information. 32 | List maintainer: listmaster@linux.ie 33 | 34 | 35 | -------------------------------------------------------------------------------- /ex6/data/ex6data1.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datasciencescoop/CourseraML/bb2ba970443b417ab70f2b6345a7dc46c163fd54/ex6/data/ex6data1.mat -------------------------------------------------------------------------------- /ex6/data/ex6data2.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datasciencescoop/CourseraML/bb2ba970443b417ab70f2b6345a7dc46c163fd54/ex6/data/ex6data2.mat -------------------------------------------------------------------------------- /ex6/data/ex6data3.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datasciencescoop/CourseraML/bb2ba970443b417ab70f2b6345a7dc46c163fd54/ex6/data/ex6data3.mat -------------------------------------------------------------------------------- /ex6/data/spamSample1.txt: -------------------------------------------------------------------------------- 1 | 
Do You Want To Make $1000 Or More Per Week? 2 | 3 | 4 | 5 | If you are a motivated and qualified individual - I 6 | will personally demonstrate to you a system that will 7 | make you $1,000 per week or more! This is NOT mlm. 8 | 9 | 10 | 11 | Call our 24 hour pre-recorded number to get the 12 | details. 13 | 14 | 15 | 16 | 000-456-789 17 | 18 | 19 | 20 | I need people who want to make serious money. Make 21 | the call and get the facts. 22 | 23 | Invest 2 minutes in yourself now! 24 | 25 | 26 | 27 | 000-456-789 28 | 29 | 30 | 31 | Looking forward to your call and I will introduce you 32 | to people like yourself who 33 | are currently making $10,000 plus per week! 34 | 35 | 36 | 37 | 000-456-789 38 | 39 | 40 | 41 | 3484lJGv6-241lEaN9080lRmS6-271WxHo7524qiyT5-438rjUv5615hQcf0-662eiDB9057dMtVl72 42 | 43 | -------------------------------------------------------------------------------- /ex6/data/spamSample2.txt: -------------------------------------------------------------------------------- 1 | Best Buy Viagra Generic Online 2 | 3 | Viagra 100mg x 60 Pills $125, Free Pills & Reorder Discount, Top Selling 100% Quality & Satisfaction guaranteed! 4 | 5 | We accept VISA, Master & E-Check Payments, 90000+ Satisfied Customers! 
6 | http://medphysitcstech.ru 7 | 8 | 9 | -------------------------------------------------------------------------------- /ex6/data/spamTest.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datasciencescoop/CourseraML/bb2ba970443b417ab70f2b6345a7dc46c163fd54/ex6/data/spamTest.mat -------------------------------------------------------------------------------- /ex6/data/spamTrain.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datasciencescoop/CourseraML/bb2ba970443b417ab70f2b6345a7dc46c163fd54/ex6/data/spamTrain.mat -------------------------------------------------------------------------------- /ex6/data/vocab.txt: -------------------------------------------------------------------------------- 1 | 1 aa 2 | 2 ab 3 | 3 abil 4 | 4 abl 5 | 5 about 6 | 6 abov 7 | 7 absolut 8 | 8 abus 9 | 9 ac 10 | 10 accept 11 | 11 access 12 | 12 accord 13 | 13 account 14 | 14 achiev 15 | 15 acquir 16 | 16 across 17 | 17 act 18 | 18 action 19 | 19 activ 20 | 20 actual 21 | 21 ad 22 | 22 adam 23 | 23 add 24 | 24 addit 25 | 25 address 26 | 26 administr 27 | 27 adult 28 | 28 advanc 29 | 29 advantag 30 | 30 advertis 31 | 31 advic 32 | 32 advis 33 | 33 ae 34 | 34 af 35 | 35 affect 36 | 36 affili 37 | 37 afford 38 | 38 africa 39 | 39 after 40 | 40 ag 41 | 41 again 42 | 42 against 43 | 43 agenc 44 | 44 agent 45 | 45 ago 46 | 46 agre 47 | 47 agreement 48 | 48 aid 49 | 49 air 50 | 50 al 51 | 51 alb 52 | 52 align 53 | 53 all 54 | 54 allow 55 | 55 almost 56 | 56 alon 57 | 57 along 58 | 58 alreadi 59 | 59 alsa 60 | 60 also 61 | 61 altern 62 | 62 although 63 | 63 alwai 64 | 64 am 65 | 65 amaz 66 | 66 america 67 | 67 american 68 | 68 among 69 | 69 amount 70 | 70 amp 71 | 71 an 72 | 72 analysi 73 | 73 analyst 74 | 74 and 75 | 75 ani 76 | 76 anim 77 | 77 announc 78 | 78 annual 79 | 79 annuiti 80 | 80 anoth 81 | 81 answer 82 | 82 anti 83 | 83 anumb 84 | 84 
anybodi 85 | 85 anymor 86 | 86 anyon 87 | 87 anyth 88 | 88 anywai 89 | 89 anywher 90 | 90 aol 91 | 91 ap 92 | 92 apolog 93 | 93 app 94 | 94 appar 95 | 95 appear 96 | 96 appl 97 | 97 appli 98 | 98 applic 99 | 99 appreci 100 | 100 approach 101 | 101 approv 102 | 102 apt 103 | 103 ar 104 | 104 archiv 105 | 105 area 106 | 106 aren 107 | 107 argument 108 | 108 arial 109 | 109 arm 110 | 110 around 111 | 111 arrai 112 | 112 arriv 113 | 113 art 114 | 114 articl 115 | 115 artist 116 | 116 as 117 | 117 ascii 118 | 118 ask 119 | 119 asset 120 | 120 assist 121 | 121 associ 122 | 122 assum 123 | 123 assur 124 | 124 at 125 | 125 atol 126 | 126 attach 127 | 127 attack 128 | 128 attempt 129 | 129 attent 130 | 130 attornei 131 | 131 attract 132 | 132 audio 133 | 133 aug 134 | 134 august 135 | 135 author 136 | 136 auto 137 | 137 autom 138 | 138 automat 139 | 139 avail 140 | 140 averag 141 | 141 avoid 142 | 142 awai 143 | 143 awar 144 | 144 award 145 | 145 ba 146 | 146 babi 147 | 147 back 148 | 148 background 149 | 149 backup 150 | 150 bad 151 | 151 balanc 152 | 152 ban 153 | 153 bank 154 | 154 bar 155 | 155 base 156 | 156 basenumb 157 | 157 basi 158 | 158 basic 159 | 159 bb 160 | 160 bc 161 | 161 bd 162 | 162 be 163 | 163 beat 164 | 164 beberg 165 | 165 becaus 166 | 166 becom 167 | 167 been 168 | 168 befor 169 | 169 begin 170 | 170 behalf 171 | 171 behavior 172 | 172 behind 173 | 173 believ 174 | 174 below 175 | 175 benefit 176 | 176 best 177 | 177 beta 178 | 178 better 179 | 179 between 180 | 180 bf 181 | 181 big 182 | 182 bill 183 | 183 billion 184 | 184 bin 185 | 185 binari 186 | 186 bit 187 | 187 black 188 | 188 blank 189 | 189 block 190 | 190 blog 191 | 191 blood 192 | 192 blue 193 | 193 bnumber 194 | 194 board 195 | 195 bodi 196 | 196 boi 197 | 197 bonu 198 | 198 book 199 | 199 boot 200 | 200 border 201 | 201 boss 202 | 202 boston 203 | 203 botan 204 | 204 both 205 | 205 bottl 206 | 206 bottom 207 | 207 boundari 208 | 208 box 209 | 209 brain 210 | 210 brand 211 | 211 break 212 
| 212 brian 213 | 213 bring 214 | 214 broadcast 215 | 215 broker 216 | 216 browser 217 | 217 bug 218 | 218 bui 219 | 219 build 220 | 220 built 221 | 221 bulk 222 | 222 burn 223 | 223 bush 224 | 224 busi 225 | 225 but 226 | 226 button 227 | 227 by 228 | 228 byte 229 | 229 ca 230 | 230 cabl 231 | 231 cach 232 | 232 calcul 233 | 233 california 234 | 234 call 235 | 235 came 236 | 236 camera 237 | 237 campaign 238 | 238 can 239 | 239 canada 240 | 240 cannot 241 | 241 canon 242 | 242 capabl 243 | 243 capillari 244 | 244 capit 245 | 245 car 246 | 246 card 247 | 247 care 248 | 248 career 249 | 249 carri 250 | 250 cartridg 251 | 251 case 252 | 252 cash 253 | 253 cat 254 | 254 catch 255 | 255 categori 256 | 256 caus 257 | 257 cb 258 | 258 cc 259 | 259 cd 260 | 260 ce 261 | 261 cell 262 | 262 cent 263 | 263 center 264 | 264 central 265 | 265 centuri 266 | 266 ceo 267 | 267 certain 268 | 268 certainli 269 | 269 cf 270 | 270 challeng 271 | 271 chanc 272 | 272 chang 273 | 273 channel 274 | 274 char 275 | 275 charact 276 | 276 charg 277 | 277 charset 278 | 278 chat 279 | 279 cheap 280 | 280 check 281 | 281 cheer 282 | 282 chief 283 | 283 children 284 | 284 china 285 | 285 chip 286 | 286 choic 287 | 287 choos 288 | 288 chri 289 | 289 citi 290 | 290 citizen 291 | 291 civil 292 | 292 claim 293 | 293 class 294 | 294 classifi 295 | 295 clean 296 | 296 clear 297 | 297 clearli 298 | 298 click 299 | 299 client 300 | 300 close 301 | 301 clue 302 | 302 cnet 303 | 303 cnumber 304 | 304 co 305 | 305 code 306 | 306 collect 307 | 307 colleg 308 | 308 color 309 | 309 com 310 | 310 combin 311 | 311 come 312 | 312 comfort 313 | 313 command 314 | 314 comment 315 | 315 commentari 316 | 316 commerci 317 | 317 commiss 318 | 318 commit 319 | 319 common 320 | 320 commun 321 | 321 compani 322 | 322 compar 323 | 323 comparison 324 | 324 compat 325 | 325 compet 326 | 326 competit 327 | 327 compil 328 | 328 complet 329 | 329 comprehens 330 | 330 comput 331 | 331 concentr 332 | 332 concept 333 | 333 concern 
334 | 334 condit 335 | 335 conf 336 | 336 confer 337 | 337 confid 338 | 338 confidenti 339 | 339 config 340 | 340 configur 341 | 341 confirm 342 | 342 conflict 343 | 343 confus 344 | 344 congress 345 | 345 connect 346 | 346 consid 347 | 347 consolid 348 | 348 constitut 349 | 349 construct 350 | 350 consult 351 | 351 consum 352 | 352 contact 353 | 353 contain 354 | 354 content 355 | 355 continu 356 | 356 contract 357 | 357 contribut 358 | 358 control 359 | 359 conveni 360 | 360 convers 361 | 361 convert 362 | 362 cool 363 | 363 cooper 364 | 364 copi 365 | 365 copyright 366 | 366 core 367 | 367 corpor 368 | 368 correct 369 | 369 correspond 370 | 370 cost 371 | 371 could 372 | 372 couldn 373 | 373 count 374 | 374 countri 375 | 375 coupl 376 | 376 cours 377 | 377 court 378 | 378 cover 379 | 379 coverag 380 | 380 crash 381 | 381 creat 382 | 382 creativ 383 | 383 credit 384 | 384 critic 385 | 385 cross 386 | 386 cultur 387 | 387 current 388 | 388 custom 389 | 389 cut 390 | 390 cv 391 | 391 da 392 | 392 dagga 393 | 393 dai 394 | 394 daili 395 | 395 dan 396 | 396 danger 397 | 397 dark 398 | 398 data 399 | 399 databas 400 | 400 datapow 401 | 401 date 402 | 402 dave 403 | 403 david 404 | 404 dc 405 | 405 de 406 | 406 dead 407 | 407 deal 408 | 408 dear 409 | 409 death 410 | 410 debt 411 | 411 decad 412 | 412 decid 413 | 413 decis 414 | 414 declar 415 | 415 declin 416 | 416 decor 417 | 417 default 418 | 418 defend 419 | 419 defens 420 | 420 defin 421 | 421 definit 422 | 422 degre 423 | 423 delai 424 | 424 delet 425 | 425 deliv 426 | 426 deliveri 427 | 427 dell 428 | 428 demand 429 | 429 democrat 430 | 430 depart 431 | 431 depend 432 | 432 deposit 433 | 433 describ 434 | 434 descript 435 | 435 deserv 436 | 436 design 437 | 437 desir 438 | 438 desktop 439 | 439 despit 440 | 440 detail 441 | 441 detect 442 | 442 determin 443 | 443 dev 444 | 444 devel 445 | 445 develop 446 | 446 devic 447 | 447 di 448 | 448 dial 449 | 449 did 450 | 450 didn 451 | 451 diet 452 | 452 differ 453 | 
453 difficult 454 | 454 digit 455 | 455 direct 456 | 456 directli 457 | 457 director 458 | 458 directori 459 | 459 disabl 460 | 460 discount 461 | 461 discov 462 | 462 discoveri 463 | 463 discuss 464 | 464 disk 465 | 465 displai 466 | 466 disposit 467 | 467 distanc 468 | 468 distribut 469 | 469 dn 470 | 470 dnumber 471 | 471 do 472 | 472 doc 473 | 473 document 474 | 474 doe 475 | 475 doer 476 | 476 doesn 477 | 477 dollar 478 | 478 dollarac 479 | 479 dollarnumb 480 | 480 domain 481 | 481 don 482 | 482 done 483 | 483 dont 484 | 484 doubl 485 | 485 doubt 486 | 486 down 487 | 487 download 488 | 488 dr 489 | 489 draw 490 | 490 dream 491 | 491 drive 492 | 492 driver 493 | 493 drop 494 | 494 drug 495 | 495 due 496 | 496 dure 497 | 497 dvd 498 | 498 dw 499 | 499 dynam 500 | 500 ea 501 | 501 each 502 | 502 earli 503 | 503 earlier 504 | 504 earn 505 | 505 earth 506 | 506 easi 507 | 507 easier 508 | 508 easili 509 | 509 eat 510 | 510 eb 511 | 511 ebai 512 | 512 ec 513 | 513 echo 514 | 514 econom 515 | 515 economi 516 | 516 ed 517 | 517 edg 518 | 518 edit 519 | 519 editor 520 | 520 educ 521 | 521 eff 522 | 522 effect 523 | 523 effici 524 | 524 effort 525 | 525 either 526 | 526 el 527 | 527 electron 528 | 528 elimin 529 | 529 els 530 | 530 email 531 | 531 emailaddr 532 | 532 emerg 533 | 533 empir 534 | 534 employ 535 | 535 employe 536 | 536 en 537 | 537 enabl 538 | 538 encod 539 | 539 encourag 540 | 540 end 541 | 541 enemi 542 | 542 enenkio 543 | 543 energi 544 | 544 engin 545 | 545 english 546 | 546 enhanc 547 | 547 enjoi 548 | 548 enough 549 | 549 ensur 550 | 550 enter 551 | 551 enterpris 552 | 552 entertain 553 | 553 entir 554 | 554 entri 555 | 555 enumb 556 | 556 environ 557 | 557 equal 558 | 558 equip 559 | 559 equival 560 | 560 error 561 | 561 especi 562 | 562 essenti 563 | 563 establish 564 | 564 estat 565 | 565 estim 566 | 566 et 567 | 567 etc 568 | 568 euro 569 | 569 europ 570 | 570 european 571 | 571 even 572 | 572 event 573 | 573 eventu 574 | 574 ever 575 | 575 everi 
576 | 576 everyon 577 | 577 everyth 578 | 578 evid 579 | 579 evil 580 | 580 exactli 581 | 581 exampl 582 | 582 excel 583 | 583 except 584 | 584 exchang 585 | 585 excit 586 | 586 exclus 587 | 587 execut 588 | 588 exercis 589 | 589 exist 590 | 590 exmh 591 | 591 expand 592 | 592 expect 593 | 593 expens 594 | 594 experi 595 | 595 expert 596 | 596 expir 597 | 597 explain 598 | 598 explor 599 | 599 express 600 | 600 extend 601 | 601 extens 602 | 602 extra 603 | 603 extract 604 | 604 extrem 605 | 605 ey 606 | 606 fa 607 | 607 face 608 | 608 fact 609 | 609 factor 610 | 610 fail 611 | 611 fair 612 | 612 fall 613 | 613 fals 614 | 614 famili 615 | 615 faq 616 | 616 far 617 | 617 fast 618 | 618 faster 619 | 619 fastest 620 | 620 fat 621 | 621 father 622 | 622 favorit 623 | 623 fax 624 | 624 fb 625 | 625 fd 626 | 626 featur 627 | 627 feder 628 | 628 fee 629 | 629 feed 630 | 630 feedback 631 | 631 feel 632 | 632 femal 633 | 633 few 634 | 634 ffffff 635 | 635 ffnumber 636 | 636 field 637 | 637 fight 638 | 638 figur 639 | 639 file 640 | 640 fill 641 | 641 film 642 | 642 filter 643 | 643 final 644 | 644 financ 645 | 645 financi 646 | 646 find 647 | 647 fine 648 | 648 finish 649 | 649 fire 650 | 650 firewal 651 | 651 firm 652 | 652 first 653 | 653 fit 654 | 654 five 655 | 655 fix 656 | 656 flag 657 | 657 flash 658 | 658 flow 659 | 659 fnumber 660 | 660 focu 661 | 661 folder 662 | 662 folk 663 | 663 follow 664 | 664 font 665 | 665 food 666 | 666 for 667 | 667 forc 668 | 668 foreign 669 | 669 forev 670 | 670 forget 671 | 671 fork 672 | 672 form 673 | 673 format 674 | 674 former 675 | 675 fortun 676 | 676 forward 677 | 677 found 678 | 678 foundat 679 | 679 four 680 | 680 franc 681 | 681 free 682 | 682 freedom 683 | 683 french 684 | 684 freshrpm 685 | 685 fri 686 | 686 fridai 687 | 687 friend 688 | 688 from 689 | 689 front 690 | 690 ftoc 691 | 691 ftp 692 | 692 full 693 | 693 fulli 694 | 694 fun 695 | 695 function 696 | 696 fund 697 | 697 further 698 | 698 futur 699 | 699 ga 700 | 700 
gain 701 | 701 game 702 | 702 gari 703 | 703 garrigu 704 | 704 gave 705 | 705 gcc 706 | 706 geek 707 | 707 gener 708 | 708 get 709 | 709 gif 710 | 710 gift 711 | 711 girl 712 | 712 give 713 | 713 given 714 | 714 global 715 | 715 gnome 716 | 716 gnu 717 | 717 gnupg 718 | 718 go 719 | 719 goal 720 | 720 god 721 | 721 goe 722 | 722 gold 723 | 723 gone 724 | 724 good 725 | 725 googl 726 | 726 got 727 | 727 govern 728 | 728 gpl 729 | 729 grand 730 | 730 grant 731 | 731 graphic 732 | 732 great 733 | 733 greater 734 | 734 ground 735 | 735 group 736 | 736 grow 737 | 737 growth 738 | 738 gt 739 | 739 guarante 740 | 740 guess 741 | 741 gui 742 | 742 guid 743 | 743 ha 744 | 744 hack 745 | 745 had 746 | 746 half 747 | 747 ham 748 | 748 hand 749 | 749 handl 750 | 750 happen 751 | 751 happi 752 | 752 hard 753 | 753 hardwar 754 | 754 hat 755 | 755 hate 756 | 756 have 757 | 757 haven 758 | 758 he 759 | 759 head 760 | 760 header 761 | 761 headlin 762 | 762 health 763 | 763 hear 764 | 764 heard 765 | 765 heart 766 | 766 heaven 767 | 767 hei 768 | 768 height 769 | 769 held 770 | 770 hello 771 | 771 help 772 | 772 helvetica 773 | 773 her 774 | 774 herba 775 | 775 here 776 | 776 hermio 777 | 777 hettinga 778 | 778 hi 779 | 779 high 780 | 780 higher 781 | 781 highli 782 | 782 highlight 783 | 783 him 784 | 784 histori 785 | 785 hit 786 | 786 hold 787 | 787 home 788 | 788 honor 789 | 789 hope 790 | 790 host 791 | 791 hot 792 | 792 hour 793 | 793 hous 794 | 794 how 795 | 795 howev 796 | 796 hp 797 | 797 html 798 | 798 http 799 | 799 httpaddr 800 | 800 huge 801 | 801 human 802 | 802 hundr 803 | 803 ibm 804 | 804 id 805 | 805 idea 806 | 806 ident 807 | 807 identifi 808 | 808 idnumb 809 | 809 ie 810 | 810 if 811 | 811 ignor 812 | 812 ii 813 | 813 iii 814 | 814 iiiiiiihnumberjnumberhnumberjnumberhnumb 815 | 815 illeg 816 | 816 im 817 | 817 imag 818 | 818 imagin 819 | 819 immedi 820 | 820 impact 821 | 821 implement 822 | 822 import 823 | 823 impress 824 | 824 improv 825 | 825 in 826 | 826 inc 
827 | 827 includ 828 | 828 incom 829 | 829 increas 830 | 830 incred 831 | 831 inde 832 | 832 independ 833 | 833 index 834 | 834 india 835 | 835 indian 836 | 836 indic 837 | 837 individu 838 | 838 industri 839 | 839 info 840 | 840 inform 841 | 841 initi 842 | 842 inlin 843 | 843 innov 844 | 844 input 845 | 845 insert 846 | 846 insid 847 | 847 instal 848 | 848 instanc 849 | 849 instant 850 | 850 instead 851 | 851 institut 852 | 852 instruct 853 | 853 insur 854 | 854 int 855 | 855 integr 856 | 856 intel 857 | 857 intellig 858 | 858 intend 859 | 859 interact 860 | 860 interest 861 | 861 interfac 862 | 862 intern 863 | 863 internet 864 | 864 interview 865 | 865 into 866 | 866 intro 867 | 867 introduc 868 | 868 inumb 869 | 869 invest 870 | 870 investig 871 | 871 investor 872 | 872 invok 873 | 873 involv 874 | 874 ip 875 | 875 ireland 876 | 876 irish 877 | 877 is 878 | 878 island 879 | 879 isn 880 | 880 iso 881 | 881 isp 882 | 882 issu 883 | 883 it 884 | 884 item 885 | 885 itself 886 | 886 jabber 887 | 887 jame 888 | 888 java 889 | 889 jim 890 | 890 jnumberiiiiiiihepihepihf 891 | 891 job 892 | 892 joe 893 | 893 john 894 | 894 join 895 | 895 journal 896 | 896 judg 897 | 897 judgment 898 | 898 jul 899 | 899 juli 900 | 900 jump 901 | 901 june 902 | 902 just 903 | 903 justin 904 | 904 keep 905 | 905 kei 906 | 906 kept 907 | 907 kernel 908 | 908 kevin 909 | 909 keyboard 910 | 910 kid 911 | 911 kill 912 | 912 kind 913 | 913 king 914 | 914 kingdom 915 | 915 knew 916 | 916 know 917 | 917 knowledg 918 | 918 known 919 | 919 la 920 | 920 lack 921 | 921 land 922 | 922 languag 923 | 923 laptop 924 | 924 larg 925 | 925 larger 926 | 926 largest 927 | 927 laser 928 | 928 last 929 | 929 late 930 | 930 later 931 | 931 latest 932 | 932 launch 933 | 933 law 934 | 934 lawrenc 935 | 935 le 936 | 936 lead 937 | 937 leader 938 | 938 learn 939 | 939 least 940 | 940 leav 941 | 941 left 942 | 942 legal 943 | 943 lender 944 | 944 length 945 | 945 less 946 | 946 lesson 947 | 947 let 948 | 948 letter 
949 | 949 level 950 | 950 lib 951 | 951 librari 952 | 952 licens 953 | 953 life 954 | 954 lifetim 955 | 955 light 956 | 956 like 957 | 957 limit 958 | 958 line 959 | 959 link 960 | 960 linux 961 | 961 list 962 | 962 listen 963 | 963 littl 964 | 964 live 965 | 965 ll 966 | 966 lo 967 | 967 load 968 | 968 loan 969 | 969 local 970 | 970 locat 971 | 971 lock 972 | 972 lockergnom 973 | 973 log 974 | 974 long 975 | 975 longer 976 | 976 look 977 | 977 lose 978 | 978 loss 979 | 979 lost 980 | 980 lot 981 | 981 love 982 | 982 low 983 | 983 lower 984 | 984 lowest 985 | 985 lt 986 | 986 ma 987 | 987 mac 988 | 988 machin 989 | 989 made 990 | 990 magazin 991 | 991 mai 992 | 992 mail 993 | 993 mailer 994 | 994 main 995 | 995 maintain 996 | 996 major 997 | 997 make 998 | 998 maker 999 | 999 male 1000 | 1000 man 1001 | 1001 manag 1002 | 1002 mani 1003 | 1003 manual 1004 | 1004 manufactur 1005 | 1005 map 1006 | 1006 march 1007 | 1007 margin 1008 | 1008 mark 1009 | 1009 market 1010 | 1010 marshal 1011 | 1011 mass 1012 | 1012 master 1013 | 1013 match 1014 | 1014 materi 1015 | 1015 matter 1016 | 1016 matthia 1017 | 1017 mayb 1018 | 1018 me 1019 | 1019 mean 1020 | 1020 measur 1021 | 1021 mechan 1022 | 1022 media 1023 | 1023 medic 1024 | 1024 meet 1025 | 1025 member 1026 | 1026 membership 1027 | 1027 memori 1028 | 1028 men 1029 | 1029 mention 1030 | 1030 menu 1031 | 1031 merchant 1032 | 1032 messag 1033 | 1033 method 1034 | 1034 mh 1035 | 1035 michael 1036 | 1036 microsoft 1037 | 1037 middl 1038 | 1038 might 1039 | 1039 mike 1040 | 1040 mile 1041 | 1041 militari 1042 | 1042 million 1043 | 1043 mime 1044 | 1044 mind 1045 | 1045 mine 1046 | 1046 mini 1047 | 1047 minimum 1048 | 1048 minut 1049 | 1049 miss 1050 | 1050 mistak 1051 | 1051 mobil 1052 | 1052 mode 1053 | 1053 model 1054 | 1054 modem 1055 | 1055 modifi 1056 | 1056 modul 1057 | 1057 moment 1058 | 1058 mon 1059 | 1059 mondai 1060 | 1060 monei 1061 | 1061 monitor 1062 | 1062 month 1063 | 1063 monthli 1064 | 1064 more 1065 | 1065 
morn 1066 | 1066 mortgag 1067 | 1067 most 1068 | 1068 mostli 1069 | 1069 mother 1070 | 1070 motiv 1071 | 1071 move 1072 | 1072 movi 1073 | 1073 mpnumber 1074 | 1074 mr 1075 | 1075 ms 1076 | 1076 msg 1077 | 1077 much 1078 | 1078 multi 1079 | 1079 multipart 1080 | 1080 multipl 1081 | 1081 murphi 1082 | 1082 music 1083 | 1083 must 1084 | 1084 my 1085 | 1085 myself 1086 | 1086 name 1087 | 1087 nation 1088 | 1088 natur 1089 | 1089 nbsp 1090 | 1090 near 1091 | 1091 nearli 1092 | 1092 necessari 1093 | 1093 need 1094 | 1094 neg 1095 | 1095 net 1096 | 1096 netscap 1097 | 1097 network 1098 | 1098 never 1099 | 1099 new 1100 | 1100 newslett 1101 | 1101 next 1102 | 1102 nextpart 1103 | 1103 nice 1104 | 1104 nigeria 1105 | 1105 night 1106 | 1106 no 1107 | 1107 nobodi 1108 | 1108 non 1109 | 1109 none 1110 | 1110 nor 1111 | 1111 normal 1112 | 1112 north 1113 | 1113 not 1114 | 1114 note 1115 | 1115 noth 1116 | 1116 notic 1117 | 1117 now 1118 | 1118 nt 1119 | 1119 null 1120 | 1120 number 1121 | 1121 numbera 1122 | 1122 numberam 1123 | 1123 numberanumb 1124 | 1124 numberb 1125 | 1125 numberbit 1126 | 1126 numberc 1127 | 1127 numbercb 1128 | 1128 numbercbr 1129 | 1129 numbercfont 1130 | 1130 numbercli 1131 | 1131 numbercnumb 1132 | 1132 numbercp 1133 | 1133 numberctd 1134 | 1134 numberd 1135 | 1135 numberdari 1136 | 1136 numberdnumb 1137 | 1137 numberenumb 1138 | 1138 numberf 1139 | 1139 numberfb 1140 | 1140 numberff 1141 | 1141 numberffont 1142 | 1142 numberfp 1143 | 1143 numberftd 1144 | 1144 numberk 1145 | 1145 numberm 1146 | 1146 numbermb 1147 | 1147 numberp 1148 | 1148 numberpd 1149 | 1149 numberpm 1150 | 1150 numberpx 1151 | 1151 numberst 1152 | 1152 numberth 1153 | 1153 numbertnumb 1154 | 1154 numberx 1155 | 1155 object 1156 | 1156 oblig 1157 | 1157 obtain 1158 | 1158 obvious 1159 | 1159 occur 1160 | 1160 oct 1161 | 1161 octob 1162 | 1162 of 1163 | 1163 off 1164 | 1164 offer 1165 | 1165 offic 1166 | 1166 offici 1167 | 1167 often 1168 | 1168 oh 1169 | 1169 ok 1170 | 1170 old 
1171 | 1171 on 1172 | 1172 onc 1173 | 1173 onli 1174 | 1174 onlin 1175 | 1175 open 1176 | 1176 oper 1177 | 1177 opinion 1178 | 1178 opportun 1179 | 1179 opt 1180 | 1180 optim 1181 | 1181 option 1182 | 1182 or 1183 | 1183 order 1184 | 1184 org 1185 | 1185 organ 1186 | 1186 origin 1187 | 1187 os 1188 | 1188 osdn 1189 | 1189 other 1190 | 1190 otherwis 1191 | 1191 our 1192 | 1192 out 1193 | 1193 outlook 1194 | 1194 output 1195 | 1195 outsid 1196 | 1196 over 1197 | 1197 own 1198 | 1198 owner 1199 | 1199 oz 1200 | 1200 pacif 1201 | 1201 pack 1202 | 1202 packag 1203 | 1203 page 1204 | 1204 pai 1205 | 1205 paid 1206 | 1206 pain 1207 | 1207 palm 1208 | 1208 panel 1209 | 1209 paper 1210 | 1210 paragraph 1211 | 1211 parent 1212 | 1212 part 1213 | 1213 parti 1214 | 1214 particip 1215 | 1215 particular 1216 | 1216 particularli 1217 | 1217 partit 1218 | 1218 partner 1219 | 1219 pass 1220 | 1220 password 1221 | 1221 past 1222 | 1222 patch 1223 | 1223 patent 1224 | 1224 path 1225 | 1225 pattern 1226 | 1226 paul 1227 | 1227 payment 1228 | 1228 pc 1229 | 1229 peac 1230 | 1230 peopl 1231 | 1231 per 1232 | 1232 percent 1233 | 1233 percentag 1234 | 1234 perfect 1235 | 1235 perfectli 1236 | 1236 perform 1237 | 1237 perhap 1238 | 1238 period 1239 | 1239 perl 1240 | 1240 perman 1241 | 1241 permiss 1242 | 1242 person 1243 | 1243 pgp 1244 | 1244 phone 1245 | 1245 photo 1246 | 1246 php 1247 | 1247 phrase 1248 | 1248 physic 1249 | 1249 pick 1250 | 1250 pictur 1251 | 1251 piec 1252 | 1252 piiiiiiii 1253 | 1253 pipe 1254 | 1254 pjnumber 1255 | 1255 place 1256 | 1256 plai 1257 | 1257 plain 1258 | 1258 plan 1259 | 1259 planet 1260 | 1260 plant 1261 | 1261 planta 1262 | 1262 platform 1263 | 1263 player 1264 | 1264 pleas 1265 | 1265 plu 1266 | 1266 plug 1267 | 1267 pm 1268 | 1268 pocket 1269 | 1269 point 1270 | 1270 polic 1271 | 1271 polici 1272 | 1272 polit 1273 | 1273 poor 1274 | 1274 pop 1275 | 1275 popul 1276 | 1276 popular 1277 | 1277 port 1278 | 1278 posit 1279 | 1279 possibl 1280 | 1280 post 
1281 | 1281 potenti 1282 | 1282 pound 1283 | 1283 powel 1284 | 1284 power 1285 | 1285 powershot 1286 | 1286 practic 1287 | 1287 pre 1288 | 1288 predict 1289 | 1289 prefer 1290 | 1290 premium 1291 | 1291 prepar 1292 | 1292 present 1293 | 1293 presid 1294 | 1294 press 1295 | 1295 pretti 1296 | 1296 prevent 1297 | 1297 previou 1298 | 1298 previous 1299 | 1299 price 1300 | 1300 principl 1301 | 1301 print 1302 | 1302 printabl 1303 | 1303 printer 1304 | 1304 privaci 1305 | 1305 privat 1306 | 1306 prize 1307 | 1307 pro 1308 | 1308 probabl 1309 | 1309 problem 1310 | 1310 procedur 1311 | 1311 process 1312 | 1312 processor 1313 | 1313 procmail 1314 | 1314 produc 1315 | 1315 product 1316 | 1316 profession 1317 | 1317 profil 1318 | 1318 profit 1319 | 1319 program 1320 | 1320 programm 1321 | 1321 progress 1322 | 1322 project 1323 | 1323 promis 1324 | 1324 promot 1325 | 1325 prompt 1326 | 1326 properti 1327 | 1327 propos 1328 | 1328 proprietari 1329 | 1329 prospect 1330 | 1330 protect 1331 | 1331 protocol 1332 | 1332 prove 1333 | 1333 proven 1334 | 1334 provid 1335 | 1335 proxi 1336 | 1336 pub 1337 | 1337 public 1338 | 1338 publish 1339 | 1339 pudg 1340 | 1340 pull 1341 | 1341 purchas 1342 | 1342 purpos 1343 | 1343 put 1344 | 1344 python 1345 | 1345 qnumber 1346 | 1346 qualifi 1347 | 1347 qualiti 1348 | 1348 quarter 1349 | 1349 question 1350 | 1350 quick 1351 | 1351 quickli 1352 | 1352 quit 1353 | 1353 quot 1354 | 1354 radio 1355 | 1355 ragga 1356 | 1356 rais 1357 | 1357 random 1358 | 1358 rang 1359 | 1359 rate 1360 | 1360 rather 1361 | 1361 ratio 1362 | 1362 razor 1363 | 1363 razornumb 1364 | 1364 re 1365 | 1365 reach 1366 | 1366 read 1367 | 1367 reader 1368 | 1368 readi 1369 | 1369 real 1370 | 1370 realiz 1371 | 1371 realli 1372 | 1372 reason 1373 | 1373 receiv 1374 | 1374 recent 1375 | 1375 recipi 1376 | 1376 recommend 1377 | 1377 record 1378 | 1378 red 1379 | 1379 redhat 1380 | 1380 reduc 1381 | 1381 refer 1382 | 1382 refin 1383 | 1383 reg 1384 | 1384 regard 1385 | 1385 
region 1386 | 1386 regist 1387 | 1387 regul 1388 | 1388 regular 1389 | 1389 rel 1390 | 1390 relat 1391 | 1391 relationship 1392 | 1392 releas 1393 | 1393 relev 1394 | 1394 reliabl 1395 | 1395 remain 1396 | 1396 rememb 1397 | 1397 remot 1398 | 1398 remov 1399 | 1399 replac 1400 | 1400 repli 1401 | 1401 report 1402 | 1402 repositori 1403 | 1403 repres 1404 | 1404 republ 1405 | 1405 request 1406 | 1406 requir 1407 | 1407 research 1408 | 1408 reserv 1409 | 1409 resid 1410 | 1410 resourc 1411 | 1411 respect 1412 | 1412 respond 1413 | 1413 respons 1414 | 1414 rest 1415 | 1415 result 1416 | 1416 retail 1417 | 1417 return 1418 | 1418 reveal 1419 | 1419 revenu 1420 | 1420 revers 1421 | 1421 review 1422 | 1422 revok 1423 | 1423 rh 1424 | 1424 rich 1425 | 1425 right 1426 | 1426 risk 1427 | 1427 road 1428 | 1428 robert 1429 | 1429 rock 1430 | 1430 role 1431 | 1431 roll 1432 | 1432 rom 1433 | 1433 roman 1434 | 1434 room 1435 | 1435 root 1436 | 1436 round 1437 | 1437 rpm 1438 | 1438 rss 1439 | 1439 rule 1440 | 1440 run 1441 | 1441 sa 1442 | 1442 safe 1443 | 1443 sai 1444 | 1444 said 1445 | 1445 sale 1446 | 1446 same 1447 | 1447 sampl 1448 | 1448 san 1449 | 1449 saou 1450 | 1450 sat 1451 | 1451 satellit 1452 | 1452 save 1453 | 1453 saw 1454 | 1454 scan 1455 | 1455 schedul 1456 | 1456 school 1457 | 1457 scienc 1458 | 1458 score 1459 | 1459 screen 1460 | 1460 script 1461 | 1461 se 1462 | 1462 search 1463 | 1463 season 1464 | 1464 second 1465 | 1465 secret 1466 | 1466 section 1467 | 1467 secur 1468 | 1468 see 1469 | 1469 seed 1470 | 1470 seek 1471 | 1471 seem 1472 | 1472 seen 1473 | 1473 select 1474 | 1474 self 1475 | 1475 sell 1476 | 1476 seminar 1477 | 1477 send 1478 | 1478 sender 1479 | 1479 sendmail 1480 | 1480 senior 1481 | 1481 sens 1482 | 1482 sensit 1483 | 1483 sent 1484 | 1484 sep 1485 | 1485 separ 1486 | 1486 septemb 1487 | 1487 sequenc 1488 | 1488 seri 1489 | 1489 serif 1490 | 1490 seriou 1491 | 1491 serv 1492 | 1492 server 1493 | 1493 servic 1494 | 1494 set 1495 | 1495 
setup 1496 | 1496 seven 1497 | 1497 seventh 1498 | 1498 sever 1499 | 1499 sex 1500 | 1500 sexual 1501 | 1501 sf 1502 | 1502 shape 1503 | 1503 share 1504 | 1504 she 1505 | 1505 shell 1506 | 1506 ship 1507 | 1507 shop 1508 | 1508 short 1509 | 1509 shot 1510 | 1510 should 1511 | 1511 show 1512 | 1512 side 1513 | 1513 sign 1514 | 1514 signatur 1515 | 1515 signific 1516 | 1516 similar 1517 | 1517 simpl 1518 | 1518 simpli 1519 | 1519 sinc 1520 | 1520 sincer 1521 | 1521 singl 1522 | 1522 sit 1523 | 1523 site 1524 | 1524 situat 1525 | 1525 six 1526 | 1526 size 1527 | 1527 skeptic 1528 | 1528 skill 1529 | 1529 skin 1530 | 1530 skip 1531 | 1531 sleep 1532 | 1532 slow 1533 | 1533 small 1534 | 1534 smart 1535 | 1535 smoke 1536 | 1536 smtp 1537 | 1537 snumber 1538 | 1538 so 1539 | 1539 social 1540 | 1540 societi 1541 | 1541 softwar 1542 | 1542 sold 1543 | 1543 solut 1544 | 1544 solv 1545 | 1545 some 1546 | 1546 someon 1547 | 1547 someth 1548 | 1548 sometim 1549 | 1549 son 1550 | 1550 song 1551 | 1551 soni 1552 | 1552 soon 1553 | 1553 sorri 1554 | 1554 sort 1555 | 1555 sound 1556 | 1556 sourc 1557 | 1557 south 1558 | 1558 space 1559 | 1559 spain 1560 | 1560 spam 1561 | 1561 spamassassin 1562 | 1562 spamd 1563 | 1563 spammer 1564 | 1564 speak 1565 | 1565 spec 1566 | 1566 special 1567 | 1567 specif 1568 | 1568 specifi 1569 | 1569 speech 1570 | 1570 speed 1571 | 1571 spend 1572 | 1572 sponsor 1573 | 1573 sport 1574 | 1574 spot 1575 | 1575 src 1576 | 1576 ssh 1577 | 1577 st 1578 | 1578 stabl 1579 | 1579 staff 1580 | 1580 stai 1581 | 1581 stand 1582 | 1582 standard 1583 | 1583 star 1584 | 1584 start 1585 | 1585 state 1586 | 1586 statement 1587 | 1587 statu 1588 | 1588 step 1589 | 1589 steve 1590 | 1590 still 1591 | 1591 stock 1592 | 1592 stop 1593 | 1593 storag 1594 | 1594 store 1595 | 1595 stori 1596 | 1596 strategi 1597 | 1597 stream 1598 | 1598 street 1599 | 1599 string 1600 | 1600 strip 1601 | 1601 strong 1602 | 1602 structur 1603 | 1603 studi 1604 | 1604 stuff 1605 | 1605 stupid 
1606 | 1606 style 1607 | 1607 subject 1608 | 1608 submit 1609 | 1609 subscrib 1610 | 1610 subscript 1611 | 1611 substanti 1612 | 1612 success 1613 | 1613 such 1614 | 1614 suffer 1615 | 1615 suggest 1616 | 1616 suit 1617 | 1617 sum 1618 | 1618 summari 1619 | 1619 summer 1620 | 1620 sun 1621 | 1621 super 1622 | 1622 suppli 1623 | 1623 support 1624 | 1624 suppos 1625 | 1625 sure 1626 | 1626 surpris 1627 | 1627 suse 1628 | 1628 suspect 1629 | 1629 sweet 1630 | 1630 switch 1631 | 1631 system 1632 | 1632 tab 1633 | 1633 tabl 1634 | 1634 tablet 1635 | 1635 tag 1636 | 1636 take 1637 | 1637 taken 1638 | 1638 talk 1639 | 1639 tape 1640 | 1640 target 1641 | 1641 task 1642 | 1642 tax 1643 | 1643 teach 1644 | 1644 team 1645 | 1645 tech 1646 | 1646 technic 1647 | 1647 techniqu 1648 | 1648 technolog 1649 | 1649 tel 1650 | 1650 telecom 1651 | 1651 telephon 1652 | 1652 tell 1653 | 1653 temperatur 1654 | 1654 templ 1655 | 1655 ten 1656 | 1656 term 1657 | 1657 termin 1658 | 1658 terror 1659 | 1659 terrorist 1660 | 1660 test 1661 | 1661 texa 1662 | 1662 text 1663 | 1663 than 1664 | 1664 thank 1665 | 1665 that 1666 | 1666 the 1667 | 1667 thei 1668 | 1668 their 1669 | 1669 them 1670 | 1670 themselv 1671 | 1671 then 1672 | 1672 theori 1673 | 1673 there 1674 | 1674 therefor 1675 | 1675 these 1676 | 1676 thi 1677 | 1677 thing 1678 | 1678 think 1679 | 1679 thinkgeek 1680 | 1680 third 1681 | 1681 those 1682 | 1682 though 1683 | 1683 thought 1684 | 1684 thousand 1685 | 1685 thread 1686 | 1686 threat 1687 | 1687 three 1688 | 1688 through 1689 | 1689 thu 1690 | 1690 thursdai 1691 | 1691 ti 1692 | 1692 ticket 1693 | 1693 tim 1694 | 1694 time 1695 | 1695 tip 1696 | 1696 tire 1697 | 1697 titl 1698 | 1698 tm 1699 | 1699 to 1700 | 1700 todai 1701 | 1701 togeth 1702 | 1702 token 1703 | 1703 told 1704 | 1704 toll 1705 | 1705 tom 1706 | 1706 toner 1707 | 1707 toni 1708 | 1708 too 1709 | 1709 took 1710 | 1710 tool 1711 | 1711 top 1712 | 1712 topic 1713 | 1713 total 1714 | 1714 touch 1715 | 1715 toward 
1716 | 1716 track 1717 | 1717 trade 1718 | 1718 tradit 1719 | 1719 traffic 1720 | 1720 train 1721 | 1721 transact 1722 | 1722 transfer 1723 | 1723 travel 1724 | 1724 treat 1725 | 1725 tree 1726 | 1726 tri 1727 | 1727 trial 1728 | 1728 trick 1729 | 1729 trip 1730 | 1730 troubl 1731 | 1731 true 1732 | 1732 truli 1733 | 1733 trust 1734 | 1734 truth 1735 | 1735 try 1736 | 1736 tue 1737 | 1737 tuesdai 1738 | 1738 turn 1739 | 1739 tv 1740 | 1740 two 1741 | 1741 type 1742 | 1742 uk 1743 | 1743 ultim 1744 | 1744 un 1745 | 1745 under 1746 | 1746 understand 1747 | 1747 unfortun 1748 | 1748 uniqu 1749 | 1749 unison 1750 | 1750 unit 1751 | 1751 univers 1752 | 1752 unix 1753 | 1753 unless 1754 | 1754 unlik 1755 | 1755 unlimit 1756 | 1756 unseen 1757 | 1757 unsolicit 1758 | 1758 unsubscrib 1759 | 1759 until 1760 | 1760 up 1761 | 1761 updat 1762 | 1762 upgrad 1763 | 1763 upon 1764 | 1764 urgent 1765 | 1765 url 1766 | 1766 us 1767 | 1767 usa 1768 | 1768 usag 1769 | 1769 usb 1770 | 1770 usd 1771 | 1771 usdollarnumb 1772 | 1772 useless 1773 | 1773 user 1774 | 1774 usr 1775 | 1775 usual 1776 | 1776 util 1777 | 1777 vacat 1778 | 1778 valid 1779 | 1779 valu 1780 | 1780 valuabl 1781 | 1781 var 1782 | 1782 variabl 1783 | 1783 varieti 1784 | 1784 variou 1785 | 1785 ve 1786 | 1786 vendor 1787 | 1787 ventur 1788 | 1788 veri 1789 | 1789 verifi 1790 | 1790 version 1791 | 1791 via 1792 | 1792 video 1793 | 1793 view 1794 | 1794 virtual 1795 | 1795 visa 1796 | 1796 visit 1797 | 1797 visual 1798 | 1798 vnumber 1799 | 1799 voic 1800 | 1800 vote 1801 | 1801 vs 1802 | 1802 vulner 1803 | 1803 wa 1804 | 1804 wai 1805 | 1805 wait 1806 | 1806 wake 1807 | 1807 walk 1808 | 1808 wall 1809 | 1809 want 1810 | 1810 war 1811 | 1811 warm 1812 | 1812 warn 1813 | 1813 warranti 1814 | 1814 washington 1815 | 1815 wasn 1816 | 1816 wast 1817 | 1817 watch 1818 | 1818 water 1819 | 1819 we 1820 | 1820 wealth 1821 | 1821 weapon 1822 | 1822 web 1823 | 1823 weblog 1824 | 1824 websit 1825 | 1825 wed 1826 | 1826 wednesdai 
1827 | 1827 week 1828 | 1828 weekli 1829 | 1829 weight 1830 | 1830 welcom 1831 | 1831 well 1832 | 1832 went 1833 | 1833 were 1834 | 1834 west 1835 | 1835 what 1836 | 1836 whatev 1837 | 1837 when 1838 | 1838 where 1839 | 1839 whether 1840 | 1840 which 1841 | 1841 while 1842 | 1842 white 1843 | 1843 whitelist 1844 | 1844 who 1845 | 1845 whole 1846 | 1846 whose 1847 | 1847 why 1848 | 1848 wi 1849 | 1849 wide 1850 | 1850 width 1851 | 1851 wife 1852 | 1852 will 1853 | 1853 william 1854 | 1854 win 1855 | 1855 window 1856 | 1856 wing 1857 | 1857 winner 1858 | 1858 wireless 1859 | 1859 wish 1860 | 1860 with 1861 | 1861 within 1862 | 1862 without 1863 | 1863 wnumberp 1864 | 1864 woman 1865 | 1865 women 1866 | 1866 won 1867 | 1867 wonder 1868 | 1868 word 1869 | 1869 work 1870 | 1870 worker 1871 | 1871 world 1872 | 1872 worldwid 1873 | 1873 worri 1874 | 1874 worst 1875 | 1875 worth 1876 | 1876 would 1877 | 1877 wouldn 1878 | 1878 write 1879 | 1879 written 1880 | 1880 wrong 1881 | 1881 wrote 1882 | 1882 www 1883 | 1883 ximian 1884 | 1884 xml 1885 | 1885 xp 1886 | 1886 yahoo 1887 | 1887 ye 1888 | 1888 yeah 1889 | 1889 year 1890 | 1890 yesterdai 1891 | 1891 yet 1892 | 1892 york 1893 | 1893 you 1894 | 1894 young 1895 | 1895 your 1896 | 1896 yourself 1897 | 1897 zdnet 1898 | 1898 zero 1899 | 1899 zip 1900 | -------------------------------------------------------------------------------- /ex6/ex6.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datasciencescoop/CourseraML/bb2ba970443b417ab70f2b6345a7dc46c163fd54/ex6/ex6.pdf -------------------------------------------------------------------------------- /ex6/ex6_spam.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Programming Exercise 6: Support Vector Machines" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | 
"execution_count": 1, 13 | "metadata": { 14 | "collapsed": false 15 | }, 16 | "outputs": [], 17 | "source": [ 18 | "%matplotlib inline\n", 19 | "import numpy as np\n", 20 | "import matplotlib.pyplot as plt\n", 21 | "import scipy.io #Used to load the OCTAVE *.mat files\n", 22 | "from sklearn import svm #SVM software\n", 23 | "import re #regular expression for e-mail processing\n", 24 | "\n", 25 | "# This is one possible porter stemmer \n", 26 | "# (note: I had to do a pip install stemming)\n", 27 | "# https://pypi.python.org/pypi/stemming/1.0\n", 28 | "from stemming.porter2 import stem\n", 29 | "\n", 30 | "# This porter stemmer seems to more accurately duplicate the\n", 31 | "# porter stemmer used in the OCTAVE assignment code\n", 32 | "# (note: I had to do a pip install nltk)\n", 33 | "# I'll note that both stemmers have very similar results\n", 34 | "import nltk, nltk.stem.porter" 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "metadata": {}, 40 | "source": [ 41 | "### 2 Spam Classification" 42 | ] 43 | }, 44 | { 45 | "cell_type": "markdown", 46 | "metadata": {}, 47 | "source": [ 48 | "#### 2.1 Preprocessing Emails" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 2, 54 | "metadata": { 55 | "collapsed": false 56 | }, 57 | "outputs": [ 58 | { 59 | "name": "stdout", 60 | "output_type": "stream", 61 | "text": [ 62 | "emailSample1.txt:\n", 63 | "> Anyone knows how much it costs to host a web portal ?\r\n", 64 | ">\r\n", 65 | "Well, it depends on how many visitors you're expecting.\r\n", 66 | "This can be anywhere from less than 10 bucks a month to a couple of $100. 
\r\n", 67 | "You should checkout http://www.rackspace.com/ or perhaps Amazon EC2 \r\n", 68 | "if youre running something big..\r\n", 69 | "\r\n", 70 | "To unsubscribe yourself from this mailing list, send an email to:\r\n", 71 | "groupname-unsubscribe@egroups.com\r\n", 72 | "\r\n" 73 | ] 74 | } 75 | ], 76 | "source": [ 77 | "print \"emailSample1.txt:\"\n", 78 | "!cat data/emailSample1.txt" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": 3, 84 | "metadata": { 85 | "collapsed": true 86 | }, 87 | "outputs": [], 88 | "source": [ 89 | "def preProcess( email ):\n", 90 | " \"\"\"\n", 91 | " Function to do some pre processing (simplification of e-mails).\n", 92 | " Comments throughout implementation describe what it does.\n", 93 | " Input = raw e-mail\n", 94 | " Output = processed (simplified) email\n", 95 | " \"\"\"\n", 96 | " # Make the entire e-mail lower case\n", 97 | " email = email.lower()\n", 98 | " \n", 99 | " # Strip html tags (strings that look like where 'blah' does not\n", 100 | " # contain '<' or '>')... 
replace with a space\n", 101 | " email = re.sub('<[^<>]+>', ' ', email);\n", 102 | " \n", 103 | " #Any numbers get replaced with the string 'number'\n", 104 | " email = re.sub('[0-9]+', 'number', email)\n", 105 | " \n", 106 | " #Anything starting with http or https:// replaced with 'httpaddr'\n", 107 | " email = re.sub('(http|https)://[^\\s]*', 'httpaddr', email)\n", 108 | " \n", 109 | " #Strings with \"@\" in the middle are considered emails --> 'emailaddr'\n", 110 | " email = re.sub('[^\\s]+@[^\\s]+', 'emailaddr', email);\n", 111 | " \n", 112 | " #The '$' sign gets replaced with 'dollar'\n", 113 | " email = re.sub('[$]+', 'dollar', email);\n", 114 | " \n", 115 | " return email" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": 4, 121 | "metadata": { 122 | "collapsed": true 123 | }, 124 | "outputs": [], 125 | "source": [ 126 | "def email2TokenList( raw_email ):\n", 127 | " \"\"\"\n", 128 | " Function that takes in preprocessed (simplified) email, tokenizes it,\n", 129 | " stems each word, and returns an (ordered) list of tokens in the e-mail\n", 130 | " \"\"\"\n", 131 | " \n", 132 | " # I'll use the NLTK stemmer because it more accurately duplicates the\n", 133 | " # performance of the OCTAVE implementation in the assignment\n", 134 | " stemmer = nltk.stem.porter.PorterStemmer()\n", 135 | " \n", 136 | " email = preProcess( raw_email )\n", 137 | "\n", 138 | " #Split the e-mail into individual words (tokens) (split by the delimiter ' ')\n", 139 | " #but also split by delimiters '@', '$', '/', etc etc\n", 140 | " #Splitting by many delimiters is easiest with re.split()\n", 141 | " tokens = re.split('[ \\@\\$\\/\\#\\.\\-\\:\\&\\*\\+\\=\\[\\]\\?\\!\\(\\)\\{\\}\\,\\'\\\"\\>\\_\\<\\;\\%]', email)\n", 142 | " \n", 143 | " #Loop over each word (token) and use a stemmer to shorten it,\n", 144 | " #then check if the word is in the vocab_list... 
if it is,\n", 145 | " #store what index in the vocab_list the word is\n", 146 | " tokenlist = []\n", 147 | " for token in tokens:\n", 148 | " \n", 149 | " #Remove any non alphanumeric characters\n", 150 | " token = re.sub('[^a-zA-Z0-9]', '', token);\n", 151 | "\n", 152 | " #Use the Porter stemmer to stem the word\n", 153 | " stemmed = stemmer.stem( token )\n", 154 | " \n", 155 | " #Throw out empty tokens\n", 156 | " if not len(token): continue\n", 157 | " \n", 158 | " #Store a list of all unique stemmed words\n", 159 | " tokenlist.append(stemmed)\n", 160 | " \n", 161 | " return tokenlist" 162 | ] 163 | }, 164 | { 165 | "cell_type": "markdown", 166 | "metadata": { 167 | "collapsed": true 168 | }, 169 | "source": [ 170 | "##### 2.1.1 Vocabulary List" 171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": 5, 176 | "metadata": { 177 | "collapsed": false 178 | }, 179 | "outputs": [], 180 | "source": [ 181 | "def getVocabDict(reverse=False):\n", 182 | " \"\"\"\n", 183 | " Function to read in the supplied vocab list text file into a dictionary.\n", 184 | " I'll use this for now, but since I'm using a slightly different stemmer,\n", 185 | " I'd like to generate this list myself from some sort of data set...\n", 186 | " Dictionary key is the stemmed word, value is the index in the text file\n", 187 | " If \"reverse\", the keys and values are switched.\n", 188 | " \"\"\"\n", 189 | " vocab_dict = {}\n", 190 | " with open(\"data/vocab.txt\") as f:\n", 191 | " for line in f:\n", 192 | " (val, key) = line.split()\n", 193 | " if not reverse:\n", 194 | " vocab_dict[key] = int(val)\n", 195 | " else:\n", 196 | " vocab_dict[int(val)] = key\n", 197 | " \n", 198 | " return vocab_dict" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": 6, 204 | "metadata": { 205 | "collapsed": false 206 | }, 207 | "outputs": [], 208 | "source": [ 209 | "def email2VocabIndices( raw_email, vocab_dict ):\n", 210 | " \"\"\"\n", 211 | " Function that takes in a 
raw email and returns a list of indices corresponding\n", 212 | " to the location in vocab_dict for each stemmed word in the email.\n", 213 | " \"\"\"\n", 214 | " tokenlist = email2TokenList( raw_email )\n", 215 | " index_list = [ vocab_dict[token] for token in tokenlist if token in vocab_dict ]\n", 216 | " return index_list" 217 | ] 218 | }, 219 | { 220 | "cell_type": "markdown", 221 | "metadata": {}, 222 | "source": [ 223 | "#### 2.2 Extracting Features from Emails" 224 | ] 225 | }, 226 | { 227 | "cell_type": "code", 228 | "execution_count": 7, 229 | "metadata": { 230 | "collapsed": true 231 | }, 232 | "outputs": [], 233 | "source": [ 234 | "def email2FeatureVector( raw_email, vocab_dict ):\n", 235 | " \"\"\"\n", 236 | " Function that takes as input a raw email, and returns a vector of shape\n", 237 | " (n,1) where n is the size of the vocab_dict.\n", 238 | " The first element in this vector is 1 if the vocab word with index == 1\n", 239 | " is in the raw_email, 0 otherwise.\n", 240 | " \"\"\"\n", 241 | " n = len(vocab_dict)\n", 242 | " result = np.zeros((n,1))\n", 243 | " vocab_indices = email2VocabIndices( email_contents, vocab_dict )\n", 244 | " for idx in vocab_indices:\n", 245 | " result[idx] = 1\n", 246 | " return result" 247 | ] 248 | }, 249 | { 250 | "cell_type": "code", 251 | "execution_count": 8, 252 | "metadata": { 253 | "collapsed": false 254 | }, 255 | "outputs": [ 256 | { 257 | "name": "stdout", 258 | "output_type": "stream", 259 | "text": [ 260 | "Length of feature vector is 1899\n", 261 | "Number of non-zero entries is: 45\n" 262 | ] 263 | } 264 | ], 265 | "source": [ 266 | "# \" ... run your code on the email sample. 
You should see that the feature vector \n", 267 | "# has length 1899 and 45 non-zero entries.\"\n", 268 | "\n", 269 | "vocab_dict = getVocabDict()\n", 270 | "email_contents = open( 'data/emailSample1.txt', 'r' ).read()\n", 271 | "test_fv = email2FeatureVector( email_contents, vocab_dict )\n", 272 | "\n", 273 | "print \"Length of feature vector is %d\" % len(test_fv)\n", 274 | "print \"Number of non-zero entries is: %d\" % sum(test_fv==1)" 275 | ] 276 | }, 277 | { 278 | "cell_type": "markdown", 279 | "metadata": {}, 280 | "source": [ 281 | "#### 2.3 Training SVM for Spam Classification" 282 | ] 283 | }, 284 | { 285 | "cell_type": "code", 286 | "execution_count": 9, 287 | "metadata": { 288 | "collapsed": false 289 | }, 290 | "outputs": [], 291 | "source": [ 292 | "# Read in the training set and test set provided\n", 293 | "# Note the feature vectors correspond to the stemming implementation\n", 294 | "# done in the OCTAVE code... which may be different than mine.\n", 295 | "\n", 296 | "# Training set\n", 297 | "datafile = 'data/spamTrain.mat'\n", 298 | "mat = scipy.io.loadmat( datafile )\n", 299 | "X, y = mat['X'], mat['y']\n", 300 | "#NOT inserting a column of 1's in case SVM software does it for me automatically...\n", 301 | "#X = np.insert(X ,0,1,axis=1)\n", 302 | "\n", 303 | "# Test set\n", 304 | "datafile = 'data/spamTest.mat'\n", 305 | "mat = scipy.io.loadmat( datafile )\n", 306 | "Xtest, ytest = mat['Xtest'], mat['ytest']" 307 | ] 308 | }, 309 | { 310 | "cell_type": "code", 311 | "execution_count": 10, 312 | "metadata": { 313 | "collapsed": false 314 | }, 315 | "outputs": [ 316 | { 317 | "name": "stdout", 318 | "output_type": "stream", 319 | "text": [ 320 | "Total number of training emails = 4000\n", 321 | "Number of training spam emails = 1277\n", 322 | "Number of training nonspam emails = 2723\n" 323 | ] 324 | } 325 | ], 326 | "source": [ 327 | "pos = np.array([X[i] for i in xrange(X.shape[0]) if y[i] == 1])\n", 328 | "neg = np.array([X[i] for i in 
xrange(X.shape[0]) if y[i] == 0])\n", 329 | "print 'Total number of training emails = ',X.shape[0]\n", 330 | "print 'Number of training spam emails = ',pos.shape[0]\n", 331 | "print 'Number of training nonspam emails = ',neg.shape[0]" 332 | ] 333 | }, 334 | { 335 | "cell_type": "code", 336 | "execution_count": 11, 337 | "metadata": { 338 | "collapsed": false 339 | }, 340 | "outputs": [ 341 | { 342 | "data": { 343 | "text/plain": [ 344 | "SVC(C=0.1, cache_size=200, class_weight=None, coef0=0.0, degree=3, gamma=0.0,\n", 345 | " kernel='linear', max_iter=-1, probability=False, random_state=None,\n", 346 | " shrinking=True, tol=0.001, verbose=False)" 347 | ] 348 | }, 349 | "execution_count": 11, 350 | "metadata": {}, 351 | "output_type": "execute_result" 352 | } 353 | ], 354 | "source": [ 355 | "# Run the SVM training (with C = 0.1) using SVM software. \n", 356 | "\n", 357 | "# First we make an instance of an SVM with C=0.1 and 'linear' kernel\n", 358 | "linear_svm = svm.SVC(C=0.1, kernel='linear')\n", 359 | "\n", 360 | "# Now we fit the SVM to our X matrix, given the labels y\n", 361 | "linear_svm.fit( X, y.flatten() )" 362 | ] 363 | }, 364 | { 365 | "cell_type": "code", 366 | "execution_count": 12, 367 | "metadata": { 368 | "collapsed": false 369 | }, 370 | "outputs": [ 371 | { 372 | "name": "stdout", 373 | "output_type": "stream", 374 | "text": [ 375 | "Training accuracy = 99.83%\n", 376 | "Test set accuracy = 98.90%\n" 377 | ] 378 | } 379 | ], 380 | "source": [ 381 | "# \"Once the training completes, you should see that the classifier gets a \n", 382 | "# training accuracy of about 99.8% and a test accuracy of about 98.5%\"\n", 383 | "\n", 384 | "train_predictions = linear_svm.predict(X).reshape((y.shape[0],1))\n", 385 | "train_acc = 100. * float(sum(train_predictions == y))/y.shape[0]\n", 386 | "print 'Training accuracy = %0.2f%%' % train_acc\n", 387 | "\n", 388 | "test_predictions = linear_svm.predict(Xtest).reshape((ytest.shape[0],1))\n", 389 | "test_acc = 100. 
* float(sum(test_predictions == ytest))/ytest.shape[0]\n", 390 | "print 'Test set accuracy = %0.2f%%' % test_acc" 391 | ] 392 | }, 393 | { 394 | "cell_type": "markdown", 395 | "metadata": {}, 396 | "source": [ 397 | "#### 2.4 Top Predictors for Spam" 398 | ] 399 | }, 400 | { 401 | "cell_type": "code", 402 | "execution_count": 13, 403 | "metadata": { 404 | "collapsed": false, 405 | "scrolled": false 406 | }, 407 | "outputs": [ 408 | { 409 | "name": "stdout", 410 | "output_type": "stream", 411 | "text": [ 412 | "The 15 most important words to classify a spam e-mail are:\n", 413 | "['otherwis', 'clearli', 'remot', 'gt', 'visa', 'base', 'doesn', 'wife', 'previous', 'player', 'mortgag', 'natur', 'll', 'futur', 'hot']\n", 414 | "\n", 415 | "The 15 least important words to classify a spam e-mail are:\n", 416 | "['http', 'toll', 'xp', 'ratio', 'august', 'unsubscrib', 'useless', 'numberth', 'round', 'linux', 'datapow', 'wrong', 'urgent', 'that', 'spam']\n", 417 | "\n", 418 | "# of spam containing \"otherwis\" = 804/1277 = 62.96%\n", 419 | "# of NON spam containing \"otherwis\" = 301/2723 = 11.05%\n" 420 | ] 421 | } 422 | ], 423 | "source": [ 424 | "# Determine the words most likely to indicate an e-mail is a spam\n", 425 | "# From the trained SVM we can get a list of the weight coefficients for each\n", 426 | "# word (technically, each word index)\n", 427 | "\n", 428 | "vocab_dict_flipped = getVocabDict(reverse=True)\n", 429 | "\n", 430 | "#Sort indices from most important to least-important (high to low weight)\n", 431 | "sorted_indices = np.argsort( linear_svm.coef_, axis=None )[::-1]\n", 432 | "print \"The 15 most important words to classify a spam e-mail are:\"\n", 433 | "print [ vocab_dict_flipped[x] for x in sorted_indices[:15] ]\n", 434 | "print\n", 435 | "print \"The 15 least important words to classify a spam e-mail are:\"\n", 436 | "print [ vocab_dict_flipped[x] for x in sorted_indices[-15:] ]\n", 437 | "print\n", 438 | "\n", 439 | "# Most common word (mostly to 
debug):\n", 440 | "most_common_word = vocab_dict_flipped[sorted_indices[0]]\n", 441 | "print '# of spam containing \\\"%s\\\" = %d/%d = %0.2f%%'% \\\n", 442 | " (most_common_word, sum(pos[:,1190]),pos.shape[0], \\\n", 443 | " 100.*float(sum(pos[:,1190]))/pos.shape[0])\n", 444 | "print '# of NON spam containing \\\"%s\\\" = %d/%d = %0.2f%%'% \\\n", 445 | " (most_common_word, sum(neg[:,1190]),neg.shape[0], \\\n", 446 | " 100.*float(sum(neg[:,1190]))/neg.shape[0])" 447 | ] 448 | }, 449 | { 450 | "cell_type": "code", 451 | "execution_count": 14, 452 | "metadata": { 453 | "collapsed": true 454 | }, 455 | "outputs": [], 456 | "source": [ 457 | "# Note my SVM gets some different predictor words for spam than shown in the\n", 458 | "# assignment PDF... I've done debugging and I'm confident it's due to a different\n", 459 | "# SVM software package, not because of a bug or something in my code.\n", 460 | "\n", 461 | "# Also note the optional exercises \"Try your own emails\" and \"Build your own\n", 462 | "# dataset\" I will be doing separately in a blog post... Check out\n", 463 | "# blog.davidkaleko.com/svm-email-filter-implementation.html to have a look!" 
464 | ] 465 | } 466 | ], 467 | "metadata": { 468 | "kernelspec": { 469 | "display_name": "Python 2", 470 | "language": "python", 471 | "name": "python2" 472 | }, 473 | "language_info": { 474 | "codemirror_mode": { 475 | "name": "ipython", 476 | "version": 2 477 | }, 478 | "file_extension": ".py", 479 | "mimetype": "text/x-python", 480 | "name": "python", 481 | "nbconvert_exporter": "python", 482 | "pygments_lexer": "ipython2", 483 | "version": "2.7.5" 484 | } 485 | }, 486 | "nbformat": 4, 487 | "nbformat_minor": 0 488 | } 489 | -------------------------------------------------------------------------------- /ex7/data/bird_small.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datasciencescoop/CourseraML/bb2ba970443b417ab70f2b6345a7dc46c163fd54/ex7/data/bird_small.mat -------------------------------------------------------------------------------- /ex7/data/bird_small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datasciencescoop/CourseraML/bb2ba970443b417ab70f2b6345a7dc46c163fd54/ex7/data/bird_small.png -------------------------------------------------------------------------------- /ex7/data/ex7data1.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datasciencescoop/CourseraML/bb2ba970443b417ab70f2b6345a7dc46c163fd54/ex7/data/ex7data1.mat -------------------------------------------------------------------------------- /ex7/data/ex7data2.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datasciencescoop/CourseraML/bb2ba970443b417ab70f2b6345a7dc46c163fd54/ex7/data/ex7data2.mat -------------------------------------------------------------------------------- /ex7/data/ex7faces.mat: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/datasciencescoop/CourseraML/bb2ba970443b417ab70f2b6345a7dc46c163fd54/ex7/data/ex7faces.mat -------------------------------------------------------------------------------- /ex7/ex7.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datasciencescoop/CourseraML/bb2ba970443b417ab70f2b6345a7dc46c163fd54/ex7/ex7.pdf -------------------------------------------------------------------------------- /ex8/data/ex8_movieParams.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datasciencescoop/CourseraML/bb2ba970443b417ab70f2b6345a7dc46c163fd54/ex8/data/ex8_movieParams.mat -------------------------------------------------------------------------------- /ex8/data/ex8_movies.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datasciencescoop/CourseraML/bb2ba970443b417ab70f2b6345a7dc46c163fd54/ex8/data/ex8_movies.mat -------------------------------------------------------------------------------- /ex8/data/ex8data1.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datasciencescoop/CourseraML/bb2ba970443b417ab70f2b6345a7dc46c163fd54/ex8/data/ex8data1.mat -------------------------------------------------------------------------------- /ex8/data/ex8data2.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datasciencescoop/CourseraML/bb2ba970443b417ab70f2b6345a7dc46c163fd54/ex8/data/ex8data2.mat -------------------------------------------------------------------------------- /ex8/data/movie_ids.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datasciencescoop/CourseraML/bb2ba970443b417ab70f2b6345a7dc46c163fd54/ex8/data/movie_ids.txt 
-------------------------------------------------------------------------------- /ex8/ex8.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datasciencescoop/CourseraML/bb2ba970443b417ab70f2b6345a7dc46c163fd54/ex8/ex8.pdf --------------------------------------------------------------------------------